当前位置: 首页 > news >正文

PaddleOCR 截图自动文字识别

春节假期在家无聊,撸了三个小工具:PC截图+编辑/PC录屏(用于meeting录屏)/PC截屏文字识别。因为感觉这三个小工具是工作中常常需要用到的,github上也有很多开源的,不过总有点或多或少的小问题,不利于自己的使用。脚本的编写尽量减少对三方库的使用。

已全部完成,这是其中的一个,后续会将这三个工具集成在一个工具中。

import tkinter as tk
from tkinter import ttk, messagebox, font, filedialog
from PIL import Image, ImageTk, ImageGrab
import sys
import tempfile
import threading
from pathlib import Path
import ctypes
import logging.handlers
from datetime import datetime# 最小化控制台窗口
def minimize_console():
    """Minimize the console window attached to this process.

    Only meaningful on Windows. On other platforms ``ctypes.windll`` does not
    exist, so the function returns without doing anything. It also does
    nothing when the process has no console (e.g. started with ``pythonw``),
    in which case ``GetConsoleWindow`` returns 0.
    """
    if sys.platform != "win32":
        return
    sw_minimize = 6  # Win32 ShowWindow command: SW_MINIMIZE
    console_hwnd = ctypes.windll.kernel32.GetConsoleWindow()
    if console_hwnd:
        ctypes.windll.user32.ShowWindow(console_hwnd, sw_minimize)


minimize_console()  # Minimize the console as soon as the script starts


# Get the directory that contains this script
def get_script_directory():
    """Return the directory containing this script as a ``Path``."""
    script_file = Path(__file__)
    return script_file.parent


# Configure the log file path and log level
log_file_path = get_script_directory() / 'ocr_errors.log'

# Use ONE rotating handler for the log file. Passing ``filename=`` to
# basicConfig and then adding a RotatingFileHandler for the same path would
# attach two handlers to the root logger: every record would be written twice
# (once without the format string), and the second open handle can prevent
# the rollover rename on Windows.
# The log messages contain Chinese text, so the encoding is pinned to UTF-8
# instead of depending on the system locale.
handler = logging.handlers.RotatingFileHandler(
    log_file_path,
    maxBytes=1024 * 1024 * 5,  # rotate after 5 MiB
    backupCount=3,
    encoding='utf-8',
)
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[handler],
)
logger = logging.getLogger()


# Save a temporary image to disk
def save_temp_image(image, suffix='.png'):
    """Write *image* to a newly created temporary file and return its path.

    Args:
        image: Any object exposing ``save(filename)`` (a PIL image here).
        suffix: File-name suffix; it also selects the image format.

    Returns:
        ``Path`` of the temporary file. The caller must delete it.

    Raises:
        Whatever ``image.save`` raises; the temporary file is removed first.
    """
    # Only reserve the name here; the handle is closed before the image is
    # written so the file is never opened twice at the same time.
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
        temp_path = Path(temp_file.name)
    try:
        image.save(str(temp_path))
    except Exception:
        # Do not leave an orphaned temp file behind when saving fails.
        if temp_path.exists():
            temp_path.unlink()
        raise
    return temp_path


class OCRApp:
    """Screenshot-to-text tool: select a screen region, OCR it, show the text.

    The PaddleOCR model is loaded on a background thread so that the region
    selection overlay can appear immediately. All Tk calls stay on the main
    thread; the background thread only assigns ``ocr_model`` or
    ``ocr_load_error``.
    """

    def __init__(self):
        """Create the hidden root window, start model loading and selection."""
        try:
            self.root = tk.Tk()
            self.root.withdraw()
            # Disable horizontal resizing (this also disables the maximize button)
            self.root.resizable(False, True)
            self.screenshot = None
            self.ocr_model = None  # Lazily initialised by load_ocr_model
            self.ocr_load_error = None  # Message of the last failed model load
            self.recognized_text = ""
            self.main_frame = None
            self.load_win = None
            self.selection_win = None
            # Load the OCR model in the background so the script can enter
            # screenshot mode right after start-up.
            threading.Thread(target=self.load_ocr_model, daemon=True).start()
            # Start selecting the screenshot region immediately
            self.start_selection()
        except Exception as e:
            self.show_crash_message(f"程序启动失败: {str(e)}")
            sys.exit(1)

    def load_ocr_model(self):
        """Load the PaddleOCR model (runs on a background thread).

        On success ``ocr_model`` is set. On failure, including a failing
        ``paddleocr`` import, the error text is stored in ``ocr_load_error``
        so the polling code on the main thread can stop waiting and report it.
        """
        try:
            from paddleocr import PaddleOCR
            self.ocr_model = PaddleOCR(use_angle_cls=True, show_log=False, lang='ch')
        except Exception as e:
            logger.exception(f"OCR模型加载失败: {str(e)}")
            self.ocr_load_error = str(e)

    # ------------------------------------------------------------------
    # Region selection overlay
    # ------------------------------------------------------------------
    def start_selection(self):
        """Open the full-screen translucent overlay used to pick a region."""
        # Remove an overlay left over from a failed selection, otherwise
        # overlays would pile up on top of each other.
        stale = getattr(self, 'selection_win', None)
        if stale is not None and stale.winfo_exists():
            stale.destroy()

        self.selection_win = tk.Toplevel()
        self.selection_win.attributes("-fullscreen", True)
        self.selection_win.attributes("-alpha", 0.3)
        # Bind ESC on the whole overlay window
        self.selection_win.bind("<Escape>", self.on_escape)

        self.canvas = tk.Canvas(
            self.selection_win,
            cursor="cross",
            bg="gray30",
            highlightthickness=0,
        )
        self.canvas.pack(fill=tk.BOTH, expand=True)

        self.start_x = self.start_y = 0
        self.rect_id = None
        self.crosshair_ids = []

        self.canvas.bind("<Button-1>", self.on_mouse_down)
        self.canvas.bind("<B1-Motion>", self.on_mouse_drag)
        self.canvas.bind("<ButtonRelease-1>", self.on_mouse_up)
        self.canvas.bind("<Motion>", self.on_mouse_move)

        self.escape_label = tk.Label(
            self.selection_win,
            text="按ESC键退出截图",
            fg="yellow",
            bg="gray20",
            font=("Helvetica", 12, "bold"),
        )
        self.escape_label.place(x=10, y=10)
        self.update_crosshair(0, 0)

    def on_mouse_down(self, event):
        """Remember the drag origin and clear crosshair / previous rectangle."""
        self.start_x = event.x
        self.start_y = event.y
        self.clear_crosshair()
        if self.rect_id:
            self.canvas.delete(self.rect_id)
            self.rect_id = None

    def on_mouse_drag(self, event):
        """Create or resize the selection rectangle while dragging."""
        current_x = event.x
        current_y = event.y
        if self.rect_id:
            self.canvas.coords(self.rect_id, self.start_x, self.start_y, current_x, current_y)
        else:
            self.rect_id = self.canvas.create_rectangle(
                self.start_x, self.start_y,
                current_x, current_y,
                outline="blue", width=2, fill="gray75", tags="rect",
            )

    def on_mouse_up(self, event):
        """Validate the selection, grab the region and start OCR.

        Any failure is logged, shown to the user and followed by a new
        selection round.
        """
        try:
            x1 = min(self.start_x, event.x)
            y1 = min(self.start_y, event.y)
            x2 = max(self.start_x, event.x)
            y2 = max(self.start_y, event.y)

            if (x2 - x1) < 10 or (y2 - y1) < 10:
                raise ValueError("选区过小,请选择更大的区域")
            if (x2 - x1) > self.canvas.winfo_width() or (y2 - y1) > self.canvas.winfo_height():
                raise ValueError("选区过大,请选择更小的区域")

            self.screenshot = ImageGrab.grab(bbox=(x1, y1, x2, y2))
            self.selection_win.destroy()
            self.initialize_ocr_and_process()
        except Exception as e:
            logger.error(f"截图错误: {str(e)}")
            messagebox.showerror("截图错误", str(e))
            self.restart_selection()

    # ------------------------------------------------------------------
    # OCR pipeline
    # ------------------------------------------------------------------
    def initialize_ocr_and_process(self):
        """Run OCR now if the model is ready, otherwise wait for it.

        While the model is still loading, a progress window is shown and
        ``check_ocr_model`` is polled every 100 ms.
        """
        try:
            if self.ocr_model is None:
                self.load_win = self.show_loading("OCR模型正在加载中,请稍后...")
                self.root.after(100, self.check_ocr_model)  # poll every 100 ms
            else:
                self._show_result()
        except Exception as e:
            logger.error(f"OCR初始化失败: {str(e)}")
            self._close_loading()
            self.handle_ocr_init_error(str(e))

    def check_ocr_model(self):
        """Poll the background load; continue, report failure, or keep waiting."""
        if self.ocr_model is not None:
            self._close_loading()
            self._show_result()
        elif self.ocr_load_error is not None:
            # The loader thread gave up; stop polling instead of waiting
            # forever behind the progress window.
            self._close_loading()
            self.handle_ocr_init_error(self.ocr_load_error)
        else:
            self.root.after(100, self.check_ocr_model)  # poll every 100 ms

    def _close_loading(self):
        """Destroy the progress window if one is open."""
        if self.load_win is not None:
            self.load_win.destroy()
            self.load_win = None

    def _show_result(self):
        """Run OCR and show the main window only when OCR succeeded.

        On failure ``process_ocr`` has already restarted the selection, so
        the result window must stay hidden.
        """
        if self.process_ocr():
            self.setup_main_ui()
            self.root.deiconify()

    def process_ocr(self):
        """OCR the current screenshot and store the text in ``recognized_text``.

        Returns:
            ``True`` on success. ``False`` when OCR failed; in that case the
            error has been logged and shown and a new selection was started.
        """
        try:
            temp_image_path = save_temp_image(self.screenshot)
            try:
                result = self.ocr_model.ocr(str(temp_image_path), cls=True)
            finally:
                # Always remove the temp file, even when OCR raises.
                if temp_image_path.exists():
                    temp_image_path.unlink()
            # Post-process: merge text fragments on the same visual line.
            # An empty result or a None page means no text was detected.
            page = result[0] if result else None
            self.recognized_text = self.merge_lines(page)
            return True
        except Exception as e:
            logger.exception(f"OCR处理失败: {str(e)}")
            messagebox.showerror("识别错误", f"OCR处理失败: {str(e)}")
            self.restart_selection()
            return False

    def merge_lines(self, ocr_result):
        """Join OCR fragments that lie on the same visual line.

        Args:
            ocr_result: Iterable of ``[box, (text, score)]`` items, where
                ``box`` is a list of corner points. ``None`` or an empty
                sequence yields an empty string.

        Returns:
            Fragments of one line joined by a space, lines joined by ``\\n``.
        """
        if not ocr_result:
            return ""

        merged_text = []
        current_line = []
        current_y1 = None
        current_y2 = None
        line_threshold = 5  # max vertical offset (px) to count as the same line

        for line in ocr_result:
            _, y1 = line[0][0]  # first corner point
            _, y2 = line[0][2]  # third corner point
            text = line[1][0]   # recognised text

            if current_y1 is None or current_y2 is None:
                current_y1 = y1
                current_y2 = y2
                current_line.append(text)
            elif abs(y1 - current_y1) <= line_threshold and abs(y2 - current_y2) <= line_threshold:
                current_line.append(text)
            else:
                merged_text.append(" ".join(current_line))
                current_line = [text]
                current_y1 = y1
                current_y2 = y2

        if current_line:
            merged_text.append(" ".join(current_line))
        return "\n".join(merged_text)

    # ------------------------------------------------------------------
    # Result window
    # ------------------------------------------------------------------
    def setup_main_ui(self):
        """Build the result window on first use, then show the latest result."""
        if self.main_frame is None:
            self._build_main_ui()

        # Window title
        self.root.title("文字识别@PDM3")
        self.update_image_display()
        self.text_area.delete(1.0, tk.END)
        self.text_area.insert(tk.END, self.recognized_text.strip())
        self.update_text_area_height()
        # Keep the window above all others
        self.root.attributes('-topmost', True)

    def _build_main_ui(self):
        """Create the widgets of the result window (called once)."""
        self.main_frame = ttk.Frame(self.root, padding=20)
        self.main_frame.grid(row=0, column=0, sticky="nsew")
        self.root.grid_rowconfigure(0, weight=1)
        self.root.grid_columnconfigure(0, weight=1)

        # A PanedWindow splits the image pane from the text pane
        self.paned_window = ttk.PanedWindow(self.main_frame, orient=tk.VERTICAL)
        self.paned_window.grid(row=0, column=0, sticky="nsew")

        # Frame holding the image canvas and its scrollbar
        self.image_frame = ttk.Frame(self.paned_window)

        # Canvas showing the image, with a vertical scrollbar
        self.image_canvas = tk.Canvas(
            self.image_frame,
            highlightbackground=self.root.cget("bg"),
            highlightthickness=0,
        )
        self.image_canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        self.image_scrollbar = ttk.Scrollbar(
            self.image_frame, orient=tk.VERTICAL, command=self.image_canvas.yview
        )
        self.image_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
        self.image_canvas.config(yscrollcommand=self.image_scrollbar.set)
        self.image_canvas.bind("<Configure>", self.on_canvas_configure)

        self.image_container = ttk.Frame(self.image_canvas)
        self.image_container_id = self.image_canvas.create_window(
            (0, 0), window=self.image_container, anchor="nw"
        )
        self.img_label = ttk.Label(self.image_container)
        self.img_label.pack(fill=tk.BOTH, expand=True)

        # Text pane font; a reference is kept on the instance
        self.text_font = font.Font(family="Microsoft YaHei", size=9)
        self.text_area = tk.Text(
            self.paned_window,
            wrap=tk.WORD,
            font=self.text_font,
            height=15,  # initial height: 15 lines
        )

        # The PanedWindow manages both panes, so they are added, not packed.
        self.paned_window.add(self.image_frame)
        self.paned_window.add(self.text_area)

        btn_frame = ttk.Frame(self.main_frame)
        btn_frame.grid(row=1, column=0, sticky="ew", pady=10)
        # Make sure the button row is never squeezed
        self.main_frame.grid_rowconfigure(0, weight=1)
        self.main_frame.grid_rowconfigure(1, weight=0)

        ttk.Button(btn_frame, text="重新选择", command=self.restart_selection).pack(side=tk.LEFT, padx=5)
        ttk.Button(btn_frame, text="复制文本", command=self.copy_result).pack(side=tk.LEFT, padx=5)
        ttk.Button(btn_frame, text="保存图片", command=self.save_image).pack(side=tk.LEFT, padx=5)
        ttk.Button(btn_frame, text="退出", command=self.safe_exit).pack(side=tk.RIGHT, padx=5)

    def update_image_display(self):
        """Show the current screenshot and size the canvas to fit it.

        The canvas height is capped at half the screen height; taller images
        are reached with the scrollbar.
        """
        if self.screenshot is None:
            return
        photo = ImageTk.PhotoImage(self.screenshot)
        self.img_label.config(image=photo)
        self.img_label.image = photo  # keep a reference so Tk can keep drawing it

        img_width, img_height = self.screenshot.size
        max_image_height = self.root.winfo_screenheight() // 2
        self.image_canvas.config(scrollregion=(0, 0, img_width, img_height))
        self.image_canvas.config(height=min(img_height, max_image_height))

    def on_canvas_configure(self, event):
        """Refresh the canvas scroll region whenever the canvas is resized."""
        self.image_canvas.config(scrollregion=self.image_canvas.bbox("all"))

    def show_loading(self, message):
        """Open a small window with *message* and an indeterminate progress bar.

        Returns:
            The created ``Toplevel``; the caller destroys it.
        """
        load_win = tk.Toplevel()
        load_win.title("请稍候")
        frame = ttk.Frame(load_win, padding=20)
        frame.pack()
        ttk.Label(frame, text=message).pack(pady=10)
        progress = ttk.Progressbar(frame, mode='indeterminate')
        progress.pack(pady=5)
        progress.start()
        return load_win

    def handle_ocr_init_error(self, error_msg):
        """Ask whether to retry after an OCR initialisation failure.

        Retry restarts the background model load when the previous load
        failed, then re-enters ``initialize_ocr_and_process`` through the Tk
        event loop so that every Tk call stays on the main thread. Cancel
        exits the program.
        """
        choice = messagebox.askretrycancel(
            "OCR初始化失败",
            f"{error_msg}\n\n是否重试?",
            icon='error',
        )
        if not choice:
            self.safe_exit()
            return
        if self.ocr_model is None and self.ocr_load_error is not None:
            self.ocr_load_error = None
            threading.Thread(target=self.load_ocr_model, daemon=True).start()
        self.root.after(0, self.initialize_ocr_and_process)

    def restart_selection(self):
        """Hide the result window, drop the current result, select again."""
        if self.root.winfo_exists():
            self.root.withdraw()
        self.screenshot = None
        self.recognized_text = ""
        self.clear_ui()
        self.start_selection()

    def clear_ui(self):
        """Empty the image and text widgets if they have been created."""
        if hasattr(self, 'img_label'):
            self.img_label.config(image='')
            self.img_label.image = None
        if hasattr(self, 'text_area'):
            self.text_area.delete(1.0, tk.END)

    def copy_result(self):
        """Copy the recognised text to the clipboard and confirm it."""
        self.root.clipboard_clear()
        self.root.clipboard_append(self.recognized_text)
        messagebox.showinfo("成功", "已复制到剪贴板")

    def safe_exit(self):
        """Destroy the root window and terminate the process."""
        if self.root.winfo_exists():
            self.root.destroy()
        sys.exit(0)

    def show_crash_message(self, message):
        """Show a fatal-error dialog using a throw-away hidden root window."""
        crash_win = tk.Tk()
        crash_win.withdraw()
        messagebox.showerror("致命错误", message)
        crash_win.destroy()

    def on_escape(self, event):
        """ESC pressed on the overlay: close it and quit the program."""
        self.selection_win.destroy()
        self.safe_exit()

    def on_mouse_move(self, event):
        """Move the crosshair to follow the mouse pointer."""
        self.update_crosshair(event.x, event.y)

    def update_crosshair(self, x, y):
        """Redraw the horizontal and vertical guide lines through (x, y)."""
        self.clear_crosshair()
        self.crosshair_ids.append(
            self.canvas.create_line(
                0, y, self.canvas.winfo_width(), y,
                tags="crosshair", fill="yellow", width=2,
            )
        )
        self.crosshair_ids.append(
            self.canvas.create_line(
                x, 0, x, self.canvas.winfo_height(),
                tags="crosshair", fill="yellow", width=2,
            )
        )

    def clear_crosshair(self):
        """Delete the guide lines from the overlay canvas."""
        for crosshair_id in self.crosshair_ids:
            self.canvas.delete(crosshair_id)
        self.crosshair_ids = []

    def save_image(self):
        """Ask for a file name and save the current screenshot there.

        The dialog starts on the user's Desktop with a timestamped default
        name. A failing save is logged and reported instead of raising out
        of the button callback.
        """
        if self.screenshot is None:
            return
        desktop_path = Path.home() / 'Desktop'
        current_datetime = datetime.now().strftime("%Y%m%d_%H%M%S")
        default_filename = f"screenshot_{current_datetime}.png"
        file_path = filedialog.asksaveasfilename(
            initialdir=desktop_path,       # start on the user's Desktop
            initialfile=default_filename,  # default file name
            defaultextension=".png",
            filetypes=[("PNG files", "*.png"), ("JPEG files", "*.jpg"), ("All files", "*.*")],
        )
        if not file_path:
            return
        try:
            self.screenshot.save(file_path)
        except (OSError, ValueError) as e:
            # OSError: path not writable; ValueError: unknown file extension.
            logger.error(f"保存图片失败: {str(e)}")
            messagebox.showerror("保存失败", str(e))
            return
        messagebox.showinfo("保存成功", f"图片已保存到 {file_path}")

    def update_text_area_height(self):
        """Fit the text widget height to its content, capped at 15 lines."""
        line_count = int(self.text_area.index('end-1c').split('.')[0])
        self.text_area.config(height=min(line_count, 15))

    def run(self):
        """Enter the Tk main loop."""
        self.root.mainloop()


if __name__ == "__main__":
    app = OCRApp()
    app.run()

http://www.lryc.cn/news/531441.html

相关文章:

  • 【Blazor学习笔记】.NET Blazor学习笔记
  • UE求职Demo开发日志#21 背包-仓库-装备栏移动物品
  • 力扣988. 从叶结点开始的最小字符串
  • 《PYTHON语言程序设计》(2018版)1.7近似π。利用步幅来进行修改
  • 低通滤波算法的数学原理和C语言实现
  • 【BUUCTF杂项题】荷兰宽带数据泄露、九连环
  • 安全策略实验报告
  • Haproxy+keepalived高可用集群,haproxy宕机的解决方案
  • 亚博microros小车-原生ubuntu支持系列:20 ROS Robot APP建图
  • Dockerfile构建容器镜像
  • python 在包含类似字符\x16、\x12、\x某某的数组中将以\x开头的字符找出来的方法
  • Spring Bean 的生命周期介绍
  • 调用腾讯云批量文本翻译API翻译srt字幕
  • 车载软件架构 --- 软件定义汽车面向服务架构的应用迁移
  • Baklib引领内容中台与人工智能技术的创新融合之路
  • 想品客老师的第十一天:模块化开发
  • 接入DeepSeek大模型
  • 基于遗传算法的256QAM星座图的最优概率整形matlab仿真,对比优化前后整形星座图和误码率
  • JavaScript系列(57)--工程化实践详解
  • Linux-CentOS的yum源
  • 【大数据技术】案例03:用户行为日志分析(python+hadoop+mapreduce+yarn+hive)
  • LeetCode 0680.验证回文串 II:两侧向中间,不同就试删
  • 第二十章 存储函数
  • 架构规划之任务边界划分过程中承接分配
  • 【C++】线程池实现
  • vsnprintf的概念和使用案例
  • 解读隐私保护工具 Fluidkey:如何畅游链上世界而不暴露地址?
  • Linux环境Kanass安装配置简明教程
  • 数据分析常用的AI工具
  • 项目中常用中间件有哪些?分别起什么作用?