当前位置: 首页 > news >正文

OCR实现微信截图改名

pip install paddlepaddle -i https://pypi.tuna.tsinghua.edu.cn/simple/
pip install shapely -i https://pypi.tuna.tsinghua.edu.cn/simple/
pip install paddleocr -i https://pypi.tuna.tsinghua.edu.cn/simple/
pip install easyocr 
import easyocr
import os
import cv2
import time
try:
    from paddleocr import PaddleOCR
except ImportError:
    # paddleocr is only required for the "paddleorc" backend; keep the module
    # importable so the easyocr backend can still be used without it.
    PaddleOCR = None

# OCR engine instances. They are created by change_wechat_photo_name_logic
# and read by the get_key_fields_from_* helpers below.
Preader = None
Ereader = None

# Accepted spellings of ``model_name`` mapped to the canonical backend key.
# The historical (misspelled) values remain valid.
_MODEL_ALIASES = {
    "paddleorc": "paddleorc",
    "paddleocr": "paddleorc",
    "easyorc": "easyorc",
    "easyocr": "easyorc",
}


def get_photo_paths(photos_folder_path):
    """Return the paths of all ``.PNG`` files directly inside a folder.

    Args:
        photos_folder_path: Folder holding the WeChat screenshots.

    Returns:
        A list of paths (folder joined with file name), in ``os.listdir``
        order. Only names that end with ``.PNG`` are included.
    """
    return [
        os.path.join(photos_folder_path, photo_name)
        for photo_name in os.listdir(photos_folder_path)
        # Match the extension only; a substring test would also pick up
        # names such as "x.PNG.bak".
        if photo_name.endswith(".PNG")
    ]


def preprocess_image(image):
    """Convert a BGR image to a denoised black-and-white image for OCR.

    Args:
        image: BGR image array as returned by ``cv2.imread``.

    Returns:
        The binarized, median-filtered image.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # THRESH_OTSU makes OpenCV pick the threshold itself, so the 150 passed
    # here is not the value actually applied.
    _, binary = cv2.threshold(
        gray, 150, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    return cv2.medianBlur(binary, 3)


def image_cutting(image_path, up_ratio=0.03, down_ratio=0.13):
    """Crop a horizontal band out of a screenshot and preprocess it.

    The band spans the full width and runs from ``up_ratio`` to
    ``down_ratio`` of the image height, measured from the top.

    Args:
        image_path: Path of the screenshot to read.
        up_ratio: Top edge of the band as a fraction of the image height.
        down_ratio: Bottom edge of the band as a fraction of the height.

    Returns:
        The cropped band after ``preprocess_image``.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file.
        ValueError: If the ratios describe an empty band.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"无法读取图片:{image_path}")

    height = image.shape[0]
    up = int(height * up_ratio)
    down = int(height * down_ratio)
    if down <= up:
        raise ValueError(
            f"裁剪区域为空:up_ratio={up_ratio},down_ratio={down_ratio}"
        )

    cropped_image = image[up:down]
    return preprocess_image(cropped_image)


def get_key_fields_from_easyorc(image, keyword=''):
    """Run easyocr on an image and look for the text containing ``keyword``.

    Requires the module-level ``Ereader`` to have been initialised.

    Args:
        image: Image to recognise.
        keyword: Substring to search for. When empty, the first recognised
            text is returned as-is.

    Returns:
        * ``keyword`` empty: the first recognised text as a ``str``.
        * ``keyword`` found: ``{True: text}`` for the first match.
        * ``keyword`` not found: ``{False: [all recognised texts]}``.

    Raises:
        IndexError: If ``keyword`` is empty and nothing was recognised.
    """
    res = Ereader.readtext(image)
    if keyword == '':
        if not res:
            raise IndexError("OCR 未识别到任何文字")
        return res[0][1]

    texts = []
    for fields in res:
        # Each result item carries the bounding box at index 0 and the
        # recognised text at index 1.
        text = fields[1]
        if keyword in text:
            return {True: text}
        texts.append(text)
    return {False: texts}


def get_key_fields_from_PaddleOCR(image, keyword=''):
    """Run PaddleOCR on an image and look for the text containing ``keyword``.

    Requires the module-level ``Preader`` to have been initialised.

    Args:
        image: Image to recognise.
        keyword: Substring to search for. When empty, the first recognised
            text is used.

    Returns:
        * ``{True: text}`` for the first match (or the first recognised
          text when ``keyword`` is empty).
        * ``{False: [all recognised texts]}`` when nothing matched; the list
          is empty when nothing was recognised at all.
    """
    # The per-image entry may be None when no text is detected, so fall back
    # to an empty list rather than failing on iteration/indexing.
    res = Preader.ocr(image, cls=True)[0] or []
    if keyword == '':
        if not res:
            return {False: []}
        return {True: res[0][1][0]}

    texts = []
    for fields in res:
        # fields[0] is the bounding box, fields[1][0] the recognised text.
        text = fields[1][0]
        if keyword in text:
            return {True: text}
        texts.append(text)
    return {False: texts}


def time_counter(begin_time, end_time):
    """Format the elapsed time between two timestamps.

    Args:
        begin_time: Start timestamp in seconds.
        end_time: End timestamp in seconds.

    Returns:
        A string with the rounded duration split into hours, minutes and
        seconds.
    """
    runtime = round(end_time - begin_time)
    hour, remainder = divmod(runtime, 3600)
    minute, second = divmod(remainder, 60)
    return f'用时:{hour}小时{minute}分钟{second}秒'


def _extract_friend_name(ocr_result, keyword):
    """Turn a helper's OCR result into a file-name-safe friend name."""
    if isinstance(ocr_result, str):
        # The easyocr helper returns bare text when no keyword is given;
        # indexing that string with True would yield a single character.
        matched_text = ocr_result
    elif True in ocr_result:
        matched_text = ocr_result[True]
    else:
        raise ValueError(
            f"未找到包含关键字“{keyword}”的文字,识别结果:{ocr_result.get(False)}"
        )

    if keyword != "":
        friend_name = matched_text.split('-')[-1]
    else:
        friend_name = matched_text

    # Path separators inside the name would send the rename elsewhere.
    friend_name = friend_name.strip().replace("/", "_").replace(os.sep, "_")
    if not friend_name:
        raise ValueError("识别到的好友名为空")
    return friend_name


def _unique_target_path(folder, stem, source_path, suffix=".PNG"):
    """Return a rename target in ``folder`` that clobbers no other file."""
    candidate = os.path.join(folder, f"{stem}{suffix}")
    counter = 1
    while os.path.exists(candidate) and not os.path.samefile(
        candidate, source_path
    ):
        candidate = os.path.join(folder, f"{stem}_{counter}{suffix}")
        counter += 1
    return candidate


def change_wechat_photo_name_logic(
    photos_folder_path,
    keyword='22级实习-',
    up_ratio=0.05,
    down_ratio=0.13,
    model_name="paddleorc",
):
    """Rename WeChat screenshots in a folder after the name OCR'd from them.

    For every ``.PNG`` file in the folder, a band of the image is cropped and
    recognised. The text containing ``keyword`` is located, the part after its
    last ``-`` is taken as the friend name, and the file is renamed to
    ``<friend name>.PNG``. A failure on one file is printed and does not stop
    the remaining files. Progress and total time are printed.

    Args:
        photos_folder_path: Folder holding the screenshots.
        keyword: Text that marks the line holding the name. When empty, the
            first recognised text is used unchanged as the name.
        up_ratio: Top edge of the cropped band as a fraction of the height.
        down_ratio: Bottom edge of the cropped band as a fraction of height.
        model_name: OCR backend, ``"paddleorc"`` or ``"easyorc"`` (the
            spellings ``"paddleocr"`` / ``"easyocr"`` are accepted too).

    Raises:
        ValueError: If ``model_name`` is not a known backend.
        ImportError: If the PaddleOCR backend is requested but not installed.
    """
    global Preader, Ereader

    backend = _MODEL_ALIASES.get(model_name)
    if backend is None:
        raise ValueError(
            f"未知的 model_name:{model_name!r},可选值:{sorted(_MODEL_ALIASES)}"
        )

    if backend == "paddleorc":
        if PaddleOCR is None:
            raise ImportError("未安装 paddleocr,无法使用 paddleorc 模型")
        # NOTE(review): `det=False` is passed to the constructor, yet the
        # helper reads results in the detection+recognition layout; confirm
        # this argument is intended / has any effect for the installed version.
        Preader = PaddleOCR(use_angle_cls=True, det=False, lang="ch")
        recognise = get_key_fields_from_PaddleOCR
    else:
        # Loading the model once keeps it in memory for every screenshot.
        Ereader = easyocr.Reader(['ch_sim', 'en'])
        recognise = get_key_fields_from_easyorc

    photo_paths = get_photo_paths(photos_folder_path)
    m = len(photo_paths)
    t = 0
    start_time = time.time()

    for photo_path in photo_paths:
        start_time_of_each_step = time.time()
        try:
            preprocessed_image = image_cutting(photo_path, up_ratio, down_ratio)
            wechat_name = recognise(preprocessed_image, keyword)
            friend_name = _extract_friend_name(wechat_name, keyword)
            # Avoid overwriting a screenshot that already carries this name.
            new_photo_path = _unique_target_path(
                photos_folder_path, friend_name, photo_path
            )
            os.rename(photo_path, new_photo_path)
        except Exception as e:
            # Best-effort batch job: report the file and move on.
            print(f"\033[31m当前文件为:【{photo_path}】\n,错误:{e}\033[0m")
            continue

        t += 1
        time_for_this_time = time_counter(start_time_of_each_step, time.time())
        print(f"当前好友名为{friend_name},是第{t}个,完成{t/m*100}%,{time_for_this_time}")

    # Measure the total against "now": a per-step end time does not exist
    # when the folder is empty or every file failed.
    print(f"总计{time_counter(start_time, time.time())},完成{t}个")


if __name__ == '__main__':
    photos_folder_path = "/Users/magu/Downloads/淮职课程准备/1就业材料/就业实习留存材料/回访截图/张晶实习回访(11月1日-11月30)"
    change_wechat_photo_name_logic(photos_folder_path, keyword='22级实习-', up_ratio=0.05, down_ratio=0.101)
    # Alternative without a keyword (uses the first recognised text):
    # change_wechat_photo_name_logic(photos_folder_path, keyword='', up_ratio=0.05, down_ratio=0.10)
http://www.lryc.cn/news/495388.html

相关文章:

  • 第一届“吾杯”网络安全技能大赛 Writeup
  • 再谈Java中的String类型是否相同的判断方法
  • <一>51单片机环境
  • 【0x0001】HCI_Set_Event_Mask详解
  • 第三方Express 路由和路由中间件
  • 七、Python —— 元组、集合和字典
  • Aes加解密
  • 【时时三省】Tessy 故障入侵 使用教程
  • .NET 9 AOT的突破 - 支持老旧Win7与XP环境
  • CondaValueError: Malformed version string ‘~‘: invalid character(s).
  • 01-Ubuntu24.04LTS上安装PGSQL
  • Esp32使用micropython基于espnow实现语音对讲机
  • Docker 容器隔离关键技术:SELinux
  • Java并发07之ThreadLocal
  • 【单细胞数据库】癌症单细胞数据库CancerSEA
  • Rsa加解密 + 签名验签
  • bugku-web-留言板1
  • 进程状态的学习
  • Vue 2.0->3.0学习笔记(Vue 3 (四)- Composition API 的优势)
  • close and shutdown?
  • PostgreSQL + hasura + Apollo + GraphQL + React + Antd
  • Android笔记【10】
  • Leetcode打卡:N皇后
  • Linux内核4.14版本——ccf时钟子系统(3)——ccf一些核心结构体
  • [Deep Learning] 深度学习中常用函数的整理与介绍(pytorch为例)
  • 【ETCD】etcd简单入门之单节点部署etcd
  • Cadence基础语法
  • GAMES101虚拟机使用教程与探讨
  • 王道考研编程题总结
  • 算法2--滑动窗口