2. 环境安装
pip install paddlepaddle -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install paddleocr -i https://mirror.baidu.com/pypi/simple
pip install -r requirements.txt -i https://mirror.baidu.com/pypi/simple
3. 用法: my_ocr.py 图片或文件夹
import os
import sys
import time
import logging
from tqdm.auto import trange
from paddleocr import PaddleOCR

# Suppress DEBUG-level log records from every logger before the OCR engine
# is created further down in the script.
logging.disable(logging.DEBUG)
# Suppress every log record of severity WARNING and below, process-wide.
logging.disable(logging.WARNING)

# Name of the text file the recognised text is appended to. It is opened
# relative to the current working directory, which check_args() changes.
file = "out.txt"


def ocr_imgs(img):
    """Run OCR on one image and append the recognised text to ``file``.

    The image is first processed with text detection enabled. If that
    yields nothing, it is retried with ``det=False`` so the whole image is
    treated as a single text line.

    The recogniser is read from the module-level global ``ocr``, which the
    ``__main__`` section of this script creates before calling this function.

    Args:
        img: Path of the image, relative to the current working directory.
            It is also written (centred in a 50-column dashed rule) as the
            section header in the output file.
    """
    result = ocr.ocr(img, cls=False)
    # "Nothing detected" is treated as: empty result, or an empty/None first
    # page. NOTE(review): some PaddleOCR releases appear to return [None]
    # here rather than [[]] - confirm against the installed version.
    if not result or not result[0]:
        result = ocr.ocr(img, cls=False, det=False)

    # Explicit UTF-8 so the (typically Chinese) text is written identically
    # regardless of the platform's default locale encoding.
    with open(file, "a", encoding="utf-8") as f:
        f.write(f'\n{img.center(50, "-")}\n')
        for res in result or []:
            for line in res or []:
                if isinstance(line, list):
                    # List entry: the text is the first item of its last element.
                    f.write(f"{line[-1][0]}\n")
                elif isinstance(line, tuple):
                    # Tuple entry: the text is the first item.
                    f.write(f"{line[0]}\n")


def check_args():
    """Validate ``sys.argv[1]`` and prepare the working directory.

    Side effects: changes the current working directory to the target
    directory (the argument itself, or the directory containing the given
    file) and deletes a stale ``file`` left there by a previous run.

    Returns:
        True if the argument names a single file, False if it names a
        directory.

    Raises:
        SystemExit: with a non-zero status if no argument was given or the
            argument is neither an existing file nor an existing directory.
    """
    if len(sys.argv) < 2:
        print("Usage: %s <path> or <path/file>" % sys.argv[0])
        sys.exit(1)

    arg = sys.argv[1]
    if os.path.isfile(arg):
        # dirname() is "" for a bare file name such as "scan.png";
        # os.chdir("") would raise, so fall back to the current directory.
        arg = os.path.dirname(arg) or os.curdir
        single_file = True
    elif os.path.isdir(arg):
        single_file = False
    else:
        # Fail with a clear message instead of an unbound-variable or
        # chdir traceback.
        sys.exit("%s: not a file or directory: %s" % (sys.argv[0], arg))

    os.chdir(arg)
    if os.path.exists(file):
        os.remove(file)
    return single_file
if __name__ == "__main__":
    # Script entry point. The body stays at module level (not wrapped in a
    # function) because ocr_imgs() reads the engine through the global `ocr`.
    print(f"[{time.strftime('%X')}] 识别开始...")
    start = time.time()

    # check_args() changes into the target directory and returns True when a
    # single file, rather than a directory, was given on the command line.
    if check_args():
        candidates = [os.path.basename(sys.argv[1])]
    else:
        candidates = [name for name in os.listdir() if os.path.isfile(name)]

    # Match extensions case-insensitively so files such as "SCAN.JPG" are not
    # silently skipped, and filter before the loop so the progress bar counts
    # only the images that will actually be processed.
    imagelist = sorted(
        (
            name
            for name in candidates
            if os.path.splitext(name)[1].lower() in (".jpg", ".jpeg", ".png")
        ),
        key=str.lower,
    )

    ocr = PaddleOCR(use_angle_cls=False, lang="ch")
    for i in trange(len(imagelist), leave=False):
        ocr_imgs(imagelist[i])

    run_time = round(time.time() - start)
    print(f"[{time.strftime('%X')}] 结束耗时{run_time}秒")

    # Open the result file in gedit; the trailing "&" backgrounds the editor
    # so the script can return immediately.
    os.system("gedit " + file + "&")