# 如题~对应文字游戏录屏制作字幕的第二模块
from aip import AipOcr
import os
import time
'''
Author: AleryXiao
Date: 2022.10.15
Title: 批量OCR识别图片并输出文件名和内容到txt文档(利用百度OCR的Api) v1.0
Content:
0. 填写自己的APP_ID,API_KEY和SECRET_KEY
1. 通过弹出的文件资源窗口选择一个仅含有目标图片的目录
2. 对于每个图片文件:
OCR并将OCR结果和名字分别写入两个txt文本文档 (在控制台输出处理结果)
休眠0.4S (百度API有调用频率上限qwq)
'''
def vcode2str(img_url):
    """OCR a local image with the Baidu OCR client and return its text.

    Args:
        img_url: Path of a local image file. Despite the name it is not a
            URL; the file is read from disk as raw bytes.

    Returns:
        The recognized lines joined into a single string with no separator
        between them.

    Raises:
        KeyError: If the OCR response carries no ``"words_result"`` entry.
            The full response is included in the message to help diagnosis.
    """
    APP_ID = "2333"  # fill in your own credentials
    API_KEY = "qwq"
    SECRET_KEY = "OvO"
    client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
    image = get_file_content(img_url)

    # Optional request parameters.
    options = {}
    # The original author notes that recognition goes badly wrong unless
    # Japanese is declared explicitly.
    options["language_type"] = "JAP"
    options["detect_direction"] = "false"
    options["detect_language"] = "false"
    options["probability"] = "false"

    # Exactly one request per image: an earlier option-less call whose result
    # was thrown away doubled the API usage and has been removed.
    res = client.basicGeneral(image, options)
    if "words_result" not in res:
        raise KeyError(f"OCR response has no 'words_result': {res!r}")

    # Each entry is one recognized line; they are joined without line breaks.
    return "".join(tex["words"] for tex in res["words_result"])
def get_file_content(file):
    """Return the complete content of ``file`` as bytes.

    The file is opened in binary mode and is closed again before returning,
    even when reading fails.
    """
    handle = open(file, mode='rb')
    try:
        payload = handle.read()
    finally:
        handle.close()
    return payload
if __name__ == '__main__':
    # Script entry point: pick a folder through a dialog, OCR every file in
    # it, and write two line-aligned text files into <folder>/Output:
    #   Name.txt   - one file name (extension removed) per image
    #   Script.txt - the OCR text of that image, or "<n> return error"
    from tkinter import filedialog as fd  # directory picker dialog
    import tkinter as tk

    root = tk.Tk()
    root.withdraw()  # hide the empty root window; only the dialog is shown

    # Choose a folder that contains only the target images.
    input_path = fd.askdirectory()
    if not input_path:
        # An empty result means the dialog was cancelled; stop cleanly
        # instead of crashing in os.listdir('').
        raise SystemExit("No directory selected")

    # Only regular files are processed, so an Output folder left behind by a
    # previous run is not sent to OCR. Sorting makes the processing order
    # (and therefore the line order of both output files) deterministic.
    image_path = sorted(
        name for name in os.listdir(input_path)
        if os.path.isfile(os.path.join(input_path, name))
    )

    output_path = os.path.join(input_path, 'Output')
    os.makedirs(output_path, exist_ok=True)  # create the folder if missing

    script_file = os.path.join(output_path, 'Script.txt')  # OCR text
    # File-name list (could also be written to Excel with small changes).
    name_file = os.path.join(output_path, 'Name.txt')

    # UTF-8 is set explicitly so the recognized text can always be written,
    # whatever the locale default encoding is. The with-block guarantees both
    # files are flushed and closed.
    with open(script_file, 'w', encoding='utf-8') as f, \
            open(name_file, 'w', encoding='utf-8') as p:
        # i counts images starting from 1.
        for i, image in enumerate(image_path, start=1):
            try:
                text = vcode2str(os.path.join(input_path, image))  # OCR
            except Exception as E:
                # Best effort: record the failure and continue with the next
                # image, but show why it failed.
                text = f'{i} return error'
                print(f"{i} return error: {E}")
            else:
                print(f"{i} is completed")

            # Exactly one line goes to each file per image, so line k of
            # Name.txt always matches line k of Script.txt.
            p.write(os.path.splitext(image)[0] + '\n')
            f.write(text + '\n')

            # The API has a call-frequency limit; pause after every attempt,
            # including failed ones.
            time.sleep(0.4)