In [1]: from PIL import Image
In [2]: import pytesseract
In [3]: text = pytesseract.image_to_string(Image.open('C:/Users/ASUS/totals.png'))
In [4]: text # sanity check: confirm the Tesseract engine is usable
Out[4]: '1'
In [5]: import json
In [6]: import requests
In [7]: render = 'http://www.porters.vip:8050/execute' # endpoint the Lua script is POSTed to (as "lua_source")
In [8]: url = 'http://www.porters.vip/confusion/movie.html' # page the Lua script opens with splash:go
In [9]: script = """
...: function main(splash)
...: assert(splash:go('%s'))
...: assert(splash:wait(0.5))
...: total_png = splash:select('.movie-index-content.box .stonefont'):png()
...: return{
...: total = total_png
...: }
...: end
...: """ % url # fill the target URL into the Lua source; the script returns a PNG of the selected element under the key "total"
In [10]: header = {'content-type':'application/json'}
In [11]: data = json.dumps({"lua_source":script})
In [12]: resp = requests.post(render,data = data,headers = header) #截图
In [13]: resp
Out[13]: <Response [200]>
In [14]: image = resp.json()
In [15]: import base64
In [16]: import os
In [17]: for key,value in image.items():#有可能有多张图片
...: #splash返回的图片数据以base64进行编码,我们需要解码
...: image_body = base64.b64decode(value)
...: filename = "%s.png"%key
...: path = os.path.join(os.path.dirname(os.path.abspath('C:/Users/ASUS')),filename)
...: print(path)
...: with open(filename,'wb') as f:
...: f.write(image_body) #存入指定位置
...:
...:
D:\total.png
In [21]: print(pytesseract.image_to_string('C:\\Users\\ASUS\\total.png')) # run the OCR engine on total.png
56.83
Tesseract安装地址:安装地址
安装教程:运行安装程序,一路点击“下一步”完成安装;然后将 Tesseract-OCR 安装目录添加到系统环境变量 PATH;最后执行 pip install pytesseract。