科技的发展日新月异,很多纸质档案也需要电子版存档,OCR扫描可以很好地实现这个功能。当然这里只是核心算法,真正的软件还需要后期开发。
目前想到的该算法的应用,是根据照片或者扫描件快速地获取相应的信息,比如把身份证号码、姓名、性别、地址等统计到Excel表格中,从而减少手动输入和人工出错。
具体的预处理代码和透视变换代码如下,识别部分使用相应插件,这里不再赘述。
import cv2
import numpy as np
# Image display helper
def show(img):
    """Display ``img`` in a window titled 'name' until a key is pressed.

    All OpenCV windows are destroyed once the key press is received.
    """
    window_title = 'name'
    cv2.imshow(window_title, img)
    # A delay of 0 makes waitKey block until any key is pressed.
    cv2.waitKey(0)
    cv2.destroyAllWindows()
# Aspect-ratio-preserving resize helper
def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize ``image`` to a target width or height, keeping its aspect ratio.

    Args:
        image: Array whose first two ``shape`` entries are (height, width).
        width: Target width in pixels. When given, it takes precedence and
            ``height`` is ignored.
        height: Target height in pixels, used only when ``width`` is None.
        inter: Interpolation flag forwarded to ``cv2.resize``.

    Returns:
        ``image`` itself when neither ``width`` nor ``height`` is given,
        otherwise the resized image returned by ``cv2.resize``.
    """
    src_h, src_w = image.shape[:2]

    # Nothing requested: hand back the input untouched.
    if width is None and height is None:
        return image

    if width is not None:
        # Scale by the width ratio; the new height is truncated to an int.
        scale = width / float(src_w)
        target_size = (width, int(src_h * scale))
    else:
        # Scale by the height ratio; the new width is truncated to an int.
        scale = height / float(src_h)
        target_size = (int(src_w * scale), height)

    # cv2.resize takes the size as (width, height).
    return cv2.resize(image, target_size, interpolation=inter)
def _order_points(pts):
    """Return the four corners of ``pts`` ordered tl, bl, br, tr as float32."""
    # Accept both the plain (4, 2) layout and OpenCV's contour layout
    # (4, 1, 2) by flattening everything to rows of (x, y).
    corners = np.asarray(pts, dtype="float32").reshape(-1, 2)
    if corners.shape[0] != 4:
        raise ValueError(
            "four_point_transform expects exactly 4 points, got %d"
            % corners.shape[0]
        )

    # x + y is smallest at the top-left corner and largest at the
    # bottom-right; y - x is smallest at the top-right corner and largest at
    # the bottom-left. This heuristic can pick the same corner twice for a
    # quadrilateral rotated by roughly 45 degrees.
    coord_sum = corners.sum(axis=1)
    coord_diff = corners[:, 1] - corners[:, 0]

    return np.array(
        [
            corners[np.argmin(coord_sum)],   # top-left
            corners[np.argmax(coord_diff)],  # bottom-left
            corners[np.argmax(coord_sum)],   # bottom-right
            corners[np.argmin(coord_diff)],  # top-right
        ],
        dtype="float32",
    )


def four_point_transform(image, pts):
    """Warp the quadrilateral ``pts`` of ``image`` onto an upright rectangle.

    Args:
        image: Source image, forwarded to ``cv2.warpPerspective``.
        pts: Four (x, y) corner points in any order, shaped either (4, 2) or
            (4, 1, 2) (the layout produced by OpenCV contour functions).

    Returns:
        The warped image returned by ``cv2.warpPerspective``; its size is the
        longer of the two opposite edge lengths in each direction.

    Raises:
        ValueError: If ``pts`` does not describe exactly four 2-D points.
    """
    # Corner order is: 0 top-left, 1 bottom-left, 2 bottom-right, 3 top-right.
    rect = _order_points(pts)
    (tl, bl, br, tr) = rect

    # Output width: the longer of the bottom and top edges.
    widthA = np.sqrt(((br - bl) ** 2).sum())
    widthB = np.sqrt(((tr - tl) ** 2).sum())
    maxWidth = max(int(widthA), int(widthB))

    # Output height: the longer of the right and left edges.
    heightA = np.sqrt(((tr - br) ** 2).sum())
    heightB = np.sqrt(((tl - bl) ** 2).sum())
    maxHeight = max(int(heightA), int(heightB))

    # Destination corners, listed in the same order as ``rect``.
    dst = np.array(
        [
            [0, 0],
            [0, maxHeight - 1],
            [maxWidth - 1, maxHeight - 1],
            [maxWidth - 1, 0],
        ],
        dtype="float32",
    )

    # Compute the perspective matrix and apply it.
    M = cv2.getPerspectiveTransform(rect, dst)
    return cv2.warpPerspective(image, M, (maxWidth, maxHeight))
# cv2.imread returns None instead of raising when the file cannot be read,
# so fail early with a clear message.
image = cv2.imread('./img/page.jpg')
if image is None:
    raise FileNotFoundError("cannot read input image './img/page.jpg'")
# Scale factor between the original image and the 500-px-high working copy
ratio = image.shape[0] / 500.0
# Keep a full-resolution copy for the final perspective warp
orig = image.copy()
# Preprocessing stage
image = resize(image, height=500)
# Convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Smooth before edge detection; sigmaY is passed by keyword because the
# fourth positional parameter of cv2.GaussianBlur is dst, not sigmaY.
blurred = cv2.GaussianBlur(gray, (5, 5), sigmaX=10, sigmaY=10)
# Edge detection on the smoothed image
edged = cv2.Canny(blurred, 75, 200)
# Contour detection; index -2 selects the contour list whether findContours
# returns (contours, hierarchy) or (image, contours, hierarchy).
cnts = cv2.findContours(edged.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
# Keep the five largest contours by area
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]
screenCnt = None
for c in cnts:
    # A contour may be an arbitrary polygon, so approximate it.
    # 0.02 * perimeter is the maximum allowed distance between the original
    # contour and its approximation (the accuracy parameter).
    approx = cv2.approxPolyDP(c, 0.02 * cv2.arcLength(c, True), True)
    # Take the first approximation that has exactly four corners
    if len(approx) == 4:
        screenCnt = approx
        break
if screenCnt is None:
    raise RuntimeError("no four-corner contour found in './img/page.jpg'")
# Perspective transform: scale the corners back to the full-resolution image
value = screenCnt.reshape(4, 2) * ratio
# Pass the corners in OpenCV's contour layout (4, 1, 2)
warped = four_point_transform(orig, value.reshape(4, 1, 2))
# Optional binarisation step, left disabled:
#warped = cv2.cvtColor(warped,cv2.COLOR_BGR2GRAY)
#ref = cv2.threshold(warped,100,255,cv2.THRESH_BINARY)[1]
# cv2.imwrite reports failure through its return value
if not cv2.imwrite('./img/scan.jpg', warped):
    raise OSError("failed to write output image './img/scan.jpg'")
print(value)