案例-使用python实现基于opencv的ocr识别及开发思路

科技的发展日新月异，很多纸质档案也需要以电子版的形式存档，OCR扫描可以很好地实现这个功能。当然这只是核心算法，真正的软件还需要后期开发。
目前想到的该算法的应用，是根据照片或者扫描件快速地获取相应的信息，比如将身份证号码、姓名、性别、地址等统计到Excel表格中，从而减少手动输入和人工出错。
具体的预处理代码和透视变换代码如下，识别部分使用相应的插件，这里不再赘述。

import cv2
import numpy as np

# Image display helper
def show(img):
    """Show *img* in an OpenCV window and block until a key is pressed.

    The window is titled 'name'. After ``cv2.waitKey`` returns, every open
    OpenCV window is destroyed.
    """
    window_title = 'name'
    cv2.imshow(window_title, img)
    # A delay of 0 makes waitKey block until a key event arrives.
    cv2.waitKey(0)
    cv2.destroyAllWindows()

# Aspect-ratio-preserving resize helper
def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize *image* to a target width or height, keeping its aspect ratio.

    Args:
        image: Array whose first two ``shape`` entries are (height, width).
        width: Target width in pixels. When given, it takes precedence and
            ``height`` is ignored.
        height: Target height in pixels; used only when ``width`` is None.
        inter: Interpolation flag forwarded to ``cv2.resize``.

    Returns:
        The input image unchanged when neither ``width`` nor ``height`` is
        given; otherwise the result of ``cv2.resize``.
    """
    src_h, src_w = image.shape[:2]

    if width is None:
        if height is None:
            # Nothing requested: hand the input back untouched.
            return image
        # Scale the width by the same factor as the height.
        scale = height / float(src_h)
        target = (int(src_w * scale), height)
    else:
        # Scale the height by the same factor as the width.
        scale = width / float(src_w)
        target = (width, int(src_h * scale))

    # cv2.resize takes the target size as (width, height).
    return cv2.resize(image, target, interpolation=inter)

def _order_points(pts):
    """Return the four points of *pts* ordered TL, BL, BR, TR as float32 (4, 2)."""
    pts = np.asarray(pts, dtype="float32").reshape(4, 2)
    coord_sum = pts.sum(axis=1)          # x + y for each point
    coord_diff = pts[:, 1] - pts[:, 0]   # y - x for each point

    # In image coordinates (y grows downwards):
    #   top-left has the smallest x + y, bottom-right the largest;
    #   bottom-left has the largest y - x, top-right the smallest.
    # NOTE: for a quadrilateral rotated by about 45 degrees these criteria
    # can tie and select the same point twice.
    rect = np.zeros((4, 2), dtype="float32")
    rect[0] = pts[np.argmin(coord_sum)]   # top-left
    rect[1] = pts[np.argmax(coord_diff)]  # bottom-left
    rect[2] = pts[np.argmax(coord_sum)]   # bottom-right
    rect[3] = pts[np.argmin(coord_diff)]  # top-right
    return rect


def four_point_transform(image, pts):
    """Warp the quadrilateral *pts* of *image* onto an upright rectangle.

    Args:
        image: Source image passed to ``cv2.warpPerspective``.
        pts: Four (x, y) corner points in any order. Any array-like that
            reshapes to (4, 2) is accepted, e.g. a (4, 1, 2) contour
            returned by ``cv2.approxPolyDP``.

    Returns:
        The perspective-corrected image. Its width is the longer of the
        top/bottom edges and its height the longer of the left/right edges
        of the quadrilateral (both truncated to int).

    Raises:
        ValueError: If *pts* cannot be reshaped to (4, 2).
    """
    # Use the real corner coordinates, sorted into the same
    # TL, BL, BR, TR order that the destination rectangle uses below.
    rect = _order_points(pts)
    (tl, bl, br, tr) = rect

    # Output width: the longer of the bottom and top edges.
    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    maxWidth = max(int(widthA), int(widthB))

    # Output height: the longer of the right and left edges.
    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    maxHeight = max(int(heightA), int(heightB))

    # Destination corners, in the same TL, BL, BR, TR order as ``rect``.
    dst = np.array(
        [
            [0, 0],
            [0, maxHeight - 1],
            [maxWidth - 1, maxHeight - 1],
            [maxWidth - 1, 0],
        ],
        dtype="float32",
    )

    # Compute the 3x3 homography and apply it.
    M = cv2.getPerspectiveTransform(rect, dst)
    warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
    return warped






# Load the source photo. cv2.imread returns None (it does not raise) when
# the file is missing or cannot be decoded, so check explicitly.
image = cv2.imread('./img/page.jpg')
if image is None:
    raise FileNotFoundError("could not read image './img/page.jpg'")

# Scale factor between the original image and the 500-px-high working copy;
# used later to map detected corners back onto the original.
ratio = image.shape[0] / 500.0
# Keep an untouched copy: the final warp is applied to the full-size image.
orig = image.copy()

# --- Preprocessing ---
image = resize(image, height=500)
# Convert to a single-channel grayscale image.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Smooth out noise before edge detection. The sigmas are passed by keyword
# because the 4th positional parameter of GaussianBlur is ``dst``.
Guss_gary = cv2.GaussianBlur(gray, (5, 5), sigmaX=10, sigmaY=10)
# Edge detection on the blurred image.
edged = cv2.Canny(Guss_gary, 75, 200)

# --- Contour detection ---
# findContours returns (contours, hierarchy) in OpenCV 4.x and
# (image, contours, hierarchy) in 3.x; [-2] selects the contours in both.
cnts = cv2.findContours(edged.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

# Keep the five largest contours by area.
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

screenCnt = None
for c in cnts:
    # Approximate the contour by a polygon; the tolerance (maximum distance
    # between the contour and its approximation) is 2% of the perimeter.
    approx = cv2.approxPolyDP(c, 0.02 * cv2.arcLength(c, True), True)
    # The first approximation with four vertices is taken as the page.
    if len(approx) == 4:
        screenCnt = approx
        break

if screenCnt is None:
    raise RuntimeError("no four-cornered contour found in './img/page.jpg'")

# --- Perspective transform ---
# Map the corners found on the resized image back to original coordinates.
value = screenCnt.reshape(4, 2) * ratio
warped = four_point_transform(orig, value)
cv2.imwrite('./img/scan.jpg', warped)
print(value)

案例-使用python实现基于opencv的ocr识别及开发思路

猜你喜欢