运行chineseocr(yolov3+crnn) 中单独检测的部分(darknet_detect), 由于cuda版本问题,遇到:
OSError: libcudart.so.9.2: cannot open shared object file: No such file or directory
所以用到darknet_ocr中单独检测的部分。(注:原文此处的"源码链接:添加链接描述"是未替换的链接占位符,实际源码链接缺失,可按 darknet-ocr 项目名自行检索。)
该链接中包含darknet框架下文字检测text.py脚本,在dnn目录下,但是由于有额外的输出需求以及遇到的一些问题,所以做了一些修改,之后会附上完整的代码集合以供参考。
import cv2
import numpy as np
import time
from config import textPath, anchors
from helper.image import resize_img, get_origin_box, soft_max, reshape
from helper.detectors import TextDetector
from config import scale, maxScale, TEXT_LINE_SCORE
from dnn.image import rotate_cut_img, sort_box
from PIL import Image
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
textNet = cv2.dnn.readNetFromDarknet(textPath.replace('weights','cfg'),textPath)
def detect_box(image, scale=600, maxScale=900):
    """Run the darknet text-proposal network on one image.

    Args:
        image: BGR image as an ``np.ndarray`` of shape (H, W, 3).
        scale: target short-side size used by ``resize_img``.
        maxScale: upper bound on the long side after resizing.

    Returns:
        Tuple ``(scores, boxes, rate, w, h)``: per-proposal class scores,
        proposal boxes clipped to the resized image, the resize ratio
        ``rate``, and the resized width/height ``w``/``h``.
    """
    image, rate = resize_img(image, scale, maxScale=maxScale)
    h, w = image.shape[:2]
    # Fixed: dropped the stray trailing semicolon and the unused H, W locals.
    inputBlob = cv2.dnn.blobFromImage(image, scalefactor=1.0, size=(w, h),
                                      swapRB=False, crop=False)
    outputName = textNet.getUnconnectedOutLayersNames()
    textNet.setInput(inputBlob)
    out = textNet.forward(outputName)[0]
    # The first 20 output channels are class logits; the rest are box regressions.
    clsOut = reshape(out[:, :20, ...])
    boxOut = reshape(out[:, 20:, ...])
    boxes = get_origin_box((w, h), anchors, boxOut[0])
    scores = soft_max(clsOut[0])
    # Clip every proposal to the bounds of the resized image.
    boxes[:, 0:4][boxes[:, 0:4] < 0] = 0
    boxes[:, 0][boxes[:, 0] >= w] = w - 1
    boxes[:, 1][boxes[:, 1] >= h] = h - 1
    boxes[:, 2][boxes[:, 2] >= w] = w - 1
    boxes[:, 3][boxes[:, 3] >= h] = h - 1
    # Debug output requested by the blog post; kept verbatim.
    print('scores:', scores)
    print('boxes:', boxes)
    print('rate:', rate)
    print('w:', w)
    print('h:', h)
    return scores, boxes, rate, w, h
timeTake = time.time()
def detect_lines(image, scale=600,
                 maxScale=900,
                 MAX_HORIZONTAL_GAP=30,
                 MIN_V_OVERLAPS=0.6,
                 MIN_SIZE_SIM=0.6,
                 TEXT_PROPOSALS_MIN_SCORE=0.7,
                 TEXT_PROPOSALS_NMS_THRESH=0.3,
                 TEXT_LINE_NMS_THRESH=0.9,
                 TEXT_LINE_SCORE=0.9
                 ):
    """Detect text lines in an image.

    Runs the proposal network via ``detect_box``, then merges the raw
    proposals into text lines with ``TextDetector`` and maps the result
    back to original-image coordinates.

    Returns:
        Tuple ``(text_lines, scores)`` of line boxes and their scores.
    """
    # Never allow a horizontal gap smaller than 16 px when linking proposals.
    MAX_HORIZONTAL_GAP = max(16, MAX_HORIZONTAL_GAP)
    detectors = TextDetector(MAX_HORIZONTAL_GAP, MIN_V_OVERLAPS, MIN_SIZE_SIM)
    scores, boxes, rate, w, h = detect_box(image, scale, maxScale)
    text_lines, scores = detectors.detect(
        boxes, scores, (h, w),
        TEXT_PROPOSALS_MIN_SCORE, TEXT_PROPOSALS_NMS_THRESH,
        TEXT_LINE_NMS_THRESH, TEXT_LINE_SCORE)
    # Undo the resize so line coordinates refer to the original image.
    if len(text_lines) > 0:
        text_lines = text_lines / rate
    print('text_lines:', text_lines)
    print('scores:', scores)
    return text_lines, scores
# NOTE(review): as pasted this computes only the time spent *defining*
# detect_lines above, not running it — the timer was presumably meant to
# wrap the detection call before indentation was lost; verify the original.
timeTake = time.time()-timeTake
print('It take:{}s'.format(timeTake))
def detect(img):
    """Detect text lines in an RGB image.

    Converts the image to BGR for the network, keeps only lines whose
    score exceeds TEXT_LINE_SCORE, and packages them as a JSON-style dict
    ``{'data': [{'box', 'prob', 'text'}, ...], 'errCode': 0}``.
    """
    bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    lines, probs = detect_lines(bgr, scale=scale, maxScale=maxScale)
    data = []
    for line, prob in zip(lines, probs):
        if prob > TEXT_LINE_SCORE:
            data.append({'box': [int(v) for v in line],
                         'prob': round(float(prob), 2),
                         'text': None})
    return {'data': data, 'errCode': 0}
def ocr_batch(img, boxes, leftAdjustAlph=0.01, rightAdjustAlph=0.01):
    """Crop and deskew every detected box from ``img`` for later OCR.

    Args:
        img: RGB image as an ``np.ndarray``.
        boxes: iterable of quadrilateral boxes ``[x1, y1, ..., x4, y4]``.
        leftAdjustAlph: left horizontal padding ratio passed to rotate_cut_img.
        rightAdjustAlph: right horizontal padding ratio.

    Returns:
        List of box dicts, each augmented with an ``'img'`` key holding the
        grayscale PIL crop of that text line.
    """
    im = Image.fromarray(img)
    newBoxes = []
    for box in boxes:
        partImg, box = rotate_cut_img(im, box, leftAdjustAlph, rightAdjustAlph)
        box['img'] = partImg.convert('L')
        newBoxes.append(box)
    # Fixed: removed the leftover cv2.waitKey(0)/cv2.imshow debug code (it fails
    # or blocks without an X server) and the dead cvPartImg/cvImg conversions,
    # and actually return the collected crops instead of dropping them.
    return newBoxes
def drawDetectBox(img, resJson):
    """Draw each detected quadrilateral from ``resJson['data']`` onto ``img``.

    Boxes are outlined in green by connecting the four corners in order.
    ``img`` is modified in place.
    """
    green = (0, 255, 0)
    for item in resJson['data']:
        x1, y1, x2, y2, x3, y3, x4, y4 = item['box']
        corners = [(int(x1), int(y1)), (int(x2), int(y2)),
                   (int(x3), int(y3)), (int(x4), int(y4))]
        # Connect consecutive corners, wrapping from the last back to the first.
        for start, end in zip(corners, corners[1:] + corners[:1]):
            cv2.line(img, start, end, green)
def show_img(imgs: np.ndarray, color=True):
    """Queue one or more images on matplotlib figures.

    A single image (3-D when ``color``, 2-D when grayscale) is promoted to a
    batch of one. Call ``plt.show()`` afterwards to actually display.
    """
    is_single_color = imgs.ndim == 3 and color
    is_single_gray = imgs.ndim == 2 and not color
    if is_single_color or is_single_gray:
        imgs = np.expand_dims(imgs, axis=0)
    for im in imgs:
        plt.figure()
        plt.imshow(im, cmap=None if color else 'gray')
# Script entry: detect text in the sample image, crop each detected line,
# draw the boxes, then save and display the annotated result.
imgDir = './test/'
img = cv2.imread(imgDir + 'img.jpeg')
res = detect(img)
print(res)
boxes = [item['box'] for item in res['data']]
ocr_batch(img, boxes)
drawDetectBox(img, res)
cv2.imwrite('detect7.jpg', img)
# plt.show() is used instead of cv2.imshow, which needs an X server.
show_img(img, color = True)
plt.show()
由于之前在代码中用到cv2.imshow()语句在linux系统下运行,显示:cannot connect to X server, 需要将该语句注释掉,之后用plt.show()替换。
运行之后得到检测结果(原文此处附有结果截图,提取时已丢失)。
另外整个代码的运行都在上篇文章中同样的镜像里,所以运行text.py文件的代码是:
docker run -v /.../OCR-DARKNET/darknet-ocr:/chineseocr/darknet-ocr -w /chineseocr/darknet-ocr chineseocr:v2 python text.py
改了几个版本的text.py满足不同需求,有需要的可以一起讨论,可能大家环境不同或者是有opencv库的问题,如果有什么差异或者更新欢迎随时讨论!