Opencv学习实践(一)——文本框检测

opencv博客汇:

1.阈值处理和轮廓检测
1.0 阈值轮廓的实践


前言:

本文为Opencv学习笔记(一)—阈值处理和轮廓检测 的一个实践版本。
对理论不熟悉的可以跳转:
Opencv学习笔记(一)—阈值处理和轮廓检测

本代码环境要求:

在ubuntu下
pycharm下 opencv4.0.X
(如果想在windows下运行,可以通过改部分代码来实现)
目前文本检测仅在规则的屏幕渲染文本上实验过,在场景文本上自适应性不一定好
这里仅给出了代码,部分检测图像在github上提供,后续的更新代码也将在github上提供。
github地址:https://github.com/simplify23/OCR-learnning

效果图展示:

在这里插入图片描述

源代码展示:

# coding: utf-8
import cv2
import os
import shutil
import math
import numpy as np


folder = 'data/'
show_resize = 3
print(os.getcwd())              #返回当前工作目录



class Rect:
    '''
    矩形类
    '''
    startx = 0
    endx = 0
    starty = 0
    endy = 0

    def __init__(self):
        pass

    def __str__(self):
        return '%d %d %d %d' % (self.startx, self.starty, self.endx, self.endy)


def find_contours(filename):
    '''
    对图像进行连通域检测,找到图像中的文字区域
    :param filename:图像名
    :return:包含连通域矩形框的列表
    '''

    # 获取包含矩阵信息的list
    result = thre(filename=filename, is_adapteive=False, thre_value=70)
    #img_show(result, window_name='thre', resize=show_resize)

    cv2.imwrite(folder + 'temp/thre.jpg', result)

    # cv2.bitwise_not(result, dst)                                  #位运算,非
    # 膨胀
    dst = cv2.Canny(result, 100, 100, 3)                            #连通域检测:输入图 maxval minval 卷积大小(默认为3)
    #img_show(dst, window_name='dst', resize=show_resize)
    element = cv2.getStructuringElement(cv2.MORPH_CROSS, (25, 20))  #获取十字结构元素25*20
    dilated = cv2.dilate(dst, element)
    #img_show(dilated, window_name='dilated', resize=show_resize)


    # 轮廓检测: 输入图 轮廓检测办法(外边缘检测) 近似办法
    #contours, hierarchy = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    #opencv3
    image, contours, hierarchy = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)


    rect_list = []
    for i in contours:

        first = i[0]
        rect = Rect()
        rect.startx = first[0][0]
        rect.endx = first[0][0]
        rect.starty = first[0][1]
        rect.endy = first[0][1]
        for x in i:
            rect.startx = min(x[0][0], rect.startx)
            rect.endx = max(x[0][0], rect.endx)
            rect.starty = min(x[0][1], rect.starty)
            rect.endy = max(x[0][1], rect.endy)
        rect_list.append(rect)
        '''
        #动态打印轮廓
        img = cv2.drawContours(result, i, -1, (0, 0, 255), 10)
        img_show(img,  window_name='contours',  resize=show_resize)        
        '''
    #打印全部轮廓
    img = cv2.drawContours(result, contours, -1, (0, 255, 0), 10)
    img_show(img, window_name='contours', resize=show_resize)

    avg_height = get_avg_height_or_width(rect_list)
    print('avg_height',avg_height)
    temp_list = [rect for rect in rect_list if
                 rect.endy - rect.starty <= avg_height * 2 and rect.endy - rect.starty > avg_height * 0.7]
    
    return temp_list

def bubble_sort(rect_list):
    '''
    冒泡排序,稳定的排序,对列表进行排序,从上到下,从左至右
    :param rect_list:
    :return:
    '''
    max_height = 0
    for rect in rect_list:
        temp_width = rect.endy - rect.starty
        if temp_width > max_height:
            max_height = temp_width

    for i in range(0, len(rect_list) - 1):
        for j in range(0, len(rect_list) - 1 - i):
            mid_iy = rect_list[j].starty + (rect_list[j].endy - rect_list[j].starty) / 2
            if mid_iy > rect_list[j + 1].endy:
                # rect_i在rect_j下面,交换
                rect_list[j], rect_list[j + 1] = rect_list[j + 1], rect_list[j]
            elif mid_iy <= rect_list[j + 1].endy and mid_iy >= rect_list[j + 1].starty:
                # rect_i和rect_j在同一行
                if rect_list[j].startx >= rect_list[j + 1].startx:
                    rect_list[j], rect_list[j + 1] = rect_list[j + 1], rect_list[j]
            else:
                # rect_i在rect_j上面
                pass


def draw_rect_line(filename, rect_list, windowsname="矩形", save=False, save_filename='temp'):
    '''
    在图片上绘制连通域并保存会之后的图片
    :param filename:带绘制图像的的名字
    :param rect_list:连通域矩形列表
    :param windowsname:显示窗口的名字
    :param save:是否保存绘制后的图像
    :param save_filename:保存的图像名字
    :return:None
    '''
    # src = cv2.imread(filename, 0)
    result2 = thre(filename=filename, is_adapteive=True)
    result2 = cv2.cvtColor(result2, cv2.COLOR_GRAY2BGR)             #颜色转换 RGB转换为灰度图
    # cv2.imwrite(folder + 'temp/thre.jpg', result2)
    i = 0
    for rect in rect_list:
        i = i + 1
        #添加的文字
        # cv2.putText(result2, str(i), (rect.startx - 20, rect.endy + 20),
        #             cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255),
        #             thickness=1,
        #             lineType=1)
        cv2.rectangle(result2, (rect.startx, rect.starty), (rect.endx, rect.endy), (0, 0, 255), 2)
        #画候选框

    img_show(result2, windowsname)
    if save:
        if not os.path.exists(folder + 'temp/'):
            os.makedirs(folder + 'temp/')
        cv2.imwrite(folder + 'temp/' + save_filename + '.jpg', result2)

def thre(filename, is_adapteive=False, thre_value=95):
    '''
    对图像进行二值化
    :param filename:图像文件名
    :param is_adapteive: 是否自适应
    :param thre_value: 非自适应模式下的阈值
    :return: 二值化后的图像
    '''
    src = cv2.imread(filename, 0)
    if is_adapteive:
        # result = cv2.adaptiveThreshold(src, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 195, 50)
        #输入图 超小阈值赋的值 值域的操作方法 二值化操作类型 分块大小 常数项
        ret, result = cv2.threshold(src, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)   
        #输入图 阈值 超过或小于阈值赋的值 选择类型 otsu可自动寻找合适阈值
        
    else:
        retval, result = cv2.threshold(src, thre_value, 255, cv2.THRESH_BINARY)

    return result

def img_show(src, window_name='defoult', resize=show_resize):
    '''
    显示图片
    :param src:带显示的图像
    :param window_name:图像窗口名字
    :param resize:resize比例
    :return:None
    '''
    res = cv2.resize(src, (int(src.shape[1] // resize), int(src.shape[0] // resize)), interpolation=cv2.INTER_CUBIC)
    cv2.imshow(window_name, res)
    #cv2.waitKey(1000)                                                  #使窗口始终保持

def get_avg_height_or_width(rect_list, h_or_w='h'):
    '''
    计算高度的分布,并且得出分布最大区域的平均值
    :param rect_list:连通域矩形列表
    :return:所有连通域中最大的高度
    '''
    # 计算最大值
    max_height = 0
    for rect in rect_list:
        if h_or_w == 'h':
            temp_max_height = rect.endy - rect.starty
        else:
            temp_max_height = rect.endx - rect.startx
        max_height = max((max_height,temp_max_height))

    # 向上取整
    max_height = math.ceil(max_height)

    # 计算分布区间
    if max_height % 10 >= 0:
        max_height = int(max_height // 10 + 1)
    else:
        max_height = int(max_height // 10)
    # 存放每个区间的宽度
    list2 = [[] for i in range(max_height)]
    # 将宽度放入每一个区间
    for rect in rect_list:
        if h_or_w == 'h':
            height = rect.endy - rect.starty
        else:
            height = rect.endx - rect.startx
        if h_or_w=='h':
            if height < 10:
                continue
        temp = height // 10
        list2[int(temp)].append(height)
    # 先取出数量最多的宽度区间
    index_top1 = get_max_height_or_width_index(list2)
    sum_height = sum(list2[index_top1])
    length = len(list2[index_top1])
    del (list2[index_top1])        #删除内部变量对该数据的引用

    # 取出数量最第二多的宽度区间
    index_top1 = get_max_height_or_width_index(list2)

    sum_height += sum(list2[index_top1])
    length += len(list2[index_top1])
    return math.ceil(sum_height / length)


def get_max_height_or_width_index(list2):
    '''
    得到数量最多的高度区间索引
    :param list2: 存储了所有矩形高度信息的列表
    :return:数量最多的高度区间索引
    '''
    max_height = 0
    for i in range(len(list2)-1):
        if len(list2[i]) > max_height:
            max_height = len(list2[i])
            index_top1 = i
    return index_top1

def combine_connected_domain(rect_list):
    '''
    将相邻的连通域融合在一起
    :param rect_list: 连通域矩阵列表
    :return:
    '''
    #if isinstance(rect_list, filter):
    rect_list = list(rect_list)
    # 计算所有框中的高度最大值
    max_height = 0
    for rect in rect_list:
        temp_width = rect.endy - rect.starty
        if temp_width > max_height:
            max_height = temp_width
    # y方向的允许误差设定为最大高度的0.3
    wucha_y = max_height * 0.3

    for i in range(0, len(rect_list) - 1):
        previous = i
        last = i + 1
        while rect_list[previous].startx == -10:
            # 当前矩形已经被合并到前一个,取出前一个进行比较
            previous = previous - 1

        # 两个举行的starty和endy之差在一定范围内
        mid_p = (rect_list[previous].starty + rect_list[previous].endy) / 2
        mid_l = (rect_list[last].starty + rect_list[last].endy) / 2

        if abs(mid_p - mid_l) < wucha_y and rect_list[last].startx - rect_list[previous].endx < max_height:
            rect_list[previous].startx = min(rect_list[previous].startx, rect_list[last].startx)
            rect_list[previous].starty = min(rect_list[previous].starty, rect_list[last].starty)
            rect_list[previous].endx = max(rect_list[previous].endx, rect_list[last].endx)
            rect_list[previous].endy = max(rect_list[previous].endy, rect_list[last].endy)
            rect_list[last].startx = -10
            rect_list[last].starty = -10
            rect_list[last].endx = -10
            rect_list[last].endy = -10
    rect_list = [rect for rect in rect_list if rect.startx != -10]
    return rect_list

if __name__ == '__main__':
    filename = folder + '4.jpg'
    # 获取连通域
    rect_list = find_contours(filename)
    #通过轮廓检测,给出候选文本框
    draw_rect_line(filename=filename, rect_list=rect_list, windowsname='find_contours', save=True,
                   save_filename='1_find_contours')
    # 连通域排序,从左到右,从上到下
    bubble_sort(rect_list)
    draw_rect_line(filename, rect_list, 'sort', True, 'ocr_yi_sort')
    # 连通域融合
    rect_list = combine_connected_domain(rect_list)
    draw_rect_line(filename, rect_list, 'combine', True, 'ocr_yi_combine')
    cv2.waitKey(0)




有什么问题,可以在最后面戳我
如果觉得这篇文章对你有帮助,麻烦给个喜欢~

猜你喜欢

转载自blog.csdn.net/qq_35307005/article/details/88812380