Keras Character Classification


This article implements character classification with Keras.

Project directory structure:
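The original screenshot of the layout is not reproduced here; based on the paths used in the scripts below, the project is organized roughly as follows:

CharClassification/
├── generate_image.py        # step 1: synthesize character images
├── net.py                   # network definition
├── train_model.py           # step 2: training script
├── predict.py               # step 3: prediction script
├── fonts/                   # font files used for rendering
├── light_backgrounds/       # light background images
├── dark_backgrounds/        # dark background images
└── train/, test/            # generated by generate_image.py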

1. Generate the data.

Note: the data-generation code only runs on Ubuntu (it shells out to ImageMagick's convert command).
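Because every image is produced by shelling out to ImageMagick, a small pre-flight check (a minimal sketch, not part of the original script) can confirm the tool is installed before anything runs:

import shutil
import sys

# Abort early if ImageMagick's `convert` binary is not on the PATH.
if shutil.which('convert') is None:
    sys.exit('ImageMagick not found; install it first, e.g. `sudo apt-get install imagemagick`.')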

generate_image.py:

import os
import random
import numpy as np
import uuid

PATH_TO_LIGHT_BACKGROUNDS = 'light_backgrounds/'
PATH_TO_DARK_BACKGROUNDS = 'dark_backgrounds/'
PATH_TO_FONT_FILES = 'fonts/'
OUTPUT_DIR = 'output/'
NUM_IMAGES_PER_CLASS = 10

# Get all files from directory
def get_files_from_dir(dirname):
  list_files = (os.listdir(dirname))
  list_files = [dirname + x for x in list_files]
  return list_files


# Random perspective distortion created by randomly moving the four corners of the image.
def get_distort_arg():
  amount = 5
  hundred_minus_amount = 100 - amount
  r = np.random.randint
  # Map the four corners (0,0) (100,0) (0,100) (100,100) to slightly shifted points.
  return ("'0,0 {},{} 100,0 {},{} 0,100 {},{} 100,100 {},{}'".format(
      r(0, amount), r(0, amount), r(hundred_minus_amount, 100), r(0, amount),
      r(0, amount), r(hundred_minus_amount, 100), r(hundred_minus_amount, 100), r(hundred_minus_amount, 100)))

# Randomly extract num_crops 32x32 regions of an image and save them to out_dir
def create_random_crops(image_filename, num_crops, out_dir):
  # Get the image width and height via ImageMagick
  dim = os.popen('convert ' + image_filename + ' -ping -format "%w %h" info:').read()
  dim = dim.split()
  im_width = int(dim[0])
  im_height = int(dim[1])
  
  for i in range(0, num_crops):
    # Randomly select first co-ordinate of square for cropping image
    x = random.randint(0,im_width - 32)
    y = random.randint(0,im_height - 32)
    outfile = uuid.uuid4().hex + '.jpg'
    command = "convert "+ image_filename + " -crop 32x32"+"+"+str(x)+"+"+str(y)+" " + os.path.join(out_dir, outfile)
    os.system(str(command))

# Generate crops for all files in file_list and store them in dirname
# (only runs if dirname does not already exist, so crops are generated once)
def generate_crops(file_list, dirname):
  if not os.path.isdir(dirname):
    os.mkdir(dirname)
    for f in file_list:
      create_random_crops(f, 10, dirname)


# List of characters
char_list = []
for i in range(65, 65+26):
  char_list.append(chr(i))

# List of digits
for j in range(48,48+10):
  char_list.append(chr(j))

# List of light font colors
color_light = ['white','lime','gray','yellow','silver','aqua']

# List of dark font colors
color_dark = ['black','green','maroon','blue','purple','red']


# List of light backgrounds
light_backgrounds = get_files_from_dir(PATH_TO_LIGHT_BACKGROUNDS)

# List of dark backgrounds
dark_backgrounds = get_files_from_dir(PATH_TO_DARK_BACKGROUNDS)

# List of font files
list_files_font = get_files_from_dir(PATH_TO_FONT_FILES)



light_backgrounds_crops_dir = 'light_backgrounds_crops/'
dark_backgrounds_crops_dir = 'dark_backgrounds_crops/'

generate_crops(light_backgrounds, light_backgrounds_crops_dir)
generate_crops(dark_backgrounds, dark_backgrounds_crops_dir)

# List of all files in the crops directory
light_backgrounds = get_files_from_dir(light_backgrounds_crops_dir)
dark_backgrounds = get_files_from_dir(dark_backgrounds_crops_dir)

# List of all backgrounds
all_backgrounds = [dark_backgrounds, light_backgrounds]


# Sample Command-----  magick convert image.jpg -fill Black -font Courier-Oblique -weight 50 -pointsize 12 -gravity center -blur 0x8 -evaluate Gaussian-noise 1.2  -annotate 0+0 "Some text" output_image

for i in range(0,len(char_list)):
  char = char_list[i]
  char_output_dir = OUTPUT_DIR + str(char) + "/"
	
  if not os.path.exists(char_output_dir):
    os.makedirs(char_output_dir)

  print("Generating data " + char_output_dir)
  

  # Generate synthetic images
  for j in range(0,NUM_IMAGES_PER_CLASS):
    
    # Choose a light or dark background
    path = random.choice(all_backgrounds)
    
    # Choose a file
    list_filernd = random.choice(path)
    
    # Choose a font
    list_rfo = random.choice(list_files_font)
    
    # Get random distortion
    distort_arg = get_distort_arg()
    
    # Get random blur amount
    blur = random.randint(0,3)
    
    # Get random noise amount
    noise = random.randint(0,5)
    
    # Add random shifts from the center
    x = str(random.randint(-3,3))
    y = str(random.randint(-3,3))
    
    # Choose light color for dark backgrounds and vice-versa
    if path == all_backgrounds[0] :
      color = random.choice(color_light)
    else:
      color = random.choice(color_dark)

    command =  "convert " + str(list_filernd) + " -fill "+str(color)+" -font "+ \
            str(list_rfo) + " -weight 200 -pointsize 24 -distort Perspective "+str(distort_arg)+" "+"-gravity center" + " -blur 0x" + str(blur) \
+ " -evaluate Gaussian-noise " + str(noise) +  " " + " -annotate +" + x + "+" + y + " " +  str(char_list[i]) + " " + char_output_dir + "output_file"+str(i)+str(j)+".jpg"
		
    # Uncomment line below to see what command is executed.
    # print(command)
    os.system(str(command))

To generate the training data, change OUTPUT_DIR = 'output/' to OUTPUT_DIR = 'train/' and NUM_IMAGES_PER_CLASS = 10 to NUM_IMAGES_PER_CLASS = 800.

To generate the test data, change OUTPUT_DIR = 'output/' to OUTPUT_DIR = 'test/' and NUM_IMAGES_PER_CLASS = 10 to NUM_IMAGES_PER_CLASS = 200.

This produces one folder of images per class for A-Z and 0-9 (the generated samples for '0' are shown below).
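A quick sanity check of the generated data (a minimal sketch, assuming the train/ and test/ layouts produced above) is to count the images in each class folder:

import os

# Expect 36 class folders (A-Z, 0-9), roughly 800 images each for train/ and 200 for test/.
for split in ('train/', 'test/'):
    counts = {d: len(os.listdir(os.path.join(split, d))) for d in sorted(os.listdir(split))}
    print(split, len(counts), 'classes,', sum(counts.values()), 'images in total')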

2. Train the model on the data.

net.py:

# import required modules
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense

class Net:
    @staticmethod
    def build(width, height, depth, weightsPath=None):
        '''
        Modified LeNet-style structure.
        input: width, height, depth (number of channels), optional weightsPath
        returns: the model, with weights loaded if weightsPath is given
        '''
        # initialize the model
        model = Sequential()
        
        # first layer CONV => RELU => POOL
        model.add(Convolution2D(32, (3, 3), input_shape = (width, height, depth)))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size = (2, 2)))

        # second layer CONV => RELU => POOL
        model.add(Convolution2D(32, (3, 3)))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size = (2, 2)))

        # third layer of CONV => RELU => POOL
        model.add(Convolution2D(64, (3, 3)))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size = (2, 2)))

        # set of FC => RELU layers
        model.add(Flatten())

        # number of neurons in FC layer = 128
        model.add(Dense(128))
        model.add(Activation('relu'))
        model.add(Dropout(0.5))
        
        # as number of classes is 36
        model.add(Dense(36))
        model.add(Activation('softmax'))
        
        # if weightsPath is specified load the weights
        if weightsPath is not None:
            print('weights loaded')
            model.load_weights(weightsPath)
            # return model

        return model
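Before training, the network can be inspected on its own (a minimal sketch; the 32x32x3 input shape matches what train_model.py below uses):

from net import Net

# Build the 36-class model for 32x32 RGB crops and print the layer summary.
model = Net.build(width=32, height=32, depth=3)
model.summary()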

train_model.py:

# import required modules
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
import matplotlib.pyplot as plt 

# import created model
from net import Net

# Dimensions of our images
img_width, img_height = 32, 32

# 3 channel image
no_of_channels = 3

# train data Directory
train_data_dir = 'train/' 
# test data Directory
validation_data_dir = 'test/' 

epochs = 80
batch_size = 32

#initialize model
model = Net.build(width = img_width, height = img_height, depth = no_of_channels)
print('building done')
# Compile model
rms = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0)
print('optimizing done')

model.compile(loss='categorical_crossentropy',
              optimizer=rms,
              metrics=['accuracy'])

print('compiling')

# this is the augmentation configuration used for training
# horizontal_flip = False, as we need to retain Characters
# Note: featurewise_center / featurewise_std_normalization (here and in test_datagen below)
# only take effect after calling .fit() on sample data; without it Keras warns and only the
# rescaling and geometric augmentations are applied.
train_datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
    rescale=1. / 255,
    shear_range=0.1,
    zoom_range=0.1,
    rotation_range=5,
    width_shift_range=0.05,
    height_shift_range=0.05,
    horizontal_flip=False)

# this is the augmentation configuration used for testing, only rescaling
test_datagen = ImageDataGenerator(featurewise_center=True, featurewise_std_normalization=True, rescale=1. / 255)

train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical')

validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical')

# fit the model
history = model.fit_generator(
    train_generator,
    steps_per_epoch=train_generator.samples // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // batch_size)

# evaluate on validation dataset
print(model.evaluate_generator(validation_generator))
# save weights in a file
model.save_weights('trained_weights.h5') 

print(history.history)

# Loss Curves
plt.figure(figsize=[8,6])
plt.plot(history.history['loss'],'r',linewidth=3.0)
plt.plot(history.history['val_loss'],'b',linewidth=3.0)
plt.legend(['Training loss', 'Validation Loss'],fontsize=18)
plt.xlabel('Epochs ',fontsize=16)
plt.ylabel('Loss',fontsize=16)
plt.title('Loss Curves',fontsize=16)
 
# Accuracy Curves
plt.figure(figsize=[8,6])
plt.plot(history.history['acc'],'r',linewidth=3.0)
plt.plot(history.history['val_acc'],'b',linewidth=3.0)

plt.legend(['Training Accuracy', 'Validation Accuracy'],fontsize=18)
plt.xlabel('Epochs ',fontsize=16)
plt.ylabel('Accuracy',fontsize=16)
plt.title('Accuracy Curves',fontsize=16)
plt.show()

Training results:

3. Test the trained model.

predict.py:

import sys

import numpy as np
import matplotlib.pyplot as plt
from keras.preprocessing import image

from net import Net

def load_image(img_path, show=False):
    '''
    Function: Convert image to tensor
    Input: image_path (eg. /home/user/filename.jpg) 
        (Note prefer having absolute path)
           show (default = False), set if you want to visualize the image
    Return: tensor format of image
    '''
    # load image using image module
    # convert to (32, 32) - if not already 
    img = image.load_img(img_path, target_size=(32, 32))  # Path of test image
    # show the image if show=True
    if show:
        plt.imshow(img)
        plt.axis('off')
        plt.show()
    
    # converting image to a tensor
    img_tensor = image.img_to_array(img)                  # (height, width, channels)
    img_tensor = np.expand_dims(img_tensor, axis=0)         
    img_tensor /= 255.                                      
    
    # return converted image
    return img_tensor

def predict(weights_path, image_path):
    '''
    Function: loads a trained model and predicts the class of given image
    Input: weights_path (.h5 file, prefer adding absolute path)
           image_path (image to predict, prefer adding absolute path)
    Returns: none
    '''
    model = Net.build(32, 32, 3, weights_path)
    
    img_tensor = load_image(image_path, show=True)  # load image, rescaled to 0-1
    class_ = model.predict(img_tensor)  # predict the output, returns a length-36 score array
    print("Detected: ", class_[0])  # print the raw scores

    # class index with the maximum predicted score
    output_indice = int(np.argmax(class_[0]))
    score = class_[0][output_indice]

    # indices 0-9 are the digits, indices 10-35 are the characters A-Z
    # (flow_from_directory sorts the class folders, so the digits come first)
    characters = [chr(i) for i in range(65, 65 + 26)]
    if output_indice > 9:
        print("Predicted: ", characters[output_indice - 10])
        print("value: ", score)  # print the predicted score
    else:
        # it's a digit, print it directly
        print("Predicted: ", output_indice)
        print("value: ", score)  # print the predicted score


if(len(sys.argv) < 2):
    print("Enter test image path as an argument")
    sys.exit(0)
test_image = sys.argv[1]

predict("trained_weights.h5", test_image) # Specify weights file and Test image

Prediction results:

Finally, if you would like the complete project files, check out my GitHub: https://github.com/cchangcs/CharClassification
