以前模仿别人的代码写过端到端的验证码识别程序,今天被人问及端到端 OCR 如何做,竟然想不起来细节了……再好的记忆力也不如一个烂笔头,把网络结构保留在这里备忘。
图示
代码
class CAPTCHANet(nn.Block):
    """Multi-head CNN: a shared body followed by several parallel dense heads.

    The body is Conv2D(16) -> MaxPool2D -> Conv2D(32) -> Dense(256). Every
    head is a ``Dense(outputNum)`` layer applied to the same body output, and
    ``forward`` returns the head outputs as a list (presumably one prediction
    per CAPTCHA character position -- confirm against the training code).

    Args:
        outputNum: Number of units in each output head (presumably the number
            of character classes -- confirm against the label encoding).
        verbose: When true, ``forward`` prints the output shape of every
            layer as it is evaluated.
        headNum: Number of parallel output heads. Defaults to 4, the count
            that used to be hard-coded.
        **kwargs: Forwarded unchanged to ``nn.Block.__init__``.
    """

    def __init__(self, outputNum, verbose=False, headNum=4, **kwargs):
        super(CAPTCHANet, self).__init__(**kwargs)
        self.verbose = verbose
        with self.name_scope():
            # Layers are created in a fixed order (body first, then heads) so
            # auto-generated parameter names do not change between versions.
            layers = [
                nn.Conv2D(16, kernel_size=3, strides=1, padding=1, activation='relu'),
                nn.MaxPool2D(pool_size=3, strides=2),
                nn.Conv2D(32, kernel_size=3, strides=1, padding=1, activation='relu'),
                # Optional third conv stage, disabled in the original design:
                # nn.Conv2D(64, kernel_size=3, strides=1, padding=1, activation='relu'),
                # Dense is fed the conv feature map directly; per the Gluon
                # docs it flattens its input by default -- TODO confirm for
                # the MXNet version in use.
                nn.Dense(256, activation='relu'),
            ]
            self.netbody = nn.Sequential()
            for layer in layers:
                self.netbody.add(layer)

            # The Sequential is used only as a container for the heads: they
            # are evaluated in parallel in forward(), never chained.
            # NOTE(review): each head ends in ReLU; if these outputs are fed
            # to a softmax loss as logits, negative values are clipped --
            # confirm this is intended.
            self.netout = nn.Sequential()
            for _ in range(headNum):
                self.netout.add(nn.Dense(outputNum, activation='relu'))

    def forward(self, X):
        """Run the shared body on ``X`` and return a list of head outputs.

        Args:
            X: Input batch passed to the first body layer.

        Returns:
            A list with one entry per head, in the order the heads were
            added; each entry is that head applied to the body output.
        """
        bodyout = X
        for i, layer in enumerate(self.netbody):
            bodyout = layer(bodyout)
            if self.verbose:
                # Single pre-formatted argument: prints the same text on
                # Python 2 and Python 3 (the old print statement is a
                # SyntaxError on Python 3).
                print('bodylayer# %d  shape: %s' % (i + 1, bodyout.shape))

        out = []
        for i, layer in enumerate(self.netout):
            # Every head consumes the same body output.
            tmp = layer(bodyout)
            if self.verbose:
                print('outlayer# %d  shape: %s' % (i + 1, tmp.shape))
            out.append(tmp)
        return out