textcnn结构图
def get_model(self, dict_size):
    """Build the multi-channel TextCNN classification model.

    Two inputs:
      * token ids, shape (max_len,), int32 -> embedded to (max_len, embedding_dim)
      * dictionary features, shape (max_len, words_dic_num), float32

    The embedding and the dictionary features are concatenated along the
    feature axis, reshaped into `config.channels` channels, run through
    parallel Conv2D + max-pool branches (one per kernel height in
    `config.blocks`), concatenated, flattened, dropout-regularised and
    classified with a softmax Dense layer.

    Args:
        dict_size: vocabulary size for the Embedding layer.

    Returns:
        A keras.models.Model with inputs [token_ids, dict_features].
    """
    x = keras.layers.Input(shape=(self.config.max_len,), dtype=tf.int32)
    x_word = keras.layers.Input(
        shape=(self.config.max_len, self.config.words_dic_num),
        dtype=tf.float32)

    x_embedded = keras.layers.Embedding(dict_size, self.config.embedding_dim)(x)
    # Concatenate learned embeddings with dictionary features:
    # (max_len, embedding_dim + words_dic_num)
    x_embeddeds = keras.layers.concatenate([x_embedded, x_word], axis=2)

    n_channels = self.config.channels
    feature_dim = self.config.embedding_dim + self.config.words_dic_num
    # The reshape below splits the *combined* feature dim, so that is the
    # quantity that must divide evenly.  (The original asserted only
    # embedding_dim % n_channels, which is not what gets split.)
    assert feature_dim % n_channels == 0

    # Reshape to (max_len, feature_dim // n_channels, n_channels) so Conv2D
    # always receives a 4-D tensor.  The original skipped this when
    # n_channels == 1, leaving a 3-D tensor (and silently dropping the
    # dictionary features); reshaping unconditionally is equivalent for
    # n_channels > 1 and fixes the single-channel case.
    x_embedded = keras.layers.Reshape(
        (self.config.max_len, feature_dim // n_channels, n_channels))(x_embeddeds)

    conv_outputs = []
    for conv_size in self.config.blocks:
        # NOTE(review): kernel width is embedding_dim // n_channels (8 with
        # the documented config) while the input width is
        # feature_dim // n_channels (11), so each branch keeps several
        # positions along the width axis ([?, 1, 4, 128] per branch after
        # pooling).  This matches the accompanying notes but may be
        # unintended -- confirm whether the kernel should span the full width.
        conv = keras.layers.Conv2D(
            filters=self.config.filters,
            kernel_size=(conv_size, self.config.embedding_dim // n_channels),
            padding='valid', activation='relu')(x_embedded)
        # Max-pool over the entire (valid) sequence axis -- classic TextCNN.
        pool = keras.layers.MaxPool2D(
            pool_size=(self.config.max_len - conv_size + 1, 1))(conv)
        conv_outputs.append(pool)

    if len(conv_outputs) > 1:
        result = keras.layers.Concatenate()(conv_outputs)
    else:
        result = conv_outputs[0]
    # Flatten in both cases.  The original flattened only the concatenated
    # branch, so a single conv block would feed a 4-D tensor into Dense,
    # applying it per-position instead of on the pooled feature vector.
    result = keras.layers.Flatten()(result)

    if self.config.dropout > 0.0:
        result = keras.layers.Dropout(self.config.dropout)(result)

    # Final classifier; L2 activity regularisation is optional via config.
    result = keras.layers.Dense(
        self.labels.size(),
        activity_regularizer=keras.regularizers.l2(self.config.act_l2)
        if self.config.act_l2 > 0.0 else None)(result)
    result = keras.layers.Softmax()(result)

    model = keras.models.Model(inputs=[x, x_word], outputs=result)
    return model
涉及到的参数:
embedding_dim 32 --max_len 70 --batch_size 2000 --lr 0.001 --epochs 1 --dropout 0.25 --num_gpus 1 --filters 128 --channels 4 --act_l2 0.0001
网络有两个输入,一个是文本的输入,另一个是词典的输入。?为一个batch文本的条数
input1.shape=[?,70],input2.shape=[?,70,12],
input1使用embeding.shape=[6886,32]进行编码得到[?,70,32],
再将编码后的结果与input2进行拼接,得到[?,70,44]的网络总输入。
再对44进行reshape,得到[?,70,11,4],
使用三种尺寸的卷积核并行进行卷积(而非三个通道;通道数为4),卷积核尺寸分别为[3,8]、[4,8]、[5,8],每种尺寸各有128个卷积核(filters=128)分别卷积。
再对卷积的结果分别进行最大池化,池化的size分别为[68,1],[67,1],[66,1],步长分别为[68,1],[67,1],[66,1]。
分别得到[?,1,4,128],再将三个按最后一列拼接,得到[?,1,4,384],后用Flatten进行展平得到[?,1536],
再以 0.25 的比例进行 dropout,
后用 [1536,826] 的 dense(对其激活值施加 act_l2=0.0001 的 L2 活动正则)得到 [?,826] 的输出,最后接上 softmax 构成完整网络结构。