前些天,在师兄的帮助下(在此感谢工大的薛师兄),我搭建了基于 BLSTM 的语音识别声学模型。由于实验室存在保密协议,只能提供部分代码,还望各位同学体谅。代码如下:
# -*- coding: utf-8 -*-
# author : zhangwei
"""Frame-level BLSTM acoustic model for speech recognition (TensorFlow 1.x API).

The script loads MFCC features and labels from text files, builds a
bidirectional LSTM followed by a linear softmax layer, and trains it with Adam
while reporting loss / train accuracy / test accuracy once per epoch.

NOTE(review): the original post withheld the body of the training loop, so the
batching and evaluation code below is a minimal reconstruction.  Confirm the
label file format and the dropout keep probability against the real data.
"""
import tensorflow as tf
import numpy as np

filename_01 = '/home/zhangwei/data/train_mfcc_800000.txt'
filename_02 = '/home/zhangwei/data/train_label_800000.txt'
filename_03 = '/home/zhangwei/data/test_mfcc.txt'
filename_04 = '/home/zhangwei/data/test_label.txt'

X_train = np.loadtxt(filename_01)
Y_train = np.loadtxt(filename_02)
X_test = np.loadtxt(filename_03)
Y_test = np.loadtxt(filename_04)

batch_size = 50
n_steps = 1
n_inputs = 39
n_epoch = 100
n_classes = 219
n_hidden_units = 128
lr = 0.01
# NOTE(review): the training keep probability is not given in the original
# post; 0.5 is a placeholder value -- confirm before relying on results.
train_keep_prob = 0.5

x = tf.placeholder(dtype=tf.float32, shape=[batch_size, n_steps, n_inputs])
y = tf.placeholder(dtype=tf.float32, shape=[batch_size, n_classes])
keep_prob = tf.placeholder(tf.float32)


def get_cell():
    """Return a ReLU-activated LSTM cell with dropout applied to its outputs.

    The dropout keep probability is read from the ``keep_prob`` placeholder,
    so it can be set to 1.0 at evaluation time.
    """
    n_cell = tf.nn.rnn_cell.LSTMCell(num_units=n_hidden_units,
                                     activation=tf.nn.relu)
    return tf.nn.rnn_cell.DropoutWrapper(cell=n_cell,
                                         input_keep_prob=1.0,
                                         output_keep_prob=keep_prob)


cell_fw = get_cell()
cell_bw = get_cell()
init_cell_fw = cell_fw.zero_state(batch_size=batch_size, dtype=tf.float32)
init_cell_bw = cell_bw.zero_state(batch_size=batch_size, dtype=tf.float32)

# ``output`` is a (forward_outputs, backward_outputs) tuple.
output, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw=cell_fw,
                                            cell_bw=cell_bw,
                                            inputs=x,
                                            initial_state_fw=init_cell_fw,
                                            initial_state_bw=init_cell_bw)

W = tf.Variable(tf.truncated_normal([2, n_hidden_units, n_classes], stddev=0.01))
b = tf.Variable(tf.zeros([n_classes]))

# Each direction must be reshaped separately.  Reshaping the tuple itself
# stacks both directions and doubles the number of rows, which no longer
# matches ``y``.  The row count equals batch_size only because n_steps == 1.
output_fw = tf.reshape(output[0], shape=[-1, n_hidden_units])
output_bw = tf.reshape(output[1], shape=[-1, n_hidden_units])

logist = tf.matmul(output_fw, W[0]) + tf.matmul(output_bw, W[1]) + b
prediction = tf.nn.softmax(logits=logist)

# The cross-entropy op applies softmax internally, so it must receive the raw
# scores (``logist``), not the already-normalised ``prediction``.
loss_op = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logist, labels=y))
train_op = tf.train.AdamOptimizer(lr).minimize(loss_op)

correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

init = tf.global_variables_initializer()


def _iter_batches(features, labels):
    """Yield (batch_x, batch_y) pairs shaped to fit the ``x`` / ``y`` placeholders.

    Only full batches are produced because the placeholders have a fixed batch
    dimension; trailing samples that do not fill a batch are skipped.
    """
    n_batches = min(len(features), len(labels)) // batch_size
    for k in range(n_batches):
        start = k * batch_size
        stop = start + batch_size
        batch_x = features[start:stop].reshape([batch_size, n_steps, n_inputs])
        batch_y = labels[start:stop]
        if batch_y.ndim == 1:
            # NOTE(review): assumes a 1-D label file holds integer class ids
            # in [0, n_classes); they are expanded to one-hot rows here.
            batch_y = np.eye(n_classes, dtype=np.float32)[batch_y.astype(np.int64)]
        yield batch_x, batch_y


def _evaluate(sess, features, labels):
    """Return the mean batch accuracy over the given data with dropout disabled."""
    scores = [
        sess.run(accuracy, feed_dict={x: batch_x, y: batch_y, keep_prob: 1.0})
        for batch_x, batch_y in _iter_batches(features, labels)
    ]
    return float(np.mean(scores)) if scores else float('nan')


with tf.Session() as sess:
    sess.run(init)
    for i in range(n_epoch):
        batch_losses = []
        batch_accs = []
        for batch_x, batch_y in _iter_batches(X_train, Y_train):
            _, batch_loss, batch_acc = sess.run(
                [train_op, loss_op, accuracy],
                feed_dict={x: batch_x, y: batch_y, keep_prob: train_keep_prob})
            batch_losses.append(batch_loss)
            batch_accs.append(batch_acc)

        # Training metrics are running means over the epoch's batches, measured
        # with dropout active; test accuracy is measured with dropout off.
        loss = float(np.mean(batch_losses)) if batch_losses else float('nan')
        train_acc = float(np.mean(batch_accs)) if batch_accs else float('nan')
        test_acc = _evaluate(sess, X_test, Y_test)

        # Single-argument print(...) behaves the same on Python 2 and Python 3.
        print('Iter : ' + str(i) + ' ; Loss : ' + str(loss) + ' ; Train Acc : ' + str(train_acc) + ' ; Test Acc : ' + str(test_acc))