基于深度学习方法的语音识别研究（三）

前些天，在工大薛师兄的帮助下（在此表示感谢），我完成了基于BLSTM的语音识别声学模型的搭建。由于实验室有保密协议，这里只能提供部分代码，还望各位同学体谅。代码如下：

# -*- coding: utf-8 -*-
# author : zhangwei

import tensorflow as tf
import numpy as np

# Locations of the training / test feature and label text files
# (judging by the file names: MFCC features and their labels).
filename_01 = '/home/zhangwei/data/train_mfcc_800000.txt'
filename_02 = '/home/zhangwei/data/train_label_800000.txt'
filename_03 = '/home/zhangwei/data/test_mfcc.txt'
filename_04 = '/home/zhangwei/data/test_label.txt'

# Load every file into a NumPy array, in the same order as the paths above.
X_train, Y_train, X_test, Y_test = map(
    np.loadtxt,
    (filename_01, filename_02, filename_03, filename_04),
)

# --- Input / output dimensions -------------------------------------------
n_steps = 1           # time steps per sample fed to the RNN
n_inputs = 39         # feature dimension of each time step
n_classes = 219       # number of output classes

# --- Model size -----------------------------------------------------------
n_hidden_units = 128  # LSTM units per direction

# --- Training schedule ----------------------------------------------------
batch_size = 50       # samples per mini-batch (also fixes placeholder shapes)
n_epoch = 100         # number of passes of the training loop
lr = 0.01             # learning rate

# Graph inputs. The batch dimension is fixed to `batch_size`, so every feed
# must contain exactly that many samples.
x = tf.placeholder(tf.float32, [batch_size, n_steps, n_inputs])  # input features
y = tf.placeholder(tf.float32, [batch_size, n_classes])          # per-sample targets
# Scalar fed at run time; used as the dropout keep probability of the cells.
keep_prob = tf.placeholder(tf.float32)

def get_cell():
    """Build one LSTM cell with ReLU activation and output dropout.

    The cell has `n_hidden_units` units. Dropout is applied to the cell
    outputs only, with the keep probability taken from the `keep_prob`
    placeholder; inputs are passed through untouched.

    Returns:
        A `DropoutWrapper` around a freshly created `LSTMCell`.
    """
    lstm = tf.nn.rnn_cell.LSTMCell(
        num_units=n_hidden_units,
        activation=tf.nn.relu,
    )
    wrapped = tf.nn.rnn_cell.DropoutWrapper(
        cell=lstm,
        input_keep_prob=1.0,
        output_keep_prob=keep_prob,
    )
    return wrapped

# One cell per direction; each call to get_cell() creates a separate cell.
cell_fw = get_cell()
cell_bw = get_cell()

# Both directions start from an all-zero state sized for one full batch.
init_cell_fw = cell_fw.zero_state(batch_size, tf.float32)
init_cell_bw = cell_bw.zero_state(batch_size, tf.float32)

# Run the bidirectional RNN over `x`. Per the TensorFlow API, the first
# return value is a (forward_outputs, backward_outputs) tuple; the final
# states are not needed here and are discarded.
output, _ = tf.nn.bidirectional_dynamic_rnn(
    cell_fw=cell_fw,
    cell_bw=cell_bw,
    inputs=x,
    initial_state_fw=init_cell_fw,
    initial_state_bw=init_cell_bw,
)

# Output projection: one [n_hidden_units, n_classes] weight matrix per
# direction (W[0] for forward, W[1] for backward) plus a shared bias.
W = tf.Variable(tf.truncated_normal([2, n_hidden_units, n_classes], stddev=0.01))
b = tf.Variable(tf.zeros([n_classes]))

# `output` is the (forward, backward) tuple returned by
# bidirectional_dynamic_rnn. Each direction must be reshaped on its own:
# reshaping the tuple as a whole stacks both directions into one tensor,
# which makes output_fw and output_bw identical and doubles the row count
# so that it no longer matches `y`.
# NOTE: flattening to [-1, n_hidden_units] yields one row per sample only
# because n_steps == 1.
output_fw = tf.reshape(output[0], shape=[-1, n_hidden_units])
output_bw = tf.reshape(output[1], shape=[-1, n_hidden_units])

# Unnormalised class scores (logits) and the corresponding probabilities.
logist = tf.matmul(output_fw, W[0]) + tf.matmul(output_bw, W[1]) + b
prediction = tf.nn.softmax(logits=logist)

# softmax_cross_entropy_with_logits applies softmax internally, so it must be
# given the raw logits (`logist`). Feeding it `prediction`, which is already
# a softmax output, would apply softmax twice and flatten the gradients.
loss_op = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logist, labels=y)
)

# Use the configured learning rate instead of a duplicated literal.
train_op = tf.train.AdamOptimizer(lr).minimize(loss_op)

# A sample counts as correct when the most probable predicted class equals
# the class with the largest value in the target row.
correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Op that initialises every variable created above.
init = tf.global_variables_initializer()

# Open a session, initialise the variables, then iterate for n_epoch epochs.
with tf.Session() as sess:
    sess.run(init)
    for i in range(n_epoch):
        # NOTE(review): the per-epoch training / evaluation code is not shown
        # here. `loss`, `train_acc` and `test_acc` are not defined anywhere in
        # the visible script, so the line below raises NameError as written;
        # presumably they are produced by the omitted steps -- TODO confirm.
        # The `print` statement form below is Python 2 syntax.
        print 'Iter : ' + str(i) + ' ; Loss : ' + str(loss) + ' ; Train Acc : ' + str(train_acc) + ' ; Test Acc : ' + str(test_acc)

基于深度学习方法的语音识别研究（三）

猜你喜欢