得到加噪合成数据后,即可进行特征生成。本文采用 26 维的 MFCC 特征,当然你也可以根据自己的需求选择其他特征。准备好数据后,导入并直接生成即可。注意这里的分帧方式必须与标签制作时保持一致(下面给出的程序是一致的)。另外还需要注意数据均衡问题,程序中也有相应处理。具体实现如下:
# -*- coding: utf-8 -*-
from python_speech_features import mfcc
import scipy.io.wavfile as wav
import utils
import json
import math
import numpy as np
from audiolib import audiowrite
from random import shuffle
import os
# Load the feature-extraction configuration once, at import time; every
# function below receives this dictionary as its ``params`` argument.
with open('./feature.json','r') as f:
    params = json.loads(f.read())
print("params = ",params)
def read_label_info(path):
    """Read integer labels from a whitespace-separated text file.

    Args:
        path: Path of the label text file. Every line holds zero or more
            integers separated by whitespace.

    Returns:
        A list with one entry per line of the file; each entry is the list
        of integers parsed from that line (an empty list for a blank line,
        so entries stay aligned with line numbers).

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a token is not a valid integer literal.
    """
    # The context manager closes the file even when int() raises.
    with open(path, 'r') as f:
        # split() without an argument tolerates trailing or repeated
        # whitespace; split(' ') would produce '' tokens that crash int().
        return [[int(token) for token in line.split()] for line in f]
def time2frame(labels, frame_len=400, frame_step=160):
    """Convert sample positions into frame indices.

    Each position ``l`` is mapped to
    ``1 + ceil((l - frame_len) / frame_step)``, clamped at 0.

    Args:
        labels: Iterable of iterables of sample positions.
        frame_len: Frame length in samples.
            NOTE(review): the 400/160 defaults presumably correspond to
            25 ms / 10 ms frames at 16 kHz -- confirm against the framing
            used when the labels were produced.
        frame_step: Hop between consecutive frames, in samples.

    Returns:
        A list of lists with the same nesting as ``labels``, holding the
        non-negative frame index computed for every sample position.
    """
    def _to_frame(pos):
        frame = 1 + int(math.ceil((1.0 * pos - frame_len) / frame_step))
        # Positions well below frame_len make the formula negative; a
        # negative value would act as a from-the-end index when the result
        # is used as a slice bound, so clamp it to the first frame.
        return max(0, frame)

    return [[_to_frame(pos) for pos in label] for label in labels]
def get_frame_labels(frame_indexs, length):
    """Build a ``(length, 1)`` integer column of per-frame 0/1 labels.

    Every entry of ``frame_indexs`` is read as a ``(start, end)`` pair and
    the half-open row range ``start:end`` is set to 1; all other rows stay 0.
    """
    marks = np.zeros((length, 1), dtype=int)
    for span in frame_indexs:
        begin, end = span[0], span[1]
        marks[begin:end, :] = 1
    return marks
def write_frame_labels(frame_labels, fp):
    """Write one line of space-separated integer labels to ``fp``.

    Args:
        frame_labels: Iterable of labels; each item is either a scalar or a
            one-element array (e.g. a row of an ``(n, 1)`` label column).
        fp: Writable text file object.

    Returns:
        The same ``fp``, so calls can be chained.

    Raises:
        ValueError: If an item holds more than one element.
    """
    # .item() extracts the single scalar explicitly; calling int() directly
    # on a one-element array row is deprecated in recent NumPy releases.
    tokens = [str(int(np.asarray(label).item())) + ' ' for label in frame_labels]
    # Same output as before (each label followed by a space, then a
    # newline), emitted with a single write.
    fp.write(''.join(tokens) + '\n')
    return fp
def write_frame_feats(frame_feats, path):
    """Write per-frame features to a text file, one frame per line.

    Args:
        frame_feats: Iterable of frames; each frame is an iterable of values.
        path: Destination file path (overwritten if it already exists).

    Each value is written as ``str(value)`` followed by a single space, and
    every frame is terminated by a newline.
    """
    # The context manager guarantees the file is closed (and flushed) even
    # if converting or writing a value raises part-way through.
    with open(path, 'w') as fp:
        for feat in frame_feats:
            fp.write(''.join(str(value) + ' ' for value in feat) + '\n')
def get_mfcc_features(params,mode,flag = 0):
    '''
    Generate MFCC features for the noisy (speech + noise) recordings.

    The ``.wav`` files listed under ``params[mode]['noisy_data_dir']`` are
    visited in random order; each file's MFCC matrix is saved as
    ``<name>.npy`` in the train, valid or test feature directory. The first
    70% of the shuffled files go to train, the next 15% to valid and the
    remainder to test.

    Args:
        params: Configuration dictionary. Reads ``params[mode]['noisy_data_dir']``,
            ``params[mode]['mel_num']`` (passed to ``mfcc`` as ``numcep``) and
            ``params[split]['feats_data_dir']`` for split in train/valid/test.
        mode: Key of the ``params`` section describing the input data.
        flag: When truthy, the three feature directories are deleted and
            re-created before any feature is written.

    Returns:
        Tuple ``(train_count, valid_count, test_count)`` with the number of
        files written to each split.
    '''
    if flag:
        # NOTE(review): the original indentation was lost; the delete and
        # the create calls are both assumed to be guarded by ``flag`` --
        # confirm that the directories already exist when flag is 0.
        print('--->>>train feat dirs = ',params['train']['feats_data_dir'])
        for split in ('train','test','valid'):
            utils.del_folder(params[split]['feats_data_dir'])
        for split in ('train','test','valid'):
            utils.create_folder(params[split]['feats_data_dir'])
    data_dir = params[mode]['noisy_data_dir']
    # presumably returns parallel lists of file paths and base names --
    # verify against utils.read_audio_file1
    data_path,names = utils.read_audio_file1(data_dir,'.wav')
    data_len = len(data_path)
    print('data_len = ',data_len)
    # Split boundaries expressed as positions in the shuffled order.
    train_len = int(data_len * 0.7)
    valid_len = int(data_len * 0.85)
    order = list(range(data_len))
    shuffle(order)
    for cnt,i in enumerate(order):
        (fs,sig) = wav.read(data_path[i])
        mfcc_feat = mfcc(sig,fs,numcep = params[mode]['mel_num'])
        print('cnt = ',cnt)
        # Half-open ranges [0, train_len), [train_len, valid_len) and
        # [valid_len, data_len). The previous ``cnt > train_len`` test sent
        # the file at cnt == train_len to the test split, so the returned
        # counts did not match what was written.
        if cnt < train_len:
            split = 'train'
        elif cnt < valid_len:
            split = 'valid'
        else:
            split = 'test'
        save_feat_path = params[split]['feats_data_dir'] + '/' + names[i] + '.npy'
        np.save(save_feat_path,mfcc_feat)
    return train_len,valid_len - train_len,data_len - valid_len
class CombineNoise():
    '''
    Concatenate noise recordings into fixed-length noise clips.
    '''
    def __init__(self,params,sample_length,combine_data_dir = None):
        '''
        Args:
            params: Configuration dictionary; reads
                ``params['noise']['noise_data_dir']`` and
                ``params['noise']['sample_rate']``.
            sample_length: Length of every combined clip; it is multiplied
                by the sample rate to get the number of samples.
            combine_data_dir: Optional output directory used by
                ``get_combine_noise``.
        '''
        self.noise_data_dir = params['noise']['noise_data_dir']
        self.sample_length = sample_length
        self.fs = params['noise']['sample_rate']
        # Previously never set, which made get_combine_noise fail.
        self.combine_data_dir = combine_data_dir
        self.data_paths,self.names = utils.read_audio_file1(self.noise_data_dir,'.wav')

    def combine_noise(self):
        '''
        Build one clip of ``sample_length * fs`` samples from the noise files.

        Files are read in a random order and copied back to back into the
        output buffer; the last file used is truncated to fit. When all
        files together are shorter than the clip, the order is replayed
        from the start instead of failing.

        Returns:
            Tuple ``(signal, fs)`` where ``signal`` is a 1-D ``int16`` array.
            NOTE(review): samples are stored into an int16 buffer, so the
            noise files are presumably 16-bit mono PCM -- confirm.

        Raises:
            ValueError: If samples are required but no noise file is
                available, or every noise file is empty.
        '''
        total = int(self.sample_length * self.fs)
        sig_combine = np.zeros(total,dtype = np.int16)
        # Use every file; the previous ``len(...) - 1`` silently excluded
        # the last one and failed outright with a single noise file.
        n_files = len(self.data_paths)
        if total > 0 and n_files == 0:
            raise ValueError('no noise files available to combine')
        order = list(range(n_files))
        shuffle(order)
        filled = 0
        idx = 0
        empty_streak = 0
        while filled < total:
            # Wrap around instead of running past the end of ``order``.
            (fs,sig) = wav.read(self.data_paths[order[idx % n_files]])
            idx = idx + 1
            take = min(len(sig),total - filled)
            if take == 0:
                # Guard against looping forever over empty recordings.
                empty_streak = empty_streak + 1
                if empty_streak >= n_files:
                    raise ValueError('all noise files are empty')
                continue
            empty_streak = 0
            sig_combine[filled:filled + take] = sig[:take]
            filled = filled + take
        return sig_combine,self.fs

    def get_combine_noise(self,combine_data_dir = None):
        '''
        Write one combined clip per noise file as ``<index>.wav``.

        Args:
            combine_data_dir: Output directory; falls back to the value
                given to the constructor.

        Raises:
            ValueError: If no output directory was provided.
        '''
        out_dir = combine_data_dir if combine_data_dir is not None else self.combine_data_dir
        if out_dir is None:
            raise ValueError('combine_data_dir must be provided')
        # A bounded loop replaces the previous ``while`` whose counter was
        # never advanced.
        for cnt in range(len(self.data_paths)):
            sig_combine,_ = self.combine_noise()
            save_path = out_dir + '/' + str(cnt) + '.wav'
            audiowrite(save_path,sig_combine,self.fs)
def get_noise_mfcc_features0(params,mode,length,fp,flag = 0):
    '''
    Generate MFCC features for synthesized (concatenated) noise clips.

    For each of ``length`` clips produced by ``CombineNoise.combine_noise``
    the MFCC matrix is saved to
    ``params[mode]['feats_data_dir']/<mode><i>_noisy_snr100.npy`` and a line
    ``<mode><i>:`` followed by one ``0`` label per frame is appended to
    ``fp``.

    Args:
        params: Configuration dictionary; reads ``params['noise']`` and
            ``params[mode]``.
        mode: Split name used as config key and as file-name prefix.
        length: Number of noise clips to generate.
        fp: Writable text file receiving the label lines.
        flag: Unused; kept for interface compatibility.

    Returns:
        The file object ``fp``.
    '''
    data_dir = params['noise']['noise_data_dir']
    data_path,_ = utils.read_audio_file1(data_dir,'.wav')
    print('data_len = ',len(data_path))
    # The shuffled index list built here previously was never used;
    # CombineNoise does its own random file selection.
    comNoise = CombineNoise(params,params['noise']['wav_time'])
    for cnt in range(length):
        print('cnt = ',cnt)
        sig,fs = comNoise.combine_noise()
        mfcc_feat = mfcc(sig,fs,numcep = params[mode]['mel_num'])
        print('mfcc_feat.shape = ',mfcc_feat.shape)
        # Every frame of a pure-noise clip is labelled 0.
        frame_labels = np.zeros((len(mfcc_feat),1),dtype = int)
        # Save the frame features, then append the matching label line.
        save_feat_path = params[mode]['feats_data_dir'] + '/' + mode + str(cnt) + '_noisy_snr100.npy'
        np.save(save_feat_path,mfcc_feat)
        fp.write(mode + str(cnt) + ':')
        fp = write_frame_labels(frame_labels,fp)
    return fp
def get_noise_mfcc_features1(params,mode,length,fp,idx = 0,flag = 0):
    '''
    Generate MFCC features directly from existing noise recordings.

    Uses the noise files at positions ``idx`` to ``idx + length`` (exclusive)
    of the listing returned for ``params['noise']['noise_data_dir']``. For
    each file the MFCC matrix is saved to
    ``params[mode]['feats_data_dir']/<mode><i>_noisy_snr100.npy`` and a line
    ``<mode><i>:`` followed by one ``0`` label per frame is appended to
    ``fp``. If fewer files are available than requested, only the available
    ones are processed.

    Args:
        params: Configuration dictionary; reads ``params['noise']`` and
            ``params[mode]``.
        mode: Split name used as config key and as file-name prefix.
        length: Number of noise files to use; values below 0 are treated
            as 0.
        fp: Writable text file receiving the label lines.
        idx: Index of the first noise file to use.
        flag: Unused; kept for interface compatibility.

    Returns:
        The file object ``fp``.
    '''
    data_dir = params['noise']['noise_data_dir']
    data_path,_ = utils.read_audio_file1(data_dir,'.wav')
    print('data_len = ',len(data_path))
    # A negative length would turn the slice below into "everything except
    # the last |length| files"; requesting fewer than zero files must
    # select nothing instead.
    length = max(0,length)
    # NOTE: files are taken in listing order; the shuffled index list that
    # used to be built here was never applied to the selection.
    sub_data_path = data_path[idx:(idx + length)]
    for cnt,dpath in enumerate(sub_data_path):
        print('cnt = ',cnt)
        (fs,sig) = wav.read(dpath)
        mfcc_feat = mfcc(sig,fs,numcep = params[mode]['mel_num'])
        # Every frame of a pure-noise file is labelled 0.
        frame_labels = np.zeros((len(mfcc_feat),1),dtype = int)
        # Save the frame features, then append the matching label line.
        save_feat_path = params[mode]['feats_data_dir'] + '/' + mode + str(cnt) + '_noisy_snr100.npy'
        np.save(save_feat_path,mfcc_feat)
        fp.write(mode + str(cnt) + ':')
        fp = write_frame_labels(frame_labels,fp)
    return fp
import shutil
def copy_files1(sources_path,dst_path1,dst_path2,dst_path3,fmt = '.wav',split_sizes = (25097,5904,5906)):
    """Copy feature files and their label files into three destinations.

    The files listed for ``sources_path`` are paired, in listing order, with
    the files listed for the matching label directory (``sources_path`` with
    ``'feats'`` replaced by ``'labels'``). The first ``split_sizes[0]`` pairs
    go to ``dst_path1``, the next ``split_sizes[1]`` to ``dst_path2`` and the
    next ``split_sizes[2]`` to ``dst_path3``; any remaining pairs are not
    copied. Label files go to the destination path with ``'feats'`` replaced
    by ``'labels'``.

    Args:
        sources_path: Directory holding the feature files.
        dst_path1: Destination for the first group.
        dst_path2: Destination for the second group.
        dst_path3: Destination for the third group.
        fmt: File extension passed to ``utils.read_audio_file1``.
        split_sizes: Number of files in each of the three groups; defaults
            to the sizes that were previously hard-coded.
    """
    files,_ = utils.read_audio_file1(sources_path,fmt)
    sources_label_path = sources_path.replace('feats','labels')
    labels,_ = utils.read_audio_file1(sources_label_path,fmt)
    size1,size2,size3 = split_sizes
    end1 = size1
    end2 = end1 + size2
    end3 = end2 + size3
    for cnt,(feat_file,label_file) in enumerate(zip(files,labels)):
        # Half-open ranges: the previous ``cnt > len1`` / ``cnt > len2``
        # tests silently dropped the files sitting exactly on a boundary.
        if cnt < end1:
            dst_path = dst_path1
        elif cnt < end2:
            dst_path = dst_path2
        elif cnt < end3:
            dst_path = dst_path3
        else:
            # Everything past the third group is left uncopied.
            break
        shutil.copy(feat_file,dst_path)
        shutil.copy(label_file,dst_path.replace('feats','labels'))
def copy_files2(sources_path,dst_path,fmt = '.wav'):
    """Copy every feature file and its label file to one destination.

    Feature files come from ``sources_path``; label files come from the same
    path with ``'feats'`` replaced by ``'labels'``. The two listings are
    paired in order. Features are copied to ``dst_path`` and labels to
    ``dst_path`` with ``'feats'`` replaced by ``'labels'``.
    """
    feature_files,_ = utils.read_audio_file1(sources_path,fmt)
    label_files,_ = utils.read_audio_file1(sources_path.replace('feats','labels'),fmt)
    # The label destination is the same for every pair, so derive it once.
    label_dst = dst_path.replace('feats','labels')
    for pair in zip(feature_files,label_files):
        shutil.copy(pair[0],dst_path)
        shutil.copy(pair[1],label_dst)
if __name__ == '__main__':
    print('---------get pos features-------')
    # ratio can be obtained from the statistics gathered while building the
    # VAD labels.
    # NOTE(review): presumably the fraction of speech (label 1) frames in
    # the noisy data -- confirm.
    ratio = 0.78
    train_data_len,valid_data_len,test_data_len = get_mfcc_features(params,'noisy',0)
    print('---------add neg features-------')
    # Lengths used in an earlier run, kept for reference:
    #   train_data_len = 12600, valid_data_len = 2699, test_data_len = 2702
    # Number of noise-only files added per split: N * ratio - N * (1 - ratio).
    train_neg_add = int(train_data_len * ratio - train_data_len * (1 - ratio))
    valid_neg_add = int(valid_data_len * ratio - valid_data_len * (1 - ratio))
    test_neg_add = int(test_data_len * ratio - test_data_len * (1 - ratio))
    # get_noise_mfcc_features0 (synthesized noise clips) can be called here
    # with the same (params, mode, count, fp) arguments as an alternative.
    # The context manager closes and flushes the label file, which was
    # previously left open.
    with open(params['labels']['noise_label_dir'],'w') as fp:
        # Each split consumes a disjoint, consecutive range of noise files.
        ind = 0
        get_noise_mfcc_features1(params,'train',train_neg_add,fp,ind)
        ind = ind + train_neg_add
        get_noise_mfcc_features1(params,'valid',valid_neg_add,fp,ind)
        ind = ind + valid_neg_add
        get_noise_mfcc_features1(params,'test',test_neg_add,fp,ind)
至此,特征生成就完成了。水平有限,如有不当之处,敬请指教,谢谢!