class AudioProcessor(object):
"""Handles loading, partitioning, and preparing audio training data."""
def __init__(self, data_url, data_dir, silence_percentage, unknown_percentage,
             wanted_words, validation_percentage, testing_percentage,
             model_settings):
    """Initialize the processor: fetch the dataset, index it, and build the graph.

    Args:
      data_url: URL of the dataset archive (empty string skips the download).
      data_dir: Local directory where the dataset is (or will be) stored.
      silence_percentage: How many silence samples to add, as a percentage of
        the number of wanted-word samples.
      unknown_percentage: How many unknown-word samples to add, as a percentage
        of the number of wanted-word samples.
      wanted_words: The words the model is expected to recognize.
      validation_percentage: Share of samples assigned to the validation set.
      testing_percentage: Share of samples assigned to the testing set.
      model_settings: Model configuration, forwarded to
        `prepare_processing_graph` — see that method for details.
    """
    self.data_dir = data_dir
    # Fetch and unpack the dataset archive unless it is already present.
    self.maybe_download_and_extract_dataset(data_url, data_dir)
    # Partition samples into training/validation/testing and build label maps.
    self.prepare_data_index(silence_percentage, unknown_percentage,
                            wanted_words, validation_percentage,
                            testing_percentage)
    # Load the long background-noise clips used for audio mixing.
    self.prepare_background_data()
    # Build the TF graph that turns raw audio into model input features.
    self.prepare_processing_graph(model_settings)
data_url:指向datasets链接地址
data_dir:指向datasets存储位置
silence_percentage:假如训练时期望识别的Words在datasets中的总个数是100个,当silence_percentage=10的话,就表示给训练的集合中添加10个静音元素。
unknown_percentage:假如训练时期望识别的Words在datasets中的总个数是100个,当unknown_percentage=10的话,就表示给训练的集合中添加10个未知元素。
PS:所以参与训练的集合元素按照数据性质分为三类:静音+未知+期望words。
wanted_words:期望识别的Word
PS:参与训练的集合元素按照训练流程分为三类training、validation、testing。
validation_percentage:validation占了多少。
testing_percentage:testing占了多少。
model_settings:模型配置参数,传给 prepare_processing_graph 用于构建音频数据处理计算图。
def maybe_download_and_extract_dataset(self, data_url, dest_directory):
    """Download and extract data set tar file.

    Downloads the dataset pointed to by `data_url` into `dest_directory`.
    If a local copy of the dataset already exists, pass an empty
    `--data_url=` on the command line and point `data_dir` at the
    existing dataset location to skip the download.

    NOTE(review): the method body is missing from this excerpt — confirm
    behavior against the full source file.
    """
def prepare_data_index(self, silence_percentage, unknown_percentage,
                       wanted_words, validation_percentage,
                       testing_percentage):
    """Prepares a list of the samples organized by set and label.

    The main purpose of this function is to produce three attributes:
    `self.data_index`, `self.words_list` and `self.word_to_index`.

    1. self.data_index
       self.data_index = {'validation': [], 'testing': [], 'training': []}
       Each entry is a dict: {'label': word, 'file': wav_path}
       By label, each partition contains three kinds of samples:

                    | silence | unknown | wanted words... |
         training   |         |         |                 |
         validation |         |         |                 |
         testing    |         |         |                 |
    """
    # NOTE(review): `all_words`, `unknown_index` and `wanted_words_index`
    # are used below but their initialization is missing from this excerpt —
    # confirm against the full source file.
    # Look through all the subfolders to find audio samples
    search_path = os.path.join(self.data_dir, '*', '*.wav')
    for wav_path in gfile.Glob(search_path):
        # The parent directory name of each .wav file is its label.
        _, word = os.path.split(os.path.dirname(wav_path))
        word = word.lower()
        # Treat the '_background_noise_' folder as a special case, since we expect
        # it to contain long audio samples we mix in to improve training.
        if word == BACKGROUND_NOISE_DIR_NAME:
            continue
        all_words[word] = True
        # Deterministically assign this file to training/validation/testing.
        set_index = which_set(wav_path, validation_percentage, testing_percentage)
        # If it's a known class, store its detail, otherwise add it to the list
        # we'll use to train the unknown label.
        if word in wanted_words_index:
            self.data_index[set_index].append({'label': word, 'file': wav_path})
        else:
            unknown_index[set_index].append({'label': word, 'file': wav_path})
    # We need an arbitrary file to load as the input for the silence samples.
    # It's multiplied by zero later, so the content doesn't matter.
    silence_wav_path = self.data_index['training'][0]['file']
    for set_index in ['validation', 'testing', 'training']:
        set_size = len(self.data_index[set_index])
        # Add silence samples in proportion to the wanted-word count.
        silence_size = int(math.ceil(set_size * silence_percentage / 100))
        for _ in range(silence_size):
            self.data_index[set_index].append({
                'label': SILENCE_LABEL,
                'file': silence_wav_path
            })
        # Pick some unknowns to add to each partition of the data set.
        random.shuffle(unknown_index[set_index])
        unknown_size = int(math.ceil(set_size * unknown_percentage / 100))
        self.data_index[set_index].extend(unknown_index[set_index][:unknown_size])
    # Make sure the ordering is random.
    for set_index in ['validation', 'testing', 'training']:
        random.shuffle(self.data_index[set_index])
    # 2. self.words_list — the canonical ordered label list for the model.
    self.words_list = prepare_words_list(wanted_words)
    # 3. self.word_to_index — maps every discovered word to its label index;
    # words outside `wanted_words` collapse to the shared "unknown" index.
    self.word_to_index = {}
    for word in all_words:
        if word in wanted_words_index:
            self.word_to_index[word] = wanted_words_index[word]
        else:
            self.word_to_index[word] = UNKNOWN_WORD_INDEX
    self.word_to_index[SILENCE_LABEL] = SILENCE_INDEX