歌声合成环境 Docker 镜像
# Singing-voice-synthesis environment.
# NOTE(review): ubuntu:16.04 is EOL; kept as-is because the pinned Python
# package versions below (tensorflow-gpu 1.8, keras 2.1.2, ...) target this era.
FROM ubuntu:16.04

# Build tools plus audio utilities, installed in a single layer with the apt
# list cache removed in the same layer so it never persists in the image.
# Packages sorted alphabetically for diffability.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        autoconf \
        automake \
        bzip2 \
        ca-certificates \
        ffmpeg \
        g++ \
        gcc \
        git \
        libtool \
        make \
        patch \
        python3 \
        python3-dev \
        python3-pip \
        sox \
        subversion \
        unzip \
        vim \
        wget \
        zlib1g-dev && \
    rm -rf /var/lib/apt/lists/*

# Make `python` and `pip` resolve to the Python 3 binaries.
RUN ln -s /usr/bin/python3 /usr/bin/python && \
    ln -s /usr/bin/pip3 /usr/bin/pip

# Pinned scientific-audio stack. --no-cache-dir keeps pip's download cache
# out of the image layer (hadolint DL3042). The -i flag uses the Douban
# mirror for faster installs from mainland China.
RUN pip install --no-cache-dir \
        numpy==1.16.0 \
        scipy==1.1.0 numba==0.37.0 \
        argparse librosa audioread \
        pysoundfile picklable_itertools~=0.1.1 \
        sacred~=0.6.10 tqdm~=4.8.4 q~=2.6 \
        keras==2.1.2 tensorflow-gpu==1.8.0 \
        h5py==2.7.1 matplotlib==2.1.1 \
        pyworld -i https://pypi.douban.com/simple
$ sudo docker build -t sing:v1.0 . # 建立镜像
$ sudo docker run -it -d -v /work:/work -p 9090:9090 --name sings sing:v1.0 # 建立容器
核心源码
# -*- coding:utf-8 -*-
# /usr/bin/python
'''
@Describe: audio processing — librosa beat-tracking / HPSS demo
@Evn : pip install librosa
@Date : 2019-08-04 15:23
'''
'''
pip install librosa
ffmpeg is very strong.
librosa.beat:      tempo and beat detection
librosa.core:      load audio from disk and compute various spectrograms
librosa.decompose: matrix decompositions for harmonic/percussive source
                   separation and generic spectrogram decomposition
librosa.display:   visualization of audio features
librosa.effects:   time-domain processing — pitch shifting, time stretching,
                   time-domain wrappers
librosa.feature:   feature extraction and manipulation — chromagram,
                   pseudo constant-Q (log-frequency) transform, mel
                   spectrogram, MFCC, tuning estimation
librosa.filters:   filter-bank generation — chroma, pseudo-CQT, CQT, etc.
librosa.onset:     onset detection and onset-strength computation
librosa.segment:   functions for structural segmentation
librosa.sequence:  sequential modelling utilities
librosa.util:      helpers (normalization, padding, centering)
'''
# Beat tracking example
import librosa
from librosa import display
import matplotlib.pyplot as plt

# 1. Path of the audio file to analyse.
#    (The original also called librosa.util.example_audio_file() here, but the
#    result was immediately overwritten — the dead call has been removed.)
filename = "a.wav"

# 2. Load the audio as a waveform `y`; store the sampling rate as `sr`.
y, sr = librosa.load(filename)

# Plot and save the waveform. BUG FIX: savefig() must come BEFORE show() —
# show() clears the current figure, so saving afterwards wrote a blank image.
plt.figure(figsize=(12, 4))
display.waveplot(y, sr=sr)
plt.savefig('./test.png')
plt.show()
print("y", y, "\nsr", sr)

# 3. Run the default beat tracker.
tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
print("tempo", tempo, "\nbeat_frames", beat_frames)
print('Estimated tempo: {:.2f} beats per minute'.format(tempo))

# 4. Convert the frame indices of beat events into timestamps.
beat_times = librosa.frames_to_time(beat_frames, sr=sr)

# 5. Separate harmonics and percussives into two waveforms.
y_harmonic, y_percussive = librosa.effects.hpss(y)
print(y_harmonic, '\n', y_percussive)
# -*- coding:utf-8 -*-
# /usr/bin/python
'''
@Describe: madmom spectrogram / spectral-flux demo
@Evn : pip install madmom
@Date : 2019-09-01 12:57
'''
import numpy as np
import matplotlib.pyplot as plt
import madmom

filename = "../datasets/test.wav"

# Load raw samples and the sampling rate straight from the WAV file.
signal, sample_rate = madmom.audio.signal.load_wave_file(filename)
print('signal', signal, 'sample_rate', sample_rate)

# Higher-level Signal object wrapping the same audio.
sig = madmom.audio.signal.Signal(filename)
sample_rate = sig.sample_rate
print('sig', sig, 'sample_rate', sample_rate)

# Slice the signal into overlapping frames (2048 samples, hop of 441).
fs = madmom.audio.signal.FramedSignal(sig, frame_size=2048, hop_size=441)
print('fs', fs)
print(fs.frame_rate, fs.hop_size, fs[0])

# Short-time Fourier transform and its magnitude spectrogram.
stft = madmom.audio.stft.STFT(fs)
print(stft[0:2])
spec = madmom.audio.spectrogram.Spectrogram(stft)
plt.figure()
plt.imshow(spec[:, :200].T, aspect='auto', origin='lower')
plt.savefig('./test.png')

# Spectral flux: positive frame-to-frame differences summed over frequency.
# calculate the difference
diff = np.diff(spec, axis=0)
# keep only the positive differences
pos_diff = np.maximum(0, diff)
# sum everything to get the spectral flux
sf = np.sum(pos_diff, axis=1)

plt.figure()
plt.imshow(spec[:, :200].T, origin='lower', aspect='auto')
plt.savefig('./test1.png')
plt.figure()
plt.imshow(pos_diff[:, :200].T, origin='lower', aspect='auto')
plt.savefig('./test2.png')
plt.figure()
plt.plot(sf)
plt.savefig('./test3.png')

# Logarithmically-spaced filter bank applied to the spectrogram.
filt_spec = madmom.audio.spectrogram.FilteredSpectrogram(
    spec, filterbank=madmom.audio.filters.LogFilterbank, num_bands=24)
# BUG FIX: the original drew this imshow onto the spectral-flux figure above
# (no plt.figure() call) and never saved it; give it its own figure and file.
plt.figure()
plt.imshow(filt_spec.T, origin='lower', aspect='auto')
plt.savefig('./test_filt.png')

# Logarithmic magnitude scaling, log(x + add), of the filtered spectrogram.
log_spec = madmom.audio.spectrogram.LogarithmicSpectrogram(filt_spec, add=1)
plt.figure()
plt.imshow(log_spec.T, origin='lower', aspect='auto')
plt.savefig('./test4.png')
生成歌声样本
- 链接:https://pan.baidu.com/s/1Tl9mSQM2kmD8vgBfKVvFaA 密码:ma2r