在图像增强过程中,通常利用各类图像平滑算法消除噪声,图像的常见噪声主要有加性噪声、乘性噪声和量化噪声等。
一般来说,图像的能量主要集中在其低频部分,噪声所在的频段主要在高频段,同时图像边缘信息也主要集中在其高频部分。
因此,平滑处理在抑制高频噪声的同时,也会削弱同处高频段的边缘信息,使原始图像在平滑之后出现边缘和轮廓模糊的情况。为了减轻这类不利影响,就需要利用图像锐化技术,使图像的边缘重新变得清晰。
1. 常见的锐化算子
import librosa
import matplotlib.pyplot as plt
import numpy as np
import torch
from torchaudio.transforms import MelSpectrogram
# NOTE: this script was developed against torch 1.11.
# FFT size; passed as `n_fft` to both the torchaudio and librosa mel calls.
n_fft = 1024
# Passed as `win_length` to both mel calls; None presumably lets each library
# fall back to its own default window length -- TODO confirm.
win_len = None
# Hop between successive frames; passed as `hop_length` to both mel calls.
hop_len = 512
# Number of mel bins; passed as `n_mels` to both mel calls.
n_mels = 96
# Requested sampling rate for librosa.load; the name is rebound to the rate
# that librosa.load returns.
sample_rate = 6000
# from scipy import signal
# import signal
from scipy.signal import butter, lfilter,filtfilt
def butter_bandpass(lowcut, highcut, fs, order=5):
    """Design a digital Butterworth band-pass filter.

    Args:
        lowcut: Lower band edge, in the same units as ``fs``.
        highcut: Upper band edge, in the same units as ``fs``.
        fs: Sampling rate of the signal the filter will be applied to.
        order: Order passed to ``scipy.signal.butter``.

    Returns:
        The ``(b, a)`` numerator/denominator coefficient arrays returned by
        ``scipy.signal.butter``.
    """
    # scipy expects band edges normalised by the Nyquist frequency (fs / 2).
    nyq = 0.5 * fs
    low = lowcut / nyq
    high = highcut / nyq
    b, a = butter(order, [low, high], btype='band')
    return b, a
def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    """Band-pass filter ``data`` with a Butterworth filter.

    The coefficients come from ``butter_bandpass`` and are applied with
    ``scipy.signal.lfilter`` (a single forward pass over the data).

    Args:
        data: Signal samples to filter.
        lowcut: Lower band edge, in the same units as ``fs``.
        highcut: Upper band edge, in the same units as ``fs``.
        fs: Sampling rate of ``data``.
        order: Filter order forwarded to ``butter_bandpass``.

    Returns:
        The filtered signal as returned by ``lfilter``.
    """
    b, a = butter_bandpass(lowcut, highcut, fs, order=order)
    y = lfilter(b, a, data)
    return y
def butter_highpass(cutoff, fs, order=5):
    """Design a digital Butterworth high-pass filter.

    Args:
        cutoff: Cut-off frequency, in the same units as ``fs``.
        fs: Sampling rate of the signal the filter will be applied to.
        order: Order passed to ``scipy.signal.butter``.

    Returns:
        The ``(b, a)`` numerator/denominator coefficient arrays returned by
        ``scipy.signal.butter``.
    """
    # scipy expects the cut-off normalised by the Nyquist frequency (fs / 2).
    nyq = 0.5 * fs
    normal_cutoff = cutoff / nyq
    b, a = butter(order, normal_cutoff, btype="high", analog=False)
    return b, a
def butter_highpass_filter(data, cutoff, fs, order=5):
    """High-pass filter ``data`` with a Butterworth filter.

    The coefficients come from ``butter_highpass`` and are applied with
    ``scipy.signal.filtfilt``, which runs the filter forward and backward
    over the data.

    Args:
        data: Signal samples to filter.
        cutoff: Cut-off frequency, in the same units as ``fs``.
        fs: Sampling rate of ``data``.
        order: Filter order forwarded to ``butter_highpass``.

    Returns:
        The filtered signal as returned by ``filtfilt``.
    """
    b, a = butter_highpass(cutoff, fs, order=order)
    y = filtfilt(b, a, data)
    return y
# Input recording selection: keep exactly one `path = ...` line active and
# leave the alternatives commented out.
#path = 'test/torchaudio_unittest/assets/steam-train-whistle-daniel_simon.wav'
#path = '../example_audio/145_2b2_Al_mc_AKGC417L.wav' # normal
# note: crackle
#path = '../example_audio/198_6p1_Pr_mc_AKGC417L.wav' # crackle
#path = '../example_audio/198_6p1_Pr_mc_AKGC417L_4idx_class_1/198_6p1_Pr_mc_AKGC417L_4idx_class_1_cA.wav'
#path = '../example_audio/198_6p1_Pr_mc_AKGC417L_4idx_class_1/198_6p1_Pr_mc_AKGC417L_4idx_class_1_cD.wav'
# This line used to be active as well, but the gonggong assignment further
# down overwrote it, so it never took effect; it is disabled to avoid
# suggesting that this file is the one being analysed.
#path = '../example_audio/198_6p1_Pr_mc_AKGC417L_4idx_class_1/198_6p1_Pr_mc_AKGC417L_4idx_class_1_res.wav'
# note: wheeze
# path = '../example_audio/198_1b5_Pl_mc_AKGC417L.wav' # wheeze
#path = '../example_audio/198_1b5_Pl_mc_AKGC417L_4idx_class_2/198_1b5_Pl_mc_AKGC417L_4idx_class_2_cA.wav'
#path = '../example_audio/198_1b5_Pl_mc_AKGC417L_4idx_class_2/198_1b5_Pl_mc_AKGC417L_4idx_class_2_cD.wav'
#path = '../example_audio/198_1b5_Pl_mc_AKGC417L_4idx_class_2/198_1b5_Pl_mc_AKGC417L_4idx_class_2_res.wav'
# note: both
# path = '../example_audio/160_1b2_Al_mc_AKGC417L.wav' # both
#path = '../example_audio/160_1b2_Al_mc_AKGC417L_0idx_class_3/160_1b2_Al_mc_AKGC417L_0idx_class_3_cA.wav'
#path = '../example_audio/160_1b2_Al_mc_AKGC417L_0idx_class_3/160_1b2_Al_mc_AKGC417L_0idx_class_3_cD.wav'
#path = '../example_audio/160_1b2_Al_mc_AKGC417L_0idx_class_3/160_1b2_Al_mc_AKGC417L_0idx_class_3_res.wav'
# Active input.
path = '../example_audio/gonggong_Oct_2/gonggong_Oct_2_cA.wav'
#path = '../example_audio/gonggong_Oct_2/gonggong_Oct_2_cD.wav'
#path = '../example_audio/gonggong_Oct_2/gonggong_Oct_2_res.wav'
# Load the recording at `path`, asking librosa for the configured rate;
# `sample_rate` is rebound to the rate librosa reports back.
waveform, sample_rate = librosa.load(path, sr=sample_rate)
y = torch.Tensor(waveform)

# Collect the transform settings first, then build the transform and apply it
# to the waveform tensor in one step.
mel_settings = dict(
    sample_rate=sample_rate,
    n_fft=n_fft,
    win_length=win_len,
    hop_length=hop_len,
    center=True,
    pad_mode="reflect",
    power=2.0,
    norm='slaney',
    onesided=True,
    n_mels=n_mels,
    f_min=200,
    f_max=3000,
)
torchaudio_melspec = MelSpectrogram(**mel_settings)(y)
import cv2, cmapy
S = torchaudio_melspec
# Power -> decibels relative to the peak, so every value is <= 0 dB.
db = np.asarray(librosa.power_to_db(S, ref=np.max))
# The OpenCV operators below work on an 8-bit image. Casting the non-positive
# dB floats straight to uint8 is not a well-defined conversion and scrambles
# the intensity ordering, so rescale the dB range onto 0..255 first.
db_floor = db.min()
db_span = db.max() - db_floor
if db_span > 0:
    scaled = (db - db_floor) * (255.0 / db_span)
    img = np.clip(np.rint(scaled), 0, 255).astype('uint8')
else:
    # Constant spectrogram: there is no contrast to preserve.
    img = np.zeros(db.shape, dtype='uint8')
# roberts算子
# kernelx = np.array([[-1, 0], [0, 1]], dtype=int)
# kernely = np.array([[0, -1], [1, 0]], dtype=int)
# x = cv2.filter2D(img, cv2.CV_16S, kernelx)
# y = cv2.filter2D(img, cv2.CV_16S, kernely)
#
# # 转uint8
# absX = cv2.convertScaleAbs(x)
# absY = cv2.convertScaleAbs(y)
#
# # 加权和
# Roberts = cv2.addWeighted(absX, 0.5, absY, 0.5, 0)
# Roberts = torch.tensor(Roberts)
def _blend_gradients(grad_x, grad_y):
    """Return the equal-weight blend of two gradient images.

    Each input is first passed through ``cv2.convertScaleAbs`` and the two
    results are combined with ``cv2.addWeighted`` using weights 0.5 / 0.5.
    """
    abs_x = cv2.convertScaleAbs(grad_x)
    abs_y = cv2.convertScaleAbs(grad_y)
    return cv2.addWeighted(abs_x, 0.5, abs_y, 0.5, 0)


# Prewitt operator: two hand-written 3x3 kernels applied with filter2D.
kernelX = np.array([[1, 1, 1], [0, 0, 0], [-1, -1, -1]], dtype=int)
kernelY = np.array([[-1, 0, 1], [-1, 0, 1], [-1, 0, 1]], dtype=int)
Prewitt = _blend_gradients(
    cv2.filter2D(img, cv2.CV_16S, kernelX),
    cv2.filter2D(img, cv2.CV_16S, kernelY),
)

# Sobel operator: first derivative along x, then along y.
Sobel = _blend_gradients(
    cv2.Sobel(img, cv2.CV_16S, 1, 0),
    cv2.Sobel(img, cv2.CV_16S, 0, 1),
)

# Scharr operator: x direction, then y direction.
Scharr = _blend_gradients(
    cv2.Scharr(img, cv2.CV_16S, 1, 0),
    cv2.Scharr(img, cv2.CV_16S, 0, 1),
)

# Laplacian operator with a 3x3 aperture.
Laplacian = cv2.convertScaleAbs(cv2.Laplacian(img, cv2.CV_16S, ksize=3))

# Histogram-equalised copy of the 8-bit image.
equalhist_img = cv2.equalizeHist(img)
# Reference mel spectrogram from librosa, configured to mirror the torchaudio
# transform so that the MSE below compares like with like.
librosa_melspec = librosa.feature.melspectrogram(
    # Passed by keyword: newer librosa releases reject a positional waveform.
    y=waveform,
    sr=sample_rate,
    n_fft=n_fft,
    hop_length=hop_len,
    win_length=win_len,
    center=True,
    pad_mode="reflect",
    power=2.0,
    n_mels=n_mels,
    norm='slaney',
    htk=True,
    # Same band limits as the torchaudio call (f_min=200, f_max=3000);
    # without them the two filter banks cover different frequency ranges.
    fmin=200,
    fmax=3000,
)
mse = ((torchaudio_melspec - librosa_melspec) ** 2).mean()
# Kept on one line: a replacement field split across lines inside a
# single-quoted f-string is a SyntaxError before Python 3.12.
print(f'MSE:\t{mse}')
# One row of three panels: the Scharr edge map followed by the librosa mel
# spectrogram (in dB) shown twice.
fig, axs = plt.subplots(1, 3, figsize=(20, 5))
fig.suptitle('Mel Spectrogram')

panels = [
    ('torchaudio', Scharr),
    (' librosa', librosa.power_to_db(librosa_melspec)),
    (' librosa', librosa.power_to_db(librosa_melspec)),
]
for axis, (title, image) in zip(axs, panels):
    axis.set_title(title)
    axis.set_ylabel('mel bin')
    axis.set_xlabel('frame')
    axis.imshow(image, aspect='auto')

plt.show()
参考:
https://zhuanlan.zhihu.com/p/521073026
https://blog.csdn.net/AI_girl/article/details/114899815#t2