DNS2020 盲测集结果不一致_问答-阿里云开发者社区

您好，我们针对 DNS2020 测试集复核了 ModelScope 官方模型的效果，全部指标得分都符合或略高于官网上公开的分数。以下是我们的测试代码，供您比较是否有差异：

from pypesq import pesq as pesq_nb
from pesq import pesq as pesq_wb
from pystoi import stoi
import soundfile as sf
import librosa
import numpy as np
import sys

# Root directory of the test subset to score. The script reads the file list
# `wav.lst` and the reference recordings under `target/` from this directory.
audio_root='/data/DNS-Challenge/datasets/test_set/synthetic/no_reverb'
# Directory holding the audio to be scored; files are looked up here by the
# same names that are listed in `wav.lst`.
data_to_score='/data/speech_frcrn_ans_cirm_16k/dns_out'


def cal_sisnr(ref_sig, out_sig, eps=1e-8):
    """Compute the Scale-Invariant Source-to-Noise Ratio (SI-SNR) in dB.

    Both signals are mean-normalised, the estimate is projected onto the
    reference, and the energy of that projection is compared with the energy
    of the residual.

    Args:
        ref_sig: numpy.ndarray, [T], reference signal.
        out_sig: numpy.ndarray, [T], signal under evaluation.
        eps: small constant added to denominators and to the log argument
            to avoid division by zero and log(0).

    Returns:
        SI-SNR value in decibels.

    Raises:
        AssertionError: if the two signals differ in length.
    """
    assert len(ref_sig) == len(out_sig)

    # Remove the DC component from both signals.
    target = ref_sig - np.mean(ref_sig)
    estimate = out_sig - np.mean(out_sig)

    # Component of the estimate that lies along the reference.
    target_energy = np.sum(np.square(target)) + eps
    correlation = np.sum(target * estimate)
    projection = correlation * target / target_energy

    # Whatever is left after removing the projection counts as noise.
    residual = estimate - projection
    signal_power = np.sum(np.square(projection))
    noise_power = np.sum(np.square(residual)) + eps

    # 10 * log10(x), written via natural logs.
    return 10 * np.log(signal_power / noise_power + eps) / np.log(10.0)

# Score every file listed in `wav.lst`: wide-band PESQ, narrow-band PESQ,
# STOI (as a percentage) and SI-SNR, printed per file and as averages.
wav_list = audio_root + '/wav.lst'

scores_pesq_wb = []
scores_pesq_nb = []
scores_stoi = []
scores_sisnr = []

# Kept so that anything reading this flag still finds it. Every signal is
# resampled to 16 kHz before scoring, so all metrics are computed at 16 kHz
# regardless of its value.
USE_16K = True

# PESQ is scored only at 16000 Hz or 8000 Hz; this script uses 16000 Hz.
fs_16k = 16000

with open(wav_list, 'r') as f:
    for line in f:
        audio_name = line.strip()
        if not audio_name:
            # An empty entry terminates the list.
            break

        target_path = audio_root + "/target/" + audio_name
        noisy_path = data_to_score + '/' + audio_name
        # Keep each file's own sampling rate so it is resampled from the
        # rate it was actually stored at.
        clean, clean_fs = sf.read(target_path)
        noisy, noisy_fs = sf.read(noisy_path)

        if noisy_fs < fs_16k:
            print('Sampling rate is less than 16000 !')
            break

        # Resample exactly once. The rates are passed by keyword because
        # recent librosa releases no longer accept them positionally.
        clean_16k = librosa.resample(clean, orig_sr=clean_fs, target_sr=fs_16k)
        noisy_16k = librosa.resample(noisy, orig_sr=noisy_fs, target_sr=fs_16k)

        # Trim both signals to the shorter one so the metrics see equal
        # lengths.
        min_frames = min(clean_16k.shape[0], noisy_16k.shape[0])
        clean_16k = clean_16k[:min_frames]
        noisy_16k = noisy_16k[:min_frames]

        pesq_score_wb = pesq_wb(fs_16k, clean_16k, noisy_16k, 'wb')
        pesq_score_nb = pesq_nb(clean_16k, noisy_16k, fs_16k)
        # Calculate SI-SNR and STOI at the 16000 Hz sampling rate, passing
        # the rate that matches the resampled data.
        sisnr_score = cal_sisnr(clean_16k, noisy_16k)
        stoi_score = stoi(clean_16k, noisy_16k, fs_16k, extended=False) * 100

        scores_sisnr.append(sisnr_score)
        scores_pesq_wb.append(pesq_score_wb)
        scores_pesq_nb.append(pesq_score_nb)
        scores_stoi.append(stoi_score)
        print('File: {}, WB_PESQ: {}, PESQ: {}, STOI: {}, SI-SNR: {}'.format(audio_name, np.around(pesq_score_wb, decimals=2), np.around(pesq_score_nb, decimals=2), np.around(stoi_score, decimals=2), np.around(sisnr_score, decimals=2)))

scores_pesq_wb = np.asarray(scores_pesq_wb)
scores_pesq_nb = np.asarray(scores_pesq_nb)
scores_stoi = np.asarray(scores_stoi)
scores_sisnr = np.asarray(scores_sisnr)

pesq_wb_mean = np.around(np.mean(scores_pesq_wb), decimals=2)
pesq_nb_mean = np.around(np.mean(scores_pesq_nb), decimals=2)
stoi_mean = np.around(np.mean(scores_stoi), decimals=2)
si_snr_mean = np.around(np.mean(scores_sisnr), decimals=2)

print('Average score, WB_PESQ: {}, PESQ: {}, STOI: {}, SI-SNR: {}'.format(pesq_wb_mean, pesq_nb_mean, stoi_mean, si_snr_mean))

# NOTE(review): these two definitions are methods of a class named `SELayer`
# (see the `super(SELayer, self)` call). The `class` statement itself is not
# part of this excerpt, so only the method bodies are laid out here.
def __init__(self, channel, reduction=16):
    """Build the pooling layer and the two gating branches.

    Args:
        channel: number of input/output features of each branch.
        reduction: divisor applied to `channel` to get the hidden width
            (`channel // reduction`).
    """
    super(SELayer, self).__init__()
    # Collapses the two pooled dimensions to size 1 x 1.
    self.avg_pool = nn.AdaptiveAvgPool2d(1)
    # Two branches with identical structure but separate weights:
    # Linear -> ReLU -> Linear -> Sigmoid.
    self.fc_r = nn.Sequential(
        nn.Linear(channel, channel // reduction),
        nn.ReLU(inplace=True),
        nn.Linear(channel // reduction, channel),
        nn.Sigmoid())
    self.fc_i = nn.Sequential(
        nn.Linear(channel, channel // reduction),
        nn.ReLU(inplace=True),
        nn.Linear(channel // reduction, channel),
        nn.Sigmoid())


def forward(self, x):
    """Rescale a 5-D input by per-channel gates computed from its two slices.

    Args:
        x: 5-D tensor whose last dimension is indexed at 0 and 1
            (presumably the real and imaginary parts -- TODO confirm).

    Returns:
        `x` multiplied element-wise (with broadcasting) by the gate tensor,
        which has size 1 in the third and fourth dimensions and size 2 in
        the last.
    """
    b, c, _, _, _ = x.size()
    # Pool each slice of the last dimension down to one value per
    # (batch, channel) pair.
    x_r = self.avg_pool(x[:, :, :, :, 0]).view(b, c)
    x_i = self.avg_pool(x[:, :, :, :, 1]).view(b, c)
    # Cross-combine the two branches: the first gate is a difference, the
    # second a sum, of branch outputs on the two pooled slices.
    y_r = self.fc_r(x_r).view(b, c, 1, 1, 1) - self.fc_i(x_i).view(
        b, c, 1, 1, 1)
    y_i = self.fc_r(x_i).view(b, c, 1, 1, 1) + self.fc_i(x_r).view(
        b, c, 1, 1, 1)
    # Stack the two gates along the last dimension to line up with x.
    y = torch.cat([y_r, y_i], 4)
    return x * y

DNS2020 盲测集结果不一致

语音

相关文章

热门讨论

热门文章

DNS2020 盲测集 结果不一致

语音

相关文章

热门讨论

热门文章

DNS2020 盲测集结果不一致