我在no reverb上测试的结果如下: 
WB pesq 3.13 NB pesq 3.53 STOI 97.52 SISNR 19.03
目前官方提供的权重是否和DNS训练出来的权重不一致?
您好,我们针对DNS2020测试集复核了ModelScope官方模型的效果,全部指标得分都符合或微高于官网上公开的分数。以下是我们的测试代码,供您比较是否有差异:
from pypesq import pesq as pesq_nb
from pesq import pesq as pesq_wb
from pystoi import stoi
import soundfile as sf
import librosa
import numpy as np
import sys
# Root directory of the test set to score; the script below reads
# '<audio_root>/wav.lst' for file names and '<audio_root>/target/<name>' for
# the reference (clean) wavs.
audio_root='/data/DNS-Challenge/datasets/test_set/synthetic/no_reverb'
# Directory holding the wavs to be scored, looked up by the same file names
# as the references ('<data_to_score>/<name>').
data_to_score='/data/speech_frcrn_ans_cirm_16k/dns_out'
def cal_sisnr(ref_sig, out_sig, eps=1e-8):
    """Compute the scale-invariant signal-to-noise ratio (SI-SNR) in dB.
    Args:
        ref_sig: numpy.ndarray, [T], reference signal
        out_sig: numpy.ndarray, [T], signal to be scored against the reference
        eps: small constant added to the energies and to the log argument
            to avoid division by zero and log(0)
    Returns:
        SI-SNR in dB
    """
    assert len(ref_sig) == len(out_sig)
    # Remove the mean (DC offset) from both signals before comparing them.
    target = ref_sig - np.mean(ref_sig)
    estimate = out_sig - np.mean(out_sig)
    # Project the estimate onto the reference; what is left over is the noise.
    target_energy = np.sum(target ** 2) + eps
    scaled_target = np.sum(target * estimate) * target / target_energy
    residual = estimate - scaled_target
    signal_power = np.sum(scaled_target ** 2)
    residual_power = np.sum(residual ** 2) + eps
    ratio = signal_power / residual_power
    # 10 * log10(ratio + eps), written via the natural logarithm.
    return 10 * np.log(ratio + eps) / np.log(10.0)
# Scores every file named in '<audio_root>/wav.lst' against its reference in
# '<audio_root>/target/' and prints per-file and average WB-PESQ, NB-PESQ,
# STOI (in percent) and SI-SNR.
wav_list = audio_root + '/wav.lst'
# Per-file scores, in the order the files are listed.
scores_pesq_wb = []
scores_pesq_nb = []
scores_stoi = []
scores_sisnr = []
# True: SI-SNR and STOI are scored on the 16 kHz signals.
# False: they are scored at the files' native sampling rate.
USE_16K = True
# 'with' guarantees the list file is closed even if scoring a file raises.
with open(wav_list, 'r') as f:
    for line in f:
        audio_name = line.strip()
        # Stop at the first blank line (or at end of file).
        if not audio_name:
            break
        target_path = audio_root+"/target/"+audio_name
        noisy_path = data_to_score + '/' + audio_name
        clean, fs = sf.read(target_path)
        noisy, noisy_fs = sf.read(noisy_path)
        if noisy_fs != fs:
            # Bring the scored signal to the reference rate so that a single
            # 'fs' correctly describes both arrays from here on.
            noisy = librosa.resample(noisy, orig_sr=noisy_fs, target_sr=fs)
        # Trim both signals to their common length.
        min_frames = min(clean.shape[0],noisy.shape[0])
        clean = clean[:min_frames]
        noisy = noisy[:min_frames]
        assert len(clean) == len(noisy), 'Wave lengths are mismatchted! target: {}, noisy: {}'.format(len(clean), len(noisy))
        if fs > 16000:
            # Rates are passed by keyword: recent librosa releases no longer
            # accept them positionally.
            clean_16k = librosa.resample(clean, orig_sr=fs, target_sr=16000)
            noisy_16k = librosa.resample(noisy, orig_sr=fs, target_sr=16000)
            min_frames = min(clean_16k.shape[0],noisy_16k.shape[0])
            clean_16k = clean_16k[:min_frames]
            noisy_16k = noisy_16k[:min_frames]
            fs_16k = 16000
        elif fs == 16000:
            clean_16k = clean
            noisy_16k = noisy
            fs_16k = fs
        else:
            print('Sampling rate is less than 16000 !')
            break
        ## PESQ is scored only at 16000 Hz or 8000 Hz
        pesq_score_wb = pesq_wb(fs_16k, clean_16k, noisy_16k, 'wb')
        pesq_score_nb = pesq_nb(clean_16k, noisy_16k, fs_16k)
        ## calculate SI-SNR and STOI at 16000 Hz (USE_16K) or at the native sampling rate
        if USE_16K:
            sisnr_score=cal_sisnr(clean_16k, noisy_16k)
            stoi_score = stoi(clean_16k, noisy_16k, fs_16k, extended=False) * 100
        else:
            sisnr_score=cal_sisnr(clean, noisy)
            stoi_score = stoi(clean, noisy, fs, extended=False) * 100
        scores_sisnr.append(sisnr_score)
        scores_pesq_wb.append(pesq_score_wb)
        scores_pesq_nb.append(pesq_score_nb)
        scores_stoi.append(stoi_score)
        print('File: {}, WB_PESQ: {}, PESQ: {}, STOI: {}, SI-SNR: {}'.format(audio_name, np.around(pesq_score_wb, decimals=2), np.around(pesq_score_nb, decimals=2), np.around(stoi_score, decimals=2), np.around(sisnr_score, decimals=2)))
# Average each metric over all scored files, rounded to two decimals.
scores_pesq_wb = np.asarray(scores_pesq_wb)
scores_pesq_nb = np.asarray(scores_pesq_nb)
scores_stoi = np.asarray(scores_stoi)
scores_sisnr = np.asarray(scores_sisnr)
pesq_wb_mean = np.around(np.mean(scores_pesq_wb), decimals=2)
pesq_nb_mean = np.around(np.mean(scores_pesq_nb), decimals=2)
stoi_mean = np.around(np.mean(scores_stoi), decimals=2)
si_snr_mean = np.around(np.mean(scores_sisnr), decimals=2)
print('Average score, WB_PESQ: {}, PESQ: {}, STOI: {}, SI-SNR: {}'.format(pesq_wb_mean, pesq_nb_mean, stoi_mean, si_snr_mean))
您好,我测试noisy的结果如下 
这里和各种论文中的DNS2020测试集数据应该是一致的。我看了您提供的代码,测试程序中使用的pesq wb、pesq nb、stoi都是一致的,另外配置方式也一致,所以应该可以排除测试程序的问题。我这边拆解了github上的代码,并且在缓存中加载了FRCRN的模型权重,enhance了官方提供的测试noisy wav,效果和官方一致。但是从模型中:1. 没有看出使用了CBAM结构;2. 另外这个模型似乎是两个Unet级联;3. 从SELayer实现来看,似乎是一个非因果模型,而论文中似乎没有提到这个模块。所以,我很疑惑是不是提供的模型和模型权重有问题。这里贴出SELayer的代码:
class SELayer(nn.Module):
    """Squeeze-and-excitation style channel gate for two-component feature maps.

    The input is a 5-D tensor whose last axis has size 2; the `_r` / `_i`
    naming suggests these are the real and imaginary parts of a complex
    feature map. Each component is averaged over the full extent of the two
    axes that follow the channel axis, passed through two bottleneck MLPs,
    and the MLP outputs are combined in the pattern of a complex product to
    form a per-channel gate that scales the input.

    Args:
        channel: number of channels (size of axis 1 of the input).
        reduction: bottleneck ratio; the hidden width of each MLP is
            ``channel // reduction``.
    """

    def __init__(self, channel, reduction=16):
        super(SELayer, self).__init__()
        # Global average over the last two axes of a 4-D slice -> (b, c, 1, 1).
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc_r = nn.Sequential(
            nn.Linear(channel, channel // reduction), nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel), nn.Sigmoid())
        self.fc_i = nn.Sequential(
            nn.Linear(channel, channel // reduction), nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel), nn.Sigmoid())

    def forward(self, x):
        """Scale `x` by the per-channel, per-component gate.

        Args:
            x: tensor of shape (b, c, d1, d2, 2).

        Returns:
            Tensor of the same shape as `x`.
        """
        b, c, _, _, _ = x.size()
        # Squeeze: one scalar per (batch, channel) for each component.
        x_r = self.avg_pool(x[:, :, :, :, 0]).view(b, c)
        x_i = self.avg_pool(x[:, :, :, :, 1]).view(b, c)
        # Excite: combine the two MLPs like (r + j*i) * (x_r + j*x_i).
        y_r = self.fc_r(x_r).view(b, c, 1, 1, 1) - self.fc_i(x_i).view(
            b, c, 1, 1, 1)
        y_i = self.fc_r(x_i).view(b, c, 1, 1, 1) + self.fc_i(x_r).view(
            b, c, 1, 1, 1)
        # Gate of shape (b, c, 1, 1, 2), broadcast over the two pooled axes.
        y = torch.cat([y_r, y_i], 4)
        return x * y