from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
import soundfile
# Load the audio as a NumPy array. soundfile.read returns a 1-D array of shape
# (frames,) for mono files and a 2-D array of shape (frames, channels) for
# multi-channel files.
waveform, sample_rate = soundfile.read("zh_test.wav")

# The feature-extraction frontend used by this pipeline ends up in
# torchaudio.compliance.kaldi, which asserts `waveform.dim() == 1`. A
# multi-channel array violates that and surfaces as a bare AssertionError.
# Down-mix to mono by averaging the channels so a 1-D signal is passed on.
# NOTE(review): assumes the failing zh_test.wav is multi-channel -- confirm
# with `print(waveform.shape)`.
if waveform.ndim > 1:
    waveform = waveform.mean(axis=1)

# NOTE(review): `sample_rate` is not forwarded to the pipeline. The model name
# suggests 16 kHz input is expected -- confirm the file's rate matches.
inference_pipeline = pipeline(
    task=Tasks.auto_speech_recognition,
    model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
)

# Run recognition on the in-memory waveform and show the result.
rec_result = inference_pipeline(audio_in=waveform)
print(rec_result)
zh_test.wav 文件与 .py 脚本文件放在同一目录下。
环境:直接在 Docker 上 pull 官方提供的镜像:
registry.cn-hangzhou.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-py37-torch1.11.0-tf1.15.5-1.2.0
error log如下:
2023-02-09 17:39:58,452 (asr_inference_pipeline:278) INFO: Decoding with pcm files ...
Traceback (most recent call last):
File "modelscope_test.py", line 10, in <module>
rec_result = inference_pipeline(audio_in=waveform)
File "/opt/conda/lib/python3.7/site-packages/modelscope/pipelines/audio/asr_inference_pipeline.py", line 136, in __call__
output = self.forward(output)
File "/opt/conda/lib/python3.7/site-packages/modelscope/pipelines/audio/asr_inference_pipeline.py", line 305, in forward
inputs['asr_result'] = self.run_inference(self.cmd)
File "/opt/conda/lib/python3.7/site-packages/modelscope/pipelines/audio/asr_inference_pipeline.py", line 382, in run_inference
output_dir_v2=cmd['output_dir'])
File "/opt/conda/lib/python3.7/site-packages/funasr/bin/asr_inference_paraformer.py", line 618, in _forward
results = speech2text(**batch)
File "/opt/conda/lib/python3.7/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "/opt/conda/lib/python3.7/site-packages/funasr/bin/asr_inference_paraformer.py", line 203, in __call__
feats, feats_len = self.frontend.forward(speech, speech_lengths)
File "/opt/conda/lib/python3.7/site-packages/funasr/models/frontend/wav_frontend.py", line 131, in forward
sample_frequency=self.fs)
File "/opt/conda/lib/python3.7/site-packages/torchaudio/compliance/kaldi.py", line 614, in fbank
preemphasis_coefficient,
File "/opt/conda/lib/python3.7/site-packages/torchaudio/compliance/kaldi.py", line 177, in _get_window
strided_input = _get_strided(waveform, window_size, window_shift, snip_edges)
File "/opt/conda/lib/python3.7/site-packages/torchaudio/compliance/kaldi.py", line 59, in _get_strided
assert waveform.dim() == 1
AssertionError
不清楚为什么会报错,也不知道该如何解决。
版权声明:本文内容由阿里云实名注册用户自发贡献,版权归原作者所有,阿里云开发者社区不拥有其著作权,亦不承担相应法律责任。具体规则请查看《阿里云开发者社区用户服务协议》和《阿里云开发者社区知识产权保护指引》。如果您发现本社区中有涉嫌抄袭的内容,填写侵权投诉表单进行举报,一经查实,本社区将立刻删除涉嫌侵权内容。