用的付费的
8核 32GB 显存16G 预装 ModelScope Library 预装镜像 ubuntu20.04-cuda11.3.0-py38-torch1.11.0-tf1.15.5-1.6.1
这个环境
InvalidProtobuf Traceback (most recent call last)
Cell In[10], line 33
22 kwargs = dict(
23 model=pretrained_model_id, # 指定要finetune的模型
24 model_revision = "v1.0.6",
(...)
27 train_type=train_info # 指定要训练类型及参数
28 )
30 trainer = build_trainer(Trainers.speech_kantts_trainer,
31 default_args=kwargs)
---> 33 trainer.train()
File /opt/conda/lib/python3.8/site-packages/modelscope/trainers/audio/tts_trainer.py:229, in KanttsTrainer.train(self, *args, **kwargs)
226 ignore_pretrain = kwargs['ignore_pretrain']
228 if TtsTrainType.TRAIN_TYPE_SAMBERT in self.train_type or TtsTrainType.TRAIN_TYPE_VOC in self.train_type:
--> 229 self.prepare_data()
230 if TtsTrainType.TRAIN_TYPE_BERT in self.train_type:
231 self.prepare_text()
File /opt/conda/lib/python3.8/site-packages/modelscope/trainers/audio/tts_trainer.py:205, in KanttsTrainer.prepare_data(self)
202 audio_config = self.model.get_voice_audio_config_path(
203 self.speaker)
204 se_model = self.model.get_voice_se_model_path(self.speaker)
--> 205 self.audio_data_preprocessor(self.raw_dataset_path, self.data_dir,
206 audio_config, self.speaker,
207 self.lang_type, self.skip_script,
208 se_model)
File /opt/conda/lib/python3.8/site-packages/modelscope/preprocessors/tts.py:36, in KanttsDataPreprocessor.__call__(self, data_dir, output_dir, audio_config_path, speaker_name, target_lang, skip_script, se_model)
28 def __call__(self,
29 data_dir,
30 output_dir,
(...)
34 skip_script=False,
35 se_model=None):
---> 36 self.do_data_process(data_dir, output_dir, audio_config_path,
37 speaker_name, target_lang, skip_script, se_model)
File /opt/conda/lib/python3.8/site-packages/modelscope/preprocessors/tts.py:56, in KanttsDataPreprocessor.do_data_process(self, datadir, outputdir, audio_config, speaker_name, targetLang, skip_script, se_model)
53 if not os.path.exists(audio_config):
54 raise TtsDataPreprocessorAudioConfigNotExistsException(
55 'Preprocessor: audio config not exists')
---> 56 process_data(datadir, outputdir, audio_config, speaker_name,
57 targetLang, skip_script, se_model)
File /opt/conda/lib/python3.8/site-packages/kantts/preprocess/data_process.py:198, in process_data(voice_input_dir, voice_output_dir, audio_config, speaker_name, targetLang, skip_script, se_model, split_ratio)
196 if se_enable:
197 sep = SpeakerEmbeddingProcessor()
--> 198 sep.process(
199 voice_output_dir,
200 se_model,
201 )
202 logging.info("Processing speaker embedding done.")
204 logging.info("Processing done.")
File /opt/conda/lib/python3.8/site-packages/kantts/preprocess/se_processor/se_processor.py:67, in SpeakerEmbeddingProcessor.process(self, src_voice_dir, se_onnx)
65 opts = onnxruntime.SessionOptions()
66 opts.intra_op_num_threads = 1
---> 67 sess = onnxruntime.InferenceSession(se_onnx, sess_options=opts)
69 wav_dir = os.path.join(src_voice_dir, "wav")
70 se_dir = os.path.join(src_voice_dir, "se")
File /opt/conda/lib/python3.8/site-packages/onnxruntime/capi/onnxruntime_inference_collection.py:383, in InferenceSession.__init__(self, path_or_bytes, sess_options, providers, provider_options, **kwargs)
380 disabled_optimizers = kwargs["disabled_optimizers"] if "disabled_optimizers" in kwargs else None
382 try:
--> 383 self._create_inference_session(providers, provider_options, disabled_optimizers)
384 except (ValueError, RuntimeError) as e:
385 if self._enable_fallback:
File /opt/conda/lib/python3.8/site-packages/onnxruntime/capi/onnxruntime_inference_collection.py:424, in InferenceSession._create_inference_session(self, providers, provider_options, disabled_optimizers)
422 session_options = self._sess_options if self._sess_options else C.get_default_session_options()
423 if self._model_path:
--> 424 sess = C.InferenceSession(session_options, self._model_path, True, self._read_config_from_model)
425 else:
426 sess = C.InferenceSession(session_options, self._model_bytes, False, self._read_config_from_model)
InvalidProtobuf: [ONNXRuntimeError] : 7 : INVALID_PROTOBUF : Load model from ./pretrain_work_dir/orig_model/basemodel_16k/speaker_embedding/se.onnx failed:Protobuf parsing failed.
确认您的输入数据是否符合 ONNX 格式。ONNX 格式要求输入数据为 Tensor 类型,并且需要指定正确的形状和数据类型。您可以使用以下代码检查您的输入数据:
python
Copy
import torch
import numpy as np
input_data = np.random.rand(1, 3, 224, 224) # 假设输入形状为 (1, 3, 224, 224)
input_tensor = torch.from_numpy(input_data).float() # 将 numpy 数组转换为 PyTorch 张量,并指定数据类型为 float
print(input_tensor.shape)
print(input_tensor.dtype)
如果您的输入数据不符合要求,请根据您的具体情况进行修改。
检查您的模型是否符合 ONNX 格式。您可以使用 PyTorch 提供的 torch.onnx.export() 函数将 PyTorch 模型导出为 ONNX 格式,并使用 ONNX 官方提供的工具检查导出的 ONNX 模型是否符合规范。例如,您可以使用以下命令检查模型是否符合规范:
python
import onnx
model = onnx.load("model.onnx")
onnx.checker.check_model(model)
其中 model.onnx
是您导出的 ONNX 模型文件名。如果模型不符合规范,则需要修改模型代码以确保其符合规范。
确认您使用的 ONNX 运行时是否正确安装。您可以通过以下命令检查 ONNX 运行时是否正确安装:
python
Copy
import onnxruntime
print(onnxruntime.__version__)
sess = onnxruntime.InferenceSession('model.onnx')
```
其中 'model.onnx'
是您导出的 ONNX 模型文件名。如果 ONNX 运行时没有正确安装,则需要重新安装 ONNX 运行时。
提供能在其他服务器ubuntu环境下跑通的脚本,亲测有效
环境如下:
Ubuntu 20.04 + Python3.8
+---------------------------------------------------------------------------------------+ | NVIDIA-SMI 530.30.02 Driver Version: 530.30.02 CUDA Version: 12.1 | |-----------------------------------------+----------------------+----------------------+
#!/bin/bash
cat > /etc/profile.d/proxy.sh <<EOF
export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:32
EOF
apt update
apt upgrade -y
apt list --upgradable -a
apt-get install libsndfile1 sox nano wget curl git zip -y
apt autoclean -y
apt autoremove -y
source /etc/profile.d/proxy.sh
git clone https://github.com/modelscope/modelscope.git
cd modelscope
python -m pip install --upgrade pip
pip install -r requirements/tests.txt
pip install -r requirements/framework.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
pip install -r requirements/audio.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
pip install -r requirements/nlp.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
pip install .
pip install tts-autolabel -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
pip install typeguard==2.13.3 pydantic==1.10.10 numpy==1.21.6 kantts==0.0.1
pip uninstall funasr -y
cd ~
wget https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/TTS/download_files/nltk_data.zip
unzip nltk_data.zip
from modelscope.tools import run_auto_label
from modelscope.metainfo import Trainers
from modelscope.trainers import build_trainer
from modelscope.utils.audio.audio_utils import TtsTrainType
import os
from modelscope.models.audio.tts import SambertHifigan
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
import torch
print(torch.__version__)
print(torch.cuda.is_available())
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
text = '待合成文本'
model_id = 'damo/speech_sambert-hifigan_tts_zh-cn_16k'
sambert_hifigan_tts = pipeline(task=Tasks.text_to_speech, model=model_id)
output = sambert_hifigan_tts(input=text, voice='zhitian_emo')
wav = output[OutputKeys.OUTPUT_WAV]
with open('output.wav', 'wb') as f:
    f.write(wav)
kwargs = dict(
    model=pretrained_model_id,  # 指定要finetune的模型
    model_revision="v1.0.4",  # 就是这里,只有改成1.0.4才顺利通过
    work_dir=pretrain_work_dir,  # 指定临时工作目录
    train_dataset=dataset_id,  # 指定数据集id
    train_type=train_info  # 指定要训练类型及参数
)