将模型转换(导出)为 ONNX 时报错
Traceback (most recent call last): File "/root/miniconda3/lib/python3.9/runpy.py", line 197, in _run_module_as_main return _run_code(code, main_globals, None, File "/root/miniconda3/lib/python3.9/runpy.py", line 87, in _run_code exec(code, run_globals) File "/root/.vscode-server/extensions/ms-python.python-2022.20.1/pythonFiles/lib/python/debugpy/adapter/../../debugpy/launcher/../../debugpy/__main__.py", line 39, in <module> cli.main() File "/root/.vscode-server/extensions/ms-python.python-2022.20.1/pythonFiles/lib/python/debugpy/adapter/../../debugpy/launcher/../../debugpy/../debugpy/server/cli.py", line 430, in main run() File "/root/.vscode-server/extensions/ms-python.python-2022.20.1/pythonFiles/lib/python/debugpy/adapter/../../debugpy/launcher/../../debugpy/../debugpy/server/cli.py", line 284, in run_file runpy.run_path(target, run_name="__main__") File "/root/.vscode-server/extensions/ms-python.python-2022.20.1/pythonFiles/lib/python/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_runpy.py", line 321, in run_path return _run_module_code(code, init_globals, run_name, File "/root/.vscode-server/extensions/ms-python.python-2022.20.1/pythonFiles/lib/python/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_runpy.py", line 135, in _run_module_code _run_code(code, mod_globals, init_globals, File "/root/.vscode-server/extensions/ms-python.python-2022.20.1/pythonFiles/lib/python/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_runpy.py", line 124, in _run_code exec(code, run_globals) File "/cephfs/HZ-AI/usr/hezhiqiang/dou-ai/src/tools/onnx_model.py", line 73, in <module> onnx_model.load_weights(model_path='/cephfs/HZ-AI/project/doudizhu/experiments/exp.20221122191411/models/learner_cid_0') File "/cephfs/HZ-AI/usr/hezhiqiang/dou-ai/src/tools/onnx_model.py", line 35, in load_weights self.export_onnx() File "/cephfs/HZ-AI/usr/hezhiqiang/dou-ai/src/tools/onnx_model.py", line 15, in export_onnx torch.onnx.export( File 
"/root/miniconda3/lib/python3.9/site-packages/torch/onnx/__init__.py", line 316, in export return utils.export(model, args, f, export_params, verbose, training, File "/root/miniconda3/lib/python3.9/site-packages/torch/onnx/utils.py", line 107, in export _export(model, args, f, export_params, verbose, training, input_names, output_names, File "/root/miniconda3/lib/python3.9/site-packages/torch/onnx/utils.py", line 707, in _export _set_opset_version(opset_version) File "/root/miniconda3/lib/python3.9/site-packages/torch/onnx/symbolic_helper.py", line 849, in _set_opset_version raise ValueError("Unsupported ONNX opset version: " + str(opset_version)) ValueError: Unsupported ONNX opset version: 16
修复步骤
- 安装 PyTorch 1.12.1(报错原因:当前环境中的 PyTorch 版本不支持 ONNX opset 16,PyTorch 1.12 起才支持该 opset 版本)
pip install torch==1.12.1 -i https://pypi.tuna.tsinghua.edu.cn/simple
- 卸载之前的 horovod(horovod 是针对安装时的 PyTorch 版本编译的,更换 PyTorch 版本后需要重新编译安装)
pip uninstall horovod
- 更新cmake
pip install cmake --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple
- 安装horovod
HOROVOD_WITH_TENSORFLOW=1 HOROVOD_WITHOUT_MXNET=1 HOROVOD_WITH_PYTORCH=1 HOROVOD_WITHOUT_GLOO=1 HOROVOD_GPU_OPERATIONS=NCCL HOROVOD_CMAKE=/root/miniconda3/bin/cmake /root/miniconda3/bin/pip install --verbose --no-cache-dir horovod
注意:由于上面指定了 HOROVOD_GPU_OPERATIONS=NCCL,安装(编译)horovod 时需要在带 GPU 的环境中进行。