开发者社区 > ModelScope模型即服务 > 正文

ModelScope 请问怎么修改?

import tempfile

from modelscope.msdatasets import MsDataset
from modelscope.metainfo import Trainers
from modelscope.trainers import build_trainer

通过data_files参数传入list来批量加载

my_csv_1 = 'results/re1.csv'
my_csv_2 = 'results/sam.csv'

ds = MsDataset.load('csv', data_files={'train': [my_csv_1], 'test': [my_csv_2]})

以此类推,其它类型(txt/json/jsonl)数据也可采用此方式加载

训练数据的输入出均为文本,需要将数据集预处理为输入为 src_txt,输出为 tgt_txt 的格式:

train_dataset = ds['train'].remap_columns({'input_text': 'src_txt', 'category': 'tgt_txt'})
eval_dataset = ds['test'].remap_columns({'input_text': 'src_txt', 'category': 'tgt_txt'})

num_warmup_steps = 500
def noam_lambda(current_step: int):
current_step += 1
return min(current_step(-0.5),
current_step * num_warmup_steps
(-1.5))

可以在代码修改 configuration 的配置

def cfg_modify_fn(cfg):
cfg.train.lr_scheduler = {
'type': 'LambdaLR',
'lr_lambda': noam_lambda,
'options': {
'by_epoch': False
}
}
cfg.train.optimizer = {
"type": "AdamW",
"lr": 1e-3,
"options": {}
}
cfg.train.max_epochs = 15
cfg.train.dataloader = {
"batch_size_per_gpu": 8,
"workers_per_gpu": 1
}
return cfg

kwargs = dict(
model='F:/study/graduationProject/openingReport/nlp_mt5_zero-shot-augment_chinese-base',
model_revision="master",
train_dataset=train_dataset,
eval_dataset=eval_dataset,
work_dir=tempfile.TemporaryDirectory().name,
cfg_modify_fn=cfg_modify_fn)
trainer = build_trainer(
name=Trainers.text_generation_trainer, default_args=kwargs)
trainer.train()
ModelScope这个代码对模型nlp_mt5_zero-shot-augment_chinese-base进行微调,使用的是文本分类的数据集,报错是:Traceback (most recent call last):
File "D:\tool\Anaconda\anaconda3\envs\modelscope\lib\site-packages\datasets\builder.py", line 1973, in _prepare_splitsingle
for
, table in generator:
File "D:\tool\Anaconda\anaconda3\envs\modelscope\lib\site-packages\datasets\packaged_modules\csv\csv.py", line 185, in _generate_tables
csv_file_reader = pd.read_csv(file, iterator=True, dtype=dtype, self.config.pd_read_csv_kwargs)
File "D:\tool\Anaconda\anaconda3\envs\modelscope\lib\site-packages\datasets\streaming.py", line 75, in wrapper
return function(*args, download_config=download_config,
kwargs)
File "D:\tool\Anaconda\anaconda3\envs\modelscope\lib\site-packages\datasets\download\streaming_download_manager.py", line 778, in xpandas_read_csv
return pd.read_csv(xopen(filepath_or_buffer, "rb", download_config=download_config), kwargs)
File "D:\tool\Anaconda\anaconda3\envs\modelscope\lib\site-packages\pandas\io\parsers\readers.py", line 912, in read_csv
return _read(filepath_or_buffer, kwds)
File "D:\tool\Anaconda\anaconda3\envs\modelscope\lib\site-packages\pandas\io\parsers\readers.py", line 577, in _read
parser = TextFileReader(filepath_or_buffer,
kwds)
File "D:\tool\Anaconda\anaconda3\envs\modelscope\lib\site-packages\pandas\io\parsers\readers.py", line 1407, in init
self._engine = self._make_engine(f, self.engine)
File "D:\tool\Anaconda\anaconda3\envs\modelscope\lib\site-packages\pandas\io\parsers\readers.py", line 1679, in _make_engine
return mapping engine ](f, self.options)
File "D:\tool\Anaconda\anaconda3\envs\modelscope\lib\site-packages\pandas\io\parsers\c_parser_wrapper.py", line 93, in init
self._reader = parsers.TextReader(src,
kwds)
File "pandas_libs\parsers.pyx", line 550, in pandas._libs.parsers.TextReader.cinit
File "pandas_libs\parsers.pyx", line 639, in pandas._libs.parsers.TextReader._get_header
File "pandas_libs\parsers.pyx", line 850, in pandas._libs.parsers.TextReader._tokenize_rows
File "pandas_libs\parsers.pyx", line 861, in pandas._libs.parsers.TextReader._check_tokenize_status
File "pandas_libs\parsers.pyx", line 2021, in pandas._libs.parsers.raise_parser_error
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xd5 in position 21: invalid continuation byte

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
File "F:\study\graduationProject\vue2_mt5\vue_flask\finetune.py", line 17, in
ds = MsDataset.load('csv', data_files={'train': [my_csv_1], 'test': [my_csv_2]})
File "D:\tool\Anaconda\anaconda3\envs\modelscope\lib\site-packages\modelscope\msdatasets\ms_dataset.py", line 258, in load
dataset_inst = LocalDataLoaderManager(
File "D:\tool\Anaconda\anaconda3\envs\modelscope\lib\site-packages\modelscope\msdatasets\data_loader\data_loader_manager.py", line 74, in load_dataset
return hf_data_loader(
File "D:\tool\Anaconda\anaconda3\envs\modelscope\lib\site-packages\datasets\load.py", line 2549, in load_dataset
builder_instance.download_and_prepare(
File "D:\tool\Anaconda\anaconda3\envs\modelscope\lib\site-packages\datasets\builder.py", line 1005, in download_and_prepare
self._download_and_prepare(
File "D:\tool\Anaconda\anaconda3\envs\modelscope\lib\site-packages\datasets\builder.py", line 1100, in _download_and_prepare
self._prepare_split(split_generator, **prepare_split_kwargs)
File "D:\tool\Anaconda\anaconda3\envs\modelscope\lib\site-packages\datasets\builder.py", line 1860, in _prepare_split
for job_id, done, content in self._prepare_split_single(
File "D:\tool\Anaconda\anaconda3\envs\modelscope\lib\site-packages\datasets\builder.py", line 2016, in _prepare_split_single
raise DatasetGenerationError("An error occurred while generating the dataset") from e
datasets.exceptions.DatasetGenerationError: An error occurred while generating the dataset

Process finished with exit code 1
请问怎么修改

展开
收起
夹心789 2024-06-09 08:25:33 59 0
2 条回答
写回答
取消 提交回答
  • my_csv = '/path/to/my_file.csv'
    train_ds = MsDataset.load('csv', data_files=[my_csv])
    print(next(iter(ds)))
    https://github.com/google-research/timesfm?tab=readme-ov-file
    ,对这个代码库不熟悉哦,您看下能否指定本地路径加载 ,此回答整理自钉群“魔搭ModelScope开发者联盟群 ①”

    2024-06-11 09:44:40
    赞同 展开评论 打赏
  • 北京阿里云ACE会长

    使用 Python 库如 chardet 自动检测和指定文件的编码。

    在加载 CSV 文件时,你可以传递额外的参数给 pd.read_csv。由于 MsDataset.load 方法使用了 datasets 库的 load_dataset 函数,你可能需要查看 datasets 库的相关文档来了解如何传递这些参数。

    2024-06-09 08:56:06
    赞同 1 展开评论 打赏

ModelScope旨在打造下一代开源的模型即服务共享平台,为泛AI开发者提供灵活、易用、低成本的一站式模型服务产品,让模型应用更简单!欢迎加入技术交流群:微信公众号:魔搭ModelScope社区,钉钉群号:44837352

相关电子书

更多
低代码开发师(初级)实战教程 立即下载
冬季实战营第三期:MySQL数据库进阶实战 立即下载
阿里巴巴DevOps 最佳实践手册 立即下载