# 暂无个人介绍 (No personal introduction yet.)
import os
import pandas as pd
import chardet
from PIL import Image
from datasets import Dataset
import tempfile
from modelscope.msdatasets import MsDataset
from modelscope.metainfo import Trainers
from modelscope.trainers import build_trainer
from modelscope.utils.constant import DownloadMode
# Load the OCR label CSV, whose text encoding is not known in advance, and
# wrap it as a ModelScope dataset.
csv_path = './ocr_labels_modelscope.csv'

# Sniff the encoding from the raw bytes: the label text contains Chinese
# characters, so the file is not guaranteed to be UTF-8.
with open(csv_path, 'rb') as f:
    result = chardet.detect(f.read())

# chardet reports None when it cannot determine an encoding; fall back to
# UTF-8 explicitly (which is also the pandas default).
encoding = result['encoding'] or 'utf-8'

# Read image_id as text: values such as 000000000 are zero-padded, and the
# default integer inference would collapse them to 0, 1, 2, ...
data = pd.read_csv(csv_path, encoding=encoding, dtype={'image_id': str})

# pandas DataFrame -> Hugging Face Dataset -> ModelScope MsDataset.
ds = Dataset.from_pandas(data)
ds = MsDataset(ds)

# Sanity check: show the first record.
print(next(iter(ds)))
# File format (文件格式):
#   image_id,text,image
#   000000000,硖,/mnt/workspace/images/000000000.jpg
#   000000001,饰,/mnt/workspace/images/000000001.jpg
#   000000002,晟,/mnt/workspace/images/000000002.jpg