!pip install paddlex
!unzip -oq /home/aistudio/data/data146107/dataset.zip -d /home/aistudio/data !mkdir data/dataset/train import pandas as pd #引入pandas包 train_data_labels=pd.read_table('data/dataset/train.txt',sep='\t',header=None) train_data_labels[1].value_counts() import os import shutil for i in range(train_data_labels.shape[0]): src = 'data/dataset/images/'+ train_data_labels.iloc[i,0] dst = 'data/dataset/train/{}'.format(train_data_labels.iloc[i,1]) if not os.path.exists(dst): os.makedirs(dst) shutil.copy(src, dst)
# Split the per-class image folders under data/dataset/train with the PaddleX
# command-line tool, using the ImageNet (folder-per-class) format: a 0.2
# validation fraction and no test fraction.
# NOTE(review): presumably this writes train_list.txt, val_list.txt and
# labels.txt into the dataset directory (the dataset definitions later in this
# file read those three files from data/dataset/train) -- confirm.
!paddlex --split_dataset --format ImageNet --dataset_dir data/dataset/train --val_value 0.2 --test_value 0
# Import the Python libraries used for training and configure the runtime.
import os

# Expose only GPU 0 to the process. This is set before paddlex is imported,
# presumably so the framework picks it up at initialisation time.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import matplotlib

# Select the non-interactive Agg backend before anything else pulls in
# matplotlib, so no display is needed.
matplotlib.use('Agg')

import paddlex as pdx
from paddlex import transforms as T
# Configure the data augmentation / preprocessing pipelines.
# API reference: https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/apis/transforms/transforms.md

# Training pipeline: random 224 crop, horizontal/vertical flips, scale and
# aspect-ratio jitter, colour distortion and blur, followed by normalisation.
_train_ops = [
    T.RandomCrop(crop_size=224),
    T.RandomHorizontalFlip(),
    T.RandomVerticalFlip(),
    T.RandomScaleAspect(min_scale=0.5, aspect_ratio=0.33),
    T.RandomDistort(),
    T.RandomBlur(),
    T.Normalize(),
]
train_transforms = T.Compose(_train_ops)

# Evaluation pipeline: no random operations -- resize the short side to 256,
# take the central 224 crop, then normalise.
_eval_ops = [
    T.ResizeByShort(short_size=256),
    T.CenterCrop(crop_size=224),
    T.Normalize(),
]
eval_transforms = T.Compose(_eval_ops)
# Define the datasets used for training and validation.
# API reference: https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/apis/datasets.md

# Both datasets share the same image root and label list; only the file list,
# the transforms and the shuffling differ.
_shared_dataset_args = {
    'data_dir': 'data/dataset/train',
    'label_list': 'data/dataset/train/labels.txt',
}

train_dataset = pdx.datasets.ImageNet(
    file_list='data/dataset/train/train_list.txt',
    transforms=train_transforms,
    shuffle=True,
    **_shared_dataset_args)

eval_dataset = pdx.datasets.ImageNet(
    file_list='data/dataset/train/val_list.txt',
    transforms=eval_transforms,
    **_shared_dataset_args)
# Initialise the model and train it.
# API reference: https://github.com/PaddlePaddle/PaddleX/blob/release/2.0.0/docs/apis/models/classification.md
# Parameter descriptions and tuning notes: https://github.com/PaddlePaddle/PaddleX/tree/release/2.0.0/docs/parameters.md
num_classes = len(train_dataset.labels)
model = pdx.cls.ResNet50_vd_ssld(num_classes=num_classes)

model.train(
    num_epochs=100,
    train_dataset=train_dataset,
    train_batch_size=128,
    eval_dataset=eval_dataset,
    # The decay milestones are spread across the 100-epoch run. The previous
    # [4, 6, 8] schedule (which fits a ~10-epoch run) would apply every decay
    # step within the first 8 epochs, leaving the remaining ~92 epochs at a
    # tiny learning rate. NOTE(review): assumes the default step decay
    # (x0.1 per milestone) -- confirm against the parameter guide above.
    lr_decay_epochs=[30, 60, 90],
    learning_rate=0.025,
    save_dir='output/resnet50',
    use_vdl=True)
# Load test.txt into a list of image paths (one file name per line).
import os

with open('data/dataset/test.txt', 'r', encoding='utf-8') as file_to_read:
    # Blank lines are skipped: they would otherwise become a path to the
    # images directory itself, and they would also make this list longer than
    # the pandas view of the same file used when writing the results (pandas
    # drops blank lines by default), shifting every prediction by one row.
    test_files = [
        os.path.join('data/dataset/images/', name)
        for name in map(str.strip, file_to_read)
        if name
    ]
# Batch prediction over the test images.
import paddlex as pdx

# Load the trained model (remember to adjust the path to your save_dir/model).
model = pdx.load_model('output/resnet50/best_model')

# Predict in fixed-size chunks instead of handing the entire test set to a
# single predict() call: one giant batch can exhaust (GPU) memory on larger
# test sets. The per-image results are concatenated in input order, so
# result_list has the same layout as a single call on the full list.
PREDICT_BATCH_SIZE = 32
result_list = []
for start in range(0, len(test_files), PREDICT_BATCH_SIZE):
    chunk = test_files[start:start + PREDICT_BATCH_SIZE]
    result_list.extend(model.predict(chunk))

# Show the raw predictions when run interactively.
result_list
# Write the predictions to result.txt as "<file name>\t<predicted category>".
import pandas as pd

test_data = pd.read_table('data/dataset/test.txt', sep='\t', header=None)

# Convert the table once; rebuilding the nested list on every iteration made
# the original loop quadratic in the number of test images.
test_names = [row[0] for row in test_data.values.tolist()]

# Fail before writing anything if names and predictions cannot be paired
# one-to-one; a silent mismatch would attach predictions to the wrong files.
if len(test_names) != len(result_list):
    raise ValueError(
        'test.txt lists %d files but there are %d predictions'
        % (len(test_names), len(result_list)))

with open('result.txt', mode='w') as file_out:
    for name, prediction in zip(test_names, result_list):
        # prediction[0] is the first entry of the per-image result; its
        # 'category' value is the predicted class name.
        file_out.write('%s\t%s\n' % (name, prediction[0].get('category')))