一、机器学习PaddlePaddle项目训练代码模板
二、导入包以及设置随机种子
paddle.seed(seed)[源代码]
设置全局默认generator的随机种子。
参数:
seed (int) - 要设置的随机种子,推荐使用较大的整数。
返回:
Generator:全局默认generator对象。
代码示例:
import paddle

# Seed the global default generator so random ops become reproducible.
paddle.seed(102)
import random
import warnings

import matplotlib.pyplot as plt
import numpy as np
import paddle
import paddle.nn as nn
import pandas as pd
from paddle.io import DataLoader, Dataset
from sklearn.model_selection import train_test_split

# Suppress all warnings so they do not clutter the training log.
warnings.filterwarnings("ignore")

# Use one seed for Paddle, NumPy and the stdlib RNG.
seed = 42
paddle.seed(seed)
np.random.seed(seed)
random.seed(seed)
三、以类的方式定义超参数
class argparse:
    """Empty namespace class; hyperparameters are attached as attributes.

    NOTE(review): the name shadows the stdlib ``argparse`` module; it is
    kept unchanged here so existing references keep working.
    """


# Run Paddle's installation self-check; the outer print echoes its return value.
print(paddle.utils.run_check())
# Show which device (cpu / gpu) Paddle is currently using.
print(paddle.device.get_device())

# Hyperparameters.
args = argparse()
args.epochs = 30
args.learning_rate = 0.001
args.patience = 4
args.hidden_size = 40
args.input_size = 30

# Device selection: Paddle picks a device on its own, so this explicit
# set_device call (re-selecting the current device) is optional.
args.device = paddle.device.set_device(paddle.device.get_device())
Running verify PaddlePaddle program ... PaddlePaddle works well on 1 CPU. PaddlePaddle works well on 2 CPUs. PaddlePaddle is installed successfully! Let's start deep learning with PaddlePaddle now. None cpu
四、定义模型
# Custom model
class Your_model(nn.Layer):
    """Skeleton network: fill in the layers and the forward computation."""

    def __init__(self):
        super(Your_model, self).__init__()
        # Placeholder: define the sub-layers of the network here.

    def forward(self, x):
        # Placeholder: apply the layers to ``x`` here; the skeleton
        # returns its input unchanged.
        return x
五、定义早停类(此步骤可以省略)
class EarlyStopping():
    """Signal when the validation loss has stopped improving.

    Call the instance once per epoch with the current validation loss.
    Each time the loss improves, the model weights are checkpointed; after
    ``patience`` consecutive calls without improvement, ``early_stop`` is
    set to True so the training loop can break.

    Args:
        patience: number of consecutive non-improving calls tolerated
            before ``early_stop`` is set.
        verbose: when True, print a message every time a checkpoint is saved.
        delta: margin added to the best score; a new score below
            ``best_score + delta`` counts as "no improvement".
    """

    def __init__(self, patience=7, verbose=False, delta=0):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0          # consecutive calls without improvement
        self.best_score = None    # best (negated) validation loss seen so far
        self.early_stop = False
        # Lowercase np.inf: the np.Inf alias was removed in NumPy 2.0.
        self.val_loss_min = np.inf
        self.delta = delta

    def __call__(self, val_loss, model, path):
        """Record ``val_loss`` and update the early-stopping state.

        Args:
            val_loss: validation loss of the epoch that just finished.
            model: object exposing ``state_dict()``; saved on improvement.
            path: directory the checkpoint file is written into.
        """
        print("val_loss={}".format(val_loss))
        # Negate the loss so that a larger score is always better.
        score = -val_loss
        if self.best_score is None:
            # First call: nothing to compare against yet.
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, path):
        """Save ``model.state_dict()`` under ``path`` and remember ``val_loss``."""
        import os

        if self.verbose:
            print(
                f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
        paddle.save(model.state_dict(),
                    os.path.join(path, 'model_checkpoint.pdparams'))
        self.val_loss_min = val_loss
六、定义自己的数据集Dataset,DataLoader
class Dataset_name(Dataset):
    """Dataset skeleton; fill in loading, indexing and length.

    Args:
        flag: which split this instance serves; one of 'train', 'test'
            or 'valid'.
    """

    def __init__(self, flag='train'):
        assert flag in ['train', 'test', 'valid']
        self.flag = flag
        self.__load_data__()

    def __getitem__(self, index):
        # Placeholder: return the sample (features, target) at ``index``.
        pass

    def __len__(self):
        # Placeholder: return the number of samples in the selected split.
        pass

    def __load_data__(self, csv_paths: list = None):
        """Load the data and report the shapes of the train/valid arrays.

        ``csv_paths`` defaults to None because ``__init__`` calls this
        method without arguments; a required parameter made every
        instantiation fail with a TypeError.
        """
        # Placeholder: read the files and set self.train_X, self.train_Y,
        # self.valid_X and self.valid_Y before the shapes are printed.
        print(
            "train_X.shape:{}\ntrain_Y.shape:{}\nvalid_X.shape:{}\nvalid_Y.shape:{}\n"
            .format(self.train_X.shape, self.train_Y.shape,
                    self.valid_X.shape, self.valid_Y.shape))


train_dataset = Dataset_name(flag='train')
train_dataloader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
valid_dataset = Dataset_name(flag='valid')
valid_dataloader = DataLoader(dataset=valid_dataset, batch_size=64, shuffle=True)
七、实例化模型,设置loss,优化器等
device = paddle.set_device('cpu')  # or 'gpu'

# Instantiate the network. The manual training loop needs a plain nn.Layer
# instance (callable, with train()/eval()/state_dict()), not the high-level
# paddle.Model wrapper.
model = Your_model()
criterion = paddle.nn.MSELoss()
# Paddle's Adam takes `learning_rate=` and `parameters=`, and the parameters
# must come from the model instance rather than from the class.
optimizer = paddle.optimizer.Adam(learning_rate=args.learning_rate,
                                  parameters=model.parameters())

# Loss history: per batch (train_loss / valid_loss) and per epoch.
train_loss = []
valid_loss = []
train_epochs_loss = []
valid_epochs_loss = []

early_stopping = EarlyStopping(patience=args.patience, verbose=True)
八、开始训练以及调整lr
# Epoch index -> learning rate to switch to at the end of that epoch.
# Built once, outside the epoch loop.
lr_adjust = {
    2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6,
    10: 5e-7, 15: 1e-7, 20: 5e-8
}

# NOTE: `model` must be the instantiated network (an nn.Layer) and
# `optimizer` must have been built over that instance's parameters.
for epoch in range(args.epochs):
    model.train()
    train_epoch_loss = []
    # Log roughly twice per epoch; max() guards against a modulo-by-zero
    # when the loader yields a single batch.
    log_every = max(1, len(train_dataloader) // 2)
    for idx, (data_x, data_y) in enumerate(train_dataloader, 0):
        # Tensors already live on the device chosen with set_device, so
        # only the dtype needs to be fixed.
        data_x = data_x.astype('float32')
        data_y = data_y.astype('float32')
        outputs = model(data_x)
        optimizer.clear_grad()  # Paddle's counterpart of zero_grad()
        loss = criterion(outputs, data_y)
        loss.backward()
        optimizer.step()
        train_epoch_loss.append(loss.item())
        train_loss.append(loss.item())
        if idx % log_every == 0:
            print("epoch={}/{},{}/{}of train, loss={}".format(
                epoch, args.epochs, idx, len(train_dataloader), loss.item()))
    train_epochs_loss.append(np.average(train_epoch_loss))

    # =====================valid============================
    model.eval()
    valid_epoch_loss = []
    # No gradients are needed for validation.
    with paddle.no_grad():
        for idx, (data_x, data_y) in enumerate(valid_dataloader, 0):
            data_x = data_x.astype('float32')
            data_y = data_y.astype('float32')
            outputs = model(data_x)
            loss = criterion(outputs, data_y)
            valid_epoch_loss.append(loss.item())
            valid_loss.append(loss.item())
    valid_epochs_loss.append(np.average(valid_epoch_loss))

    # ==================early stopping======================
    early_stopping(valid_epochs_loss[-1], model=model, path=r'c:\\your_model_to_save')
    if early_stopping.early_stop:
        print("Early stopping")
        break

    # ====================adjust lr========================
    if epoch in lr_adjust:
        lr = lr_adjust[epoch]
        # set_lr replaces the PyTorch-style param_groups update.
        optimizer.set_lr(lr)
        print('Updating learning rate to {}'.format(lr))
九、绘图
既可以手工绘图,也可以借助VisualDL绘图;目前可直接使用下面的代码绘图。
# Two panels side by side: per-batch training loss on the left,
# per-epoch train/valid loss on the right.
_, (ax_batches, ax_epochs) = plt.subplots(1, 2, figsize=(12, 4))

ax_batches.plot(train_loss[:])
ax_batches.set_title("train_loss")

# The first epoch is left out of the per-epoch curves.
ax_epochs.plot(train_epochs_loss[1:], '-o', label="train_loss")
ax_epochs.plot(valid_epochs_loss[1:], '-o', label="valid_loss")
ax_epochs.set_title("epochs_loss")
ax_epochs.legend()

plt.show()
十、预测
# A DataLoader over the prediction set can be defined here. Alternatively,
# reshape the prediction data directly and add a batch dimension of size 1.
# NOTE: `model` is the instantiated (trained) network, not the class.
model.eval()
# Inference needs no gradient tracking.
with paddle.no_grad():
    predict = model(data)
十一、总结
从参数定义,到网络模型定义,再到训练步骤,验证步骤,测试步骤,总结了一套较为直观的模板。目录如下:
- 导入包以及设置随机种子
- 以类的方式定义超参数
- 定义自己的模型
- 定义早停类(此步骤可以省略)
- 定义自己的数据集Dataset,DataLoader
- 实例化模型,设置loss,优化器等
- 开始训练以及调整lr
- 绘图
- 预测