Create the data
x has shape (200, 100)
w has shape (100, 1)
Use data_iter to produce mini-batches
import torch
import torch.nn as nn

# Synthetic regression data: 200 examples with 100 features each.
n_examples = 200
n_features = 100
true_w = torch.randn(n_features, 1)        # ground-truth weights, shape (100, 1)
true_b = torch.tensor(0.54)                # ground-truth bias
x_ = torch.randn(n_examples, n_features)   # inputs, shape (200, 100)
y_ = x_ @ true_w + true_b                  # targets, shape (200, 1)
y_ += torch.normal(0, 0.01, y_.shape)      # add a little Gaussian noise

def data_iter(x, y, batch_size):
    """Yield shuffled mini-batches of (x, y)."""
    n_example = len(x)
    indices = torch.randperm(n_example)
    for i in range(0, n_example, batch_size):
        batch_indices = indices[i:min(i + batch_size, n_example)]
        yield x[batch_indices], y[batch_indices]
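As a quick sanity check (illustrative only, not part of the original script), drawing one batch from data_iter with batch_size=20 should give an x batch of shape (20, 100) and a y batch of shape (20, 1):

xb, yb = next(data_iter(x_, y_, batch_size=20))
print(xb.shape, yb.shape)   # torch.Size([20, 100]) torch.Size([20, 1])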
Apply weight decay only to the weight w; the bias b does not need it, since the L2 penalty is meant to limit the size of the weights, and penalizing the bias does not constrain the model's complexity.
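For intuition, here is a minimal standalone sketch (the names w, b, x, y below are hypothetical and not the variables used in the two approaches): with plain SGD, weight_decay=wd is equivalent to adding the penalty wd/2 * ||w||^2 to the loss, and the bias simply does not appear in that term.

w = torch.randn(100, 1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)
wd = 3
x = torch.randn(20, 100)
y = torch.randn(20, 1)
loss = ((x @ w + b - y) ** 2).mean() + wd / 2 * (w ** 2).sum()
loss.backward()   # w.grad contains the extra wd * w decay term; b.grad does not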
Approach 1
When constructing the optimizer, pass a list of dicts (parameter groups) so that different parameters are handled with different settings: the group containing net.weight gets weight_decay=wd, while the group containing net.bias keeps the default of no decay.
wd = 3                                  # weight-decay (L2 regularization) factor
net = nn.Linear(100, 1)
loss_function = nn.MSELoss()
# Two parameter groups: the weight gets weight_decay=wd, the bias keeps the default of 0.
optimizer = torch.optim.SGD([
    {'params': net.weight, 'weight_decay': wd},
    {'params': net.bias}
], lr=0.03)

epochs = 3
for epoch in range(epochs):
    net.train()
    losses = 0.0
    for x, y in data_iter(x_, y_, batch_size=20):
        y_hat = net(x)
        loss = loss_function(y_hat, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses += loss.item()
    print(losses)                       # summed mini-batch loss for this epoch
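As an optional check (param_groups is a standard attribute of PyTorch optimizers), each group carries its own hyperparameters, so the weight group reports weight_decay=3 while the bias group falls back to the default of 0:

for group in optimizer.param_groups:
    print(group['weight_decay'], [tuple(p.shape) for p in group['params']])
# 3 [(1, 100)]
# 0 [(1,)]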
Approach 2
Approach 2 uses two optimizers: the first handles the weight w (with weight decay), the second handles the bias b (without it). This is more cumbersome, because in every iteration the gradients must be zeroed twice and two separate update steps must be taken.
wd = 3
net = nn.Linear(100, 1)
loss_function = nn.MSELoss()
# One optimizer per parameter: only the weight optimizer applies weight decay.
optimizer_w = torch.optim.SGD([net.weight], lr=0.03, weight_decay=wd)
optimizer_b = torch.optim.SGD([net.bias], lr=0.03)

epochs = 3
for epoch in range(epochs):
    net.train()
    losses = 0.0
    for x, y in data_iter(x_, y_, batch_size=20):
        y_hat = net(x)
        loss = loss_function(y_hat, y)
        optimizer_w.zero_grad()         # both optimizers must be zeroed ...
        optimizer_b.zero_grad()
        loss.backward()
        optimizer_w.step()              # ... and both must step
        optimizer_b.step()
        losses += loss.item()
    print(losses)                       # summed mini-batch loss for this epoch
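As a follow-up sketch (my own addition, not part of the original write-up): for a network with many layers, the parameter groups of Approach 1 can be built automatically by splitting named_parameters() on the parameter name, which avoids listing every tensor by hand.

decay = [p for name, p in net.named_parameters() if name.endswith('weight')]
no_decay = [p for name, p in net.named_parameters() if name.endswith('bias')]
optimizer = torch.optim.SGD([
    {'params': decay, 'weight_decay': wd},
    {'params': no_decay}
], lr=0.03)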