Building a Name Classifier with RNN, LSTM, and GRU Networks (Part 1): https://developer.aliyun.com/article/1544720?spm=a2c6h.13148508.setting.18.2a1e4f0eMtMqGK
Building the RNN, LSTM, and GRU models
1 Building the RNN model
import torch
import torch.nn as nn

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(RNN, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.output_size = output_size
        self.rnn = nn.RNN(input_size, hidden_size, num_layers)
        self.linear = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=-1)

    def forward(self, input1, hidden):
        # nn.RNN expects (seq_len, batch, input_size); add a seq_len dim of 1
        input1 = input1.unsqueeze(0)
        # Produce the output rr and the new hidden state hn
        rr, hn = self.rnn(input1, hidden)
        return self.softmax(self.linear(rr)), hn

    def initHidden(self):
        return torch.zeros(self.num_layers, 1, self.hidden_size)
Usage of torch.unsqueeze:
>>> x = torch.tensor([1, 2, 3, 4])
>>> torch.unsqueeze(x, 0)
tensor([[ 1,  2,  3,  4]])
>>> torch.unsqueeze(x, 1)
tensor([[ 1],
        [ 2],
        [ 3],
        [ 4]])
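In the forward method above, the same unsqueeze(0) call turns a single-character tensor of shape (1, n_letters) into (1, 1, n_letters), the (seq_len, batch, input_size) layout that nn.RNN expects. A quick shape check (a sketch; 57 stands in for n_letters):

x = torch.zeros(1, 57)        # one one-hot character; 57 stands in for n_letters (assumption)
print(x.unsqueeze(0).shape)   # torch.Size([1, 1, 57]) == (seq_len, batch, input_size)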
2 Building the LSTM model
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers)
        self.linear = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=-1)

    def forward(self, input1, hidden, c):
        input1 = input1.unsqueeze(0)
        # Produce the output rr and the new hidden and cell states hn, cn
        rr, (hn, cn) = self.lstm(input1, (hidden, c))
        return self.softmax(self.linear(rr)), hn, cn

    def initHiddenAndC(self):
        # The LSTM needs both a hidden state and a cell state, initialized to zeros
        c = hidden = torch.zeros(self.num_layers, 1, self.hidden_size)
        return hidden, c
3 Building the GRU model
class GRU(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(GRU, self).__init__()
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers)
        self.linear = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=-1)

    def forward(self, input1, hidden):
        input1 = input1.unsqueeze(0)
        # Produce the output rr and the new hidden state hn
        rr, hn = self.gru(input1, hidden)
        return self.softmax(self.linear(rr)), hn

    def initHidden(self):
        return torch.zeros(self.num_layers, 1, self.hidden_size)
4 Instantiation parameters
# n_letters and n_categories come from the data preparation in Part 1
input_size = n_letters
n_hidden = 128
output_size = n_categories
5 Input parameters
# One-hot tensor for the single character 'B'; squeeze(0) drops the seq_len
# dimension to give shape (1, n_letters), which forward() adds back via unsqueeze(0)
input = lineToTensor('B').squeeze(0)
hidden = c = torch.zeros(1, 1, n_hidden)
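For reference, a minimal sketch of the lineToTensor helper as defined in Part 1 of this series (reconstructed here under the assumption of the standard one-hot encoding; all_letters and n_letters are defined in Part 1):

import string

all_letters = string.ascii_letters + " .,;'"   # assumed character vocabulary from Part 1
n_letters = len(all_letters)

def lineToTensor(line):
    # One character per time step, one-hot over the vocabulary:
    # result has shape (len(line), 1, n_letters)
    tensor = torch.zeros(len(line), 1, n_letters)
    for li, letter in enumerate(line):
        tensor[li][0][all_letters.find(letter)] = 1
    return tensor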
Invocation
rnn = RNN(n_letters, n_hidden, n_categories)
lstm = LSTM(n_letters, n_hidden, n_categories)
gru = GRU(n_letters, n_hidden, n_categories)

rnn_output, next_hidden = rnn(input, hidden)
print("rnn:", rnn_output)
lstm_output, next_hidden, c = lstm(input, hidden, c)
print("lstm:", lstm_output)
gru_output, next_hidden = gru(input, hidden)
print("gru:", gru_output)
Output
rnn: tensor([[[-2.8822, -2.8615, -2.9488, -2.8898, -2.9205, -2.8113, -2.9328,
               -2.8239, -2.8678, -2.9474, -2.8724, -2.9703, -2.9019, -2.8871,
               -2.9340, -2.8436, -2.8442, -2.9047]]], grad_fn=<LogSoftmaxBackward>)
lstm: tensor([[[-2.9427, -2.8574, -2.9175, -2.8492, -2.8962, -2.9276, -2.8500,
                -2.9306, -2.8304, -2.9559, -2.9751, -2.8071, -2.9138, -2.8196,
                -2.8575, -2.8416, -2.9395, -2.9384]]], grad_fn=<LogSoftmaxBackward>)
gru: tensor([[[-2.8042, -2.8894, -2.8355, -2.8951, -2.8682, -2.9502, -2.9056,
               -2.8963, -2.8671, -2.9109, -2.9425, -2.8390, -2.9229, -2.8081,
               -2.8800, -2.9561, -2.9205, -2.9546]]], grad_fn=<LogSoftmaxBackward>)
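Each of the three outputs is a row of 18 log-probabilities, one per category, because the final layer is LogSoftmax. A quick sanity check (a sketch, not from the original article):

print(torch.exp(rnn_output).sum())   # ≈ 1.0: exponentiating log-probabilities gives a distribution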
Building the training functions and training
1 A function to get the predicted category from the output
def categoryFromOutput(output):
    """Get the predicted category from the output; the parameter is the output tensor."""
    # topk(1) returns the largest value and its index along the last dimension
    top_n, top_i = output.topk(1)
    category_i = top_i[0].item()
    return all_categories[category_i], category_i
2 Input parameters
output = gru_output
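A usage sketch with the GRU output bound above (the category printed depends on the randomly initialized weights):

category, category_i = categoryFromOutput(output)
print('category:', category, '/ category_i:', category_i)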
3 Randomly generating training data
import random

def randomTrainingExample():
    """Randomly draw one training example."""
    # Pick a random category, then a random name from that category
    # (all_categories and category_lines come from the data preparation in Part 1)
    category = random.choice(all_categories)
    line = random.choice(category_lines[category])
    # The label is the category's index, wrapped in a long tensor for NLLLoss
    category_tensor = torch.tensor([all_categories.index(category)], dtype=torch.long)
    line_tensor = lineToTensor(line)
    return category, line, category_tensor, line_tensor
for i in range(10):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    print('category =', category, '/ line =', line, '/ category_tensor =', category_tensor)
4 Building the RNN training function
criterion = nn.NLLLoss()
learning_rate = 0.005

def trainRNN(category_tensor, line_tensor):
    """Training function. Its two parameters are category_tensor, the tensor
    representation of the category (the label of the training data), and
    line_tensor, the tensor representation of the name (the corresponding
    training example)."""
    hidden = rnn.initHidden()
    rnn.zero_grad()
    # Feed the name one character at a time, threading the hidden state through
    for i in range(line_tensor.size()[0]):
        output, hidden = rnn(line_tensor[i], hidden)
    loss = criterion(output.squeeze(0), category_tensor)
    loss.backward()
    # Manual SGD step: p <- p - learning_rate * grad
    for p in rnn.parameters():
        p.data.add_(p.grad.data, alpha=-learning_rate)
    return output, loss.item()
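To exercise trainRNN on one random example (a usage sketch built from the helpers defined above):

category, line, category_tensor, line_tensor = randomTrainingExample()
output, loss = trainRNN(category_tensor, line_tensor)
print('category =', category, '/ line =', line, '/ loss =', loss)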
- torch.add demonstration (torch.add(a, b, alpha=10) computes a + 10 * b with broadcasting; the in-place add_ in the update loop above uses the same alpha scaling):
>>> a = torch.randn(4)
>>> a
tensor([-0.9732, -0.3497,  0.6245,  0.4022])
>>> b = torch.randn(4, 1)
>>> b
tensor([[ 0.3743],
        [-1.7724],
        [-0.5811],
        [-0.8017]])
>>> torch.add(a, b, alpha=10)
tensor([[  2.7695,   3.3930,   4.3672,   4.1450],
        [-18.6971, -18.0736, -17.0994, -17.3216],
        [ -6.7845,  -6.1610,  -5.1868,  -5.4090],
        [ -8.9902,  -8.3667,  -7.3925,  -7.6147]])
5 Building the LSTM training function
def trainLSTM(category_tensor, line_tensor):
    hidden, c = lstm.initHiddenAndC()
    lstm.zero_grad()
    for i in range(line_tensor.size()[0]):
        output, hidden, c = lstm(line_tensor[i], hidden, c)
    loss = criterion(output.squeeze(0), category_tensor)
    loss.backward()
    for p in lstm.parameters():
        p.data.add_(p.grad.data, alpha=-learning_rate)
    return output, loss.item()
6 Building the GRU training function
def trainGRU(category_tensor, line_tensor):
    hidden = gru.initHidden()
    gru.zero_grad()
    for i in range(line_tensor.size()[0]):
        output, hidden = gru(line_tensor[i], hidden)
    loss = criterion(output.squeeze(0), category_tensor)
    loss.backward()
    for p in gru.parameters():
        p.data.add_(p.grad.data, alpha=-learning_rate)
    return output, loss.item()
Note: the manual parameter update in the training functions above can also be replaced with a torch.optim optimizer, in which case a single optimizer.step() call takes the place of the hand-written update loop.
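A minimal sketch of that optimizer-based variant for the RNN, assuming plain SGD from torch.optim (the choice of optimizer is an assumption, not taken from the article):

import torch.optim as optim

# Assumption: plain SGD with the same learning rate as the manual update
optimizer = optim.SGD(rnn.parameters(), lr=learning_rate)

def trainRNNWithOptimizer(category_tensor, line_tensor):
    hidden = rnn.initHidden()
    optimizer.zero_grad()
    for i in range(line_tensor.size()[0]):
        output, hidden = rnn(line_tensor[i], hidden)
    loss = criterion(output.squeeze(0), category_tensor)
    loss.backward()
    optimizer.step()   # replaces the hand-written parameter-update loop
    return output, loss.item()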
Building a Name Classifier with RNN, LSTM, and GRU Networks (Part 3): https://developer.aliyun.com/article/1544722?spm=a2c6h.13148508.setting.16.2a1e4f0eMtMqGK