9. Seq2Seq (Encoder + Decoder) Code Implementation
class Seq2Seq(nn.Module):
    def __init__(self, Encoder_LSTM, Decoder_LSTM):
        super(Seq2Seq, self).__init__()
        self.Encoder_LSTM = Encoder_LSTM
        self.Decoder_LSTM = Decoder_LSTM

    def forward(self, source, target, tfr=0.5):
        # Shape - Source : (10, 32) [(Sentence length German + some padding), Number of Sentences]
        batch_size = source.shape[1]

        # Shape - Target : (14, 32) [(Sentence length English + some padding), Number of Sentences]
        target_len = target.shape[0]
        target_vocab_size = len(english.vocab)

        # Shape --> outputs (14, 32, 5766)
        outputs = torch.zeros(target_len, batch_size, target_vocab_size).to(device)

        # Shape --> (hs, cs) (2, 32, 1024), (2, 32, 1024) [num_layers, batch_size, hidden_size]
        # (contains the encoder's hs, cs - the context vectors)
        hidden_state, cell_state = self.Encoder_LSTM(source)

        # Shape of x: (32,) - one token index per sentence in the batch
        x = target[0]  # Trigger token <SOS>

        for i in range(1, target_len):
            # Shape --> output (32, 5766)
            # Feed the previous token and carry the decoder's hidden/cell state forward
            output, hidden_state, cell_state = self.Decoder_LSTM(x, hidden_state, cell_state)
            outputs[i] = output
            best_guess = output.argmax(1)  # 0th dimension is batch size, 1st dimension is the vocabulary logits
            # Teacher forcing: either feed the next ground-truth word or the previously predicted word
            x = target[i] if random.random() < tfr else best_guess

        # Shape --> outputs (14, 32, 5766)
        return outputs

print(model)

************************************************ OUTPUT ************************************************

Seq2Seq(
  (Encoder_LSTM): EncoderLSTM(
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(5376, 300)
    (LSTM): LSTM(300, 1024, num_layers=2, dropout=0.5)
  )
  (Decoder_LSTM): DecoderLSTM(
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(4556, 300)
    (LSTM): LSTM(300, 1024, num_layers=2, dropout=0.5)
    (fc): Linear(in_features=1024, out_features=4556, bias=True)
  )
)
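The print(model) call above assumes the encoder and decoder from the previous sections were instantiated and composed into a Seq2Seq model. Below is a minimal sketch of that wiring plus a quick shape sanity check; the EncoderLSTM / DecoderLSTM constructor argument order and the hyperparameter values are assumptions read off the printed module summary, not code from the original article.

# Sketch only: constructor signatures and hyperparameters are assumptions
# inferred from the print(model) output above.
input_size_encoder = 5376          # German vocabulary size (len(german.vocab))
input_size_decoder = 4556          # English vocabulary size (len(english.vocab))
embedding_size = 300
hidden_size = 1024
num_layers = 2
dropout_p = 0.5

encoder_lstm = EncoderLSTM(input_size_encoder, embedding_size,
                           hidden_size, num_layers, dropout_p).to(device)
decoder_lstm = DecoderLSTM(input_size_decoder, embedding_size,
                           hidden_size, num_layers, dropout_p,
                           input_size_decoder).to(device)   # last arg: output (vocab) size, assumed
model = Seq2Seq(encoder_lstm, decoder_lstm).to(device)

# One batch from the iterator should produce logits of shape
# (target_len, batch_size, target_vocab_size).
batch = next(iter(train_iterator))
with torch.no_grad():
    out = model(batch.src.to(device), batch.trg.to(device))
print(out.shape)    # e.g. torch.Size([14, 32, 4556])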
10. Seq2Seq Model Training
step = 0
num_epochs = 100
best_loss = 999999
best_epoch = -1
sentence1 = "ein mann in einem blauen hemd steht auf einer leiter und putzt ein fenster"
ts1 = []

for epoch in range(num_epochs):
    print("Epoch - {} / {}".format(epoch + 1, num_epochs))

    # Translate the example sentence after every epoch to watch the model improve
    model.eval()
    translated_sentence1 = translate_sentence(model, sentence1, german, english, device, max_length=50)
    print(f"Translated example sentence 1: \n {translated_sentence1}")
    ts1.append(translated_sentence1)

    model.train(True)
    epoch_loss = 0.0
    for batch_idx, batch in enumerate(train_iterator):
        input = batch.src.to(device)
        target = batch.trg.to(device)

        # Pass the input and target to the model's forward method
        output = model(input, target)
        output = output[1:].reshape(-1, output.shape[2])
        target = target[1:].reshape(-1)

        # Clear the accumulated gradients
        optimizer.zero_grad()

        # Calculate the loss value for this batch
        loss = criterion(output, target)

        # Calculate the gradients for weights & biases using back-propagation
        loss.backward()

        # Clip the gradients if their norm exceeds 1
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)

        # Update the weights using the gradients we calculated with back-propagation
        optimizer.step()
        step += 1

        epoch_loss += loss.item()
        writer.add_scalar("Training loss", loss, global_step=step)

    if epoch_loss < best_loss:
        best_loss = epoch_loss
        best_epoch = epoch
        checkpoint_and_save(model, best_loss, epoch, optimizer, epoch_loss)

    # Early stopping: stop if the loss has not improved for 10 epochs
    if (epoch - best_epoch) >= 10:
        print("no improvement in 10 epochs, break")
        break

    print("Epoch_Loss - {}".format(loss.item()))
    print()

print(epoch_loss / len(train_iterator))

score = bleu(test_data[1:100], model, german, english, device)
print(f"Bleu score {score * 100:.2f}")

************************************************ OUTPUT ************************************************

Bleu score 15.62
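The loop above also relies on a checkpoint_and_save helper defined earlier in the article. Its exact implementation is not shown here; a plausible sketch (my own, not the article's code; the file path is hypothetical) simply bundles the model and optimizer state with torch.save:

def checkpoint_and_save(model, best_loss, epoch, optimizer, epoch_loss,
                        path="checkpoint-best-seq2seq.pth"):
    # Persist everything needed to resume training or to reload for inference.
    torch.save({
        "epoch": epoch,
        "model_state_dict": model.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
        "best_loss": best_loss,
        "epoch_loss": epoch_loss,
    }, path)
    print("Saved checkpoint at epoch {} (epoch loss {:.4f})".format(epoch, epoch_loss))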
Translation of the example sentence over the course of training:
Training loss:
11. Seq2Seq Model Inference
Now, let's compare our trained model with the SOTA model behind Google Translate.
model.eval()
test_sentences = ["Zwei Männer gehen die Straße entlang",
                  "Kinder spielen im Park.",
                  "Diese Stadt verdient eine bessere Klasse von Verbrechern. Der Spaßvogel"]
actual_sentences = ["Two men are walking down the street",
                    "Children play in the park",
                    "This city deserves a better class of criminals. The joker"]
pred_sentences = []

for idx, i in enumerate(test_sentences):
    model.eval()
    translated_sentence = translate_sentence(model, i, german, english, device, max_length=50)
    pred_sentences.append(TreebankWordDetokenizer().detokenize(translated_sentence))
    print("German : {}".format(i))
    print("Actual Sentence in English : {}".format(actual_sentences[idx]))
    print("Predicted Sentence in English : {}".format(pred_sentences[-1]))
    print()

******************************************* OUTPUT *******************************************

German : "Zwei Männer gehen die Straße entlang"
Actual Sentence in English : "Two men are walking down the street"
Predicted Sentence in English : "two men are walking on the street . <eos>"

German : "Kinder spielen im Park."
Actual Sentence in English : "Children play in the park"
Predicted Sentence in English : "children playing in the park . <eos>"

German : "Diese Stadt verdient eine bessere Klasse von Verbrechern. Der Spaßvogel"
Actual Sentence in English : "This city deserves a better class of criminals. The joker"
Predicted Sentence in English : "this <unk>'s <unk> from a <unk> green team <unk> by the sidelines . <eos>"
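The translate_sentence helper used above (and during training) was defined earlier in the article. Conceptually it encodes the German sentence once, then decodes greedily, feeding each predicted token back in until <eos> or max_length is reached. The sketch below is an assumed implementation along those lines; the spaCy model name, the use of Field attributes such as init_token, and the exact return format are my assumptions, not the article's code.

import spacy

spacy_german = spacy.load("de_core_news_sm")   # assumed German tokenizer model

def translate_sentence(model, sentence, german, english, device, max_length=50):
    model.eval()

    # Tokenize the German input and frame it with <sos>/<eos>, mirroring the Field setup.
    if isinstance(sentence, str):
        tokens = [tok.text.lower() for tok in spacy_german.tokenizer(sentence)]
    else:
        tokens = [tok.lower() for tok in sentence]
    tokens = [german.init_token] + tokens + [german.eos_token]

    # Map tokens to vocabulary indices and add a batch dimension of size 1.
    indices = [german.vocab.stoi[token] for token in tokens]
    source = torch.LongTensor(indices).unsqueeze(1).to(device)

    with torch.no_grad():
        hidden_state, cell_state = model.Encoder_LSTM(source)

    # Greedy decoding: start from <sos> and feed the argmax prediction back in.
    outputs = [english.vocab.stoi["<sos>"]]
    for _ in range(max_length):
        previous_word = torch.LongTensor([outputs[-1]]).to(device)
        with torch.no_grad():
            output, hidden_state, cell_state = model.Decoder_LSTM(
                previous_word, hidden_state, cell_state)
        best_guess = output.argmax(1).item()
        outputs.append(best_guess)
        if best_guess == english.vocab.stoi["<eos>"]:
            break

    # Return the predicted English tokens (dropping the leading <sos>).
    return [english.vocab.itos[idx] for idx in outputs[1:]]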
Not bad, but it is clear that the model cannot handle complex sentences. In the upcoming articles in this series, I will improve on this performance by changing the model's architecture, for example by using a bidirectional LSTM, adding an attention mechanism, or replacing the LSTM with a Transformer, to overcome these obvious shortcomings.
I hope this gave you some intuition for how a Seq2Seq model processes data; let me know what you think in the comments section.