BERT+PET方式模型训练(一)+https://developer.aliyun.com/article/1544775?spm=a2c6h.13148508.setting.31.22454f0eHFZZj3
二、实现模型训练函数,验证函数
- 目的:实现模型的训练和验证
- 代码路径:/Users/**/PycharmProjects/llm/prompt_tasks/PET/train.py
- 脚本里面包含两个函数:model2train()和evaluate_model()
- 导入必备的工具包
import os import time from transformers import AutoModelForMaskedLM, AutoTokenizer, get_scheduler from pet_config import * import sys sys.path.append('/Users/ligang/PycharmProjects/llm/prompt_tasks/PET/data_handle') sys.path.append('/Users/ligang/PycharmProjects/llm/prompt_tasks/PET/utils') from utils.metirc_utils import ClassEvaluator from utils.common_utils import * from data_handle.data_loader import * from utils.verbalizer import Verbalizer from pet_config import * pc = ProjectConfig()
- 定义model2train()函数
def model2train():
    """Train the PET-style masked-LM classifier and checkpoint the best model.

    Loads the pretrained masked language model and tokenizer, builds the
    verbalizer (label <-> mask-token mapping), configures AdamW with the
    usual no-weight-decay groups plus a linear warmup schedule, then runs
    the epoch/step training loop with periodic logging, dev-set evaluation,
    per-step snapshots and a running "model_best" checkpoint.  All
    hyperparameters come from the module-level ProjectConfig instance `pc`.
    """
    model = AutoModelForMaskedLM.from_pretrained(pc.pre_model)
    tokenizer = AutoTokenizer.from_pretrained(pc.pre_model)
    # The verbalizer maps between main class labels and the sub-label token
    # ids that the model predicts at the [MASK] positions.
    verbalizer = Verbalizer(verbalizer_file=pc.verbalizer,
                            tokenizer=tokenizer,
                            max_label_len=pc.max_label_len)
    # Standard BERT fine-tuning practice: no weight decay on bias/LayerNorm.
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [p for n, p in model.named_parameters()
                       if not any(nd in n for nd in no_decay)],
            "weight_decay": pc.weight_decay,
        },
        {
            "params": [p for n, p in model.named_parameters()
                       if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0,
        },
    ]
    optimizer = torch.optim.AdamW(optimizer_grouped_parameters, lr=pc.learning_rate)
    model.to(pc.device)

    train_dataloader, dev_dataloader = get_data()
    # One optimizer update per batch, so steps per epoch == len(dataloader).
    num_update_steps_per_epoch = len(train_dataloader)
    max_train_steps = pc.epochs * num_update_steps_per_epoch
    warm_steps = int(pc.warmup_ratio * max_train_steps)  # number of warmup steps
    lr_scheduler = get_scheduler(
        name='linear',
        optimizer=optimizer,
        num_warmup_steps=warm_steps,
        num_training_steps=max_train_steps,
    )

    loss_list = []
    tic_train = time.time()
    metric = ClassEvaluator()
    criterion = torch.nn.CrossEntropyLoss()
    global_step, best_f1 = 0, 0
    print('开始训练:')
    for epoch in range(pc.epochs):
        for batch in train_dataloader:
            logits = model(input_ids=batch['input_ids'].to(pc.device),
                           token_type_ids=batch['token_type_ids'].to(pc.device),
                           attention_mask=batch['attention_mask'].to(pc.device)).logits
            # Gold label token ids at the mask positions, as plain Python lists.
            mask_labels = batch['mask_labels'].numpy().tolist()
            # Expand each main label into its candidate sub-label token-id
            # sequences via the verbalizer before computing the MLM loss.
            sub_labels = verbalizer.batch_find_sub_labels(mask_labels)
            sub_labels = [ele['token_ids'] for ele in sub_labels]
            loss = mlm_loss(logits,
                            batch['mask_positions'].to(pc.device),
                            sub_labels,
                            criterion,
                            pc.device,
                            1.0)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            lr_scheduler.step()
            loss_list.append(float(loss.cpu().detach()))

            global_step += 1
            if global_step % pc.logging_steps == 0:
                time_diff = time.time() - tic_train
                # Running average over ALL recorded losses, not a window.
                loss_avg = sum(loss_list) / len(loss_list)
                print("global step %d, epoch: %d, loss: %.5f, speed: %.2f step/s"
                      % (global_step, epoch, loss_avg, pc.logging_steps / time_diff))
                tic_train = time.time()

            if global_step % pc.valid_steps == 0:
                # Snapshot current weights before evaluating on the dev set.
                cur_save_dir = os.path.join(pc.save_dir, "model_%d" % global_step)
                if not os.path.exists(cur_save_dir):
                    os.makedirs(cur_save_dir)
                model.save_pretrained(os.path.join(cur_save_dir))
                tokenizer.save_pretrained(os.path.join(cur_save_dir))

                acc, precision, recall, f1, class_metrics = evaluate_model(
                    model, metric, dev_dataloader, tokenizer, verbalizer)
                print("Evaluation precision: %.5f, recall: %.5f, F1: %.5f"
                      % (precision, recall, f1))
                if f1 > best_f1:
                    print(
                        f"best F1 performence has been updated: {best_f1:.5f} --> {f1:.5f}"
                    )
                    print(f'Each Class Metrics are: {class_metrics}')
                    best_f1 = f1
                    # Overwrite the "model_best" checkpoint on improvement.
                    cur_save_dir = os.path.join(pc.save_dir, "model_best")
                    if not os.path.exists(cur_save_dir):
                        os.makedirs(cur_save_dir)
                    model.save_pretrained(os.path.join(cur_save_dir))
                    tokenizer.save_pretrained(os.path.join(cur_save_dir))
                tic_train = time.time()
    print('训练结束')
- 定义evaluate_model()函数
def evaluate_model(model, metric, data_loader, tokenizer, verbalizer):
    """Evaluate the current model on the held-out dataloader.

    Args:
        model: the masked language model being trained.
        metric: ClassEvaluator accumulating predicted vs. gold labels.
        data_loader: dataloader over the evaluation split.
        tokenizer: tokenizer used to decode gold label token ids.
        verbalizer: maps predicted sub-label ids back to main labels.

    Returns:
        Tuple of (accuracy, precision, recall, f1, class_metrics).
    """
    model.eval()
    metric.reset()
    with torch.no_grad():
        for batch in data_loader:
            outputs = model(input_ids=batch['input_ids'].to(pc.device),
                            token_type_ids=batch['token_type_ids'].to(pc.device),
                            attention_mask=batch['attention_mask'].to(pc.device))
            logits = outputs.logits
            gold_ids = batch['mask_labels'].numpy().tolist()  # (batch, label_num)
            # Drop every [PAD] id from the gold labels, then decode to text.
            pad_id = tokenizer.pad_token_id
            gold_ids = [[tid for tid in ids if tid != pad_id] for ids in gold_ids]
            gold_labels = [''.join(tokenizer.convert_ids_to_tokens(ids)) for ids in gold_ids]
            # Decode the predicted token ids at the mask positions into
            # main-label strings via the verbalizer.
            pred_ids = convert_logits_to_ids(logits,
                                             batch['mask_positions']).cpu().numpy().tolist()
            pred_labels = [ele['label'] for ele in verbalizer.batch_find_main_label(pred_ids)]
            metric.add_batch(pred_batch=pred_labels, gold_batch=gold_labels)
    eval_metric = metric.compute()
    model.train()
    return (eval_metric['accuracy'], eval_metric['precision'],
            eval_metric['recall'], eval_metric['f1'],
            eval_metric['class_metrics'])
- 调用:
cd /Users/**/PycharmProjects/llm/prompt_tasks/PET python train.py
- 输出结果:
..... global step 40, epoch: 4, loss: 0.62105, speed: 1.27 step/s Evaluation precision: 0.78000, recall: 0.77000, F1: 0.76000 Each Class Metrics are: {'书籍': {'precision': 0.97, 'recall': 0.82, 'f1': 0.89}, '平板': {'precision': 0.57, 'recall': 0.84, 'f1': 0.68}, '手机': {'precision': 0.0, 'recall': 0.0, 'f1': 0}, '水果': {'precision': 0.95, 'recall': 0.81, 'f1': 0.87}, '洗浴': {'precision': 0.7, 'recall': 0.71, 'f1': 0.7}, '电器': {'precision': 0.0, 'recall': 0.0, 'f1': 0}, '电脑': {'precision': 0.86, 'recall': 0.38, 'f1': 0.52}, '蒙牛': {'precision': 1.0, 'recall': 0.68, 'f1': 0.81}, '衣服': {'precision': 0.71, 'recall': 0.91, 'f1': 0.79}, '酒店': {'precision': 1.0, 'recall': 0.88, 'f1': 0.93}} global step 50, epoch: 6, loss: 0.50076, speed: 1.23 step/s global step 60, epoch: 7, loss: 0.41744, speed: 1.23 step/s ... global step 390, epoch: 48, loss: 0.06674, speed: 1.20 step/s global step 400, epoch: 49, loss: 0.06507, speed: 1.21 step/s Evaluation precision: 0.78000, recall: 0.76000, F1: 0.75000
- 结论: BERT+PET模型在验证集上的表现是精确率=78%
- 注意:本项目中只用了约600条样本,在如此小的数据规模上精确率就已经达到了78%,如果想让指标更高,可以扩增样本。
三、实现模型预测函数
- 目的:加载训练好的模型并测试效果
- 代码路径:/Users/**/PycharmProjects/llm/prompt_tasks/PET/inference.py
- 导入必备的工具包
import time from typing import List import torch from rich import print from transformers import AutoTokenizer, AutoModelForMaskedLM import sys sys.path.append('/Users/**/PycharmProjects/llm/prompt_tasks/PET/data_handle') sys.path.append('/Users/**/PycharmProjects/llm/prompt_tasks/PET/utils') from utils.verbalizer import Verbalizer from data_handle.template import HardTemplate from data_handle.data_preprocess import convert_example from utils.common_utils import convert_logits_to_ids
- 预测代码具体实现
# --- Module-level setup: load best checkpoint and prompt template ---------
device = 'mps:0'  # NOTE(review): Apple MPS device — switch to 'cuda:0'/'cpu' per hardware
model_path = 'checkpoints/model_best'
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForMaskedLM.from_pretrained(model_path)
model.to(device).eval()

max_label_len = 2  # maximum label length (in tokens)
verbalizer = Verbalizer(
    verbalizer_file='data/verbalizer.txt',
    tokenizer=tokenizer,
    max_label_len=max_label_len
)
# First line of the prompt file is the (hard) prompt template.
prompt = open('data/prompt.txt', 'r', encoding='utf8').readlines()[0].strip()
hard_template = HardTemplate(prompt=prompt)  # template converter
print(f'Prompt is -> {prompt}')


def inference(contents: List[str]):
    """Predict the mask-label (class) for each raw input sentence.

    Args:
        contents (List[str]): list of raw sentences to classify.

    Returns:
        List of predicted main-label strings, one per input sentence.
    """
    with torch.no_grad():
        start_time = time.time()
        examples = {'text': contents}
        # Wrap each sentence into the prompt template and tokenize.
        tokenized_output = convert_example(
            examples,
            tokenizer,
            hard_template=hard_template,
            max_seq_len=128,
            max_label_len=max_label_len,
            train_mode=False,
            return_tensor=True
        )
        logits = model(input_ids=tokenized_output['input_ids'].to(device),
                       token_type_ids=tokenized_output['token_type_ids'].to(device),
                       attention_mask=tokenized_output['attention_mask'].to(device)).logits
        # Predicted token ids at the mask positions.  # (batch, label_num)
        predictions = convert_logits_to_ids(logits,
                                            tokenized_output['mask_positions']).cpu().numpy().tolist()
        # Map sub-label token ids back to the main class label.
        predictions = verbalizer.batch_find_main_label(predictions)
        predictions = [ele['label'] for ele in predictions]
        used = time.time() - start_time
        print(f'Used {used}s.')
        return predictions


if __name__ == '__main__':
    contents = [
        '天台很好看,躺在躺椅上很悠闲,因为活动所以我觉得性价比还不错,适合一家出行,特别是去迪士尼也蛮近的,下次有机会肯定还会再来的,值得推荐',
        '环境,设施,很棒,周边配套设施齐全,前台小姐姐超级漂亮!酒店很赞,早餐不错,服务态度很好,前台美眉很漂亮。性价比超高的一家酒店。强烈推荐',
        "物流超快,隔天就到了,还没用,屯着出游的时候用的,听方便的,占地小",
        "福行市来到无早集市,因为是喜欢的面包店,所以跑来集市看看。第一眼就看到了,之前在微店买了小刘,这次买了老刘,还有一直喜欢的巧克力磅蛋糕。好奇老板为啥不做柠檬磅蛋糕了,微店一直都是买不到的状态。因为不爱碱水硬欧之类的,所以期待老板多来点其他小点,饼干一直也是大爱,那天好像也没看到",
        "服务很用心,房型也很舒服,小朋友很喜欢,下次去嘉定还会再选择。床铺柔软舒适,晚上休息很安逸,隔音效果不错赞,下次还会来"
    ]
    print("针对下面的文本评论,请分别给出对应所属类别:")
    res = inference(contents)
    # Pair each input sentence with its predicted class for display.
    new_dict = {}
    for i in range(len(contents)):
        new_dict[contents[i]] = res[i]
    print(new_dict)
- 结果展示
{ '天台很好看,躺在躺椅上很悠闲,因为活动所以我觉得性价比还不错,适合一家出 行,特别是去迪士尼也蛮近的,下次有机会肯定还会再来的,值得推荐': '酒店', '环境,设施,很棒,周边配套设施齐全,前台小姐姐超级漂亮!酒店很赞,早餐不 错,服务态度很好,前台美眉很漂亮。性价比超高的一家酒店。强烈推荐': '酒店', '物流超快,隔天就到了,还没用,屯着出游的时候用的,听方便的,占地小': '平板', '福行市来到无早集市,因为是喜欢的面包店,所以跑来集市看看。第一眼就看到了 ,之前在微店买了小刘,这次买了老刘,还有一直喜欢的巧克力磅蛋糕。好奇老板为啥不做 柠檬磅蛋糕了,微店一直都是买不到的状态。因为不爱碱水硬欧之类的,所以期待老板多来 点其他小点,饼干一直也是大爱,那天好像也没看到': '水果', '服务很用心,房型也很舒服,小朋友很喜欢,下次去嘉定还会再选择。床铺柔软舒 适,晚上休息很安逸,隔音效果不错赞,下次还会来': '酒店' }
总结
- 实现了基于BERT+PET模型的构建,并完成了训练和测试评估