参考文章
机器学习算法(八):基于BP神经网络的预测(乳腺癌分类实践)
实现代码
# Step 1: imports
# Breast-cancer dataset loader
from sklearn.datasets import load_breast_cancer
# BP (multi-layer perceptron) model
from sklearn.neural_network import MLPClassifier
# Train/test splitting helper
from sklearn.model_selection import train_test_split
# Per-class metrics and confusion matrix
from sklearn.metrics import classification_report, confusion_matrix
# Feature scaling, chained in front of the classifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
# Plotting packages
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
# Kept so that the '3d' projection is registered on old Matplotlib versions.
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

# Step 2: load the data
cancer = load_breast_cancer()

# Step 3: quick look at the data set
# NOTE: len() of the returned Bunch is its number of keys, not of samples.
print('breast_cancer数据集的长度为:', len(cancer))
print('breast_cancer数据集的类型为:', type(cancer))

cancer_data = cancer['data']
print('cancer_data数据维度为:', cancer_data.shape)
cancer_target = cancer['target']
print('cancer_target标签维度为:', cancer_target.shape)
cancer_names = cancer['feature_names']
cancer_desc = cancer['DESCR']

# Split features and labels in ONE call so that rows and labels are guaranteed
# to stay aligned (two separate calls only agree because of the shared seed).
(cancer_data_train, cancer_data_test,
 cancer_target_train, cancer_target_test) = train_test_split(
    cancer_data, cancer_target, test_size=0.2, random_state=42)

# Step 4: train the BP network and predict
# Adam optimiser with ReLU activations. The features span several orders of
# magnitude and MLPs are sensitive to feature scale, so the inputs are
# standardised first; the scaler is fitted on the training data only.
BP = make_pipeline(
    StandardScaler(),
    MLPClassifier(solver='adam', activation='relu', max_iter=1000,
                  alpha=1e-3, hidden_layer_sizes=(64, 32, 32),
                  random_state=1),
)
# Fit the model
BP.fit(cancer_data_train, cancer_target_train)
# Predict on the training set (used for the visual comparison below)
predict_train_labels = BP.predict(cancer_data_train)

# Plot the first three (raw) features, coloured first by the true labels and
# then by the predicted labels.
for plot_title, point_labels in (
        ('True Label Map', cancer_target_train),
        ('Cancer with BP Model', predict_train_labels)):
    fig = plt.figure()
    # Axes3D(fig, ...) no longer attaches itself to the figure on current
    # Matplotlib, which leaves the window empty; create the axes via the figure.
    ax = fig.add_axes([0, 0, 1, 1], projection='3d')
    ax.view_init(elev=20, azim=20)
    ax.scatter(cancer_data_train[:, 0], cancer_data_train[:, 1],
               cancer_data_train[:, 2], marker='o', c=point_labels)
    ax.set_title(plot_title)
    plt.show()

# Accuracy on the held-out test set
print("预测准确率: {:.4f}".format(BP.score(cancer_data_test, cancer_target_test)))
# Class predictions for the test set
predict_test_labels = BP.predict(cancer_data_test)
print("测试集的真实标签:\n", cancer_target_test)
print("测试集的预测标签:\n", predict_test_labels)
# Per-class precision, recall and F1 score
print(classification_report(cancer_target_test, predict_test_labels))
# Confusion matrix
confusion_mat = confusion_matrix(cancer_target_test, predict_test_labels)
print(confusion_mat)

# Show the confusion matrix as a heat map
sns.set()
figure, ax = plt.subplots()
sns.heatmap(confusion_mat, cmap="YlGnBu_r", annot=True, ax=ax)
ax.set_title('confusion matrix')
# x axis: predicted class
ax.set_xlabel('predict')
# y axis: true class
ax.set_ylabel('true')
plt.show()
自己的代码
"""Classify the breast-cancer data set with a small fully connected network."""
# Step 1: imports
# Breast-cancer dataset loader
from sklearn.datasets import load_breast_cancer
import torch
# Train/test splitting helper
from sklearn.model_selection import train_test_split
# Per-class metrics and confusion matrix
from sklearn.metrics import classification_report, confusion_matrix
# Plotting packages
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
# Kept so that the '3d' projection is registered on old Matplotlib versions.
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

matplotlib.rc("font", family='kaiti')

# Fix the seed so weight initialisation (and thus the results) is repeatable.
torch.manual_seed(0)

# Step 2: load the data
cancer = load_breast_cancer()

# Step 3: quick look at the data set
# NOTE: len() of the returned Bunch is its number of keys, not of samples.
print('breast_cancer数据集的长度为:', len(cancer))
print('breast_cancer数据集的类型为:', type(cancer))

cancer_data = cancer['data']
print('cancer_data数据维度为:', cancer_data.shape)
cancer_target = cancer['target']
print('cancer_target标签维度为:', cancer_target.shape)
cancer_names = cancer['feature_names']
cancer_desc = cancer['DESCR']

# Reuse the arrays extracted above instead of loading the data set again.
x = cancer_data
y = cancer_target

# Split the NumPy arrays first and convert to tensors afterwards.
x_train_raw, x_test_raw, y_train_raw, y_test_raw = train_test_split(
    x, y, test_size=0.2, random_state=42)

# Standardise with statistics of the TRAINING set only. Without this the raw
# feature values (up to the thousands) saturate the Sigmoid layers and the
# network barely learns.
feature_mean = x_train_raw.mean(axis=0)
feature_std = x_train_raw.std(axis=0)
feature_std[feature_std == 0] = 1.0  # guard against constant columns

x_train = torch.FloatTensor((x_train_raw - feature_mean) / feature_std)
x_test = torch.FloatTensor((x_test_raw - feature_mean) / feature_std)
y_train = torch.LongTensor(y_train_raw)
y_test = torch.LongTensor(y_test_raw)

# Build the model: input width follows the data, two output classes.
net = torch.nn.Sequential(
    torch.nn.Linear(x_train.shape[1], 50),
    torch.nn.Sigmoid(),
    torch.nn.Linear(50, 50),
    torch.nn.Sigmoid(),
    torch.nn.Linear(50, 2),
)
optimizer = torch.optim.SGD(net.parameters(), lr=0.05)
loss_func = torch.nn.CrossEntropyLoss()

# 4. Training (full-batch gradient descent)
for t in range(500):
    out = net(x_train)              # forward pass
    loss = loss_func(out, y_train)  # compare the logits with the labels
    optimizer.zero_grad()           # clear gradients of the previous step
    loss.backward()                 # backpropagation
    optimizer.step()                # parameter update

# Predictions on the training set. `out` holds raw logits (softmax would turn
# them into probabilities); argmax over dim 1 gives the predicted class index.
with torch.no_grad():
    pred = net(x_train).argmax(dim=1)
target_train = y_train.numpy()
pred_train = pred.numpy()

# Plot the first three (raw) features, coloured first by the true labels and
# then by the predicted labels.
for plot_title, point_labels in (
        ('True Label Map', target_train),
        ('Cancer with BP Model', pred_train)):
    fig = plt.figure()
    # Axes3D(fig, ...) no longer attaches itself to the figure on current
    # Matplotlib, which leaves the window empty; create the axes via the figure.
    ax = fig.add_axes((0, 0, 1, 1), projection='3d')
    ax.view_init(elev=20, azim=20)
    ax.scatter(x_train_raw[:, 0], x_train_raw[:, 1], x_train_raw[:, 2],
               marker='o', c=point_labels)
    ax.set_title(plot_title)
    plt.show()

# 5. Evaluate on the test set
with torch.no_grad():
    prediction = net(x_test).argmax(dim=1)
target_test = y_test.numpy()
pred_test = prediction.numpy()

# Accuracy: fraction of correctly classified test samples
accuracy = float((pred_test == target_test).mean())
print("准确率", accuracy)
# True versus predicted labels of the test set
print("测试集的真实标签:\n", target_test)
print("测试集的预测标签:\n", pred_test)
# Per-class precision, recall and F1 score
print(classification_report(target_test, pred_test))
# Confusion matrix
confusion_mat = confusion_matrix(target_test, pred_test)
print(confusion_mat)

# Show the confusion matrix as a heat map
sns.set()
figure, ax = plt.subplots()
sns.heatmap(confusion_mat, cmap="YlGnBu_r", annot=True, ax=ax)
ax.set_title('confusion matrix')
# x axis: predicted class
ax.set_xlabel('predict')
# y axis: true class
ax.set_ylabel('true')
plt.show()