✌ 案例实战:多种模型的学习曲线
1、✌ 导入相关库
from sklearn.ensemble import RandomForestClassifier # 随机森林模型 from sklearn.tree import DecisionTreeClassifier # 决策树 from sklearn.linear_model import LogisticRegression # 逻辑回归 from sklearn.svm import SVC # 支持向量机 from sklearn.naive_bayes import GaussianNB # 朴素贝叶斯 import lightgbm as lgb # lightgbm模型 import pandas as pd import numpy as np import matplotlib.pyplot as plt from sklearn.model_selection import learning_curve # 用于画学习曲线 from sklearn.model_selection import ShuffleSplit # 分割数据集 from time import time # 导入时间模块 import datetime from sklearn.datasets import load_digits # 手写数字数据集
2、✌ 定义画图函数
def plot_learning_curve(estimator, title, x, y, ax, ylim=None, cv=None, n_jobs=None):
    """Plot the learning curve of ``estimator`` on the subplot ``ax``.

    The scores are computed with ``learning_curve`` and averaged over the
    cross-validation splits for each training-set size.

    Args:
        estimator: Model passed to ``learning_curve``.
        title: Title of the subplot.
        x: Feature matrix.
        y: Labels.
        ax: Subplot (axes object) to draw on.
        ylim: Optional ``(low, high)`` pair for the y-axis range; when
            ``None`` the y-axis range is left untouched.
        cv: Cross-validation strategy, forwarded to ``learning_curve``.
        n_jobs: Number of parallel jobs, forwarded to ``learning_curve``.

    Returns:
        The same ``ax`` object, after drawing.
    """
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, x, y, cv=cv, n_jobs=n_jobs
    )

    ax.set_title(title)  # subplot title
    if ylim is not None:
        ax.set_ylim(*ylim)  # y-axis range
    ax.set_xlabel("training examples")  # x-axis label
    ax.set_ylabel("score")  # y-axis label
    ax.grid()  # draw the grid

    # x axis: number of training samples; y axis: mean score over the splits.
    # (The original referenced an undefined name `train_sizesLin` here, which
    # raised NameError on every call.)
    ax.plot(train_sizes, np.mean(train_scores, axis=1), 'o-', color='r', label='train score')
    ax.plot(train_sizes, np.mean(test_scores, axis=1), 'o-', color='g', label='test score')
    ax.legend(loc='best')  # legend
    return ax
3、✌ 准备数据
# Load the handwritten-digits dataset and split it into features and labels.
data = load_digits()
x, y = data.data, data.target  # feature matrix, labels

# Each entry pairs a subplot title with the model drawn on that subplot.
_named_models = [
    ('Naive Bayes', GaussianNB()),
    ('DecisionTree', DecisionTreeClassifier()),
    ('SVM', SVC(gamma=0.001)),
    ('RandomForest', RandomForestClassifier(n_estimators=50)),
    ('Logistic', LogisticRegression(C=0.1, solver='lbfgs')),
    ('lgb', lgb.LGBMClassifier()),
]
title = [name for name, _ in _named_models]  # subplot titles
model = [clf for _, clf in _named_models]  # one estimator per subplot

# Splitter used for cross-validation: 50 shuffled splits, 20% held out each.
cv = ShuffleSplit(n_splits=50, test_size=0.2, random_state=0)
4、✌ 循环调用函数画图
# Canvas with a 2 x 3 grid of subplots, one per model.
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
axes = axes.ravel()  # flatten the 2-D array of subplots so it can be indexed by position

for ind, (title_, estimator) in enumerate(zip(title, model)):
    times = time()  # start time for this model

    plot_learning_curve(estimator, title_, x, y, ax=axes[ind], ylim=[0.7, 1.05], n_jobs=4, cv=cv)

    # Report the elapsed time as MM:SS:microseconds. The duration is split
    # arithmetically instead of being passed through
    # datetime.fromtimestamp(), which interprets it as a local wall-clock
    # time: that made the minutes depend on the machine's time zone and
    # wrapped them at one hour.
    elapsed_us = max(0, int(round((time() - times) * 10**6)))
    total_seconds, micros = divmod(elapsed_us, 10**6)
    minutes, seconds = divmod(total_seconds, 60)
    print("{:15s}{:02d}:{:02d}:{:06d}".format(title_, minutes, seconds, micros))

plt.show()