四、模型训练
!pip install catboost
1.决策树预测 Decision Tree Classifier
import xgboost import lightgbm from sklearn.svm import SVC from sklearn.ensemble import GradientBoostingClassifier from sklearn.ensemble import RandomForestClassifier from sklearn.tree import DecisionTreeClassifier from sklearn.neighbors import KNeighborsClassifier from sklearn.linear_model import LogisticRegression from catboost import CatBoostClassifier
# --- 1. Decision Tree Classifier ---
# Fit a decision tree on the training split; the fixed seed makes the
# result reproducible across runs.
tree_model = DecisionTreeClassifier(random_state=1024)
tree_model.fit(X_train, y_train)

# Accumulates each model's accuracy (as a percentage) for the final
# comparison bar chart at the end of the notebook.
accuracy_list = []

# Predict labels for the held-out test split.
y_pred = tree_model.predict(X_test)

from sklearn.metrics import accuracy_score

# Score the predictions and record the percentage.
accuracy = accuracy_score(y_test, y_pred)
print("精度: %.4f%%" % (accuracy * 100.0))
accuracy_list.append(accuracy * 100)
精度: 76.2585%
2.随机森林 RandomForestClassifier
# --- 2. Random Forest ---
# Each tree sees half of the features; depth is capped at 15 to limit
# overfitting.
from sklearn.ensemble import RandomForestClassifier

forest = RandomForestClassifier(max_features=0.5, max_depth=15, random_state=1)
forest.fit(X_train, y_train)

forest_pred = forest.predict(X_test)
forest_acc = accuracy_score(y_test, forest_pred)
print(forest_acc)
accuracy_list.append(100 * forest_acc)
0.8279972416510688
3.逻辑回归Logistic Regression
# --- 3. Logistic Regression (sklearn defaults) ---
logit = LogisticRegression()
logit.fit(X_train, y_train)

logit_pred = logit.predict(X_test)
logit_acc = accuracy_score(y_test, logit_pred)
print(logit_acc)
accuracy_list.append(100 * logit_acc)
0.8344990641316127
4.支持向量机Support Vector
# --- 4. Support Vector Machine (sklearn defaults) ---
svm_model = SVC()
svm_model.fit(X_train, y_train)

svm_pred = svm_model.predict(X_test)
svm_acc = accuracy_score(y_test, svm_pred)
print(svm_acc)
accuracy_list.append(100 * svm_acc)
0.8360752635208354
5.K近邻算法 K Neighbors Classifier
# --- 5. K-Nearest Neighbours (k = 6) ---
knn = KNeighborsClassifier(n_neighbors=6)
knn.fit(X_train, y_train)

knn_pred = knn.predict(X_test)
knn_acc = accuracy_score(y_test, knn_pred)
print(knn_acc)
accuracy_list.append(100 * knn_acc)
0.8304600531967293
6.梯度增强分类器 Gradient Boosting Classifier
# --- 6. Gradient Boosting (shallow trees, depth 2) ---
gb_model = GradientBoostingClassifier(max_depth=2, random_state=1)
gb_model.fit(X_train, y_train)

gb_pred = gb_model.predict(X_test)
gb_acc = accuracy_score(y_test, gb_pred)
print(gb_acc)
accuracy_list.append(100 * gb_acc)
0.8364693133681411
7.xgbrf分类器 xgbrf classifier
# --- 7. XGBoost Random-Forest classifier ---
# eval_metric is pinned to 'mlogloss' — the default since XGBoost 1.3 for the
# 'multi:softprob' objective — to silence the runtime warning that was being
# printed on fit(); training results are unchanged.
xgb_clf = xgboost.XGBRFClassifier(max_depth=3, random_state=1, eval_metric='mlogloss')
xgb_clf.fit(X_train, y_train)

xgb_pred = xgb_clf.predict(X_test)
xgb_acc = accuracy_score(y_test, xgb_pred)
accuracy_list.append(100 * xgb_acc)
print(xgb_acc)
[23:09:17] WARNING: ../src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'multi:softprob' was changed from 'merror' to 'mlogloss'. Explicitly set eval_metric if you'd like to restore the old behavior. 0.8360752635208354
8.LGBMClassifier
# --- 8. LightGBM classifier (depth capped at 2) ---
lgb_model = lightgbm.LGBMClassifier(max_depth=2, random_state=4)
lgb_model.fit(X_train, y_train)

lgb_pred = lgb_model.predict(X_test)
lgb_acc = accuracy_score(y_test, lgb_pred)
print(lgb_acc)
accuracy_list.append(100 * lgb_acc)
0.8363708009063147
9.Cat Boost Classifier
# --- 9. CatBoost Classifier ---
# verbose=0 suppresses the per-iteration training log (1000 lines of
# "learn: ... total: ... remaining: ...") that was flooding the cell output;
# it does not affect the fitted model.
cat_clf = CatBoostClassifier(verbose=0)
cat_clf.fit(X_train, y_train)

cat_pred = cat_clf.predict(X_test)
cat_acc = accuracy_score(y_test, cat_pred)
print(cat_acc)
accuracy_list.append(100 * cat_acc)
989: learn: 0.4191992 total: 12.3s remaining: 124ms 990: learn: 0.4190779 total: 12.3s remaining: 112ms 991: learn: 0.4189944 total: 12.3s remaining: 99.5ms 992: learn: 0.4189137 total: 12.3s remaining: 87ms 993: learn: 0.4188195 total: 12.4s remaining: 74.6ms 994: learn: 0.4187774 total: 12.4s remaining: 62.2ms 995: learn: 0.4187095 total: 12.4s remaining: 49.7ms 996: learn: 0.4186093 total: 12.4s remaining: 37.3ms 997: learn: 0.4185544 total: 12.4s remaining: 24.9ms 998: learn: 0.4184855 total: 12.4s remaining: 12.4ms 999: learn: 0.4183650 total: 12.4s remaining: 0us 0.8307555905822086
五、各模型结果对比
# Show the collected accuracies (in percent) of all nine models,
# in the order they were trained above.
print(accuracy_list)
[76.25849669983253, 82.79972416510688, 83.44990641316127, 83.60752635208354, 83.04600531967293, 83.6469313368141, 83.60752635208354, 83.63708009063147, 83.07555905822086]
# Display names for the nine classifiers, in the same order as the
# entries appended to accuracy_list above.
model_list = [
    'DecisionTree',
    'RandomForest',
    'Logistic Regression',
    'SVC',
    'KNearestNeighbours',
    'GradientBooster',
    'XGBRF',
    'LGBM',
    'CatBoostClassifier',
]
# Bar chart comparing the accuracy of all nine classifier models.
plt.rcParams['figure.figsize'] = 20, 8
sns.set_style('darkgrid')

ax = sns.barplot(x=model_list, y=accuracy_list, palette="husl", saturation=2.0)
plt.xlabel('Classifier Models', fontsize=20)
plt.ylabel('% of Accuracy', fontsize=20)
plt.title('Accuracy of different Classifier Models', fontsize=20)
plt.xticks(fontsize=12, horizontalalignment='center', rotation=8)
plt.yticks(fontsize=12)

# Write each bar's accuracy just above its top, centered horizontally.
for bar in ax.patches:
    bar_x, bar_y = bar.get_xy()
    width = bar.get_width()
    height = bar.get_height()
    ax.annotate(
        f'{round(height, 2)}%',
        (bar_x + width / 2, bar_y + height * 1.02),
        ha='center',
        fontsize='x-large',
    )

plt.show()
SVC支持向量机、xgbrf、lightgbm 耗时特别长,以后不用它们!
项目地址: aistudio.baidu.com/aistudio/pr…