Random Forests and Ensemble Learning
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
from sklearn.model_selection import train_test_split
1. Generating the data
Generate 12,000 samples and split them into training and test sets, 80% for training and 20% for testing (a 4:1 split).
# Generate 12,000 samples with 10 features and 2 classes
from sklearn.datasets import make_hastie_10_2

data, target = make_hastie_10_2()
data.shape, target.shape
((12000, 10), (12000,))
len(target)
12000
# Relabel the -1 class as 0 so the targets are {0, 1}
target[target == -1] = 0
target
array([1., 0., 0., ..., 0., 1., 0.])
data
array([[ 0.78630486, -1.11400257,  0.70382256, ...,  0.82847917, -1.37139053, -2.19668365],
       [-1.03368454,  1.38197532,  0.14687589, ..., -0.1266635 , -0.33387158, -0.26654399],
       [-0.19796369,  0.3510344 , -0.36834309, ..., -0.06335681,  1.28379355,  0.78997227],
       ...,
       [ 1.28611027, -0.12302968, -1.69093227, ...,  1.03900117, -0.00831804,  0.07232734],
       [-0.92488308, -0.40587571,  0.27916008, ...,  1.05126813,  0.45418349,  1.15997838],
       [ 0.34342477,  0.16226858, -0.76778841, ...,  0.6696298 ,  0.28746692, -0.92645814]])
data[:2]
array([[ 0.78630486, -1.11400257,  0.70382256, -1.11910071,  0.13506472, -0.75552071,  0.40645088,  0.82847917, -1.37139053, -2.19668365],
       [-1.03368454,  1.38197532,  0.14687589,  0.1376996 , -0.5647537 ,  2.01163563,  0.38650119, -0.1266635 , -0.33387158, -0.26654399]])
# Split into training and test sets
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=0)
X_train.shape,X_test.shape
((9600, 10), (2400, 10))
2. Model comparison
Compare seven models, all with their default parameters.
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.model_selection import cross_val_score
import time

clf0 = KNeighborsClassifier(n_neighbors=3)
clf1 = LogisticRegression()
clf2 = RandomForestClassifier()
clf3 = AdaBoostClassifier()
clf4 = GradientBoostingClassifier()
clf5 = XGBClassifier()
clf6 = LGBMClassifier()

for clf, label in zip([clf0, clf1, clf2, clf3, clf4, clf5, clf6],
                      ["KNN", 'Logistic Regression', 'Random Forest', 'AdaBoost', 'GBDT', 'XGBoost', 'LightGBM']):
    start = time.time()
    scores = cross_val_score(clf, X_train, y_train, scoring='accuracy', cv=5)
    end = time.time()
    running_time = end - start
    print(label + " Accuracy: %0.8f (+/- %0.2f), time %0.2f s. Model [%s]"
          % (scores.mean(), scores.std(), running_time, label))
KNN Accuracy: 0.72895833 (+/- 0.01), time 0.93 s. Model [KNN]
Logistic Regression Accuracy: 0.51510417 (+/- 0.01), time 0.12 s. Model [Logistic Regression]
Random Forest Accuracy: 0.88510417 (+/- 0.01), time 11.39 s. Model [Random Forest]
AdaBoost Accuracy: 0.87906250 (+/- 0.00), time 2.36 s. Model [AdaBoost]
GBDT Accuracy: 0.91541667 (+/- 0.01), time 9.03 s. Model [GBDT]
XGBoost Accuracy: 0.92989583 (+/- 0.01), time 2.66 s. Model [XGBoost]
[LightGBM] [Info] Number of positive: 3812, number of negative: 3868
[LightGBM] [Warning] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000728 seconds.
You can set force_col_wise=true to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 7680, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.496354 -> initscore=-0.014584
[LightGBM] [Info] Start training from score -0.014584
LightGBM Accuracy: 0.93510417 (+/- 0.00), time 0.71 s. Model [LightGBM]
Comparing the seven models shows that logistic regression is the fastest but has the lowest accuracy.
LightGBM, on the other hand, is both fast and the most accurate here, which is why LightGBM is now the go-to algorithm for most work on structured (tabular) data.
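The cross-validation scores above only use the training split. As a quick follow-up, a minimal sketch (reusing clf6, X_train, X_test and y_test from the cells above) of refitting the LightGBM winner on the full training data and scoring it on the held-out test set could look like this:

from sklearn.metrics import accuracy_score
import time

# Refit the CV winner on the full training split and evaluate on the untouched test split
start = time.time()
clf6.fit(X_train, y_train)
test_acc = accuracy_score(y_test, clf6.predict(X_test))
print("LightGBM hold-out accuracy: %.4f (fit time %.2f s)" % (test_acc, time.time() - start))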
3. Using XGBoost
3.1 Using the native XGBoost interface
import xgboost as xgb
# Record the program's running time
import time

start_time = time.time()

# Build XGBoost DMatrix objects
xgb_train = xgb.DMatrix(X_train, y_train)
xgb_test = xgb.DMatrix(X_test, label=y_test)

# Parameters
params = {
    'booster': 'gbtree',
    # 'silent': 1,            # 1 suppresses run-time messages; 0 is usually better
    # 'nthread': 7,           # number of CPU threads, defaults to the maximum
    'eta': 0.007,             # analogous to the learning rate
    'min_child_weight': 3,    # minimum sum of instance weights (hessian) in a leaf; default is 1.
                              # For 0-1 classification with imbalanced classes, if h is around 0.01,
                              # min_child_weight=1 means a leaf needs at least ~100 samples.
                              # Strongly affects the result: smaller values overfit more easily.
    'max_depth': 6,           # tree depth; larger values overfit more easily
    'gamma': 0.1,             # minimum loss reduction required for a further split; larger is more conservative (typically 0.1 or 0.2)
    'subsample': 0.7,         # row subsampling ratio for the training samples
    'colsample_bytree': 0.7,  # column subsampling ratio per tree
    'lambda': 2,              # L2 regularisation on weights; larger values make overfitting less likely
    # 'alpha': 0,             # L1 regularisation term
    # 'scale_pos_weight': 1,  # values > 0 help convergence when classes are imbalanced
    # 'objective': 'multi:softmax',  # multi-class objective
    # 'num_class': 10,        # number of classes, used together with multi:softmax
    'seed': 1000,             # random seed
    # 'eval_metric': 'auc'
}
plst = list(params.items())
num_rounds = 500  # number of boosting rounds
watchlist = [(xgb_train, 'train'), (xgb_test, 'val')]
# Train the model (and optionally save it)
# When num_rounds is large, early_stopping_rounds stops training once the validation metric
# has not improved for that many rounds.
model = xgb.train(
    plst,
    xgb_train,
    num_rounds,
    watchlist,
    early_stopping_rounds=100,
)
# model.save_model('./model/xgb.model')  # save the trained model

print("best best_ntree_limit", model.best_ntree_limit)

y_pred = model.predict(xgb_test, ntree_limit=model.best_ntree_limit)
print('error=%f' % (sum(1 for i in range(len(y_pred)) if int(y_pred[i] > 0.5) != y_test[i]) / float(len(y_pred))))

# Print the running time
cost_time = time.time() - start_time
print("xgboost success!", '\n', "cost time:", cost_time, "(s)......")
[0] train-rmse:0.49936 val-rmse:0.49942 [1] train-rmse:0.49870 val-rmse:0.49887 [2] train-rmse:0.49809 val-rmse:0.49834 [3] train-rmse:0.49743 val-rmse:0.49781 [4] train-rmse:0.49679 val-rmse:0.49734 [5] train-rmse:0.49618 val-rmse:0.49676 [6] train-rmse:0.49555 val-rmse:0.49622 [7] train-rmse:0.49493 val-rmse:0.49570 [8] train-rmse:0.49430 val-rmse:0.49518 [9] train-rmse:0.49374 val-rmse:0.49472 [10] train-rmse:0.49311 val-rmse:0.49421 [11] train-rmse:0.49252 val-rmse:0.49376 [12] train-rmse:0.49192 val-rmse:0.49324 [13] train-rmse:0.49129 val-rmse:0.49277 [14] train-rmse:0.49067 val-rmse:0.49218 [15] train-rmse:0.49006 val-rmse:0.49169 [16] train-rmse:0.48947 val-rmse:0.49120 [17] train-rmse:0.48886 val-rmse:0.49066 [18] train-rmse:0.48828 val-rmse:0.49025 [19] train-rmse:0.48771 val-rmse:0.48983 [20] train-rmse:0.48710 val-rmse:0.48934 [21] train-rmse:0.48648 val-rmse:0.48878 [22] train-rmse:0.48591 val-rmse:0.48829 [23] train-rmse:0.48532 val-rmse:0.48781 [24] train-rmse:0.48476 val-rmse:0.48738 [25] train-rmse:0.48418 val-rmse:0.48691 [26] train-rmse:0.48359 val-rmse:0.48645 [27] train-rmse:0.48296 val-rmse:0.48593 [28] train-rmse:0.48238 val-rmse:0.48548 [29] train-rmse:0.48180 val-rmse:0.48504 [30] train-rmse:0.48123 val-rmse:0.48452 [31] train-rmse:0.48063 val-rmse:0.48405 [32] train-rmse:0.48008 val-rmse:0.48362 [33] train-rmse:0.47953 val-rmse:0.48316 [34] train-rmse:0.47895 val-rmse:0.48271 [35] train-rmse:0.47837 val-rmse:0.48222 [36] train-rmse:0.47780 val-rmse:0.48176 [37] train-rmse:0.47721 val-rmse:0.48130 [38] train-rmse:0.47664 val-rmse:0.48085 [39] train-rmse:0.47608 val-rmse:0.48046 [40] train-rmse:0.47549 val-rmse:0.47995 [41] train-rmse:0.47489 val-rmse:0.47949 [42] train-rmse:0.47434 val-rmse:0.47905 [43] train-rmse:0.47380 val-rmse:0.47861 [44] train-rmse:0.47322 val-rmse:0.47814 [45] train-rmse:0.47264 val-rmse:0.47763 [46] train-rmse:0.47209 val-rmse:0.47716 [47] train-rmse:0.47153 val-rmse:0.47670 [48] train-rmse:0.47095 val-rmse:0.47624 [49] train-rmse:0.47041 val-rmse:0.47574 [50] train-rmse:0.46984 val-rmse:0.47531 [51] train-rmse:0.46928 val-rmse:0.47485 [52] train-rmse:0.46875 val-rmse:0.47440 [53] train-rmse:0.46823 val-rmse:0.47396 [54] train-rmse:0.46768 val-rmse:0.47352 [55] train-rmse:0.46712 val-rmse:0.47303 [56] train-rmse:0.46658 val-rmse:0.47260 [57] train-rmse:0.46602 val-rmse:0.47216 [58] train-rmse:0.46548 val-rmse:0.47179 [59] train-rmse:0.46491 val-rmse:0.47132 [60] train-rmse:0.46436 val-rmse:0.47077 [61] train-rmse:0.46384 val-rmse:0.47038 [62] train-rmse:0.46329 val-rmse:0.46986 [63] train-rmse:0.46270 val-rmse:0.46937 [64] train-rmse:0.46214 val-rmse:0.46895 [65] train-rmse:0.46164 val-rmse:0.46853 [66] train-rmse:0.46111 val-rmse:0.46808 [67] train-rmse:0.46056 val-rmse:0.46764 [68] train-rmse:0.46003 val-rmse:0.46715 [69] train-rmse:0.45949 val-rmse:0.46676 [70] train-rmse:0.45894 val-rmse:0.46627 [71] train-rmse:0.45844 val-rmse:0.46582 [72] train-rmse:0.45790 val-rmse:0.46538 [73] train-rmse:0.45737 val-rmse:0.46500 [74] train-rmse:0.45686 val-rmse:0.46457 [75] train-rmse:0.45634 val-rmse:0.46418 [76] train-rmse:0.45582 val-rmse:0.46372 [77] train-rmse:0.45527 val-rmse:0.46327 [78] train-rmse:0.45476 val-rmse:0.46284 [79] train-rmse:0.45423 val-rmse:0.46243 [80] train-rmse:0.45370 val-rmse:0.46198 [81] train-rmse:0.45318 val-rmse:0.46158 [82] train-rmse:0.45267 val-rmse:0.46112 [83] train-rmse:0.45218 val-rmse:0.46075 [84] train-rmse:0.45169 val-rmse:0.46028 [85] train-rmse:0.45118 val-rmse:0.45980 [86] train-rmse:0.45066 
val-rmse:0.45941 [87] train-rmse:0.45014 val-rmse:0.45900 [88] train-rmse:0.44960 val-rmse:0.45857 [89] train-rmse:0.44907 val-rmse:0.45813 [90] train-rmse:0.44855 val-rmse:0.45769 [91] train-rmse:0.44803 val-rmse:0.45719 [92] train-rmse:0.44755 val-rmse:0.45678 [93] train-rmse:0.44703 val-rmse:0.45638 [94] train-rmse:0.44654 val-rmse:0.45592 [95] train-rmse:0.44606 val-rmse:0.45550 [96] train-rmse:0.44559 val-rmse:0.45515 [97] train-rmse:0.44508 val-rmse:0.45474 [98] train-rmse:0.44455 val-rmse:0.45435 [99] train-rmse:0.44407 val-rmse:0.45393 [100] train-rmse:0.44357 val-rmse:0.45347 [101] train-rmse:0.44306 val-rmse:0.45300 [102] train-rmse:0.44256 val-rmse:0.45260 [103] train-rmse:0.44208 val-rmse:0.45217 [104] train-rmse:0.44156 val-rmse:0.45176 [105] train-rmse:0.44107 val-rmse:0.45139 [106] train-rmse:0.44055 val-rmse:0.45095 [107] train-rmse:0.44005 val-rmse:0.45059 [108] train-rmse:0.43954 val-rmse:0.45019 [109] train-rmse:0.43903 val-rmse:0.44969 [110] train-rmse:0.43855 val-rmse:0.44934 [111] train-rmse:0.43805 val-rmse:0.44895 [112] train-rmse:0.43757 val-rmse:0.44851 [113] train-rmse:0.43710 val-rmse:0.44813 [114] train-rmse:0.43664 val-rmse:0.44777 [115] train-rmse:0.43620 val-rmse:0.44736 [116] train-rmse:0.43573 val-rmse:0.44700 [117] train-rmse:0.43523 val-rmse:0.44663 [118] train-rmse:0.43475 val-rmse:0.44621 [119] train-rmse:0.43424 val-rmse:0.44585 [120] train-rmse:0.43378 val-rmse:0.44548 [121] train-rmse:0.43332 val-rmse:0.44513 [122] train-rmse:0.43283 val-rmse:0.44474 [123] train-rmse:0.43237 val-rmse:0.44435 [124] train-rmse:0.43188 val-rmse:0.44393 [125] train-rmse:0.43140 val-rmse:0.44354 [126] train-rmse:0.43093 val-rmse:0.44313 [127] train-rmse:0.43046 val-rmse:0.44274 [128] train-rmse:0.43000 val-rmse:0.44234 [129] train-rmse:0.42955 val-rmse:0.44197 [130] train-rmse:0.42910 val-rmse:0.44156 [131] train-rmse:0.42866 val-rmse:0.44116 [132] train-rmse:0.42820 val-rmse:0.44083 [133] train-rmse:0.42774 val-rmse:0.44048 [134] train-rmse:0.42731 val-rmse:0.44017 [135] train-rmse:0.42687 val-rmse:0.43978 [136] train-rmse:0.42643 val-rmse:0.43943 [137] train-rmse:0.42602 val-rmse:0.43911 [138] train-rmse:0.42558 val-rmse:0.43879 [139] train-rmse:0.42514 val-rmse:0.43842 [140] train-rmse:0.42468 val-rmse:0.43806 [141] train-rmse:0.42424 val-rmse:0.43762 [142] train-rmse:0.42378 val-rmse:0.43724 [143] train-rmse:0.42333 val-rmse:0.43689 [144] train-rmse:0.42292 val-rmse:0.43654 [145] train-rmse:0.42248 val-rmse:0.43616 [146] train-rmse:0.42203 val-rmse:0.43578 [147] train-rmse:0.42157 val-rmse:0.43543 [148] train-rmse:0.42109 val-rmse:0.43506 [149] train-rmse:0.42069 val-rmse:0.43471 [150] train-rmse:0.42028 val-rmse:0.43433 [151] train-rmse:0.41984 val-rmse:0.43394 [152] train-rmse:0.41940 val-rmse:0.43360 [153] train-rmse:0.41896 val-rmse:0.43320 [154] train-rmse:0.41851 val-rmse:0.43282 [155] train-rmse:0.41808 val-rmse:0.43244 [156] train-rmse:0.41764 val-rmse:0.43209 [157] train-rmse:0.41723 val-rmse:0.43178 [158] train-rmse:0.41679 val-rmse:0.43145 [159] train-rmse:0.41637 val-rmse:0.43110 [160] train-rmse:0.41596 val-rmse:0.43078 [161] train-rmse:0.41551 val-rmse:0.43045 [162] train-rmse:0.41510 val-rmse:0.43015 [163] train-rmse:0.41467 val-rmse:0.42975 [164] train-rmse:0.41422 val-rmse:0.42939 [165] train-rmse:0.41379 val-rmse:0.42906 [166] train-rmse:0.41336 val-rmse:0.42871 [167] train-rmse:0.41294 val-rmse:0.42834 [168] train-rmse:0.41251 val-rmse:0.42805 [169] train-rmse:0.41210 val-rmse:0.42768 [170] train-rmse:0.41169 val-rmse:0.42734 [171] 
train-rmse:0.41128 val-rmse:0.42696 [172] train-rmse:0.41085 val-rmse:0.42660 [173] train-rmse:0.41047 val-rmse:0.42629 [174] train-rmse:0.41004 val-rmse:0.42594 [175] train-rmse:0.40960 val-rmse:0.42561 [176] train-rmse:0.40919 val-rmse:0.42529 [177] train-rmse:0.40875 val-rmse:0.42493 [178] train-rmse:0.40831 val-rmse:0.42459 [179] train-rmse:0.40788 val-rmse:0.42423 [180] train-rmse:0.40742 val-rmse:0.42388 [181] train-rmse:0.40700 val-rmse:0.42350 [182] train-rmse:0.40661 val-rmse:0.42317 [183] train-rmse:0.40622 val-rmse:0.42286 [184] train-rmse:0.40580 val-rmse:0.42249 [185] train-rmse:0.40539 val-rmse:0.42211 [186] train-rmse:0.40498 val-rmse:0.42177 [187] train-rmse:0.40458 val-rmse:0.42141 [188] train-rmse:0.40415 val-rmse:0.42106 [189] train-rmse:0.40376 val-rmse:0.42072 [190] train-rmse:0.40334 val-rmse:0.42039 [191] train-rmse:0.40292 val-rmse:0.42007 [192] train-rmse:0.40253 val-rmse:0.41973 [193] train-rmse:0.40213 val-rmse:0.41939 [194] train-rmse:0.40173 val-rmse:0.41909 [195] train-rmse:0.40131 val-rmse:0.41877 [196] train-rmse:0.40088 val-rmse:0.41841 [197] train-rmse:0.40047 val-rmse:0.41805 [198] train-rmse:0.40008 val-rmse:0.41770 [199] train-rmse:0.39969 val-rmse:0.41737 [200] train-rmse:0.39929 val-rmse:0.41709 [201] train-rmse:0.39888 val-rmse:0.41677 [202] train-rmse:0.39847 val-rmse:0.41645 [203] train-rmse:0.39808 val-rmse:0.41612 [204] train-rmse:0.39768 val-rmse:0.41580 [205] train-rmse:0.39731 val-rmse:0.41550 [206] train-rmse:0.39693 val-rmse:0.41516 [207] train-rmse:0.39653 val-rmse:0.41485 [208] train-rmse:0.39610 val-rmse:0.41452 [209] train-rmse:0.39572 val-rmse:0.41422 [210] train-rmse:0.39534 val-rmse:0.41391 [211] train-rmse:0.39495 val-rmse:0.41358 [212] train-rmse:0.39456 val-rmse:0.41332 [213] train-rmse:0.39420 val-rmse:0.41307 [214] train-rmse:0.39381 val-rmse:0.41275 [215] train-rmse:0.39343 val-rmse:0.41243 [216] train-rmse:0.39305 val-rmse:0.41213 [217] train-rmse:0.39267 val-rmse:0.41184 [218] train-rmse:0.39230 val-rmse:0.41160 [219] train-rmse:0.39193 val-rmse:0.41130 [220] train-rmse:0.39154 val-rmse:0.41095 [221] train-rmse:0.39116 val-rmse:0.41062 [222] train-rmse:0.39078 val-rmse:0.41029 [223] train-rmse:0.39042 val-rmse:0.41001 [224] train-rmse:0.39002 val-rmse:0.40968 [225] train-rmse:0.38964 val-rmse:0.40937 [226] train-rmse:0.38926 val-rmse:0.40905 [227] train-rmse:0.38887 val-rmse:0.40876 [228] train-rmse:0.38848 val-rmse:0.40846 [229] train-rmse:0.38811 val-rmse:0.40818 [230] train-rmse:0.38771 val-rmse:0.40789 [231] train-rmse:0.38734 val-rmse:0.40758 [232] train-rmse:0.38697 val-rmse:0.40733 [233] train-rmse:0.38658 val-rmse:0.40699 [234] train-rmse:0.38622 val-rmse:0.40677 [235] train-rmse:0.38585 val-rmse:0.40646 [236] train-rmse:0.38548 val-rmse:0.40615 [237] train-rmse:0.38510 val-rmse:0.40584 [238] train-rmse:0.38474 val-rmse:0.40553 [239] train-rmse:0.38438 val-rmse:0.40521 [240] train-rmse:0.38401 val-rmse:0.40488 [241] train-rmse:0.38362 val-rmse:0.40456 [242] train-rmse:0.38326 val-rmse:0.40428 [243] train-rmse:0.38289 val-rmse:0.40398 [244] train-rmse:0.38255 val-rmse:0.40369 [245] train-rmse:0.38219 val-rmse:0.40337 [246] train-rmse:0.38181 val-rmse:0.40305 [247] train-rmse:0.38147 val-rmse:0.40277 [248] train-rmse:0.38110 val-rmse:0.40248 [249] train-rmse:0.38075 val-rmse:0.40221 [250] train-rmse:0.38039 val-rmse:0.40194 [251] train-rmse:0.38001 val-rmse:0.40162 [252] train-rmse:0.37967 val-rmse:0.40135 [253] train-rmse:0.37929 val-rmse:0.40106 [254] train-rmse:0.37894 val-rmse:0.40074 [255] train-rmse:0.37856 
val-rmse:0.40045 [256] train-rmse:0.37821 val-rmse:0.40016 [257] train-rmse:0.37787 val-rmse:0.39990 [258] train-rmse:0.37751 val-rmse:0.39963 [259] train-rmse:0.37717 val-rmse:0.39934 [260] train-rmse:0.37681 val-rmse:0.39909 [261] train-rmse:0.37645 val-rmse:0.39881 [262] train-rmse:0.37613 val-rmse:0.39858 [263] train-rmse:0.37578 val-rmse:0.39831 [264] train-rmse:0.37542 val-rmse:0.39804 [265] train-rmse:0.37507 val-rmse:0.39778 [266] train-rmse:0.37474 val-rmse:0.39752 [267] train-rmse:0.37438 val-rmse:0.39726 [268] train-rmse:0.37404 val-rmse:0.39698 [269] train-rmse:0.37372 val-rmse:0.39671 [270] train-rmse:0.37338 val-rmse:0.39642 [271] train-rmse:0.37306 val-rmse:0.39618 [272] train-rmse:0.37271 val-rmse:0.39591 [273] train-rmse:0.37236 val-rmse:0.39561 [274] train-rmse:0.37204 val-rmse:0.39532 [275] train-rmse:0.37171 val-rmse:0.39506 [276] train-rmse:0.37139 val-rmse:0.39479 [277] train-rmse:0.37106 val-rmse:0.39455 [278] train-rmse:0.37072 val-rmse:0.39432 [279] train-rmse:0.37040 val-rmse:0.39409 [280] train-rmse:0.37006 val-rmse:0.39379 [281] train-rmse:0.36971 val-rmse:0.39352 [282] train-rmse:0.36938 val-rmse:0.39330 [283] train-rmse:0.36906 val-rmse:0.39307 [284] train-rmse:0.36872 val-rmse:0.39281 [285] train-rmse:0.36842 val-rmse:0.39259 [286] train-rmse:0.36809 val-rmse:0.39235 [287] train-rmse:0.36773 val-rmse:0.39204 [288] train-rmse:0.36741 val-rmse:0.39179 [289] train-rmse:0.36708 val-rmse:0.39155 [290] train-rmse:0.36677 val-rmse:0.39129 [291] train-rmse:0.36643 val-rmse:0.39107 [292] train-rmse:0.36612 val-rmse:0.39079 [293] train-rmse:0.36582 val-rmse:0.39054 [294] train-rmse:0.36550 val-rmse:0.39029 [295] train-rmse:0.36521 val-rmse:0.39003 [296] train-rmse:0.36488 val-rmse:0.38980 [297] train-rmse:0.36455 val-rmse:0.38954 [298] train-rmse:0.36422 val-rmse:0.38926 [299] train-rmse:0.36391 val-rmse:0.38901 [300] train-rmse:0.36360 val-rmse:0.38874 [301] train-rmse:0.36330 val-rmse:0.38850 [302] train-rmse:0.36298 val-rmse:0.38823 [303] train-rmse:0.36271 val-rmse:0.38801 [304] train-rmse:0.36240 val-rmse:0.38775 [305] train-rmse:0.36209 val-rmse:0.38752 [306] train-rmse:0.36178 val-rmse:0.38729 [307] train-rmse:0.36148 val-rmse:0.38708 [308] train-rmse:0.36116 val-rmse:0.38682 [309] train-rmse:0.36085 val-rmse:0.38659 [310] train-rmse:0.36056 val-rmse:0.38636 [311] train-rmse:0.36024 val-rmse:0.38609 [312] train-rmse:0.35996 val-rmse:0.38582 [313] train-rmse:0.35966 val-rmse:0.38559 [314] train-rmse:0.35934 val-rmse:0.38532 [315] train-rmse:0.35904 val-rmse:0.38506 [316] train-rmse:0.35873 val-rmse:0.38486 [317] train-rmse:0.35843 val-rmse:0.38464 [318] train-rmse:0.35815 val-rmse:0.38441 [319] train-rmse:0.35783 val-rmse:0.38412 [320] train-rmse:0.35752 val-rmse:0.38387 [321] train-rmse:0.35724 val-rmse:0.38367 [322] train-rmse:0.35692 val-rmse:0.38344 [323] train-rmse:0.35660 val-rmse:0.38321 [324] train-rmse:0.35631 val-rmse:0.38299 [325] train-rmse:0.35601 val-rmse:0.38274 [326] train-rmse:0.35572 val-rmse:0.38253 [327] train-rmse:0.35544 val-rmse:0.38231 [328] train-rmse:0.35513 val-rmse:0.38210 [329] train-rmse:0.35484 val-rmse:0.38188 [330] train-rmse:0.35453 val-rmse:0.38167 [331] train-rmse:0.35425 val-rmse:0.38142 [332] train-rmse:0.35395 val-rmse:0.38117 [333] train-rmse:0.35368 val-rmse:0.38093 [334] train-rmse:0.35340 val-rmse:0.38074 [335] train-rmse:0.35310 val-rmse:0.38052 [336] train-rmse:0.35279 val-rmse:0.38028 [337] train-rmse:0.35248 val-rmse:0.38004 [338] train-rmse:0.35219 val-rmse:0.37980 [339] train-rmse:0.35187 val-rmse:0.37957 [340] 
train-rmse:0.35160 val-rmse:0.37932 [341] train-rmse:0.35132 val-rmse:0.37913 [342] train-rmse:0.35105 val-rmse:0.37895 [343] train-rmse:0.35076 val-rmse:0.37871 [344] train-rmse:0.35049 val-rmse:0.37853 [345] train-rmse:0.35021 val-rmse:0.37834 [346] train-rmse:0.34992 val-rmse:0.37807 [347] train-rmse:0.34965 val-rmse:0.37787 [348] train-rmse:0.34937 val-rmse:0.37765 [349] train-rmse:0.34909 val-rmse:0.37745 [350] train-rmse:0.34880 val-rmse:0.37721 [351] train-rmse:0.34851 val-rmse:0.37695 [352] train-rmse:0.34823 val-rmse:0.37676 [353] train-rmse:0.34796 val-rmse:0.37652 [354] train-rmse:0.34768 val-rmse:0.37632 [355] train-rmse:0.34743 val-rmse:0.37610 [356] train-rmse:0.34714 val-rmse:0.37586 [357] train-rmse:0.34687 val-rmse:0.37562 [358] train-rmse:0.34659 val-rmse:0.37539 [359] train-rmse:0.34633 val-rmse:0.37519 [360] train-rmse:0.34606 val-rmse:0.37500 [361] train-rmse:0.34578 val-rmse:0.37482 [362] train-rmse:0.34550 val-rmse:0.37459 [363] train-rmse:0.34523 val-rmse:0.37442 [364] train-rmse:0.34494 val-rmse:0.37424 [365] train-rmse:0.34465 val-rmse:0.37402 [366] train-rmse:0.34439 val-rmse:0.37382 [367] train-rmse:0.34414 val-rmse:0.37363 [368] train-rmse:0.34385 val-rmse:0.37341 [369] train-rmse:0.34357 val-rmse:0.37321 [370] train-rmse:0.34331 val-rmse:0.37301 [371] train-rmse:0.34305 val-rmse:0.37277 [372] train-rmse:0.34279 val-rmse:0.37259 [373] train-rmse:0.34251 val-rmse:0.37238 [374] train-rmse:0.34223 val-rmse:0.37218 [375] train-rmse:0.34195 val-rmse:0.37199 [376] train-rmse:0.34168 val-rmse:0.37177 [377] train-rmse:0.34140 val-rmse:0.37157 [378] train-rmse:0.34114 val-rmse:0.37139 [379] train-rmse:0.34085 val-rmse:0.37121 [380] train-rmse:0.34057 val-rmse:0.37101 [381] train-rmse:0.34032 val-rmse:0.37083 [382] train-rmse:0.34003 val-rmse:0.37060 [383] train-rmse:0.33978 val-rmse:0.37042 [384] train-rmse:0.33951 val-rmse:0.37020 [385] train-rmse:0.33923 val-rmse:0.36997 [386] train-rmse:0.33897 val-rmse:0.36981 [387] train-rmse:0.33871 val-rmse:0.36964 [388] train-rmse:0.33844 val-rmse:0.36940 [389] train-rmse:0.33818 val-rmse:0.36918 [390] train-rmse:0.33791 val-rmse:0.36899 [391] train-rmse:0.33766 val-rmse:0.36882 [392] train-rmse:0.33743 val-rmse:0.36862 [393] train-rmse:0.33714 val-rmse:0.36842 [394] train-rmse:0.33688 val-rmse:0.36826 [395] train-rmse:0.33661 val-rmse:0.36807 [396] train-rmse:0.33636 val-rmse:0.36791 [397] train-rmse:0.33612 val-rmse:0.36774 [398] train-rmse:0.33584 val-rmse:0.36754 [399] train-rmse:0.33555 val-rmse:0.36735 [400] train-rmse:0.33529 val-rmse:0.36714 [401] train-rmse:0.33505 val-rmse:0.36696 [402] train-rmse:0.33479 val-rmse:0.36677 [403] train-rmse:0.33453 val-rmse:0.36655 [404] train-rmse:0.33426 val-rmse:0.36635 [405] train-rmse:0.33402 val-rmse:0.36620 [406] train-rmse:0.33376 val-rmse:0.36604 [407] train-rmse:0.33352 val-rmse:0.36585 [408] train-rmse:0.33325 val-rmse:0.36567 [409] train-rmse:0.33299 val-rmse:0.36548 [410] train-rmse:0.33274 val-rmse:0.36530 [411] train-rmse:0.33250 val-rmse:0.36509 [412] train-rmse:0.33225 val-rmse:0.36491 [413] train-rmse:0.33200 val-rmse:0.36472 [414] train-rmse:0.33175 val-rmse:0.36452 [415] train-rmse:0.33150 val-rmse:0.36434 [416] train-rmse:0.33125 val-rmse:0.36415 [417] train-rmse:0.33099 val-rmse:0.36398 [418] train-rmse:0.33076 val-rmse:0.36379 [419] train-rmse:0.33051 val-rmse:0.36359 [420] train-rmse:0.33025 val-rmse:0.36341 [421] train-rmse:0.33001 val-rmse:0.36326 [422] train-rmse:0.32975 val-rmse:0.36310 [423] train-rmse:0.32949 val-rmse:0.36291 [424] train-rmse:0.32923 
val-rmse:0.36273 [425] train-rmse:0.32898 val-rmse:0.36251 [426] train-rmse:0.32877 val-rmse:0.36235 [427] train-rmse:0.32852 val-rmse:0.36219 [428] train-rmse:0.32828 val-rmse:0.36204 [429] train-rmse:0.32803 val-rmse:0.36188 [430] train-rmse:0.32781 val-rmse:0.36172 [431] train-rmse:0.32755 val-rmse:0.36153 [432] train-rmse:0.32733 val-rmse:0.36134 [433] train-rmse:0.32709 val-rmse:0.36116 [434] train-rmse:0.32685 val-rmse:0.36100 [435] train-rmse:0.32662 val-rmse:0.36084 [436] train-rmse:0.32638 val-rmse:0.36063 [437] train-rmse:0.32615 val-rmse:0.36044 [438] train-rmse:0.32592 val-rmse:0.36027 [439] train-rmse:0.32570 val-rmse:0.36012 [440] train-rmse:0.32547 val-rmse:0.35994 [441] train-rmse:0.32526 val-rmse:0.35977 [442] train-rmse:0.32503 val-rmse:0.35964 [443] train-rmse:0.32480 val-rmse:0.35949 [444] train-rmse:0.32456 val-rmse:0.35930 [445] train-rmse:0.32433 val-rmse:0.35912 [446] train-rmse:0.32409 val-rmse:0.35896 [447] train-rmse:0.32387 val-rmse:0.35878 [448] train-rmse:0.32364 val-rmse:0.35859 [449] train-rmse:0.32341 val-rmse:0.35844 [450] train-rmse:0.32317 val-rmse:0.35823 [451] train-rmse:0.32291 val-rmse:0.35808 [452] train-rmse:0.32269 val-rmse:0.35790 [453] train-rmse:0.32246 val-rmse:0.35773 [454] train-rmse:0.32222 val-rmse:0.35757 [455] train-rmse:0.32201 val-rmse:0.35742 [456] train-rmse:0.32179 val-rmse:0.35724 [457] train-rmse:0.32158 val-rmse:0.35710 [458] train-rmse:0.32135 val-rmse:0.35692 [459] train-rmse:0.32112 val-rmse:0.35679 [460] train-rmse:0.32089 val-rmse:0.35661 [461] train-rmse:0.32065 val-rmse:0.35644 [462] train-rmse:0.32042 val-rmse:0.35628 [463] train-rmse:0.32019 val-rmse:0.35611 [464] train-rmse:0.31998 val-rmse:0.35596 [465] train-rmse:0.31976 val-rmse:0.35580 [466] train-rmse:0.31954 val-rmse:0.35562 [467] train-rmse:0.31933 val-rmse:0.35546 [468] train-rmse:0.31912 val-rmse:0.35531 [469] train-rmse:0.31889 val-rmse:0.35515 [470] train-rmse:0.31865 val-rmse:0.35499 [471] train-rmse:0.31844 val-rmse:0.35481 [472] train-rmse:0.31819 val-rmse:0.35465 [473] train-rmse:0.31796 val-rmse:0.35453 [474] train-rmse:0.31774 val-rmse:0.35441 [475] train-rmse:0.31751 val-rmse:0.35424 [476] train-rmse:0.31730 val-rmse:0.35410 [477] train-rmse:0.31707 val-rmse:0.35392 [478] train-rmse:0.31685 val-rmse:0.35374 [479] train-rmse:0.31663 val-rmse:0.35358 [480] train-rmse:0.31643 val-rmse:0.35342 [481] train-rmse:0.31622 val-rmse:0.35326 [482] train-rmse:0.31602 val-rmse:0.35311 [483] train-rmse:0.31580 val-rmse:0.35297 [484] train-rmse:0.31560 val-rmse:0.35284 [485] train-rmse:0.31539 val-rmse:0.35268 [486] train-rmse:0.31517 val-rmse:0.35253 [487] train-rmse:0.31496 val-rmse:0.35242 [488] train-rmse:0.31473 val-rmse:0.35228 [489] train-rmse:0.31452 val-rmse:0.35212 [490] train-rmse:0.31432 val-rmse:0.35198 [491] train-rmse:0.31411 val-rmse:0.35183 [492] train-rmse:0.31391 val-rmse:0.35167 [493] train-rmse:0.31369 val-rmse:0.35154 [494] train-rmse:0.31348 val-rmse:0.35135 [495] train-rmse:0.31329 val-rmse:0.35119 [496] train-rmse:0.31309 val-rmse:0.35105 [497] train-rmse:0.31291 val-rmse:0.35090 [498] train-rmse:0.31271 val-rmse:0.35077 [499] train-rmse:0.31250 val-rmse:0.35059 best best_ntree_limit 500 error=0.098750 xgboost success! cost time: 8.833775758743286 (s)......
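Note that no objective was set in params, so xgb.train fell back to its default squared-error regression objective; that is why the log above reports train-rmse/val-rmse and why the predictions have to be thresholded at 0.5 by hand. A hedged sketch of the same run treated explicitly as binary classification (the metric values it would print are of course not the ones shown above) could look like:

# Same data, but with an explicit binary objective and classification-error metric
params_clf = dict(params)
params_clf['objective'] = 'binary:logistic'   # predictions become probabilities in [0, 1]
params_clf['eval_metric'] = 'error'           # report classification error instead of rmse

model_clf = xgb.train(list(params_clf.items()), xgb_train, num_rounds, watchlist,
                      early_stopping_rounds=100)
y_prob = model_clf.predict(xgb_test)          # probability of the positive class
err = ((y_prob > 0.5).astype(int) != y_test).mean()
print('error=%f' % err)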
3.2 Using the scikit-learn interface
The parameter names that change are (a quick mapping sketch follows this list):
eta -> learning_rate
lambda -> reg_lambda
alpha -> reg_alpha
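For illustration only (a sketch; sk_params and clf_from_native are names introduced here, not from the notebook), the native params dict from section 3.1 maps onto the scikit-learn-style constructor arguments like this:

# Translate native parameter names into their scikit-learn-API equivalents
sk_params = {
    'learning_rate': params['eta'],            # eta -> learning_rate
    'reg_lambda': params['lambda'],            # lambda -> reg_lambda
    'min_child_weight': params['min_child_weight'],
    'max_depth': params['max_depth'],
    'gamma': params['gamma'],
    'subsample': params['subsample'],
    'colsample_bytree': params['colsample_bytree'],
    'n_estimators': num_rounds,                # num_boost_round -> n_estimators
}
clf_from_native = XGBClassifier(**sk_params)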
from sklearn.model_selection import train_test_split
from sklearn import metrics
from xgboost import XGBClassifier

clf = XGBClassifier(
    # silent=0,             # 1 suppresses run-time messages; 0 is usually better
    # nthread=4,            # number of CPU threads, defaults to the maximum
    learning_rate=0.3,      # analogous to the learning rate
    min_child_weight=1,     # minimum sum of instance weights (hessian) in a leaf; default is 1.
                            # For 0-1 classification with imbalanced classes, if h is around 0.01,
                            # min_child_weight=1 means a leaf needs at least ~100 samples.
                            # Strongly affects the result: smaller values overfit more easily.
    max_depth=6,            # tree depth; larger values overfit more easily
    gamma=0,                # minimum loss reduction required for a further split; larger is more conservative (typically 0.1 or 0.2)
    subsample=1,            # row subsampling ratio for the training instances
    max_delta_step=0,       # maximum delta step allowed for each tree's weight estimate
    colsample_bytree=1,     # column subsampling ratio per tree
    reg_lambda=1,           # L2 regularisation on weights; larger values make overfitting less likely
    # reg_alpha=0,          # L1 regularisation term
    # scale_pos_weight=1,   # values > 0 help convergence when classes are imbalanced (balances positive/negative weights)
    # objective='multi:softmax',  # multi-class objective (specifies the learning task)
    # num_class=10,         # number of classes, used together with multi:softmax
    n_estimators=100,       # number of trees
    seed=1000               # random seed
    # eval_metric='auc'
)
clf.fit(X_train, y_train)
y_true, y_pred = y_test, clf.predict(X_test)
print("Accuracy : %.4g" % metrics.accuracy_score(y_true, y_pred))
Accuracy : 0.9263
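The native run in 3.1 used early_stopping_rounds; the scikit-learn wrapper supports the same idea. Below is a sketch in the xgboost 1.x style that this notebook appears to use (judging by best_ntree_limit above); in xgboost 2.0+, early_stopping_rounds and eval_metric are passed to the constructor instead, so treat the exact call signature as an assumption about the installed version.

# Early stopping through the scikit-learn interface (xgboost 1.x style)
clf_es = XGBClassifier(learning_rate=0.3, n_estimators=500, max_depth=6, seed=1000)
clf_es.fit(X_train, y_train,
           eval_set=[(X_test, y_test)],        # validation data monitored during training
           eval_metric='error',
           early_stopping_rounds=50,
           verbose=False)
print("Accuracy : %.4g" % metrics.accuracy_score(y_test, clf_es.predict(X_test)))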
4. Using LightGBM
4.1 Native interface
import lightgbm as lgb
from sklearn.metrics import mean_squared_error

# Load your own data, for example:
# print('Load data...')
# df_train = pd.read_csv('../regression/regression.train', header=None, sep='\t')
# df_test = pd.read_csv('../regression/regression.test', header=None, sep='\t')
# y_train = df_train[0].values
# y_test = df_test[0].values
# X_train = df_train.drop(0, axis=1).values
# X_test = df_test.drop(0, axis=1).values

# Build LightGBM Dataset objects
lgb_train = lgb.Dataset(X_train, y_train)  # saving the data as a LightGBM binary file makes loading faster
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)  # validation data

# Parameters as a dict
params = {
    'task': 'train',
    'boosting_type': 'gbdt',        # boosting type
    'objective': 'regression',      # objective function
    'metric': {'l2', 'auc'},        # evaluation metrics
    'num_leaves': 31,               # number of leaves
    'learning_rate': 0.05,          # learning rate
    'feature_fraction': 0.9,        # fraction of features used when building each tree
    'bagging_fraction': 0.8,        # fraction of samples used when building each tree
    'bagging_freq': 5,              # perform bagging every k iterations
    'verbose': 1                    # <0: fatal only, =0: errors (warnings), >0: info
}

print('Start training...')
# Train (cv and train)
gbm = lgb.train(params, lgb_train, num_boost_round=500, valid_sets=lgb_eval)

print('Save model...')
gbm.save_model('model.txt')  # save the trained model to a file

print('Start predicting...')
# Predict on the test set; if early stopping was enabled during training,
# best_iteration gives the predictions from the best iteration
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)

# Evaluate the model
print('error=%f' % (sum(1 for i in range(len(y_pred)) if int(y_pred[i] > 0.5) != y_test[i]) / float(len(y_pred))))
Start training...
[LightGBM] [Warning] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000427 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 9600, number of used features: 10
[LightGBM] [Info] Start training from score 0.496354
Save model...
Start predicting...
error=0.096250
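Since the native run uses the 'regression' objective on 0/1 labels, gbm.predict returns continuous scores, which the cell above thresholds at 0.5 by hand. The same evaluation written with scikit-learn metrics (a small sketch reusing y_pred and y_test from above) would be:

import numpy as np
from sklearn.metrics import accuracy_score

# Threshold the continuous scores at 0.5 to obtain class labels, then score them
y_label = (np.asarray(y_pred) > 0.5).astype(int)
print("Accuracy : %.4g" % accuracy_score(y_test, y_label))  # equals 1 - error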
4.2 scikit-learn interface
from sklearn import metrics
from lightgbm import LGBMClassifier

clf = LGBMClassifier(
    boosting_type='gbdt',        # boosting type: gbdt, dart, goss, rf
    num_leaves=31,               # maximum number of leaves per tree; roughly 2^(max_depth) in XGBoost terms
    max_depth=-1,                # maximum tree depth (-1 means no limit)
    learning_rate=0.1,           # learning rate
    n_estimators=100,            # number of boosted trees, i.e. training rounds
    subsample_for_bin=200000,
    objective=None,
    class_weight=None,
    min_split_gain=0.0,          # minimum gain required to make a split
    min_child_weight=0.001,      # minimum sum of instance weights in a child node
    min_child_samples=20,
    subsample=1.0,               # row subsampling ratio of the training instances
    subsample_freq=0,            # subsampling frequency
    colsample_bytree=1.0,        # column (feature) subsampling ratio
    reg_alpha=0.0,               # L1 regularisation coefficient
    reg_lambda=0.0,              # L2 regularisation coefficient
    random_state=None,
    n_jobs=-1,
    silent=True,                 # recent LightGBM versions no longer recognise this parameter
                                 # (hence the "Unknown parameter: silent" warnings below)
)
clf.fit(X_train, y_train, eval_metric='auc')  # eval_metric sets the evaluation metric; verbose=False would silence the log
clf.fit(X_train, y_train)                     # a second, plain fit: redundant, it simply retrains the same model
y_true, y_pred = y_test, clf.predict(X_test)
print("Accuracy : %.4g" % metrics.accuracy_score(y_true, y_pred))
[LightGBM] [Warning] Unknown parameter: silent
[LightGBM] [Warning] Unknown parameter: silent
[LightGBM] [Info] Number of positive: 4765, number of negative: 4835
[LightGBM] [Warning] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000432 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 9600, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.496354 -> initscore=-0.014584
[LightGBM] [Info] Start training from score -0.014584
[LightGBM] [Warning] Unknown parameter: silent
[LightGBM] [Warning] Unknown parameter: silent
[LightGBM] [Info] Number of positive: 4765, number of negative: 4835
[LightGBM] [Warning] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000274 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 9600, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.496354 -> initscore=-0.014584
[LightGBM] [Info] Start training from score -0.014584
[LightGBM] [Warning] Unknown parameter: silent
Accuracy : 0.9233
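For a slightly richer look than plain accuracy (a minimal sketch reusing the fitted clf above), the scikit-learn wrapper also exposes predicted probabilities and per-feature split counts:

from sklearn.metrics import roc_auc_score

# AUC from the predicted probability of the positive class
print("AUC : %.4g" % roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1]))

# Split-count importance of each of the 10 features
for i, imp in enumerate(clf.feature_importances_):
    print("feature %d importance: %d" % (i, imp))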
Exercise: On the Iris dataset, compare the performance of a decision tree, random forest, AdaBoost, GBDT, XGBoost, and LightGBM, and analyse the differences.
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import time

iris = load_iris()
X = iris["data"]
y = iris["target"]
# NOTE: this split reuses the Hastie data/target generated in section 1, not the iris X/y loaded
# above, which is why the results below match section 2; to run on iris, split X and y instead,
# as sketched after the results below.
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=0)

clf1 = DecisionTreeClassifier(criterion="entropy")  # decision tree
clf2 = RandomForestClassifier()                     # random forest
clf3 = AdaBoostClassifier()                         # AdaBoost
clf4 = GradientBoostingClassifier()                 # GBDT
clf5 = XGBClassifier()                              # XGBoost
clf6 = LGBMClassifier()                             # LightGBM

for clf, label in zip([clf1, clf2, clf3, clf4, clf5, clf6],
                      ['DecisionTree', 'Random Forest', 'AdaBoost', 'GBDT', 'XGBoost', 'LightGBM']):
    start = time.time()
    scores = cross_val_score(clf, X_train, y_train, scoring='accuracy', cv=5)
    end = time.time()
    running_time = end - start
    print(label + " Accuracy: %0.8f (+/- %0.2f), time %0.2f s. Model [%s]"
          % (scores.mean(), scores.std(), running_time, label))
DecisionTree Accuracy: 0.80208333 (+/- 0.01), time 0.80 s. Model [DecisionTree]
Random Forest Accuracy: 0.88375000 (+/- 0.01), time 10.57 s. Model [Random Forest]
AdaBoost Accuracy: 0.87906250 (+/- 0.00), time 2.26 s. Model [AdaBoost]
GBDT Accuracy: 0.91541667 (+/- 0.01), time 8.60 s. Model [GBDT]
XGBoost Accuracy: 0.92989583 (+/- 0.01), time 3.38 s. Model [XGBoost]
[LightGBM] [Info] Number of positive: 3812, number of negative: 3868
[LightGBM] [Warning] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000291 seconds.
You can set force_col_wise=true to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 7680, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.496354 -> initscore=-0.014584
[LightGBM] [Info] Start training from score -0.014584
LightGBM Accuracy: 0.93510417 (+/- 0.00), time 0.61 s. Model [LightGBM]
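To actually carry out the exercise on iris, the same six classifiers can be cross-validated on an iris train/test split. A minimal sketch follows (the Xi_*/yi_* names are introduced here so the Hastie split above is not overwritten, and its results will naturally differ from the output shown above):

# Cross-validate the same six classifiers on an iris train/test split
Xi_train, Xi_test, yi_train, yi_test = train_test_split(X, y, test_size=0.2, random_state=0)
for clf, label in zip([clf1, clf2, clf3, clf4, clf5, clf6],
                      ['DecisionTree', 'Random Forest', 'AdaBoost', 'GBDT', 'XGBoost', 'LightGBM']):
    scores = cross_val_score(clf, Xi_train, yi_train, scoring='accuracy', cv=5)
    print("%s iris CV accuracy: %.4f (+/- %.2f)" % (label, scores.mean(), scores.std()))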