def run_lgb(df_train, df_test, ycol, n_splits=5, seed=2022):
    """Train a cross-validated LightGBM regressor and predict on the test set.

    One model is fitted per fold; the out-of-fold predictions are collected
    and the test predictions are averaged over the folds. Before returning,
    the target column, the out-of-fold predictions and the test predictions
    are all passed through ``np.expm1`` (this assumes ``df_train[ycol]`` was
    log1p-transformed upstream -- TODO confirm against the feature pipeline).

    The mean per-feature importance (top 50 rows) is shown with ``display``.

    Args:
        df_train: Training frame; must contain ``time``, ``ycol`` and every
            feature column present in ``df_test``.
        df_test: Test frame; must contain ``time`` and the feature columns.
        ycol: Name of the target column to fit and predict.
        n_splits: Number of cross-validation folds.
        seed: Random state passed to the LightGBM model.

    Returns:
        A ``(df_oof, prediction)`` tuple. ``df_oof`` has columns ``time``,
        ``ycol`` and ``pred`` for every validation row; ``prediction`` has
        columns ``time`` and ``ycol`` for every test row.
    """
    from tscv import GapKFold

    excluded = {'time', 'Label1', 'Label2', 'label'}
    use_feats = [col for col in df_test.columns if col not in excluded]

    # NOTE(review): per the LightGBM parameter docs, `subsample`
    # (bagging_fraction) only takes effect when `subsample_freq` > 0. Left
    # unchanged so results stay identical -- confirm whether bagging was
    # intended.
    model = lgb.LGBMRegressor(
        num_leaves=32,
        objective='mape',
        max_depth=16,
        learning_rate=0.1,
        n_estimators=10000,
        subsample=0.8,
        feature_fraction=0.6,
        reg_alpha=0.5,
        reg_lambda=0.25,
        random_state=seed,
        metric=None,
    )

    # Explicit copy: the frame is mutated below, and writing into a frame
    # derived from `df_test` triggers SettingWithCopyWarning.
    prediction = df_test[['time']].copy()
    # Float accumulator, since fractional fold predictions are added to it.
    prediction[ycol] = 0.0

    # The test feature matrix is identical for every fold; slice it once.
    X_test = df_test[use_feats]

    oof = []
    df_importance_list = []

    cv = GapKFold(n_splits=n_splits, gap_before=0, gap_after=0)
    for trn_idx, val_idx in cv.split(df_train[use_feats]):
        train_part = df_train.iloc[trn_idx]
        valid_part = df_train.iloc[val_idx]
        X_train, Y_train = train_part[use_feats], train_part[ycol]
        X_val, Y_val = valid_part[use_feats], valid_part[ycol]

        # NOTE(review): `verbose` / `early_stopping_rounds` as fit()
        # arguments match the LightGBM version this notebook was run with;
        # newer releases expect callbacks instead -- verify before upgrading.
        lgb_model = model.fit(
            X_train,
            Y_train,
            eval_names=['train', 'valid'],
            eval_set=[(X_train, Y_train), (X_val, Y_val)],
            verbose=100,
            eval_metric='rmse',
            early_stopping_rounds=100,
        )

        pred_val = lgb_model.predict(
            X_val, num_iteration=lgb_model.best_iteration_)
        df_oof = valid_part[['time', ycol]].copy()
        df_oof['pred'] = pred_val
        oof.append(df_oof)

        pred_test = lgb_model.predict(
            X_test, num_iteration=lgb_model.best_iteration_)
        # Each fold contributes an equal share of the final test prediction.
        prediction[ycol] += pred_test / n_splits

        df_importance_list.append(pd.DataFrame({
            'column': use_feats,
            'importance': lgb_model.feature_importances_,
        }))

        # Drop per-fold objects before the next fit to limit peak memory.
        del lgb_model, pred_val, pred_test, X_train, Y_train, X_val, Y_val
        del train_part, valid_part
        gc.collect()

    df_importance = pd.concat(df_importance_list)
    df_importance = (
        df_importance.groupby(['column'])['importance']
        .agg('mean')
        .sort_values(ascending=False)
        .reset_index()
    )
    display(df_importance.head(50))

    df_oof = pd.concat(oof).reset_index(drop=True)
    # Map target and predictions back through expm1 (see docstring).
    df_oof[ycol] = np.expm1(df_oof[ycol])
    df_oof['pred'] = np.expm1(df_oof['pred'])
    prediction[ycol] = np.expm1(prediction[ycol])

    return df_oof, prediction
# Fit one 10-fold model per target: Label1 on the B frames, Label2 on the N frames.
df_oof_B, pred_B = run_lgb(
    df_train=train_B,
    df_test=test_B,
    ycol='Label1',
    n_splits=10,
)
df_oof_N, pred_N = run_lgb(
    df_train=train_N,
    df_test=test_N,
    ycol='Label2',
    n_splits=10,
)
[LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6 [100] train's rmse: 0.0682995 train's mape: 0.00477136 valid's rmse: 0.381781 valid's mape: 0.02531 [LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6 [100] train's rmse: 0.0997627 train's mape: 0.00536544 valid's rmse: 0.111005 valid's mape: 0.00902414 [200] train's rmse: 0.0936389 train's mape: 0.0046146 valid's rmse: 0.108918 valid's mape: 0.00883392 [300] train's rmse: 0.0911426 train's mape: 0.00426411 valid's rmse: 0.108513 valid's mape: 0.00881604 [LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6 [100] train's rmse: 0.100048 train's mape: 0.00532624 valid's rmse: 0.11552 valid's mape: 0.00944425 [200] train's rmse: 0.0931428 train's mape: 0.00457714 valid's rmse: 0.113447 valid's mape: 0.00923072 [LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6 [100] train's rmse: 0.0991928 train's mape: 0.00531579 valid's rmse: 0.1323 valid's mape: 0.0108636 [200] train's rmse: 0.0921373 train's mape: 0.00455098 valid's rmse: 0.130984 valid's mape: 0.010802 [300] train's rmse: 0.0895399 train's mape: 0.00418137 valid's rmse: 0.1299 valid's mape: 0.0107338 [400] train's rmse: 0.0878624 train's mape: 0.00392812 valid's rmse: 0.129883 valid's mape: 0.0107537 [LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. 
Current value: feature_fraction=0.6 [100] train's rmse: 0.101522 train's mape: 0.00544425 valid's rmse: 0.110895 valid's mape: 0.0095798 [200] train's rmse: 0.0945011 train's mape: 0.00468858 valid's rmse: 0.110495 valid's mape: 0.0095569 [300] train's rmse: 0.0914781 train's mape: 0.00430155 valid's rmse: 0.110368 valid's mape: 0.00955819 [LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6 [100] train's rmse: 0.10166 train's mape: 0.00548662 valid's rmse: 0.1026 valid's mape: 0.0084273 [200] train's rmse: 0.0946172 train's mape: 0.00472591 valid's rmse: 0.100453 valid's mape: 0.00825736 [300] train's rmse: 0.0910066 train's mape: 0.00431003 valid's rmse: 0.0998826 valid's mape: 0.00820407 [400] train's rmse: 0.0891095 train's mape: 0.00405137 valid's rmse: 0.0995016 valid's mape: 0.00817543 [500] train's rmse: 0.0879999 train's mape: 0.00387715 valid's rmse: 0.0992015 valid's mape: 0.00814727 [LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6 [100] train's rmse: 0.0985657 train's mape: 0.00537416 valid's rmse: 0.110957 valid's mape: 0.00810068 [200] train's rmse: 0.091681 train's mape: 0.0046274 valid's rmse: 0.109885 valid's mape: 0.00807423 [300] train's rmse: 0.0887717 train's mape: 0.00424917 valid's rmse: 0.109228 valid's mape: 0.00801878 [400] train's rmse: 0.0872038 train's mape: 0.00400984 valid's rmse: 0.108977 valid's mape: 0.00800319 [LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6 [100] train's rmse: 0.0992089 train's mape: 0.00540927 valid's rmse: 0.108537 valid's mape: 0.00926928 [LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. 
Current value: feature_fraction=0.6 [100] train's rmse: 0.0996883 train's mape: 0.00527525 valid's rmse: 0.108295 valid's mape: 0.00929339 [300] train's rmse: 0.0896013 train's mape: 0.00408298 valid's rmse: 0.107469 valid's mape: 0.00922724 [LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6 [100] train's rmse: 0.0990105 train's mape: 0.00518857 valid's rmse: 0.130121 valid's mape: 0.0108163 .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
 | column | importance |
0 | A_HYC_NH4 | 181.3 |
1 | B_HYC_DO_roll_16_mean_diff | 151.0 |
2 | B_HYC_DO_roll_8_mean_diff | 126.2 |
3 | B_HYC_DO_roll_16_mean | 123.1 |
4 | day | 121.9 |
5 | B_HYC_DO | 121.3 |
6 | A_HYC_DO | 117.1 |
7 | B_HYC_DO_roll_8_mean | 111.6 |
8 | B_HYC_JS_DO_roll_16_mean | 109.5 |
9 | B_QY_ORP_roll_16_mean | 106.0 |
10 | hour | 98.9 |
11 | B_QY_ORP_roll_8_mean | 94.5 |
12 | B_HYC_MLSS_roll_16_mean | 91.6 |
13 | B_HYC_DO_roll_8_std | 85.4 |
14 | B_HYC_JS_DO_roll_8_mean | 82.9 |
15 | A_QY_ORP | 80.3 |
16 | CS_COD_roll_16_mean | 77.9 |
17 | B_QY_ORP_roll_16_mean_diff | 75.5 |
18 | MCCS_NO3_roll_16_mean | 71.7 |
19 | A_HYC_XD | 70.0 |
20 | JS_NH3_roll_16_mean | 68.1 |
21 | B_HYC_DO_roll_4_mean_diff | 67.9 |
22 | JS_CS_SW_ratio | 67.8 |
23 | B_HYC_DO_roll_1_mean_diff | 67.0 |
24 | B_HYC_MLSS_roll_8_mean | 66.8 |
25 | JS_COD_roll_16_mean | 66.8 |
26 | JS_COD | 65.4 |
27 | JS_CS_COD_ratio | 64.8 |
28 | MCCS_NH4_NH3_ratio | 64.2 |
29 | B_HYC_XD_roll_16_mean | 63.1 |
30 | B_HYC_MLSS_roll_8_std | 57.3 |
31 | JS_COD_roll_8_mean | 57.1 |
32 | JS_NH3_roll_8_mean | 57.0 |
33 | CS_SW_roll_16_mean_diff | 56.8 |
34 | JS_NH3 | 56.4 |
35 | MCCS_NO3_roll_8_mean | 55.7 |
36 | B_QY_ORP | 55.6 |
37 | MCCS_NO3 | 55.3 |
38 | CS_TN_roll_16_mean | 54.7 |
39 | JS_CS_TN_ratio | 54.3 |
40 | JS_TN_roll_16_mean | 53.1 |
41 | JS_SW_roll_16_mean | 53.0 |
42 | JS_SW | 52.8 |
43 | CS_COD | 52.8 |
44 | MCCS_NH4_roll_16_mean | 52.6 |
45 | B_HYC_XD_roll_8_mean | 52.0 |
46 | CS_COD_roll_8_mean | 51.9 |
47 | B_HYC_XD_roll_8_std | 51.9 |
48 | JS_TN_roll_8_mean | 51.7 |
49 | CS_LL_roll_16_mean | 49.1 |
[LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6 [100] train's rmse: 0.0649399 train's mape: 0.00447779 valid's rmse: 0.326266 valid's mape: 0.0202435 [200] train's rmse: 0.0589523 train's mape: 0.0038157 valid's rmse: 0.324164 valid's mape: 0.0199591 [300] train's rmse: 0.056316 train's mape: 0.00350081 valid's rmse: 0.323525 valid's mape: 0.0198733 [400] train's rmse: 0.0546665 train's mape: 0.0032988 valid's rmse: 0.323402 valid's mape: 0.0198767 [LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6 [100] train's rmse: 0.0966226 train's mape: 0.0050093 valid's rmse: 0.0947593 valid's mape: 0.00804917 [200] train's rmse: 0.0908854 train's mape: 0.00427998 valid's rmse: 0.0952295 valid's mape: 0.00802535 [LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6 [200] train's rmse: 0.0909109 train's mape: 0.00428306 valid's rmse: 0.104525 valid's mape: 0.00856856 [300] train's rmse: 0.0884061 train's mape: 0.00394001 valid's rmse: 0.103213 valid's mape: 0.00845724 [400] train's rmse: 0.0871361 train's mape: 0.00372665 valid's rmse: 0.102839 valid's mape: 0.00843402 [500] train's rmse: 0.0861158 train's mape: 0.00355518 valid's rmse: 0.102888 valid's mape: 0.00843368 [600] train's rmse: 0.0853805 train's mape: 0.00342962 valid's rmse: 0.102818 valid's mape: 0.00842934 [700] train's rmse: 0.084807 train's mape: 0.00332854 valid's rmse: 0.102704 valid's mape: 0.00842081 [800] train's rmse: 0.0843748 train's mape: 0.00324818 valid's rmse: 0.102675 valid's mape: 0.00842031 [900] train's rmse: 0.0840164 train's mape: 0.00318269 valid's rmse: 0.102522 valid's mape: 0.00841078 [1000] train's rmse: 0.0836993 train's mape: 0.00312375 valid's rmse: 0.102441 valid's mape: 0.00840459 [LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. 
Current value: feature_fraction=0.6 [100] train's rmse: 0.097044 train's mape: 0.00500287 valid's rmse: 0.127756 valid's mape: 0.0109849 [200] train's rmse: 0.0912583 train's mape: 0.00430848 valid's rmse: 0.12718 valid's mape: 0.0109229 [LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6 [100] train's rmse: 0.0966238 train's mape: 0.00504979 valid's rmse: 0.105028 valid's mape: 0.00899874 [200] train's rmse: 0.0911481 train's mape: 0.00435584 valid's rmse: 0.103159 valid's mape: 0.00879998 [300] train's rmse: 0.0887552 train's mape: 0.00399865 valid's rmse: 0.102576 valid's mape: 0.00873083 [400] train's rmse: 0.0873985 train's mape: 0.00377394 valid's rmse: 0.102245 valid's mape: 0.00868814 [500] train's rmse: 0.0864179 train's mape: 0.00361417 valid's rmse: 0.101963 valid's mape: 0.00866178 [600] train's rmse: 0.0857301 train's mape: 0.00349917 valid's rmse: 0.101646 valid's mape: 0.00863081 [700] train's rmse: 0.0851571 train's mape: 0.00340448 valid's rmse: 0.101611 valid's mape: 0.00862662 [LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6 [100] train's rmse: 0.0963757 train's mape: 0.00499246 valid's rmse: 0.109182 valid's mape: 0.00923029 [200] train's rmse: 0.0907536 train's mape: 0.00429021 valid's rmse: 0.109664 valid's mape: 0.00930657 [LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. 
Current value: feature_fraction=0.6 [100] train's rmse: 0.0937155 train's mape: 0.00496063 valid's rmse: 0.122471 valid's mape: 0.00966484 [200] train's rmse: 0.0883576 train's mape: 0.00428758 valid's rmse: 0.120803 valid's mape: 0.00949359 [300] train's rmse: 0.086389 train's mape: 0.00396836 valid's rmse: 0.120372 valid's mape: 0.00944192 [400] train's rmse: 0.0849846 train's mape: 0.00375229 valid's rmse: 0.120196 valid's mape: 0.00942663 [LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6 [100] train's rmse: 0.0963075 train's mape: 0.00505446 valid's rmse: 0.192358 valid's mape: 0.0152874 [200] train's rmse: 0.0905988 train's mape: 0.00431873 valid's rmse: 0.19259 valid's mape: 0.0153366 [LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6 [100] train's rmse: 0.0957068 train's mape: 0.0048757 valid's rmse: 0.0905758 valid's mape: 0.00785261 [200] train's rmse: 0.0901619 train's mape: 0.0041519 valid's rmse: 0.0896947 valid's mape: 0.00777332 [300] train's rmse: 0.0875689 train's mape: 0.00378635 valid's rmse: 0.0894099 valid's mape: 0.00774488 [400] train's rmse: 0.0858833 train's mape: 0.00353321 valid's rmse: 0.0890786 valid's mape: 0.00771644 [500] train's rmse: 0.0850022 train's mape: 0.00337261 valid's rmse: 0.088931 valid's mape: 0.00770126 [600] train's rmse: 0.0844369 train's mape: 0.00325934 valid's rmse: 0.0888908 valid's mape: 0.00769568 [LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. 
Current value: feature_fraction=0.6 [100] train's rmse: 0.0947394 train's mape: 0.00472927 valid's rmse: 0.100599 valid's mape: 0.00840882 [200] train's rmse: 0.0894733 train's mape: 0.00401217 valid's rmse: 0.0994893 valid's mape: 0.00832889 [300] train's rmse: 0.086662 train's mape: 0.00364783 valid's rmse: 0.0990981 valid's mape: 0.00829234 [400] train's rmse: 0.0852868 train's mape: 0.00342472 valid's rmse: 0.0990155 valid's mape: 0.00828222 [500] train's rmse: 0.0842718 train's mape: 0.0032557 valid's rmse: 0.0988224 valid's mape: 0.00826666 [600] train's rmse: 0.0836751 train's mape: 0.00314394 valid's rmse: 0.0987258 valid's mape: 0.00825825 [700] train's rmse: 0.083101 train's mape: 0.00304433 valid's rmse: 0.0987078 valid's mape: 0.00825759 .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
 | column | importance |
0 | N_HYC_DO_roll_16_mean_diff | 206.6 |
1 | N_HYC_NH4_roll_8_mean | 200.4 |
2 | A_HYC_DO | 185.7 |
3 | N_HYC_NH4_roll_16_mean | 184.6 |
4 | N_HYC_DO_roll_8_mean_diff | 183.0 |
5 | N_HYC_DO | 176.2 |
6 | day | 170.5 |
7 | N_HYC_DO_roll_8_mean | 160.6 |
8 | N_HYC_DO_roll_16_mean | 159.7 |
9 | N_QY_ORP_roll_16_mean | 149.1 |
10 | JS_CS_SW_ratio | 130.0 |
11 | N_HYC_DO_roll_8_std | 125.3 |
12 | hour | 123.9 |
13 | N_HYC_DO_roll_1_mean_diff | 118.8 |
14 | N_HYC_DO_roll_4_mean_diff | 117.5 |
15 | N_QY_ORP | 116.8 |
16 | N_QY_ORP_roll_8_mean | 114.1 |
17 | N_HYC_XD_roll_16_mean | 110.7 |
18 | N_QY_ORP_roll_16_mean_diff | 109.8 |
19 | N_HYC_XD_roll_8_mean | 109.1 |
20 | JS_CS_COD_ratio | 107.3 |
21 | N_HYC_MLSS_roll_16_mean | 106.6 |
22 | A_QY_ORP | 102.1 |
23 | MCCS_NO3_roll_16_mean | 101.4 |
24 | MCCS_NH4_roll_16_mean_diff | 100.9 |
25 | CS_SW_roll_16_mean_diff | 100.7 |
26 | CS_LL_roll_16_mean | 100.7 |
27 | JS_CS_TN_ratio | 100.5 |
28 | CS_COD_roll_16_mean | 97.3 |
29 | JS_COD_roll_16_mean | 96.4 |
30 | N_HYC_NH4_roll_8_std | 95.6 |
31 | N_HYC_MLSS_roll_8_mean | 93.8 |
32 | CS_TN_roll_16_mean | 93.6 |
33 | CS_TN | 93.5 |
34 | JS_LL_roll_16_mean | 91.6 |
35 | JS_SW_roll_16_mean_diff | 91.3 |
36 | MCCS_NH4_roll_8_mean_diff | 91.3 |
37 | JS_NH3_roll_16_mean_diff | 90.3 |
38 | MCCS_NH4_NH3_ratio | 90.2 |
39 | N_HYC_JS_DO_roll_16_mean | 89.8 |
40 | MCCS_NO3_roll_8_mean | 89.8 |
41 | MCCS_NH4 | 89.5 |
42 | N_CS_MQ_SSLL_roll_16_mean | 88.3 |
43 | MCCS_NO3 | 87.1 |
44 | JS_NH3_roll_16_mean | 86.6 |
45 | JS_TN_roll_16_mean | 85.6 |
46 | CS_SW | 85.4 |
47 | MCCS_NH4_roll_16_mean | 84.1 |
48 | N_HYC_MLSS_roll_8_std | 83.8 |
49 | CS_COD_roll_8_mean | 83.2 |
四、预测提交
def calc_score(df1, df2):
    """Return the combined score for the two out-of-fold frames.

    The RMSE between ``pred`` and ``Label1`` in ``df1`` and between ``pred``
    and ``Label2`` in ``df2`` is computed, both values are printed, and
    their mean is turned into ``1000 / (1 + mean_rmse)``.
    """
    rmse_1, rmse_2 = [
        np.sqrt(mean_squared_error(frame['pred'], frame[label]))
        for frame, label in ((df1, 'Label1'), (df2, 'Label2'))
    ]
    print(rmse_1, rmse_2)
    loss = (rmse_1 + rmse_2) / 2
    return (1 / (1 + loss)) * 1000


calc_score(df_oof_B, df_oof_N)
3091.5013527627148 2248.255071349608 0.37440868531034793
# Submission: load the sample file and overwrite both label columns with the
# test predictions. `.values` drops the index so rows are matched by position.
sub = pd.read_csv('data/data169443/sample_submission.csv').assign(
    Label1=pred_B['Label1'].values,
    Label2=pred_N['Label2'].values,
)
sub
 | time | Label1 | Label2 |
0 | 2022/7/18 2:40 | 10277.715094 | 9309.105213 |
1 | 2022/7/18 2:42 | 10297.708783 | 9423.129296 |
2 | 2022/7/18 2:44 | 10305.087200 | 9483.911192 |
3 | 2022/7/18 2:46 | 10392.180776 | 9332.600185 |
4 | 2022/7/18 2:48 | 10324.405182 | 9341.154754 |
... | ... | ... | ... |
9995 | 2022/7/31 23:50 | 13868.010120 | 14701.357535 |
9996 | 2022/7/31 23:52 | 13993.966089 | 14665.620693 |
9997 | 2022/7/31 23:54 | 14279.151838 | 14728.293917 |
9998 | 2022/7/31 23:56 | 14398.115205 | 14508.477282 |
9999 | 2022/7/31 23:58 | 14604.189585 | 14580.044369 |
10000 rows × 3 columns
# Write the submission table to result.csv; index=False leaves the DataFrame
# index out of the file.
sub.to_csv('result.csv', index=False)
下载提交即可。