[Time-Series Forecasting] Wastewater Treatment Plant Process Control – Aeration Volume Prediction (Part 2)

Summary: train LightGBM regressors with gap-aware K-fold cross-validation for the two aeration targets (Label1 and Label2), evaluate the out-of-fold RMSE, and generate the submission file.
# Imports needed by the code below (most of these are likely already loaded from Part 1)
import gc

import lightgbm as lgb
import numpy as np
import pandas as pd
from tscv import GapKFold   # gap-aware K-fold splitter for time-ordered data


def run_lgb(df_train, df_test, ycol, n_splits=5, seed=2022):
    # Use every column except the timestamp and the target columns as features
    use_feats = [col for col in df_test.columns if col not in ['time', 'Label1', 'Label2', 'label']]
    model = lgb.LGBMRegressor(num_leaves=32,
                              objective='mape',
                              max_depth=16,
                              learning_rate=0.1,
                              n_estimators=10000,
                              subsample=0.8,
                              feature_fraction=0.6,
                              reg_alpha=0.5,
                              reg_lambda=0.25,
                              random_state=seed,
                              metric=None)
    oof = []
    prediction = df_test[['time']].copy()   # .copy() avoids a SettingWithCopyWarning
    prediction[ycol] = 0
    df_importance_list = []
    cv = GapKFold(n_splits=n_splits, gap_before=0, gap_after=0)
    for fold_id, (trn_idx, val_idx) in enumerate(cv.split(df_train[use_feats])):
        X_train = df_train.iloc[trn_idx][use_feats]
        Y_train = df_train.iloc[trn_idx][ycol]
        X_val = df_train.iloc[val_idx][use_feats]
        Y_val = df_train.iloc[val_idx][ycol]
        # Note: verbose / early_stopping_rounds as fit() keywords require LightGBM 3.x;
        # LightGBM 4+ replaces them with callbacks.
        lgb_model = model.fit(X_train,
                              Y_train,
                              eval_names=['train', 'valid'],
                              eval_set=[(X_train, Y_train), (X_val, Y_val)],
                              verbose=100,
                              eval_metric='rmse',
                              early_stopping_rounds=100)
        # Out-of-fold predictions on this fold's validation block
        pred_val = lgb_model.predict(X_val, num_iteration=lgb_model.best_iteration_)
        df_oof = df_train.iloc[val_idx][['time', ycol]].copy()
        df_oof['pred'] = pred_val
        oof.append(df_oof)
        # Test predictions are averaged over the folds
        pred_test = lgb_model.predict(df_test[use_feats], num_iteration=lgb_model.best_iteration_)
        prediction[ycol] += pred_test / n_splits
        df_importance = pd.DataFrame({
            'column': use_feats,
            'importance': lgb_model.feature_importances_,
        })
        df_importance_list.append(df_importance)
        del lgb_model, pred_val, pred_test, X_train, Y_train, X_val, Y_val
        gc.collect()
    # Average the per-fold importances and show the top 50 (display() works in notebooks)
    df_importance = pd.concat(df_importance_list)
    df_importance = df_importance.groupby(['column'])['importance'].agg(
        'mean').sort_values(ascending=False).reset_index()
    display(df_importance.head(50))
    df_oof = pd.concat(oof).reset_index(drop=True)
    # Invert the log1p transform that was applied to the labels during feature engineering
    df_oof[ycol] = np.expm1(df_oof[ycol])
    df_oof['pred'] = np.expm1(df_oof['pred'])
    prediction[ycol] = np.expm1(prediction[ycol])
    return df_oof, prediction
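As a side note on the cross-validation scheme: tscv.GapKFold produces KFold-style contiguous validation blocks and can additionally drop the samples adjacent to each block from the training set, which limits leakage between neighbouring timestamps. A minimal toy sketch (the data, fold count, and gap sizes below are illustrative only; the training run above uses gap_before=0 and gap_after=0):

import numpy as np
from tscv import GapKFold

# 20 time-ordered samples, 4 folds, and a 2-sample gap on each side of every
# validation block, so training never touches the timestamps right next to it.
X_demo = np.arange(20).reshape(-1, 1)
cv_demo = GapKFold(n_splits=4, gap_before=2, gap_after=2)
for fold, (trn_idx, val_idx) in enumerate(cv_demo.split(X_demo)):
    print(f'fold {fold}: val={list(val_idx)}, n_train={len(trn_idx)}')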
# Train separately for the two targets: Label1 with the B-prefixed features, Label2 with the N-prefixed features
df_oof_B, pred_B = run_lgb(train_B, test_B, ycol='Label1', n_splits=10)
df_oof_N, pred_N = run_lgb(train_N, test_N, ycol='Label2', n_splits=10)
[LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6
[100] train's rmse: 0.0682995 train's mape: 0.00477136  valid's rmse: 0.381781  valid's mape: 0.02531
[LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6
[100] train's rmse: 0.0997627 train's mape: 0.00536544  valid's rmse: 0.111005  valid's mape: 0.00902414
[200] train's rmse: 0.0936389 train's mape: 0.0046146 valid's rmse: 0.108918  valid's mape: 0.00883392
[300] train's rmse: 0.0911426 train's mape: 0.00426411  valid's rmse: 0.108513  valid's mape: 0.00881604
[LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6
[100] train's rmse: 0.100048  train's mape: 0.00532624  valid's rmse: 0.11552 valid's mape: 0.00944425
[200] train's rmse: 0.0931428 train's mape: 0.00457714  valid's rmse: 0.113447  valid's mape: 0.00923072
[LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6
[100] train's rmse: 0.0991928 train's mape: 0.00531579  valid's rmse: 0.1323  valid's mape: 0.0108636
[200] train's rmse: 0.0921373 train's mape: 0.00455098  valid's rmse: 0.130984  valid's mape: 0.010802
[300] train's rmse: 0.0895399 train's mape: 0.00418137  valid's rmse: 0.1299  valid's mape: 0.0107338
[400] train's rmse: 0.0878624 train's mape: 0.00392812  valid's rmse: 0.129883  valid's mape: 0.0107537
[LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6
[100] train's rmse: 0.101522  train's mape: 0.00544425  valid's rmse: 0.110895  valid's mape: 0.0095798
[200] train's rmse: 0.0945011 train's mape: 0.00468858  valid's rmse: 0.110495  valid's mape: 0.0095569
[300] train's rmse: 0.0914781 train's mape: 0.00430155  valid's rmse: 0.110368  valid's mape: 0.00955819
[LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6
[100] train's rmse: 0.10166 train's mape: 0.00548662  valid's rmse: 0.1026  valid's mape: 0.0084273
[200] train's rmse: 0.0946172 train's mape: 0.00472591  valid's rmse: 0.100453  valid's mape: 0.00825736
[300] train's rmse: 0.0910066 train's mape: 0.00431003  valid's rmse: 0.0998826 valid's mape: 0.00820407
[400] train's rmse: 0.0891095 train's mape: 0.00405137  valid's rmse: 0.0995016 valid's mape: 0.00817543
[500] train's rmse: 0.0879999 train's mape: 0.00387715  valid's rmse: 0.0992015 valid's mape: 0.00814727
[LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6
[100] train's rmse: 0.0985657 train's mape: 0.00537416  valid's rmse: 0.110957  valid's mape: 0.00810068
[200] train's rmse: 0.091681  train's mape: 0.0046274 valid's rmse: 0.109885  valid's mape: 0.00807423
[300] train's rmse: 0.0887717 train's mape: 0.00424917  valid's rmse: 0.109228  valid's mape: 0.00801878
[400] train's rmse: 0.0872038 train's mape: 0.00400984  valid's rmse: 0.108977  valid's mape: 0.00800319
[LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6
[100] train's rmse: 0.0992089 train's mape: 0.00540927  valid's rmse: 0.108537  valid's mape: 0.00926928
[LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6
[100] train's rmse: 0.0996883 train's mape: 0.00527525  valid's rmse: 0.108295  valid's mape: 0.00929339
[300] train's rmse: 0.0896013 train's mape: 0.00408298  valid's rmse: 0.107469  valid's mape: 0.00922724
[LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6
[100] train's rmse: 0.0990105 train's mape: 0.00518857  valid's rmse: 0.130121  valid's mape: 0.0108163

column importance
0 A_HYC_NH4 181.3
1 B_HYC_DO_roll_16_mean_diff 151.0
2 B_HYC_DO_roll_8_mean_diff 126.2
3 B_HYC_DO_roll_16_mean 123.1
4 day 121.9
5 B_HYC_DO 121.3
6 A_HYC_DO 117.1
7 B_HYC_DO_roll_8_mean 111.6
8 B_HYC_JS_DO_roll_16_mean 109.5
9 B_QY_ORP_roll_16_mean 106.0
10 hour 98.9
11 B_QY_ORP_roll_8_mean 94.5
12 B_HYC_MLSS_roll_16_mean 91.6
13 B_HYC_DO_roll_8_std 85.4
14 B_HYC_JS_DO_roll_8_mean 82.9
15 A_QY_ORP 80.3
16 CS_COD_roll_16_mean 77.9
17 B_QY_ORP_roll_16_mean_diff 75.5
18 MCCS_NO3_roll_16_mean 71.7
19 A_HYC_XD 70.0
20 JS_NH3_roll_16_mean 68.1
21 B_HYC_DO_roll_4_mean_diff 67.9
22 JS_CS_SW_ratio 67.8
23 B_HYC_DO_roll_1_mean_diff 67.0
24 B_HYC_MLSS_roll_8_mean 66.8
25 JS_COD_roll_16_mean 66.8
26 JS_COD 65.4
27 JS_CS_COD_ratio 64.8
28 MCCS_NH4_NH3_ratio 64.2
29 B_HYC_XD_roll_16_mean 63.1
30 B_HYC_MLSS_roll_8_std 57.3
31 JS_COD_roll_8_mean 57.1
32 JS_NH3_roll_8_mean 57.0
33 CS_SW_roll_16_mean_diff 56.8
34 JS_NH3 56.4
35 MCCS_NO3_roll_8_mean 55.7
36 B_QY_ORP 55.6
37 MCCS_NO3 55.3
38 CS_TN_roll_16_mean 54.7
39 JS_CS_TN_ratio 54.3
40 JS_TN_roll_16_mean 53.1
41 JS_SW_roll_16_mean 53.0
42 JS_SW 52.8
43 CS_COD 52.8
44 MCCS_NH4_roll_16_mean 52.6
45 B_HYC_XD_roll_8_mean 52.0
46 CS_COD_roll_8_mean 51.9
47 B_HYC_XD_roll_8_std 51.9
48 JS_TN_roll_8_mean 51.7
49 CS_LL_roll_16_mean 49.1
[LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6
[100] train's rmse: 0.0649399 train's mape: 0.00447779  valid's rmse: 0.326266  valid's mape: 0.0202435
[200] train's rmse: 0.0589523 train's mape: 0.0038157 valid's rmse: 0.324164  valid's mape: 0.0199591
[300] train's rmse: 0.056316  train's mape: 0.00350081  valid's rmse: 0.323525  valid's mape: 0.0198733
[400] train's rmse: 0.0546665 train's mape: 0.0032988 valid's rmse: 0.323402  valid's mape: 0.0198767
[LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6
[100] train's rmse: 0.0966226 train's mape: 0.0050093 valid's rmse: 0.0947593 valid's mape: 0.00804917
[200] train's rmse: 0.0908854 train's mape: 0.00427998  valid's rmse: 0.0952295 valid's mape: 0.00802535
[LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6
[200] train's rmse: 0.0909109 train's mape: 0.00428306  valid's rmse: 0.104525  valid's mape: 0.00856856
[300] train's rmse: 0.0884061 train's mape: 0.00394001  valid's rmse: 0.103213  valid's mape: 0.00845724
[400] train's rmse: 0.0871361 train's mape: 0.00372665  valid's rmse: 0.102839  valid's mape: 0.00843402
[500] train's rmse: 0.0861158 train's mape: 0.00355518  valid's rmse: 0.102888  valid's mape: 0.00843368
[600] train's rmse: 0.0853805 train's mape: 0.00342962  valid's rmse: 0.102818  valid's mape: 0.00842934
[700] train's rmse: 0.084807  train's mape: 0.00332854  valid's rmse: 0.102704  valid's mape: 0.00842081
[800] train's rmse: 0.0843748 train's mape: 0.00324818  valid's rmse: 0.102675  valid's mape: 0.00842031
[900] train's rmse: 0.0840164 train's mape: 0.00318269  valid's rmse: 0.102522  valid's mape: 0.00841078
[1000]  train's rmse: 0.0836993 train's mape: 0.00312375  valid's rmse: 0.102441  valid's mape: 0.00840459
[LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6
[100] train's rmse: 0.097044  train's mape: 0.00500287  valid's rmse: 0.127756  valid's mape: 0.0109849
[200] train's rmse: 0.0912583 train's mape: 0.00430848  valid's rmse: 0.12718 valid's mape: 0.0109229
[LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6
[100] train's rmse: 0.0966238 train's mape: 0.00504979  valid's rmse: 0.105028  valid's mape: 0.00899874
[200] train's rmse: 0.0911481 train's mape: 0.00435584  valid's rmse: 0.103159  valid's mape: 0.00879998
[300] train's rmse: 0.0887552 train's mape: 0.00399865  valid's rmse: 0.102576  valid's mape: 0.00873083
[400] train's rmse: 0.0873985 train's mape: 0.00377394  valid's rmse: 0.102245  valid's mape: 0.00868814
[500] train's rmse: 0.0864179 train's mape: 0.00361417  valid's rmse: 0.101963  valid's mape: 0.00866178
[600] train's rmse: 0.0857301 train's mape: 0.00349917  valid's rmse: 0.101646  valid's mape: 0.00863081
[700] train's rmse: 0.0851571 train's mape: 0.00340448  valid's rmse: 0.101611  valid's mape: 0.00862662
[LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6
[100] train's rmse: 0.0963757 train's mape: 0.00499246  valid's rmse: 0.109182  valid's mape: 0.00923029
[200] train's rmse: 0.0907536 train's mape: 0.00429021  valid's rmse: 0.109664  valid's mape: 0.00930657
[LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6
[100] train's rmse: 0.0937155 train's mape: 0.00496063  valid's rmse: 0.122471  valid's mape: 0.00966484
[200] train's rmse: 0.0883576 train's mape: 0.00428758  valid's rmse: 0.120803  valid's mape: 0.00949359
[300] train's rmse: 0.086389  train's mape: 0.00396836  valid's rmse: 0.120372  valid's mape: 0.00944192
[400] train's rmse: 0.0849846 train's mape: 0.00375229  valid's rmse: 0.120196  valid's mape: 0.00942663
[LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6
[100] train's rmse: 0.0963075 train's mape: 0.00505446  valid's rmse: 0.192358  valid's mape: 0.0152874
[200] train's rmse: 0.0905988 train's mape: 0.00431873  valid's rmse: 0.19259 valid's mape: 0.0153366
[LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6
[100] train's rmse: 0.0957068 train's mape: 0.0048757 valid's rmse: 0.0905758 valid's mape: 0.00785261
[200] train's rmse: 0.0901619 train's mape: 0.0041519 valid's rmse: 0.0896947 valid's mape: 0.00777332
[300] train's rmse: 0.0875689 train's mape: 0.00378635  valid's rmse: 0.0894099 valid's mape: 0.00774488
[400] train's rmse: 0.0858833 train's mape: 0.00353321  valid's rmse: 0.0890786 valid's mape: 0.00771644
[500] train's rmse: 0.0850022 train's mape: 0.00337261  valid's rmse: 0.088931  valid's mape: 0.00770126
[600] train's rmse: 0.0844369 train's mape: 0.00325934  valid's rmse: 0.0888908 valid's mape: 0.00769568
[LightGBM] [Warning] feature_fraction is set=0.6, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.6
[100] train's rmse: 0.0947394 train's mape: 0.00472927  valid's rmse: 0.100599  valid's mape: 0.00840882
[200] train's rmse: 0.0894733 train's mape: 0.00401217  valid's rmse: 0.0994893 valid's mape: 0.00832889
[300] train's rmse: 0.086662  train's mape: 0.00364783  valid's rmse: 0.0990981 valid's mape: 0.00829234
[400] train's rmse: 0.0852868 train's mape: 0.00342472  valid's rmse: 0.0990155 valid's mape: 0.00828222
[500] train's rmse: 0.0842718 train's mape: 0.0032557 valid's rmse: 0.0988224 valid's mape: 0.00826666
[600] train's rmse: 0.0836751 train's mape: 0.00314394  valid's rmse: 0.0987258 valid's mape: 0.00825825
[700] train's rmse: 0.083101  train's mape: 0.00304433  valid's rmse: 0.0987078 valid's mape: 0.00825759

column importance
0 N_HYC_DO_roll_16_mean_diff 206.6
1 N_HYC_NH4_roll_8_mean 200.4
2 A_HYC_DO 185.7
3 N_HYC_NH4_roll_16_mean 184.6
4 N_HYC_DO_roll_8_mean_diff 183.0
5 N_HYC_DO 176.2
6 day 170.5
7 N_HYC_DO_roll_8_mean 160.6
8 N_HYC_DO_roll_16_mean 159.7
9 N_QY_ORP_roll_16_mean 149.1
10 JS_CS_SW_ratio 130.0
11 N_HYC_DO_roll_8_std 125.3
12 hour 123.9
13 N_HYC_DO_roll_1_mean_diff 118.8
14 N_HYC_DO_roll_4_mean_diff 117.5
15 N_QY_ORP 116.8
16 N_QY_ORP_roll_8_mean 114.1
17 N_HYC_XD_roll_16_mean 110.7
18 N_QY_ORP_roll_16_mean_diff 109.8
19 N_HYC_XD_roll_8_mean 109.1
20 JS_CS_COD_ratio 107.3
21 N_HYC_MLSS_roll_16_mean 106.6
22 A_QY_ORP 102.1
23 MCCS_NO3_roll_16_mean 101.4
24 MCCS_NH4_roll_16_mean_diff 100.9
25 CS_SW_roll_16_mean_diff 100.7
26 CS_LL_roll_16_mean 100.7
27 JS_CS_TN_ratio 100.5
28 CS_COD_roll_16_mean 97.3
29 JS_COD_roll_16_mean 96.4
30 N_HYC_NH4_roll_8_std 95.6
31 N_HYC_MLSS_roll_8_mean 93.8
32 CS_TN_roll_16_mean 93.6
33 CS_TN 93.5
34 JS_LL_roll_16_mean 91.6
35 JS_SW_roll_16_mean_diff 91.3
36 MCCS_NH4_roll_8_mean_diff 91.3
37 JS_NH3_roll_16_mean_diff 90.3
38 MCCS_NH4_NH3_ratio 90.2
39 N_HYC_JS_DO_roll_16_mean 89.8
40 MCCS_NO3_roll_8_mean 89.8
41 MCCS_NH4 89.5
42 N_CS_MQ_SSLL_roll_16_mean 88.3
43 MCCS_NO3 87.1
44 JS_NH3_roll_16_mean 86.6
45 JS_TN_roll_16_mean 85.6
46 CS_SW 85.4
47 MCCS_NH4_roll_16_mean 84.1
48 N_HYC_MLSS_roll_8_std 83.8
49 CS_COD_roll_8_mean 83.2
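If you want a visual read on these rankings, one option is to also return df_importance from run_lgb (as written it is only displayed inside the function) and plot the top features. A minimal matplotlib sketch, assuming a df_importance DataFrame with 'column' and 'importance' fields like the tables above:

import matplotlib.pyplot as plt

# df_importance: 'column' / 'importance' (mean over folds), as displayed inside run_lgb
top = df_importance.head(20).iloc[::-1]        # reverse so the largest bar ends up on top
plt.figure(figsize=(8, 6))
plt.barh(top['column'], top['importance'])
plt.xlabel('mean feature importance across folds')
plt.title('Top 20 LightGBM features')
plt.tight_layout()
plt.show()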


IV. Prediction and Submission


from sklearn.metrics import mean_squared_error

def calc_score(df1, df2):
    # RMSE of each target on the out-of-fold predictions (already back-transformed with expm1)
    rmse_1 = np.sqrt(mean_squared_error(df1['pred'], df1['Label1']))
    rmse_2 = np.sqrt(mean_squared_error(df2['pred'], df2['Label2']))
    loss = (rmse_1 + rmse_2) / 2
    print(rmse_1, rmse_2)
    # Competition-style score: higher is better, approaching 1000 as the loss goes to zero
    score = (1 / (1 + loss)) * 1000
    return score

calc_score(df_oof_B, df_oof_N)
3091.5013527627148 2248.255071349608
0.37440868531034793
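The printed score is consistent with the two OOF RMSEs above: loss = (3091.50 + 2248.26) / 2 ≈ 2669.88, so score = 1000 / (1 + 2669.88) ≈ 0.374.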
# Submission: fill the sample file with the fold-averaged test predictions
sub = pd.read_csv('data/data169443/sample_submission.csv')
sub['Label1'] = pred_B['Label1'].values
sub['Label2'] = pred_N['Label2'].values
sub

time Label1 Label2
0 2022/7/18 2:40 10277.715094 9309.105213
1 2022/7/18 2:42 10297.708783 9423.129296
2 2022/7/18 2:44 10305.087200 9483.911192
3 2022/7/18 2:46 10392.180776 9332.600185
4 2022/7/18 2:48 10324.405182 9341.154754
... ... ... ...
9995 2022/7/31 23:50 13868.010120 14701.357535
9996 2022/7/31 23:52 13993.966089 14665.620693
9997 2022/7/31 23:54 14279.151838 14728.293917
9998 2022/7/31 23:56 14398.115205 14508.477282
9999 2022/7/31 23:58 14604.189585 14580.044369

10000 rows × 3 columns

sub.to_csv('result.csv', index=False)
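Before uploading, a quick consistency check against the sample file can catch shape or ordering problems. A minimal sketch (the variable names below are illustrative and not part of the original notebook):

# Illustrative sanity check: the written file should mirror the sample submission
sample = pd.read_csv('data/data169443/sample_submission.csv')
result = pd.read_csv('result.csv')
assert result.shape == sample.shape                        # expect 10000 rows x 3 columns
assert (result['time'] == sample['time']).all()            # same timestamps, same order
assert result[['Label1', 'Label2']].notna().all().all()    # no missing predictions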

Download result.csv and submit it.

