# Traditional Credit Scoring Using Logistic Regression
#
# End-to-end example script: load the German credit data, filter variables,
# split into train/test, WOE-bin the variables, fit an L1-penalised logistic
# regression on the WOE values, evaluate it, and build/apply a scorecard.
import scorecardpy as sc
from sklearn.linear_model import LogisticRegression

# Name of the target column; every step below refers to the same column.
TARGET = "creditability"

# data prepare ------
# load germancredit data
dat = sc.germancredit()

# filter variable via missing rate, iv, identical value rate
dt_s = sc.var_filter(dat, y=TARGET)

# breaking dt into train and test
train, test = sc.split_df(dt_s, TARGET).values()

# woe binning ------
# NOTE(review): the bins are fitted on dt_s, which still contains the rows
# that end up in `test`; fit on `train` instead if the test set has to stay
# strictly held out from the binning step.
bins = sc.woebin(dt_s, y=TARGET)
# sc.woebin_plot(bins)

# binning adjustment
# # adjust breaks interactively
# breaks_adj = sc.woebin_adj(dt_s, TARGET, bins)
# # or specify breaks manually
breaks_adj = {
    "age.in.years": [26, 35, 40],
    "other.debtors.or.guarantors": ["none", "co-applicant%,%guarantor"],
}
bins_adj = sc.woebin(dt_s, y=TARGET, breaks_list=breaks_adj)

# converting train and test into woe values
train_woe = sc.woebin_ply(train, bins_adj)
test_woe = sc.woebin_ply(test, bins_adj)

y_train = train_woe.loc[:, TARGET]
X_train = train_woe.loc[:, train_woe.columns != TARGET]
y_test = test_woe.loc[:, TARGET]
# Select the test features by the training feature labels rather than by a
# boolean mask derived from train_woe: this keeps the test matrix in exactly
# the column order the model is fitted on, and fails loudly (KeyError) if a
# feature is missing instead of silently picking the wrong columns.
X_test = test_woe.loc[:, X_train.columns]

# logistic regression ------
lr = LogisticRegression(penalty="l1", C=0.9, solver="saga", n_jobs=-1)
lr.fit(X_train, y_train)
# lr.coef_
# lr.intercept_

# predicted probability (second column of predict_proba)
train_pred = lr.predict_proba(X_train)[:, 1]
test_pred = lr.predict_proba(X_test)[:, 1]

# performance ks & roc ------
train_perf = sc.perf_eva(y_train, train_pred, title="train")
test_perf = sc.perf_eva(y_test, test_pred, title="test")

# score ------
card = sc.scorecard(bins_adj, lr, X_train.columns)
# credit score
train_score = sc.scorecard_ply(train, card, print_step=0)
test_score = sc.scorecard_ply(test, card, print_step=0)

# psi
sc.perf_psi(
    score={"train": train_score, "test": test_score},
    label={"train": y_train, "test": y_test},
)