# PYTHON银行机器学习:回归、随机森林、KNN近邻、决策树、高斯朴素贝叶斯、支持向量机SVM分析营销活动数据|数据分享（下）

PYTHON银行机器学习:回归、随机森林、KNN近邻、决策树、高斯朴素贝叶斯、支持向量机SVM分析营销活动数据|数据分享（上）：https://developer.aliyun.com/article/1492254

# 特征选择和工程

data = pd.get_dummies(data=data, columns = ['job', 'marital', 'education' , 'month'], \
prefix = ['job', 'marital', 'education' , 'month'])

data.head(5)

age = pearsonr(data['age'], data['y'])

sns.heatmap(corr

pylab.show()

# 算法的实现

## 逻辑回归

K=5
kf = KFold(n_splits=K, shuffle=True)
logreg = LogisticRegression()
[[7872   93]
 [ 992   86]]

[[7919   81]
 [ 956   86]]

[[7952   60]
 [ 971   59]]

[[7871   82]
 [1024   65]]

[[7923   69]
 [ 975   75]]

## 决策树

dt2 = tree.DecisionTreeClassifier(random_state=1, max_depth=2)
[[7988    0]
 [1055    0]]

[[7986    0]
 [1056    0]]

[[7920   30]
 [1061   31]]

[[8021    0]
 [1021    0]]

[[7938   39]
 [1039   26]]

## 随机森林

random_forest = RandomForestClassifier
[[7812  183]
 [ 891  157]]

[[7825  183]
 [ 870  164]]

[[7774  184]
 [ 915  169]]

[[7770  177]
 [ 912  183]]

[[7818  196]
 [ 866  162]]

## KNN近邻

classifier = KNeighborsClassifier(n_neighbors =13,metric = 'minkowski' , p=2)
print("Mean accuracy: ",accuracyknn/K)
print("The best AUC: ", bestaucknn)
[[7952   30]
 [1046   15]]

[[7987   30]
 [1010   15]]

[[7989   23]
 [1017   13]]

[[7920   22]
 [1083   17]]

[[7948   21]
 [1052   21]]

## 高斯朴素贝叶斯

kf = KFold(n_splits=K, shuffle=True)
gaussian = GaussianNB()
[[7340  690]
 [ 682  331]]

[[7321  633]
 [ 699  389]]

[[7291  672]
 [ 693  386]]

[[7300  659]
 [ 714  369]]

[[7327  689]
 [ 682  344]]


models = pd.DataFrame({
'Model': ['KNN', 'Logistic Regression', 'Naive Bayes', 'Decision Tree','Random Forest'],
'Score': [accuracyknn/K, accuracylogreg/K, accuracygnb/K, accuracydt/K, accuracyrf/K],
'BestAUC': [bestaucknn,bestauclogreg,bestaucgnb, bestaucdt,bestaucrf]})

# 欠采样

gTrain, gValid = train_test_split

## 逻辑回归

predsTrain = logreg.predict(gTrainUrandom)

predsTrain = logreg.predict(gTrain20Urandom)

predsTrain = logreg.predict(gTrrandom)

## 决策树

print("Train AUC:", metrics.roc_auc_score(ygTrds))

## 随机森林

print("Train AUC:", metrics.roc_auc_score(ygTr, predsTrain),
"Valid AUC:", metrics.roc_auc_score(ygVd, preds))

## KNN近邻

print("Train AUC:", metrics.roc_auc_score(ygTrm, predsTrain),
"Valid AUC:", metrics.roc_auc_score(ygVal10, preds))

## 高斯朴素贝叶斯

print("Train AUC:", metrics.roc_auc_score(ygTraom, predsTrain),
"Valid AUC:", metrics.roc_auc_score(ygid, preds))

# 过采样

feates = datolist()
print(feures)
feaes.remove('y')

print(gTrainOSM.shape)
(31945, 39)

smt = SMOT
(32345, 39)

smt = SMOT
(32595, 39)

ygTrain10OSM=gTrain10OSM['y']
gTrain10OSM=gTrain10OSM.drop(columns=['y'])

## 逻辑回归

print("Train AUC:", metrics.roc_auc_score(ygTrin10SM, predsTrain),
"Valid AUC:", metrics.roc_auc_score(ygValid, preds))

## 决策树

dt2.fit(,ygTranOS)
predsTrain = dtpreict(TrainOSM)
preds = dt2.predict(gValid)

## 随机森林

random_forest.fit(rainOSM, ygTranOS)
predsTrain = random_forest.prect(gTraiOSM)
p

## KNN近邻

classifier.fit(granOSM, yTanOSM)
predsTrain = classifier.predict(gTaiSM)
preds = classifier.predict(Vaid)

## 高斯朴素贝叶斯

gaussian.fit(gTriOM, ygrainM)
predsTrain = gaussian.predcti)

# 结论

