# Chp7-2
# 2019-12-23
# In [9]: load the iris dataset and print its raw feature matrix.
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
from sklearn import datasets

# `iris` is reused by the later cells, so it stays a module-level name.
iris = datasets.load_iris()

features = iris.data
print(type(features))
print(features)

# The label vector can be inspected the same way through `iris.target`.
#
# Recorded output of the original run:
#   <class 'numpy.ndarray'>
#   followed by the full feature array, 150 rows of four measurements each,
#   starting with [5.1 3.5 1.4 0.2] and ending with [5.9 3.  5.1 1.8].
# In [11]: normal Q-Q plots for every (feature, class) pair, saved to
# 'chap72.png'.  The grid has one row per feature and one column per class.
import pandas as pd
import numpy as np
from sklearn import datasets
from scipy import stats
import matplotlib.pyplot as plt

iris = datasets.load_iris()

plt.figure(figsize=(12, 15))
for n in range(4):            # feature column
    print(n)
    for m in range(3):        # class index; rows m*50 .. m*50+49 form one group
        sample = iris.data[m * 50:m * 50 + 50, n]
        # Standardise within the group so every panel shares the same axes.
        x = (sample - sample.mean()) / sample.std()

        plt.subplot(4, 3, n * 3 + m + 1)
        stats.probplot(x, dist='norm', plot=plt)
        plt.text(-2, 2, iris.feature_names[n])
        # Only the top row carries the class name; other rows get a blank title.
        plt.title(iris.target_names[m] if n == 0 else '')
        plt.xlim([-2.5, 2.5])
        plt.ylim([-2.5, 2.5])
        # Green diagonal: where points fall if the sample matches the normal quantiles.
        plt.plot([-2.5, 2.5], [-2.5, 2.5], c='g')

plt.savefig('chap72.png')

# Recorded output of the original run: 0 1 2 3
# Cells In [40] - In [56]: Gaussian naive Bayes on the first two iris
# features, evaluation of the predictions, per-feature AUC, and a strip plot.
# Relies on `iris`, `pd`, `np` and `plt` created by the earlier cells.
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

# In [40]: keep only the first two feature columns.
my_data = iris.data[:, :2]
print(my_data.shape)
# Recorded output: (150, 2)

# In [41]: fit on 80 % of the rows, predict the remaining 20 %.
X_train, X_test, Y_train, Y_test = train_test_split(
    my_data, iris.target, test_size=0.2, random_state=0)

clf = GaussianNB()
clf.fit(X_train, Y_train)
y_pred = clf.predict(X_test)

# The column labels must be an ordered sequence.  The original passed a set,
# whose iteration order is arbitrary; the recorded run showed the header as
# "predict_type true_type", i.e. the labels were attached to the wrong columns
# (the first data column is Y_test, the second is y_pred).
Y = pd.DataFrame(np.transpose([Y_test, y_pred]),
                 columns=['true_type', 'predict_type'])
Y.head(30)  # shown by the notebook as the cell result
# Recorded values of the original run, rows 0-29:
#   Y_test: 2 1 0 2 0 2 0 1 1 1 2 1 1 1 1 0 1 1 0 0 2 1 0 0 2 0 0 1 1 0
#   y_pred: 1 1 0 2 0 2 0 2 2 1 1 2 1 2 1 0 1 1 0 0 1 1 0 0 1 0 0 1 1 0

# In [42]: scikit-learn metrics take (y_true, y_pred) in that order; the
# original call had them reversed, which transposes the matrix.
print(confusion_matrix(Y_test, y_pred))
# Recorded output: [[11 0 0] [ 0 9 4] [ 0 4 2]]
# (symmetric, so the reversed arguments did not alter these numbers)

# In [43]: same argument order as above; reversing it swaps precision and recall.
print(classification_report(Y_test, y_pred))
# Recorded output (precision / recall / f1-score / support):
#   0            1.00 1.00 1.00 11
#   1            0.69 0.69 0.69 13
#   2            0.33 0.33 0.33  6
#   avg / total  0.73 0.73 0.73 30

# In [44]: AUC of each single feature used directly as the score, restricted
# to the first 100 rows (the first two classes).
my_auc = [roc_auc_score(iris.target[:100], iris.data[:100, n])
          for n in range(4)]
print(my_auc)
# Recorded output: [0.9325999999999999, 0.07780000000000002, 1.0, 1.0]
# An AUC far below 0.5 (feature 1) means the feature ranks the two classes in
# the opposite direction, not that it fails to separate them.

# In [56]: strip plot of every feature for the first two classes.
# A fresh figure keeps this plot out of whatever axes were last active.
plt.figure()
for k in range(4):
    x_pos = k + 1
    plt.plot(np.full(50, x_pos), iris.data[:50, k], 'or')
    # Offset the second class slightly so the two point columns do not overlap.
    plt.plot(np.full(50, x_pos + 0.2), iris.data[50:100, k], '*g')
plt.xticks([1, 2, 3, 4], iris.feature_names)
# The first two plotted lines are one per class, so two labels are enough.
plt.legend(iris.target_names[:2])
# In [21]: three stacked scatter plots of feature 0 against features 1, 2
# and 3.  Rows 0-49 are drawn as red circles, rows 50-99 as blue stars.
# Relies on `iris` and `plt` created by the earlier cells.
plt.figure(figsize=(5, 12))
for panel, col in enumerate((1, 2, 3), start=1):
    plt.subplot(3, 1, panel)
    plt.scatter(iris.data[:50, 0], iris.data[:50, col], c='r', marker='o')
    plt.scatter(iris.data[50:100, 0], iris.data[50:100, col], c='b', marker='*')