Chp6-3
December 20, 2019
In [5]: import pandas as pd
        import numpy as np
        from scipy import stats
        from matplotlib import pyplot as plt
        from sklearn.linear_model import LinearRegression
        from sklearn import metrics

        # Load the iris data and pick a single feature for simple linear regression
        my_iris = pd.read_csv(r'C:\Python\Scripts\my_data\iris.csv', sep=',', decimal='.',
                              header=None,
                              names=['sepal_length', 'sepal_width',
                                     'petal_length', 'petal_width', 'target'])

        feature_cols = 'petal_length'
        #feature_cols = 'sepal_width'
        x = my_iris[[feature_cols]]
        y = np.array(my_iris['sepal_length'])
        plt.plot(x, y, 'o', alpha=0.5)

        # Fit sepal_length on the chosen feature and plot the regression line
        linreg = LinearRegression()
        linreg.fit(x, y)
        print('f(x) = ', linreg.intercept_, '+', linreg.coef_[0], 'x')

        pred_y = linreg.predict(x)
        plt.plot(x, pred_y, 'g', alpha=0.5)
        plt.plot(np.array(x).mean(), y.mean(), 'r*', ms=12)   # the mean point lies on the fitted line
        plt.gca().set_xlabel(feature_cols)
        plt.gca().set_ylabel('sepal_length')
        print('RMSE = ', np.sqrt(metrics.mean_squared_error(y, pred_y)))
        print('\n')

f(x) =  4.305565456292049 + 0.4091258984678836 x
RMSE =  0.40435105064202476
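The intercept and slope printed above can also be checked against the closed-form least-squares formulas, slope = cov(x, y) / var(x) and intercept = mean(y) - slope * mean(x). The sketch below is not part of the original notebook; it loads iris through sklearn.datasets.load_iris instead of the local CSV, so the printed digits may differ slightly from the output above.

import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LinearRegression

iris = load_iris()
petal_len = iris.data[:, 2]   # petal length (cm), the single feature
sepal_len = iris.data[:, 0]   # sepal length (cm), the target

# Closed-form simple-regression estimates
slope = np.cov(petal_len, sepal_len, ddof=1)[0, 1] / np.var(petal_len, ddof=1)
intercept = sepal_len.mean() - slope * petal_len.mean()

# Should agree with LinearRegression fitted on the same arrays
check = LinearRegression().fit(petal_len.reshape(-1, 1), sepal_len)
print(slope, check.coef_[0])          # both roughly 0.41
print(intercept, check.intercept_)    # both roughly 4.3

# RMSE computed directly from the residuals
fitted = intercept + slope * petal_len
print(np.sqrt(np.mean((sepal_len - fitted) ** 2)))   # roughly 0.40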
In [3]: print('r_square = ', linreg.score(x, y))

r_square =  0.7599553107783261

In [59]: # Pearson correlation between the chosen feature and sepal_length;
         # its square equals the R^2 reported above.
         print(my_iris[[feature_cols, 'sepal_length']].corr())
         print('\n')
         r = np.array(my_iris[[feature_cols, 'sepal_length']].corr()[['sepal_length']].iloc(0)[0])
         print('r = ', r)
         print('square of r = ', r**2)

              petal_length  sepal_length
petal_length      1.000000      0.871754
sepal_length      0.871754      1.000000

r =  [0.87175416]
square of r =  [0.75995531]
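With a single feature, the r_square reported by linreg.score() is exactly the square of the Pearson correlation between the feature and the target, which is what the In [59] cell verifies. A minimal sketch of the same check, again assuming the sklearn.datasets copy of iris rather than the CSV used above:

from scipy import stats
from sklearn.datasets import load_iris
from sklearn.linear_model import LinearRegression

iris = load_iris()
petal_len = iris.data[:, 2]   # petal length (feature)
sepal_len = iris.data[:, 0]   # sepal length (target)

# Pearson correlation between the feature and the target
r, _ = stats.pearsonr(petal_len, sepal_len)

# R^2 from the one-feature regression
model = LinearRegression().fit(petal_len.reshape(-1, 1), sepal_len)
r2 = model.score(petal_len.reshape(-1, 1), sepal_len)

print(r ** 2)   # roughly 0.76
print(r2)       # same value: with one feature, R^2 = r^2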