Chp5-3
2019 年 12 月 20 日
# In [2]:
# Measures of location for the Titanic "Fare" column.
import pandas as pd
import numpy as np

# Raw string: the backslashes in the Windows path must not be read as
# escape sequences ("\P", "\S", "\m", "\T" are invalid escapes).
my_data = pd.read_csv(r"C:\Python\Scripts\my_data\Titanic.csv")

# Select the column once as a Series. Reductions on a Series return scalars,
# so no positional "[0]" lookup on a label-indexed result is needed (integer
# keys on a label-indexed Series are deprecated in recent pandas releases).
fare = my_data['Fare']

print('对 Fare 的位置性测度统计结果: ')
print('均值: \t\t', fare.mean())
print('中位数: \t', fare.median())
# q selects which quantile to compute; its default value is 0.5.
print('第 25 个百分位数: ', fare.quantile(q=0.25))
# mode() returns every most-frequent value; report the first one.
print('众数: \t\t', fare.mode().iloc[0])

# Recorded output:
#   对 Fare 的位置性测度统计结果:
#   均值: 32.204207968574636
#   中位数: 14.4542
#   第 25 个百分位数: 7.9104
#   众数: 8.05

# In [3]:
# Measures of dispersion for the same column.
fare_min = fare.min()
fare_max = fare.max()
fare_std = fare.std()

print('对 Fare 的离散性测度统计结果: ')
print('变化范围: \t [', fare_min, '\t', fare_max, ']')
print('极差: \t\t', fare_max - fare_min)
print('方差: \t\t', fare.var())
print('标准差: \t', fare_std)
# Coefficient of variation: standard deviation divided by the mean.
print('变异系数: \t', fare_std / fare.mean())

# Recorded output:
#   对 Fare 的离散性测度统计结果:
#   变化范围: [ 0.0 512.3292 ]
#   极差: 512.3292
#   方差: 2469.436845743116
#   标准差: 49.6934285971809
#   变异系数: 1.5430725278408497

# In [4]:
#print(my_data[['Fare']].describe())
# Bare expression: as the last statement of a notebook cell its value is
# displayed as Out[4]; in a plain script the result would be discarded.
my_data.describe()

# Out[4]:
#        PassengerId    Survived      Pclass         Age       SibSp  \
# count   891.000000  891.000000  891.000000  714.000000  891.000000
# mean    446.000000    0.383838    2.308642   29.699118    0.523008
# std     257.353842    0.486592    0.836071   14.526497    1.102743
# min       1.000000    0.000000    1.000000    0.420000    0.000000
# 25%     223.500000    0.000000    2.000000   20.125000    0.000000
# 50%     446.000000    0.000000    3.000000   28.000000    0.000000
# 75%     668.500000    1.000000    3.000000   38.000000    1.000000
# max     891.000000    1.000000    3.000000   80.000000    8.000000
#
#             Parch        Fare
# count  891.000000  891.000000
# mean     0.381594   32.204208
# std      0.806057   49.693429
# min      0.000000    0.000000
# 25%      0.000000    7.910400
# 50%      0.000000   14.454200
# 75%      0.000000   31.000000
# max      6.000000  512.329200
# In [17]:
# Histogram of the "Fare" column of my_data (loaded in an earlier cell).
hist_options = dict(bins=40, figsize=(18, 5), xlabelsize=16, ylabelsize=16)
my_data[['Fare']].hist(**hist_options)

# Out[17]:
#   array([[<matplotlib.axes._subplots.AxesSubplot object at 0x0000026D5EC2A6A0>]], dtype=object)
# In [60]:
# Box plot of the "Fare" column of my_data (loaded in an earlier cell).
fare_frame = my_data[['Fare']]
fare_frame.boxplot()

# Out[60]:
#   <matplotlib.axes._subplots.AxesSubplot at 0x28818e41c50>
# In [58]:
# Per-class descriptive statistics for the iris data set.
import pandas as pd
import numpy as np
from scipy import stats
from matplotlib import pyplot as plt

# The file has no header row, so the column names are supplied explicitly.
# Raw string: the backslashes in the Windows path must not be read as
# escape sequences ("\P", "\S", "\m", "\i" are invalid escapes).
my_iris = pd.read_csv(
    r'C:\Python\Scripts\my_data\iris.csv',
    sep=',',
    decimal='.',
    header=None,
    names=['sepal_length', 'sepal_width',
           'petal_length', 'petal_width', 'target'],
)
#print(my_iris.head(5))
tar_str = 'sepal_length'
#print(type(my_iris[tar_str]))

# Group the rows by class label once and reuse the grouping for every table.
by_target = my_iris.groupby(['target'])

print('Table 1. Mean of Group')
print(by_target.mean())
print('\nTable 2. Variance of Group')
print(by_target.var())
print('\n')
print('\nTable 3. Minimum of Group')
print(by_target.min())
print('\nTable 4. Maximum of Group')
print(by_target.max())
print('\n')
print('The mode of sepal_length is: ')
# The mode can be cross-checked against a histogram of the column.
print(stats.mode(my_iris['sepal_length']))

# Recorded output:
#
# Table 1. Mean of Group
#             sepal_length  sepal_width  petal_length  petal_width
# target
# setosa             5.006        3.418         1.464        0.244
# versicolor         5.936        2.770         4.260        1.326
# virginica          6.588        2.974         5.552        2.026
#
# Table 2. Variance of Group
#             sepal_length  sepal_width  petal_length  petal_width
# target
# setosa          0.124249     0.145180      0.030106     0.011494
# versicolor      0.266433     0.098469      0.220816     0.039106
# virginica       0.404343     0.104004      0.304588     0.075433
#
# Table 3. Minimum of Group
#             sepal_length  sepal_width  petal_length  petal_width
# target
# setosa               4.3          2.3           1.0          0.1
# versicolor           4.9          2.0           3.0          1.0
# virginica            4.9          2.2           4.5          1.4
#
# Table 4. Maximum of Group
#             sepal_length  sepal_width  petal_length  petal_width
# target
# setosa               5.8          4.4           1.9          0.6
# versicolor           7.0          3.4           5.1          1.8
# virginica            7.9          3.8           6.9          2.5
#
# The mode of sepal_length is:
# ModeResult(mode=array([5.]), count=array([10]))