Chp5-2
2019 年 12 月 20 日
# %% Imports and data locations
# All imports live in one cell so the notebook survives "Restart & Run All".
import numpy as np
import pandas as pd

# Raw strings: the original non-raw literals relied on "\P", "\S", "\m", "\T"
# and "\i" not being recognised escape sequences, which newer Python versions
# warn about.  The resulting path text is unchanged.
# NOTE(review): these are absolute, machine-specific paths; adjust them to
# wherever the CSV files live on your machine.
TITANIC_CSV = r"C:\Python\Scripts\my_data\Titanic.csv"
IRIS_CSV = r"C:\Python\Scripts\my_data\iris.csv"

# %% Part 1 - missing values (Titanic data)
my_data = pd.read_csv(TITANIC_CSV)
# Recorded output: rows 0-14 with the columns PassengerId, Survived, Pclass,
# Name, Sex, Age, SibSp, Parch, Ticket, Fare, Cabin, Embarked.  Row 5 has
# Age = NaN and many rows have Cabin = NaN.
my_data.head(15)

# %% Look at the columns that contain missing values
print(my_data[['PassengerId', 'Age', 'Cabin']].iloc[:6])
# Recorded output:
#    PassengerId   Age Cabin
# 0            1  22.0   NaN
# 1            2  38.0   C85
# 2            3  26.0   NaN
# 3            4  35.0  C123
# 4            5  35.0   NaN
# 5            6   NaN   NaN

# %% Drop every ROW that contains at least one NaN
my_fil_data1 = my_data.dropna(axis=0)
# Recorded output: the first seven surviving rows have index
# 1, 3, 6, 10, 11, 21 and 23 (all of them have a Cabin value).
my_fil_data1.head(7)

# %% Drop every COLUMN that contains at least one NaN
my_fil_data2 = my_data.dropna(axis=1)
# Recorded output: Age, Cabin and Embarked are gone; the remaining columns are
# PassengerId, Survived, Pclass, Name, Sex, SibSp, Parch, Ticket, Fare.
my_fil_data2.head(7)

# %% Fill NaN with a per-column constant
# Take the mean of the Age Series directly.  The original
# `my_data[['Age']].mean()[0]` looked the value up by position on a
# label-indexed Series, which pandas has deprecated.
mean_Age = int(my_data['Age'].mean())
my_dict = {'Age': mean_Age, 'Cabin': 'haha'}
my_fil_data3 = my_data.fillna(my_dict)
# Recorded output: row 5 now shows Age 29.0, and every missing Cabin in the
# first seven rows (rows 0, 2, 4, 5) shows 'haha'.
my_fil_data3.head(7)

# %% Forward fill: propagate the previous valid value downwards
# `.ffill()` replaces the deprecated `fillna(method='ffill')`.
my_fil_data4 = my_data.ffill()
# Recorded output: row 5 Age becomes 35.0 (taken from row 4).  Cabin in row 0
# stays NaN because nothing precedes it; row 2 becomes C85 and rows 4-5
# become C123.
my_fil_data4.head(7)

# %% Backward fill: propagate the next valid value upwards
# `.bfill()` replaces the deprecated `fillna(method='bfill')`.
my_fil_data5 = my_data.bfill()
# Recorded output: row 5 Age becomes 54.0 (taken from row 6).  Cabin in row 0
# becomes C85, row 2 becomes C123 and rows 4-5 become E46.
my_fil_data5.head(7)

# %% Part 2 - duplicated rows
student_scores = pd.DataFrame({'姓名': ['张三'] * 3 + ['李四'] * 3 + ['王五'] * 3,
                               '成绩': [10, 10, 10, 8, 8, 8, 5, 5, 5]})
# Recorded output: nine rows, three identical rows per student
# (scores 10, 8 and 5).
student_scores

# %% Flag rows that repeat an earlier row
# Recorded output:
# 0    False
# 1     True
# 2     True
# 3    False
# 4     True
# 5     True
# 6    False
# 7     True
# 8     True
# dtype: bool
student_scores.duplicated()

# %% Keep only the first occurrence of each row
# NOTE(review): this rebinds `my_fil_data5`, discarding the back-filled
# Titanic frame from Part 1.  The name is kept as in the original notebook.
my_fil_data5 = student_scores.drop_duplicates()
# Recorded output: rows 0, 3 and 6 remain (one per student).
my_fil_data5

# %% Part 3 - correlation analysis (iris data)
# NOTE(review): `my_data` is rebound from the Titanic frame to the iris frame
# here, as in the original notebook.
my_data = pd.read_csv(IRIS_CSV, header=None,
                      names=['sepal_length', 'sepal_width', 'petal_length',
                             'petal_width', 'target'])
# Restrict to numeric columns explicitly: `target` is non-numeric (it is absent
# from the recorded matrix) and `DataFrame.corr` on pandas >= 2.0 no longer
# drops such columns silently.
print(my_data.select_dtypes(include='number').corr(method='pearson'))
# Recorded output:
#               sepal_length  sepal_width  petal_length  petal_width
# sepal_length      1.000000    -0.109369      0.871754     0.817954
# sepal_width      -0.109369     1.000000     -0.420516    -0.356544
# petal_length      0.871754    -0.420516      1.000000     0.962757
# petal_width       0.817954    -0.356544      0.962757     1.000000