目录
对人类性别相关属性数据集进行数据特征分布可视化分析与挖掘
输出结果
实现代码
# coding: utf8
import pandas as pd
import matplotlib.pyplot as plt


# ML / feature engineering: visualize and explore the feature distributions of
# a small dataset of human gender-related attributes.


# 1. Define the dataset.
# Columns: hair (long/short), height, jaw (angular/rounded), beard length (mm),
# skin, weight. The last column, "Sex", is the label.
contents = {
    "name": ['Mary', 'Bob', 'Lisa', 'Tom', 'Alan', 'Jason',
             'Sophia', 'Aiden', 'Sarah', 'Miqi', 'Temp01', 'Temp02'],
    "age": [16, 24, 19, 20, 33, 23, 29, 31, 34, 24, 27, 30],
    "Hair": ['长发', '短发', '长发', '短发', '长发', '短发',
             '长发', '长发', '长发', '长发', '短发', '长发'],
    "Height": [158, 175, 162, 170, 175, 168, 166, 169, 164, 157, 182, 161],
    "Jaw": ['圆滑', '棱角', '圆滑', '棱角', '圆滑', '圆滑',
            '圆滑', '棱角', '圆滑', '圆滑', '棱角', '圆滑'],
    "Beard": [2, 7, 3, 5, 2, 3, 5, 6, 3, 4, 5, 3],
    "Skin": ['细腻', '粗糙', '细腻', '粗糙', '细腻', '粗糙',
             '细腻', '粗糙', '细腻', '细腻', '粗糙', '粗糙'],
    "Weight": [99, 143, 105, 135, 120, 160, 95, 145, 125, 112, 155, 100],
    "Sex": ['女性', '男性', '女性', '男性', '男性', '男性',
            '女性', '男性', '女性', '女性', '男性', '女性'],
}
data_frame = pd.DataFrame(contents)
print(type(data_frame))

data_name = 'HumanGender_RelatedAttributes'
col_cat = 'Jaw'
label_name = 'Sex'

# The two label values being compared do not depend on the feature column, so
# look them up once instead of on every loop iteration. They are the first two
# entries of the label's value_counts(), exactly as before.
label_values = data_frame[label_name].value_counts().index.tolist()
label_y1 = label_values[0]
label_y2 = label_values[1]

# NOTE(review): the slice [1:-2] skips "name" (first) and both "Weight" and
# "Sex" (last two). The header comment lists weight as a feature, so [1:-1]
# may have been intended -- confirm before changing; behavior is kept as-is.
for col in data_frame.columns[1:-2]:
    col_dtype = data_frame[col].dtype
    # Text columns are "object" in classic pandas but may use a dedicated
    # string dtype in newer versions; treat both as categorical.
    if col_dtype == 'object' or pd.api.types.is_string_dtype(col_dtype):
        print(col)
        # T1: alternative approach via a ready-made count-plot helper
        # (SNCountPlot(col_cats, data_frame, imgName='')), currently disabled.
        col_cats = [col, label_name]

        # T2: hand-built per-label counts.
        # Category names of this column, ordered by overall frequency.
        x_subname = data_frame[col].value_counts().index.tolist()

        # Count each category within each label subset. A subset's own
        # value_counts() is ordered by *its* frequencies and omits categories
        # it never contains, so the counts are reindexed onto x_subname
        # (missing categories -> 0). This keeps y1/y2 the same length as
        # x_subname and position i always refers to x_subname[i].
        counts_y1 = data_frame.loc[data_frame[label_name] == label_y1, col].value_counts()
        counts_y2 = data_frame.loc[data_frame[label_name] == label_y2, col].value_counts()
        y1 = counts_y1.reindex(x_subname, fill_value=0).tolist()
        y2 = counts_y2.reindex(x_subname, fill_value=0).tolist()
        print(x_subname)
        print(label_y1, label_y2)
        print(y1, y2)

        # DoubleBarAddText is defined outside this snippet; presumably it draws
        # a two-series bar chart with value labels -- verify against its source.
        DoubleBarAddText(y1, y2, col, label_name, x_subname, label_y1, label_y2, data_name)
    else:
        # Non-text column: delegate to the plotting helper for numeric
        # columns (defined outside this snippet).
        Num_col_Plot2_ByLabels(data_name, data_frame, label_name, col)