# -*- coding: utf-8 -*-
"""Exploratory plots for the listing data stored in ``lj_gb.csv``.

The script loads the CSV, then shows in turn:

1. a bar chart of value counts for the ``hml`` column,
2. a bar chart of value counts for the ``bedrooms`` column,
3. a correlation heatmap over a handful of columns,
4. a scatter plot of ``bedrooms`` against ``livingrooms``.
"""
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns


def plot_counts(counts, label):
    """Print and draw a bar chart of the value counts in *counts*.

    Args:
        counts: A ``Counter`` mapping each distinct column value to the
            number of times it occurs.
        label: Column name; used as the x-axis label and as the prefix of
            the chart title.
    """
    # A Counter keeps its keys in first-seen order. The final entry is left
    # out of the chart, exactly as the original script did.
    # NOTE(review): presumably this drops a missing-value bucket -- confirm
    # against the data, otherwise a real category is being hidden.
    categories = list(counts.keys())[:-1]
    totals = list(counts.values())[:-1]
    print(categories)
    print(totals)

    plt.bar(categories, totals, 0.6, color="green")
    plt.xticks(rotation=90, fontsize=14)
    plt.xlabel(label, fontsize=14)
    plt.ylabel("sum", fontsize=14)
    plt.title(f"{label}统计")
    plt.show()


lj_data = pd.read_csv("lj_gb.csv", encoding="gb18030")
print(lj_data.head())
print(lj_data.columns)
# Index(['line', 'station', 'property_name', 'bedrooms', 'livingrooms',
#        'building_area', 'direction', 'decoration', 'has_elevator', 'hml',
#        'building_height', 'building_year', 'building_style',
#        'building_location', 'price_sqm', 'price_ttl'],
#       dtype='object')

# Use a font that contains CJK glyphs so the Chinese titles render, and keep
# the minus sign displayable with that font.
plt.rcParams["font.sans-serif"] = ['Simhei']
plt.rcParams["axes.unicode_minus"] = False

# Question 1: bar charts of the value counts.
cout_hml = Counter(lj_data["hml"].values)
print(cout_hml)
plot_counts(cout_hml, "hml")

cout_bedrooms = Counter(lj_data["bedrooms"].values)
plot_counts(cout_bedrooms, "bedrooms")

# Multivariate analysis: correlation heatmap.
# NOTE(review): assumes the selected columns are numeric so that corr() can
# use them -- confirm for has_elevator and building_height.
data = lj_data[
    ["bedrooms", "livingrooms", "has_elevator", "building_height", "price_ttl"]
]
_, ax = plt.subplots(figsize=(20, 16))
sns.heatmap(data.corr(), vmax=.8, square=True, annot=True, ax=ax)
plt.show()

# Explore the two-dimensional relationship between bedrooms and livingrooms.
plt.figure(figsize=(16, 8))
plt.title("bedrooms和livingrooms相关性图")
plt.xlabel('bedrooms', fontsize=8)
plt.ylabel('livingrooms', fontsize=8)
plt.scatter(data["bedrooms"].values, data["livingrooms"].values)
plt.show()
# Conclusion drawn by the author: in this real-estate data, bedrooms and
# livingrooms are positively correlated and show a degree of association.
# 数据代码+报告 (data, code + report):