该案例可以使用和鲸社区在线分析工具ModelWhale在线运行
https://www.heywhale.com/mw/project/60d67d0fee16460017a2ca51
导入相关库
import pandas as pd from pyecharts.charts import * from pyecharts import options as opts from pyecharts.commons.utils import JsCode
pandas读取数据
# Load the Shenzhen second-hand housing listings (the file is GBK-encoded).
df = pd.read_csv('/home/mw/input/house4500/二手房数据-sz.csv', encoding='GBK')

# Numeric columns arrive as text with a unit suffix; strip the suffix and cast.
# column -> (unit suffix to remove, target dtype)
_UNIT_COLUMNS = {
    '参考单价': ('元/平米', 'int'),
    '建筑面积': ('㎡', 'float'),
    '参考总价': ('万', 'float'),
}
for _column, (_unit, _dtype) in _UNIT_COLUMNS.items():
    df[_column] = df[_column].str.replace(_unit, '').astype(_dtype)

# Categorical columns: keep only the part captured by the regex group
# (rows that do not match become NaN).
_EXTRACT_PATTERNS = {
    '抵押信息': '(.+?抵押)',
    '所在楼层': '(.+?楼层)',
    '房屋朝向': '(.+?) ',
}
for _column, _pattern in _EXTRACT_PATTERNS.items():
    df[_column] = df[_column].str.extract(_pattern)

# Notebook cell output: preview the cleaned frame.
df.head()
房源分布
统计深圳各区域/小区二手房数量;
# Count listings per (district, area, community). groupby sorts by its keys,
# so all rows of one district -- and of one area inside that district -- are
# contiguous in df_t. The tree construction below relies on that ordering.
df_t = df.groupby(['行政区', '区域', '小区名称'])['编号'].count().reset_index()

# Build the district -> area -> community hierarchy consumed by TreeMap.
data = []
for _, row in df_t.iterrows():
    # Open a new district node whenever the district changes.
    if not data or data[-1]['name'] != row['行政区']:
        data.append(dict(name=row['行政区'], children=[]))
    areas = data[-1]['children']
    # Compare against the last area of the *current* district only. Checking
    # a global list of seen area names would attach a community to the wrong
    # area when two districts contain an area with the same name.
    if not areas or areas[-1]['name'] != row['区域']:
        areas.append(dict(name=row['区域'], children=[]))
    areas[-1]['children'].append(dict(name=row['小区名称'], value=row['编号']))

tree = TreeMap(
    init_opts=opts.InitOpts(
        theme='chalk',
        width='1000px',
        height='600px'
    ))
tree.add(
    "房源分布",
    data,
    leaf_depth=2,
    label_opts=opts.LabelOpts(position="inside", formatter='{b}: {c}套'),
    # One TreeMapLevelsOpts per hierarchy level, outermost first.
    levels=[
        opts.TreeMapLevelsOpts(
            treemap_itemstyle_opts=opts.TreeMapItemStyleOpts(
                border_color="#555", border_width=4, gap_width=4
            )
        ),
        opts.TreeMapLevelsOpts(
            color_saturation=[0.3, 0.6],
            treemap_itemstyle_opts=opts.TreeMapItemStyleOpts(
                border_color_saturation=0.7, gap_width=2, border_width=2
            ),
        ),
        opts.TreeMapLevelsOpts(
            color_saturation=[0.3, 0.5],
            treemap_itemstyle_opts=opts.TreeMapItemStyleOpts(
                border_color_saturation=0.6, gap_width=1
            ),
        ),
    ],
)
tree.set_global_opts(
    title_opts=opts.TitleOpts(
        title="深圳二手房房源分布",
        pos_left='center',
        pos_top='2%',
        title_textstyle_opts=opts.TextStyleOpts(color='#00BFFF', font_size=20)),
    legend_opts=opts.LegendOpts(is_show=False)
)
tree.render_notebook()
各区域二手房数据总览
# Tooltip formatter (JavaScript). Each point's data array is
# [avg unit price, avg floor area, area name, district name, listing count],
# i.e. the x value followed by the y-axis item built in the loop below.
tool_js = """function (param) {return '<div style="border-bottom: 1px solid rgba(255,255,255,.3); font-size: 18px;padding-bottom: 7px;margin-bottom: 7px">' +param.data[3] + '-' +param.data[2]+'<br/>' + '</div>' +'平均面积: '+param.data[1]+' ㎡<br/>' +'平均单价: '+param.data[0]+' 元/㎡<br/>' +'房源数量: '+param.data[4]+' 套';}"""

# Base chart: carries the global options; the per-district series are
# overlapped onto it below.
st = Scatter(
    init_opts=opts.InitOpts(
        theme='chalk',
        width='1000px',
        height='600px')
)
st.set_global_opts(
    xaxis_opts=opts.AxisOpts(name='平均单价', type_="value", is_scale=True),
    yaxis_opts=opts.AxisOpts(is_scale=True, name='平均建筑面积', type_="value"),
    tooltip_opts=opts.TooltipOpts(formatter=JsCode(tool_js)),
    legend_opts=opts.LegendOpts(is_show=True, pos_right=10, pos_top='10%', orient='vertical'),
    # Size-type visual map: symbol size scales between 10 and 50.
    # NOTE(review): no `dimension` is given, so this presumably maps the last
    # data dimension (the listing count) -- confirm against the ECharts docs.
    visualmap_opts=opts.VisualMapOpts(
        is_show=True,
        type_='size',
        min_=0,
        max_=2500,
        pos_top='55%',
        pos_right='1%',
        orient='vertical',
        range_text=['房源数量\n', ''],
        range_size=[10, 50],
        textstyle_opts=opts.TextStyleOpts(color='#fff')),
    title_opts=opts.TitleOpts(
        title="各区域二手房数据总览",
        pos_left='center',
        title_textstyle_opts=opts.TextStyleOpts(color='#1E90FF'))
)

# Per (district, area): mean unit price, mean floor area and listing count.
df_t = df.groupby(['行政区', '区域']).agg(
    {'参考单价': 'mean', '建筑面积': 'mean', '编号': 'count'}).reset_index()

# Shared point style. The key was previously misspelled 'boderWidth', which
# ECharts ignores, so the white border set by 'borderColor' never showed up.
point_style = {
    'shadowBlur': 2,
    'shadowColor': 'rgba(0, 0, 0, 0.9)',
    'shadowOffsetY': 2,
    'shadowOffsetX': 3,
    'opacity': 0.9,
    'borderColor': '#fff',
    'borderWidth': 1,
}

# One scatter series per district so that each gets its own legend entry.
for area in df_t['行政区'].unique():
    data_x, data_y = [], []
    d = df_t[df_t['行政区'] == area]
    for idx, row in d.iterrows():
        data_x.append(int(row['参考单价']))
        # Extra fields after the y value are read by the tooltip formatter.
        data_y.append([int(row['建筑面积']), row['区域'], area, row['编号']])
    s = Scatter()
    s.add_xaxis(data_x)
    s.add_yaxis(
        area,
        data_y,
        label_opts=opts.LabelOpts(is_show=False),
        itemstyle_opts=point_style,
    )
    st.overlap(s)

st.render_notebook()
房屋属性分布
def cate_stat(col):
    """Summarise the value frequencies of ``df[col]`` for a pie chart.

    Returns a list of ``[label, count]`` pairs: the first five entries of
    ``df[col].value_counts()`` as-is, followed by a single ``['其他', total]``
    pair that sums every remaining value (omitted when that total is zero).
    """
    counts = df[col].value_counts()
    pairs = [[label, int(cnt)] for label, cnt in counts.iloc[:5].items()]
    remainder = int(counts.iloc[5:].sum())
    if remainder:
        pairs.append(['其他', remainder])
    return pairs


def _caption(text, left, top):
    """Build the ECharts title dict that labels the centre of one ring."""
    return dict(
        text=text,
        left=left,
        top=top,
        textAlign='center',
        textStyle=dict(
            color='#fff',
            fontWeight='normal',
            fontSize=15))


# (column, ring centre, caption text, caption left, caption top), laid out as
# a 3 x 2 grid. Order matters: series and titles are added in this sequence.
_PIE_GRID = [
    ('房屋朝向', ["20%", "35%"], '朝向 ', '20%', '33%'),
    ('建筑结构', ["50%", "35%"], '结构 ', '50%', '33%'),
    ('装修情况', ["80%", "35%"], '装修 ', '80%', '33%'),
    ('所在楼层', ["20%", "70%"], '楼层 ', '20%', '68%'),
    ('房屋年限', ["50%", "70%"], '年限 ', '50%', '68%'),
    ('抵押信息', ["80%", "70%"], '抵押 ', '80%', '68%'),
]

pie = Pie(
    init_opts=opts.InitOpts(
        theme='chalk',
        width='1000px',
        height='600px'
    )
)

for _column, _center, _text, _left, _top in _PIE_GRID:
    pie.add(
        "",
        cate_stat(_column),
        # Position of this ring's centre inside the canvas.
        center=_center,
        # Keep the rings small, otherwise neighbouring pies overlap.
        radius=["10%", "15%"],
        label_opts=opts.LabelOpts(formatter='{b}\n{d}%')
    )

pie.set_series_opts(
    itemstyle_opts={'shadowBlur': 2,
                    'shadowColor': 'rgba(0, 0, 0, 0.5)',
                    'shadowOffsetY': 2,
                    'shadowOffsetX': 3,
                    'opacity': 1}
)

# Main chart title first, then one caption per ring in grid order.
_titles = [
    dict(
        text='房屋属性分布',
        left='2%',
        top='1%',
        textStyle=dict(
            color='#00BFFF',
            fontSize=20)),
]
_titles.extend(
    _caption(_text, _left, _top)
    for _column, _center, _text, _left, _top in _PIE_GRID
)

# Decorative white frame drawn around the six rings.
_frame = opts.GraphicGroup(
    graphic_item=opts.GraphicItem(
        id_='2',
        left="center",
        top="40px"),
    children=[
        opts.GraphicRect(
            graphic_item=opts.GraphicItem(
                left="center",
                top="center",
                z=1
            ),
            graphic_shape_opts=opts.GraphicShapeOpts(
                width=950,
                height=520
            ),
            graphic_basicstyle_opts=opts.GraphicBasicStyleOpts(
                fill="rgba(0,0,0,0)",
                line_width=5,
                stroke="#fff",
            ),
        )
    ],
)

pie.set_global_opts(
    legend_opts=opts.LegendOpts(is_show=False),
    title_opts=_titles,
    graphic_opts=[_frame]
)
pie.render_notebook()
房价统计—行政区域
# Mean reference unit price per administrative district.
df_t = df.groupby(['行政区']).agg({'参考单价': 'mean'}).reset_index()

# [district name, mean price truncated to an int] pairs for the map series.
data_pair = [
    [district, int(avg_price)]
    for district, avg_price in zip(df_t['行政区'], df_t['参考单价'])
]

# Diverging blue -> red palette used to colour districts by price.
_price_palette = [
    '#313695', '#4575b4', '#74add1', '#abd9e9', '#e0f3f8',
    '#ffffbf', '#fee090', '#fdae61', '#f46d43', '#d73027', '#a50026']

chart = Map(
    init_opts=opts.InitOpts(
        width='1000px',
        height='600px',
        theme='chalk',
    )
)
chart.add(
    "",
    data_pair=data_pair,
    maptype="深圳",
    is_map_symbol_show=False,
    label_opts=opts.LabelOpts(is_show=True)
)
chart.set_global_opts(
    # Hidden visual map: only drives the colouring, between 30k and 80k.
    visualmap_opts=opts.VisualMapOpts(
        is_show=False,
        type_='color',
        min_=3e4,
        max_=8e4,
        range_color=_price_palette
    ),
    title_opts=opts.TitleOpts(
        title="深圳各行政区二手房房价统计",
        subtitle='单位:元/平方米',
        pos_left="center",
        pos_top='1%',
        title_textstyle_opts=opts.TextStyleOpts(color='#00BFFF', font_size=20)
    ),
    tooltip_opts=opts.TooltipOpts(is_show=True, formatter='{b}:{c}元/平方米'),
    legend_opts=opts.LegendOpts(is_show=False),
)
chart.render_notebook()
房价统计—各区域
# Per area: mean unit price plus the mean longitude/latitude of its listings,
# which is used as the position of the area's bar on the map.
df_t = df.groupby(['区域']).agg(
    {'参考单价': 'mean', '经度': 'mean', '纬度': 'mean'}).reset_index()

# [area name, [longitude, latitude, mean price truncated to an int]]
data_pair = [
    [name, [lng, lat, int(avg_price)]]
    for name, lng, lat, avg_price in zip(
        df_t['区域'].tolist(),
        df_t['经度'].tolist(),
        df_t['纬度'].tolist(),
        df_t['参考单价'].tolist(),
    )
]

# Diverging blue -> red palette used to colour bars by price.
_price_palette = [
    '#313695', '#4575b4', '#74add1', '#abd9e9', '#e0f3f8',
    '#ffffbf', '#fee090', '#fdae61', '#f46d43', '#d73027', '#a50026']

_lighting = opts.Map3DLightOpts(
    main_shadow_quality='high',
    is_main_shadow=True,
    main_intensity=1,
    main_alpha=30,
    ambient_cubemap_texture='https://echarts.apache.org/examples/data-gl/asset/canyon.hdr',
    ambient_cubemap_diffuse_intensity=0.5,
    ambient_cubemap_specular_intensity=0.5
)
_post_effect = opts.Map3DPostEffectOpts(
    is_enable=True,
    is_ssao_enable=True,
    ssao_radius=1,
    ssao_intensity=1
)

chart = Map3D(
    init_opts=opts.InitOpts(
        width='1000px',
        height='600px',
        theme='chalk',
    )
)
# Use the Shenzhen map as the 3D base layer.
chart.add_schema(
    maptype="深圳",
    ground_color='#999',
    shading="lambert",
    emphasis_label_opts=opts.LabelOpts(is_show=True),
    itemstyle_opts=opts.ItemStyleOpts(
        border_width=0.2,
        border_color="rgb(0,0,0)",
    ),
    light_opts=_lighting,
    post_effect_opts=_post_effect
)
# One 3D bar per area.
chart.add(
    "",
    data_pair=data_pair,
    type_="bar3D",
    bar_size=1.5,
    min_height=3,
    shading="lambert",
    label_opts=opts.LabelOpts(
        is_show=False,
        formatter=JsCode(
            "function(data){return data.name + ': ' + data.value[2];}"),
    )
)
chart.set_global_opts(
    # Hidden visual map: colours bars by value index 2 (the price), 30k-100k.
    visualmap_opts=opts.VisualMapOpts(
        is_show=False,
        type_='color',
        dimension=2,
        min_=3e4,
        max_=1e5,
        range_color=_price_palette
    ),
    title_opts=opts.TitleOpts(
        title="深圳各区域二手房房价统计",
        subtitle='单位:元/平方米',
        pos_left="center",
        pos_top='1%',
        title_textstyle_opts=opts.TextStyleOpts(color='#00BFFF', font_size=20)
    ),
    tooltip_opts=opts.TooltipOpts(is_show=False),
    legend_opts=opts.LegendOpts(is_show=False),
)
chart.render_notebook()
BMap热力图
BMap需要前往百度地图开放平台申请app key(AK),各位可以Fork后替换成自己的AK;可视化代码已注释掉~
# from pyecharts.globals import BMapType, ChartType # data = [] # for idx, row in df.iterrows(): # data.append([row['房协编码'], [row['经度'], row['纬度'], 1]]) # bmp = BMap(init_opts=opts.InitOpts(width="1000px", height="800px")) # bmp.add_schema( # # 替换自己的AK # baidu_ak="AK", # center=[114.058196, 22.650279], # zoom=12, # is_roam=True, # ) # bmp.add( # series_name="", # type_='heatmap', # data_pair=data, # is_large=True, # blur_size=5, # point_size=1, # ) # bmp.add_control_panel( # copyright_control_opts=opts.BMapCopyrightTypeOpts(position=3), # maptype_control_opts=opts.BMapTypeControlOpts( # type_=BMapType.MAPTYPE_CONTROL_DROPDOWN # ), # scale_control_opts=opts.BMapScaleControlOpts(), # overview_map_opts=opts.BMapOverviewMapControlOpts(is_open=True), # navigation_control_opts=opts.BMapNavigationControlOpts(), # geo_location_control_opts=opts.BMapGeoLocationControlOpts(), # ) # bmp.set_global_opts( # visualmap_opts=opts.VisualMapOpts( # is_show=False, # max_=1000, # series_index=0, # range_color=['blue', 'blue', 'green', 'yellow', 'red'] # ), # ) # bmp.render_notebook()