# 商店客流量数据可视化

## 实验要求：

### 绘制所有便利店的10月的客流量折线图。

【代码】

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Draw one line chart of customer flow ('pay_num') per shop for October.
# data_total is filtered through .index.month, so its index must be datetime-like.
# NOTE(review): the section heading asks for convenience stores only, but no
# category filter is applied here, so every shop_id gets a chart - confirm.
data = data_total[data_total.index.month == 10]
data_id = data.groupby('shop_id')
# Iterating a GroupBy yields (key, sub-frame) pairs, so no get_group() lookup
# per key is needed.
for key, shop_data in data_id:
    shop_data.plot(y=['pay_num'], title='customer flow of shop '+str(key))
# NOTE(review): the loop indentation was missing in the original; show() is
# placed after the loop so that all figures are displayed together - confirm.
plt.show()


### 绘制每类商家10月份的日平均客流量折线图。

【代码】

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# For each shop category ('cate_2_name'), plot the mean customer flow
# ('pay_num') per day of month for October as a line chart.
data = data_total[data_total.index.month == 10]
data_id = data.groupby('cate_2_name')
for keys, cate_data in data_id:
    # Average only 'pay_num': the frame also holds the text column
    # 'cate_2_name', and a mean over non-numeric columns raises TypeError
    # in pandas >= 2.0.
    daily_mean = cate_data.groupby(cate_data.index.day)[['pay_num']].mean()
    daily_mean.plot(y=['pay_num'], kind='line', title=keys)
# NOTE(review): the loop indentation was missing in the original; show() is
# placed after the loop so that all figures are displayed together - confirm.
plt.show()


### 选择一个商家，统计每月的总客流量，绘制柱状图。

【代码】

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Bar chart of the total customer flow ('pay_num') per calendar month for shop 14.
data_14 = data_total[data_total['shop_id'] == 14]
# Sum only 'pay_num': it is the only column plotted, and this avoids
# aggregating shop_id and any non-numeric columns.
data_14_id = data_14.groupby(data_14.index.month)[['pay_num']].sum()
data_14_id.plot(kind='bar', y=['pay_num'], title='total custom of shop-14')
plt.xlabel('month')
plt.show()


### 选择一个商家，统计某个月中，周一到周日的每天平均客流量，并绘制柱状图。

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
# Bar chart of the mean customer flow ('pay_num') per weekday, Monday through
# Sunday, for shop 14 in January.
data_14 = data_total[(data_total['shop_id'] == 14) & (data_total.index.month == 1)]
# dayofweek numbers Monday as 0 and Sunday as 6, so sorted group keys come out
# Monday-first; strftime('%w') would start the week on Sunday (0).
data_14_id = data_14.groupby(data_14.index.dayofweek)
# Average only 'pay_num'; a mean over non-numeric columns raises TypeError
# in pandas >= 2.0.
weekday_mean = data_14_id[['pay_num']].mean()
weekday_names = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
# Replace the numeric weekday codes with readable tick labels.
weekday_mean.index = [weekday_names[code] for code in weekday_mean.index]
weekday_mean.plot(y=['pay_num'], kind='bar', title='Average custom of shop 14 in January')
plt.xlabel('day')
plt.show()


### 选择一个商家，绘制客流量直方图。

【代码】

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Histogram of the customer flow ('pay_num') values recorded for shop 14.
is_shop_14 = data_total['shop_id'] == 14
data_14 = data_total[is_shop_14]
data_14.plot(
    y=['pay_num'],
    kind='hist',
    title='shop-14-block',
)
plt.show()


### 选择一个商家，绘制客流量密度图。

【代码】

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Kernel density estimate of the customer flow ('pay_num') values for shop 14.
is_shop_14 = data_total['shop_id'] == 14
data_14 = data_total[is_shop_14]
data_14.plot(
    y=['pay_num'],
    kind='kde',
    title='shop-14-density',
)
plt.show()


### 统计某个月各个类别商店总客流量占该月总客流量的比例，绘制饼图。

【代码】

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Pie chart of each shop category's share of the total January customer flow.
data_month1 = data_total[data_total.index.month == 1]
# Work on 'pay_num' only: summing and dividing every column is unnecessary and
# would fail if the frame holds further non-numeric columns.
category_totals = data_month1.groupby('cate_2_name')[['pay_num']].sum()
data_month1_rate = category_totals / data_month1['pay_num'].sum()
# autopct prints each wedge's percentage with two decimals.
data_month1_rate['pay_num'].plot(kind='pie', autopct='%.2f')
# Clear the y-label that pandas sets on pie charts.
plt.ylabel('')
plt.title('January')
plt.show()


# 皮马印第安人糖尿病数据可视化

## 数据来源：http://archive.ics.uci.edu/ml/datasets/Pima+Indians+Diabetes。“pima.csv”数据前9个字段的含义：

(1)Number of times pregnant

(2)Plasma glucose concentration a 2 hours in an oral glucose tolerance test

(3)Diastolic blood pressure (mm Hg)

(4)Triceps skin fold thickness (mm)

(5)2-Hour serum insulin (mu U/ml)

(6)Body mass index (weight in kg/(height in m)^2)

(7)Diabetes pedigree function

(8)Age (years)

(9)Class variable (0 or 1)

## 实验要求：

### 任选两个字段绘制散点图。

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Assign descriptive names to the nine columns of the Pima data set.
close_px_all.columns = [
    'Number of times pregnant',
    'Plasma glucose concentration a 2 hours in an oral glucosetolerancetest',
    'Diastolic blood pressure (mm Hg)',
    'Triceps skin fold thickness (mm)',
    '2-Hour serum insulin (mu U/ml)',
    'Body mass index',
    'Diabetes pedigree function',
    'Age (years)',
    'Class variable',
]
# Scatter plot of two chosen fields (age vs. number of pregnancies), with the
# points of class 0 in red and class 1 in blue on the same axes.
pregnant_age = close_px_all[['Number of times pregnant', 'Age (years)', 'Class variable']]
ax = None
for class_value, point_color in ((0, 'red'), (1, 'blue')):
    class_rows = pregnant_age[pregnant_age['Class variable'] == class_value]
    # The first call creates the axes (ax=None); the second draws onto them.
    ax = class_rows.plot(
        kind='scatter',
        x='Age (years)',
        y='Number of times pregnant',
        c=point_color,
        title='Number of times pregnant-Age',
        ax=ax,
    )
plt.show()


### 使用全部或者部分特征绘制散布图。

【代码】

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Assign descriptive names to the nine columns of the Pima data set.
close_px_all.columns = [
    'Number of times pregnant',
    'Plasma glucose concentration a 2 hours in an oral glucosetolerancetest',
    'Diastolic blood pressure (mm Hg)',
    'Triceps skin fold thickness (mm)',
    '2-Hour serum insulin (mu U/ml)',
    'Body mass index',
    'Diabetes pedigree function',
    'Age (years)',
    'Class variable',
]
# Scatter matrix over a subset of the features (columns 0, 3 and 4:
# pregnancies, skin fold thickness, insulin), coloured by class.
color = {1: 'red', 0: 'blue'}
point_colors = close_px_all['Class variable'].apply(lambda x: color[x])
selected_features = close_px_all.iloc[:, [0, 3, 4]]
pd.plotting.scatter_matrix(
    selected_features,
    figsize=(9, 9),
    diagonal='kde',  # density curves on the diagonal
    s=40,
    alpha=0.6,
    c=point_colors,
)
plt.show()


### 绘制调和曲线图。

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Assign descriptive names to the nine columns of the Pima data set.
close_px_all.columns = [
    'Number of times pregnant',
    'Plasma glucose concentration a 2 hours in an oral glucosetolerancetest',
    'Diastolic blood pressure (mm Hg)',
    'Triceps skin fold thickness (mm)',
    '2-Hour serum insulin (mu U/ml)',
    'Body mass index',
    'Diabetes pedigree function',
    'Age (years)',
    'Class variable',
]
# Andrews curves of all features, grouped by the 'Class variable' column.
class_colors = ['red', 'blue']
pd.plotting.andrews_curves(close_px_all, 'Class variable', color=class_colors)
plt.show()


