直方图内显示折线图分布
# Histogram of simulated IQ scores with the theoretical normal density
# drawn on top as a dashed line.
import matplotlib.mlab as mlab  # NOTE(review): not used in this snippet; kept in case other code relies on it
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm

# Fonts able to render the Chinese labels: KaiTi first, SimHei as fallback.
# (The original set SimHei through an undefined `mpl` name and then
# immediately overwrote it with KaiTi.)
plt.rcParams['font.sans-serif'] = ['KaiTi', 'SimHei']
# Render the minus sign correctly when a CJK font is active.
plt.rcParams['axes.unicode_minus'] = False

# Fixed seed so the sample is reproducible.
np.random.seed(10680801)
mu = 100
sigma = 15
x = mu + sigma * np.random.randn(500)
num_bins = 60

# Size and DPI are passed to subplots() so they apply to the figure that is
# actually drawn on; a separate plt.figure(...) call would only create an
# extra, empty figure.
fig, ax = plt.subplots(figsize=(17, 8), dpi=120)

# density=True normalises the histogram so it is comparable with the pdf.
n, bins, patches = ax.hist(x, num_bins, density=True)

# Normal pdf evaluated at the bin edges, drawn as a dashed line.
y = norm.pdf(bins, mu, sigma)
ax.plot(bins, y, '--')

ax.set_xlabel('IQ')
ax.set_ylabel('概率密度')
ax.set_title(r'智商分布情况直方图')
fig.tight_layout()
堆叠面积直方图
# Stacked histogram of the "robbery" and "aggravated_assault" columns of the
# 2005 crime-rates-by-state dataset.
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

crime = pd.read_csv(r"http://datasets.flowingdata.com/crimeRatesByState2005.csv")

fig, ax = plt.subplots()

# `stacked=True` only takes effect when several datasets are passed to ONE
# hist() call. Two separate calls each see a single dataset, so the bars
# merely overlap (and each call picks its own bin edges). Passing both series
# together stacks them on a shared set of 12 bins.
counts, _, _ = ax.hist(
    [crime["robbery"], crime["aggravated_assault"]],
    bins=12,
    histtype="bar",
    alpha=0.6,
    label=["robbery", "aggravated_assault"],
    stacked=True,
)
ax.legend()

ax.set_xticks(np.arange(0, 721, 60))
ax.set_xlim(0, 720)

# Ticks every 4 units, at least up to 20 as before, extended when the
# stacked bars are taller than that.
y_top = max(20, int(np.max(counts)))
ax.set_yticks(np.arange(0, y_top + 4, 4))

plt.show()
在不同的子图中绘制各种类犯罪数据的数值分布
# Draw the value distribution of data columns 1..8 of the crime dataset,
# one histogram per subplot on a 2 x 4 grid.
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

crime = pd.read_csv(r"http://datasets.flowingdata.com/crimeRatesByState2005.csv")

# Drop the rows whose state is 'United States' or 'District of Columbia'.
excluded_states = ["United States", "District of Columbia"]
crime = crime[~crime["state"].isin(excluded_states)]

fig = plt.figure(figsize=(10, 5), dpi=120)
nrows, ncols = 2, 4

# Subplot positions are 1-based; position k shows DataFrame column k
# (column 0 is skipped).
for position in range(1, nrows * ncols + 1):
    ax = fig.add_subplot(nrows, ncols, position)
    ax.hist(crime.iloc[:, position])
    ax.set_title(crime.columns[position])

# y=1.02 places the overall title slightly above the top of the figure.
fig.suptitle("各种类犯罪数据的数值分布", y=1.02)
fig.tight_layout()
其他案例
乘客年龄分布频数直方图
# Frequency histogram of passenger ages from the Titanic training CSV.
import pandas as pd
import matplotlib.pyplot as plt

# Font used for the Chinese labels.
plt.rcParams['font.sans-serif'] = ['SimHei']

# Create the figure.
plt.figure(figsize=(20, 8), dpi=80)

# Load the Titanic dataset.
titanic = pd.read_csv(r'E:\PythonData\exercise_data\train.csv')

# Check whether any age is missing (the result is not stored or printed).
titanic.Age.isnull().any()
# Remove the observations that have no age, modifying the frame in place.
titanic.dropna(subset=['Age'], inplace=True)

# Histogram appearance: 20 bars, steel-blue fill, black edges, legend label.
hist_style = dict(
    bins=20,
    color='steelblue',
    edgecolor='k',
    label='直方图',
)
plt.hist(titanic.Age, **hist_style)

# Tick label size on both axes.
for set_ticks in (plt.xticks, plt.yticks):
    set_ticks(fontsize=15)

# Axis labels and title.
plt.xlabel('年龄:岁', fontsize=20)
plt.ylabel('人数:个', fontsize=20)
plt.title('乘客年龄分布', fontsize=20)

# Show the figure.
plt.show()
男女乘客直方图(二维数据)
设置了组距和其他的参数
# Overlaid age histograms for male and female Titanic passengers.
# NOTE: `titanic` is not defined in this snippet; it must already exist as a
# DataFrame with `Age` and `Sex` columns.
import matplotlib.pyplot as plt
import numpy as np

# Font used for the Chinese labels.
plt.rcParams['font.sans-serif'] = ['SimHei']

# Create the figure.
plt.figure(figsize=(20, 8), dpi=80)

# Split the ages by sex.
age_female = titanic.Age[titanic.Sex == 'female']
age_male = titanic.Age[titanic.Sex == 'male']

# Shared bin edges, 2 years wide. np.arange excludes its stop value, so the
# stop is pushed one bin width past the maximum; otherwise the last edge
# falls below the maximum age and the oldest passengers are dropped.
bin_width = 2
bins = np.arange(titanic.Age.min(), titanic.Age.max() + bin_width, bin_width)

# Male passengers.
plt.hist(age_male, bins=bins, label='男性', edgecolor='k',
         color='steelblue', alpha=0.7)
# Female passengers, drawn on top with some transparency.
plt.hist(age_female, bins=bins, label='女性', edgecolor='k',
         alpha=0.6, color='r')

# Tick label size on both axes.
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)

# Title and axis labels.
plt.title('男女乘客年龄直方图', fontsize=20)
plt.xlabel('年龄', fontsize=20)
plt.ylabel('人数', fontsize=20)

# Hide the ticks on the top and right edges. These parameters are booleans;
# the string 'off' is truthy and would not hide the ticks.
plt.tick_params(top=False, right=False)

# Legend.
plt.legend(loc='best', fontsize=20)

# Show the figure.
plt.show()
电影时长分布直方图
# Density histogram of movie running times (minutes), 2-minute-wide bins.
import matplotlib.pyplot as plt

# Font used for the Chinese labels.
plt.rcParams['font.sans-serif'] = ['SimHei']

# Create the figure.
plt.figure(figsize=(20, 8), dpi=80)

# Data: running time of each movie.
time = [131,98,125,131,124,139,131,117,128,108,135,138,131,102,107,114,119,128,121,142,127,130,124,101,110,116,117,110,128,128,115,99,136,126,
        134,95,138,117,111,78,132,124,113,150,110,117,86,95,144,105,126,130,126,130,126,116,123,106,112,138,123,86,101,99,136,123,117,119,105,
        137,123,128,125,104,109,134,125,127,105,120,107,129,116,108,132,103,136,118,102,120,114,105,115,132,145,119,121,112,139,125,138,109,
        132,134,156,106,117,127,144,139,139,119,140,83,110,102,123,107,143,115,136,118,139,123,112,118,125,109,119,133,112,114,122,109,106,
        123,116,131,127,115,118,112,135,115,146,137,116,103,144,83,123,111,110,111,
        100,154,136,100,118,119,133,134,106,129,126,110,111,109,
        141,120,117,106,149,122,122,110,118,127,121,114,125,126,114,140,103,130,141,117,106,114,121,114,133,137,92,121,112,146,97,137,105,98,
        117,112,81,97,139,113,134,106,144,110,137,137,111,104,117,100,111,101,110,105,129,137,112,120,113,133,112,83,94,146,
        133,101,131,116,
        111,
        84,137,115,122,106,144,109,123,116,111,111,133,150]

# `bins` is the bin WIDTH; `groups` is the resulting number of bins.
bins = 2
groups = (max(time) - min(time)) // bins

# Draw the histogram: blue bars with black edges, normalised to a density.
plt.hist(time, groups, color='b', edgecolor='k', density=True)

# One x tick per bin edge. The stop is max + 1 because range() excludes its
# stop value; without it the tick at the maximum running time is missing.
plt.xticks(list(range(min(time), max(time) + 1, bins)), fontsize=15)
plt.yticks(fontsize=15)

# Axis labels.
plt.xlabel('电影时长:分钟', fontsize=20)
plt.ylabel('电影数量占比', fontsize=20)

# Dashed grid.
plt.grid(True, linestyle='--', alpha=1)

# Title.
plt.title('电影时长分布直方图', fontsize=20)
plt.show()