1. Code:
I won't go over the algorithm's background and theory here; the reference link is here:
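For quick reference, the three metrics the script below filters on are the standard ones (general definitions, not taken from the linked post): support(A→B) = P(A and B), the fraction of records containing both itemsets; confidence(A→B) = support(A→B) / support(A), i.e. P(B|A); and lift(A→B) = confidence(A→B) / support(B), where lift > 1 means A and B co-occur more often than independence would predict. The thresholds used below (min_support=0.07, lift >= 1.4, confidence >= 0.35) are simply the ones chosen for this example.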
import numpy as np
import seaborn as sns
import pandas as pd
from matplotlib import pyplot as plt
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

# Input: read the one-hot encoded transaction table
df_Retails = pd.read_excel("关联.xlsx")
df_Retails.head()

# Process: use the record-ID column as the index
df_Retails = df_Retails.set_index("记录")

# Output: basic inspection of the data
print(df_Retails.shape)
print(df_Retails.columns)
print(df_Retails.describe())

# Mine frequent itemsets (filtered by minimum support)
df_Frequent_Itemsets = apriori(df_Retails, min_support=0.07, use_colnames=True)
print(df_Frequent_Itemsets)

# Compute association rules (filtered by lift)
df_AssociationRules = association_rules(df_Frequent_Itemsets, metric="lift", min_threshold=1)
print(df_AssociationRules)

# Keep only rules with lift >= 1.4 and confidence >= 0.35
df_A = df_AssociationRules[(df_AssociationRules["lift"] >= 1.4) &
                           (df_AssociationRules["confidence"] >= 0.35)]
print(df_A)

# Fix garbled Chinese characters in matplotlib labels
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']

# Scatter plot: create a new figure with a light-brown background
fig = plt.figure(facecolor='#F5DEB3')
ax = fig.add_subplot(111)
ax.set_facecolor('#F5DEB3')
plt.scatter(df_AssociationRules["support"],
            df_AssociationRules["confidence"],
            s=[lift ** 13 for lift in df_AssociationRules["lift"]],
            c="#866D0D")
# The lift values here are close together, so they are raised to the 13th power
# purely to exaggerate the marker-size differences in the plot
plt.xlabel('support')
plt.ylabel('confidence')
plt.title('散点图')
plt.show()
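The script above assumes 关联.xlsx is already a one-hot table: one row per record and one True/False column per item. If your raw data is instead a list of transactions, mlxtend's TransactionEncoder can build that table first. A minimal sketch, with made-up transactions and item names (the real layout of 关联.xlsx is not shown in this post):

import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Hypothetical raw transactions, purely for illustration
transactions = [
    ["milk", "bread", "eggs"],
    ["bread", "butter"],
    ["milk", "bread", "butter"],
    ["milk", "eggs"],
]

# One-hot encode: one boolean column per item, one row per transaction
te = TransactionEncoder()
onehot = pd.DataFrame(te.fit(transactions).transform(transactions), columns=te.columns_)

# Same pipeline as above, just run on the encoded table
itemsets = apriori(onehot, min_support=0.5, use_colnames=True)
rules = association_rules(itemsets, metric="lift", min_threshold=1)
print(rules[["antecedents", "consequents", "support", "confidence", "lift"]])

With a real dataset you would replace the hard-coded transactions with rows read from your source file and reuse the support/confidence/lift thresholds from the main script.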
2. Result: