词云(wordcloud)也叫文字云,是对文本中出现频率较高的关键词给予视觉差异化的展现方式。词云图突出展示高频、高质的信息,同时过滤掉大部分低频文本。利用词云,可以通过可视化形式凸显数据所体现的主旨,快速显示数据中各种文本信息的出现频率。
# Render a word cloud from the text in zhangsan.txt, using an image as the
# mask, save it to pic.jpg, and display it in a matplotlib window.
from sklearn.datasets import load_iris
# NOTE(review): load_boston was removed in scikit-learn 1.2 and is not used
# below; drop this import if it raises ImportError on your installation.
from sklearn.datasets import load_boston
import pandas as pd
import pylab
import matplotlib
# The backend is selected before pyplot is imported, as in the original script.
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from imageio import imread
import numpy as np

# Read the source text; the context manager closes the file handle, which the
# original one-liner `open(...).read()` left open.
# NOTE(review): the file is read with the platform default encoding -- pass
# encoding=... explicitly if the text file is stored in a different one.
with open(r'zhangsan.txt') as text_file:
    text = text_file.read()

# Image passed to WordCloud as its `mask` argument.
bgpic = imread(r'C:\Users\Admin\Desktop\test.jpg')
wdcd = WordCloud(mask=bgpic, background_color="white", scale=1.5)
wdcd = wdcd.generate(text)

# Alternative without a mask image (fixed canvas size instead):
#   wdcd = WordCloud(background_color="white", width=1000, height=860,
#                    margin=2).generate(text)

# Save before showing: plt.show() may block until the window is closed, and
# the output file should not depend on that.
wdcd.to_file('pic.jpg')

plt.imshow(wdcd)
plt.axis("off")
# A single show() is enough; the original's trailing pylab.show() was a
# redundant second call to the same function.
plt.show()
同样可以自行设置需要过滤的词(停用词),例如通过 WordCloud 的 stopwords 参数传入。
效果如下:
代码如下。测试文件可以自己编写,输入想要的字符即可:
# Render a word cloud from the text in zhangsan.txt, using an image as the
# mask, save it to pic.jpg, and display it in a matplotlib window.
from sklearn.datasets import load_iris
# NOTE(review): load_boston was removed in scikit-learn 1.2 and is not used
# below; drop this import if it raises ImportError on your installation.
from sklearn.datasets import load_boston
import pandas as pd
import pylab
import matplotlib
# The backend is selected before pyplot is imported, as in the original script.
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from imageio import imread
import numpy as np

# Read the source text; the context manager closes the file handle, which the
# original one-liner `open(...).read()` left open.
# NOTE(review): the file is read with the platform default encoding -- pass
# encoding=... explicitly if the text file is stored in a different one.
with open(r'zhangsan.txt') as text_file:
    text = text_file.read()

# Image passed to WordCloud as its `mask` argument.
bgpic = imread(r'C:\Users\Admin\Desktop\test.jpg')
wdcd = WordCloud(mask=bgpic, background_color="white", scale=1.5)
wdcd = wdcd.generate(text)

# Alternative without a mask image (fixed canvas size instead):
#   wdcd = WordCloud(background_color="white", width=1000, height=860,
#                    margin=2).generate(text)

# Save before showing: plt.show() may block until the window is closed, and
# the output file should not depend on that.
wdcd.to_file('pic.jpg')

plt.imshow(wdcd)
plt.axis("off")
# A single show() is enough; the original's trailing pylab.show() was a
# redundant second call to the same function.
plt.show()