关于从网页获取文本
# Download a web page and extract the text of its first <div class="RichText">.
import requests
from bs4 import BeautifulSoup

# NOTE(review): "url" is a placeholder and the request is sent as POST, exactly
# as in the original snippet -- substitute the real address and confirm that
# POST (rather than GET) is what the target site expects.
# A timeout keeps the script from hanging forever on an unresponsive server.
response = requests.request("post", "url", timeout=10)
# Fail here on an HTTP error status instead of trying to parse an error page.
response.raise_for_status()

# Decode the raw bytes explicitly as UTF-8 before parsing.
code = response.content.decode("utf-8")
soup = BeautifulSoup(code, "lxml")

# `find_all` is the current name of the legacy `findAll` alias.
rich_text_divs = soup.find_all("div", attrs={"class": "RichText"})
if not rich_text_divs:
    # Same exception type the bare `[0]` would raise, with a usable message.
    raise IndexError('no <div class="RichText"> element found in the page')

# Replace ideographic (full-width) spaces with ordinary spaces.
text = rich_text_divs[0].text.replace("\u3000", " ")
# Bare expression: shows the result when run as the last line of a notebook cell.
text
关于绘制词云图
# Draw a word cloud from the string held in `text`.
# NOTE(review): `text` is not defined in this snippet; it must already exist
# (e.g. produced by the web-scraping snippet earlier in this file).
import jieba
from wordcloud import WordCloud
import matplotlib.pyplot as plt

plt.rcParams['font.sans-serif'] = ['SimHei']  # lets Chinese labels display correctly
plt.rcParams['axes.unicode_minus'] = False  # lets the minus sign display correctly

# Segment the text with jieba and join the tokens with single spaces.
words = ' '.join(jieba.cut(text))

# Build the word cloud using the given font file, canvas size and background.
wordcloud = WordCloud(
    font_path="AlimamaFangYuanTiVF-Thin.ttf",
    width=1000,
    height=700,
    background_color="pink",
).generate(words)

# Show the word cloud as an image with the axes hidden.
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()