包含术语“生物信息学”的推文示例
第1步:加载所需的软件包
# Load the required packages: twitteR for querying Twitter, igraph for
# building and plotting the graph, stringr for string extraction.
library(twitteR)
library(igraph)
library(stringr)
第2步:收集关于“生物信息学”的推文
# Search for up to 500 tweets that mention "bioinformatics".
dm_tweets <- searchTwitter("bioinformatics", n = 500)

# Extract the text of every tweet. vapply() always yields a character vector,
# including when the search returns no results.
dm_txt <- vapply(dm_tweets, function(x) x$getText(), character(1))
第3步:识别转发
# Pattern for a retweet marker: "RT" or "via" followed by one or more @handles.
rt_regex <- "(RT|via)((?:\\b\\W*@\\w+)+)"

# Show the tweets whose text contains a retweet marker. The search runs on the
# text vector (dm_txt), not on the list of status objects (dm_tweets).
grep(rt_regex, dm_txt, ignore.case = TRUE, value = TRUE)

# Indices (into dm_txt / dm_tweets) of the tweets that are retweets.
rt_patterns <- grep(rt_regex, dm_txt, ignore.case = TRUE)

# The retweet texts themselves; these are the tweets used from here on.
dm_txt[rt_patterns]
第4步:收集谁转发和谁发布
我们将使用这些结果构建边列表(edge list),用于创建图
# Preallocate one slot per retweet: who retweeted, and who was retweeted.
who_retweet <- vector("list", length(rt_patterns))
who_post <- vector("list", length(rt_patterns))

# seq_along() makes the loop a no-op when no retweets were found
# (1:length(x) would iterate over c(1, 0) and fail on the first lookup).
for (i in seq_along(rt_patterns)) {
  # Status object of the i-th retweet
  twit <- dm_tweets[[rt_patterns[i]]]

  # Every "RT @user" / "via @user" marker found in the tweet text
  poster <- str_extract_all(twit$getText(), "(RT|via)((?:\\b\\W*@\\w+)+)")

  # Drop any ':' characters from the matches
  poster <- gsub(":", "", unlist(poster))

  # Strip the leading "RT @" / "via @" so only the user name remains
  who_post[[i]] <- gsub("(RT @|via @)", "", poster, ignore.case = TRUE)

  # Repeat the retweeting user once per extracted poster so that both
  # vectors stay aligned element by element
  who_retweet[[i]] <- rep(twit$getScreenName(), length(poster))
}

# Flatten to plain character vectors (character(0) when there are no retweets).
who_post <- as.character(unlist(who_post))
who_retweet <- as.character(unlist(who_retweet))
第5步:根据边列表创建图
# Two-column edge list: column 1 is the retweeting user, column 2 the user
# whose tweet was retweeted.
retweeter_poster <- cbind(who_retweet, who_post)

# Build the graph from the edge list.
rt_graph <- graph.edgelist(retweeter_poster)

# Vertex names, used as labels when plotting.
ver_labs <- get.vertex.attribute(rt_graph, "name", index = V(rt_graph))
第6步:让我们绘制图
# Compute a Fruchterman-Reingold layout for the retweet graph.
glay <- layout.fruchterman.reingold(rt_graph)

# Dark background with narrow margins, then draw the graph with vertices
# shown as text labels only (vertex.shape = "none").
par(bg = "gray15", mar = c(1, 1, 1, 1))
plot(
  rt_graph,
  layout = glay,
  vertex.color = "gray25",
  vertex.size = 10,
  vertex.label = ver_labs,
  vertex.label.family = "sans",
  vertex.shape = "none",
  vertex.label.color = hsv(h = 0, s = 0, v = .95, alpha = 0.5),
  vertex.label.cex = 0.85,
  edge.arrow.size = 0.8,
  edge.arrow.width = 0.5,
  edge.width = 3,
  edge.color = hsv(h = .95, s = 1, v = .7, alpha = 0.5)
)

# Add the title
title(
  "\nTweets with 'bioinformatics': Who retweets whom",
  cex.main = 1,
  col.main = "gray95"
)
第7步:让我们试着给它一个更生物信息学的外观
# Redraw the same graph (reusing the layout in glay) with translucent green
# vertices and edges and a monospace font.
par(bg = "gray15", mar = c(1, 1, 1, 1))
plot(
  rt_graph,
  layout = glay,
  vertex.color = hsv(h = .35, s = 1, v = .7, alpha = 0.1),
  vertex.frame.color = hsv(h = .35, s = 1, v = .7, alpha = 0.1),
  vertex.size = 5,
  vertex.label = ver_labs,
  vertex.label.family = "mono",
  vertex.label.color = hsv(h = 0, s = 0, v = .95, alpha = 0.5),
  vertex.label.cex = 0.85,
  edge.arrow.size = 0.8,
  edge.arrow.width = 0.5,
  edge.width = 3,
  edge.color = hsv(h = .35, s = 1, v = .7, alpha = 0.4)
)

# Add the title
title(
  "\nTweets with 'bioinformatics': Who retweets whom",
  cex.main = 1,
  col.main = "gray95",
  family = "mono"
)