R公交地铁路线网络图实战

简介: R公交地铁路线网络图实战

对于庞大的公交、地铁路线信息的数据挖掘,一般软件遇到的问题主要有两点:1. 对于文本信息的挖掘,特别是中文词汇的挖掘,缺乏成熟的工具或软件包;2. 对于大数据量,一般软件的读取和处理会遇到问题。即使只是一个月的部分区域路线信息,也会达到几百MB以上。因此,对于这类数据,无论从算法运行还是数据读取来说,普通的SQL语言或者MATLAB软件处理起来都力不从心。对于这类数据,我们一般用R软件可以轻松实现读取、数据挖掘以及可视化的过程。

例如对于下面这样的车站数据:

以及近600MB的进出站信息数据,如果要实现每隔一段时间对应路线的进出站人数整理以及可视化,我们可以按以下步骤进行分析:

首先我们进行数据的读取和预处理




# Summarise smart-card entry/exit records per station and per metro line in
# 10-minute slots between 06:00 and 22:00, and write the results to text files.

# Install dplyr only when it is missing, then attach it (arrange() is used to
# sort the records).
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library("dplyr")

# Read the raw records and name the seven columns: card number, trade date,
# trade time, ticket type, trade code, trade station, previous trade station.
data <- read.table("E:\\201501一卡通进出站.txt", stringsAsFactors = FALSE)
colnames(data) <- c(
  "逻辑卡号",
  "交易日期",
  "交易时间",
  "票种",
  "交易代码",
  "交易车站",
  "上次交易车站"
)

# Count exits and entries that fall inside one 10-minute slot.
#   trades: data frame whose 2nd column is the trade time and whose 4th column
#           is the previous trade station.
#           NOTE(review): the hour * 10000 arithmetic assumes numeric HHMMSS
#           trade times -- confirm against the data file.
#   hour:   hour of day.
#   slot:   1..6, the 10-minute slot within that hour.
# Returns c(exits = , entries = ): a row counts as an exit when its previous
# trade station is not 0 and as an entry when it is 0.
# A slot is the interval (start, end]. The last slot of an hour ends at the
# next hour's 00:00, i.e. (hour + 1) * 10000; the former bound
# hour * 10000 + 6000 is never reached, so records stamped exactly on the
# hour used to be dropped from every slot.
count_slot <- function(trades, hour, slot) {
  lower <- hour * 10000 + (slot - 1) * 1000
  upper <- if (slot == 6) (hour + 1) * 10000 else hour * 10000 + slot * 1000
  previous <- trades[which(trades[, 2] > lower & trades[, 2] <= upper), 4]
  c(exits = length(which(previous != 0)), entries = length(which(previous == 0)))
}

# Append a header row plus one row per 10-minute slot (hour, minute, exits,
# entries) to summary_file. Counts are summed over every data frame in
# trade_sets, so passing two lines yields their combined totals.
write_line_summary <- function(trade_sets, summary_file) {
  cat(
    file = summary_file, append = TRUE,
    " 点", " 分", " 出站人数", " ", "进站人数 ", "\n"
  )
  for (hour in 6:21) {
    for (slot in 1:6) {
      counts <- Reduce(
        `+`,
        lapply(trade_sets, count_slot, hour = hour, slot = slot)
      )
      cat(
        file = summary_file, append = TRUE,
        hour, " ", slot - 1, "0 ", " ", counts[["exits"]], " ", " ",
        counts[["entries"]], "\n"
      )
    }
  }
}

# ---- Per-day, per-station summaries ----
for (ii in 20150101:20150131) {
  # Keep trade date, trade time, trade station and previous trade station for
  # this day, ordered by station.
  data2 <- data[which(data[, 2] == ii), c(2, 3, 6, 7)]
  data2 <- data2[order(data2$交易车站), ]
  bus <- unique(data2[, 3])
  # seq_along() skips the loop on a day without records (1:length() would not).
  for (busi in seq_along(bus)) {
    data3 <- data2[which(data2[, 3] == bus[busi]), ]
    # Sort by date first, then by time.
    data4 <- arrange(data3, 交易日期, 交易时间)
    slot_file <- paste("E:\\", bus[busi], "车站", ii, "日一卡通进出站时间.txt")
    for (time in 6:21) {
      for (i in 1:6) {
        counts <- count_slot(data4, time, i)
        if (i != 6) {
          cat(
            file = slot_file, append = TRUE,
            ii, "日", time, "点", i - 1, "0分到", i, "0分的出站人数为",
            counts[["exits"]], " ", "进站人数为", counts[["entries"]], "\n"
          )
        } else {
          # The last slot is labelled as running up to the next full hour.
          cat(
            file = slot_file, append = TRUE,
            ii, "日", time, "点", i - 1, "0分到", time + 1, "点0分的出站人数为",
            counts[["exits"]], " ", "进站人数为", counts[["entries"]], "\n"
          )
        }
      }
    }
    # Write the sorted records of this station and day.
    write.table(
      data4,
      paste("E:\\", ii, "日 ", bus[busi], "车站一卡通进出站整理.txt")
    )
  }
}

# ---- Line 1 / line 2 summaries ----
# The original script ended with an unmatched "}" that made it unparseable;
# it has been dropped. This section therefore runs once, after the day loop,
# on data2 as left by the last iteration (20150131).
# NOTE(review): if a summary per day was intended, move this section inside
# the day loop instead.
# The unused dataout/datain/numout/numin totals were removed: they were read
# from data3 (the last station processed), not from the line being summarised.
# Stations whose code starts with "1" belong to line 1, with "2" to line 2.
line1 <- data2[substr(data2$交易车站, 1, 1) == "1", ]
line2 <- data2[substr(data2$交易车站, 1, 1) == "2", ]
line1_sorted <- arrange(line1, 交易日期, 交易时间)
line2_sorted <- arrange(line2, 交易日期, 交易时间)

# Line 1
write_line_summary(list(line1_sorted), "E:\\1号线一卡通进出站时间.txt")
write.table(line1_sorted, "E:\\1号线一卡通进出站整理.txt")

# Line 2
write_line_summary(list(line2_sorted), "E:\\2号线一卡通进出站时间.txt")
write.table(line2_sorted, "E:\\2号线一卡通进出站整理.txt")

# Lines 1 and 2 combined
write_line_summary(
  list(line1_sorted, line2_sorted),
  "E:\\1,2号线一卡通进出站时间.txt"
)


# Summarise smart-card entry/exit records per station and per metro line in
# 10-minute slots between 06:00 and 22:00, and write the results to text files.

# Install dplyr only when it is missing, then attach it (arrange() is used to
# sort the records).
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library("dplyr")

# Read the raw records and name the seven columns: card number, trade date,
# trade time, ticket type, trade code, trade station, previous trade station.
data <- read.table("E:\\201501一卡通进出站.txt", stringsAsFactors = FALSE)
colnames(data) <- c(
  "逻辑卡号",
  "交易日期",
  "交易时间",
  "票种",
  "交易代码",
  "交易车站",
  "上次交易车站"
)

# Count exits and entries that fall inside one 10-minute slot.
#   trades: data frame whose 2nd column is the trade time and whose 4th column
#           is the previous trade station.
#           NOTE(review): the hour * 10000 arithmetic assumes numeric HHMMSS
#           trade times -- confirm against the data file.
#   hour:   hour of day.
#   slot:   1..6, the 10-minute slot within that hour.
# Returns c(exits = , entries = ): a row counts as an exit when its previous
# trade station is not 0 and as an entry when it is 0.
# A slot is the interval (start, end]. The last slot of an hour ends at the
# next hour's 00:00, i.e. (hour + 1) * 10000; the former bound
# hour * 10000 + 6000 is never reached, so records stamped exactly on the
# hour used to be dropped from every slot.
count_slot <- function(trades, hour, slot) {
  lower <- hour * 10000 + (slot - 1) * 1000
  upper <- if (slot == 6) (hour + 1) * 10000 else hour * 10000 + slot * 1000
  previous <- trades[which(trades[, 2] > lower & trades[, 2] <= upper), 4]
  c(exits = length(which(previous != 0)), entries = length(which(previous == 0)))
}

# Append a header row plus one row per 10-minute slot (hour, minute, exits,
# entries) to summary_file. Counts are summed over every data frame in
# trade_sets, so passing two lines yields their combined totals.
write_line_summary <- function(trade_sets, summary_file) {
  cat(
    file = summary_file, append = TRUE,
    " 点", " 分", " 出站人数", " ", "进站人数 ", "\n"
  )
  for (hour in 6:21) {
    for (slot in 1:6) {
      counts <- Reduce(
        `+`,
        lapply(trade_sets, count_slot, hour = hour, slot = slot)
      )
      cat(
        file = summary_file, append = TRUE,
        hour, " ", slot - 1, "0 ", " ", counts[["exits"]], " ", " ",
        counts[["entries"]], "\n"
      )
    }
  }
}

# ---- Per-day, per-station summaries ----
for (ii in 20150101:20150131) {
  # Keep trade date, trade time, trade station and previous trade station for
  # this day, ordered by station.
  data2 <- data[which(data[, 2] == ii), c(2, 3, 6, 7)]
  data2 <- data2[order(data2$交易车站), ]
  bus <- unique(data2[, 3])
  # seq_along() skips the loop on a day without records (1:length() would not).
  for (busi in seq_along(bus)) {
    data3 <- data2[which(data2[, 3] == bus[busi]), ]
    # Sort by date first, then by time.
    data4 <- arrange(data3, 交易日期, 交易时间)
    slot_file <- paste("E:\\", bus[busi], "车站", ii, "日一卡通进出站时间.txt")
    for (time in 6:21) {
      for (i in 1:6) {
        counts <- count_slot(data4, time, i)
        if (i != 6) {
          cat(
            file = slot_file, append = TRUE,
            ii, "日", time, "点", i - 1, "0分到", i, "0分的出站人数为",
            counts[["exits"]], " ", "进站人数为", counts[["entries"]], "\n"
          )
        } else {
          # The last slot is labelled as running up to the next full hour.
          cat(
            file = slot_file, append = TRUE,
            ii, "日", time, "点", i - 1, "0分到", time + 1, "点0分的出站人数为",
            counts[["exits"]], " ", "进站人数为", counts[["entries"]], "\n"
          )
        }
      }
    }
    # Write the sorted records of this station and day.
    write.table(
      data4,
      paste("E:\\", ii, "日 ", bus[busi], "车站一卡通进出站整理.txt")
    )
  }
}

# ---- Line 1 / line 2 summaries ----
# The original script ended with an unmatched "}" that made it unparseable;
# it has been dropped. This section therefore runs once, after the day loop,
# on data2 as left by the last iteration (20150131).
# NOTE(review): if a summary per day was intended, move this section inside
# the day loop instead.
# The unused dataout/datain/numout/numin totals were removed: they were read
# from data3 (the last station processed), not from the line being summarised.
# Stations whose code starts with "1" belong to line 1, with "2" to line 2.
line1 <- data2[substr(data2$交易车站, 1, 1) == "1", ]
line2 <- data2[substr(data2$交易车站, 1, 1) == "2", ]
line1_sorted <- arrange(line1, 交易日期, 交易时间)
line2_sorted <- arrange(line2, 交易日期, 交易时间)

# Line 1
write_line_summary(list(line1_sorted), "E:\\1号线一卡通进出站时间.txt")
write.table(line1_sorted, "E:\\1号线一卡通进出站整理.txt")

# Line 2
write_line_summary(list(line2_sorted), "E:\\2号线一卡通进出站时间.txt")
write.table(line2_sorted, "E:\\2号线一卡通进出站整理.txt")

# Lines 1 and 2 combined
write_line_summary(
  list(line1_sorted, line2_sorted),
  "E:\\1,2号线一卡通进出站时间.txt"
)

通过以上过程,我们可以将整理后的数据输出到对应的文件中:

以及交通路线的可视化过程;

对于交通路线的网络图来说,R中的igraph包的确是实现利器:



# Build and plot a directed graph of the stops along the first bus route
# found in the Shanghai route listing files.

# Read the five route listing files.
# NOTE(review): every path has a space right after "E:/"; it is kept exactly
# as in the original -- confirm that it matches the real file names.
ljhdat1 <- readLines("E:/ shanghai_1.txt")
ljhdat2 <- readLines("E:/ shanghai_2.txt")
ljhdat3 <- readLines("E:/ shanghai_3.txt")
ljhdat4 <- readLines("E:/ shanghai_4.txt")
ljhdat5 <- readLines("E:/ shanghai_5.txt")

# Return the line that immediately precedes each blank line; the script
# treats that line as the route description of one bus.
# A blank first line has no predecessor and contributes nothing (index 0 is
# dropped by R), and an empty file yields character(0).
route_lines <- function(lines) {
  blank_at <- which(lines == "")
  lines[blank_at - 1]
}

# Route description of every bus, in file order.
bus <- c(
  route_lines(ljhdat1),
  route_lines(ljhdat2),
  route_lines(ljhdat3),
  route_lines(ljhdat4),
  route_lines(ljhdat5)
)
bus
stopifnot(length(bus) >= 1)

# ---- Split the first route into its stops ----
route <- list(0)
# Split on spaces and drop the first token.
# NOTE(review): the first token is presumably the route name -- confirm.
stops <- unlist(strsplit(bus[1], split = " "))[-1]
# Remove "#" tokens with a logical mask: x[-which(x == "#")] would return an
# empty vector whenever the route contains no "#".
route[[1]] <- stops[stops != "#"]
n <- length(route[[1]])

library(igraph)
# Edge list linking every stop to the next one. head()/tail() replace the
# index expressions 1:n-1 (which parses as (1:n) - 1) and 2:n (which counts
# backwards when n < 2).
d <- data.frame(
  from = head(route[[1]], -1),
  to = tail(route[[1]], -1)
)
# graph_from_data_frame() supersedes the deprecated graph.data.frame().
g <- graph_from_data_frame(d, directed = TRUE)
plot(g)

# ---- Split all routes into their stops ----
route1 <- character(0)

对于最后生成的网络图由于路线众多,在查看的过程中可以通过设置可视化参数来进一步优化。

相关文章
|
2月前
|
机器学习/深度学习 PyTorch TensorFlow
卷积神经网络深度解析:从基础原理到实战应用的完整指南
蒋星熠Jaxonic,深度学习探索者。深耕TensorFlow与PyTorch,分享框架对比、性能优化与实战经验,助力技术进阶。
|
2月前
|
监控 Linux 测试技术
C++零拷贝网络编程实战:从理论到生产环境的性能优化之路
🌟 蒋星熠Jaxonic,技术宇宙中的星际旅人。深耕C++与零拷贝网络编程,从sendfile到DPDK,实战优化服务器性能,毫秒级响应、CPU降60%。分享架构思维,共探代码星辰大海!
|
6月前
|
运维 Kubernetes 前端开发
如何用 eBPF 实现 Kubernetes 网络可观测性?实战指南
本文深入探讨了Kubernetes网络观测的挑战与eBPF技术的应用。首先分析了传统工具在数据碎片化、上下文缺失和性能瓶颈上的局限性,接着阐述了eBPF通过零拷贝观测、全链路关联和动态过滤等特性带来的优势。文章进一步解析了eBPF观测架构的设计与实现,包括关键数据结构、内核探针部署及生产环境拓扑。实战部分展示了如何构建全栈观测系统,并结合NetworkPolicy验证、跨节点流量分析等高级场景,提供具体代码示例。最后,通过典型案例分析和性能数据对比,验证了eBPF方案的有效性,并展望了未来演进方向,如智能诊断系统与Wasm集成。
230 1
|
2月前
|
机器学习/深度学习 数据采集 人工智能
深度学习实战指南:从神经网络基础到模型优化的完整攻略
🌟 蒋星熠Jaxonic,AI探索者。深耕深度学习,从神经网络到Transformer,用代码践行智能革命。分享实战经验,助你构建CV、NLP模型,共赴二进制星辰大海。
|
3月前
|
机器学习/深度学习 人工智能 算法
卷积神经网络深度解析:从基础原理到实战应用的完整指南
蒋星熠Jaxonic带你深入卷积神经网络(CNN)核心技术,从生物启发到数学原理,详解ResNet、注意力机制与模型优化,探索视觉智能的演进之路。
401 11
|
5月前
|
机器学习/深度学习 人工智能 PyTorch
零基础入门CNN:聚AI卷积神经网络核心原理与工业级实战指南
卷积神经网络(CNN)通过局部感知和权值共享两大特性,成为计算机视觉的核心技术。本文详解CNN的卷积操作、架构设计、超参数调优及感受野计算,结合代码示例展示其在图像分类、目标检测等领域的应用价值。
287 7
|
6月前
|
存储 监控 网络协议
HarmonyOS NEXT实战:网络状态监控
本教程介绍如何在HarmonyOS Next中使用@ohos.net.connection模块实现网络状态监控,并通过AppStorage进行状态管理,适用于教育场景下的网络检测功能开发。
202 2
|
6月前
|
机器学习/深度学习 移动开发 供应链
基于时间图神经网络多的产品需求预测:跨序列依赖性建模实战指南
本文展示了如何通过学习稀疏影响图、应用图卷积融合邻居节点信息,并结合时间卷积捕获演化模式的完整技术路径,深入分析每个步骤的机制原理和数学基础。
221 1
|
6月前
|
JavaScript 前端开发 开发工具
HarmonyOS NEXT实战:加载网络页面资源
本课程讲解如何在HarmonyOS SDK中使用Web组件加载网络页面,包括权限配置、页面加载及动态切换。适合教育场景下开发具备网页浏览功能的应用。
161 0
|
数据采集 存储 JSON
Python网络爬虫:Scrapy框架的实战应用与技巧分享
【10月更文挑战第27天】本文介绍了Python网络爬虫Scrapy框架的实战应用与技巧。首先讲解了如何创建Scrapy项目、定义爬虫、处理JSON响应、设置User-Agent和代理,以及存储爬取的数据。通过具体示例,帮助读者掌握Scrapy的核心功能和使用方法,提升数据采集效率。
532 6