一、标题:Effect of HSP90AB1 on the local immune response of hepatocellular carcinoma and it realtionship to prognosis(HSP90β对肝癌局部免疫的影响及对肝癌患者预后的影响)
二、部分代码及结果展示:
1、整理TCGA数据库肝细胞癌临床数据的部分PERL脚本
use strict; #use warnings; use XML::Simple; opendir(RD, ".") or die $!; my @dirs=readdir(RD); closedir(RD); open(WF,">clinical.xls") or die $!; print WF "Id\tfutime\tfustat\tAge\tGender\tGrade\tStage\tT\tM\tN\n"; foreach my $dir(@dirs){ #print $dir . "\n"; next if($dir eq '.'); next if($dir eq '..'); #print $dir . "\n"; if(-d $dir){ opendir(RD,"$dir") or die $!; while(my $xmlfile=readdir(RD)){ if($xmlfile=~/\.xml$/){ #print "$dir\\$xmlfile\n"; my $userxs = XML::Simple->new(KeyAttr => "name"); my $userxml=""; if(-f "$dir/$xmlfile"){ $userxml = $userxs->XMLin("$dir/$xmlfile"); }else{ $userxml = $userxs->XMLin("$dir\$xmlfile"); } # print output #open(WF,">dumper.txt") or die $!; #print WF Dumper($userxml); #close(WF); my $disease_code=$userxml->{'admin:admin'}{'admin:disease_code'}{'content'}; #get disease code my $disease_code_lc=lc($disease_code); my $patient_key=$disease_code_lc . ':patient'; #ucec:patient my $follow_key=$disease_code_lc . ':follow_ups'; my $patient_barcode=$userxml->{$patient_key}{'shared:bcr_patient_barcode'}{'content'}; #TCGA-AX-A1CJ my $gender=$userxml->{$patient_key}{'shared:gender'}{'content'}; #male/female my $age=$userxml->{$patient_key}{'clin_shared:age_at_initial_pathologic_diagnosis'}{'content'}; my $race=$userxml->{$patient_key}{'clin_shared:race_list'}{'clin_shared:race'}{'content'}; #white/black my $grade=$userxml->{$patient_key}{'shared:neoplasm_histologic_grade'}{'content'}; #G1/G2/G3 my $clinical_stage=$userxml->{$patient_key}{'shared_stage:stage_event'}{'shared_stage:clinical_stage'}{'content'}; #stage I my $clinical_T=$userxml->{$patient_key}{'shared_stage:stage_event'}{'shared_stage:tnm_categories'}{'shared_stage:clinical_categories'}{'shared_stage:clinical_T'}{'content'}; my $clinical_M=$userxml->{$patient_key}{'shared_stage:stage_event'}{'shared_stage:tnm_categories'}{'shared_stage:clinical_categories'}{'shared_stage:clinical_M'}{'content'}; my $clinical_N=$userxml->{$patient_key}{'shared_stage:stage_event'}{'shared_stage:tnm_categories'}{'shared_stage:clinical_categories'}{'shared_stage:clinical_N'}{'content'}; my $pathologic_stage=$userxml->{$patient_key}{'shared_stage:stage_event'}{'shared_stage:pathologic_stage'}{'content'}; #stage I my $pathologic_T=$userxml->{$patient_key}{'shared_stage:stage_event'}{'shared_stage:tnm_categories'}{'shared_stage:pathologic_categories'}{'shared_stage:pathologic_T'}{'content'}; my $pathologic_M=$userxml->{$patient_key}{'shared_stage:stage_event'}{'shared_stage:tnm_categories'}{'shared_stage:pathologic_categories'}{'shared_stage:pathologic_M'}{'content'}; my $pathologic_N=$userxml->{$patient_key}{'shared_stage:stage_event'}{'shared_stage:tnm_categories'}{'shared_stage:pathologic_categories'}{'shared_stage:pathologic_N'}{'content'}; $gender=(defined $gender)?$gender:"unknow"; $age=(defined $age)?$age:"unknow"; $race=(defined $race)?$race:"unknow"; $grade=(defined $grade)?$grade:"unknow"; $clinical_stage=(defined $clinical_stage)?$clinical_stage:"unknow"; $clinical_T=(defined $clinical_T)?$clinical_T:"unknow"; $clinical_M=(defined $clinical_M)?$clinical_M:"unknow"; $clinical_N=(defined $clinical_N)?$clinical_N:"unknow"; $pathologic_stage=(defined $pathologic_stage)?$pathologic_stage:"unknow"; $pathologic_T=(defined $pathologic_T)?$pathologic_T:"unknow"; $pathologic_M=(defined $pathologic_M)?$pathologic_M:"unknow"; $pathologic_N=(defined $pathologic_N)?$pathologic_N:"unknow"; my $survivalTime=""; my $vital_status=$userxml->{$patient_key}{'clin_shared:vital_status'}{'content'}; my $followup=$userxml->{$patient_key}{'clin_shared:days_to_last_followup'}{'content'}; my $death=$userxml->{$patient_key}{'clin_shared:days_to_death'}{'content'}; if($vital_status eq 'Alive'){ $survivalTime="$followup\t0"; } else{ $survivalTime="$death\t1"; } for my $i(keys %{$userxml->{$patient_key}{$follow_key}}){ eval{ $followup=$userxml->{$patient_key}{$follow_key}{$i}{'clin_shared:days_to_last_followup'}{'content'}; $vital_status=$userxml->{$patient_key}{$follow_key}{$i}{'clin_shared:vital_status'}{'content'}; $death=$userxml->{$patient_key}{$follow_key}{$i}{'clin_shared:days_to_death'}{'content'}; }; if($@){ for my $j(0..5){ #假设最多有6次随访 my $followup_for=$userxml->{$patient_key}{$follow_key}{$i}[$j]{'clin_shared:days_to_last_followup'}{'content'}; my $vital_status_for=$userxml->{$patient_key}{$follow_key}{$i}[$j]{'clin_shared:vital_status'}{'content'}; my $death_for=$userxml->{$patient_key}{$follow_key}{$i}[$j]{'clin_shared:days_to_death'}{'content'}; if( ($followup_for =~ /\d+/) || ($death_for =~ /\d+/) ){ $followup=$followup_for; $vital_status=$vital_status_for; $death=$death_for; my @survivalArr=split(/\t/,$survivalTime); if($vital_status eq 'Alive'){ if($followup>$survivalArr[0]){ $survivalTime="$followup\t0"; } } else{ if($death>$survivalArr[0]){ $survivalTime="$death\t1"; } } } } } my @survivalArr=split(/\t/,$survivalTime); if($vital_status eq 'Alive'){ if($followup>$survivalArr[0]){ $survivalTime="$followup\t0"; } } else{ if($death>$survivalArr[0]){ $survivalTime="$death\t1"; } } } print WF "$patient_barcode\t$survivalTime\t$age\t$gender\t$grade\t$pathologic_stage\t$pathologic_T\t$pathologic_M\t$pathologic_N\n"; } } close(RD); } } close(WF);
2、使用R语言分析正常组与肿瘤组中HSP90AB1的表达情况
#if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") #BiocManager::install("limma") #install.packages("ggplot2") #install.packages("ggpubr") #引用包 library(limma) library(ggplot2) library(ggpubr) expFile="symbol.txt" #表达输入文件 gene="VCAN" #基因的名称 setwd("C:\\Users\\lexb4\\Desktop\\geneImmune\\07.diff") #设置工作目录 #读取基因表达文件,并对数据进行处理 rt=read.table(expFile, header=T, sep="\t", check.names=F) rt=as.matrix(rt) rownames(rt)=rt[,1] exp=rt[,2:ncol(rt)] dimnames=list(rownames(exp),colnames(exp)) data=matrix(as.numeric(as.matrix(exp)), nrow=nrow(exp), dimnames=dimnames) data=avereps(data) data=t(data[gene,,drop=F]) #正常和肿瘤数目 group=sapply(strsplit(rownames(data),"\\-"), "[", 4) group=sapply(strsplit(group,""), "[", 1) group=gsub("2", "1", group) conNum=length(group[group==1]) #正常组样品数目 treatNum=length(group[group==0]) #肿瘤组样品数目 Type=c(rep(1,conNum), rep(2,treatNum)) #差异分析 exp=cbind(data, Type) exp=as.data.frame(exp) colnames(exp)=c("gene", "Type") exp$Type=ifelse(exp$Type==1, "Normal", "Tumor") exp$gene=log2(exp$gene+1) #设置比较组 group=levels(factor(exp$Type)) exp$Type=factor(exp$Type, levels=group) comp=combn(group,2) my_comparisons=list() for(i in 1:ncol(comp)){my_comparisons[[i]]<-comp[,i]} #绘制boxplot boxplot=ggboxplot(exp, x="Type", y="gene", color="Type", xlab="", ylab=paste0(gene, " expression"), legend.title="Type", palette = c("blue","red"), add = "jitter")+ stat_compare_means(comparisons=my_comparisons,symnum.args=list(cutpoints = c(0, 0.001, 0.01, 0.05, 1), symbols = c("***", "**", "*", "ns")),label = "p.signif") #输出图片 pdf(file=paste0(gene,".diff.pdf"), width=5, height=4.5) print(boxplot) dev.off()
3、使用R语言分析不同类型免疫细胞在肝细胞癌中的表达水平及相关关系
#install.packages("corrplot") library(corrplot) #引用包 immFile="CIBERSORT-Results.txt" #免疫细胞浸润的结果文件 pFilter=0.05 #免疫细胞浸润结果过滤条件 setwd("C:\\Users\\Administrator\\Desktop\\geneimmune\\10immunePlot") #设置工作目录 #读取免疫细胞浸润的结果文件,并对数据进行整理 immune=read.table(immFile, header=T, sep="\t", check.names=F, row.names=1) immune=immune[immune[,"P-value"]<pFilter,] immune=as.matrix(immune[,1:(ncol(immune)-3)]) data=t(immune) #绘制柱状图 col=rainbow(nrow(data), s=0.7, v=0.7) pdf(file="barplot.pdf", width=22, height=10) par(las=1,mar=c(8,5,4,16),mgp=c(3,0.1,0),cex.axis=1.5) a1=barplot(data,col=col,yaxt="n",ylab="Relative Percent",xaxt="n",cex.lab=1.8) a2=axis(2,tick=F,labels=F) axis(2,a2,paste0(a2*100,"%")) axis(1,a1,labels=F) par(srt=60,xpd=T);text(a1,-0.02,colnames(data),adj=1,cex=0.6);par(srt=0) ytick2=cumsum(data[,ncol(data)]);ytick1=c(0,ytick2[-length(ytick2)]) legend(par('usr')[2]*0.98,par('usr')[4],legend=rownames(data),col=col,pch=15,bty="n",cex=1.3) dev.off() #删除正常样品 group=sapply(strsplit(colnames(data),"\\-"), "[", 4) group=sapply(strsplit(group,""), "[", 1) group=gsub("2", "1", group) data=data[,group==0,drop=F] #绘制免疫细胞相关性的图形 pdf(file="corrplot.pdf", width=13, height=13) par(oma=c(0.5,1,1,1.2)) immune=immune[,colMeans(immune)>0] M=cor(immune) corrplot(M, method = "color", order = "hclust", tl.col="black", addCoef.col = "black", number.cex = 0.8, col=colorRampPalette(c("blue", "white", "red"))(50) ) dev.off()
4、使用R语言分析正常组及肝癌组中不同免疫细胞浸润水平
#install.packages("pheatmap") #install.packages("vioplot") #引用包 library(vioplot) library(pheatmap) input="CIBERSORT-Results.txt" #免疫细胞浸润文件 pFilter=0.05 #免疫细胞浸润结果过滤条件 setwd("C:\\Users\\Administrator\\Desktop\\生信文章\\geneimmune\\11heatmap\\vioplot-high") #设置工作目录 #读取免疫结果文件,并对数据进行整理 immune=read.table("CIBERSORT-Results.txt", header=T, sep="\t", check.names=F, row.names=1) immune=immune[immune[,"P-value"]<pFilter,,drop=F] immune=as.matrix(immune[,1:(ncol(immune)-3)]) data=t(immune) #正常和肿瘤数目 group=sapply(strsplit(colnames(data),"\\-"), "[", 4) group=sapply(strsplit(group,""), "[", 1) group=gsub("2", "1", group) conNum=length(group[group==1]) #正常组样品数目 treatNum=length(group[group==0]) #肿瘤组样品数目 #定义热图的注释文件 Type=c(rep("Normal",conNum), rep("Tumor",treatNum)) names(Type)=colnames(data) Type=as.data.frame(Type) #绘制热图 pdf(file="heatmap.pdf", width=12, height=6) pheatmap(data, annotation=Type, color = colorRampPalette(c(rep("green",1), rep("black",1), rep("red",3)))(100), cluster_cols =F, show_colnames=F, fontsize = 8, fontsize_row=7, fontsize_col=5) dev.off() #绘制小提琴图 data=t(data) outTab=data.frame() pdf(file="vioplot.pdf", width=13, height=8) par(las=1, mar=c(10,6,3,3)) x=c(1:ncol(data)) y=c(1:ncol(data)) xMax=ncol(data)*3-2 plot(x,y, xlim=c(0,xMax),ylim=c(min(data),max(data)+0.02), main="", xlab="", ylab="Fraction", pch=21, col="white", xaxt="n") #对每个免疫细胞循环,绘制小提琴图,正常样品用绿色表示,肿瘤样品用红色表示 for(i in 1:ncol(data)){ if(sd(data[1:conNum,i])==0){ data[1,i]=0.00001 } if(sd(data[(conNum+1):(conNum+treatNum),i])==0){ data[(conNum+1),i]=0.00001 } conData=data[1:conNum,i] treatData=data[(conNum+1):(conNum+treatNum),i] vioplot(conData,at=3*(i-1),lty=1,add = T,col = 'green') vioplot(treatData,at=3*(i-1)+1,lty=1,add = T,col = 'red') wilcoxTest=wilcox.test(conData, treatData) p=wilcoxTest$p.value if(p<pFilter){ cellPvalue=cbind(Cell=colnames(data)[i], pvalue=p) outTab=rbind(outTab, cellPvalue) } mx=max(c(conData,treatData)) lines(c(x=3*(i-1)+0.2,x=3*(i-1)+0.8),c(mx,mx)) text(x=3*(i-1)+0.5, y=mx+0.02, labels=ifelse(p<0.001, paste0("p<0.001"), paste0("p=",sprintf("%.03f",p))), cex = 0.8) } legend("topright", c("Normal", "Tumor"), lwd=5,bty="n",cex=1.2, col=c("green","red")) text(seq(1,xMax,3),-0.05,xpd = NA,labels=colnames(data),cex = 1,srt = 45,pos=2) dev.off() #输出免疫细胞和p值表格文件 write.table(outTab,file="diff.result.txt",sep="\t",row.names=F,quote=F)
5、不同拷贝子数目的HSP90β对中性粒细胞和CD8阳性T细胞在肝癌局部浸润的影响
6、HSP90β基因的表达水平、拷贝子水平及甲基化水平与不同淋巴细胞数量之间的关系