将 gff 文件转成 gtf (featurecounts需要使用gtf文件)
gffread coreset.gff -T -o amur_ide.gtf # -o write the output records into <outfile> instead of stdout #-T main output will be GTF instead of GFF3
构建参考基因组的索引文件
hisat2-build -p 8 genome.fa amur_ide
hisat2批量比对
for i in 39 40 41 42 43 44 do nohup hisat2 -x /home/genome_index/amur_ide -1 SRR75089${i}_1.fq -2 SRR75089${i}_2.fq | samtools view -S -b > xx.bam & done
bam文件排序
samtools sort XX.bam -o xxx_sorted.bam
featurecounts 定量
for i in 39 40 41 42 43 44 do nohup featureCounts -p -a /home/jiamj/analysis/ref/TAIR10.gtf -o ${i}_counts.txt /home/jiamj/analysis/clean/${i}_sorted.bam & done
-p If specified, libraries are assumed to contain paired-end reads. For any library that contains paired-end reads, the 'countReadPairs' parameter controls if read pairs or reads should be counted
结果包含有 geneid,染色体位置,基因起始结束的位置以及基因的 count 数
featureCounts进行fpkm标准化
countdata <- read.csv("countdata.csv") #countdata.csv是提取了上一步的counts数据以及gene length rownames(countdata) <- countdata[,1] countdata <- countdata[,-1] kb <- countdata$length / 1000 count <- countdata[,1:8] rpk <- count / kb tpm <- t(t(rpk)/colSums(rpk) * 1000000) fpkm <- t(t(rpk)/colSums(count) * 10^6) #想计算数据框中每列的总和,使用colSums函数。 write.table(fpkm,file="eight_tissues_fpkm.xls",sep="\t",quote = F)