本文首发于“生信补给站”公众号 https://mp.weixin.qq.com/s/dqxGfDRS2jg8jV_y1BBonA
三 scRepertoire-VDJ + RNA
以上即为单细胞免疫组库的常规分析,我们更多的还是与scRNA-seq数据结合,就可以在umap上查看clone的分布情况 以及 基于cluster/celltype展示clonotype的分布情况。
3.1 Seurat标准流程
folders的样本文件夹内为cellranger的3个标准文件,的简单的走一遍标准流程单细胞工具箱|Seurat官网标准流程,此处仅为示例就不考虑批次问题了。
folders=list.files('./') folders #[1] "t2_Normal" "t2_PBMC" "t3_Center" "t3_Normal" "t3_PBMC" "t4_Center" "t4_Normal" "t4_PBMC" "UT1_Normal" #批量读取10X单细胞转录组数据文件夹 sceList = lapply(folders,function(folder){ CreateSeuratObject(counts = Read10X(folder), project = folder ) }) sce.all <- merge(sceList[[1]], y = c(sceList[[2]],sceList[[3]],sceList[[4]], sceList[[5]],sceList[[6]],sceList[[7]],sceList[[8]],sceList[[9]]), add.cell.ids = folders, #添加样本名 project = "scRNA") sce.all #查看 #线粒体比例 sce.all[["percent.mt"]] <- PercentageFeatureSet(sce.all, pattern = "^MT-") #过滤 sce <- subset(sce.all, subset = nFeature_RNA > 500 & nFeature_RNA < 5000 & percent.mt < 20) VlnPlot(sce, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) #标准化 sce <- NormalizeData(sce) #高变基因 sce <- FindVariableFeatures(sce, selection.method = "vst", nfeatures = 2000) all.genes <- rownames(sce) #归一化 sce <- ScaleData(sce, features = all.genes) #降维聚类 sce <- RunPCA(sce, npcs = 50) sce <- FindNeighbors(sce, dims = 1:30) sce <- FindClusters(sce, resolution = 0.5) sce <- RunUMAP(sce, dims = 1:30) sce <- RunTSNE(sce, dims = 1:30) p1 <- DimPlot(sce, reduction = "umap", pt.size=0.5, label = F,repel = TRUE) p2 <- DimPlot(sce, reduction = "umap",group.by = "orig.ident", pt.size=0.5, label = F,repel = TRUE) p1 + p2
(1) 添加meta,处理barcode
读取文章提供的meta信息,如下图所示,红框中的不一致的部分需要tidyverse处理一下Tidyverse|数据列的分分合合,一分多,多合一
meta <- read.table("ccRCC_6pat_cell_annotations.txt", sep = "\t",header = T) meta2 <- meta %>% separate(cell,into = c( "CB","sample","pos"),sep = "_") %>% mutate(cell = paste(sample,pos,CB,sep = "_")) sce@meta.data <- sce@meta.data %>% rownames_to_column("cell") %>% inner_join(meta2,by = "cell") %>% column_to_rownames("cell") head(sce@meta.data) # orig.ident nCount_RNA nFeature_RNA percent.mt RNA_snn_res.0.5 seurat_clusters CB #t2_Normal_AAACCTGCAACACCTA-1 t2_Normal 7863 2244 3.700878 9 9 AAACCTGCAACACCTA-1 #t2_Normal_AAACCTGCACCTCGTT-1 t2_Normal 2194 1188 10.027347 7 7 AAACCTGCACCTCGTT-1 # sample pos type region Sample Sample2 cluster cluster_name UMAP1 UMAP2 #t2_Normal_AAACCTGCAACACCTA-1 t2 Normal Normal Normal t2 t2_Normal 13 cDC2 -9.836647 0.07936505 #t2_Normal_AAACCTGCACCTCGTT-1 t2 Normal Normal Normal t2 t2_Normal 11 CD45- Vascular Endothelium -7.601253 9.00771236 # Sample_name #t2_Normal_AAACCTGCAACACCTA-1 Ipi/Nivo resistant #t2_Normal_AAACCTGCACCTCGTT-1 Ipi/Nivo resistant
(2) sce的细胞数 和 meta信息 不一致
因为前面Seurat流程使用的自己的参数,而meta信息是文章给的。
解决:使用subset函数 提取 meta.data中的cell
sce@meta.data <- sce@meta.data %>% rownames_to_column("Cell") %>% mutate(Cell2 = Cell) %>% column_to_rownames("Cell2") sce <- subset(sce,Cell %in% sce@meta.data$Cell)
3.2 VDJ结果结合RNA
使用combineExpression函数将VDJ结果和 RNA结果结合。
注:combined的barcode 和 sce的rownames 一定要一致,有出入的使用tidyverse包进行数据处理。
seurat <- combineExpression(combined, sce, cloneCall="gene", # group.by = "Sample", proportion = FALSE, cloneTypes=c(Single=1, Small=5, Medium=20, Large=100, Hyperexpanded=500)) head(seurat)
因为很多barcode 并没有TCR clone ,因此cloneTypes 含有一些NA是正常的 。
3.3 clonoType umap分布
可以使用DimPlot函数 绘制 clonetype在UUMAP中的分布
colorblind_vector <- colorRampPalette(rev(c("#0D0887FF", "#47039FFF", "#7301A8FF", "#9C179EFF", "#BD3786FF", "#D8576BFF", "#ED7953FF","#FA9E3BFF", "#FDC926FF", "#F0F921FF"))) p111 <- DimPlot(seurat, group.by = "cluster_name",label = T) p222 <- DimPlot(seurat, group.by = "cloneType",label = T) + #NoLegend() + scale_color_manual(values=colorblind_vector(5)) + theme(plot.title = element_blank()) p111 + p222
可以大概看出来CD8细胞中clone更多,符合文献的研究结果。
3.4 highlightClonotypes 高亮特定克隆型
使用highlightClonotypes函数 展示特定序列的克隆型分布
seurat <- highlightClonotypes(seurat, cloneCall= "aa", sequence = c("CAVGGRSNSGYALNF;CAASRNNNDMRF_CASSSLGNEQFF", "CAASRNNNDMRF_CASSSLGNEQFF")) DimPlot(seurat, group.by = "highlight") + theme(plot.title = element_blank())
3.5 occupiedscRepertoire cloneType分布
查看CD8T和 CD4T细胞中cloneType的分布情况
seurat_T <- subset(seurat, cluster_name %in% c("CD8A+ Exhausted","CD8A+ Exhausted IEG", "CD8A+ NK-like","CD8A+ Proliferating","CD8A+ Tissue-resident", "CD4+ Activated IEG","CD4+ Effector" ,"CD4+ Naive","CD4+ Proliferating" , "CD4+ Treg") ) occupiedscRepertoire(seurat_T, x.axis = "cluster_name")
3.6 alluvialClonotypes
使用alluvialClonotypes函数绘制T细胞种各个celltype之间的clonetype信息的桑葚图
alluvialClonotypes(seurat_T, cloneCall = "gene", y.axes = c("cluster_name", "cloneType"), color = "cluster_name")
3.7 getCirclize
使用getCirclize函数绘制circos图
library(circlize) library(scales) circles <- getCirclize(seurat, cloneCall = "gene+nt", groupBy = "cluster_name" )
四 scRepertoire基于celltype
第二部分中是根据sample计算的clone的一系列指标,如克隆型分布,长度,空间稳态等 。那经过第三部分中和单细胞转录组结合后就可以根据celltype维度进行clone的上述所有统计,这里仅示例几个。
4.1 按照celltype拆分
首先使用expression2List函数将seurat按照cluster_name进行拆分,然后就可进行二中的所有分析 。
注意这里不是官网的split.by ,而要使用group
combined2 <- expression2List(seurat_T, group = "cluster_name") length(combined2) p01 <- clonalDiversity(combined2, cloneCall = "nt") p02 <- clonalHomeostasis(combined2, cloneCall = "nt") p03 <- clonalOverlap(combined2, cloneCall="aa", method="overlap") p01 p02 #保存数据(后台获取),以备后用 save(combined,seurat_T,file = "combined_seurat_T.RData")