# 跟着Nature Biotechnology学作图:R语言pca分析并使用ggplot2可视化结果

+关注继续查看

## 论文

Removing unwanted variation from large-scale RNA sequencing data with PRPS

https://www.nature.com/articles/s41587-022-01440-w#data-availability

## 数据链接

https://zenodo.org/record/6459560#.Y2D2NHZBzid

https://zenodo.org/record/6392171#.Y2D2SXZBzid

## 代码链接

https://github.com/RMolania/TCGA_PanCancer_UnwantedVariation

.pca <- function(data, is.log) {
if (is.log)
data <- data
else
data <- log2(data + 1)
svd <- base::svd(scale(
x = t(data),
center = TRUE,
scale = FALSE
))
percent <- svd$d ^ 2 / sum(svd$d ^ 2) * 100
percent <-
sapply(seq_along(percent),
function(i) {
round(percent[i], 1)
})
return(list(
sing.val = svd,
variation = percent))
}

.scatter.density.pc <- function(
pcs,
pc.var,
group.name,
group,
color,
strokeSize,
pointSize,
strokeColor,
alpha,
title
){
pair.pcs <- utils::combn(ncol(pcs), 2)
pList <- list()
for(i in 1:ncol(pair.pcs)){
if(i == 1){
x <- pair.pcs[1,i]
y <- pair.pcs[2,i]
p <- ggplot(mapping = aes(
x = pcs[,x],
y = pcs[,y],
fill = group)) +
xlab(paste0('PC', x, ' (', pc.var[x], '%)')) +
ylab(paste0('PC', y, ' (', pc.var[y], '%)')) +
geom_point(
aes(fill = group),
pch = 21,
color = strokeColor,
stroke = strokeSize,
size = pointSize,
alpha = alpha) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 5)) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 5)) +
ggtitle(title) +
theme(
legend.position = "right",
panel.background = element_blank(),
axis.line = element_line(colour = "black", size = 1.1),
legend.background = element_blank(),
legend.text = element_text(size = 12),
legend.title = element_text(size = 14),
legend.key = element_blank(),
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 10),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14)) +
guides(fill = guide_legend(override.aes = list(size = 4))) +
scale_fill_manual(name = group.name, values = color)

le <- ggpubr::get_legend(p)
}else{
x <- pair.pcs[1,i]
y <- pair.pcs[2,i]
p <- ggplot(mapping = aes(
x = pcs[,x],
y = pcs[,y],
fill = group)) +
xlab(paste0('PC', x, ' (',pc.var[x],  '%)')) +
ylab(paste0('PC', y, ' (',pc.var[y], '%)')) +
geom_point(
aes(fill = group),
pch = 21,
color = strokeColor,
stroke = strokeSize,
size = pointSize,
alpha = alpha) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 5)) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 5)) +
theme(
panel.background = element_blank(),
axis.line = element_line(colour = "black", size = 1.1),
legend.position = "none",
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 10),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14)) +
scale_fill_manual(values = color, name = group.name)
}
p <- p + theme(legend.position = "none")
xdens <- cowplot::axis_canvas(p, axis = "x")+
geom_density(
mapping = aes(
x = pcs[,x],
fill = group),
alpha = 0.7,
size = 0.2
) +
theme(legend.position = "none") +
scale_fill_manual(values = color)

ydens <- cowplot::axis_canvas(
p,
axis = "y",
coord_flip = TRUE) +
geom_density(
mapping = aes(
x = pcs[,y],
fill = group),
alpha = 0.7,
size = 0.2) +
theme(legend.position = "none") +
scale_fill_manual(name = group.name, values = color) +
coord_flip()

p1 <- insert_xaxis_grob(
p,
xdens,
grid::unit(.2, "null"),
position = "top"
)
p2 <- insert_yaxis_grob(
p1,
ydens,
grid::unit(.2, "null"),
position = "right"
)
pList[[i]] <- ggdraw(p2)
}
pList[[i+1]] <- le
return(pList)
}

source("pca_and_ggplot2.R")

library(ggplot2)
library(ggpubr)
library(cowplot)

pca.ncg<-.pca(data = iris[,1:4],
is.log = FALSE)
.scatter.density.pc(pcs = pca.ncg$sing.val$v[, 1:3],
pc.var = pca.ncg$variation, strokeColor = 'gray30', strokeSize = .2, pointSize = 2, alpha = .6, title = "A", group.name = "B", group=iris$Species,
color=c("red","blue","green")) -> p

do.call(
gridExtra::grid.arrange,
c(p,ncol=4))

|
7月前
|

74 0
|
7月前
|

64 0
|
7月前
|

111 0
|
7月前
|

45 0
|
7月前
|

173 0
|
7月前
|

64 0
|
7月前
|

96 0
|
7月前
|

88 0
|
7月前
|

102 0
|
7月前
|

69 0