在模仿中精进数据可视化_一文搞定PCA及其美化
❝
在模仿中精进数据可视化
该系列推文中,我们将从各大顶级学术期刊的Figure
入手,
解读文章的绘图思路,
模仿文章的作图风格,
构建适宜的绘图数据,
并且将代码应用到自己的实际论文中。
绘图缘由:小伙伴们总会展示出一些非常好看且精美的图片。我大概率会去学习和复现一下。其实每个人的时间和精力都非常有限和异常宝贵的。之所以我会去做,主要有以下原因:
图片非常好看,我自己看着也手痒痒 图片我自己在Paper也用的上,储备着留着用 保持了持续学习的状态
❝原图:不知道具体哪一篇文章,反正是一个师弟标记出来的
❝复现结果
❝简简单单,不会要画出散点图的质感,才是好看的散点图。
直接上代码:
加载R
包
rm(list = ls())
####----Load R Package----####
library(tidyverse)
library(FactoMineR)
library(factoextra)
library(ggfun)
library(ggforce)
library(ggpubr)
library(plyr)
library(patchwork)
library(cowplot)
library(ggsignif)
library(aplot)
加载数据
####----Load Data----####
OTU_table <- read_delim(file = "test.file.csv", delim = ",") %>%
column_to_rownames(var = "...1")
group_list <- str_split(colnames(OTU_table),
pattern = "_",
simplify = T)[,1]
datExpr1 <- as.data.frame(t(OTU_table[rownames(OTU_table) %in%
names(sort(apply(OTU_table, 1,
function(x){sd(x)}),
decreasing = T))[1:15000],]))
PCA_df=cbind(as.data.frame(datExpr1), group_list)
dat.pca <- PCA(PCA_df[, -ncol(PCA_df)], graph = FALSE)
dat.pca$eig
dat.pca$ind$coord
dat.pca$ind$contrib
dat.pca$eig[1,2]
dat.pca$eig[2,2]
dat_pca <- dat.pca$ind$coord[, c(1:2)]
tmp <- rownames(dat_pca)
dat_pca <- as.data.frame(dat_pca)
dat_pca$sample <- tmp
dat_pca$sample2 <- factor(group_list)
可视化
####----ggforce add ellipse----####
pca_plot <- ggplot(data = dat_pca, aes(x = Dim.1, y = Dim.2, colour = sample2)) +
geom_vline(xintercept = c(0), col = "#708090", linetype = 5) +
geom_hline(yintercept = c(0), col = "#708090", linetype = 5)+
geom_point(aes(fill = sample2),
shape = 21,size = 6 ,position = "jitter", colour = "#000000") +
ggforce::geom_mark_ellipse(aes(x = Dim.1, y = Dim.2,colour = sample2),
expand = unit(4, "mm"),alpha = 15,n = 100) +
scale_fill_manual(values = c("#225ea8","#1d91c0","#41b6c4","#7fcdbb","#fdb863"))+
scale_color_manual(values = c("#225ea8","#1d91c0","#41b6c4","#7fcdbb","#fdb863"))+
xlab(paste0("PC1 (", round(dat.pca$eig[1,2], 1), "%)")) +
ylab(paste0("PC2 (", round(dat.pca$eig[2,2], 1), "%)")) +
xlim(c(-185, 185)) +
ylim(c(-120, 100))+
labs(fill = "Sample",color = "Sample") +
ggtitle(label = "add ellipse line") +
theme_bw() +
theme(
plot.title = element_text(color = "#000000", hjust = 0.5, size = 20),
panel.border = element_rect(color = "#000000", linewidth = 1),
legend.background = element_roundrect(color = "#808080",linetype = 1),
legend.text = element_text(size = 15),
legend.title = element_text(size = 15),
axis.title = element_text(size = 15),
axis.text = element_text(size = 15)
)
pca_plot
####----ggforce add ellipse add fill----####
pca_plot2 <- ggplot(data = dat_pca, aes(x = Dim.1, y = Dim.2, colour = sample2)) +
geom_vline(xintercept = c(0), col = "#708090", linetype = 5) +
geom_hline(yintercept = c(0), col = "#708090", linetype = 5)+
geom_point(aes(fill = sample2),
shape = 21,size = 6 ,position = "jitter", colour = "#000000") +
ggforce::geom_mark_ellipse(aes(x = Dim.1, y = Dim.2,color = sample2, fill = sample2),
expand = unit(4, "mm"),alpha = 0.2,n = 100) +
scale_fill_manual(values = c("#225ea8","#1d91c0","#41b6c4","#7fcdbb","#fdb863"))+
scale_color_manual(values = c("#225ea8","#1d91c0","#41b6c4","#7fcdbb","#fdb863"))+
xlab(paste0("PC1 (", round(dat.pca$eig[1,2], 1), "%)")) +
ylab(paste0("PC2 (", round(dat.pca$eig[2,2], 1), "%)")) +
xlim(c(-185, 185)) +
ylim(c(-120, 100))+
labs(fill = "Sample",color = "Sample") +
ggtitle(label = "add ellipse and background") +
theme_bw() +
theme(
plot.title = element_text(color = "#000000", hjust = 0.5, size = 20),
panel.border = element_rect(color = "#000000", linewidth = 1),
legend.background = element_roundrect(color = "#808080",linetype = 1),
legend.text = element_text(size = 15),
legend.title = element_text(size = 15),
axis.title = element_text(size = 15),
axis.text = element_text(size = 15)
)
pca_plot2
####----stat_ellipse add ellipse add fill----####
pca_plot3 <- ggplot(data = dat_pca, aes(x = Dim.1, y = Dim.2, colour = sample2)) +
geom_vline(xintercept = c(0), col = "#708090", linetype = 5) +
geom_hline(yintercept = c(0), col = "#708090", linetype = 5)+
geom_point(aes(fill = sample2),
shape = 21,size = 6 ,position = "jitter", colour = "#000000") +
stat_ellipse(aes(x = Dim.1, y = Dim.2, color = sample2, fill = sample2),
level = 0.99, geom = "polygon",
alpha = 0.2)+
scale_fill_manual(values = c("#225ea8","#1d91c0","#41b6c4","#7fcdbb","#fdb863"))+
scale_color_manual(values = c("#225ea8","#1d91c0","#41b6c4","#7fcdbb","#fdb863"))+
xlab(paste0("PC1 (", round(dat.pca$eig[1,2], 1), "%)")) +
ylab(paste0("PC2 (", round(dat.pca$eig[2,2], 1), "%)")) +
xlim(c(-185, 185)) +
ylim(c(-120, 100))+
labs(fill = "Sample",color = "Sample") +
ggtitle(label = "add ellipse and 99% confidence interval") +
theme_bw() +
theme(
plot.title = element_text(color = "#000000", hjust = 0.5, size = 20),
panel.border = element_rect(color = "#000000", linewidth = 1),
legend.background = element_roundrect(color = "#808080",linetype = 1),
legend.text = element_text(size = 15),
legend.title = element_text(size = 15),
axis.title = element_text(size = 15),
axis.text = element_text(size = 15)
)
pca_plot3
####----结合一下上面的两个图----####
pca_plot4 <- ggplot(data = dat_pca, aes(x = Dim.1, y = Dim.2, colour = sample2)) +
geom_vline(xintercept = c(0), col = "#708090", linetype = 5) +
geom_hline(yintercept = c(0), col = "#708090", linetype = 5)+
geom_point(aes(fill = sample2),
shape = 21,size = 6 ,position = "jitter", colour = "#000000") +
stat_ellipse(aes(x = Dim.1, y = Dim.2, fill = sample2),
level = 0.99, geom = "polygon",
alpha = 0.2,
color = NA)+
ggforce::geom_mark_ellipse(aes(x = Dim.1, y = Dim.2,color = sample2),
expand = unit(18, "mm"),alpha = 0.2, n = 100) +
scale_fill_manual(values = c("#225ea8","#1d91c0","#41b6c4","#7fcdbb","#fdb863"))+
scale_color_manual(values = c("#225ea8","#1d91c0","#41b6c4","#7fcdbb","#fdb863"))+
xlab(paste0("PC1 (", round(dat.pca$eig[1,2], 1), "%)")) +
ylab(paste0("PC2 (", round(dat.pca$eig[2,2], 1), "%)")) +
xlim(c(-185, 185)) +
ylim(c(-120, 100))+
labs(fill = "Sample",color = "Sample") +
ggtitle(label = "add ellipse and 99% confidence interval") +
theme_bw() +
theme(
plot.title = element_text(color = "#000000", hjust = 0.5, size = 20),
panel.border = element_rect(color = "#000000", linewidth = 1),
legend.background = element_roundrect(color = "#808080",linetype = 1),
legend.text = element_text(size = 15),
legend.title = element_text(size = 15),
axis.title = element_text(size = 15),
axis.text = element_text(size = 15)
)
pca_plot4
####----add polygon add fill----####
group_border = ddply(dat_pca, "sample2", function(df) df[chull(df[[1]], df[[2]]), ])
pca_plot5 <- ggplot(data = dat_pca, aes(x = Dim.1, y = Dim.2, colour = sample2)) +
geom_vline(xintercept = c(0), col = "#708090", linetype = 5) +
geom_hline(yintercept = c(0), col = "#708090", linetype = 5)+
geom_point(aes(fill = sample2),
shape = 21,size = 6 ,position = "jitter", colour = "#000000") +
geom_polygon(data = group_border, aes(x = Dim.1, y = Dim.2, fill = sample2), alpha = 0.3) +
scale_fill_manual(values = c("#225ea8","#1d91c0","#41b6c4","#7fcdbb","#fdb863"))+
scale_color_manual(values = c("#225ea8","#1d91c0","#41b6c4","#7fcdbb","#fdb863"))+
xlab(paste0("PC1 (", round(dat.pca$eig[1,2], 1), "%)")) +
ylab(paste0("PC2 (", round(dat.pca$eig[2,2], 1), "%)")) +
xlim(c(-185, 185)) +
ylim(c(-120, 100))+
labs(fill = "Sample",color = "Sample") +
# ggtitle(label = "add polygon") +
theme_bw() +
theme(
plot.title = element_text(color = "#000000", hjust = 0.5, size = 20),
panel.border = element_rect(color = "#000000", linewidth = 1),
legend.background = element_roundrect(color = "#808080",linetype = 1),
legend.text = element_text(size = 15),
legend.title = element_text(size = 15),
axis.title = element_text(size = 15),
axis.text = element_text(size = 15)
)
pca_plot5
####----在图片边缘增加一些统计信息----####
pca_plot6 <- ggplot(data = dat_pca, aes(x = Dim.1, y = Dim.2, colour = sample2)) +
geom_vline(xintercept = c(0), col = "#708090", linetype = 5) +
geom_hline(yintercept = c(0), col = "#708090", linetype = 5)+
geom_point(aes(fill = sample2),
shape = 21,size = 6 ,position = "jitter", colour = "#000000") +
ggforce::geom_mark_ellipse(aes(x = Dim.1, y = Dim.2,color = sample2, fill = sample2),
expand = unit(4, "mm"),alpha = 0.2,n = 100) +
scale_fill_manual(values = c("#225ea8","#1d91c0","#41b6c4","#7fcdbb","#fdb863"))+
scale_color_manual(values = c("#225ea8","#1d91c0","#41b6c4","#7fcdbb","#fdb863"))+
xlab(paste0("PC1 (", round(dat.pca$eig[1,2], 1), "%)")) +
ylab(paste0("PC2 (", round(dat.pca$eig[2,2], 1), "%)")) +
xlim(c(-185, 185)) +
ylim(c(-120, 100))+
labs(fill = "Sample",color = "Sample") +
# ggtitle(label = "add ellipse and background") +
theme_bw() +
theme(
plot.title = element_text(color = "#000000", hjust = 0.5, size = 20),
panel.border = element_rect(color = "#000000", linewidth = 1),
legend.background = element_roundrect(color = "#808080",linetype = 1),
legend.text = element_text(size = 15),
legend.title = element_text(size = 15),
# legend.position = "bottom",
axis.title = element_text(size = 15),
axis.text = element_text(size = 15)
)
pca_plot6
p_density <- ggplot(dat_pca, aes(x = Dim.1)) +
geom_density(aes(fill = sample2), alpha = 0.6) +
scale_fill_manual(values = c("#225ea8","#1d91c0","#41b6c4","#7fcdbb","#fdb863"))+
scale_y_continuous(expand = c(0,0)) +
labs(x = "") +
theme_classic() +
theme(legend.position = "none",
axis.line.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank())
p_density
my_comparisons <- list(c("LocationA","LocationB"), c("LocationB", "LocationC"),
c("LocationD", "LocationE"))
p_boxplot <- ggplot(dat_pca, aes(x = sample2, y = Dim.2)) +
geom_boxplot(aes(fill = sample2), alpha = 0.75) +
stat_compare_means(comparisons = my_comparisons) +
stat_compare_means(label.y = 110) +
scale_fill_manual(values = c("#225ea8","#1d91c0","#41b6c4","#7fcdbb","#fdb863"))+
labs(x = "", y = "") +
theme_bw() +
theme(
plot.title = element_text(color = "#000000", hjust = 0.5, size = 20),
panel.border = element_rect(color = "#000000", linewidth = 1),
legend.background = element_roundrect(color = "#808080",linetype = 1),
legend.text = element_text(size = 15),
legend.title = element_text(size = 15),
legend.position = "none",
axis.title = element_text(size = 15),
axis.text.y = element_blank(),
axis.text.x = element_text(angle = 90, size = 10, vjust = 0.5)
)
p_boxplot
p_combine <- pca_plot6 %>%
insert_top(p_density, height = .3) %>%
insert_right(p_boxplot, width=.45)
p_combine
ggsave(filename = "p_combine.pdf",
plot = p_combine,
height = 8,
width = 12)
####----patchwork combine plot----####
pca_plot + pca_plot2 + pca_plot3 + pca_plot5 + plot_layout(nrow = 2)
pca_plot + pca_plot2 + pca_plot3 + pca_plot5 +
plot_layout(guides = "collect") &
theme(legend.position = "bottom")
####----cowplot combine plot----####
combine_plot <- plot_grid(pca_plot + theme(legend.position = "none"),
pca_plot2 + theme(legend.position = "none"),
pca_plot3 + theme(legend.position = "none"),
pca_plot5 + theme(legend.position = "none"),
nrow = 2)
plot_grid(combine_plot, get_legend(pca_plot), rel_widths = c(4, 1))
####----combine plot legend----####
ggpubr::ggarrange(pca_plot, pca_plot2, pca_plot3, pca_plot5,
common.legend = TRUE, legend = "top")
ggsave(filename = "PCA.pdf", height = 10, width = 11)
版本信息
####----sessionInfo----####
sessionInfo()
R version 4.3.0 (2023-04-21)
Platform: x86_64-apple-darwin20 (64-bit)
Running under: macOS 14.6.1
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: Asia/Shanghai
tzcode source: internal
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] aplot_0.2.3 ggsignif_0.6.4 ggExtra_0.10.1.9000 cowplot_1.1.3 patchwork_1.2.0.9000
[6] plyr_1.8.9 ggpubr_0.6.0 ggforce_0.4.2 ggfun_0.1.5 factoextra_1.0.7
[11] FactoMineR_2.9 lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
[16] purrr_1.0.2 readr_2.1.5 tidyr_1.3.1 tibble_3.2.1 ggplot2_3.5.1
[21] tidyverse_2.0.0
loaded via a namespace (and not attached):
[1] gridExtra_2.3 sandwich_3.0-2 rlang_1.1.4 magrittr_2.0.3 multcomp_1.4-25
[6] compiler_4.3.0 systemfonts_1.1.0 vctrs_0.6.5 pkgconfig_2.0.3 crayon_1.5.2
[11] fastmap_1.2.0 backports_1.4.1 ellipsis_0.3.2 labeling_0.4.3 utf8_1.2.4
[16] promises_1.2.1 tzdb_0.4.0 ragg_1.2.6 bit_4.0.5 cachem_1.1.0
[21] jsonlite_1.8.7 flashClust_1.01-2 later_1.3.1 tweenr_2.0.3 broom_1.0.5
[26] parallel_4.3.0 cluster_2.1.6 R6_2.5.1 stringi_1.8.3 car_3.1-2
[31] estimability_1.4.1 Rcpp_1.0.13 zoo_1.8-12 httpuv_1.6.12 Matrix_1.6-5
[36] splines_4.3.0 timechange_0.2.0 tidyselect_1.2.1 rstudioapi_0.15.0 abind_1.4-5
[41] codetools_0.2-19 miniUI_0.1.1.1 lattice_0.22-5 treeio_1.26.0 shiny_1.8.0
[46] withr_3.0.1 coda_0.19-4 gridGraphics_0.5-1 survival_3.5-7 polyclip_1.10-7
[51] ggtree_3.10.0 pillar_1.9.0 carData_3.0-5 DT_0.30 generics_0.1.3
[56] vroom_1.6.4 hms_1.1.3 tidytree_0.4.5 munsell_0.5.1 scales_1.3.0
[61] xtable_1.8-4 leaps_3.1 glue_1.7.0 lazyeval_0.2.2 emmeans_1.8.9
[66] scatterplot3d_0.3-44 tools_4.3.0 fs_1.6.4 mvtnorm_1.2-3 grid_4.3.0
[71] ape_5.8 colorspace_2.1-1 nlme_3.1-163 cli_3.6.3 textshaping_0.3.7
[76] fansi_1.0.6 gtable_0.3.5 rstatix_0.7.2 yulab.utils_0.1.5 digest_0.6.36
[81] ggrepel_0.9.6 ggplotify_0.1.2 TH.data_1.1-2 htmlwidgets_1.6.3 farver_2.1.2
[86] memoise_2.0.1 htmltools_0.5.7 lifecycle_1.0.4 multcompView_0.1-9 mime_0.12
[91] bit64_4.0.5 MASS_7.3-60
历史绘图合集
进化树合集
环状图
散点图
基因家族合集
换一个排布方式:
首先查看基础版热图:
然后再看进阶版热图:
基因组共线性
WGCNA ggplot2版本
其他科研绘图
合作、联系和交流
有很多小伙伴在后台私信作者,非常抱歉,我经常看不到导致错过,请添加下面的微信联系作者,一起交流数据分析和可视化。