在模仿中精进数据可视化_继续玩儿转GO和KEGG富集分析可视化
❝
在模仿中精进数据可视化
该系列推文中,我们将从各大顶级学术期刊的Figure入手,
解读文章的绘图思路,
模仿文章的作图风格,
构建适宜的绘图数据,
并且将代码应用到自己的实际论文中。
绘图缘由:小伙伴们总会展示出一些非常精美的图片,我大概率会去学习和复现一下。其实每个人的时间和精力都是非常有限且异常宝贵的,之所以我还会去做,主要有以下原因:
图片非常好看,我自己看着也手痒痒;图片我自己在Paper里也用得上,先储备着留着用;保持持续学习的状态。
笔者之前也写过非常多GO和KEGG富集分析的可视化推文,链接如下:
在模仿中精进数据可视化_模仿clusterProfiler手搓一个GO富集分析的结果
在模仿中精进数据可视化_另类的展示GO_KEGG富集注释结果
在模仿中精进数据可视化_自定义气泡图美化GO_KEGG富集分析的结果
在模仿中精进数据可视化_使用网络图绘制不一样的GO和KEGG可视化
在模仿中精进数据可视化_使用circlize绘制不一样的GO和KEGG可视化
今天我们继续玩转GO和KEGG的可视化,直接上图。图中有多少细节,就请大家仔细看看吧!
直接上代码:
加载R包
####----load R Package----####
# The workspace is intentionally NOT cleared here: `rm(list = ls())` would
# silently delete every object of whoever sources this script. Start a fresh
# R session instead if a clean environment is needed.
library(tidyverse)        # dplyr / stringr / ggplot2 and the %>% pipe
library(ggh4x)            # nested axis guide and manual secondary axis
library(ggfun)            # element_roundrect() for the legend frame
library(ggnewscale)       # several independent fill scales in one plot
library(grid)             # roundrectGrob(), unit(), gpar()
library(clusterProfiler)  # enrichGO(), enrichKEGG(), setReadable()
library(org.Hs.eg.db)     # human annotation database used for the enrichment
加载数据
####----load Data----####
# Example `geneList` object shipped with the DOSE package.
data(geneList, package = "DOSE")
# The first 300 names of `geneList` form the gene set to be tested
# (they are passed on below with keyType = "ENTREZID").
de <- names(geneList)[seq_len(300)]

# Annotation database name shared by every call below.
annotation_db <- "org.Hs.eg.db"

# GO enrichment over all ontologies (ont = "ALL").
deg_go <- enrichGO(
  gene = de,
  OrgDb = annotation_db,
  ont = "ALL",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.05
)
# Translate the gene IDs in the result, then flatten it to a data frame.
deg_go_2 <- as.data.frame(
  setReadable(deg_go, OrgDb = annotation_db, keyType = "ENTREZID")
)

# KEGG enrichment for human ("hsa").
deg_kegg <- enrichKEGG(
  gene = de,
  organism = "hsa",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.05
)
# Same ID translation and flattening as for the GO result.
deg_kegg_2 <- as.data.frame(
  setReadable(deg_kegg, OrgDb = annotation_db, keyType = "ENTREZID")
)
# Select the top 10 terms per category. "Top" means most significant, i.e. the
# smallest adjusted p-values; with_ties = FALSE caps every group at 10 rows.
GO_top10 <- deg_go_2 %>%
  dplyr::group_by(ONTOLOGY) %>%
  dplyr::slice_min(p.adjust, n = 10, with_ties = FALSE) %>%
  dplyr::ungroup()
KEGG_top10 <- deg_kegg_2 %>%
  dplyr::slice_min(p.adjust, n = 10, with_ties = FALSE) %>%
  # Keep only ID..Count so the columns line up with the GO table.
  dplyr::select(ID:Count) %>%
  dplyr::mutate(ONTOLOGY = "KEGG") %>%
  dplyr::select(ONTOLOGY, everything())

# One table for plotting. Categories are stacked bottom-to-top as
# KEGG, MF, CC, BP; within a category terms are sorted by gene count.
plot_df <- dplyr::bind_rows(GO_top10, KEGG_top10) %>%
  dplyr::mutate(
    ONTOLOGY = factor(ONTOLOGY,
                      levels = c("KEGG", "MF", "CC", "BP"),
                      ordered = TRUE)
  ) %>%
  dplyr::arrange(ONTOLOGY, dplyr::desc(Count)) %>%
  # Shorten long names: drop everything from the first comma onwards.
  dplyr::mutate(Description = str_remove(Description, pattern = ",.*")) %>%
  dplyr::mutate(
    # unique(): truncated names may collide, and factor() rejects
    # duplicated levels.
    Description = factor(Description,
                         levels = rev(unique(Description)),
                         ordered = TRUE),
    # Key actually mapped to y ("<Description>.<ONTOLOGY>"); the nested axis
    # guide splits it again to draw the category brackets.
    y_key = interaction(Description, ONTOLOGY, drop = TRUE)
  )
可视化
####----Plot----####
# The y scale places terms in the level order of `y_key`, which is NOT the row
# order of `plot_df`. Tick labels and tick colours for the manual right-hand
# axis are therefore looked up per level, so each label sits next to its point.
y_levels <- levels(plot_df$y_key)
row_of_level <- match(y_levels, as.character(plot_df$y_key))
y_labels <- as.character(plot_df$Description[row_of_level])

# Colours per category: right-axis text and nested-axis brackets.
axis_text_cols <- c(KEGG = "#41ae76", MF = "#225ea8",
                    CC = "#fc4e2a", BP = "#88419d")
nest_cols <- c(KEGG = "#74c476", MF = "#41b6c4",
               CC = "#f46d43", BP = "#9e9ac8")
y_label_cols <- unname(
  axis_text_cols[as.character(plot_df$ONTOLOGY[row_of_level])]
)
# Only categories that actually have terms get a bracket colour.
present_groups <- levels(droplevels(plot_df$ONTOLOGY))
nest_cols_used <- unname(nest_cols[present_groups])

# Point layer for one category; each layer gets its own fill scale below.
ontology_points <- function(ont) {
  geom_point(
    data = dplyr::filter(plot_df, ONTOLOGY == ont),
    mapping = aes(x = Count, y = y_key, fill = p.adjust, size = Count),
    shape = 21
  )
}

# Rounded frame around the title. It is positioned relative to the panel
# (centred, just above its top edge) and sized from the title text, so it does
# not depend on the data range. The point offsets are visual estimates for a
# 20 pt title -- adjust to taste.
title_text <- "GO and KEGG annotation"
title_box <- roundrectGrob(
  x = unit(0.5, "npc"),
  y = unit(1, "npc") + unit(2, "pt"),
  width = stringWidth(title_text) + unit(30, "pt"),
  height = unit(28, "pt"),
  just = c("centre", "bottom"),
  r = unit(6, "pt"),
  # fontsize matches plot.title so that stringWidth() measures the real title
  gp = gpar(col = "#969696", lwd = 1.5, fontsize = 20)
)

plot <- ggplot(plot_df) +
  ontology_points("KEGG") +
  scale_fill_gradient(low = "#a1d99b", high = "#238b45",
                      name = "KEGG p.adjust") +
  ggnewscale::new_scale_fill() +
  ontology_points("MF") +
  scale_fill_gradient(low = "#a6bddb", high = "#0570b0",
                      name = "MF p.adjust") +
  ggnewscale::new_scale_fill() +
  ontology_points("CC") +
  scale_fill_gradient(low = "#fdd49e", high = "#d7301f",
                      name = "CC p.adjust") +
  ggnewscale::new_scale_fill() +
  ontology_points("BP") +
  scale_fill_gradient(low = "#8c96c6", high = "#8c6bb1",
                      name = "BP p.adjust") +
  # Pin the y order explicitly instead of relying on layer training order.
  scale_y_discrete(limits = y_levels) +
  guides(
    y = "axis_nested",
    y.sec = guide_axis_manual(breaks = seq_along(y_levels),
                              labels = y_labels)
  ) +
  ggtitle(label = title_text) +
  labs(x = "Count", y = "Description") +
  scale_size(range = c(3, 7),
             guide = guide_legend(override.aes = list(fill = "#000000"))) +
  theme_bw() +
  theme(
    # `linewidth` replaces the deprecated `size` for line/rect elements.
    ggh4x.axis.nestline.y = element_line(linewidth = 3,
                                         color = nest_cols_used),
    ggh4x.axis.nesttext.y = element_text(colour = nest_cols_used),
    legend.background = element_roundrect(color = "#969696"),
    panel.border = element_rect(linewidth = 0.5),
    plot.margin = margin(t = 1, r = 1, b = 1, l = 1, unit = "cm"),
    axis.text = element_text(color = "#000000", size = 11),
    # One colour per tick, in scale order.
    axis.text.y = element_text(color = y_label_cols),
    # Hide the raw "<Description>.<ONTOLOGY>" labels on the left; only the
    # category brackets remain there, readable names are on the right.
    axis.text.y.left = element_blank(),
    axis.ticks.length.y.left = unit(10, "pt"),
    axis.ticks.y.left = element_line(color = NA),
    axis.title = element_text(color = "#000000", size = 15),
    plot.title = element_text(color = "#000000", size = 20, hjust = 0.5)
  ) +
  # clip = "off" lets the title frame be drawn outside the panel.
  coord_cartesian(clip = "off") +
  annotation_custom(grob = title_box,
                    xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf)
plot
ggsave(filename = "GO_KEGG.pdf",
       plot = plot,
       height = 11,
       width = 12.5)
版本信息
####----sessionInfo----####
# Print the R version, platform and package versions of the current session,
# so the environment that produced the figure can be reproduced.
sessionInfo()
R version 4.3.0 (2023-04-21)
Platform: x86_64-apple-darwin20 (64-bit)
Running under: macOS 14.6.1
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: Asia/Shanghai
tzcode source: internal
attached base packages:
[1] stats4 grid stats graphics grDevices utils datasets methods base
other attached packages:
[1] org.Hs.eg.db_3.18.0 AnnotationDbi_1.64.1 IRanges_2.36.0 S4Vectors_0.40.2
[5] Biobase_2.62.0 BiocGenerics_0.48.1 clusterProfiler_4.10.0 ggnewscale_0.5.0
[9] ggfun_0.1.5 ggh4x_0.2.8.9000 lubridate_1.9.3 forcats_1.0.0
[13] stringr_1.5.1 dplyr_1.1.4 purrr_1.0.2 readr_2.1.5
[17] tidyr_1.3.1 tibble_3.2.1 ggplot2_3.5.1 tidyverse_2.0.0
loaded via a namespace (and not attached):
[1] RColorBrewer_1.1-3 rstudioapi_0.15.0 jsonlite_1.8.7
[4] magrittr_2.0.3 farver_2.1.2 ragg_1.2.6
[7] fs_1.6.4 zlibbioc_1.48.0 vctrs_0.6.5
[10] memoise_2.0.1 RCurl_1.98-1.13 ggtree_3.10.0
[13] htmltools_0.5.7 AnnotationHub_3.10.0 curl_5.1.0
[16] gridGraphics_0.5-1 plyr_1.8.9 cachem_1.1.0
[19] igraph_2.0.3 mime_0.12 lifecycle_1.0.4
[22] pkgconfig_2.0.3 Matrix_1.6-5 R6_2.5.1
[25] fastmap_1.2.0 gson_0.1.0 GenomeInfoDbData_1.2.11
[28] shiny_1.8.0 digest_0.6.36 aplot_0.2.3
[31] enrichplot_1.22.0 colorspace_2.1-1 patchwork_1.2.0.9000
[34] textshaping_0.3.7 RSQLite_2.3.3 labeling_0.4.3
[37] filelock_1.0.2 fansi_1.0.6 timechange_0.2.0
[40] httr_1.4.7 polyclip_1.10-7 compiler_4.3.0
[43] bit64_4.0.5 withr_3.0.1 BiocParallel_1.36.0
[46] viridis_0.6.4 DBI_1.1.3 ggforce_0.4.2
[49] MASS_7.3-60 rappdirs_0.3.3 HDO.db_0.99.1
[52] tools_4.3.0 ape_5.8 scatterpie_0.2.1
[55] interactiveDisplayBase_1.40.0 httpuv_1.6.12 glue_1.7.0
[58] nlme_3.1-163 GOSemSim_2.28.0 promises_1.2.1
[61] shadowtext_0.1.2 reshape2_1.4.4 fgsea_1.28.0
[64] generics_0.1.3 gtable_0.3.5 tzdb_0.4.0
[67] data.table_1.16.0 hms_1.1.3 tidygraph_1.2.3
[70] utf8_1.2.4 XVector_0.42.0 ggrepel_0.9.6
[73] BiocVersion_3.18.1 pillar_1.9.0 yulab.utils_0.1.5
[76] later_1.3.1 splines_4.3.0 tweenr_2.0.3
[79] BiocFileCache_2.10.1 treeio_1.26.0 lattice_0.22-5
[82] bit_4.0.5 tidyselect_1.2.1 GO.db_3.18.0
[85] Biostrings_2.70.1 gridExtra_2.3 graphlayouts_1.0.2
[88] stringi_1.8.3 lazyeval_0.2.2 yaml_2.3.7
[91] codetools_0.2-19 ggraph_2.1.0 qvalue_2.34.0
[94] BiocManager_1.30.22 ggplotify_0.1.2 cli_3.6.3
[97] systemfonts_1.1.0 xtable_1.8-4 munsell_0.5.1
[100] Rcpp_1.0.13 GenomeInfoDb_1.38.1 dbplyr_2.4.0
[103] png_0.1-8 parallel_4.3.0 ellipsis_0.3.2
[106] blob_1.2.4 DOSE_3.28.1 bitops_1.0-7
[109] viridisLite_0.4.2 tidytree_0.4.5 scales_1.3.0
[112] crayon_1.5.2 rlang_1.1.4 cowplot_1.1.3
[115] fastmatch_1.1-4 KEGGREST_1.42.0
历史绘图合集
进化树合集
环状图
散点图
基因家族合集
换一个排布方式:
首先查看基础版热图:
然后再看进阶版热图:
基因组共线性
WGCNA ggplot2版本
其他科研绘图
合作、联系和交流
有很多小伙伴在后台私信作者,非常抱歉,我经常看不到导致错过,请添加下面的微信联系作者,一起交流数据分析和可视化。