在模仿中精进数据可视化_使用R语言绘制散点图和显著性
❝
在模仿中精进数据可视化
该系列推文中,我们将从各大顶级学术期刊的Figure入手,
解读文章的绘图思路,
模仿文章的作图风格,
构建适宜的绘图数据,
并且将代码应用到自己的实际论文中。
绘图缘由:小伙伴们总会展示出一些非常好看且精美的图片,我大概率会去学习和复现一下。其实每个人的时间和精力都是非常有限且异常宝贵的。之所以我还会去做,主要有以下原因:
(1)图片非常好看,我自己看着也手痒痒;(2)图片我自己在Paper里也用得上,先储备着留着用;(3)保持持续学习的状态。
论文
原图
复现
直接上代码:
加载R包
# NOTE(review): this removes every object from the calling environment before
# the script runs. Anything already defined in an interactive session is lost;
# consider running the script in a fresh R session instead.
rm(list = ls())
####----load R Package----####
library(tidyverse)
library(ggbeeswarm)
library(scales)
library(ggfun)
加载数据
####----load Data----####
# Read the comma-separated input file (first row = column names) and reshape
# it from wide to long: every input column name becomes a `key`, and the cells
# of that column become `value`.
# `group` is the second piece of `key` after splitting on "-"; the rest of the
# script looks up keys such as "Day 0-Old" and "Day 0-Young".
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
df_clean <- read_delim(file = "Input/test.csv", col_names = TRUE, delim = ",") %>%
  tidyr::gather(key = "key", value = "value") %>%
  dplyr::mutate(
    group = str_split(string = key, pattern = "-", simplify = TRUE)[, 2]
  )
# data statistics
# One row per `key`: the mean of `value`, plus the log10 of that mean.
df_clean_mean <- summarise(
  group_by(df_clean, key),
  mean = mean(value),
  mean_scale = log10(mean)
)
df_clean_mean
# data significance
# Show how many observations each key contributes.
table(df_clean$key)

# Two-sample t-test (t.test() defaults) of the "<day>-Old" values against the
# "<day>-Young" values, once per time point.
# vapply() returns the p-values as a numeric vector named by time point, so the
# printout shows which p-value belongs to which day, and the result is not
# grown element by element inside a loop.
time_points <- c("Day 0", "Day 21", "Day 35")
signif_out <- vapply(
  time_points,
  function(day) {
    old_values <- df_clean %>%
      dplyr::filter(key == paste(day, "Old", sep = "-")) %>%
      pull(value)
    young_values <- df_clean %>%
      dplyr::filter(key == paste(day, "Young", sep = "-")) %>%
      pull(value)
    t.test(old_values, young_values)$p.value
  },
  FUN.VALUE = numeric(1)
)
signif_out
# scale_y_log10
# Break positions for a log10 axis: every power of ten from the one at or
# below the smallest value of `x` to the one at or above the largest value.
#
# x: numeric vector (the script passes this function as `breaks` to
#    scale_y_log10()).
# Returns a numeric vector of powers of ten, or numeric(0) when `x` has no
# finite, strictly positive value.
breaks_log10 <- function(x) {
  # log10() is only finite for finite values > 0; NA, zero or negative input
  # would otherwise reach seq.int() as NA/-Inf and make it fail.
  x <- x[is.finite(x) & x > 0]
  if (length(x) == 0) {
    return(numeric(0))
  }
  low <- floor(log10(min(x)))
  high <- ceiling(log10(max(x)))
  10^seq.int(low, high)
}
开始绘图
####----plot----####
# Helpers that wrap the repeated annotate() calls used in the figure.

# Shaded background rectangle between two x positions, from y = 0 upwards.
day_band <- function(x_from, x_to, band_fill) {
  annotate(
    geom = "rect", xmin = x_from, xmax = x_to, ymin = 0, ymax = Inf,
    fill = band_fill, alpha = 0.4
  )
}

# Thick horizontal bar at height `y_at` spanning x_from..x_to.
mean_bar <- function(x_from, x_to, y_at) {
  annotate(
    geom = "segment", x = x_from, xend = x_to, y = y_at, yend = y_at,
    linewidth = 1
  )
}

# Bracket between two x positions: a top bar at `y_bar`, a left and a right
# leg running down to their own end heights, and the p-value label centred
# above the bar at `y_text`.
signif_bracket <- function(x_left, x_right, y_bar, y_left_end, y_right_end,
                           y_text) {
  list(
    annotate(geom = "segment", x = x_left, xend = x_right, y = y_bar, yend = y_bar),
    annotate(geom = "segment", x = x_left, xend = x_left, y = y_bar, yend = y_left_end),
    annotate(geom = "segment", x = x_right, xend = x_right, y = y_bar, yend = y_right_end),
    annotate(
      geom = "text", x = (x_left + x_right) / 2, y = y_text,
      label = bquote(italic("p")~"< 0.01"), size = 5
    )
  )
}

# Layers are added in the same order as they are drawn.
p <- ggplot(data = df_clean, mapping = aes(x = key, y = value)) +
  # background bands, one per pair of x positions
  day_band(-Inf, 2.5, "#edf8e9") +
  day_band(2.5, 4.5, "#c7e9c0") +
  day_band(4.5, Inf, "#a1d99b") +
  # jittered observations; shape and fill follow `group`
  geom_quasirandom(
    mapping = aes(x = key, y = value, shape = group, fill = group),
    method = "pseudorandom",
    size = 3,
    alpha = 0.85
  ) +
  scale_shape_manual(values = c(21, 22)) +
  scale_fill_manual(values = c("#e9a3c9", "#67a9cf")) +
  # one hard-coded bar per x position
  mean_bar(0.6, 1.4, 11.4) +
  mean_bar(1.6, 2.4, 34.8) +
  mean_bar(2.6, 3.4, 68.4) +
  mean_bar(3.6, 4.4, 529) +
  mean_bar(4.6, 5.4, 1179) +
  mean_bar(5.6, 6.4, 6217) +
  # log10 y axis with 10^n labels and tick marks drawn outside the panel
  scale_y_log10(
    breaks = breaks_log10,
    labels = trans_format(log10, math_format(10^.x))
  ) +
  annotation_logticks(sides = "l", outside = TRUE) +
  coord_cartesian(clip = "off") +
  scale_x_discrete(
    labels = c("Day 0", "Day 0", "Day 21", "Day 21", "Day 35", "Day 35")
  ) +
  labs(x = "Sample", y = "Data") +
  geom_hline(yintercept = 100, linetype = "dashed") +
  # Day 0
  signif_bracket(1, 2, 500, 300, 300, 800) +
  # Day 21
  signif_bracket(3, 4, 10000, 2000, 8000, 15000) +
  # Day 35
  signif_bracket(5, 6, 80000, 10000, 60000, 120000) +
  theme_classic() +
  theme(
    axis.text = element_text(size = 15),
    axis.text.y.left = element_text(margin = margin(r = 10)),
    legend.background = element_roundrect(color = "#808080", linetype = 1)
  )
p
# Save the figure as a PDF, width 8 and height 6 in ggsave()'s default units.
ggsave(
  "./Output/figure.pdf",
  plot = p,
  width = 8,
  height = 6
)
版本信息
R version 4.3.0 (2023-04-21)
Platform: x86_64-apple-darwin20 (64-bit)
Running under: macOS 15.1.1
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: Asia/Shanghai
tzcode source: internal
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] ggfun_0.1.5 scales_1.3.0 ggbeeswarm_0.7.2 lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1
[7] dplyr_1.1.4 purrr_1.0.2 readr_2.1.5 tidyr_1.3.1 tibble_3.2.1 ggplot2_3.5.1
[13] tidyverse_2.0.0
loaded via a namespace (and not attached):
[1] bit_4.0.5 gtable_0.3.5 crayon_1.5.2 compiler_4.3.0 tidyselect_1.2.1
[6] parallel_4.3.0 textshaping_0.3.7 systemfonts_1.1.0 R6_2.5.1 generics_0.1.3
[11] munsell_0.5.1 pillar_1.9.0 tzdb_0.4.0 rlang_1.1.4 utf8_1.2.4
[16] stringi_1.8.3 bit64_4.0.5 timechange_0.2.0 cli_3.6.3 withr_3.0.1
[21] magrittr_2.0.3 grid_4.3.0 vroom_1.6.4 rstudioapi_0.15.0 hms_1.1.3
[26] beeswarm_0.4.0 lifecycle_1.0.4 vipor_0.4.5 vctrs_0.6.5 glue_1.8.0
[31] farver_2.1.2 ragg_1.2.6 fansi_1.0.6 colorspace_2.1-1 tools_4.3.0
[36] pkgconfig_2.0.3
历史绘图合集
公众号推文一览
进化树合集
环状图
散点图
基因家族合集
换一个排布方式:
首先查看基础版热图:
然后再看进阶版热图:
基因组共线性
WGCNA ggplot2版本
其他科研绘图
合作、联系和交流
有很多小伙伴在后台私信作者,非常抱歉,我经常看不到导致错过,请添加下面的微信联系作者,一起交流数据分析和可视化。