引言
R 语言作图(例如 ggplot2)时怎么添加物种图片(剪影)?又见之前的一篇推送,
👉 临摹 Ecology Letter 文章的系统发育树
(可惜 👉 忘设置超链接了。)
1 包和数据
加载包,
library(tidyverse)
library(ggplot2)
library(ggforce)
library(vegan)
library(indicspecies)
library(BiodiversityR)
library(rphylopic)
数据来自以下论文,
👉 Big rodents disperse small seeds and spores in Neotropical wetlands
All data supporting the results of this paper are available via fig- share: https://doi.org/10.6084/m9.figshare.25742442 (Hoffmann et al., 2024).
读取 .csv 数据,
# 数据
dat <- read.csv("Data_Big Rodents.csv", header = TRUE)
# 查看列名
colnames(dat)
[1] "samples" "species"
[3] "alternanthera" "amaranthus"
[5] "spirodela_intermedia" "hydrocotyle"
[7] "eclipta_prostrata" "stellaria_media"
[9] "cyperus_squarrosus" "cyperus_odoratus"
[11] "cyperus_cf_brevifolius" "cyperus_cf_iria"
[13] "cyperus" "eleocharis_cf_maculosa"
[15] "fimbristylis_autumnalis" "juncus_cf_capitatus"
[17] "juncus_cf_bufonius" "juncus_cf_tenuis"
[19] "marsilea_cf_ancylopoda" "ludwigia_leptocarpa"
[21] "ludwigia_peploides" "eragrostis_hypnoides"
[23] "eragrostis" "cynodon"
[25] "lolium" "bacopa_monnieri"
[27] "azolla" "boehmeria_cylindrica"
[29] "salvinia"
# 显示前 5 列,前 20 行
dat[1:20, 1:5]
samples species alternanthera amaranthus spirodela_intermedia
1 CA1_1 capybara 0 0 0
2 CA1_3 capybara 0 0 0
3 CA1_4 capybara 3 0 0
4 CA1_5 capybara 0 0 0
5 CA1_6 capybara 0 0 0
6 CA1_7 capybara 0 1 0
7 CA1_9 capybara 0 0 0
8 CA1_10 capybara 0 0 0
9 CA2_1 capybara 0 0 0
10 CA2_2 capybara 0 0 0
11 CA2_3 capybara 0 0 0
12 CA2_4 capybara 0 0 0
13 CA2_5 capybara 0 0 0
14 CA2_6 capybara 0 0 0
15 CA2_7 capybara 0 0 0
16 CA2_8 capybara 0 0 0
17 CA2_9 capybara 0 0 0
18 CA2_10 capybara 0 0 0
19 CA3_3 capybara 0 0 0
20 CA3_6 capybara 0 0 0
1.1 去掉空行
看看数据有没有空行,
一行中,数值全是 0。
# 去掉分类变量的前两列
plant.dat <- dat[, -1:-3]
# 对每一行求和
rowSums(plant.dat)
[1] 2 1 5 1 1 18 3 7 8 13 2 2 4 13 2 7 5 1 1
[20] 1 1 1 3 2 1 1 1 5 3 1 7 5 6 4 2 2 7 5
[39] 3 63 84 30 55 10 26 62 26 29 5 22 23 26 28 7 9 9 7
[58] 4 24 18 29 9 14 7 45 19 269 2 23 41 42 9 0 3 3 1
[77] 6 7 20 13 24 18
which(rowSums(plant.dat) == 0) # 第 73 行为空行
[1] 73
1.2 指示物种
# 函数
# ?indicspecies::multipatt()
# Runs the combination analysis
mpt <- indicspecies::multipatt(
plant.dat,
dat$species,
control = how(nperm = 999)
)
# Species with significant association to one combination
summary(mpt)
Multilevel pattern analysis
---------------------------
Association function: IndVal.g
Significance level (alpha): 0.05
Total number of species: 26
Selected number of species: 10
Number of species associated to 1 group: 10
List of species associated to each combination:
Group capybara #sps. 3
stat p.value
juncus_cf_bufonius 0.506 0.002 **
juncus_cf_capitatus 0.480 0.002 **
cyperus_cf_brevifolius 0.358 0.021 *
Group nutria #sps. 7
stat p.value
salvinia 0.953 0.001 ***
eclipta_prostrata 0.550 0.001 ***
marsilea_cf_ancylopoda 0.506 0.001 ***
lolium 0.457 0.002 **
azolla 0.431 0.007 **
cyperus_odoratus 0.425 0.018 *
hydrocotyle 0.404 0.025 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 筛选
mpt.sign2 <- filter(mpt$sign, p.value < 0.05)
(s.capybara2 <- rownames(mpt.sign2[mpt.sign2$s.capybara == 1, ]))
[1] "cyperus_cf_brevifolius" "juncus_cf_capitatus" "juncus_cf_bufonius"
(s.nutria2 <- rownames(mpt.sign2[mpt.sign2$s.nutria == 1, ]))
[1] "hydrocotyle" "eclipta_prostrata" "cyperus_odoratus"
[4] "marsilea_cf_ancylopoda" "lolium" "azolla"
[7] "salvinia"
2 PCoA
需要说明的是,原论文(即数据可用性部分)没有对使用了哪几种植物数据做说明,此处的 PCoA 分析结果与原论文不完全一致(但是结果的性质没改变)。
其实 MDS 就是 PCoA。
# 拟合 PCoA 的函数
# ?cmdscale()
# cmdscale(d, k = 2, ...)
# d - a distance structure
# k - the dimension of the space
# ...
# 排除空行
plant.dat2 <- plant.dat[-which(rowSums(plant.dat) == 0), ]
# Bray–Curtis 距离
# 函数 vegdist 来自 vegan 包
d <- vegdist(plant.dat2, method = "bray")
# 拟合 PCoA
pcoa <- cmdscale(d = d, k = 2, eig = TRUE)
# 各轴解释率
(Dim1.pct <- pcoa$eig[1] / sum(pcoa$eig))
[1] 0.2701407
(Dim2.pct <- pcoa$eig[2] / sum(pcoa$eig))
[1] 0.1773207
# 添加植物物种坐标点
plant.scores <- as.data.frame(BiodiversityR::add.spec.scores(pcoa, plant.dat2)$cproj)
plant.scores$plant.nms <- rownames(plant.scores)
3 可视化
3.1 整理数据
# 提取 PCoA 坐标
Dim1 <- vegan::scores(pcoa)[, 1]
Dim2 <- vegan::scores(pcoa)[, 2]
# 整理到一个数据框中
species <- dat[-73, "species"]
species.clrs <- ifelse(species == "capybara", "#67b715", "#1b81e5")
PCoA.df <- data.frame(Dim1, Dim2, species, species.clrs)
3.2 ggplot2 可视化
# 绘图
FONT <- "Myriad Pro"
CEX <- 18
p0 <- ggplot(
data = PCoA.df,
aes(Dim1, Dim2, color = species)
) +
geom_point() +
ggforce::geom_mark_ellipse(
expand = 0,
show.legend = FALSE
) +
geom_text(
data = filter(plant.scores, plant.scores$plant.nms %in% c(s.capybara2, s.nutria2)),
aes(Dim1, Dim2, label = plant.nms),
size = 4,
family = FONT,
inherit.aes = FALSE
) +
scale_color_manual(
name = NULL,
values = unique(species.clrs)
) +
scale_y_continuous(
breaks = round(seq(-0.9, 0.3, 0.3), 1),
expand = expansion(mult = c(0, 0.06))
) +
labs(
x = sprintf("PCoA 1 (%.1f%%)", Dim1.pct * 100),
y = sprintf("PCoA 2 (%.1f%%)", Dim2.pct * 100)
) +
coord_fixed(
xlim = c(-0.6, 0.6),
ylim = c(-0.6, 0.3),
clip = "off"
) +
theme_classic(
# theme_bw(
base_family = FONT,
base_size = CEX,
# base_rect_size = 0.4,
base_line_size = 0.4
) +
theme(
panel.grid = element_blank(),
axis.title = element_text(size = CEX),
axis.text = element_text(size = CEX, color = "black"),
axis.ticks.length = unit(8, "pt"),
legend.position = c(0, 0),
legend.justification = c(0, 0),
legend.background = element_blank(),
legend.text = element_text(size = CEX)
)
Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
3.5.0.
ℹ Please use the `legend.position.inside` argument of `theme()` instead.
p0
# ggview::ggview(p0, width = 6, height = 5, device = "png", dpi = 900)
ggsave(
"poca-p0.png",
p0,
width = 6,
height = 5,
dpi = 900
)
3.3 添加动物形象图
从 phylopic.org 网站上获取剪影,
# 使用 get_uuid 函数获取 uuid
get_uuid(name = "Hydrochoerus hydrochaeris") # capybara
[1] "097336ef-db5f-4edc-a2e4-2c29a10aa512"
get_uuid(name = "Myocastor coypus") # "nutria" also known as "coypu"
[1] "5ee3b11f-9cd8-4aaf-a59e-ac1ddc18d5e8"
# 或使用拉丁名
p1 <- p0 +
add_phylopic(
# uuid = "097336ef-db5f-4edc-a2e4-2c29a10aa512",
name = "Hydrochoerus hydrochaeris",
x = -0.3,
y = 0.1,
ysize = 0.3,
fill = unique(species.clrs)[1],
alpha = 0.6,
verbose = TRUE
) +
add_phylopic(
# uuid = "5ee3b11f-9cd8-4aaf-a59e-ac1ddc18d5e8",
name = "Myocastor coypus",
x = 0.3,
y = 0.1,
ysize = 0.3,
fill = unique(species.clrs)[2],
alpha = 0.6,
verbose = TRUE
) +
theme(
legend.position = "none"
)
p1
Organism silhouettes are from PhyloPic (https://www.phylopic.org/; T. Michael Keesey, 2023) and were added using the rphylopic R package ver. 1.4.0 (Gearty & Jones, 2023). Silhouette was made by Steven Traver, 2012 (CC0 1.0). Silhouette was contributed by Steven Traver.
Organism silhouettes are from PhyloPic (https://www.phylopic.org/; T. Michael Keesey, 2023) and were added using the rphylopic R package ver. 1.4.0 (Gearty & Jones, 2023). Silhouette was made by Mattia Menchetti, 2014 (CC0 1.0). Silhouette was contributed by Mattia Menchetti.
# ggview::ggview(p1, width = 5.5, height = 4.5, device = "png", dpi = 900)
ggsave(
"poca-p1.png",
p1,
width = 6,
height = 5,
dpi = 900
)
Organism silhouettes are from PhyloPic (https://www.phylopic.org/; T. Michael Keesey, 2023) and were added using the rphylopic R package ver. 1.4.0 (Gearty & Jones, 2023). Silhouette was made by Steven Traver, 2012 (CC0 1.0). Silhouette was contributed by Steven Traver.
Organism silhouettes are from PhyloPic (https://www.phylopic.org/; T. Michael Keesey, 2023) and were added using the rphylopic R package ver. 1.4.0 (Gearty & Jones, 2023). Silhouette was made by Mattia Menchetti, 2014 (CC0 1.0). Silhouette was contributed by Mattia Menchetti.
4 基础绘图可视化 base plot
4.1 绘图 1
标记 species 名称,按照 species 着色,
# 绘图参数
par(mar = c(4, 4, 1, 1), family = FONT, cex = 1.6)
# 绘图 1
plot(
Dim1,
Dim2,
type = "n",
xlab = sprintf("PCoA 1 (%.1f%%)", Dim1.pct * 100),
ylab = sprintf("PCoA 2 (%.1f%%)", Dim2.pct * 100)
)
text(
Dim1,
Dim2,
species,
cex = 0.6,
col = species.clrs
)
4.2 绘图 2
标记 samples 名称,还是按照 species 着色,
# 绘图参数
FONT <- "Myriad Pro"
par(mar = c(4, 4, 1, 1), family = FONT, cex = 1.6)
# 绘图 2
samples <- dat[-73, "samples"]
plot(
Dim1,
Dim2,
type = "n",
xlab = sprintf("PCoA 1 (%.1f%%)", Dim1.pct * 100),
ylab = sprintf("PCoA 2 (%.1f%%)", Dim2.pct * 100)
)
text(
Dim1,
Dim2,
samples,
cex = 0.6,
col = species.clrs
)
4.3 绘图 3
包围每种颜色,
# 绘图参数
FONT <- "Myriad Pro"
par(mar = c(4, 4, 1, 1), family = FONT, cex = 1.6, xpd = TRUE)
# 绘图 3
plot(
Dim1,
Dim2,
type = "n",
xlab = sprintf("PCoA 1 (%.1f%%)", Dim1.pct * 100),
ylab = sprintf("PCoA 2 (%.1f%%)", Dim2.pct * 100)
)
points(
Dim1,
Dim2,
pch = 16,
cex = 0.6,
col = species.clrs
)
for (i.species in unique(PCoA.df$species)) {
temp.df <- subset(PCoA.df, species == i.species)
polygon(
temp.df[chull(temp.df$Dim1, temp.df$Dim2), ],
border = temp.df$species.clrs
)
}
add_phylopic_base(
# uuid = "097336ef-db5f-4edc-a2e4-2c29a10aa512",
name = "Hydrochoerus hydrochaeris",
x = -0.3,
y = 0.1,
ysize = 0.3,
fill = unique(species.clrs)[1],
alpha = 0.6,
verbose = TRUE
)
Organism silhouettes are from PhyloPic (https://www.phylopic.org/; T. Michael Keesey, 2023) and were added using the rphylopic R package ver. 1.4.0 (Gearty & Jones, 2023). Silhouette was made by Steven Traver, 2012 (CC0 1.0). Silhouette was contributed by Steven Traver.
add_phylopic_base(
# uuid = "5ee3b11f-9cd8-4aaf-a59e-ac1ddc18d5e8",
name = "Myocastor coypus",
x = 0.3,
y = 0.1,
ysize = 0.3,
fill = unique(species.clrs)[2],
alpha = 0.6,
verbose = TRUE
)
Organism silhouettes are from PhyloPic (https://www.phylopic.org/; T. Michael Keesey, 2023) and were added using the rphylopic R package ver. 1.4.0 (Gearty & Jones, 2023). Silhouette was made by Mattia Menchetti, 2014 (CC0 1.0). Silhouette was contributed by Mattia Menchetti.
legend(
"bottomleft",
legend = unique(species),
pch = 19,
col = unique(unique(species.clrs)),
bty = "n"
)