R 语言|在 PCoA 图里添加物种图片|添加水豚剪影|卡皮巴拉|实战代码

文摘   2024-07-19 00:31   江苏  

引言

R 语言作图(例如 ggplot2)时怎么添加物种图片(剪影)?又见之前的一篇推送,

👉 临摹 Ecology Letter 文章的系统发育树

(可惜 👉 忘设置超链接了。)

1 包和数据

加载包,

library(tidyverse)
library(ggplot2)
library(ggforce)
library(vegan)
library(indicspecies)
library(BiodiversityR)
library(rphylopic)

数据来自以下论文,

👉 Big rodents disperse small seeds and spores in Neotropical wetlands

All data supporting the results of this paper are available via fig- share: https://doi.org/10.6084/m9.figshare.25742442 (Hoffmann et al., 2024).

读取 .csv 数据,

# 数据
dat <- read.csv("Data_Big Rodents.csv", header = TRUE)

# 查看列名
colnames(dat)
 [1] "samples"                 "species"                
[3] "alternanthera" "amaranthus"
[5] "spirodela_intermedia" "hydrocotyle"
[7] "eclipta_prostrata" "stellaria_media"
[9] "cyperus_squarrosus" "cyperus_odoratus"
[11] "cyperus_cf_brevifolius" "cyperus_cf_iria"
[13] "cyperus" "eleocharis_cf_maculosa"
[15] "fimbristylis_autumnalis" "juncus_cf_capitatus"
[17] "juncus_cf_bufonius" "juncus_cf_tenuis"
[19] "marsilea_cf_ancylopoda" "ludwigia_leptocarpa"
[21] "ludwigia_peploides" "eragrostis_hypnoides"
[23] "eragrostis" "cynodon"
[25] "lolium" "bacopa_monnieri"
[27] "azolla" "boehmeria_cylindrica"
[29] "salvinia"
# 显示前 5 列,前 20 行
dat[1:20, 1:5]
   samples  species alternanthera amaranthus spirodela_intermedia
1 CA1_1 capybara 0 0 0
2 CA1_3 capybara 0 0 0
3 CA1_4 capybara 3 0 0
4 CA1_5 capybara 0 0 0
5 CA1_6 capybara 0 0 0
6 CA1_7 capybara 0 1 0
7 CA1_9 capybara 0 0 0
8 CA1_10 capybara 0 0 0
9 CA2_1 capybara 0 0 0
10 CA2_2 capybara 0 0 0
11 CA2_3 capybara 0 0 0
12 CA2_4 capybara 0 0 0
13 CA2_5 capybara 0 0 0
14 CA2_6 capybara 0 0 0
15 CA2_7 capybara 0 0 0
16 CA2_8 capybara 0 0 0
17 CA2_9 capybara 0 0 0
18 CA2_10 capybara 0 0 0
19 CA3_3 capybara 0 0 0
20 CA3_6 capybara 0 0 0

1.1 去掉空行

看看数据有没有空行,

  • 一行中,数值全是 0。

# 去掉分类变量的前两列
plant.dat <- dat[, -1:-3]

# 对每一行求和
rowSums(plant.dat)
 [1]   2   1   5   1   1  18   3   7   8  13   2   2   4  13   2   7   5   1   1
[20] 1 1 1 3 2 1 1 1 5 3 1 7 5 6 4 2 2 7 5
[39] 3 63 84 30 55 10 26 62 26 29 5 22 23 26 28 7 9 9 7
[58] 4 24 18 29 9 14 7 45 19 269 2 23 41 42 9 0 3 3 1
[77] 6 7 20 13 24 18
which(rowSums(plant.dat) == 0) # 第 73 行为空行
[1] 73

1.2 指示物种

# 函数
# ?indicspecies::multipatt()

# Runs the combination analysis
mpt <- indicspecies::multipatt(
plant.dat,
dat$species,
control = how(nperm = 999)
)

# Species with significant association to one combination
summary(mpt)

Multilevel pattern analysis
---------------------------

Association function: IndVal.g
Significance level (alpha): 0.05

Total number of species: 26
Selected number of species: 10
Number of species associated to 1 group: 10

List of species associated to each combination:

Group capybara #sps. 3
stat p.value
juncus_cf_bufonius 0.506 0.002 **
juncus_cf_capitatus 0.480 0.002 **
cyperus_cf_brevifolius 0.358 0.021 *

Group nutria #sps. 7
stat p.value
salvinia 0.953 0.001 ***
eclipta_prostrata 0.550 0.001 ***
marsilea_cf_ancylopoda 0.506 0.001 ***
lolium 0.457 0.002 **
azolla 0.431 0.007 **
cyperus_odoratus 0.425 0.018 *
hydrocotyle 0.404 0.025 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 筛选
mpt.sign2 <- filter(mpt$sign, p.value < 0.05)
(s.capybara2 <- rownames(mpt.sign2[mpt.sign2$s.capybara == 1, ]))
[1] "cyperus_cf_brevifolius" "juncus_cf_capitatus"    "juncus_cf_bufonius"
(s.nutria2 <- rownames(mpt.sign2[mpt.sign2$s.nutria == 1, ]))
[1] "hydrocotyle"            "eclipta_prostrata"      "cyperus_odoratus"      
[4] "marsilea_cf_ancylopoda" "lolium" "azolla"
[7] "salvinia"

2 PCoA

需要说明的是,原论文(即数据可用性部分)没有对使用了哪几种植物数据做说明,此处的 PCoA 分析结果与原论文不完全一致(但是结果的性质没改变)。

  • 其实 MDS 就是 PCoA。

# 拟合 PCoA 的函数
# ?cmdscale()
# cmdscale(d, k = 2, ...)
# d - a distance structure
# k - the dimension of the space
# ...

# 排除空行
plant.dat2 <- plant.dat[-which(rowSums(plant.dat) == 0), ]

# Bray–Curtis 距离
# 函数 vegdist 来自 vegan 包
d <- vegdist(plant.dat2, method = "bray")

# 拟合 PCoA
pcoa <- cmdscale(d = d, k = 2, eig = TRUE)

# 各轴解释率
(Dim1.pct <- pcoa$eig[1] / sum(pcoa$eig))
[1] 0.2701407
(Dim2.pct <- pcoa$eig[2] / sum(pcoa$eig))
[1] 0.1773207
# 添加植物物种坐标点
plant.scores <- as.data.frame(BiodiversityR::add.spec.scores(pcoa, plant.dat2)$cproj)
plant.scores$plant.nms <- rownames(plant.scores)

3 可视化

3.1 整理数据

# 提取 PCoA 坐标
Dim1 <- vegan::scores(pcoa)[, 1]
Dim2 <- vegan::scores(pcoa)[, 2]

# 整理到一个数据框中
species <- dat[-73, "species"]
species.clrs <- ifelse(species == "capybara", "#67b715", "#1b81e5")
PCoA.df <- data.frame(Dim1, Dim2, species, species.clrs)

3.2 ggplot2 可视化

# 绘图
FONT <- "Myriad Pro"
CEX <- 18

p0 <- ggplot(
data = PCoA.df,
aes(Dim1, Dim2, color = species)
) +
geom_point() +
ggforce::geom_mark_ellipse(
expand = 0,
show.legend = FALSE
) +
geom_text(
data = filter(plant.scores, plant.scores$plant.nms %in% c(s.capybara2, s.nutria2)),
aes(Dim1, Dim2, label = plant.nms),
size = 4,
family = FONT,
inherit.aes = FALSE
) +
scale_color_manual(
name = NULL,
values = unique(species.clrs)
) +
scale_y_continuous(
breaks = round(seq(-0.9, 0.3, 0.3), 1),
expand = expansion(mult = c(0, 0.06))
) +
labs(
x = sprintf("PCoA 1 (%.1f%%)", Dim1.pct * 100),
y = sprintf("PCoA 2 (%.1f%%)", Dim2.pct * 100)
) +
coord_fixed(
xlim = c(-0.6, 0.6),
ylim = c(-0.6, 0.3),
clip = "off"
) +
theme_classic(
# theme_bw(
base_family = FONT,
base_size = CEX,
# base_rect_size = 0.4,
base_line_size = 0.4
) +
theme(
panel.grid = element_blank(),
axis.title = element_text(size = CEX),
axis.text = element_text(size = CEX, color = "black"),
axis.ticks.length = unit(8, "pt"),
legend.position = c(0, 0),
legend.justification = c(0, 0),
legend.background = element_blank(),
legend.text = element_text(size = CEX)
)
Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
3.5.0.
ℹ Please use the `legend.position.inside` argument of `theme()` instead.
p0


# ggview::ggview(p0, width = 6, height = 5, device = "png", dpi = 900)
ggsave(
"poca-p0.png",
p0,
width = 6,
height = 5,
dpi = 900
)

3.3 添加动物形象图

从 phylopic.org 网站上获取剪影,

# 使用 get_uuid 函数获取 uuid
get_uuid(name = "Hydrochoerus hydrochaeris") # capybara
[1] "097336ef-db5f-4edc-a2e4-2c29a10aa512"
get_uuid(name = "Myocastor coypus") # "nutria" also known as "coypu"
[1] "5ee3b11f-9cd8-4aaf-a59e-ac1ddc18d5e8"
# 或使用拉丁名
p1 <- p0 +
add_phylopic(
# uuid = "097336ef-db5f-4edc-a2e4-2c29a10aa512",
name = "Hydrochoerus hydrochaeris",
x = -0.3,
y = 0.1,
ysize = 0.3,
fill = unique(species.clrs)[1],
alpha = 0.6,
verbose = TRUE
) +
add_phylopic(
# uuid = "5ee3b11f-9cd8-4aaf-a59e-ac1ddc18d5e8",
name = "Myocastor coypus",
x = 0.3,
y = 0.1,
ysize = 0.3,
fill = unique(species.clrs)[2],
alpha = 0.6,
verbose = TRUE
) +
theme(
legend.position = "none"
)
p1
Organism silhouettes are from PhyloPic (https://www.phylopic.org/; T. Michael Keesey, 2023) and were added using the rphylopic R package ver. 1.4.0 (Gearty & Jones, 2023). Silhouette was made by Steven Traver, 2012 (CC0 1.0). Silhouette was contributed by Steven Traver.
Organism silhouettes are from PhyloPic (https://www.phylopic.org/; T. Michael Keesey, 2023) and were added using the rphylopic R package ver. 1.4.0 (Gearty & Jones, 2023). Silhouette was made by Mattia Menchetti, 2014 (CC0 1.0). Silhouette was contributed by Mattia Menchetti.


# ggview::ggview(p1, width = 5.5, height = 4.5, device = "png", dpi = 900)
ggsave(
"poca-p1.png",
p1,
width = 6,
height = 5,
dpi = 900
)
Organism silhouettes are from PhyloPic (https://www.phylopic.org/; T. Michael Keesey, 2023) and were added using the rphylopic R package ver. 1.4.0 (Gearty & Jones, 2023). Silhouette was made by Steven Traver, 2012 (CC0 1.0). Silhouette was contributed by Steven Traver.
Organism silhouettes are from PhyloPic (https://www.phylopic.org/; T. Michael Keesey, 2023) and were added using the rphylopic R package ver. 1.4.0 (Gearty & Jones, 2023). Silhouette was made by Mattia Menchetti, 2014 (CC0 1.0). Silhouette was contributed by Mattia Menchetti.

4 基础绘图可视化 base plot

4.1 绘图 1

标记 species 名称,按照 species 着色,

# 绘图参数
par(mar = c(4, 4, 1, 1), family = FONT, cex = 1.6)

# 绘图 1
plot(
Dim1,
Dim2,
type = "n",
xlab = sprintf("PCoA 1 (%.1f%%)", Dim1.pct * 100),
ylab = sprintf("PCoA 2 (%.1f%%)", Dim2.pct * 100)
)
text(
Dim1,
Dim2,
species,
cex = 0.6,
col = species.clrs
)

4.2 绘图 2

标记 samples 名称,还是按照 species 着色,

# 绘图参数
FONT <- "Myriad Pro"
par(mar = c(4, 4, 1, 1), family = FONT, cex = 1.6)

# 绘图 2
samples <- dat[-73, "samples"]
plot(
Dim1,
Dim2,
type = "n",
xlab = sprintf("PCoA 1 (%.1f%%)", Dim1.pct * 100),
ylab = sprintf("PCoA 2 (%.1f%%)", Dim2.pct * 100)
)
text(
Dim1,
Dim2,
samples,
cex = 0.6,
col = species.clrs
)

4.3 绘图 3

包围每种颜色,

# 绘图参数
FONT <- "Myriad Pro"
par(mar = c(4, 4, 1, 1), family = FONT, cex = 1.6, xpd = TRUE)

# 绘图 3
plot(
Dim1,
Dim2,
type = "n",
xlab = sprintf("PCoA 1 (%.1f%%)", Dim1.pct * 100),
ylab = sprintf("PCoA 2 (%.1f%%)", Dim2.pct * 100)
)
points(
Dim1,
Dim2,
pch = 16,
cex = 0.6,
col = species.clrs
)
for (i.species in unique(PCoA.df$species)) {
temp.df <- subset(PCoA.df, species == i.species)
polygon(
temp.df[chull(temp.df$Dim1, temp.df$Dim2), ],
border = temp.df$species.clrs
)
}
add_phylopic_base(
# uuid = "097336ef-db5f-4edc-a2e4-2c29a10aa512",
name = "Hydrochoerus hydrochaeris",
x = -0.3,
y = 0.1,
ysize = 0.3,
fill = unique(species.clrs)[1],
alpha = 0.6,
verbose = TRUE
)
Organism silhouettes are from PhyloPic (https://www.phylopic.org/; T. Michael Keesey, 2023) and were added using the rphylopic R package ver. 1.4.0 (Gearty & Jones, 2023). Silhouette was made by Steven Traver, 2012 (CC0 1.0). Silhouette was contributed by Steven Traver.
add_phylopic_base(
# uuid = "5ee3b11f-9cd8-4aaf-a59e-ac1ddc18d5e8",
name = "Myocastor coypus",
x = 0.3,
y = 0.1,
ysize = 0.3,
fill = unique(species.clrs)[2],
alpha = 0.6,
verbose = TRUE
)
Organism silhouettes are from PhyloPic (https://www.phylopic.org/; T. Michael Keesey, 2023) and were added using the rphylopic R package ver. 1.4.0 (Gearty & Jones, 2023). Silhouette was made by Mattia Menchetti, 2014 (CC0 1.0). Silhouette was contributed by Mattia Menchetti.
legend(
"bottomleft",
legend = unique(species),
pch = 19,
col = unique(unique(species.clrs)),
bty = "n"
)

ecologyR
🚀打赏可获取本公众号代码合集(见置顶文章)📌统计案例📌统计制图📌显著性标记📌结构方程模型可视化工具📌SEM教程与案例📌论文代码复现📌地图可视化
 最新文章