在模仿中精进数据可视化_使用circlize绘制circos圈图
❝
在模仿中精进数据可视化
该系列推文中,我们将从各大顶级学术期刊的Figure
入手,
解读文章的绘图思路,
模仿文章的作图风格,
构建适宜的绘图数据,
并且将代码应用到自己的实际论文中。
绘图缘由:小伙伴们总会展示出一些非常好看且精美的图片,我大概率会去学习和复现一下。其实每个人的时间和精力都是非常有限且异常宝贵的。之所以我会去做,主要有以下原因:
图片非常好看,我自己看着也手痒痒;图片我自己在Paper中也用得上,先储备着留着用;保持了持续学习的状态。
❝顾祖光老师的
circlize
实在是过于优秀。我也是真的爱不释手。
今天这一期推文主要是更新一下基因家族分析流程
中的Circos
圈图。
❝之前写过
R
语言绘制Circos
圈图的推文,可视化结果如下:
❝我们可以发现,内圈添加了基因ID。但是当基因家族成员过多时,标签就会叠加得非常满。
因此笔者对可视化代码进行了更新。
将 GeneID 绘制在外圈;
将染色体ID、染色体的刻度放置在内圈。
这样整体的可视化效果会更好。
直接上代码:
加载R
包
# NOTE(review): rm(list = ls()) removes every (non-hidden) object from the
# environment this script is run in. It does not detach packages or reset
# options, so it is not a true "clean session"; consider restarting R instead.
rm(list = ls())
####----load R Package----####
library(tidyverse)
library(circlize)
library(ComplexHeatmap)
# Two project-local helper scripts. Per the author's note they define two
# custom functions: a sliding-window statistic along the chromosomes and a
# legend helper (presumably the stat_windows() and add_legend() called later
# in this script; the files themselves are not shown here).
source("R/stat_windows.R")
source("R/add_legend.R")
加载数据
####----load Data----####
# Every input below is read as a tab-delimited file with a header row.
# `col_names = TRUE` is written out in full: `T` is an ordinary variable that
# can be reassigned, whereas `TRUE` is a reserved word.

####----load ID----####
# Table of gene-family member IDs; its `ID` column is used for filtering below.
NAC_ID <- read_delim(file = "Input/NAC_File.txt", delim = "\t", col_names = TRUE)

####----load bed file----####
AT_bed <- read_delim(file = "Input/AT_gene.bed", delim = "\t", col_names = TRUE)
# Keep only the BED rows whose ID appears in the family ID table.
NAC_ID_bed <- AT_bed %>%
  dplyr::filter(ID %in% NAC_ID$ID)

####----load Chr----####
# Chromosome table; later passed to circos.genomicInitialize() and
# stat_windows().
AT_Chr <- read_delim(file = "Input/AT_Chr.txt", delim = "\t", col_names = TRUE)

####----load gene type----####
# Joined onto the family BED table by `ID` further down to obtain `Type`.
Athaliana_genetype <- read_delim(
  file = "Input/Athaliana.gene_type",
  delim = "\t",
  col_names = TRUE
)

####----load tandem duplicate----####
AT_tandem <- read_delim(
  file = "Input/Athaliana.tandem.txt",
  delim = "\t",
  col_names = TRUE
)

####----load collinearity duplicate----####
AT_collinearity <- read_delim(
  file = "Input/Athaliana.collinearity.txt",
  delim = "\t",
  col_names = TRUE
)

####----sliding-window statistics----####
# stat_windows() is a project helper (sourced from R/stat_windows.R, not shown
# here). It is called with the chromosome table, a window size of 500000 and
# the family BED table; the result is plotted as a line track later on.
Chr_window <- stat_windows(
  chr_info = AT_Chr,
  windows = 500000,
  GF_info = NAC_ID_bed
)
####----gene type of each family member----####
# Attach the gene-type annotation to every family member by `ID`.
# NOTE(review): the original carried a commented-out `dplyr::select(-Type)`
# step before the join; presumably needed only when NAC_ID_bed already has a
# `Type` column - confirm against the input file.
#
# The case_when() acts as a whitelist: Type values 0-4 are kept as numbers,
# any other value (including NA from an unmatched join) becomes NA because
# case_when() returns NA when no condition matches.
gene_type <- NAC_ID_bed %>%
  left_join(Athaliana_genetype, by = c("ID" = "ID")) %>%
  dplyr::mutate(
    Type = case_when(
      Type == 0 ~ 0,
      Type == 1 ~ 1,
      Type == 2 ~ 2,
      Type == 3 ~ 3,
      Type == 4 ~ 4
    )
  )
# Print the joined table for a quick visual check.
gene_type
####----tandem bed----####
# Paired BED tables: bed1 and bed2 are combined column-wise below and passed
# together to circos.genomicLink() in the plotting section, so row i of bed1
# is expected to pair with row i of bed2.
# `col_names = TRUE` replaces the reassignable shorthand `T`.
TPS_tandem_bed1 <- read_delim(
  file = "Input/TPS_tandem_bed1.txt",
  delim = "\t",
  col_names = TRUE
)
TPS_tandem_bed2 <- read_delim(
  file = "Input/TPS_tandem_bed2.txt",
  delim = "\t",
  col_names = TRUE
)

####----WGD bed----####
TPS_collin_bed1 <- read_delim(
  file = "Input/TPS_collin_bed1.txt",
  delim = "\t",
  col_names = TRUE
)
TPS_collin_bed2 <- read_delim(
  file = "Input/TPS_collin_bed2.txt",
  delim = "\t",
  col_names = TRUE
)

# Side-by-side pair tables: the eight columns are renamed to
# Chr/Start/End/ID for each partner and tagged with the duplication type.
# cbind() is kept (rather than bind_cols()) so duplicated input column names
# are accepted as-is before set_names() overwrites them.
TPS_tandem <- cbind(TPS_tandem_bed1, TPS_tandem_bed2) %>%
  purrr::set_names(c(
    "Chr1", "Start1", "End1", "ID1",
    "Chr2", "Start2", "End2", "ID2"
  )) %>%
  dplyr::mutate(Type = "tandem")

TPS_collin <- cbind(TPS_collin_bed1, TPS_collin_bed2) %>%
  purrr::set_names(c(
    "Chr1", "Start1", "End1", "ID1",
    "Chr2", "Start2", "End2", "ID2"
  )) %>%
  dplyr::mutate(Type = "WGD")
####----Plot----####
# Open an 8 x 8 PDF device; every circos call up to dev.off() draws onto it.
pdf(file = "./Output/circos_AT.pdf", width = 8, height = 8)

#####-----initialise the chromosome layout-----#####
# plotType = NULL: only the sectors are set up here; the ideogram, axis and
# chromosome names are added by the explicit track calls that follow.
circos.genomicInitialize(
  AT_Chr,
  plotType = NULL,
  major.by = 5000000,
  track.height = 0.01,
  axis.labels.cex = 0.4 * par("cex"),
  labels.cex = 0.6 * par("cex")
)
#####-----gene ID labels (outermost track)-----#####
# Labels are taken from column 4 of the family BED table and drawn on the
# outside. Text and connector-line colours are both the integer code of the
# first BED column (as a factor), so labels sharing that value share a colour.
circos.genomicLabels(
  NAC_ID_bed,
  labels.column = 4,
  side = "outside",
  cex = 0.6,
  padding = 0.1,
  connection_height = mm_h(3),
  col = as.numeric(factor(NAC_ID_bed[[1]])),
  line_col = as.numeric(factor(NAC_ID_bed[[1]]))
)
#####-----chromosome ideogram: filled rectangles-----#####
circos.genomicTrackPlotRegion(
  AT_Chr,
  stack = TRUE,
  track.height = 0.05,
  bg.border = NA,
  panel.fun = function(region, value, ...) {
    circos.genomicRect(
      region, value,
      col = "#f768a1",
      border = "black",
      ...
    )
  }
)

# Revisit the track that was just drawn (track.index = current track) and add
# a genomic axis at its bottom edge, pointing inwards.
circos.track(
  track.index = get.current.track.index(),
  bg.border = NA,
  panel.fun = function(x, y) {
    circos.genomicAxis(h = "bottom", direction = "inside")
  }
)

# Chromosome names on their own borderless track. The y position
# mean(ylim) - 0.3 lies below the track's lower limit (ylim is c(0, 0.5)),
# which shifts the text towards the centre of the plot.
circos.track(
  ylim = c(0, 0.5),
  track.height = 0.05,
  bg.border = NA,
  panel.fun = function(x, y) {
    circos.text(
      mean(CELL_META$xlim),
      mean(CELL_META$ylim) - 0.3,
      CELL_META$sector.index,
      cex = 0.5,
      col = "#000000",
      facing = "inside",
      niceFacing = TRUE
    )
  }
)
#####-----sliding-window statistics track-----#####
# Draws the stat_windows() result as a line on a light-grey background.
circos.genomicTrack(
  Chr_window,
  track.height = 0.1,
  bg.col = "#f0f0f0",
  bg.border = NA,
  panel.fun = function(region, value, ...) {
    # Per-window values as a thin blue line.
    circos.genomicLines(region, value, col = "#2171b5", lwd = 0.35, ...)

    # Dashed segment through (0, 0), (0.5, 0.5) and (1, 1) in cell
    # coordinates, kept exactly as in the original script.
    ref_pts <- c(0, 0.5, 1)
    circos.lines(
      ref_pts,
      ref_pts,
      col = "#2171b5",
      lwd = 0.15,
      lty = 2
    )

    # Small y-axis for each sector.
    circos.yaxis(
      labels.cex = 0.2,
      lwd = 0.1,
      tick.length = convert_x(0.2, "mm")
    )
  }
)
#####-----duplication / gene-type track-----#####
# Colour mapping for the gene-type codes 0-4 (one colour per break).
# The colour vector is passed as `colors =`, the full argument name of
# colorRamp2(); the previous `col =` only worked through partial matching.
color_assign <- colorRamp2(
  breaks = c(0, 1, 2, 3, 4),
  colors = c("#00ADFF", "#e66101", "#fdb863", "#b2abd2", "#5e3c99")
)

# One rectangle per family member, with fill and border both set from the
# first value column via color_assign().
# NOTE(review): this relies on value[[1]] being the numeric `Type` column of
# gene_type inside the panel function - confirm against the input layout.
circos.genomicTrackPlotRegion(
  gene_type,
  stack = TRUE,
  track.height = 0.1,
  bg.border = NA,
  panel.fun = function(region, value, ...) {
    type_col <- color_assign(value[[1]])
    circos.genomicRect(
      region, value,
      col = type_col,
      border = type_col,
      ...
    )
  }
)
#####-----links between duplicated gene pairs-----#####
# Fully opaque black, passed as `col` to both link calls below.
colors_tmp <- scales::alpha("black", alpha = 1)

# Collinear (WGD) pairs: purple border.
circos.genomicLink(
  TPS_collin_bed1,
  TPS_collin_bed2,
  col = colors_tmp,
  border = "#8c6bb1",
  lwd = 1
)

# Tandem pairs: green border.
circos.genomicLink(
  TPS_tandem_bed1,
  TPS_tandem_bed2,
  col = colors_tmp,
  border = "#41ab5d",
  lwd = 1
)

# Project helper (presumably from R/add_legend.R, not shown here).
add_legend()

# Reset the circlize state, then close the PDF device to write the file.
circos.clear()
dev.off()
版本信息
####----sessionInfo----####
# Print the R version, platform and the versions of attached/loaded packages
# so the figure can be reproduced with the same environment.
sessionInfo()
R version 4.3.0 (2023-04-21)
Platform: x86_64-apple-darwin20 (64-bit)
Running under: macOS 15.0.1
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: Asia/Shanghai
tzcode source: internal
attached base packages:
[1] grid stats graphics grDevices utils datasets methods base
other attached packages:
[1] ComplexHeatmap_2.18.0 circlize_0.4.15 lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1
[6] dplyr_1.1.4 purrr_1.0.2 readr_2.1.5 tidyr_1.3.1 tibble_3.2.1
[11] ggplot2_3.5.1 tidyverse_2.0.0
loaded via a namespace (and not attached):
[1] utf8_1.2.4 generics_0.1.3 shape_1.4.6 stringi_1.8.3 hms_1.1.3 digest_0.6.36
[7] magrittr_2.0.3 RColorBrewer_1.1-3 timechange_0.2.0 iterators_1.0.14 foreach_1.5.2 doParallel_1.0.17
[13] GlobalOptions_0.1.2 fansi_1.0.6 scales_1.3.0 codetools_0.2-19 cli_3.6.3 rlang_1.1.4
[19] crayon_1.5.2 bit64_4.0.5 munsell_0.5.1 withr_3.0.1 tools_4.3.0 parallel_4.3.0
[25] tzdb_0.4.0 colorspace_2.1-1 GetoptLong_1.0.5 BiocGenerics_0.48.1 vctrs_0.6.5 R6_2.5.1
[31] png_0.1-8 stats4_4.3.0 matrixStats_1.1.0 lifecycle_1.0.4 bit_4.0.5 S4Vectors_0.40.2
[37] IRanges_2.36.0 vroom_1.6.4 clue_0.3-65 cluster_2.1.6 pkgconfig_2.0.3 pillar_1.9.0
[43] gtable_0.3.5 glue_1.7.0 tidyselect_1.2.1 rstudioapi_0.15.0 farver_2.1.2 rjson_0.2.21
[49] compiler_4.3.0
历史绘图合集
进化树合集
环状图
散点图
基因家族合集
换一个排布方式:
首先查看基础版热图:
然后再看进阶版热图:
基因组共线性
WGCNA ggplot2版本
其他科研绘图
合作、联系和交流
有很多小伙伴在后台私信作者,非常抱歉,我经常看不到导致错过,请添加下面的微信联系作者,一起交流数据分析和可视化。