# 2023-05-20
# 2023-05-20
############ggplot2,CCLE数据实战##############
# CCLE数据的下载、读取、整理和可视化
# CCLE数据下载的两种方式:
## ①通过基因名称检索后下载,探索基因在细胞系中的表达谱
## ②下载含有所有细胞系表达谱信息的文件,综合分析
## 加载包
library(readr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ purrr 1.0.2
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(ggpubr)
## Read the data. This file was downloaded from CCLE; any gene of interest works.
data <- read_csv(file = "ITGAV Expression 22Q2 Public.csv")
data
## Rows: 1406 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Depmap ID, Cell Line Name, Primary Disease, Lineage, Lineage Subtype
## dbl (1): ITGAV Expression 22Q2 Public
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## # A tibble: 1,406 × 6
## `Depmap ID` ITGAV Expression 22Q…¹ `Cell Line Name` `Primary Disease` Lineage
## <chr> <dbl> <chr> <chr> <chr>
## 1 ACH-000001 6.05 NIHOVCAR3 Ovarian Cancer Ovary
## 2 ACH-000002 1.10 HL60 Leukemia Blood
## 3 ACH-000003 4.96 CACO2 Colon/Colorectal… Colore…
## 4 ACH-000004 3.41 HEL Leukemia Blood
## 5 ACH-000005 4.23 HEL9217 Leukemia Blood
## 6 ACH-000006 2.34 MONOMAC6 Leukemia Blood
## 7 ACH-000007 3.15 LS513 Colon/Colorectal… Colore…
## 8 ACH-000008 5.60 A101D Skin Cancer Skin
## 9 ACH-000009 5.76 C2BBE1 Colon/Colorectal… Colore…
## 10 ACH-000011 5.13 253J Bladder Cancer Urinar…
## # ℹ 1,396 more rows
## # ℹ abbreviated name: ¹`ITGAV Expression 22Q2 Public`
## # ℹ 1 more variable: `Lineage Subtype` <chr>
## Plotting with ggplot2
# Replace the six original column names with short names that can be used
# directly inside aes().
colnames(data) <- c("ID", "Expression", "Celltype", "Disease", "Lineage", "Subtype")
data <- as.data.frame(data)

# Base plot object: expression per disease, coloured by disease.
# Wrapping the assignment in parentheses also prints the (still empty) plot.
(g <- ggplot(data, aes(x = Disease, y = Expression, color = Disease)))
g + geom_point()
g + geom_point() + geom_boxplot()

# Turn Disease into a factor whose levels are ordered by mean expression
# (highest first), then rebuild the plot object so the x axis uses that order.
data$Disease <- with(data, reorder(Disease, Expression, mean, decreasing = TRUE))
g <- ggplot(data, aes(x = Disease, y = Expression, color = Disease))
g + geom_boxplot()
g + geom_boxplot() +
  rotate_x_text(60) +
  labs(x = "", y = "mRNA expression / Log2(TPM+1)") +
  theme(legend.position = "none")

# theme_set() changes the default theme for every plot drawn from here on.
theme_set(theme_classic2())

# Final pan-cancer figure: boxplots plus a segment from the overall mean to each
# observation, and a dashed reference line at the overall mean.
g + geom_boxplot() +
  geom_segment(aes(y = mean(Expression), xend = Disease, yend = Expression)) +
  geom_hline(yintercept = mean(data$Expression), lty = 2) +
  rotate_x_text(60) +
  # The y label states the log base so it agrees with the label of the
  # previous plot.
  labs(
    x = "", y = "mRNA Expression / Log2(TPM+1)",
    title = "ITGAV Expression in Pan-cancer Cell Lines",
    caption = "Data from CCLE. website: https://sites.broadinstitute.org/ccle"
  ) +
  theme(legend.position = "none") +
  # `linewidth` replaces the `size` argument of element_rect(), which is
  # deprecated as of ggplot2 3.4.0.
  theme(
    plot.background = element_rect(color = "black", linewidth = 1),
    panel.border = element_rect(color = "gray", linewidth = 1, fill = NA),
    plot.margin = margin(0.5, 2.5, 1.5, 2.5, unit = "cm"),
    plot.title = element_text(
      hjust = 0.5, size = 16, face = "bold",
      margin = margin(10, 0, 10, 0)
    )
  ) +
  # Blank all axis text and draw the axis lines in white, then set the x-axis
  # text back to a text element.
  theme(
    axis.text = element_blank(),
    axis.line = element_line(color = "white")
  ) +
  theme(axis.text.x = element_text())
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Label every cell line as colorectal cancer ("CRC") or "Other" in one
# vectorised step. A row whose Disease is missing gets NA rather than a
# leftover placeholder value.
group <- ifelse(data$Disease == "Colon/Colorectal Cancer", "CRC", "Other")
data2 <- cbind(data, group)

# Base plot object for the two-group comparison; the parentheses also print it.
(p <- ggplot(data2, aes(x = group, y = Expression, color = group)))
p + geom_boxplot()

# Boxplot with mean/SD error bars and a Wilcoxon test label comparing the
# two groups.
p + geom_boxplot() +
  theme_classic(base_size = 12) +
  labs(x = "", y = "mRNA expression \nRNAseq") +
  theme(legend.position = "none") +
  stat_summary(
    fun.data = "mean_sd", geom = "errorbar", width = 0.3,
    position = position_dodge(0.9)
  ) +
  stat_compare_means(method = "wilcox.test", label.x = 1, label.y = 7.5) +
  # The data loaded in this script is ITGAV expression, so the plot is titled
  # with that gene.
  ggtitle("ITGAV") +
  theme(plot.title = element_text(hjust = 0.5))
# Keep only the colorectal cancer rows.
data3 <- data[data$Disease == "Colon/Colorectal Cancer", , drop = FALSE]

# Base plot object: one x position per cell line, ordered by expression.
s <- ggplot(
  data3,
  aes(
    x = reorder(Celltype, Expression, FUN = median),
    y = Expression
  )
)
s

# Simple bar chart of the expression values.
s + geom_col()

# Lollipop chart: a segment from the subset mean to each value, a point sized
# and coloured by expression, and a dashed line at the subset mean. The axes
# are flipped so the cell-line names are on the vertical axis.
s +
  geom_segment(
    aes(y = mean(Expression), xend = Celltype, yend = Expression)
  ) +
  geom_point(aes(size = Expression, color = Expression)) +
  geom_hline(yintercept = mean(data3$Expression), lty = 2) +
  theme_classic2(base_size = 10) +
  labs(
    x = "",
    y = "mRNA Expression / Log(TPM+1)",
    color = "Expression",
    size = "Expression"
  ) +
  scale_color_viridis_c(alpha = 1, begin = 0, end = 1, direction = -1) +
  coord_flip()