ggplot2优雅的绘制配对气泡图

原创赵小明777 生信随笔 2023-06-16 13:03

去年年底看到闫哥公众号R语言数据分析指南发过一个帖子《ggplot2优雅的绘制配对连线云雨图》。本文也给出我的一个绘图方案，也是我平时一直在用的代码：

Step1. R包和数据加载、主题设置

测试数据在：

链接：https://pan.baidu.com/s/1MuMgMZZCcdO-IGS7_ysfkQ?pwd=1234 提取码：1234

library(dplyr)
library(ggplot2)
library(ggpubr)
library(rstatix)
#### 1.读入数据
pan.meta = readRDS("./Paired_test.data.rds")

head(pan.meta)
#   ID    Group Type      paired Expression
# 1 TCGA-BL-A13J-01A tumor BLCA TCGA-BL-A13J   5.418168
# 2 TCGA-BT-A20N-01A tumor BLCA TCGA-BT-A20N   2.472168
# 3 TCGA-BT-A20Q-01A tumor BLCA TCGA-BT-A20Q   5.149659
# 4 TCGA-BT-A20R-01A tumor BLCA TCGA-BT-A20R   5.590747
# 5 TCGA-BT-A20U-01A tumor BLCA TCGA-BT-A20U   6.717855
# 6 TCGA-BT-A20W-01A tumor BLCA TCGA-BT-A20W   7.140064

ID是TCGA的barcode，Group是肿瘤与对照样本，paired是配对的barcode信息，Expression则是目标基因的表达量。

然后设置主题和颜色：

mytheme <- theme(plot.margin=unit(c(0.5,0.5,0.5,0.5),units=,"cm"),
                 axis.line = element_line(color = "black",size = 0.4),
                 panel.grid.minor = element_blank(),
                 panel.grid.major = element_line(size = 0.2,color = "#e5e5e5"),
                 axis.text.y = element_text(color="black",size=10),
                 axis.text.x = element_text(angle = 45, hjust = 1 ,color="black",size=10),
                 legend.position = "none",
                 panel.spacing = unit(0,"lines"))
fill.color = c(normal="#56B4E9",tumor= "#E69F00")

Step2. 可视化绘图

p1 = ggplot(pan.meta, aes(x = Group,y = Expression,fill = Group)) +
  geom_line(aes(group=paired),position = position_dodge(0.2),color="grey80") +
  geom_point(aes(group=paired,size=Expression),
             pch=21,
             size=3,
             # position = position_dodge(0),
             position = position_jitter(w = 0.1))+
  stat_summary(fun.data = 'mean_se', geom = "errorbar", color = "red", 
               width = 0.25, size = 0.8, position = position_dodge( .9)) +
  # scale_size_continuous(range=c(1,3)) +
  facet_wrap(.~Type,scales = "free_y",ncol = 11) +
  scale_fill_manual(values = fill.color) +
  # scale_y_continuous(limits = c(-4,4),minor_breaks = seq(0,90,1)) +
  labs(x= NULL,y="Gene expression")+
  stat_compare_means(aes(group =  Group),
                     paired = T,
                     # label.y = 3.5, 
                     label.x = 1.5,
                     method = "t.test",
                     label = "p.signif",
                     size = 4) + 
  theme_bw() + mytheme
p1

上面的P值是用stat_compare_means计算的，其实多组间的两两比较还可以考虑用校正后的P值，可以使用rstatix包进行计算：

stat.test<- pan.meta %>%
  group_by(Type) %>%
  t_test(Expression~Group) %>%
  adjust_pvalue(method = "bonferroni") %>%
  add_significance("p.adj")%>%
  add_xy_position(x="Group",dodge = 0.8)
stat.test

可视化绘图：

p2 = ggplot(pan.meta, aes(x = Group, y = Expression)) + 
  geom_line(aes(group=paired),position = position_dodge(0.2),color="grey80") +
  geom_point(aes(group=paired,size=Expression,
                 # alpha=Expression,
                 fill= Group),pch=21,size=3,
             position = position_jitter(w = 0.1))+
  stat_summary(fun.data = 'mean_se', geom = "errorbar", color = "red", 
               width = 0.25, size = 0.8, position = position_dodge( .9)) +
  facet_wrap(~Type ,scale="free_y",nrow = 2) + 
  scale_fill_manual(values= fill.color) +
  scale_x_discrete( labels=NULL ) + 
  stat_pvalue_manual(stat.test,label = "p.adj", #p.adj.signif
                     tip.length = 0.05,
                     # y.position = 6,
                     remove.bracket = T,hjust=1)+
  labs(x= NULL,y="Gene expression")+
  theme_bw() + mytheme
p2

P值太长了，这里可以用星号，或者是科学计数法显示保留前两位小数：

用星号

###用星号
p3 = ggplot(pan.meta, aes(x = Group, y = Expression)) + 
  geom_line(aes(group=paired),position = position_dodge(0.2),color="grey80") +
  geom_point(aes(group=paired,size=Expression,
                 # alpha=Expression,
                 fill= Group),pch=21,size=3,
             position = position_jitter(w = 0.1))+
  stat_summary(fun.data = 'mean_se', geom = "errorbar", color = "red", 
               width = 0.25, size = 0.8, position = position_dodge( .9)) +
  facet_wrap(~Type ,scale="free_y",nrow = 2) + 
  scale_fill_manual(values= fill.color) +
  scale_x_discrete( labels=NULL ) + 
  stat_pvalue_manual(stat.test,label = "p.adj.signif",
                     tip.length = 0.05,
                     # y.position = 6,
                     remove.bracket = F)+
  labs(x= NULL,y="Gene expression")+
  theme_bw() + mytheme
p3

科学计数法显示保留前两位小数

###科学计数法显示保留前两位小数
stat.test$p.adj <- format(stat.test$p.adj, scientific = TRUE) %>% as.numeric()
stat.test$p.adj <- sprintf("%.2e", stat.test$p.adj)
stat.test$p.adj = ifelse(as.numeric(stat.test$p.adj) > 0.05,"NS",stat.test$p.adj)

p4 = ggplot(pan.meta, aes(x = Group, y = Expression)) + 
  geom_line(aes(group=paired),position = position_dodge(0.2),color="grey80") +
  geom_point(aes(group=paired,size=Expression,
                 # alpha=Expression,
                 fill= Group),pch=21,size=3,
             position = position_jitter(w = 0.1))+
  stat_summary(fun.data = 'mean_se', geom = "errorbar", color = "red", 
               width = 0.25, size = 0.8, position = position_dodge( .9)) +
  facet_wrap(~Type ,scale="free_y",nrow = 2) + 
  scale_fill_manual(values= fill.color) +
  scale_x_discrete( labels=NULL ) + 
  stat_pvalue_manual(stat.test,label = "p.adj", #p.adj.signif
                     tip.length = 0.05,
                     # y.position = 6,
                     remove.bracket = T,hjust=1)+
  labs(x= NULL,y="Gene expression")+
  theme_bw() + mytheme
p4

写在最后

其实我的ggplot2功底也不是很好，没有很系统的去钻研ggplot2的语法和结构。因为我认为我只要会修改别人的ggplot2绘图代码，然后把自己想要绘制的各种元素，能转化为语言去进行网络搜索，这样想绘制的图，基于上都可以根据百度谷歌和工具书去实现。绘图当然很重要，但是科研节奏这么紧张，ggplot2的学习到底应该投入多少时间（当然也看悟性），这点见仁见智。最后给大家分享一本我经常翻阅的ggplot2工具书：

链接：https://pan.baidu.com/s/1MuMgMZZCcdO-IGS7_ysfkQ?pwd=1234 提取码：1234

心情记录
一晃六月了，我的博三已接近尾声，马上就是博四了，毕业压力，毕设压力，还有就业压力扑面而来。马上奔三了，这种复杂的心情应该只有局内人才能体会。小小吐槽一下，国内大多数的博士点的薪资真的太少了。。。
我不想贩卖焦虑，我想记录下生活的点滴。数年之后回首这段时光，应该会有很多酸甜苦辣的回忆。也希望自己能够保持初心，屠龙少年不要成为恶龙。
这应该是我第一次在这个公众号里分享一些与数据分析无关的私人心情。。。
加油，尚在奋斗的各位！

- END -

收录于合集 #可视化

8个

上一篇DIY气泡图

喜欢此内容的人还喜欢

微信扫一扫
关注该公众号