KEGG层级网络图——再重复不出来就不是代码的原因了（附全部代码）

原创西瓜站长生信益站 2023-09-17 08:00

收录于合集

生信益站，一点就有益！祝友友们天天开心，早日发 CNS~

大家好，我是站长！事情是这样的：

❝
有友友提醒上周发的推文《绘图专题| KEGG五大功能层级分类网络图》，代码不全。这次为重新编辑版，公开了2个自定义函数的代码，希望能给站友们带来帮助。
❞

引言

图片效果如下：

这个图一共 3 圈，其实这个图本质是一个网络图。由内到外依次表示：富集到通路的所有差异基因数量；富集到 KEGG 每大类（一共 5 类）的差异基因数量；富集到每一个 KEGG pathway 的差异基因数量。

从这个图，大致可以看出哪个通路富集到的差异基因比较多，然后这些通路又属于哪一大类（主要是好看，似乎也没什么太多的含义）。

知识分享

KEGG 最高层级 5 大类为：

Cellular Processes
Environmental Information Processing
Genetic Information Processing
Metabolism
Organismal Systems

安装 R 包

install.packages("ggraph")
install.packages("tidygraph")

数据准备

画图的输入文件为kegg_class.txt，具体准备方法就不介绍了。格式如下：

$ head -5 KEGG_class.txt
Pathway_Hierarchy1                     KEGG_Pathway                          DEG_Number
------------------------------------   -----------------------------------   ----------
Environmental Information Processing   ECM-receptor interaction              8
Cellular Processes                     Apoptosis                             10
Organismal Systems                     Longevity regulating pathway - worm   8
Cellular Processes                     Lysosome                              9

可左右滑动看哦。

开始做图

首先读取文件kegg_class.txt，获取数据框kegg：

kegg <- read.table("KEGG_class.txt", sep = "\t", header = T)

head(kegg)
                    Pathway_Hierarchy1                                 KEGG_Pathway DEG_Number
1 Environmental Information Processing                     ECM-receptor interaction          8
2                   Cellular Processes                                    Apoptosis         10
3                   Organismal Systems          Longevity regulating pathway - worm          8
4                   Cellular Processes                                     Lysosome          9
5                           Metabolism            Drug metabolism - cytochrome P450          6
6                           Metabolism Metabolism of xenobiotics by cytochrome P450          6

接下来定义2个函数，用于获取网络的边和节点对象：

get_graph_node <- function(df,index=NULL,value=tail(colnames(df),1),root=NULL){
  require(dplyr)
  if (length(index) < 2){
    stop("please specify at least two index column(s)")
  } else {
    list <- lapply(seq_along(index), function(i){
      dots <- index[1:i]
      df %>%
        group_by(.dots=dots) %>%
        summarise(node.size=sum(.data[[value]]),
                  node.level=index[[i]],
                  node.count=n()) %>%
        mutate(node.short_name=as.character(.data[[ dots[[length(dots)]] ]]),
               node.branch = as.character(.data[[ dots[[1]]]])) %>%
        tidyr::unite(node.name,dots,sep = "/")
    })
    data <- do.call("rbind",list) %>% as_tibble()
    data$node.level <- factor(data$node.level,levels = index)

    if (is.null(root)){
      return(data)
    } else {
      root_data <- data.frame(node.name=root,
                              node.size=sum(df[[value]]),
                              node.level=root,
                              node.count=1,
                              node.short_name=root,
                              node.branch=root,
                              stringsAsFactors = F)
      data <- rbind(root_data,data)
      data$node.level <- factor(data$node.level, levels = c(root,index))
      return(data)
    }
  }
}

get_graph_edge <- function(df,index=NULL,root=NULL){
  require(dplyr)
  if (length(index) < 2){
    stop("please specify at least two index column(s)")
  } else if (length(index)==2){
    data <- df %>% mutate(from=.data[[index[[1]]]]) %>%
      tidyr::unite(to,index,sep="/") %>%
      select(from,to) %>%
      mutate_at(c("from","to"),as.character)
  } else {
    list <- lapply(seq(2,length(index)), function(i){
      dots <- index[1:i]
      df %>% tidyr::unite(from,dots[-length(dots)],sep = "/",remove = F)  %>%
        tidyr::unite(to,dots,sep="/") %>%
        select(from,to) %>%
        mutate_at(c("from","to"),as.character)
    })
    data <- do.call("rbind",list)
  }
  data <- as_tibble(data)
  if (is.null(root)){
    return(data)
  } else {
    root_data <- df %>% group_by(.dots=index[[1]]) %>%
      summarise(count=n()) %>%
      mutate(from=root,to=as.character(.data[[index[[1]]]] )) %>%
      select(from,to)
    rbind(root_data,data)
  }
}

使用这 2 个自定义函数对数据进行处理：

kegg_index <- c("Pathway_Hierarchy1", "KEGG_Pathway")
nodes_kegg <- get_graph_node(kegg,
                             index = kegg_index,
                             value = "DEG_Number",
                             root = "KEGG")
edges_kegg <- get_graph_edge(kegg,
                             index = kegg_index,
                             root = "KEGG")
# head(edges_kegg,10)
# head(nodes_kegg,10)

get_graph_node，获取网络节点数据；
get_graph_edge，获取网络边数据。

有了节点和边的数据，使用 tbl_graph() 便可以得到一个图：

library(tidygraph)
graph_kegg <- tbl_graph(nodes_kegg, edges_kegg)

然后使用 ggraph 可视化：

library(ggraph)
p <- ggraph(graph_kegg,
            layout = 'dendrogram',
            circular = TRUE) +
  geom_edge_diagonal(aes(color = node1.node.branch),alpha=1/3) +
  geom_node_point(aes(size = node.size,
                      color = node.branch),alpha=1/3) +
  coord_fixed() +
  theme(legend.position = "none",
        panel.background = element_blank())

p

增加文本注释：

p2 <- p + scale_size(range = c(0.5,40)) +
    geom_node_text(
    aes(
      x = 1.0175 * x,
      y = 1.0175 * y,
      label = node.short_name,
      angle = -((-node_angle(x, y) + 90) %% 180) + 90,
      filter = leaf,
      color = node.branch
    ),
    size = 1.5, hjust = 'outward'
    ) +
  geom_node_text(
    aes(label=node.short_name,
        filter = !leaf,
        color = node.branch),
    fontface="bold",
    size=3,
    family="sans"
  )

扩大坐标系，避免文本显示不全，使用下面的命令即可。然后使用 ggsave() 保存：

p2 + coord_cartesian(xlim = c(-1.5,1.5),
                     ylim = c(-1.5,1.5))

# ggsave("kegg_class.png", width = 17, height = 17, unit = "cm", dpi = 300)

最终效果见本文引言部分。

小结

今天的要点如下：

学会使用 ggraph 的 dendrogram 布局；
学会使用 ggplot2 对 ggraph 图片进行美化；
了解 KEGG 层级。

OK，今天的分享到此为止，希望能对您有所帮助。

您的关注、点赞、在看、转发是对益站最大的鼓励和支持哈。

联系站长

❝
对本篇文章有疑问，可以在益站发消息留言，也欢迎各位童鞋扫码加入我们的 QQ 交流群。
❞

收录于合集 #绘图专题

120个

上一篇单细胞cell比例差异检验柱状图——同时展示显著水平 + P值

个人观点，仅供参考

喜欢此内容的人还喜欢

微信扫一扫
关注该公众号