通常GSEA的结果会用下面类似的图可视化。
但是,对于多个通路的可视化,以及想展示通路之间的关联时就不友好了。
aPEAR包可以通过检测相似路径的聚类并将其可视化为富集网络,简化路径富集分析结果,其中节点和边分别描述路径和它们之间的相似性。这减少了重叠路径的冗余,并有助于注意数据中最重要的生物学问题。# install.packages("aPEAR")
library(data.table)
library(ggplot2)
library(dplyr)
library(stringr)
library(clusterProfiler)
library(DOSE)
library(org.Hs.eg.db)
library(aPEAR)
加载差异分析的结果
差异表达分析可参考:生物信息数据分析教程视频——13-3种R包(DESeq2、edgeR和limma)进行RNAseq的差异表达分析与比较#DEG
load("DESeq2-filtered.Rdata")
head(DEG)
处理一下数据,用于GSEA分析。pcDEG = DEG[DEG$gene_type == "protein_coding",]
geneMap <- bitr(pcDEG$gene_name,
fromType="SYMBOL",
toType="ENTREZID",
OrgDb="org.Hs.eg.db")
geneMap = geneMap[!duplicated(geneMap$SYMBOL),]
sordeg <- pcDEG[(pcDEG$gene_name %in% geneMap$SYMBOL),]
sordeg <- merge(pcDEG,geneMap,by.x = "gene_name",by.y = "SYMBOL")
sordeg <- arrange(sordeg, desc(logFC))
geneList = sordeg$logFC
names(geneList) = sordeg$ENTREZID
2.执行GSEA
GSEA分析【视频】;clusterProfiler包进行KEGG,GO,GSEA富集分析.set.seed(42)
enrich <- gseGO(geneList, OrgDb = org.Hs.eg.db, ont = 'BP')
3.可视化网络pdf("Network.pdf",height = 20,width = 20)
enrichmentNetwork(enrich@result)
dev.off()
所有结果展现太复杂,我们选取前100个富集通路进行可视化:pdf("Network2.pdf",height = 10,width = 10)
enrichmentNetwork(enrich@result[1:100,])
dev.off()
通过 colorBy 列和 colorType = 'pval'指定颜色通道。pdf("Network3.pdf",height = 10,width = 10)
enrichmentNetwork(enrich1:100,], [
colorBy = 'pvalue',
colorType = 'pval', pCutoff = -5)
dev.off()
使用函数 findPathClusters 获取冗余通路的聚类。 findPathClusters 接受一个带有富集结果的 data.frame,并返回一个通路聚类列表和相似度矩阵:clus<- findPathClusters(enrich@result,
cluster = 'hier',
minClusterSize = 5)
clus <- findPathClusters(enrich@result,
cluster = 'hier',
minClusterSize = 5)
clusdf = clus$clusters
10,width = 10) =
plotPathClusters(
enrichment = enrich@result,
sim = clus$similarity,
clusters = clus$clusters,
fontSize = 4,
outerCutoff = 0.01, # Decrease cutoff between clusters and show some connections
drawEllipses = TRUE
)
dev.off()
完整代码:# install.packages("aPEAR")
library(data.table)
library(ggplot2)
library(dplyr)
library(stringr)
library(clusterProfiler)
library(DOSE)
library(org.Hs.eg.db)
library(aPEAR)
#DEG
load("DESeq2-filtered.Rdata")
head(DEG)
pcDEG = DEG[DEG$gene_type == "protein_coding",]
geneMap <- bitr(pcDEG$gene_name,
fromType="SYMBOL",
toType="ENTREZID",
OrgDb="org.Hs.eg.db")
geneMap = geneMap[!duplicated(geneMap$SYMBOL),]
sordeg <- pcDEG[(pcDEG$gene_name %in% geneMap$SYMBOL),]
sordeg <- merge(pcDEG,geneMap,by.x = "gene_name",by.y = "SYMBOL")
sordeg <- arrange(sordeg, desc(logFC))
geneList = sordeg$logFC
names(geneList) = sordeg$ENTREZID
# data(geneList)
set.seed(42)
enrich <- gseGO(geneList, OrgDb = org.Hs.eg.db, ont = 'BP')
pdf("Network.pdf",height = 20,width = 20)
enrichmentNetwork(enrich@result)
dev.off()
dim(enrich@result)
pdf("Network2.pdf",height = 10,width = 10)
enrichmentNetwork(enrich@result[1:100,])
dev.off()
pdf("Network3.pdf",height = 10,width = 10)
enrichmentNetwork(enrich@result[1:100,],
colorBy = 'pvalue',
colorType = 'pval', pCutoff = -5)
dev.off()
clus <- findPathClusters(enrich@result,
cluster = 'hier',
minClusterSize = 5)
clusdf = clus$clusters
pdf("Network4.pdf",height = 10,width = 10)
plotPathClusters(
enrichment = enrich@result,
sim = clus$similarity,
clusters = clus$clusters,
fontSize = 4,
outerCutoff = 0.01, # Decrease cutoff between clusters and show some connections
drawEllipses = TRUE
)
dev.off()
B站视频合集
经 典 栏 目
微信扫一扫
关注该公众号