对单细胞每个cluster进行批量富集分析

生信小博士 生信菜鸟团 2023-09-24 17:06

本期内容着重于批量富集分析操作


  1. 每个cluster之间进行findallmarkers,然后对每个cluster进行富集分析



  2. 对每个cluster里的不同组别的差异基因(deg)做富集分析




例1


图片



例2

图片


  1. 每个cluster之间进行findallmarkers,然后对每个cluster进行富集分析



#这里的d.all可以等同换成pbmc
load("/home/data/t040413/silicosis/spatial_transcriptomics/silicosis_ST_harmony_SCT_r0.5.rds")
Idents(d.all)Idents(d.all) %>%table()head(d.all$SCT_snn_res.0.5)

d.all$orig.ident=factor(x = d.all$orig.ident, levels = c("NS_7" ,"SiO2_7","NS_56","SiO2_56"))
table(d.all$stim)library(dplyr)d.all$stim=case_when(d.all$stim=="sio2_56"~"SiO2_56", d.all$stim=="sio2_7"~"SiO2_7", d.all$stim=="NS_7"~"NS_7", d.all$stim=="NS_56"~"NS_56")table(d.all$stim)

Idents(d.all)=d.all$stimtable(d.all$stim)
d.all$group=case_when( d.all$stim=="SiO2_56"~"SiO2_56", d.all$stim=="SiO2_7"~"SiO2_7", d.all$stim=="NS_7"~"NS", d.all$stim=="NS_56"~"NS")table(d.all$group)
Idents(d.all)=d.all$group

图片

这里有三个cluster,接下来找其marker基因,并合并成大的dataframe


Idents(d.all)=d.all$group
table(Idents(d.all))#Idents(d.all)=d.all$SCT_snn_res.0.5table(Idents(d.all))#degs=list()#Idents(d.all)=d.all$group#https://github.com/satijalab/seurat/discussions/4433#speed up findallmarkerslibrary(presto)Assays(d.all)degs= FindAllMarkers(d.all,logfc.threshold = 0.1)
head(degs)names(degs)
# # degs=lapply(degs, function(x){# x$gene=rownames(x)# return(x)# })# head(degs)

merged_degs=do.call(rbind,degs)head(merged_degs)

图片


degs=  FindAllMarkers(d.all,logfc.threshold = 0.3)
head(degs)names(degs)dim(degs)# # degs=lapply(degs, function(x){# x$gene=rownames(x)# return(x)# })# head(degs)

# merged_degs=do.call(rbind,degs)# head(merged_degs)
markers=degs{
all_degs=markers df=all_degs ##筛选阈值确定:p<0.05,|log2FC|>1 p_val_adj = 0.05 avg_log2FC = 0.3 head(all_degs)

#根据阈值添加上下调分组标签: df$direction <- case_when( df$avg_log2FC > avg_log2FC & df$p_val_adj < p_val_adj ~ "up", df$avg_log2FC < -avg_log2FC & df$p_val_adj < p_val_adj ~ "down", TRUE ~ 'none' ) head(df) print(getwd()) df=df[df$direction!="none",] head(df) dim(df) df$mygroup=paste(df$group,df$direction,sep = "_") head(df) table(df$mygroup) dim(df)
print(getwd())
dir.create("./degs_enrichments0.3fc") setwd("./degs_enrichments0.3fc") ##########################----------------------enrichment analysis================================================== #https://mp.weixin.qq.com/s/WyT-7yKB9YKkZjjyraZdPg

{ library(clusterProfiler) library(org.Hs.eg.db) #人 library(org.Mm.eg.db) #鼠 library(ggplot2) # degs_for_nlung_vs_tlung$gene=rownames(degs_for_nlung_vs_tlung) head(df) #df$cluster=df$mygroup
df=df %>%dplyr::group_by(cluster)%>% filter(p_val_adj <0.05, abs(avg_log2FC) >avg_log2FC) sce.markers=df head(sce.markers) print(getwd()) ids <- suppressWarnings(bitr(sce.markers$gene, 'SYMBOL', 'ENTREZID', 'org.Mm.eg.db')) head(ids)
head(sce.markers) tail(sce.markers) dim(sce.markers) sce.markers=merge(sce.markers,ids,by.x='gene',by.y='SYMBOL') head(sce.markers) dim(sce.markers) sce.markers$group=sce.markers$cluster sce.markers=sce.markers[sce.markers$group!="none",] dim(sce.markers) head(sce.markers) #sce.markers=openxlsx::read.xlsx("~/silicosis/spatial_transcriptomicsharmony_cluster_0.5res_gsea/sce.markers_for_each_clusterfor_enrichment.xlsx") #sce.markers$cluster=sce.markers$mygroup dim(sce.markers) head(sce.markers) gcSample=split(sce.markers$ENTREZID, sce.markers$cluster)
library(clusterProfiler) gcSample # entrez id , compareCluster names(gcSample) print("===========开始go============") xx <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" , #'org.Hs.eg.db', pvalueCutoff=0.05) #organism="hsa",
xx.BP <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" , #'org.Hs.eg.db', pvalueCutoff=0.05,readable=TRUE, ont="BP") #organism="hsa", p=clusterProfiler::dotplot(object = xx.BP,showCategory = 20, label_format =60) p=p+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.5)) p ggsave('degs_compareCluster-BP_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F) ggsave('degs_compareCluster-BP_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F)
xx.CC <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" , #'org.Hs.eg.db', pvalueCutoff=0.05,readable=TRUE, ont="CC") #organism="hsa", p=clusterProfiler::dotplot(object = xx.CC,showCategory = 20, label_format =60) p=p+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.5)) p ggsave('degs_compareCluster-CC_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F) ggsave('degs_compareCluster-CC_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F)
xx.MF <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" , #'org.Hs.eg.db', pvalueCutoff=0.05,readable=TRUE, ont="MF") #organism="hsa", p=clusterProfiler::dotplot(object = xx.MF,showCategory = 20, label_format =60) p=p+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.5)) p ggsave('degs_compareCluster-MF_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F) ggsave('degs_compareCluster-MF_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F)


print(getwd())
.libPaths()

print("===========开始 kegg============") gg<-clusterProfiler::compareCluster(gcSample,fun = "enrichKEGG", #readable=TRUE, keyType = 'kegg', #KEGG 富集 organism='mmu',#"rno", pvalueCutoff = 0.05 #指定 p 值阈值(可指定 1 以输出全部 )
p=clusterProfiler::dotplot(object = xx,showCategory = 20, label_format =100) p=p+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.5)) p ggsave('degs_compareCluster-GO_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F) ggsave('degs_compareCluster-GO_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F)
xx write.csv(xx,file = "compareCluster-GO_enrichment.csv")
p=clusterProfiler::dotplot(gg,showCategory = 20, label_format = 40) p4=p+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.5)) p4 print(paste("保存位置",getwd(),sep = " : ")) ggsave('degs_compareCluster-KEGG_enrichment-2.pdf',plot = p4,width = 13,height = 25,limitsize = F) ggsave('degs_compareCluster-KEGG_enrichment-2.png',plot = p4,width = 13,height = 25,limitsize = F)
gg openxlsx::write.xlsx(gg,file = "compareCluster-KEGG_enrichment.xlsx")
getwd() openxlsx::write.xlsx(sce.markers,file = "sce.markers_for_each_clusterfor_enrichment.xlsx")

# save(xx.BP,xx.CC,xx.MF, gg, file = "~/silicosis/spatial_transcriptomicsharmony_cluster_0.5res_gsea/xx.Rdata")
xx.BP@compareClusterResult$oncology="BP" xx.CC@compareClusterResult$oncology="CC" xx.MF@compareClusterResult$oncology="MF"

xx.all=do.call(rbind,list(xx.BP@compareClusterResult, xx.CC@compareClusterResult, xx.MF@compareClusterResult)) head(xx.all) openxlsx::write.xlsx(xx.all,file = "enrichments_all.xlsx") print(getwd()) save(xx.BP,xx.CC,xx.MF,xx.all,sce.markers,merged_degs, gg, file = "./xx.Rdata")

if (F) { #大鼠 print("===========开始go============") xx <-clusterProfiler::compareCluster(gcSample, fun="enrichGO",OrgDb="org.Rn.eg.db", readable=TRUE, ont = 'ALL', #GO Ontology,可选 BP、MF、CC,也可以指定 ALL 同时计算 3 者 pvalueCutoff=0.05) #organism="hsa", #'org.Hs.eg.db',
print("===========开始 kegg============") gg<-clusterProfiler::compareCluster(gcSample,fun = "enrichKEGG", keyType = 'kegg', #KEGG 富集 organism="rno", pvalueCutoff = 0.05 #指定 p 值阈值(可指定 1 以输出全部 )
p=dotplot(xx) p2=p+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.5)) p2 ggsave('degs_compareCluster-GO_enrichment-2.pdf',plot = p2,width = 6,height = 20,limitsize = F) xx openxlsx::write.xlsx(xx,file = "compareCluster-GO_enrichment.xlsx") # ----- p=dotplot(gg) p4=p+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.5)) p4 print(paste("保存位置",getwd(),sep = " : ")) ggsave('degs_compareCluster-KEGG_enrichment-2.pdf',plot = p4,width = 6,height = 12,limitsize = F) gg openxlsx::write.xlsx(gg,file = "compareCluster-KEGG_enrichment.xlsx") } }

}

图片


图片




2.每个cluster的不同组别之间做批量富集分析

#2新版本findallmarkers做富集分析 每个cluster的不同组别之间------load("/home/data/t040413/silicosis/spatial_transcriptomics/silicosis_ST_harmony_SCT_r0.5.rds")
Idents(d.all)Idents(d.all) %>%table()head(d.all$SCT_snn_res.0.5)

d.all$orig.ident=factor(x = d.all$orig.ident, levels = c("NS_7" ,"SiO2_7","NS_56","SiO2_56"))
table(d.all$stim)library(dplyr)d.all$stim=case_when(d.all$stim=="sio2_56"~"SiO2_56", d.all$stim=="sio2_7"~"SiO2_7", d.all$stim=="NS_7"~"NS_7", d.all$stim=="NS_56"~"NS_56")table(d.all$stim)

Idents(d.all)=d.all$stimtable(d.all$stim)
d.all$group=case_when( d.all$stim=="SiO2_56"~"SiO2_56", d.all$stim=="SiO2_7"~"SiO2_7", d.all$stim=="NS_7"~"NS", d.all$stim=="NS_56"~"NS")table(d.all$group)
dim(d.all)#devtools::install_github("immunogenomics/presto")
table(Idents(d.all))Idents(d.all)=d.all$SCT_snn_res.0.5table(Idents(d.all))
degs=list()#Idents(d.all)=d.all$groupfor (cluster in unique(d.all$SCT_snn_res.0.5) ) {
Idents(d.all)=d.all$SCT_snn_res.0.5 subset_data=base::subset(d.all,idents = cluster)

for (eachgroup in c('SiO2_56' , 'SiO2_7' )) {
Idents(subset_data)=subset_data$group

try({ degs[[paste0(cluster,eachgroup)]] = FindMarkers(subset_data,ident.1 = eachgroup, ident.2 = "NS",logfc.threshold = 0.1)
degs[[paste0(cluster,eachgroup)]]$group=paste0(cluster,"_",eachgroup,"_VS_NS")
}, silent = TRUE) # 设置 silent = TRUE 以捕获错误但不中断执行


print(paste0(cluster," __",eachgroup,"======done!=========")) }

print(paste0(" __",cluster,"======done!=========")) # degs[[eachgroup]]$group=paste0(eachgroup,"_VS_NS") print(getwd())}head(degs)names(degs)
degs=lapply(degs, function(x){ x$gene=rownames(x) return(x)})head(degs)

# merged_degs=do.call(rbind,degs)# head(merged_degs)
markers=merged_degs{
all_degs=markers df=all_degs ##筛选阈值确定:p<0.05,|log2FC|>1 p_val_adj = 0.05 avg_log2FC = 1 head(all_degs)

#根据阈值添加上下调分组标签: df$direction <- case_when( df$avg_log2FC > avg_log2FC & df$p_val_adj < p_val_adj ~ "up", df$avg_log2FC < -avg_log2FC & df$p_val_adj < p_val_adj ~ "down", TRUE ~ 'none' ) head(df) print(getwd()) df=df[df$direction!="none",] head(df) dim(df) df$mygroup=paste(df$group,df$direction,sep = "_") head(df) table(df$mygroup) dim(df)
print(getwd())
dir.create("./degs_enrichments1fc") setwd("./degs_enrichments1fc") ##########################----------------------enrichment analysis================================================== #https://mp.weixin.qq.com/s/WyT-7yKB9YKkZjjyraZdPg

{ library(clusterProfiler) library(org.Hs.eg.db) #人 library(org.Mm.eg.db) #鼠 library(ggplot2) # degs_for_nlung_vs_tlung$gene=rownames(degs_for_nlung_vs_tlung) head(df) df$cluster=df$mygroup
df=df %>%dplyr::group_by(cluster)%>% filter(p_val_adj <0.05, abs(avg_log2FC) >avg_log2FC) sce.markers=df head(sce.markers) print(getwd()) ids <- suppressWarnings(bitr(sce.markers$gene, 'SYMBOL', 'ENTREZID', 'org.Mm.eg.db')) head(ids)
head(sce.markers) tail(sce.markers) dim(sce.markers) sce.markers=merge(sce.markers,ids,by.x='gene',by.y='SYMBOL') head(sce.markers) dim(sce.markers) sce.markers$group=sce.markers$cluster sce.markers=sce.markers[sce.markers$group!="none",] dim(sce.markers) head(sce.markers) #sce.markers=openxlsx::read.xlsx("~/silicosis/spatial_transcriptomicsharmony_cluster_0.5res_gsea/sce.markers_for_each_clusterfor_enrichment.xlsx") #sce.markers$cluster=sce.markers$mygroup dim(sce.markers) head(sce.markers) gcSample=split(sce.markers$ENTREZID, sce.markers$cluster)
library(clusterProfiler) gcSample # entrez id , compareCluster names(gcSample) print("===========开始go============") xx <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" , #'org.Hs.eg.db', pvalueCutoff=0.05) #organism="hsa",
xx.BP <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" , #'org.Hs.eg.db', pvalueCutoff=0.05,readable=TRUE, ont="BP") #organism="hsa", p=clusterProfiler::dotplot(object = xx.BP,showCategory = 20, label_format =60) p=p+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.5)) p ggsave('degs_compareCluster-BP_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F) ggsave('degs_compareCluster-BP_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F)
xx.CC <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" , #'org.Hs.eg.db', pvalueCutoff=0.05,readable=TRUE, ont="CC") #organism="hsa", p=clusterProfiler::dotplot(object = xx.CC,showCategory = 20, label_format =60) p=p+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.5)) p ggsave('degs_compareCluster-CC_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F) ggsave('degs_compareCluster-CC_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F)
xx.MF <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" , #'org.Hs.eg.db', pvalueCutoff=0.05,readable=TRUE, ont="MF") #organism="hsa", p=clusterProfiler::dotplot(object = xx.MF,showCategory = 20, label_format =60) p=p+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.5)) p ggsave('degs_compareCluster-MF_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F) ggsave('degs_compareCluster-MF_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F)


print(getwd())
.libPaths()

print("===========开始 kegg============") gg<-clusterProfiler::compareCluster(gcSample,fun = "enrichKEGG", #readable=TRUE, keyType = 'kegg', #KEGG 富集 organism='mmu',#"rno", pvalueCutoff = 0.05 #指定 p 值阈值(可指定 1 以输出全部 )
p=clusterProfiler::dotplot(object = xx,showCategory = 20, label_format =100) p=p+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.5)) p ggsave('degs_compareCluster-GO_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F) ggsave('degs_compareCluster-GO_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F)
xx write.csv(xx,file = "compareCluster-GO_enrichment.csv")
p=clusterProfiler::dotplot(gg,showCategory = 20, label_format = 40) p4=p+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.5)) p4 print(paste("保存位置",getwd(),sep = " : ")) ggsave('degs_compareCluster-KEGG_enrichment-2.pdf',plot = p4,width = 13,height = 25,limitsize = F) ggsave('degs_compareCluster-KEGG_enrichment-2.png',plot = p4,width = 13,height = 25,limitsize = F)
gg openxlsx::write.xlsx(gg,file = "compareCluster-KEGG_enrichment.xlsx")
getwd() openxlsx::write.xlsx(sce.markers,file = "sce.markers_for_each_clusterfor_enrichment.xlsx")

# save(xx.BP,xx.CC,xx.MF, gg, file = "~/silicosis/spatial_transcriptomicsharmony_cluster_0.5res_gsea/xx.Rdata")
xx.BP@compareClusterResult$oncology="BP" xx.CC@compareClusterResult$oncology="CC" xx.MF@compareClusterResult$oncology="MF"

xx.all=do.call(rbind,list(xx.BP@compareClusterResult, xx.CC@compareClusterResult, xx.MF@compareClusterResult)) head(xx.all) openxlsx::write.xlsx(xx.all,file = "enrichments_all.xlsx") print(getwd()) save(xx.BP,xx.CC,xx.MF,xx.all,sce.markers,merged_degs, gg, file = "./xx.Rdata")

if (F) { #大鼠 print("===========开始go============") xx <-clusterProfiler::compareCluster(gcSample, fun="enrichGO",OrgDb="org.Rn.eg.db", readable=TRUE, ont = 'ALL', #GO Ontology,可选 BP、MF、CC,也可以指定 ALL 同时计算 3 者 pvalueCutoff=0.05) #organism="hsa", #'org.Hs.eg.db',
print("===========开始 kegg============") gg<-clusterProfiler::compareCluster(gcSample,fun = "enrichKEGG", keyType = 'kegg', #KEGG 富集 organism="rno", pvalueCutoff = 0.05 #指定 p 值阈值(可指定 1 以输出全部 )
p=dotplot(xx) p2=p+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.5)) p2 ggsave('degs_compareCluster-GO_enrichment-2.pdf',plot = p2,width = 6,height = 20,limitsize = F) xx openxlsx::write.xlsx(xx,file = "compareCluster-GO_enrichment.xlsx") # ----- p=dotplot(gg) p4=p+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.5)) p4 print(paste("保存位置",getwd(),sep = " : ")) ggsave('degs_compareCluster-KEGG_enrichment-2.pdf',plot = p4,width = 6,height = 12,limitsize = F) gg openxlsx::write.xlsx(gg,file = "compareCluster-KEGG_enrichment.xlsx") } }

}

bp

图片

cc


图片

mf


图片






收录于合集 #单细胞
 6
上一篇比monocle更快的slingshot-CNS高分文章常用

微信扫一扫
关注该公众号