本期内容着重于批量富集分析操作
每个cluster之间进行findallmarkers,然后对每个cluster进行富集分析
对每个cluster里的不同组别的差异基因(deg)做富集分析
例1
例2
每个cluster之间进行findallmarkers,然后对每个cluster进行富集分析
#这里的d.all可以等同换成pbmcload("/home/data/t040413/silicosis/spatial_transcriptomics/silicosis_ST_harmony_SCT_r0.5.rds")Idents(d.all)Idents(d.all) %>%table()head(d.all$SCT_snn_res.0.5)d.all$orig.ident=factor(x = d.all$orig.ident,levels = c("NS_7" ,"SiO2_7","NS_56","SiO2_56"))table(d.all$stim)library(dplyr)d.all$stim=case_when(d.all$stim=="sio2_56"~"SiO2_56",d.all$stim=="sio2_7"~"SiO2_7",d.all$stim=="NS_7"~"NS_7",d.all$stim=="NS_56"~"NS_56")table(d.all$stim)Idents(d.all)=d.all$stimtable(d.all$stim)d.all$group=case_when(d.all$stim=="SiO2_56"~"SiO2_56",d.all$stim=="SiO2_7"~"SiO2_7",d.all$stim=="NS_7"~"NS",d.all$stim=="NS_56"~"NS")table(d.all$group)Idents(d.all)=d.all$group
这里有三个cluster,接下来找其marker基因,并合并成大的dataframeIdents(d.all)=d.all$grouptable(Idents(d.all))#Idents(d.all)=d.all$SCT_snn_res.0.5table(Idents(d.all))#degs=list()#Idents(d.all)=d.all$group#https://github.com/satijalab/seurat/discussions/4433#speed up findallmarkerslibrary(presto)Assays(d.all)degs= FindAllMarkers(d.all,logfc.threshold = 0.1)head(degs)names(degs)# # degs=lapply(degs, function(x){# x$gene=rownames(x)# return(x)# })# head(degs)merged_degs=do.call(rbind,degs)head(merged_degs)
degs= FindAllMarkers(d.all,logfc.threshold = 0.3)head(degs)names(degs)dim(degs)# # degs=lapply(degs, function(x){# x$gene=rownames(x)# return(x)# })# head(degs)# merged_degs=do.call(rbind,degs)# head(merged_degs)markers=degs{ all_degs=markers df=all_degs ##筛选阈值确定:p<0.05,|log2FC|>1 p_val_adj = 0.05 avg_log2FC = 0.3 head(all_degs) #根据阈值添加上下调分组标签: df$direction <- case_when( df$avg_log2FC > avg_log2FC & df$p_val_adj < p_val_adj ~ "up", df$avg_log2FC < -avg_log2FC & df$p_val_adj < p_val_adj ~ "down", TRUE ~ 'none' ) head(df) print(getwd()) df=df[df$direction!="none",] head(df) dim(df) df$mygroup=paste(df$group,df$direction,sep = "_") head(df) table(df$mygroup) dim(df) print(getwd()) dir.create("./degs_enrichments0.3fc") setwd("./degs_enrichments0.3fc") ##########################----------------------enrichment analysis================================================== #https://mp.weixin.qq.com/s/WyT-7yKB9YKkZjjyraZdPg { library(clusterProfiler) library(org.Hs.eg.db) #人 library(org.Mm.eg.db) #鼠 library(ggplot2) # degs_for_nlung_vs_tlung$gene=rownames(degs_for_nlung_vs_tlung) head(df) #df$cluster=df$mygroup df=df %>%dplyr::group_by(cluster)%>% filter(p_val_adj <0.05, abs(avg_log2FC) >avg_log2FC) sce.markers=df head(sce.markers) print(getwd()) ids <- suppressWarnings(bitr(sce.markers$gene, 'SYMBOL', 'ENTREZID', 'org.Mm.eg.db')) head(ids) head(sce.markers) tail(sce.markers) dim(sce.markers) sce.markers=merge(sce.markers,ids,by.x='gene',by.y='SYMBOL') head(sce.markers) dim(sce.markers) sce.markers$group=sce.markers$cluster sce.markers=sce.markers[sce.markers$group!="none",] dim(sce.markers) head(sce.markers) #sce.markers=openxlsx::read.xlsx("~/silicosis/spatial_transcriptomicsharmony_cluster_0.5res_gsea/sce.markers_for_each_clusterfor_enrichment.xlsx") #sce.markers$cluster=sce.markers$mygroup dim(sce.markers) head(sce.markers) gcSample=split(sce.markers$ENTREZID, sce.markers$cluster) library(clusterProfiler) gcSample # entrez id , compareCluster names(gcSample) print("===========开始go============") xx <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" , #'org.Hs.eg.db', pvalueCutoff=0.05) #organism="hsa", xx.BP <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" , #'org.Hs.eg.db', pvalueCutoff=0.05,readable=TRUE, ont="BP") #organism="hsa", p=clusterProfiler::dotplot(object = xx.BP,showCategory = 20, label_format =60) p=p+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.5)) p ggsave('degs_compareCluster-BP_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F) ggsave('degs_compareCluster-BP_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F) xx.CC <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" , #'org.Hs.eg.db', pvalueCutoff=0.05,readable=TRUE, ont="CC") #organism="hsa", p=clusterProfiler::dotplot(object = xx.CC,showCategory = 20, label_format =60) p=p+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.5)) p ggsave('degs_compareCluster-CC_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F) ggsave('degs_compareCluster-CC_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F) xx.MF <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" , #'org.Hs.eg.db', pvalueCutoff=0.05,readable=TRUE, ont="MF") #organism="hsa", p=clusterProfiler::dotplot(object = xx.MF,showCategory = 20, label_format =60) p=p+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.5)) p ggsave('degs_compareCluster-MF_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F) ggsave('degs_compareCluster-MF_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F) print(getwd()) .libPaths() print("===========开始 kegg============") gg<-clusterProfiler::compareCluster(gcSample,fun = "enrichKEGG", #readable=TRUE, keyType = 'kegg', #KEGG 富集 organism='mmu',#"rno", pvalueCutoff = 0.05 #指定 p 值阈值(可指定 1 以输出全部 ) p=clusterProfiler::dotplot(object = xx,showCategory = 20, label_format =100) p=p+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.5)) p ggsave('degs_compareCluster-GO_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F) ggsave('degs_compareCluster-GO_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F) xx write.csv(xx,file = "compareCluster-GO_enrichment.csv") p=clusterProfiler::dotplot(gg,showCategory = 20, label_format = 40) p4=p+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.5)) p4 print(paste("保存位置",getwd(),sep = " : ")) ggsave('degs_compareCluster-KEGG_enrichment-2.pdf',plot = p4,width = 13,height = 25,limitsize = F) ggsave('degs_compareCluster-KEGG_enrichment-2.png',plot = p4,width = 13,height = 25,limitsize = F) gg openxlsx::write.xlsx(gg,file = "compareCluster-KEGG_enrichment.xlsx") getwd() openxlsx::write.xlsx(sce.markers,file = "sce.markers_for_each_clusterfor_enrichment.xlsx") # save(xx.BP,xx.CC,xx.MF, gg, file = "~/silicosis/spatial_transcriptomicsharmony_cluster_0.5res_gsea/xx.Rdata") xx.BP@compareClusterResult$oncology="BP" xx.CC@compareClusterResult$oncology="CC" xx.MF@compareClusterResult$oncology="MF" xx.all=do.call(rbind,list(xx.BP@compareClusterResult, xx.CC@compareClusterResult, xx.MF@compareClusterResult)) head(xx.all) openxlsx::write.xlsx(xx.all,file = "enrichments_all.xlsx") print(getwd()) save(xx.BP,xx.CC,xx.MF,xx.all,sce.markers,merged_degs, gg, file = "./xx.Rdata") if (F) { #大鼠 print("===========开始go============") xx <-clusterProfiler::compareCluster(gcSample, fun="enrichGO",OrgDb="org.Rn.eg.db", readable=TRUE, ont = 'ALL', #GO Ontology,可选 BP、MF、CC,也可以指定 ALL 同时计算 3 者 pvalueCutoff=0.05) #organism="hsa", #'org.Hs.eg.db', print("===========开始 kegg============") gg<-clusterProfiler::compareCluster(gcSample,fun = "enrichKEGG", keyType = 'kegg', #KEGG 富集 organism="rno", pvalueCutoff = 0.05 #指定 p 值阈值(可指定 1 以输出全部 ) p=dotplot(xx) p2=p+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.5)) p2 ggsave('degs_compareCluster-GO_enrichment-2.pdf',plot = p2,width = 6,height = 20,limitsize = F) xx openxlsx::write.xlsx(xx,file = "compareCluster-GO_enrichment.xlsx") # ----- p=dotplot(gg) p4=p+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.5)) p4 print(paste("保存位置",getwd(),sep = " : ")) ggsave('degs_compareCluster-KEGG_enrichment-2.pdf',plot = p4,width = 6,height = 12,limitsize = F) gg openxlsx::write.xlsx(gg,file = "compareCluster-KEGG_enrichment.xlsx") } }}
2.每个cluster的不同组别之间做批量富集分析#2新版本findallmarkers做富集分析 每个cluster的不同组别之间------load("/home/data/t040413/silicosis/spatial_transcriptomics/silicosis_ST_harmony_SCT_r0.5.rds")Idents(d.all)Idents(d.all) %>%table()head(d.all$SCT_snn_res.0.5)d.all$orig.ident=factor(x = d.all$orig.ident, levels = c("NS_7" ,"SiO2_7","NS_56","SiO2_56"))table(d.all$stim)library(dplyr)d.all$stim=case_when(d.all$stim=="sio2_56"~"SiO2_56", d.all$stim=="sio2_7"~"SiO2_7", d.all$stim=="NS_7"~"NS_7", d.all$stim=="NS_56"~"NS_56")table(d.all$stim)Idents(d.all)=d.all$stimtable(d.all$stim)d.all$group=case_when( d.all$stim=="SiO2_56"~"SiO2_56", d.all$stim=="SiO2_7"~"SiO2_7", d.all$stim=="NS_7"~"NS", d.all$stim=="NS_56"~"NS")table(d.all$group)dim(d.all)#devtools::install_github("immunogenomics/presto")table(Idents(d.all))Idents(d.all)=d.all$SCT_snn_res.0.5table(Idents(d.all))degs=list()#Idents(d.all)=d.all$groupfor (cluster in unique(d.all$SCT_snn_res.0.5) ) { Idents(d.all)=d.all$SCT_snn_res.0.5 subset_data=base::subset(d.all,idents = cluster) for (eachgroup in c('SiO2_56' , 'SiO2_7' )) { Idents(subset_data)=subset_data$group try({ degs[[paste0(cluster,eachgroup)]] = FindMarkers(subset_data,ident.1 = eachgroup, ident.2 = "NS",logfc.threshold = 0.1) degs[[paste0(cluster,eachgroup)]]$group=paste0(cluster,"_",eachgroup,"_VS_NS") }, silent = TRUE) # 设置 silent = TRUE 以捕获错误但不中断执行 print(paste0(cluster," __",eachgroup,"======done!=========")) } print(paste0(" __",cluster,"======done!=========")) # degs[[eachgroup]]$group=paste0(eachgroup,"_VS_NS") print(getwd())}head(degs)names(degs)degs=lapply(degs, function(x){ x$gene=rownames(x) return(x)})head(degs)# merged_degs=do.call(rbind,degs)# head(merged_degs)markers=merged_degs{ all_degs=markers df=all_degs ##筛选阈值确定:p<0.05,|log2FC|>1 p_val_adj = 0.05 avg_log2FC = 1 head(all_degs) #根据阈值添加上下调分组标签: df$direction <- case_when( df$avg_log2FC > avg_log2FC & df$p_val_adj < p_val_adj ~ "up", df$avg_log2FC < -avg_log2FC & df$p_val_adj < p_val_adj ~ "down", TRUE ~ 'none' ) head(df) print(getwd()) df=df[df$direction!="none",] head(df) dim(df) df$mygroup=paste(df$group,df$direction,sep = "_") head(df) table(df$mygroup) dim(df) print(getwd()) dir.create("./degs_enrichments1fc") setwd("./degs_enrichments1fc") ##########################----------------------enrichment analysis================================================== #https://mp.weixin.qq.com/s/WyT-7yKB9YKkZjjyraZdPg { library(clusterProfiler) library(org.Hs.eg.db) #人 library(org.Mm.eg.db) #鼠 library(ggplot2) # degs_for_nlung_vs_tlung$gene=rownames(degs_for_nlung_vs_tlung) head(df) df$cluster=df$mygroup df=df %>%dplyr::group_by(cluster)%>% filter(p_val_adj <0.05, abs(avg_log2FC) >avg_log2FC) sce.markers=df head(sce.markers) print(getwd()) ids <- suppressWarnings(bitr(sce.markers$gene, 'SYMBOL', 'ENTREZID', 'org.Mm.eg.db')) head(ids) head(sce.markers) tail(sce.markers) dim(sce.markers) sce.markers=merge(sce.markers,ids,by.x='gene',by.y='SYMBOL') head(sce.markers) dim(sce.markers) sce.markers$group=sce.markers$cluster sce.markers=sce.markers[sce.markers$group!="none",] dim(sce.markers) head(sce.markers) #sce.markers=openxlsx::read.xlsx("~/silicosis/spatial_transcriptomicsharmony_cluster_0.5res_gsea/sce.markers_for_each_clusterfor_enrichment.xlsx") #sce.markers$cluster=sce.markers$mygroup dim(sce.markers) head(sce.markers) gcSample=split(sce.markers$ENTREZID, sce.markers$cluster) library(clusterProfiler) gcSample # entrez id , compareCluster names(gcSample) print("===========开始go============") xx <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" , #'org.Hs.eg.db', pvalueCutoff=0.05) #organism="hsa", xx.BP <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" , #'org.Hs.eg.db', pvalueCutoff=0.05,readable=TRUE, ont="BP") #organism="hsa", p=clusterProfiler::dotplot(object = xx.BP,showCategory = 20, label_format =60) p=p+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.5)) p ggsave('degs_compareCluster-BP_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F) ggsave('degs_compareCluster-BP_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F) xx.CC <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" , #'org.Hs.eg.db', pvalueCutoff=0.05,readable=TRUE, ont="CC") #organism="hsa", p=clusterProfiler::dotplot(object = xx.CC,showCategory = 20, label_format =60) p=p+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.5)) p ggsave('degs_compareCluster-CC_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F) ggsave('degs_compareCluster-CC_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F) xx.MF <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" , #'org.Hs.eg.db', pvalueCutoff=0.05,readable=TRUE, ont="MF") #organism="hsa", p=clusterProfiler::dotplot(object = xx.MF,showCategory = 20, label_format =60) p=p+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.5)) p ggsave('degs_compareCluster-MF_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F) ggsave('degs_compareCluster-MF_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F) print(getwd()) .libPaths() print("===========开始 kegg============") gg<-clusterProfiler::compareCluster(gcSample,fun = "enrichKEGG", #readable=TRUE, keyType = 'kegg', #KEGG 富集 organism='mmu',#"rno", pvalueCutoff = 0.05 #指定 p 值阈值(可指定 1 以输出全部 ) p=clusterProfiler::dotplot(object = xx,showCategory = 20, label_format =100) p=p+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.5)) p ggsave('degs_compareCluster-GO_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F) ggsave('degs_compareCluster-GO_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F) xx write.csv(xx,file = "compareCluster-GO_enrichment.csv") p=clusterProfiler::dotplot(gg,showCategory = 20, label_format = 40) p4=p+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.5)) p4 print(paste("保存位置",getwd(),sep = " : ")) ggsave('degs_compareCluster-KEGG_enrichment-2.pdf',plot = p4,width = 13,height = 25,limitsize = F) ggsave('degs_compareCluster-KEGG_enrichment-2.png',plot = p4,width = 13,height = 25,limitsize = F) gg openxlsx::write.xlsx(gg,file = "compareCluster-KEGG_enrichment.xlsx") getwd() openxlsx::write.xlsx(sce.markers,file = "sce.markers_for_each_clusterfor_enrichment.xlsx") # save(xx.BP,xx.CC,xx.MF, gg, file = "~/silicosis/spatial_transcriptomicsharmony_cluster_0.5res_gsea/xx.Rdata") xx.BP@compareClusterResult$oncology="BP" xx.CC@compareClusterResult$oncology="CC" xx.MF@compareClusterResult$oncology="MF" xx.all=do.call(rbind,list(xx.BP@compareClusterResult, xx.CC@compareClusterResult, xx.MF@compareClusterResult)) head(xx.all) openxlsx::write.xlsx(xx.all,file = "enrichments_all.xlsx") print(getwd()) save(xx.BP,xx.CC,xx.MF,xx.all,sce.markers,merged_degs, gg, file = "./xx.Rdata") if (F) { #大鼠 print("===========开始go============") xx <-clusterProfiler::compareCluster(gcSample, fun="enrichGO",OrgDb="org.Rn.eg.db", readable=TRUE, ont = 'ALL', #GO Ontology,可选 BP、MF、CC,也可以指定 ALL 同时计算 3 者 pvalueCutoff=0.05) #organism="hsa", #'org.Hs.eg.db', print("===========开始 kegg============") gg<-clusterProfiler::compareCluster(gcSample,fun = "enrichKEGG", keyType = 'kegg', #KEGG 富集 organism="rno", pvalueCutoff = 0.05 #指定 p 值阈值(可指定 1 以输出全部 ) p=dotplot(xx) p2=p+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.5)) p2 ggsave('degs_compareCluster-GO_enrichment-2.pdf',plot = p2,width = 6,height = 20,limitsize = F) xx openxlsx::write.xlsx(xx,file = "compareCluster-GO_enrichment.xlsx") # ----- p=dotplot(gg) p4=p+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.5)) p4 print(paste("保存位置",getwd(),sep = " : ")) ggsave('degs_compareCluster-KEGG_enrichment-2.pdf',plot = p4,width = 6,height = 12,limitsize = F) gg openxlsx::write.xlsx(gg,file = "compareCluster-KEGG_enrichment.xlsx") } }}
bp
cc
mf
微信扫一扫
关注该公众号