对单细胞每个cluster进行批量富集分析

原创生信小博士生信菜鸟团 2023-09-24 17:06

收录于合集

本期内容着重于批量富集分析操作

每个cluster之间进行findallmarkers，然后对每个cluster进行富集分析
对每个cluster里的不同组别的差异基因（deg）做富集分析

例1

例2

每个cluster之间进行findallmarkers，然后对每个cluster进行富集分析

#这里的d.all可以等同换成pbmc
load("/home/data/t040413/silicosis/spatial_transcriptomics/silicosis_ST_harmony_SCT_r0.5.rds")
Idents(d.all)Idents(d.all) %>%table()head(d.all$SCT_snn_res.0.5)

d.all$orig.ident=factor(x = d.all$orig.ident,                        levels = c("NS_7" ,"SiO2_7","NS_56","SiO2_56"))
table(d.all$stim)library(dplyr)d.all$stim=case_when(d.all$stim=="sio2_56"~"SiO2_56",                     d.all$stim=="sio2_7"~"SiO2_7",                     d.all$stim=="NS_7"~"NS_7",                     d.all$stim=="NS_56"~"NS_56")table(d.all$stim)

Idents(d.all)=d.all$stimtable(d.all$stim)
d.all$group=case_when(  d.all$stim=="SiO2_56"~"SiO2_56",  d.all$stim=="SiO2_7"~"SiO2_7",  d.all$stim=="NS_7"~"NS",  d.all$stim=="NS_56"~"NS")table(d.all$group)
Idents(d.all)=d.all$group

这里有三个cluster，接下来找其marker基因，并合并成大的dataframe


Idents(d.all)=d.all$group
table(Idents(d.all))#Idents(d.all)=d.all$SCT_snn_res.0.5table(Idents(d.all))#degs=list()#Idents(d.all)=d.all$group#https://github.com/satijalab/seurat/discussions/4433#speed up findallmarkerslibrary(presto)Assays(d.all)degs=  FindAllMarkers(d.all,logfc.threshold = 0.1)
head(degs)names(degs)
# # degs=lapply(degs, function(x){#   x$gene=rownames(x)#   return(x)# })# head(degs)

merged_degs=do.call(rbind,degs)head(merged_degs)

degs=  FindAllMarkers(d.all,logfc.threshold = 0.3)
head(degs)names(degs)dim(degs)# # degs=lapply(degs, function(x){#   x$gene=rownames(x)#   return(x)# })# head(degs)

# merged_degs=do.call(rbind,degs)# head(merged_degs)
markers=degs{
  all_degs=markers  df=all_degs  ##筛选阈值确定：p＜0.05，|log2FC|＞1  p_val_adj = 0.05  avg_log2FC = 0.3  head(all_degs)

  #根据阈值添加上下调分组标签：  df$direction <- case_when(    df$avg_log2FC > avg_log2FC & df$p_val_adj < p_val_adj ~ "up",    df$avg_log2FC < -avg_log2FC & df$p_val_adj < p_val_adj ~ "down",    TRUE ~ 'none'  )  head(df)  print(getwd())  df=df[df$direction!="none",]  head(df)  dim(df)  df$mygroup=paste(df$group,df$direction,sep = "_")  head(df)  table(df$mygroup)  dim(df)
  print(getwd())
  dir.create("./degs_enrichments0.3fc")  setwd("./degs_enrichments0.3fc")  ##########################----------------------enrichment analysis==================================================  #https://mp.weixin.qq.com/s/WyT-7yKB9YKkZjjyraZdPg

  {    library(clusterProfiler)    library(org.Hs.eg.db) #人    library(org.Mm.eg.db) #鼠    library(ggplot2)    # degs_for_nlung_vs_tlung$gene=rownames(degs_for_nlung_vs_tlung)    head(df)    #df$cluster=df$mygroup
    df=df %>%dplyr::group_by(cluster)%>%      filter(p_val_adj <0.05,             abs(avg_log2FC) >avg_log2FC)    sce.markers=df    head(sce.markers)    print(getwd())    ids <- suppressWarnings(bitr(sce.markers$gene, 'SYMBOL', 'ENTREZID', 'org.Mm.eg.db'))    head(ids)
    head(sce.markers)    tail(sce.markers)    dim(sce.markers)    sce.markers=merge(sce.markers,ids,by.x='gene',by.y='SYMBOL')    head(sce.markers)    dim(sce.markers)    sce.markers$group=sce.markers$cluster    sce.markers=sce.markers[sce.markers$group!="none",]    dim(sce.markers)    head(sce.markers)    #sce.markers=openxlsx::read.xlsx("~/silicosis/spatial_transcriptomicsharmony_cluster_0.5res_gsea/sce.markers_for_each_clusterfor_enrichment.xlsx")    #sce.markers$cluster=sce.markers$mygroup    dim(sce.markers)    head(sce.markers)    gcSample=split(sce.markers$ENTREZID, sce.markers$cluster)
    library(clusterProfiler)    gcSample # entrez id , compareCluster     names(gcSample)    print("===========开始go============")    xx <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" ,   #'org.Hs.eg.db',                         pvalueCutoff=0.05) #organism="hsa",
    xx.BP <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" ,   #'org.Hs.eg.db',                            pvalueCutoff=0.05,readable=TRUE,                            ont="BP") #organism="hsa",    p=clusterProfiler::dotplot(object = xx.BP,showCategory = 20,                               label_format =60)     p=p+ theme(axis.text.x = element_text(angle = 90,                                           vjust = 0.5, hjust=0.5))    p    ggsave('degs_compareCluster-BP_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F)    ggsave('degs_compareCluster-BP_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F)
    xx.CC <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" ,   #'org.Hs.eg.db',                            pvalueCutoff=0.05,readable=TRUE,                            ont="CC") #organism="hsa",    p=clusterProfiler::dotplot(object = xx.CC,showCategory = 20,                               label_format =60)     p=p+ theme(axis.text.x = element_text(angle = 90,                                           vjust = 0.5, hjust=0.5))    p    ggsave('degs_compareCluster-CC_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F)    ggsave('degs_compareCluster-CC_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F)
    xx.MF <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" ,   #'org.Hs.eg.db',                            pvalueCutoff=0.05,readable=TRUE,                            ont="MF") #organism="hsa",    p=clusterProfiler::dotplot(object = xx.MF,showCategory = 20,                               label_format =60)     p=p+ theme(axis.text.x = element_text(angle = 90,                                           vjust = 0.5, hjust=0.5))    p    ggsave('degs_compareCluster-MF_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F)    ggsave('degs_compareCluster-MF_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F)


    print(getwd())
    .libPaths()

    print("===========开始 kegg============")    gg<-clusterProfiler::compareCluster(gcSample,fun = "enrichKEGG",  #readable=TRUE,                                        keyType = 'kegg',  #KEGG 富集                                        organism='mmu',#"rno",                                        pvalueCutoff = 0.05 #指定 p 值阈值（可指定 1 以输出全部    )
    p=clusterProfiler::dotplot(object = xx,showCategory = 20,                               label_format =100)     p=p+ theme(axis.text.x = element_text(angle = 90,                                           vjust = 0.5, hjust=0.5))    p    ggsave('degs_compareCluster-GO_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F)    ggsave('degs_compareCluster-GO_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F)
    xx    write.csv(xx,file = "compareCluster-GO_enrichment.csv")
    p=clusterProfiler::dotplot(gg,showCategory = 20,                               label_format = 40)     p4=p+ theme(axis.text.x = element_text(angle = 90,                                            vjust = 0.5, hjust=0.5))    p4    print(paste("保存位置",getwd(),sep = "  :   "))    ggsave('degs_compareCluster-KEGG_enrichment-2.pdf',plot = p4,width = 13,height = 25,limitsize = F)    ggsave('degs_compareCluster-KEGG_enrichment-2.png',plot = p4,width = 13,height = 25,limitsize = F)
    gg    openxlsx::write.xlsx(gg,file = "compareCluster-KEGG_enrichment.xlsx")
    getwd()    openxlsx::write.xlsx(sce.markers,file = "sce.markers_for_each_clusterfor_enrichment.xlsx")

    #   save(xx.BP,xx.CC,xx.MF, gg, file = "~/silicosis/spatial_transcriptomicsharmony_cluster_0.5res_gsea/xx.Rdata")
    xx.BP@compareClusterResult$oncology="BP"    xx.CC@compareClusterResult$oncology="CC"    xx.MF@compareClusterResult$oncology="MF"

    xx.all=do.call(rbind,list(xx.BP@compareClusterResult,                              xx.CC@compareClusterResult,                              xx.MF@compareClusterResult))    head(xx.all)        openxlsx::write.xlsx(xx.all,file = "enrichments_all.xlsx")        print(getwd())    save(xx.BP,xx.CC,xx.MF,xx.all,sce.markers,merged_degs, gg, file = "./xx.Rdata")

    if (F) {       #大鼠      print("===========开始go============")      xx <-clusterProfiler::compareCluster(gcSample, fun="enrichGO",OrgDb="org.Rn.eg.db",                                           readable=TRUE,                                           ont = 'ALL',  #GO Ontology，可选 BP、MF、CC，也可以指定 ALL 同时计算 3 者                                           pvalueCutoff=0.05) #organism="hsa", #'org.Hs.eg.db',
      print("===========开始 kegg============")      gg<-clusterProfiler::compareCluster(gcSample,fun = "enrichKEGG",                                          keyType = 'kegg',  #KEGG 富集                                          organism="rno",                                          pvalueCutoff = 0.05 #指定 p 值阈值（可指定 1 以输出全部      )
      p=dotplot(xx)       p2=p+ theme(axis.text.x = element_text(angle = 90,                                              vjust = 0.5, hjust=0.5))      p2      ggsave('degs_compareCluster-GO_enrichment-2.pdf',plot = p2,width = 6,height = 20,limitsize = F)      xx      openxlsx::write.xlsx(xx,file = "compareCluster-GO_enrichment.xlsx")      # -----        p=dotplot(gg)       p4=p+ theme(axis.text.x = element_text(angle = 90,                                              vjust = 0.5, hjust=0.5))      p4      print(paste("保存位置",getwd(),sep = "  :   "))      ggsave('degs_compareCluster-KEGG_enrichment-2.pdf',plot = p4,width = 6,height = 12,limitsize = F)      gg      openxlsx::write.xlsx(gg,file = "compareCluster-KEGG_enrichment.xlsx")       }    }

}

2.每个cluster的不同组别之间做批量富集分析

#2新版本findallmarkers做富集分析 每个cluster的不同组别之间------load("/home/data/t040413/silicosis/spatial_transcriptomics/silicosis_ST_harmony_SCT_r0.5.rds")
Idents(d.all)Idents(d.all) %>%table()head(d.all$SCT_snn_res.0.5)

d.all$orig.ident=factor(x = d.all$orig.ident,                        levels = c("NS_7" ,"SiO2_7","NS_56","SiO2_56"))
table(d.all$stim)library(dplyr)d.all$stim=case_when(d.all$stim=="sio2_56"~"SiO2_56",                     d.all$stim=="sio2_7"~"SiO2_7",                     d.all$stim=="NS_7"~"NS_7",                     d.all$stim=="NS_56"~"NS_56")table(d.all$stim)

Idents(d.all)=d.all$stimtable(d.all$stim)
d.all$group=case_when(  d.all$stim=="SiO2_56"~"SiO2_56",  d.all$stim=="SiO2_7"~"SiO2_7",  d.all$stim=="NS_7"~"NS",  d.all$stim=="NS_56"~"NS")table(d.all$group)
dim(d.all)#devtools::install_github("immunogenomics/presto")
table(Idents(d.all))Idents(d.all)=d.all$SCT_snn_res.0.5table(Idents(d.all))
degs=list()#Idents(d.all)=d.all$groupfor (cluster in unique(d.all$SCT_snn_res.0.5)  ) {
  Idents(d.all)=d.all$SCT_snn_res.0.5  subset_data=base::subset(d.all,idents = cluster)

  for (eachgroup in c('SiO2_56' , 'SiO2_7' )) {
    Idents(subset_data)=subset_data$group

    try({      degs[[paste0(cluster,eachgroup)]] = FindMarkers(subset_data,ident.1 = eachgroup,                                                      ident.2 = "NS",logfc.threshold = 0.1)
      degs[[paste0(cluster,eachgroup)]]$group=paste0(cluster,"_",eachgroup,"_VS_NS")
    }, silent = TRUE)  # 设置 silent = TRUE 以捕获错误但不中断执行


    print(paste0(cluster," __",eachgroup,"======done!========="))  }

  print(paste0(" __",cluster,"======done!=========")) # degs[[eachgroup]]$group=paste0(eachgroup,"_VS_NS")  print(getwd())}head(degs)names(degs)
degs=lapply(degs, function(x){  x$gene=rownames(x)  return(x)})head(degs)

# merged_degs=do.call(rbind,degs)# head(merged_degs)
markers=merged_degs{
  all_degs=markers  df=all_degs  ##筛选阈值确定：p＜0.05，|log2FC|＞1  p_val_adj = 0.05  avg_log2FC = 1  head(all_degs)

   #根据阈值添加上下调分组标签：  df$direction <- case_when(    df$avg_log2FC > avg_log2FC & df$p_val_adj < p_val_adj ~ "up",    df$avg_log2FC < -avg_log2FC & df$p_val_adj < p_val_adj ~ "down",    TRUE ~ 'none'  )  head(df)  print(getwd())  df=df[df$direction!="none",]  head(df)  dim(df)  df$mygroup=paste(df$group,df$direction,sep = "_")  head(df)  table(df$mygroup)  dim(df)
  print(getwd())
  dir.create("./degs_enrichments1fc")  setwd("./degs_enrichments1fc")  ##########################----------------------enrichment analysis==================================================  #https://mp.weixin.qq.com/s/WyT-7yKB9YKkZjjyraZdPg

  {    library(clusterProfiler)    library(org.Hs.eg.db) #人    library(org.Mm.eg.db) #鼠    library(ggplot2)    # degs_for_nlung_vs_tlung$gene=rownames(degs_for_nlung_vs_tlung)    head(df)    df$cluster=df$mygroup
    df=df %>%dplyr::group_by(cluster)%>%      filter(p_val_adj <0.05,             abs(avg_log2FC) >avg_log2FC)    sce.markers=df    head(sce.markers)    print(getwd())    ids <- suppressWarnings(bitr(sce.markers$gene, 'SYMBOL', 'ENTREZID', 'org.Mm.eg.db'))    head(ids)
    head(sce.markers)    tail(sce.markers)    dim(sce.markers)    sce.markers=merge(sce.markers,ids,by.x='gene',by.y='SYMBOL')    head(sce.markers)    dim(sce.markers)    sce.markers$group=sce.markers$cluster    sce.markers=sce.markers[sce.markers$group!="none",]    dim(sce.markers)    head(sce.markers)    #sce.markers=openxlsx::read.xlsx("~/silicosis/spatial_transcriptomicsharmony_cluster_0.5res_gsea/sce.markers_for_each_clusterfor_enrichment.xlsx")    #sce.markers$cluster=sce.markers$mygroup    dim(sce.markers)    head(sce.markers)    gcSample=split(sce.markers$ENTREZID, sce.markers$cluster)
    library(clusterProfiler)    gcSample # entrez id , compareCluster     names(gcSample)    print("===========开始go============")    xx <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" ,   #'org.Hs.eg.db',                         pvalueCutoff=0.05) #organism="hsa",
    xx.BP <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" ,   #'org.Hs.eg.db',                            pvalueCutoff=0.05,readable=TRUE,                            ont="BP") #organism="hsa",    p=clusterProfiler::dotplot(object = xx.BP,showCategory = 20,                               label_format =60)     p=p+ theme(axis.text.x = element_text(angle = 90,                                           vjust = 0.5, hjust=0.5))    p    ggsave('degs_compareCluster-BP_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F)    ggsave('degs_compareCluster-BP_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F)
    xx.CC <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" ,   #'org.Hs.eg.db',                            pvalueCutoff=0.05,readable=TRUE,                            ont="CC") #organism="hsa",    p=clusterProfiler::dotplot(object = xx.CC,showCategory = 20,                               label_format =60)     p=p+ theme(axis.text.x = element_text(angle = 90,                                           vjust = 0.5, hjust=0.5))    p    ggsave('degs_compareCluster-CC_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F)    ggsave('degs_compareCluster-CC_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F)
    xx.MF <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" ,   #'org.Hs.eg.db',                            pvalueCutoff=0.05,readable=TRUE,                            ont="MF") #organism="hsa",    p=clusterProfiler::dotplot(object = xx.MF,showCategory = 20,                               label_format =60)     p=p+ theme(axis.text.x = element_text(angle = 90,                                           vjust = 0.5, hjust=0.5))    p    ggsave('degs_compareCluster-MF_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F)    ggsave('degs_compareCluster-MF_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F)


    print(getwd())
    .libPaths()

    print("===========开始 kegg============")    gg<-clusterProfiler::compareCluster(gcSample,fun = "enrichKEGG",  #readable=TRUE,                                        keyType = 'kegg',  #KEGG 富集                                        organism='mmu',#"rno",                                        pvalueCutoff = 0.05 #指定 p 值阈值（可指定 1 以输出全部    )
    p=clusterProfiler::dotplot(object = xx,showCategory = 20,                               label_format =100)     p=p+ theme(axis.text.x = element_text(angle = 90,                                           vjust = 0.5, hjust=0.5))    p    ggsave('degs_compareCluster-GO_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F)    ggsave('degs_compareCluster-GO_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F)
    xx    write.csv(xx,file = "compareCluster-GO_enrichment.csv")
    p=clusterProfiler::dotplot(gg,showCategory = 20,                               label_format = 40)     p4=p+ theme(axis.text.x = element_text(angle = 90,                                            vjust = 0.5, hjust=0.5))    p4    print(paste("保存位置",getwd(),sep = "  :   "))    ggsave('degs_compareCluster-KEGG_enrichment-2.pdf',plot = p4,width = 13,height = 25,limitsize = F)    ggsave('degs_compareCluster-KEGG_enrichment-2.png',plot = p4,width = 13,height = 25,limitsize = F)
    gg    openxlsx::write.xlsx(gg,file = "compareCluster-KEGG_enrichment.xlsx")
    getwd()    openxlsx::write.xlsx(sce.markers,file = "sce.markers_for_each_clusterfor_enrichment.xlsx")

    #   save(xx.BP,xx.CC,xx.MF, gg, file = "~/silicosis/spatial_transcriptomicsharmony_cluster_0.5res_gsea/xx.Rdata")
    xx.BP@compareClusterResult$oncology="BP"    xx.CC@compareClusterResult$oncology="CC"    xx.MF@compareClusterResult$oncology="MF"

    xx.all=do.call(rbind,list(xx.BP@compareClusterResult,                              xx.CC@compareClusterResult,                              xx.MF@compareClusterResult))    head(xx.all)        openxlsx::write.xlsx(xx.all,file = "enrichments_all.xlsx")        print(getwd())    save(xx.BP,xx.CC,xx.MF,xx.all,sce.markers,merged_degs, gg, file = "./xx.Rdata")

    if (F) {       #大鼠      print("===========开始go============")      xx <-clusterProfiler::compareCluster(gcSample, fun="enrichGO",OrgDb="org.Rn.eg.db",                                           readable=TRUE,                                           ont = 'ALL',  #GO Ontology，可选 BP、MF、CC，也可以指定 ALL 同时计算 3 者                                           pvalueCutoff=0.05) #organism="hsa", #'org.Hs.eg.db',
      print("===========开始 kegg============")      gg<-clusterProfiler::compareCluster(gcSample,fun = "enrichKEGG",                                          keyType = 'kegg',  #KEGG 富集                                          organism="rno",                                          pvalueCutoff = 0.05 #指定 p 值阈值（可指定 1 以输出全部      )
      p=dotplot(xx)       p2=p+ theme(axis.text.x = element_text(angle = 90,                                              vjust = 0.5, hjust=0.5))      p2      ggsave('degs_compareCluster-GO_enrichment-2.pdf',plot = p2,width = 6,height = 20,limitsize = F)      xx      openxlsx::write.xlsx(xx,file = "compareCluster-GO_enrichment.xlsx")      # -----        p=dotplot(gg)       p4=p+ theme(axis.text.x = element_text(angle = 90,                                              vjust = 0.5, hjust=0.5))      p4      print(paste("保存位置",getwd(),sep = "  :   "))      ggsave('degs_compareCluster-KEGG_enrichment-2.pdf',plot = p4,width = 6,height = 12,limitsize = F)      gg      openxlsx::write.xlsx(gg,file = "compareCluster-KEGG_enrichment.xlsx")       }    }

}

收录于合集 #单细胞

6个

上一篇比monocle更快的slingshot-CNS高分文章常用

喜欢此内容的人还喜欢

微信扫一扫
关注该公众号