本期内容着重于批量富集分析操作
每个cluster之间进行findallmarkers,然后对每个cluster进行富集分析
对每个cluster里的不同组别的差异基因(deg)做富集分析
例1
例2
每个cluster之间进行findallmarkers,然后对每个cluster进行富集分析
#这里的d.all可以等同换成pbmc
load("/home/data/t040413/silicosis/spatial_transcriptomics/silicosis_ST_harmony_SCT_r0.5.rds")
Idents(d.all)
Idents(d.all) %>%table()
head(d.all$SCT_snn_res.0.5)
d.all$orig.ident=factor(x = d.all$orig.ident,
levels = c("NS_7" ,"SiO2_7","NS_56","SiO2_56"))
table(d.all$stim)
library(dplyr)
d.all$stim=case_when(d.all$stim=="sio2_56"~"SiO2_56",
d.all$stim=="sio2_7"~"SiO2_7",
d.all$stim=="NS_7"~"NS_7",
d.all$stim=="NS_56"~"NS_56"
)
table(d.all$stim)
Idents(d.all)=d.all$stim
table(d.all$stim)
d.all$group=case_when(
d.all$stim=="SiO2_56"~"SiO2_56",
d.all$stim=="SiO2_7"~"SiO2_7",
d.all$stim=="NS_7"~"NS",
d.all$stim=="NS_56"~"NS"
)
table(d.all$group)
Idents(d.all)=d.all$group
这里有三个cluster,接下来找其marker基因,并合并成大的dataframeIdents(d.all)=d.all$group
table(Idents(d.all))
#Idents(d.all)=d.all$SCT_snn_res.0.5
table(Idents(d.all))
#degs=list()
#Idents(d.all)=d.all$group
#https://github.com/satijalab/seurat/discussions/4433
#speed up findallmarkers
library(presto)
Assays(d.all)
degs= FindAllMarkers(d.all,logfc.threshold = 0.1)
head(degs)
names(degs)
#
# degs=lapply(degs, function(x){
# x$gene=rownames(x)
# return(x)
# })
# head(degs)
merged_degs=do.call(rbind,degs)
head(merged_degs)
degs= FindAllMarkers(d.all,logfc.threshold = 0.3)
head(degs)
names(degs)
dim(degs)
#
# degs=lapply(degs, function(x){
# x$gene=rownames(x)
# return(x)
# })
# head(degs)
# merged_degs=do.call(rbind,degs)
# head(merged_degs)
markers=degs
{
all_degs=markers
df=all_degs
##筛选阈值确定:p<0.05,|log2FC|>1
p_val_adj = 0.05
avg_log2FC = 0.3
head(all_degs)
#根据阈值添加上下调分组标签:
df$direction <- case_when(
df$avg_log2FC > avg_log2FC & df$p_val_adj < p_val_adj ~ "up",
df$avg_log2FC < -avg_log2FC & df$p_val_adj < p_val_adj ~ "down",
TRUE ~ 'none'
)
head(df)
print(getwd())
df=df[df$direction!="none",]
head(df)
dim(df)
df$mygroup=paste(df$group,df$direction,sep = "_")
head(df)
table(df$mygroup)
dim(df)
print(getwd())
dir.create("./degs_enrichments0.3fc")
setwd("./degs_enrichments0.3fc")
##########################----------------------enrichment analysis==================================================
#https://mp.weixin.qq.com/s/WyT-7yKB9YKkZjjyraZdPg
{
library(clusterProfiler)
library(org.Hs.eg.db) #人
library(org.Mm.eg.db) #鼠
library(ggplot2)
# degs_for_nlung_vs_tlung$gene=rownames(degs_for_nlung_vs_tlung)
head(df)
#df$cluster=df$mygroup
df=df %>%dplyr::group_by(cluster)%>%
filter(p_val_adj <0.05,
abs(avg_log2FC) >avg_log2FC)
sce.markers=df
head(sce.markers)
print(getwd())
ids <- suppressWarnings(bitr(sce.markers$gene, 'SYMBOL', 'ENTREZID', 'org.Mm.eg.db'))
head(ids)
head(sce.markers)
tail(sce.markers)
dim(sce.markers)
sce.markers=merge(sce.markers,ids,by.x='gene',by.y='SYMBOL')
head(sce.markers)
dim(sce.markers)
sce.markers$group=sce.markers$cluster
sce.markers=sce.markers[sce.markers$group!="none",]
dim(sce.markers)
head(sce.markers)
#sce.markers=openxlsx::read.xlsx("~/silicosis/spatial_transcriptomicsharmony_cluster_0.5res_gsea/sce.markers_for_each_clusterfor_enrichment.xlsx")
#sce.markers$cluster=sce.markers$mygroup
dim(sce.markers)
head(sce.markers)
gcSample=split(sce.markers$ENTREZID, sce.markers$cluster)
library(clusterProfiler)
gcSample # entrez id , compareCluster
names(gcSample)
print("===========开始go============")
xx <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" , #'org.Hs.eg.db',
pvalueCutoff=0.05) #organism="hsa",
xx.BP <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" , #'org.Hs.eg.db',
pvalueCutoff=0.05,readable=TRUE,
ont="BP") #organism="hsa",
p=clusterProfiler::dotplot(object = xx.BP,showCategory = 20,
label_format =60)
p=p+ theme(axis.text.x = element_text(angle = 90,
vjust = 0.5, hjust=0.5))
p
ggsave('degs_compareCluster-BP_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F)
ggsave('degs_compareCluster-BP_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F)
xx.CC <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" , #'org.Hs.eg.db',
pvalueCutoff=0.05,readable=TRUE,
ont="CC") #organism="hsa",
p=clusterProfiler::dotplot(object = xx.CC,showCategory = 20,
label_format =60)
p=p+ theme(axis.text.x = element_text(angle = 90,
vjust = 0.5, hjust=0.5))
p
ggsave('degs_compareCluster-CC_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F)
ggsave('degs_compareCluster-CC_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F)
xx.MF <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" , #'org.Hs.eg.db',
pvalueCutoff=0.05,readable=TRUE,
ont="MF") #organism="hsa",
p=clusterProfiler::dotplot(object = xx.MF,showCategory = 20,
label_format =60)
p=p+ theme(axis.text.x = element_text(angle = 90,
vjust = 0.5, hjust=0.5))
p
ggsave('degs_compareCluster-MF_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F)
ggsave('degs_compareCluster-MF_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F)
print(getwd())
.libPaths()
print("===========开始 kegg============")
gg<-clusterProfiler::compareCluster(gcSample,fun = "enrichKEGG", #readable=TRUE,
keyType = 'kegg', #KEGG 富集
organism='mmu',#"rno",
pvalueCutoff = 0.05 #指定 p 值阈值(可指定 1 以输出全部
)
p=clusterProfiler::dotplot(object = xx,showCategory = 20,
label_format =100)
p=p+ theme(axis.text.x = element_text(angle = 90,
vjust = 0.5, hjust=0.5))
p
ggsave('degs_compareCluster-GO_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F)
ggsave('degs_compareCluster-GO_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F)
xx
write.csv(xx,file = "compareCluster-GO_enrichment.csv")
p=clusterProfiler::dotplot(gg,showCategory = 20,
label_format = 40)
p4=p+ theme(axis.text.x = element_text(angle = 90,
vjust = 0.5, hjust=0.5))
p4
print(paste("保存位置",getwd(),sep = " : "))
ggsave('degs_compareCluster-KEGG_enrichment-2.pdf',plot = p4,width = 13,height = 25,limitsize = F)
ggsave('degs_compareCluster-KEGG_enrichment-2.png',plot = p4,width = 13,height = 25,limitsize = F)
gg
openxlsx::write.xlsx(gg,file = "compareCluster-KEGG_enrichment.xlsx")
getwd()
openxlsx::write.xlsx(sce.markers,file = "sce.markers_for_each_clusterfor_enrichment.xlsx")
# save(xx.BP,xx.CC,xx.MF, gg, file = "~/silicosis/spatial_transcriptomicsharmony_cluster_0.5res_gsea/xx.Rdata")
xx.BP@compareClusterResult$oncology="BP"
xx.CC@compareClusterResult$oncology="CC"
xx.MF@compareClusterResult$oncology="MF"
xx.all=do.call(rbind,list(xx.BP@compareClusterResult,
xx.CC@compareClusterResult,
xx.MF@compareClusterResult))
head(xx.all)
openxlsx::write.xlsx(xx.all,file = "enrichments_all.xlsx")
print(getwd())
save(xx.BP,xx.CC,xx.MF,xx.all,sce.markers,merged_degs, gg, file = "./xx.Rdata")
if (F) {
#大鼠
print("===========开始go============")
xx <-clusterProfiler::compareCluster(gcSample, fun="enrichGO",OrgDb="org.Rn.eg.db",
readable=TRUE,
ont = 'ALL', #GO Ontology,可选 BP、MF、CC,也可以指定 ALL 同时计算 3 者
pvalueCutoff=0.05) #organism="hsa", #'org.Hs.eg.db',
print("===========开始 kegg============")
gg<-clusterProfiler::compareCluster(gcSample,fun = "enrichKEGG",
keyType = 'kegg', #KEGG 富集
organism="rno",
pvalueCutoff = 0.05 #指定 p 值阈值(可指定 1 以输出全部
)
p=dotplot(xx)
p2=p+ theme(axis.text.x = element_text(angle = 90,
vjust = 0.5, hjust=0.5))
p2
ggsave('degs_compareCluster-GO_enrichment-2.pdf',plot = p2,width = 6,height = 20,limitsize = F)
xx
openxlsx::write.xlsx(xx,file = "compareCluster-GO_enrichment.xlsx")
# -----
p=dotplot(gg)
p4=p+ theme(axis.text.x = element_text(angle = 90,
vjust = 0.5, hjust=0.5))
p4
print(paste("保存位置",getwd(),sep = " : "))
ggsave('degs_compareCluster-KEGG_enrichment-2.pdf',plot = p4,width = 6,height = 12,limitsize = F)
gg
openxlsx::write.xlsx(gg,file = "compareCluster-KEGG_enrichment.xlsx")
}
}
}
2.每个cluster的不同组别之间做批量富集分析#2新版本findallmarkers做富集分析 每个cluster的不同组别之间------
load("/home/data/t040413/silicosis/spatial_transcriptomics/silicosis_ST_harmony_SCT_r0.5.rds")
Idents(d.all)
Idents(d.all) %>%table()
head(d.all$SCT_snn_res.0.5)
d.all$orig.ident=factor(x = d.all$orig.ident,
levels = c("NS_7" ,"SiO2_7","NS_56","SiO2_56"))
table(d.all$stim)
library(dplyr)
d.all$stim=case_when(d.all$stim=="sio2_56"~"SiO2_56",
d.all$stim=="sio2_7"~"SiO2_7",
d.all$stim=="NS_7"~"NS_7",
d.all$stim=="NS_56"~"NS_56"
)
table(d.all$stim)
Idents(d.all)=d.all$stim
table(d.all$stim)
d.all$group=case_when(
d.all$stim=="SiO2_56"~"SiO2_56",
d.all$stim=="SiO2_7"~"SiO2_7",
d.all$stim=="NS_7"~"NS",
d.all$stim=="NS_56"~"NS"
)
table(d.all$group)
dim(d.all)
#devtools::install_github("immunogenomics/presto")
table(Idents(d.all))
Idents(d.all)=d.all$SCT_snn_res.0.5
table(Idents(d.all))
degs=list()
#Idents(d.all)=d.all$group
for (cluster in unique(d.all$SCT_snn_res.0.5) ) {
Idents(d.all)=d.all$SCT_snn_res.0.5
subset_data=base::subset(d.all,idents = cluster)
for (eachgroup in c('SiO2_56' , 'SiO2_7' )) {
Idents(subset_data)=subset_data$group
try({
degs[[paste0(cluster,eachgroup)]] = FindMarkers(subset_data,ident.1 = eachgroup,
ident.2 = "NS",logfc.threshold = 0.1)
degs[[paste0(cluster,eachgroup)]]$group=paste0(cluster,"_",eachgroup,"_VS_NS")
}, silent = TRUE) # 设置 silent = TRUE 以捕获错误但不中断执行
print(paste0(cluster," __",eachgroup,"======done!========="))
}
print(paste0(" __",cluster,"======done!========="))
# degs[[eachgroup]]$group=paste0(eachgroup,"_VS_NS")
print(getwd())
}
head(degs)
names(degs)
degs=lapply(degs, function(x){
x$gene=rownames(x)
return(x)
})
head(degs)
# merged_degs=do.call(rbind,degs)
# head(merged_degs)
markers=merged_degs
{
all_degs=markers
df=all_degs
##筛选阈值确定:p<0.05,|log2FC|>1
p_val_adj = 0.05
avg_log2FC = 1
head(all_degs)
#根据阈值添加上下调分组标签:
df$direction <- case_when(
df$avg_log2FC > avg_log2FC & df$p_val_adj < p_val_adj ~ "up",
df$avg_log2FC < -avg_log2FC & df$p_val_adj < p_val_adj ~ "down",
TRUE ~ 'none'
)
head(df)
print(getwd())
df=df[df$direction!="none",]
head(df)
dim(df)
df$mygroup=paste(df$group,df$direction,sep = "_")
head(df)
table(df$mygroup)
dim(df)
print(getwd())
dir.create("./degs_enrichments1fc")
setwd("./degs_enrichments1fc")
##########################----------------------enrichment analysis==================================================
#https://mp.weixin.qq.com/s/WyT-7yKB9YKkZjjyraZdPg
{
library(clusterProfiler)
library(org.Hs.eg.db) #人
library(org.Mm.eg.db) #鼠
library(ggplot2)
# degs_for_nlung_vs_tlung$gene=rownames(degs_for_nlung_vs_tlung)
head(df)
df$cluster=df$mygroup
df=df %>%dplyr::group_by(cluster)%>%
filter(p_val_adj <0.05,
abs(avg_log2FC) >avg_log2FC)
sce.markers=df
head(sce.markers)
print(getwd())
ids <- suppressWarnings(bitr(sce.markers$gene, 'SYMBOL', 'ENTREZID', 'org.Mm.eg.db'))
head(ids)
head(sce.markers)
tail(sce.markers)
dim(sce.markers)
sce.markers=merge(sce.markers,ids,by.x='gene',by.y='SYMBOL')
head(sce.markers)
dim(sce.markers)
sce.markers$group=sce.markers$cluster
sce.markers=sce.markers[sce.markers$group!="none",]
dim(sce.markers)
head(sce.markers)
#sce.markers=openxlsx::read.xlsx("~/silicosis/spatial_transcriptomicsharmony_cluster_0.5res_gsea/sce.markers_for_each_clusterfor_enrichment.xlsx")
#sce.markers$cluster=sce.markers$mygroup
dim(sce.markers)
head(sce.markers)
gcSample=split(sce.markers$ENTREZID, sce.markers$cluster)
library(clusterProfiler)
gcSample # entrez id , compareCluster
names(gcSample)
print("===========开始go============")
xx <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" , #'org.Hs.eg.db',
pvalueCutoff=0.05) #organism="hsa",
xx.BP <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" , #'org.Hs.eg.db',
pvalueCutoff=0.05,readable=TRUE,
ont="BP") #organism="hsa",
p=clusterProfiler::dotplot(object = xx.BP,showCategory = 20,
label_format =60)
p=p+ theme(axis.text.x = element_text(angle = 90,
vjust = 0.5, hjust=0.5))
p
ggsave('degs_compareCluster-BP_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F)
ggsave('degs_compareCluster-BP_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F)
xx.CC <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" , #'org.Hs.eg.db',
pvalueCutoff=0.05,readable=TRUE,
ont="CC") #organism="hsa",
p=clusterProfiler::dotplot(object = xx.CC,showCategory = 20,
label_format =60)
p=p+ theme(axis.text.x = element_text(angle = 90,
vjust = 0.5, hjust=0.5))
p
ggsave('degs_compareCluster-CC_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F)
ggsave('degs_compareCluster-CC_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F)
xx.MF <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" , #'org.Hs.eg.db',
pvalueCutoff=0.05,readable=TRUE,
ont="MF") #organism="hsa",
p=clusterProfiler::dotplot(object = xx.MF,showCategory = 20,
label_format =60)
p=p+ theme(axis.text.x = element_text(angle = 90,
vjust = 0.5, hjust=0.5))
p
ggsave('degs_compareCluster-MF_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F)
ggsave('degs_compareCluster-MF_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F)
print(getwd())
.libPaths()
print("===========开始 kegg============")
gg<-clusterProfiler::compareCluster(gcSample,fun = "enrichKEGG", #readable=TRUE,
keyType = 'kegg', #KEGG 富集
organism='mmu',#"rno",
pvalueCutoff = 0.05 #指定 p 值阈值(可指定 1 以输出全部
)
p=clusterProfiler::dotplot(object = xx,showCategory = 20,
label_format =100)
p=p+ theme(axis.text.x = element_text(angle = 90,
vjust = 0.5, hjust=0.5))
p
ggsave('degs_compareCluster-GO_enrichment--3.pdf',plot = p,width = 13,height = 40,limitsize = F)
ggsave('degs_compareCluster-GO_enrichment--3.png',plot = p,width = 13,height = 40,limitsize = F)
xx
write.csv(xx,file = "compareCluster-GO_enrichment.csv")
p=clusterProfiler::dotplot(gg,showCategory = 20,
label_format = 40)
p4=p+ theme(axis.text.x = element_text(angle = 90,
vjust = 0.5, hjust=0.5))
p4
print(paste("保存位置",getwd(),sep = " : "))
ggsave('degs_compareCluster-KEGG_enrichment-2.pdf',plot = p4,width = 13,height = 25,limitsize = F)
ggsave('degs_compareCluster-KEGG_enrichment-2.png',plot = p4,width = 13,height = 25,limitsize = F)
gg
openxlsx::write.xlsx(gg,file = "compareCluster-KEGG_enrichment.xlsx")
getwd()
openxlsx::write.xlsx(sce.markers,file = "sce.markers_for_each_clusterfor_enrichment.xlsx")
# save(xx.BP,xx.CC,xx.MF, gg, file = "~/silicosis/spatial_transcriptomicsharmony_cluster_0.5res_gsea/xx.Rdata")
xx.BP@compareClusterResult$oncology="BP"
xx.CC@compareClusterResult$oncology="CC"
xx.MF@compareClusterResult$oncology="MF"
xx.all=do.call(rbind,list(xx.BP@compareClusterResult,
xx.CC@compareClusterResult,
xx.MF@compareClusterResult))
head(xx.all)
openxlsx::write.xlsx(xx.all,file = "enrichments_all.xlsx")
print(getwd())
save(xx.BP,xx.CC,xx.MF,xx.all,sce.markers,merged_degs, gg, file = "./xx.Rdata")
if (F) {
#大鼠
print("===========开始go============")
xx <-clusterProfiler::compareCluster(gcSample, fun="enrichGO",OrgDb="org.Rn.eg.db",
readable=TRUE,
ont = 'ALL', #GO Ontology,可选 BP、MF、CC,也可以指定 ALL 同时计算 3 者
pvalueCutoff=0.05) #organism="hsa", #'org.Hs.eg.db',
print("===========开始 kegg============")
gg<-clusterProfiler::compareCluster(gcSample,fun = "enrichKEGG",
keyType = 'kegg', #KEGG 富集
organism="rno",
pvalueCutoff = 0.05 #指定 p 值阈值(可指定 1 以输出全部
)
p=dotplot(xx)
p2=p+ theme(axis.text.x = element_text(angle = 90,
vjust = 0.5, hjust=0.5))
p2
ggsave('degs_compareCluster-GO_enrichment-2.pdf',plot = p2,width = 6,height = 20,limitsize = F)
xx
openxlsx::write.xlsx(xx,file = "compareCluster-GO_enrichment.xlsx")
# -----
p=dotplot(gg)
p4=p+ theme(axis.text.x = element_text(angle = 90,
vjust = 0.5, hjust=0.5))
p4
print(paste("保存位置",getwd(),sep = " : "))
ggsave('degs_compareCluster-KEGG_enrichment-2.pdf',plot = p4,width = 6,height = 12,limitsize = F)
gg
openxlsx::write.xlsx(gg,file = "compareCluster-KEGG_enrichment.xlsx")
}
}
}
bp
cc
mf
微信扫一扫
关注该公众号