打开APP
userphoto
未登录

开通VIP,畅享免费电子书等14项超值服

开通VIP
利用单细胞集合做免疫浸润分析

以前一直用经典的MCP法分免疫浸润 总感觉不好 上篇文章上也写过 这次用肠癌单细胞6万多的细胞的特异性基因,特异性基因我是一个一个看的,我选的标准是logfc大于1,而且如果是肿瘤细胞的特异性基因,那么他在间质细胞和免疫细胞的表达量和表达值很低,总之用单细胞的marker分免疫浸润比用传统的cibersort和mcp群分的好看的多,个人也觉得mcp法适合用于通用的肿瘤,在特异性肿瘤中,如果手中有单细胞数据,可以用单细胞数据集合更加试用于特定的肿瘤。

下载数据

xena数据库下载结肠癌fpkm数据和临床数据

#表达矩阵dd <- data.table::fread('TCGA-COAD.htseq_fpkm.tsv',data.table = F)rownames(dd) <- dd$Ensembl_IDdd <- dd[,-1]library(stringr)dd =dd[,str_sub(colnames(dd),14,16)=='01A']colnames(dd) <- str_sub(colnames(dd),1,15)test <- dd[1:10,1:10]tcga_panmRNA_expr1 <- ddtcga_panmRNA_expr1$gene_id <- rownames(tcga_panmRNA_expr1 )tcga_panmRNA_expr1 <- tcga_panmRNA_expr1 %>% tidyr::separate(gene_id,into = c('gene_id','drop'),sep='\\.') %>% dplyr::select(-drop)rownames(tcga_panmRNA_expr1) <- tcga_panmRNA_expr1$gene_idtcga_panmRNA_expr1 <- tcga_panmRNA_expr1[,-454]#去除最后最后一行基因名字IGHG1 <- tcga_panmRNA_expr1['ENSG00000211896',]IGHG2 <- tcga_panmRNA_expr1['ENSG00000211893',] IGHG3 <- tcga_panmRNA_expr1['ENSG00000211897',]IGHG4 <- tcga_panmRNA_expr1['ENSG00000211892',]IGHA1 <- tcga_panmRNA_expr1['ENSG00000211895',]IGHA2 <- tcga_panmRNA_expr1['ENSG00000211890',]JCHAIN <- tcga_panmRNA_expr1['ENSG00000132465',]IGHD <-tcga_panmRNA_expr1['ENSG00000211898',]tcga_panmRNA_expr2 <- rbind(IGHG1,IGHG2,IGHG3,IGHG4,IGHA1,IGHA2,JCHAIN)rownames(tcga_panmRNA_expr2) <- c('IGHG1','IGHG2','IGHG3','IGHG4','IGHA1','IGHA2','JCHAIN')#MIR4435<-tcga_panmRNA_expr1['ENSG00000172965',] #tcga_panmRNA_expr2 <- rbind(IGHG1,IGHG2,IGHG3,IGHG4,IGHA1,IGHA2,JCHAIN,MIR4435)#rownames(tcga_panmRNA_expr2) <- c('IGHG1','IGHG2','IGHG3','IGHG4','IGHA1','IGHA2','JCHAIN','MIR4435')

临床数据下载

clinic_data = read.table('COAD_clinicalMatrix', header = T, sep = '\t')surv_data = read.table('COAD_survival.txt', header = T, sep = '\t')clinic_data = merge(clinic_data, surv_data, by.x = 'sampleID', by.y = 'sample')rownames(clinic_data) <- clinic_data$sampleIDclinic_data=clinic_data[,c('gender','age_at_initial_pathologic_diagnosis','sampleID','CDE_ID_3226963','MSI_updated_Oct62011','pathologic_stage','OS','OS.time','DSS','DSS.time','PFI','PFI.time')]clin =clinic_data[str_sub(rownames(clinic_data),14,15)=='01',]index <- intersect(rownames(clin),colnames(dd))table(index)### 获取有临床信息的样本clin <- clin[index,]write.csv(clin,'clin.csv' )clin <- read.csv('clin1.csv',header = T)rownames(clin) <- clin$sampleIDtable(clin$pathologic_stage)index <- intersect(rownames(clin),colnames(dd))dd <- dd[,index]tcga_panmRNA_expr2 <- tcga_panmRNA_expr2[,index]clin <- clin[index,]IGHD <- IGHD[,index]

探针转换

dd$gene_id <- rownames(dd)load('anno.Rdata')### 提取编码mRNAlibrary(dplyr)library(tidyr)library(tibble)tcga_panmRNA_expr <- mRNA_anno %>% dplyr::select(c(gene_name,gene_id)) %>% inner_join(dd,by ='gene_id') %>% dplyr::select(-gene_id) %>% filter(gene_name!='NA') %>% distinct(gene_name,.keep_all = T) %>% column_to_rownames('gene_name')identical(colnames(tcga_panmRNA_expr),colnames(IGHD))tcga_panmRNA_expr <- rbind(tcga_panmRNA_expr,IGHD)gene <- c('PDCD1','CD274','PDCD1LG2','CTLA4','TGFB1','IL10','ENTPD1','HAVCR2','LAG3')gene1 <- c('FCER2','ENSG00000211898','CXCL13')gene2 <- c('CD40','CD80','CD86')tcga_panmRNA_expr3 <- tcga_panmRNA_expr[gene,]tcga_panmRNA_expr4 <- tcga_panmRNA_expr[gene1,]tcga_panmRNA_expr5 <- tcga_panmRNA_expr[gene2,]cellMarker <- data.table::fread('cellMarker 3.csv',data.table = F)colnames(cellMarker)[2] <- 'celltype'cellMarker <- lapply(split(cellMarker,cellMarker$celltype), function(x){ dd = x$Metagene unique(dd)})expr <- as.matrix(tcga_panmRNA_expr)### 3.使用ssGSEA量化免疫浸润library(GSVA)gsva_data <- gsva(expr,cellMarker, method = 'ssgsea')

GSVA分析

cellMarker <- data.table::fread('cellMarker 1.csv',data.table = F)colnames(cellMarker)[2] <- 'celltype'cellMarker <- lapply(split(cellMarker,cellMarker$celltype), function(x){  dd = x$Metagene  unique(dd)})expr <- as.matrix(tcga_panmRNA_expr)### 3.使用ssGSEA量化免疫浸润library(GSVA)gsva_data <- gsva(expr,cellMarker, method = 'ssgsea')

免疫亚群分析

#去除肿瘤上皮细胞gsva_data2<- gsva_data[c(1,2,6,8,9,10,11,13),]title='D:\\GZ04' library(ConsensusClusterPlus)test2 = ConsensusClusterPlus( gsva_data2,maxK=10, #聚类的最大类数,所以会评估聚2类、3类...6类 reps=50, #50个重采样 pItem=0.8, #重采样样本为80% pFeature=1, #重采样基因为80% title=title,#输出文件的保存位置 clusterAlg='km', #聚合层次聚类算法 distance='pearson', #Pearson 相关距离 seed=1262118388.71279, #设置特定的随机种子,使例子是可重复的 plot='png')dd1 <- as.data.frame(test2[[4]][['consensusClass']])colnames(dd1) <- 'CIC'dd1$sample <- rownames(dd1)dd1$CIC <- ifelse(dd1$CIC=='1','C',ifelse(dd1$CIC=='2','A',ifelse(dd1$CIC=='3','D','B')))library(dplyr)dd1 <- dd1 %>% arrange(dd1$CIC)names <- rownames(dd1)gsva_data3 <- gsva_data[c(13,3,11,1,10,2,9,6,8),names]gsva_data4 <- gsva_data[c(4,5),names]tcga_panmRNA_expr5 <-tcga_panmRNA_expr5[,names]tcga_panmRNA_expr6 <- rbind(tcga_panmRNA_expr5,gsva_data4)tcga_panmRNA_expr2 <-tcga_panmRNA_expr2[,names] tcga_panmRNA_expr3 <-tcga_panmRNA_expr3[,names] tcga_panmRNA_expr4 <-tcga_panmRNA_expr4[,names]clin <- clin[names,]identical(rownames(dd1),rownames(clin))dd1$MSI <- clin$MSI.Statusdd1$Stage <- clin$pathologic_stagedd1 <- dd1 %>% select(-sample)result3<- t(scale(t(gsva_data3)))result3[result3>2]=2result3[result3<-2]=-2DD1 <- pheatmap(result3, #热图的数据 cluster_rows = F,#行聚类 cluster_cols = F,#列聚类,可以看出样本之间的区分度 annotation_col=dd1, #标注样本分类 annotation_legend=TRUE, # 显示注释 show_rownames = T, show_colnames = F,# 显示行名 #scale = 'row', #以行来标准化 color =colorRampPalette(c('blue','white', 'red'))(100),#调色 #filename = 'heatmap_F.pdf',#是否保存 cellwidth = 1.5, cellheight = 15,# 格子比例 fontsize = 10)pdf('DD1.pdf',width = 15,height = 7)print(DD1)dev.off()#计算热图钟的p值#三组以上的p值计算用 Kruskal–WallisLast1$group <- as.factor(Last1$CIC)kruskal.test(`T cells`~group, data=Last1)p=c('2.2e-8','7e-16','2.2e-17','2.9e-16')p.adjust(p,method = 'fdr',n=length(p))#求各个组中的P值library(rgdal)library(pgirmess)library(coin)library(multcomp)kruskalmc(`T cells`~group, data=Last1, probs=0.05)library(PMCMR)posthoc.kruskal.nemenyi.test(`T cells`~group, data=Last1)posthoc.kruskal.nemenyi.test(`T cells`~group, data=Last1,dist='Chisq')
图片.png

生存分析

gsva_data4<- as.data.frame(t(gsva_data3))Last <- cbind(gsva_data4,clin)Last1 <- cbind(Last,dd1)write.csv(Last1,'Last1.csv')library(survival)library(survminer)Last1$group1 <- ifelse(Last1$`CD20+ B cells`> median(Last1$`CD20+ B cells`),'high','low') sfit1 <- survfit(Surv(OS.time/30, OS)~CIC, data=Last1)DD10<-ggsurvplot(sfit1,pval =T,data = Last1,risk.table = TRUE,size = 1,palette = 'nejm') pdf('DD10.pdf',width = 7,height = 6) print(DD10) dev.off()
图片.png

相关性

gsva_data1 <- rbind(gsva_data,hotair)results2<- as.data.frame(t(gsva_data1))library(ggplot2) corr_eqn <- function(x,y,digits=2) { test <- cor.test(x,y,method='spearman') paste(paste0('n = ',length(x)), paste0('r = ',round(test$estimate,digits),'(pearson)'), paste0('p.value= ',round(test$p.value,digits)), sep = ', ') } PP2 <- results2 %>% ggplot(aes(Hotair,Activated))+ geom_point(col='#984ea3')+ geom_smooth(method=lm, se=T,na.rm=T, fullrange=T,size=2,col='#fdc086')+ geom_rug(col='#7fc97f')+ theme_minimal()+ xlab(paste0('Hotair',' (log2(TPM))'))+ ylab(paste0('Activated',' (NES)'))+ labs(title = paste0(corr_eqn(results2$Hotair,results2$Activated)))+ theme(plot.title = element_text(hjust = 0.5), plot.margin = margin(1, 1, 1, 1, 'cm'))
图片.png
本站仅提供存储服务,所有内容均由用户发布,如发现有害或侵权内容,请点击举报
打开APP,阅读全文并永久保存 查看更多类似文章
猜你喜欢
类似文章
200块的代码我的学徒免费送给你,GSVA和生存分析
批量生存分析(logrank和单因素COX)
TCGA转录组差异分析后多种基因功能富集分析:从GO/KEGG到GSEA和GSVA/ssGSEA(含基因ID转换)
就想把表达矩阵区分成为蛋白编码基因和非编码有这么难吗?
GSVA分析
纯R代码实现ssGSEA算法评估肿瘤免疫浸润程度
更多类似文章 >>
生活服务
热点新闻
分享 收藏 导长图 关注 下载文章
绑定账号成功
后续可登录账号畅享VIP特权!
如果VIP功能使用有故障,
可点击这里联系客服!

联系客服