一 载入R包,数据
library(tidyverse)
library(openxlsx)
library("survival")
library("survminer")
load("RNAseq.SKCM.RData")
#选取部分基因作示例
data.mat <- t(expr[order(apply(expr, 1, mad), decreasing = T)[1:1000],])
#读取生存数据
surv <- read.table("TCGA-SKCM.survival.tsv",sep = "\t" , header = T,
stringsAsFactors = FALSE ,
check.names = FALSE)
phe <- read.xlsx("cli.xlsx",sheet = 1)
二 批量单因素分生存分析
module_exp <- as.data.frame(data.mat) %>%
rownames_to_column("sample") %>%
inner_join(surv) %>% #添加生存数据
select(sample,OS,OS.time,`_PATIENT`,everything()) %>% #将生存列放到前面
select_all(~str_replace_all(., "-", "_")) #基因ID不规范会报错,下划线替换-
dim(module_exp)
#指定待分析的基因
module_expr.cox <- module_exp
covariates <- names(module_expr.cox[,5:ncol(module_expr.cox)])
#构建单因素模型
univ_formulas <- sapply(covariates,
function(x) as.formula(paste('Surv(OS.time, OS)~', x)))
univ_models <- lapply( univ_formulas, function(x){coxph(x, data = module_expr.cox)})
# 提取结果
univ_results <- lapply(univ_models,
function(x){
x <- summary(x)
p.value<-signif(x$wald["pvalue"], digits=3)
wald.test<-signif(x$wald["test"], digits=3)
beta<-signif(x$coef[1], digits=3);#coeficient beta
HR <-signif(x$coef[2], digits=3);#exp(beta)
HR.confint.lower <- signif(x$conf.int[,"lower .95"], 3)
HR.confint.upper <- signif(x$conf.int[,"upper .95"], 3)
HR <- paste0(HR, " (",
HR.confint.lower, "-", HR.confint.upper, ")")
res<-c(beta, HR, wald.test, p.value)
names(res)<-c("beta", "HR (95% CI for HR)", "wald.test",
"p.value")
return(res)
})
res <- t(as.data.frame(univ_results, check.names = FALSE))
三 绘制森林图
set.seed(1234)
res_2_test <- res_2[sample(nrow(res_2), 30), ]
#构建tabletext
sample <- as.data.frame(res_2_test)
tabletext1<-as.character(sample[,1])
tabletext2<-round(as.numeric(sample[,'HR']),5)
tabletext3<-paste(round(as.numeric(sample[,'HR']),3),round(as.numeric(sample[,'lower_95']),2),sep="(")
tabletext4<-paste(tabletext3,round(as.numeric(sample[,'upper_95']),2),sep="-")
tabletext5<-paste0(tabletext4,sep=")")
tabletext<-cbind(tabletext1,tabletext2,tabletext5)
forestplot(labeltext=tabletext, #文本信息
mean = round(sample[,'HR'],3),##HR值
lower = round(sample[,"lower_95"],2),##95%置信区间
upper = round(sample[,"upper_95"],2),#95%置信区间
boxsize = 0.8,##大小
graph.pos=4,#图在表中的列位置
graphwidth = unit(0.4,"npc"),#图在表中的宽度比例
fn.ci_norm="fpDrawDiamondCI",#box类型选择钻石,可以更改fpDrawNormalCI;fpDrawCircleCI等
col=fpColors(box="steelblue", lines="black", zero = "black"),#颜色设置
lwd.ci=2,ci.vertices.height = 0.1,ci.vertices=TRUE,#置信区间用线宽、高、型
zero=1,#zero线横坐标
lwd.zero=2,#zero线宽
grid=T,
lwd.xaxis=2,#X轴线宽
title="Hazard Ratio",
xlab="",#X轴标题
clip=c(-Inf,3),#边界
colgap = unit(0.5,"cm")
)
ggplot(res_2_test, aes(HR, Variable))+ ##定义X轴和Y轴,以类型分类
geom_point(size=2.5)+ #点的大小
geom_errorbarh(aes(xmax =upper_95, xmin = lower_95), height = 0.4)+ ##95%置信区间,误差线
geom_vline(aes(xintercept = 1))+ #以1为分界线
xlab('HR(95%CI)') + ylab(' ')+ #定义横纵坐标
theme_bw(base_size = 12)
res_2_test$type <- c(rep("immune",5),rep("",10) ,rep("apoptosis",7) ,rep("",8) )
#排序 ,标识type信息
ggplot(res_2_test, aes(HR, reorder(Variable,HR),col=type,shape=type))+
geom_point(size=2.5)+
geom_errorbarh(aes(xmax =upper_95, xmin = lower_95), height = 0.4)+
geom_vline(aes(xintercept = 1))+
xlab('HR(95%CI)') + ylab(' ')+
theme_bw(base_size = 12)+
scale_color_manual(values = c("gray", "steelblue", "red")) #设置颜色
四 单因素预后显著基因
根据二中得到的所有基因的单因素生存分析结果,可以根据阈值(p < 0.05)筛选 预后显著的基因集,
KM_sig <- res_2$res %>%
filter(p.value <= 0.05)
◆ ◆ ◆ ◆ ◆
更多精心内容详见:精心整理(含图PLUS版)|R语言生信分析,可视化(R统计,ggplot2绘图,生信图形可视化汇总)
联系客服