服务热线
178 0020 3020
#生存曲线绘制
library(survival)
library(ggplot2)
library(survminer)
library(magrittr)
setwd("D:/geo/TCGA_LUAD/")
exprSet = read.csv("exprSet.csv",header = T,row.names = 1)
sample = colnames(exprSet)
treat_type=sample
for(i in 1:length(sample)){
if(substr(sample[i],14,15)=="11"){
#是肿瘤,要知道是哪一期
treat_type[i]="Contr"
}else{
treat_type[i]="Tumor"
}
}
treat_type = as.factor(treat_type)
exprSet_tumor = exprSet[,treat_type=="Tumor"]
# 60380 533
#获得data属性
clinical = read.csv("clinical.csv", header = T, stringsAsFactors = F)
data_trait = data.frame(
"caseid"=clinical$submitter_id,
"gender"=0,
"OS"=0,
"status"=0,
"stage_1"=0,
"stage_2"=0,
"stage_3"=0,
"stage_4"=0
)
for(i in 1:length(clinical[,1])){
if(clinical$gender[i]=="male"){
data_trait[i,c("gender")]=1
}
if(clinical$vital_status[i]=="alive"){
data_trait[i,c('status')]=1
data_trait[i,c('OS')]=clinical$days_to_last_follow_up[i]
}else{
data_trait[i,c('status')]=2
data_trait[i,c('OS')]=clinical$days_to_death[i]
}
if(clinical$tumor_stage[i]=="stage i"){
data_trait[i,c("stage_1")]=1
}else if(clinical$tumor_stage[i]=="stage ia"){
data_trait[i,c("stage_1")]=1
}else if(clinical$tumor_stage[i]=="stage ib"){
data_trait[i,c("stage_1")]=1
}else if(clinical$tumor_stage[i]=="stage ii"){
data_trait[i,c("stage_2")]=1
}else if(clinical$tumor_stage[i]=="stage iia"){
data_trait[i,c("stage_2")]=1
}else if(clinical$tumor_stage[i]=="stage iib"){
data_trait[i,c("stage_2")]=1
}else if(clinical$tumor_stage[i]=="stage iiia"){
data_trait[i,c("stage_3")]=1
}else if(clinical$tumor_stage[i]=="stage iiib"){
data_trait[i,c("stage_3")]=1
}else if(clinical$tumor_stage[i]=="stage iv"){
data_trait[i,c("stage_4")]=1
}
}
data_trait$caseid = gsub("-","\\.",data_trait$caseid)
row.names(data_trait)=data_trait$caseid
data_trait = data_trait[,-1]
dim(data_trait)
# 515 7
data_trait = data_trait[data_trait$OS!="--" & data_trait$OS!=0,]
dim(data_trait)
# 502
#删除重复的样本
exprSet_tumor = exprSet[,treat_type=="Tumor"]
exprSet_tumor =as.data.frame(t(exprSet_tumor))
exprSet_tumor$caseid =substr(row.names(exprSet_tumor),1,12)
exprSet_tumor<- aggregate(x = exprSet_tumor[,-535],by = list(exprSet_tumor$caseid), FUN = min)
row.names(exprSet_tumor)=exprSet_tumor$Group.1
exprSet_tumor = exprSet_tumor[,-1]
exprSet_tumor = t(as.matrix(exprSet_tumor))
dim(exprSet_tumor)
#获得data_trait对应的表达数据
expr_new = c()
caseid = as.vector(row.names(data_trait))
for(i in 1:length(caseid)){
if(i==132 | i ==134){
temp = exprSet[,substr(colnames(exprSet),1,12)==caseid[i]]
}else{
temp = exprSet_tumor[,substr(colnames(exprSet_tumor),1,12) ==caseid[i]]
}
if(length(expr_new)==0){
expr_new = temp
}else{
expr_new = cbind(expr_new,temp)
}
}
colnames(expr_new) = caseid
#> dim(trait)
# 533,7
# FHL1,CAVIN2,EDNRB,VEGFD,TEK,AGER,AOC3,RGCC,CA4,INMT,
#PRX,S1PR1,LDB2,ROBO4,CDH5,EPAS1,PECAM1,CLIC5,EMCN,ADH1B
################################# 获得某一基因在LUAD中的生存曲线 ,可以修改为其他基因 ###########
gene="AOC1"
#trait_temp =trait[trait$stage_1==1,]
trait_temp = data_trait
#stage_case = row.names(trait_temp)
#expr =exprSet_tumor[row.names(exprSet_tumor)==gene,colnames(exprSet_tumor) %in% stage_case]
expr =expr_new[row.names(expr_new)==gene,]
expr_clinical = data.frame(
"ID"=caseid,
"value"=as.vector(as.numeric(expr)),
"OS"=trait_temp$OS,
"status"= trait_temp$status,
"s"=1,
"tag"=1
)
q = quantile(expr_clinical$value, na.rm = T)
#前25%为高表达组
low_num = 0
high_num = 0
for(j in 1:length(expr_clinical[,1])){
if(expr_clinical[j,2] > q[4]){
expr_clinical$tag[j]=2
high_num = high_num+1
}else{
expr_clinical$tag[j]=1
low_num = low_num+1
}
if(expr_clinical$status[j]==1){
expr_clinical$s[j]=1
}else{
expr_clinical$s[j]=2
}
}
expr_clinical$OS = as.numeric(as.character(expr_clinical$OS))
expr_clinical$tag = as.factor(expr_clinical$tag)
data1=expr_clinical
pdfname = paste0("sur-0.25-",gene,".pdf")
pdf(pdfname,width=4,height=4)
my_df <- survfit(Surv(OS,s)~tag,data=data1)
ggsurvplot(
my_df,
palette = c("#e01f27", "#154898"),
pval=TRUE,
size = 0.5,
pval.coord=c(1,1),
surv.median.line="hv",
legend.title=gene,
legend.labs=c(paste0("low&median(n=",low_num,")"),paste0("high(n=",high_num,")")),
risk.table=FALSE,
ncensor.plot=FALSE)+ggtitle(gene)
dev.off()
附件