# Service hotline (web-page residue, not part of the script):
# 178 0020 3020
# Remove every (non-hidden) object from the global environment so the run
# does not pick up leftovers from an earlier session.
rm(list=ls())
# survival supplies Surv() and coxph(), used for the Cox models below.
library("survival")
# Hard-coded project root: every relative path used later in the script
# (geneType.csv, data/, clinical/, unicox/) is resolved against it.
setwd("D:/TCGA/")
# plyr supplies ldply(), used to row-bind the per-gene result frames.
library("plyr")
# Univariate Cox proportional-hazards fit for one covariate.
#
# Fits the model `coxData ~ x` with coxph() and condenses its summary into a
# single-row data frame. The function reads two objects that are NOT passed
# in as arguments and must already exist where the function can see them:
#   surData - data frame containing the covariate column named by `x`
#   coxData - survival response with one entry per row of `surData`
#
# Args:
#   x: character string naming one column of `surData`.
#
# Returns:
#   A data frame with the columns
#     var          - the input name, unchanged
#     Hazard.Ratio - column 2 of the summary coefficient table, rounded to
#                    3 decimals
#     CI95         - columns 3-4 of the summary confidence-interval table,
#                    rounded to 3 decimals and joined with "-"
#     P.Value      - column 5 of the summary coefficient table, unrounded
func_unicox <- function(x) {
  # A column whose name is not a syntactic R name (for example one containing
  # "-") would be parsed as an expression once pasted into the formula, so it
  # is wrapped in backticks. Anything that is not an exact column name of
  # surData is passed through untouched, exactly as before.
  term <- x
  if (is.character(x) && length(x) == 1L &&
      x %in% colnames(surData) && !identical(make.names(x), x)) {
    term <- paste0("`", x, "`")
  }

  FML <- as.formula(paste0("coxData~", term))
  GCox <- coxph(FML, data = surData)
  GSum <- summary(GCox)

  HR <- round(GSum$coefficients[, 2], 3)
  PValue <- GSum$coefficients[, 5]
  CI <- paste0(round(GSum$conf.int[, 3:4], 3), collapse = "-")

  # data.frame() rewrites the quoted names to Hazard.Ratio and P.Value.
  unicox <- data.frame(
    "var" = x,
    "Hazard Ratio" = HR,
    "CI95" = CI,
    "P Value" = PValue
  )
  return(unicox)
}
#
# Cohorts to process. The loop further down reads data/<code>.csv and
# clinical/<code>.csv for every code listed here.
#
# Broader list kept for reference; swap it in for the single-cohort
# assignment below to run all of them.
# NOTE(review): "KIRC" does not appear in this list - confirm whether that
# omission is intentional.
# cancerList <- c(
#   "ACC", "BLCA", "BRCA", "CESC", "CHOL",
#   "COAD", "DLBC", "ESCA", "GBM", "HNSC",
#   "KICH", "KIRP", "LAML", "LGG",
#   "LIHC", "LUAD", "LUSC", "MESO", "OV",
#   "PAAD", "PCPG", "PRAD", "READ", "SARC",
#   "SKCM", "STAD", "TGCT", "THCA", "THYM",
#   "UCEC", "UCS", "UVM"
# )
cancerList <- "KICH"
# Gene annotation table; joined onto the Cox results through its `ensg` column.
ensg2symbol <- read.csv("geneType.csv", header = TRUE)

# Make sure the output folder exists before any model is fitted, so a missing
# directory is not first discovered by write.csv() at the end of a long run.
dir.create("unicox", showWarnings = FALSE)

# For every cohort: load expression and clinical data, build the survival
# table, fit one univariate Cox model per gene column with func_unicox(), and
# write the annotated results to unicox/<cohort>.csv.
for (cancer in cancerList) {
  # cancer <- "DLBC"  # set by hand to step through the loop body interactively
  print(cancer)

  # ---- Expression matrix (rows = genes, columns = samples) ----------------
  mRNA <- read.csv(paste0("data/", cancer, ".csv"), header = TRUE, row.names = 1)

  # Characters 14-15 of each column name are read as a numeric code and only
  # columns with a code below 11 are kept (presumably the TCGA barcode
  # sample-type field, where these codes denote tumour samples - confirm).
  # Columns whose code is not numeric are dropped instead of aborting the
  # subset, and drop = FALSE keeps a data frame even if one column matches.
  sampleCode <- as.numeric(substr(colnames(mRNA), 14, 15))
  tumor_mRNA <- mRNA[, !is.na(sampleCode) & sampleCode < 11, drop = FALSE]

  # Gene identifiers are cut to their first 15 characters.
  row.names(tumor_mRNA) <- substr(row.names(tumor_mRNA), 1, 15)

  # Per-gene median, taken before the log transform below.
  tumor_mRNA_median <- data.frame(
    "ense" = row.names(tumor_mRNA),
    "median" = apply(tumor_mRNA, 1, median)
  )

  # log2 with a 0.1 offset, then transpose so that rows are samples.
  tumor_mRNA <- log2(tumor_mRNA + 0.1)
  tumor_mRNA <- as.data.frame(t(tumor_mRNA))

  # The first 12 characters of the sample name serve as the patient id; only
  # the first sample of each patient is kept.
  tumor_mRNA$tcgaid <- substr(row.names(tumor_mRNA), 1, 12)
  tumor_mRNA <- tumor_mRNA[!duplicated(tumor_mRNA$tcgaid), , drop = FALSE]
  row.names(tumor_mRNA) <- tumor_mRNA$tcgaid

  # ---- Clinical table -> overall-survival table ---------------------------
  clinical <- read.csv(
    paste0("clinical/", cancer, ".csv"),
    header = TRUE, row.names = 1, stringsAsFactors = FALSE
  )
  clinical$vital_status <- toupper(as.character(clinical$vital_status))
  # Use "." as separator so the ids match the expression column names.
  clinical$submitter_id <- gsub("-", ".", clinical$submitter_id)
  # %in% never yields NA, so rows with a missing vital status are dropped
  # here rather than surviving as all-NA rows.
  clinical <- clinical[clinical$vital_status %in% c("DEAD", "ALIVE"), ]

  # Event indicator: 1 = dead, 0 = alive. Time is days_to_death for the dead
  # and days_to_last_follow_up for the living; non-numeric entries become NA.
  isDead <- clinical$vital_status == "DEAD"
  OS <- data.frame(
    "tcgaid" = clinical$submitter_id,
    "status" = ifelse(isDead, 1, 0),
    "time" = ifelse(
      isDead,
      as.numeric(clinical$days_to_death),
      as.numeric(clinical$days_to_last_follow_up)
    )
  )

  # Keep only patients with a known time of more than 30 days.
  OS <- OS[!is.na(OS$time) & OS$time > 30, ]

  # Values are not auto-printed inside a for loop, so the sizes are reported
  # explicitly.
  message(
    cancer, ": ", nrow(OS), " patients with usable survival data, ",
    nrow(tumor_mRNA), " patients with tumour expression data"
  )

  # ---- Univariate Cox model per gene --------------------------------------
  # Inner join: a row lacking either survival or expression values would be
  # removed by coxph()'s default NA handling anyway.
  # surData and coxData are read by func_unicox() as globals, so these two
  # names must not change.
  surData <- merge(OS, tumor_mRNA, by = "tcgaid")
  coxData <- Surv(time = surData$time, event = surData$status)

  # Every column after tcgaid / status / time is a gene.
  varCox <- colnames(surData)[-seq_len(3)]
  uniData <- lapply(varCox, func_unicox)
  uniData <- ldply(uniData, data.frame)
  uniData$var <- substr(uniData$var, 1, 15)

  # ---- Annotate and write -------------------------------------------------
  uniResult <- merge(uniData, ensg2symbol, by.x = "var", by.y = "ensg", all.x = TRUE)
  uniResult <- merge(uniResult, tumor_mRNA_median, by.x = "var", by.y = "ense", all.x = TRUE)
  write.csv(uniResult, file = paste0("unicox/", cancer, ".csv"), row.names = FALSE)
}
# Attachment (web-page residue, not part of the script)