setwd("C:/cwru/classes/epbi473_05/lec7(staudt)") library(Biobase) library(hu6800) library(survival) load("C:/cwru/data/shipp/shipp.RData") eset pData(eset) E=exprs(eset) p=dim(E)[1] pDat=pData(eset) DFS=Surv(pDat$SURTIME,pDat$OUTCOME==1);DFS plot(survfit(DFS~pDat$IPI),main="Disease-Free Survival",xlab="Months",xlim=c(0,61),col=c('red','orange','green','blue')) SLogRT=survdiff(DFS~pDat$IPI);SLogRT Probs=rep(0,p) names(Probs)=row.names(E) for (i in 1:p) #for (i in 1:3) # to debug # for each gene, partition into two groups, those above and below median # Then do a log rank test to get the P values for differences in survival # Rank order genes based on these P values. { grp=(E[i,]-median(E[i,]))>0 S=survdiff(DFS~grp) chi=S[[5]] Probs[i]=1-pchisq(chi,1) # always two groups here, so n=2-1=1 degrees of freedom } hist(Probs) 1/7129 sort(Probs)[1:10] # none of the P values are less than expected for uniformly distributed P values coxProbs=rep(1,p) names(coxProbs)=row.names(E) for (i in 1:p) #for (i in 1:1) { S=coxph(DFS~E[i,]) coxProbs[i]=2*(1-pnorm(abs(S$coefficients/sqrt(S$var)))) } S sort(coxProbs)[1:10] # none of the P values are less than expected for uniformly distributed P values load("C:/cwru/data/staudt/lymphodat.RData") clinData dim(dat) summary(dat)