setwd("C:/cwru/classes/epbi473_05/lec7(staudt)")
library(Biobase)
library(hu6800)
library(survival)
load("C:/cwru/data/shipp/shipp.RData")
eset
pData(eset)

E=exprs(eset)
p=dim(E)[1]
pDat=pData(eset)
DFS=Surv(pDat$SURTIME,pDat$OUTCOME==1);DFS
plot(survfit(DFS~pDat$IPI),main="Disease-Free Survival",xlab="Months",xlim=c(0,61),col=c('red','orange','green','blue'))
SLogRT=survdiff(DFS~pDat$IPI);SLogRT

Probs=rep(0,p)
names(Probs)=row.names(E)

for (i in 1:p)
#for (i in 1:3) # to debug
# for each gene, partition into two groups, those above and below median
# Then do a log rank test to get the P values for differences in survival
# Rank order genes based on these P values. 
{
grp=(E[i,]-median(E[i,]))>0
S=survdiff(DFS~grp)
chi=S[[5]]
Probs[i]=1-pchisq(chi,1)  # always two groups here, so n=2-1=1 degrees of freedom
}

hist(Probs)
1/7129
sort(Probs)[1:10]  # none of the P values are less than expected for uniformly distributed P values


coxProbs=rep(1,p)
names(coxProbs)=row.names(E)
for (i in 1:p)
#for (i in 1:1)
{
S=coxph(DFS~E[i,])
coxProbs[i]=2*(1-pnorm(abs(S$coefficients/sqrt(S$var))))
}
S
sort(coxProbs)[1:10]  # none of the P values are less than expected for uniformly distributed P values






load("C:/cwru/data/staudt/lymphodat.RData")
clinData
dim(dat)
summary(dat)