# See 2002 PNAS paper, vol 99, 15095-100 #Multistage carcinogenesis and the incidence of colorectal cancer #E. Georg Luebeck* and Suresh H. Moolgavkar library(Bhat) age=seq(2.5,87.5,5); years=1973:2000 m=length(years) bYearsVec=seq(1970,1885,-5); n=length(bYearsVec) byear.matrix=matrix(rep(0,n*m),nrow=n) colnames(byear.matrix)<-years rownames(byear.matrix)<-1:n for (i in 1:n) byear.matrix[i,]=bYearsVec[i]+0:(m-1) byear.matrix = byear.matrix # ********** please run the scripts at http://epbi-radivot.cwru.edu/ICB/seer/ to make this file ************************************ load("c:/SEER/pops.RData") # this loads in pops attach(pops) na=length(age);na pop.by.age=matrix(rep(0,m*na),nrow=na) rownames(pop.by.age)<-1:na colnames(pop.by.age)<-years for (j in 1:na) for (i in 1:m) { if (j==1) pop.by.age[j,i]=sum(population[(poprace==1)&(popsex==1)&(popage<=1)&(popyear==years[i])]) else pop.by.age[j,i]=sum(population[(poprace==1)&(popsex==1)&(popage==j)&(popyear==years[i])]) } detach(pops) # now make incidence matrix # load("c:/SEER/colrect.RData") # this loads in DF DF=DF[!is.na(DF$ICD9),] # get rid of missing ICD9 entries Indx=((((DF$ICD9>=1530)&(DF$ICD9<=1549))|(DF$ICD9==1590)|(DF$ICD9==2303) )&(DF$sex==1)&(DF$race==1)) # &(DF$numPrim==1) ) ndat=DF[Indx,] attach(ndat) inc.by.age=matrix(rep(0,m*na),nrow=na) rownames(inc.by.age)<-1:na colnames(inc.by.age)<-years for (j in 1:na) for (i in 1:m) { if (j==1) inc.by.age[j,i]=length(ageRC[(ageRC<=1)&(yDiag==years[i])]) else inc.by.age[j,i]=length(ageRC[(ageRC==j)&(yDiag==years[i])]) } inc.by.age=inc.by.age detach(ndat) # first dump data to screen to see what is being inputted from SEER pop.by.age byear.matrix inc.by.age # white males, colon (1530s) + rectal (1540s) age ################################################################## ## Four Stage model Hazard (Mu0=Mu1) ################################################################# integrand <- function(u,r,p,q,t) { (((q-p)/(q*exp(-p*(t-u))-p*exp(-q*(t-u))))^(r)-1) } hz <- function(param,t) { mu0<- param[1] mu1<- mu0 r<- param[2] p <- param[3] q <- param[4] intf <- integrate(integrand,0,t,,,,,,,r,p,q,t) mu0*X*(1-exp(mu1*intf[[1]])) } ######################################################################## ### Estimation with NO birth year and calendar year coefficients ####### ######################################################################## ### Negative Log-likelihood No ci's and bi's nlogf <- function(param){ aux <- 0 for(i in 1:17) {h=hz(param,age[i]) for (j in 1:28) { aux1 <- pop.by.age[i,j]*h aux <- aux+inc.by.age[i,j]*log(aux1)-aux1 }} -aux } ######### Setting up the problem and calling the optimization routine X <- 10^8 # number of target cells x <- list(label=c("mu0","r","p","q"),est=c(1e-6,1,-.15,1e-5),low=c(1e-20,1e-20,-2,1e-15),upp=c(1,100,-1e-15,2)) mle=dfp(x, nlogf) hofAge=NULL for (i in 1:length(age)) hofAge[i]=hz(mle$est,age[i]) plot(age,hofAge,ylab="Incidence",log="y",ylim=c(1e-5,1e-2)) # this recreates one of the Fig. 2 plots ###################################################################### ### Estimation with calendar year coefficients ########## ###################################################################### ### Negative Log-likelihood with ci's only nlogfC <- function(s){ aux <- 0 param <- s[1:4] ci <- c(s[5:21],1,s[22:31]) for(i in 1:17) { h <- hz(param,age[i]) for (j in 1:28) { aux1 <- ci[j]*pop.by.age[i,j]*h aux <- aux+inc.by.age[i,j]*log(aux1)-aux1 }} -aux} ######### Setting up the problem and calling the optimization routine b=c() ci=c() clab <- paste("c", c(1:17,19:28), sep = "") #for (i in 1:17) {b <- c(b,1)} for (j in 1:27) {ci <- c(ci,1)} lc <- ci*.5 uc <- ci*1.5 x <- list(label=c("mu0","r","p","q",clab),est=c(mle$est[1:4],ci),low=c(1e-15,1e-15,-1,1e-15,lc),upp=c(1,100,-1e-15,1,uc)) #### Using the previous estimates as the initial points mleC=dfp(x, nlogfC) Year<-1973:2000 Ci=c(mleC$est[5:21],1,mleC$est[22:31]) plot(Year,Ci,ylab="Incidence multiplier") # redo model fit plot now that calendar year effects have been controlled for param=mleC$est[1:4] hofAge=NULL for (i in 1:length(age)) hofAge[i]=hz(param,age[i]) Indx=(age>55)&(age<85) elders=hofAge[Indx] mat=elders%o%Ci mat rb=c("red","orange","yellow","green","blue","violet") # code 5 year age groups by light spectrum matplot(Year,t(mat),ylab="Incidence",pch=1:6,col=rb,main="Incidence age groups 55-60 (red) => 80-84 (violet)") # which looks much like a Fig. 3 plot ###################################################################### ### Estimation with birth year coefficients ########## ###################################################################### ### Negative Log-likelihood with bi's nlogfB <- function(s){ aux <- 0 param <- s[1:4] b <- c(1,s[5:21]) for(i in 1:17) { h <- hz(param,age[i]) for (j in 1:28) { birth <-min(floor((byear.matrix[i,j]-1890)/5)+1,18) aux1 <- ci[j]*b[birth]*pop.by.age[i,j]*h aux <- aux+inc.by.age[i,j]*log(aux1)-aux1 }} -aux} ######### Setting up the problem and calling the optimization routine ci <- c(mleC$est[5:21],1,mleC$est[22:31]) b=c() blab <- paste("b", 2:18, sep = "") for (i in 1:17) {b <- c(b,1)} lb <- b*.2 ub <- b*20 x <- list(label=c("mu0","r","p","q",blab),est=c(mleC$est[1:4],b),low=c(1e-15,1e-2,-1,1e-15,lb),upp=c(1,100,-1e-15,1,ub)) #### Using the previous estimates as the initial points mleB=dfp(x, nlogfB) BYear<-1:18 Bi=c(1,mleB$est[5:21]) plot(BYear,Bi,ylab="Incidence multiplier",xlab="Birth year group") # redo model fit plot now that birth year and calendar year effects have been controlled for param=mleB$est[1:4] hofAge=NULL for (i in 1:length(age)) hofAge[i]=hz(param,age[i]) elders=hofAge[Indx] mat=elders%o%Ci # this comes close to reproducing the white males birth year plot in Figure 3 incid=list() plot(1890:1940,rep(0,51),type="n",ylim=c(0,.008),ylab="Incidence",xlab="Birth Year",main="Incidence age groups 55-60 (red) => 80-84 (violet)") for(i in 1:6) { h <- hz(param,age[i+11]) tmph=NULL tmpB=NULL for (j in 1:28) # we want a list 6 long, each with 28 elements { birth <-min(floor((byear.matrix[i+11,j]-1890)/5)+1,18) tmph[j]=Ci[j]*Bi[birth]*elders[i] tmpB[j]=byear.matrix[i+11,j] } incid[[i]]=list(incid=tmph,years=tmpB) points(tmpB,tmph,pch=i,col=rb[i]) } incid