# The goal here is to generate the file at the end. Plots are given just to monitor progress. filecanc=c('breast','digothr','malegen','femgen','other','respir','colrect','lymyleuk','urinary') load(paste("~/data/seer/00/pops.RData",sep="")) # this loads in pops attach(pops) PY=tapply(population,list(sex=popsex,age=popage),sum) dimnames(PY)$sex<-c("males","females") dimnames(PY)$age<-c("0","1to4","5to9","10to14","15to19","20to24","25to29","30to34","35to39", "40to44","45to49","50to54","55to59","60to64","65to69","70to74","75to79","80to84","85+") sum(PY) pym=PY["males",] pyf=PY["females",] PY graphics.off() #X11(width=9.7,height=7) # windows(width=9.7,height=7) # quartz(width=9.7,height=7) par(mfcol=c(2,3)) j=1 cases=list() head(DF) for (k in c(1:9)) { load(paste("~/data/seer/00/",filecanc[k],".RData",sep="")) # this loads in DF DF=DF[!is.na(DF$ICD9),] # get rid of missing ICD9 entries if (k==1) { canc=c('Breast'); code=list(c(1741,1748)) } if (k==2) {canc=c('Esophogeal','Stomach','Liver','Pancreatic'); code=list(c(1504,1505),c(1510,1519),c(1550,1550),c(1570,1579)) } if (k==3) {canc=c("Prostate"); code=list(c(185,185))} if (k==4) {canc=c("Cervical","Uterine","Vaginal"); code=list(c(1809,1809),c(1820,1820),c(1830,1830))} if (k==5) {canc=c("Thyroid","Brain"); code=list(c(193,193),c(1911,1919))} if (k==6){canc=c("Laryngeal","Lung");code=list(c(1610,1610),c(1622,1629))} if (k==7){canc=c("Colorectal"); code=list(c(1530,1539)) } if (k==8) { canc=c("NHL", "Hodgkins", "MML" ,"ALL", "CLL", "AML", "CML2051", "CML9863") code=list(c(2000,2008),c(2015,2016),c(2030,2030),c(2040,2040),c(2041,2041),c(2050,2050),c(2051,2051),c(9863,9863))} if (k==9) { canc=c("Bladder","Kidney"); code=list(c(1880,1889),c(1890,1891))} for (i in 1:length(code)) { Indx=((DF$ICD9>=code[[i]][1])&(DF$ICD9<=code[[i]][2])) if (canc[i]=="CML9863") Indx=((DF$histo2>=code[[i]][1])&(DF$histo2<=code[[i]][2])) ndat=DF[Indx,] attach(ndat) print(canc[i]) if ((k!=1)&(k!=4)) # if not breast and not fem gen {m=hist(boys<-agerec[sex==1],breaks=c(seq(-.5,17.5,1),100),plot=FALSE) nm=length(boys)} if (k!=3) # if not male gen {f=hist(girls<-agerec[sex==2],breaks=c(seq(-.5,17.5,1),100),plot=FALSE) nf=length(girls)} age=c(0.5,3,seq(7.5,87.5,5)) if ((k!=1)&(k!=3)&(k!=4)) # plot both sexes { comb=c(m$counts/pym,f$counts/pyf) comb=comb[comb>0] # remove zeros to avoid taking their logs in the plots plot(age,m$counts/pym,log="y",type='b',col='blue',ylab="Cases per Person-Year", main=paste(canc[i],": ",nm, " Males ",nf," Females"),ylim=c(min(comb),max(comb))) lines(age,f$counts/pyf,type='b',col='red') cases[[canc[i]]]=list(males=m$counts,females=f$counts) } if ((k==1)|(k==4)) # plot females only { plot(age,f$counts/pyf,log="y",type='b',col='red',ylab="Cases per Person-Year",main=paste(canc[i],": ",nf," Females")) cases[[canc[i]]]=list(females=f$counts) } if (k==3) # plot males only { plot(age,m$counts/pym,log="y",type='b',col='blue',ylab="Cases per Person-Year",main=paste(canc[i],": ",nm," Males")) cases[[canc[i]]]=list(males=m$counts) } #if (j%%6==0) locator(n=1) # point and click in figure to move ahead to next plot set j=j+1 } # the i for loop over within file cancer types detach(ndat) } # k loop on files detach(pops) #cases=case[[2:length(cases)]] cases PY save(cases,PY,file="~/data/seer/casesPY00to11.RData")