# Interactive walkthrough of lists and data frames: construction, reading
# Excel sheets, indexing, NA handling, rbind/cbind/merge, lapply vs sapply,
# ordering, and subsetting. Wrapping an assignment in parentheses prints the
# assigned value; dput() shows the underlying structure of an object.

# from Matloff chapter 1 ----
(dL <- list(kids = c("Jack", "Jill"), ages = c(12, 10)))
dput(dL)
(dL <- data.frame(dL)) # now it goes to the print method as an object of class data.frame
dput(dL)
(d <- data.frame(kids = c("Jack", "Jill"), ages = c(12, 10),
                 stringsAsFactors = FALSE))
dput(d)

# Matloff Chapter 5 on Dataframes ----
getwd() # with RStudio you can read it off the console bar
# NOTE(review): machine-specific path; point this at the folder that holds
# data.xls and aba.RData before running.
setwd("/Users/radivot/src/htdocs/EPBI415/files14/week3lists")
library(XLConnect)
wb <- loadWorkbook("data.xls")
dput(wb)
(d <- readWorksheet(wb, sheet = "data")) # data is also the sheet name
dput(d) # keeping strings as strings by default is preferable, I think

# NOTE(review): read.xlsx() below is called with xlsx-style arguments, so the
# xlsx package is loaded here -- confirm this line was not meant to stay
# commented out.
library(xlsx)
(d <- read.xlsx("data.xls", sheetName = "data")) # nice because one line does it all
dput(d) # but here strings come in as factors
# unless stringsAsFactors is overridden through ... ; hit f1 with cursor in
# read.xlsx for help that suggests this for the ... argument
(d <- read.xlsx("data.xls", sheetName = "data", stringsAsFactors = FALSE))
dput(d)

# Indexing ----
d[2:4, 1]
class(d[2:4, 1])
(dd <- d[2:4, 1, drop = FALSE]) # conceptually like drop=FALSE arg of [ on matrices
dput(dd)
d
d[d$Col1 > 20, ]

# Missing values ----
d[3, 1] <- NA
d
complete.cases(d)
d[complete.cases(d), ]

# Adding rows and columns ----
# c() coerces the new row to a single all-character vector ...
rr <- rbind(d, c(2, "F", 35, "joe"))
dput(rr)
# ... whereas list() lets each value keep its own type.
rr <- rbind(d, list(2, "F", 35, "joe"))
cbind(d, five = 1:4)
# presumably d has 4 rows (see five = 1:4 above), so 1:2 is recycled -- TODO confirm
d$six <- 1:2
d # just extending the list

# Merging ----
# rnorm(7) assumes d$Col4 contributes 4 names to the 3 added here -- TODO confirm
(dd <- data.frame(Col4 = c(d$Col4, "sally", "harry", "joe"),
                  weight = rnorm(7)))
merge(d, dd) # only takes rows that match col4 names in d
merge(dd, d) # no diff, as long as indexing columns by names

# lapply vs sapply on the aba data ----
load("aba.RData")
head(aba)
sapply(aba, class)
aba <- aba[aba$gender != "I", ] # remove infants
head(aba)
d <- aba[, -1]
head(d)
y <- as.factor(aba[, 1])
dput(y)
y # want first col as factor

# f() reads y from the global environment and regresses it on one column x.
f <- function(x) glm(y ~ x, family = binomial)$coefficients
lapply(d, f)
sapply(d, f)

# Same model, but returning the whole fitted glm object instead of just the
# coefficients.
f <- function(x) glm(y ~ x, family = binomial)
(L <- lapply(d, f))
sapply(d, f) # here sapply is worthless. As codes get more interesting, this happens more
lapply(L, summary) # whereas the list output L is still useful

# Ordering ----
(d <- aba[1:15, ])
order(d$gender)
d[order(d$gender), ] # note that row names are strings of integers, so they stick with their rows
d[order(d$gender, d$rings), ]
d[order(d$gender, d$rings, d$shellWT), ] # within 10 wt was decreasing, now it's increasing

##### From An introduction to R by Longhow Lam (LHL) ----
mtcars
head(mtcars, 10)
tail(mtcars)
mtcars[mtcars$wt > 3 & mtcars$mpg > 20, ]
subset(mtcars, wt > 3 & mpg > 20) # all 3 of these appear in my codes
with(mtcars, mtcars[wt > 3 & mpg > 20, ]) # they seem equally logical

# FYI, missing values are printed as <NA> when strings are printed without quotes.
(s <- c("orange", NA, "apple", "peach"))
(fs <- as.factor(s))
(d <- data.frame(s, fs, num = c(1, NA, 3:4), anum = c("a", NA, 3, 4),
                 stringsAsFactors = FALSE))
sapply(d, class)
get("fs", d)
get("s", d)
dput(d)
d