# make a model of (neg) in Philadelphia # Kyle Gorman # libraries library(arm) library(Design) library(languageR) # read in data dat = read.csv('lcv_neg.csv', header=1) # look at collinearities cor(dat$Occ, dat$Res) cor(dat$Occ, dat$Sc1) cor(dat$Occ, dat$Sc2) cor(dat$Occ, dat$Mo) cor(dat$Res, dat$Sc1) cor(dat$Res, dat$Sc2) cor(dat$Res, dat$Mo) cor(dat$Sc1, dat$Sc2) # huge cor(dat$Sc1, dat$Mo) cor(dat$Sc2, dat$Mo) collin.fnc(dat, c('Occ', 'Res'))$cnumber # just an example vif(glm(Neg ~ Occ + Res, data=dat, family=binomial)) # another one # plot 'em pdf('pairs.pdf') pairs( ~ Occ + Res + Sc1 + Sc2 + Mo, data=dat) lower.panel=panel.smooth, upper.panel=panel.cor dev.off() # do some residualization dat$rRes = residuals(lm(Res ~ Occ, data=dat)) dat$rSc2 = residuals(lm(Sc2 ~ Occ + rRes, data=dat)) dat$rSc1 = residuals(lm(Sc1 ~ Occ + rRes + rSc2, data=dat)) dat$rMo = residuals(lm(Mo ~ Occ + rRes + rSc2 + rSc1, data=dat)) # standardize dat$sOcc = (dat$Occ - mean(dat$Occ)) / (2 * sd(dat$Occ)) dat$srRes = (dat$rRes - mean(dat$rRes)) / (2 * sd(dat$rRes)) dat$srSc2 = (dat$rSc2 - mean(dat$rSc2)) / (2 * sd(dat$rSc2)) dat$srSc1 = (dat$rSc1 - mean(dat$rSc1)) / (2 * sd(dat$rSc1)) dat$srMo = (dat$rMo - mean(dat$rMo)) / (2 * sd(dat$rMo)) # fit a flat model m0 = glm(Neg ~ sOcc + srRes + srSc2 + srSc1 + srMo + Sex + Style + rcs(Age, 4), family=binomial, data=dat) # bootstrap validation analysis based on Baayen, 2008 p. 212, not shown here. # fit a hierarchical model m1 = lmer(Neg ~ sOcc + srRes + srSc2 + srSc1 + srMo + Sex + Style + rcs(Age, 4) + (1 | Name), family=binomial, data=dat) source('http://ling.upenn.edu/~kgorman/R/ranOutliers.R') # get this if necessary ranOutliers(m1) pvals.fnc(m1) # how i did the plot png('newNegOcc.png') plot((jitter(Neg, .8) + .21) / 1.42 ~ jitter(sOcc, 2.35), xaxt="n", xlab="Occupation", ylab="Pr(neg)", main="% (neg) by Occupation", data=dat) axis(1, seq(min(dat$sOcc), max(dat$sOcc), length.out=7), 1:7) x = seq(min(dat$sOcc), max(dat$sOcc), length.out=1000) y = invlogit(-1.36 + -2.01 * seq(1, 7, 1000) * x) lines(y ~ x, col=2, lwd=3) dev.off() # this is why i'm hot ma = subset(dat$NCA, dat$NCB != 'NA' & dat$NCA != 'NA' & dat$Occ != 'NA' & dat$NTB != 'NA') mb = subset(dat$NCB, dat$NCB != 'NA' & dat$NCA != 'NA' & dat$Occ != 'NA' & dat$NTB != 'NA') na = subset(dat$NTA, dat$NTB != 'NA' & dat$NTA != 'NA' & dat$Occ != 'NA' & dat$NCB != 'NA') nb = subset(dat$NTB, dat$NTB != 'NA' & dat$NTA != 'NA' & dat$Occ != 'NA' & dat$NCB != 'NA') o = 1 + subset(dat$Occ, dat$NTB != 'NA' & dat$NTA != 'NA' & dat$Occ != 'NA' & dat$NCB != 'NA') onea = sum(subset(ma, o == 1)) / sum(subset(na, o == 1)) twoa = sum(subset(ma, o == 2)) / sum(subset(na, o == 2)) trea = sum(subset(ma, o == 3)) / sum(subset(na, o == 3)) foua = sum(subset(ma, o == 4)) / sum(subset(na, o == 4)) fiva = sum(subset(ma, o == 5)) / sum(subset(na, o == 5)) sixa = sum(subset(ma, o == 6)) / sum(subset(na, o == 6)) seva = sum(subset(ma, o == 7)) / sum(subset(na, o == 7)) oneb = sum(subset(mb, o == 1)) / sum(subset(nb, o == 1)) twob = sum(subset(mb, o == 2)) / sum(subset(nb, o == 2)) treb = sum(subset(mb, o == 3)) / sum(subset(nb, o == 3)) foub = sum(subset(mb, o == 4)) / sum(subset(nb, o == 4)) fivb = sum(subset(mb, o == 5)) / sum(subset(nb, o == 5)) sixb = sum(subset(mb, o == 6)) / sum(subset(nb, o == 6)) sevb = sum(subset(mb, o == 7)) / sum(subset(nb, o == 7)) pdf('neg-occ.pdf') plot(c(onea, twoa, trea, foua, fiva, sixa, seva), lty=5, type='b', main="% (neg) by occupation", xlab="Occ index", ylab="% (neg)", ylim=c(0, 1)) lines(c(oneb, twob, treb, foub, fivb, sixb, sevb), type='b') legend("topright", c("A", "B"), lty=c(5, 1), title="Style") dev.off()