# Pronoun frequencies by age and sex: load the data and draw exploratory plots.
#
# This script downloads FP.txt into the working directory, reads it into the
# data frame `Pronouns`, and then draws, in order:
#   1. first person singular frequency by age
#   2. first person plural frequency by age
#   3. both of the above on shared axes
#   4. the singular / plural ("me/we") ratio by age
#   5. same-sex vs. cross-sex third person reference by age
# Every derived vector is left in the global environment for later use.

# Read in pronoun frequencies by age and sex, from the PPC dataset:
download.file("http://ling.upenn.edu/courses/ling005/FP.txt", "FP.txt")
# No `header` argument is given, so read.table() decides from the file layout
# whether the first line holds the column names.
Pronouns <- read.table("FP.txt")

#################################################################
# Pronouns is a data.frame with 90 rows (45 ages for males, 45 ages for
# females) and 9 columns
# ("AGE" "SEX" "FPS" "SP" "FPP" "TPF" "TPM" "TPN" "TPP") where
#   FPS is First Person Singular ("I", "me", "my", "mine")
#   SP  is Second Person ("you", "your", "yours")
#   FPP is First Person Plural ("we", "us", "our", "ours")
#   TPF is Third Person Feminine ("she", "her", "hers")
#   TPM is Third Person Masculine ("he", "him", "his")
#   TPN is Third Person Neuter ("it", "its")
#   TPP is Third Person Plural ("they", "them", "their", "theirs")
#
# "AGE" is 15-41 and 43-60,
# leaving out 42 because it was the default assigned to subjects of
# unknown age.
################################################################

# Useful booleans: row masks for each sex (SEX is coded 0 = male, 1 = female).
IsMale <- Pronouns[, "SEX"] == 0
IsFemale <- Pronouns[, "SEX"] == 1

# The set of ages (taken from the female rows):
Ages <- Pronouns[IsFemale, "AGE"]

# Everything below combines the male and female rows element by element and
# plots the result against `Ages`. That is only meaningful when both subsets
# list the same ages in the same order, so fail loudly if they do not.
if (!identical(as.numeric(Pronouns[IsMale, "AGE"]), as.numeric(Ages))) {
  stop(
    "Male and female rows of FP.txt do not cover the same ages in the same ",
    "order; cannot combine them element-wise.",
    call. = FALSE
  )
}

#####################################
# Tell R to ask us before making a new plot
# (this only has an effect in an interactive session).
par(ask = TRUE)

######################################
# Let's plot FPS pronouns as a function of age,
# averaging males and females:
allFPS <- (Pronouns[IsMale, "FPS"] + Pronouns[IsFemale, "FPS"]) / 2
plot(Ages, allFPS,
  type = "b", col = "magenta", pch = "I",
  xlab = "Age", ylab = "Frequency (per million words)",
  main = "First Person Singular Pronouns (PPC Facebook dataset)"
)

# Now FPP:
allFPP <- (Pronouns[IsMale, "FPP"] + Pronouns[IsFemale, "FPP"]) / 2
plot(Ages, allFPP,
  type = "b", col = "forestgreen", pch = "W",
  xlab = "Age", ylab = "Frequency (per million words)",
  main = "First Person Plural Pronouns (PPC Facebook dataset)"
)

# Now both on the same plot. The y-axis limits must span both series,
# otherwise the second series could fall outside the plotting region.
yrange <- range(c(allFPS, allFPP))
plot(Ages, allFPS,
  type = "b", col = "magenta", pch = "I",
  xlab = "Age", ylab = "Frequency (per million words)", ylim = yrange,
  main = "First Person Pronouns (PPC Facebook dataset)"
)
points(Ages, allFPP, type = "b", col = "forestgreen", pch = "W")

# Now the ratio:
plot(Ages, allFPS / allFPP,
  type = "b", pch = "E", col = "red",
  xlab = "Ages", ylab = "Ratio",
  main = "First person singular / First person plural pronouns\n\"me/we ratio\""
)

###############################################################
# Let's pull out the frequency of masculine vs. feminine pronoun references
# for male vs. female Facebookers of different ages:
M2M <- Pronouns[IsMale, "TPM"]   # males referring to males
M2F <- Pronouns[IsMale, "TPF"]   # males referring to females
F2M <- Pronouns[IsFemale, "TPM"] # females referring to males
F2F <- Pronouns[IsFemale, "TPF"] # females referring to females

# Now we plot the rate of same-sex vs. cross-sex reference
# as a function of age:
SameSexRef <- (M2M + F2F) / 2
CrossSexRef <- (M2F + F2M) / 2
yrange <- range(c(SameSexRef, CrossSexRef))
plot(Ages, SameSexRef,
  type = "b", pch = "=", col = "blue",
  xlab = "Age", ylab = "Frequency (per million words)", ylim = yrange,
  main = "Same Sex reference (blue =) and Cross Sex reference (red x)"
)
points(Ages, CrossSexRef, type = "b", pch = "x", col = "red")