########################################################################## ## Preliminaries ########################################################################## ## Load necessary packages library(languageR) library(Design) ## Get data into R. The perils here are to make sure everything is tab separated and that you designate tab ## as the separator. ## A caution on the input file: you have to put "NA" for every missing value in the input file. ## Also, whenever you take a mean or some other expression that would be affected by missing values, you must add ## the expression na.rm=TRUE so that it will compute means ignoring these values. MyData=read.table("C:/251Simulations/HajRossWithR/HajRossRInput.txt", header=T, sep="\t") head(MyData) colnames(MyData) ########################################################################## ## Basic descriptive statistics ########################################################################## ## This prints out a list of all the subject ID's levels(MyData$Word) levels(Stressed) ## The following give you 2 x 2 tables allowing you to eyeball an effect size xtabs( ~ Stressed + IsNonCoronal, data = MyData) xtabs( ~ Stressed + IsObstruent, data = MyData) xtabs( ~ Stressed + Arab, data = MyData) Arab Stressed 0 1 0 4945 1252 1 479 11 ## Combos with Arab xtabs( ~ Stressed + IsNoncoronal + Arab, data = MyData) xtabs( ~ Stressed + IsObstruent + Arab, data = MyData) ########################################################################## ## Statistical testing with linear mixed effects model ########################################################################## ## We want to model the data in a way that suitably discounts for idiosyncracies of individual suffixes. ## The latter are "random effects" and the ones we care about are "fixed effects". ## We'll start by creating a series of very simple regression models. ## Note that flunking this test is not conclusive: a fixed effect might actually perform better in concert with ## other fixed effects. MyLMer = lmer(Stressed ~ Arab + (1|Suffix), data=MyData, family=binomial) MyLMer Estimate Std. Error z value Pr(>|z|) (Intercept) -4.8585 1.4106 -3.444 0.000573 *** Arab -2.5281 0.3074 -8.224 < 2e-16 *** MyLMer = lmer(Stressed ~ IsObstruent + (1|Suffix), data=MyData, family=binomial) MyLMer Estimate Std. Error z value Pr(>|z|) (Intercept) -5.78421 1.56342 -3.70 0.000216 *** IsObstruent 1.05514 0.09838 10.72 < 2e-16 *** MyLMer = lmer(Stressed ~ IsNonCoronal + (1|Suffix), data=MyData, family=binomial) MyLMer Estimate Std. Error z value Pr(>|z|) (Intercept) -5.3159 1.4443 -3.681 0.000233 *** IsNonCoronal 0.7205 0.1230 5.859 4.65e-09 *** ## A kitchen sink model MyLMerKitchenSink = lmer(Stressed ~ IsNonCoronal + IsObstruent + Arab + (1|Suffix), data=MyData, family=binomial) MyLMerKitchenSink Estimate Std. Error z value Pr(>|z|) (Intercept) -6.0937 1.6057 -3.795 0.000148 *** IsNonCoronal 1.2124 0.1342 9.037 < 2e-16 *** IsObstruent 1.1308 0.1018 11.108 < 2e-16 *** Arab -2.7474 0.3106 -8.845 < 2e-16 *** ## Are there interactions of noncoronality and obstruency? MyLMerGrandKitchenSink = lmer(Stressed ~ IsNonCoronal * IsObstruent + Arab + (1|Suffix), data=MyData, family=binomial) MyLMerGrandKitchenSink Estimate Std. Error z value Pr(>|z|) (Intercept) -6.2945 1.6596 -3.793 0.000149 *** IsNonCoronal 0.4419 0.2066 2.139 0.032439 * IsObstruent 0.8611 0.1103 7.806 5.90e-15 *** Arab -2.8001 0.3165 -8.846 < 2e-16 *** IsNonCoronal:IsObstruent 1.7057 0.2939 5.804 6.47e-09 *** ## Yes indeed, there is a greater-than-ganging effect here. Indeed, noncoronality seems to have its main role among the obstruents. anova(MyLMerKitchenSink,MyLMerGrandKitchenSink) ## Look for three-way interactions MyLMerGrandKitchenSinkObsessive = lmer(Stressed ~ IsNonCoronal * IsObstruent * Arab + (1|Suffix), data=MyData, family=binomial) MyLMerGrandKitchenSinkObsessive Estimate Std. Error z value Pr(>|z|) (Intercept) -6.3665 1.6766 -3.797 0.000146 *** IsNonCoronal 0.4897 0.2105 2.326 0.019996 * IsObstruent 0.8975 0.1118 8.028 9.87e-16 *** Arab -1.6936 0.3893 -4.350 1.36e-05 *** IsNonCoronal:IsObstruent 1.7682 0.3074 5.752 8.84e-09 *** IsNonCoronal:Arab -1.5283 1.0933 -1.398 0.162154 IsObstruent:Arab -1.5968 0.8139 -1.962 0.049783 * IsNonCoronal:IsObstruent:Arab 0.4786 1.6650 0.287 0.773750 anova(MyLMerGrandKitchenSinkObsessive,MyLMerGrandKitchenSink) Df AIC BIC logLik Chisq Chi Df Pr(>Chisq) MyLMerGrandKitchenSink 6 2861.0 2901.9 -1424.5 MyLMerGrandKitchenSinkObsessive 9 2856.8 2918.0 -1419.4 10.257 3 0.0165 * ## This command gives the random effects. ranef(MyLMerGrandKitchenSink) (Intercept) al 0.4111798 ar 8.0498690 ian -1.2937365 ic -0.7802352 ish -1.1727162 ive -0.6209391 NoSuffix 3.8458712 ous -2.3107557 MyFitted=fitted(MyLMerGrandKitchenSink) MyFitted write.csv(MyFitted, "C:/x.txt", sep="\t") ########################################################################## ## Directory of functions ########################################################################## ## use this time-consuming routine to get significance values for a regular linear mixed effects model. It does ## a Monte Carlo computation pvals.fnc(MyLMer) ## use this to compare two models of the same data anova(MySignatureLMer, MyDiminishedLMer) ## This command gives the random effects in a linear mixed effects model ranef(MyLMerGrandKitchenSink) ##This doesn't work. fitted(MyLMerGrandKitchenSink) ########################################################################## ##Modeling continuous data ########################################################################## ## It's still lmer(), but you leave out the tag family=binomial. ## Also, computing significance values is harder. There's no standard formula and you have to use a Monte Carlo ## simulation. This is available. MyLMer = lmer(Tapped ~ StemFinal + AlveolarStopFollows + Pretonic + PostTonic + (1|SubjectID) + (1|Word), data=MyData) MyPVals = pvals.fnc(MyLMer) MyPVals