#clean environment
remove(list = ls())
#check working directory
getwd()
## [1] "/Users/naominewmacbook/Desktop/Post-Doc I/R Tutorial"
#load in data
WordImageMatching_Tutorial_May2021 <- read.csv("~/Desktop/Post-Doc I/R Tutorial/WordImageMatching_Tutorial_May2021.csv")
#load packages (we won't need all of them today, but it's good practice to always begin by loading in some frequently used packages)
library(lme4)
library(lmerTest)
library(effects)
library(plyr) # if you use both, load plyr before dplyr so that plyr does not mask dplyr's functions
library(dplyr)
library(ggplot2)
library(png)
library(tidyverse)
#tell R not to use scientific notation
options(scipen=999)
# Redo basic cleaning steps
## subset correct responses only
correct <- subset(WordImageMatching_Tutorial_May2021, Accuracy == 1)
## log transform RT
correct$logRT <- log(correct$ReactionTime)
## create a vector of row indices for observations more than 2.5 SDs above or below the mean RT
outliers <- which(correct$ReactionTime > mean(correct$ReactionTime) + 2.5*sd(correct$ReactionTime) | correct$ReactionTime < mean(correct$ReactionTime) - 2.5*sd(correct$ReactionTime))
## create a data frame containing the same data as "correct", without the rows identified as "outliers"
df <- correct[-outliers, ]
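As an optional sanity check, you can confirm how many trials the outlier trimming removed (a small sketch using the objects created above):
length(outliers)         # number of trials flagged as outliers
nrow(correct) - nrow(df) # should match the number above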
In this demonstration, we will analyze the effects of condition (i.e., whether the word was an interlingual homograph), L2 usage (a participant-level variable), and orthographic frequency per million (an item-level variable) on correct reaction time. We will begin with a simple lmer model and work our way up to a more complex model.
While t-tests, ANOVAs, and simple regressions can be run in base R, conducting a complete analysis using linear mixed effects (LME) models requires three packages: lme4, lmerTest, and effects. The first two are used to fit the model and produce its output; the last is used to calculate predictions that you can save as a data frame or plot.
As with other statistical tests, lmer assumes a normal distribution of the DV. So before you run a model, make sure you inspect your data for normality and transform as needed (e.g., with a log transform).
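For example, one quick way to eyeball this (a sketch using base graphics and the variables created above):
# compare the raw and log-transformed RT distributions side by side
par(mfrow = c(1, 2))
hist(correct$ReactionTime, main = "Raw RT", xlab = "Reaction time")
hist(correct$logRT, main = "Log RT", xlab = "log(Reaction time)")
par(mfrow = c(1, 1)) # reset the plotting layout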
Categorical variables: R will automatically define a reference level in your variable. This means that it will create a default contrast in order to be able to fit the model. It is generally preferred to do this manually, so that you can select the contrast coding that best fits your variable. There are different types of coding, depending on how many levels your variable has and what the different levels mean (e.g., 0, 1; -0.5, 0.5). Here, we will apply the contrast function contr.sum(), which gives orthogonal contrasts where you compare every level of the variable to the overall mean.
unique(df$CondGen) # double check how many levels your variable has
## [1] "Homograph" "Unique"
df$CondGen <- as.factor(df$CondGen) # make sure variable is a factor
contrasts(df$CondGen) <- contr.sum(2)/2 # define contrasts
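A quick check of the resulting coding: with two levels, contr.sum(2)/2 assigns +0.5 to one level and -0.5 to the other, so the estimate for this contrast reflects the full difference between conditions.
contrasts(df$CondGen) # displays the +0.5/-0.5 coding now assigned to the two levels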
Continuous variables: It is generally recommended that you standardize the continuous variables in a model. You might think of this as “comparing apples to apples”. This is easily done using the scale() function, which can be used inside the model command or outside of the model by defining a new variable. scale() calculates the mean and standard deviation of the entire column/variable, then “scales” each element by subtracting the mean and dividing by the SD.
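As a minimal sketch of what scale() does (the column name FreqMillion_z is just for illustration):
# standardize by hand: subtract the mean, divide by the SD
df$FreqMillion_z <- (df$FreqMillion - mean(df$FreqMillion)) / sd(df$FreqMillion)
# scale() produces the same values (it returns a one-column matrix, hence as.numeric)
all.equal(df$FreqMillion_z, as.numeric(scale(df$FreqMillion)))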
Below is an example of a maximally complex model, followed by explanations of each of its components. Note that this is only meant to serve as an example of the syntax used to construct a maximal random effects structure (here, by-participant slopes for the item-level predictors and a by-item slope for the participant-level predictor). This model does not converge with our data, so its results are not reliable and we do not inspect its output.
ComplexMod <- lmer(logRT ~ CondGen*scale(L2PercentUse)*scale(FreqMillion) +
                     (1 + CondGen*scale(FreqMillion) | Participant) +
                     (1 + scale(L2PercentUse) | Item),
                   data = df, REML = FALSE)
# this maximal model fails to converge with our data, so we do not interpret its output
The first argument is the model formula. The part to the left of the ~ is the DV (the same as for an ANOVA or regression; in this case correct RT, log transformed). The part to the right of the ~ has two components:

1. The fixed effects. This is where simple effects and interactions are defined (also similar to an ANOVA or simple regression). Remember, use “+” for main effects and "*" for interactions. If you define an interaction, the output will automatically also list the main effects, so you do not need to list your variables more than once here.

2. The random effects. This is what is novel and interesting about lmer models. The chunk “(1 | Participant)” allows individual participants to vary randomly in terms of their intercept (starting RT). In most psychology/psycholinguistics experiments, our random intercepts will be item and participant, because we assume that there are inherent differences across words/sentences/other stimuli and across people. lmer not only allows for different starting points for different people (i.e., intercepts); it also allows for different trajectories (i.e., slopes) based on specific IVs. These are defined before the “|” in the chunk for each random effect. For example, to express that each participant responds differently to changes in word frequency, we would use “(1 + FreqMillion | Participant)”.
It is important to be careful about which random slopes you assign to which random intercepts. This is a very powerful tool, but if used incorrectly you run the risk of your model not converging or overfitting. If you would like to read more about this, I recommend Dr. Morgan Sonderegger’s tutorials: http://people.linguistics.mcgill.ca/~morgan/book/index.html
The “REML =” argument is optional. The default in lmer is to fit models using the REML (REstricted Maximum Likelihood) criterion. There are good reasons for this, but we often use the likelihood ratio test to compare models based on log-likelihoods, so we should instead use the Maximum Likelihood (ML) criterion by setting REML = FALSE.
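As a sketch (the model and the names IllustrationREML/IllustrationML are just for illustration, not part of the analysis):
# default: fit by REML
IllustrationREML <- lmer(logRT ~ CondGen + (1 | Participant) + (1 | Item), data = df)
# REML = FALSE: fit by maximum likelihood, appropriate for likelihood-ratio comparisons
IllustrationML <- lmer(logRT ~ CondGen + (1 | Participant) + (1 | Item),
                       data = df, REML = FALSE)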
As noted above, the model containing a maximal random effects/slopes structure does not converge. We will now turn to constructing a simpler model, with random intercepts only, that will converge with our data.
Mod <- lmer(logRT ~ CondGen*scale(FreqMillion)*scale(L2PercentUse) + (1 | Participant) + (1 | Item), data = df, REML = FALSE)
summary(Mod)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: logRT ~ CondGen * scale(FreqMillion) * scale(L2PercentUse) +
## (1 | Participant) + (1 | Item)
## Data: df
##
## AIC BIC logLik deviance df.resid
## 2895.0 2967.3 -1436.5 2873.0 5302
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.5980 -0.6862 -0.0854 0.5987 4.3006
##
## Random effects:
## Groups Name Variance Std.Dev.
## Item (Intercept) 0.01003 0.1002
## Participant (Intercept) 0.02494 0.1579
## Residual 0.09315 0.3052
## Number of obs: 5313, groups: Item, 169; Participant, 48
##
## Fixed effects:
## Estimate Std. Error
## (Intercept) 6.3213137 0.0245523
## CondGen1 0.1989606 0.0182459
## scale(FreqMillion) -0.0039902 0.0077017
## scale(L2PercentUse) 0.0061618 0.0231858
## CondGen1:scale(FreqMillion) 0.0037276 0.0153965
## CondGen1:scale(L2PercentUse) -0.0061125 0.0091682
## scale(FreqMillion):scale(L2PercentUse) 0.0001979 0.0043969
## CondGen1:scale(FreqMillion):scale(L2PercentUse) 0.0177617 0.0087939
## df t value
## (Intercept) 59.9332473 257.463
## CondGen1 174.4193531 10.904
## scale(FreqMillion) 273.6373342 -0.518
## scale(L2PercentUse) 48.5474366 0.266
## CondGen1:scale(FreqMillion) 273.5224874 0.242
## CondGen1:scale(L2PercentUse) 5117.2224656 -0.667
## scale(FreqMillion):scale(L2PercentUse) 5105.7500471 0.045
## CondGen1:scale(FreqMillion):scale(L2PercentUse) 5105.6423127 2.020
## Pr(>|t|)
## (Intercept) <0.0000000000000002 ***
## CondGen1 <0.0000000000000002 ***
## scale(FreqMillion) 0.6048
## scale(L2PercentUse) 0.7916
## CondGen1:scale(FreqMillion) 0.8089
## CondGen1:scale(L2PercentUse) 0.5050
## scale(FreqMillion):scale(L2PercentUse) 0.9641
## CondGen1:scale(FreqMillion):scale(L2PercentUse) 0.0435 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) CndGn1 sc(FM) s(L2PU CnG1:(FM) CG1:(L s(FM):
## CondGen1 -0.086
## scl(FrqMll) -0.003 0.008
## scl(L2PrcU) 0.006 0.000 0.000
## CndGn1:(FM) 0.003 -0.009 -0.570 0.000
## CnG1:(L2PU) 0.001 0.002 -0.001 -0.079 0.001
## s(FM):(L2PU 0.001 -0.001 0.011 -0.004 -0.002 0.021
## CG1:(FM):(L 0.000 0.001 -0.002 0.004 0.012 -0.020 -0.342
# Model comparison

The model above contains a three-way interaction that only just reaches significance (p = .044). Given that our general goal is to find the simplest model that best fits our data, we can try out a model with a two-way interaction that controls for the third variable.
# new model with two-way interaction, controlling for frequency
Mod1 <- lmer(logRT ~ CondGen*scale(FreqMillion) + scale(L2PercentUse) + (1 | Participant) + (1 | Item), data = df, REML = FALSE)
summary(Mod1)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: logRT ~ CondGen * scale(FreqMillion) + scale(L2PercentUse) +
## (1 | Participant) + (1 | Item)
## Data: df
##
## AIC BIC logLik deviance df.resid
## 2894.1 2946.7 -1439.0 2878.1 5305
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.5872 -0.6855 -0.0854 0.5952 4.3083
##
## Random effects:
## Groups Name Variance Std.Dev.
## Item (Intercept) 0.01002 0.1001
## Participant (Intercept) 0.02493 0.1579
## Residual 0.09324 0.3054
## Number of obs: 5313, groups: Item, 169; Participant, 48
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 6.321332 0.024550 59.921917 257.493
## CondGen1 0.198940 0.018240 174.497192 10.907
## scale(FreqMillion) -0.004030 0.007700 273.771760 -0.523
## scale(L2PercentUse) 0.004858 0.023113 47.950313 0.210
## CondGen1:scale(FreqMillion) 0.003368 0.015393 273.656353 0.219
## Pr(>|t|)
## (Intercept) <0.0000000000000002 ***
## CondGen1 <0.0000000000000002 ***
## scale(FreqMillion) 0.601
## scale(L2PercentUse) 0.834
## CondGen1:scale(FreqMillion) 0.827
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) CndGn1 sc(FM) s(L2PU
## CondGen1 -0.086
## scl(FrqMll) -0.003 0.008
## scl(L2PrcU) 0.006 0.000 0.000
## CndGn1:(FM) 0.003 -0.009 -0.569 0.000
The general pattern of results in this model is similar to the previous model (which contained the more complex three-way interaction). To decide which model to use, we can compare the simplified model to the model containing the three-way interaction using a likelihood-ratio test (via the anova() function), to see whether including the three-way interaction, rather than only the two-way interaction, significantly improves the fit.
anova(Mod, Mod1)
## Data: df
## Models:
## Mod1: logRT ~ CondGen * scale(FreqMillion) + scale(L2PercentUse) +
## Mod1: (1 | Participant) + (1 | Item)
## Mod: logRT ~ CondGen * scale(FreqMillion) * scale(L2PercentUse) +
## Mod: (1 | Participant) + (1 | Item)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## Mod1 8 2894.1 2946.7 -1439.0 2878.1
## Mod 11 2895.0 2967.3 -1436.5 2873.0 5.0966 3 0.1649
This comparison suggests that including the three-way interaction does not significantly improve model fit (p = .16). When this is the case, it’s usually better to go with the simpler model.
Once you have finalized your models, you can plot the predicted effects. Plotting predictions gives you cleaner-looking plots, as it removes much of the noise that is usually present in raw data (e.g., from other variables or latent variables).
To plot predictions, you must first have R predict values for your DV based on some combination of IVs isolated from your model. This is done using the effect() command. You can generate predictions based on individual variables or on an interaction term. Note that for every effect you want to plot, you should run a separate effect() command to generate predictions for that specific effect (otherwise, you might reintroduce noise from variables other than the ones displayed in your plot).
ef <- effect('CondGen', Mod) # this command generates a list of predictions for the effect of Condition from the model called "Mod"
## NOTE: CondGen is not a high-order term in the model
## Warning in Analyze.model(focal.predictors, mod, xlevels, default.levels, : the
## predictors scale(FreqMillion), scale(L2PercentUse) are one-column matrices that
## were converted to vectors
summary(ef) # inspect (i.e., organize) the predicted values and their confidence limits before creating a data frame
##
## CondGen effect
## CondGen
## Homograph Unique
## 6.420796 6.221840
##
## Lower 95 Percent Confidence Limits
## CondGen
## Homograph Unique
## 6.370915 6.169065
##
## Upper 95 Percent Confidence Limits
## CondGen
## Homograph Unique
## 6.470677 6.274614
x <- as.data.frame(ef) # save a data frame containing the condition levels and the predictions generated for them (fit), along with their SEs and confidence limits
# we can now plot the values in the data frame called "x" using a regular ggplot command
Now that we have a new data frame containing the predictions for the effect of Condition, we can use it to plot the effect. A few notes before we do this:

- When plotting a three-way interaction that includes more than one continuous variable, you can median split one of the continuous variables and use it as an additional categorical grouping variable (under facet_wrap(~) in your ggplot command); see the sketch after the plot below.
- Whenever you log transform your DV for a model, remember to apply exp() when calling on it (i.e., the fit column) here, so that your plot shows the predictions in the original unit of measurement (e.g., RT in ms).
- Your axis labels should also indicate that you are plotting predicted data.
ggplot(x, aes(x=CondGen, y=exp(fit), group = 0)) +
geom_errorbar(aes(ymin=exp(fit - se), ymax=exp(fit + se)), width=.1) +
geom_line() +
geom_point() +
theme_bw(base_size = 22) +
  ylab('Predicted Correct Reaction Time') +
  xlab('Condition') +
  theme(legend.position = "bottom",
        axis.text.x = element_text(size = 22),
        axis.text.y = element_text(size = 22),
        axis.title.x = element_text(size = 22),
        axis.title.y = element_text(size = 22),
        legend.title = element_text(size = 22),
        legend.text = element_text(size = 22),
        legend.key = element_rect(colour = NULL),
        plot.title = element_text(size = rel(1), lineheight = 1, face = 'bold'),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())
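And here is a hypothetical sketch of the median-split approach mentioned in the notes above (not run here; it assumes a predictions data frame, called x3 below, generated from a three-way interaction term, with columns named CondGen, FreqMillion, L2PercentUse, and fit; check names() on your own effects output, since the exact column names may differ):
# median-split the participant-level variable to create a grouping factor
x3$L2Group <- ifelse(x3$L2PercentUse > median(x3$L2PercentUse),
                     "High L2 use", "Low L2 use")
# facet the plot by the median-split variable
ggplot(x3, aes(x = FreqMillion, y = exp(fit), colour = CondGen)) +
  geom_line() +
  facet_wrap(~L2Group) +
  ylab('Predicted Correct Reaction Time') +
  xlab('Orthographic Frequency (per million)')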
# GLMER: Generalized Linear Mixed Models
Accuracy data is binary and therefore violates the assumptions of an lmer model. However, we can use a generalized linear mixed model (glmer), which works very similarly to a linear mixed model.
Because we are using all our data (including the incorrect trials), we will use the “WordImageMatching_Tutorial_May2021” data frame and redo the contrast coding.
# redo contrast coding
unique(WordImageMatching_Tutorial_May2021$CondGen) # double check how many levels your variable has
## [1] "Homograph" "Unique"
WordImageMatching_Tutorial_May2021$CondGen <- as.factor(WordImageMatching_Tutorial_May2021$CondGen) # make sure variable is a factor
contrasts(WordImageMatching_Tutorial_May2021$CondGen) <- contr.sum(2)/2 # define contrasts
#glmer model
Mod2 <- glmer(Accuracy ~ CondGen*scale(FreqMillion) + scale(L2PercentUse) +
                (1 | Participant) + (1 | Item),
              data = WordImageMatching_Tutorial_May2021, family = binomial,
              control = glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 100000)))
summary(Mod2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Accuracy ~ CondGen * scale(FreqMillion) + scale(L2PercentUse) +
## (1 | Participant) + (1 | Item)
## Data: WordImageMatching_Tutorial_May2021
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 100000))
##
## AIC BIC logLik deviance df.resid
## 3895.5 3942.7 -1940.8 3881.5 6240
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -25.8731 0.1147 0.2008 0.3498 1.4998
##
## Random effects:
## Groups Name Variance Std.Dev.
## Item (Intercept) 1.7949 1.3397
## Participant (Intercept) 0.1463 0.3825
## Number of obs: 6247, groups: Item, 169; Participant, 48
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.16082 0.15780 20.030 < 0.0000000000000002
## CondGen1 -1.70900 0.27724 -6.164 0.000000000708
## scale(FreqMillion) 0.47728 0.13027 3.664 0.000248
## scale(L2PercentUse) 0.07124 0.06973 1.022 0.306935
## CondGen1:scale(FreqMillion) 0.55603 0.26141 2.127 0.033419
##
## (Intercept) ***
## CondGen1 ***
## scale(FreqMillion) ***
## scale(L2PercentUse)
## CondGen1:scale(FreqMillion) *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) CndGn1 sc(FM) s(L2PU
## CondGen1 -0.407
## scl(FrqMll) 0.071 -0.015
## scl(L2PrcU) 0.013 -0.003 0.002
## CndGn1:(FM) 0.024 0.048 -0.673 0.001
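One last note on interpreting this output: with family = binomial, the estimates are on the log-odds (logit) scale. To read them as probabilities, you can apply the inverse-logit function plogis(); for example, using the intercept above (a rough reading at average predictor values that ignores the random effects):
plogis(3.16082) # inverse-logit of the intercept; roughly 0.96 predicted accuracy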