#clean environment
remove(list = ls())

#check working directory
getwd() 
## [1] "/Users/naominewmacbook/Desktop/Post-Doc I/R Tutorial"
#load in data

#load packages (we won't need all of them today, but it's good practice to always begin by loading in some frequently used packages)
library(lme4)
library(lmerTest)
library(effects)

library(dplyr)
library(plyr)
library(ggplot2)
library(png)
library(tidyverse)

#tell R not to use scientific notation
options(scipen=999)

Redo basic cleaning steps

##subset correct reaction times
correct <- subset(WordImageMatching_Tutorial_May2021, Accuracy == 1)

## log transform RT
correct$logRT <- log(correct$ReactionTime)

##create a vector that only contains observation larger than the mean + 2.5 SD or smaller than the mean - 2.5 SD
outliers <- which(correct$ReactionTime > mean(correct$ReactionTime) + 2.5*sd(correct$ReactionTime) | correct$ReactionTime < mean(correct$ReactionTime) - 2.5*sd(correct$ReactionTime))

## create a data frame contains the same data as "correct" without the values identified as "outliers"
df <- correct[-outliers, ]

In this demonstration, we will analyze the effects of condition (i.e., whether the word was an interlingual homograph), L2 usage (participant level variable) and orthographic frequency per million (item level variable) on correct reaction time. We will begin with a simple lmer model and work our way up to a more complext model.

## Packages

While t-tests, anovas and simple regressions can be run using base R, conducting a complete analysis using LME requires three packages: lme4, lmerTest and effects. The first two are used to calcultate model and output and the last is used to calculate predicitons that you can save as a data frame or plot.

# LMER: Linear Mixed Models

### Model Preparation

As with other stastistical tests, lmer assumes a normal distribution of the DV. So before you can run a model, make sure you inspect you data for normality and transform as needed (i.e., log).

Categorical variables: R will automatically define a reference point in your variable. This means that it will create a default contrast in order to be able to fit the model. It is generally prefered to do this manually, in order to be able to select the contrast coding that best fits your variable. There are different types of coding, depending on how many levels your variable has and what the different levels means (e.g., 0,1; -0.5, 0.5). Here, we will aply the contrast function, contr.sum(), which gives orthogonal contrasts where you compare every level of the variable to the overall mean.

unique(df$CondGen) # double check how many levels your variable has ## [1] "Homograph" "Unique" df$CondGen <- as.factor(df$CondGen) # make sure variable is a factor contrasts(df$CondGen) <- contr.sum(2)/2 # define contrasts

Continuous variables: It is generally recommended that you standardize the continuus variables in a model. You might think of this as “comparing apples to apples”. This is easily done using the scale() function, which can be used inside the model command or outside of the model by defining a new variable. Scale() will calculate the mean and standard deviation of the entire column/variable, then “scale” each element by those values by subtracting the mean and dividing by the sd.

### Components of an LME model

Below is an example of maximally complex model, followed by explanations of each of the components. Note that this is only meant to serve as an example for the syntax used to construct a maximal random effects structure. The model does not converge so the results are not reliable.

ComplexMod <-lmer(logRT ~  CondGen*scale(L2PercentUse)*scale(FreqMillion) + (1 |Participant) + (1 | Item), data=df, REML=FALSE)
summary(ComplexMod)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: logRT ~ CondGen * scale(L2PercentUse) * scale(FreqMillion) +
##     (1 | Participant) + (1 | Item)
##    Data: df
##
##      AIC      BIC   logLik deviance df.resid
##   2895.0   2967.3  -1436.5   2873.0     5302
##
## Scaled residuals:
##     Min      1Q  Median      3Q     Max
## -3.5980 -0.6862 -0.0854  0.5987  4.3006
##
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  Item        (Intercept) 0.01003  0.1002
##  Participant (Intercept) 0.02494  0.1579
##  Residual                0.09315  0.3052
## Number of obs: 5313, groups:  Item, 169; Participant, 48
##
## Fixed effects:
##                                                     Estimate   Std. Error
## (Intercept)                                        6.3213137    0.0245523
## CondGen1                                           0.1989606    0.0182459
## scale(L2PercentUse)                                0.0061618    0.0231858
## scale(FreqMillion)                                -0.0039902    0.0077017
## CondGen1:scale(L2PercentUse)                      -0.0061125    0.0091682
## CondGen1:scale(FreqMillion)                        0.0037276    0.0153965
## scale(L2PercentUse):scale(FreqMillion)             0.0001979    0.0043969
## CondGen1:scale(L2PercentUse):scale(FreqMillion)    0.0177617    0.0087939
##                                                           df t value
## (Intercept)                                       59.9332473 257.463
## CondGen1                                         174.4193531  10.904
## scale(L2PercentUse)                               48.5474366   0.266
## scale(FreqMillion)                               273.6373342  -0.518
## CondGen1:scale(L2PercentUse)                    5117.2224657  -0.667
## CondGen1:scale(FreqMillion)                      273.5224874   0.242
## scale(L2PercentUse):scale(FreqMillion)          5105.7500471   0.045
## CondGen1:scale(L2PercentUse):scale(FreqMillion) 5105.6423127   2.020
##                                                            Pr(>|t|)
## (Intercept)                                     <0.0000000000000002 ***
## CondGen1                                        <0.0000000000000002 ***
## scale(L2PercentUse)                                          0.7916
## scale(FreqMillion)                                           0.6048
## CondGen1:scale(L2PercentUse)                                 0.5050
## CondGen1:scale(FreqMillion)                                  0.8089
## scale(L2PercentUse):scale(FreqMillion)                       0.9641
## CondGen1:scale(L2PercentUse):scale(FreqMillion)              0.0435 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
##             (Intr) CndGn1 sc(L2PU) sc(FM) CnG1:(L2PU) CG1:(F s(L2PU):
## CondGen1    -0.086
## scl(L2PrcU)  0.006  0.000
## scl(FrqMll) -0.003  0.008  0.000
## CnG1:(L2PU)  0.001  0.002 -0.079   -0.001
## CndGn1:(FM)  0.003 -0.009  0.000   -0.570  0.001
## s(L2PU):(FM  0.001 -0.001 -0.004    0.011  0.021      -0.002
## CG1:(L2PU):  0.000  0.001  0.004   -0.002 -0.020       0.012 -0.342

The first argument is the model formula. The part to the left of the ~ is the DV (same as for anova and regression; in this case correct RT, log transformed). The part to the right of the ~ has two components:

##### Fixed effects:

This is where simple effects and interactions are defined (also similar to an anova or simple regression). Remember, use “+” for main effects and "*" for interactions. If you define an interaction, the output will automatically also list the main effect, so you do not need to list your variables more than once here.

##### Random effects:

This is something that is novel and interesting about lmer models. The chunk “(1 | Participant)” allows individual participants to vary randomly in terms of their intercept (starting RT)

In most psychology/ psycholinguistics experiments, our random intercepts will be item and participant because we assume that there are inherent differences across words/ sentences/ other stimuli and across people

##### Random slopes:

Lmer does not only allow for different starting points for different people (i.e., intercepts), but it also allows for different trajectories (i.e., slopes) based on specific IVs. These are defined before the “|” in the chunk for each random effect. For example, to express that each Participant responds differently to changes in word frequency, we would use “(1 + FreqMillion| Participant)”

It is important to be careful about what random slopes you assign to which random intercepts. This is a very powerful tool, but if used incorrectly you might run the risk of your model not converging or overfitting. If you would like to read more about this, I would recommend Dr. Morgan Sonderegger’s tutorials http://people.linguistics.mcgill.ca/~morgan/book/index.html

##### REML:

The “REML =” argument is optional. The default in lmer is to fit models using the REML (REstricted Maximum Likelihood) criterion. There are good reasons for this, but we often use the likelihood ratio test to compare models based on log-likelihoods, so we should use the Maximum Likelihood (ML) criterion.

### Model comparison: Constructing the best model for your data

As we saw above, the model containing a maximal random effect/ slope structure did not converge. We will now turn to constructing a simpler model that will converge with our data.

Mod <-lmer(logRT ~  CondGen*scale(FreqMillion)*scale(L2PercentUse) + (1 |Participant) + (1 | Item), data=df, REML=FALSE)
summary(Mod)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: logRT ~ CondGen * scale(FreqMillion) * scale(L2PercentUse) +
##     (1 | Participant) + (1 | Item)
##    Data: df
##
##      AIC      BIC   logLik deviance df.resid
##   2895.0   2967.3  -1436.5   2873.0     5302
##
## Scaled residuals:
##     Min      1Q  Median      3Q     Max
## -3.5980 -0.6862 -0.0854  0.5987  4.3006
##
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  Item        (Intercept) 0.01003  0.1002
##  Participant (Intercept) 0.02494  0.1579
##  Residual                0.09315  0.3052
## Number of obs: 5313, groups:  Item, 169; Participant, 48
##
## Fixed effects:
##                                                     Estimate   Std. Error
## (Intercept)                                        6.3213137    0.0245523
## CondGen1                                           0.1989606    0.0182459
## scale(FreqMillion)                                -0.0039902    0.0077017
## scale(L2PercentUse)                                0.0061618    0.0231858
## CondGen1:scale(FreqMillion)                        0.0037276    0.0153965
## CondGen1:scale(L2PercentUse)                      -0.0061125    0.0091682
## scale(FreqMillion):scale(L2PercentUse)             0.0001979    0.0043969
## CondGen1:scale(FreqMillion):scale(L2PercentUse)    0.0177617    0.0087939
##                                                           df t value
## (Intercept)                                       59.9332473 257.463
## CondGen1                                         174.4193531  10.904
## scale(FreqMillion)                               273.6373342  -0.518
## scale(L2PercentUse)                               48.5474366   0.266
## CondGen1:scale(FreqMillion)                      273.5224874   0.242
## CondGen1:scale(L2PercentUse)                    5117.2224656  -0.667
## scale(FreqMillion):scale(L2PercentUse)          5105.7500471   0.045
## CondGen1:scale(FreqMillion):scale(L2PercentUse) 5105.6423127   2.020
##                                                            Pr(>|t|)
## (Intercept)                                     <0.0000000000000002 ***
## CondGen1                                        <0.0000000000000002 ***
## scale(FreqMillion)                                           0.6048
## scale(L2PercentUse)                                          0.7916
## CondGen1:scale(FreqMillion)                                  0.8089
## CondGen1:scale(L2PercentUse)                                 0.5050
## scale(FreqMillion):scale(L2PercentUse)                       0.9641
## CondGen1:scale(FreqMillion):scale(L2PercentUse)              0.0435 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
##             (Intr) CndGn1 sc(FM) s(L2PU CnG1:(FM) CG1:(L s(FM):
## CondGen1    -0.086
## scl(FrqMll) -0.003  0.008
## scl(L2PrcU)  0.006  0.000  0.000
## CndGn1:(FM)  0.003 -0.009 -0.570  0.000
## CnG1:(L2PU)  0.001  0.002 -0.001 -0.079  0.001
## s(FM):(L2PU  0.001 -0.001  0.011 -0.004 -0.002     0.021
## CG1:(FM):(L  0.000  0.001 -0.002  0.004  0.012    -0.020 -0.342

Model comparison The model above contains a 3-way interaction that is only marginally significant. Given that our general goal is to find the simplest model that best fits our data, we can try out a model with a two-way interaction that controls for the third variable.

# new model with two-way interaction, controlling for frequency
Mod1 <-lmer(logRT ~  CondGen*scale(FreqMillion) + scale(L2PercentUse) + (1 |Participant) + (1 | Item), data=df, REML=FALSE)
summary(Mod1)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: logRT ~ CondGen * scale(FreqMillion) + scale(L2PercentUse) +
##     (1 | Participant) + (1 | Item)
##    Data: df
##
##      AIC      BIC   logLik deviance df.resid
##   2894.1   2946.7  -1439.0   2878.1     5305
##
## Scaled residuals:
##     Min      1Q  Median      3Q     Max
## -3.5872 -0.6855 -0.0854  0.5952  4.3083
##
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  Item        (Intercept) 0.01002  0.1001
##  Participant (Intercept) 0.02493  0.1579
##  Residual                0.09324  0.3054
## Number of obs: 5313, groups:  Item, 169; Participant, 48
##
## Fixed effects:
##                               Estimate Std. Error         df t value
## (Intercept)                   6.321332   0.024550  59.921917 257.493
## CondGen1                      0.198940   0.018240 174.497192  10.907
## scale(FreqMillion)           -0.004030   0.007700 273.771760  -0.523
## scale(L2PercentUse)           0.004858   0.023113  47.950313   0.210
## CondGen1:scale(FreqMillion)   0.003368   0.015393 273.656353   0.219
##                                        Pr(>|t|)
## (Intercept)                 <0.0000000000000002 ***
## CondGen1                    <0.0000000000000002 ***
## scale(FreqMillion)                        0.601
## scale(L2PercentUse)                       0.834
## CondGen1:scale(FreqMillion)               0.827
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
##             (Intr) CndGn1 sc(FM) s(L2PU
## CondGen1    -0.086
## scl(FrqMll) -0.003  0.008
## scl(L2PrcU)  0.006  0.000  0.000
## CndGn1:(FM)  0.003 -0.009 -0.569  0.000

The general pattern of results in this model is similar to the previous model (containing the more complex three-way interaction). To decide which model to use, we can compare the slightly simplified model to the model containing the three-way interaction using an anova to see if having the three-way interaction, rather than a two-way interaction significantly improves the fit.

anova(Mod, Mod1)
## Data: df
## Models:
## Mod1: logRT ~ CondGen * scale(FreqMillion) + scale(L2PercentUse) +
## Mod1:     (1 | Participant) + (1 | Item)
## Mod: logRT ~ CondGen * scale(FreqMillion) * scale(L2PercentUse) +
## Mod:     (1 | Participant) + (1 | Item)
##      npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## Mod1    8 2894.1 2946.7 -1439.0   2878.1
## Mod    11 2895.0 2967.3 -1436.5   2873.0 5.0966  3     0.1649

This anova suggests that including a three-way interaction does not significantly improve model fit. If this is the case, it’s usually better to go with the simpler model.

Once you have finalized your models, you can plot the predicted effects. Plotting predictions will give you cleaner looking plots as it takes away a lot of the noise that is usually present in raw data (e.g., from other variables or latent variables).

To plot predicitions you must first have R predict values for your DV based on some combination of IVs that you have isolated from your model. This is done using the effect() command. You can generate predictions based on individual variables or an interaction term. Note that for every effect you want to plot, you should run a separate effect() command to generate predictions for that specific effects (otherwise, you might reintroduce noise from other variables than the ones displayed in your plot)

ef <- effect('CondGen', Mod) #this command generates a list of predictions based on the condition*FreqMillion*L2PercentUse interaction that was significant in the model called "Mod1"
## NOTE: CondGen is not a high-order term in the model
## Warning in Analyze.model(focal.predictors, mod, xlevels, default.levels, : the
## predictors scale(FreqMillion), scale(L2PercentUse) are one-column matrices that
## were converted to vectors
summary(ef) # before we can create a data frame, we must summarize (i.e., organize) all the values that went in to generating the predictions
##
##  CondGen effect
## CondGen
## Homograph    Unique
##  6.420796  6.221840
##
##  Lower 95 Percent Confidence Limits
## CondGen
## Homograph    Unique
##  6.370915  6.169065
##
##  Upper 95 Percent Confidence Limits
## CondGen
## Homograph    Unique
##  6.470677  6.274614
x <- as.data.frame(ef) # now, we can save a data frame containing values for condition, FreqMillion, L2 Usage and the predictions generated based on their interaction (including SE)
# we can now plot the values in the data file called "x" using a regular ggplot command

Now that we have a new data frame containing the predictions for the effect of Condition, we can use this to plot the effect. A few notes before we do this: - When plotting a three-way interaction that includes more than one continuous variable, you can median split one of them to use this as another categorical grouping variable (under facet_wrap(~) in your ggplot command) - Whenever you log transform your DV for a model, you should remember to use exp() before calling on it (i.e., the fit column) here so that your plot will show the raw data in the original unit of measurement (e.g., RT in ms) - Your axis labels should also indicate that you are plotting predicted data

ggplot(x, aes(x=CondGen, y=exp(fit), group = 0)) +
geom_errorbar(aes(ymin=exp(fit - se), ymax=exp(fit + se)), width=.1) +
geom_line() +
geom_point() +
theme_bw(base_size = 22) + ylab('Predicted Correct Reaction Time') +  xlab('Condition') + theme(legend.position = "bottom", axis.text.x = element_text(size = 22), axis.text.y = element_text(size = 22), axis.title.x= element_text(size = 22), axis.title.y = element_text(size = 22), legend.title = element_text(size = 22), legend.text = element_text(size = 22), legend.key = element_rect(colour = NULL), plot.title=element_text(size = rel(1), lineheight = 1, face = 'bold'), panel.grid.major = element_blank(), panel.grid.minor = element_blank())

# GLMER: Generalized Linear Mixed Models

Accuracy data is binary and therefore violated the expectations of an lmer model. However, we can use a generalized linear mixed model, which works very similarly to a linear mixed model

Because we are using all our data (including the incorrect trials), we will use the “WordImageMatching_Tutorial_May2021” data frame and redo the contrast coding.

# redo contrast coding
unique(WordImageMatching_Tutorial_May2021$CondGen) # double check how many levels your variable has ## [1] "Homograph" "Unique" WordImageMatching_Tutorial_May2021$CondGen <- as.factor(WordImageMatching_Tutorial_May2021$CondGen) # make sure variable is a factor contrasts(WordImageMatching_Tutorial_May2021$CondGen) <- contr.sum(2)/2 # define contrasts

#glmer model
Mod2 <-glmer(Accuracy ~  CondGen*scale(FreqMillion) + scale(L2PercentUse) + (1 |Participant) + (1 | Item), data=WordImageMatching_Tutorial_May2021, family = binomial, glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 100000)))
summary(Mod2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Accuracy ~ CondGen * scale(FreqMillion) + scale(L2PercentUse) +
##     (1 | Participant) + (1 | Item)
##    Data: WordImageMatching_Tutorial_May2021
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 100000))
##
##      AIC      BIC   logLik deviance df.resid
##   3895.5   3942.7  -1940.8   3881.5     6240
##
## Scaled residuals:
##      Min       1Q   Median       3Q      Max
## -25.8731   0.1147   0.2008   0.3498   1.4998
##
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  Item        (Intercept) 1.7949   1.3397
##  Participant (Intercept) 0.1463   0.3825
## Number of obs: 6247, groups:  Item, 169; Participant, 48
##
## Fixed effects:
##                             Estimate Std. Error z value             Pr(>|z|)
## (Intercept)                  3.16082    0.15780  20.030 < 0.0000000000000002
## CondGen1                    -1.70900    0.27724  -6.164       0.000000000708
## scale(FreqMillion)           0.47728    0.13027   3.664             0.000248
## scale(L2PercentUse)          0.07124    0.06973   1.022             0.306935
## CondGen1:scale(FreqMillion)  0.55603    0.26141   2.127             0.033419
##
## (Intercept)                 ***
## CondGen1                    ***
## scale(FreqMillion)          ***
## scale(L2PercentUse)
## CondGen1:scale(FreqMillion) *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
##             (Intr) CndGn1 sc(FM) s(L2PU
## CondGen1    -0.407
## scl(FrqMll)  0.071 -0.015
## scl(L2PrcU)  0.013 -0.003  0.002
## CndGn1:(FM)  0.024  0.048 -0.673  0.001