Stat5: Demo

Veröffentlichungsdatum

13. November 2023

Split-plot ANOVA

Reaktionszeitenbeispiel aus Kapitel 14 von Logan (2010)

# Daten laden
library("readr")

spf <- read_delim("datasets/stat5-8/spf.csv", ";")
# Daten anschauen
head(spf)
# A tibble: 6 × 4
  Signal    VP    Messung Reaktion
  <chr>     <chr> <chr>      <dbl>
1 akustisch VP1   H1             3
2 akustisch VP1   H2             4
3 akustisch VP1   H3             7
4 akustisch VP1   H4             7
5 akustisch VP2   H1             6
6 akustisch VP2   H2             5
# LM mit random intercept = einfaches LMM
spf.aov <- aov(Reaktion ~ Signal * Messung + Error(VP), data = spf)
summary(spf.aov)

Error: VP
          Df Sum Sq Mean Sq F value Pr(>F)
Signal     1  3.125   3.125       2  0.207
Residuals  6  9.375   1.562               

Error: Within
               Df Sum Sq Mean Sq F value   Pr(>F)    
Messung         3 194.50   64.83  127.89 2.52e-12 ***
Signal:Messung  3  19.37    6.46   12.74 0.000105 ***
Residuals      18   9.13    0.51                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Interaktion anschauen
interaction.plot(spf$Messung, spf$Signal, spf$Reaktion)

# Nun als LMM
library("nlme")

# Mit random intercept (VP) und random slope (Messung)
spf.lme.1 <- lme(Reaktion ~ Signal * Messung, random = ~ Messung | VP, data = spf)
# Nur random intercept
spf.lme.2 <- lme(Reaktion ~ Signal * Messung, random = ~ 1 | VP, data = spf)
# Modelle anschauen
anova(spf.lme.1)
               numDF denDF   F-value p-value
(Intercept)        1    18 1488.1631  <.0001
Signal             1     6    2.0808  0.1993
Messung            3    18   70.7887  <.0001
Signal:Messung     3    18   11.8592  0.0002
anova(spf.lme.2)
               numDF denDF  F-value p-value
(Intercept)        1    18 591.6800  <.0001
Signal             1     6   2.0000  0.2070
Messung            3    18 127.8904  <.0001
Signal:Messung     3    18  12.7397  0.0001
summary(spf.lme.1)
Linear mixed-effects model fit by REML
  Data: spf 
       AIC      BIC    logLik
  97.63924 120.0223 -29.81962

Random effects:
 Formula: ~Messung | VP
 Structure: General positive-definite, Log-Cholesky parametrization
            StdDev    Corr                
(Intercept) 1.0801385 (Intr) MssnH2 MssnH3
MessungH2   0.6455527 -0.717              
MessungH3   0.6455528 -0.837  0.600       
MessungH4   1.3229024 -0.816  0.390  0.878
Residual    0.2886126                     

Fixed effects:  Reaktion ~ Signal * Messung 
                        Value Std.Error DF   t-value p-value
(Intercept)              3.75 0.5590162 18  6.708213  0.0000
Signalvisuell           -2.00 0.7905683  6 -2.529826  0.0447
MessungH2                0.25 0.3818811 18  0.654654  0.5210
MessungH3                3.25 0.3818811 18  8.510501  0.0000
MessungH4                4.25 0.6922184 18  6.139681  0.0000
Signalvisuell:MessungH2  1.00 0.5400614 18  1.851641  0.0806
Signalvisuell:MessungH3  0.50 0.5400615 18  0.925821  0.3668
Signalvisuell:MessungH4  4.00 0.9789446 18  4.086033  0.0007
 Correlation: 
                        (Intr) Sgnlvs MssnH2 MssnH3 MssnH4 Sg:MH2 Sg:MH3
Signalvisuell           -0.707                                          
MessungH2               -0.683  0.483                                   
MessungH3               -0.781  0.552  0.571                            
MessungH4               -0.808  0.571  0.394  0.788                     
Signalvisuell:MessungH2  0.483 -0.683 -0.707 -0.404 -0.279              
Signalvisuell:MessungH3  0.552 -0.781 -0.404 -0.707 -0.557  0.571       
Signalvisuell:MessungH4  0.571 -0.808 -0.279 -0.557 -0.707  0.394  0.788

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-0.86583578 -0.26649517 -0.03263197  0.23603775  0.95285749 

Number of Observations: 32
Number of Groups: 8 
summary(spf.lme.2)
Linear mixed-effects model fit by REML
  Data: spf 
       AIC      BIC    logLik
  89.64876 101.4293 -34.82438

Random effects:
 Formula: ~1 | VP
        (Intercept)  Residual
StdDev:   0.5137012 0.7120003

Fixed effects:  Reaktion ~ Signal * Messung 
                        Value Std.Error DF   t-value p-value
(Intercept)              3.75 0.4389856 18  8.542422  0.0000
Signalvisuell           -2.00 0.6208194  6 -3.221549  0.0181
MessungH2                0.25 0.5034602 18  0.496564  0.6255
MessungH3                3.25 0.5034602 18  6.455326  0.0000
MessungH4                4.25 0.5034602 18  8.441580  0.0000
Signalvisuell:MessungH2  1.00 0.7120003 18  1.404494  0.1772
Signalvisuell:MessungH3  0.50 0.7120003 18  0.702247  0.4915
Signalvisuell:MessungH4  4.00 0.7120003 18  5.617975  0.0000
 Correlation: 
                        (Intr) Sgnlvs MssnH2 MssnH3 MssnH4 Sg:MH2 Sg:MH3
Signalvisuell           -0.707                                          
MessungH2               -0.573  0.405                                   
MessungH3               -0.573  0.405  0.500                            
MessungH4               -0.573  0.405  0.500  0.500                     
Signalvisuell:MessungH2  0.405 -0.573 -0.707 -0.354 -0.354              
Signalvisuell:MessungH3  0.405 -0.573 -0.354 -0.707 -0.354  0.500       
Signalvisuell:MessungH4  0.405 -0.573 -0.354 -0.354 -0.707  0.500  0.500

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-1.34519292 -0.63943480 -0.06164167  0.41510594  2.15199656 

Number of Observations: 32
Number of Groups: 8 

GLMM

-> Hirschparasitenbeispiel aus Kapitel 13 von Zuur u. a. (2009)

# Daten laden und für GLMM aufbereiten
DeerEcervi <- read_delim("datasets/stat5-8/DeerEcervi.txt", "\t",  col_types = cols("Farm" = "f"))
# Daten anschauen
head(DeerEcervi)
# A tibble: 6 × 4
  Farm    Sex Length Ecervi
  <fct> <dbl>  <dbl>  <dbl>
1 AL        2    164    0  
2 AL        1    216    0  
3 AL        1    208    0  
4 AL        1    206   14.4
5 AL        1    204    0  
6 AL        1    200    0  
summary(DeerEcervi)
      Farm          Sex            Length          Ecervi       
 MO     :209   Min.   :1.000   Min.   : 75.0   Min.   :   0.00  
 CB     : 85   1st Qu.:1.000   1st Qu.:151.0   1st Qu.:   0.00  
 QM     : 60   Median :1.000   Median :163.0   Median :   6.60  
 BA     : 50   Mean   :1.458   Mean   :161.8   Mean   :  45.42  
 PN     : 37   3rd Qu.:2.000   3rd Qu.:174.9   3rd Qu.:  35.79  
 MB     : 34   Max.   :2.000   Max.   :216.0   Max.   :2186.60  
 (Other):351                                                    
# Anzahl Larven hier in Presence/Absence übersetzt
DeerEcervi$Ecervi.01 <- DeerEcervi$Ecervi
DeerEcervi$Ecervi.01[DeerEcervi$Ecervi > 0] <- 1
# Numerische Geschlechtscodierung als Factor
DeerEcervi$fSex <- as.factor(DeerEcervi$Sex)
# Hischlänge standardisieren
DeerEcervi$CLength <- DeerEcervi$Length - mean(DeerEcervi$Length)

-> Nun sind die Daten bereit:

  • Die Parasitenbefalldaten wurden in Parasiten Präsenz/Absenz (1/0) übersetzt.
  • Die Hirschlänge wurde standardisiert, damit der Achsenabschnitt (intercept) des Modells interpretierbar ist, standardisierte entspricht nun der Achsenabschnitt einem durschnittlich langen Hirsch.
# Zunächst als GLM
# Interaktionen mit fFarm nicht berücksichtigt, da zu viele Freiheitsgrade verbraucht würden
DE.glm <- glm(Ecervi.01 ~ CLength * fSex + Farm, family = binomial, data = DeerEcervi)

drop1(DE.glm, test = "Chi")
Single term deletions

Model:
Ecervi.01 ~ CLength * fSex + Farm
             Df Deviance     AIC     LRT  Pr(>Chi)    
<none>            745.50  799.50                      
Farm         23  1003.72 1011.72 258.225 < 2.2e-16 ***
CLength:fSex  1   755.48  807.48   9.984  0.001579 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(DE.glm)

Call:
glm(formula = Ecervi.01 ~ CLength * fSex + Farm, family = binomial, 
    data = DeerEcervi)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.8400  -0.7576   0.3556   0.6431   2.2964  

Coefficients:
                Estimate Std. Error z value Pr(>|z|)    
(Intercept)   -1.796e+00  5.900e-01  -3.044 0.002336 ** 
CLength        4.062e-02  7.132e-03   5.695 1.24e-08 ***
fSex2          6.280e-01  2.292e-01   2.740 0.006150 ** 
FarmAU         3.340e+00  7.841e-01   4.259 2.05e-05 ***
FarmBA         3.510e+00  7.150e-01   4.908 9.19e-07 ***
FarmBE         1.883e+01  6.216e+02   0.030 0.975831    
FarmCB         3.012e+00  6.573e-01   4.583 4.58e-06 ***
FarmCRC       -1.293e+01  2.400e+03  -0.005 0.995701    
FarmHB        -2.364e-01  9.730e-01  -0.243 0.808045    
FarmLN         3.831e+00  8.881e-01   4.314 1.60e-05 ***
FarmMAN        1.046e+00  6.960e-01   1.503 0.132855    
FarmMB         3.693e+00  8.152e-01   4.529 5.91e-06 ***
FarmMO         9.722e-01  5.969e-01   1.629 0.103380    
FarmNC         1.370e+00  6.904e-01   1.985 0.047169 *  
FarmNV         2.098e+00  7.702e-01   2.725 0.006435 ** 
FarmPN         4.185e+00  8.584e-01   4.875 1.09e-06 ***
FarmQM         3.975e+00  7.220e-01   5.506 3.68e-08 ***
FarmRF         4.552e+00  1.050e+00   4.337 1.45e-05 ***
FarmRN         8.706e-01  7.454e-01   1.168 0.242822    
FarmRO         4.555e+00  9.556e-01   4.766 1.88e-06 ***
FarmSAU       -1.545e+01  1.368e+03  -0.011 0.990986    
FarmSE         2.785e+00  7.876e-01   3.536 0.000407 ***
FarmTI         3.900e+00  1.166e+00   3.343 0.000828 ***
FarmTN         3.102e+00  7.665e-01   4.046 5.21e-05 ***
FarmVISO       3.720e+00  1.011e+00   3.679 0.000234 ***
FarmVY         3.974e+00  1.257e+00   3.162 0.001565 ** 
CLength:fSex2  3.618e-02  1.168e-02   3.097 0.001953 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 1073.1  on 825  degrees of freedom
Residual deviance:  745.5  on 799  degrees of freedom
AIC: 799.5

Number of Fisher Scoring iterations: 15
anova(DE.glm)
Analysis of Deviance Table

Model: binomial, link: logit

Response: Ecervi.01

Terms added sequentially (first to last)

             Df Deviance Resid. Df Resid. Dev
NULL                           825    1073.13
CLength       1   64.815       824    1008.31
fSex          1    0.191       823    1008.12
Farm         23  252.638       800     755.48
CLength:fSex  1    9.984       799     745.50
# Response curves für die einzelnen Farmen (Weibliche Tiere: fSex = "1" )
plot(DeerEcervi$CLength, DeerEcervi$Ecervi.01,
  xlab = "Length", ylab = "Probability of \
     presence of E. cervi L1"
)
I <- order(DeerEcervi$CLength)
AllFarms <- unique(DeerEcervi$Farm)
for (j in AllFarms) {
  mydata <- data.frame(
    CLength = DeerEcervi$CLength, fSex = "1",
    Farm = j
  )
  n <- dim(mydata)[1]
  if (n > 10) {
    P.DE2 <- predict(DE.glm, mydata, type = "response")
    lines(mydata$CLength[I], P.DE2[I])
  }
}

# glmm
library("MASS")

DE.PQL <- glmmPQL(Ecervi.01 ~ CLength * fSex,
  random = ~ 1 | Farm, family = binomial, data = DeerEcervi
)
summary(DE.PQL)
Linear mixed-effects model fit by maximum likelihood
  Data: DeerEcervi 
  AIC BIC logLik
   NA  NA     NA

Random effects:
 Formula: ~1 | Farm
        (Intercept)  Residual
StdDev:    1.462108 0.9620576

Variance function:
 Structure: fixed weights
 Formula: ~invwt 
Fixed effects:  Ecervi.01 ~ CLength * fSex 
                  Value Std.Error  DF  t-value p-value
(Intercept)   0.8883697 0.3373283 799 2.633547  0.0086
CLength       0.0378608 0.0065269 799 5.800768  0.0000
fSex2         0.6104570 0.2137293 799 2.856216  0.0044
CLength:fSex2 0.0350666 0.0108558 799 3.230228  0.0013
 Correlation: 
              (Intr) CLngth fSex2 
CLength       -0.108              
fSex2         -0.191  0.230       
CLength:fSex2  0.092 -0.522  0.235

Standardized Within-Group Residuals:
       Min         Q1        Med         Q3        Max 
-6.3466592 -0.6387839  0.2978382  0.5218829  3.4912879 

Number of Observations: 826
Number of Groups: 24 
DE.PQL$coefficients[1][1]
$fixed
  (Intercept)       CLength         fSex2 CLength:fSex2 
   0.88836974    0.03786082    0.61045698    0.03506664 
# Modellvoraussagen berechnen
# So könnte direkt auf die coefficients zugegriffen werden: DE.PQL$coefficients[[1]][1]
g <- 0.8883697 + 0.0378608 * DeerEcervi$CLength
# Rücktransformieren aus Logit
p.averageFarm1 <- exp(g) / (1 + exp(g))
# Sortierung der Hirschgrössen für's Plotten
I <- order(DeerEcervi$CLength)
# Plotten
plot(DeerEcervi$CLength, DeerEcervi$Ecervi.01,
  xlab = "Length",
  ylab = "Probability of presence of E. cervi L1"
)
lines(DeerEcervi$CLength[I], p.averageFarm1[I], lwd = 3)
# Vertrauensintervalle (CI) mit SD von Random factor berechnen
# Generell CI = mean + 1.96*SD
p.Upp <- exp(g + 1.96 * 1.462108) / (1 + exp(g + 1.96 * 1.462108))
p.Low <- exp(g - 1.96 * 1.462108) / (1 + exp(g - 1.96 * 1.462108))
lines(DeerEcervi$CLength[I], p.Upp[I])
lines(DeerEcervi$CLength[I], p.Low[I])

# Dasselbe mit dem lme4-Package
library("lme4")
DE.lme4 <- glmer(Ecervi.01 ~ CLength * fSex + (1 | Farm),
  family = binomial, data = DeerEcervi
)
summary(DE.lme4)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: Ecervi.01 ~ CLength * fSex + (1 | Farm)
   Data: DeerEcervi

     AIC      BIC   logLik deviance df.resid 
   832.6    856.1   -411.3    822.6      821 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-6.2678 -0.6090  0.2809  0.5022  3.4546 

Random effects:
 Groups Name        Variance Std.Dev.
 Farm   (Intercept) 2.391    1.546   
Number of obs: 826, groups:  Farm, 24

Fixed effects:
              Estimate Std. Error z value Pr(>|z|)    
(Intercept)   0.938969   0.356004   2.638  0.00835 ** 
CLength       0.038964   0.006917   5.633 1.77e-08 ***
fSex2         0.624487   0.222938   2.801  0.00509 ** 
CLength:fSex2 0.035859   0.011409   3.143  0.00167 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) CLngth fSex2 
CLength     -0.107              
fSex2       -0.189  0.238       
CLngth:fSx2  0.091 -0.514  0.232
library("glmmML")
DE.glmmML <- glmmML(Ecervi.01 ~ CLength * fSex,
  cluster = Farm, family = binomial, data = DeerEcervi
)
summary(DE.glmmML)

Call:  glmmML(formula = Ecervi.01 ~ CLength * fSex, family = binomial,      data = DeerEcervi, cluster = Farm) 

                 coef se(coef)     z Pr(>|z|)
(Intercept)   0.93968 0.357915 2.625 8.65e-03
CLength       0.03898 0.006956 5.604 2.10e-08
fSex2         0.62451 0.224251 2.785 5.35e-03
CLength:fSex2 0.03586 0.011437 3.135 1.72e-03

Scale parameter in mixing distribution:  1.547 gaussian 
Std. Error:                              0.2975 

        LR p-value for H_0: sigma = 0:  1.346e-41 

Residual deviance: 822.6 on 821 degrees of freedom  AIC: 832.6