library("readr")
library("ggplot2")
library("dplyr")Statistik 6: Demo
Demoscript herunterladen (.qmd)
- Datensatz rm_plants.csv
- Datensatz DeerEcervi.csv
Repeated measurement
Es handelt sich um einen Düngeversuch (Daten aus Lepš & Šmilauer 2020). 18 Pflanzenindividuen wurden zufällig einer von drei Düngevarianten zugewiesen und dabei zu vier Zeitpunkten ihre Wuchshöhe gemessen. Abhängigkeiten sind hier in zwei Aspekten vorhanden: einerseits wurde jedes Pflanzenindividuum mehrfach gemessen, andererseits hat es nur jeweils eine Düngervariante erhalten.
# Daten laden
plantf <- read_delim("datasets/stat/rm_plants.csv", delim = ";") |>
mutate( across(where(is.character), as.factor))str(plantf)tibble [72 × 4] (S3: tbl_df/tbl/data.frame)
$ PlantHeight: num [1:72] 5 7 9 11 6 9 12 15 7 8 ...
$ Treatment : Factor w/ 3 levels "Nitrogen","Phosphorus",..: 3 3 3 3 1 1 1 1 2 2 ...
$ Time : Factor w/ 4 levels "T0","T1","T2",..: 1 2 3 4 1 2 3 4 1 2 ...
$ PlantID : Factor w/ 18 levels "P01","P02","P03",..: 1 1 1 1 2 2 2 2 3 3 ...
summary(plantf) PlantHeight Treatment Time PlantID
Min. : 4.000 Nitrogen :24 T0:18 P01 : 4
1st Qu.: 6.000 Phosphorus:24 T1:18 P02 : 4
Median : 9.000 Water :24 T2:18 P03 : 4
Mean : 9.306 T3:18 P04 : 4
3rd Qu.:11.000 P05 : 4
Max. :18.000 P06 : 4
(Other):48
table(plantf$Treatment, plantf$PlantID)
P01 P02 P03 P04 P05 P06 P07 P08 P09 P10 P11 P12 P13 P14 P15 P16
Nitrogen 0 4 0 0 4 0 0 4 0 0 4 0 0 4 0 0
Phosphorus 0 0 4 0 0 4 0 0 4 0 0 4 0 0 4 0
Water 4 0 0 4 0 0 4 0 0 4 0 0 4 0 0 4
P17 P18
Nitrogen 4 0
Phosphorus 0 4
Water 0 0
# Mit aov
pf_eaov <- aov(PlantHeight ~ Treatment * Time + Error(PlantID), data = plantf)
summary(pf_eaov)
Error: PlantID
Df Sum Sq Mean Sq F value Pr(>F)
Treatment 2 115.36 57.68 21.68 3.76e-05 ***
Residuals 15 39.92 2.66
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Error: Within
Df Sum Sq Mean Sq F value Pr(>F)
Time 3 588.1 196.02 382.13 < 2e-16 ***
Treatment:Time 6 64.9 10.81 21.07 1.37e-11 ***
Residuals 45 23.1 0.51
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Als LMM
library("glmmTMB")
pf_lmm <- glmmTMB(PlantHeight ~ Treatment * Time + (1 | PlantID),
family = gaussian,
REML = TRUE,
data = plantf)
library("car")
Anova(pf_lmm)Analysis of Deviance Table (Type II Wald chisquare tests)
Response: PlantHeight
Chisq Df Pr(>Chisq)
Treatment 43.351 2 3.859e-10 ***
Time 1146.390 3 < 2.2e-16 ***
Treatment:Time 126.444 6 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(pf_lmm) Family: gaussian ( identity )
Formula: PlantHeight ~ Treatment * Time + (1 | PlantID)
Data: plantf
AIC BIC logLik -2*log(L) df.resid
204.4 236.3 -88.2 176.4 58
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
PlantID (Intercept) 0.537 0.7328
Residual 0.513 0.7162
Number of obs: 72, groups: PlantID, 18
Dispersion estimate for gaussian family (sigma^2): 0.513
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 5.1667 0.4183 12.351 < 2e-16 ***
TreatmentPhosphorus 0.6667 0.5916 1.127 0.259796
TreatmentWater 0.1667 0.5916 0.282 0.778160
TimeT1 4.3333 0.4135 10.479 < 2e-16 ***
TimeT2 7.6667 0.4135 18.541 < 2e-16 ***
TimeT3 11.3333 0.4135 27.408 < 2e-16 ***
TreatmentPhosphorus:TimeT1 -2.1667 0.5848 -3.705 0.000211 ***
TreatmentWater:TimeT1 -2.8333 0.5848 -4.845 1.27e-06 ***
TreatmentPhosphorus:TimeT2 -3.6667 0.5848 -6.270 3.61e-10 ***
TreatmentWater:TimeT2 -4.1667 0.5848 -7.125 1.04e-12 ***
TreatmentPhosphorus:TimeT3 -5.0000 0.5848 -8.550 < 2e-16 ***
TreatmentWater:TimeT3 -5.8333 0.5848 -9.975 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library("performance")
r2(pf_lmm)# R2 for Mixed Models
Conditional R2: 0.957
Marginal R2: 0.912
# Modellvalidierung mit DHARMa
library("DHARMa")
set.seed(123)
simulationOutput <-
simulateResiduals(fittedModel = pf_lmm, plot = TRUE, n = 1000)
# Plot residuals vs. covariates des models
plotResiduals(simulationOutput, form = plantf$Time)
plotResiduals(simulationOutput, form = plantf$Treatment) 
# Darstellung (Interaktions Plot)
library("ggeffects")
pred <- ggpredict(pf_lmm, terms = c("Time", "Treatment"))
pred# Predicted values of PlantHeight
Treatment: Nitrogen
Time | Predicted | 95% CI
-------------------------------
T0 | 5.17 | 4.59, 5.74
T1 | 9.50 | 8.93, 10.07
T2 | 12.83 | 12.26, 13.41
T3 | 16.50 | 15.93, 17.07
Treatment: Phosphorus
Time | Predicted | 95% CI
-------------------------------
T0 | 5.83 | 5.26, 6.41
T1 | 8.00 | 7.43, 8.57
T2 | 9.83 | 9.26, 10.41
T3 | 12.17 | 11.59, 12.74
Treatment: Water
Time | Predicted | 95% CI
-------------------------------
T0 | 5.33 | 4.76, 5.91
T1 | 6.83 | 6.26, 7.41
T2 | 8.83 | 8.26, 9.41
T3 | 10.83 | 10.26, 11.41
Adjusted for:
* PlantID = NA (population-level)
plot(pred) +
theme_classic() +
theme(plot.title = element_blank())
Split-plot Design
Es wurden zufällig fünf Untersuchungsgebiete (sites) ausgewählt, in denen jeweils der gleiche Versuchsblock implementiert wurde, bestehend aus dem Management (treatment) und der Frage, ob wilde Grossherbivoren Zugang zur Fläche hatten (plot type) (Abb. 6.1). Je ein Drittel jeder Untersuchungsfläche (zufällig zugewiesen) blieb gänzlich ungenutzt(U), wurde zu Beginn kontrolliert abgebrannt und danach sich selbst überlassen (B) oder wurde jährlich gemäht (M). Innerhalb jedes Drittels wiederum wurde (zufällig zugewiesen) die Hälfte eingezäunt (F = fenced vs. O = open), um die Beweidung durch Grossherbivoren zu verhindern.
# Import Data
glex <-
read_delim("datasets/stat/Riesch_et_al_2020_grassland.csv", delim = ";") |>
mutate(across(where(is.character), as.factor))str(glex)tibble [60 × 9] (S3: tbl_df/tbl/data.frame)
$ ID_plot : Factor w/ 60 levels "Eul_A1_MP_14",..: 1 2 3 4 5 6 7 8 9 10 ...
$ year : num [1:60] 2014 2018 2014 2018 2014 ...
$ site_code : Factor w/ 5 levels "Eul","Kug","Nun",..: 1 1 1 1 1 1 1 1 1 1 ...
$ site_no : num [1:60] 4 4 4 4 4 4 4 4 4 4 ...
$ treatment : Factor w/ 3 levels "B","M","U": 2 2 2 2 3 3 3 3 1 1 ...
$ plot_type : Factor w/ 2 levels "F","O": 2 2 1 1 2 2 1 1 1 1 ...
$ SR : num [1:60] 44 48 44 47 43 29 44 24 45 37 ...
$ inverseSimpson: num [1:60] 11.41 9.9 10.25 6.51 12.35 ...
$ BergerParker : num [1:60] 0.193 0.21 0.187 0.35 0.155 ...
glex$year <- as.factor(glex$year)
summary(glex) ID_plot year site_code site_no treatment plot_type
Eul_A1_MP_14: 1 2014:30 Eul :12 Min. :1 B:20 F:30
Eul_A1_MP_18: 1 2018:30 Kug :12 1st Qu.:2 M:20 O:30
Eul_A2_MK_14: 1 Nun :12 Median :3 U:20
Eul_A2_MK_18: 1 SomO:12 Mean :3
Eul_B1_WP_14: 1 SomW:12 3rd Qu.:4
Eul_B1_WP_18: 1 Max. :5
(Other) :54
SR inverseSimpson BergerParker
Min. :24.00 Min. : 2.538 Min. :0.1200
1st Qu.:39.75 1st Qu.: 7.169 1st Qu.:0.1732
Median :44.00 Median : 9.945 Median :0.2133
Mean :43.52 Mean : 9.347 Mean :0.2551
3rd Qu.:48.00 3rd Qu.:11.555 3rd Qu.:0.2891
Max. :61.00 Max. :16.102 Max. :0.6100
table(glex$site_code, glex$treatment, glex$plot_type, glex$year), , = F, = 2014
B M U
Eul 1 1 1
Kug 1 1 1
Nun 1 1 1
SomO 1 1 1
SomW 1 1 1
, , = O, = 2014
B M U
Eul 1 1 1
Kug 1 1 1
Nun 1 1 1
SomO 1 1 1
SomW 1 1 1
, , = F, = 2018
B M U
Eul 1 1 1
Kug 1 1 1
Nun 1 1 1
SomO 1 1 1
SomW 1 1 1
, , = O, = 2018
B M U
Eul 1 1 1
Kug 1 1 1
Nun 1 1 1
SomO 1 1 1
SomW 1 1 1
balanced Design (nicht zwingend nötig für LMM)
LMM with random interecept
REML = TRUE : (Restricted maximum likelihood) v.s
REML = FALSE: Maximum likelihood (ML)
Bei REML sind die estimates genauer, aber REML sollte nicht für Modelvergleiche (z.B mit AIcc oder drop1 funktion) benutzt werden Dies ist nur relevant für Gaussian mixed models (LMM) nicht für GLMMs
# 2.1 Model fitten
lmm_1 <- glmmTMB(SR ~ year * treatment * plot_type +
(1| site_code/treatment/plot_type),
family = gaussian,
REML = FALSE,
data = glex)# Manuelle Modelselektion
drop1(lmm_1, test = "Chisq")Single term deletions
Model:
SR ~ year * treatment * plot_type + (1 | site_code/treatment/plot_type)
Df AIC LRT Pr(>Chi)
<none> 385.27
year:treatment:plot_type 2 382.20 0.92894 0.6285
lmm_2 <- update(lmm_1,~. -year:treatment:plot_type)
drop1(lmm_2, test = "Chisq")Single term deletions
Model:
SR ~ year + treatment + plot_type + (1 | site_code/treatment/plot_type) +
year:treatment + year:plot_type + treatment:plot_type
Df AIC LRT Pr(>Chi)
<none> 382.20
year:treatment 2 398.84 20.6412 3.295e-05 ***
year:plot_type 1 387.26 7.0678 0.007848 **
treatment:plot_type 2 381.44 3.2413 0.197775
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lmm_3 <- update(lmm_2,~. -treatment:plot_type)
drop1(lmm_3, test = "Chi")Single term deletions
Model:
SR ~ year + treatment + plot_type + (1 | site_code/treatment/plot_type) +
year:treatment + year:plot_type
Df AIC LRT Pr(>Chi)
<none> 381.44
year:treatment 2 397.16 19.7253 5.208e-05 ***
year:plot_type 1 386.37 6.9307 0.008473 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Refit with REML
lmm_4 <- update(lmm_3, REML = TRUE)
Anova(lmm_4)Analysis of Deviance Table (Type II Wald chisquare tests)
Response: SR
Chisq Df Pr(>Chisq)
year 49.3013 1 2.195e-12 ***
treatment 4.3128 2 0.115741
plot_type 2.3609 1 0.124407
year:treatment 23.1682 2 9.313e-06 ***
year:plot_type 6.7571 1 0.009338 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Modellselektion mit AICc
library(MuMIn)
options(na.action = "na.fail")
allmodels <- dredge(lmm_1)
allmodelsGlobal model call: glmmTMB(formula = SR ~ year * treatment * plot_type + (1 | site_code/treatment/plot_type),
data = glex, family = gaussian, REML = FALSE, ziformula = ~0,
dispformula = ~1)
---
Model selection table
cnd((Int)) dsp((Int)) cnd(plt_typ) cnd(trt) cnd(yer) cnd(plt_typ:trt)
56 45.90 + + + +
64 47.30 + + + + +
40 44.45 + + + +
48 45.85 + + + + +
128 46.60 + + + + +
5 47.43 + +
24 47.17 + + + +
7 46.67 + + +
6 46.48 + + +
8 45.72 + + + +
32 48.57 + + + + +
16 47.12 + + + + +
3 42.75 + +
2 42.57 + +
4 41.80 + + +
1 43.52 +
12 43.20 + + + +
22 47.93 + + +
39 45.40 + + +
cnd(plt_typ:yer) cnd(trt:yer) cnd(plt_typ:trt:yer) df logLik AICc delta
56 + + 12 -178.719 388.1 0.00
64 + + 14 -177.098 391.5 3.45
40 + 11 -182.184 391.9 3.79
48 + 13 -180.632 395.2 7.10
128 + + + 16 -176.634 397.9 9.84
5 6 -193.850 401.3 13.21
24 + 10 -188.582 401.7 13.58
7 8 -191.639 402.1 14.02
6 7 -192.974 402.1 14.03
8 9 -190.763 403.1 15.05
32 + 12 -187.419 405.5 17.40
16 11 -189.711 406.9 18.84
3 7 -202.937 422.0 33.95
2 6 -204.570 422.7 34.65
4 8 -202.396 423.6 35.54
1 5
12 10
22 + 8
39 + 10
Models ranked by AICc(x)
Random terms (all models):
cond(1 | site_code/treatment/plot_type)
best_model <- get.models(allmodels, delta==0)[[1]]# Mit REML refitten
lmm_bm_2 <- update(best_model, REML = TRUE)
# Resultat
Anova(lmm_bm_2)Analysis of Deviance Table (Type II Wald chisquare tests)
Response: SR
Chisq Df Pr(>Chisq)
plot_type 2.3609 1 0.124407
treatment 4.3128 2 0.115741
year 49.3013 1 2.195e-12 ***
plot_type:year 6.7571 1 0.009338 **
treatment:year 23.1682 2 9.313e-06 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lmm_bm_2) Family: gaussian ( identity )
Formula:
SR ~ plot_type + treatment + year + (1 | site_code/treatment/plot_type) +
plot_type:year + treatment:year
Data: glex
AIC BIC logLik -2*log(L) df.resid
358.2 383.3 -167.1 334.2 48
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
plot_type:treatment:site_code (Intercept) 2.1332 1.4606
treatment:site_code (Intercept) 8.0931 2.8448
site_code (Intercept) 0.8542 0.9242
Residual 18.6692 4.3208
Number of obs: 60, groups:
plot_type:treatment:site_code, 30; treatment:site_code, 15; site_code, 5
Dispersion estimate for gaussian family (sigma^2): 18.7
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 45.900 2.136 21.487 < 2e-16 ***
plot_typeO -1.000 1.665 -0.600 0.548210
treatmentM 2.300 2.720 0.846 0.397761
treatmentU 3.800 2.720 1.397 0.162377
year2018 -8.200 2.231 -3.675 0.000238 ***
plot_typeO:year2018 5.800 2.231 2.599 0.009338 **
treatmentM:year2018 2.400 2.733 0.878 0.379808
treatmentU:year2018 -10.000 2.733 -3.659 0.000253 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
r2(lmm_bm_2)# R2 for Mixed Models
Conditional R2: 0.688
Marginal R2: 0.502
Modelvalidierung
Wenn wir LMM für Zähldaten wie Anzahl Arten verwenden, sollten wir sicherstellen das keine Werte <0 predicted werden (ansonsten brauchen wir GLMM
mit poisson Verteilung)
sum( fitted(lmm_2) < 0)[1] 0
set.seed(123)
simulationOutput <-
simulateResiduals(fittedModel = lmm_2, plot = TRUE, n = 1000)
# Plot residuals vs. covariates des Modells
plotResiduals(simulationOutput, form = glex$year)
plotResiduals(simulationOutput, form = glex$treatment)
plotResiduals(simulationOutput, form = glex$plot_type)
# Darstellung
library("ggeffects")
pred_2 <- ggpredict(lmm_2, terms = c("year", "treatment", "plot_type") )
plot(pred_2) +
theme_classic() +
theme(plot.title = element_blank())
Random slope & random intercept
Im folgenden Beispiel wurde bei 18 Versuchspersonen die Wirkung von zunehmenden Schlafentzug auf die Reaktionszeit (in ms) gemessen. Bei jeder Versuchsperson wurde der Effekt an den Tagen 2–9 des Versuchs je einmal gemessen (Tage 0 und 1 sind die Trainingsphase und bleiben daher unberücksichtigt).
# Daten laden
library(lme4)
data(sleepstudy)
?sleepstudy
# Daten ohne Trainingsphase
sleepstudy_2 <- subset(sleepstudy, Days>=2)
str(sleepstudy_2)'data.frame': 144 obs. of 3 variables:
$ Reaction: num 251 321 357 415 382 ...
$ Days : num 2 3 4 5 6 7 8 9 2 3 ...
$ Subject : Factor w/ 18 levels "308","309","310",..: 1 1 1 1 1 1 1 1 2 2 ...
summary(sleepstudy_2) Reaction Days Subject
Min. :203.0 Min. :2.00 308 : 8
1st Qu.:265.2 1st Qu.:3.75 309 : 8
Median :303.2 Median :5.50 310 : 8
Mean :308.0 Mean :5.50 330 : 8
3rd Qu.:347.7 3rd Qu.:7.25 331 : 8
Max. :466.4 Max. :9.00 332 : 8
(Other):96
table(sleepstudy_2$Subject)
308 309 310 330 331 332 333 334 335 337 349 350 351 352 369 370 371 372
8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
# Visualisierung
ggplot(sleepstudy_2, aes(y = Reaction, x = Days)) +
geom_point() +
xlab("Number of days of sleep deprivation") +
ylab("Average reaction time (ms)") +
geom_smooth(method = "lm", formula = 'y ~ x', se = F, fullrange = T) +
theme_classic() +
facet_wrap(~Subject)
Wie man in der Visualisierung sehen kann, unterscheiden sich nicht nur die Intercepts (Reaktionszeit ohne Schlafmangel), sondern der Schlafmangel scheint sich auch unterschiedlich stark auf die Reaktionszeit der Probanden auszuwirken. In diesem Fall ist es daher sinnvoll, nicht nur einen Random Intercept, sondern auch einen Random Slope zu fitten.
# Fit models
# Random intercept
lmm_1 <- glmmTMB(Reaction ~ Days + (1 | Subject),
family = gaussian,
data = sleepstudy_2)
# Random slope + random intercept
lmm_2 <- glmmTMB(Reaction ~ Days + (Days | Subject),
family = gaussian,
data = sleepstudy_2)
# Modelle vergleichen
anova(lmm_1, lmm_2)Data: sleepstudy_2
Models:
lmm_1: Reaction ~ Days + (1 | Subject), zi=~0, disp=~1
lmm_2: Reaction ~ Days + (Days | Subject), zi=~0, disp=~1
Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
lmm_1 4 1446.5 1458.4 -719.25 1438.5
lmm_2 6 1425.2 1443.0 -706.58 1413.2 25.332 2 3.156e-06 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Modellvalidierung
set.seed(123)
simulationOutput <- simulateResiduals(fittedModel = lmm_2, plot = TRUE, n = 1000)
# Model resultat
summary(lmm_2) Family: gaussian ( identity )
Formula: Reaction ~ Days + (Days | Subject)
Data: sleepstudy_2
AIC BIC logLik -2*log(L) df.resid
1425.2 1443.0 -706.6 1413.2 138
Random effects:
Conditional model:
Groups Name Variance Std.Dev. Corr
Subject (Intercept) 906.89 30.115
Days 42.37 6.509 -0.24
Residual 651.60 25.526
Number of obs: 144, groups: Subject, 18
Dispersion estimate for gaussian family (sigma^2): 652
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 245.097 8.999 27.236 < 2e-16 ***
Days 11.435 1.793 6.377 1.81e-10 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
r2(lmm_2)# R2 for Mixed Models
Conditional R2: 0.799
Marginal R2: 0.213
# Visualisierung
plot( ggpredict(lmm_2) ) +
geom_line(color = "blue") +
geom_point(data = sleepstudy_2, aes(Days, Reaction), alpha = 0.5) +
theme_classic() +
theme(plot.title = element_blank())
GLMM
Befall von Rothirschen (Cervus elaphus) in spanischen Farmen mit dem Parasiten Elaphostrongylus cervi. Modelliert wird Vorkommen/Nichtvorkommen von L1-Larven 130 dieser Nematode in Abhängigkeit von Körperlänge und Geschlecht der Hirsche. Erhoben wurden die Daten auf 24 Farmen.
# Daten laden und für GLMM aufbereiten
DeerEcervi <- read_delim("datasets/stat/DeerEcervi.csv", delim = ";") |>
mutate( across(where(is.character), as.factor))# Daten anschauen
str(DeerEcervi)tibble [826 × 4] (S3: tbl_df/tbl/data.frame)
$ Farm : Factor w/ 24 levels "AL","AU","BA",..: 1 1 1 1 1 1 1 1 1 1 ...
$ Sex : Factor w/ 2 levels "female","male": 2 1 1 1 1 1 1 1 1 1 ...
$ Length: num [1:826] 164 216 208 206 204 200 199 197 196 196 ...
$ Ecervi: num [1:826] 0 0 0 1 0 0 1 1 1 0 ...
summary(DeerEcervi) Farm Sex Length Ecervi
MO :209 female:448 Min. : 75.0 Min. :0.0000
CB : 85 male :378 1st Qu.:151.0 1st Qu.:0.0000
QM : 60 Median :163.0 Median :1.0000
BA : 50 Mean :161.8 Mean :0.6465
PN : 37 3rd Qu.:174.9 3rd Qu.:1.0000
MB : 34 Max. :216.0 Max. :1.0000
(Other):351
table(DeerEcervi$Farm)
AL AU BA BE CB CRC HB LN MAN MB MO NC NV PN QM RF
15 32 50 13 85 1 17 33 27 34 209 27 20 37 60 20
RN RO SAU SE TI TN VISO VY
23 30 3 26 19 25 13 7
# Model fitten. Die function "scale" standartisiert die Kontinuierliche variable
# Hischlänge:
# ( DeerEcervi$Length - mean(DeerEcervi$Length)) / sd(DeerEcervi$Length )
glmm_1 <- glmmTMB(Ecervi ~ scale(Length) * Sex + (1 | Farm),
family = binomial,
data = DeerEcervi)
Anova(glmm_1)Analysis of Deviance Table (Type II Wald chisquare tests)
Response: Ecervi
Chisq Df Pr(>Chisq)
scale(Length) 70.7552 1 < 2.2e-16 ***
Sex 4.4962 1 0.033969 *
scale(Length):Sex 9.8309 1 0.001716 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(glmm_1) Family: binomial ( logit )
Formula: Ecervi ~ scale(Length) * Sex + (1 | Farm)
Data: DeerEcervi
AIC BIC logLik -2*log(L) df.resid
832.6 856.1 -411.3 822.6 821
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
Farm (Intercept) 2.393 1.547
Number of obs: 826, groups: Farm, 24
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.9397 0.3579 2.625 0.00865 **
scale(Length) 0.7638 0.1363 5.604 2.09e-08 ***
Sexmale 0.6245 0.2242 2.785 0.00535 **
scale(Length):Sexmale 0.7027 0.2241 3.135 0.00172 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library("performance")
r2(glmm_1)# R2 for Mixed Models
Conditional R2: 0.504
Marginal R2: 0.143
performance_roc(glmm_1)AUC: 84.90%
# Modellvalidierung
set.seed(123)
simulationOutput <-
simulateResiduals(fittedModel = glmm_1, plot = TRUE, n = 1000)
plotResiduals(simulationOutput, form = DeerEcervi$Sex)
plotResiduals(simulationOutput, form = scale(DeerEcervi$Length) )
# Visualisierung
pred_3 <- predict_response(glmm_1, c("Length[all]", "Sex") )
# mit all werden für mehr Körperlängen Werte predicted was zu smootheren
# lines im plot führt
ggplot(pred_3, aes(x, predicted, color = group)) +
geom_line(linewidth = 1) +
geom_point(data = DeerEcervi, aes(Length, Ecervi, color = Sex), alpha = 0.5) +
labs(x = "\n Dear body length [cm]",
y = "Wahrscheinlichkeiten einer Elaphostrongylus cervi infection \n") +
theme_classic() +
theme(plot.title = element_blank())