library("lme4")
library("bbmle")
library("MuMIn")
library("dplyr")
library("readr")
library("ggplot2")
library("DHARMa")
library("car")
library("MASS")
library("ROCR")
library("sjPlot")
library("ggeffects")
library("sjstats")
library("cowplot")
library("gstat")
library("purrr")
library("broom.mixed")
Modellgüte und -diagnostics MM
Libraries laden
Packages die wir für die Modelle und die Diagnostics brauchen
Ausgangslage
- Der Modellfit aus Aufgabe 5 von letzter Woche dient als Ausgangspunkt für die heutigen Übungen.
<- read_delim("datasets/fallstudie_n/Aufgabe4_Datensatz_Habitatnutzung_Modelle_241028.csv", delim = ";") |>
DF_mod_day filter(time_of_day == "day") |>
mutate(
slope_scaled = scale(slope),
topo_pos_scaled = scale(topo_pos),
us_scaled = scale(us_2014),
os_scaled = scale(os_2014),
forest_prop_scaled = scale(forest_prop),
dist_road_trails_scaled = scale(dist_road_trails),
dist_road_only_scaled = scale(dist_road_only),
dist_sett_scaled = scale(dist_sett),
id = as.factor(id)
)
<- pres_abs ~
f +
slope_scaled +
topo_pos_scaled +
us_scaled +
forest_prop_scaled +
dist_road_only_scaled
dist_sett_scaled
<- paste(c(f, "+ (1 | id)"), collapse = " ") |> as.formula()
f
<- glmer(f, data = DF_mod_day, family = binomial, na.action = "na.fail")
m_day
<- dredge(m_day)
all_m
all_m## Global model call: glmer(formula = pres_abs ~ slope_scaled + topo_pos_scaled + us_scaled +
## forest_prop_scaled + dist_road_only_scaled + dist_sett_scaled +
## (1 | id), data = DF_mod_day, family = binomial, na.action = "na.fail")
## ---
## Model selection table
## (Int) dst_rod_onl_scl dst_stt_scl frs_prp_scl slp_scl top_pos_scl us_scl
## 46 -0.4277 0.4145 0.8012 -0.15640 0.3920
## 48 -0.4369 0.4226 -0.079960 0.8387 -0.15920 0.3887
## 62 -0.4285 0.4090 0.8022 -0.15950 0.0213600 0.3931
## 64 -0.4379 0.4170 -0.081080 0.8403 -0.16250 0.0226900 0.3898
## 38 -0.4117 0.3804 0.7412 0.3627
## 40 -0.4197 0.3869 -0.071040 0.7737 0.3593
## 54 -0.4119 0.3781 0.7411 0.0084000 0.3629
## 56 -0.4199 0.3843 -0.071390 0.7737 0.0092950 0.3596
## 61 -0.4341 0.7850 -0.07165 0.0831200 0.3920
## 53 -0.4255 0.7552 0.0742700 0.3787
## 63 -0.4339 0.001142 0.7845 -0.07165 0.0830900 0.3920
## 55 -0.4254 0.001146 0.7547 0.0742400 0.3787
## 37 -0.4239 0.7540 0.3761
## 45 -0.4299 0.7760 -0.05319 0.3858
## 39 -0.4231 0.006821 0.7509 0.3764
## 47 -0.4291 0.007447 0.7727 -0.05324 0.3861
## 8 -0.4390 0.4070 -0.112600 0.9136
## 16 -0.4463 0.4204 -0.117100 0.9414 -0.05910
## 6 -0.4262 0.3966 0.8639
## 24 -0.4390 0.4067 -0.112700 0.9136 0.0011510
## 14 -0.4324 0.4083 0.8876 -0.05398
## 32 -0.4466 0.4191 -0.117500 0.9420 -0.05996 0.0061540
## 22 -0.4262 0.3968 0.8639 -0.0007422
## 30 -0.4326 0.4075 0.8878 -0.05449 0.0036870
## 21 -0.4374 0.8824 0.0638700
## 5 -0.4351 0.8805
## 29 -0.4334 0.8678 0.03079 0.0601800
## 23 -0.4412 -0.034380 0.8972 0.0648100
## 13 -0.4298 0.8603 0.04319
## 7 -0.4383 -0.028830 0.8930
## 31 -0.4373 -0.033890 0.8826 0.03050 0.0611400
## 15 -0.4330 -0.028670 0.8727 0.04314
## 44 -0.2756 0.3214 0.389700 0.10970 0.5118
## 60 -0.2756 0.3213 0.389700 0.10960 0.0005417 0.5119
## 36 -0.2822 0.3431 0.411700 0.5425
## 52 -0.2824 0.3407 0.411300 0.0102700 0.5428
## 43 -0.2764 0.424900 0.17800 0.4999
## 59 -0.2773 0.424200 0.17050 0.0421400 0.5038
## 42 -0.2855 0.3410 0.17340 0.5253
## 58 -0.2855 0.3402 0.17280 0.0042270 0.5256
## 51 -0.2874 0.465000 0.0623500 0.5514
## 34 -0.2978 0.3782 0.5763
## 35 -0.2866 0.468800 0.5488
## 50 -0.2980 0.3737 0.0209100 0.5767
## 41 -0.2860 0.25420 0.5137
## 57 -0.2868 0.24550 0.0463100 0.5178
## 49 -0.3036 0.0779400 0.5905
## 33 -0.3032 0.5879
## 12 -0.2624 0.2999 0.421000 0.29320
## 28 -0.2615 0.3051 0.421200 0.29670 -0.0298900
## 4 -0.2835 0.3525 0.490900
## 20 -0.2834 0.3533 0.491100 -0.0043180
## 11 -0.2604 0.453400 0.34800
## 27 -0.2606 0.453400 0.34720 0.0059370
## 10 -0.2714 0.3218 0.36790
## 26 -0.2707 0.3261 0.37140 -0.0270500
## 3 -0.2827 0.552500
## 19 -0.2839 0.549900 0.0434200
## 9 -0.2701 0.43490
## 25 -0.2704 0.43330 0.0098340
## 2 -0.3024 0.3950
## 18 -0.3026 0.3935 0.0082640
## 17 -0.3050 0.0618200
## 1 -0.3039
## df logLik AICc delta weight
## 46 6 -2268.173 4548.4 0.00 0.373
## 48 7 -2267.341 4548.7 0.34 0.314
## 62 7 -2268.008 4550.0 1.68 0.161
## 64 8 -2267.155 4550.3 1.98 0.139
## 38 5 -2273.321 4556.7 8.29 0.006
## 40 6 -2272.657 4557.3 8.97 0.004
## 54 6 -2273.295 4558.6 10.24 0.002
## 56 7 -2272.625 4559.3 10.91 0.002
## 61 6 -2308.279 4628.6 80.21 0.000
## 53 5 -2309.406 4628.8 80.46 0.000
## 63 7 -2308.279 4630.6 82.22 0.000
## 55 6 -2309.406 4630.8 82.47 0.000
## 37 4 -2311.629 4631.3 82.90 0.000
## 45 5 -2310.989 4632.0 83.62 0.000
## 39 5 -2311.623 4633.3 84.89 0.000
## 47 6 -2310.981 4634.0 85.62 0.000
## 8 5 -2315.206 4640.4 92.06 0.000
## 16 6 -2314.415 4640.9 92.48 0.000
## 6 4 -2316.927 4641.9 93.50 0.000
## 24 6 -2315.206 4642.4 94.06 0.000
## 14 5 -2316.266 4642.5 94.18 0.000
## 32 7 -2314.401 4642.8 94.46 0.000
## 22 5 -2316.927 4643.9 95.50 0.000
## 30 6 -2316.261 4644.5 96.17 0.000
## 21 4 -2358.374 4724.8 176.39 0.000
## 5 3 -2360.069 4726.1 177.78 0.000
## 29 5 -2358.151 4726.3 177.95 0.000
## 23 5 -2358.215 4726.4 178.08 0.000
## 13 4 -2359.618 4727.2 178.88 0.000
## 7 4 -2359.957 4727.9 179.56 0.000
## 31 6 -2357.997 4728.0 179.65 0.000
## 15 5 -2359.507 4729.0 180.66 0.000
## 44 6 -2380.847 4773.7 225.35 0.000
## 60 7 -2380.847 4775.7 227.36 0.000
## 36 5 -2383.854 4777.7 229.35 0.000
## 52 6 -2383.813 4779.6 231.28 0.000
## 43 5 -2409.667 4829.3 280.98 0.000
## 59 6 -2408.958 4829.9 281.57 0.000
## 42 5 -2410.217 4830.4 282.08 0.000
## 58 6 -2410.211 4832.4 284.07 0.000
## 51 5 -2416.609 4843.2 294.87 0.000
## 34 4 -2418.060 4844.1 295.76 0.000
## 35 4 -2418.198 4844.4 296.04 0.000
## 50 5 -2417.890 4845.8 297.43 0.000
## 41 4 -2444.453 4896.9 348.55 0.000
## 57 5 -2443.591 4897.2 348.83 0.000
## 49 4 -2460.365 4928.7 380.37 0.000
## 33 3 -2462.877 4931.8 383.39 0.000
## 12 5 -2468.927 4947.9 399.50 0.000
## 28 6 -2468.571 4949.2 400.79 0.000
## 4 4 -2493.662 4995.3 446.97 0.000
## 20 5 -2493.654 4997.3 448.96 0.000
## 11 4 -2496.060 5000.1 451.76 0.000
## 27 5 -2496.045 5002.1 453.74 0.000
## 10 4 -2504.812 5017.6 469.27 0.000
## 26 5 -2504.515 5019.0 470.68 0.000
## 3 3 -2533.366 5072.7 524.37 0.000
## 19 4 -2532.551 5073.1 524.74 0.000
## 9 3 -2537.714 5081.4 533.07 0.000
## 25 4 -2537.673 5083.4 534.99 0.000
## 2 3 -2546.184 5098.4 550.01 0.000
## 18 4 -2546.156 5100.3 551.95 0.000
## 17 3 -2598.835 5203.7 655.31 0.000
## 1 2 -2600.524 5205.1 656.68 0.000
## Models ranked by AICc(x)
## Random terms (all models):
## 1 | id
<- model.avg(all_m, rank = "AICc", subset = delta < 2)
avgmodel summary(avgmodel)
##
## Call:
## model.avg(object = get.models(object = all_m, subset = delta <
## 2), rank = "AICc")
##
## Component model call:
## glmer(formula = pres_abs ~ <4 unique rhs>, data = DF_mod_day, family =
## binomial, na.action = na.fail)
##
## Component models:
## df logLik AICc delta weight
## 1346 6 -2268.17 4548.37 0.00 0.38
## 12346 7 -2267.34 4548.71 0.34 0.32
## 13456 7 -2268.01 4550.04 1.68 0.16
## 123456 8 -2267.16 4550.35 1.98 0.14
##
## Term codes:
## dist_road_only_scaled dist_sett_scaled forest_prop_scaled
## 1 2 3
## slope_scaled topo_pos_scaled us_scaled
## 4 5 6
##
## Model-averaged coefficients:
## (full average)
## Estimate Std. Error Adjusted SE z value Pr(>|z|)
## (Intercept) -0.432191 0.139554 0.139597 3.096 0.00196 **
## dist_road_only_scaled 0.416554 0.046861 0.046875 8.886 < 2e-16 ***
## forest_prop_scaled 0.818821 0.057266 0.057282 14.295 < 2e-16 ***
## slope_scaled -0.158651 0.049230 0.049245 3.222 0.00127 **
## us_scaled 0.390805 0.041070 0.041083 9.513 < 2e-16 ***
## dist_sett_scaled -0.036839 0.058045 0.058054 0.635 0.52571
## topo_pos_scaled 0.006678 0.022846 0.022852 0.292 0.77013
##
## (conditional average)
## Estimate Std. Error Adjusted SE z value Pr(>|z|)
## (Intercept) -0.43219 0.13955 0.13960 3.096 0.00196 **
## dist_road_only_scaled 0.41655 0.04686 0.04688 8.886 < 2e-16 ***
## forest_prop_scaled 0.81882 0.05727 0.05728 14.295 < 2e-16 ***
## slope_scaled -0.15865 0.04923 0.04924 3.222 0.00127 **
## us_scaled 0.39081 0.04107 0.04108 9.513 < 2e-16 ***
## dist_sett_scaled -0.08030 0.06208 0.06210 1.293 0.19597
## topo_pos_scaled 0.02197 0.03717 0.03718 0.591 0.55451
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
- Die Modellresultate aus dem avgmodel sind grundsätzlich die finalen Resultate die bereits interpretiert werden könnten. Allerdings funktionieren die Diagnosetests und die Darstellung der Resultate mit diesem gemittelten Modell nicht sehr gut, weshalb wir einen re-fit mit glmer machen müssen (an den Resultaten ändert sich dadurch nichts)
# hier zum Vergleich, dass die Resulate sich nur marginal verändern
summary(avgmodel)
##
## Call:
## model.avg(object = get.models(object = all_m, subset = delta <
## 2), rank = "AICc")
##
## Component model call:
## glmer(formula = pres_abs ~ <4 unique rhs>, data = DF_mod_day, family =
## binomial, na.action = na.fail)
##
## Component models:
## df logLik AICc delta weight
## 1346 6 -2268.17 4548.37 0.00 0.38
## 12346 7 -2267.34 4548.71 0.34 0.32
## 13456 7 -2268.01 4550.04 1.68 0.16
## 123456 8 -2267.16 4550.35 1.98 0.14
##
## Term codes:
## dist_road_only_scaled dist_sett_scaled forest_prop_scaled
## 1 2 3
## slope_scaled topo_pos_scaled us_scaled
## 4 5 6
##
## Model-averaged coefficients:
## (full average)
## Estimate Std. Error Adjusted SE z value Pr(>|z|)
## (Intercept) -0.432191 0.139554 0.139597 3.096 0.00196 **
## dist_road_only_scaled 0.416554 0.046861 0.046875 8.886 < 2e-16 ***
## forest_prop_scaled 0.818821 0.057266 0.057282 14.295 < 2e-16 ***
## slope_scaled -0.158651 0.049230 0.049245 3.222 0.00127 **
## us_scaled 0.390805 0.041070 0.041083 9.513 < 2e-16 ***
## dist_sett_scaled -0.036839 0.058045 0.058054 0.635 0.52571
## topo_pos_scaled 0.006678 0.022846 0.022852 0.292 0.77013
##
## (conditional average)
## Estimate Std. Error Adjusted SE z value Pr(>|z|)
## (Intercept) -0.43219 0.13955 0.13960 3.096 0.00196 **
## dist_road_only_scaled 0.41655 0.04686 0.04688 8.886 < 2e-16 ***
## forest_prop_scaled 0.81882 0.05727 0.05728 14.295 < 2e-16 ***
## slope_scaled -0.15865 0.04923 0.04924 3.222 0.00127 **
## us_scaled 0.39081 0.04107 0.04108 9.513 < 2e-16 ***
## dist_sett_scaled -0.08030 0.06208 0.06210 1.293 0.19597
## topo_pos_scaled 0.02197 0.03717 0.03718 0.591 0.55451
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m_day)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## pres_abs ~ slope_scaled + topo_pos_scaled + us_scaled + forest_prop_scaled +
## dist_road_only_scaled + dist_sett_scaled + (1 | id)
## Data: DF_mod_day
##
## AIC BIC logLik deviance df.resid
## 4550.3 4600.6 -2267.2 4534.3 3956
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.5371 -0.7152 -0.3711 0.8621 3.8118
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 0.2106 0.4589
## Number of obs: 3964, groups: id, 12
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.43788 0.13888 -3.153 0.00162 **
## slope_scaled -0.16249 0.04946 -3.285 0.00102 **
## topo_pos_scaled 0.02269 0.03718 0.610 0.54172
## us_scaled 0.38984 0.04111 9.482 < 2e-16 ***
## forest_prop_scaled 0.84030 0.05835 14.401 < 2e-16 ***
## dist_road_only_scaled 0.41697 0.04740 8.797 < 2e-16 ***
## dist_sett_scaled -0.08108 0.06215 -1.305 0.19203
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) slp_sc tp_ps_ us_scl frst__ dst___
## slope_scald 0.040
## top_ps_scld -0.011 -0.110
## us_scaled 0.006 -0.233 0.048
## frst_prp_sc -0.093 -0.358 0.046 -0.144
## dst_rd_nly_ -0.008 -0.213 -0.194 0.024 0.158
## dst_stt_scl 0.053 0.050 -0.030 0.056 -0.508 -0.128
Aufgabe 1
Berechung der AUC (area under the receiver operating characteristic curve) = Mass der Modellgüte
Für die Berechnung des AUC findet ihr weiterführende Informationen unter: Link
<- predict(m_day, type = c("response"))
prob <- prediction(prob, DF_mod_day$pres_abs)
pred
# AUC
<- performance(pred, measure = "auc")@y.values[[1]]
auc auc
Aufgabe 2
Interpretieren der Modell-Residuen mittels Tests auf verschiedene Aspekte
- Model testing for over/underdispersion, zeroinflation and spatial autocorrelation following the DHARMa package.
- unbedingt die Vignette des DHARMa-Package konsultieren: Link
# Residuals werden über eine Simulation auf eine Standard-Skala transformiert und
# können anschliessend getestet werden. Dabei kann die Anzahl Simulationen eingestellt
# werden (dauert je nach dem sehr lange)
<- simulateResiduals(fittedModel = m_day, n = 10000)
simulationOutput
# plotting and testing scaled residuals
plot(simulationOutput)
testResiduals(simulationOutput)
# The most common concern for GLMMs is overdispersion, underdispersion and
# zero-inflation.
# separate test for dispersion
testDispersion(simulationOutput)
# test for Zeroinflation
testZeroInflation(simulationOutput)
# test for spatial Autocorrelation
# calculating x, y positions per group
<- aggregate(DF_mod_day[, 3:4], list(DF_mod_day$x, DF_mod_day$y), mean)
groupLocations $group <- paste(groupLocations$Group.1, groupLocations$Group.2)
groupLocations<- groupLocations |> dplyr::select(x,y,group)
groupLocations
# calculating residuals per group
= recalculateResiduals(simulationOutput, group = groupLocations$group)
res2
# running the spatial test on grouped residuals
testSpatialAutocorrelation(res2, groupLocations$x, groupLocations$y, plot = F)
# Testen auf Multicollinearität (dh zu starke Korrelationen im finalen Modell, zB falls
# auf Grund der ökologischen Plausibilität stark korrelierte Variablen im Modell)
# use VIF values: if values less then 5 is ok (sometimes > 10), if mean of VIF values
# not substantially greater than 1 (say 5), no need to worry.
::vif(m_day)
carmean(car::vif(m_day))
Aufgabe 3
Graphische Darstellung der Modellresultate
# graphische Darstellung der gesamten Modellresultate
plot_model(m_day, transform = NULL, show.values = TRUE, value.offset = .3)
# Plotten der vorhergesagten Wahrscheinlichkeit, dass ein Kreis besetzt ist, in
# Abhängigkeit der erklärenden Variable basierend auf den Modellresultaten.
plot_model(m_day, type = "pred", terms = "us_scaled [all]")
# Problem: skalierte Variablen lassen sich nicht so ohne weiteres plotten, hier ein Hack um das Problem zu umgehen (Danke chatGPT!).
# Die Einstellungen müssen für jede Variable separat geändert werden
# Plot erstellen
<- plot_model(m_day, type = "pred", terms = "us_scaled [all]")
plot_obj
# Rücktransformation für die Achse: hier skalierten Werte anpassen
<- mean(DF_mod_day$us_2014)
original_mean <- sd(DF_mod_day$us_2014)
original_sd
<- min(DF_mod_day$us_2014)
min_us <- max(DF_mod_day$us_2014)
max_us
+
plot_obj scale_x_continuous(
limits = c((min_us - original_mean) / original_sd, (max_us - original_mean) / original_sd), # skaliert
breaks = seq((min_us - original_mean) / original_sd, (max_us - original_mean) / original_sd, length.out = 5), # automatische Schritte
labels = function(x) round(x * original_sd + original_mean, 2), # Rücktransformierte Labels
name = "Deckungsgrad Strauchschicht"
)
# Funktion um viele Plots auf einem zusammenbringen: cowplot-package (hat auch sonst
# gute Funktionen für schöne Layouts für Grafiken)
::plot_grid() cowplot
Aufgabe 4
Ermittlung des individuellen Beitrags der einzelnen Variablen im Gesamtmodell (leave-one-out Ansatz)
- Bestimmen delta AIC nach Coppes u. a. (2017) → Vergleich des Gesamtmodells gegenüber einem Modell ohne die entsprechende Variable.
- Auftrag auf nächste Woche: Kurze Vorstellung der Modellresultate & Diagnostics im Plenum und Diskussion der Ergebnisse (keine PP-Präsentation nötig)
Musterlösung
<- c("slope_scaled", "forest_prop_scaled", "dist_road_only_scaled", "dist_sett_scaled", "topo_pos_scaled", "us_scaled")
vars
# leave-one-out models
<- lapply(vars, function(var) {
modelle <- as.formula(paste("pres_abs ~", paste(setdiff(vars, var), collapse = " + "), "+ (1 | id)"))
formula glmer(formula, data = DF_mod_day, family = binomial, na.action = "na.fail")
})
names(modelle) <- paste0("m_", vars)
<- c(list(m_day = m_day), modelle)
modelle
::AICtab(modelle)
bbmle## dAIC df
## m_topo_pos_scaled 0.0 7
## m_dist_sett_scaled 1.3 7
## m_day 1.6 8
## m_slope_scaled 10.6 7
## m_dist_road_only_scaled 81.9 7
## m_us_scaled 94.1 7
## m_forest_prop_scaled 227.0 7