Modellgüte und -diagnostics MM

Libraries laden

Packages die wir für die Modelle und die Diagnostics brauchen

library("lme4")
library("bbmle")
library("MuMIn")
library("dplyr")
library("readr")
library("ggplot2")
library("DHARMa")
library("car")
library("MASS")
library("ROCR")
library("sjPlot")
library("ggeffects")
library("sjstats")
library("cowplot")
library("gstat")
library("purrr")
library("broom.mixed")

Ausgangslage

Der Modellfit aus Aufgabe 5 von letzter Woche dient als Ausgangspunkt für die heutigen Übungen.

DF_mod_day <- read_delim("datasets/fallstudie_n/Aufgabe4_Datensatz_Habitatnutzung_Modelle_241028.csv", delim = ";") |>
  filter(time_of_day == "day") |>
  mutate(
    slope_scaled = scale(slope),
    topo_pos_scaled = scale(topo_pos),
    us_scaled = scale(us_2014),
    os_scaled = scale(os_2014),
    forest_prop_scaled = scale(forest_prop),
    dist_road_trails_scaled = scale(dist_road_trails),
    dist_road_only_scaled = scale(dist_road_only),
    dist_sett_scaled = scale(dist_sett),
    id = as.factor(id)
  )

f <- pres_abs ~
  slope_scaled +
  topo_pos_scaled +
  us_scaled +
  forest_prop_scaled +
  dist_road_only_scaled +
  dist_sett_scaled 

f <- paste(c(f, "+ (1 | id)"), collapse = " ") |> as.formula()

m_day <- glmer(f, data = DF_mod_day, family = binomial, na.action = "na.fail")

all_m <- dredge(m_day)
all_m
## Global model call: glmer(formula = pres_abs ~ slope_scaled + topo_pos_scaled + us_scaled + 
##     forest_prop_scaled + dist_road_only_scaled + dist_sett_scaled + 
##     (1 | id), data = DF_mod_day, family = binomial, na.action = "na.fail")
## ---
## Model selection table 
##      (Int) dst_rod_onl_scl dst_stt_scl frs_prp_scl  slp_scl top_pos_scl us_scl
## 46 -0.4277          0.4145                  0.8012 -0.15640             0.3920
## 48 -0.4369          0.4226   -0.079960      0.8387 -0.15920             0.3887
## 62 -0.4285          0.4090                  0.8022 -0.15950   0.0213600 0.3931
## 64 -0.4379          0.4170   -0.081080      0.8403 -0.16250   0.0226900 0.3898
## 38 -0.4117          0.3804                  0.7412                      0.3627
## 40 -0.4197          0.3869   -0.071040      0.7737                      0.3593
## 54 -0.4119          0.3781                  0.7411            0.0084000 0.3629
## 56 -0.4199          0.3843   -0.071390      0.7737            0.0092950 0.3596
## 61 -0.4341                                  0.7850 -0.07165   0.0831200 0.3920
## 53 -0.4255                                  0.7552            0.0742700 0.3787
## 63 -0.4339                    0.001142      0.7845 -0.07165   0.0830900 0.3920
## 55 -0.4254                    0.001146      0.7547            0.0742400 0.3787
## 37 -0.4239                                  0.7540                      0.3761
## 45 -0.4299                                  0.7760 -0.05319             0.3858
## 39 -0.4231                    0.006821      0.7509                      0.3764
## 47 -0.4291                    0.007447      0.7727 -0.05324             0.3861
## 8  -0.4390          0.4070   -0.112600      0.9136                            
## 16 -0.4463          0.4204   -0.117100      0.9414 -0.05910                   
## 6  -0.4262          0.3966                  0.8639                            
## 24 -0.4390          0.4067   -0.112700      0.9136            0.0011510       
## 14 -0.4324          0.4083                  0.8876 -0.05398                   
## 32 -0.4466          0.4191   -0.117500      0.9420 -0.05996   0.0061540       
## 22 -0.4262          0.3968                  0.8639           -0.0007422       
## 30 -0.4326          0.4075                  0.8878 -0.05449   0.0036870       
## 21 -0.4374                                  0.8824            0.0638700       
## 5  -0.4351                                  0.8805                            
## 29 -0.4334                                  0.8678  0.03079   0.0601800       
## 23 -0.4412                   -0.034380      0.8972            0.0648100       
## 13 -0.4298                                  0.8603  0.04319                   
## 7  -0.4383                   -0.028830      0.8930                            
## 31 -0.4373                   -0.033890      0.8826  0.03050   0.0611400       
## 15 -0.4330                   -0.028670      0.8727  0.04314                   
## 44 -0.2756          0.3214    0.389700              0.10970             0.5118
## 60 -0.2756          0.3213    0.389700              0.10960   0.0005417 0.5119
## 36 -0.2822          0.3431    0.411700                                  0.5425
## 52 -0.2824          0.3407    0.411300                        0.0102700 0.5428
## 43 -0.2764                    0.424900              0.17800             0.4999
## 59 -0.2773                    0.424200              0.17050   0.0421400 0.5038
## 42 -0.2855          0.3410                          0.17340             0.5253
## 58 -0.2855          0.3402                          0.17280   0.0042270 0.5256
## 51 -0.2874                    0.465000                        0.0623500 0.5514
## 34 -0.2978          0.3782                                              0.5763
## 35 -0.2866                    0.468800                                  0.5488
## 50 -0.2980          0.3737                                    0.0209100 0.5767
## 41 -0.2860                                          0.25420             0.5137
## 57 -0.2868                                          0.24550   0.0463100 0.5178
## 49 -0.3036                                                    0.0779400 0.5905
## 33 -0.3032                                                              0.5879
## 12 -0.2624          0.2999    0.421000              0.29320                   
## 28 -0.2615          0.3051    0.421200              0.29670  -0.0298900       
## 4  -0.2835          0.3525    0.490900                                        
## 20 -0.2834          0.3533    0.491100                       -0.0043180       
## 11 -0.2604                    0.453400              0.34800                   
## 27 -0.2606                    0.453400              0.34720   0.0059370       
## 10 -0.2714          0.3218                          0.36790                   
## 26 -0.2707          0.3261                          0.37140  -0.0270500       
## 3  -0.2827                    0.552500                                        
## 19 -0.2839                    0.549900                        0.0434200       
## 9  -0.2701                                          0.43490                   
## 25 -0.2704                                          0.43330   0.0098340       
## 2  -0.3024          0.3950                                                    
## 18 -0.3026          0.3935                                    0.0082640       
## 17 -0.3050                                                    0.0618200       
## 1  -0.3039                                                                    
##    df    logLik   AICc  delta weight
## 46  6 -2268.173 4548.4   0.00  0.373
## 48  7 -2267.341 4548.7   0.34  0.314
## 62  7 -2268.008 4550.0   1.68  0.161
## 64  8 -2267.155 4550.3   1.98  0.139
## 38  5 -2273.321 4556.7   8.29  0.006
## 40  6 -2272.657 4557.3   8.97  0.004
## 54  6 -2273.295 4558.6  10.24  0.002
## 56  7 -2272.625 4559.3  10.91  0.002
## 61  6 -2308.279 4628.6  80.21  0.000
## 53  5 -2309.406 4628.8  80.46  0.000
## 63  7 -2308.279 4630.6  82.22  0.000
## 55  6 -2309.406 4630.8  82.47  0.000
## 37  4 -2311.629 4631.3  82.90  0.000
## 45  5 -2310.989 4632.0  83.62  0.000
## 39  5 -2311.623 4633.3  84.89  0.000
## 47  6 -2310.981 4634.0  85.62  0.000
## 8   5 -2315.206 4640.4  92.06  0.000
## 16  6 -2314.415 4640.9  92.48  0.000
## 6   4 -2316.927 4641.9  93.50  0.000
## 24  6 -2315.206 4642.4  94.06  0.000
## 14  5 -2316.266 4642.5  94.18  0.000
## 32  7 -2314.401 4642.8  94.46  0.000
## 22  5 -2316.927 4643.9  95.50  0.000
## 30  6 -2316.261 4644.5  96.17  0.000
## 21  4 -2358.374 4724.8 176.39  0.000
## 5   3 -2360.069 4726.1 177.78  0.000
## 29  5 -2358.151 4726.3 177.95  0.000
## 23  5 -2358.215 4726.4 178.08  0.000
## 13  4 -2359.618 4727.2 178.88  0.000
## 7   4 -2359.957 4727.9 179.56  0.000
## 31  6 -2357.997 4728.0 179.65  0.000
## 15  5 -2359.507 4729.0 180.66  0.000
## 44  6 -2380.847 4773.7 225.35  0.000
## 60  7 -2380.847 4775.7 227.36  0.000
## 36  5 -2383.854 4777.7 229.35  0.000
## 52  6 -2383.813 4779.6 231.28  0.000
## 43  5 -2409.667 4829.3 280.98  0.000
## 59  6 -2408.958 4829.9 281.57  0.000
## 42  5 -2410.217 4830.4 282.08  0.000
## 58  6 -2410.211 4832.4 284.07  0.000
## 51  5 -2416.609 4843.2 294.87  0.000
## 34  4 -2418.060 4844.1 295.76  0.000
## 35  4 -2418.198 4844.4 296.04  0.000
## 50  5 -2417.890 4845.8 297.43  0.000
## 41  4 -2444.453 4896.9 348.55  0.000
## 57  5 -2443.591 4897.2 348.83  0.000
## 49  4 -2460.365 4928.7 380.37  0.000
## 33  3 -2462.877 4931.8 383.39  0.000
## 12  5 -2468.927 4947.9 399.50  0.000
## 28  6 -2468.571 4949.2 400.79  0.000
## 4   4 -2493.662 4995.3 446.97  0.000
## 20  5 -2493.654 4997.3 448.96  0.000
## 11  4 -2496.060 5000.1 451.76  0.000
## 27  5 -2496.045 5002.1 453.74  0.000
## 10  4 -2504.812 5017.6 469.27  0.000
## 26  5 -2504.515 5019.0 470.68  0.000
## 3   3 -2533.366 5072.7 524.37  0.000
## 19  4 -2532.551 5073.1 524.74  0.000
## 9   3 -2537.714 5081.4 533.07  0.000
## 25  4 -2537.673 5083.4 534.99  0.000
## 2   3 -2546.184 5098.4 550.01  0.000
## 18  4 -2546.156 5100.3 551.95  0.000
## 17  3 -2598.835 5203.7 655.31  0.000
## 1   2 -2600.524 5205.1 656.68  0.000
## Models ranked by AICc(x) 
## Random terms (all models): 
##   1 | id

avgmodel <- model.avg(all_m, rank = "AICc", subset = delta < 2)
summary(avgmodel)
## 
## Call:
## model.avg(object = get.models(object = all_m, subset = delta < 
##     2), rank = "AICc")
## 
## Component model call: 
## glmer(formula = pres_abs ~ <4 unique rhs>, data = DF_mod_day, family = 
##      binomial, na.action = na.fail)
## 
## Component models: 
##        df   logLik    AICc delta weight
## 1346    6 -2268.17 4548.37  0.00   0.38
## 12346   7 -2267.34 4548.71  0.34   0.32
## 13456   7 -2268.01 4550.04  1.68   0.16
## 123456  8 -2267.16 4550.35  1.98   0.14
## 
## Term codes: 
## dist_road_only_scaled      dist_sett_scaled    forest_prop_scaled 
##                     1                     2                     3 
##          slope_scaled       topo_pos_scaled             us_scaled 
##                     4                     5                     6 
## 
## Model-averaged coefficients:  
## (full average) 
##                        Estimate Std. Error Adjusted SE z value Pr(>|z|)    
## (Intercept)           -0.432191   0.139554    0.139597   3.096  0.00196 ** 
## dist_road_only_scaled  0.416554   0.046861    0.046875   8.886  < 2e-16 ***
## forest_prop_scaled     0.818821   0.057266    0.057282  14.295  < 2e-16 ***
## slope_scaled          -0.158651   0.049230    0.049245   3.222  0.00127 ** 
## us_scaled              0.390805   0.041070    0.041083   9.513  < 2e-16 ***
## dist_sett_scaled      -0.036839   0.058045    0.058054   0.635  0.52571    
## topo_pos_scaled        0.006678   0.022846    0.022852   0.292  0.77013    
##  
## (conditional average) 
##                       Estimate Std. Error Adjusted SE z value Pr(>|z|)    
## (Intercept)           -0.43219    0.13955     0.13960   3.096  0.00196 ** 
## dist_road_only_scaled  0.41655    0.04686     0.04688   8.886  < 2e-16 ***
## forest_prop_scaled     0.81882    0.05727     0.05728  14.295  < 2e-16 ***
## slope_scaled          -0.15865    0.04923     0.04924   3.222  0.00127 ** 
## us_scaled              0.39081    0.04107     0.04108   9.513  < 2e-16 ***
## dist_sett_scaled      -0.08030    0.06208     0.06210   1.293  0.19597    
## topo_pos_scaled        0.02197    0.03717     0.03718   0.591  0.55451    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Die Modellresultate aus dem avgmodel sind grundsätzlich die finalen Resultate die bereits interpretiert werden könnten. Allerdings funktionieren die Diagnosetests und die Darstellung der Resultate mit diesem gemittelten Modell nicht sehr gut, weshalb wir einen re-fit mit glmer machen müssen (an den Resultaten ändert sich dadurch nichts)

# hier zum Vergleich, dass die Resulate sich nur marginal verändern

summary(avgmodel)
## 
## Call:
## model.avg(object = get.models(object = all_m, subset = delta < 
##     2), rank = "AICc")
## 
## Component model call: 
## glmer(formula = pres_abs ~ <4 unique rhs>, data = DF_mod_day, family = 
##      binomial, na.action = na.fail)
## 
## Component models: 
##        df   logLik    AICc delta weight
## 1346    6 -2268.17 4548.37  0.00   0.38
## 12346   7 -2267.34 4548.71  0.34   0.32
## 13456   7 -2268.01 4550.04  1.68   0.16
## 123456  8 -2267.16 4550.35  1.98   0.14
## 
## Term codes: 
## dist_road_only_scaled      dist_sett_scaled    forest_prop_scaled 
##                     1                     2                     3 
##          slope_scaled       topo_pos_scaled             us_scaled 
##                     4                     5                     6 
## 
## Model-averaged coefficients:  
## (full average) 
##                        Estimate Std. Error Adjusted SE z value Pr(>|z|)    
## (Intercept)           -0.432191   0.139554    0.139597   3.096  0.00196 ** 
## dist_road_only_scaled  0.416554   0.046861    0.046875   8.886  < 2e-16 ***
## forest_prop_scaled     0.818821   0.057266    0.057282  14.295  < 2e-16 ***
## slope_scaled          -0.158651   0.049230    0.049245   3.222  0.00127 ** 
## us_scaled              0.390805   0.041070    0.041083   9.513  < 2e-16 ***
## dist_sett_scaled      -0.036839   0.058045    0.058054   0.635  0.52571    
## topo_pos_scaled        0.006678   0.022846    0.022852   0.292  0.77013    
##  
## (conditional average) 
##                       Estimate Std. Error Adjusted SE z value Pr(>|z|)    
## (Intercept)           -0.43219    0.13955     0.13960   3.096  0.00196 ** 
## dist_road_only_scaled  0.41655    0.04686     0.04688   8.886  < 2e-16 ***
## forest_prop_scaled     0.81882    0.05727     0.05728  14.295  < 2e-16 ***
## slope_scaled          -0.15865    0.04923     0.04924   3.222  0.00127 ** 
## us_scaled              0.39081    0.04107     0.04108   9.513  < 2e-16 ***
## dist_sett_scaled      -0.08030    0.06208     0.06210   1.293  0.19597    
## topo_pos_scaled        0.02197    0.03717     0.03718   0.591  0.55451    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m_day)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## pres_abs ~ slope_scaled + topo_pos_scaled + us_scaled + forest_prop_scaled +  
##     dist_road_only_scaled + dist_sett_scaled + (1 | id)
##    Data: DF_mod_day
## 
##      AIC      BIC   logLik deviance df.resid 
##   4550.3   4600.6  -2267.2   4534.3     3956 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.5371 -0.7152 -0.3711  0.8621  3.8118 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  id     (Intercept) 0.2106   0.4589  
## Number of obs: 3964, groups:  id, 12
## 
## Fixed effects:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           -0.43788    0.13888  -3.153  0.00162 ** 
## slope_scaled          -0.16249    0.04946  -3.285  0.00102 ** 
## topo_pos_scaled        0.02269    0.03718   0.610  0.54172    
## us_scaled              0.38984    0.04111   9.482  < 2e-16 ***
## forest_prop_scaled     0.84030    0.05835  14.401  < 2e-16 ***
## dist_road_only_scaled  0.41697    0.04740   8.797  < 2e-16 ***
## dist_sett_scaled      -0.08108    0.06215  -1.305  0.19203    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) slp_sc tp_ps_ us_scl frst__ dst___
## slope_scald  0.040                                   
## top_ps_scld -0.011 -0.110                            
## us_scaled    0.006 -0.233  0.048                     
## frst_prp_sc -0.093 -0.358  0.046 -0.144              
## dst_rd_nly_ -0.008 -0.213 -0.194  0.024  0.158       
## dst_stt_scl  0.053  0.050 -0.030  0.056 -0.508 -0.128

Aufgabe 1

Berechung der AUC (area under the receiver operating characteristic curve) = Mass der Modellgüte

Für die Berechnung des AUC findet ihr weiterführende Informationen unter: Link

prob <- predict(m_day, type = c("response"))
pred <- prediction(prob, DF_mod_day$pres_abs)

# AUC

auc <- performance(pred, measure = "auc")@y.values[[1]]
auc

Aufgabe 2

Interpretieren der Modell-Residuen mittels Tests auf verschiedene Aspekte

Model testing for over/underdispersion, zeroinflation and spatial autocorrelation following the DHARMa package.
unbedingt die Vignette des DHARMa-Package konsultieren: Link

# Residuals werden über eine Simulation auf eine Standard-Skala transformiert und
# können anschliessend getestet werden. Dabei kann die Anzahl Simulationen eingestellt
# werden (dauert je nach dem sehr lange)

simulationOutput <- simulateResiduals(fittedModel = m_day, n = 10000)

# plotting and testing scaled residuals

plot(simulationOutput)
testResiduals(simulationOutput)

# The most common concern for GLMMs is overdispersion, underdispersion and
# zero-inflation.

# separate test for dispersion

testDispersion(simulationOutput)

# test for Zeroinflation

testZeroInflation(simulationOutput)

# test for spatial Autocorrelation

# calculating x, y positions per group
groupLocations <- aggregate(DF_mod_day[, 3:4], list(DF_mod_day$x, DF_mod_day$y), mean)
groupLocations$group <- paste(groupLocations$Group.1, groupLocations$Group.2)
groupLocations <- groupLocations |> dplyr::select(x,y,group)



# calculating residuals per group
res2 = recalculateResiduals(simulationOutput, group = groupLocations$group)

# running the spatial test on grouped residuals
testSpatialAutocorrelation(res2, groupLocations$x, groupLocations$y, plot = F)

# Testen auf Multicollinearität (dh zu starke Korrelationen im finalen Modell, zB falls
# auf Grund der ökologischen Plausibilität stark korrelierte Variablen im Modell)
# use VIF values: if values less then 5 is ok (sometimes > 10), if mean of VIF values
# not substantially greater than 1 (say 5), no need to worry.

car::vif(m_day)
mean(car::vif(m_day))

Aufgabe 3

Graphische Darstellung der Modellresultate

# graphische Darstellung der gesamten Modellresultate

plot_model(m_day, transform = NULL, show.values = TRUE, value.offset = .3)

# Plotten der vorhergesagten Wahrscheinlichkeit, dass ein Kreis besetzt ist, in
# Abhängigkeit der erklärenden Variable basierend auf den Modellresultaten.

plot_model(m_day, type = "pred", terms = "us_scaled [all]")

# Problem: skalierte Variablen lassen sich nicht so ohne weiteres plotten, hier ein Hack um das Problem zu umgehen (Danke chatGPT!). 
# Die Einstellungen müssen für jede Variable separat geändert werden

# Plot erstellen
plot_obj <- plot_model(m_day, type = "pred", terms = "us_scaled [all]")

# Rücktransformation für die Achse: hier skalierten Werte anpassen

original_mean <- mean(DF_mod_day$us_2014)  
original_sd <- sd(DF_mod_day$us_2014)  

min_us <- min(DF_mod_day$us_2014)
max_us <- max(DF_mod_day$us_2014)


plot_obj +
  scale_x_continuous(
    limits = c((min_us - original_mean) / original_sd, (max_us - original_mean) / original_sd),  # skaliert
    breaks = seq((min_us - original_mean) / original_sd, (max_us - original_mean) / original_sd, length.out = 5),  # automatische Schritte
    labels = function(x) round(x * original_sd + original_mean, 2),  # Rücktransformierte Labels
    name = "Deckungsgrad Strauchschicht"
  )


# Funktion um viele Plots auf einem zusammenbringen: cowplot-package (hat auch sonst
# gute Funktionen für schöne Layouts für Grafiken)

cowplot::plot_grid()

Aufgabe 4

Ermittlung des individuellen Beitrags der einzelnen Variablen im Gesamtmodell (leave-one-out Ansatz)

Bestimmen delta AIC nach Coppes u. a. (2017) → Vergleich des Gesamtmodells gegenüber einem Modell ohne die entsprechende Variable.
Auftrag auf nächste Woche: Kurze Vorstellung der Modellresultate & Diagnostics im Plenum und Diskussion der Ergebnisse (keine PP-Präsentation nötig)

Musterlösung


vars <- c("slope_scaled", "forest_prop_scaled", "dist_road_only_scaled", "dist_sett_scaled", "topo_pos_scaled", "us_scaled")

# leave-one-out models
modelle <- lapply(vars, function(var) {
  formula <- as.formula(paste("pres_abs ~", paste(setdiff(vars, var), collapse = " + "), "+ (1 | id)"))
  glmer(formula, data = DF_mod_day, family = binomial, na.action = "na.fail")
})


names(modelle) <- paste0("m_", vars)

modelle <- c(list(m_day = m_day), modelle)


bbmle::AICtab(modelle)
##                         dAIC  df
## m_topo_pos_scaled         0.0 7 
## m_dist_sett_scaled        1.3 7 
## m_day                     1.6 8 
## m_slope_scaled           10.6 7 
## m_dist_road_only_scaled  81.9 7 
## m_us_scaled              94.1 7 
## m_forest_prop_scaled    227.0 7