# ggplot2 help pages: http://docs.ggplot2.org/current/ -> look for the fit-line icon
# http://docs.ggplot2.org/current/geom_smooth.html
# Build a scatterplot of height against age for a first inspection, with a
# fitted line on top. geom_smooth() is called with its defaults, so ggplot2
# chooses the smoothing method itself.
ggplot(people, aes(x = age, y = height)) +
  geom_point() +
  # Fix the height axis to the range 0 to 2.0.
  scale_y_continuous(limits = c(0, 2.0)) +
  geom_smooth()
# Stem-and-leaf plot of height at the default scale.
stem(people$height)
##
##   The decimal point is 1 digit(s) to the left of the |
##
##    8 | 25593
##   10 | 037
##   12 | 523
##   14 | 19556
##   16 | 255789916
##   18 | 04774

# Same data with scale = 2: the stems are split more finely (see output).
stem(people$height, scale = 2)
##
##   The decimal point is 1 digit(s) to the left of the |
##
##    8 | 2559
##    9 | 3
##   10 |
##   11 | 037
##   12 | 5
##   13 | 23
##   14 | 19
##   15 | 556
##   16 | 2557899
##   17 | 16
##   18 | 0477
##   19 | 4
# Explore the two variables with box-and-whisker plots.
# Age: five-number summary plus mean, then the boxplot.
summary(people$age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
##    5.00    8.70   30.20   59.14   65.15  512.30
boxplot(people$age)
# Height: five-number summary plus mean, then the boxplot.
summary(people$height)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
##   0.820   1.190   1.555   1.455   1.690   1.940
boxplot(people$height)
# Explore the age distribution with a histogram (bins are 20 units wide).
ggplot(people, aes(x = age)) +
  geom_histogram(binwidth = 20)
# Density estimate of height; printing the result gives the summary below.
density(x = people$height)
##
## Call:
##  density.default(x = people$height)
##
## Data: people$height (30 obs.);  Bandwidth 'bw' = 0.1576
##
##        x                y
##  Min.   :0.3472   Min.   :0.001577
##  1st Qu.:0.8636   1st Qu.:0.102659
##  Median :1.3800   Median :0.510260
##  Mean   :1.3800   Mean   :0.483081
##  3rd Qu.:1.8964   3rd Qu.:0.721733
##  Max.   :2.4128   Max.   :1.215724

# Re-expression: use log or sqrt axes.
#
# Guidelines on scaling axes can be found here:
# http://www.cookbook-r.com/Graphs/Axes_(ggplot2)/
# http://docs.ggplot2.org/0.9.3.1/scale_continuous.html
# Logarithmic axis: a response to skewness in the data, here log10 on age.
# Same scatterplot and fitted line as before, with the x axis on a log10 scale.
ggplot(people, aes(x = age, y = height)) +
  geom_point() +
  scale_y_continuous(limits = c(0, 2.0)) +
  geom_smooth() +
  scale_x_log10()
# Outliers: remove the very small and the very old people.
peopleClean <- people |>
  # ID 27: this person was too small.
  filter(ID != 27) |>
  # Keep only ages below 100; larger values are errors in recording the age.
  filter(age < 100)