# Randomly sample 500 values from a normal distribution
myvar <- rnorm(500, mean = 0, sd = 1)1. Distributions
A fundamental concept in Bayesian statistics
Why do statisticians hate normal distributions? Because they are ‘mean’!

Note
A Normal distribution with \(\mu = 0\) and \(\sigma = 1\) is also called a z-Distribution (aka a Standard Normal Distribution)
r*() functions are used to draw random samples from distributionsrnorm() = random + normalx <- rnorm(100000, mean = 0, sd = 1)
x_strictly_positive <- x[x >= 0]
ggplot(data.frame(x = x), aes(x = x, y = after_stat(count))) +
geom_vline(xintercept = 0, linetype = "dashed") +
geom_density(linewidth = 2) +
geom_density(data = data.frame(x = x_strictly_positive),
color = "red", linewidth = 1) +
theme_minimal()
estimate_density() function returns a data frame with the estimated densityx (possible values of the variable) and y (its associated probability)d*() functions
dnorm() = density + normalx), and the parameters of the distribution (e.g., mean, sd)df_norm <- data.frame()
for (u in c(0, 2.5, 5)) {
for (sd in c(1, 2, 3)) {
dat <- data.frame(x = seq(-10, 10, length.out = 200))
dat$p <- dnorm(dat$x, mean = u, sd = sd)
dat$distribution <- "Normal"
dat$parameters <- paste0("mean = ", u, ", SD = ", sd)
df_norm <- rbind(dat, df_norm)
}
}
p <- df_norm |>
ggplot(aes(x = x, y = p, group = parameters)) +
geom_vline(xintercept = 0, linetype = "dashed") +
geom_line(aes(color = distribution), linewidth = 2, color = "#2196F3") +
theme_minimal() +
theme(
legend.position = "none",
axis.title.y = element_blank(),
plot.title = element_text(size = 15, face = "bold", hjust = 0.5),
plot.tag = element_text(size = 20, face = "bold", hjust = 0.5),
strip.text = element_text(size = 15, face = "bold")
)
p + facet_wrap(~parameters)rnorm() and dnorm()?
Why did the binomial distribution break up with its partner? Because it found their relationship too binary
size (number of trials) and prob (probability of “success”)2library(gganimate)
df_binom <- data.frame()
for (p in c(1 / 3, 1 / 2, 2 / 3)) {
dat <- data.frame()
for (i in 1:1000) {
rez <- rbinom(1, size = 1, prob = p)
dat2 <- data.frame(Zero = 0, One = 0, i = i)
dat2$Zero[rez == 0] <- 1
dat2$One[rez == 1] <- 1
dat2$distribution <- "Binomial"
dat2$parameters <- paste0("p = ", insight::format_value(p))
dat <- rbind(dat, dat2)
}
dat$Zeros <- cumsum(dat$Zero)
dat$Ones <- cumsum(dat$One)
df_binom <- rbind(df_binom, dat)
}
df_binom <- pivot_longer(
df_binom,
c("Zeros", "Ones"),
names_to = "Outcome",
values_to = "Count"
) |>
mutate(Outcome = factor(Outcome, levels = c("Zeros", "Ones")))
anim <- df_binom |>
ggplot(aes(x = Outcome, y = Count, group = parameters)) +
geom_bar(aes(fill = parameters), stat = "identity", position = "dodge") +
theme_minimal() +
labs(title = 'N events: {frame_time}', tag = "Binomial Distribution") +
transition_time(i)
anim_save(
"img/binomial.gif",
animate(anim, height = 500, width = 1000, fps = 10)
)

Tip
for(i in 1:20) {...}data.frame() function can be used to initialize an empty data framerbind() function can be used to concatenate data frames verticallydata <- data.frame() # Initialize empty data frame
for (i in 1:20) {
walk_data <- data.frame(
trajectory = random_walk(10),
time = seq(0, 10),
iteration = i
)
data <- rbind(data, walk_data)
}
data trajectory time iteration
1 0 0 1
2 -1 1 1
3 -2 2 1
4 -1 3 1
5 0 4 1
6 1 5 1
7 0 6 1
8 1 7 1
9 0 8 1
10 -1 9 1
11 0 10 1
12 0 0 2
13 -1 1 2
14 0 2 2
15 1 3 2
16 2 4 2
17 3 5 2
18 2 6 2
19 3 7 2
20 4 8 2
21 5 9 2
22 6 10 2
23 0 0 3
24 1 1 3
25 0 2 3
26 -1 3 3
27 0 4 3
28 -1 5 3
29 0 6 3
30 1 7 3
31 0 8 3
32 -1 9 3
33 0 10 3
34 0 0 4
35 1 1 4
36 2 2 4
37 1 3 4
38 0 4 4
39 1 5 4
40 2 6 4
41 3 7 4
42 4 8 4
43 5 9 4
44 6 10 4
45 0 0 5
46 -1 1 5
47 0 2 5
48 1 3 5
49 2 4 5
50 3 5 5
51 2 6 5
52 1 7 5
53 0 8 5
54 1 9 5
55 2 10 5
56 0 0 6
57 -1 1 6
58 -2 2 6
59 -1 3 6
60 0 4 6
61 1 5 6
62 0 6 6
63 -1 7 6
64 -2 8 6
65 -1 9 6
66 -2 10 6
67 0 0 7
68 1 1 7
69 0 2 7
70 1 3 7
71 0 4 7
72 -1 5 7
73 0 6 7
74 1 7 7
75 0 8 7
76 -1 9 7
77 -2 10 7
78 0 0 8
79 1 1 8
80 2 2 8
81 1 3 8
82 2 4 8
83 3 5 8
84 4 6 8
85 3 7 8
86 2 8 8
87 1 9 8
88 0 10 8
89 0 0 9
90 1 1 9
91 0 2 9
92 -1 3 9
93 -2 4 9
94 -3 5 9
95 -4 6 9
96 -3 7 9
97 -2 8 9
98 -1 9 9
99 0 10 9
100 0 0 10
101 -1 1 10
102 0 2 10
103 1 3 10
104 2 4 10
105 3 5 10
106 2 6 10
107 1 7 10
108 2 8 10
109 1 9 10
110 0 10 10
111 0 0 11
112 1 1 11
113 2 2 11
114 1 3 11
115 0 4 11
116 -1 5 11
117 0 6 11
118 -1 7 11
119 0 8 11
120 -1 9 11
121 0 10 11
122 0 0 12
123 1 1 12
124 2 2 12
125 1 3 12
126 2 4 12
127 1 5 12
128 0 6 12
129 1 7 12
130 0 8 12
131 1 9 12
132 2 10 12
133 0 0 13
134 1 1 13
135 0 2 13
136 -1 3 13
137 0 4 13
138 -1 5 13
139 -2 6 13
140 -3 7 13
141 -4 8 13
142 -3 9 13
143 -2 10 13
144 0 0 14
145 1 1 14
146 2 2 14
147 3 3 14
148 2 4 14
149 1 5 14
150 0 6 14
151 1 7 14
152 0 8 14
153 -1 9 14
154 0 10 14
155 0 0 15
156 -1 1 15
157 -2 2 15
158 -1 3 15
159 -2 4 15
160 -3 5 15
161 -2 6 15
162 -3 7 15
163 -4 8 15
164 -3 9 15
165 -2 10 15
166 0 0 16
167 1 1 16
168 2 2 16
169 1 3 16
170 2 4 16
171 1 5 16
172 0 6 16
173 -1 7 16
174 -2 8 16
175 -3 9 16
176 -2 10 16
177 0 0 17
178 -1 1 17
179 0 2 17
180 -1 3 17
181 -2 4 17
182 -1 5 17
183 -2 6 17
184 -1 7 17
185 0 8 17
186 1 9 17
187 0 10 17
188 0 0 18
189 1 1 18
190 0 2 18
191 -1 3 18
192 0 4 18
193 1 5 18
194 0 6 18
195 1 7 18
196 2 8 18
197 3 9 18
198 2 10 18
199 0 0 19
200 -1 1 19
201 0 2 19
202 -1 3 19
203 -2 4 19
204 -3 5 19
205 -4 6 19
206 -3 7 19
207 -4 8 19
208 -5 9 19
209 -6 10 19
210 0 0 20
211 1 1 20
212 2 2 20
213 1 3 20
214 2 4 20
215 3 5 20
216 4 6 20
217 5 7 20
218 6 8 20
219 5 9 20
220 6 10 20
color aesthetic needs to be converted to a factor to be interpreted as a discrete variabledata <- data.frame() # Initialize empty data frame
for (i in 1:20) {
walk_data <- data.frame(
trajectory = random_walk(500),
time = seq(0, 500),
iteration = i
)
data <- rbind(data, walk_data)
}
data |>
mutate(iteration = as.factor(iteration)) |>
ggplot(aes(x = time, y = trajectory, color = iteration)) +
geom_line(linewidth = 0.5) +
labs(y = "Position", x = "Time") +
scale_color_viridis_d(option = "inferno", guide = "none") +
see::theme_blackboard() +
coord_flip()library(gganimate)
set.seed(1)
data <- data.frame()
all_iter <- data.frame()
distr <- data.frame()
for (i in 1:100) {
walk_data <- data.frame(
trajectory = random_walk(99),
time = seq(0, 99),
group = i,
iteration = NA
)
data <- rbind(data, walk_data)
data$iteration <- i
max_t <- max(walk_data$time)
distr_data <- filter(data, time == max_t)$trajectory |>
bayestestR::estimate_density(precision = 256) |>
mutate(
y = datawizard::rescale(y, to = c(max_t, max_t + max_t / 3)),
iteration = i
)
distr <- rbind(distr, distr_data)
all_iter <- rbind(all_iter, data)
}
df <- all_iter |>
mutate(iteration = as.factor(iteration), group = as.factor(group))
p <- df |>
# filter(iteration == 100) |>
ggplot() +
geom_line(
aes(x = time, y = trajectory, color = group),
alpha = 0.7,
linewidth = 1.3
) +
geom_ribbon(
data = distr,
aes(xmin = max(all_iter$time), xmax = y, y = x),
fill = "#FF9800"
) +
geom_point(
data = filter(df, time == max(time)),
aes(x = time, y = trajectory, color = group),
size = 5,
alpha = 0.2
) +
labs(y = "Position", x = "Time", title = "N random walks: {frame}") +
scale_color_viridis_d(option = "turbo", guide = "none") +
see::theme_blackboard() +
theme(plot.title = element_text(hjust = 0.5, size = 15, face = "bold")) +
coord_flip()
# p
anim <- p + transition_manual(iteration)
gganimate::anim_save(
"img/random.gif",
animate(
anim,
height = 1000,
width = 1000,
fps = 7,
end_pause = 7 * 4,
duration = 20
)
)
🤯🤯🤯🤯🤯🤯🤯🤯
A NORMAL DISTRIBUTION
🤯🤯🤯🤯🤯🤯🤯🤯

Why did the uniform distribution get hired as a referee? Because it always calls it fair and square, giving every player an equal chance!
runif()Why do beta distributions love to go to the gym? So that they are not out of shape!
shape1 & shape2)
shape1) and \(\beta\) (shape2) parameters control the shape of the distributiondf_beta <- data.frame()
for (shape1 in c(1, 2, 3, 6)) {
for (shape2 in c(1, 2, 3, 6)) {
dat <- data.frame(x = seq(-0.25, 1.25, 0.0001))
dat$p <- dbeta(dat$x, shape1 = shape1, shape2 = shape2)
dat$distribution <- "Beta"
dat$parameters <- paste0("alpha = ", shape1, ", beta = ", shape2)
df_beta <- rbind(dat, df_beta)
}
}
p <- df_beta |>
ggplot(aes(x = x, y = p, group = parameters)) +
geom_vline(xintercept = c(0, 1), linetype = "dashed") +
geom_line(aes(color = distribution), linewidth = 2, color = "#4CAF50") +
theme_minimal() +
theme(
legend.position = "none",
axis.title.y = element_blank(),
plot.title = element_text(size = 15, face = "bold", hjust = 0.5),
plot.tag = element_text(size = 20, face = "bold", hjust = 0.5),
strip.text = element_text(size = 15, face = "bold")
) +
facet_wrap(~parameters, scales = "free_y")
pGamma distributions receive many compliments about their long tail. That’s why there are always positive!
shape) and \(\theta\) (scale) parameters control the shape of the distributiondf_gamma <- data.frame()
for (shape in c(0.5, 1, 2, 4)) {
for (scale in c(0.1, 0.5, 1, 2)) {
dat <- data.frame(x = seq(0.001, 25, length.out = 1000))
dat$p <- dgamma(dat$x, shape = shape, scale = scale)
dat$distribution <- "Beta"
dat$parameters <- paste0("k = ", shape, ", theta = ", scale)
df_gamma <- rbind(dat, df_gamma)
}
}
p <- df_gamma |>
ggplot(aes(x = x, y = p, group = parameters)) +
# geom_vline(xintercept=c(0, 1), linetype="dashed") +
geom_line(aes(color = distribution), linewidth = 2, color = "#9C27B0") +
theme_minimal() +
theme(
legend.position = "none",
axis.title.y = element_blank(),
plot.title = element_text(size = 15, face = "bold", hjust = 0.5),
plot.tag = element_text(size = 20, face = "bold", hjust = 0.5),
strip.text = element_text(size = 15, face = "bold")
) +
facet_wrap(~parameters, scales = "free_y")
pThey all look like…
🤯🤯🤯🤯🤯🤯🤯🤯
A NORMAL DISTRIBUTION
🤯🤯🤯🤯🤯🤯🤯🤯
Why don’t statisticians play hide and seek with Cauchy distributions? Because they never know where they’re going to show up and how far away they could be!
How do you call the PhD diploma of a Student’s t-distribution? A degree of freedom!
df <- data.frame(x = seq(-5, 5, length.out = 1000))
df$Normal <- dnorm(df$x, mean = 0, sd = 1)
df$Cauchy <- dcauchy(df$x, location = 0, scale = 1)
df$Student_1.5 <- dt(df$x, df = 1.5)
df$Student_2 <- dt(df$x, df = 2)
df$Student_3 <- dt(df$x, df = 3)
df$Student_5 <- dt(df$x, df = 5)
df$Student_10 <- dt(df$x, df = 10)
df$Student_20 <- dt(df$x, df = 20)
df |>
pivot_longer(
starts_with("Student"),
names_to = "Distribution",
values_to = "PDF"
) |>
separate(Distribution, into = c("Distribution", "df"), sep = "_") |>
mutate(label = paste0("Student (df=", df, ")")) |>
ggplot(aes(x = x)) +
geom_line(
data = df,
aes(y = Normal, color = "Normal (0, 1)"),
linewidth = 2
) +
geom_line(
data = df,
aes(y = Cauchy, color = "Cauchy (0, 1)"),
linewidth = 2
) +
geom_line(aes(y = PDF, color = label)) +
scale_color_manual(
values = c(
`Cauchy (0, 1)` = "#F44336",
`Student (df=1.5)` = "#FF5722",
`Student (df=2)` = "#FF9800",
`Student (df=3)` = "#FFC107",
`Student (df=5)` = "#FFEB3B",
`Student (df=10)` = "#CDDC39",
`Student (df=20)` = "#8BC34A",
`Normal (0, 1)` = "#2196F3"
),
breaks = c(
"Normal (0, 1)",
"Student (df=20)",
"Student (df=10)",
"Student (df=5)",
"Student (df=3)",
"Student (df=2)",
"Student (df=1.5)",
"Cauchy (0, 1)"
)
) +
see::theme_modern() +
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.ticks.y = element_blank(),
plot.title = element_text(size = 15, face = "bold", hjust = 0.5),
strip.text = element_text(size = 15, face = "bold")
) +
labs(title = "Normal, Cauchy, and Student's t-Distribution", color = NULL)
var1 alone (i.e;, its marginal distribution)?var2?
var1 and var2?
var1 and var2?

Thank you!
![]()