x <- rnorm(1000, mean=50, sd=10)
MASS::fitdistr(x, "normal") mean sd
49.8828661 9.9825633
( 0.3156764) ( 0.2232169)
2. On Regression Parameters in a Frequentist Framework
fitdistr() (“fit distribution”) function from the MASS package. mean sd
49.8828661 9.9825633
( 0.3156764) ( 0.2232169)
params <- MASS::fitdistr(x, "normal")
data.frame(x=x) |>
ggplot(aes(x=x)) +
geom_histogram(aes(y=after_stat(density)), bins=60, fill="grey", color="black") +
geom_line(
data = data.frame(
x=seq(0, 100, length.out = 500),
y=dnorm(seq(0, 100, length.out = 500), mean=params$estimate["mean"], sd=params$estimate["sd"])),
aes(x=x, y=y), color="red", linewidth=2) +
theme_bw() +
labs(x="x", y="Density")
mean sd
0.65923460 0.45948465
(0.01453018) (0.01027439)
params <- MASS::fitdistr(x, "normal")
data.frame(x=x) |>
ggplot(aes(x=x)) +
geom_histogram(aes(y=after_stat(density)), bins=100, fill="grey", color="black") +
geom_line(
data = data.frame(
x=seq(-1.5, 3.5, length.out = 500),
y=dnorm(seq(-1.5, 3.5, length.out = 500), mean=params$estimate["mean"], sd=params$estimate["sd"])),
aes(x=x, y=y), color="red", linewidth=2) +
theme_bw() +
labs(x="x", y="Density")
mean sd
0.500284152 0.360421491
(0.008059270) (0.005698764)
params <- MASS::fitdistr(x, "normal")
data.frame(x=x) |>
ggplot(aes(x=x)) +
geom_histogram(aes(y=after_stat(density)), bins=100, fill="grey", color="black") +
geom_line(
data = data.frame(
x=seq(-0.5, 1.5, length.out = 500),
y=dnorm(seq(-0.5, 1.5, length.out = 500), mean=params$estimate["mean"], sd=params$estimate["sd"])),
aes(x=x, y=y), color="red", linewidth=2) +
theme_bw() +
labs(x="x", y="Density")
y?y and look at its summary.
Call:
lm(formula = y ~ 1, data = df)
Residuals:
Min 1Q Median 3Q Max
-0.06266 -0.05617 -0.03399 0.02038 1.30855
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.0626590 0.0003967 158 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.0887 on 49999 degrees of freedom
df <- data.frame(y = rnorm(5000, 10, 5))
ggplot(df, aes(y=y)) +
geom_jitter(aes(x=0), alpha=0.3) +
geom_linerange(data=data.frame(x=0, y=mean(df$y), ymin=mean(df$y)-0.5*sd(df$y), ymax=mean(df$y)+0.5*sd(df$y)),
aes(x=x, y=y, ymin=ymin, ymax=ymax), color="red", linewidth=2) +
geom_point(data=data.frame(x=0, y=mean(df$y)), aes(x=x, y=y), color="red", size=4) +
geom_hline(yintercept = 10, linetype="dashed", color="red", linewidth=1) +
coord_cartesian(xlim = c(-1, 0.4)) +
labs(x="", y="y") +
ggside::geom_ysidedensity(data=data.frame(y = rnorm(50000, 10, 5)), color = "red", linewidth = 2) +
theme_minimal() +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank()) 
model <- lm(mpg ~ qsec, data=mtcars)
p <- mtcars |>
ggplot(aes(x=qsec, y=mpg)) +
geom_smooth(method="lm", formula = 'y ~ x', se=FALSE, linewidth=2) +
theme_bw()
# Function to add normal distribution curves
add_normals <- function(p, model) {
sigma <- summary(model)$sigma # Standard deviation of residuals
n <- 100 # Number of points for each curve
for(i in 1:nrow(mtcars)) {
x_val <- mtcars$qsec[i]
y_pred <- predict(model, newdata = data.frame(qsec = x_val))
# Create a sequence of y values for the normal curve
y_seq <- seq(y_pred - 3*sigma, y_pred + 3*sigma, length.out = n)
density <- dnorm(y_seq, mean = y_pred, sd = sigma)
# Adjust density to match the scale of the plot
max_width <- 1 # Max width of areas
density_scaled <- (density / max(density)) * max_width
# Create a dataframe for each path
path_df <- data.frame(x = x_val + density_scaled, y = y_seq)
path_dfv <- data.frame(x=path_df$x[1], ymin=min(path_df$y), ymax=max(path_df$y))
# Add the path to the plot
p <- p +
geom_segment(data = path_dfv, aes(x = x, xend=x, y = ymin, yend=ymax),
color = "#FF9800", linewidth = 0.7, alpha=0.8, linetype="dotted") +
geom_path(data = path_df, aes(x = x, y = y),
color = "#FF9800", linewidth = 1, alpha=0.8)
}
p
}
# Create the final plot
p_ml <- add_normals(p, model) +
geom_segment(aes(x = qsec, y = mpg, xend = qsec, yend = predict(lm(mpg ~ qsec, data=mtcars))),
color="red", linetype="solid", linewidth=1) +
geom_point(alpha=0.8, size=6)
p_mlp <- mtcars |>
ggplot(aes(x=qsec, y=mpg)) +
geom_point(alpha=0.7, size=6) +
geom_smooth(method="lm", formula = 'y ~ x', se=FALSE, linewidth=2) +
geom_segment(aes(x = qsec, y = mpg, xend = qsec, yend = predict(lm(mpg ~ qsec, data=mtcars))),
color="red", linetype="dotted", linewidth=1) +
theme_bw() +
labs(x="x", y="y", title="The residuals are the vertical distances between each point and the line.")
p
lm() solves a Gaussian model using OLSglm() solves general linear models using MLE and can be used for simple Gaussian models too
Call:
lm(formula = mpg ~ qsec, data = mtcars)
Residuals:
Min 1Q Median 3Q Max
-9.8760 -3.4539 -0.7203 2.2774 11.6491
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -5.1140 10.0295 -0.510 0.6139
qsec 1.4121 0.5592 2.525 0.0171 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 5.564 on 30 degrees of freedom
Multiple R-squared: 0.1753, Adjusted R-squared: 0.1478
F-statistic: 6.377 on 1 and 30 DF, p-value: 0.01708
Call:
glm(formula = mpg ~ qsec, family = "gaussian", data = mtcars)
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -5.1140 10.0295 -0.510 0.6139
qsec 1.4121 0.5592 2.525 0.0171 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for gaussian family taken to be 30.95518)
Null deviance: 1126.05 on 31 degrees of freedom
Residual deviance: 928.66 on 30 degrees of freedom
AIC: 204.59
Number of Fisher Scoring iterations: 2
y given x with a testPearson's product-moment correlation
Parameter1 | Parameter2 | r | 95% CI | t(30) | p
----------------------------------------------------------------
mtcars$wt | mtcars$qsec | -0.17 | [-0.49, 0.19] | -0.97 | 0.339
Alternative hypothesis: true correlation is not equal to 0
df <- bayestestR::simulate_correlation(n=500, r=0.7)
ggplot(df, aes(x=V1, y=V2)) +
geom_point(alpha=0) +
geom_vline(xintercept = 0, linetype="dotted") +
geom_abline(intercept = 1, slope = 3, color="red", linewidth=2) +
geom_segment(aes(x = 0, y = 1, xend = 1, yend = 1), linewidth=1,
color="green", linetype="dashed") +
geom_segment(aes(x = 1, y = 1, xend = 1, yend = 4), linewidth=1,
color="green", linetype="solid", arrow=arrow(length = unit(0.1, "inches"))) +
geom_segment(aes(x = 0, y = 0, xend = 0, yend = 1), linewidth=1,
color="blue", linetype="solid", arrow=arrow(length = unit(0.1, "inches"))) +
geom_point(aes(x = 0, y = 0), color="purple", size=8, shape="+") +
labs(x="x", y="y") +
theme_bw() +
coord_cartesian(xlim = c(-1, 1.5), ylim = c(-3, 4))mpgqsecmtcars (built-in in R)lm() function (linear model) with the response ~ predictor formula to specify the modelsummary() method to get the results (base R equivalent of easystats’ model_parameters())
Call:
lm(formula = mpg ~ qsec, data = mtcars)
Residuals:
Min 1Q Median 3Q Max
-9.8760 -3.4539 -0.7203 2.2774 11.6491
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -5.1140 10.0295 -0.510 0.6139
qsec 1.4121 0.5592 2.525 0.0171 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 5.564 on 30 degrees of freedom
Multiple R-squared: 0.1753, Adjusted R-squared: 0.1478
F-statistic: 6.377 on 1 and 30 DF, p-value: 0.01708
Call:
lm(formula = mpg ~ qsec10, data = df)
Residuals:
Min 1Q Median 3Q Max
-9.8760 -3.4539 -0.7203 2.2774 11.6491
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -5.114 10.030 -0.510 0.6139
qsec10 14.121 5.592 2.525 0.0171 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 5.564 on 30 degrees of freedom
Multiple R-squared: 0.1753, Adjusted R-squared: 0.1478
F-statistic: 6.377 on 1 and 30 DF, p-value: 0.01708
Call:
lm(formula = mpg ~ qsec, data = mtcars)
Residuals:
Min 1Q Median 3Q Max
-9.8760 -3.4539 -0.7203 2.2774 11.6491
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -5.1140 10.0295 -0.510 0.6139
qsec 1.4121 0.5592 2.525 0.0171 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 5.564 on 30 degrees of freedom
Multiple R-squared: 0.1753, Adjusted R-squared: 0.1478
F-statistic: 6.377 on 1 and 30 DF, p-value: 0.01708
# Plot t-distribution
x <- seq(-5, 5, length.out = 1000)
y <- dt(x, df=30)
df <- data.frame(x = x, y = y)
t_value <- insight::get_statistic(model)[2, 2]
df |>
ggplot(aes(x = x, y = y)) +
geom_area(color="black", fill="grey") +
geom_segment(aes(x = t_value, y = 0, xend = t_value, yend = dt(t_value, df=30)),
color="red", linetype="solid", linewidth=1) +
geom_point(aes(x = t_value, y = dt(t_value, df=30)), color="red", size=3) +
theme_minimal() +
labs(x = "\nt-values - standardized coefficient under the null hypothesis", y = "Probability",
title = paste0("t-Distribution (df=30); ", "t-value = ", round(t_value, 2), "\n"))df$Probability <- ifelse(df$x < -t_value, "< -t", "Smaller")
df$Probability <- ifelse(df$x > t_value, "> +t", df$Probability)
df |>
ggplot(aes(x = x, y = y)) +
geom_line() +
geom_area(aes(x = x, y = y, fill = Probability), alpha = 0.5) +
geom_segment(aes(x = t_value, y = 0, xend = t_value, yend = dt(t_value, df=30)),
color="red", linetype="solid", linewidth=1) +
geom_point(aes(x = t_value, y = dt(t_value, df=30)), color="red", size=3) +
theme_minimal() +
scale_fill_manual(values = c("red", "red", "grey")) +
labs(x = "\nt-values - standardized coefficient under the null hypothesis", y = "Probability",
title = paste0("t-Distribution (df=30); ", "t-value = ", round(t_value, 2), "\n"))
Call:
lm(formula = mpg ~ qsec, data = mtcars)
Residuals:
Min 1Q Median 3Q Max
-9.8760 -3.4539 -0.7203 2.2774 11.6491
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -5.1140 10.0295 -0.510 0.6139
qsec 1.4121 0.5592 2.525 0.0171 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 5.564 on 30 degrees of freedom
Multiple R-squared: 0.1753, Adjusted R-squared: 0.1478
F-statistic: 6.377 on 1 and 30 DF, p-value: 0.01708
# Get 95% ETI of the t distribution
ci <- qt(c(0.025, 0.975), df=30, lower.tail = TRUE) + t_value
# ci * parameters::parameters(model)$SE[2] # Indeed shows the right CI
df |>
ggplot(aes(x = x, y = y)) +
geom_area(fill="grey", alpha=0.5) +
geom_area(aes(x = x + t_value, y = y), color="red", fill="red", alpha=0.5) +
geom_segment(aes(x = t_value, y = 0, xend = t_value, yend = dt(t_value, df=30)),
color="red", linetype="solid", linewidth=1) +
geom_point(aes(x = t_value, y = dt(t_value, df=30)), color="red", size=3) +
# Horizontal segment indicating the CI range
geom_segment(aes(x = ci[1], y = 0.05, xend = ci[2], yend = 0.05),
color="orange", linetype="solid", linewidth=1, arrow = arrow(ends='both')) +
geom_vline(xintercept = ci[1], color="orange", linetype="dotted", linewidth=0.5) +
geom_vline(xintercept = ci[2], color="orange", linetype="dotted", linewidth=0.5) +
theme_minimal() +
labs(x = "\nt-values - standardized coefficient under the null hypothesis", y = "Probability",
title = paste0("Shifted t-Distribution (df=30); ", "location = ", round(t_value, 2), "(t-value)\n"))Thank you!
![]()