Skip to contents

Simulate Data

df <- data.frame()
for(x in seq(0.1, 0.9, by = 0.1)) {
  score <- rchoco(n = 100, p = 0.4 + x / 2, confright = 0.4 + x / 3, 
                  confleft = 1-x, pex = 0.03, bex = 0.6, pmid = 0)
  df <- rbind(df, data.frame(x = x, score = score))
}

df |>
  ggplot(aes(x = score, y = after_stat(density))) +
  geom_histogram(bins = 100, fill = "#2196F3") +
  labs(title = "Rating Distribution", x = "Score", y = "Density") +
  theme_minimal()

Models

ZOIB Model

The Zero-One Inflated Beta (ZOIB) model assumes that the data can be modeled as a mixture of two logistic regression processes for the boundary values (0 and 1) and a beta regression process for the continuous proportions in-between.

f <- bf(
  score ~ x,
  phi ~ x,
  zoi ~ x,
  coi ~ x, 
  family = zero_one_inflated_beta()
)

m_zoib <- brm(f,
  data = df, family = zero_one_inflated_beta(), init = 0,
  chains = 4, iter = 500, backend = "cmdstanr"
)

m_zoib <- brms::add_criterion(m_zoib, "loo")  # For later model comparison

saveRDS(m_zoib, file = "models/m_zoib.rds")

XBX Model

Kosmidis & Zeileis (2024) introduce a generalization of the classic beta regression model with extended support [0, 1]. Specifically, the extended-support beta distribution (xbeta) leverages an underlying symmetric four-parameter beta distribution with exceedence parameter nu to obtain support [-nu, 1 + nu] that is subsequently censored to [0, 1] in order to obtain point masses at the boundary values 0 and 1.

f <- bf(
  score ~ x,
  phi ~ x,
  kappa ~ x, 
  family = xbeta()
)

m_xbx <- brm(f,
  data = df, family = xbeta(), init = 0,
  chains = 4, iter = 500, backend = "cmdstanr"
)

m_xbx <- brms::add_criterion(m_xbx, "loo")  # For later model comparison

saveRDS(m_xbx, file = "models/m_xbx.rds")

Beta-Gate Model

The Beta-Gate model corresponds to a reparametrized Ordered Beta model (Kubinec, 2023). In this model, observed 0s and 1s represent instances where the underlying continuous response tendency fell beyond lower or upper boundary points (‘gates’).

f <- bf(
  score ~ x,
  phi ~ x,
  pex ~ x,
  bex ~ x, 
  family = betagate()
)

m_betagate <- brm(f,
  data = df, family = betagate(), stanvars = betagate_stanvars(), init = 0,
  chains = 4, iter = 500, backend = "cmdstanr"
)

m_betagate <- brms::add_criterion(m_betagate, "loo")  # For later model comparison

saveRDS(m_betagate, file = "models/m_betagate.rds")

CHOCO Model

See the documentation of the Choice-Confidence (CHOCO).

f <- bf(
  score ~ x,
  confright ~ x,
  confleft ~ x,
  precright ~ x,
  precleft ~ x,
  pex ~ x,
  bex ~ x,
  pmid = 0, 
  family = choco()
)

m_choco <- brm(f,
  data = df, family = choco(), stanvars = choco_stanvars(), init = 0,
  chains = 4, iter = 500, backend = "cmdstanr"
)

m_choco <- brms::add_criterion(m_choco, "loo")  # For later model comparison

saveRDS(m_choco, file = "models/m_choco.rds")

Model Comparison

#> Loading required namespace: rstan

Model Fit

We can compare these models together using the loo package, which shows that CHOCO provides a significantly better fit than the other models.

Code
# loo::loo_compare(m_zoib, m_xbx, m_betagate, m_choco) |> 
#   parameters(include_ENP = TRUE)

Sampling Duration

rbind(
  data_modify(attributes(m_zoib$fit)$metadata$time$chain, Model="ZOIB"),
  data_modify(attributes(m_xbx$fit)$metadata$time$chain, Model="XBX"),
  data_modify(attributes(m_betagate$fit)$metadata$time$chain, Model="Beta-Gate"),
  data_modify(attributes(m_choco$fit)$metadata$time$chain, Model="CHOCO")
) |> 
  ggplot(aes(x = Model, y = total, fill = Model)) +
  geom_boxplot() +
  labs(y = "Sampling Duration (s)") +
  scale_fill_material_d(guide = "none") +
  scale_y_log10() +
  theme_minimal() 

Posterior Predictive Check

Running posterior predictive checks allows to visualize the predicted distributions from various models. We can see how typical Beta-related models fail to capture the bimodal nature of the data, which is well captured by the CHOCO model.

Note: iterations controls the actual number of iterations used (e.g., for the point-estimate) and keep_iterations the number included.

Code
pred <- rbind(
  estimate_prediction(m_zoib, keep_iterations = 50, iterations = 50) |>
    reshape_iterations() |>
    data_modify(Model = "ZOIB"),
  estimate_prediction(m_xbx, keep_iterations = 50, iterations = 50) |>
    reshape_iterations() |>
    data_modify(Model = "XBX"),
  estimate_prediction(m_betagate, keep_iterations = 50, iterations = 50) |>
    reshape_iterations() |>
    data_modify(Model = "Beta-Gate"),
  estimate_prediction(m_choco, keep_iterations = 50, iterations = 50) |>
    reshape_iterations() |>
    data_modify(Model = "CHOCO")
)

df |>
  ggplot(aes(x = score, y = after_stat(density))) +
  geom_histogram(bins = 100, fill = "#2196F3") +
  labs(title = "Rating Distribution", x = "Score", y = "Density") +
  theme_minimal() + 
  geom_histogram(
    data = pred, aes(x = iter_value, group = as.factor(iter_group)),
    bins = 100, alpha = 0.02, position = "identity", fill = "#FF5722"
  ) +
  facet_wrap(~Model)

Effect Visualisation

We can see how the predicted distribution changes as a function of x and gets “pushed” to the right. Moreover, we can also visualize the effect of x on specific parameters, showing that it mostly affects the parameter conf (the mean confidence - i.e., central tendency - on the right side), confleft (the relative confidence of the left side), and mu, which corresponds to the p probability of answering on the right. This is consistent with our expectations, and reflects the larger and more concentrated mass on the right of the scale for higher value of x (in brown).

Code
p1 <- estimate_prediction(m_choco, data = "grid", length = 4, keep_iterations = 500, iterations = 500) |> 
  reshape_iterations() |> 
  ggplot(aes(x = iter_value, fill = as.factor(x))) +
  geom_histogram(alpha = 0.6, bins = 100, position = "identity") +
  scale_fill_bluebrown_d() + 
  labs(x = "Rating") +
  theme_minimal()
p1

Code

# # Predict various parameters
# pred_params <- data.frame()
# for(param in c("mu", "confright", "confleft", "precright", "precleft", "pex")) {
#   pred_params <- m_choco |>
#     estimate_prediction(data = "grid", length = 20, predict = param) |>
#     as.data.frame() |>
#     data_modify(Parameter = param) |> 
#     rbind(pred_params)
# }
# 
# p2 <- pred_params |>
#   ggplot(aes(x = x, y = Predicted)) +
#   geom_ribbon(aes(ymin = CI_low, ymax = CI_high, fill = Parameter), alpha = 0.2) +
#   geom_line(aes(color = Parameter), linewidth = 1) +
#   facet_wrap(~Parameter, scales = "free_y", ncol=3) +
#   scale_fill_viridis_d() +
#   scale_color_viridis_d() +
#   theme_minimal()
# p2