Code
library(tidyverse)
library(easystats)
library(patchwork)
library(ggside)
library(ggdist)
library(mirt)
library(tidyverse)
library(easystats)
library(patchwork)
library(ggside)
library(ggdist)
library(mirt)
<- read.csv("../data/data.csv") |>
df mutate(
PHQ4_Condition = fct_relevel(PHQ4_Condition, "PHQ4 - Original"),
PHQ4_Condition = fct_recode(PHQ4_Condition, "Original" = "PHQ4 - Original", "Revised" = "PHQ4 - Revised"),
# BDI2_Category = ifelse(BDI2_Total <= 12, "Subclinical", ifelse(BDI2_Total <= 18, "Mild", "Moderate+")),
BDI2_Category = ifelse(BDI2_Total <= 18, "Minimal - Mild", "Moderate - Severe"),
STAI5_Category = ifelse(STAI5_General < 2, "Minimal - Mild", "Moderate - Severe"),
PHQ4_Depression_Fac = as.factor(PHQ4_Depression)
)
<- c("Original" = "#607D8B", "Revised" = "#FF9800") colors
<- lm(BDI2_Total ~ PHQ4_Depression * PHQ4_Condition, data = df)
model
::parameters(model) |>
parametersdisplay()
Parameter | Coefficient | SE | 95% CI | t(832) | p |
---|---|---|---|---|---|
(Intercept) | 8.23 | 0.59 | (7.08, 9.39) | 13.96 | < .001 |
PHQ4 Depression | 5.34 | 0.23 | (4.90, 5.79) | 23.51 | < .001 |
PHQ4 Condition (Revised) | -0.13 | 0.82 | (-1.73, 1.47) | -0.16 | 0.871 |
PHQ4 Depression × PHQ4 Condition (Revised) | -0.05 | 0.33 | (-0.70, 0.60) | -0.15 | 0.883 |
<- function(resp="BDI2_Total", pred="PHQ4_Depression", levels=c(0, 0.5)) {
get_bf if(length(levels) == 2) {
<- BayesFactor::ttestBF(
bf == levels[1] & df$PHQ4_Condition=="Revised", resp],
df[df[[pred]] == levels[2] & df$PHQ4_Condition=="Revised", resp])
df[df[[pred]] else {
} <- BayesFactor::ttestBF(
bf ==levels & df$PHQ4_Condition=="Original", resp],
df[df[[pred]]==levels & df$PHQ4_Condition=="Revised", resp])
df[df[[pred]]
}::format_bf(exp(bf@bayesFactor$bf), stars=TRUE)
insight
}
<- function(resp="BDI2_Total", pred="PHQ4_Depression", y1, y2) {
get_databf
# Between scores comparisons
<- data.frame(
bfs BF = c(get_bf(resp, pred, c(0, 0.5)),
get_bf(resp, pred, c(0.5, 1)),
get_bf(resp, pred, c(1, 1.5)),
get_bf(resp, pred, c(1.5, 2)),
get_bf(resp, pred, c(2, 2.5)),
get_bf(resp, pred, c(2.5, 3)))
)<- c(0.25, 0.75, 1.25, 1.75, 2.25, 2.75)
bfs[[pred]] <- y1
bfs[[resp]] $color <- "Revised"
bfs
# Within scores comparisons
<- data.frame(
bfs2 BF = c(get_bf(resp, pred, 0),
get_bf(resp, pred, 1),
get_bf(resp, pred, 2),
get_bf(resp, pred, 3),
get_bf(resp, pred, 4))
)<- c(0, 1, 2, 3, 4)
bfs2[[pred]] <- y2
bfs2[[resp]] $color <- "Original"
bfs2
<- rbind(bfs, bfs2)
bf
# Lines behing labels
<- data.frame(
lines xmin = c(0, 0.5, 1, 1.5, 2, 2.5),
xmax = c(0.5, 1, 1.5, 2, 2.5, 3),
y = y1
)
list(bf=bf, lines=lines)
}
<- get_databf(resp="BDI2_Total", pred="PHQ4_Depression", y1=c(28, 32, 36, 40, 44, 48), y2=c(10, 15, 20, 25, 30))
bfs
<- df |>
p_dep ggplot(aes(x = PHQ4_Depression, y = BDI2_Total), width=0.1) +
::stat_slab(
ggdistdata = df[df$PHQ4_Condition == "Original" & df$PHQ4_Depression <= 4, ],
fill = colors["Original"],
alpha=0.3,
scale = 0.3,
position = position_dodge(width = 1),
side = "left") +
::stat_slab(
ggdistdata = df[df$PHQ4_Condition == "Revised" & df$PHQ4_Depression %in% c(0, 1, 2, 3, 4), ],
fill = colors["Revised"],
alpha=0.3,
scale = 0.3,
position = position_dodge(width = 1),
side = "right") +
::stat_slab(
ggdistdata = df[df$PHQ4_Depression %in% c(0.5, 1.5, 2.5), ],
fill = colors["Revised"],
alpha=0.3,
scale = 0.3,
position = position_dodge(width = 1),
side = "both") +
geom_jitter2(aes(color = PHQ4_Condition), width=0.1, alpha=0.5, size=3) +
geom_smooth(aes(color = PHQ4_Condition, fill=PHQ4_Condition),
method = "lm", formula="y~x", alpha=0.2, key_glyph = draw_key_rect) +
geom_segment(data=bfs$lines, aes(x=xmin, xend=xmax, y=y, yend=y), color=colors["Revised"], linewidth=0.5) +
geom_label(data = bfs$bf, aes(x = PHQ4_Depression, y = BDI2_Total, label = BF, color=color), size=3) +
scale_fill_manual(values = colors) +
scale_color_manual(values = colors) +
theme_bw() +
# ggside::geom_xsidedensity(aes(fill = PHQ4_Condition), color=NA, alpha=0.5) +
::scale_xsidey_continuous(expand = c(0, 0)) +
ggside# ggside::geom_ysidedensity(aes(fill = PHQ4_Condition), color=NA, alpha=0.5) +
::scale_ysidex_continuous(expand = c(0, 0)) +
ggside::theme_ggside_void() +
ggsidecoord_cartesian(xlim=c(-0.15, 6.1)) +
guides(color="none", fill=guide_legend(override.aes = list(alpha=1))) +
labs(
x = "PHQ-4 (Depression Score)",
y = "BDI-II Total",
color = "PHQ4 Version",
fill = "PHQ4 Questionnaire Version",
title="Depression",
+
) theme(legend.position = "top",
plot.title = element_text(face="bold"))
p_dep
Warning: `is.ggproto()` was deprecated in ggplot2 3.5.2.
ℹ Please use `is_ggproto()` instead.
<- glm(Depression ~ PHQ4_Depression * PHQ4_Condition, data = df, family="binomial")
m
display(parameters::parameters(m))
Parameter | Log-Odds | SE | 95% CI | z | p |
---|---|---|---|---|---|
(Intercept) | -3.84 | 0.40 | (-4.70, -3.11) | -9.52 | < .001 |
PHQ4 Depression | 0.50 | 0.11 | (0.29, 0.71) | 4.64 | < .001 |
PHQ4 Condition (Revised) | -0.70 | 0.62 | (-1.95, 0.51) | -1.12 | 0.262 |
PHQ4 Depression × PHQ4 Condition (Revised) | 0.12 | 0.16 | (-0.20, 0.44) | 0.75 | 0.454 |
<- m |>
p_mdd estimate_relation(length=20) |>
ggplot(aes(x=PHQ4_Depression, y=Predicted)) +
geom_ribbon(aes(ymin=CI_low, ymax=CI_high, fill=PHQ4_Condition), alpha=0.2) +
geom_line(aes(color=PHQ4_Condition), linewidth=1) +
scale_fill_manual(values = colors) +
scale_color_manual(values = colors) +
scale_y_continuous(labels=scales::percent) +
labs(y = "Probability of Depression",
x= "PHQ-4 (Depression Score)",
title = "Depression Disorder",
fill = "Questionnaire Version",
color = "Questionnaire Version") +
theme_minimal() +
theme(plot.title = element_text(face="bold"),
legend.position = "none")
<- function(m) {
make_roc <- insight::find_response(m)
resp <- insight::find_predictors(m)
pred <- as.formula(paste0(resp, " ~ ", pred$conditional[1]))
f <- glm(f, data = df[df$PHQ4_Condition == "Original", ], family="binomial")
m1 <- glm(f, data = df[df$PHQ4_Condition == "Revised", ], family="binomial")
m2
<- performance_roc(m1)
roc1 <- performance_roc(m2)
roc2 <- data.frame(AUC = c(
rocs ::area_under_curve(roc1$Specificity, roc1$Sensitivity),
bayestestR::area_under_curve(roc2$Specificity, roc2$Sensitivity)),
bayestestRPHQ4_Condition = c("Original", "Revised"))
$AUC <- paste0("AUC = ", insight::format_percent(rocs$AUC))
rocs$x <- c(0.25, 0.25)
rocs$y <- c(0.35, 0.85)
rocs
rbind(
mutate(as.data.frame(roc1), PHQ4_Condition = "Original"),
mutate(as.data.frame(roc2), PHQ4_Condition = "Revised")
|>
) ggplot(aes(x=Specificity)) +
geom_abline(intercept=0, slope=1, color="gray") +
geom_line(aes(y=Sensitivity, color=PHQ4_Condition), linewidth=2, alpha=0.9) +
geom_label(data=rocs, aes(x=x, y=y, label=AUC), color=colors) +
labs(x= "1 - Specificity (False Positive Rate)", y="Sensitivity (True Positive Rate)",
color="Questionnaire Version") +
scale_color_manual(values=colors) +
scale_x_continuous(labels=scales::percent) +
scale_y_continuous(labels=scales::percent) +
theme_minimal()
}
<- make_roc(m) p_mddroc
/ p_mddroc + plot_layout(guides="collect") p_mdd
These completementary analyses were suggested during review to also apply IRT to the second sample. Additionally, we also computed “empirical coverage” curves.
<- df |>
dat_irt filter(PHQ4_Condition == "Revised") |>
select(starts_with("PHQ4"), BDI2_Total, STAI5_General) |>
mutate(Depression_3 = as.numeric(as.factor(PHQ4_Depression_3)),
Depression_4 = as.numeric(as.factor(PHQ4_Depression_4)),
Anxiety_1 = as.numeric(as.factor(PHQ4_Anxiety_1)),
Anxiety_2 = as.numeric(as.factor(PHQ4_Anxiety_2)))
<- mirt::mirt(data = dat_irt[c("Depression_3", "Depression_4")],
irt_model1 itemtype = 'graded',
verbose = FALSE)
<- mirt::mirt(data = dat_irt[c("Anxiety_1", "Anxiety_2")],
irt_model2 optimizer = "nlminb",
itemtype = 'graded',
verbose = FALSE)
# plot(irt_model, type = "trace", theta_lim = c(-3, 3))
<- mirt::fscores(irt_model1, full.scores.SE = TRUE) |>
irt_data1 as.data.frame() |>
mutate(Groundtruth = dat_irt$BDI2_Total,
PHQ4_Score = dat_irt$PHQ4_Depression,
Dimension = "Depression")
<- mirt::fscores(irt_model2, full.scores.SE = TRUE) |>
irt_data2 as.data.frame() |>
mutate(Groundtruth = dat_irt$STAI5_General,
PHQ4_Score = dat_irt$PHQ4_Anxiety,
Dimension = "Anxiety")
$Label1 <- insight::format_value(cor(irt_data1$F1, irt_data1$Groundtruth))
irt_data1$Label2 <- insight::format_value(cor(irt_data1$F1, irt_data1$PHQ4_Score))
irt_data1$Label1 <- insight::format_value(cor(irt_data2$F1, irt_data2$Groundtruth))
irt_data2$Label2 <- insight::format_value(cor(irt_data2$F1, irt_data2$PHQ4_Score))
irt_data2
<- rbind(irt_data1, irt_data2) |>
irt_data mutate(Label1 = paste("r =", Label1),
Label2 = paste("r =", Label2))
|>
irt_data ggplot() +
geom_jitter(aes(x = F1, y = Groundtruth), width = 0.05, height = 0.05) +
geom_smooth(aes(x = F1, y = Groundtruth), method = "lm", color = "red", formula = "y ~ x") +
geom_text(data = dplyr::distinct(irt_data[c("Dimension", "Label1")]), aes(label = Label1),
x = -1, y = c(55, 3), size = 5, color = "red"
+
) facet_wrap(~ Dimension, scales = "free_y", ncol = 1) +
labs(x = "Theta latent factor extracted from IRT", y = "Ground truth score (BDI-II or STAI-5)") +
theme_minimal() |
|>
irt_data ggplot() +
geom_jitter(aes(x = F1, y = PHQ4_Score), width = 0.05, height = 0.05) +
geom_smooth(aes(x = F1, y = PHQ4_Score), method = "lm", color = "blue", formula = "y ~ x", se = FALSE) +
geom_smooth(aes(x = F1, y = PHQ4_Score), method = "lm", color = "darkorange", formula = "y ~ poly(x, 2)", se = FALSE) +
geom_text(data = dplyr::distinct(irt_data[c("Dimension", "Label2")]), aes(label = Label2),
x = -1, y = c(6, 6), size = 5, color = "red"
+
) facet_wrap(~ Dimension, scales = "free_y", ncol = 1) +
labs(x = "Theta latent factor extracted from IRT", y = "PHQ4 Score (Average)") +
theme_minimal()
<- function(x) {
add_labels <- case_when(
x == 0 ~ "Not at all",
x == 0.5 ~ "Once or twice",
x == 1 ~ "Several days",
x == 2 ~ "More than half the days",
x .default = "Nearly every day"
)fct_relevel(x, c("Not at all", "Once or twice", "Several days", "More than half the days", "Nearly every day"))
}
rbind(
mutate(dat_irt,
Item = "D1: Feeling down, depressed, or hopeless",
Response = add_labels(PHQ4_Depression_3),
Outcome = BDI2_Total),
mutate(dat_irt,
Item = "D2: Little interest or pleasure in doing things",
Response = add_labels(PHQ4_Depression_4),
Outcome = BDI2_Total),
mutate(dat_irt,
Item = "A1: Feeling nervous, anxious, or on edge",
Response = add_labels(PHQ4_Anxiety_1),
Outcome = STAI5_General),
mutate(dat_irt,
Item = "A2: Not being able to stop or control worrying",
Response = add_labels(PHQ4_Anxiety_2),
Outcome = STAI5_General)
|>
) estimate_density(method = "KernSmooth", select = "Outcome", by = c("Item", "Response")) |>
ggplot(aes(x = x, y = y, color = Response)) +
geom_line(linewidth = 1) +
labs(x = "Ground truth measure (BDI or STAI)", y = "Distribution") +
facet_wrap(~ Item, scales = "free") +
scale_y_continuous(expand = expansion(add = c(0, 0.001))) +
scale_x_continuous(expand = expansion(add = c(0, 0))) +
::scale_color_flat_d("rainbow") +
see::theme_modern(axis.title.space = 5) +
seetheme(strip.background = element_rect(fill = "#EEEEEE", colour = "white"),
axis.text.y = element_blank())
<- lm(STAI5_General ~ PHQ4_Anxiety * PHQ4_Condition, data = df)
model
::parameters(model) |>
parametersdisplay()
Parameter | Coefficient | SE | 95% CI | t(832) | p |
---|---|---|---|---|---|
(Intercept) | 0.76 | 0.05 | (0.66, 0.85) | 15.33 | < .001 |
PHQ4 Anxiety | 0.30 | 0.02 | (0.27, 0.33) | 19.72 | < .001 |
PHQ4 Condition (Revised) | -0.02 | 0.07 | (-0.15, 0.11) | -0.32 | 0.750 |
PHQ4 Anxiety × PHQ4 Condition (Revised) | 0.01 | 0.02 | (-0.03, 0.05) | 0.56 | 0.576 |
<- get_databf(resp="STAI5_General", pred="PHQ4_Anxiety", y1=c(2.0, 2.2, 2.4, 2.6, 2.8, 3), y2=c(0.7, 1, 1.3, 1.6, 1.9))
bfs
<- df |>
p_anx ggplot(aes(x = PHQ4_Anxiety, y = STAI5_General), width=0.1) +
::stat_slab(
ggdistdata = df[df$PHQ4_Condition == "Original" & df$PHQ4_Anxiety <= 4, ],
fill = colors["Original"],
alpha=0.3,
scale = 0.3,
position = position_dodge(width = 1),
side = "left") +
::stat_slab(
ggdistdata = df[df$PHQ4_Condition == "Revised" & df$PHQ4_Anxiety %in% c(0, 1, 2, 3, 4), ],
fill = colors["Revised"],
alpha=0.3,
scale = 0.3,
position = position_dodge(width = 1),
side = "right") +
::stat_slab(
ggdistdata = df[df$PHQ4_Anxiety %in% c(0.5, 1.5, 2.5), ],
fill = colors["Revised"],
alpha=0.3,
scale = 0.3,
position = position_dodge(width = 1),
side = "both") +
geom_jitter2(aes(color = PHQ4_Condition), width=0.1, alpha=0.5, size=3) +
geom_smooth(aes(color = PHQ4_Condition, fill=PHQ4_Condition),
method = "lm", formula="y~x", alpha=0.2, key_glyph = draw_key_rect) +
geom_segment(data=bfs$lines, aes(x=xmin, xend=xmax, y=y, yend=y), color=colors["Revised"], linewidth=0.5) +
geom_label(data = bfs$bf, aes(x = PHQ4_Anxiety, y = STAI5_General, label = BF, color=color), size=3) +
scale_fill_manual(values = colors) +
scale_color_manual(values = colors) +
theme_bw() +
# ggside::geom_xsidedensity(aes(fill = PHQ4_Condition), color=NA, alpha=0.5) +
::scale_xsidey_continuous(expand = c(0, 0)) +
ggside# ggside::geom_ysidedensity(aes(fill = PHQ4_Condition), color=NA, alpha=0.5) +
::scale_ysidex_continuous(expand = c(0, 0)) +
ggside::theme_ggside_void() +
ggsidecoord_cartesian(xlim=c(-0.15, 6.1)) +
guides(color="none", fill=guide_legend(override.aes = list(alpha=1))) +
labs(
x = "PHQ-4 (Anxiety Score)",
y = "STAI-5 General",
color = "PHQ4 Version",
fill = "PHQ4 Questionnaire Version",
title="Anxiety",
+
) theme(legend.position = "top",
plot.title = element_text(face="bold"))
p_anx
<- glm(Anxiety ~ PHQ4_Anxiety * PHQ4_Condition, data = df, family="binomial")
m
display(parameters::parameters(m))
Parameter | Log-Odds | SE | 95% CI | z | p |
---|---|---|---|---|---|
(Intercept) | -3.44 | 0.40 | (-4.28, -2.71) | -8.63 | < .001 |
PHQ4 Anxiety | 0.40 | 0.10 | (0.22, 0.60) | 4.14 | < .001 |
PHQ4 Condition (Revised) | 0.07 | 0.53 | (-0.97, 1.14) | 0.14 | 0.890 |
PHQ4 Anxiety × PHQ4 Condition (Revised) | -0.03 | 0.13 | (-0.29, 0.23) | -0.22 | 0.822 |
<- m |>
p_gad estimate_relation(length=20) |>
ggplot(aes(x=PHQ4_Anxiety, y=Predicted)) +
geom_ribbon(aes(ymin=CI_low, ymax=CI_high, fill=PHQ4_Condition), alpha=0.2) +
geom_line(aes(color=PHQ4_Condition), linewidth=1) +
scale_fill_manual(values = colors) +
scale_color_manual(values = colors) +
scale_y_continuous(labels=scales::percent) +
labs(y = "Probability of Anxiety",
x= "PHQ-4 (Anxiety Score)",
title = "Anxiety Disorder",
fill = "Questionnaire Version",
color = "Questionnaire Version") +
theme_minimal() +
theme(plot.title = element_text(face="bold"),
legend.position = "none")
<- make_roc(m)
p_gadroc
/ p_gadroc) + plot_layout(guides="collect") (p_gad
<- function(df_original, df_revised) {
bootstrap_diffs <- data.frame()
dat for(i in 1:2000) {
<- df_original[sample(nrow(df_original), replace = TRUE), ]
newdf_ori <- df_revised[sample(nrow(df_revised), replace = TRUE), ]
newdf_rev
= cor.test(newdf_ori$BDI2_Total, newdf_ori$PHQ4_Depression)$estimate
r_depression_ori = cor.test(newdf_rev$BDI2_Total, newdf_rev$PHQ4_Depression)$estimate
r_depression_rev = cor.test(newdf_ori$STAI5_General, newdf_ori$PHQ4_Anxiety)$estimate
r_anxiety_ori = cor.test(newdf_rev$STAI5_General, newdf_rev$PHQ4_Anxiety)$estimate
r_anxiety_rev
<- rbind(dat,
dat data.frame(
d_r_depression = r_depression_rev - r_depression_ori,
d_r_anxiety = r_anxiety_rev - r_anxiety_ori,
iteration = i
)
)
}
dat
}
<- bootstrap_diffs(
dat filter(df, PHQ4_Condition == "Original"),
filter(df, PHQ4_Condition == "Revised"))
|>
dat pivot_longer(cols = c(d_r_depression, d_r_anxiety)) |>
ggplot(aes(x = value)) +
geom_density(aes(fill = name), alpha = 0.5) +
geom_vline(xintercept = 0, linetype = "dashed") +
theme_bw() +
labs(x = "Difference in Correlation", y = "")
<- data.frame()
dat for(cat in c("Minimal - Mild", "Moderate - Severe")) {
<- rbind(
dat
dat,bootstrap_diffs(
filter(df, PHQ4_Condition == "Original", BDI2_Category == cat),
filter(df, PHQ4_Condition == "Revised", BDI2_Category == cat)) |>
mutate(Subsample = paste("Depression:", cat)),
bootstrap_diffs(
filter(df, PHQ4_Condition == "Original", STAI5_Category == cat),
filter(df, PHQ4_Condition == "Revised", STAI5_Category == cat)) |>
mutate(Subsample = paste("Anxiety:", cat))
)
}
<- dat |>
dat separate(Subsample, into = c("Type", "Subsample"), sep = ":") |>
pivot_longer(cols = c(d_r_depression, d_r_anxiety)) |>
filter(!(Type == "Depression" & name=="d_r_anxiety") &
!(Type == "Anxiety" & name=="d_r_depression")) |>
mutate(Type = ifelse(Type == "Depression", paste(Type, "(BDI-II)"), paste(Type, "(STAI-5)"))) |>
mutate(Type = fct_rev(Type))
<- dat |>
p_cor ggplot(aes(x = value)) +
geom_density(aes(fill = name), alpha = 0.9, color="white") +
geom_vline(xintercept = 0, linetype = "dashed") +
geom_text(data=summarize(dat, pd = insight::format_p(bayestestR::pd_to_p(
as.numeric(pd(value)), direction="one-sided"), stars=TRUE),
.by=c("Subsample", "Type")),
aes(label=pd, x=0.3, y=5)) +
theme_bw() +
labs(x = "Difference in Correlation (PHQ4 Revised - Original)", y = "") +
facet_grid(Type~Subsample) +
scale_fill_manual(values = c("d_r_depression"="#3F51B5", "d_r_anxiety"="#E91E63"), guide="none") +
theme(axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
strip.background = element_rect(color=NA))
p_cor
<- (p_dep + theme(legend.position='none')) /
p_a + theme(legend.position='none')) +
(p_anx plot_annotation(title="A.")
<- p_cor + plot_annotation(title="B.")
p_b
<- ((p_mdd / p_mddroc) | (p_gad / p_gadroc)) + patchwork::plot_layout(guides = "collect") + patchwork::plot_annotation(title="C.", theme=theme(legend.position='left'))
p_c
wrap_elements(p_a) | wrap_elements(p_b)) / wrap_elements(p_c) (