PHQ4R - Data Analysis (Study 2)

Data Preparation

Code
library(tidyverse)
library(easystats)
library(patchwork)
library(ggside)
library(ggdist)
library(mirt)
Code
df <- read.csv("../data/data.csv") |> 
  mutate(
    PHQ4_Condition = fct_relevel(PHQ4_Condition, "PHQ4 - Original"),
    PHQ4_Condition = fct_recode(PHQ4_Condition, "Original" = "PHQ4 - Original", "Revised" = "PHQ4 - Revised"),
    # BDI2_Category = ifelse(BDI2_Total <= 12, "Subclinical", ifelse(BDI2_Total <= 18, "Mild", "Moderate+")),
    BDI2_Category = ifelse(BDI2_Total <= 18, "Minimal - Mild", "Moderate - Severe"),
    STAI5_Category = ifelse(STAI5_General < 2, "Minimal - Mild", "Moderate - Severe"),
    PHQ4_Depression_Fac = as.factor(PHQ4_Depression)
  )

colors <- c("Original" = "#607D8B", "Revised" = "#FF9800")

Depression

Relationship with BDI-2

Code
model <- lm(BDI2_Total ~ PHQ4_Depression * PHQ4_Condition, data = df)

parameters::parameters(model) |> 
  display()
Parameter Coefficient SE 95% CI t(832) p
(Intercept) 8.23 0.59 (7.08, 9.39) 13.96 < .001
PHQ4 Depression 5.34 0.23 (4.90, 5.79) 23.51 < .001
PHQ4 Condition (Revised) -0.13 0.82 (-1.73, 1.47) -0.16 0.871
PHQ4 Depression × PHQ4 Condition (Revised) -0.05 0.33 (-0.70, 0.60) -0.15 0.883
Code
get_bf <- function(resp="BDI2_Total", pred="PHQ4_Depression", levels=c(0, 0.5)) {
  if(length(levels) == 2) {
    bf <- BayesFactor::ttestBF(
      df[df[[pred]] == levels[1] & df$PHQ4_Condition=="Revised", resp], 
      df[df[[pred]] == levels[2] & df$PHQ4_Condition=="Revised", resp])
  } else {
    bf <- BayesFactor::ttestBF(
      df[df[[pred]]==levels & df$PHQ4_Condition=="Original", resp],
      df[df[[pred]]==levels & df$PHQ4_Condition=="Revised", resp])
  }
  insight::format_bf(exp(bf@bayesFactor$bf), stars=TRUE)
}

get_databf <- function(resp="BDI2_Total", pred="PHQ4_Depression", y1, y2) {
  
  # Between scores comparisons
  bfs <- data.frame(
    BF = c(get_bf(resp, pred, c(0, 0.5)), 
           get_bf(resp, pred, c(0.5, 1)), 
           get_bf(resp, pred, c(1, 1.5)), 
           get_bf(resp, pred, c(1.5, 2)),
           get_bf(resp, pred, c(2, 2.5)),
           get_bf(resp, pred, c(2.5, 3)))
  )
  bfs[[pred]] <- c(0.25, 0.75, 1.25, 1.75, 2.25, 2.75)
  bfs[[resp]] <- y1
  bfs$color <- "Revised"
  
  # Within scores comparisons
  bfs2 <- data.frame(
    BF = c(get_bf(resp, pred, 0), 
           get_bf(resp, pred, 1), 
           get_bf(resp, pred, 2),
           get_bf(resp, pred, 3),
           get_bf(resp, pred, 4))
  )
  bfs2[[pred]] <- c(0, 1, 2, 3, 4)
  bfs2[[resp]] <- y2
  bfs2$color <- "Original"
  
  bf <- rbind(bfs, bfs2)
  
  # Lines behing labels
  lines <- data.frame(
    xmin = c(0, 0.5, 1, 1.5, 2, 2.5),
    xmax = c(0.5, 1, 1.5, 2, 2.5, 3),
    y = y1
  )
  
  list(bf=bf, lines=lines)
}


bfs <- get_databf(resp="BDI2_Total", pred="PHQ4_Depression", y1=c(28, 32, 36, 40, 44, 48), y2=c(10, 15, 20, 25, 30))

p_dep <- df |> 
  ggplot(aes(x = PHQ4_Depression, y = BDI2_Total), width=0.1) +
  ggdist::stat_slab(
    data = df[df$PHQ4_Condition == "Original" & df$PHQ4_Depression <= 4, ], 
    fill = colors["Original"],
    alpha=0.3,
    scale = 0.3,
    position = position_dodge(width = 1),
    side = "left") +
  ggdist::stat_slab(
    data = df[df$PHQ4_Condition == "Revised" & df$PHQ4_Depression %in% c(0, 1, 2, 3, 4), ], 
    fill = colors["Revised"],
    alpha=0.3,
    scale = 0.3,
    position = position_dodge(width = 1),
    side = "right") +
  ggdist::stat_slab(
    data = df[df$PHQ4_Depression %in% c(0.5, 1.5, 2.5), ], 
    fill = colors["Revised"],
    alpha=0.3,
    scale = 0.3,
    position = position_dodge(width = 1),
    side = "both") +
  geom_jitter2(aes(color = PHQ4_Condition), width=0.1, alpha=0.5, size=3) +
  geom_smooth(aes(color = PHQ4_Condition, fill=PHQ4_Condition), 
              method = "lm", formula="y~x", alpha=0.2, key_glyph = draw_key_rect) +
  geom_segment(data=bfs$lines, aes(x=xmin, xend=xmax, y=y, yend=y), color=colors["Revised"], linewidth=0.5) +
  geom_label(data = bfs$bf, aes(x = PHQ4_Depression, y = BDI2_Total, label = BF, color=color), size=3) +
  scale_fill_manual(values = colors) +
  scale_color_manual(values = colors) +
  theme_bw() +
  # ggside::geom_xsidedensity(aes(fill = PHQ4_Condition), color=NA, alpha=0.5) +
  ggside::scale_xsidey_continuous(expand = c(0, 0)) +
  # ggside::geom_ysidedensity(aes(fill = PHQ4_Condition), color=NA, alpha=0.5) +
  ggside::scale_ysidex_continuous(expand = c(0, 0)) +
  ggside::theme_ggside_void() +
  coord_cartesian(xlim=c(-0.15, 6.1)) +
  guides(color="none", fill=guide_legend(override.aes = list(alpha=1))) +
  labs(
    x = "PHQ-4 (Depression Score)",
    y = "BDI-II Total",
    color = "PHQ4 Version",
    fill = "PHQ4 Questionnaire Version",
    title="Depression",
  ) +
  theme(legend.position = "top",
        plot.title = element_text(face="bold"))
p_dep
Warning: `is.ggproto()` was deprecated in ggplot2 3.5.2.
ℹ Please use `is_ggproto()` instead.

Predictive Performance

Code
m <- glm(Depression ~ PHQ4_Depression * PHQ4_Condition, data = df, family="binomial")

display(parameters::parameters(m))
Parameter Log-Odds SE 95% CI z p
(Intercept) -3.84 0.40 (-4.70, -3.11) -9.52 < .001
PHQ4 Depression 0.50 0.11 (0.29, 0.71) 4.64 < .001
PHQ4 Condition (Revised) -0.70 0.62 (-1.95, 0.51) -1.12 0.262
PHQ4 Depression × PHQ4 Condition (Revised) 0.12 0.16 (-0.20, 0.44) 0.75 0.454
Code
p_mdd <- m |> 
  estimate_relation(length=20) |> 
  ggplot(aes(x=PHQ4_Depression, y=Predicted)) +
  geom_ribbon(aes(ymin=CI_low, ymax=CI_high, fill=PHQ4_Condition), alpha=0.2) +
  geom_line(aes(color=PHQ4_Condition), linewidth=1) +
  scale_fill_manual(values = colors) +
  scale_color_manual(values = colors) +
  scale_y_continuous(labels=scales::percent) +
  labs(y = "Probability of Depression",
       x= "PHQ-4 (Depression Score)",
       title = "Depression Disorder",
       fill = "Questionnaire Version",
       color = "Questionnaire Version") +
  theme_minimal()  +
  theme(plot.title = element_text(face="bold"),
        legend.position = "none")
Code
make_roc <- function(m) {
  resp <- insight::find_response(m)
  pred <- insight::find_predictors(m)
  f <- as.formula(paste0(resp, " ~ ", pred$conditional[1]))
  m1 <- glm(f, data = df[df$PHQ4_Condition == "Original", ], family="binomial") 
  m2 <- glm(f, data = df[df$PHQ4_Condition == "Revised", ], family="binomial")
  
  roc1 <- performance_roc(m1) 
  roc2 <- performance_roc(m2)
  rocs <- data.frame(AUC = c(
    bayestestR::area_under_curve(roc1$Specificity, roc1$Sensitivity),
    bayestestR::area_under_curve(roc2$Specificity, roc2$Sensitivity)),
    PHQ4_Condition = c("Original", "Revised"))
  rocs$AUC <- paste0("AUC = ", insight::format_percent(rocs$AUC))
  rocs$x <- c(0.25, 0.25)
  rocs$y <- c(0.35, 0.85)
  
  rbind(
    mutate(as.data.frame(roc1), PHQ4_Condition = "Original"),
    mutate(as.data.frame(roc2), PHQ4_Condition = "Revised")
  ) |> 
    ggplot(aes(x=Specificity)) +
    geom_abline(intercept=0, slope=1, color="gray") +
    geom_line(aes(y=Sensitivity, color=PHQ4_Condition), linewidth=2, alpha=0.9) +
    geom_label(data=rocs, aes(x=x, y=y, label=AUC), color=colors) +
    labs(x= "1 - Specificity (False Positive Rate)", y="Sensitivity (True Positive Rate)",
         color="Questionnaire Version") +
    scale_color_manual(values=colors) +
    scale_x_continuous(labels=scales::percent) +
    scale_y_continuous(labels=scales::percent) +
    theme_minimal()
}

p_mddroc <- make_roc(m)
Code
p_mdd / p_mddroc + plot_layout(guides="collect")

IRT

These completementary analyses were suggested during review to also apply IRT to the second sample. Additionally, we also computed “empirical coverage” curves.

Code
dat_irt <- df |> 
  filter(PHQ4_Condition == "Revised") |>
  select(starts_with("PHQ4"), BDI2_Total, STAI5_General) |> 
  mutate(Depression_3 = as.numeric(as.factor(PHQ4_Depression_3)),
         Depression_4 = as.numeric(as.factor(PHQ4_Depression_4)),
         Anxiety_1 = as.numeric(as.factor(PHQ4_Anxiety_1)),
         Anxiety_2 = as.numeric(as.factor(PHQ4_Anxiety_2)))

irt_model1 <- mirt::mirt(data = dat_irt[c("Depression_3", "Depression_4")],
                  itemtype = 'graded',
                  verbose = FALSE) 
irt_model2 <- mirt::mirt(data = dat_irt[c("Anxiety_1", "Anxiety_2")],
                         optimizer = "nlminb",
                  itemtype = 'graded',
                  verbose = FALSE) 

# plot(irt_model, type = "trace", theta_lim = c(-3, 3))

irt_data1  <- mirt::fscores(irt_model1, full.scores.SE = TRUE) |>
  as.data.frame() |> 
  mutate(Groundtruth = dat_irt$BDI2_Total,
         PHQ4_Score = dat_irt$PHQ4_Depression,
         Dimension = "Depression") 
irt_data2  <- mirt::fscores(irt_model2, full.scores.SE = TRUE) |>
  as.data.frame() |> 
  mutate(Groundtruth = dat_irt$STAI5_General,
         PHQ4_Score = dat_irt$PHQ4_Anxiety,
         Dimension = "Anxiety")

irt_data1$Label1 <- insight::format_value(cor(irt_data1$F1, irt_data1$Groundtruth))
irt_data1$Label2 <- insight::format_value(cor(irt_data1$F1, irt_data1$PHQ4_Score))
irt_data2$Label1 <- insight::format_value(cor(irt_data2$F1, irt_data2$Groundtruth))
irt_data2$Label2 <- insight::format_value(cor(irt_data2$F1, irt_data2$PHQ4_Score))

irt_data <- rbind(irt_data1, irt_data2) |> 
  mutate(Label1 = paste("r =", Label1),
         Label2 = paste("r =", Label2)) 

irt_data |> 
  ggplot() +
  geom_jitter(aes(x = F1, y = Groundtruth), width = 0.05, height = 0.05) +
  geom_smooth(aes(x = F1, y = Groundtruth), method = "lm", color = "red", formula = "y ~ x") +
  geom_text(data = dplyr::distinct(irt_data[c("Dimension", "Label1")]), aes(label = Label1),
    x = -1, y = c(55, 3), size = 5, color = "red"
  ) +
  facet_wrap(~ Dimension, scales = "free_y", ncol = 1) +
  labs(x = "Theta latent factor extracted from IRT", y = "Ground truth score (BDI-II or STAI-5)") +
  theme_minimal() |
  irt_data |> 
  ggplot() +
  geom_jitter(aes(x = F1, y = PHQ4_Score), width = 0.05, height = 0.05) +
  geom_smooth(aes(x = F1, y = PHQ4_Score), method = "lm", color = "blue", formula = "y ~ x", se = FALSE) +
  geom_smooth(aes(x = F1, y = PHQ4_Score), method = "lm", color = "darkorange", formula = "y ~ poly(x, 2)", se = FALSE) +
  geom_text(data = dplyr::distinct(irt_data[c("Dimension", "Label2")]), aes(label = Label2),
    x = -1, y = c(6, 6), size = 5, color = "red"
  ) +
  facet_wrap(~ Dimension, scales = "free_y", ncol = 1) +
  labs(x = "Theta latent factor extracted from IRT", y = "PHQ4 Score (Average)") +
  theme_minimal()

Code
add_labels <- function(x) {
  x <- case_when(
    x == 0 ~ "Not at all",
    x == 0.5 ~ "Once or twice",
    x == 1 ~ "Several days",
    x == 2 ~ "More than half the days",
    .default = "Nearly every day"
  )
  fct_relevel(x, c("Not at all", "Once or twice", "Several days", "More than half the days", "Nearly every day"))
}

rbind(
  mutate(dat_irt, 
         Item = "D1: Feeling down, depressed, or hopeless", 
         Response = add_labels(PHQ4_Depression_3),
         Outcome = BDI2_Total),
  mutate(dat_irt, 
         Item = "D2: Little interest or pleasure in doing things",
         Response = add_labels(PHQ4_Depression_4),
         Outcome = BDI2_Total),
  mutate(dat_irt, 
         Item = "A1: Feeling nervous, anxious, or on edge",
         Response = add_labels(PHQ4_Anxiety_1),
         Outcome = STAI5_General),
  mutate(dat_irt, 
         Item = "A2: Not being able to stop or control worrying",
         Response = add_labels(PHQ4_Anxiety_2),
         Outcome = STAI5_General)
  ) |>
  estimate_density(method = "KernSmooth", select = "Outcome", by = c("Item", "Response")) |> 
  ggplot(aes(x = x, y = y, color = Response)) +
  geom_line(linewidth = 1) +
  labs(x = "Ground truth measure (BDI or STAI)", y = "Distribution") +
  facet_wrap(~ Item, scales = "free") +
  scale_y_continuous(expand = expansion(add = c(0, 0.001))) +
  scale_x_continuous(expand = expansion(add = c(0, 0))) +
  see::scale_color_flat_d("rainbow") +
  see::theme_modern(axis.title.space = 5) +
  theme(strip.background = element_rect(fill = "#EEEEEE", colour = "white"),
        axis.text.y = element_blank())

Anxiety

Relationship with STAI

Code
model <- lm(STAI5_General ~ PHQ4_Anxiety * PHQ4_Condition, data = df)

parameters::parameters(model) |> 
  display()
Parameter Coefficient SE 95% CI t(832) p
(Intercept) 0.76 0.05 (0.66, 0.85) 15.33 < .001
PHQ4 Anxiety 0.30 0.02 (0.27, 0.33) 19.72 < .001
PHQ4 Condition (Revised) -0.02 0.07 (-0.15, 0.11) -0.32 0.750
PHQ4 Anxiety × PHQ4 Condition (Revised) 0.01 0.02 (-0.03, 0.05) 0.56 0.576
Code
bfs <- get_databf(resp="STAI5_General", pred="PHQ4_Anxiety", y1=c(2.0, 2.2, 2.4, 2.6, 2.8, 3), y2=c(0.7, 1, 1.3, 1.6, 1.9))

p_anx <- df |> 
  ggplot(aes(x = PHQ4_Anxiety, y = STAI5_General), width=0.1) +
  ggdist::stat_slab(
    data = df[df$PHQ4_Condition == "Original" & df$PHQ4_Anxiety <= 4, ], 
    fill = colors["Original"],
    alpha=0.3,
    scale = 0.3,
    position = position_dodge(width = 1),
    side = "left") +
  ggdist::stat_slab(
    data = df[df$PHQ4_Condition == "Revised" & df$PHQ4_Anxiety %in% c(0, 1, 2, 3, 4), ], 
    fill = colors["Revised"],
    alpha=0.3,
    scale = 0.3,
    position = position_dodge(width = 1),
    side = "right") +
  ggdist::stat_slab(
    data = df[df$PHQ4_Anxiety %in% c(0.5, 1.5, 2.5), ], 
    fill = colors["Revised"],
    alpha=0.3,
    scale = 0.3,
    position = position_dodge(width = 1),
    side = "both") +
  geom_jitter2(aes(color = PHQ4_Condition), width=0.1, alpha=0.5, size=3) +
  geom_smooth(aes(color = PHQ4_Condition, fill=PHQ4_Condition), 
              method = "lm", formula="y~x", alpha=0.2, key_glyph = draw_key_rect) +
  geom_segment(data=bfs$lines, aes(x=xmin, xend=xmax, y=y, yend=y), color=colors["Revised"], linewidth=0.5) +
  geom_label(data = bfs$bf, aes(x = PHQ4_Anxiety, y = STAI5_General, label = BF, color=color), size=3) +
  scale_fill_manual(values = colors) +
  scale_color_manual(values = colors) +
  theme_bw() +
  # ggside::geom_xsidedensity(aes(fill = PHQ4_Condition), color=NA, alpha=0.5) +
  ggside::scale_xsidey_continuous(expand = c(0, 0)) +
  # ggside::geom_ysidedensity(aes(fill = PHQ4_Condition), color=NA, alpha=0.5) +
  ggside::scale_ysidex_continuous(expand = c(0, 0)) +
  ggside::theme_ggside_void() +
  coord_cartesian(xlim=c(-0.15, 6.1)) +
  guides(color="none", fill=guide_legend(override.aes = list(alpha=1))) +
  labs(
    x = "PHQ-4 (Anxiety Score)",
    y = "STAI-5 General",
    color = "PHQ4 Version",
    fill = "PHQ4 Questionnaire Version",
    title="Anxiety",
  ) +
  theme(legend.position = "top",
        plot.title = element_text(face="bold"))
p_anx

Predictive Performance

Code
m <- glm(Anxiety ~ PHQ4_Anxiety * PHQ4_Condition, data = df, family="binomial")

display(parameters::parameters(m))
Parameter Log-Odds SE 95% CI z p
(Intercept) -3.44 0.40 (-4.28, -2.71) -8.63 < .001
PHQ4 Anxiety 0.40 0.10 (0.22, 0.60) 4.14 < .001
PHQ4 Condition (Revised) 0.07 0.53 (-0.97, 1.14) 0.14 0.890
PHQ4 Anxiety × PHQ4 Condition (Revised) -0.03 0.13 (-0.29, 0.23) -0.22 0.822
Code
p_gad <- m |> 
  estimate_relation(length=20) |> 
  ggplot(aes(x=PHQ4_Anxiety, y=Predicted)) +
  geom_ribbon(aes(ymin=CI_low, ymax=CI_high, fill=PHQ4_Condition), alpha=0.2) +
  geom_line(aes(color=PHQ4_Condition), linewidth=1) +
  scale_fill_manual(values = colors) +
  scale_color_manual(values = colors) +
  scale_y_continuous(labels=scales::percent) +
  labs(y = "Probability of Anxiety",
       x= "PHQ-4 (Anxiety Score)",
       title = "Anxiety Disorder",
       fill = "Questionnaire Version",
       color = "Questionnaire Version") +
  theme_minimal()  +
  theme(plot.title = element_text(face="bold"),
        legend.position = "none")
Code
p_gadroc <- make_roc(m)

(p_gad / p_gadroc) + plot_layout(guides="collect")

Correlation Difference

Whole Sample

Code
bootstrap_diffs <- function(df_original, df_revised) {
  dat <- data.frame()
  for(i in 1:2000) {
    newdf_ori <- df_original[sample(nrow(df_original), replace = TRUE), ]
    newdf_rev <- df_revised[sample(nrow(df_revised), replace = TRUE), ]
    
    r_depression_ori = cor.test(newdf_ori$BDI2_Total, newdf_ori$PHQ4_Depression)$estimate
    r_depression_rev = cor.test(newdf_rev$BDI2_Total, newdf_rev$PHQ4_Depression)$estimate
    r_anxiety_ori = cor.test(newdf_ori$STAI5_General, newdf_ori$PHQ4_Anxiety)$estimate
    r_anxiety_rev = cor.test(newdf_rev$STAI5_General, newdf_rev$PHQ4_Anxiety)$estimate
    
    dat <- rbind(dat,
      data.frame(
        d_r_depression = r_depression_rev - r_depression_ori,
        d_r_anxiety = r_anxiety_rev - r_anxiety_ori,
        iteration = i
      )
    )
  }
  dat
}


dat <- bootstrap_diffs(
  filter(df, PHQ4_Condition == "Original"),
  filter(df, PHQ4_Condition == "Revised"))

dat |> 
  pivot_longer(cols = c(d_r_depression, d_r_anxiety)) |>
  ggplot(aes(x = value)) +
  geom_density(aes(fill = name), alpha = 0.5) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  theme_bw() +
  labs(x = "Difference in Correlation", y = "") 

Subsamples

Code
dat <- data.frame()
for(cat in c("Minimal - Mild", "Moderate - Severe")) {
  dat <- rbind(
    dat,
    bootstrap_diffs(
      filter(df, PHQ4_Condition == "Original", BDI2_Category == cat),
      filter(df, PHQ4_Condition == "Revised", BDI2_Category == cat)) |> 
      mutate(Subsample = paste("Depression:", cat)),
    bootstrap_diffs(
      filter(df, PHQ4_Condition == "Original", STAI5_Category == cat),
      filter(df, PHQ4_Condition == "Revised", STAI5_Category == cat)) |> 
      mutate(Subsample = paste("Anxiety:", cat)) 
  )
}


dat <- dat |> 
  separate(Subsample, into = c("Type", "Subsample"), sep = ":") |>
  pivot_longer(cols = c(d_r_depression, d_r_anxiety)) |>
  filter(!(Type == "Depression" & name=="d_r_anxiety") & 
           !(Type == "Anxiety" & name=="d_r_depression")) |> 
  mutate(Type = ifelse(Type == "Depression", paste(Type, "(BDI-II)"), paste(Type, "(STAI-5)"))) |> 
  mutate(Type = fct_rev(Type)) 

p_cor <- dat |> 
  ggplot(aes(x = value)) +
  geom_density(aes(fill = name), alpha = 0.9, color="white") +
  geom_vline(xintercept = 0, linetype = "dashed") +
  geom_text(data=summarize(dat, pd = insight::format_p(bayestestR::pd_to_p(
    as.numeric(pd(value)), direction="one-sided"), stars=TRUE), 
                           .by=c("Subsample", "Type")),
            aes(label=pd, x=0.3, y=5)) +
  theme_bw() +
  labs(x = "Difference in Correlation (PHQ4 Revised - Original)", y = "") +
  facet_grid(Type~Subsample)  +
  scale_fill_manual(values = c("d_r_depression"="#3F51B5", "d_r_anxiety"="#E91E63"), guide="none") +
  theme(axis.ticks.y = element_blank(), 
        axis.text.y = element_blank(),
        strip.background = element_rect(color=NA))
p_cor

Figures

Code
p_a <- (p_dep + theme(legend.position='none')) / 
              (p_anx  + theme(legend.position='none')) +
  plot_annotation(title="A.")
            
p_b <- p_cor + plot_annotation(title="B.")

p_c <- ((p_mdd / p_mddroc) | (p_gad / p_gadroc)) + patchwork::plot_layout(guides = "collect") + patchwork::plot_annotation(title="C.", theme=theme(legend.position='left'))

(wrap_elements(p_a) | wrap_elements(p_b)) / wrap_elements(p_c)