PHQ4R - Data Analysis (Study 1)

Data Preparation

Code

library(tidyverse)
library(easystats)
library(patchwork)
library(ggside)
library(mirt)

df <- read.csv("https://raw.githubusercontent.com/RealityBending/IllusionGameReliability/main/data/preprocessed_questionnaires.csv") |>
  select(Sex, Age, starts_with("Item_PHQ4")) |>
  filter(!is.na(Item_PHQ4_Anxiety_1))

names(df) <- str_remove_all(names(df), "Item_PHQ4_")


df2 <- read.csv("../study2/data/data_raw.csv") |>
  filter(PHQ4_Condition == "PHQ4 - Revised") |>
  select(Sex = Gender, Age, starts_with("PHQ4_"), -PHQ4_Duration, -PHQ4_Condition, -PHQ4_Order)

names(df2) <- str_remove_all(names(df2), "PHQ4_")

df2ori <- read.csv("../study2/data/data_raw.csv") |>
  filter(PHQ4_Condition == "PHQ4 - Original") |>
  select(Sex = Gender, Age, starts_with("PHQ4_"), -PHQ4_Duration, -PHQ4_Condition, -PHQ4_Order)

names(df2ori) <- str_remove_all(names(df2ori), "PHQ4_")

Participants

The first sample includes 485 participants (Mean age = 30.1, SD = 10.1, range: [18, 73]; Sex: 50.3% females, 49.7% males, 0.0% other).
The first sample includes 551 participants (Mean age = 24.1, SD = 11.0, range: [17, 118]; Sex: 75.1% females, 21.4% males, 2.5% other, 0.91% missing).

Distribution

Code

add_labels <- function(x) {
  x <- case_when(
    x == 0 ~ "Not at all",
    x == 1 ~ "Once or twice",
    x == 2 ~ "Several days",
    x == 3 ~ "More than half the days",
    TRUE ~ "Nearly every day"
  )
  fct_relevel(x, c("Not at all", "Once or twice", "Several days", "More than half the days", "Nearly every day"))
}

Code

df <- select(df, -Sex, -Age)

data <- df |>
  pivot_longer(everything(), names_to = "Item", values_to = "Answer") |>
  group_by(Item, Answer) |>
  summarise(n = n() / nrow(df)) |>
  mutate(
    Facet = str_split_fixed(Item, "_", 2)[, 1],
    # Item = str_split_fixed(Item, "_", 2)[,2],
    Answer = add_labels(Answer),
    Item = case_when(
      Item == "Anxiety_1" ~ "A1: Feeling nervous, anxious or on edge",
      Item == "Anxiety_2" ~ "A2: Not being able to stop or control worrying",
      Item == "Depression_3" ~ "D1: Feeling down, depressed, or hopeless",
      Item == "Depression_4" ~ "D2: Little interest or pleasure in doing things"
    )
  )

p1 <- data |>
  ggplot(aes(x = Answer, fill = Facet)) +
  geom_bar(aes(y = n), stat = "identity") +
  scale_y_continuous(labels = scales::percent, expand = c(0, 0)) +
  scale_fill_manual(values = c("Depression" = "#3F51B5", "Anxiety" = "#E91E63"), guide = "none") +
  labs(y = "Proportion of Answers") +
  theme_modern(axis.title.space = 10) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title.x = element_blank(),
    plot.title = element_text(face = "bold"),
    strip.text = element_text(size = rel(0.7)),
    strip.background = element_rect(fill = "#EEEEEE", colour = "white")
  ) +
  facet_wrap(~Item)
p1 + ggtitle("Proportion of answers per item")

The “Once or twice” answer was selected in 29.85%

Code

p1b <- rbind(
  data.frame(Scores = paste0(df$Anxiety_1, "_", df$Anxiety_2),
             Facet = "Anxiety"),
  data.frame(Scores = paste0(df$Depression_3, "_", df$Depression_4),
             Facet = "Depression")
) |> 
  group_by(Facet, Scores) |> 
  summarize(n = n() / nrow(df)) |> 
  separate(Scores, into = c("Q1", "Q2")) |> 
  mutate(Label = ifelse(Q1 == Q2, format_percent(n), "")) |> 
  mutate(Q1 = add_labels(as.numeric(Q1)),
         Q2 = add_labels(as.numeric(Q2))) |> 
  ggplot(aes(x = Q1, y = Q2)) +
  geom_tile(aes(fill = n)) +
  geom_text(aes(label = Label), size=2, color="white") +
  scale_y_discrete(expand = c(0, 0)) +
  scale_x_discrete(expand = c(0, 0)) +
  scale_fill_gradientn(colors=c("white", "#2196F3", "#3F51B5", "#673AB7"), labels = scales::percent) +
  labs(fill = "% Participants", x = "Item 1", y = "Item 2") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust=1),
        panel.grid.major = element_blank(),
        plot.title = element_text(face = "bold", hjust=0),
        strip.background=element_rect(fill="#EEEEEE", colour="white"),
        strip.text = element_text(face = "bold")) +
  facet_wrap(~Facet) 
p1b + ggtitle("Joint prevalence of responses")

Code

df2 <- select(df2, -Sex, -Age)

data <- df2 |>
  pivot_longer(everything(), names_to = "Item", values_to = "Answer") |>
  group_by(Item, Answer) |>
  summarise(n = n() / nrow(df2)) |>
  mutate(
    Facet = str_split_fixed(Item, "_", 2)[, 1],
    # Item = str_split_fixed(Item, "_", 2)[,2],
    Answer = add_labels(Answer),
    Item = case_when(
      Item == "Anxiety_1" ~ "A1: Feeling nervous, anxious or on edge",
      Item == "Anxiety_2" ~ "A2: Not being able to stop or control worrying",
      Item == "Depression_3" ~ "D1: Feeling down, depressed, or hopeless",
      Item == "Depression_4" ~ "D2: Little interest or pleasure in doing things"
    )
  )

p1_s2 <- data |>
  ggplot(aes(x = Answer, fill = Facet)) +
  geom_bar(aes(y = n), stat = "identity") +
  scale_y_continuous(labels = scales::percent, expand = c(0, 0)) +
  scale_fill_manual(values = c("Depression" = "#3F51B5", "Anxiety" = "#E91E63"), guide = "none") +
  labs(y = "Proportion of Answers") +
  theme_modern(axis.title.space = 10) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title.x = element_blank(),
    plot.title = element_text(face = "bold"),
    strip.text = element_text(size = rel(0.7)),
    strip.background = element_rect(fill = "#EEEEEE", colour = "white")
  ) +
  facet_wrap(~Item)
p1_s2 + ggtitle("Proportion of answers per item")

The “Once or twice” answer was selected in 30.31%

Code

p1b_s2 <- rbind(
  data.frame(Scores = paste0(df2$Anxiety_1, "_", df2$Anxiety_2),
             Facet = "Anxiety"),
  data.frame(Scores = paste0(df2$Depression_3, "_", df2$Depression_4),
             Facet = "Depression")
) |> 
  group_by(Facet, Scores) |> 
  summarize(n = n() / nrow(df2)) |> 
  separate(Scores, into = c("Q1", "Q2")) |> 
  mutate(Label = ifelse(Q1 == Q2, format_percent(n), "")) |> 
  mutate(Q1 = add_labels(as.numeric(Q1)),
         Q2 = add_labels(as.numeric(Q2))) |> 
  ggplot(aes(x = Q1, y = Q2)) +
  geom_tile(aes(fill = n)) +
  geom_text(aes(label = Label), size=2, color="white") +
  scale_y_discrete(expand = c(0, 0)) +
  scale_x_discrete(expand = c(0, 0)) +
  scale_fill_gradientn(colors=c("white", "#2196F3", "#3F51B5", "#673AB7"), labels = scales::percent) +
  labs(fill = "% Participants", x = "Item 1", y = "Item 2") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust=1),
        panel.grid.major = element_blank(),
        plot.title = element_text(face = "bold", hjust=0),
        strip.background=element_rect(fill="#EEEEEE", colour="white"),
        strip.text = element_text(face = "bold")) +
  facet_wrap(~Facet) 
p1b_s2 + ggtitle("Joint prevalence of responses")

Anxiety

Code

df_anx <- select(df, contains("Anxiety"))
df_anx2 <- select(df2, contains("Anxiety"))
df_anxfull <- rbind(df_anx, df_anx2)

Consistency

Sample 1: Cronbach’s alpha is 0.903.
Sample 2: Cronbach’s alpha is 0.889.

Model

Code

m_anxiety <- mirt(df_anx, model = 1, itemtype = "graded", SE = TRUE, verbose = FALSE)

m_anxiety


Call:
mirt(data = df_anx, model = 1, itemtype = "graded", SE = TRUE, 
    verbose = FALSE)

Full-information item factor analysis with 1 factor(s).
Converged within 1e-04 tolerance after 168 EM iterations.
mirt version: 1.44.0 
M-step optimizer: BFGS 
EM acceleration: Ramsay 
Number of rectangular quadrature: 61
Latent density type: Gaussian 

Information matrix estimated with method: Oakes
Second-order test: model is a possible local maximum
Condition number of information matrix =  30370.99

Log-likelihood = -1245.018
Estimated parameters: 10 
AIC = 2510.036
BIC = 2551.877; SABIC = 2520.138
G2 (14) = 20.58, p = 0.1128
RMSEA = 0.031, CFI = NaN, TLI = NaN

Code

# Factor loadings
summary(m_anxiety)

             F1    h2
Anxiety_1 0.895 0.802
Anxiety_2 0.991 0.982

          SE.F1
Anxiety_1 0.020
Anxiety_2 0.016

SS loadings:  1.784 
Proportion Var:  0.892 

Factor correlations: 

   F1
F1  1

Code

# Alpha
a <- coef(m_anxiety)
a <- data.frame(
  Item = c("Anxiety 1", "Anxiety 2"),
  Discrimination = c(paste0(format_value(a$Anxiety_1[1, 1]), ", ", format_ci(a$Anxiety_1[2, 1], a$Anxiety_1[3, 1])), paste0(format_value(a$Anxiety_2[1, 1]), ", ", format_ci(a$Anxiety_2[2, 1], a$Anxiety_2[3, 1])))
)
knitr::kable(a)

Item	Discrimination
Anxiety 1	3.42, 95% CI [2.66, 4.18]
Anxiety 2	12.55, 95% CI [-9.55, 34.65]

Code

# Plots
plot(m_anxiety, type = "trace", theta_lim = c(-3, 3))
plot(m_anxiety, type = "infotrace", theta_lim = c(-3, 3))
plot(m_anxiety, type = "score", theta_lim = c(-3, 3))
plot(m_anxiety, type = "infoSE", theta_lim = c(-3, 3))
plot(m_anxiety, type = "rxx", theta_lim = c(-3, 3))

Code

m_anxiety2 <- mirt(df_anx2, model = 1, itemtype = "graded", SE = TRUE, verbose = FALSE)

Warning: EM cycles terminated after 500 iterations.

Code

m_anxiety2


Call:
mirt(data = df_anx2, model = 1, itemtype = "graded", SE = TRUE, 
    verbose = FALSE)

Full-information item factor analysis with 1 factor(s).
FAILED TO CONVERGE within 1e-04 tolerance after 500 EM iterations.
mirt version: 1.44.0 
M-step optimizer: BFGS 
EM acceleration: Ramsay 
Number of rectangular quadrature: 61
Latent density type: Gaussian 

Information matrix estimated with method: Oakes
Second-order test: model is a possible local maximum
Condition number of information matrix =  8863.464

Log-likelihood = -1422.3
Estimated parameters: 10 
AIC = 2864.599
BIC = 2907.717; SABIC = 2875.972
G2 (14) = 7.76, p = 0.9016
RMSEA = 0, CFI = NaN, TLI = NaN

Code

# Factor loadings
summary(m_anxiety2)

             F1    h2
Anxiety_1 0.957 0.916
Anxiety_2 0.909 0.826

          SE.F1
Anxiety_1 0.066
Anxiety_2 0.066

SS loadings:  1.742 
Proportion Var:  0.871 

Factor correlations: 

   F1
F1  1

Code

# Alpha
a <- coef(m_anxiety2)
a <- data.frame(
  Item = c("Anxiety 1", "Anxiety 2"),
  Discrimination = c(paste0(format_value(a$Anxiety_1[1, 1]), ", ", format_ci(a$Anxiety_1[2, 1], a$Anxiety_1[3, 1])), paste0(format_value(a$Anxiety_2[1, 1]), ", ", format_ci(a$Anxiety_2[2, 1], a$Anxiety_2[3, 1])))
)
knitr::kable(a)

Item	Discrimination
Anxiety 1	5.63, 95% CI [-3.48, 14.74]
Anxiety 2	3.70, 95% CI [0.68, 6.73]

Code

# Plots
plot(m_anxiety2, type = "trace", theta_lim = c(-3, 3))
plot(m_anxiety2, type = "infotrace", theta_lim = c(-3, 3))
plot(m_anxiety2, type = "score", theta_lim = c(-3, 3))
plot(m_anxiety2, type = "infoSE", theta_lim = c(-3, 3))
plot(m_anxiety2, type = "rxx", theta_lim = c(-3, 3))

Same on original version:

Code

m_anxiety2ori <- mirt(select(df2ori, contains("Anxiety")), model = 1, itemtype = "graded", SE = TRUE, verbose = FALSE)

plot(m_anxiety2ori, type = "trace", theta_lim = c(-3, 3))
plot(m_anxiety2ori, type = "infotrace", theta_lim = c(-3, 3))

Code

m_anxietyfull <- mirt(df_anxfull, model = 1, itemtype = "graded", SE = TRUE, verbose = FALSE)

Warning: EM cycles terminated after 500 iterations.

Code

plot(m_anxietyfull, type = "trace", theta_lim = c(-3, 3))

Code

# plot(m_anxietyfull, type = "infotrace", theta_lim = c(-3, 3))

Category Characteristic Curves (CRC) and Item Information Curves

Typically, an optimally informative item will have a large location and broad category coverage (as indicated by location parameters) over theta.

Code

crc <- function(mod, data) {
  Theta <- matrix(seq(-3.5, 3.5, by = .01))
  rez <- data.frame()
  for (i in 1:2) {
    dat <- as.data.frame(probtrace(extract.item(mod, i), Theta))
    dat$Theta <- Theta[, 1]
    dat$Information <- normalize(iteminfo(extract.item(mod, i), Theta))
    dat <- pivot_longer(dat, -one_of(c("Theta", "Information")), names_to = "Answer", values_to = "Probability")
    dat$Item <- names(data)[i]
    rez <- rbind(rez, dat)
  }

  rez <- rez |>
    mutate(
      Answer = case_when(
        Answer == "P.1" ~ "Not at all",
        Answer == "P.2" ~ "Once or twice",
        Answer == "P.3" ~ "Several days",
        Answer == "P.4" ~ "More than half the days",
        TRUE ~ "Nearly every day"
      ),
      Answer = fct_relevel(Answer, c("Not at all", "Once or twice", "Several days", "More than half the days", "Nearly every day")),
      Item = case_when(
        Item == "Anxiety_1" ~ "A1: Feeling nervous, anxious or on edge",
        Item == "Anxiety_2" ~ "A2: Not being able to stop or control worrying",
        Item == "Depression_3" ~ "D1: Feeling down, depressed, or hopeless",
        Item == "Depression_4" ~ "D2: Little interest or pleasure in doing things"
      )
    )

  # Get close to 0.95
  i1 <- rez[rez$Item == unique(rez$Item)[1], ]
  i2 <- rez[rez$Item == unique(rez$Item)[2], ]
  minmax <- rbind(
    rbind(
      i1[i1$Answer == "Not at all", ][which.min(abs(i1[i1$Answer == "Not at all", ]$Probability - 0.95)), ],
      i1[i1$Answer == "Nearly every day", ][which.min(abs(i1[i1$Answer == "Nearly every day", ]$Probability - 0.95)), ]
    ),
    rbind(
      i2[i2$Answer == "Not at all", ][which.min(abs(i2[i2$Answer == "Not at all", ]$Probability - 0.95)), ],
      i2[i2$Answer == "Nearly every day", ][which.min(abs(i2[i2$Answer == "Nearly every day", ]$Probability - 0.95)), ]
    )
  )

  p <- rez |>
    ggplot(aes(x = Theta, y = Probability)) +
    geom_line(aes(y = Information), linetype = "dotted", color = "grey") +
    geom_line(aes(color = Answer), linewidth = 1) +
    # geom_vline(data = minmax, aes(xintercept = Theta), linetype = "dashed") +
    scale_y_continuous(labels = scales::percent, expand = c(0, 0.005)) +
    scale_color_flat_d("rainbow") +
    facet_grid(~Item) +
    theme_modern(axis.title.space = 5) +
    theme(strip.background = element_rect(fill = "#EEEEEE", colour = "white"))
  list(p = p, rez = rez, minmax = minmax)
}

out <- crc(m_anxiety, df_anx)
p2a <- out$p + labs(x = expression(Anxiety ~ (theta)))
p2a

Code

out2 <- crc(m_anxiety2, df_anx2)
p2a_s2 <- out2$p + labs(x = expression(Anxiety ~ (theta)))
p2a_s2

Normalized scoring

Code

normalize_scores <- function(data, minmax) {
  minmax <- minmax[minmax$Theta %in% range(minmax$Theta), ]

  item <- data.frame()
  scores <- data.frame()
  for (i in unique(data$Item)) {
    item <- data[data$Item == i, ] |>
      filter(Theta >= min(minmax$Theta) & Theta <= max(minmax$Theta)) |>
      mutate(Theta_n = normalize(Theta)) |>
      rbind(item)

    scores <- item[item$Item == i, ] |>
      group_by(Answer) |>
      filter(Probability == max(Probability)) |>
      ungroup() |>
      mutate(label = insight::format_value(Theta_n)) |>
      rbind(scores)
  }

  item |>
    ggplot(aes(x = Theta, y = Probability, color = Answer)) +
    geom_line(linewidth = 1) +
    geom_segment(data = scores, aes(x = Theta, xend = Theta, y = 0, yend = Probability, color = Answer), linetype = "dashed") +
    geom_label(data = scores, aes(x = Theta, y = Probability, label = label), show.legend = FALSE) +
    scale_y_continuous(labels = scales::percent, expand = c(0, 0.01)) +
    scale_color_viridis_d(option = "D") +
    facet_grid(~Item) +
    theme_modern(axis.title.space = 5) +
    theme(strip.background = element_rect(fill = "#EEEEEE", colour = "white"))
}

p3a <- normalize_scores(data = out$rez, out$minmax) + labs(x = expression(Anxiety ~ (theta)))
p3a

Code

p3a_s2 <- normalize_scores(data = out2$rez, out2$minmax) + labs(x = expression(Anxiety ~ (theta)))
p3a_s2

Depression

Code

df_dep <- select(df, contains("Depression"))
df_dep2 <- select(df2, contains("Depression"))

Consistency

Sample 1: Cronbach’s alpha is 0.841.
Sample 2: Cronbach’s alpha is 0.817.

Model

Code

m_depression <- mirt(df_dep, model = 1, itemtype = "graded", SE = TRUE, verbose = FALSE)

m_depression


Call:
mirt(data = df_dep, model = 1, itemtype = "graded", SE = TRUE, 
    verbose = FALSE)

Full-information item factor analysis with 1 factor(s).
Converged within 1e-04 tolerance after 192 EM iterations.
mirt version: 1.44.0 
M-step optimizer: BFGS 
EM acceleration: Ramsay 
Number of rectangular quadrature: 61
Latent density type: Gaussian 

Information matrix estimated with method: Oakes
Second-order test: model is a possible local maximum
Condition number of information matrix =  75020.53

Log-likelihood = -1327.525
Estimated parameters: 10 
AIC = 2675.05
BIC = 2716.892; SABIC = 2685.152
G2 (14) = 27.07, p = 0.0188
RMSEA = 0.044, CFI = NaN, TLI = NaN

Code

# Factor loadings
summary(m_depression)

                F1    h2
Depression_3 0.995 0.990
Depression_4 0.817 0.667

              SE.F1
Depression_3 0.0086
Depression_4 0.0219

SS loadings:  1.657 
Proportion Var:  0.828 

Factor correlations: 

   F1
F1  1

Code

# Alpha
a2 <- coef(m_depression)
a2 <- data.frame(
  Item = c("Depression 1", "Depression 2"),
  Discrimination = c(paste0(format_value(a2$Depression_3[1, 1]), ", ", format_ci(a2$Depression_3[2, 1], a2$Depression_3[3, 1])), paste0(format_value(a2$Depression_4[1, 1]), ", ", format_ci(a2$Depression_4[2, 1], a2$Depression_4[3, 1])))
)
knitr::kable(a2)

Item	Discrimination
Depression 1	16.64, 95% CI [-10.43, 43.70]
Depression 2	2.41, 95% CI [2.03, 2.79]

Code

m_depression2 <- mirt(df_dep2, model = 1, itemtype = "graded", SE = TRUE, verbose = FALSE)

m_depression2


Call:
mirt(data = df_dep2, model = 1, itemtype = "graded", SE = TRUE, 
    verbose = FALSE)

Full-information item factor analysis with 1 factor(s).
Converged within 1e-04 tolerance after 45 EM iterations.
mirt version: 1.44.0 
M-step optimizer: BFGS 
EM acceleration: Ramsay 
Number of rectangular quadrature: 61
Latent density type: Gaussian 

Information matrix estimated with method: Oakes
Second-order test: model is a possible local maximum
Condition number of information matrix =  3891.031

Log-likelihood = -1478.441
Estimated parameters: 10 
AIC = 2976.882
BIC = 3019.999; SABIC = 2988.255
G2 (14) = 17.4, p = 0.2354
RMSEA = 0.021, CFI = NaN, TLI = NaN

Code

# Factor loadings
summary(m_depression2)

                F1    h2
Depression_3 0.875 0.766
Depression_4 0.879 0.772

             SE.F1
Depression_3  0.15
Depression_4  0.15

SS loadings:  1.538 
Proportion Var:  0.769 

Factor correlations: 

   F1
F1  1

Code

# Alpha
a2 <- coef(m_depression2)
a2 <- data.frame(
  Item = c("Depression 1", "Depression 2"),
  Discrimination = c(paste0(format_value(a2$Depression_3[1, 1]), ", ", format_ci(a2$Depression_3[2, 1], a2$Depression_3[3, 1])), paste0(format_value(a2$Depression_4[1, 1]), ", ", format_ci(a2$Depression_4[2, 1], a2$Depression_4[3, 1])))
)
knitr::kable(a2)

Item	Discrimination
Depression 1	3.08, 95% CI [-1.36, 7.52]
Depression 2	3.13, 95% CI [-1.49, 7.75]

Code

plot(m_depression2, type = "trace", theta_lim = c(-3, 3))
plot(m_depression2, type = "infotrace", theta_lim = c(-3, 3))

Code

m_dep2ori <- mirt(select(df2ori, contains("Depression")), model = 1, itemtype = "graded", SE = TRUE, verbose = FALSE)

plot(m_dep2ori, type = "trace", theta_lim = c(-3, 3))
plot(m_dep2ori, type = "infotrace", theta_lim = c(-3, 3))

Category Characteristic Curves (CRC) and Item Information Curves

Code

out <- crc(m_depression, df_dep)
p2b <- out$p + labs(x = expression(Depression ~ (theta)))
p2b

Code

out2 <- crc(m_depression2, df_dep2)
p2b_s2 <- out2$p + labs(x = expression(Depression ~ (theta)))
p2b_s2

Normalized scoring

Code

p3b <- normalize_scores(out$rez, out$minmax) + labs(x = expression(Depression ~ (theta)))
p3b

Code

p3b_s2 <- normalize_scores(out2$rez, out2$minmax) + labs(x = expression(Depression ~ (theta)))
p3b_s2

Figures

Code

fig1 <- wrap_elements(wrap_elements(p1 + plot_annotation(title = "A. Proportion of answers per item", theme = list(plot.title = element_text(face = "bold")))) /
  wrap_elements(p1b + plot_annotation(title = "B. Joint prevalence of responses", theme = list(plot.title = element_text(face = "bold"))))) +
  wrap_elements(p2a / p2b + plot_annotation(title = "C. Information curves per response type", theme = list(plot.title = element_text(face = "bold"))) + plot_layout(guides = "collect")) + plot_layout(widths = c(1, 1.5))


fig1

Code

fig2 <- p3a / p3b + plot_annotation(title = "Normalized Scoring", theme = list(plot.title = element_text(face = "bold"))) + plot_layout(guides = "collect")

fig2

Code

fig.width <- see::golden_ratio(7)
fig.height <- 7

ggsave("figures/figure1.png", fig1, width = fig.width * 1.43, height = fig.height * 1.43)
ggsave("figures/figure2.png", fig2, width = fig.width * 1.43, height = fig.height * 1.43)

Code

fig1_s2 <- wrap_elements(wrap_elements(p1_s2 + plot_annotation(title = "A. Proportion of answers per item", theme = list(plot.title = element_text(face = "bold")))) /
  wrap_elements(p1b_s2 + plot_annotation(title = "B. Joint prevalence of responses", theme = list(plot.title = element_text(face = "bold"))))) +
  wrap_elements(p2a_s2 / p2b_s2 + plot_annotation(title = "C. Information curves per response type", theme = list(plot.title = element_text(face = "bold"))) + plot_layout(guides = "collect")) + plot_layout(widths = c(1, 1.5))
fig1_s2

Code

fig2_s2 <- p3a_s2 / p3b_s2 + plot_annotation(title = "Normalized Scoring", theme = list(plot.title = element_text(face = "bold"))) + plot_layout(guides = "collect")
fig2_s2

Code

ggsave("figures/figure1_s2.png", fig1_s2, width = fig.width * 1.43, height = fig.height * 1.43)
ggsave("figures/figure2_s2.png", fig2_s2, width = fig.width * 1.43, height = fig.height * 1.43)

Scores

Code

source("../score_PHQ4.R")

make_scores <- function(df) {
  data <- score_PHQ4(A1=df$Anxiety_1, A2=df$Anxiety_2, D1=df$Depression_3, D2=df$Depression_4, method="basic") |> 
    datawizard::data_addsuffix("_Basic") |> 
    cbind(
      score_PHQ4(A1=df$Anxiety_1, A2=df$Anxiety_2, D1=df$Depression_3, D2=df$Depression_4, method="normalized") |> 
        datawizard::data_addsuffix("_Normalized")) |> 
    select(matches("Depression|Anxiety")) |> 
    pivot_longer(everything()) |> 
    separate(name, into = c("Dimension", "Scoring")) 
  
  data |> 
    ggplot(aes(x=value)) + 
    geom_area(data=estimate_density(data, by = c("Dimension", "Scoring"), method = "KernSmooth") |> 
                group_by(Scoring) |> 
                normalize(select = "y"),
              aes(x = x, y = y, fill=Dimension), alpha = 0.05, position="identity") +
    geom_hline(yintercept = 0.5, linetype = "dotted") +
    stat_ecdf(aes(color=Dimension), geom = "step", linewidth=1) +
    facet_wrap(~Scoring, scales = "free") +
    scale_y_continuous(labels=scales::percent, expand = c(0, 0)) +
    scale_x_continuous(expand = c(0, 0)) +
    scale_fill_manual(values = c("Depression" = "#3F51B5", "Anxiety" = "#E91E63"), guide = "none") +
    scale_color_manual(values = c("Depression" = "#3F51B5", "Anxiety" = "#E91E63")) +
    theme_modern(axis.title.space = 5) +
    labs(title = "Distribution and Cumulative Density of the Sample", x = "Score", y = "Cumulative Proportion")
}

make_scores(df)

Code

make_scores(df2)

Correlation

Code

data.frame(Depression = mirt::fscores(m_depression)[, 1],
           Anxiety = mirt::fscores(m_anxiety)[, 1]) |> 
  correlation::correlation(df) |>
  summary() |> 
  export_table(caption = "Correlation Matrix")

Correlation Matrix

Parameter  | Anxiety_1 | Anxiety_2 | Depression_3 | Depression_4
----------------------------------------------------------------
Depression |      0.72 |      0.72 |         0.98 |         0.78
Anxiety    |      0.89 |      0.98 |         0.74 |         0.59

Code

data.frame(Depression = mirt::fscores(m_depression2)[, 1],
           Anxiety = mirt::fscores(m_anxiety2)[, 1]) |> 
  correlation::correlation(df2) |>
  summary() |> 
  export_table(caption = "Correlation Matrix")

Correlation Matrix

Parameter  | Anxiety_1 | Anxiety_2 | Depression_3 | Depression_4
----------------------------------------------------------------
Depression |      0.52 |      0.60 |         0.91 |         0.91
Anxiety    |      0.96 |      0.92 |         0.58 |         0.46