Denominator Neglect

Author

Jessica Helmer

Published

April 8, 2026

Setup

Code

library(tidyverse)
library(rstan)

options(mc.cores = parallel::detectCores())

Code

dn_full <- rio::import("https://raw.githubusercontent.com/forecastingresearch/fpt/refs/heads/main/data_cognitive_tasks/task_datasets/data_denominator_neglect.csv")

anchoraig_full <- rio::import("https://raw.githubusercontent.com/forecastingresearch/fpt/refs/heads/main/data_cognitive_tasks/metadata_tables/task_aig_version.csv")

Cleaning

Combined
Separate

Code

anchor_ids_c <- anchoraig_full %>%
  filter(task == "denominator_neglect_version_A" |
           task == "denominator_neglect_version_B") %>%
  mutate(task_version = factor(ifelse(task == "denominator_neglect_version_A", "A", "B"))) %>%
  filter(AIG_version == "anchor" & task_version == "B") %>%
  pull(session_id)

dn_dat_c <- rio::import("https://raw.githubusercontent.com/forecastingresearch/fpt/refs/heads/main/data_cognitive_tasks/task_datasets/data_denominator_neglect.csv") %>%
  arrange(session_id, trial_id) %>%
  filter(session_id %in% anchor_ids_c & task_version == "B") %>%
  mutate(proportion_difference = abs(left_lottery_gold_prop - right_lottery_gold_prop),
         trial_id = paste0("item_", trial_id)) |> 
  select(-c(session_restart_id, time_elapsed, custom_timer_ended_trial,
            trial_index, trial, response, trial_type)) |>
  left_join(rio::import("https://raw.githubusercontent.com/forecastingresearch/fpt/refs/heads/main/data_cognitive_tasks/metadata_tables/session.csv"),
            by = "session_id") |>
  left_join(rio::import("https://raw.githubusercontent.com/forecastingresearch/fpt/refs/heads/main/data_forecasting/processed_data/scores_quantile.csv") |>
              select(subject_id, sscore_standardized) |>
              summarize(.by = subject_id,
                        sscore = mean(sscore_standardized)),
            by = "subject_id") |>
  select(subject_id, trial_id, choice_type, proportion_difference, small_lottery_gold_prop, correct, sscore) |>
  mutate(item = str_split_i(trial_id, "_", 2),
         .keep = "unused")

saveRDS(dn_dat_c, here::here("Data", "Denominator Neglect", "dn_dat_c.rds"))

Code

anchor_ids_s <- anchoraig_full %>%
  filter(task == "denominator_neglect_version_A" |
           task == "denominator_neglect_version_B") %>%
  mutate(task_version = factor(ifelse(task == "denominator_neglect_version_A", "A", "B"))) %>%
  filter(AIG_version == "anchor" & task_version == "A") %>%
  pull(session_id)

dn_dat_s <- rio::import("https://raw.githubusercontent.com/forecastingresearch/fpt/refs/heads/main/data_cognitive_tasks/task_datasets/data_denominator_neglect.csv") %>%
  arrange(session_id, trial_id) %>%
  filter(session_id %in% anchor_ids_s & task_version == "A") %>%
  mutate(proportion_difference = abs(left_lottery_gold_prop - right_lottery_gold_prop),
         trial_id = paste0("item_", trial_id)) |> 
  select(-c(session_restart_id, time_elapsed, custom_timer_ended_trial,
            trial_index, trial, response, trial_type)) |>
  left_join(rio::import("https://raw.githubusercontent.com/forecastingresearch/fpt/refs/heads/main/data_cognitive_tasks/metadata_tables/session.csv"),
            by = "session_id") |>
  left_join(rio::import("https://raw.githubusercontent.com/forecastingresearch/fpt/refs/heads/main/data_forecasting/processed_data/scores_quantile.csv") |>
              select(subject_id, sscore_standardized) |>
              summarize(.by = subject_id,
                        sscore = mean(sscore_standardized)),
            by = "subject_id") |>
  mutate(small_lottery_display_type = ifelse(left_lottery_type == "small", 
                                             left_lottery_display_type, right_lottery_display_type),
         large_lottery_display_type = ifelse(left_lottery_type == "large",
                                             left_lottery_type, right_lottery_display_type)) |>
  select(subject_id, trial_id, choice_type, proportion_difference, small_lottery_gold_prop, , small_lottery_display_type, large_lottery_display_type, correct, sscore) |>
  mutate(item = str_split_i(trial_id, "_", 2),
         .keep = "unused")

saveRDS(dn_dat_s, here::here("Data", "Denominator Neglect", "dn_dat_s.rds"))

Denominator Neglect

Denominator Neglect is a ratio comparison task in which participants choose between lotteries of gold and silver coins. Participants’ goal is to get the most gold coins over the course of the trials and therefore should select the lottery with the highest proportion of gold coins.

Denominator Neglect: Combined

The following analysis focuses only on the combined version, “Version B.”

Methods

Modeling

Code

dn.c_wide <- dn_dat_c |>
  select(subject_id, item, correct) |>
  mutate(item = paste0("item_", item)) |>
  pivot_wider(names_from = item, values_from = correct) |>
  select(-subject_id) |>
  drop_na()

Code

dn.c_m <- stan(here::here("Models", "2pl-code.stan"), 
             data = list(J = nrow(dn.c_wide),
                         K = ncol(dn.c_wide),
                         y = dn.c_wide),
             chains = 4,
             iter = 2500,
             seed = 50401)
saveRDS(dn.c_m, here::here("Models", "denominator-neglect-c_2pl.rds"))

Code

dn.c_m <- readRDS(here::here("Models", "denominator-neglect-c_2pl.rds"))

Probabilities of correct response given difficulty and discrimination estimates and hypothetical \(\theta\) values.

Code

ps.c <- rstan::extract(dn.c_m, c("a", "b")) |>
  as.data.frame() |>
  mutate(rep = row_number()) |>
  filter(rep %in% 1:50) |>
  pivot_longer(-rep,
               names_to = "item", values_to = "est") |>
  separate_wider_delim(item, ".", names = c("param", "item")) |>
  pivot_wider(id_cols = c(item, rep),
              names_from = param, values_from = est) |>
  expand_grid(th = seq(-6, 6, length.out = 100)) |>
  mutate(p_1 = exp(a * (th - b)) / (1 + exp(a * (th - b))),
         p_0 = 1 - p_1,
         info = a^2 * (p_1 * p_0),
         choice_type = ifelse(as.numeric(item) <= 24, "conflict", "harmony"))

Results

Below are the item response curves.

Code

readRDS(here::here("Figures", "Denominator Neglect", "irc_denominator-neglect.rds"))

And below are the information curves.

Code

readRDS(here::here("Figures", "Denominator Neglect", "ic_denominator-neglect.rds"))

Test information curve

Code

tic.c <- ps.c %>%
  summarize(.by = c(rep, th),
            test_info = sum(info)) |>
  ggplot(aes(x = th, y = test_info, group = rep)) +
  geom_line(alpha = .3, color = "slategray3") +
  geom_line(data = ps.c |> summarize(.by = c(rep, th),
            test_info = sum(info)) |>
              summarize(.by = th,
                        test_info = mean(test_info)),
            aes(x = th, y = test_info),
            inherit.aes = F, linewidth = .8) +
  scale_y_continuous(breaks = c(0, 15, 30)) +
  scale_x_continuous(breaks = c(-4, 0, 4)) +
  coord_cartesian(xlim = c(-5, 5), ylim = c(0, 35)) +
  labs(y = "Test I(ϴ)", x = "ϴ") +
  theme_classic(base_size = 14) +
  theme(strip.background = element_blank(),
        strip.text.x = element_blank())

# saveRDS(tic, here::here("Figures", "Denominator Neglect", "tic.rds")) 

tic.c

Code

top_items.c <- data.frame(conf_item = as.numeric(),
                          harm_item = as.numeric(),
                          test_info_mean = as.numeric(),
                          test_info_sd = as.numeric())

for (i in seq(24)) {
  top_items.c <- expand_grid(conf_item = ps.c |> filter(choice_type == "conflict") |>
                               filter(!(item %in% top_items.c$conf_item)) |>
                               pull(item) |>
                               unique(),
                             harm_item = ps.c |> filter(choice_type == "harmony") |>
                               filter(!(item %in% top_items.c$harm_item)) |>
                               pull(item) |>
                               unique()) |>
    pmap(\(conf_item, harm_item){
      ps.c |>
        filter(item == conf_item | item == harm_item |
                 item %in% top_items.c$conf_item | item %in% top_items.c$harm_item) |>
        summarize(.by = c(rep, th),
                  test_info = sum(info)) |>
        summarize(.by = rep,
                  test_info = sum(test_info)) |>
        summarize(rank = i,
                  conf_item = conf_item,
                  harm_item = harm_item,
                  test_info_mean = mean(test_info),
                  test_info_sd = sd(test_info)) 
    }) |>
    list_rbind() |>
    filter(test_info_mean == max(test_info_mean)) |>
    rbind(top_items.c) |>
    arrange(rank)
}

saveRDS(top_items.c, here::here("Data", "Denominator Neglect", "top_items-c.rds"))

Code

top_items.c <- readRDS(here::here("Data", "Denominator Neglect", "top_items-c.rds"))

top_items.c |>
  ggplot(aes(x = rank, y = test_info_mean / rank)) +
  geom_line(alpha = 1, color = "slategray3") +
  scale_y_continuous(breaks = c(25, 30)) +
  scale_x_continuous(breaks = c(1, 12, 24)) +
  #coord_cartesian(xlim = c(-5, 5), ylim = c(0, 30)) +
  labs(y = "Test I(ϴ) per Item Pair", x = "Number of Item Pairs") +
  theme_classic(base_size = 14) +
  theme(strip.background = element_blank(),
        strip.text.x = element_blank())

Denominator Neglect: Separate

The following analysis focuses only on the separate version, “Version A.”

Methods

Modeling

Code

dn.s_wide <- dn_dat_s |>
  select(subject_id, item, correct) |>
  mutate(item = paste0("item_", item)) |>
  pivot_wider(names_from = item, values_from = correct) |>
  select(-subject_id) |>
  drop_na()

Code

dn.s_m <- stan(here::here("Models", "2pl-code.stan"), 
             data = list(J = nrow(dn.s_wide),
                         K = ncol(dn.s_wide),
                         y = dn.s_wide),
             chains = 4,
             iter = 2500,
             seed = 50401)
saveRDS(dn.s_m, here::here("Models", "denominator-neglect-s_2pl.rds"))

Code

dn.s_m <- readRDS(here::here("Models", "denominator-neglect-s_2pl.rds"))

Probabilities of correct response given difficulty and discrimination estimates and hypothetical \(\theta\) values.

Code

ps.s <- rstan::extract(dn.s_m, c("a", "b")) |>
  as.data.frame() |>
  mutate(rep = row_number()) |>
  filter(rep %in% 1:50) |>
  pivot_longer(-rep,
               names_to = "item", values_to = "est") |>
  separate_wider_delim(item, ".", names = c("param", "item")) |>
  pivot_wider(id_cols = c(item, rep),
              names_from = param, values_from = est) |>
  expand_grid(th = seq(-6, 6, length.out = 100)) |>
  mutate(p_1 = exp(a * (th - b)) / (1 + exp(a * (th - b))),
         p_0 = 1 - p_1,
         info = a^2 * (p_1 * p_0),
         choice_type = ifelse(as.numeric(item) <= 24, "conflict", "harmony"))

Results

Below are the item response curves.

Code

readRDS(here::here("Figures", "Denominator Neglect", "irc_denominator-neglect.rds"))

And below are the information curves.

Code

readRDS(here::here("Figures", "Denominator Neglect", "ic_denominator-neglect.rds"))

Test information curve

Code

tic.s <- ps.s %>%
  summarize(.by = c(rep, th),
            test_info = sum(info)) |>
  ggplot(aes(x = th, y = test_info, group = rep)) +
  geom_line(alpha = .3, color = "slategray3") +
  geom_line(data = ps.s |> summarize(.by = c(rep, th),
            test_info = sum(info)) |>
              summarize(.by = th,
                        test_info = mean(test_info)),
            aes(x = th, y = test_info),
            inherit.aes = F, linewidth = .8) +
  scale_y_continuous(breaks = c(0, 15, 30)) +
  scale_x_continuous(breaks = c(-4, 0, 4)) +
  coord_cartesian(xlim = c(-5, 5), ylim = c(0, 35)) +
  labs(y = "Test I(ϴ)", x = "ϴ") +
  theme_classic(base_size = 14) +
  theme(strip.background = element_blank(),
        strip.text.x = element_blank())

# saveRDS(tic, here::here("Figures", "Denominator Neglect", "tic.rds")) 

tic.s

Code

top_items.s <- data.frame(conf_item = as.numeric(),
                          harm_item = as.numeric(),
                          test_info_mean = as.numeric(),
                          test_info_sd = as.numeric())

for (i in seq(24)) {
  top_items.s <- expand_grid(conf_item = ps.s |> filter(choice_type == "conflict") |>
                               filter(!(item %in% top_items.s$conf_item)) |>
                               pull(item) |>
                               unique(),
                             harm_item = ps.s |> filter(choice_type == "harmony") |>
                               filter(!(item %in% top_items.s$harm_item)) |>
                               pull(item) |>
                               unique()) |>
    pmap(\(conf_item, harm_item){
      ps.s |>
        filter(item == conf_item | item == harm_item |
                 item %in% top_items.s$conf_item | item %in% top_items.s$harm_item) |>
        summarize(.by = c(rep, th),
                  test_info = sum(info)) |>
        summarize(.by = rep,
                  test_info = sum(test_info)) |>
        summarize(rank = i,
                  conf_item = conf_item,
                  harm_item = harm_item,
                  test_info_mean = mean(test_info),
                  test_info_sd = sd(test_info)) 
    }) |>
    list_rbind() |>
    filter(test_info_mean == max(test_info_mean)) |>
    rbind(top_items.s) |>
    arrange(rank)
}

saveRDS(top_items.s, here::here("Data", "Denominator Neglect", "top_items-s.rds"))

Code

top_items.s <- readRDS(here::here("Data", "Denominator Neglect", "top_items-s.rds"))

top_items.c |>
  ggplot(aes(x = rank, y = test_info_mean / rank)) +
  geom_line(alpha = 1, color = "slategray3") +
  scale_y_continuous(breaks = c(25, 30)) +
  scale_x_continuous(breaks = c(1, 12, 24)) +
  #coord_cartesian(xlim = c(-5, 5), ylim = c(0, 30)) +
  labs(y = "Test I(ϴ) per Item Pair", x = "Number of Item Pairs") +
  theme_classic(base_size = 14) +
  theme(strip.background = element_blank(),
        strip.text.x = element_blank())

–>