From b43ac34fc9fd97ff62886d949372cae3af846180 Mon Sep 17 00:00:00 2001 From: Pablo Diego-Rosell Date: Mon, 7 Dec 2020 16:16:31 +0100 Subject: [PATCH] Add files via upload --- R/data_prep.R | 563 +++++++++++++++++++++++++++++++ R/hyp_eda.R | 901 ++++++++++++++++++++++++++++++++++++++++++++++++++ R/hyp_test.R | 466 ++++++++++++++++++++++++++ R/optimizer.R | 117 +++++++ 4 files changed, 2047 insertions(+) create mode 100644 R/data_prep.R create mode 100644 R/hyp_eda.R create mode 100644 R/hyp_test.R create mode 100644 R/optimizer.R diff --git a/R/data_prep.R b/R/data_prep.R new file mode 100644 index 0000000..c6a3fb2 --- /dev/null +++ b/R/data_prep.R @@ -0,0 +1,563 @@ +#-------------------------------------------------------------------------- +# Load libraries and set working directory +#-------------------------------------------------------------------------- + +if (!require("pacman")) install.packages("pacman") +library ("pacman") +pacman::p_load(rstatix, ggpubr, corrgram) +#dd <- ("//gallup/dod_clients/DARPA_ASIST/CONSULTING/Analytics/Phase_1/Data/processed_data") # Server execution +dd <- "C:/Users/C_Pablo_Diego-Rosell/Desktop/Projects/ASIST/Data/Phase_1" # Local execution +knitr::opts_knit$set(root.dir = dd) + +#-------------------------------------------------------------------------- +# Check pre-processing errors +#-------------------------------------------------------------------------- + +setwd("C:/Users/C_Pablo_Diego-Rosell/Desktop/Projects/ASIST/Data/Phase_1/errors") +list_of_files <- list.files(getwd(), pattern = '\\.txt$') +errors <- lapply(list_of_files, readLines) +errors <- data.frame(matrix(unlist(errors), nrow=length(errors), byrow=T)) +files <- data.frame(matrix(unlist(list_of_files), nrow=length(list_of_files), byrow=T)) +errors <- cbind (files, errors) +colnames(errors) <- c("error_file", "error") +write.csv(errors, "errors.csv") + +#-------------------------------------------------------------------------- +# Load experiment-level data ("results_data.csv") +#-------------------------------------------------------------------------- + +setwd(dd) +games <- read.csv ("results_data.csv", stringsAsFactors = F) + +#-------------------------------------------------------------------------- +# Generate trial ordering variable +#-------------------------------------------------------------------------- + +games <- games %>% + group_by(member_id) %>% mutate(trial_order = order(order(trial_id, decreasing=F))) + +#-------------------------------------------------------------------------- +# Data checks +#-------------------------------------------------------------------------- + +length(unique(games$subject_id)) # Should have 75 unique subjects +length(unique(games$trial_id)) # Should have 225 unique trials +table(games$complexity) # Should be balanced by complexity (e.g. 225/3 = 75 in each level) +table(games$training) # Should be balanced by training (e.g. 225/3 = 75 in each level) + +#-------------------------------------------------------------------------- +# Function to summarize continuous variables, standardize and check distribution +#-------------------------------------------------------------------------- + +normality_test <- function (test_variable) { + print(summary(test_variable)) + scaled_variable <- scale(test_variable) + plot(hist(scaled_variable)) + plot(ggqqplot(scaled_variable)) + plot(ggdensity(scaled_variable, fill = "lightgray")) + print(shapiro_test(scaled_variable[0:5000])) + return(scaled_variable) +} + +#-------------------------------------------------------------------------- +# Screen variables and transform for analysis +#-------------------------------------------------------------------------- +#-------------------------------------------------------------------------- +## Training condition - create dummies for hypothesis testing +#-------------------------------------------------------------------------- + +games$tradeoff <- as.factor(ifelse(grepl("NoTriage", games$training), c("NoTradeOff"), c("TradeOff"))) +games$device <- as.factor(ifelse(grepl("TriageSignal", games$training), c("YesDevice"), c("NoDevice"))) +games$q239_cat <- as.factor(ifelse(games$q239=="Yes", "Learned", "Not Learned")) + +#-------------------------------------------------------------------------- +## Satisficing tendency +#-------------------------------------------------------------------------- + +games$sat_tendency_std <- normality_test (games$q7_average) +# Comment: 3 NA values +# Comment: Looks reasonably normal, though shapiro significant + +#-------------------------------------------------------------------------- +# Load recomputed satisficing tendency factor scores after dropping item 7 ("noseven.csv") +#-------------------------------------------------------------------------- + +noseven <- read.csv ("noseven.csv", stringsAsFactors = F) +noseven <- subset(noseven, select = c(member_id, V10)) +colnames(noseven) <- c("member_id", "sat_tendency_std2") +games <- merge(games, noseven, by=c("member_id"), all.x = TRUE) +games$sat_tendency_std2 <- normality_test (games$sat_tendency_std2) + +#-------------------------------------------------------------------------- +## Prioritized navigation strategy +#-------------------------------------------------------------------------- + +table (games$original_nav_strategy) +games$original_nav_strat_yellow <- as.factor(ifelse( + games$original_nav_strategy =="Yellow First", + c("Yellow First"), c("Other"))) + +games$original_nav_strat_yelempt <- as.factor(ifelse( + games$original_nav_strategy =="Yellow First" | + games$original_nav_strategy =="Avoid Empty", + c("Yellow First - Avoid Empty"), c("Other"))) + +games$original_nav_strat_seq <- as.factor(ifelse( + games$original_nav_strategy =="Sequential", + c("Sequential"), c("Other"))) + +#-------------------------------------------------------------------------- +## Strategy planned in next trial +#-------------------------------------------------------------------------- + +games_next <- subset(games, select = c(trial_id, member_id, original_nav_strat_yelempt)) +games_next$trial_id <- games_next$trial_id-1 # Subtract trial_id - 1 to create an index of "next trials". +colnames (games_next)[3] <- c("next_nav_strat_yelempt") # Change column names +games <- merge(games,games_next, by=c("member_id","trial_id"), all.x = TRUE) # Merge new variables + +#-------------------------------------------------------------------------- +## Prioritized victim strategy - Explore and recode into binaries +#-------------------------------------------------------------------------- + +table (games$original_victim_strategy) +games$original_vic_strat_yellow <- as.factor(ifelse( + games$original_victim_strategy =="Yellow First", + c("Yellow First"), c("Other"))) + +games$original_vic_strat_seq <- as.factor(ifelse( + games$original_victim_strategy =="Sequential", + c("Sequential"), c("Other"))) + +#-------------------------------------------------------------------------- +## Spatial ability +#-------------------------------------------------------------------------- + +games$spatial_ability_std <- normality_test (games$q8_average) +# Comment: 3 NA values +# Comment: Looks reasonably normal, though shapiro significant + +#-------------------------------------------------------------------------- +## Video-gaming experience +#-------------------------------------------------------------------------- + +games$videogame_experience_std <- normality_test (games$videogame_experience) +# Comment: Possible bimodal + +#-------------------------------------------------------------------------- +## Workload +#-------------------------------------------------------------------------- + +games$workload_std <- normality_test (games$workload) +# Comment: Heavy negative skew, possible ceiling effect + +#-------------------------------------------------------------------------- +## Final score +#-------------------------------------------------------------------------- + +games$final_score_std <- normality_test (games$final_score) +# Comment: Negative skew, long left tail + +#-------------------------------------------------------------------------- +## Competency_score +#-------------------------------------------------------------------------- + +games$competency_score_std <- - normality_test (games$competency_score) # Standardized and reversed to facilitate interpretation (higher score, greater competency) +# Comment: Positive skew, long right tail + +#-------------------------------------------------------------------------- +## Calculate most frequently used victim and navigation strategies +#-------------------------------------------------------------------------- +allVars <- colnames(games) +vic_time_vars <- games[allVars [grepl("victim", allVars) & grepl("time", allVars)]] +games$most_time_vic_strat <- colnames(vic_time_vars)[apply(vic_time_vars,1,which.max)] +games$most_time_vic_strat <- gsub("victim_strategy_data.|.time_spent", "", games$most_time_vic_strat) +games$most_time_vic_strat_yellow <- as.factor(ifelse(games$most_time_vic_strat =="Yellow.First", "Yellow.First","Other")) +games$most_time_vic_strat_seq <- as.factor(ifelse(games$most_time_vic_strat =="Sequential", "Sequential","Other")) + +nav_time_vars <- games[allVars [grepl("navigation", allVars) & grepl("time", allVars)]] +games$most_time_nav_strat <- colnames(nav_time_vars)[apply(nav_time_vars,1,which.max)] +games$most_time_nav_strat <- gsub("navigation_strategy_data.|.time_spent", "", games$most_time_nav_strat) +games$most_time_nav_strat_seq <- as.factor(ifelse(games$most_time_nav_strat =="Sequential", "Sequential","Other")) +games$most_time_nav_strat_yel <- as.factor(ifelse(games$most_time_nav_strat =="Yellow.First", "Yellow.First","Other")) + +#-------------------------------------------------------------------------- +## Calculate most beneficial victim and navigation strategies (points per minute) +#-------------------------------------------------------------------------- + +vic_rate_vars <- games[allVars [grepl("victim", allVars) & grepl("points", allVars)]] +games$most_points_vic_strat <- colnames(vic_rate_vars)[apply(vic_rate_vars,1,which.max)] +games$most_points_vic_strat <- gsub("victim_strategy_data.|.points_per_minute", "", games$most_points_vic_strat) +games$most_points_vic_strat_yellow <- as.factor(ifelse(games$most_points_vic_strat =="Yellow.First", "Yellow.First","Other")) +games$most_points_vic_strat_seq <- as.factor(ifelse(games$most_points_vic_strat =="Sequential", "Sequential","Other")) + +nav_rate_vars <- games[allVars [grepl("navigation", allVars) & grepl("points", allVars)]] +games$most_points_nav_strat <- colnames(nav_rate_vars)[apply(nav_rate_vars,1,which.max)] +games$most_points_nav_strat <- gsub("navigation_strategy_data.|.points_per_minute", "", games$most_points_nav_strat) +games$most_points_nav_strat_seq <- as.factor(ifelse(games$most_points_nav_strat =="Sequential", "Sequential","Other")) +games$most_points_nav_strat_yel <- as.factor(ifelse(games$most_points_nav_strat =="Yellow.First", "Yellow.First","Other")) + +#-------------------------------------------------------------------------- +## Merge strategy use variables from previous trial +#-------------------------------------------------------------------------- + +# Select subset of variables of interest. +games_prev <- subset(games, select=c( + victim_strategy_data.Yellow.First.time_spent, + victim_strategy_data.Yellow.First.points_per_minute, + victim_strategy_data.Sequential.time_spent, + victim_strategy_data.Sequential.points_per_minute, + most_time_vic_strat_yellow, original_vic_strat_yellow, most_points_vic_strat_yellow, + most_time_vic_strat_seq, most_points_vic_strat_seq, + navigation_strategy_data.Yellow.First.time_spent, + navigation_strategy_data.Yellow.First.points_per_minute, + navigation_strategy_data.Sequential.time_spent, + navigation_strategy_data.Sequential.points_per_minute, + most_time_nav_strat_seq, original_nav_strat_seq, most_points_nav_strat_seq, + most_time_nav_strat_yel, most_points_nav_strat_yel, + subject_id, trial_id)) + +# Subtract trial_id - 1 to create an index of "previous trials". +games_prev$trial_id <- games_prev$trial_id+1 + +# Change column names +colnames (games_prev)[1:18] <- c("vic_strat_yel_prev_time", + "vic_strat_yel_prev_rate", + "vic_strat_seq_prev_time", + "vic_strat_seq_prev_rate", + "vic_strat_yel_prev_most_time", + "vic_strat_yel_prev_original", + "vic_strat_yel_prev_most_points", + "vic_strat_seq_prev_most_time", + "vic_strat_seq_prev_most_points", + "nav_strat_yel_prev_time", + "nav_strat_yel_prev_rate", + "nav_strat_seq_prev_time", + "nav_strat_seq_prev_rate", + "nav_strat_seq_prev_most_time", + "nav_strat_seq_prev_original", + "nav_strat_seq_prev_most_points", + "nav_strat_yel_prev_most_time", + "nav_strat_yel_prev_most_points") +# Merge new variables +games <- merge(games,games_prev, by=c("subject_id","trial_id"), all.x = TRUE) + +games$vic_strat_yel_prev_time_std <- normality_test (games$vic_strat_yel_prev_time) +games$vic_strat_yel_prev_rate_std <- normality_test (games$vic_strat_yel_prev_rate) +games$vic_strat_seq_prev_time_std <- normality_test (games$vic_strat_seq_prev_time ) +games$vic_strat_seq_prev_rate_std <- normality_test (games$vic_strat_seq_prev_rate) +games$nav_strat_yel_prev_time_std <- normality_test (games$nav_strat_yel_prev_time ) +games$nav_strat_yel_prev_rate_std <- normality_test (games$nav_strat_yel_prev_rate) +games$nav_strat_seq_prev_time_std <- normality_test (games$nav_strat_seq_prev_time) +games$nav_strat_seq_prev_rate_std <- normality_test (games$nav_strat_seq_prev_rate ) + +#-------------------------------------------------------------------------- +# Load event-level data ("events_data.csv") and subset into victim and room events +#-------------------------------------------------------------------------- + +events <- read.csv ("results_events.csv", stringsAsFactors = F) +table(events$event, useNA="always") + +#-------------------------------------------------------------------------- +# Merge complexity levels +#-------------------------------------------------------------------------- + +complexity <- subset(games, select = c(trial_id, complexity)) +events <- merge(events, complexity, by=c("trial_id"), all.x = TRUE) + +#-------------------------------------------------------------------------- +# Merge trial ordering variable +#-------------------------------------------------------------------------- + +trial_order <- subset(games, select = c(trial_id, trial_order)) +str(trial_order) +events <- merge(events, trial_order, by=c("trial_id"), all.x = TRUE) + +events$trial_order <- as.factor(events$trial_order) + +#-------------------------------------------------------------------------- +## Training conditions - merge with events data +#-------------------------------------------------------------------------- + +training_vars <- subset (games, select = c(trial_id, training, tradeoff, device)) +events <- merge(events,training_vars, by=c("trial_id"), all.x = TRUE) + +#-------------------------------------------------------------------------- +## Remaining seconds +#-------------------------------------------------------------------------- + +events$seconds_remaining_std <- normality_test (events$seconds_remaining) +# By definition, a uniform distribution + +#-------------------------------------------------------------------------- +## Remaining yellow victims expire (seconds_remaining<320 | seconds_remaining>280) +#-------------------------------------------------------------------------- + +events$five_minutes <- as.factor(ifelse(events$seconds_remaining< 340 & events$seconds_remaining>300, + c("Minutes 5-6"), c("Minutes 0-4:6-10"))) + +#-------------------------------------------------------------------------- +## Subset into victims and rooms +#-------------------------------------------------------------------------- + +victims <- subset (events, event == "victim_triaged") +rooms <- subset (events, event == "room_entered") + +#-------------------------------------------------------------------------- +## victim_color +#-------------------------------------------------------------------------- + +table(victims$victim_color, useNA="always") + +#-------------------------------------------------------------------------- +## Remaining yellow victims +#-------------------------------------------------------------------------- + +victims$total_yellow_victims_remaining_std <- normality_test (victims$total_yellow_victims_remaining) + +victims$all_yellow_rescued <- as.factor(ifelse( + victims$total_yellow_victims_remaining == 0, + c("Yellows rescued"), c("Yellow remain"))) +table(victims$all_yellow_rescued) + +#-------------------------------------------------------------------------- +## Yellow and green victims in current room +#-------------------------------------------------------------------------- + +table(rooms$yellow_victims_in_current_room, useNA="always") +table(rooms$green_victims_in_current_room, useNA="always") + +#-------------------------------------------------------------------------- +## room_type +#-------------------------------------------------------------------------- + +table(rooms$room_type, useNA="always") + +#-------------------------------------------------------------------------- +## victim_strategy - Code dummies for yellow first +#-------------------------------------------------------------------------- + +table(victims$victim_strategy, useNA="always") + +victims$vic_strat_yellow <- as.factor(ifelse( + victims$victim_strategy =="Yellow First", + c("Yellow First"), c("Other"))) + +#-------------------------------------------------------------------------- +## victim_strategy update - Create index within trial and compare strategies +#-------------------------------------------------------------------------- + +victims <- victims[with(victims, order(trial_id, event_index_number)),] +victims <- victims %>% + group_by(trial_id) %>% + mutate(event_ID = dplyr::row_number()) + +vic_strat_prev <- subset(victims, select=c(victim_strategy, member_id, trial_id, event_ID)) +vic_strat_prev$event_ID <- vic_strat_prev$event_ID+1 +colnames (vic_strat_prev)[1] <- c("victim_strategy_prev") +victims <- merge(victims,vic_strat_prev, by=c("member_id","trial_id", "event_ID"), all.x = TRUE) +victims$victim_strategy_update <- as.factor( + ifelse(victims$victim_strategy==victims$victim_strategy_prev, + c("No Update"), c("Update"))) + +#-------------------------------------------------------------------------- +## Victim rates +#-------------------------------------------------------------------------- + +victims$yellow_per_minute_std <- normality_test (victims$yellow_per_minute) +victims$green_per_minute_std <- normality_test (victims$green_per_minute) +victims$expected_green_rate_std <- normality_test (victims$expected_green_rate) +#cor(victims$expected_green_rate_std, victims$green_per_minute_std) # expected_green_rate is redundant + +#-------------------------------------------------------------------------- +## Average victim search times +#-------------------------------------------------------------------------- + +victims$green_search_time_std <- normality_test (victims$green_search_time) +victims$yellow_search_time_std <- normality_test (victims$yellow_search_time) + +#-------------------------------------------------------------------------- +## Spare time to rescue remaining yellow victims - +## Time to 5-minute mark - (number of remaining yellow victims * (average search time + time to be rescued (15 seconds))) +#-------------------------------------------------------------------------- + +victims$spare_time_yellows <- victims$seconds_remaining - + ((victims$total_yellow_victims_remaining) * (victims$yellow_search_time + 15)) +victims$spare_time_yellows_std <- normality_test (victims$spare_time_yellows) + +#-------------------------------------------------------------------------- +## navigation_strategy +#-------------------------------------------------------------------------- + +table(rooms$nav_strategy, useNA="always") + +rooms$nav_strat_yellow <- as.factor(ifelse( + rooms$nav_strategy =="Yellow First", + c("Yellow First"), c("Other"))) + +#-------------------------------------------------------------------------- +## nav_strategy update - Create index within trial and compare strategies +#-------------------------------------------------------------------------- + +rooms <- rooms[with(rooms, order(trial_id, event_index_number)),] +rooms <- rooms %>% + group_by(trial_id) %>% + mutate(event_ID = dplyr::row_number()) + +nav_strat_prev <- subset(rooms, select=c(nav_strategy, member_id, trial_id, event_ID)) +nav_strat_prev$event_ID <- nav_strat_prev$event_ID+1 +colnames (nav_strat_prev)[1] <- c("navigation_strategy_prev") +rooms <- merge(rooms,nav_strat_prev, by=c("member_id","trial_id", "event_ID"), all.x = TRUE) +rooms$nav_strategy_update <- as.factor( + ifelse(rooms$nav_strategy==rooms$navigation_strategy_prev, + c("No Update"), c("Update"))) + +#-------------------------------------------------------------------------- +## Learning - Create index within participant to track progression from trial to trial +#-------------------------------------------------------------------------- + +rooms <- rooms[with(rooms, order(trial_id, event_index_number)),] +rooms <- rooms %>% group_by(member_id) %>% mutate(event_ID_2 = dplyr::row_number()) + +#-------------------------------------------------------------------------- +## Learning - Create player aggregate number of empty rooms entered across trials +#-------------------------------------------------------------------------- + +rooms_max <- aggregate(cbind(rooms_entered_empty,rooms_entered_not_empty) ~ trial_id + member_id, rooms, max) +#rooms_max <- data.frame(trial_id=rooms_max$trial_id, +# member_id=rooms_max$member_id, +# rooms_entered_empty_running=rooms_max$rooms_entered_empty, +# rooms_entered_not_empty_running=rooms_max$rooms_entered_not_empty) + +rooms_max_prev_1 <- data.frame(trial_id=rooms_max$trial_id+1, + member_id=rooms_max$member_id, + empty_max_prev_1=rooms_max$rooms_entered_empty, + not_empty_max_prev_1=rooms_max$rooms_entered_not_empty) + +rooms_max_prev_2 <- data.frame(trial_id=rooms_max$trial_id+2, + member_id=rooms_max$member_id, + empty_max_prev_2=rooms_max$rooms_entered_empty, + not_empty_max_prev_2=rooms_max$rooms_entered_not_empty) + +rooms <- merge(rooms,rooms_max_prev_1, by=c("member_id","trial_id"), all.x = TRUE) +rooms$empty_max_prev_1[is.na(rooms$empty_max_prev_1)] <- 0 +rooms$not_empty_max_prev_1[is.na(rooms$not_empty_max_prev_1)] <- 0 + +rooms$rooms_entered_empty_running <- rooms$rooms_entered_empty +rooms$rooms_entered_not_empty_running <- rooms$rooms_entered_not_empty + +rooms$rooms_entered_empty_running <- rooms$rooms_entered_empty_running + rooms$empty_max_prev_1 +rooms$rooms_entered_not_empty_running <- rooms$rooms_entered_not_empty_running + rooms$not_empty_max_prev_1 + +rooms <- merge(rooms,rooms_max_prev_2, by=c("member_id","trial_id"), all.x = TRUE) +rooms$empty_max_prev_2[is.na(rooms$empty_max_prev_2)] <- 0 +rooms$not_empty_max_prev_2[is.na(rooms$not_empty_max_prev_2)] <- 0 + +rooms$rooms_entered_empty_running <- rooms$rooms_entered_empty_running + rooms$empty_max_prev_2 +rooms$rooms_entered_not_empty_running <- rooms$rooms_entered_not_empty_running + rooms$not_empty_max_prev_2 + +rooms <- rooms[,-which(names(rooms) %in% c("empty_max_prev_1", + "empty_max_prev_2", + "not_empty_max_prev_1", + "not_empty_max_prev_2"))] + +#-------------------------------------------------------------------------- +## Learning - Merge player aggregate number of empty rooms entered across trials to games data +#-------------------------------------------------------------------------- + +rooms_trial_max <- aggregate(cbind(rooms_entered_empty_running,rooms_entered_not_empty_running) ~ trial_id + member_id, rooms, max) +colnames(rooms_trial_max) <- c("trial_id", "member_id", "empty_max_trial", "not_empty_max_trial") +games <- merge(games,rooms_trial_max, by=c("member_id","trial_id"), all.x = TRUE) + +games$empty_max_trial_std <- normality_test (games$empty_max_trial) +games$not_empty_max_trial_std <- normality_test (games$not_empty_max_trial) + +#-------------------------------------------------------------------------- +## Learning - Merge player aggregate number of empty rooms entered in the previous trial +#-------------------------------------------------------------------------- + +games_post <- subset(games, select=c(trial_id, member_id, empty_max_trial, not_empty_max_trial)) +colnames(games_post)[3:4] <- c("empty_max_trial_prev", "not_empty_max_trial_prev") +games_post$trial_id <- games_post$trial_id+1 +games <- merge(games,games_post, by=c("member_id","trial_id"), all.x = TRUE) + +games$empty_max_trial_prev[is.na(games$empty_max_trial_prev)] <- 0 +games$not_empty_max_trial_prev[is.na(games$not_empty_max_trial_prev)] <- 0 + +games$empty_max_trial_prev_std <- normality_test (games$empty_max_trial_prev) +games$not_empty_max_trial_prev_std <- normality_test (games$not_empty_max_trial_prev) + +#-------------------------------------------------------------------------- +## navigation_strategy +#-------------------------------------------------------------------------- + +table(rooms$nav_strategy, useNA="always") + +#-------------------------------------------------------------------------- +## rooms_entered_empty (device punishments) +#-------------------------------------------------------------------------- + +rooms$rooms_entered_empty_std <- normality_test (rooms$rooms_entered_empty) +# Extreme positive skew + +#-------------------------------------------------------------------------- +## rooms_entered_not_empty (device rewards) +#-------------------------------------------------------------------------- + +rooms$rooms_entered_not_empty_std <- normality_test (rooms$rooms_entered_not_empty) +# Some positive skew + +#-------------------------------------------------------------------------- +## Add device incentives and disincentives games-level dataset +#-------------------------------------------------------------------------- + +rooms_entered_empty <- aggregate(rooms_entered_empty ~ member_id+trial_id, data = events, max) +rooms_entered_not_empty <- aggregate(rooms_entered_not_empty ~ member_id+trial_id, data = events, max) +games <- merge(games,rooms_entered_empty, by=c("member_id","trial_id"), all.x = TRUE) +games <- merge(games,rooms_entered_not_empty, by=c("member_id","trial_id"), all.x = TRUE) + +games$rooms_entered_empty_std <- normality_test (games$rooms_entered_empty) +games$rooms_entered_not_empty_std <- normality_test (games$rooms_entered_not_empty) + +#-------------------------------------------------------------------------- +## Add prior use of device summary data to games-level dataset +#-------------------------------------------------------------------------- + +prior_use <- aggregate(prior_use_consistent ~ member_id+trial_id, data = events, max) +prior_use$trial_id <- prior_use$trial_id+1 +games <- merge(games,prior_use, by=c("member_id","trial_id"), all.x = TRUE) +games$prior_use_consistent_std <- normality_test (games$prior_use_consistent) + +#-------------------------------------------------------------------------- +## Add skipped yellow victims to games-level dataset +#-------------------------------------------------------------------------- + +skipped_yellow <- aggregate(left_behind_yellow ~ member_id+trial_id, data = events, max) +games <- merge(games,skipped_yellow, by=c("member_id","trial_id"), all.x = TRUE) +games$left_behind_yellow_max_std <- normality_test (games$left_behind_yellow) +games$left_behind_yellow_cat <- as.factor( + ifelse(games$left_behind_yellow==0, + c("Skipped 0 yellows"), c("Skipped 1 or more yellows"))) + +#-------------------------------------------------------------------------- +## Add satisficing tendency measure to event-level dataset +#-------------------------------------------------------------------------- + +sat_tendency <- subset(games, select = c(trial_id, sat_tendency_std, sat_tendency_std2)) +victims <- merge( + victims, sat_tendency, by=c("trial_id"), all.x = TRUE) + +#-------------------------------------------------------------------------- +## Save processed data for analysis +#-------------------------------------------------------------------------- + +write.csv (games, "results_data_proc.csv") +write.csv (events, "results_events_proc.csv") +write.csv (victims, "results_victims_proc.csv") +write.csv (rooms, "results_rooms_proc.csv") + +#-------------------------------------------------------------------------- +## Render into notebook (run code below in separate script to test this one) +#-------------------------------------------------------------------------- + +#setwd ("C:/Users/C_Pablo_Diego-Rosell/Desktop/Projects/ASIST/Analytics/Phase_1") # Local execution +#rmarkdown::render("data_prep.R") diff --git a/R/hyp_eda.R b/R/hyp_eda.R new file mode 100644 index 0000000..fe9b296 --- /dev/null +++ b/R/hyp_eda.R @@ -0,0 +1,901 @@ +#' --- +#' title: "ASIST - Gallup Experiment 1 Exploratory Analysis" +#' author: "Pablo Diego-Rosell, PhD" +#' date: "November 27, 2020" +#' output: +#' html_document: +#' toc: true +#' theme: united +#' --- + +#' #Setup + +#-------------------------------------------------------------------------- +# Set working directory, load libraries and data +#-------------------------------------------------------------------------- + +rm(list=ls(all=t)) +setwd ("C:/Users/C_Pablo_Diego-Rosell/Desktop/Projects/ASIST/Data/Phase_1") # Local execution +#setwd ("//gallup/dod_clients/DARPA_ASIST/CONSULTING/Analytics/Phase_1/Data/processed_data") # Server execution +if (!require("pacman")) install.packages("pacman") +library ("pacman") +pacman::p_load(rstatix, ggpubr, corrgram, lme4, lmerTest, dplyr, sjPlot, sjmisc, + ggeffects, lattice, see, randomForest, VSURF, tibble, tidyverse, + reshape2, plotly, MASS, rstanarm, bayesplot) +games <- read.csv ("results_data_proc.csv") +events <- read.csv ("results_events_proc.csv") +victims <- read.csv ("results_victims_proc.csv") +rooms <- read.csv ("results_rooms_proc.csv") + +# Create auxiliary variables and subsets + +rooms$quarters <- cut(rooms$seconds_remaining, + breaks=c(-Inf, 150, 300, 450, Inf), + labels=c(4,3,2,1)) +rooms$trial_order <- as.factor(rooms$trial_order) +victims_first_half <- subset (victims, seconds_remaining >=300) +rooms_first_half <- subset (rooms, seconds_remaining >=300) + +#-------------------------------------------------------------------------- +# Exploratory analysis function +#-------------------------------------------------------------------------- + +exp_func <- function (DV, IV, data) { + print (paste("DESCRIPTIVE ANALYSIS OF DV =", DV, "& IV =", IV)) + depvar <- data[c(DV)][,1] + indvar <- data[c(IV)][,1] + formula = as.formula(paste(DV, IV, sep="~")) + print(paste("indvar =", IV)) + if (class(depvar)=="factor" & class(indvar) =="factor") { + print(table(indvar)) + print(prop.table(table(depvar, indvar), 2)) + plot(depvar, indvar, xlab=DV, ylab=IV) + print(summary(glm(formula, data = data, family="binomial"))) + } else if (class(depvar)=="factor" & (class(indvar)=="integer" | class(indvar)=="numeric")) { + print(paste("Sample size for IV =", length(indvar))) + print(summary(indvar)) + print(aggregate(indvar~depvar,data=data, mean, na.rm=T)) + boxplot(indvar~depvar, data=data, xlab=DV, ylab=IV) + + print(summary(glm(formula = formula, data = data, family="binomial"))) + } else if ((class(depvar)=="integer" | class(depvar)=="numeric") & class(indvar)=="factor") { + print(table(indvar)) + print(aggregate(depvar~indvar,data=data, mean, na.rm=T)) + boxplot(depvar~indvar,data=data, xlab=DV, ylab=IV) + print(summary(glm(formula = formula, data = data, family="gaussian"))) + } else if ((class(depvar)=="integer" | class(depvar)=="numeric") & (class(indvar)=="integer" | class(indvar)=="numeric")) { + print(paste("Sample size for IV =", length(indvar))) + print(summary(indvar)) + lm_results <- lm(formula = formula, data = data) + with(data,plot(depvar, indvar)) + abline(lm_results) + print(summary(lm_results)) + } else { + print("NO OUTPUT") + } +} + +#-------------------------------------------------------------------------- +# Modeling function +#-------------------------------------------------------------------------- + +hlm_func <- function (formula, data, family = "binomial", priors) { + print ("HYPOTHESIS TEST") + options(mc.cores = parallel::detectCores()) + nIters <- 2000 + if (family == "binomial"){ + freq_model <- glmer(formula, data = data, family = binomial, na.action = na.exclude) + bayes_model<- stan_glmer(formula,data=data, family = binomial(link = "logit"), + prior = priors, + prior_intercept = cauchy(0, 2.5), + chains = 4, iter = nIters, + seed = 12345, refresh = 1000) + } else { + freq_model <- lmer(formula, data = data, na.action = na.exclude) + bayes_model<- stan_glmer(formula,data=data, family = gaussian(link = "identity"), + prior = priors, + prior_intercept = cauchy(0, 2.5), + chains = 4, iter = nIters, + seed = 12345, refresh = 1000) + } + print(summary(freq_model)) + print(summary(bayes_model,pars = c("beta"), probs = c(0.025, 0.1, 0.5, 0.9, 0.975), digits = 2)) + print(plot(bayes_model, "areas", pars = c("beta"), prob = 0.95, prob_outer = 0.99)+ + geom_vline(xintercept = 0, linetype = "dashed")) + + ggtitle("Posterior distribution of IV effects", "with medians and 95% intervals") + print(plot(bayes_model, "trace", pars = c("beta"))) + return (list(bayes_model, freq_model)) +} + +vague_priors <- normal(location = 0, scale = 2.5) # Use weakly informative priors for exploratory purposes + +#-------------------------------------------------------------------------- +# Model plotting function +#-------------------------------------------------------------------------- + +hlm_plots <- function (model, data, type, group="device") { + if (type == "re") { + print (plot_model(model, type = "re")) + } + if (type == "int") { + print (plot_model(model, type = "int")) + print (plot_model(model, terms=c("event_ID_2", "member_id"), type = "re")) + } + data2 <- mutate(data, fv2 = predict(model)) + re <- ranef(model)$member_id + b <- fixef(model) + map <- data2$member_id + ggplot(data2, aes(event_ID_2, fv2, group=member_id, color=data2[,group])) + geom_line() +} + +#-------------------------------------------------------------------------- +# Variable selection function +#-------------------------------------------------------------------------- + +rf_eda <- function (outcome, predictors, dataset) { + variables <- c(predictors, outcome) + rf_data <- dataset[variables] + rf_data <- rf_data[complete.cases(rf_data),] + print(paste("Complete cases:", length(rf_data$training))) + formula <- as.formula(paste(outcome, "~.")) + + print("randomForest Importance") + fittedRF <- randomForest(formula, rf_data, ntree=10) + print(varImpPlot(fittedRF)) + + #print("VSURF Importance") + #games.vsurf <- VSURF(formula, data=dataset[variables], ntree=10, mtry = 100, verbose = F) + #plot(games.vsurf, step = "thres", imp.sd = F, var.names = T) + #print(colnames(rf_data[games.vsurf$varselect.thres])) + + print("Les Importance") + rf_bld_1 <- ranger::ranger(formula, data = rf_data, importance = 'impurity_corrected') + print(rf_bld_1) + les_importance <- ranger::importance(rf_bld_1) %>% + enframe("Variable", "Importance") %>% + mutate(Variable = fct_reorder(Variable, Importance), + New = Variable %in% setdiff(names(rf_data), names(rf_data))) %>% + arrange(desc(Importance)) %>% + dplyr::slice(1:10) + print(les_importance) + ggplot(aes(x = Variable, y = Importance), data=as.data.frame(les_importance)) + geom_col() + coord_flip() + labs(title = "Variable Importance") +} + +#' #Game-level correlations + +corrgram(games[, grepl("_std", names(games))], + lower.panel=panel.cor, + upper.panel=panel.pie) + +#' #Variable selection by outcome - Victim Strategies +#-------------------------------------------------------------------------- +# Variable selection by outcome - Victim Strategies +#-------------------------------------------------------------------------- + +#' ##1. "Prioritized strategy" predictions +# 1.1. Predictions for full dataset + +#non_missing_game_vars <- colnames(games)[colSums(is.na(games)) == 0] +games_original_vic_vars <- c('complexity', + 'training', + 'original_nav_strategy', + 'trial_order', + 'sat_tendency_std', + 'spatial_ability_std', + 'videogame_experience_std', + 'competency_score_std', + 'empty_max_trial_prev_std', + 'not_empty_max_trial_prev_std') + +rf_eda(outcome = "original_vic_strat_yellow", predictors = games_original_vic_vars, dataset = games) +rf_eda(outcome = "original_vic_strat_seq", predictors = games_original_vic_vars, dataset = games) + +### Navigation strategy and competency strong predictors of prioritized victim strategy. + +exp_func (DV="original_vic_strat_yellow", IV="competency_score_std", data=games) +exp_func (DV="original_vic_strat_seq", IV="competency_score_std", data=games) + +ggplot(games, aes(original_victim_strategy, ..count..)) + + geom_bar(aes(fill=original_nav_strategy), position="stack") + + ggtitle("Original Victim Strategy by Original Navigation Strategy") + +ggplot(games, aes(x=competency_score_std, fill=factor(original_victim_strategy))) + + geom_density(position="identity") + + ggtitle("Competency scores by Original Victim Strategy") + +ggplot(games, aes(x=competency_score_std, fill=factor(original_nav_strategy))) + + geom_density(position="identity") + + facet_wrap(~original_victim_strategy, ncol=3) + + ggtitle("Competency scores x Original Victim Strategy x Original Navigation Strategy") + +### Navigation strategy and spatial ability strong predictors of sequential victim strategy. +### Spatial ability possibly playing same role as competency score to pick out the "mixed" + +ggplot(games, aes(x=spatial_ability_std, fill=factor(original_vic_strat_seq))) + + geom_density(position="identity") + + facet_wrap(~original_nav_strategy, ncol=3) + + ggtitle("Spatial ability x Original Victim Strategy x Original Navigation Strategy") + +# 1.2. Predictions for second and third trial, including information from prior trials. + +games_original_vic_vars2 <- c(games_original_vic_vars, + 'vic_strat_yel_prev_most_time', + 'vic_strat_yel_prev_original', + 'vic_strat_seq_prev_most_time', + 'nav_strat_seq_prev_most_time', + 'nav_strat_seq_prev_original', + 'nav_strat_yel_prev_most_time') + +rf_eda(outcome = "original_vic_strat_yellow", predictors = games_original_vic_vars2, dataset = games) +rf_eda(outcome = "original_vic_strat_seq", predictors = games_original_vic_vars2, dataset = games) + +### Nothing special beyond the confirmed prediction that prior strategy use is strongly related to current strategy prioritization. + +#' ##2. "Execute strategy" predictions + +games_most_time_vars <- c(games_original_vic_vars, + 'original_victim_strategy', + 'original_nav_strategy') + +rf_eda(outcome = "most_time_vic_strat_yellow", predictors = games_most_time_vars, dataset = games) +rf_eda(outcome = "most_time_vic_strat_seq", predictors = games_most_time_vars, dataset = games) +rf_eda(outcome = "victim_strategy_data.Sequential.time_spent", predictors = games_most_time_vars, dataset = games) +rf_eda(outcome = "victim_strategy_data.Yellow.First.time_spent", predictors = games_most_time_vars, dataset = games) + +# Spatial ability, competency and experience add predictive power to original victim strategy + +ggplot(games, aes(most_time_vic_strat, ..count..)) + + geom_bar(aes(fill=original_victim_strategy), position="stack") + + ggtitle("Executed victim strategy and self-reported prioritized strategy") + +ggplot(games, aes(x=spatial_ability_std, fill=most_time_vic_strat_seq)) + + geom_density(position="identity", alpha = 0.4) + + ggtitle("Spatial ability and Executed victim strategy") + +ggplot(games, aes(x=videogame_experience_std, fill=most_time_vic_strat)) + + geom_density(position="identity", alpha = 0.4) + + ggtitle("Videogaming Experience and Executed victim strategy") + +ggplot(games, aes(x=competency_score_std, fill=most_time_vic_strat)) + + geom_density(position="identity", alpha = 0.4) + + ggtitle("Minecraft Competency and Executed victim strategy") + +ggplot(games, aes(x=spatial_ability_std, fill=factor(original_victim_strategy))) + + geom_density(position="identity") + + facet_wrap(~most_time_vic_strat, ncol=3) + + ggtitle("Competency scores x Original Victim Strategy x Original Navigation Strategy") + + +#' ##3. "Workload" predictions + +games_workload_vars <- c(games_original_vic_vars, + 'navigation_strategy_data.Sequential.time_spent', + 'navigation_strategy_data.Sequential.score', + 'navigation_strategy_data.Sequential.points_per_minute', + 'most_time_nav_strat', + 'most_time_nav_strat_seq', + 'most_time_nav_strat_yel', + 'empty_max_trial_std', + 'not_empty_max_trial_std', + 'rooms_entered_empty', + 'rooms_entered_not_empty_std', + 'left_behind_yellow', + 'left_behind_yellow_max_std', + 'left_behind_yellow_cat', + 'most_points_nav_strat', + "most_time_vic_strat_yellow", + "most_time_vic_strat_seq") + +rf_eda(outcome = "workload_std", predictors = games_workload_vars, dataset = games) + +games$sat_tendency_std_cut <- cut(games$sat_tendency_std,breaks=c(-2, -1, 0, 1, 2)) +ggplot(data=subset(games, !is.na(sat_tendency_std_cut)), + aes(x=factor(sat_tendency_std_cut), y=workload_std)) + + stat_summary(fun="mean", geom="bar") + + stat_summary(fun.data = mean_se, geom = "errorbar", width=0.3, size = 1.5) + + ggtitle ("Average workload and satisficing tendency") + +ggplot(games, aes(x=workload_std, y=sat_tendency_std)) + + geom_point(size=2, shape=23)+ + stat_smooth(aes(x=workload_std, y=sat_tendency_std), method = lm, se = T) + + ggtitle ("Self-reported workload by satisficing tendency") + +ggplot(games, aes(x=factor(training), y=workload_std)) + + stat_summary(fun="mean", geom="bar") + + stat_summary(fun.data = mean_se, geom = "errorbar", width=0.3, size = 1.5) + + ggtitle ("Average workload and training condition") + +ggplot(data=subset(games, most_time_vic_strat !="Green.First"), + aes(x=factor(most_time_vic_strat), y=workload_std)) + + stat_summary(fun="mean", geom="bar") + + stat_summary(fun.data = mean_se, geom = "errorbar", width=0.3, size = 1.5) + + ggtitle ("Average workload and executed victim strategy") + +#' ##4. "Current Utility" predictions + +games_utility_vars <- c(games_workload_vars, + "workload_std") + +rf_eda(outcome = "final_score_std", predictors = games_utility_vars, dataset = games) + +ggplot(games, aes(x=final_score_std, y=navigation_strategy_data.Sequential.points_per_minute)) + + geom_point(size=2, shape=23)+ + stat_smooth(aes(x=final_score_std, y=navigation_strategy_data.Sequential.points_per_minute), method = lm, se = T) + + ggtitle ("Final Score by points per minute while using a sequential navigation strategy") + +ggplot(games, aes(x=final_score_std, y=rooms_entered_not_empty_std)) + + geom_point(size=2, shape=23)+ + stat_smooth(aes(x=final_score_std, y=rooms_entered_not_empty_std), method = lm, se = T) + + ggtitle ("Final Score by non-empty rooms entered") + +ggplot(games, aes(x=most_time_nav_strat, y=final_score_std)) + + stat_summary(fun="mean", geom="bar") + + stat_summary(fun.data = mean_se, geom = "errorbar", width=0.3, size = 1.5) + + ggtitle ("Average score by most frequently used navigation strategy") + +ggplot(subset(games, most_time_vic_strat!="Green.First"), + aes(x=most_time_vic_strat, y=final_score_std)) + + stat_summary(fun="mean", geom="bar") + + stat_summary(fun.data = mean_se, geom = "errorbar", width=0.3, size = 1.5) + + ggtitle ("Average score by most frequently used victim strategy") + + +#' ##5. "Update Strategy" predictions + +victims_first_half <- subset (victims, seconds_remaining >=300) + +victims_update_vars <- c("next_victim_distance", + "complexity", + "trial_order", + "training", + "tradeoff", + "device", + "five_minutes", + "total_yellow_victims_remaining_std", + "all_yellow_rescued", + "yellow_per_minute_std", + "green_per_minute_std", + "sat_tendency_std", + "seconds_remaining_std") +victims_update_vars2 <- c(victims_update_vars, + "victim_strategy_prev", + "green_search_time_std", + "yellow_search_time_std", + "spare_time_yellows_std") + +# 5.1. Predictions for full dataset + +rf_eda(outcome = "vic_strat_yellow", predictors = victims_update_vars, dataset = victims_first_half) +rf_eda(outcome = "victim_strategy_update", predictors = victims_update_vars, dataset = victims_first_half) + +victims_first_half_notna <- subset(victims_first_half, !is.na(victim_strategy_update)) + +ggplot(victims_first_half_notna, aes(x=green_per_minute_std, fill=victim_strategy_update)) + + geom_density(position="identity", alpha = 0.4) + + ggtitle("Probability of victim strategy update by rescue rates") + +ggplot(victims_first_half_notna, aes(x=yellow_per_minute_std, fill=victim_strategy_update)) + + geom_density(position="identity", alpha = 0.4) + + ggtitle("Probability of victim strategy update by yellow rate") + +ggplot(victims_first_half_notna, aes(x=seconds_remaining, fill=victim_strategy_update)) + + geom_density(position="identity", alpha = 0.4) + + ggtitle("Probability of victim strategy update by seconds remaining") + +# 5.2. Predictions for partial dataset + +rf_eda(outcome = "vic_strat_yellow", predictors = victims_update_vars2, dataset = victims_first_half) +rf_eda(outcome = "victim_strategy_update", predictors = victims_update_vars2, dataset = victims_first_half) + +victims_first_half_notna$victim_strategy_update_num <- + ifelse(victims_first_half_notna$victim_strategy_update=="Update", 1, 0) +ggplot(victims_first_half_notna, aes(x=victim_strategy_prev, y=victim_strategy_update_num)) + + stat_summary(fun="mean", geom="bar") + + stat_summary(fun.data = mean_se, geom = "errorbar", width=0.3, size = 1.5) + + ggtitle ("Probability of victim strategy update by victim strategy used in previous trial") + +#' #Variable selection by outcome - Navigation Strategies +#-------------------------------------------------------------------------- +# Variable selection by outcome - Navigation Strategies +#-------------------------------------------------------------------------- + +#' ##1. "Prioritized strategy" predictions +# 1.1. Predictions for full dataset + +games_original_nav_vars <- c('complexity', + 'training', + 'original_victim_strategy', + 'trial_order', + 'sat_tendency_std', + 'spatial_ability_std', + 'videogame_experience_std', + 'competency_score_std', + 'empty_max_trial_prev_std', + 'not_empty_max_trial_prev_std') + +rf_eda(outcome = "original_nav_strat_yellow", predictors = games_original_nav_vars, dataset = games) +rf_eda(outcome = "original_nav_strat_seq", predictors = games_original_nav_vars, dataset = games) + +ggplot(games, aes(original_nav_strategy, ..count..)) + + geom_bar(aes(fill=training), position="stack") + + ggtitle("Original Navigation Strategy by training condition") + +ggplot(games, aes(original_nav_strategy, ..count..)) + + geom_bar(aes(fill=original_victim_strategy), position="stack") + + ggtitle("Original Navigation Strategy by Original Victim Strategy") + +ggplot(games, aes(x=sat_tendency_std, fill=factor(original_nav_strategy))) + + geom_density(position="identity") + + ggtitle("Satisficing tendency scores by Original Navigation Strategy") + +#' ##2. "Execute strategy" predictions +# 2.1. Predictions for full dataset + +games_most_time_vars <- c(games_original_nav_vars, + 'original_nav_strategy') + +rf_eda(outcome = "most_time_nav_strat_yel", predictors = games_most_time_vars, dataset = games) +rf_eda(outcome = "most_time_nav_strat_seq", predictors = games_most_time_vars, dataset = games) + +ggplot(games, aes(most_time_nav_strat, ..count..)) + + geom_bar(aes(fill=training), position="stack") + + ggtitle("Executed Navigation Strategy by training condition") + +ggplot(games, aes(x=competency_score_std, fill=factor(most_time_nav_strat))) + + geom_density(position="identity") + + ggtitle("Competency scores by Executed Navigation Strategy") + +ggplot(games, aes(x=most_time_nav_strat, y=empty_max_trial_prev_std)) + + stat_summary(fun="mean", geom="bar") + + stat_summary(fun.data = mean_se, geom = "errorbar", width=0.3, size = 1.5) + + ggtitle ("Executed strategy by number of empty rooms entered in previous trial") + +#' ##3. "Update strategy" predictions +# 3.1. Predictions for full dataset + +rooms_update_vars <- c("yellow_victims_in_current_room", + "green_victims_in_current_room", + "nav_strategy", + "room_type", + "rooms_entered_empty", + "rooms_entered_not_empty", + "prior_use_consistent", + "prior_use_inconsistent", + "openings_seen", + "complexity", + "trial_order", + "training", + "tradeoff", + "device", + "seconds_remaining_std", + "five_minutes", + "event_ID_2", + "rooms_entered_empty_running", + "rooms_entered_not_empty_running", + "rooms_entered_empty_std", + "rooms_entered_not_empty_std") + +rf_eda(outcome = "nav_strategy_update", predictors = rooms_update_vars, dataset = rooms_first_half) + +rooms_first_half$nav_strategy_update_num <- + ifelse(rooms_first_half$nav_strategy_update=="Update", 1, 0) + +ggplot(rooms_first_half, aes(x=nav_strategy, y=nav_strategy_update_num)) + + stat_summary(fun="mean", geom="bar") + + stat_summary(fun.data = mean_se, geom = "errorbar", width=0.3, size = 1.5) + + ggtitle ("Probability of Navigation strategy update by current navigation strategy") + +rooms_first_half_notna <- subset(rooms_first_half, !is.na(nav_strategy_update)) +ggplot(rooms_first_half_notna, aes(x=prior_use_consistent, fill=factor(nav_strategy_update))) + + geom_density(position="identity", alpha=0.5) + + ggtitle("Consistent use of device by Executed Navigation Strategy") + +ggplot(rooms_first_half_notna, aes(x=prior_use_inconsistent, fill=factor(nav_strategy_update))) + + geom_density(position="identity", alpha=0.5) + + ggtitle("Inconsistent use of device by Executed Navigation Strategy") + +# 3.1. Predictions for partial dataset + +rooms_update_vars2 <- c(rooms_update_vars, + "exited_room_type", + "left_behind_yellow", + "left_behind_green", + "navigation_strategy_prev", + 'victim_strategy_data.Sequential.time_spent', + 'victim_strategy_data.Sequential.score', + 'victim_strategy_data.Sequential.points_per_minute', + 'prior_use_consistent_std') + +rf_eda(outcome = "nav_strategy_update", predictors = rooms_update_vars, dataset = rooms_first_half) + +#' ##4. Learning Functions - Device Use +#-------------------------------------------------------------------------- +# 4. Learning Functions - Device Use +#-------------------------------------------------------------------------- + +# 4.1. Exposure to positive and negative incentives following device beeps will increase the probability of learning the device (per post-strategy survey). +# Explore learning as a growth curve model +# Differences between device training and no device training groups + +std.error <- function(x) sqrt(var(x)/length(x)) +time_series <- rooms %>% group_by(device, event_ID) %>% + summarise(empty_mean = mean(rooms_entered_empty), + empty_lower = mean(rooms_entered_empty)-2*(std.error(rooms_entered_empty)), + empty_upper = mean(rooms_entered_empty)+2*(std.error(rooms_entered_empty)), + consistent_use_mean = mean(prior_use_consistent), + consistent_use_lower = mean(prior_use_consistent)-2*(std.error(prior_use_consistent)), + consistent_use_upper = mean(prior_use_consistent)+2*(std.error(prior_use_consistent))) + +ggplot(data=time_series, aes(x = event_ID, y = empty_mean, colour=device)) + geom_line() + + geom_ribbon(aes(ymin=time_series$empty_lower, ymax=time_series$empty_upper), linetype=2, alpha=0.1) + +ggplot(data=time_series, aes(x = event_ID, y = consistent_use_mean, colour=device)) + geom_line() + + geom_ribbon(aes(ymin=time_series$consistent_use_lower, ymax=time_series$consistent_use_upper), + linetype=2, alpha=0.1) + +# Variation among "no device" group, who are the learners + +rooms_nodevice <- subset (rooms, device == "NoDevice") +table(rooms_nodevice$rooms_entered_empty_running) + +ggplot(rooms_nodevice, aes(x=event_ID_2, y=rooms_entered_empty, group=trial_id)) + + geom_line() + +ggplot(rooms_nodevice, aes(x=event_ID_2, y=rooms_entered_empty_running, group=trial_id)) + + geom_line(aes(color=trial_order)) + +ggplot(rooms, aes(x=event_ID_2, y=rooms_entered_empty_running, group=trial_id)) + + geom_line(aes(color=trial_order)) + +ggplot(rooms, aes(x=event_ID_2, y=rooms_entered_empty_running, group=trial_id)) + + geom_line(aes(color=device)) + +# Individual differences in curves + +#trellis.device(color=T) +xyplot(rooms_entered_empty_running ~ event_ID_2 | member_id, data=rooms, + main="Emtpy Rooms Entered", + panel=function(x, y) { + panel.xyplot(x, y) + panel.lmline(x, y, lty=4) + }) + +# Since all start at 0, a random slope model should account for all differences +# Growth curve modelling +# Random intercepts model + +rooms$member_id <- as.factor(rooms$member_id) +ri_form <- as.formula (rooms_entered_empty_running ~ event_ID_2 + (1 | member_id)) +ri_mod <- hlm_func(formula= ri_form, data = rooms, family="gaussian", priors = vague_priors) +hlm_plots (ri_mod[[2]], rooms, "re") + +# Random intercepts and slopes model + +ris_form <- as.formula (rooms_entered_empty_running ~ event_ID_2 + (event_ID_2|member_id)) +ris_mod <- hlm_func(formula= ris_form, data = rooms, family="gaussian", priors = vague_priors) +hlm_plots (ris_mod[[2]], rooms, "re") + +# Random intercepts + slopes + interaction model + +risi_form <- as.formula (rooms_entered_empty_running ~ event_ID_2*device + (event_ID_2|member_id)) +risi_model <- hlm_func(formula= risi_form, data=rooms, family="gaussian", priors = vague_priors) +hlm_plots (risi_model[[2]], rooms, "int") +# Running into convergence issues, plus random intercepts not really necessary + +# Random slopes + interaction model + +rsi_form <- as.formula (rooms_entered_empty_running ~ event_ID_2*device + (0+event_ID_2|member_id)) +rsi_model <- hlm_func(formula= rsi_form, data=rooms, family="gaussian", priors = vague_priors) +hlm_plots (rsi_model[[2]], rooms, "int") + +# Random slopes + interactions model (add minecraft competency) + +competency <- subset (games, select = c(competency_score_std, + videogame_experience_std, + original_nav_strategy, + trial_id)) + +rooms <- merge (rooms, competency, by=c("trial_id"), all.x = TRUE) +rsi2_form <- as.formula (rooms_entered_empty_running ~ + event_ID_2*device*competency_score_std + + (0+event_ID_2|member_id)) +rsi2_model <- hlm_func(formula= rsi2_form, data=rooms, family="gaussian", priors = vague_priors) +hlm_plots (rsi2_model[[2]], rooms, "int") + +# Competency amplifies the effect of device training. +# At low competency levels, device training generates a small advantage on empty rooms. +# At high competency levels, device training generates a large advantage on empty rooms. + +rsi3_form <- as.formula (rooms_entered_empty_running ~ + event_ID_2*device*competency_score_std*videogame_experience_std + + (0+event_ID_2|member_id)) +rsi3_model <- hlm_func(formula= rsi3_form, data=rooms, family="gaussian", priors = vague_priors) +hlm_plots (rsi3_model[[2]], rooms, "int") + +# Look at no device group only to pinpoint their learning point + +rooms_nodevice <- subset (rooms, device=="NoDevice") +rsi4_form <- as.formula (rooms_entered_empty_running ~ + event_ID_2*competency_score_std*videogame_experience_std + + (0+event_ID_2|member_id)) +rsi4_model <- hlm_func(formula= rsi4_form, data=rooms_nodevice, family="gaussian", priors = vague_priors) +hlm_plots (rsi4_model[[2]], rooms_nodevice, "int") + +# Split by whether participants indicate they plan to use device + +table(games$next_nav_strat_yelempt, games$device) +next_nav_strat_yelempt <- subset (games, select = c(next_nav_strat_yelempt, trial_id)) + +rooms <- merge(rooms, next_nav_strat_yelempt, by=c("trial_id"), all.x = TRUE) +rooms_nodevice_next_strat <- subset (rooms, device=="NoDevice" & !is.na(next_nav_strat_yelempt)) +rooms_nodevice_yelempt <- subset(rooms_nodevice_next_strat, next_nav_strat_yelempt !="Other") +rooms_nodevice_other <- subset(rooms_nodevice_next_strat, next_nav_strat_yelempt =="Other") + +#trellis.device(color=T) +xyplot(rooms_entered_empty_running ~ event_ID_2 | member_id, data=rooms_nodevice_yelempt, + main="Emtpy Rooms Entered", + panel=function(x, y) { + panel.xyplot(x, y) + panel.lmline(x, y, lty=4) + }) + +#trellis.device(color=T) +xyplot(rooms_entered_empty_running ~ event_ID_2 | member_id, data=rooms_nodevice_other, + main="Emtpy Rooms Entered", + panel=function(x, y) { + panel.xyplot(x, y) + panel.lmline(x, y, lty=4) + }) + +# Plot progression of learners vs rest in nodevice group +learners <- unique(rooms_nodevice_yelempt$member_id) +rooms_nodevice$learners <- ifelse(rooms_nodevice$member_id %in% learners, "Learner", "Not Learner") +rooms_nodevice_learners <- subset(rooms_nodevice, learners =="Learner") + +ggplot(rooms_nodevice_learners, aes(x=event_ID_2, y=rooms_entered_empty_running, group=trial_id)) + + geom_line(aes(color=learners)) +ggplot(rooms_nodevice, aes(x=event_ID_2, y=rooms_entered_empty_running, group=trial_id)) + + geom_line(aes(color=learners)) + +risi_form <- as.formula (rooms_entered_empty_running ~ event_ID_2*learners + (0+event_ID_2|member_id)) +risi_model <- hlm_func(formula= risi_form, data=rooms_nodevice, family="gaussian", priors = vague_priors) +hlm_plots (risi_model[[2]], rooms_nodevice, "int") + +# Explore differences by quarters + +ggplot(rooms, aes(x=event_ID_2, y=rooms_entered_empty_running, group=trial_id)) + + geom_line(aes(color=quarters)) + +ggplot(rooms_nodevice, aes(x=event_ID_2, y=rooms_entered_empty_running, group=trial_id)) + + geom_line(aes(color=quarters)) + +ggplot(rooms_nodevice_learners, aes(x=event_ID_2, y=rooms_entered_empty_running, group=trial_id)) + + geom_line(aes(color=quarters)) + +rsi_form <- as.formula (rooms_entered_empty_running ~ event_ID_2 + quarters*device + (0+event_ID_2|member_id)) +rsi_model <- hlm_func(formula= rsi_form, data=rooms, family="gaussian", priors = vague_priors) +hlm_plots (rsi_model[[2]], rooms, "int") + +#' #4. Utility: Rational Agent vs. Human + +# Calculate total victims seen by adding victims saved and left_behind during the first half + +victims_saved <- as.data.frame(table(victims_first_half$trial_id, + victims_first_half$victim_color)) + +# table(victims_saved$Freq, victims_saved$Var2) + +sorted_rooms <- subset(rooms_first_half, select=c(trial_id, room_id, event_index_number, left_behind_yellow, left_behind_green)) +sorted_rooms <- sorted_rooms[with(sorted_rooms, order(trial_id, event_index_number)), ] +sorted_rooms <- sorted_rooms %>% group_by(trial_id) %>% mutate(index = row_number()) + +sorted_rooms_prev <- subset (sorted_rooms, select =-c(room_id, event_index_number)) +sorted_rooms_prev$index <- sorted_rooms_prev$index-1 +sorted_rooms <- subset (sorted_rooms, select =-c(left_behind_yellow, left_behind_green)) +sorted_rooms <- merge (sorted_rooms, sorted_rooms_prev, by=c("trial_id", "index"), all.x = TRUE) +left_behind_room <- aggregate(cbind(left_behind_yellow, left_behind_green) ~ trial_id + room_id, + sorted_rooms, min) +left_behind_trial <- aggregate(cbind(left_behind_yellow, left_behind_green) ~ trial_id, + left_behind_room, sum) +#summary(left_behind_trial) +left_behind_trial$left_behind_yellow[left_behind_trial$left_behind_yellow==-1] <- 0 # Three negative yellow victims due to processing errors. Fix manually. + +yellow_triaged <- subset(victims_saved, Var2=="Yellow", select = c(Var1, Freq)) +green_triaged <- subset(victims_saved, Var2=="Green", select = c(Var1, Freq)) +colnames(green_triaged) <- c("trial_id", "green_triaged") +colnames(yellow_triaged) <- c("trial_id", "yellow_triaged") +triaged_trial <- merge (green_triaged, yellow_triaged, by=c("trial_id"), + all.x = TRUE, all.y = TRUE) +probs_trial <- merge (triaged_trial, left_behind_trial, by=c("trial_id"), + all.x = TRUE, all.y = TRUE) + +probs_trial$total_seen_green <- probs_trial$left_behind_green + probs_trial$green_triaged +probs_trial$total_seen_yellow <- probs_trial$left_behind_yellow + probs_trial$yellow_triaged + +hist(probs_trial$total_seen_green) # As expected, positively skewed, with max of 24 +hist(probs_trial$total_seen_yellow) # As expected, negatively skewed, with max of 10 + +probs_trial$prob_yellow <- probs_trial$yellow_triaged/probs_trial$total_seen_yellow +probs_trial$prob_green <- probs_trial$green_triaged/probs_trial$total_seen_green +#summary(probs_trial) +hist(probs_trial$prob_yellow) +hist(probs_trial$prob_green) + +### Merge Search times (earlier approach, inaccurate, calculate search times directly) +# search_times <- subset (victims_first_half, +# select = c(trial_id, +# green_search_time, +# yellow_search_time, +# event_index_number)) + +#search_times <- search_times %>% group_by(trial_id) %>% +# mutate(max_event = max(as.numeric(event_index_number))) +#search_times <- subset(search_times, event_index_number==max_event, +# select=-c(event_index_number, max_event)) + +#probs_search_trial <- merge (probs_trial, search_times, by=c("trial_id"), +# all.x = TRUE, all.y = TRUE) + +### Calculate search times directly from number seen +# Victims seen +#total_victims_seen <- probs_search_trial$total_seen_yellow + probs_search_trial$total_seen_green +yellow_seen <- probs_trial$total_seen_yellow +green_seen <- probs_trial$total_seen_green + +# Subtract triage times from search times +# Calculate total triaging time for greens and yellows. + +# Total time triaging victims +# Subtract one triaging event from each list, as we assume the last triaged victim event is not added to the average search time, which is calculated at the time of seeing the victim. +total_triaging_time <- ((probs_trial$yellow_triaged-1)*15)+((probs_trial$green_triaged-1)*7.5) + +# Total time searching for victims (includes time triaging victims) (Earlier approach, inaccurate) +#total_search_time <- (probs_search_trial$yellow_search_time*probs_search_trial$total_seen_yellow) + +# (probs_search_trial$green_search_time*probs_search_trial$total_seen_green) +#yellow_search_time <- (probs_search_trial$yellow_search_time*yellow_seen) +#green_search_time <- (probs_search_trial$green_search_time*green_seen) + +# Average search times after removing time triaging victims (Earlier approach, inaccurate) +#total_search_time_minus_triage_time <- total_search_time-total_triaging_time +#probs_search_trial$yellow_search_time2 <- (yellow_search_time-total_triaging_time)/yellow_seen +#probs_search_trial$green_search_time2 <- (green_search_time-total_triaging_time)/green_seen + +probs_trial$yellow_search_time2 <- (300-total_triaging_time)/yellow_seen +probs_trial$green_search_time2 <- (300-total_triaging_time)/green_seen + +hist(probs_trial$yellow_search_time2) +summary(probs_trial$yellow_search_time2) +hist(probs_trial$green_search_time2) +summary(probs_trial$green_search_time2) + +# Merge into "games" dataset + +games <- merge (probs_trial, games, by=c("trial_id"), all.x=T, all.y=T) + +# Plot probabilities + +ggplot(games, aes(x=final_score, y=prob_yellow)) + + geom_point(size=2, shape=23)+ + stat_smooth(aes(x=final_score, y=prob_yellow), method = lm, se = T) + + ggtitle ("Score by prob. of rescuing yellow victims in FoV (First Half)") + +ggplot(probs_trial, aes(x=prob_yellow, y=prob_green)) + + geom_point(size=2, shape=23) + + xlim(0.5,1) + ylim (0, 1) + + ggtitle ("Probability of rescuing yellow victims in FoV (First Half)") + +# Plot utility curves + +ggplot(probs_trial, aes(x=yellow_search_time2, y=prob_green)) + + geom_point(size=2, shape=23) + + xlim(15,80) + ylim (0, 1) + + stat_smooth(aes(x=yellow_search_time2, y=prob_green), method = lm, + formula = y ~ poly(x, 2), se = T) + + ggtitle ("Probability of rescuing green victims in FoV (First Half) by yellow search time") + +ggplot(games, aes(x=final_score, y=prob_green)) + + geom_point(size=2, shape=23)+ + stat_smooth(aes(x=final_score, y=prob_yellow), method = lm, + formula = y ~ x, se = T) + + ggtitle ("Score by prob. of rescuing green victims in FoV (First Half)") + +# Summarize to compare with predictions by discretizing search times + +probs_trial$yellow_search_time_10<-case_when( + (probs_trial$yellow_search_time2>=20 & probs_trial$yellow_search_time2<30)~20, + (probs_trial$yellow_search_time2>=30 & probs_trial$yellow_search_time2<40)~30, + (probs_trial$yellow_search_time2>=40 & probs_trial$yellow_search_time2<50)~40, + (probs_trial$yellow_search_time2>=50 & probs_trial$yellow_search_time2<60)~50) + +probs_trial$green_search_time_10<-case_when( + (probs_trial$green_search_time2>=10 & probs_trial$green_search_time2<30)~10, + (probs_trial$green_search_time2>=20 & probs_trial$green_search_time2<30)~20, + (probs_trial$green_search_time2>=30 & probs_trial$green_search_time2<40)~30, + (probs_trial$green_search_time2>=40 & probs_trial$green_search_time2<50)~40) + +# probs_trial$yellow_search_time_10 <- cut(probs_trial$yellow_search_time2, breaks=yellow_breaks, labels = F) # Stopped using cut as dealing with labels was difficult +#probs_trial$green_search_time_10 <- cut(probs_trial$green_search_time2, breaks=green_breaks) # Stopped using cut as dealing with labels was difficult + +optim1 <- aggregate(prob_green~green_search_time_10+yellow_search_time_10, data=probs_trial, mean, na.rm=T) + +# 5 second intervals looking choppy +# probs_trial$yellow_search_time_5 <- cut(probs_trial$yellow_search_time2, breaks=seq(15, 60, 5)) +# probs_trial$green_search_time_5 <- cut(probs_trial$green_search_time2, breaks=seq(10, 40, 5)) + +# Rational Agent + +yellow_breaks <- seq(0, 60, 10) +green_breaks <- seq(0, 60, 10) +optim2 <- read.csv("C:/Users/C_Pablo_Diego-Rosell/Desktop/Projects/ASIST/Analytics/Phase_1/optim_green.csv") +optim2 <- optim2[which(optim2$py==1), ] +#optim2$yellow_search_time_10 <- cut(optim2$search_y, breaks=yellow_breaks, labels = F) +#optim2$green_search_time_10 <- cut(optim2$search_g1, breaks=green_breaks, labels = F) +optim2 <- aggregate(optim_pg1~search_g1+search_y, data=optim2, mean, na.rm=T) +colnames(optim2)<- c("yellow_search_time", + "green_search_time", + "prob_green") + +p <- plot_ly(data = optim1, + x=optim1$yellow_search_time_10, + y=optim1$green_search_time_10, + z=optim1$prob_green, + type="mesh3d") +p <- add_trace(p, data = optim2, + x=optim2$yellow_search_time, + y=optim2$green_search_time, + z=optim2$prob_green, + type="mesh3d") +p <- layout(p, title = "Search times and green probabilities during first half", + scene = list( + xaxis = list(title = "Mean Yellow Search Time"), + yaxis = list(title = "Mean Green Search Time"), + zaxis = list(title = "Prob. of green") + )) +p <- layout(p, + title = "Button Restyle", + updatemenus = list( + list( + type = "buttons", + y = 0.8, + buttons = list( + list(method = "restyle", + args = list("visible", c(F,T)), + label = "Rational Agent"), + list(method = "restyle", + args = list("visible", c(T,F)), + label = "People in Exp1"), + list(method = "restyle", + args = list("visible", c(T,T)), + label = "Both"))) + )) +p + +htmlwidgets::saveWidget(p, "search_times.html") + +z <- acast(optim1, yellow_search_time_10~green_search_time_10, value.var="prob_green") +persp(z, phi = 30, theta = -50, + xlab = "Search time yellow", + ylab = "Search time green", + zlab = "Probability of green", + ticktype="detailed", col = "orange", shade=0.4) +plot(optim1$green_search_time_10, optim1$prob_green) +plot(optim1$yellow_search_time_10, optim1$prob_green) + + +ggplot(data = subset(probs_trial, yellow_search_time2<100), + aes(x=yellow_search_time2, + y=prob_green)) + + geom_point() + + geom_smooth(method = "glm", + method.args = list(family = "binomial"), + se = FALSE) + +# Test effect on score of probabilities of yellow and green, adjusting for competency + +formula_score <- as.formula (final_score_std ~ + prob_green + + prob_yellow + + competency_score_std + + (1 | member_id)) +model_score <- hlm_func(formula = formula_score, data = games, family="gaussian", priors = vague_priors) + diff --git a/R/hyp_test.R b/R/hyp_test.R new file mode 100644 index 0000000..e8d0932 --- /dev/null +++ b/R/hyp_test.R @@ -0,0 +1,466 @@ +#' --- +#' title: "ASIST - Gallup Experiment 1 Results" +#' author: "Pablo Diego-Rosell, PhD" +#' date: "November 27, 2020" +#' output: +#' html_document: +#' toc: true +#' theme: united +#' --- + +#' #Setup + +#-------------------------------------------------------------------------- +# Set working directory, load libraries and data +#-------------------------------------------------------------------------- + +rm(list=ls(all=t)) +setwd ("C:/Users/C_Pablo_Diego-Rosell/Desktop/Projects/ASIST/Data/Phase_1") # Local execution +#setwd ("//gallup/dod_clients/DARPA_ASIST/CONSULTING/Analytics/Phase_1/Data/processed_data") # Server execution +if (!require("pacman")) install.packages("pacman") +library ("pacman") +pacman::p_load(rstatix, ggpubr, corrgram, lme4, lmerTest, dplyr, rstanarm, bayesplot) +games <- read.csv ("results_data_proc.csv") +events <- read.csv ("results_events_proc.csv") +victims <- read.csv ("results_victims_proc.csv") +rooms <- read.csv ("results_rooms_proc.csv") +victims_first_half <- subset (victims, seconds_remaining >=300) +rooms_first_half <- subset (rooms, seconds_remaining >=300) + +#-------------------------------------------------------------------------- +# Exploratory analysis function +#-------------------------------------------------------------------------- + +exp_func <- function (DV, IV, data) { + print (paste("DESCRIPTIVE ANALYSIS OF DV =", DV, "& IV =", IV)) + depvar <- data[c(DV)][,1] + indvar <- data[c(IV)][,1] + formula = as.formula(paste(DV, IV, sep="~")) + print(paste("indvar =", IV)) + if (class(depvar)=="factor" & class(indvar) =="factor") { + print(table(indvar)) + print(prop.table(table(depvar, indvar), 2)) + plot(depvar, indvar, xlab=DV, ylab=IV) + print(summary(glm(formula, data = data, family="binomial"))) + } else if (class(depvar)=="factor" & (class(indvar)=="integer" | class(indvar)=="numeric")) { + print(paste("Sample size for IV =", length(indvar))) + print(summary(indvar)) + print(aggregate(indvar~depvar,data=data, mean, na.rm=T)) + boxplot(indvar~depvar, data=data, xlab=DV, ylab=IV) + print(summary(glm(formula = formula, data = data, family="binomial"))) + } else if ((class(depvar)=="integer" | class(depvar)=="numeric") & class(indvar)=="factor") { + print(table(indvar)) + print(aggregate(depvar~indvar,data=data, mean, na.rm=T)) + boxplot(depvar~indvar,data=data, xlab=DV, ylab=IV) + print(summary(glm(formula = formula, data = data, family="gaussian"))) + } else if ((class(depvar)=="integer" | class(depvar)=="numeric") & (class(indvar)=="integer" | class(indvar)=="numeric")) { + print(paste("Sample size for IV =", length(indvar))) + print(summary(indvar)) + lm_results <- lm(formula = formula, data = data) + with(data,plot(depvar, indvar)) + abline(lm_results) + print(summary(lm_results)) + } else { + print("NO OUTPUT") + } +} + +#-------------------------------------------------------------------------- +# Hypothesis testing - modeling function +#-------------------------------------------------------------------------- + +# Assign priors based on adequate power to detect medium-sized effects (std. beta = 0.25, log odds = 0.9) + +MDES_l <- 0.9 # MDES in log odds, according to power analysis +MDES_b <- 0.25 # MDES in standard betas, according to power analysis + +hlm_func <- function (formula, data, family = "binomial", priors) { + print ("HYPOTHESIS TEST") + options(mc.cores = parallel::detectCores()) + nIters <- 5000 + if (family == "binomial"){ + freq_model <- glmer(formula, data = data, family = binomial, na.action = na.exclude) + bayes_model<- stan_glmer(formula,data=data, family = binomial(link = "logit"), + prior = priors, + prior_intercept = cauchy(0, 2.5), + chains = 4, iter = nIters, + seed = 12345, refresh = 1000) + } else { + freq_model <- lmer(formula, data = data, na.action = na.exclude) + bayes_model<- stan_glmer(formula,data=data, family = gaussian(link = "identity"), + prior = priors, + prior_intercept = cauchy(0, 2.5), + chains = 4, iter = nIters, + seed = 12345, refresh = 1000) + } + print(summary(freq_model)) + print(summary(bayes_model,pars = c("beta"), probs = c(0.025, 0.1, 0.5, 0.9, 0.975), digits = 2)) + print(plot(bayes_model, "areas", pars = c("beta"), prob = 0.95, prob_outer = 0.99)+ + geom_vline(xintercept = 0, linetype = "dashed")) + + ggtitle("Posterior distribution of IV effects", "with medians and 95% intervals") + print(plot(bayes_model, "trace", pars = c("beta"))) + return (list(bayes_model, freq_model)) +} + +#' #Hypothesis testing - Victim Strategies +#-------------------------------------------------------------------------- +# Hypothesis testing - Victim Strategies +#-------------------------------------------------------------------------- + +#' ##1. "Prioritized strategy" predictions +# 1.1. Participants in the "trade-off" and "trade-off + device" training conditions will be more likely to mention "yellow-only" as their victim prioritization strategy in the pre-trial survey. +exp_func (DV="original_vic_strat_yellow", IV="tradeoff", data=games) +# 1.2. The amount of time a victim strategy has been used in the past will increase the likelihood that participants will mention that strategy in the pre-trial survey. +#exp_func (DV="vic_strat_yel_prev_time", IV="original_vic_strat_yellow", data=games) +exp_func (DV="original_vic_strat_yellow", IV="vic_strat_yel_prev_most_time", data=games) +# 1.3. The average past benefit rate of a victim strategy will increase the likelihood that participants will mention that strategy in the pre-trial survey. +#exp_func (DV="original_vic_strat_yellow", IV="vic_strat_yel_prev_rate", data=games) +exp_func (DV="original_vic_strat_yellow", IV="vic_strat_yel_prev_most_points", data=games) +# 1.4. Participants high in satisficing tendency will be less likely to mention "yellow-only" as their victim prioritization strategy in the pre-trial survey. +exp_func (DV="original_vic_strat_yellow", IV="sat_tendency_std", data=games) +exp_func (DV="original_vic_strat_yellow", IV="sat_tendency_std2", data=games) +# 1.5. Participants with lower levels of spatial ability will be more likely to mention sequential search as their victim strategy in the pre-trial survey. +exp_func (DV="original_vic_strat_yellow", IV="spatial_ability_std", data=games) +# 1.6. Participants with lower levels of videogaming experience will be more likely to mention sequential search as their victim strategy in the pre-trial survey. +exp_func (DV="original_vic_strat_yellow", IV="videogame_experience_std", data=games) + +# Test 1.1., 1.4., 1.5., 1.6. using largest available sample (not adjusting for prior strategy use) +# Outcome: Prioritize yellow-only strategy + +v_formula_1_1 <- as.formula (original_vic_strat_yellow ~ + tradeoff + + sat_tendency_std2 + + spatial_ability_std + + videogame_experience_std + + complexity + + (1 | member_id)) +priors1_1 <- cauchy(location = c(MDES_l, -MDES_l, MDES_l, MDES_l, 0, 0), + scale = c(rep(MDES_l/2, 4), 2.5, 2.5)) +v_model_1_1 <- hlm_func(formula = v_formula_1_1, data = games, family="binomial", priors = priors1_1) + +# Outcome: Prioritize Sequential strategy + +v_formula_1_2 <- as.formula (original_vic_strat_seq ~ + tradeoff + + sat_tendency_std2 + + spatial_ability_std + + videogame_experience_std + + complexity + + (1 | member_id)) +priors1_2 <- cauchy(location = c(MDES_l, -MDES_l, MDES_l, MDES_l, 0, 0), + scale = c(rep(MDES_l/2, 4), 2.5, 2.5)) +v_model_1_2 <- hlm_func(formula= v_formula_1_2, data = games, family="binomial", priors = priors1_2) + +# Testing 1.5. & 1.6.: adjusting for prior strategy use +# Outcome: Prioritize Yellow-only strategy + +v_formula_1_3 <- as.formula (original_vic_strat_yellow ~ + tradeoff + + vic_strat_yel_prev_most_time + + vic_strat_yel_prev_most_points + + sat_tendency_std2 + + spatial_ability_std + + videogame_experience_std + + complexity + + (1 | member_id)) +priors1_3 <- cauchy(location = c(MDES_l, MDES_l, MDES_l, -MDES_l, MDES_l, MDES_l, 0, 0), + scale = c(rep(MDES_l/2, 6), 2.5, 2.5)) +v_model_1_3 <- hlm_func(formula = v_formula_1_3, data = games, family="binomial", priors = priors1_3) + +# Outcome: Prioritize sequential strategy + +v_formula_1_4 <- as.formula (original_vic_strat_seq ~ + tradeoff + + vic_strat_seq_prev_most_time + + vic_strat_seq_prev_most_points + + sat_tendency_std2 + + spatial_ability_std + + videogame_experience_std + + complexity + + (1 | member_id)) +priors1_4 <- cauchy(location = c(MDES_l, MDES_l, MDES_l, -MDES_l, MDES_l, MDES_l, 0, 0), + scale = c(rep(MDES_l/2, 6), 2.5, 2.5)) +v_model_1_4 <- hlm_func(formula= v_formula_1_4, data = games, family="binomial", priors = priors1_4) + +#' ##2. "Execute strategy" predictions +# 2.1. Participants will be more likely to implement strategies mentioned in the pre-trial survey than other strategies. +exp_func (DV="victim_strategy_data.Yellow.First.time_spent", IV="original_vic_strat_yellow", data=games) +exp_func (DV="most_time_vic_strat_yellow", IV="original_vic_strat_yellow", data=games) + +# Outcome: Yellow-only strategy + +v_formula_2_1 <- as.formula (most_time_vic_strat_yellow ~ + original_vic_strat_yellow + + training + + complexity + + (1 | member_id)) +priors2_1 <- cauchy(location = c(MDES_l, 0, 0, 0, 0, 0), + scale = c(MDES_l/2, rep(2.5, 5))) +v_model_2_1 <- hlm_func(formula= v_formula_2_1, data = games, family="binomial", priors = priors2_1) + +# Outcome: Sequential strategy + +exp_func (DV="victim_strategy_data.Sequential.time_spent", IV="original_vic_strat_seq", data=games) +exp_func (DV="most_time_vic_strat_seq", IV="original_vic_strat_seq", data=games) + +v_formula_2_2 <- as.formula (most_time_vic_strat_seq ~ + original_vic_strat_seq + + training + + complexity + + (1 | member_id)) +priors2_2 <- cauchy(location = c(MDES_l, 0, 0, 0, 0, 0), + scale = c(MDES_l/2, rep(2.5, 5))) +v_model_2_2 <- hlm_func(formula= v_formula_2_2, data = games, family="binomial", priors = priors2_1) + +#' ##3. "Workload" predictions +# 3.1. Participants are more likely to experience high workload under higher mission complexity than lower mission complexity. + +exp_func (DV="workload", IV="complexity", data=games) + +priors3_1 <- cauchy(location = c(MDES_b, MDES_b, 0, 0, 0), + scale = c(MDES_b/2, MDES_b/2, rep(2.5, 3))) +v_formula_3_1 <- as.formula (workload ~ + complexity + + training + (1 | member_id)) +v_model_3_1 <- hlm_func(formula= v_formula_3_1, data = games, family="gaussian", priors = priors3_1) + +#' ##4. "Current Utility" predictions +# 4.1. Participants using a "yellow-only" or "mixed" strategy where Py = 1 will have a higher score than participants using a strategy where Py < 1. +exp_func (DV="final_score_std", IV="left_behind_yellow_cat", data=games) +# 4.2. Participants with higher competency scores will have a higher score than participants with lower competency scores. +exp_func (DV="final_score_std", IV="competency_score_std", data=games) +# 4.3. Participants with higher spatial ability will have a higher score than participants with lower spatial ability. +exp_func (DV="final_score_std", IV="spatial_ability_std", data=games) + +v_formula_4_1 <- as.formula (final_score_std ~ + left_behind_yellow_max_std + + competency_score_std + + spatial_ability_std + + complexity + + training + + (1 | member_id)) +priors4_1 <- cauchy(location = c(rep(MDES_b, 3), rep(0, 5)), + scale = c(rep(MDES_b/2, 3), rep(2.5, 5))) +v_model_4_1 <- hlm_func(formula= v_formula_4_1, data = games, family="gaussian", priors = priors4_1) + +#' ##5. "Update Strategy" predictions +# 5.1 The probability of "yellow-only" victim strategy will decrease as average search times for victims decreases. +exp_func (DV="vic_strat_yellow", IV="yellow_search_time_std", data=victims_first_half) +exp_func (DV="vic_strat_yellow", IV="green_search_time_std", data=victims_first_half) +# 5.2 The probability of "yellow-only" victim strategy will increase as the time remaining for the 5-minute mark approaches the product of the number of remaining yellow victims times the sum of their average search time and time to be rescued. +exp_func (DV="vic_strat_yellow", IV="spare_time_yellows_std", data=victims_first_half) + +v_formula_5_1 <- as.formula (vic_strat_yellow ~ + yellow_search_time_std + + green_search_time_std + + spare_time_yellows_std + + complexity + + training + + (1 | member_id)) +priors5_1 <- cauchy(location = c(rep(MDES_l, 3), rep(0, 5)), + scale = c(rep(MDES_l/2, 3), rep(2.5, 5))) +v_model_5_1 <- hlm_func(formula= v_formula_5_1, data = victims_first_half, family="binomial", priors = priors5_1) + +# 5.3 The probability of a victim strategy update will be greater for participants with lower current utility, as measured by the sum of their yellow rate, their green rate, and their expected green rate after 5 minutes. +# 5.4 The probability of a victim strategy update among participants with lower current utility will increase as uncertainty about current utility decreases through learning. +# 5.5 Victim strategy updates will be more likely for participants high in optimizing tendency. +# 5.6 Victim strategy updates will be more likely after a participant has rescued all yellow victims or after the 5-minute mark (yellow victims decease). +exp_func (DV="vic_strat_yellow", IV="all_yellow_rescued", data=victims_first_half) + +# Note that expected_green_rate_std is redundant due to perfect colinearity with green_per_minute_std, and so it is excluded + +v_formula_5_2 <- as.formula (victim_strategy_update ~ + yellow_per_minute_std + + green_per_minute_std + + seconds_remaining_std + + sat_tendency_std2 + + all_yellow_rescued + + five_minutes + + complexity + + training + + (1 | member_id)) +priors5_2 <- cauchy(location = c(rep(MDES_l, 6), rep(0, 5)), + scale = c(rep(MDES_l/2, 6), rep(2.5, 5))) +v_model_5_2 <- hlm_func(formula= v_formula_5_2, data = victims_first_half, family="binomial", priors = priors5_2) + +#' #Hypothesis testing - Navigation Strategies +#-------------------------------------------------------------------------- +# Hypothesis testing - Navigation Strategies +#-------------------------------------------------------------------------- + +#' ##1. "Prioritized navigation strategy" predictions +table(games$original_nav_strategy) +# 1.1. Participants in the "trade-off + device" training condition will be more likely to mention "avoid empty rooms" and/or "prioritize rooms with yellow victims in them" as their building navigation strategy in the pre-trial survey. +exp_func (DV="original_nav_strat_yelempt", IV="device", data=games) +# 1.2. The amount of time a navigation strategy has been used in the past will increase the likelihood that participants will mention that strategy in the pre-trial survey. +exp_func (DV="original_nav_strat_yellow", IV="nav_strat_yel_prev_most_time", data=games) +exp_func (DV="original_nav_strat_seq", IV="nav_strat_seq_prev_most_time", data=games) +# 1.3. The average past benefit rate of a navigation strategy will increase the likelihood that participants will mention that strategy in the pre-trial survey. +exp_func (DV="original_nav_strat_yellow", IV="nav_strat_yel_prev_most_points", data=games) +exp_func (DV="original_nav_strat_seq", IV="nav_strat_seq_prev_most_points", data=games) +# 1.4. Participants with a greater number of prior device uses, as evidenced by participants avoiding empty rooms (no beeps) or rooms inconsistent with a player's ongoing strategy (e.g. green only rooms when a player's strategy is yellow only), will be more likely to mention "avoid empty rooms" and/or "prioritize rooms with yellow victims in them" as their building navigation strategy in the pre-trial survey. +exp_func (DV="original_nav_strat_yelempt", IV="prior_use_consistent_std", data=games) +# 1.5. Participants high in satisficing tendency will be less likely to mention "avoid empty rooms" and/or "prioritize rooms with yellow victims in them" as their building navigation strategy in the pre-trial survey. +exp_func (DV="original_nav_strat_yelempt", IV="sat_tendency_std2", data=games) +# 1.6. Participants with lower levels of spatial ability will be more likely to mention sequential search as their navigation strategy in the pre-trial survey. +exp_func (DV="original_nav_strat_seq", IV="spatial_ability_std", data=games) +# 1.7. Participants with lower levels of videogaming experience will be more likely to mention sequential search as their navigation strategy in the pre-trial survey. +exp_func (DV="original_nav_strat_seq", IV="videogame_experience_std", data=games) + +# Equation for hyp. 1.1. (keep largest sample by excluding prior use of strategy variables) + +# Outcome: Prioritize yellow first or avoid empty + +n_formula_1_1 <- as.formula (original_nav_strat_yelempt ~ + device + + sat_tendency_std2 + + spatial_ability_std + + videogame_experience_std + + complexity + + (1 | member_id)) +priors1_1 <- cauchy(location = c(MDES_l, -MDES_l, MDES_l, MDES_l, 0, 0), + scale = c(rep(MDES_l/2, 4), 2.5, 2.5)) +n_model_1_1 <- hlm_func(formula= n_formula_1_1, data = games, family="binomial", priors = priors1_1) + +# Equation for hyp. 1.6. & 1.7 .(keep largest sample by excluding prior use of strategy variables) +# Outcome: Prioritize sequential + +n_formula_1_2 <- as.formula (original_nav_strat_seq ~ + device + + sat_tendency_std2 + + spatial_ability_std + + videogame_experience_std + + complexity + + (1 | member_id)) +priors1_2 <- cauchy(location = c(MDES_l, MDES_l, -MDES_l, -MDES_l, 0, 0), + scale = c(rep(MDES_l/2, 4), 2.5, 2.5)) +n_model_1_2 <- hlm_func(formula= n_formula_1_2, data = games, family="binomial", priors = priors1_2) + +# Equations for hyps 1.2., 1.3(test on "yellow only" and "sequential" strategies, as the two most common) + +n_formula_1_3 <- as.formula (original_nav_strat_yellow ~ + device + + nav_strat_yel_prev_most_time + + nav_strat_yel_prev_most_points + + prior_use_consistent_std + + sat_tendency_std2 + + spatial_ability_std + + videogame_experience_std + + complexity + + (1 | member_id)) +priors1_3 <- normal(location = c(MDES_l, MDES_l, MDES_l, MDES_l, -MDES_l, MDES_l, MDES_l, 0, 0), + scale = c(rep(MDES_l/2, 7), 2.5, 2.5)) # Changed to normal to avoid convergence issues +n_model_1_3 <- hlm_func(formula= n_formula_1_3, data = games, family="binomial", priors = priors1_3) + +n_formula_1_4 <- as.formula (original_nav_strat_seq ~ + device + + nav_strat_seq_prev_most_time + + nav_strat_seq_prev_most_points + + prior_use_consistent_std + + sat_tendency_std2 + + spatial_ability_std + + videogame_experience_std + + complexity + + (1 | member_id)) +priors1_4 <- normal(location = c(MDES_l, MDES_l, MDES_l, MDES_l, -MDES_l, MDES_l, MDES_l, 0, 0), + scale = c(rep(MDES_l/2, 7), 2.5, 2.5)) # Changed to normal to avoid convergence issues +n_model_1_4 <- hlm_func(formula= n_formula_1_4, data = games, family="binomial", priors = priors1_4) + +# Equations for hyps 1.4., 1.5 (test on "yellow only + sequential" strategies) + +n_formula_1_5 <- as.formula (original_nav_strat_yelempt ~ + device + + nav_strat_seq_prev_most_time + + nav_strat_seq_prev_most_points + + prior_use_consistent_std + + sat_tendency_std2 + + spatial_ability_std + + videogame_experience_std + + complexity + + (1 | member_id)) +priors1_5 <- normal(location = c(MDES_l, MDES_l, MDES_l, MDES_l, -MDES_l, MDES_l, MDES_l, 0, 0), + scale = c(rep(MDES_l/2, 7), 2.5, 2.5)) # Changed to normal to avoid convergence issues +n_model_1_5 <- hlm_func(formula= n_formula_1_5, data = games, family="binomial", priors = priors1_5) + +#' ##2. "Executed navigation strategy" predictions + +# Outcome: Yellow-Only + +n_formula_2_1 <- as.formula (most_time_nav_strat_yel ~ + original_nav_strat_yellow + + training + + complexity + + (1 | member_id)) +priors2_1 <- normal(location = c(MDES_l, 0, 0, 0, 0, 0), + scale = c(MDES_l/2, rep(2.5, 5))) # USed normal to fix convergence issues +n_model_2_1 <- hlm_func(formula= n_formula_2_1, data = games, family="binomial", priors=priors2_1) + + +# Outcome: Sequential + +n_formula_2_2 <- as.formula (most_time_nav_strat_seq ~ + original_nav_strat_seq + + training + + complexity + + (1 | member_id)) +priors2_2 <- cauchy(location = c(MDES_l, 0, 0, 0, 0, 0), + scale = c(MDES_l/2, rep(2.5, 5))) +n_model_2_2 <- hlm_func(formula= n_formula_2_2, data = games, family="binomial", priors=priors2_2) + +#' ##3. "Device Use" predictions +# 3.1. Exposure to positive and negative incentives following device beeps will increase the probability of learning the device (per post-strategy survey). + + +games_no_device <- subset (games, device == "NoDevice") +exp_func (DV="q239_cat", IV="rooms_entered_empty_std", data=games_no_device) +exp_func (DV="q239_cat", IV="rooms_entered_not_empty_std", data=games_no_device) + +v_formula_3_1 <- as.formula (q239_cat ~ + empty_max_trial_std + + not_empty_max_trial_std + + training + + complexity + + (1 | member_id)) +priors3_1 <- cauchy(location = c(MDES_l, MDES_l, rep(0, 3)), + scale = c(MDES_l/2, MDES_l/2, rep(2.5, 3))) +v_model_3_1 <- hlm_func(formula= v_formula_3_1, data = games_no_device, family="binomial", priors=priors3_1) + +# Since q239 is underpowered, consider plan to use device-relevant strategy in next trial (i.e. "yellow only" & "avoid empty"). + +exp_func (DV="next_nav_strat_yelempt", IV="rooms_entered_not_empty_std", data=games_no_device) +exp_func (DV="next_nav_strat_yelempt", IV="rooms_entered_empty_std", data=games_no_device) +exp_func (DV="next_nav_strat_yelempt", IV="empty_max_trial_std", data=games_no_device) + +v_formula_3_2 <- as.formula (original_nav_strat_yelempt ~ + empty_max_trial_prev_std + + not_empty_max_trial_prev_std + + training + + complexity + + (1 | member_id)) +priors3_2 <- cauchy(location = c(MDES_l, MDES_l, rep(0, 3)), + scale = c(MDES_l/2, MDES_l/2, rep(2.5, 3))) +v_model_3_2 <- hlm_func(formula=v_formula_3_2, data = games_no_device, family="binomial", priors=priors3_2) + +#' ##4. "Navigation strategy update" predictions +#4.1. Navigation strategy updates will be more likely after participants observe a perturbation. +#4.2. The probability of a navigation strategy update will increase with exposure to positive incentives following device beeps (rooms with victims). +#4.3. The probability of a navigation strategy update will increase with exposure to negative incentives following device beeps (empty rooms). +exp_func (DV="nav_strategy_update", IV="openings_seen", data=rooms_first_half) + +n_formula_4_1 <- as.formula (nav_strategy_update ~ + openings_seen + + rooms_entered_empty_std + + rooms_entered_not_empty_std + + complexity + + training + + (1 | member_id)) +priors4_1 <- cauchy(location = c(MDES_l, MDES_l, MDES_l, rep(0, 5)), + scale = c(rep(MDES_l/2, 3), rep(2.5, 5))) +n_model_4_1 <- hlm_func(formula= n_formula_4_1, data = rooms_first_half, family="binomial", priors=priors4_1) + +#-------------------------------------------------------------------------- +## Render into notebook (run code below in separate script to test this one) +#-------------------------------------------------------------------------- + +# setwd ("C:/Users/C_Pablo_Diego-Rosell/Desktop/Projects/ASIST/Analytics/Phase_1") # Local execution +# rmarkdown::render("hyp_test.R") diff --git a/R/optimizer.R b/R/optimizer.R new file mode 100644 index 0000000..c716672 --- /dev/null +++ b/R/optimizer.R @@ -0,0 +1,117 @@ +#' --- +#' title: "ASIST - Gallup Experiment 1 - Optimization" +#' author: "Pablo Diego-Rosell, PhD" +#' date: "November 27, 2020" +#' output: +#' html_document: +#' toc: true +#' theme: united +#' --- + +#' #Setup + +rm(list=ls(all=t)) +if (!require("pacman")) install.packages("pacman") +library ("pacman") +pacman::p_load(reshape2, plotly) +setwd ("C:/Users/C_Pablo_Diego-Rosell/Desktop/Projects/ASIST/Analytics/Phase_1") + +#' Generate matrix for simulations in Visual Basic (Excel macro) + +x <- seq(10, 80, 1) # Range of search times (min 10, max 80, per experiment 1 data) +y <- seq(0, 1, 1/10) # Probabilities as a function of victims rescued over total N (10 yellow victims, per final design) +matrix <- expand.grid(x,x,y) +write.csv (matrix, "matrix.csv") + +#' Run optimizations in Visual Basic and read back in + +#' #Optimized probability of green at different probabilities of yellow + +optim <- read.csv("optim_green.csv") +for (i in unique(optim$py)) { + optim1 <- optim[which(optim$py==i), ] + print(summary(optim1)) + z <- acast(optim1, search_y~search_g1, value.var="optim_pg1") + persp(seq(1, 80, 1), seq(1, 80, 1), z, phi = 30, theta = 60, + xlab = "Search time yellow", ylab = "Search time green", zlab = "Probability of green", + zlim=c(0,1), + main=paste("Total Points by Search Times (Py = ", paste(i*10, "/10)"), sep=""), + ticktype="detailed") +} + +#' #Optimal Probability of Yellow (Full game, optimizing probability of green for total points) + +optim <- read.csv("optim_yellow.csv") +zlim1 <- c(0,540) +for (i in unique(optim$py)) { + optim1 <- optim[which(optim$py==i), ] + z <- acast(optim1, search_y~search_g1, value.var="points") + persp(seq(1, 80, 1), seq(1, 80, 1), z, phi = 30, theta = 60, + xlab = "Search time yellow", ylab = "Search time green", zlab = "Total points", + main = paste("Total Points by Search Times (Py = ", paste(i*10, "/10)"), sep=""), + ticktype="detailed", zlim =zlim1) +} + +# Create interactive plotly visualization with all 10 levels of Py + +p <- plot_ly() +for (i in unique(optim$py)) { + optim1 <- optim[which(optim$py==i), ] + p <- add_mesh (p, data = optim1, + x=optim1$search_y, + y=optim1$search_g1, + z=optim1$points, + type="mesh3d") +} +p <- layout(p, title = "Final score by search times and probability of yellow (optimized for green)", + scene = list( + xaxis = list(title = "Mean Yellow Search Time"), + yaxis = list(title = "Mean Green Search Time"), + zaxis = list(title = "Final Score") + )) +p <- layout(p, + updatemenus = list( + list( + type = "buttons", + y = 1, + buttons = list( + list(method = "restyle", + args = list("visible", c(T,F,F,F,F,F,F,F,F,F,F)), + label = "Py = 0.0"), + list(method = "restyle", + args = list("visible", c(F,T,F,F,F,F,F,F,F,F,F)), + label = "Py = 0.1"), + list(method = "restyle", + args = list("visible", c(F,F,T,F,F,F,F,F,F,F,F)), + label = "Py = 0.2"), + list(method = "restyle", + args = list("visible", c(F,F,F,T,F,F,F,F,F,F,F)), + label = "Py = 0.3"), + list(method = "restyle", + args = list("visible", c(F,F,F,F,T,F,F,F,F,F,F)), + label = "Py = 0.4"), + list(method = "restyle", + args = list("visible", c(F,F,F,F,F,T,F,F,F,F,F)), + label = "Py = 0.5"), + list(method = "restyle", + args = list("visible", c(F,F,F,F,F,F,T,F,F,F,F)), + label = "Py = 0.6"), + list(method = "restyle", + args = list("visible", c(F,F,F,F,F,F,F,T,F,F,F)), + label = "Py = 0.7"), + list(method = "restyle", + args = list("visible", c(F,F,F,F,F,F,F,F,T,F,F)), + label = "Py = 0.8"), + list(method = "restyle", + args = list("visible", c(F,F,F,F,F,F,F,F,F,T,F)), + label = "Py = 0.9"), + list(method = "restyle", + args = list("visible", c(F,F,F,F,F,F,F,F,F,F,T)), + label = "Py = 1.0"), + list(method = "restyle", + args = list("visible", c(T,T,T,T,T,T,T,T,T,T,T)), + label = "All"))) + )) +p + +htmlwidgets::saveWidget(p, "probs_yellow.html")