diff --git a/README.md b/README.md index 2f0161a..5c5e1b3 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ follows: [default: NULL] character. The name of an exposure variable in the input dataset. Must be binary or not given. All outputs will be stratified by this variable. This could be an exposure in the usual sense, or it could (mis)used to show different types of events (as long as the censoring structure is the same). If not specified, no stratification will occur. --subgroups=SUBGROUP_VARNAME - [default: NULL] The name of a subgroup variable or list of variable names. If a subgroup variable is used, analyses will be stratified as exposure * ( subgroup1, subgroup2, ...). If not specified, no stratification will occur. + [default: NULL] The name(s) of the subgroup variable(s). If using multiple subgroup variables, delimit with a dash (-), for example 'age_group-sex'. If subgroup variables are used, analyses will be stratified as exposure * subgroup1 * subgroup2 * ... (multiplicatively, not additively). If not specified, no stratification will occur. --origin_date=ORIGIN_VARNAME [default: must be specified] The name of a date variable (or name of a variable that is coercable to a date eg 'YYYY-MM-DD') in the input dataset that represents the start of follow-up. diff --git a/analysis/dataset_definition.py b/analysis/dataset_definition.py index 8704d65..5d0d713 100644 --- a/analysis/dataset_definition.py +++ b/analysis/dataset_definition.py @@ -66,7 +66,7 @@ ) -# grouping variables +# example exposure / stratification variables dataset.sex = patients.sex @@ -79,6 +79,8 @@ otherwise="unknown", ) +dataset.region = registered_patients.practice_nuts1_region_name + # start of follow up variable diff --git a/analysis/km.R b/analysis/km.R index 220af39..60bc67d 100644 --- a/analysis/km.R +++ b/analysis/km.R @@ -29,7 +29,7 @@ if(length(args)==0){ df_input <- "output/extract.arrow" dir_output <- "output/km_estimates/" exposure <- c("sex") - subgroups <- c("age_group") + subgroups <- c("age_group-region") origin_date <- "first_vax_date" event_date <- "second_vax_date" censor_date <- character() # "censor_date" @@ -40,7 +40,7 @@ if(length(args)==0){ smooth <- as.logical("FALSE") smooth_df <- as.integer("4") concise <- as.logical("TRUE") - plot <- as.logical("FALSE") + plot <- as.logical("TRUE") contrast <- as.logical("TRUE") filename_suffix <- as.character("") } else { @@ -58,7 +58,7 @@ if(length(args)==0){ help = "[default: NULL] character. The name of an exposure variable in the input dataset. Must be binary or not given. All outputs will be stratified by this variable. This could be an exposure in the usual sense, or it could (mis)used to show different types of events (as long as the censoring structure is the same). If not specified, no stratification will occur.", metavar = "exposure_varname"), make_option("--subgroups", type = "character", default = character(), - help = "[default: NULL] The name of a subgroup variable or list of variable names. If a subgroup variable is used, analyses will be stratified as exposure * ( subgroup1, subgroup2, ...). If not specified, no stratification will occur.", + help = "[default: NULL] The name(s) of the subgroup variable(s). If using multiple subgroup variables, delimit with a dash (-), for example 'age_group-sex'. If subgroup variables are used, analyses will be stratified as exposure * subgroup1 * subgroup2 * ... (multiplicatively, not additively). If not specified, no stratification will occur.", metavar = "subgroup_varname"), make_option("--origin_date", type = "character", help = "[default: must be specified] The name of a date variable (or name of a variable that is coercable to a date eg 'YYYY-MM-DD') in the input dataset that represents the start of follow-up.", @@ -118,6 +118,11 @@ if(length(args)==0){ # the quasiquotation still works inside ggplot, transmute, etc exposure_syms <- syms(exposure) + + +if(length(subgroups)>0) { + subgroups <- strsplit(subgroups, "-")[[1]] +} subgroup_syms <- syms(subgroups) # Create output directory ---- diff --git a/project.yaml b/project.yaml index 698c045..a0cbf04 100644 --- a/project.yaml +++ b/project.yaml @@ -17,7 +17,7 @@ actions: --df_input output/extract.arrow --dir_output output/km_estimates/ --exposure sex - --subgroups age_group + --subgroups age_group-region --origin_date first_vax_date --event_date second_vax_date --censor_date censor_date