Skip to content

Commit a6e156b

Browse files
committed
proper support for multiple subgroup variables
This allows multiple subgroup variables to be specified, separated by a dash, for example "age_group-sex". If more than one subgroup is specified, analyses will be stratified multiplicatively, i.e. as exposure * (subgroup1 * subgroup2) rather than exposure * (subgroup1 + subgroup2).
1 parent 2a0d87f commit a6e156b

4 files changed

Lines changed: 13 additions & 6 deletions

File tree

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ follows:
5050
[default: NULL] character. The name of an exposure variable in the input dataset. Must be binary or not given. All outputs will be stratified by this variable. This could be an exposure in the usual sense, or it could be (mis)used to show different types of events (as long as the censoring structure is the same). If not specified, no stratification will occur.
5151

5252
--subgroups=SUBGROUP_VARNAME
53-
[default: NULL] The name of a subgroup variable or list of variable names. If a subgroup variable is used, analyses will be stratified as exposure * ( subgroup1, subgroup2, ...). If not specified, no stratification will occur.
53+
[default: NULL] The name(s) of the subgroup variable(s). If using multiple subgroup variables, delimit with a dash (-), for example 'age_group-sex'. If subgroup variables are used, analyses will be stratified as exposure * subgroup1 * subgroup2 * ... (multiplicatively, not additively). If not specified, no stratification will occur.
5454

5555
--origin_date=ORIGIN_VARNAME
5656
[default: must be specified] The name of a date variable (or name of a variable that is coercible to a date, e.g., 'YYYY-MM-DD') in the input dataset that represents the start of follow-up.

analysis/dataset_definition.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@
6666
)
6767

6868

69-
# grouping variables
69+
# example exposure / stratification variables
7070

7171
dataset.sex = patients.sex
7272

@@ -79,6 +79,8 @@
7979
otherwise="unknown",
8080
)
8181

82+
dataset.region = registered_patients.practice_nuts1_region_name
83+
8284

8385
# start of follow up variable
8486

analysis/km.R

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ if(length(args)==0){
2929
df_input <- "output/extract.arrow"
3030
dir_output <- "output/km_estimates/"
3131
exposure <- c("sex")
32-
subgroups <- c("age_group")
32+
subgroups <- c("age_group-region")
3333
origin_date <- "first_vax_date"
3434
event_date <- "second_vax_date"
3535
censor_date <- character() # "censor_date"
@@ -40,7 +40,7 @@ if(length(args)==0){
4040
smooth <- as.logical("FALSE")
4141
smooth_df <- as.integer("4")
4242
concise <- as.logical("TRUE")
43-
plot <- as.logical("FALSE")
43+
plot <- as.logical("TRUE")
4444
contrast <- as.logical("TRUE")
4545
filename_suffix <- as.character("")
4646
} else {
@@ -58,7 +58,7 @@ if(length(args)==0){
5858
help = "[default: NULL] character. The name of an exposure variable in the input dataset. Must be binary or not given. All outputs will be stratified by this variable. This could be an exposure in the usual sense, or it could (mis)used to show different types of events (as long as the censoring structure is the same). If not specified, no stratification will occur.",
5959
metavar = "exposure_varname"),
6060
make_option("--subgroups", type = "character", default = character(),
61-
help = "[default: NULL] The name of a subgroup variable or list of variable names. If a subgroup variable is used, analyses will be stratified as exposure * ( subgroup1, subgroup2, ...). If not specified, no stratification will occur.",
61+
help = "[default: NULL] The name(s) of the subgroup variable(s). If using multiple subgroup variables, delimit with a dash (-), for example 'age_group-sex'. If subgroup variables are used, analyses will be stratified as exposure * subgroup1 * subgroup2 * ... (multiplicatively, not additively). If not specified, no stratification will occur.",
6262
metavar = "subgroup_varname"),
6363
make_option("--origin_date", type = "character",
6464
help = "[default: must be specified] The name of a date variable (or name of a variable that is coercable to a date eg 'YYYY-MM-DD') in the input dataset that represents the start of follow-up.",
@@ -118,6 +118,11 @@ if(length(args)==0){
118118
# the quasiquotation still works inside ggplot, transmute, etc
119119

120120
exposure_syms <- syms(exposure)
121+
122+
123+
if(length(subgroups)>0) {
124+
subgroups <- strsplit(subgroups, "-")[[1]]
125+
}
121126
subgroup_syms <- syms(subgroups)
122127

123128
# Create output directory ----

project.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ actions:
1717
--df_input output/extract.arrow
1818
--dir_output output/km_estimates/
1919
--exposure sex
20-
--subgroups age_group
20+
--subgroups age_group-region
2121
--origin_date first_vax_date
2222
--event_date second_vax_date
2323
--censor_date censor_date

0 commit comments

Comments
 (0)