diff --git a/DESCRIPTION b/DESCRIPTION index 882df32ca..d5dc606ad 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -54,16 +54,21 @@ Imports: gridExtra, data.table, methods, - dplyr + dplyr, + cli, + purrr, + assertthat Suggests: lme4, httr, tibble, - testthat, + testthat (>= 3.0.0), e1071, DescTools, DSOpal, DSMolgenisArmadillo, - DSLite + DSLite, + dsBase RoxygenNote: 7.3.2 Encoding: UTF-8 +Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index d737d5e6a..59da737b4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -104,6 +104,7 @@ export(ds.seq) export(ds.setSeed) export(ds.skewness) export(ds.sqrt) +export(ds.standardiseDf) export(ds.summary) export(ds.table) export(ds.table1D) @@ -117,6 +118,24 @@ export(ds.var) export(ds.vectorCalc) import(DSI) import(data.table) +import(dplyr) +importFrom(DSI,datashield.aggregate) +importFrom(DSI,datashield.assign) +importFrom(assertthat,assert_that) +importFrom(cli,cli_abort) +importFrom(cli,cli_alert_danger) +importFrom(cli,cli_alert_info) +importFrom(cli,cli_alert_success) +importFrom(cli,cli_alert_warning) +importFrom(cli,cli_end) +importFrom(cli,cli_li) +importFrom(cli,cli_ol) +importFrom(cli,cli_text) +importFrom(cli,cli_ul) +importFrom(purrr,map) +importFrom(purrr,map_lgl) +importFrom(purrr,pmap) +importFrom(purrr,pmap_lgl) importFrom(stats,as.formula) importFrom(stats,na.omit) importFrom(stats,ts) diff --git a/R/ds.standardiseDf.R b/R/ds.standardiseDf.R new file mode 100644 index 000000000..daaadbd6a --- /dev/null +++ b/R/ds.standardiseDf.R @@ -0,0 +1,603 @@ +#' Fill DataFrame with Missing Columns and Adjust Classes +#' +#' This function fills a given DataFrame by adding missing columns, ensuring consistent column classes, and adjusting factor levels where necessary. +#' It performs checks to detect class and factor level conflicts and prompts the user for decisions to resolve these conflicts. +#' +#' @param df.name Name of the input DataFrame to fill. +#' @param newobj Name of the new DataFrame object created after filling. +#' @param fix_class Character, determines behaviour if class of variables is not the same in all +#' studies. Option "ask" (default) provides the user with a prompt asking if they want to set the +#' class across all studies, option "no" will throw an error if class conflicts are present. +#' @param fix_levels Character, determines behaviour if levels of factor variables is not the same +#' in all studies. Option "ask" (default) provides the user with a prompt asking if they want to set +#' the levels of factor variables to be the same across all studies, whilst option "no" will throw +#' an error if factor variables do not have the same class. +#' @param datasources Data sources from which to aggregate data. Default is `NULL`. +#' @importFrom assertthat assert_that +#' @importFrom DSI datashield.aggregate datashield.assign +#' @return The filled DataFrame with added columns and adjusted classes or factor levels. +#' @export +ds.standardiseDf <- function(df.name = NULL, newobj = NULL, fix_class = "ask", fix_levels = "ask", + datasources = NULL) { + fill_warnings <- list() + + .check_arguments(df.name, newobj, fix_class, fix_levels) + + if(is.null(datasources)){ + datasources <- datashield.connections_find() + } + + col_names <- datashield.aggregate(datasources, call("colnamesDS", df.name)) + .stop_if_cols_identical(col_names) + + var_classes <- .get_var_classes(df.name, datasources) + class_conflicts <- .identify_class_conflicts(var_classes) + + datashield.assign(datasources, newobj, as.symbol(df.name)) + + if (length(class_conflicts) > 0 & fix_class == "no") { + DSI::datashield.aggregate(datasources, call("rmDS", newobj)) + cli_abort("Variables do not have the same class in all studies and `fix_class` is 'no'") + } else if (length(class_conflicts) > 0 & fix_class == "ask") { + class_decisions <- prompt_user_class_decision_all_vars( + names(class_conflicts), + var_classes$server, + dplyr::select(var_classes, all_of(names(class_conflicts))), + newobj, + datasources + ) + + withCallingHandlers({ + .fix_classes(newobj, names(class_conflicts), class_decisions, newobj, datasources) + }, warning = function(w) { + fill_warnings <<- c(fill_warnings, conditionMessage(w)) # Append warning to the list + invokeRestart("muffleWarning") # Suppress immediate display of the warning + }) + } + + unique_cols <- .get_unique_cols(col_names) + .add_missing_cols_to_df(newobj, unique_cols, newobj, datasources) + new_names <- datashield.aggregate(datasources, call("colnamesDS", newobj)) + added_cols <- .get_added_cols(col_names, new_names) + + new_classes <- .get_var_classes(newobj, datasources) + factor_vars <- .identify_factor_vars(new_classes) + factor_levels <- .get_factor_levels(newobj, factor_vars, datasources) + level_conflicts <- .identify_level_conflicts(factor_levels) + + if (length(level_conflicts) > 0 & fix_levels == "no") { + DSI::datashield.aggregate(datasources, call("rmDS", newobj)) + cli_abort("Factor variables do not have the same levels in all studies and `fix_levels` is 'no'") + } else if (length(level_conflicts) > 0 & fix_levels == "ask") { + levels_decision <- ask_question_wait_response_levels(level_conflicts, newobj, datasources) + } + + if (levels_decision == "1") { + unique_levels <- .get_unique_levels(factor_levels, level_conflicts) + .set_factor_levels(newobj, unique_levels, datasources) + } + + .print_out_messages(added_cols, class_decisions, names(class_conflicts), unique_levels, + level_conflicts, levels_decision, newobj) + + .handle_warnings(fill_warnings) + .print_class_warning(class_conflicts, fix_class, class_decisions) +} + +#' Check Function Arguments for Validity +#' +#' This function validates the arguments provided to ensure they meet specified conditions. +#' It checks that the `fix_class` and `fix_levels` arguments are set to accepted values +#' and that `df.name` and `newobj` are character strings. +#' +#' @param df.name A character string representing the name of the data frame. +#' @param newobj A character string representing the name of the new object to be created. +#' @param fix_class A character string indicating the method for handling class issues. +#' Must be either `"ask"` or `"no"`. +#' @param fix_levels A character string indicating the method for handling level issues. +#' Must be either `"ask"` or `"no"`. +#' @return NULL. This function is used for validation and does not return a value. +#' @importFrom assertthat assert_that +#' @noRd +.check_arguments <- function(df.name, newobj, fix_class, fix_levels) { + assert_that(fix_class %in% c("ask", "no")) + assert_that(fix_levels %in% c("ask", "no")) + assert_that(is.character(df.name)) + assert_that(is.character(newobj)) +} + +#' Stop If Columns Are Identical +#' +#' Checks if the columns in the data frames are identical and throws an error if they are. +#' +#' @param col_names A list of column names from different data sources. +#' @return None. Throws an error if columns are identical. +#' @importFrom cli cli_abort +#' @noRd +.stop_if_cols_identical <- function(col_names) { + are_identical <- all(sapply(col_names, identical, col_names[[1]])) + if (are_identical) { + cli_abort("Columns are identical in all data frames: nothing to fill") + } +} + +#' Get Variable Classes from DataFrame +#' +#' Retrieves the class of each variable in the specified DataFrame from different data sources. +#' +#' @param df.name Name of the input DataFrame. +#' @param datasources Data sources from which to aggregate data. +#' @return A DataFrame containing the variable classes from each data source. +#' @import dplyr +#' @noRd +.get_var_classes <- function(df.name, datasources) { + cally <- call("getClassAllColsDS", df.name) + classes <- datashield.aggregate(datasources, cally) %>% + bind_rows(.id = "server") + return(classes) +} + +#' Identify Class Conflicts +#' +#' Identifies conflicts in variable classes across different data sources. +#' +#' @param classes A DataFrame containing variable classes across data sources. +#' @return A list of variables that have class conflicts. +#' @import dplyr +#' @importFrom purrr map +#' @noRd +.identify_class_conflicts <- function(classes) { + server <- NULL + different_class <- classes |> + dplyr::select(-server) |> + map(~ unique(na.omit(.))) + + out <- different_class[which(different_class %>% map(length) > 1)] + return(out) +} + +#' Prompt User for Class Decision for All Variables +#' +#' Prompts the user to resolve class conflicts for all variables. +#' +#' @param vars A vector of variable names with class conflicts. +#' @param all_servers The names of all servers. +#' @param all_classes The classes of the variables across servers. +#' @return A vector of decisions for each variable's class. +#' @noRd +prompt_user_class_decision_all_vars <- function(vars, all_servers, all_classes, newobj, datasources) { + decisions <- c() + for (i in 1:length(vars)) { + decisions[i] <- prompt_user_class_decision(vars[i], all_servers, all_classes[[i]], newobj, datasources) + } + return(decisions) +} + +#' Prompt User for Class Decision for a Single Variable +#' +#' Prompts the user to resolve a class conflict for a single variable. +#' +#' @param var The variable name with a class conflict. +#' @param all_servers The names of all servers. +#' @param all_classes The classes of the variable across servers. +#' @importFrom cli cli_alert_warning cli_alert_danger +#' @return A decision for the variable's class. +#' @noRd +prompt_user_class_decision <- function(var, servers, classes, newobj, datasources) { + cli_alert_warning("`ds.dataFrameFill` requires that all columns have the same class.") + cli_alert_danger("Column {.strong {var}} has following classes:") + print_all_classes(servers, classes) + cli_text("") + return(ask_question_wait_response_class(var, newobj, datasources)) +} + +#' Print All Server-Class Pairs +#' +#' This function prints out a list of server names along with their corresponding +#' class types. It formats the output with a bullet-point list using the `cli` package. +#' +#' @param all_servers A character vector containing the names of servers. +#' @param all_classes A character vector containing the class types corresponding +#' to each server. +#' @return This function does not return a value. It prints the server-class pairs +#' to the console as a bulleted list. +#' @importFrom cli cli_ul cli_li cli_end +#' @noRd +print_all_classes <- function(all_servers, all_classes) { + combined <- paste(all_servers, all_classes, sep = ": ") + cli_ul() + for (i in 1:length(combined)) { + cli_li("{combined[i]}") + } + cli_end() +} + +#' Ask Question and Wait for Class Response +#' +#' Prompts the user with a question and waits for a response related to class decisions. +#' +#' @param question The question to ask the user. +#' @return The user's decision. +#' @importFrom cli cli_text cli_alert_warning cli_abort +#' @noRd +ask_question_wait_response_class <- function(var, newobj, datasources) { + readline <- NULL + ask_question_class(var) + answer <- readline() + if (answer == "6") { + DSI::datashield.aggregate(datasources, call("rmDS", newobj)) + cli_abort("Aborted `ds.dataFrameFill`", .call = NULL) + } else if (!answer %in% as.character(1:5)) { + cli_text("") + cli_alert_warning("Invalid input. Please try again.") + cli_text("") + ask_question_wait_response_class(var, newobj, datasources) + } else { + return(answer) + } +} + +#' Prompt User for Class Conversion Options +#' +#' This function prompts the user with options to convert a variable to a specific class (e.g., factor, integer, numeric, character, or logical). +#' The function provides a list of class conversion options for the specified variable and includes an option to cancel the operation. +#' +#' @param var The name of the variable for which the user is prompted to select a class conversion option. +#' +#' @importFrom cli cli_alert_info cli_ol +#' @return None. This function is used for prompting the user and does not return a value. +#' @examples +#' ask_question("variable_name") +#' @noRd +ask_question_class <- function(var) { + cli_alert_info("Would you like to:") + class_options <- c("a factor", "an integer", "numeric", "a character", "a logical vector") + class_message <- paste0("Convert `{var}` to ", class_options, " in all studies") + cli_ol( + c(class_message, "Cancel `ds.dataFrameFill` operation") + ) +} + +#' Fix Variable Classes +#' +#' Applies the user's class decisions to fix the classes of variables across different data sources. +#' +#' @param df.name The name of the DataFrame. +#' @param different_classes A list of variables with class conflicts. +#' @param class_decisions The decisions made by the user. +#' @param newobj The name of the new DataFrame. +#' @param datasources Data sources from which to aggregate data. +#' @return None. Updates the DataFrame with consistent variable classes. +#' @noRd +.fix_classes <- function(df.name, different_classes, class_decisions, newobj, datasources) { + cally <- call("fixClassDS", df.name, different_classes, class_decisions) + datashield.assign(datasources, newobj, cally) +} + +#' Get Unique Columns from Data Sources +#' +#' Retrieves all unique columns from the data sources. +#' +#' @param col_names A list of column names. +#' @return A vector of unique column names. +#' @noRd +.get_unique_cols <- function(col_names) { + return( + unique( + unlist(col_names) + ) + ) +} + +#' Add Missing Columns to DataFrame +#' +#' Adds any missing columns to the DataFrame to ensure all columns are present across data sources. +#' +#' @param df.name The name of the DataFrame. +#' @param unique_cols A vector of unique column names. +#' @param newobj The name of the new DataFrame. +#' @param datasources Data sources from which to aggregate data. +#' @return None. Updates the DataFrame with added columns. +#' @noRd +.add_missing_cols_to_df <- function(df.name, cols_to_add_if_missing, newobj, datasources) { + cally <- call("fixColsDS", df.name, cols_to_add_if_missing) + datashield.assign(datasources, newobj, cally) +} + +#' Get Added Columns +#' +#' Compares the old and new column names and identifies newly added columns. +#' +#' @param old_names A list of old column names. +#' @param new_names A list of new column names. +#' @importFrom purrr pmap +#' @return A list of added column names. +#' @noRd +.get_added_cols <- function(old_names, new_names) { + list(old_names, new_names) %>% + pmap(function(.x, .y) { + .y[!.y %in% .x] + }) +} + +#' Identify Factor Variables +#' +#' Identifies which variables are factors in the DataFrame. +#' +#' @param var_classes A DataFrame containing variable classes. +#' @return A vector of factor variables. +#' @noRd +.identify_factor_vars <- function(var_classes) { + return( + var_classes %>% + dplyr::filter(row_number() == 1) %>% + dplyr::select(where(~ . == "factor")) + ) +} + +#' Get Factor Levels from Data Sources +#' +#' Retrieves the levels of factor variables from different data sources. +#' +#' @param factor_vars A vector of factor variables. +#' @param newobj The name of the new DataFrame. +#' @param datasources Data sources from which to aggregate data. +#' @return A list of factor levels. +#' @noRd +.get_factor_levels <- function(df, factor_vars, datasources) { + factor_vars <- paste(names(factor_vars), collapse = ",") + cally <- call("getAllLevelsDS", df, factor_vars) + return(datashield.aggregate(datasources, cally)) +} + +#' Identify Factor Level Conflicts +#' +#' Identifies conflicts in factor levels across different data sources. +#' +#' @param factor_levels A list of factor levels. +#' @return A list of variables with level conflicts. +#' @importFrom purrr map_lgl pmap_lgl +#' @noRd +.identify_level_conflicts <- function(factor_levels) { + levels <- factor_levels %>% + pmap_lgl(function(...) { + args <- list(...) + !all(map_lgl(args[-1], ~ identical(.x, args[[1]]))) + }) + + return(names(levels[levels == TRUE])) +} + +#' Ask Question and Wait for Response on Factor Levels +#' +#' Prompts the user with options for resolving factor level conflicts and waits for a response. +#' +#' @param level_conflicts A list of variables with factor level conflicts. +#' @return The user's decision. +#' @noRd +ask_question_wait_response_levels <- function(level_conflicts, newobj, datasources) { + .make_levels_message(level_conflicts) + answer <- readline() + if (answer == "3") { + DSI::datashield.aggregate(datasources, call("rmDS", newobj)) + cli_abort("Aborted `ds.dataFrameFill`", .call = NULL) + } else if (!answer %in% as.character(1:2)) { + cli_alert_warning("Invalid input. Please try again.") + cli_alert_info("") + .make_levels_message(level_conflicts) + return(ask_question_wait_response_levels(level_conflicts, newobj, datasources)) + } else { + return(answer) + } +} + +#' Make Factor Level Conflict Message +#' +#' Creates a message to alert the user about factor level conflicts and prompt for action. +#' +#' @param level_conflicts A list of variables with factor level conflicts. +#' @importFrom cli cli_alert_warning cli_alert_info cli_ol +#' @return None. Prints the message to the console. +#' @noRd +.make_levels_message <- function(level_conflicts) { + cli_alert_warning("Warning: factor variables {level_conflicts} do not have the same levels in all studies") + cli_alert_info("Would you like to:") + cli_ol(c("Create the missing levels where they are not present", "Do nothing", "Cancel `ds.dataFrameFill` operation")) +} + +#' Get Unique Factor Levels +#' +#' Retrieves the unique factor levels for variables with conflicts. +#' +#' @param factor_levels A list of factor levels. +#' @param level_conflicts A list of variables with level conflicts. +#' @importFrom purrr pmap +#' @return A list of unique factor levels. +#' @noRd +.get_unique_levels <- function(factor_levels, level_conflicts) { + unique_levels <- factor_levels %>% + map(~ .[level_conflicts]) %>% + pmap(function(...) { + as.character(c(...)) + }) %>% + map(~ unique(.)) + return(unique_levels) +} + +#' Set Factor Levels in DataFrame +#' +#' Applies the unique factor levels to the DataFrame. +#' +#' @param newobj The name of the new DataFrame. +#' @param unique_levels A list of unique factor levels. +#' @param datasources Data sources from which to aggregate data. +#' @return None. Updates the DataFrame with the new factor levels. +#' @noRd +.set_factor_levels <- function(df, unique_levels, datasources) { + cally <- call("fixLevelsDS", df, names(unique_levels), unique_levels) + datashield.assign(datasources, df, cally) +} + +#' Print Out Summary Messages +#' +#' Prints summary messages regarding the filled DataFrame, including added columns, class decisions, and factor level adjustments. +#' +#' @param added_cols A list of added columns. +#' @param class_decisions A vector of class decisions. +#' @param different_classes A list of variables with class conflicts. +#' @param unique_levels A list of unique factor levels. +#' @param level_conflicts A list of variables with level conflicts. +#' @param levels_decision The decision made regarding factor levels. +#' @param newobj The name of the new DataFrame. +#' @importFrom cli cli_text +#' @return None. Prints messages to the console. +#' @noRd +.print_out_messages <- function(added_cols, class_decisions, different_classes, unique_levels, + level_conflicts, levels_decision, newobj) { + .print_var_recode_message(added_cols, newobj) + + if (length(different_classes) > 0) { + .print_class_recode_message(class_decisions, different_classes, newobj) + cli_text("") + } + + if (length(level_conflicts) > 0 & levels_decision == "1") { + .print_levels_recode_message(unique_levels, newobj) + } +} + +#' Print Variable Recode Message +#' +#' Prints a message summarizing the columns that were added to the DataFrame. +#' +#' @param added_cols A list of added columns. +#' @param newobj The name of the new DataFrame. +#' @importFrom cli cli_text +#' @return None. Prints the message to the console. +#' @noRd +.print_var_recode_message <- function(added_cols, newobj) { + cli_alert_success("The following variables have been added to {newobj}:") + added_cols_neat <- added_cols %>% map(~ ifelse(length(.) == 0, "", .)) + var_message <- paste0(names(added_cols), " --> ", added_cols_neat) + for (i in 1:length(var_message)) { + cli_alert_info("{var_message[[i]]}") + } + cli_text("") +} + +#' Print Class Recode Message +#' +#' Prints a message summarizing the class decisions that were made for variables with conflicts. +#' +#' @param class_decisions A vector of class decisions. +#' @param different_classes A list of variables with class conflicts. +#' @param newobj The name of the new DataFrame. +#' @importFrom cli cli_alert_info cli_alert_success +#' @return None. Prints the message to the console. +#' @noRd +.print_class_recode_message <- function(class_decisions, different_classes, newobj) { + choice_neat <- .change_choice_to_string(class_decisions) + class_message <- paste0(different_classes, " --> ", choice_neat) + cli_alert_success("The following classes have been set for all datasources in {newobj}: ") + for (i in 1:length(class_message)) { + cli_alert_info("{class_message[[i]]}") + } +} + +#' Convert Class Decision Code to String +#' +#' This function converts a numeric class decision input (represented as a string) +#' into the corresponding class type string (e.g., "factor", "integer", "numeric", etc.). +#' @param class_decision A string representing the class decision. It should be +#' one of the following values: "1", "2", "3", "4", or "5". +#' @return A string representing the class type corresponding to the input: +#' "factor", "integer", "numeric", "character", or "logical". +#' @noRd +.change_choice_to_string <- function(class_decision) { + case_when( + class_decision == "1" ~ "factor", + class_decision == "2" ~ "integer", + class_decision == "3" ~ "numeric", + class_decision == "4" ~ "character", + class_decision == "5" ~ "logical" + ) +} + +#' Print Factor Levels Recode Message +#' +#' Prints a message summarizing the factor level decisions that were made for variables with conflicts. +#' +#' @param unique_levels A list of unique factor levels. +#' @param newobj The name of the new DataFrame. +#' @importFrom cli cli_alert_success cli_alert_info +#' @return None. Prints the message to the console. +#' @noRd +.print_levels_recode_message <- function(unique_levels, newobj) { + levels_message <- .make_levels_recode_message(unique_levels) + cli_alert_success("The following levels have been set for all datasources in {newobj}: ") + for (i in 1:length(levels_message)) { + cli_alert_info("{levels_message[[i]]}") + } +} + +#' Make Levels Recode Message +#' +#' Creates a message to alert the user about factor level recoding. +#' +#' @param unique_levels A list of unique factor levels. +#' @return A formatted string summarizing the level recoding. +#' @importFrom purrr pmap +#' @noRd +.make_levels_recode_message <- function(unique_levels) { + return( + list(names(unique_levels), unique_levels) %>% + pmap(function(.x, .y) { + paste0(.x, " --> ", paste0(.y, collapse = ", ")) + }) + ) +} + +#' Handle Warnings for Class Conversion Issues +#' +#' This function iterates through a list of warnings generated during class conversion and +#' triggers a danger alert if any warnings indicate that the conversion has resulted in `NA` values. +#' +#' @param fill_warnings A list or vector of warning messages generated during class conversion. +#' If any warnings indicate that `NA` values were introduced, a danger alert will be displayed. +#' @return NULL. This function is used for its side effects of printing alerts. +#' @importFrom cli cli_alert_danger +#' @noRd +.handle_warnings <- function(fill_warnings) { + if (length(fill_warnings) > 0) { + for (i in seq_along(fill_warnings)) { + if (grepl("NAs introduced by coercion", fill_warnings[[i]])) { + cli_alert_danger("Class conversion resulted in the creation of NA values.") + } else { + cli_alert_danger(fill_warnings[[i]]) + } + } + } +} + +#' Print Warning for Class Conflicts in Data Conversion +#' +#' This function displays a warning when there are class conflicts in a dataset that may have resulted +#' from incompatible class changes during data conversion. It alerts users to verify column classes, +#' as incompatible changes could corrupt the data. +#' +#' @param class_conflicts A list or vector of conflicting classes identified during conversion. +#' @param fix_class A string indicating the user's choice for fixing class conflicts. Typically, +#' this is "ask" if the user is prompted to confirm class changes. +#' @param class_decisions A vector of decisions made for class conversions. When any value is not +#' "6", it indicates unresolved class conflicts. +#' @return NULL. This function is used for its side effects of printing alerts. +#' @importFrom cli cli_alert_warning +#' @noRd +.print_class_warning <- function(class_conflicts, fix_class, class_decisions) { + if(length(class_conflicts) > 0 & fix_class == "ask" & all(!class_decisions == "6")) { + cli_alert_warning("Please check all columns that have changed class. Not all class changes + are compatible with all data types, so this could have corrupted the data.") + } +} + +readline <- NULL diff --git a/man/ds.standardiseDf.Rd b/man/ds.standardiseDf.Rd new file mode 100644 index 000000000..4f544f1d8 --- /dev/null +++ b/man/ds.standardiseDf.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ds.standardiseDf.R +\name{ds.standardiseDf} +\alias{ds.standardiseDf} +\title{Fill DataFrame with Missing Columns and Adjust Classes} +\usage{ +ds.standardiseDf( + df.name = NULL, + newobj = NULL, + fix_class = "ask", + fix_levels = "ask", + datasources = NULL +) +} +\arguments{ +\item{df.name}{Name of the input DataFrame to fill.} + +\item{newobj}{Name of the new DataFrame object created after filling.} + +\item{fix_class}{Character, determines behaviour if class of variables is not the same in all +studies. Option "ask" (default) provides the user with a prompt asking if they want to set the +class across all studies, option "no" will throw an error if class conflicts are present.} + +\item{fix_levels}{Character, determines behaviour if levels of factor variables is not the same +in all studies. Option "ask" (default) provides the user with a prompt asking if they want to set +the levels of factor variables to be the same across all studies, whilst option "no" will throw +an error if factor variables do not have the same class.} + +\item{datasources}{Data sources from which to aggregate data. Default is `NULL`.} +} +\value{ +The filled DataFrame with added columns and adjusted classes or factor levels. +} +\description{ +This function fills a given DataFrame by adding missing columns, ensuring consistent column classes, and adjusting factor levels where necessary. +It performs checks to detect class and factor level conflicts and prompts the user for decisions to resolve these conflicts. +} diff --git a/tests/testthat/_snaps/smk-standardiseDf.md b/tests/testthat/_snaps/smk-standardiseDf.md new file mode 100644 index 000000000..4fa52ecac --- /dev/null +++ b/tests/testthat/_snaps/smk-standardiseDf.md @@ -0,0 +1,86 @@ +# ask_question displays the correct prompt + + Code + ask_question_class("my_var") + Message + i Would you like to: + 1. Convert `my_var` to a factor in all studies + 2. Convert `my_var` to an integer in all studies + 3. Convert `my_var` to numeric in all studies + 4. Convert `my_var` to a character in all studies + 5. Convert `my_var` to a logical vector in all studies + 6. Cancel `ds.dataFrameFill` operation + +# print_all_classes prints the correct message + + Code + print_all_classes(c("server_1", "server_2", "server_3"), c("numeric", "factor", + "integer")) + Message + * server_1: numeric + * server_2: factor + * server_3: integer + +# .make_levels_message makes correct message + + Code + .make_levels_message(level_conflicts) + Message + ! Warning: factor variables fac_col2, fac_col3, fac_col6, and fac_col9 do not have the same levels in all studies + i Would you like to: + 1. Create the missing levels where they are not present + 2. Do nothing + 3. Cancel `ds.dataFrameFill` operation + +# .print_var_recode_message prints the correct message + + Code + .print_var_recode_message(added_cols, "test_df") + Message + v The following variables have been added to test_df: + i sim1 --> col11 + i sim2 --> col11 + i sim3 --> col12 + + +# .print_class_recode_message prints the correct message + + Code + .print_class_recode_message(class_decisions, different_classes, "test_df") + Message + v The following classes have been set for all datasources in test_df: + i fac_col4 --> factor + i fac_col5 --> logical + +# .print_levels_recode_message prints the correct message + + Code + .print_levels_recode_message(unique_levs, "test_df") + Message + v The following levels have been set for all datasources in test_df: + i fac_col2 --> Blue, Green, Red + i fac_col3 --> No, Yes + i fac_col6 --> Bird, Cat, Dog + i fac_col9 --> False, True + +# .print_out_messages prints the correct messages + + Code + .print_out_messages(added_cols, class_decisions, different_classes, unique_levs, + level_conflicts, "1", "test_df") + Message + v The following variables have been added to test_df: + i sim1 --> col11 + i sim2 --> col11 + i sim3 --> col12 + + v The following classes have been set for all datasources in test_df: + i fac_col4 --> factor + i fac_col5 --> logical + + v The following levels have been set for all datasources in test_df: + i fac_col2 --> Blue, Green, Red + i fac_col3 --> No, Yes + i fac_col6 --> Bird, Cat, Dog + i fac_col9 --> False, True + diff --git a/tests/testthat/connection_to_datasets/init_studies_datasets.R b/tests/testthat/connection_to_datasets/init_studies_datasets.R index 0639aac61..b95ed48a1 100644 --- a/tests/testthat/connection_to_datasets/init_studies_datasets.R +++ b/tests/testthat/connection_to_datasets/init_studies_datasets.R @@ -1,85 +1,85 @@ init.studies.dataset.cnsim <- function(variables) { - if (ds.test_env$secure_login_details) + if (ds.test_env$secure_login_details) + { + if (ds.test_env$driver == "OpalDriver") + { + builder <- DSI::newDSLoginBuilder(.silent = TRUE) + builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "CNSIM.CNSIM1", options=ds.test_env$options_1) + builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "CNSIM.CNSIM2", options=ds.test_env$options_2) + builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "CNSIM.CNSIM3", options=ds.test_env$options_3) + ds.test_env$login.data <- builder$build() + } + else if (ds.test_env$driver == "ArmadilloDriver") { - if (ds.test_env$driver == "OpalDriver") - { - builder <- DSI::newDSLoginBuilder(.silent = TRUE) - builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "CNSIM.CNSIM1", options=ds.test_env$options_1) - builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "CNSIM.CNSIM2", options=ds.test_env$options_2) - builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "CNSIM.CNSIM3", options=ds.test_env$options_3) - ds.test_env$login.data <- builder$build() - } - else if (ds.test_env$driver == "ArmadilloDriver") - { - builder <- DSI::newDSLoginBuilder(.silent = TRUE) - builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "datashield/cnsim/CNSIM1", driver = ds.test_env$driver) - builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "datashield/cnsim/CNSIM2", driver = ds.test_env$driver) - builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/cnsim/CNSIM3", driver = ds.test_env$driver) - ds.test_env$login.data <- builder$build() - } - else - { - ds.test_env$login.data <- DSLite::setupCNSIMTest("dsBase", env = ds.test_env) - } - ds.test_env$stats.var <- variables + builder <- DSI::newDSLoginBuilder(.silent = TRUE) + builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "datashield/cnsim/CNSIM1", driver = ds.test_env$driver) + builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "datashield/cnsim/CNSIM2", driver = ds.test_env$driver) + builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/cnsim/CNSIM3", driver = ds.test_env$driver) + ds.test_env$login.data <- builder$build() } + else + { + ds.test_env$login.data <- DSLite::setupCNSIMTest("dsBase", env = ds.test_env) + } + ds.test_env$stats.var <- variables + } } init.studies.dataset.dasim <- function(variables) { - if (ds.test_env$secure_login_details) + if (ds.test_env$secure_login_details) + { + if (ds.test_env$driver == "OpalDriver") + { + builder <- DSI::newDSLoginBuilder(.silent = TRUE) + builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "DASIM.DASIM1", options=ds.test_env$options_1) + builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "DASIM.DASIM2", options=ds.test_env$options_2) + builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "DASIM.DASIM3", options=ds.test_env$options_3) + ds.test_env$login.data <- builder$build() + } + else if (ds.test_env$driver == "ArmadilloDriver") { - if (ds.test_env$driver == "OpalDriver") - { - builder <- DSI::newDSLoginBuilder(.silent = TRUE) - builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "DASIM.DASIM1", options=ds.test_env$options_1) - builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "DASIM.DASIM2", options=ds.test_env$options_2) - builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "DASIM.DASIM3", options=ds.test_env$options_3) - ds.test_env$login.data <- builder$build() - } - else if (ds.test_env$driver == "ArmadilloDriver") - { - builder <- DSI::newDSLoginBuilder(.silent = TRUE) - builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "datashield/dasim/DASIM1", driver = ds.test_env$driver) - builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "datashield/dasim/DASIM2", driver = ds.test_env$driver) - builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/dasim/DASIM3", driver = ds.test_env$driver) - ds.test_env$login.data <- builder$build() - } - else - { - ds.test_env$login.data <- DSLite::setupDASIMTest("dsBase", env = ds.test_env) - } - ds.test_env$stats.var <- variables + builder <- DSI::newDSLoginBuilder(.silent = TRUE) + builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "datashield/dasim/DASIM1", driver = ds.test_env$driver) + builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "datashield/dasim/DASIM2", driver = ds.test_env$driver) + builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/dasim/DASIM3", driver = ds.test_env$driver) + ds.test_env$login.data <- builder$build() } + else + { + ds.test_env$login.data <- DSLite::setupDASIMTest("dsBase", env = ds.test_env) + } + ds.test_env$stats.var <- variables + } } init.studies.dataset.survival <- function(variables) { - if (ds.test_env$secure_login_details) + if (ds.test_env$secure_login_details) + { + if (ds.test_env$driver == "OpalDriver") + { + builder <- DSI::newDSLoginBuilder(.silent = TRUE) + builder$append(server = "survival1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "SURVIVAL.EXPAND_WITH_MISSING1", options=ds.test_env$options_1) + builder$append(server = "survival2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "SURVIVAL.EXPAND_WITH_MISSING2", options=ds.test_env$options_2) + builder$append(server = "survival3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "SURVIVAL.EXPAND_WITH_MISSING3", options=ds.test_env$options_3) + ds.test_env$login.data <- builder$build() + } + else if (ds.test_env$driver == "ArmadilloDriver") { - if (ds.test_env$driver == "OpalDriver") - { - builder <- DSI::newDSLoginBuilder(.silent = TRUE) - builder$append(server = "survival1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "SURVIVAL.EXPAND_WITH_MISSING1", options=ds.test_env$options_1) - builder$append(server = "survival2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "SURVIVAL.EXPAND_WITH_MISSING2", options=ds.test_env$options_2) - builder$append(server = "survival3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "SURVIVAL.EXPAND_WITH_MISSING3", options=ds.test_env$options_3) - ds.test_env$login.data <- builder$build() - } - else if (ds.test_env$driver == "ArmadilloDriver") - { - builder <- DSI::newDSLoginBuilder(.silent = TRUE) - builder$append(server = "survival1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "datashield/survival/EXPAND_WITH_MISSING1", driver = ds.test_env$driver) - builder$append(server = "survival2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "datashield/survival/EXPAND_WITH_MISSING2", driver = ds.test_env$driver) - builder$append(server = "survival3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/survival/EXPAND_WITH_MISSING3", driver = ds.test_env$driver) - ds.test_env$login.data <- builder$build() - } - else - { - ds.test_env$login.data <- DSLite::setupSURVIVALTest("dsBase", env = ds.test_env) - } - ds.test_env$stats.var <- variables + builder <- DSI::newDSLoginBuilder(.silent = TRUE) + builder$append(server = "survival1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "datashield/survival/EXPAND_WITH_MISSING1", driver = ds.test_env$driver) + builder$append(server = "survival2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "datashield/survival/EXPAND_WITH_MISSING2", driver = ds.test_env$driver) + builder$append(server = "survival3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/survival/EXPAND_WITH_MISSING3", driver = ds.test_env$driver) + ds.test_env$login.data <- builder$build() } + else + { + ds.test_env$login.data <- DSLite::setupSURVIVALTest("dsBase", env = ds.test_env) + } + ds.test_env$stats.var <- variables + } } init.studies.dataset.cluster.int <- function(variables) @@ -107,7 +107,7 @@ init.studies.dataset.cluster.int <- function(variables) builder$append(server = "cluster.int3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/cluster/CLUSTER_INT3", driver = ds.test_env$driver) ds.test_env$login.data <- builder$build() } - else + else { #to do #ds.test_env$login.data <- DSLite::setupCLUSTERTest("dsBase", env = ds.test_env) @@ -141,7 +141,7 @@ init.studies.dataset.cluster.slo <- function(variables) builder$append(server = "cluster.slo3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/cluster/CLUSTER_SLO3", driver = ds.test_env$driver) ds.test_env$login.data <- builder$build() } - else + else { #to do #ds.test_env$login.data <- DSLite::setupCLUSTERTest("dsBase", env = ds.test_env) @@ -176,7 +176,7 @@ init.studies.dataset.anthro <- function(variables) builder$append(server = "study3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/anthro/anthro3", driver = ds.test_env$driver) ds.test_env$login.data <- builder$build() } - else + else { #to do #ds.test_env$login.data <- DSLite::setupCLUSTERTest("dsBase", env = ds.test_env) @@ -211,39 +211,95 @@ init.studies.dataset.gamlss <- function(variables) builder$append(server = "study3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/gamlss/gamlss3", driver = ds.test_env$driver) ds.test_env$login.data <- builder$build() } - else + else { #to do #ds.test_env$login.data <- DSLite::setupCLUSTERTest("dsBase", env = ds.test_env) } ds.test_env$stats.var <- variables - + + } +} + +init.studies.dataset.stand <- function(variables) +{ + if (ds.test_env$secure_login_details) + { + if (ds.test_env$driver == "OpalDriver") + { + builder <- DSI::newDSLoginBuilder(.silent = TRUE) + builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "STANDARDISE.std_1", options=ds.test_env$options_1) + builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "STANDARDISE.std_1", options=ds.test_env$options_2) + builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "STANDARDISE.std_1", options=ds.test_env$options_3) + ds.test_env$login.data <- builder$build() + } + else if (ds.test_env$driver == "ArmadilloDriver") + { + builder <- DSI::newDSLoginBuilder(.silent = TRUE) + builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "datashield/standardise/std_1", driver = ds.test_env$driver) + builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "datashield/standardise/std_2", driver = ds.test_env$driver) + builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/standardise/std_3", driver = ds.test_env$driver) + ds.test_env$login.data <- builder$build() + } + else + { + ds.test_env$login.data <- DSLite::setupCNSIMTest("dsBase", env = ds.test_env) + } + ds.test_env$stats.var <- variables + } +} + +init.studies.dataset.stand_disclosure <- function(variables) +{ + if (ds.test_env$secure_login_details) + { + if (ds.test_env$driver == "OpalDriver") + { + builder <- DSI::newDSLoginBuilder(.silent = TRUE) + builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "STANDARDISE.std_1_d", options=ds.test_env$options_1) + builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "STANDARDISE.std_2_d", options=ds.test_env$options_2) + builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "STANDARDISE.std_3_d", options=ds.test_env$options_3) + ds.test_env$login.data <- builder$build() + } + else if (ds.test_env$driver == "ArmadilloDriver") + { + builder <- DSI::newDSLoginBuilder(.silent = TRUE) + builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "datashield/standardise/std_1_d", driver = ds.test_env$driver) + builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "datashield/standardise/std_2_d", driver = ds.test_env$driver) + builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/standardise/std_3_d", driver = ds.test_env$driver) + ds.test_env$login.data <- builder$build() + } + else + { + ds.test_env$login.data <- DSLite::setupCNSIMTest("dsBase", env = ds.test_env) + } + ds.test_env$stats.var <- variables } } connect.studies.dataset.cnsim <- function(variables) { - log.out.data.server() - source("connection_to_datasets/login_details.R") - init.studies.dataset.cnsim(variables) - log.in.data.server() + log.out.data.server() + source("connection_to_datasets/login_details.R") + init.studies.dataset.cnsim(variables) + log.in.data.server() } connect.studies.dataset.dasim <- function(variables) { - log.out.data.server() - source("connection_to_datasets/login_details.R") - init.studies.dataset.dasim(variables) - log.in.data.server() + log.out.data.server() + source("connection_to_datasets/login_details.R") + init.studies.dataset.dasim(variables) + log.in.data.server() } connect.studies.dataset.survival <- function(variables) { - log.out.data.server() - source("connection_to_datasets/login_details.R") - init.studies.dataset.survival(variables) - log.in.data.server() + log.out.data.server() + source("connection_to_datasets/login_details.R") + init.studies.dataset.survival(variables) + log.in.data.server() } connect.studies.dataset.cluster.int <- function(variables) @@ -278,19 +334,35 @@ connect.studies.dataset.gamlss <- function(variables) log.in.data.server() } +connect.studies.dataset.stand <- function(variables) +{ + log.out.data.server() + source("connection_to_datasets/login_details.R") + init.studies.dataset.stand(variables) + log.in.data.server() +} + +connect.studies.dataset.stand_disclosure <- function(variables) +{ + log.out.data.server() + source("connection_to_datasets/login_details.R") + init.studies.dataset.stand_disclosure(variables) + log.in.data.server() +} + disconnect.studies.dataset.cnsim <- function() { - log.out.data.server() + log.out.data.server() } disconnect.studies.dataset.dasim <- function() { - log.out.data.server() + log.out.data.server() } disconnect.studies.dataset.survival <- function() { - log.out.data.server() + log.out.data.server() } disconnect.studies.dataset.cluster.int <- function() @@ -312,3 +384,8 @@ disconnect.studies.dataset.gamlss <- function() { log.out.data.server() } + +disconnect.studies.dataset.stand <- function() +{ + log.out.data.server() +} diff --git a/tests/testthat/data_files/STANDARDISE/std_1.csv b/tests/testthat/data_files/STANDARDISE/std_1.csv new file mode 100644 index 000000000..328909592 --- /dev/null +++ b/tests/testthat/data_files/STANDARDISE/std_1.csv @@ -0,0 +1,101 @@ +fac_col1,fac_col2,fac_col3,fac_col4,fac_col5,fac_col6,fac_col9,col12,col15,col18 +High,Blue,Yes,3,NA,Bird,True,28.757752012461424,31,TRUE +High,Blue,Yes,3,NA,Bird,True,78.83051354438066,79,TRUE +High,Blue,Yes,3,NA,Bird,True,40.89769218116999,51,TRUE +Medium,Green,No,2,NA,Dog,False,88.301740400493145,14,FALSE +High,Blue,Yes,3,NA,Bird,True,94.04672842938453,67,TRUE +Medium,Green,No,2,NA,Dog,False,4.555649938993156,42,FALSE +Medium,Green,No,2,NA,Dog,False,52.810548804700375,50,FALSE +Medium,Green,No,2,NA,Dog,False,89.2419044394046,43,FALSE +High,Blue,Yes,3,NA,Bird,True,55.14350144658238,14,TRUE +Low,NA,Yes,1,NA,Cat,True,45.661473530344665,25,TRUE +Medium,Green,No,2,NA,Dog,False,95.68333453498781,90,FALSE +Medium,Green,No,2,NA,Dog,False,45.33341561909765,91,FALSE +Low,NA,No,1,NA,Cat,False,67.75706354528666,69,FALSE +Medium,Green,Yes,2,NA,Dog,True,57.26334019564092,91,TRUE +High,Blue,No,3,NA,Bird,False,10.292468266561627,57,FALSE +Low,NA,Yes,1,NA,Cat,True,89.98249704018235,92,TRUE +High,Blue,No,3,NA,Bird,False,24.60877343546599,9,FALSE +High,Blue,Yes,3,NA,Bird,True,4.205953353084624,93,TRUE +Low,NA,Yes,1,NA,Cat,True,32.79207192827016,99,TRUE +Low,NA,Yes,1,NA,Cat,True,95.45036491472274,72,TRUE +Low,NA,Yes,1,NA,Cat,True,88.95393160637468,26,TRUE +Low,NA,No,1,NA,Cat,False,69.28034061565995,7,FALSE +High,Blue,Yes,3,NA,Bird,True,64.05068137682974,42,TRUE +Medium,Green,Yes,2,NA,Dog,True,99.42697766236961,9,TRUE +High,Blue,Yes,3,NA,Bird,True,65.57057991158217,83,TRUE +Medium,Green,Yes,2,NA,Dog,True,70.85304681677371,36,TRUE +Low,NA,No,1,NA,Cat,False,54.40660247113556,78,FALSE +Medium,Green,No,2,NA,Dog,False,59.41420204471797,81,FALSE +High,Blue,Yes,3,NA,Bird,True,28.91597372945398,43,TRUE +Medium,Green,No,2,NA,Dog,False,14.711364731192589,76,FALSE +Low,NA,Yes,1,NA,Cat,True,96.30242325365543,15,TRUE +High,Blue,No,3,NA,Bird,False,90.22990451194346,32,FALSE +High,Blue,Yes,3,NA,Bird,True,69.07052784226835,7,TRUE +Low,NA,No,1,NA,Cat,False,79.54674176871777,9,FALSE +High,Blue,No,3,NA,Bird,False,2.461368450894952,41,FALSE +Medium,Green,Yes,2,NA,Dog,True,47.77959710918367,74,TRUE +Low,NA,Yes,1,NA,Cat,True,75.84595375228673,23,TRUE +High,Blue,Yes,3,NA,Bird,True,21.640793583355844,27,TRUE +Low,NA,Yes,1,NA,Cat,True,31.818100763484836,60,TRUE +Low,NA,No,1,NA,Cat,False,23.16257853526622,53,FALSE +Medium,Green,Yes,2,NA,Dog,True,14.280002238228917,7,TRUE +High,Blue,No,3,NA,Bird,False,41.45463358145207,53,FALSE +High,Blue,No,3,NA,Bird,False,41.372432629577816,27,FALSE +Low,NA,Yes,1,NA,Cat,True,36.884545092470944,96,TRUE +High,Blue,Yes,3,NA,Bird,True,15.244474774226546,38,TRUE +Low,NA,Yes,1,NA,Cat,True,13.880606344901025,89,TRUE +High,Blue,Yes,3,NA,Bird,True,23.303409945219755,34,TRUE +Medium,Green,No,2,NA,Dog,False,46.59624502528459,93,FALSE +Low,NA,Yes,1,NA,Cat,True,26.597264036536217,69,TRUE +Medium,Green,Yes,2,NA,Dog,True,85.78277153428644,72,TRUE +Low,NA,No,1,NA,Cat,False,4.583116667345166,76,FALSE +Low,NA,Yes,1,NA,Cat,True,44.220007420517504,63,TRUE +High,Blue,Yes,3,NA,Bird,True,79.89248456433415,13,TRUE +Low,NA,Yes,1,NA,Cat,True,12.189925997518003,82,TRUE +Medium,Green,Yes,2,NA,Dog,True,56.094798375852406,97,TRUE +Low,NA,No,1,NA,Cat,False,20.65313896164298,91,FALSE +Low,NA,No,1,NA,Cat,False,12.753165024332702,25,FALSE +High,Blue,Yes,3,NA,Bird,True,75.33078643027693,38,TRUE +Low,NA,No,1,NA,Cat,False,89.50453591533005,21,FALSE +Medium,Green,Yes,2,NA,Dog,True,37.44627758860588,79,TRUE +Low,NA,Yes,1,NA,Cat,True,66.51151946280152,41,TRUE +High,Blue,No,3,NA,Bird,False,9.484066092409194,47,FALSE +Low,NA,No,1,NA,Cat,False,38.39696377981454,90,FALSE +High,Blue,Yes,3,NA,Bird,True,27.43836445733905,60,TRUE +Medium,Green,Yes,2,NA,Dog,True,81.46400388795882,95,TRUE +High,Blue,No,3,NA,Bird,False,44.851634139195085,16,FALSE +Medium,Green,Yes,2,NA,Dog,True,81.00643530488014,94,TRUE +Medium,Green,Yes,2,NA,Dog,True,81.23895095195621,6,TRUE +High,Blue,Yes,3,NA,Bird,True,79.43423211108893,72,TRUE +Medium,Green,Yes,2,NA,Dog,True,43.983168760314584,86,TRUE +Medium,Green,No,2,NA,Dog,False,75.44751586392522,86,FALSE +High,Blue,Yes,3,NA,Bird,True,62.922113155946136,39,TRUE +High,Blue,Yes,3,NA,Bird,True,71.01824013516307,31,TRUE +Low,NA,Yes,1,NA,Cat,True,0.062477332539856434,81,TRUE +Medium,Green,Yes,2,NA,Dog,True,47.53165740985423,50,TRUE +Medium,Green,No,2,NA,Dog,False,22.011888516135514,34,FALSE +Low,NA,No,1,NA,Cat,False,37.98165377229452,4,FALSE +Medium,Green,Yes,2,NA,Dog,True,61.277100327424705,13,TRUE +Low,NA,No,1,NA,Cat,False,35.179790924303234,69,FALSE +Low,NA,No,1,NA,Cat,False,11.113542434759438,25,FALSE +Medium,Green,No,2,NA,Dog,False,24.361947271972895,52,FALSE +High,Blue,No,3,NA,Bird,False,66.80555874481797,22,FALSE +High,Blue,Yes,3,NA,Bird,True,41.764677967876196,89,TRUE +Low,NA,No,1,NA,Cat,False,78.81958340294659,32,FALSE +Medium,Green,No,2,NA,Dog,False,10.286464425735176,25,FALSE +Low,NA,No,1,NA,Cat,False,43.489274149760604,87,FALSE +Medium,Green,Yes,2,NA,Dog,True,98.49569799844176,35,TRUE +Low,NA,Yes,1,NA,Cat,True,89.30511143989861,40,TRUE +High,Blue,No,3,NA,Bird,False,88.64690607879311,30,FALSE +High,Blue,Yes,3,NA,Bird,True,17.505265027284622,12,TRUE +Medium,Green,No,2,NA,Dog,False,13.069569156505167,31,FALSE +High,Blue,No,3,NA,Bird,False,65.31019250396639,30,FALSE +Low,NA,Yes,1,NA,Cat,True,34.3516472261399,64,TRUE +Medium,Green,No,2,NA,Dog,False,65.67581279668957,99,FALSE +Medium,Green,No,2,NA,Dog,False,32.03732424881309,14,FALSE +High,Blue,Yes,3,NA,Bird,True,18.769111926667392,93,TRUE +Medium,Green,Yes,2,NA,Dog,True,78.22943013161421,96,TRUE +Low,NA,No,1,NA,Cat,False,9.359498671256006,71,FALSE +High,Blue,Yes,3,NA,Bird,True,46.677904156968,67,TRUE +High,Blue,Yes,3,NA,Bird,True,51.15054599009454,23,TRUE diff --git a/tests/testthat/data_files/STANDARDISE/std_1.rda b/tests/testthat/data_files/STANDARDISE/std_1.rda new file mode 100644 index 000000000..df5a8854f Binary files /dev/null and b/tests/testthat/data_files/STANDARDISE/std_1.rda differ diff --git a/tests/testthat/data_files/STANDARDISE/std_1_d.csv b/tests/testthat/data_files/STANDARDISE/std_1_d.csv new file mode 100644 index 000000000..9fe7462f6 --- /dev/null +++ b/tests/testthat/data_files/STANDARDISE/std_1_d.csv @@ -0,0 +1,101 @@ +fac_col1,fac_col2,fac_col3,fac_col4,fac_col5,fac_col6,fac_col9,col12,col15,col18 +High,Blue,Yes,3,NA,Bird,True,28.7577520124614,31,TRUE +High,Blue,Yes,3,NA,Bird,True,78.8305135443807,79,TRUE +High,Blue,Yes,3,NA,Bird,True,40.89769218117,51,TRUE +Medium,Green,No,2,NA,Dog,False,88.3017404004931,14,FALSE +High,Blue,Yes,3,NA,Bird,True,94.0467284293845,67,TRUE +Medium,Green,No,2,NA,Dog,False,4.55564993899316,42,FALSE +Medium,Green,No,2,NA,Dog,False,52.8105488047004,50,FALSE +Medium,Green,No,2,NA,Dog,False,89.2419044394046,43,FALSE +High,Blue,Yes,3,NA,Bird,True,55.1435014465824,14,TRUE +Low,NA,Yes,1,NA,Cat,True,45.6614735303447,25,TRUE +Medium,Green,No,2,NA,Dog,False,95.6833345349878,90,FALSE +Medium,Green,No,2,NA,Dog,False,45.3334156190977,91,FALSE +Low,NA,No,1,NA,Cat,False,67.7570635452867,69,FALSE +Medium,Green,Yes,2,NA,Dog,True,57.2633401956409,91,TRUE +High,Blue,No,3,NA,Bird,False,10.2924682665616,57,FALSE +Low,NA,Yes,1,NA,Cat,True,89.9824970401824,92,TRUE +High,Blue,No,3,NA,Bird,False,24.608773435466,9,FALSE +High,Blue,Yes,3,NA,Bird,True,4.20595335308462,93,TRUE +Low,NA,Yes,1,NA,Cat,True,32.7920719282702,99,TRUE +Low,NA,Yes,1,NA,Cat,True,95.4503649147227,72,TRUE +Low,NA,Yes,1,NA,Cat,True,88.9539316063747,26,TRUE +Low,NA,No,1,NA,Cat,False,69.28034061566,7,FALSE +High,Blue,Yes,3,NA,Bird,True,64.0506813768297,42,TRUE +Medium,Green,Yes,2,NA,Dog,True,99.4269776623696,9,TRUE +High,Blue,Yes,3,NA,Bird,True,65.5705799115822,83,TRUE +Medium,Green,Yes,2,NA,Dog,True,70.8530468167737,36,TRUE +Low,NA,No,1,NA,Cat,False,54.4066024711356,78,FALSE +Medium,Green,No,2,NA,Dog,False,59.414202044718,81,FALSE +High,Blue,Yes,3,NA,Bird,True,28.915973729454,43,TRUE +Medium,Green,No,2,NA,Dog,False,14.7113647311926,76,FALSE +Low,NA,Yes,1,NA,Cat,True,96.3024232536554,15,TRUE +High,Blue,No,3,NA,Bird,False,90.2299045119435,32,FALSE +High,Blue,Yes,3,NA,Bird,True,69.0705278422683,7,TRUE +Low,NA,No,1,NA,Cat,False,79.5467417687178,9,FALSE +High,Blue,No,3,NA,Bird,False,2.46136845089495,41,FALSE +Medium,Green,Yes,2,NA,Dog,True,47.7795971091837,74,TRUE +Low,NA,Yes,1,NA,Cat,True,75.8459537522867,23,TRUE +High,Blue,Yes,3,NA,Bird,True,21.6407935833558,27,TRUE +Low,NA,Yes,1,NA,Cat,True,31.8181007634848,60,TRUE +Low,NA,No,1,NA,Cat,False,23.1625785352662,53,FALSE +Medium,Green,Yes,2,NA,Dog,True,14.2800022382289,7,TRUE +High,Blue,No,3,NA,Bird,False,41.4546335814521,53,FALSE +High,Blue,No,3,NA,Bird,False,41.3724326295778,27,FALSE +Low,NA,Yes,1,NA,Cat,True,36.8845450924709,96,TRUE +High,Blue,Yes,3,NA,Bird,True,15.2444747742265,38,TRUE +Low,NA,Yes,1,NA,Cat,True,13.880606344901,89,TRUE +High,Blue,Yes,3,NA,Bird,True,23.3034099452198,34,TRUE +Medium,Green,No,2,NA,Dog,False,46.5962450252846,93,FALSE +Low,NA,Yes,1,NA,Cat,True,26.5972640365362,69,TRUE +Medium,Green,Yes,2,NA,Dog,True,85.7827715342864,72,TRUE +Low,NA,No,1,NA,Cat,False,4.58311666734517,76,FALSE +Low,NA,Yes,1,NA,Cat,True,44.2200074205175,63,TRUE +High,Blue,Yes,3,NA,Bird,True,79.8924845643342,13,TRUE +Low,NA,Yes,1,NA,Cat,True,12.189925997518,82,TRUE +Medium,Green,Yes,2,NA,Dog,True,56.0947983758524,97,TRUE +Low,NA,No,1,NA,Cat,False,20.653138961643,91,FALSE +Low,NA,No,1,NA,Cat,False,12.7531650243327,25,FALSE +High,Blue,Yes,3,NA,Bird,True,75.3307864302769,38,TRUE +Low,NA,No,1,NA,Cat,False,89.50453591533,21,FALSE +Medium,Green,Yes,2,NA,Dog,True,37.4462775886059,79,TRUE +Low,NA,Yes,1,NA,Cat,True,66.5115194628015,41,TRUE +High,Blue,No,3,NA,Bird,False,9.48406609240919,47,FALSE +Low,NA,No,1,NA,Cat,False,38.3969637798145,90,FALSE +High,Blue,Yes,3,NA,Bird,True,27.438364457339,60,TRUE +Medium,Green,Yes,2,NA,Dog,True,81.4640038879588,95,TRUE +High,Blue,No,3,NA,Bird,False,44.8516341391951,16,FALSE +Medium,Green,Yes,2,NA,Dog,True,81.0064353048801,94,TRUE +Medium,Green,Yes,2,NA,Dog,True,81.2389509519562,6,TRUE +High,Blue,Yes,3,NA,Bird,True,79.4342321110889,72,TRUE +Medium,Green,Yes,2,NA,Dog,True,43.9831687603146,86,TRUE +Medium,Green,No,2,NA,Dog,False,75.4475158639252,86,FALSE +High,Blue,Yes,3,NA,Bird,True,62.9221131559461,39,TRUE +High,Blue,Yes,3,NA,Bird,True,71.0182401351631,31,TRUE +Low,NA,Yes,1,NA,Cat,True,0.0624773325398564,81,TRUE +Medium,Green,Yes,2,NA,Dog,True,47.5316574098542,50,TRUE +Medium,Green,No,2,NA,Dog,False,22.0118885161355,34,FALSE +Low,NA,No,1,NA,Cat,False,37.9816537722945,4,FALSE +Medium,Green,Yes,2,NA,Dog,True,61.2771003274247,13,TRUE +Low,NA,No,1,NA,Cat,False,35.1797909243032,69,FALSE +Low,NA,No,1,NA,Cat,False,11.1135424347594,25,FALSE +Medium,Green,No,2,NA,Dog,False,24.3619472719729,52,FALSE +High,Blue,No,3,NA,Bird,False,66.805558744818,22,FALSE +High,Blue,Yes,3,NA,Bird,True,41.7646779678762,89,TRUE +Low,NA,No,1,NA,Cat,False,78.8195834029466,32,FALSE +Medium,Green,No,2,NA,Dog,False,10.2864644257352,25,FALSE +Low,NA,No,1,NA,Cat,False,43.4892741497606,87,FALSE +Medium,Green,Yes,2,NA,Dog,True,98.4956979984418,35,TRUE +Low,NA,Yes,1,NA,Cat,True,89.3051114398986,40,TRUE +High,Blue,No,3,NA,Bird,False,88.6469060787931,30,FALSE +High,Blue,Yes,3,NA,Bird,True,17.5052650272846,12,TRUE +Medium,Green,No,2,NA,Dog,False,13.0695691565052,31,FALSE +High,Blue,No,3,NA,Bird,False,65.3101925039664,30,FALSE +Low,NA,Yes,1,NA,Cat,True,34.3516472261399,64,TRUE +Medium,Green,No,2,NA,Dog,False,65.6758127966896,99,FALSE +Medium,Green,No,2,NA,Dog,False,32.0373242488131,14,FALSE +High,Blue,Yes,3,NA,Bird,True,18.7691119266674,93,TRUE +Medium,Green,Yes,2,NA,Dog,True,78.2294301316142,96,TRUE +Low,NA,No,1,NA,Cat,False,9.35949867125601,71,FALSE +High,Blue,Yes,3,NA,Bird,True,46.677904156968,67,TRUE +High,Blue,Yes,3,NA,Bird,True,51.1505459900945,23,TRUE diff --git a/tests/testthat/data_files/STANDARDISE/std_1_d.rda b/tests/testthat/data_files/STANDARDISE/std_1_d.rda new file mode 100644 index 000000000..4387f72b8 Binary files /dev/null and b/tests/testthat/data_files/STANDARDISE/std_1_d.rda differ diff --git a/tests/testthat/data_files/STANDARDISE/std_2.csv b/tests/testthat/data_files/STANDARDISE/std_2.csv new file mode 100644 index 000000000..9c20866c4 --- /dev/null +++ b/tests/testthat/data_files/STANDARDISE/std_2.csv @@ -0,0 +1,101 @@ +fac_col1,fac_col2,fac_col3,fac_col4,fac_col5,fac_col7,fac_col10,col13,col16,col19 +High,NA,NA,C,2,Large,Left,o,28.757752012461424,31 +High,NA,NA,C,2,Large,Left,s,78.83051354438066,79 +High,NA,NA,C,2,Large,Left,n,40.89769218116999,51 +Medium,Green,No,B,3,Medium,Right,c,88.301740400493145,14 +High,NA,NA,C,2,Large,Left,j,94.04672842938453,67 +Medium,Green,No,B,3,Medium,Right,r,4.555649938993156,42 +Medium,Green,No,B,3,Medium,Right,v,52.810548804700375,50 +Medium,Green,No,B,3,Medium,Right,k,89.2419044394046,43 +High,NA,NA,C,2,Large,Left,e,55.14350144658238,14 +Low,Red,NA,A,1,Small,Left,t,45.661473530344665,25 +Medium,Green,No,B,3,Medium,Right,n,95.68333453498781,90 +Medium,Green,No,B,3,Medium,Right,v,45.33341561909765,91 +Low,Red,No,A,1,Small,Right,y,67.75706354528666,69 +Medium,Green,NA,B,3,Medium,Left,z,57.26334019564092,91 +High,NA,No,C,2,Large,Right,e,10.292468266561627,57 +Low,Red,NA,A,1,Small,Left,s,89.98249704018235,92 +High,NA,No,C,2,Large,Right,y,24.60877343546599,9 +High,NA,NA,C,2,Large,Left,y,4.205953353084624,93 +Low,Red,NA,A,1,Small,Left,i,32.79207192827016,99 +Low,Red,NA,A,1,Small,Left,c,95.45036491472274,72 +Low,Red,NA,A,1,Small,Left,h,88.95393160637468,26 +Low,Red,No,A,1,Small,Right,z,69.28034061565995,7 +High,NA,NA,C,2,Large,Left,g,64.05068137682974,42 +Medium,Green,NA,B,3,Medium,Left,j,99.42697766236961,9 +High,NA,NA,C,2,Large,Left,i,65.57057991158217,83 +Medium,Green,NA,B,3,Medium,Left,s,70.85304681677371,36 +Low,Red,No,A,1,Small,Right,d,54.40660247113556,78 +Medium,Green,No,B,3,Medium,Right,n,59.41420204471797,81 +High,NA,NA,C,2,Large,Left,q,28.91597372945398,43 +Medium,Green,No,B,3,Medium,Right,k,14.711364731192589,76 +Low,Red,NA,A,1,Small,Left,g,96.30242325365543,15 +High,NA,No,C,2,Large,Right,u,90.22990451194346,32 +High,NA,NA,C,2,Large,Left,l,69.07052784226835,7 +Low,Red,No,A,1,Small,Right,o,79.54674176871777,9 +High,NA,No,C,2,Large,Right,j,2.461368450894952,41 +Medium,Green,NA,B,3,Medium,Left,m,47.77959710918367,74 +Low,Red,NA,A,1,Small,Left,g,75.84595375228673,23 +High,NA,NA,C,2,Large,Left,i,21.640793583355844,27 +Low,Red,NA,A,1,Small,Left,i,31.818100763484836,60 +Low,Red,No,A,1,Small,Right,j,23.16257853526622,53 +Medium,Green,NA,B,3,Medium,Left,w,14.280002238228917,7 +High,NA,No,C,2,Large,Right,u,41.45463358145207,53 +High,NA,No,C,2,Large,Right,g,41.372432629577816,27 +Low,Red,NA,A,1,Small,Left,u,36.884545092470944,96 +High,NA,NA,C,2,Large,Left,f,15.244474774226546,38 +Low,Red,NA,A,1,Small,Left,y,13.880606344901025,89 +High,NA,NA,C,2,Large,Left,b,23.303409945219755,34 +Medium,Green,No,B,3,Medium,Right,e,46.59624502528459,93 +Low,Red,NA,A,1,Small,Left,h,26.597264036536217,69 +Medium,Green,NA,B,3,Medium,Left,l,85.78277153428644,72 +Low,Red,No,A,1,Small,Right,m,4.583116667345166,76 +Low,Red,NA,A,1,Small,Left,r,44.220007420517504,63 +High,NA,NA,C,2,Large,Left,a,79.89248456433415,13 +Low,Red,NA,A,1,Small,Left,y,12.189925997518003,82 +Medium,Green,NA,B,3,Medium,Left,y,56.094798375852406,97 +Low,Red,No,A,1,Small,Right,f,20.65313896164298,91 +Low,Red,No,A,1,Small,Right,u,12.753165024332702,25 +High,NA,NA,C,2,Large,Left,o,75.33078643027693,38 +Low,Red,No,A,1,Small,Right,i,89.50453591533005,21 +Medium,Green,NA,B,3,Medium,Left,o,37.44627758860588,79 +Low,Red,NA,A,1,Small,Left,z,66.51151946280152,41 +High,NA,No,C,2,Large,Right,p,9.484066092409194,47 +Low,Red,No,A,1,Small,Right,t,38.39696377981454,90 +High,NA,NA,C,2,Large,Left,f,27.43836445733905,60 +Medium,Green,NA,B,3,Medium,Left,k,81.46400388795882,95 +High,NA,No,C,2,Large,Right,h,44.851634139195085,16 +Medium,Green,NA,B,3,Medium,Left,v,81.00643530488014,94 +Medium,Green,NA,B,3,Medium,Left,v,81.23895095195621,6 +High,NA,NA,C,2,Large,Left,g,79.43423211108893,72 +Medium,Green,NA,B,3,Medium,Left,p,43.983168760314584,86 +Medium,Green,No,B,3,Medium,Right,q,75.44751586392522,86 +High,NA,NA,C,2,Large,Left,v,62.922113155946136,39 +High,NA,NA,C,2,Large,Left,r,71.01824013516307,31 +Low,Red,NA,A,1,Small,Left,q,0.062477332539856434,81 +Medium,Green,NA,B,3,Medium,Left,b,47.53165740985423,50 +Medium,Green,No,B,3,Medium,Right,d,22.011888516135514,34 +Low,Red,No,A,1,Small,Right,m,37.98165377229452,4 +Medium,Green,NA,B,3,Medium,Left,e,61.277100327424705,13 +Low,Red,No,A,1,Small,Right,v,35.179790924303234,69 +Low,Red,No,A,1,Small,Right,s,11.113542434759438,25 +Medium,Green,No,B,3,Medium,Right,y,24.361947271972895,52 +High,NA,No,C,2,Large,Right,t,66.80555874481797,22 +High,NA,NA,C,2,Large,Left,v,41.764677967876196,89 +Low,Red,No,A,1,Small,Right,y,78.81958340294659,32 +Medium,Green,No,B,3,Medium,Right,n,10.286464425735176,25 +Low,Red,No,A,1,Small,Right,y,43.489274149760604,87 +Medium,Green,NA,B,3,Medium,Left,w,98.49569799844176,35 +Low,Red,NA,A,1,Small,Left,c,89.30511143989861,40 +High,NA,No,C,2,Large,Right,h,88.64690607879311,30 +High,NA,NA,C,2,Large,Left,p,17.505265027284622,12 +Medium,Green,No,B,3,Medium,Right,l,13.069569156505167,31 +High,NA,No,C,2,Large,Right,y,65.31019250396639,30 +Low,Red,NA,A,1,Small,Left,n,34.3516472261399,64 +Medium,Green,No,B,3,Medium,Right,c,65.67581279668957,99 +Medium,Green,No,B,3,Medium,Right,n,32.03732424881309,14 +High,NA,NA,C,2,Large,Left,g,18.769111926667392,93 +Medium,Green,NA,B,3,Medium,Left,c,78.22943013161421,96 +Low,Red,No,A,1,Small,Right,w,9.359498671256006,71 +High,NA,NA,C,2,Large,Left,v,46.677904156968,67 +High,NA,NA,C,2,Large,Left,z,51.15054599009454,23 diff --git a/tests/testthat/data_files/STANDARDISE/std_2.rda b/tests/testthat/data_files/STANDARDISE/std_2.rda new file mode 100644 index 000000000..cbe3d2791 Binary files /dev/null and b/tests/testthat/data_files/STANDARDISE/std_2.rda differ diff --git a/tests/testthat/data_files/STANDARDISE/std_2_d.csv b/tests/testthat/data_files/STANDARDISE/std_2_d.csv new file mode 100644 index 000000000..0a5a46eba --- /dev/null +++ b/tests/testthat/data_files/STANDARDISE/std_2_d.csv @@ -0,0 +1,101 @@ +fac_col1,fac_col2,fac_col3,fac_col4,fac_col5,fac_col7,fac_col10,col13,col16,col19 +High,NA,NA,C,2,Large,Left,o,28.7577520124614,31 +High,NA,NA,C,2,Large,Left,s,78.8305135443807,79 +High,NA,NA,C,2,Large,Left,n,40.89769218117,51 +Medium,Green,No,B,3,Medium,Right,c,88.3017404004931,14 +High,NA,NA,C,2,Large,Left,j,94.0467284293845,67 +Medium,Green,No,B,3,Medium,Right,r,4.55564993899316,42 +Medium,Green,No,B,3,Medium,Right,v,52.8105488047004,50 +Medium,Green,No,B,3,Medium,Right,k,89.2419044394046,43 +High,NA,NA,C,2,Large,Left,e,55.1435014465824,14 +Low,Red,NA,A,1,Small,Left,t,45.6614735303447,25 +Medium,Green,No,B,3,Medium,Right,n,95.6833345349878,90 +Medium,Green,No,B,3,Medium,Right,v,45.3334156190977,91 +Low,Red,No,A,1,Small,Right,y,67.7570635452867,69 +Medium,Green,NA,B,3,Medium,Left,z,57.2633401956409,91 +High,NA,No,C,2,Large,Right,e,10.2924682665616,57 +Low,Red,NA,A,1,Small,Left,s,89.9824970401824,92 +High,NA,No,C,2,Large,Right,y,24.608773435466,9 +High,NA,NA,C,2,Large,Left,y,4.20595335308462,93 +Low,Red,NA,A,1,Small,Left,i,32.7920719282702,99 +Low,Red,NA,A,1,Small,Left,c,95.4503649147227,72 +Low,Red,NA,A,1,Small,Left,h,88.9539316063747,26 +Low,Red,No,A,1,Small,Right,z,69.28034061566,7 +High,NA,NA,C,2,Large,Left,g,64.0506813768297,42 +Medium,Green,NA,B,3,Medium,Left,j,99.4269776623696,9 +High,NA,NA,C,2,Large,Left,i,65.5705799115822,83 +Medium,Green,NA,B,3,Medium,Left,s,70.8530468167737,36 +Low,Red,No,A,1,Small,Right,d,54.4066024711356,78 +Medium,Green,No,B,3,Medium,Right,n,59.414202044718,81 +High,NA,NA,C,2,Large,Left,q,28.915973729454,43 +Medium,Green,No,B,3,Medium,Right,k,14.7113647311926,76 +Low,Red,NA,A,1,Small,Left,g,96.3024232536554,15 +High,NA,No,C,2,Large,Right,u,90.2299045119435,32 +High,NA,NA,C,2,Large,Left,l,69.0705278422683,7 +Low,Red,No,A,1,Small,Right,o,79.5467417687178,9 +High,NA,No,C,2,Large,Right,j,2.46136845089495,41 +Medium,Green,NA,B,3,Medium,Left,m,47.7795971091837,74 +Low,Red,NA,A,1,Small,Left,g,75.8459537522867,23 +High,NA,NA,C,2,Large,Left,i,21.6407935833558,27 +Low,Red,NA,A,1,Small,Left,i,31.8181007634848,60 +Low,Red,No,A,1,Small,Right,j,23.1625785352662,53 +Medium,Green,NA,B,3,Medium,Left,w,14.2800022382289,7 +High,NA,No,C,2,Large,Right,u,41.4546335814521,53 +High,NA,No,C,2,Large,Right,g,41.3724326295778,27 +Low,Red,NA,A,1,Small,Left,u,36.8845450924709,96 +High,NA,NA,C,2,Large,Left,f,15.2444747742265,38 +Low,Red,NA,A,1,Small,Left,y,13.880606344901,89 +High,NA,NA,C,2,Large,Left,b,23.3034099452198,34 +Medium,Green,No,B,3,Medium,Right,e,46.5962450252846,93 +Low,Red,NA,A,1,Small,Left,h,26.5972640365362,69 +Medium,Green,NA,B,3,Medium,Left,l,85.7827715342864,72 +Low,Red,No,A,1,Small,Right,m,4.58311666734517,76 +Low,Red,NA,A,1,Small,Left,r,44.2200074205175,63 +High,NA,NA,C,2,Large,Left,a,79.8924845643342,13 +Low,Red,NA,A,1,Small,Left,y,12.189925997518,82 +Medium,Green,NA,B,3,Medium,Left,y,56.0947983758524,97 +Low,Red,No,A,1,Small,Right,f,20.653138961643,91 +Low,Red,No,A,1,Small,Right,u,12.7531650243327,25 +High,NA,NA,C,2,Large,Left,o,75.3307864302769,38 +Low,Red,No,A,1,Small,Right,i,89.50453591533,21 +Medium,Green,NA,B,3,Medium,Left,o,37.4462775886059,79 +Low,Red,NA,A,1,Small,Left,z,66.5115194628015,41 +High,NA,No,C,2,Large,Right,p,9.48406609240919,47 +Low,Red,No,A,1,Small,Right,t,38.3969637798145,90 +High,NA,NA,C,2,Large,Left,f,27.438364457339,60 +Medium,Green,NA,B,3,Medium,Left,k,81.4640038879588,95 +High,NA,No,C,2,Large,Right,h,44.8516341391951,16 +Medium,Green,NA,B,3,Medium,Left,v,81.0064353048801,94 +Medium,Green,NA,B,3,Medium,Left,v,81.2389509519562,6 +High,NA,NA,C,2,Large,Left,g,79.4342321110889,72 +Medium,Green,NA,B,3,Medium,Left,p,43.9831687603146,86 +Medium,Green,No,B,3,Medium,Right,q,75.4475158639252,86 +High,NA,NA,C,2,Large,Left,v,62.9221131559461,39 +High,NA,NA,C,2,Large,Left,r,71.0182401351631,31 +Low,Red,NA,A,1,Small,Left,q,0.0624773325398564,81 +Medium,Green,NA,B,3,Medium,Left,b,47.5316574098542,50 +Medium,Green,No,B,3,Medium,Right,d,22.0118885161355,34 +Low,Red,No,A,1,Small,Right,m,37.9816537722945,4 +Medium,Green,NA,B,3,Medium,Left,e,61.2771003274247,13 +Low,Red,No,A,1,Small,Right,v,35.1797909243032,69 +Low,Red,No,A,1,Small,Right,s,11.1135424347594,25 +Medium,Green,No,B,3,Medium,Right,y,24.3619472719729,52 +High,NA,No,C,2,Large,Right,t,66.805558744818,22 +High,NA,NA,C,2,Large,Left,v,41.7646779678762,89 +Low,Red,No,A,1,Small,Right,y,78.8195834029466,32 +Medium,Green,No,B,3,Medium,Right,n,10.2864644257352,25 +Low,Red,No,A,1,Small,Right,y,43.4892741497606,87 +Medium,Green,NA,B,3,Medium,Left,w,98.4956979984418,35 +Low,Red,NA,A,1,Small,Left,c,89.3051114398986,40 +High,NA,No,C,2,Large,Right,h,88.6469060787931,30 +High,NA,NA,C,2,Large,Left,p,17.5052650272846,12 +Medium,Green,No,B,3,Medium,Right,l,13.0695691565052,31 +High,NA,No,C,2,Large,Right,y,65.3101925039664,30 +Low,Red,NA,A,1,Small,Left,n,34.3516472261399,64 +Medium,Green,No,B,3,Medium,Right,c,65.6758127966896,99 +Medium,Green,No,B,3,Medium,Right,n,32.0373242488131,14 +High,NA,NA,C,2,Large,Left,g,18.7691119266674,93 +Medium,Green,NA,B,3,Medium,Left,c,78.2294301316142,96 +Low,Red,No,A,1,Small,Right,w,9.35949867125601,71 +High,NA,NA,C,2,Large,Left,v,46.677904156968,67 +High,NA,NA,C,2,Large,Left,z,51.1505459900945,23 diff --git a/tests/testthat/data_files/STANDARDISE/std_2_d.rda b/tests/testthat/data_files/STANDARDISE/std_2_d.rda new file mode 100644 index 000000000..cad2198a2 Binary files /dev/null and b/tests/testthat/data_files/STANDARDISE/std_2_d.rda differ diff --git a/tests/testthat/data_files/STANDARDISE/std_3.csv b/tests/testthat/data_files/STANDARDISE/std_3.csv new file mode 100644 index 000000000..583c9a22e --- /dev/null +++ b/tests/testthat/data_files/STANDARDISE/std_3.csv @@ -0,0 +1,101 @@ +fac_col1,fac_col2,fac_col3,fac_col4,fac_col5,col11,col14,col17,col20 +High,Blue,Yes,C,Three,31,TRUE,o,28.757752012461424 +High,Blue,Yes,C,Three,79,TRUE,s,78.83051354438066 +High,Blue,Yes,C,Three,51,TRUE,n,40.89769218116999 +Medium,NA,NA,B,Two,14,FALSE,c,88.301740400493145 +High,Blue,Yes,C,Three,67,TRUE,j,94.04672842938453 +Medium,NA,NA,B,Two,42,FALSE,r,4.555649938993156 +Medium,NA,NA,B,Two,50,FALSE,v,52.810548804700375 +Medium,NA,NA,B,Two,43,FALSE,k,89.2419044394046 +High,Blue,Yes,C,Three,14,TRUE,e,55.14350144658238 +Low,NA,Yes,A,One,25,TRUE,t,45.661473530344665 +Medium,NA,NA,B,Two,90,FALSE,n,95.68333453498781 +Medium,NA,NA,B,Two,91,FALSE,v,45.33341561909765 +Low,NA,NA,A,One,69,FALSE,y,67.75706354528666 +Medium,NA,Yes,B,Two,91,TRUE,z,57.26334019564092 +High,Blue,NA,C,Three,57,FALSE,e,10.292468266561627 +Low,NA,Yes,A,One,92,TRUE,s,89.98249704018235 +High,Blue,NA,C,Three,9,FALSE,y,24.60877343546599 +High,Blue,Yes,C,Three,93,TRUE,y,4.205953353084624 +Low,NA,Yes,A,One,99,TRUE,i,32.79207192827016 +Low,NA,Yes,A,One,72,TRUE,c,95.45036491472274 +Low,NA,Yes,A,One,26,TRUE,h,88.95393160637468 +Low,NA,NA,A,One,7,FALSE,z,69.28034061565995 +High,Blue,Yes,C,Three,42,TRUE,g,64.05068137682974 +Medium,NA,Yes,B,Two,9,TRUE,j,99.42697766236961 +High,Blue,Yes,C,Three,83,TRUE,i,65.57057991158217 +Medium,NA,Yes,B,Two,36,TRUE,s,70.85304681677371 +Low,NA,NA,A,One,78,FALSE,d,54.40660247113556 +Medium,NA,NA,B,Two,81,FALSE,n,59.41420204471797 +High,Blue,Yes,C,Three,43,TRUE,q,28.91597372945398 +Medium,NA,NA,B,Two,76,FALSE,k,14.711364731192589 +Low,NA,Yes,A,One,15,TRUE,g,96.30242325365543 +High,Blue,NA,C,Three,32,FALSE,u,90.22990451194346 +High,Blue,Yes,C,Three,7,TRUE,l,69.07052784226835 +Low,NA,NA,A,One,9,FALSE,o,79.54674176871777 +High,Blue,NA,C,Three,41,FALSE,j,2.461368450894952 +Medium,NA,Yes,B,Two,74,TRUE,m,47.77959710918367 +Low,NA,Yes,A,One,23,TRUE,g,75.84595375228673 +High,Blue,Yes,C,Three,27,TRUE,i,21.640793583355844 +Low,NA,Yes,A,One,60,TRUE,i,31.818100763484836 +Low,NA,NA,A,One,53,FALSE,j,23.16257853526622 +Medium,NA,Yes,B,Two,7,TRUE,w,14.280002238228917 +High,Blue,NA,C,Three,53,FALSE,u,41.45463358145207 +High,Blue,NA,C,Three,27,FALSE,g,41.372432629577816 +Low,NA,Yes,A,One,96,TRUE,u,36.884545092470944 +High,Blue,Yes,C,Three,38,TRUE,f,15.244474774226546 +Low,NA,Yes,A,One,89,TRUE,y,13.880606344901025 +High,Blue,Yes,C,Three,34,TRUE,b,23.303409945219755 +Medium,NA,NA,B,Two,93,FALSE,e,46.59624502528459 +Low,NA,Yes,A,One,69,TRUE,h,26.597264036536217 +Medium,NA,Yes,B,Two,72,TRUE,l,85.78277153428644 +Low,NA,NA,A,One,76,FALSE,m,4.583116667345166 +Low,NA,Yes,A,One,63,TRUE,r,44.220007420517504 +High,Blue,Yes,C,Three,13,TRUE,a,79.89248456433415 +Low,NA,Yes,A,One,82,TRUE,y,12.189925997518003 +Medium,NA,Yes,B,Two,97,TRUE,y,56.094798375852406 +Low,NA,NA,A,One,91,FALSE,f,20.65313896164298 +Low,NA,NA,A,One,25,FALSE,u,12.753165024332702 +High,Blue,Yes,C,Three,38,TRUE,o,75.33078643027693 +Low,NA,NA,A,One,21,FALSE,i,89.50453591533005 +Medium,NA,Yes,B,Two,79,TRUE,o,37.44627758860588 +Low,NA,Yes,A,One,41,TRUE,z,66.51151946280152 +High,Blue,NA,C,Three,47,FALSE,p,9.484066092409194 +Low,NA,NA,A,One,90,FALSE,t,38.39696377981454 +High,Blue,Yes,C,Three,60,TRUE,f,27.43836445733905 +Medium,NA,Yes,B,Two,95,TRUE,k,81.46400388795882 +High,Blue,NA,C,Three,16,FALSE,h,44.851634139195085 +Medium,NA,Yes,B,Two,94,TRUE,v,81.00643530488014 +Medium,NA,Yes,B,Two,6,TRUE,v,81.23895095195621 +High,Blue,Yes,C,Three,72,TRUE,g,79.43423211108893 +Medium,NA,Yes,B,Two,86,TRUE,p,43.983168760314584 +Medium,NA,NA,B,Two,86,FALSE,q,75.44751586392522 +High,Blue,Yes,C,Three,39,TRUE,v,62.922113155946136 +High,Blue,Yes,C,Three,31,TRUE,r,71.01824013516307 +Low,NA,Yes,A,One,81,TRUE,q,0.062477332539856434 +Medium,NA,Yes,B,Two,50,TRUE,b,47.53165740985423 +Medium,NA,NA,B,Two,34,FALSE,d,22.011888516135514 +Low,NA,NA,A,One,4,FALSE,m,37.98165377229452 +Medium,NA,Yes,B,Two,13,TRUE,e,61.277100327424705 +Low,NA,NA,A,One,69,FALSE,v,35.179790924303234 +Low,NA,NA,A,One,25,FALSE,s,11.113542434759438 +Medium,NA,NA,B,Two,52,FALSE,y,24.361947271972895 +High,Blue,NA,C,Three,22,FALSE,t,66.80555874481797 +High,Blue,Yes,C,Three,89,TRUE,v,41.764677967876196 +Low,NA,NA,A,One,32,FALSE,y,78.81958340294659 +Medium,NA,NA,B,Two,25,FALSE,n,10.286464425735176 +Low,NA,NA,A,One,87,FALSE,y,43.489274149760604 +Medium,NA,Yes,B,Two,35,TRUE,w,98.49569799844176 +Low,NA,Yes,A,One,40,TRUE,c,89.30511143989861 +High,Blue,NA,C,Three,30,FALSE,h,88.64690607879311 +High,Blue,Yes,C,Three,12,TRUE,p,17.505265027284622 +Medium,NA,NA,B,Two,31,FALSE,l,13.069569156505167 +High,Blue,NA,C,Three,30,FALSE,y,65.31019250396639 +Low,NA,Yes,A,One,64,TRUE,n,34.3516472261399 +Medium,NA,NA,B,Two,99,FALSE,c,65.67581279668957 +Medium,NA,NA,B,Two,14,FALSE,n,32.03732424881309 +High,Blue,Yes,C,Three,93,TRUE,g,18.769111926667392 +Medium,NA,Yes,B,Two,96,TRUE,c,78.22943013161421 +Low,NA,NA,A,One,71,FALSE,w,9.359498671256006 +High,Blue,Yes,C,Three,67,TRUE,v,46.677904156968 +High,Blue,Yes,C,Three,23,TRUE,z,51.15054599009454 diff --git a/tests/testthat/data_files/STANDARDISE/std_3.rda b/tests/testthat/data_files/STANDARDISE/std_3.rda new file mode 100644 index 000000000..803626748 Binary files /dev/null and b/tests/testthat/data_files/STANDARDISE/std_3.rda differ diff --git a/tests/testthat/data_files/STANDARDISE/std_3_d.csv b/tests/testthat/data_files/STANDARDISE/std_3_d.csv new file mode 100644 index 000000000..5dfce08f8 --- /dev/null +++ b/tests/testthat/data_files/STANDARDISE/std_3_d.csv @@ -0,0 +1,101 @@ +fac_col1,fac_col2,fac_col3,fac_col4,fac_col5,col11,col14,col17,col20 +High,Blue,Yes,C,Three,31,TRUE,o,28.7577520124614 +High,Blue,Yes,C,Three,79,TRUE,s,78.8305135443807 +High,Blue,Yes,C,Three,51,TRUE,n,40.89769218117 +Medium,NA,NA,B,Two,14,FALSE,c,88.3017404004931 +High,Blue,Yes,C,Three,67,TRUE,j,94.0467284293845 +Medium,NA,NA,B,Two,42,FALSE,r,4.55564993899316 +Medium,NA,NA,B,Two,50,FALSE,v,52.8105488047004 +Medium,NA,NA,B,Two,43,FALSE,k,89.2419044394046 +High,Blue,Yes,C,Three,14,TRUE,e,55.1435014465824 +Low,NA,Yes,A,One,25,TRUE,t,45.6614735303447 +Medium,NA,NA,B,Two,90,FALSE,n,95.6833345349878 +Medium,NA,NA,B,Two,91,FALSE,v,45.3334156190977 +Low,NA,NA,A,One,69,FALSE,y,67.7570635452867 +Medium,NA,Yes,B,Two,91,TRUE,z,57.2633401956409 +High,Blue,NA,C,Three,57,FALSE,e,10.2924682665616 +Low,NA,Yes,A,One,92,TRUE,s,89.9824970401824 +High,Blue,NA,C,Three,9,FALSE,y,24.608773435466 +High,Blue,Yes,C,Three,93,TRUE,y,4.20595335308462 +Low,NA,Yes,A,One,99,TRUE,i,32.7920719282702 +Low,NA,Yes,A,One,72,TRUE,c,95.4503649147227 +Low,NA,Yes,A,One,26,TRUE,h,88.9539316063747 +Low,NA,NA,A,One,7,FALSE,z,69.28034061566 +High,Blue,Yes,C,Three,42,TRUE,g,64.0506813768297 +Medium,NA,Yes,B,Two,9,TRUE,j,99.4269776623696 +High,Blue,Yes,C,Three,83,TRUE,i,65.5705799115822 +Medium,NA,Yes,B,Two,36,TRUE,s,70.8530468167737 +Low,NA,NA,A,One,78,FALSE,d,54.4066024711356 +Medium,NA,NA,B,Two,81,FALSE,n,59.414202044718 +High,Blue,Yes,C,Three,43,TRUE,q,28.915973729454 +Medium,NA,NA,B,Two,76,FALSE,k,14.7113647311926 +Low,NA,Yes,A,One,15,TRUE,g,96.3024232536554 +High,Blue,NA,C,Three,32,FALSE,u,90.2299045119435 +High,Blue,Yes,C,Three,7,TRUE,l,69.0705278422683 +Low,NA,NA,A,One,9,FALSE,o,79.5467417687178 +High,Blue,NA,C,Three,41,FALSE,j,2.46136845089495 +Medium,NA,Yes,B,Two,74,TRUE,m,47.7795971091837 +Low,NA,Yes,A,One,23,TRUE,g,75.8459537522867 +High,Blue,Yes,C,Three,27,TRUE,i,21.6407935833558 +Low,NA,Yes,A,One,60,TRUE,i,31.8181007634848 +Low,NA,NA,A,One,53,FALSE,j,23.1625785352662 +Medium,NA,Yes,B,Two,7,TRUE,w,14.2800022382289 +High,Blue,NA,C,Three,53,FALSE,u,41.4546335814521 +High,Blue,NA,C,Three,27,FALSE,g,41.3724326295778 +Low,NA,Yes,A,One,96,TRUE,u,36.8845450924709 +High,Blue,Yes,C,Three,38,TRUE,f,15.2444747742265 +Low,NA,Yes,A,One,89,TRUE,y,13.880606344901 +High,Blue,Yes,C,Three,34,TRUE,b,23.3034099452198 +Medium,NA,NA,B,Two,93,FALSE,e,46.5962450252846 +Low,NA,Yes,A,One,69,TRUE,h,26.5972640365362 +Medium,NA,Yes,B,Two,72,TRUE,l,85.7827715342864 +Low,NA,NA,A,One,76,FALSE,m,4.58311666734517 +Low,NA,Yes,A,One,63,TRUE,r,44.2200074205175 +High,Blue,Yes,C,Three,13,TRUE,a,79.8924845643342 +Low,NA,Yes,A,One,82,TRUE,y,12.189925997518 +Medium,NA,Yes,B,Two,97,TRUE,y,56.0947983758524 +Low,NA,NA,A,One,91,FALSE,f,20.653138961643 +Low,NA,NA,A,One,25,FALSE,u,12.7531650243327 +High,Blue,Yes,C,Three,38,TRUE,o,75.3307864302769 +Low,NA,NA,A,One,21,FALSE,i,89.50453591533 +Medium,NA,Yes,B,Two,79,TRUE,o,37.4462775886059 +Low,NA,Yes,A,One,41,TRUE,z,66.5115194628015 +High,Blue,NA,C,Three,47,FALSE,p,9.48406609240919 +Low,NA,NA,A,One,90,FALSE,t,38.3969637798145 +High,Blue,Yes,C,Three,60,TRUE,f,27.438364457339 +Medium,NA,Yes,B,Two,95,TRUE,k,81.4640038879588 +High,Blue,NA,C,Three,16,FALSE,h,44.8516341391951 +Medium,NA,Yes,B,Two,94,TRUE,v,81.0064353048801 +Medium,NA,Yes,B,Two,6,TRUE,v,81.2389509519562 +High,Blue,Yes,C,Three,72,TRUE,g,79.4342321110889 +Medium,NA,Yes,B,Two,86,TRUE,p,43.9831687603146 +Medium,NA,NA,B,Two,86,FALSE,q,75.4475158639252 +High,Blue,Yes,C,Three,39,TRUE,v,62.9221131559461 +High,Blue,Yes,C,Three,31,TRUE,r,71.0182401351631 +Low,NA,Yes,A,One,81,TRUE,q,0.0624773325398564 +Medium,NA,Yes,B,Two,50,TRUE,b,47.5316574098542 +Medium,NA,NA,B,Two,34,FALSE,d,22.0118885161355 +Low,NA,NA,A,One,4,FALSE,m,37.9816537722945 +Medium,NA,Yes,B,Two,13,TRUE,e,61.2771003274247 +Low,NA,NA,A,One,69,FALSE,v,35.1797909243032 +Low,NA,NA,A,One,25,FALSE,s,11.1135424347594 +Medium,NA,NA,B,Two,52,FALSE,y,24.3619472719729 +High,Blue,NA,C,Three,22,FALSE,t,66.805558744818 +High,Blue,Yes,C,Three,89,TRUE,v,41.7646779678762 +Low,NA,NA,A,One,32,FALSE,y,78.8195834029466 +Medium,NA,NA,B,Two,25,FALSE,n,10.2864644257352 +Low,NA,NA,A,One,87,FALSE,y,43.4892741497606 +Medium,NA,Yes,B,Two,35,TRUE,w,98.4956979984418 +Low,NA,Yes,A,One,40,TRUE,c,89.3051114398986 +High,Blue,NA,C,Three,30,FALSE,h,88.6469060787931 +High,Blue,Yes,C,Three,12,TRUE,p,17.5052650272846 +Medium,NA,NA,B,Two,31,FALSE,l,13.0695691565052 +High,Blue,NA,C,Three,30,FALSE,y,65.3101925039664 +Low,NA,Yes,A,One,64,TRUE,n,34.3516472261399 +Medium,NA,NA,B,Two,99,FALSE,c,65.6758127966896 +Medium,NA,NA,B,Two,14,FALSE,n,32.0373242488131 +High,Blue,Yes,C,Three,93,TRUE,g,18.7691119266674 +Medium,NA,Yes,B,Two,96,TRUE,c,78.2294301316142 +Low,NA,NA,A,One,71,FALSE,w,9.35949867125601 +High,Blue,Yes,C,Three,67,TRUE,v,46.677904156968 +High,Blue,Yes,C,Three,23,TRUE,z,51.1505459900945 diff --git a/tests/testthat/data_files/STANDARDISE/std_3_d.rda b/tests/testthat/data_files/STANDARDISE/std_3_d.rda new file mode 100644 index 000000000..4165f0983 Binary files /dev/null and b/tests/testthat/data_files/STANDARDISE/std_3_d.rda differ diff --git a/tests/testthat/data_files/molgenis_armadillo-upload_testing_datasets.R b/tests/testthat/data_files/molgenis_armadillo-upload_testing_datasets.R index 9e1299710..e6dac369f 100644 --- a/tests/testthat/data_files/molgenis_armadillo-upload_testing_datasets.R +++ b/tests/testthat/data_files/molgenis_armadillo-upload_testing_datasets.R @@ -1,4 +1,4 @@ -# +# # Molgenis' Armadillo - Upload Testing Datasets # @@ -15,7 +15,7 @@ upload_testing_dataset_table <- function(project_name, folder_name, table_name, MolgenisArmadillo::armadillo.login_basic(armadillo = 'http://127.0.0.1:8080', username = "admin", password = "admin") if (! 'datashield' %in% MolgenisArmadillo::armadillo.list_projects()) - MolgenisArmadillo::armadillo.create_project('datashield') + MolgenisArmadillo::armadillo.create_project('datashield', overwrite_existing = "no") upload_testing_dataset_table('datashield', 'anthro', 'anthro1', 'ANTHRO/anthro1.rda') upload_testing_dataset_table('datashield', 'anthro', 'anthro2', 'ANTHRO/anthro2.rda') @@ -69,4 +69,12 @@ upload_testing_dataset_table('datashield', 'testing', 'DATASET1', 'TESTING/DATAS upload_testing_dataset_table('datashield', 'testing', 'DATASET2', 'TESTING/DATASET2.rda') upload_testing_dataset_table('datashield', 'testing', 'DATASET3', 'TESTING/DATASET3.rda') +upload_testing_dataset_table('datashield', 'standardise', 'std_1', 'STANDARDISE/std_1.rda') +upload_testing_dataset_table('datashield', 'standardise', 'std_2', 'STANDARDISE/std_2.rda') +upload_testing_dataset_table('datashield', 'standardise', 'std_3', 'STANDARDISE/std_3.rda') + +upload_testing_dataset_table('datashield', 'standardise', 'std_1_d', 'STANDARDISE/std_1.rda') +upload_testing_dataset_table('datashield', 'standardise', 'std_2_d', 'STANDARDISE/std_2.rda') +upload_testing_dataset_table('datashield', 'standardise', 'std_3_d', 'STANDARDISE/std_3.rda') + print(MolgenisArmadillo::armadillo.list_tables('datashield')) diff --git a/tests/testthat/data_files/obiba_opal-upload_testing_datasets.R b/tests/testthat/data_files/obiba_opal-upload_testing_datasets.R index ae79d2e62..1fce513b3 100644 --- a/tests/testthat/data_files/obiba_opal-upload_testing_datasets.R +++ b/tests/testthat/data_files/obiba_opal-upload_testing_datasets.R @@ -1,4 +1,4 @@ -# +# # Obiba's Opal - Upload Testing Datasets # @@ -9,11 +9,11 @@ library(tibble) upload_testing_dataset_table <- function(opal, project_name, table_name, local_file_path) { if (! opal.project_exists(opal, project_name)) opal.project_create(opal, project_name, database = "mongodb") - + dataset_name <- load(file = local_file_path) dataset <- eval(as.symbol(dataset_name)) data <- as_tibble(dataset, rownames = '_row_id_') - + opal.table_save(opal, data, project_name, table_name, id.name = "_row_id_", force = TRUE) } @@ -72,4 +72,14 @@ upload_testing_dataset_table(opal, 'TESTING', 'DATASET1', 'TESTING/DATASET1.rda' upload_testing_dataset_table(opal, 'TESTING', 'DATASET2', 'TESTING/DATASET2.rda') upload_testing_dataset_table(opal, 'TESTING', 'DATASET3', 'TESTING/DATASET3.rda') +upload_testing_dataset_table('STANDARDISE', 'std_1', 'STANDARDISE/std_1.rda') +upload_testing_dataset_table('STANDARDISE', 'std_2', 'STANDARDISE/std_2.rda') +upload_testing_dataset_table('STANDARDISE', 'std_3', 'STANDARDISE/std_3.rda') + +upload_testing_dataset_table('STANDARDISE', 'std_1_d', 'STANDARDISE/std_1_d.rda') +upload_testing_dataset_table('STANDARDISE', 'std_2_d', 'STANDARDISE/std_2_d.rda') +upload_testing_dataset_table('STANDARDISE', 'std_3_d', 'STANDARDISE/std_3_d.rda') + + + opal.logout(opal) diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index 4c55c6e74..1b0f9b8a6 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -24,6 +24,7 @@ library(DescTools) library(DSOpal) library(DSMolgenisArmadillo) library(DSLite) +library(dsBase) source("dstest_functions/ds_expect_variables.R") source("perf_tests/perf_rate.R") diff --git a/tests/testthat/test-smk-standardiseDf.R b/tests/testthat/test-smk-standardiseDf.R new file mode 100644 index 000000000..d557c325b --- /dev/null +++ b/tests/testthat/test-smk-standardiseDf.R @@ -0,0 +1,712 @@ +# +# Set up +# +context("ds.standardiseDf::smk::setup") +options(datashield.errors.print = TRUE) + +connect.studies.dataset.stand( + c( + "fac_col1", "fac_col2", "fac_col3", "fac_col4", "fac_col5", "fac_col6", "fac_col7", "fac_col9", + "fac_col10", "col11", "col12", "col13", "col14", "col15", "col16", "col17", "col18", "col19", + "col20") + ) + +test_that("setup", { + ds_expect_variables(c("D")) +}) + +# +# Tests +# + +#################################################################################################### +# Code that will be used in multiple tests +#################################################################################################### +var_class <- .get_var_classes("D", datasources = ds.test_env$connections) + +class_conflicts <- .identify_class_conflicts(var_class) + +different_classes <- c("fac_col4", "fac_col5") + +class_decisions <- c("1", "5") + +.fix_classes( + df.name = "D", + different_classes = different_classes, + class_decisions = class_decisions, + newobj = "new_classes", + datasources = ds.test_env$connections) + +cols_to_set <- c( + "fac_col1", "fac_col2", "fac_col3", "fac_col4", "fac_col5", "fac_col6", "fac_col9", "col12", + "col15", "col18", "fac_col7", "fac_col10", "col13", "col16", "col19", "col11", "col14", "col17", + "col20") + +.add_missing_cols_to_df( + df.name = "D", + cols_to_add_if_missing = cols_to_set, + newobj = "with_new_cols", + datasources = ds.test_env$connections) + +old_cols <- ds.colnames("D") + +new_cols <- c("col11", "col12", "col13", "col14", "col15", "col16", "col17", "col18", "col19", + "col20", "fac_col1", "fac_col10", "fac_col2", "fac_col3", "fac_col4", "fac_col5", + "fac_col6", "fac_col7", "fac_col9") + +new_cols_servers <- list( + server_1 = new_cols, + server_2 = new_cols, + server_3 = new_cols +) + +added_cols <- .get_added_cols(old_cols, new_cols_servers) + +var_class_fact <- .get_var_classes("with_new_cols", datasources = ds.test_env$connections) + +fac_vars <- .identify_factor_vars(var_class_fact) + +fac_levels <- .get_factor_levels("with_new_cols", fac_vars, ds.test_env$connections) + +level_conflicts <- .identify_level_conflicts(fac_levels) + +unique_levs <- .get_unique_levels(fac_levels, level_conflicts) + +#################################################################################################### +# Tests +#################################################################################################### +test_that(".stop_if_cols_identical throws error if columns are identical", { + + identical_cols <- list( + c("col1", "col2", "col3"), + c("col1", "col2", "col3"), + c("col1", "col2", "col3") + ) + + expect_error( + .stop_if_cols_identical(identical_cols), + "Columns are identical in all data frames: nothing to fill" + ) + +}) + +test_that(".stop_if_cols_identical doesn't throw error if data frames have different columns", { + + different_cols <- list( + c("col1", "col2", "col3"), + c("col1", "col2", "col4"), + c("col1", "col5", "col3") + ) + + expect_silent( + .stop_if_cols_identical(different_cols) + ) + +}) + +test_that(".get_var_classes returns correct output", { + + expected <- tibble( + server = c("sim1", "sim2", "sim3"), + fac_col1 = c("factor", "factor", "factor"), + fac_col2 = c("factor", "factor", "factor"), + fac_col3 = c("factor", "factor", "factor"), + fac_col4 = c("numeric", "character", "factor"), + fac_col5 = c("logical", "integer", "factor"), + fac_col6 = c("factor", NA, NA), + fac_col9 = c("factor", NA, NA), + col12 = c("numeric", NA, NA), + col15 = c("integer", NA, NA), + col18 = c("logical", NA, NA), + fac_col7 = c(NA, "factor", NA), + fac_col10 = c(NA, "factor", NA), + col13 = c(NA, "character", NA), + col16 = c(NA, "numeric", NA), + col19 = c(NA, "integer", NA), + col11 = c(NA, NA, "integer"), + col14 = c(NA, NA, "logical"), + col17 = c(NA, NA, "character"), + col20 = c(NA, NA, "numeric") + ) + + expect_equal(var_class, expected) + +}) + +test_that(".identify_class_conflicts returns correct output", { + expected <- list( + fac_col4 = c("numeric", "character", "factor"), + fac_col5 = c("logical", "integer", "factor") + ) + + expect_equal(class_conflicts, expected) + +}) + +test_that("ask_question displays the correct prompt", { + expect_snapshot(ask_question_class("my_var")) +}) + +test_that("ask_question_wait_response_class continues with valid response", { + expect_equal( + with_mocked_bindings( + ask_question_wait_response_class("a variable"), + ask_question_class = function(var) "A question", + readline = function() "1" + ), "1" + ) +}) + +test_that("ask_question_wait_response_class throws error if option 6 selected", { + expect_error( + with_mocked_bindings( + ask_question_wait_response_class("a variable"), + ask_question_class = function(var) "A question", + readline = function() "6") + ) +}) + +test_that("print_all_classes prints the correct message", { + expect_snapshot( + print_all_classes( + c("server_1", "server_2", "server_3"), + c("numeric", "factor", "integer") + ) + ) +}) + +test_that("prompt_user_class_decision function properly", { + expect_message( + with_mocked_bindings( + prompt_user_class_decision( + var = "test_col", + servers = c("sim2", "sim2", "sim3"), + classes = c("numeric", "character", "factor"), + newobj = "test_df", + datasources = datasources), + ask_question_wait_response_class = function(var, newobj, datasources) "test_col" + ) + ) + + expect_equal( + with_mocked_bindings( + prompt_user_class_decision( + var = "test_col", + servers = c("sim2", "sim2", "sim3"), + classes = c("numeric", "character", "factor"), + newobj = "test_df", + datasources = datasources), + ask_question_wait_response_class = function(var, newobj, datasources) "test_col" + ), + "test_col" + ) +}) + +test_that("prompt_user_class_decision_all_vars returns correct value", { + expect_equal( + with_mocked_bindings( + prompt_user_class_decision_all_vars( + vars = c("test_var_1", "test_var_2"), + all_servers = c("sim2", "sim2", "sim3"), + all_classes = tibble( + test_var_1 = c("numeric", "character", "factor"), + test_var_2 = c("logical", "integer", "factor") + ), + "test_df", + conns), + prompt_user_class_decision = function(var, server, classes, newobj, datasources) "1" + ), + c("1", "1") + ) +}) + +test_that(".fix_classes sets the correct classes in serverside data frame", { + + expect_equal( + unname(unlist(ds.class("D$fac_col4"))), + c("numeric", "character", "factor") + ) + + expect_equal( + unname(unlist(ds.class("D$fac_col5"))), + c("logical", "integer", "factor") + ) + + expect_equal( + unname(unlist(ds.class("new_classes$fac_col4"))), + rep("factor", 3) + ) + + expect_equal( + unname(unlist(ds.class("new_classes$fac_col5"))), + rep("logical", 3) + ) + +}) + +test_that(".get_unique_cols extracts unique names from a list", { + expect_equal( + .get_unique_cols( + list( + server_1 = c("col_1", "col_2", "col_3"), + server_1 = c("col_1", "col_2", "col_4"), + server_1 = c("col_2", "col_3", "col_3", "col_5") + ) + ), + c("col_1", "col_2", "col_3", "col_4", "col_5") + ) +}) + +test_that(".add_missing_cols_to_df correctly creates missing columns", { + + new_cols <- c("col11", "col12", "col13", "col14", "col15", "col16", "col17", "col18", "col19", + "col20", "fac_col1", "fac_col10", "fac_col2", "fac_col3", "fac_col4", "fac_col5", + "fac_col6", "fac_col7", "fac_col9") + + observed <- ds.colnames("with_new_cols") + + expected <- list( + sim1 = new_cols, + sim2 = new_cols, + sim3 = new_cols + ) + + expect_equal(observed, expected) +}) + +test_that(".get_added_cols correctly identifies newly added columns", { + + expect_equal( + added_cols, + list( + sim1 = c("col11", "col13", "col14", "col16", "col17", "col19", "col20", "fac_col10", "fac_col7"), + sim2 = c("col11", "col12", "col14", "col15", "col17", "col18", "col20", "fac_col6", "fac_col9"), + sim3 = c("col12", "col13", "col15", "col16", "col18", "col19", "fac_col10", "fac_col6", "fac_col7", "fac_col9") + ) + ) +}) + +test_that(".identify_factor_vars correctly identifies factor variables", { + + + + var_class_fact <- var_class |> dplyr::select(server: col18) + expect_equal( + names(fac_vars), + c("fac_col1", "fac_col2", "fac_col3", "fac_col6", "fac_col9") + ) +}) + +test_that(".get_factor_levels correctly identifies factor levels", { + expected <- list( + sim1 = list( + fac_col1 = c("High", "Low", "Medium"), + fac_col2 = c("Blue", "Green"), + fac_col3 = c("No", "Yes"), + fac_col6 = c("Bird", "Cat", "Dog"), + fac_col9 = c("False", "True") + ), + sim2 = list( + fac_col1 = c("High", "Low", "Medium"), + fac_col2 = c("Green", "Red"), + fac_col3 = c("No"), + fac_col6 = NULL, + fac_col9 = NULL + ), + sim3 = list( + fac_col1 = c("High", "Low", "Medium"), + fac_col2 = c("Blue"), + fac_col3 = c("Yes"), + fac_col6 = NULL, + fac_col9 = NULL + ) + ) + + expect_equal(fac_levels, expected) +}) + +test_that(".identify_level_conflicts correctly factor columns with different levels", { + expect_equal( + .identify_level_conflicts(fac_levels), + c("fac_col2", "fac_col3", "fac_col6", "fac_col9") + ) + +}) + +test_that("ask_question_wait_response_levels continues with valid response", { + expect_equal( + with_mocked_bindings( + suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)), + readline = function() "1" + ), "1" + ) + + expect_equal( + with_mocked_bindings( + suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)), + readline = function() "1" + ), "1" + ) + +}) + +test_that("ask_question_wait_response_levels aborts with response of 3", { + expect_error( + with_mocked_bindings( + suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)), + readline = function() "3") + ) +}) + +test_that(".make_levels_message makes correct message", { + expect_snapshot(.make_levels_message(level_conflicts)) +}) + +test_that(".get_unique_levels extracts all possible levels", { + + expected <- list( + fac_col2 = c("Blue", "Green", "Red"), + fac_col3 = c("No", "Yes"), + fac_col6 = c("Bird", "Cat", "Dog"), + fac_col9 = c("False", "True") + ) + + expect_equal(unique_levs, expected) + +}) + +test_that(".set_factor_levels sets levels correctly", { + .set_factor_levels("with_new_cols", unique_levs, ds.test_env$connections) + + expect_equal( + ds.levels("with_new_cols$fac_col2") |> map(~.x[[1]]), + list( + sim1 = c("Blue", "Green", "Red"), + sim2 = c("Blue", "Green", "Red"), + sim3 = c("Blue", "Green", "Red") + ) + ) + + expect_equal( + ds.levels("with_new_cols$fac_col3") |> map(~.x[[1]]), + list( + sim1 = c("No", "Yes"), + sim2 = c("No", "Yes"), + sim3 = c("No", "Yes") + ) + ) + + expect_equal( + ds.levels("with_new_cols$fac_col6") |> map(~.x[[1]]), + list( + sim1 = c("Bird", "Cat", "Dog"), + sim2 = c("Bird", "Cat", "Dog"), + sim3 = c("Bird", "Cat", "Dog") + ) + ) + + expect_equal( + ds.levels("with_new_cols$fac_col9") |> map(~.x[[1]]), + list( + sim1 = c("False", "True"), + sim2 = c("False", "True"), + sim3 = c("False", "True") + ) + ) + +}) + +test_that(".print_var_recode_message prints the correct message", { + expect_snapshot(.print_var_recode_message(added_cols, "test_df")) +}) + +test_that(".print_class_recode_message prints the correct message", { + expect_snapshot( + .print_class_recode_message(class_decisions, different_classes, "test_df") + ) +}) + +test_that(".print_levels_recode_message prints the correct message", { + expect_snapshot( + .print_levels_recode_message(unique_levs, "test_df") + ) +}) + +test_that(".make_levels_recode_message prints the correct message", { + expect_equal( + .make_levels_recode_message(unique_levs), + list( + "fac_col2 --> Blue, Green, Red", + "fac_col3 --> No, Yes", + "fac_col6 --> Bird, Cat, Dog", + "fac_col9 --> False, True" + ) + ) +}) + +test_that(".print_out_messages prints the correct messages", { + expect_snapshot( + .print_out_messages( + added_cols, class_decisions, different_classes, unique_levs, level_conflicts, "1", "test_df" + ) + ) +}) + +test_that(".change_choice_to_string converts numeric class codes to strings correctly", { + expect_equal(.change_choice_to_string("1"), "factor") + expect_equal(.change_choice_to_string("2"), "integer") + expect_equal(.change_choice_to_string("3"), "numeric") + expect_equal(.change_choice_to_string("4"), "character") + expect_equal(.change_choice_to_string("5"), "logical") +}) + +test_that("ds.standardiseDf doesn't run if dataframes are identical", { + with_mocked_bindings( + ds.standardiseDf( + df = "D", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) "1", + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" + ) + + expect_error( + ds.standardiseDf( + df = "test_fill", + newobj = "shouldn't_exist"), + "Columns are identical" + ) + }) + +test_that("ds.standardiseDf works when called directly and class conversion is factor", { + with_mocked_bindings( + ds.standardiseDf( + df = "D", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) "1", + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" + ) + + expect_equal( + ds.class("test_fill$fac_col4")[[1]], + "factor" + ) +}) + +test_that("ds.standardiseDf returns warning when called directly and class conversion is integer", { + with_mocked_bindings( + ds.standardiseDf( + df = "D", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("2", "2"), + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" + ) + + expect_equal( + ds.class("test_fill$fac_col4")[[1]], + "integer" + ) + + expect_equal( + ds.class("test_fill$fac_col5")[[1]], + "integer" + ) +}) + +test_that("ds.standardiseDf returns warning when called directly and class conversion is numeric", { + with_mocked_bindings( + ds.standardiseDf( + df = "D", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("3", "3"), + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" + ) + + expect_equal( + ds.class("test_fill$fac_col4")[[1]], + "numeric" + ) + + expect_equal( + ds.class("test_fill$fac_col5")[[1]], + "numeric" + ) +}) + +test_that("ds.standardiseDf returns warning when called directly and class conversion is character", { + with_mocked_bindings( + ds.standardiseDf( + df = "D", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("4", "4"), + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" + ) + + expect_equal( + ds.class("test_fill$fac_col4")[[1]], + "character" + ) + + expect_equal( + ds.class("test_fill$fac_col5")[[1]], + "character" + ) +}) + +test_that("ds.standardiseDf returns warning when called directly and class conversion is logical", { + with_mocked_bindings( + ds.standardiseDf( + df = "D", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("5", "5"), + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" + ) + + expect_equal( + ds.class("test_fill$fac_col4")[[1]], + "logical" + ) + + expect_equal( + ds.class("test_fill$fac_col5")[[1]], + "logical" + ) +}) + +test_that("ds.standardiseDf changes levels if this option is selected", { + with_mocked_bindings( + ds.standardiseDf( + df = "D", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("1", "1"), + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "1" + ) + + levels_2 <- ds.levels("test_fill$fac_col2") %>% map(~.$Levels) + levels_3 <- ds.levels("test_fill$fac_col3") %>% map(~.$Levels) + levels_4 <- ds.levels("test_fill$fac_col4") %>% map(~.$Levels) + levels_5 <- ds.levels("test_fill$fac_col5") %>% map(~.$Levels) + levels_6 <- ds.levels("test_fill$fac_col6") %>% map(~.$Levels) + levels_9 <- ds.levels("test_fill$fac_col9") %>% map(~.$Levels) + + expect_equal( + levels_2, + list( + sim1 = c("Blue", "Green", "Red"), + sim2 = c("Blue", "Green", "Red"), + sim3 = c("Blue", "Green", "Red") + ) + ) + + expect_equal( + levels_3, + list( + sim1 = c("No", "Yes"), + sim2 = c("No", "Yes"), + sim3 = c("No", "Yes") + ) + ) + + expect_equal( + levels_4, + list( + sim1 = c("1", "2", "3", "A", "B", "C"), + sim2 = c("1", "2", "3", "A", "B", "C"), + sim3 = c("1", "2", "3", "A", "B", "C") + ) + ) + + expect_equal( + levels_5, + list( + sim1 = c("1", "2", "3", "One", "Three", "Two"), + sim2 = c("1", "2", "3", "One", "Three", "Two"), + sim3 = c("1", "2", "3", "One", "Three", "Two") + ) + ) + + expect_equal( + levels_6, + list( + sim1 = c("Bird", "Cat", "Dog"), + sim2 = c("Bird", "Cat", "Dog"), + sim3 = c("Bird", "Cat", "Dog") + ) + ) + + expect_equal( + levels_9, + list( + sim1 = c("False", "True"), + sim2 = c("False", "True"), + sim3 = c("False", "True") + ) + ) + +}) + +test_that("ds.standardiseDf doesn't run if classes are not identical and fix_class is no", { + expect_error( + ds.standardiseDf( + df = "D", + newobj = "shouldnt_exist", + fix_class = "no" + ), + "Variables do not have the same class in all studies" + ) + + expect_equal( + ds.exists("shouldnt_exist")[[1]], + FALSE + ) +}) + +test_that("ds.standardiseDf doesn't run if levels are not identical and fix_class is no", { + expect_error( + with_mocked_bindings( + ds.standardiseDf( + df = "D", + newobj = "shouldnt_exist", + fix_levels = "no" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("1", "1") + ), + "Factor variables do not have the same levels in all studies" + ) + + expect_equal( + ds.exists("shouldnt_exist")[[1]], + FALSE + ) +}) + +test_that("ds.standardiseDf doesn't run if a factor variable has too many levels", { + connect.studies.dataset.stand_disclosure( + c( + "fac_col1", "fac_col2", "fac_col3", "fac_col4", "fac_col5", "fac_col6", "fac_col7", "fac_col9", + "fac_col10", "col11", "col12", "col13", "col14", "col15", "col16", "col17", "col18", "col19", + "col20") + ) + + expect_error( + with_mocked_bindings( + ds.standardiseDf( + df = "D", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) "1", + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" + ) + ) + +}) + +disconnect.studies.dataset.stand() + +context("ds.standardiseDf::smk::done")