diff --git a/NEWS.md b/NEWS.md index 1807846c9..9aa691641 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,9 @@ # mlr3pipelines 0.3.1 +* PipeOps are now encapsulated during train and predict and gained the active bindings + timings, log, warnings, errors +* GraphLearners gained the timings_pipeops active binding * Changed PipeOps: - PipeOpMissInd now also allows for setting type = integer - PipeOpNMF: now exposes all parameters previously in .options diff --git a/R/Graph.R b/R/Graph.R index f97b7b318..2a8677dc4 100644 --- a/R/Graph.R +++ b/R/Graph.R @@ -452,7 +452,7 @@ Graph = R6Class("Graph", state = function(val) { if (!missing(val)) { assert_list(val, names = "unique", null.ok = TRUE) - assert_subset(names(val), names(self$pipeops)) + assert_subset(names(val), c(names(self$pipeops), "log", "train_time", "predict_time")) imap(self$pipeops, function(pipeop, pname) pipeop$state = val[[pname]]) val } else { diff --git a/R/GraphLearner.R b/R/GraphLearner.R index 1a5d58ba2..8f18a4123 100644 --- a/R/GraphLearner.R +++ b/R/GraphLearner.R @@ -15,6 +15,9 @@ #' Setting a new predict type will try to set the `predict_type` in all relevant #' [`PipeOp`] / [`Learner`][mlr3::Learner] encapsulated within the [`Graph`]. #' Similarly, the predict_type of a Graph will always be the smallest denominator in the [`Graph`]. +#' +#' In the `timings_pipeops` active binding, the `timings` of the individual [`PipeOp`]s are stored as a named list +#' with names `"train"` and `"predict"`. 
#' @family Learners #' @export GraphLearner = R6Class("GraphLearner", inherit = Learner, @@ -23,6 +26,12 @@ GraphLearner = R6Class("GraphLearner", inherit = Learner, initialize = function(graph, id = NULL, param_vals = list(), task_type = NULL, predict_type = NULL) { graph = as_graph(graph, clone = TRUE) + + # set the encapsulate of all pipeops to "none" + for (i in seq_along(graph$pipeops)) { + graph$pipeops[[i]]$encapsulate = c(train = "none", predict = "none") + } + id = assert_string(id, null.ok = TRUE) %??% paste(graph$ids(sorted = TRUE), collapse = ".") self$graph = graph output = graph$output @@ -46,7 +55,6 @@ GraphLearner = R6Class("GraphLearner", inherit = Learner, } if (length(task_type) == 0L) { # recursively walk backwards through the graph - # FIXME: think more about target transformation graphs here get_po_task_type = function(x) { task_type = c( match(c(x$output$train, x$output$predict), class_table$prediction), @@ -101,11 +109,21 @@ GraphLearner = R6Class("GraphLearner", inherit = Learner, stop("param_set is read-only.") } self$graph$param_set + }, + timings_pipeops = function(rhs) { + assert_ro_binding(rhs) + if (is.null(self$state$model)) { + timing = stats::setNames(rep(NA_real_, length(self$graph$pipeops)), nm = names(self$graph$pipeops)) + return(list(train = timing, predict = timing)) # early exit + } + # reorder based on topologically sorted ids + list(train = stats::setNames(map_dbl(self$state$model, function(pipeop) pipeop$train_time %??% NA_real_), nm = names(self$graph$pipeops))[self$graph$ids(TRUE)], + predict = stats::setNames(map_dbl(self$state$model, function(pipeop) pipeop$predict_time %??% NA_real_), nm = names(self$graph$pipeops))[self$graph$ids(TRUE)]) } ), private = list( deep_clone = function(name, value) { - # FIXME this repairs the mlr3::Learner deep_clone() method which is broken. + # FIXME: this repairs the mlr3::Learner deep_clone() method which is broken. 
if (is.environment(value) && !is.null(value[[".__enclos_env__"]])) { return(value$clone(deep = TRUE)) } @@ -122,6 +140,7 @@ GraphLearner = R6Class("GraphLearner", inherit = Learner, on.exit({self$graph$state = NULL}) self$graph$state = self$model prediction = self$graph$predict(task) + self$state$model = self$graph$state # needed to get each pipeop's predict_time in the state assert_list(prediction, types = "Prediction", len = 1, .var.name = sprintf("Prediction returned by Graph %s", self$id)) prediction[[1]] diff --git a/R/PipeOp.R b/R/PipeOp.R index d95015ac0..7acceab01 100644 --- a/R/PipeOp.R +++ b/R/PipeOp.R @@ -59,8 +59,8 @@ #' Set of all required packages for the [`PipeOp`]'s `$train` and `$predict` methods. See `$packages` slot. #' Default is `character(0)`. #' * `tags` ::`character`\cr -#' A set of tags associated with the `PipeOp`. Tags describe a PipeOp's purpose. -#' Can be used to filter `as.data.table(mlr_pipeops)`. Default is `"abstract"`, indicating an abstract `PipeOp`. +#' A set of tags associated with the [`PipeOp`]. Tags describe a [`PipeOp`]'s purpose. +#' Can be used to filter `as.data.table(mlr_pipeops)`. Default is `"abstract"`, indicating an abstract [`PipeOp`]. #' #' @section Internals: #' [`PipeOp`] is an abstract class with abstract functions `private$.train()` and `private$.predict()`. To create a functional @@ -111,14 +111,29 @@ #' Number of output channels. This equals `nrow($output)`. #' * `is_trained` :: `logical(1)` \cr #' Indicate whether the [`PipeOp`] was already trained and can therefore be used for prediction. -#' * `tags` ::`character`\cr -#' A set of tags associated with the `PipeOp`. Tags describe a PipeOp's purpose. +#' * `tags` :: `character`\cr +#' A set of tags associated with the [`PipeOp`]. Tags describe a [`PipeOp`]'s purpose. #' Can be used to filter `as.data.table(mlr_pipeops)`. -#' PipeOp tags are inherited and child classes can introduce additional tags. 
+#' [`PipeOp`] tags are inherited and child classes can introduce additional tags. #' * `hash` :: `character(1)` \cr #' Checksum calculated on the [`PipeOp`], depending on the [`PipeOp`]'s `class` and the slots `$id` and `$param_set$values`. If a #' [`PipeOp`]'s functionality may change depending on more than these values, it should inherit the `$hash` active #' binding and calculate the hash as `digest(list(super$hash, ), algo = "xxhash64")`. +#' * `timings` :: `numeric(2)` \cr +#' Elapsed time in seconds for the steps `"train"` and `"predict"`. +#' Measured via [mlr3misc::encapsulate()]. +#' * `log` :: [`data.table`]\cr +#' Returns the output (including warning and errors) as table with columns `"stage"` ("train" or "predict"), +#' `"class"` ("output", "warning", or "error"), and `"msg"` (`character()`). +#' * `warnings` :: `character()`\cr +#' Logged warnings as vector. +#' * `errors` :: `character()`\cr +#' Logged errors as vector. +#' * `encapsulate` :: named `character()`\cr +#' Controls how to execute the code in internal train and predict methods. +#' Must be a named character vector with names `"train"` and `"predict"`. +#' Possible values are `"none"`, `"evaluate"` (requires package \CRANpkg{evaluate}) and `"callr"` (requires package \CRANpkg{callr}). +#' See [mlr3misc::encapsulate()] for more details. #' * `.result` :: `list` \cr #' If the [`Graph`]'s `$keep_results` flag is set to `TRUE`, then the intermediate Results of `$train()` and `$predict()` #' are saved to this slot, exactly as they are returned by these functions. This is mainly for debugging purposes @@ -127,7 +142,7 @@ #' @section Methods: #' * `train(input)`\cr #' (`list`) -> named `list`\cr -#' Train [`PipeOp`] on `inputs`, transform it to output and store the learned `$state`. If the PipeOp is already +#' Train [`PipeOp`] on `inputs`, transform it to output and store the learned `$state`. If the [`PipeOp`] is already #' trained, already present `$state` is overwritten. 
Input list is typechecked against the `$input` `train` column. #' Return value is a list with as many entries as `$output` has #' rows, with each entry named after the `$output` `name` column and class according to the `$output` `train` column. @@ -177,11 +192,11 @@ #' # a list as output. #' .train = function(input) { #' sum = input[[1]] + input[[2]] -#' self$state = sum +#' self$state$sum = sum #' list(sum) #' }, #' .predict = function(input) { -#' list(letters[self$state]) +#' list(letters[self$state$sum]) #' } #' ) #' ) @@ -222,6 +237,7 @@ PipeOp = R6Class("PipeOp", self$output = assert_connection_table(output) self$packages = assert_character(packages, any.missing = FALSE, unique = TRUE) self$tags = assert_subset(tags, mlr_reflections$pipeops$valid_tags) + private$.encapsulate = private$.learner$encapsulate %??% c(train = "none", predict = "none") # propagate a learner's encapsulate in case of as_pipeop.Learner calls etc. }, print = function(...) { @@ -241,42 +257,10 @@ PipeOp = R6Class("PipeOp", }, train = function(input) { - self$state = NULL # reset to untrained state first - require_namespaces(self$packages) - - if (every(input, is_noop)) { - self$state = NO_OP - return(named_list(self$output$name, NO_OP)) - } - unpacked = unpack_multiplicities(input, multiplicity_type_nesting_level(self$input$train), self$input$name, self$id) - if (!is.null(unpacked)) { - return(evaluate_multiplicities(self, unpacked, "train", NULL)) - } - input = check_types(self, input, "input", "train") - on.exit({self$state = NULL}) # if any of the followi fails, make sure to reset self$state - output = private$.train(input) - output = check_types(self, output, "output", "train") - on.exit() # don't reset state any more - output + pipeop_train(self, input) }, predict = function(input) { - # need to load packages in train *and* predict, because they might run in different R instances - require_namespaces(self$packages) - - if (every(input, is_noop)) { - 
return(named_list(self$output$name, NO_OP)) - } - if (is_noop(self$state)) { - stopf("Pipeop %s got NO_OP during train but no NO_OP during predict.", self$id) - } - unpacked = unpack_multiplicities(input, multiplicity_type_nesting_level(self$input$predict), self$input$name, self$id) - if (!is.null(unpacked)) { - return(evaluate_multiplicities(self, unpacked, "predict", self$state)) - } - input = check_types(self, input, "input", "predict") - output = private$.predict(input) - output = check_types(self, output, "output", "predict") - output + pipeop_predict(self, input) } ), @@ -333,6 +317,30 @@ PipeOp = R6Class("PipeOp", val } })), algo = "xxhash64") + }, + timings = function(rhs) { + assert_ro_binding(rhs) + set_names(c(self$state$train_time %??% NA_real_, self$state$predict_time %??% NA_real_), c("train", "predict")) + }, + log = function(rhs) { + assert_ro_binding(rhs) + self$state$log + }, + warnings = function(rhs) { + assert_ro_binding(rhs) + get_log_condition(self$state, "warning") + }, + errors = function(rhs) { + assert_ro_binding(rhs) + get_log_condition(self$state, "error") + }, + encapsulate = function(rhs) { + if (missing(rhs)) { + return(private$.encapsulate) + } + assert_character(rhs) + assert_names(names(rhs), subset.of = c("train", "predict")) + private$.encapsulate = insert_named(c(train = "none", predict = "none"), rhs) } ), @@ -355,7 +363,8 @@ PipeOp = R6Class("PipeOp", .predict = function(input) stop("abstract"), .param_set = NULL, .param_set_source = NULL, - .id = NULL + .id = NULL, + .encapsulate = NULL ) ) @@ -497,3 +506,117 @@ evaluate_multiplicities = function(self, unpacked, evalcall, instate) { map(transpose_list(map(result, "output")), as.Multiplicity) } + +pipeop_train = function(pipeop, input) { + # This wrapper calls pipeop$train, and additionally performs some basic checks that the training was successful. + # Exceptions here are possibly encapsulated, so that they get captured and turned into log messages. 
+ train_wrapper = function(pipeop, input) { + output = get_private(pipeop)$.train(input) + + if (is.null(output)) { + stopf("PipeOp '%s' on input '%s' returned NULL during internal train()", pipeop$id, deparse(substitute(input))) + } + + output + } + + pipeop$state = NULL # reset to untrained state first + #require_namespaces(pipeop$packages) + + if (every(input, is_noop)) { + pipeop$state = NO_OP + return(named_list(pipeop$output$name, NO_OP)) + } + + unpacked = unpack_multiplicities(input, multiplicity_type_nesting_level(pipeop$input$train), pipeop$input$name, pipeop$id) + if (!is.null(unpacked)) { + return(evaluate_multiplicities(pipeop, unpacked, "train", NULL)) + } + + input = check_types(pipeop, input, "input", "train") + on.exit({pipeop$state = NULL}) # if any of the following fails, make sure to reset pipeop$state + + lg$debug("Calling train method of PipeOp '%s' on input '%s'", + pipeop$id, deparse(substitute(input)), pipeop = pipeop$clone()) + + # call train_wrapper with encapsulation + result = encapsulate(pipeop$encapsulate["train"], + .f = train_wrapper, + .args = list(pipeop = pipeop, input = input), + .pkgs = pipeop$packages, + .seed = NA_integer_ + ) + + output = check_types(pipeop, result$result, "output", "train") + on.exit() # don't reset state any more + + pipeop$state$log = append_log(pipeop$state$log, "train", result$log$class, result$log$msg) + pipeop$state$train_time = result$elapsed + + if (is.null(output)) { + lg$debug("PipeOp '%s' on input '%s' failed to return a state", + pipeop$id, deparse(substitute(input)), pipeop = pipeop$clone(), messages = result$log$msg) + } else { + lg$debug("PipeOp '%s' on input '%s' succeeded to return a state", + pipeop$id, deparse(substitute(input)), pipeop = pipeop$clone(), messages = result$log$msg) + } + + output +} + +pipeop_predict = function(pipeop, input) { + # This wrapper calls pipeop$predict, and additionally performs some basic checks that the prediction was successful. 
+ # Exceptions here are possibly encapsulated, so that they get captured and turned into log messages. + predict_wrapper = function(pipeop, input) { + # NOTE: may actually be sensible to check this + #if (is.null(pipeop$state)) { + # stopf("No trained state available for PipeOp '%s' on input '%s'", pipeop$id, deparse(substitute(input))) + #} + + get_private(pipeop)$.predict(input) + } + + # need to load packages in train *and* predict, because they might run in different R instances + #require_namespaces(pipeop$packages) + + if (every(input, is_noop)) { + return(named_list(pipeop$output$name, NO_OP)) + } + + if (is_noop(pipeop$state)) { + stopf("Pipeop %s got NO_OP during train but no NO_OP during predict.", pipeop$id) + } + + unpacked = unpack_multiplicities(input, multiplicity_type_nesting_level(pipeop$input$predict), pipeop$input$name, pipeop$id) + if (!is.null(unpacked)) { + return(evaluate_multiplicities(pipeop, unpacked, "predict", pipeop$state)) + } + + input = check_types(pipeop, input, "input", "predict") + + # call predict with encapsulation + lg$debug("Calling predict method of PipeOp '%s' on input '%s'", + pipeop$id, deparse(substitute(input)), pipeop = pipeop$clone()) + + result = encapsulate( + pipeop$encapsulate["predict"], + .f = predict_wrapper, + .args = list(pipeop = pipeop, input = input), + .pkgs = pipeop$packages, + .seed = NA_integer_ + ) + + output = check_types(pipeop, result$result, "output", "predict") + + pipeop$state$log = append_log(pipeop$state$log, "predict", result$log$class, result$log$msg) + pipeop$state$predict_time = result$elapsed + + output +} + +#FIXME: need this from mlr3 +assert_ro_binding = mlr3:::assert_ro_binding +get_private = mlr3:::get_private +append_log = mlr3:::append_log +get_log_condition = mlr3:::get_log_condition + diff --git a/R/PipeOpImputeLearner.R b/R/PipeOpImputeLearner.R index c07247ca7..bea207c61 100644 --- a/R/PipeOpImputeLearner.R +++ b/R/PipeOpImputeLearner.R @@ -176,7 +176,10 @@ PipeOpImputeLearner = 
R6Class("PipeOpImputeLearner", ) ) -mlr_pipeops$add("imputelearner", PipeOpImputeLearner, list(R6Class("Learner", public = list(id = "learner", task_type = "classif", param_set = ParamSet$new()))$new())) +mlr_pipeops$add("imputelearner", PipeOpImputeLearner, + list(R6Class("Learner", + public = list(id = "learner", task_type = "classif", param_set = ParamSet$new()), + active = list(encapsulate = function() c(train = "none", predict = "none")))$new())) # See mlr-org/mlr#470 convert_to_task = function(id = "imputing", data, target, task_type, ...) { diff --git a/R/PipeOpLearner.R b/R/PipeOpLearner.R index 33f7c39fc..4c25ec951 100644 --- a/R/PipeOpLearner.R +++ b/R/PipeOpLearner.R @@ -149,7 +149,10 @@ PipeOpLearner = R6Class("PipeOpLearner", inherit = PipeOp, ) ) -mlr_pipeops$add("learner", PipeOpLearner, list(R6Class("Learner", public = list(id = "learner", task_type = "classif", param_set = ParamSet$new()))$new())) +mlr_pipeops$add("learner", PipeOpLearner, + list(R6Class("Learner", + public = list(id = "learner", task_type = "classif", param_set = ParamSet$new()), + active = list(encapsulate = function() c(train = "none", predict = "none")))$new())) #' @export as_learner.PipeOp = function(x, clone = FALSE) { diff --git a/R/PipeOpLearnerCV.R b/R/PipeOpLearnerCV.R index b8aba7bf6..c44b5806a 100644 --- a/R/PipeOpLearnerCV.R +++ b/R/PipeOpLearnerCV.R @@ -204,4 +204,7 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", ) ) -mlr_pipeops$add("learner_cv", PipeOpLearnerCV, list(R6Class("Learner", public = list(id = "learner_cv", task_type = "classif", param_set = ParamSet$new()))$new())) +mlr_pipeops$add("learner_cv", PipeOpLearnerCV, + list(R6Class("Learner", + public = list(id = "learnercv", task_type = "classif", param_set = ParamSet$new()), + active = list(encapsulate = function() c(train = "none", predict = "none")))$new())) diff --git a/man/PipeOp.Rd b/man/PipeOp.Rd index e3fa48e09..c10ace9db 100644 --- a/man/PipeOp.Rd +++ b/man/PipeOp.Rd @@ -58,8 +58,8 @@ Sets the 
\verb{$output} slot of the resulting object; see description there. Set of all required packages for the \code{\link{PipeOp}}'s \verb{$train} and \verb{$predict} methods. See \verb{$packages} slot. Default is \code{character(0)}. \item \code{tags} ::\code{character}\cr -A set of tags associated with the \code{PipeOp}. Tags describe a PipeOp's purpose. -Can be used to filter \code{as.data.table(mlr_pipeops)}. Default is \code{"abstract"}, indicating an abstract \code{PipeOp}. +A set of tags associated with the \code{\link{PipeOp}}. Tags describe a \code{\link{PipeOp}}'s purpose. +Can be used to filter \code{as.data.table(mlr_pipeops)}. Default is \code{"abstract"}, indicating an abstract \code{\link{PipeOp}}. } } @@ -116,14 +116,29 @@ Number of input channels. This equals \verb{nrow($input)}. Number of output channels. This equals \verb{nrow($output)}. \item \code{is_trained} :: \code{logical(1)} \cr Indicate whether the \code{\link{PipeOp}} was already trained and can therefore be used for prediction. -\item \code{tags} ::\code{character}\cr -A set of tags associated with the \code{PipeOp}. Tags describe a PipeOp's purpose. +\item \code{tags} :: \code{character}\cr +A set of tags associated with the \code{\link{PipeOp}}. Tags describe a \code{\link{PipeOp}}'s purpose. Can be used to filter \code{as.data.table(mlr_pipeops)}. -PipeOp tags are inherited and child classes can introduce additional tags. +\code{\link{PipeOp}} tags are inherited and child classes can introduce additional tags. \item \code{hash} :: \code{character(1)} \cr Checksum calculated on the \code{\link{PipeOp}}, depending on the \code{\link{PipeOp}}'s \code{class} and the slots \verb{$id} and \verb{$param_set$values}. If a \code{\link{PipeOp}}'s functionality may change depending on more than these values, it should inherit the \verb{$hash} active binding and calculate the hash as \verb{digest(list(super$hash, ), algo = "xxhash64")}. 
+\item \code{timings} :: \code{numeric(2)} \cr +Elapsed time in seconds for the steps \code{"train"} and \code{"predict"}. +Measured via \code{\link[mlr3misc:encapsulate]{mlr3misc::encapsulate()}}. +\item \code{log} :: \code{\link{data.table}}\cr +Returns the output (including warning and errors) as table with columns \code{"stage"} ("train" or "predict"), +\code{"class"} ("output", "warning", or "error"), and \code{"msg"} (\code{character()}). +\item \code{warnings} :: \code{character()}\cr +Logged warnings as vector. +\item \code{errors} :: \code{character()}\cr +Logged errors as vector. +\item \code{encapsulate} :: named \code{character()}\cr +Controls how to execute the code in internal train and predict methods. +Must be a named character vector with names \code{"train"} and \code{"predict"}. +Possible values are \code{"none"}, \code{"evaluate"} (requires package \CRANpkg{evaluate}) and \code{"callr"} (requires package \CRANpkg{callr}). +See \code{\link[mlr3misc:encapsulate]{mlr3misc::encapsulate()}} for more details. \item \code{.result} :: \code{list} \cr If the \code{\link{Graph}}'s \verb{$keep_results} flag is set to \code{TRUE}, then the intermediate Results of \verb{$train()} and \verb{$predict()} are saved to this slot, exactly as they are returned by these functions. This is mainly for debugging purposes @@ -136,7 +151,7 @@ and done, if requested, by the \code{\link{Graph}} backend itself; it should \em \itemize{ \item \code{train(input)}\cr (\code{list}) -> named \code{list}\cr -Train \code{\link{PipeOp}} on \code{inputs}, transform it to output and store the learned \verb{$state}. If the PipeOp is already +Train \code{\link{PipeOp}} on \code{inputs}, transform it to output and store the learned \verb{$state}. If the \code{\link{PipeOp}} is already trained, already present \verb{$state} is overwritten. Input list is typechecked against the \verb{$input} \code{train} column. 
Return value is a list with as many entries as \verb{$output} has rows, with each entry named after the \verb{$output} \code{name} column and class according to the \verb{$output} \code{train} column. @@ -189,11 +204,11 @@ PipeOpSumLetter = R6::R6Class("sumletter", # a list as output. .train = function(input) { sum = input[[1]] + input[[2]] - self$state = sum + self$state$sum = sum list(sum) }, .predict = function(input) { - list(letters[self$state]) + list(letters[self$state$sum]) } ) ) diff --git a/man/mlr_learners_graph.Rd b/man/mlr_learners_graph.Rd index 847de745a..bb2cbb77b 100644 --- a/man/mlr_learners_graph.Rd +++ b/man/mlr_learners_graph.Rd @@ -19,6 +19,9 @@ The \code{predict_type} of a \code{\link{GraphLearner}} can be obtained or set v Setting a new predict type will try to set the \code{predict_type} in all relevant \code{\link{PipeOp}} / \code{\link[mlr3:Learner]{Learner}} encapsulated within the \code{\link{Graph}}. Similarly, the predict_type of a Graph will always be the smallest denominator in the \code{\link{Graph}}. + +In the \code{timings_pipeops} active binding, the \code{timings} of the individual \code{\link{PipeOp}}s are stored as a named list +with names \code{"train"} and \code{"predict"}. 
} \seealso{ Other Learners: diff --git a/tests/testthat/helper_test_pipeops.R b/tests/testthat/helper_test_pipeops.R index 921d463fb..02d78ee4d 100644 --- a/tests/testthat/helper_test_pipeops.R +++ b/tests/testthat/helper_test_pipeops.R @@ -15,6 +15,7 @@ PipeOpDebugBasic = R6Class("PipeOpDebugBasic", .predict = function(inputs) { catf("Predicting %s", self$id) self$state = c(self$state, inputs) + inputs } ) ) @@ -57,7 +58,7 @@ PipeOpDebugMulti = R6Class("PipeOpDebugMulti", }, .predict = function(inputs) { catf("Predicting %s with input %s and state %s", - self$id, deparse_list_safe(inputs), deparse_list_safe(self$state)) + self$id, deparse_list_safe(inputs), deparse_list_safe(self$state[-which(names(self$state) %in% c("log", "train_time"))])) iin = inputs[[1]] as.list(iin + seq_len(self$nout)) } @@ -84,3 +85,63 @@ VarargPipeop = R6Class("VarargPipeop", } ) ) + +PipeOpDebugEncapsulate = R6Class("PipeOpDebugEncapsulate", + inherit = PipeOp, + public = list( + initialize = function(id = "debug.encapsulate", param_vals = list()) { + param_set = ParamSet$new(list( + ParamLgl$new("message_train", default = FALSE, tags = "train"), + ParamLgl$new("message_predict", default = FALSE, tags = "predict"), + ParamLgl$new("warning_train", default = FALSE, tags = "train"), + ParamLgl$new("warning_predict", default = FALSE, tags = "predict"), + ParamLgl$new("error_train", default = FALSE, tags = "train"), + ParamLgl$new("error_predict", default = FALSE, tags = "predict"), + ParamLgl$new("segfault_train", default = FALSE, tags = "train"), + ParamLgl$new("segfault_predict", default = FALSE, tags = "predict"), + ParamLgl$new("predict_missing", default = FALSE, tags = "predict") + )) + super$initialize(id = id, param_set = param_set, param_vals = param_vals, + input = data.table(name = "input", train = "*", predict = "*"), + output = data.table(name = "output", train = "*", predict = "*") + ) + }), + private = list( + .train = function(inputs) { + pv = self$param_set$get_values(tags = 
"train") + + if (isTRUE(pv[["message_train"]])) { + message("Message from classif.debug->train()") + } + if (isTRUE(pv[["warning_train"]])) { + warning("Warning from classif.debug->train()") + } + if (isTRUE(pv[["error_train"]])) { + stop("Error from classif.debug->train()") + } + if (isTRUE(pv[["segfault_train"]])) { + get("attach")(structure(list(), class = "UserDefinedDatabase")) + } + self$state = inputs + }, + .predict = function(inputs) { + pv = self$param_set$get_values(tags = "predict") + + if (isTRUE(pv[["message_predict"]])) { + message("Message from classif.debug->predict()") + } + if (isTRUE(pv[["warning_predict"]])) { + warning("Warning from classif.debug->predict()") + } + if (isTRUE(pv[["error_predict"]])) { + stop("Error from classif.debug->predict()") + } + if (isTRUE(pv[["segfault_predict"]])) { + get("attach")(structure(list(), class = "UserDefinedDatabase")) + } + self$state = c(self$state, inputs) + inputs + } + ) +) + diff --git a/tests/testthat/test_GraphLearner.R b/tests/testthat/test_GraphLearner.R index 35328a13f..6ba183cde 100644 --- a/tests/testthat/test_GraphLearner.R +++ b/tests/testthat/test_GraphLearner.R @@ -34,20 +34,41 @@ test_that("basic graphlearner tests", { gr2 = PipeOpScale$new() %>>% PipeOpLearner$new(lrn) glrn2 = GraphLearner$new(gr2) + glrn2$graph$set_names(c("scale", "classif.rpart"), new = c("newscale", "newclassif.rpart")) + expect_list(glrn2$timings_pipeops, len = 2L) + expect_equal(names(glrn2$timings_pipeops), c("train", "predict")) + expect_equal(names(glrn2$timings_pipeops$train), glrn2$graph$ids(TRUE)) + expect_equal(names(glrn2$timings_pipeops$train), names(glrn2$timings_pipeops$predict)) + expect_true(every(glrn2$timings_pipeops, function(phase) every(phase, is.na))) glrn2_clone = glrn2$clone(deep = TRUE) expect_learner(glrn2) expect_true(run_experiment(task, glrn)$ok) glrn2$train(task) + expect_list(glrn2$timings_pipeops, len = 2L) + expect_equal(names(glrn2$timings_pipeops), c("train", "predict")) + 
expect_equal(names(glrn2$timings_pipeops$train), names(glrn2$timings_pipeops$predict)) + expect_true(every(is.na(glrn2$timings_pipeops$train), isFALSE)) + expect_true(every(is.na(glrn2$timings_pipeops$predict), isTRUE)) glrn2_clone$state = glrn2$state -# glrn2_clone$state$log = glrn2_clone$state$log$clone(deep = TRUE) # FIXME: this can go when mlr-org/mlr3#343 is fixed -# glrn2_clone$state$model$classif.rpart$log = glrn2_clone$state$model$classif.rpart$log$clone(deep = TRUE) # FIXME: this can go when mlr-org/mlr3#343 is fixed + # glrn2_clone$state$log = glrn2_clone$state$log$clone(deep = TRUE) # FIXME: this can go when mlr-org/mlr3#343 is fixed + # glrn2_clone$state$model$classif.rpart$log = glrn2_clone$state$model$classif.rpart$log$clone(deep = TRUE) # FIXME: this can go when mlr-org/mlr3#343 is fixed expect_deep_clone(glrn2_clone, glrn2$clone(deep = TRUE)) expect_prediction_classif({ graphpred2 = glrn2$predict(task) }) + expect_list(glrn2$timings_pipeops, len = 2L) + expect_equal(names(glrn2$timings_pipeops), c("train", "predict")) + expect_equal(names(glrn2$timings_pipeops$train), glrn2$graph$ids(TRUE)) + expect_equal(names(glrn2$timings_pipeops$train), names(glrn2$timings_pipeops$predict)) + expect_true(every(is.na(glrn2$timings_pipeops$train), isFALSE)) + expect_true(every(is.na(glrn2$timings_pipeops$predict), isFALSE)) expect_equal(glrn2$predict(task), glrn2_clone$predict(task)) + glrn2$graph$set_names(c("newscale", "newclassif.rpart"), new = c("NEWscale", "NEWclassif.rpart")) + expect_equal(names(glrn2$timings_pipeops$train), glrn2$graph$ids(TRUE)) + expect_equal(names(glrn2$timings_pipeops$train), names(glrn2$timings_pipeops$predict)) + scidf = cbind(scale(iris[1:4]), iris[5]) scalediris = TaskClassif$new("scalediris", as_data_backend(scidf), "Species") diff --git a/tests/testthat/test_PipeOp.R b/tests/testthat/test_PipeOp.R index 7550bfb8c..8ae24fee6 100644 --- a/tests/testthat/test_PipeOp.R +++ b/tests/testthat/test_PipeOp.R @@ -13,10 +13,18 @@ 
test_that("PipeOp - General functions", { expect_equal(po_1$packages, character(0)) expect_null(po_1$state) assert_subset(po_1$tags, mlr_reflections$pipeops$valid_tags) + expect_equal(po_1$timings, c(train = NA_real_, predict = NA_real_)) expect_output(expect_equal(po_1$train(list(1)), list(output = 1)), "Training debug.basic") - expect_equal(po_1$state, list(input = 1)) + expect_equal(po_1$state$input, 1) + expect_setequal(names(po_1$state), c("input", "log", "train_time")) expect_true(po_1$is_trained) + expect_equal(names(po_1$timings), c("train", "predict")) + expect_true(which(is.na(po_1$timings)) == 2L) + + expect_output(expect_equal(po_1$predict(list(1)), list(output = 1)), "Predicting debug.basic") + expect_equal(names(po_1$timings), c("train", "predict")) + expect_true(sum(is.na(po_1$timings)) == 0L) expect_error(po_1$train(tsk("iris")), regexp = "Must be of type 'list'") }) diff --git a/tests/testthat/test_encapsulate.R b/tests/testthat/test_encapsulate.R new file mode 100644 index 000000000..6fc840259 --- /dev/null +++ b/tests/testthat/test_encapsulate.R @@ -0,0 +1,148 @@ +disable_encapsulation = function(x) { + x$encapsulate = c(train = "none", predict = "none") + x +} + +enable_encapsulation = function(x) { + x$encapsulate = c(train = "evaluate", predict = "evaluate") + x +} + +test_that("Encapsulate - PipeOp", { + pipeop = PipeOpDebugEncapsulate$new(param_vals = list( + message_train = TRUE, warning_train = TRUE, message_predict = TRUE, warning_predict = TRUE) + ) + + expect_message(expect_warning(disable_encapsulation(pipeop)$train(list(1)))) + log = pipeop$log + expect_data_table(log) + + expect_silent(enable_encapsulation(pipeop)$train(list(1))) + log = pipeop$log + expect_data_table(log) + expect_data_table(log, nrows = 2L, ncols = 3L, any.missing = FALSE) + expect_factor(log$class) + expect_set_equal(as.character(log$class), c("output", "warning")) + expect_true(all(grepl("->train()", log$msg, fixed = TRUE))) + expect_true("output" %in% log$class) 
+ expect_true("warning" %in% log$class) + expect_false("error" %in% log$class) + + expect_message(expect_warning(disable_encapsulation(pipeop)$predict(list(1)))) + log = pipeop$log[stage == "predict"] + expect_data_table(log) + expect_equal(nrow(log), 0L) + + enable_encapsulation(pipeop)$predict(list(1)) + log = pipeop$log[stage == "predict"] + expect_data_table(log) + expect_data_table(log, nrows = 2L, ncols = 3L, any.missing = FALSE) + expect_factor(log$class) + expect_equal(as.character(log$class), c("output", "warning")) + expect_true(all(grepl("->predict()", log$msg, fixed = TRUE))) +}) + +test_that("Encapsulate - Graph", { + task = tsk("iris") + + pipeop = PipeOpDebugEncapsulate$new(param_vals = list( + message_train = TRUE, warning_train = TRUE, message_predict = TRUE, warning_predict = TRUE) + ) + + learner = lrn("classif.debug") + learner$param_set$values = list(message_train = 1, warning_train = 1, message_predict = 1, warning_predict = 1) + + # encapsulation of pipeops within graph disabled + g1 = disable_encapsulation(pipeop) %>>% disable_encapsulation(learner) + expect_true(all(unlist(map(g1$pipeops, "encapsulate")) == "none")) + + expect_message(expect_warning(g1$train(task))) # train + log = map(g1$pipeops, "log") + for (i in seq_along(log)) { + l = log[[i]] + expect_data_table(l) + } + + for (i in seq_along(g1$pipeops)) { + g1$pipeops[[i]]$encapsulate = c(train = "evaluate", predict = "evaluate") + } + expect_true(all(unlist(map(g1$pipeops, "encapsulate")) == "evaluate")) + expect_silent(g1$train(task)) # train, encapsulation of pipeops within graph enabled + log = map(g1$pipeops, "log") + for (i in seq_along(log)) { + l = log[[i]] + expect_data_table(l) + expect_data_table(l, nrows = 2L, ncols = 3L, any.missing = FALSE) + } + + for (i in seq_along(g1$pipeops)) { + g1$pipeops[[i]]$encapsulate = c(train = "none", predict = "none") + } + expect_true(all(unlist(map(g1$pipeops, "encapsulate")) == "none")) + 
expect_message(expect_warning(g1$predict(task))) # predict, encapsulation of pipeops within graph disabled + log = map(g1$pipeops, function(pipeop) pipeop$log[stage == "predict"]) + for (i in seq_along(log)) { + l = log[[i]] + expect_data_table(l) + } + + for (i in seq_along(g1$pipeops)) { + g1$pipeops[[i]]$encapsulate = c(train = "evaluate", predict = "evaluate") + } + expect_true(all(unlist(map(g1$pipeops, "encapsulate")) == "evaluate")) + expect_silent(g1$predict(task)) # predict, encapsulation of graph + log = map(g1$pipeops, function(pipeop) pipeop$log[stage == "predict"]) + for (i in seq_along(log)) { + l = log[[i]] + expect_data_table(l) + expect_data_table(l, nrows = 2L, ncols = 3L, any.missing = FALSE) + } +}) + +test_that("Encapsulate - GraphLearner", { + task = tsk("iris") + + pipeop = PipeOpDebugEncapsulate$new(param_vals = list( + message_train = TRUE, warning_train = TRUE, message_predict = TRUE, warning_predict = TRUE) + ) + + # encapsulation of pipeops within graphlearner will always be set to "none" + gl1 = GraphLearner$new(enable_encapsulation(pipeop) %>>% enable_encapsulation(lrn("classif.debug"))) + expect_true(all(unlist(map(gl1$graph$pipeops, "encapsulate")) == "none")) + + expect_message(expect_warning(disable_encapsulation(gl1)$train(task))) # train, no encapsulation of graph + log = gl1$log + expect_data_table(log) + expect_true(every(map(gl1$graph$pipeops, "log"), is.null)) # will always be NULL + + expect_silent(enable_encapsulation(gl1)$train(task)) # train, encapsulation of graph + log = gl1$log + expect_data_table(log) + expect_data_table(log, nrows = 2L, ncols = 3L, any.missing = FALSE) + expect_true(every(map(gl1$graph$pipeops, "log"), is.null)) + + expect_message(expect_warning(disable_encapsulation(gl1)$predict(task))) # predict, no encapsulation of graph + log = gl1$log[stage == "predict"] + expect_data_table(log) + expect_equal(nrow(log), 0L) + expect_true(every(map(gl1$graph$pipeops, "log"), is.null)) + + 
expect_silent(enable_encapsulation(gl1)$predict(task)) # predict, encapsulation of graph + log = gl1$log[stage == "predict"] + expect_data_table(log) + expect_true(every(map(gl1$graph$pipeops, "log"), is.null)) +}) + +test_that("Encapsulate - PipeOpLearner and PipeOpLearnerCV", { + learner = lrn("classif.debug") + learner$encapsulate = c(train = "evaluate", predict = "none") + + pl1 = PipeOpLearner$new(learner) + pl2 = as_pipeop(learner) + + plcv = PipeOpLearnerCV$new(learner) + + expect_equal(learner$encapsulate, pl1$encapsulate) + expect_equal(pl1$encapsulate, pl2$encapsulate) + expect_equal(pl2$encapsulate, plcv$encapsulate) +}) diff --git a/tests/testthat/test_multiplicities.R b/tests/testthat/test_multiplicities.R index 26a70c03c..ddc820f69 100644 --- a/tests/testthat/test_multiplicities.R +++ b/tests/testthat/test_multiplicities.R @@ -92,13 +92,15 @@ test_that("PipeOp - evaluate_multiplicities", { po = PipeOpTestMultiplicites$new(2) expect_null(po$state) - po$param_set$values$state = "trained" + po$param_set$values$state = list("trained") train_out1 = po$train(as.Multiplicity(list(0, as.Multiplicity(0)))) - expect_multiplicity(train_out1[[1]]) - expect_equal(po$state, as.Multiplicity(list("trained"))) + expect_multiplicity(train_out1[[1L]]) + expect_equal(po$state[[1L]][[1L]], "trained") + expect_setequal(names(po$state[[1L]]), c("", "log", "train_time")) predict_out1 = po$predict(as.Multiplicity(list(0, as.Multiplicity(0)))) - expect_equal(po$state, as.Multiplicity(list("trained"))) - expect_multiplicity(predict_out1[[1]]) + expect_equal(po$state[[1L]][[1L]], "trained") + expect_setequal(names(po$state[[1L]]), c("", "log", "train_time", "predict_time")) + expect_multiplicity(predict_out1[[1L]]) po$state = list("no_multiplicties") expect_error(po$predict(as.Multiplicity(list(0, as.Multiplicity(0)))), regexp = "state was not a multiplicity") @@ -108,11 +110,11 @@ test_that("PipeOp - evaluate_multiplicities", { expect_error(po$predict(as.Multiplicity(list(0, 
as.Multiplicity(0)))), regexp = "state had different length / names than input") expect_equal(po$state, as.Multiplicity(NULL)) - po$param_set$values$state = "trained" + po$param_set$values$state = list("trained") train_out2 = po$train(as.Multiplicity(list(0, as.Multiplicity(0)))) expect_multiplicity(train_out2[[1]]) old_state = po$state - po$param_set$values$state = "error" + po$param_set$values$state = list("error") expect_error(po$train(as.Multiplicity(list(0, as.Multiplicity(0)))), regexp = "Error") expect_equal(po$state, NULL) # state is completely reset to NULL }) diff --git a/tests/testthat/test_pipeop_imputelearner.R b/tests/testthat/test_pipeop_imputelearner.R index edd2b6162..86887bf5d 100644 --- a/tests/testthat/test_pipeop_imputelearner.R +++ b/tests/testthat/test_pipeop_imputelearner.R @@ -150,7 +150,7 @@ test_that("PipeOpImputeLearner - model active binding to state", { # after predicting state is unchanged and models still are equivalent predict_out = po$predict(list(task)) - expect_equal(po$state, train_state) + expect_equal(po$state[-which(names(po$state) == "predict_time")], train_state) expect_null(po$learner$state) expect_equal(names(models), names(po$learner_models)) expect_true(all(pmap_lgl(list(map(models, .f = "model"), map(po$learner_models, .f = "model")), .f = all.equal))) diff --git a/tests/testthat/test_pipeop_learner.R b/tests/testthat/test_pipeop_learner.R index acfbba6ee..3ad42bc9a 100644 --- a/tests/testthat/test_pipeop_learner.R +++ b/tests/testthat/test_pipeop_learner.R @@ -70,7 +70,7 @@ test_that("PipeOpLearner - model active binding to state", { # after predicting states are unchanged predict_out = po$predict(list(task)) - expect_equal(po$state, train_state) + expect_equal(po$state[-which(names(po$state) == "predict_time")], train_state) expect_null(po$learner$state) expect_equal(po$learner_model$state, po$state) }) diff --git a/tests/testthat/test_pipeop_learnercv.R b/tests/testthat/test_pipeop_learnercv.R index 
62595d6c4..9300109d6 100644 --- a/tests/testthat/test_pipeop_learnercv.R +++ b/tests/testthat/test_pipeop_learnercv.R @@ -94,7 +94,7 @@ test_that("PipeOpLearnerCV - model active binding to state", { # after predicting states are unchanged predict_out = po$predict(list(task)) - expect_equal(po$state, train_state) + expect_equal(po$state[-which(names(po$state) == "predict_time")], train_state) expect_null(po$learner$state) expect_equal(po$learner_model$state, po$state) }) diff --git a/tests/testthat/test_pipeop_multiplicityexply.R b/tests/testthat/test_pipeop_multiplicityexply.R index 6be3a28ac..f9e2f3d4c 100644 --- a/tests/testthat/test_pipeop_multiplicityexply.R +++ b/tests/testthat/test_pipeop_multiplicityexply.R @@ -3,8 +3,8 @@ context("PipeOpMultiplicityExply") test_that("multiplicityexply - basic properties", { po = PipeOpMultiplicityExply$new(3) expect_pipeop(po) - expect_data_table(po$input, nrows = 1) - expect_data_table(po$output, nrows = 3) + expect_data_table(po$input, nrows = 1L) + expect_data_table(po$output, nrows = 3L) expect_pipeop_class(PipeOpMultiplicityExply, list(1)) expect_pipeop_class(PipeOpMultiplicityExply, list(3)) @@ -19,12 +19,15 @@ test_that("multiplicityexply - train and predict", { po = PipeOpMultiplicityExply$new(2) tout = train_pipeop(po, list(as.Multiplicity(list(t1, t2)))) - expect_list(po$state, len = 0) - expect_list(tout, len = 2) - expect_equal(tout[[1]], t1) - expect_equal(tout[[2]], t2) + expect_list(po$state, len = 2L) + expect_setequal(names(po$state), c("log", "train_time")) + expect_list(tout, len = 2L) + expect_equal(tout[[1L]], t1) + expect_equal(tout[[2L]], t2) pout = predict_pipeop(po, list(as.Multiplicity(list(t1, t2)))) - expect_list(pout, len = 2) - expect_equal(pout[[1]], t1) - expect_equal(pout[[2]], t2) + expect_list(po$state, len = 3L) + expect_setequal(names(po$state), c("log", "train_time", "predict_time")) + expect_list(pout, len = 2L) + expect_equal(pout[[1L]], t1) + expect_equal(pout[[2L]], t2) }) diff 
--git a/tests/testthat/test_pipeop_multiplicityimply.R b/tests/testthat/test_pipeop_multiplicityimply.R index c855449c4..6c3718618 100644 --- a/tests/testthat/test_pipeop_multiplicityimply.R +++ b/tests/testthat/test_pipeop_multiplicityimply.R @@ -3,16 +3,16 @@ context("PipeOpMultiplicityImply") test_that("multiplicityimply - basic properties", { po = PipeOpMultiplicityImply$new(3) expect_pipeop(po) - expect_data_table(po$input, nrows = 3) - expect_data_table(po$output, nrows = 1) + expect_data_table(po$input, nrows = 3L) + expect_data_table(po$output, nrows = 1L) expect_pipeop_class(PipeOpMultiplicityImply, list(1)) expect_pipeop_class(PipeOpMultiplicityImply, list(3)) po = PipeOpMultiplicityImply$new() expect_pipeop(po) - expect_data_table(po$input, nrows = 1) - expect_data_table(po$output, nrows = 1) + expect_data_table(po$input, nrows = 1L) + expect_data_table(po$output, nrows = 1L) }) test_that("multiplicityimply - train and predict", { @@ -24,36 +24,45 @@ test_that("multiplicityimply - train and predict", { po = PipeOpMultiplicityImply$new(2) tout = train_pipeop(po, list(t1, t2)) - expect_list(po$state, len = 0) - expect_multiplicity(tout[[1]]) - expect_equal(tout[[1]][[1]], t1) - expect_equal(tout[[1]][[2]], t2) + expect_list(po$state, len = 2L) + expect_setequal(names(po$state), c("log", "train_time")) + expect_multiplicity(tout[[1L]]) + expect_equal(tout[[1L]][[1L]], t1) + expect_equal(tout[[1L]][[2L]], t2) pout = predict_pipeop(po, list(t1, t2)) - expect_multiplicity(pout[[1]]) - expect_equal(pout[[1]][[1]], t1) - expect_equal(pout[[1]][[2]], t2) + expect_list(po$state, len = 3L) + expect_setequal(names(po$state), c("log", "train_time", "predict_time")) + expect_multiplicity(pout[[1L]]) + expect_equal(pout[[1L]][[1L]], t1) + expect_equal(pout[[1L]][[2L]], t2) po = PipeOpMultiplicityImply$new() tout = train_pipeop(po, list(t1, t2)) - expect_list(po$state, len = 0) - expect_multiplicity(tout[[1]]) - expect_equal(tout[[1]][[1]], t1) - 
expect_equal(tout[[1]][[2]], t2) + expect_list(po$state, len = 2L) + expect_setequal(names(po$state), c("log", "train_time")) + expect_multiplicity(tout[[1L]]) + expect_equal(tout[[1L]][[1L]], t1) + expect_equal(tout[[1L]][[2L]], t2) pout = predict_pipeop(po, list(t1, t2)) - expect_multiplicity(pout[[1]]) - expect_equal(pout[[1]][[1]], t1) - expect_equal(pout[[1]][[2]], t2) + expect_list(po$state, len = 3L) + expect_setequal(names(po$state), c("log", "train_time", "predict_time")) + expect_multiplicity(pout[[1L]]) + expect_equal(pout[[1L]][[1L]], t1) + expect_equal(pout[[1L]][[2L]], t2) po = PipeOpMultiplicityImply$new(c("t1", "t2")) tout = train_pipeop(po, list(t1, t2)) - expect_list(po$state, len = 0) - expect_multiplicity(tout[[1]]) - expect_equal(tout[[1]][[1]], t1) - expect_equal(tout[[1]][[2]], t2) + expect_list(po$state, len = 2L) + expect_setequal(names(po$state), c("log", "train_time")) + expect_multiplicity(tout[[1L]]) + expect_equal(tout[[1L]][[1L]], t1) + expect_equal(tout[[1L]][[2L]], t2) pout = predict_pipeop(po, list(t1, t2)) - expect_multiplicity(pout[[1]]) - expect_equal(pout[[1]][[1]], t1) - expect_equal(pout[[1]][[2]], t2) + expect_list(po$state, len = 3L) + expect_setequal(names(po$state), c("log", "train_time", "predict_time")) + expect_multiplicity(pout[[1L]]) + expect_equal(pout[[1L]][[1L]], t1) + expect_equal(pout[[1L]][[2L]], t2) }) test_that("multiplicityimply innum names are used", { diff --git a/tests/testthat/test_pipeop_ovr.R b/tests/testthat/test_pipeop_ovr.R index 3dd9169d9..54d716913 100644 --- a/tests/testthat/test_pipeop_ovr.R +++ b/tests/testthat/test_pipeop_ovr.R @@ -3,34 +3,34 @@ context("PipeOpOVRSplit") test_that("PipeOpOVRSplit - basic properties", { po = PipeOpOVRSplit$new() expect_pipeop(po) - expect_data_table(po$input, nrows = 1) - expect_data_table(po$output, nrows = 1) + expect_data_table(po$input, nrows = 1L) + expect_data_table(po$output, nrows = 1L) expect_pipeop_class(PipeOpOVRSplit) }) test_that("PipeOpOVRSplit - 
train and predict", { # toy task to split - dat = data.table(target = as.factor(rep(c("a", "b", "rest"), each = 10)), feature = rnorm(30)) + dat = data.table(target = as.factor(rep(c("a", "b", "rest"), each = 10L)), feature = rnorm(30L)) tsk = TaskClassif$new("test", backend = dat, target = "target") po = PipeOpOVRSplit$new() tout = train_pipeop(po, list(tsk)) expect_equal(po$state$levels, tsk$class_names) - expect_multiplicity(tout[[1]]) - expect_list(tout[[1]], len = 3) - expect_named(tout[[1]], tsk$class_names) - expect_true(all(pmap_lgl(list(tout[[1]], names(tout[[1]])), .f = function(task, name) { + expect_multiplicity(tout[[1L]]) + expect_list(tout[[1L]], len = 3L) + expect_named(tout[[1L]], tsk$class_names) + expect_true(all(pmap_lgl(list(tout[[1L]], names(tout[[1L]])), .f = function(task, name) { expect_task(task) all(task$target_names == tsk$target_names) && task$positive == name && task$negative == "rest." && all.equal(task$truth(), factor(ifelse(tsk$truth() == task$positive, task$positive, "rest."), levels = c(task$positive, "rest."))) }))) pout = predict_pipeop(po, list(tsk)) - expect_multiplicity(pout[[1]]) - expect_list(pout[[1]], len = 3) - expect_named(pout[[1]], tsk$class_names) - expect_true(all(pmap_lgl(list(pout[[1]], names(pout[[1]])), .f = function(task, name) { + expect_multiplicity(pout[[1L]]) + expect_list(pout[[1L]], len = 3L) + expect_named(pout[[1L]], tsk$class_names) + expect_true(all(pmap_lgl(list(pout[[1L]], names(pout[[1L]])), .f = function(task, name) { expect_task(task) task$target_names == tsk$target_names && task$positive == name && task$negative == "rest." 
&& all.equal(task$truth(), factor(ifelse(tsk$truth() == task$positive, task$positive, "rest."), levels = c(task$positive, "rest."))) @@ -42,18 +42,18 @@ context("PipeOpOVRUnite") test_that("PipeOpOVRUnite - basic properties", { po = PipeOpOVRUnite$new() expect_pipeop(po) - expect_data_table(po$input, nrows = 1) - expect_data_table(po$output, nrows = 1) + expect_data_table(po$input, nrows = 1L) + expect_data_table(po$output, nrows = 1L) expect_pipeop_class(PipeOpOVRUnite) }) test_that("PipeOpOVRUnite- train and predict", { # toy tasks that are splitted, trained and predicted manually - feature = rep(c(1, 0), c(10, 20)) - dat1 = data.table(target = as.factor(rep(c("a", "rest"), c(10, 20))), feature = feature) - dat2 = data.table(target = as.factor(rep(c("rest", "b", "rest"), c(10, 10, 10))), feature = feature) - dat3 = data.table(target = as.factor(rep(c("rest", "c"), c(20, 10))), feature = feature) + feature = rep(c(1, 0), c(10L, 20L)) + dat1 = data.table(target = as.factor(rep(c("a", "rest"), c(10L, 20L))), feature = feature) + dat2 = data.table(target = as.factor(rep(c("rest", "b", "rest"), c(10L, 10L, 10L))), feature = feature) + dat3 = data.table(target = as.factor(rep(c("rest", "c"), c(20L, 10L))), feature = feature) tsk1 = TaskClassif$new("t1", backend = dat1, target = "target", positive = "a") tsk2 = TaskClassif$new("t2", backend = dat2, target = "target", positive = "b") tsk3 = TaskClassif$new("t3", backend = dat3, target = "target", positive = "c") @@ -67,7 +67,7 @@ test_that("PipeOpOVRUnite- train and predict", { lrn$predict(task) }) pout = po$predict(list(as.Multiplicity(tin))) - expect_prediction_classif(pout[[1]]) + expect_prediction_classif(pout[[1L]]) # predict_type "response" lrn$predict_type = "response" @@ -76,18 +76,18 @@ test_that("PipeOpOVRUnite- train and predict", { lrn$predict(task) }) pout = po$predict(list(as.Multiplicity(tin))) - expect_prediction_classif(pout[[1]]) + expect_prediction_classif(pout[[1L]]) # NA handling - na_response = 
tin[[1]]$response - na_response[1] = NA - tin[[1]] = PredictionClassif$new(row_ids = tin[[1]]$row_ids, truth = tin[[1]]$truth, response = na_response) + na_response = tin[[1L]]$response + na_response[1L] = NA + tin[[1L]] = PredictionClassif$new(row_ids = tin[[1L]]$row_ids, truth = tin[[1L]]$truth, response = na_response) pout = po$predict(list(as.Multiplicity(tin))) - expect_prediction_classif(pout[[1]]) - expect_equal(pout[[1]]$prob[1, ], c(a = 1/3, b = 1/3, c = 1/3)) + expect_prediction_classif(pout[[1L]]) + expect_equal(pout[[1L]]$prob[1L, ], c(a = 1/3, b = 1/3, c = 1/3)) # error handling - tin[[1]] = PredictionClassif$new(row_ids = tin[[1]]$row_ids, truth = tin[[1]]$truth) + tin[[1L]] = PredictionClassif$new(row_ids = tin[[1L]]$row_ids, truth = tin[[1L]]$truth) expect_error(po$predict(list(as.Multiplicity(tin))), regexp = "PipeOpOVRUnite input predictions had missing 'prob' and missing 'response' values") }) @@ -95,11 +95,11 @@ context("PipeOpOVRSplit and PipeOpOVRUnite") test_that("PipeOpOVRSplit and PipeOpOVRUnite - train and predict", { # same toy task but now we compare the results to the automated Graph's results - feature = rep(c(1, 0), c(10, 20)) - dat0 = data.table(target = as.factor(rep(c("a", "b", "c"), each = 10)), feature = feature) - dat1 = data.table(target = as.factor(rep(c("a", "rest"), c(10, 20))), feature = feature) - dat2 = data.table(target = as.factor(rep(c("rest", "b", "rest"), c(10, 10, 10))), feature = feature) - dat3 = data.table(target = as.factor(rep(c("rest", "c"), c(20, 10))), feature = feature) + feature = rep(c(1, 0), c(10L, 20L)) + dat0 = data.table(target = as.factor(rep(c("a", "b", "c"), each = 10L)), feature = feature) + dat1 = data.table(target = as.factor(rep(c("a", "rest"), c(10L, 20L))), feature = feature) + dat2 = data.table(target = as.factor(rep(c("rest", "b", "rest"), c(10L, 10L, 10L))), feature = feature) + dat3 = data.table(target = as.factor(rep(c("rest", "c"), c(20L, 10L))), feature = feature) tsk0 = 
TaskClassif$new("t0", backend = dat0, target = "target") tsk1 = TaskClassif$new("t1", backend = dat1, target = "target", positive = "a") tsk2 = TaskClassif$new("t2", backend = dat2, target = "target", positive = "b") @@ -115,15 +115,15 @@ test_that("PipeOpOVRSplit and PipeOpOVRUnite - train and predict", { gr = PipeOpOVRSplit$new() %>>% LearnerClassifRpart$new() %>>% PipeOpOVRUnite$new() expect_graph(gr) tout = gr$train(tsk0) - expect_list(gr$state$ovrunite, len = 0) - expect_null(tout[[1]]) + expect_list(gr$state$ovrunite, len = 2L) + expect_null(tout[[1L]]) pout = gr$predict(tsk0) - expect_equal(pout_ref[[1]]$prob, pout[[1]]$prob) + expect_equal(pout_ref[[1L]]$prob, pout[[1L]]$prob) # setting weights to zero results in uniform probs - gr$param_set$values$ovrunite.weights = rep(0, 3) - expect_true(all.equal(unique(gr$predict(tsk0)[[1]]$prob), t(c(a = 1/3, b = 1/3, c = 1/3)))) + gr$param_set$values$ovrunite.weights = rep(0, 3L) + expect_true(all.equal(unique(gr$predict(tsk0)[[1L]]$prob), t(c(a = 1/3, b = 1/3, c = 1/3)))) }) test_that("PipeOpOVRSplit and PipeOpOVRUnite - task size", { @@ -131,5 +131,5 @@ test_that("PipeOpOVRSplit and PipeOpOVRUnite - task size", { gr$train(tsk("iris")$filter(c(1:30, 51:80, 101:130))) prd = gr$predict(tsk("iris")$filter(c(1:30, 51:80, 101:130)))[[1]] expect_prediction_classif(prd) - expect_true(nrow(prd$data$prob) == 90) + expect_true(nrow(prd$data$prob) == 90L) }) diff --git a/tests/testthat/test_pipeop_proxy.R b/tests/testthat/test_pipeop_proxy.R index 52479cb04..22755bfb9 100644 --- a/tests/testthat/test_pipeop_proxy.R +++ b/tests/testthat/test_pipeop_proxy.R @@ -5,7 +5,8 @@ test_that("PipeOpProxy - basic properties", { pop = PipeOpProxy$new(param_vals = list(content = PipeOpNOP$new())) expect_pipeop(pop) expect_equal(train_pipeop(pop, inputs = list(task))[[1L]], task) - expect_equal(pop$state, list(nop = list())) + expect_length(pop$state, 3L) + expect_length(pop$state$nop, 2L) expect_equal(predict_pipeop(pop, inputs = 
list(task))[[1L]], task) }) @@ -71,7 +72,7 @@ test_that("PipeOpProxy - Complicated Graphs", { # --------- # two parallel single-input-single-output pipeops - pop = po("proxy", content = list(PipeOpDebugMulti$new(1, 1, "debug1"), PipeOpDebugMulti$new(1, 1, "debug2")), outnum = 2) + pop = po("proxy", content = list(PipeOpDebugMulti$new(1, 1, "debug1"), PipeOpDebugMulti$new(1, 1, "debug2")), outnum = 2L) expect_output( expect_equal(pop$train(list(1)), list(output1 = 2, output2 = 2)), @@ -97,15 +98,15 @@ Predicting debug2 with input list\\(input_1 = 4\\) and state list\\(input_1 = 2\ # --------- # NOP | feature union | NOP; feature union has vararg - pop = po("proxy", content = list(PipeOpDebugMulti$new(1, 1, "debug1"), PipeOpFeatureUnion$new(), PipeOpDebugMulti$new(1, 1, "debug2")), outnum = 3) + pop = po("proxy", content = list(PipeOpDebugMulti$new(1, 1, "debug1"), PipeOpFeatureUnion$new(), PipeOpDebugMulti$new(1, 1, "debug2")), outnum = 3L) tsk1 = tsk("iris") - tsk2 = po("pca")$train(list(tsk1))[[1]] - tsk3 = po("ica")$train(list(tsk1))[[1]] + tsk2 = po("pca")$train(list(tsk1))[[1L]] + tsk3 = po("ica")$train(list(tsk1))[[1L]] expect_output( expect_equal(pop$train(list(1, tsk1, tsk2, tsk3, 2)), - list(output1 = 2, output2 = po("featureunion")$train(list(tsk1, tsk2, tsk3))[[1]], output3 = 3)), + list(output1 = 2, output2 = po("featureunion")$train(list(tsk1, tsk2, tsk3))[[1L]], output3 = 3)), "^Training debug1 with input list\\(input_1 = 1\\) Training debug2 with input list\\(input_1 = 2\\)$") diff --git a/tests/testthat/test_pipeop_replicate.R b/tests/testthat/test_pipeop_replicate.R index 11c41f106..a74ba6112 100644 --- a/tests/testthat/test_pipeop_replicate.R +++ b/tests/testthat/test_pipeop_replicate.R @@ -3,24 +3,24 @@ context("PipeOpReplicate") test_that("PipeOpReplicate - basic properties", { po = PipeOpReplicate$new() expect_pipeop(po) - expect_data_table(po$input, nrows = 1) - expect_data_table(po$output, nrows = 1) + expect_data_table(po$input, nrows = 1L) + 
expect_data_table(po$output, nrows = 1L) expect_pipeop_class(PipeOpReplicate) }) test_that("PipeOpReplicate - train and predict", { tsk = mlr_tasks$get("iris") - nreps = 3 + nreps = 3L po = PipeOpReplicate$new(param_vals = list(reps = nreps)) tout = train_pipeop(po, list(tsk)) - expect_list(po$state, len = 0) - expect_multiplicity(tout[[1]]) - expect_list(tout[[1]], len = nreps) - expect_true(all(map_lgl(tout[[1]], .f = function(x) all.equal(x, tsk)))) + expect_list(po$state, len = 2L) + expect_multiplicity(tout[[1L]]) + expect_list(tout[[1L]], len = nreps) + expect_true(all(map_lgl(tout[[1L]], .f = function(x) all.equal(x, tsk)))) pout = predict_pipeop(po, list(tsk)) - expect_multiplicity(pout[[1]]) - expect_list(pout[[1]], len = 3) - expect_true(all(map_lgl(pout[[1]], .f = function(x) all.equal(x, tsk)))) + expect_multiplicity(pout[[1L]]) + expect_list(pout[[1L]], len = 3L) + expect_true(all(map_lgl(pout[[1L]], .f = function(x) all.equal(x, tsk)))) }) diff --git a/tests/testthat/test_pipeop_targetmutate.R b/tests/testthat/test_pipeop_targetmutate.R index e707d60b6..ef99bee05 100644 --- a/tests/testthat/test_pipeop_targetmutate.R +++ b/tests/testthat/test_pipeop_targetmutate.R @@ -22,8 +22,10 @@ test_that("PipeOpTargetMutate - basic properties", { address_in = address(task) train_out = g$train(task) expect_null(train_out[[1L]]) - expect_length(g$state[[1L]], 0L) - expect_length(g$state[[3L]], 0L) + expect_length(g$state[[1L]], 2L) + expect_setequal(names(g$state[[1L]]), c("log", "train_time")) + expect_length(g$state[[3L]], 2L) + expect_setequal(names(g$state[[3L]]), c("log", "train_time")) predict_out = g$predict(task) @@ -67,53 +69,53 @@ test_that("PipeOpTargetMutate - log base 2 trafo", { expect_equal(2 ^ learner_predict_out$response, predict_out[[1L]]$response) }) -#'test_that("PipeOpTargetMutate - Regr -> Classif", { -#' g = Graph$new() -#' g$add_pipeop(PipeOpTargetMutate$new("regr_classif", -#' param_vals = list( -#' trafo = function(x) 
setNames(as.data.table(as.factor(x > 20)), nm = "medv_dich")), -#' new_task_type = "classif" -#' ) -#' ) -#' g$add_pipeop(LearnerClassifRpart$new()) -#' g$add_pipeop(PipeOpTargetInvert$new()) -#' g$add_edge(src_id = "regr_classif", dst_id = "targetinvert", src_channel = 1L, dst_channel = 1L) -#' g$add_edge(src_id = "regr_classif", dst_id = "classif.rpart", src_channel = 2L, dst_channel = 1L) -#' g$add_edge(src_id = "classif.rpart", dst_id = "targetinvert", src_channel = 1L, dst_channel = 2L) -#' -#' task = mlr_tasks$get("boston_housing") -#' task$col_roles$feature = setdiff(task$col_roles$feature, y = "cmedv") -#' train_out = g$train(task) -#' expect_r6(g$state$classif.rpart$train_task, classes = "TaskClassif") -#' -#' expect_true(g$state$classif.rpart$train_task$target_names == "medv_dich") -#' expect_true("twoclass" %in% g$state$classif.rpart$train_task$properties) -#' expect_true("medv" %nin% g$state$classif.rpart$train_task$feature_names) -#' predict_out = g$predict(task) -#' expect_number(predict_out[[1]]$score(msr("classif.ce")), lower = 0, upper = 1) -#'}) - -#'test_that("PipeOpTargetMutate - Classif -> Regr", { -#' # quite nonsense but lets us test classif to regr -#' g = Graph$new() -#' g$add_pipeop(PipeOpTargetMutate$new("classif_regr", -#' param_vals = list( -#' trafo = function(x) setNames(as.data.table(fifelse(x[[1L]] == levels(x[[1L]])[[1L]], yes = 0, no = 10) + rnorm(150L)), nm = "Species_numeric")), -#' new_task_type = "regr" -#' ) -#' ) -#' g$add_pipeop(LearnerRegrRpart$new()) -#' g$add_pipeop(PipeOpTargetInvert$new()) -#' g$add_edge(src_id = "classif_regr", dst_id = "targetinvert", src_channel = 1L, dst_channel = 1L) -#' g$add_edge(src_id = "classif_regr", dst_id = "regr.rpart", src_channel = 2L, dst_channel = 1L) -#' g$add_edge(src_id = "regr.rpart", dst_id = "targetinvert", src_channel = 1L, dst_channel = 2L) -#' -#' task = mlr_tasks$get("iris") -#' train_out = g$train(task) -#' expect_r6(g$state$regr.rpart$train_task, classes = "TaskRegr") -#' 
-#' expect_true(g$state$regr.rpart$train_task$target_names == "Species_numeric") -#' expect_true("Species" %nin% g$state$regr.rpart$train_task$feature_names) -#' predict_out = g$predict(task) -#' expect_number(predict_out[[1]]$score(msr("regr.mse"))) -#'}) +#test_that("PipeOpTargetMutate - Regr -> Classif", { +# g = Graph$new() +# g$add_pipeop(PipeOpTargetMutate$new("regr_classif", +# param_vals = list( +# trafo = function(x) setNames(as.data.table(as.factor(x > 20)), nm = "medv_dich")), +# new_task_type = "classif" +# ) +# ) +# g$add_pipeop(LearnerClassifRpart$new()) +# g$add_pipeop(PipeOpTargetInvert$new()) +# g$add_edge(src_id = "regr_classif", dst_id = "targetinvert", src_channel = 1L, dst_channel = 1L) +# g$add_edge(src_id = "regr_classif", dst_id = "classif.rpart", src_channel = 2L, dst_channel = 1L) +# g$add_edge(src_id = "classif.rpart", dst_id = "targetinvert", src_channel = 1L, dst_channel = 2L) +# +# task = mlr_tasks$get("boston_housing") +# task$col_roles$feature = setdiff(task$col_roles$feature, y = "cmedv") +# train_out = g$train(task) +# expect_r6(g$state$classif.rpart$train_task, classes = "TaskClassif") +# +# expect_true(g$state$classif.rpart$train_task$target_names == "medv_dich") +# expect_true("twoclass" %in% g$state$classif.rpart$train_task$properties) +# expect_true("medv" %nin% g$state$classif.rpart$train_task$feature_names) +# predict_out = g$predict(task) +# expect_number(predict_out[[1]]$score(msr("classif.ce")), lower = 0, upper = 1) +#}) + +#test_that("PipeOpTargetMutate - Classif -> Regr", { +# # quite nonsense but lets us test classif to regr +# g = Graph$new() +# g$add_pipeop(PipeOpTargetMutate$new("classif_regr", +# param_vals = list( +# trafo = function(x) setNames(as.data.table(fifelse(x[[1L]] == levels(x[[1L]])[[1L]], yes = 0, no = 10) + rnorm(150L)), nm = "Species_numeric")), +# new_task_type = "regr" +# ) +# ) +# g$add_pipeop(LearnerRegrRpart$new()) +# g$add_pipeop(PipeOpTargetInvert$new()) +# g$add_edge(src_id = "classif_regr", 
dst_id = "targetinvert", src_channel = 1L, dst_channel = 1L) +# g$add_edge(src_id = "classif_regr", dst_id = "regr.rpart", src_channel = 2L, dst_channel = 1L) +# g$add_edge(src_id = "regr.rpart", dst_id = "targetinvert", src_channel = 1L, dst_channel = 2L) +# +# task = mlr_tasks$get("iris") +# train_out = g$train(task) +# expect_r6(g$state$regr.rpart$train_task, classes = "TaskRegr") +# +# expect_true(g$state$regr.rpart$train_task$target_names == "Species_numeric") +# expect_true("Species" %nin% g$state$regr.rpart$train_task$feature_names) +# predict_out = g$predict(task) +# expect_number(predict_out[[1]]$score(msr("regr.mse"))) +#}) diff --git a/tests/testthat/test_pipeop_targettrafoscalerange.R b/tests/testthat/test_pipeop_targettrafoscalerange.R index 2fb88377c..2b58d705e 100644 --- a/tests/testthat/test_pipeop_targettrafoscalerange.R +++ b/tests/testthat/test_pipeop_targettrafoscalerange.R @@ -15,7 +15,7 @@ test_that("PipeOpTargetTrafoScaleRange - basic properties", { b = 1 / (rng[2L] - rng[1L]) a = -rng[1L] * b expect_equal(train_out1[[2L]]$data(cols = "medv.scaled")[[1L]], a + x * b) - expect_equal(po$state, list(scale = b, offset = a)) + expect_equal(po$state[c("scale", "offset")], list(scale = b, offset = a)) predict_out1 = po$predict(list(task)) @@ -24,7 +24,7 @@ test_that("PipeOpTargetTrafoScaleRange - basic properties", { state = po$state task$row_roles$use = 1:506 predict_out2 = po$predict(list(task)) - expect_equal(state, po$state) + expect_equal(state[c("scale", "offset")], po$state[c("scale", "offset")]) g = Graph$new() g$add_pipeop(po)