diff --git a/NAMESPACE b/NAMESPACE index 12a1e0dc8..86e2ff4eb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -280,14 +280,9 @@ import(palmerpenguins) import(paradox) importFrom(R6,R6Class) importFrom(R6,is.R6) -importFrom(data.table,as.data.table) -importFrom(data.table,data.table) importFrom(future,nbrOfWorkers) importFrom(future,plan) importFrom(graphics,plot) -importFrom(mlr3misc,clbk) -importFrom(mlr3misc,clbks) -importFrom(mlr3misc,mlr_callbacks) importFrom(parallelly,availableCores) importFrom(stats,contr.treatment) importFrom(stats,model.frame) diff --git a/NEWS.md b/NEWS.md index 5469abd28..3648aab36 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,14 @@ # mlr3 (development version) -* feat: Add `mirai` support for parallelization and encapsulation. +## New Features: + +* `Task` got method `$materialize_view()` which can save memory after subsetting a task. +* Better input validation for: + * `Learner` fields. +* Various improvements to the documentation and logging output, including + examples for methods. +* Measure "oob_error" now works even without storing models during resampling. +* Added `mirai` support for parallelization and encapsulation. # mlr3 1.1.0 diff --git a/R/Learner.R b/R/Learner.R index 787fd6ec1..79c64a693 100644 --- a/R/Learner.R +++ b/R/Learner.R @@ -176,46 +176,16 @@ Learner = R6Class("Learner", #' This is an internal data structure which may change in the future. state = NULL, - #' @template field_task_type - task_type = NULL, - #' @field feature_types (`character()`)\cr #' Stores the feature types the learner can handle, e.g. `"logical"`, `"numeric"`, or `"factor"`. #' A complete list of candidate feature types, grouped by task type, is stored in [`mlr_reflections$task_feature_types`][mlr_reflections]. feature_types = NULL, - #' @field properties (`character()`)\cr - #' Stores a set of properties/capabilities the learner has. 
- #' A complete list of candidate properties, grouped by task type, is stored in [`mlr_reflections$learner_properties`][mlr_reflections]. - properties = NULL, + #' @template field_packages packages = NULL, - #' @template field_predict_sets - predict_sets = "test", - - #' @field parallel_predict (`logical(1)`)\cr - #' If set to `TRUE`, use \CRANpkg{future} to calculate predictions in parallel (default: `FALSE`). - #' The row ids of the `task` will be split into [future::nbrOfWorkers()] chunks, - #' and predictions are evaluated according to the active [future::plan()]. - #' This currently only works for methods `Learner$predict()` and `Learner$predict_newdata()`, - #' and has no effect during [resample()] or [benchmark()] where you have other means - #' to parallelize. - #' - #' Note that the recorded time required for prediction reports the time required to predict - #' is not properly defined and depends on the parallelization backend. - parallel_predict = FALSE, - - #' @field timeout (named `numeric(2)`)\cr - #' Timeout for the learner's train and predict steps, in seconds. - #' This works differently for different encapsulation methods, see - #' [mlr3misc::encapsulate()]. - #' Default is `c(train = Inf, predict = Inf)`. 
- #' Also see the section on error handling the mlr3book: - #' \url{https://mlr3book.mlr-org.com/chapters/chapter10/advanced_technical_aspects_of_mlr3.html#sec-error-handling} - timeout = c(train = Inf, predict = Inf), - #' @template field_man man = NULL, @@ -228,12 +198,12 @@ Learner = R6Class("Learner", self$id = assert_string(id, min.chars = 1L) self$label = assert_string(label, na.ok = TRUE) - self$task_type = assert_choice(task_type, mlr_reflections$task_types$type) + private$.task_type = assert_choice(task_type, mlr_reflections$task_types$type) self$feature_types = assert_ordered_set(feature_types, mlr_reflections$task_feature_types, .var.name = "feature_types") private$.predict_types = assert_ordered_set(predict_types, names(mlr_reflections$learner_predict_types[[task_type]]), empty.ok = FALSE, .var.name = "predict_types") private$.predict_type = predict_types[1L] - self$properties = sort(assert_subset(properties, mlr_reflections$learner_properties[[task_type]])) + private$.properties = sort(assert_subset(properties, mlr_reflections$learner_properties[[task_type]])) self$packages = union("mlr3", assert_character(packages, any.missing = FALSE, min.chars = 1L)) self$man = assert_string(man, na.ok = TRUE) @@ -490,10 +460,10 @@ Learner = R6Class("Learner", } prevci = task$col_info - task$backend = newdata - task$col_info = col_info(task$backend) - task$col_info[, c("label", "fix_factor_levels")] = prevci[list(task$col_info$id), on = "id", c("label", "fix_factor_levels")] - task$col_info$fix_factor_levels[is.na(task$col_info$fix_factor_levels)] = FALSE + task$.__enclos_env__$private$.backend = newdata + task$.__enclos_env__$private$.col_info = col_info(task$backend) + task$.__enclos_env__$private$.col_info[, c("label", "fix_factor_levels")] = prevci[list(task$col_info$id), on = "id", c("label", "fix_factor_levels")] + task$.__enclos_env__$private$.col_info$fix_factor_levels[is.na(task$.__enclos_env__$private$.col_info$fix_factor_levels)] = FALSE 
task$row_roles$use = task$backend$rownames task_col_roles = task$col_roles update_col_roles = FALSE @@ -680,6 +650,67 @@ Learner = R6Class("Learner", ), active = list( + #' @template field_task_type + task_type = function(rhs) { + if (!missing(rhs)) { + warn_deprecated("task_type will soon be read-only.") + private$.task_type = rhs + } + private$.task_type + }, + + #' @field properties (`character()`)\cr + #' Stores a set of properties/capabilities the learner has. + #' A complete list of candidate properties, grouped by task type, is stored in [`mlr_reflections$learner_properties`][mlr_reflections]. + properties = function(rhs) { + if (!missing(rhs)) { + warn_deprecated("properties will soon be read-only.") + private$.properties = rhs + } + private$.properties + }, + + #' @template field_predict_sets + predict_sets = function(rhs) { + if (missing(rhs)) { + return(private$.predict_sets) + } + assert_subset(rhs, mlr_reflections$predict_sets) + private$.predict_sets = rhs + }, + + #' @field parallel_predict (`logical(1)`)\cr + #' If set to `TRUE`, use \CRANpkg{future} to calculate predictions in parallel (default: `FALSE`). + #' The row ids of the `task` will be split into [future::nbrOfWorkers()] chunks, + #' and predictions are evaluated according to the active [future::plan()]. + #' This currently only works for methods `Learner$predict()` and `Learner$predict_newdata()`, + #' and has no effect during [resample()] or [benchmark()] where you have other means + #' to parallelize. + #' + #' Note that the recorded time required for prediction reports the time required to predict + #' is not properly defined and depends on the parallelization backend. + parallel_predict = function(rhs) { + if (missing(rhs)) { + return(private$.parallel_predict) + } + private$.parallel_predict = assert_flag(rhs) + }, + + #' @field timeout (named `numeric(2)`)\cr + #' Timeout for the learner's train and predict steps, in seconds. 
+ #' This works differently for different encapsulation methods, see + #' [mlr3misc::encapsulate()]. + #' Default is `c(train = Inf, predict = Inf)`. + #' Also see the section on error handling the mlr3book: + #' \url{https://mlr3book.mlr-org.com/chapters/chapter10/advanced_technical_aspects_of_mlr3.html#sec-error-handling} + timeout = function(rhs) { + if (missing(rhs)) { + return(private$.timeout) + } + assert_permutation(names(rhs), c("train", "predict")) + private$.timeout = assert_numeric(rhs, lower = 0, any.missing = FALSE, len = 2L) + }, + #' @field use_weights (`character(1)`)\cr #' How weights should be handled. #' Settings are `"use"` `"ignore"`, and `"error"`. @@ -840,6 +871,11 @@ Learner = R6Class("Learner", ), private = list( + .predict_sets = "test", + .task_type = NULL, + .properties = NULL, + .parallel_predict = FALSE, + .timeout = c(train = Inf, predict = Inf), .use_weights = NULL, .encapsulation = c(train = "none", predict = "none"), .fallback = NULL, diff --git a/R/Resampling.R b/R/Resampling.R index 29867ed06..994b817a4 100644 --- a/R/Resampling.R +++ b/R/Resampling.R @@ -106,24 +106,7 @@ Resampling = R6Class("Resampling", #' `$train_set()` and `$test_set()`. instance = NULL, - #' @field task_hash (`character(1)`)\cr - #' The hash of the [Task] which was passed to `r$instantiate()`. - task_hash = NA_character_, - - #' @field task_row_hash (`character(1)`)\cr - #' The hash of the row ids of the [Task] which was passed to `r$instantiate()`. - task_row_hash = NA_character_, - - #' @field task_nrow (`integer(1)`)\cr - #' The number of observations of the [Task] which was passed to `r$instantiate()`. - #' - task_nrow = NA_integer_, - #' @field duplicated_ids (`logical(1)`)\cr - #' If `TRUE`, duplicated rows can occur within a single training set or within a single test set. - #' E.g., this is `TRUE` for Bootstrap, and `FALSE` for cross-validation. - #' Only used internally. 
- duplicated_ids = NULL, #' @template field_man man = NULL, @@ -139,7 +122,7 @@ Resampling = R6Class("Resampling", private$.id = assert_string(id, min.chars = 1L) self$label = assert_string(label, na.ok = TRUE) self$param_set = assert_param_set(param_set) - self$duplicated_ids = assert_flag(duplicated_ids) + private$.duplicated_ids = assert_flag(duplicated_ids) self$man = assert_string(man, na.ok = TRUE) }, @@ -188,9 +171,9 @@ Resampling = R6Class("Resampling", task = assert_task(as_task(task)) private$.hash = NULL self$instance = private$.get_instance(task) - self$task_hash = task$hash - self$task_row_hash = task$row_hash - self$task_nrow = task$nrow + private$.task_hash = task$hash + private$.task_row_hash = task$row_hash + private$.task_nrow = task$nrow invisible(self) }, @@ -256,6 +239,48 @@ Resampling = R6Class("Resampling", } private$.hash + }, + + #' @field task_hash (`character(1)`)\cr + #' The hash of the [Task] which was passed to `r$instantiate()`. + task_hash = function(rhs) { + if (!missing(rhs)) { + warn_deprecated("task_hash will soon be read-only.") + private$.task_hash = rhs + } + private$.task_hash + }, + + #' @field task_row_hash (`character(1)`)\cr + #' The hash of the row ids of the [Task] which was passed to `r$instantiate()`. + task_row_hash = function(rhs) { + if (!missing(rhs)) { + warn_deprecated("task_row_hash will soon be read-only.") + private$.task_row_hash = rhs + } + private$.task_row_hash + }, + + #' @field task_nrow (`integer(1)`)\cr + #' The number of observations of the [Task] which was passed to `r$instantiate()`. + task_nrow = function(rhs) { + if (!missing(rhs)) { + warn_deprecated("task_nrow will soon be read-only.") + private$.task_nrow = rhs + } + private$.task_nrow + }, + + #' @field duplicated_ids (`logical(1)`)\cr + #' If `TRUE`, duplicated rows can occur within a single training set or within a single test set. + #' E.g., this is `TRUE` for Bootstrap, and `FALSE` for cross-validation. + #' Only used internally. 
+ duplicated_ids = function(rhs) { + if (!missing(rhs)) { + warn_deprecated("duplicated_ids will soon be read-only.") + private$.duplicated_ids = rhs + } + private$.duplicated_ids } ), @@ -264,6 +289,10 @@ Resampling = R6Class("Resampling", .id = NULL, .hash = NULL, .groups = NULL, + .task_hash = NA_character_, + .task_row_hash = NA_character_, + .task_nrow = NA_integer_, + .duplicated_ids = NULL, .get_instance = function(task) { strata = task$strata diff --git a/R/ResamplingCustom.R b/R/ResamplingCustom.R index 73265425a..f0f428369 100644 --- a/R/ResamplingCustom.R +++ b/R/ResamplingCustom.R @@ -53,9 +53,9 @@ ResamplingCustom = R6Class("ResamplingCustom", inherit = Resampling, assert_subset(unlist(train_sets, use.names = FALSE), task$row_ids) assert_subset(unlist(test_sets, use.names = FALSE), task$row_ids) self$instance = list(train = train_sets, test = test_sets) - self$task_hash = task$hash - self$task_nrow = task$nrow - self$task_row_hash = task$row_hash + private$.task_hash = task$hash + private$.task_nrow = task$nrow + private$.task_row_hash = task$row_hash invisible(self) } ), diff --git a/R/ResamplingCustomCV.R b/R/ResamplingCustomCV.R index f5f6ca286..674b68063 100644 --- a/R/ResamplingCustomCV.R +++ b/R/ResamplingCustomCV.R @@ -74,9 +74,9 @@ ResamplingCustomCV = R6Class("ResamplingCustomCV", inherit = Resampling, } self$instance = split(task$row_ids, f, drop = TRUE) - self$task_hash = task$hash - self$task_nrow = task$nrow - self$task_row_hash = task$row_hash + private$.task_hash = task$hash + private$.task_nrow = task$nrow + private$.task_row_hash = task$row_hash invisible(self) } ), diff --git a/R/Task.R b/R/Task.R index 96312d03f..4b0731991 100644 --- a/R/Task.R +++ b/R/Task.R @@ -82,24 +82,6 @@ Task = R6Class("Task", #' @template field_task_type task_type = NULL, - #' @field backend ([DataBackend])\cr - #' Abstract interface to the data of the task. 
- backend = NULL, - - #' @field col_info ([data.table::data.table()])\cr - #' Table with with 4 columns, mainly for internal purposes: - #' - `"id"` (`character()`) stores the name of the column. - #' - `"type"` (`character()`) holds the storage type of the variable, e.g. `integer`, `numeric` or `character`. - #' See [mlr_reflections$task_feature_types][mlr_reflections] for a complete list of allowed types. - #' - `"levels"` (`list()`) stores a vector of distinct values (levels) for ordered and unordered factor variables. - #' - `"label"` (`character()`) stores a vector of prettier, formated column names. - #' - `"fix_factor_levels"` (`logical()`) stores flags which determine if the levels of the respective variable - #' need to be reordered after querying the data from the [DataBackend]. - #' - #' Note that all columns of the [DataBackend], also columns which are not selected or have any role, are listed - #' in this table. - col_info = NULL, - #' @template field_man man = NA_character_, @@ -108,10 +90,6 @@ Task = R6Class("Task", #' Required for [convert_task()]. extra_args = NULL, - #' @field mlr3_version (`package_version`)\cr - #' Package version of `mlr3` used to create the task. - mlr3_version = NULL, - #' @description #' Creates a new instance of this [R6][R6::R6Class] class. 
#' @@ -120,10 +98,10 @@ Task = R6Class("Task", private$.id = assert_string(id, min.chars = 1L) self$label = assert_string(label, na.ok = TRUE) self$task_type = assert_choice(task_type, mlr_reflections$task_types$type) - if (!inherits(backend, "DataBackend")) { - self$backend = as_data_backend(backend) + private$.backend = if (!inherits(backend, "DataBackend")) { + as_data_backend(backend) } else { - self$backend = assert_backend(backend) + assert_backend(backend) } cn = self$backend$colnames @@ -134,9 +112,9 @@ Task = R6Class("Task", stopf("Column names may not contain special character '%%'") } - self$col_info = col_info(self$backend) - self$col_info$label = NA_character_ - self$col_info$fix_factor_levels = FALSE + private$.col_info = col_info(private$.backend) + private$.col_info$label = NA_character_ + private$.col_info$fix_factor_levels = FALSE assert_subset(self$col_info$type, mlr_reflections$task_feature_types, .var.name = "feature types") pmap(self$col_info, @@ -151,7 +129,7 @@ Task = R6Class("Task", private$.col_roles = named_list(mlr_reflections$task_col_roles[[task_type]], character()) private$.col_roles$feature = setdiff(cn, self$backend$primary_key) self$extra_args = assert_list(extra_args, names = "unique") - self$mlr3_version = mlr_reflections$package_version + private$.mlr3_version = mlr_reflections$package_version }, #' @description @@ -583,8 +561,8 @@ Task = R6Class("Task", # everything looks good, modify task private$.hash = NULL - self$backend = DataBackendRbind$new(self$backend, data) - self$col_info = tab[] + private$.backend = DataBackendRbind$new(self$backend, data) + private$.col_info = tab[] private$.row_roles$use = c(private$.row_roles$use, data$rownames) invisible(self) @@ -631,10 +609,10 @@ Task = R6Class("Task", # update col_info for existing columns ci = col_info(data) - self$col_info = ujoin(self$col_info, ci, key = "id") + private$.col_info = ujoin(private$.col_info, ci, key = "id") # add rows to col_info for new columns - 
self$col_info = rbindlist(list( + private$.col_info = rbindlist(list( self$col_info, insert_named(ci[!list(self$col_info), on = "id"], list(label = NA_character_, fix_factor_levels = FALSE)) ), use.names = TRUE) @@ -647,7 +625,7 @@ Task = R6Class("Task", private$.col_roles$feature = union(col_roles$feature, setdiff(data$colnames, c(pk, col_roles$target))) # update backend - self$backend = DataBackendCbind$new(self$backend, data) + private$.backend = DataBackendCbind$new(self$backend, data) invisible(self) }, @@ -677,8 +655,8 @@ Task = R6Class("Task", assert_has_backend(self) private$.hash = NULL private$.col_hashes = NULL - self$backend = DataBackendRename$new(self$backend, old, new) - setkeyv(self$col_info[old, ("id") := new, on = "id"], "id") + private$.backend = DataBackendRename$new(self$backend, old, new) + setkeyv(private$.col_info[old, ("id") := new, on = "id"], "id") private$.col_roles = map(private$.col_roles, map_values, old = old, new = new) invisible(self) }, @@ -787,7 +765,7 @@ Task = R6Class("Task", tab$fix_factor_levels = TRUE private$.hash = NULL - self$col_info = ujoin(self$col_info, tab, key = "id") + private$.col_info = ujoin(self$col_info, tab, key = "id") invisible(self) }, @@ -816,7 +794,7 @@ Task = R6Class("Task", tab[, c("levels", "fix_factor_levels") := list(Map(intersect, levels, new_levels), TRUE)] private$.hash = NULL - self$col_info = ujoin(self$col_info, remove_named(tab, "new_levels"), key = "id") + private$.col_info = ujoin(self$col_info, remove_named(tab, "new_levels"), key = "id") invisible(self) }, @@ -879,8 +857,8 @@ Task = R6Class("Task", b = self$backend ..cns = union(b$primary_key, unlist(private$.col_roles, use.names = FALSE)) dt = b$data(rows = unique(self$row_ids), cols = ..cns) - self$backend = as_data_backend(dt, primary_key = b$primary_key) - self$col_info = setkeyv(self$col_info[list(..cns), on = "id"], "id") + private$.backend = as_data_backend(dt, primary_key = b$primary_key) + private$.col_info = 
setkeyv(self$col_info[list(..cns), on = "id"], "id") if (internal_valid_task && !is.null(private$.internal_valid_task)) { private$.internal_valid_task$materialize_view(FALSE) @@ -900,6 +878,46 @@ Task = R6Class("Task", private$.id = assert_string(rhs, min.chars = 1L) }, + #' @field backend ([DataBackend])\cr + #' Abstract interface to the data of the task. + backend = function(rhs) { + if (!missing(rhs)) { + warn_deprecated("backend will soon be read-only.") + private$.backend = rhs + } + private$.backend + }, + + #' @field col_info ([data.table::data.table()])\cr + #' Table with with 4 columns, mainly for internal purposes: + #' - `"id"` (`character()`) stores the name of the column. + #' - `"type"` (`character()`) holds the storage type of the variable, e.g. `integer`, `numeric` or `character`. + #' See [mlr_reflections$task_feature_types][mlr_reflections] for a complete list of allowed types. + #' - `"levels"` (`list()`) stores a vector of distinct values (levels) for ordered and unordered factor variables. + #' - `"label"` (`character()`) stores a vector of prettier, formated column names. + #' - `"fix_factor_levels"` (`logical()`) stores flags which determine if the levels of the respective variable + #' need to be reordered after querying the data from the [DataBackend]. + #' + #' Note that all columns of the [DataBackend], also columns which are not selected or have any role, are listed + #' in this table. + col_info = function(rhs) { + if (!missing(rhs)) { + warn_deprecated("col_info will soon be read-only.") + private$.col_info = rhs + } + private$.col_info + }, + + #' @field mlr3_version (`package_version`)\cr + #' Package version of `mlr3` used to create the task. 
+ mlr3_version = function(rhs) { + if (!missing(rhs)) { + warn_deprecated("mlr3_version will soon be read-only.") + private$.mlr3_version = rhs + } + private$.mlr3_version + }, + #' @field internal_valid_task (`Task` or `integer()` or `NULL`)\cr #' Optional validation task that can, e.g., be used for early stopping with learners such as XGBoost. #' See also the `$validate` field of [`Learner`]. @@ -1342,10 +1360,13 @@ Task = R6Class("Task", .col_hashes = NULL, .characteristics = NULL, .row_hash = NULL, + .backend = NULL, + .col_info = NULL, + .mlr3_version = NULL, deep_clone = function(name, value) { # NB: DataBackends are never copied! - if (name == "col_info") { + if (name == ".col_info") { copy(value) } else if (name == ".internal_valid_task" && !is.null(value)) { value$clone(deep = TRUE) @@ -1594,10 +1615,10 @@ task_rm_backend = function(task) { ee = get_private(task) ee$.hash = force(task$hash) ee$.col_hashes = force(task$col_hashes) - ee$.internal_valid_task$backend = NULL + ee$.internal_valid_task$.__enclos_env__$private$.backend = NULL # NULL backend - task$backend = NULL + ee$.backend = NULL task } diff --git a/man/Learner.Rd b/man/Learner.Rd index 06630cdf7..83c39828d 100644 --- a/man/Learner.Rd +++ b/man/Learner.Rd @@ -268,24 +268,33 @@ Contains all information gathered during \code{train()} and \code{predict()}. It is not recommended to access elements from \code{state} directly. This is an internal data structure which may change in the future.} +\item{\code{feature_types}}{(\code{character()})\cr +Stores the feature types the learner can handle, e.g. \code{"logical"}, \code{"numeric"}, or \code{"factor"}. +A complete list of candidate feature types, grouped by task type, is stored in \code{\link[=mlr_reflections]{mlr_reflections$task_feature_types}}.} + +\item{\code{packages}}{(\code{character(1)})\cr +Set of required packages. 
+These packages are loaded, but not attached.} + +\item{\code{man}}{(\code{character(1)})\cr +String in the format \verb{[pkg]::[topic]} pointing to a manual page for this object. +Defaults to \code{NA}, but can be set by child classes.} +} +\if{html}{\out{}} +} +\section{Active bindings}{ +\if{html}{\out{
}} +\describe{ \item{\code{task_type}}{(\code{character(1)})\cr Task type, e.g. \code{"classif"} or \code{"regr"}. For a complete list of possible task types (depending on the loaded packages), see \code{\link[=mlr_reflections]{mlr_reflections$task_types$type}}.} -\item{\code{feature_types}}{(\code{character()})\cr -Stores the feature types the learner can handle, e.g. \code{"logical"}, \code{"numeric"}, or \code{"factor"}. -A complete list of candidate feature types, grouped by task type, is stored in \code{\link[=mlr_reflections]{mlr_reflections$task_feature_types}}.} - \item{\code{properties}}{(\code{character()})\cr Stores a set of properties/capabilities the learner has. A complete list of candidate properties, grouped by task type, is stored in \code{\link[=mlr_reflections]{mlr_reflections$learner_properties}}.} -\item{\code{packages}}{(\code{character(1)})\cr -Set of required packages. -These packages are loaded, but not attached.} - \item{\code{predict_sets}}{(\code{character()})\cr During \code{\link[=resample]{resample()}}/\code{\link[=benchmark]{benchmark()}}, a \link{Learner} can predict on multiple sets. Per default, a learner only predicts observations in the test set (\code{predict_sets == "test"}). @@ -316,15 +325,6 @@ Default is \code{c(train = Inf, predict = Inf)}. Also see the section on error handling the mlr3book: \url{https://mlr3book.mlr-org.com/chapters/chapter10/advanced_technical_aspects_of_mlr3.html#sec-error-handling}} -\item{\code{man}}{(\code{character(1)})\cr -String in the format \verb{[pkg]::[topic]} pointing to a manual page for this object. -Defaults to \code{NA}, but can be set by child classes.} -} -\if{html}{\out{
}} -} -\section{Active bindings}{ -\if{html}{\out{
}} -\describe{ \item{\code{use_weights}}{(\code{character(1)})\cr How weights should be handled. Settings are \code{"use"} \code{"ignore"}, and \code{"error"}. diff --git a/man/Task.Rd b/man/Task.Rd index 6a2eee4c2..5100313df 100644 --- a/man/Task.Rd +++ b/man/Task.Rd @@ -243,6 +243,23 @@ Task type, e.g. \code{"classif"} or \code{"regr"}. For a complete list of possible task types (depending on the loaded packages), see \code{\link[=mlr_reflections]{mlr_reflections$task_types$type}}.} +\item{\code{man}}{(\code{character(1)})\cr +String in the format \verb{[pkg]::[topic]} pointing to a manual page for this object. +Defaults to \code{NA}, but can be set by child classes.} + +\item{\code{extra_args}}{(named \code{list()})\cr +Additional arguments set during construction. +Required for \code{\link[=convert_task]{convert_task()}}.} +} +\if{html}{\out{
}} +} +\section{Active bindings}{ +\if{html}{\out{
}} +\describe{ +\item{\code{id}}{(\code{character(1)})\cr +Identifier of the object. +Used in tables, plot and text output.} + \item{\code{backend}}{(\link{DataBackend})\cr Abstract interface to the data of the task.} @@ -261,25 +278,8 @@ need to be reordered after querying the data from the \link{DataBackend}. Note that all columns of the \link{DataBackend}, also columns which are not selected or have any role, are listed in this table.} -\item{\code{man}}{(\code{character(1)})\cr -String in the format \verb{[pkg]::[topic]} pointing to a manual page for this object. -Defaults to \code{NA}, but can be set by child classes.} - -\item{\code{extra_args}}{(named \code{list()})\cr -Additional arguments set during construction. -Required for \code{\link[=convert_task]{convert_task()}}.} - \item{\code{mlr3_version}}{(\code{package_version})\cr Package version of \code{mlr3} used to create the task.} -} -\if{html}{\out{
}} -} -\section{Active bindings}{ -\if{html}{\out{
}} -\describe{ -\item{\code{id}}{(\code{character(1)})\cr -Identifier of the object. -Used in tables, plot and text output.} \item{\code{internal_valid_task}}{(\code{Task} or \code{integer()} or \code{NULL})\cr Optional validation task that can, e.g., be used for early stopping with learners such as XGBoost. diff --git a/tests/testthat/test_HotstartStack.R b/tests/testthat/test_HotstartStack.R index a1e1dc3c3..e74ba2310 100644 --- a/tests/testthat/test_HotstartStack.R +++ b/tests/testthat/test_HotstartStack.R @@ -134,7 +134,7 @@ test_that("HotstartStack works with backward target learner and decreased hotsta learner_1$train(task) learner = lrn("classif.debug", iter = 1) - learner$properties[learner$properties %chin% "hotstart_forward"] = "hotstart_backward" + learner$.__enclos_env__$private$.properties[learner$properties %chin% "hotstart_forward"] = "hotstart_backward" hot = HotstartStack$new(list(learner_1)) expect_equal(hot$start_cost(learner, task$hash), 0) @@ -151,7 +151,7 @@ test_that("HotstartStack works with backward target learner when cost of hotstar learner_2$train(task) learner = lrn("classif.debug", iter = 3) - learner$properties[learner$properties %chin% "hotstart_forward"] = "hotstart_backward" + learner$.__enclos_env__$private$.properties[learner$properties %chin% "hotstart_forward"] = "hotstart_backward" hot = HotstartStack$new(list(learner_1, learner_2)) expect_equal(hot$start_cost(learner, task$hash), c(0, 0)) @@ -166,7 +166,7 @@ test_that("HotstartStack works when hotstart values of hotstart learners are low learner_2$train(task) learner = lrn("classif.debug", iter = 2) - learner$properties[learner$properties %chin% "hotstart_forward"] = "hotstart_backward" + learner$.__enclos_env__$private$.properties[learner$properties %chin% "hotstart_forward"] = "hotstart_backward" hot = HotstartStack$new(list(learner_1, learner_2)) expect_equal(hot$start_cost(learner, task$hash), c(0, NA_real_)) @@ -181,7 +181,7 @@ test_that("HotstartStack works when 
backward hotstart and target learner are equ learner_1$train(task) learner = lrn("classif.debug", iter = 1) - learner$properties[learner$properties %chin% "hotstart_forward"] = "hotstart_backward" + learner$.__enclos_env__$private$.properties[learner$properties %chin% "hotstart_forward"] = "hotstart_backward" hot = HotstartStack$new(list(learner_1)) expect_equal(hot$start_cost(learner, task$hash), -1) @@ -197,7 +197,7 @@ test_that("HotstartStack works with backward target learner when hotstart values learner_1$train(task) learner = lrn("classif.debug", iter = 2) - learner$properties[learner$properties %chin% "hotstart_forward"] = "hotstart_backward" + learner$.__enclos_env__$private$.properties[learner$properties %chin% "hotstart_forward"] = "hotstart_backward" hot = HotstartStack$new(list(learner_1)) expect_equal(hot$start_cost(learner, task$hash), NA_real_) @@ -218,7 +218,7 @@ test_that("HotstartStack works with backward target learner when hotstart learne learner_4$train(task) learner = lrn("classif.debug", iter = 2) - learner$properties[learner$properties %chin% "hotstart_forward"] = "hotstart_backward" + learner$.__enclos_env__$private$.properties[learner$properties %chin% "hotstart_forward"] = "hotstart_backward" hot = HotstartStack$new(list(learner_1, learner_2, learner_3, learner_4)) expect_equal(hot$start_cost(learner, task$hash), c(NA_real_, -1, 0, NA_real_)) @@ -233,7 +233,7 @@ test_that("HotstartStack works with forward/backward target learner and increase learner_2$train(task) learner = lrn("classif.debug", iter = 2) - learner$properties = c(learner$properties, "hotstart_backward") + get_private(learner, ".properties") = c(learner$properties, "hotstart_backward") hot = HotstartStack$new(list(learner_1, learner_2)) expect_equal(hot$start_cost(learner, task$hash), c(1, 0)) @@ -250,7 +250,7 @@ test_that("HotstartStack works with forward/backward target learner when cost of learner_2$train(task) learner = lrn("classif.debug", iter = 3) - learner$properties 
= c(learner$properties, "hotstart_backward") + get_private(learner, ".properties") = c(learner$properties, "hotstart_backward") hot = HotstartStack$new(list(learner_1, learner_2)) expect_equal(hot$start_cost(learner, task$hash), c(1, 1)) @@ -265,7 +265,7 @@ test_that("HotstartStack works when hotstart values of hotstart learners are low learner_2$train(task) learner = lrn("classif.debug", iter = 2) - learner$properties = c(learner$properties, "hotstart_backward") + get_private(learner, ".properties") = c(learner$properties, "hotstart_backward") hot = HotstartStack$new(list(learner_1, learner_2)) expect_equal(hot$start_cost(learner, task$hash), c(1, 0)) @@ -280,7 +280,7 @@ test_that("HotstartStack works when forward/backward hotstart and target learner learner_1$train(task) learner = lrn("classif.debug", iter = 1) - learner$properties = c(learner$properties, "hotstart_backward") + get_private(learner, ".properties") = c(learner$properties, "hotstart_backward") hot = HotstartStack$new(list(learner_1)) expect_equal(hot$start_cost(learner, task$hash), -1) @@ -297,7 +297,7 @@ test_that("HotstartStack works with forward/backward target learner when hotstar learner_2$train(task) learner = lrn("classif.debug", iter = 1) - learner$properties = c(learner$properties, "hotstart_backward") + get_private(learner, ".properties") = c(learner$properties, "hotstart_backward") hot = HotstartStack$new(list(learner_1, learner_2)) expect_equal(hot$start_cost(learner, task$hash), c(0, 0)) @@ -310,7 +310,7 @@ test_that("HotstartStack works with forward/backward target learner when hotstar learner_1$train(task) learner = lrn("classif.debug", iter = 2) - learner$properties = c(learner$properties, "hotstart_backward") + get_private(learner, ".properties") = c(learner$properties, "hotstart_backward") hot = HotstartStack$new(list(learner_1)) expect_equal(hot$start_cost(learner, task$hash), 1) @@ -331,7 +331,7 @@ test_that("HotstartStack works with forward/backward target learner when hotstar 
learner_4$train(task) learner = lrn("classif.debug", iter = 2) - learner$properties = c(learner$properties, "hotstart_backward") + get_private(learner, ".properties") = c(learner$properties, "hotstart_backward") hot = HotstartStack$new(list(learner_1, learner_2, learner_3, learner_4)) expect_equal(hot$start_cost(learner, task$hash), c(1, -1, 0, NA_real_)) diff --git a/tests/testthat/test_Learner.R b/tests/testthat/test_Learner.R index d8b6093b9..5487f3f9d 100644 --- a/tests/testthat/test_Learner.R +++ b/tests/testthat/test_Learner.R @@ -134,7 +134,7 @@ test_that("predict on newdata works / no target column", { xdt = data.table(x = 1, y = 1) task = as_task_regr(xdt, target = "y") learner = lrn("regr.featureless") - learner$properties = setdiff(learner$properties, "missings") + get_private(learner, ".properties") = setdiff(learner$properties, "missings") learner$train(task) learner$predict_newdata(xdt[, 1]) }) @@ -303,7 +303,7 @@ test_that("weights", { test_that("mandatory properties", { task = tsk("iris") learner = lrn("classif.rpart") - learner$properties = setdiff(learner$properties, "multiclass") + get_private(learner, ".properties") = setdiff(learner$properties, "multiclass") resample = rsmp("holdout") expect_error(learner$train(task), "multiclass") @@ -323,7 +323,7 @@ test_that("train task is cloned (#382)", { test_that("Error on missing data (#413)", { task = tsk("pima") learner = lrn("classif.rpart") - learner$properties = setdiff(learner$properties, "missings") + get_private(learner, ".properties") = setdiff(learner$properties, "missings") expect_error(learner$train(task), "missing values") }) @@ -388,7 +388,7 @@ test_that("properties are also checked on validation task", { task$rbind(row) task$internal_valid_task = 151 learner = lrn("classif.debug", validate = "predefined") - learner$properties = setdiff(learner$properties, "missings") + learner$.__enclos_env__$private$.properties = setdiff(learner$properties, "missings") expect_error(learner$train(task), 
"missing values") }) @@ -729,8 +729,8 @@ test_that("predict_newdata creates column info correctly", { learner = lrn("classif.debug", save_tasks = TRUE) task = tsk("iris") - task$col_info$label = letters[1:6] - task$col_info$fix_factor_levels = c(TRUE, TRUE, FALSE, TRUE, FALSE, TRUE) + task$.__enclos_env__$private$.col_info$label = letters[1:6] + task$.__enclos_env__$private$.col_info$fix_factor_levels = c(TRUE, TRUE, FALSE, TRUE, FALSE, TRUE) learner$train(task) ## data.frame is passed without task @@ -872,3 +872,26 @@ test_that("oob_error is available without storing models via $.extract_oob_error expect_equal(rr$aggregate(msr("oob_error")), c(oob_error = 0.123)) }) + +#test_that("field validation", { +# l = lrn("classif.debug") +# expect_error({l$timeout = c(train = 1)}, "permutation") +# expect_error({l$timeout = c(train = 1, predict = -1)}, ">= 0") +# expect_error({l$timeout = c(a = 1, b = 0)}, "permutation") +# l$timeout = c(train = 1, predict = 0) +# expect_equal(l$timeout, c(train = 1, predict = 0)) +# +# expect_error({l$parallel_predict = ""}, "flag") +# l$parallel_predict = FALSE +# expect_false(l$parallel_predict) +# l$parallel_predict = TRUE +# expect_true(l$parallel_predict) +# +# expect_error({l$task_type = "regr"}, "read-only") +# +# expect_error({l$predict_sets = "abc"}, "but has additional elements") +# l$predict_sets = "train" +# expect_equal(l$predict_sets, "train") +# l$predict_sets = c("train", "test", "internal_valid") +# expect_equal(l$predict_sets, c("train", "test", "internal_valid")) +#}) diff --git a/tests/testthat/test_Resampling.R b/tests/testthat/test_Resampling.R index db7f67d7b..4898cf6e9 100644 --- a/tests/testthat/test_Resampling.R +++ b/tests/testthat/test_Resampling.R @@ -158,3 +158,12 @@ test_that("task_row_hash in Resampling works correctly", { resampling$instantiate(task) expect_identical(resampling$task_row_hash, task$row_hash) }) + +# Uncomment this once we make the fields read-only +#test_that("fields are read-only", { +# r 
= rsmp("cv", folds = 2) +# expect_error({r$task_row_hash = "foo"}, "read-only") +# expect_error({r$task_nrow = 10}, "read-only") +# expect_error({r$task_hash = "foo"}, "read-only") +# expect_error({r$duplicated_ids = TRUE}, "read-only") +#}) diff --git a/tests/testthat/test_Task.R b/tests/testthat/test_Task.R index 56aef9804..39c8f424a 100644 --- a/tests/testthat/test_Task.R +++ b/tests/testthat/test_Task.R @@ -1020,3 +1020,11 @@ test_that("materialize_view works with duplicates", { task2$materialize_view() expect_equal(task$data(), task2$data()) }) + +# Uncomment this once we make the fields read-only +#test_that("task fields are read-only", { +# task = tsk("iris") +# expect_error({task$col_info = "foo"}, "read-only") +# expect_error({task$backend = tsk("iris")$backend}, "read-only") +# expect_error({task$mlr3_version = "1.0.0"}, "read-only") +#}) diff --git a/tests/testthat/test_benchmark.R b/tests/testthat/test_benchmark.R index 87d0362bf..0075ec879 100644 --- a/tests/testthat/test_benchmark.R +++ b/tests/testthat/test_benchmark.R @@ -572,7 +572,7 @@ test_that("properties are also checked on validation task", { task$rbind(row) task$internal_valid_task = 151 learner = lrn("classif.debug", validate = "predefined") - learner$properties = setdiff(learner$properties, "missings") + get_private(learner, ".properties") = setdiff(learner$properties, "missings") suppressWarnings(expect_error(benchmark(benchmark_grid(task, learner, rsmp("holdout"))), "missing values")) }) diff --git a/tests/testthat/test_resample.R b/tests/testthat/test_resample.R index c23d0f541..0006ecce7 100644 --- a/tests/testthat/test_resample.R +++ b/tests/testthat/test_resample.R @@ -349,7 +349,7 @@ test_that("properties are also checked on validation task", { task$rbind(row) task$internal_valid_task = 151 learner = lrn("classif.debug", validate = "predefined") - learner$properties = setdiff(learner$properties, "missings") + get_private(learner, ".properties") =
setdiff(learner$properties, "missings") suppressWarnings(expect_error(resample(task, learner, rsmp("holdout")), "missing values")) })