From 36bd8900be252de06b408d22155d3eb62fbc0a5b Mon Sep 17 00:00:00 2001 From: Sebastian Fischer Date: Wed, 13 Aug 2025 19:15:02 +0200 Subject: [PATCH 1/2] deprecate assignment to some fields --- NAMESPACE | 5 -- NEWS.md | 14 ++++ R/Learner.R | 110 ++++++++++++++++++---------- R/Resampling.R | 71 ++++++++++++------ R/ResamplingCustom.R | 6 +- R/ResamplingCustomCV.R | 6 +- R/Task.R | 107 ++++++++++++++++----------- man/Learner.Rd | 34 ++++----- man/Task.Rd | 34 ++++----- tests/testthat/test_HotstartStack.R | 26 +++---- tests/testthat/test_Learner.R | 35 +++++++-- tests/testthat/test_Resampling.R | 9 +++ tests/testthat/test_Task.R | 8 ++ tests/testthat/test_benchmark.R | 2 +- tests/testthat/test_resample.R | 2 +- 15 files changed, 302 insertions(+), 167 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 8ced66c05..0d1ccfdd4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -284,14 +284,9 @@ import(palmerpenguins) import(paradox) importFrom(R6,R6Class) importFrom(R6,is.R6) -importFrom(data.table,as.data.table) -importFrom(data.table,data.table) importFrom(future,nbrOfWorkers) importFrom(future,plan) importFrom(graphics,plot) -importFrom(mlr3misc,clbk) -importFrom(mlr3misc,clbks) -importFrom(mlr3misc,mlr_callbacks) importFrom(parallelly,availableCores) importFrom(stats,contr.treatment) importFrom(stats,model.frame) diff --git a/NEWS.md b/NEWS.md index 49a258565..06df8252e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,19 @@ # mlr3 (development version) +## New Features: + +* `Task` got method `$materialize_view()` which can save memory after subsetting a task. +* Better input validation for: + * `Learner` fields. +* Various improvements to the documentation and logging output, including + examples for methods. +* Measure "oob_error" now works even without storing models during resampling. + +## Deprecations: + +* Assigning to some fields of `Task`, `Learner`, and `Resampling` now throws a deprecation warning. + This will become an error in the future. 
+ # mlr3 1.1.0 * feat: Add new measure `MeasureRegrRQR` for quantile regression. diff --git a/R/Learner.R b/R/Learner.R index 9b32fd6b6..a380dd0f1 100644 --- a/R/Learner.R +++ b/R/Learner.R @@ -177,46 +177,16 @@ Learner = R6Class("Learner", #' This is an internal data structure which may change in the future. state = NULL, - #' @template field_task_type - task_type = NULL, - #' @field feature_types (`character()`)\cr #' Stores the feature types the learner can handle, e.g. `"logical"`, `"numeric"`, or `"factor"`. #' A complete list of candidate feature types, grouped by task type, is stored in [`mlr_reflections$task_feature_types`][mlr_reflections]. feature_types = NULL, - #' @field properties (`character()`)\cr - #' Stores a set of properties/capabilities the learner has. - #' A complete list of candidate properties, grouped by task type, is stored in [`mlr_reflections$learner_properties`][mlr_reflections]. - properties = NULL, + #' @template field_packages packages = NULL, - #' @template field_predict_sets - predict_sets = "test", - - #' @field parallel_predict (`logical(1)`)\cr - #' If set to `TRUE`, use \CRANpkg{future} to calculate predictions in parallel (default: `FALSE`). - #' The row ids of the `task` will be split into [future::nbrOfWorkers()] chunks, - #' and predictions are evaluated according to the active [future::plan()]. - #' This currently only works for methods `Learner$predict()` and `Learner$predict_newdata()`, - #' and has no effect during [resample()] or [benchmark()] where you have other means - #' to parallelize. - #' - #' Note that the recorded time required for prediction reports the time required to predict - #' is not properly defined and depends on the parallelization backend. - parallel_predict = FALSE, - - #' @field timeout (named `numeric(2)`)\cr - #' Timeout for the learner's train and predict steps, in seconds. - #' This works differently for different encapsulation methods, see - #' [mlr3misc::encapsulate()]. 
- #' Default is `c(train = Inf, predict = Inf)`. - #' Also see the section on error handling the mlr3book: - #' \url{https://mlr3book.mlr-org.com/chapters/chapter10/advanced_technical_aspects_of_mlr3.html#sec-error-handling} - timeout = c(train = Inf, predict = Inf), - #' @template field_man man = NULL, @@ -229,12 +199,12 @@ Learner = R6Class("Learner", self$id = assert_string(id, min.chars = 1L) self$label = assert_string(label, na.ok = TRUE) - self$task_type = assert_choice(task_type, mlr_reflections$task_types$type) + private$.task_type = assert_choice(task_type, mlr_reflections$task_types$type) self$feature_types = assert_ordered_set(feature_types, mlr_reflections$task_feature_types, .var.name = "feature_types") private$.predict_types = assert_ordered_set(predict_types, names(mlr_reflections$learner_predict_types[[task_type]]), empty.ok = FALSE, .var.name = "predict_types") private$.predict_type = predict_types[1L] - self$properties = sort(assert_subset(properties, mlr_reflections$learner_properties[[task_type]])) + private$.properties = sort(assert_subset(properties, mlr_reflections$learner_properties[[task_type]])) if (!missing(data_formats)) warn_deprecated("Learner$initialize argument 'data_formats'") self$packages = union("mlr3", assert_character(packages, any.missing = FALSE, min.chars = 1L)) self$man = assert_string(man, na.ok = TRUE) @@ -492,10 +462,10 @@ Learner = R6Class("Learner", } prevci = task$col_info - task$backend = newdata - task$col_info = col_info(task$backend) - task$col_info[, c("label", "fix_factor_levels")] = prevci[list(task$col_info$id), on = "id", c("label", "fix_factor_levels")] - task$col_info$fix_factor_levels[is.na(task$col_info$fix_factor_levels)] = FALSE + task$.__enclos_env__$private$.backend = newdata + task$.__enclos_env__$private$.col_info = col_info(task$backend) + task$.__enclos_env__$private$.col_info[, c("label", "fix_factor_levels")] = prevci[list(task$col_info$id), on = "id", c("label", "fix_factor_levels")] + 
task$.__enclos_env__$private$.col_info$fix_factor_levels[is.na(task$.__enclos_env__$private$.col_info$fix_factor_levels)] = FALSE task$row_roles$use = task$backend$rownames task_col_roles = task$col_roles update_col_roles = FALSE @@ -676,6 +646,67 @@ Learner = R6Class("Learner", ), active = list( + #' @template field_task_type + task_type = function(rhs) { + if (!missing(rhs)) { + warn_deprecated("task_type will soon be read-only.") + private$.task_type = rhs + } + private$.task_type + }, + + #' @field properties (`character()`)\cr + #' Stores a set of properties/capabilities the learner has. + #' A complete list of candidate properties, grouped by task type, is stored in [`mlr_reflections$learner_properties`][mlr_reflections]. + properties = function(rhs) { + if (!missing(rhs)) { + warn_deprecated("properties will soon be read-only.") + private$.properties = rhs + } + private$.properties + }, + + #' @template field_predict_sets + predict_sets = function(rhs) { + if (missing(rhs)) { + return(private$.predict_sets) + } + assert_subset(rhs, mlr_reflections$predict_sets) + private$.predict_sets = rhs + }, + + #' @field parallel_predict (`logical(1)`)\cr + #' If set to `TRUE`, use \CRANpkg{future} to calculate predictions in parallel (default: `FALSE`). + #' The row ids of the `task` will be split into [future::nbrOfWorkers()] chunks, + #' and predictions are evaluated according to the active [future::plan()]. + #' This currently only works for methods `Learner$predict()` and `Learner$predict_newdata()`, + #' and has no effect during [resample()] or [benchmark()] where you have other means + #' to parallelize. + #' + #' Note that the recorded time required for prediction reports the time required to predict + #' is not properly defined and depends on the parallelization backend.
+ parallel_predict = function(rhs) { + if (missing(rhs)) { + return(private$.parallel_predict) + } + private$.parallel_predict = assert_flag(rhs) + }, + + #' @field timeout (named `numeric(2)`)\cr + #' Timeout for the learner's train and predict steps, in seconds. + #' This works differently for different encapsulation methods, see + #' [mlr3misc::encapsulate()]. + #' Default is `c(train = Inf, predict = Inf)`. + #' Also see the section on error handling the mlr3book: + #' \url{https://mlr3book.mlr-org.com/chapters/chapter10/advanced_technical_aspects_of_mlr3.html#sec-error-handling} + timeout = function(rhs) { + if (missing(rhs)) { + return(private$.timeout) + } + assert_permutation(names(rhs), c("train", "predict")) + private$.timeout = assert_numeric(rhs, lower = 0, any.missing = FALSE, len = 2L) + }, + #' @field use_weights (`character(1)`)\cr #' How weights should be handled. #' Settings are `"use"` `"ignore"`, and `"error"`. @@ -841,6 +872,11 @@ Learner = R6Class("Learner", ), private = list( + .predict_sets = "test", + .task_type = NULL, + .properties = NULL, + .parallel_predict = FALSE, + .timeout = c(train = Inf, predict = Inf), .use_weights = NULL, .encapsulation = c(train = "none", predict = "none"), .fallback = NULL, diff --git a/R/Resampling.R b/R/Resampling.R index 29867ed06..994b817a4 100644 --- a/R/Resampling.R +++ b/R/Resampling.R @@ -106,24 +106,7 @@ Resampling = R6Class("Resampling", #' `$train_set()` and `$test_set()`. instance = NULL, - #' @field task_hash (`character(1)`)\cr - #' The hash of the [Task] which was passed to `r$instantiate()`. - task_hash = NA_character_, - - #' @field task_row_hash (`character(1)`)\cr - #' The hash of the row ids of the [Task] which was passed to `r$instantiate()`. - task_row_hash = NA_character_, - - #' @field task_nrow (`integer(1)`)\cr - #' The number of observations of the [Task] which was passed to `r$instantiate()`. 
- #' - task_nrow = NA_integer_, - #' @field duplicated_ids (`logical(1)`)\cr - #' If `TRUE`, duplicated rows can occur within a single training set or within a single test set. - #' E.g., this is `TRUE` for Bootstrap, and `FALSE` for cross-validation. - #' Only used internally. - duplicated_ids = NULL, #' @template field_man man = NULL, @@ -139,7 +122,7 @@ Resampling = R6Class("Resampling", private$.id = assert_string(id, min.chars = 1L) self$label = assert_string(label, na.ok = TRUE) self$param_set = assert_param_set(param_set) - self$duplicated_ids = assert_flag(duplicated_ids) + private$.duplicated_ids = assert_flag(duplicated_ids) self$man = assert_string(man, na.ok = TRUE) }, @@ -188,9 +171,9 @@ Resampling = R6Class("Resampling", task = assert_task(as_task(task)) private$.hash = NULL self$instance = private$.get_instance(task) - self$task_hash = task$hash - self$task_row_hash = task$row_hash - self$task_nrow = task$nrow + private$.task_hash = task$hash + private$.task_row_hash = task$row_hash + private$.task_nrow = task$nrow invisible(self) }, @@ -256,6 +239,48 @@ Resampling = R6Class("Resampling", } private$.hash + }, + + #' @field task_hash (`character(1)`)\cr + #' The hash of the [Task] which was passed to `r$instantiate()`. + task_hash = function(rhs) { + if (!missing(rhs)) { + warn_deprecated("task_hash will soon be read-only.") + private$.task_hash = rhs + } + private$.task_hash + }, + + #' @field task_row_hash (`character(1)`)\cr + #' The hash of the row ids of the [Task] which was passed to `r$instantiate()`. + task_row_hash = function(rhs) { + if (!missing(rhs)) { + warn_deprecated("task_row_hash will soon be read-only.") + private$.task_row_hash = rhs + } + private$.task_row_hash + }, + + #' @field task_nrow (`integer(1)`)\cr + #' The number of observations of the [Task] which was passed to `r$instantiate()`. 
+ task_nrow = function(rhs) { + if (!missing(rhs)) { + warn_deprecated("task_nrow will soon be read-only.") + private$.task_nrow = rhs + } + private$.task_nrow + }, + + #' @field duplicated_ids (`logical(1)`)\cr + #' If `TRUE`, duplicated rows can occur within a single training set or within a single test set. + #' E.g., this is `TRUE` for Bootstrap, and `FALSE` for cross-validation. + #' Only used internally. + duplicated_ids = function(rhs) { + if (!missing(rhs)) { + warn_deprecated("duplicated_ids will soon be read-only.") + private$.duplicated_ids = rhs + } + private$.duplicated_ids } ), @@ -264,6 +289,10 @@ Resampling = R6Class("Resampling", .id = NULL, .hash = NULL, .groups = NULL, + .task_hash = NA_character_, + .task_row_hash = NA_character_, + .task_nrow = NA_integer_, + .duplicated_ids = NULL, .get_instance = function(task) { strata = task$strata diff --git a/R/ResamplingCustom.R b/R/ResamplingCustom.R index 73265425a..f0f428369 100644 --- a/R/ResamplingCustom.R +++ b/R/ResamplingCustom.R @@ -53,9 +53,9 @@ ResamplingCustom = R6Class("ResamplingCustom", inherit = Resampling, assert_subset(unlist(train_sets, use.names = FALSE), task$row_ids) assert_subset(unlist(test_sets, use.names = FALSE), task$row_ids) self$instance = list(train = train_sets, test = test_sets) - self$task_hash = task$hash - self$task_nrow = task$nrow - self$task_row_hash = task$row_hash + private$.task_hash = task$hash + private$.task_nrow = task$nrow + private$.task_row_hash = task$row_hash invisible(self) } ), diff --git a/R/ResamplingCustomCV.R b/R/ResamplingCustomCV.R index f5f6ca286..674b68063 100644 --- a/R/ResamplingCustomCV.R +++ b/R/ResamplingCustomCV.R @@ -74,9 +74,9 @@ ResamplingCustomCV = R6Class("ResamplingCustomCV", inherit = Resampling, } self$instance = split(task$row_ids, f, drop = TRUE) - self$task_hash = task$hash - self$task_nrow = task$nrow - self$task_row_hash = task$row_hash + private$.task_hash = task$hash + private$.task_nrow = task$nrow + private$.task_row_hash 
= task$row_hash invisible(self) } ), diff --git a/R/Task.R b/R/Task.R index 2b6270a9e..b46465d3f 100644 --- a/R/Task.R +++ b/R/Task.R @@ -83,24 +83,6 @@ Task = R6Class("Task", #' @template field_task_type task_type = NULL, - #' @field backend ([DataBackend])\cr - #' Abstract interface to the data of the task. - backend = NULL, - - #' @field col_info ([data.table::data.table()])\cr - #' Table with with 4 columns, mainly for internal purposes: - #' - `"id"` (`character()`) stores the name of the column. - #' - `"type"` (`character()`) holds the storage type of the variable, e.g. `integer`, `numeric` or `character`. - #' See [mlr_reflections$task_feature_types][mlr_reflections] for a complete list of allowed types. - #' - `"levels"` (`list()`) stores a vector of distinct values (levels) for ordered and unordered factor variables. - #' - `"label"` (`character()`) stores a vector of prettier, formated column names. - #' - `"fix_factor_levels"` (`logical()`) stores flags which determine if the levels of the respective variable - #' need to be reordered after querying the data from the [DataBackend]. - #' - #' Note that all columns of the [DataBackend], also columns which are not selected or have any role, are listed - #' in this table. - col_info = NULL, - #' @template field_man man = NA_character_, @@ -109,10 +91,6 @@ Task = R6Class("Task", #' Required for [convert_task()]. extra_args = NULL, - #' @field mlr3_version (`package_version`)\cr - #' Package version of `mlr3` used to create the task. - mlr3_version = NULL, - #' @description #' Creates a new instance of this [R6][R6::R6Class] class. 
#' @@ -121,10 +99,10 @@ Task = R6Class("Task", private$.id = assert_string(id, min.chars = 1L) self$label = assert_string(label, na.ok = TRUE) self$task_type = assert_choice(task_type, mlr_reflections$task_types$type) - if (!inherits(backend, "DataBackend")) { - self$backend = as_data_backend(backend) + private$.backend = if (!inherits(backend, "DataBackend")) { + as_data_backend(backend) } else { - self$backend = assert_backend(backend) + assert_backend(backend) } cn = self$backend$colnames @@ -135,9 +113,9 @@ Task = R6Class("Task", stopf("Column names may not contain special character '%%'") } - self$col_info = col_info(self$backend) - self$col_info$label = NA_character_ - self$col_info$fix_factor_levels = FALSE + private$.col_info = col_info(private$.backend) + private$.col_info$label = NA_character_ + private$.col_info$fix_factor_levels = FALSE assert_subset(self$col_info$type, mlr_reflections$task_feature_types, .var.name = "feature types") pmap(self$col_info, @@ -152,7 +130,7 @@ Task = R6Class("Task", private$.col_roles = named_list(mlr_reflections$task_col_roles[[task_type]], character()) private$.col_roles$feature = setdiff(cn, self$backend$primary_key) self$extra_args = assert_list(extra_args, names = "unique") - self$mlr3_version = mlr_reflections$package_version + private$.mlr3_version = mlr_reflections$package_version }, #' @description @@ -585,8 +563,8 @@ Task = R6Class("Task", # everything looks good, modify task private$.hash = NULL - self$backend = DataBackendRbind$new(self$backend, data) - self$col_info = tab[] + private$.backend = DataBackendRbind$new(self$backend, data) + private$.col_info = tab[] private$.row_roles$use = c(private$.row_roles$use, data$rownames) invisible(self) @@ -633,10 +611,10 @@ Task = R6Class("Task", # update col_info for existing columns ci = col_info(data) - self$col_info = ujoin(self$col_info, ci, key = "id") + private$.col_info = ujoin(private$.col_info, ci, key = "id") # add rows to col_info for new columns - 
self$col_info = rbindlist(list( + private$.col_info = rbindlist(list( self$col_info, insert_named(ci[!list(self$col_info), on = "id"], list(label = NA_character_, fix_factor_levels = FALSE)) ), use.names = TRUE) @@ -649,7 +627,7 @@ Task = R6Class("Task", private$.col_roles$feature = union(col_roles$feature, setdiff(data$colnames, c(pk, col_roles$target))) # update backend - self$backend = DataBackendCbind$new(self$backend, data) + private$.backend = DataBackendCbind$new(self$backend, data) invisible(self) }, @@ -679,8 +657,8 @@ Task = R6Class("Task", assert_has_backend(self) private$.hash = NULL private$.col_hashes = NULL - self$backend = DataBackendRename$new(self$backend, old, new) - setkeyv(self$col_info[old, ("id") := new, on = "id"], "id") + private$.backend = DataBackendRename$new(self$backend, old, new) + setkeyv(private$.col_info[old, ("id") := new, on = "id"], "id") private$.col_roles = map(private$.col_roles, map_values, old = old, new = new) invisible(self) }, @@ -789,7 +767,7 @@ Task = R6Class("Task", tab$fix_factor_levels = TRUE private$.hash = NULL - self$col_info = ujoin(self$col_info, tab, key = "id") + private$.col_info = ujoin(self$col_info, tab, key = "id") invisible(self) }, @@ -818,7 +796,7 @@ Task = R6Class("Task", tab[, c("levels", "fix_factor_levels") := list(Map(intersect, levels, new_levels), TRUE)] private$.hash = NULL - self$col_info = ujoin(self$col_info, remove_named(tab, "new_levels"), key = "id") + private$.col_info = ujoin(self$col_info, remove_named(tab, "new_levels"), key = "id") invisible(self) }, @@ -881,8 +859,8 @@ Task = R6Class("Task", b = self$backend ..cns = union(b$primary_key, unlist(private$.col_roles, use.names = FALSE)) dt = b$data(rows = unique(self$row_ids), cols = ..cns) - self$backend = as_data_backend(dt, primary_key = b$primary_key) - self$col_info = setkeyv(self$col_info[list(..cns), on = "id"], "id") + private$.backend = as_data_backend(dt, primary_key = b$primary_key) + private$.col_info = 
setkeyv(self$col_info[list(..cns), on = "id"], "id") if (internal_valid_task && !is.null(private$.internal_valid_task)) { private$.internal_valid_task$materialize_view(FALSE) @@ -902,6 +880,46 @@ Task = R6Class("Task", private$.id = assert_string(rhs, min.chars = 1L) }, + #' @field backend ([DataBackend])\cr + #' Abstract interface to the data of the task. + backend = function(rhs) { + if (!missing(rhs)) { + warn_deprecated("backend will soon be read-only.") + private$.backend = rhs + } + private$.backend + }, + + #' @field col_info ([data.table::data.table()])\cr + #' Table with with 4 columns, mainly for internal purposes: + #' - `"id"` (`character()`) stores the name of the column. + #' - `"type"` (`character()`) holds the storage type of the variable, e.g. `integer`, `numeric` or `character`. + #' See [mlr_reflections$task_feature_types][mlr_reflections] for a complete list of allowed types. + #' - `"levels"` (`list()`) stores a vector of distinct values (levels) for ordered and unordered factor variables. + #' - `"label"` (`character()`) stores a vector of prettier, formated column names. + #' - `"fix_factor_levels"` (`logical()`) stores flags which determine if the levels of the respective variable + #' need to be reordered after querying the data from the [DataBackend]. + #' + #' Note that all columns of the [DataBackend], also columns which are not selected or have any role, are listed + #' in this table. + col_info = function(rhs) { + if (!missing(rhs)) { + warn_deprecated("col_info will soon be read-only.") + private$.col_info = rhs + } + private$.col_info + }, + + #' @field mlr3_version (`package_version`)\cr + #' Package version of `mlr3` used to create the task. 
+ mlr3_version = function(rhs) { + if (!missing(rhs)) { + warn_deprecated("mlr3_version will soon be read-only.") + private$.mlr3_version = rhs + } + private$.mlr3_version + }, + #' @field internal_valid_task (`Task` or `integer()` or `NULL`)\cr #' Optional validation task that can, e.g., be used for early stopping with learners such as XGBoost. #' See also the `$validate` field of [`Learner`]. @@ -1349,10 +1367,13 @@ Task = R6Class("Task", .col_hashes = NULL, .characteristics = NULL, .row_hash = NULL, + .backend = NULL, + .col_info = NULL, + .mlr3_version = NULL, deep_clone = function(name, value) { # NB: DataBackends are never copied! - if (name == "col_info") { + if (name == ".col_info") { copy(value) } else if (name == ".internal_valid_task" && !is.null(value)) { value$clone(deep = TRUE) @@ -1601,10 +1622,10 @@ task_rm_backend = function(task) { ee = get_private(task) ee$.hash = force(task$hash) ee$.col_hashes = force(task$col_hashes) - ee$.internal_valid_task$backend = NULL + ee$.internal_valid_task$.__enclos_env__$private$.backend = NULL # NULL backend - task$backend = NULL + ee$.backend = NULL task } diff --git a/man/Learner.Rd b/man/Learner.Rd index 5a3152fb7..3ccaf57e0 100644 --- a/man/Learner.Rd +++ b/man/Learner.Rd @@ -268,24 +268,33 @@ Contains all information gathered during \code{train()} and \code{predict()}. It is not recommended to access elements from \code{state} directly. This is an internal data structure which may change in the future.} +\item{\code{feature_types}}{(\code{character()})\cr +Stores the feature types the learner can handle, e.g. \code{"logical"}, \code{"numeric"}, or \code{"factor"}. +A complete list of candidate feature types, grouped by task type, is stored in \code{\link[=mlr_reflections]{mlr_reflections$task_feature_types}}.} + +\item{\code{packages}}{(\code{character(1)})\cr +Set of required packages. 
+These packages are loaded, but not attached.} + +\item{\code{man}}{(\code{character(1)})\cr +String in the format \verb{[pkg]::[topic]} pointing to a manual page for this object. +Defaults to \code{NA}, but can be set by child classes.} +} +\if{html}{\out{}} +} +\section{Active bindings}{ +\if{html}{\out{
}} +\describe{ \item{\code{task_type}}{(\code{character(1)})\cr Task type, e.g. \code{"classif"} or \code{"regr"}. For a complete list of possible task types (depending on the loaded packages), see \code{\link[=mlr_reflections]{mlr_reflections$task_types$type}}.} -\item{\code{feature_types}}{(\code{character()})\cr -Stores the feature types the learner can handle, e.g. \code{"logical"}, \code{"numeric"}, or \code{"factor"}. -A complete list of candidate feature types, grouped by task type, is stored in \code{\link[=mlr_reflections]{mlr_reflections$task_feature_types}}.} - \item{\code{properties}}{(\code{character()})\cr Stores a set of properties/capabilities the learner has. A complete list of candidate properties, grouped by task type, is stored in \code{\link[=mlr_reflections]{mlr_reflections$learner_properties}}.} -\item{\code{packages}}{(\code{character(1)})\cr -Set of required packages. -These packages are loaded, but not attached.} - \item{\code{predict_sets}}{(\code{character()})\cr During \code{\link[=resample]{resample()}}/\code{\link[=benchmark]{benchmark()}}, a \link{Learner} can predict on multiple sets. Per default, a learner only predicts observations in the test set (\code{predict_sets == "test"}). @@ -316,15 +325,6 @@ Default is \code{c(train = Inf, predict = Inf)}. Also see the section on error handling the mlr3book: \url{https://mlr3book.mlr-org.com/chapters/chapter10/advanced_technical_aspects_of_mlr3.html#sec-error-handling}} -\item{\code{man}}{(\code{character(1)})\cr -String in the format \verb{[pkg]::[topic]} pointing to a manual page for this object. -Defaults to \code{NA}, but can be set by child classes.} -} -\if{html}{\out{
}} -} -\section{Active bindings}{ -\if{html}{\out{
}} -\describe{ \item{\code{use_weights}}{(\code{character(1)})\cr How weights should be handled. Settings are \code{"use"} \code{"ignore"}, and \code{"error"}. diff --git a/man/Task.Rd b/man/Task.Rd index 858fa97ad..028838c9f 100644 --- a/man/Task.Rd +++ b/man/Task.Rd @@ -243,6 +243,23 @@ Task type, e.g. \code{"classif"} or \code{"regr"}. For a complete list of possible task types (depending on the loaded packages), see \code{\link[=mlr_reflections]{mlr_reflections$task_types$type}}.} +\item{\code{man}}{(\code{character(1)})\cr +String in the format \verb{[pkg]::[topic]} pointing to a manual page for this object. +Defaults to \code{NA}, but can be set by child classes.} + +\item{\code{extra_args}}{(named \code{list()})\cr +Additional arguments set during construction. +Required for \code{\link[=convert_task]{convert_task()}}.} +} +\if{html}{\out{
}} +} +\section{Active bindings}{ +\if{html}{\out{
}} +\describe{ +\item{\code{id}}{(\code{character(1)})\cr +Identifier of the object. +Used in tables, plot and text output.} + \item{\code{backend}}{(\link{DataBackend})\cr Abstract interface to the data of the task.} @@ -261,25 +278,8 @@ need to be reordered after querying the data from the \link{DataBackend}. Note that all columns of the \link{DataBackend}, also columns which are not selected or have any role, are listed in this table.} -\item{\code{man}}{(\code{character(1)})\cr -String in the format \verb{[pkg]::[topic]} pointing to a manual page for this object. -Defaults to \code{NA}, but can be set by child classes.} - -\item{\code{extra_args}}{(named \code{list()})\cr -Additional arguments set during construction. -Required for \code{\link[=convert_task]{convert_task()}}.} - \item{\code{mlr3_version}}{(\code{package_version})\cr Package version of \code{mlr3} used to create the task.} -} -\if{html}{\out{
}} -} -\section{Active bindings}{ -\if{html}{\out{
}} -\describe{ -\item{\code{id}}{(\code{character(1)})\cr -Identifier of the object. -Used in tables, plot and text output.} \item{\code{internal_valid_task}}{(\code{Task} or \code{integer()} or \code{NULL})\cr Optional validation task that can, e.g., be used for early stopping with learners such as XGBoost. diff --git a/tests/testthat/test_HotstartStack.R b/tests/testthat/test_HotstartStack.R index a1e1dc3c3..e74ba2310 100644 --- a/tests/testthat/test_HotstartStack.R +++ b/tests/testthat/test_HotstartStack.R @@ -134,7 +134,7 @@ test_that("HotstartStack works with backward target learner and decreased hotsta learner_1$train(task) learner = lrn("classif.debug", iter = 1) - learner$properties[learner$properties %chin% "hotstart_forward"] = "hotstart_backward" + learner$.__enclos_env__$private$.properties[learner$properties %chin% "hotstart_forward"] = "hotstart_backward" hot = HotstartStack$new(list(learner_1)) expect_equal(hot$start_cost(learner, task$hash), 0) @@ -151,7 +151,7 @@ test_that("HotstartStack works with backward target learner when cost of hotstar learner_2$train(task) learner = lrn("classif.debug", iter = 3) - learner$properties[learner$properties %chin% "hotstart_forward"] = "hotstart_backward" + learner$.__enclos_env__$private$.properties[learner$properties %chin% "hotstart_forward"] = "hotstart_backward" hot = HotstartStack$new(list(learner_1, learner_2)) expect_equal(hot$start_cost(learner, task$hash), c(0, 0)) @@ -166,7 +166,7 @@ test_that("HotstartStack works when hotstart values of hotstart learners are low learner_2$train(task) learner = lrn("classif.debug", iter = 2) - learner$properties[learner$properties %chin% "hotstart_forward"] = "hotstart_backward" + learner$.__enclos_env__$private$.properties[learner$properties %chin% "hotstart_forward"] = "hotstart_backward" hot = HotstartStack$new(list(learner_1, learner_2)) expect_equal(hot$start_cost(learner, task$hash), c(0, NA_real_)) @@ -181,7 +181,7 @@ test_that("HotstartStack works when 
backward hotstart and target learner are equ learner_1$train(task) learner = lrn("classif.debug", iter = 1) - learner$properties[learner$properties %chin% "hotstart_forward"] = "hotstart_backward" + learner$.__enclos_env__$private$.properties[learner$properties %chin% "hotstart_forward"] = "hotstart_backward" hot = HotstartStack$new(list(learner_1)) expect_equal(hot$start_cost(learner, task$hash), -1) @@ -197,7 +197,7 @@ test_that("HotstartStack works with backward target learner when hotstart values learner_1$train(task) learner = lrn("classif.debug", iter = 2) - learner$properties[learner$properties %chin% "hotstart_forward"] = "hotstart_backward" + learner$.__enclos_env__$private$.properties[learner$properties %chin% "hotstart_forward"] = "hotstart_backward" hot = HotstartStack$new(list(learner_1)) expect_equal(hot$start_cost(learner, task$hash), NA_real_) @@ -218,7 +218,7 @@ test_that("HotstartStack works with backward target learner when hotstart learne learner_4$train(task) learner = lrn("classif.debug", iter = 2) - learner$properties[learner$properties %chin% "hotstart_forward"] = "hotstart_backward" + learner$.__enclos_env__$private$.properties[learner$properties %chin% "hotstart_forward"] = "hotstart_backward" hot = HotstartStack$new(list(learner_1, learner_2, learner_3, learner_4)) expect_equal(hot$start_cost(learner, task$hash), c(NA_real_, -1, 0, NA_real_)) @@ -233,7 +233,7 @@ test_that("HotstartStack works with forward/backward target learner and increase learner_2$train(task) learner = lrn("classif.debug", iter = 2) - learner$properties = c(learner$properties, "hotstart_backward") + get_private(learner, ".properties") = c(learner$properties, "hotstart_backward") hot = HotstartStack$new(list(learner_1, learner_2)) expect_equal(hot$start_cost(learner, task$hash), c(1, 0)) @@ -250,7 +250,7 @@ test_that("HotstartStack works with forward/backward target learner when cost of learner_2$train(task) learner = lrn("classif.debug", iter = 3) - learner$properties 
= c(learner$properties, "hotstart_backward") + get_private(learner, ".properties") = c(learner$properties, "hotstart_backward") hot = HotstartStack$new(list(learner_1, learner_2)) expect_equal(hot$start_cost(learner, task$hash), c(1, 1)) @@ -265,7 +265,7 @@ test_that("HotstartStack works when hotstart values of hotstart learners are low learner_2$train(task) learner = lrn("classif.debug", iter = 2) - learner$properties = c(learner$properties, "hotstart_backward") + get_private(learner, ".properties") = c(learner$properties, "hotstart_backward") hot = HotstartStack$new(list(learner_1, learner_2)) expect_equal(hot$start_cost(learner, task$hash), c(1, 0)) @@ -280,7 +280,7 @@ test_that("HotstartStack works when forward/backward hotstart and target learner learner_1$train(task) learner = lrn("classif.debug", iter = 1) - learner$properties = c(learner$properties, "hotstart_backward") + get_private(learner, ".properties") = c(learner$properties, "hotstart_backward") hot = HotstartStack$new(list(learner_1)) expect_equal(hot$start_cost(learner, task$hash), -1) @@ -297,7 +297,7 @@ test_that("HotstartStack works with forward/backward target learner when hotstar learner_2$train(task) learner = lrn("classif.debug", iter = 1) - learner$properties = c(learner$properties, "hotstart_backward") + get_private(learner, ".properties") = c(learner$properties, "hotstart_backward") hot = HotstartStack$new(list(learner_1, learner_2)) expect_equal(hot$start_cost(learner, task$hash), c(0, 0)) @@ -310,7 +310,7 @@ test_that("HotstartStack works with forward/backward target learner when hotstar learner_1$train(task) learner = lrn("classif.debug", iter = 2) - learner$properties = c(learner$properties, "hotstart_backward") + get_private(learner, ".properties") = c(learner$properties, "hotstart_backward") hot = HotstartStack$new(list(learner_1)) expect_equal(hot$start_cost(learner, task$hash), 1) @@ -331,7 +331,7 @@ test_that("HotstartStack works with forward/backward target learner when hotstar 
learner_4$train(task) learner = lrn("classif.debug", iter = 2) - learner$properties = c(learner$properties, "hotstart_backward") + get_private(learner, ".properties") = c(learner$properties, "hotstart_backward") hot = HotstartStack$new(list(learner_1, learner_2, learner_3, learner_4)) expect_equal(hot$start_cost(learner, task$hash), c(1, -1, 0, NA_real_)) diff --git a/tests/testthat/test_Learner.R b/tests/testthat/test_Learner.R index d8b6093b9..5487f3f9d 100644 --- a/tests/testthat/test_Learner.R +++ b/tests/testthat/test_Learner.R @@ -134,7 +134,7 @@ test_that("predict on newdata works / no target column", { xdt = data.table(x = 1, y = 1) task = as_task_regr(xdt, target = "y") learner = lrn("regr.featureless") - learner$properties = setdiff(learner$properties, "missings") + get_private(learner, ".properties") = setdiff(learner$properties, "missings") learner$train(task) learner$predict_newdata(xdt[, 1]) }) @@ -303,7 +303,7 @@ test_that("weights", { test_that("mandatory properties", { task = tsk("iris") learner = lrn("classif.rpart") - learner$properties = setdiff(learner$properties, "multiclass") + get_private(learner, ".properties") = setdiff(learner$properties, "multiclass") resample = rsmp("holdout") expect_error(learner$train(task), "multiclass") @@ -323,7 +323,7 @@ test_that("train task is cloned (#382)", { test_that("Error on missing data (#413)", { task = tsk("pima") learner = lrn("classif.rpart") - learner$properties = setdiff(learner$properties, "missings") + get_private(learner, ".properties") = setdiff(learner$properties, "missings") expect_error(learner$train(task), "missing values") }) @@ -388,7 +388,7 @@ test_that("properties are also checked on validation task", { task$rbind(row) task$internal_valid_task = 151 learner = lrn("classif.debug", validate = "predefined") - learner$properties = setdiff(learner$properties, "missings") + learner$.__enclos_env__$private$.properties = setdiff(learner$properties, "missings") expect_error(learner$train(task), 
"missing values") }) @@ -729,8 +729,8 @@ test_that("predict_newdata creates column info correctly", { learner = lrn("classif.debug", save_tasks = TRUE) task = tsk("iris") - task$col_info$label = letters[1:6] - task$col_info$fix_factor_levels = c(TRUE, TRUE, FALSE, TRUE, FALSE, TRUE) + task$.__enclos_env__$private$.col_info$label = letters[1:6] + task$.__enclos_env__$private$.col_info$fix_factor_levels = c(TRUE, TRUE, FALSE, TRUE, FALSE, TRUE) learner$train(task) ## data.frame is passed without task @@ -872,3 +872,26 @@ test_that("oob_error is available without storing models via $.extract_oob_error expect_equal(rr$aggregate(msr("oob_error")), c(oob_error = 0.123)) }) + +#test_that("field validation", { +# l = lrn("classif.debug") +# expect_error({l$timeout = c(train = 1)}, "permutation") +# expect_error({l$timeout = c(train = 1, predict = -1)}, ">= 0") +# expect_error({l$timeout = c(a = 1, b = 0)}, "permutation") +# l$timeout = c(train = 1, predict = 0) +# expect_equal(l$timeout, c(train = 1, predict = 0)) +# +# expect_error({l$parallel_predict = ""}, "flag") +# l$parallel_predict = FALSE +# expect_false(l$parallel_predict) +# l$parallel_predict = TRUE +# expect_true(l$parallel_predict) +# +# expect_error({l$task_type = "regr"}, "read-only") +# +# expect_error({l$predict_sets = "abc"}, "but has additional elements") +# l$predict_sets = "train" +# expect_equal(l$predict_sets, "train") +# l$predict_sets = c("train", "test", "internal_valid") +# expect_equal(l$predict_sets, c("train", "test", "internal_valid")) +#}) diff --git a/tests/testthat/test_Resampling.R b/tests/testthat/test_Resampling.R index db7f67d7b..4898cf6e9 100644 --- a/tests/testthat/test_Resampling.R +++ b/tests/testthat/test_Resampling.R @@ -158,3 +158,12 @@ test_that("task_row_hash in Resampling works correctly", { resampling$instantiate(task) expect_identical(resampling$task_row_hash, task$row_hash) }) + +# Uncomment this once we make the fields read-only +#test_that("fields are read-only", { +# r 
= rsmp("cv", folds = 2) +# expect_error({r$task_row_hash = "foo"}, "read-only") +# expect_error({r$task_nrow = 10}, "read-only") +# expect_error({r$task_hash = "foo"}, "read-only") +# expect_error({r$duplicated_ids = TRUE}, "read-only") +#}) diff --git a/tests/testthat/test_Task.R b/tests/testthat/test_Task.R index 38b3d08a6..e282b1ea7 100644 --- a/tests/testthat/test_Task.R +++ b/tests/testthat/test_Task.R @@ -1020,3 +1020,11 @@ test_that("materialize_view works with duplicates", { task2$materialize_view() expect_equal(task$data(), task2$data()) }) + +# Uncomment this once we make the fields read-only +#test_that("task fields are read-only", { +# task = tsk("iris") +# expect_error({task$col_info = "foo"}, "read-only") +# expect_error({task$backend = tsk("iris")$backend}, "read-only") +# expect_error({task$mlr3_version = "1.0.0"}, "read-only") +#}) diff --git a/tests/testthat/test_benchmark.R b/tests/testthat/test_benchmark.R index 87d0362bf..0075ec879 100644 --- a/tests/testthat/test_benchmark.R +++ b/tests/testthat/test_benchmark.R @@ -572,7 +572,7 @@ test_that("properties are also checked on validation task", { task$rbind(row) task$internal_valid_task = 151 learner = lrn("classif.debug", validate = "predefined") - learner$properties = setdiff(learner$properties, "missings") + learner$.__enclos_env__$private$.properties = setdiff(learner$properties, "missings") suppressWarnings(expect_error(benchmark(benchmark_grid(task, learner, rsmp("holdout"))), "missing values")) }) diff --git a/tests/testthat/test_resample.R b/tests/testthat/test_resample.R index c23d0f541..0006ecce7 100644 --- a/tests/testthat/test_resample.R +++ b/tests/testthat/test_resample.R @@ -349,7 +349,7 @@ test_that("properties are also checked on validation task", { task$rbind(row) task$internal_valid_task = 151 learner = lrn("classif.debug", validate = "predefined") - learner$properties = setdiff(learner$properties, "missings") + learner$.__enclos_env__$private$.properties = 
setdiff(learner$properties, "missings") suppressWarnings(expect_error(resample(task, learner, rsmp("holdout")), "missing values")) }) From 54cf41566b60dfd68bb5dd617c2a2026e86558d5 Mon Sep 17 00:00:00 2001 From: Sebastian Fischer Date: Wed, 13 Aug 2025 19:18:22 +0200 Subject: [PATCH 2/2] Update NEWS.md --- NEWS.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index 06df8252e..1dbdf4b4b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -9,11 +9,6 @@ examples for methods. * Measure "oob_error" now works even without storing models during resampling. -## Deprecations: - -* Assigning to some fields of `Task`, `Learner`, and `Resampling` now throws a deprecation warning. - This will become an error in the future. - # mlr3 1.1.0 * feat: Add new measure `MeasureRegrRQR` for quantile regression.