mlr-org · sebffischer · Feb 16, 2024 · Feb 22, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -87,13 +87,15 @@ Suggests:
  methods,
  vtreat,
  future
+Remotes:
+ mlr-org/mlr3@feat/train-predict
 ByteCompile: true
 Encoding: UTF-8
 Config/testthat/edition: 3
 Config/testthat/parallel: true
 NeedsCompilation: no
 Roxygen: list(markdown = TRUE, r6 = FALSE)
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
 VignetteBuilder: knitr
 Collate:
  'Graph.R'

diff --git a/NAMESPACE b/NAMESPACE
@@ -148,6 +148,7 @@ import(mlr3)
 import(mlr3misc)
 import(paradox)
 importFrom(R6,R6Class)
+importFrom(data.table,as.data.table)
 importFrom(digest,digest)
 importFrom(stats,setNames)
 importFrom(utils,bibentry)

diff --git a/R/GraphLearner.R b/R/GraphLearner.R
@@ -102,7 +102,7 @@ GraphLearner = R6Class("GraphLearner", inherit = Learner,
  feature_types = mlr_reflections$task_feature_types,
  predict_types = names(mlr_reflections$learner_predict_types[[task_type]]),
  packages = graph$packages,
- properties = mlr_reflections$learner_properties[[task_type]],
+ properties = setdiff(mlr_reflections$learner_properties[[task_type]], "uses_test_task"),
  man = "mlr3pipelines::GraphLearner"
  )
 
@@ -173,6 +173,13 @@ GraphLearner = R6Class("GraphLearner", inherit = Learner,
  }
  ),
  private = list(
+ # "uses_test_task" is reported as soon as one PipeOp of the graph has that property.
+ .dependent_properties = function() {
+   flagged = some(self$graph$pipeops, function(po) {
+     "uses_test_task" %in% po$properties
+   })
+   if (flagged) "uses_test_task" else character(0)
+ },
  .graph = NULL,
  deep_clone = function(name, value) {
  # FIXME this repairs the mlr3::Learner deep_clone() method which is broken.
@@ -186,7 +193,16 @@ GraphLearner = R6Class("GraphLearner", inherit = Learner,
  },
 
  .train = function(task) {
- on.exit({self$graph$state = NULL})
+ if (!"uses_test_task" %in% self$properties) {
+   # remove the test information unless needed, so it is not preprocessed unnecessarily
+   prev_test_task = task$test_task
+   # register the restore first so the caller's task is reset even if training fails
+   on.exit({
+     task$test_task = prev_test_task
+   }, add = TRUE)
+   task$test_task = NULL
+ }
+ on.exit({self$graph$state = NULL}, add = TRUE)
  self$graph$train(task)
  state = self$graph$state
  state

diff --git a/R/PipeOp.R b/R/PipeOp.R
@@ -236,7 +236,7 @@ PipeOp = R6Class("PipeOp",
  .result = NULL,
  tags = NULL,
 
- initialize = function(id, param_set = ParamSet$new(), param_vals = list(), input, output, packages = character(0), tags = "abstract") {
+ initialize = function(id, param_set = ParamSet$new(), param_vals = list(), input, output, packages = character(0), tags = "abstract", properties = character(0)) {
  if (inherits(param_set, "ParamSet")) {
  private$.param_set = assert_param_set(param_set)
  private$.param_set_source = NULL
@@ -246,6 +246,7 @@ PipeOp = R6Class("PipeOp",
  }
  self$id = assert_string(id)
 
+ private$.properties = sort(assert_subset(properties, mlr_reflections$pipeops$properties))
  self$param_set$values = insert_named(self$param_set$values, param_vals)
  self$input = assert_connection_table(input)
  self$output = assert_connection_table(output)
@@ -335,6 +336,16 @@ PipeOp = R6Class("PipeOp",
  ),
 
  active = list(
+ # Static properties (set at construction or by assignment) merged with the
+ # properties derived at access time by private$.dependent_properties().
+ properties = function(rhs) {
+   if (!missing(rhs)) {
+     private$.properties = sort(assert_subset(rhs, mlr_reflections$pipeops$properties))
+   }
+   # union() instead of c(): a property that is both static and dependent
+   # must be reported once, not twice
+   sort(union(private$.properties, private$.dependent_properties()))
+ },
  id = function(val) {
  if (!missing(val)) {
  private$.id = val
@@ -415,6 +426,10 @@ PipeOp = R6Class("PipeOp",
  ),
 
  private = list(
+ .dependent_properties = function(rhs) { # NOTE(review): `rhs` is never used and the `properties` binding calls this without arguments — confirm it can be dropped
+ character(0) # default: nothing is added to the static private$.properties
+ },
+ .properties = NULL,
  deep_clone = function(name, value) {
  if (!is.null(private$.param_set_source)) {
  private$.param_set = NULL # required to keep clone identical to original, otherwise tests get really ugly

diff --git a/R/PipeOpImpute.R b/R/PipeOpImpute.R
@@ -148,7 +148,6 @@ PipeOpImpute = R6Class("PipeOpImpute",
 
  .train = function(inputs) {
  intask = inputs[[1]]$clone(deep = TRUE)
-
  affected_cols = (self$param_set$values$affect_columns %??% selector_all())(intask)
  affected_cols = intersect(affected_cols, private$.select_cols(intask))
 
@@ -191,6 +190,10 @@ PipeOpImpute = R6Class("PipeOpImpute",
 
  self$state$outtasklayout = copy(intask$feature_types)
 
+ if (!is.null(intask$test_task)) {
+ intask$test_task = private$.predict(list(intask$test_task))[[1L]]
+ }
+
  list(intask)
  },
 

diff --git a/R/PipeOpLearner.R b/R/PipeOpLearner.R
@@ -134,7 +134,13 @@ PipeOpLearner = R6Class("PipeOpLearner", inherit = PipeOp,
  ),
  private = list(
  .learner = NULL,
-
+ # Mirrors the wrapped learner: "uses_test_task" is reported iff the learner has it.
+ .dependent_properties = function() {
+   if (!("uses_test_task" %in% private$.learner$properties)) {
+     return(character(0))
+   }
+   "uses_test_task"
+ },
  .train = function(inputs) {
  on.exit({private$.learner$state = NULL})
  task = inputs[[1L]]

diff --git a/R/PipeOpLearnerCV.R b/R/PipeOpLearnerCV.R
@@ -171,6 +171,13 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV",
  }
  ),
  private = list(
+ # Forwards the wrapped learner's "uses_test_task" property, if it has one;
+ # otherwise no dependent property is reported.
+ .dependent_properties = function() {
+   learner_props = private$.learner$properties
+   uses_test = "uses_test_task" %in% learner_props
+   if (uses_test) "uses_test_task" else character(0)
+ },
  .train_task = function(task) {
  on.exit({private$.learner$state = NULL})
 

diff --git a/R/PipeOpTaskPreproc.R b/R/PipeOpTaskPreproc.R
@@ -217,6 +217,12 @@ PipeOpTaskPreproc = R6Class("PipeOpTaskPreproc",
  self$state$outtasklayout = copy(intask$feature_types)
  self$state$outtaskshell = intask$data(rows = intask$row_ids[0])
 
+ if (!is.null(intask$test_task)) {
+ # we call into .predict() and not .predict_task() to not put the burden
+ # of subsetting the features etc. on the PipeOp overwriting .predict_task
+ intask$test_task = private$.predict(list(intask$test_task))[[1L]]
+ }
+
  if (do_subset) {
  # FIXME: this fails if .train_task added a column with the same name
  intask$col_roles$feature = union(intask$col_roles$feature, y = remove_cols)

diff --git a/R/zzz.R b/R/zzz.R
@@ -16,6 +16,7 @@ register_mlr3 = function() {
  c("abstract", "meta", "missings", "feature selection", "imbalanced data",
  "data transform", "target transform", "ensemble", "robustify", "learner", "encode",
  "multiplicity")))
+ x$pipeops$properties = c("uses_test_task")
 }
 
 .onLoad = function(libname, pkgname) { # nocov start

diff --git a/man/Graph.Rd b/man/Graph.Rd
diff --git a/man/PipeOp.Rd b/man/PipeOp.Rd
diff --git a/man/PipeOpEnsemble.Rd b/man/PipeOpEnsemble.Rd
diff --git a/man/PipeOpImpute.Rd b/man/PipeOpImpute.Rd