Adds an example about FunctionTransformer

Signed-off-by: Xavier Dupre <[email protected]>
onnx · Nov 8, 2023 · 98f9888 · 98f9888
1 parent 0374eea
commit 98f9888
Show file tree

Hide file tree

Showing 2 changed files with 194 additions and 0 deletions.
diff --git a/docs/tutorial/plot_function_transformer.py b/docs/tutorial/plot_function_transformer.py
@@ -0,0 +1,193 @@
+"""
+Issues with FunctionTransformer
+===============================
+
+A pipeline including a `FunctionTransformer
+<https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.FunctionTransformer.html>`_
+cannot be automatically converted into onnx because there is no converter able to
+convert custom python code into ONNX. A custom converter needs to be written
+specifically for it.
+
+Initial try
++++++++++++
+
+A very simple pipeline and the first attempt to convert it into ONNX.
+"""
+import numpy as np
+from numpy.testing import assert_allclose
+from pandas import DataFrame
+from sklearn.base import TransformerMixin, BaseEstimator
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.preprocessing import FunctionTransformer
+from sklearn.compose import ColumnTransformer
+from sklearn.pipeline import Pipeline
+from skl2onnx import to_onnx
+
+# For the custom converter
+from skl2onnx import update_registered_converter
+from skl2onnx.common.utils import check_input_and_output_numbers
+from skl2onnx.algebra.onnx_ops import OnnxSlice, OnnxSub, OnnxDiv, OnnxMul, OnnxCastLike
+
+# To check discrepancies
+from onnx.reference import ReferenceEvaluator
+from onnxruntime import InferenceSession
+
+
+def calculate(df):
+ df["c"] = 100 * (df["a"] - df["b"]) / df["b"]
+ return df
+
+
+mapper = ColumnTransformer(
+ transformers=[
+ ("c", FunctionTransformer(calculate), ["a", "b"]),
+ ],
+ remainder="passthrough",
+ verbose_feature_names_out=False,
+)
+mapper.set_output(transform="pandas")
+
+pipe = Pipeline([("mapper", mapper), ("classifier", DecisionTreeClassifier())])
+
+data = DataFrame(
+ [
+ dict(a=1, b=2, f=5),
+ dict(a=4, b=5, f=10),
+ ]
+)
+y = np.array([0, 1], dtype=np.int64)
+pipe.fit(data, y)
+
+try:
+ to_onnx(pipe, data[:1], options={"zipmap": False})
+except Exception as e:
+ print("It does not work:", e)
+
+##################################
+# Use of custom transformer
+# +++++++++++++++++++++++++
+#
+# It is easier to write a custom converter if the FunctionTransformer
+# is implemented as a custom transformer.
+
+
+class OverpriceCalculator(BaseEstimator, TransformerMixin):
+ def __init__(self):
+ pass
+
+ def calculate_overprice(self, x, y):
+ return 100 * (x - y) / y
+
+ def fit(self, X, y=None):
+ return self
+
+ def transform(self, X, y=None):
+ x = X.apply(lambda x: self.calculate_overprice(x.a, x.b), axis=1)
+ return x.values.reshape((-1, 1))
+
+
+mapper = ColumnTransformer(
+ transformers=[
+ ("c", OverpriceCalculator(), ["a", "b"]),
+ ],
+ remainder="passthrough",
+ verbose_feature_names_out=False,
+)
+
+pipe_tr = Pipeline([("mapper", mapper), ("classifier", DecisionTreeClassifier())])
+pipe_tr.fit(data, y)
+
+#############################
+# Both pipelines return the same output.
+assert_allclose(pipe.predict_proba(data), pipe_tr.predict_proba(data))
+
+#############################
+# But the conversion still fails with a different error message.
+
+try:
+ to_onnx(pipe_tr, data[:1], options={"zipmap": False})
+except Exception as e:
+ print("It does not work:", e)
+
+
+#################################
+# Custom converter
+# ++++++++++++++++
+#
+# We need to implement the method `calculate_overprice` in ONNX.
+# The first function returns the expected type and shape.
+
+
+def overprice_shape_calculator(operator):
+ check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1)
+ # Gets the input type, the transformer works on any numerical type.
+ input_type = operator.inputs[0].type.__class__
+ # The first dimension is usually dynamic (batch dimension).
+ input_dim = operator.inputs[0].get_first_dimension()
+ operator.outputs[0].type = input_type([input_dim, 1])
+
+
+def overprice_converter(scope, operator, container):
+ # No need to retrieve the fitted estimator, it is not trained.
+ # op = operator.raw_operator
+ opv = container.target_opset
+ X = operator.inputs[0]
+
+ # 100 * (x-y)/y --> 100 * (X[0] - X[1]) / X[1]
+
+ zero = np.array([0], dtype=np.int64)
+ one = np.array([1], dtype=np.int64)
+ two = np.array([2], dtype=np.int64)
+ hundred = np.array([100], dtype=np.float32)
+
+ # Slice(data, starts, ends, axes)
+ x0 = OnnxSlice(X, zero, one, one, op_version=opv)
+ x1 = OnnxSlice(X, one, two, one, op_version=opv)
+ z = OnnxMul(
+ OnnxCastLike(hundred, X, op_version=opv),
+ OnnxDiv(OnnxSub(x0, x1, op_version=opv), x1, op_version=opv),
+ op_version=opv,
+ output_names=operator.outputs[0],
+ )
+ z.add_to(scope, container)
+
+
+update_registered_converter(
+ OverpriceCalculator,
+ "AliasOverpriceCalculator",
+ overprice_shape_calculator,
+ overprice_converter,
+)
+
+
+onx = to_onnx(pipe_tr, data[:1], target_opset=18, options={"zipmap": False})
+
+############################
+# Let's check there is no discrepancies
+# +++++++++++++++++++++++++++++++++++++
+#
+# First with :class:`onnx.reference.ReferenceEvaluator`.
+
+expected = (pipe_tr.predict(data), pipe_tr.predict_proba(data))
+feeds = {
+ "a": data["a"].values.reshape((-1, 1)),
+ "b": data["b"].values.reshape((-1, 1)),
+ "f": data["f"].values.reshape((-1, 1)),
+}
+print(feeds)
+
+# verbose=10 to show intermediate results
+ref = ReferenceEvaluator(onx, verbose=0)
+got = ref.run(None, feeds)
+
+assert_allclose(expected[0], got[0])
+assert_allclose(expected[1], got[1])
+
+#######################################
+# Then with the runtime used to deploy, onnxruntime for example.
+
+ref = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
+got = ref.run(None, feeds)
+
+assert_allclose(expected[0], got[0])
+assert_allclose(expected[1], got[1])
diff --git a/docs/tutorial_2_new_converter.rst b/docs/tutorial_2_new_converter.rst
@@ -42,6 +42,7 @@ an example.
 
  auto_tutorial/plot_icustom_converter
  auto_tutorial/plot_jcustom_syntax
+ auto_tutorial/plot_function_transformer
  auto_tutorial/plot_kcustom_converter_wrapper
  auto_tutorial/plot_lcustom_options
  auto_tutorial/plot_mcustom_parser