From 2c0c1aa0e115af4d1dc6d9c0df1db0fb5d5d6ff5 Mon Sep 17 00:00:00 2001 From: Jigyasu Date: Fri, 2 Jan 2026 14:12:11 +0530 Subject: [PATCH 01/15] [ENH] Refactor `Extension` --- openml/extensions/connectors/__init__.py | 5 + openml/extensions/connectors/base.py | 25 ++++ openml/extensions/execution/__init__.py | 5 + openml/extensions/execution/base.py | 109 +++++++++++++++++ openml/extensions/registry.py | 57 +++++++++ openml/extensions/serialization/__init__.py | 5 + openml/extensions/serialization/base.py | 124 ++++++++++++++++++++ 7 files changed, 330 insertions(+) create mode 100644 openml/extensions/connectors/__init__.py create mode 100644 openml/extensions/connectors/base.py create mode 100644 openml/extensions/execution/__init__.py create mode 100644 openml/extensions/execution/base.py create mode 100644 openml/extensions/registry.py create mode 100644 openml/extensions/serialization/__init__.py create mode 100644 openml/extensions/serialization/base.py diff --git a/openml/extensions/connectors/__init__.py b/openml/extensions/connectors/__init__.py new file mode 100644 index 000000000..5bbd28033 --- /dev/null +++ b/openml/extensions/connectors/__init__.py @@ -0,0 +1,5 @@ +# License: BSD 3-Clause + +"""Base classes for OpenML API connectors.""" + +from openml.extensions.connectors.base import OpenMLAPIConnector \ No newline at end of file diff --git a/openml/extensions/connectors/base.py b/openml/extensions/connectors/base.py new file mode 100644 index 000000000..048adcdf3 --- /dev/null +++ b/openml/extensions/connectors/base.py @@ -0,0 +1,25 @@ +# License: BSD 3-Clause + +from abc import ABC, abstractmethod +from typing import Any + +from openml.extensions.execution import ModelExecutor +from openml.extensions.serialization import ModelSerializer + +class OpenMLAPIConnector(ABC): + """ + Base class for OpenML API connectors. + """ + + @abstractmethod + def serializer(self) -> ModelSerializer: + """Return the serializer for this API.""" + + @abstractmethod + def executor(self) -> ModelExecutor: + """Return the executor for this API.""" + + @classmethod + @abstractmethod + def supports(cls, model: Any) -> bool: + """High-level check if this connector supports the model.""" \ No newline at end of file diff --git a/openml/extensions/execution/__init__.py b/openml/extensions/execution/__init__.py new file mode 100644 index 000000000..c41c09ba7 --- /dev/null +++ b/openml/extensions/execution/__init__.py @@ -0,0 +1,5 @@ +# License: BSD 3-Clause + +"""Base class for estimator executors.""" + +from openml.extensions.execution.base import ModelExecutor \ No newline at end of file diff --git a/openml/extensions/execution/base.py b/openml/extensions/execution/base.py new file mode 100644 index 000000000..148f123f2 --- /dev/null +++ b/openml/extensions/execution/base.py @@ -0,0 +1,109 @@ +# License: BSD 3-Clause + +"""Base class for estimator executors.""" + +from abc import ABC, abstractmethod +from typing import Any +from collections import OrderedDict + +import numpy as np +import scipy.sparse + +from openml.tasks.task import OpenMLTask +from openml.runs.trace import OpenMLRunTrace + +class ModelExecutor(ABC): + """Define runtime execution semantics for a specific API type.""" + + @classmethod + @abstractmethod + def can_handle_model(cls, model: Any) -> bool: + """Check whether a model flow can be handled by this extension. + + This is typically done by checking the type of the model, or the package it belongs to. 
+ + Parameters + ---------- + model : Any + + Returns + ------- + bool + """ + + @abstractmethod + def seed_model(self, model: Any, seed: int | None) -> Any: + """Set the seed of all the unseeded components of a model and return the seeded model. + + Required so that all seed information can be uploaded to OpenML for reproducible results. + + Parameters + ---------- + model : Any + The model to be seeded + seed : int + + Returns + ------- + model + """ + + @abstractmethod + def _run_model_on_fold( # noqa: PLR0913 + self, + model: Any, + task: OpenMLTask, + X_train: np.ndarray | scipy.sparse.spmatrix, + rep_no: int, + fold_no: int, + y_train: np.ndarray | None = None, + X_test: np.ndarray | scipy.sparse.spmatrix | None = None, + ) -> tuple[np.ndarray, np.ndarray | None, OrderedDict[str, float], OpenMLRunTrace | None]: + """Run a model on a repeat, fold, subsample triplet of the task. + + Returns the data that is necessary to construct the OpenML Run object. Is used by + :func:`openml.runs.run_flow_on_task`. + + Parameters + ---------- + model : Any + The UNTRAINED model to run. The model instance will be copied and not altered. + task : OpenMLTask + The task to run the model on. + X_train : array-like + Training data for the given repetition and fold. + rep_no : int + The repeat of the experiment (0-based; in case of 1 time CV, always 0) + fold_no : int + The fold nr of the experiment (0-based; in case of holdout, always 0) + y_train : Optional[np.ndarray] (default=None) + Target attributes for supervised tasks. In case of classification, these are integer + indices to the potential classes specified by dataset. + X_test : Optional, array-like (default=None) + Test attributes to test for generalization in supervised tasks. + + Returns + ------- + predictions : np.ndarray + Model predictions. + probabilities : Optional, np.ndarray + Predicted probabilities (only applicable for supervised classification tasks). + user_defined_measures : OrderedDict[str, float] + User defined measures that were generated on this fold + trace : Optional, OpenMLRunTrace + Hyperparameter optimization trace (only applicable for supervised tasks with + hyperparameter optimization). + """ + + @abstractmethod + def check_if_model_fitted(self, model: Any) -> bool: + """Returns True/False denoting if the model has already been fitted/trained. + + Parameters + ---------- + model : Any + + Returns + ------- + bool + """ \ No newline at end of file diff --git a/openml/extensions/registry.py b/openml/extensions/registry.py new file mode 100644 index 000000000..2ab6cc785 --- /dev/null +++ b/openml/extensions/registry.py @@ -0,0 +1,57 @@ +# License: BSD 3-Clause + +from typing import Any + +from openml.exceptions import OpenMLException +from openml.extensions.connectors import OpenMLAPIConnector + +from openml_sklearn import SklearnExtension + +API_CONNECTOR_REGISTRY: list[type[OpenMLAPIConnector]] = [ + SklearnExtension, # TODO: I need to refactor SklearnExtension +] + +def resolve_api_connector(estimator: Any) -> OpenMLAPIConnector: + """ + Identifies and returns the appropriate OpenML API connector for a given estimator. + + This function iterates through the global ``API_CONNECTOR_REGISTRY`` to find + a connector class that supports the provided estimator object. If exactly one + matching connector is found, it is instantiated and returned. + + Parameters + ---------- + estimator : Any + The estimator object (e.g., a scikit-learn estimator) for which an API + connector is required. 
+ + Returns + ------- + OpenMLAPIConnector + An instance of the matching API connector. + + Raises + ------ + OpenMLException + If no connector is found in the registry that supports the provided + model, or if multiple connectors in the registry claim support for + the provided model. + """ + candidates = [ + connector for connector in API_CONNECTOR_REGISTRY + if connector.supports(estimator) + ] + + if not candidates: + raise OpenMLException( + "No OpenML API connector found for this estimator." + ) + + if len(candidates) > 1: + names = [c.__name__ for c in candidates] + raise OpenMLException( + "Multiple API connectors match this estimator:\n" + + "\n".join(f"- {n}" for n in names) + ) + + return candidates[0]() diff --git a/openml/extensions/serialization/__init__.py b/openml/extensions/serialization/__init__.py new file mode 100644 index 000000000..e8bf5d200 --- /dev/null +++ b/openml/extensions/serialization/__init__.py @@ -0,0 +1,5 @@ +# License: BSD 3-Clause + +"""Base classes for estimator serializors.""" + +from openml.extensions.serialization.base import ModelSerializer \ No newline at end of file diff --git a/openml/extensions/serialization/base.py b/openml/extensions/serialization/base.py new file mode 100644 index 000000000..2e275af2e --- /dev/null +++ b/openml/extensions/serialization/base.py @@ -0,0 +1,124 @@ +# License: BSD 3-Clause + +"""Base class for estimator serializors.""" + +from abc import ABC, abstractmethod +from typing import Any + +from openml.flows import OpenMLFlow +from openml.runs.trace import OpenMLTraceIteration + + +class ModelSerializer(ABC): + """Handle the conversion between estimator instances and OpenML Flows.""" + + @classmethod + @abstractmethod + def can_handle_model(cls, model: Any) -> bool: + """Check whether a model flow can be handled by this extension. + + This is typically done by checking the type of the model, or the package it belongs to. + + Parameters + ---------- + model : Any + + Returns + ------- + bool + """ + + @abstractmethod + def model_to_flow(self, model: Any) -> OpenMLFlow: + """Transform a model to a flow for uploading it to OpenML. + + Parameters + ---------- + model : Any + + Returns + ------- + OpenMLFlow + """ + + @abstractmethod + def flow_to_model( + self, + flow: OpenMLFlow, + initialize_with_defaults: bool = False, # noqa: FBT001, FBT002 + strict_version: bool = True, # noqa: FBT002, FBT001 + ) -> Any: + """Instantiate a model from the flow representation. + + Parameters + ---------- + flow : OpenMLFlow + + initialize_with_defaults : bool, optional (default=False) + If this flag is set, the hyperparameter values of flows will be + ignored and a flow with its defaults is returned. + + strict_version : bool, default=True + Whether to fail if version requirements are not fulfilled. + + Returns + ------- + Any + """ + + @abstractmethod + def obtain_parameter_values( + self, + flow: OpenMLFlow, + model: Any = None, + ) -> list[dict[str, Any]]: + """Extracts all parameter settings required for the flow from the model. + + If no explicit model is provided, the parameters will be extracted from `flow.model` + instead. + + Parameters + ---------- + flow : OpenMLFlow + OpenMLFlow object (containing flow ids, i.e., it has to be downloaded from the server) + + model: Any, optional (default=None) + The model from which to obtain the parameter values. Must match the flow signature. + If None, use the model specified in ``OpenMLFlow.model``. 
+ + Returns + ------- + list + A list of dicts, where each dict has the following entries: + - ``oml:name`` : str: The OpenML parameter name + - ``oml:value`` : mixed: A representation of the parameter value + - ``oml:component`` : int: flow id to which the parameter belongs + """ + + @abstractmethod + def get_version_information(self) -> list[str]: + """Return dependency and version information.""" + + # Abstract methods for hyperparameter optimization + + @abstractmethod + def instantiate_model_from_hpo_class( + self, + model: Any, + trace_iteration: OpenMLTraceIteration, + ) -> Any: + """Instantiate a base model which can be searched over by the hyperparameter optimization + model. + + Parameters + ---------- + model : Any + A hyperparameter optimization model which defines the model to be instantiated. + trace_iteration : OpenMLTraceIteration + Describing the hyperparameter settings to instantiate. + + Returns + ------- + Any + """ + # TODO a trace belongs to a run and therefore a flow -> simplify this part of the interface! \ No newline at end of file From 2aab335a39a7044ee34427a05d9647579f63cb59 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 2 Jan 2026 08:49:05 +0000 Subject: [PATCH 02/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- openml/extensions/connectors/__init__.py | 2 +- openml/extensions/connectors/base.py | 15 +++++----- openml/extensions/execution/__init__.py | 2 +- openml/extensions/execution/base.py | 16 +++++++---- openml/extensions/registry.py | 32 ++++++++++----------- openml/extensions/serialization/__init__.py | 2 +- openml/extensions/serialization/base.py | 11 ++++--- 7 files changed, 44 insertions(+), 36 deletions(-) diff --git a/openml/extensions/connectors/__init__.py b/openml/extensions/connectors/__init__.py index 5bbd28033..d9ebb7fd8 100644 --- a/openml/extensions/connectors/__init__.py +++ b/openml/extensions/connectors/__init__.py @@ -2,4 +2,4 @@ """Base classes for OpenML API connectors.""" -from openml.extensions.connectors.base import OpenMLAPIConnector \ No newline at end of file +from openml.extensions.connectors.base import OpenMLAPIConnector diff --git a/openml/extensions/connectors/base.py b/openml/extensions/connectors/base.py index 048adcdf3..df753c04e 100644 --- a/openml/extensions/connectors/base.py +++ b/openml/extensions/connectors/base.py @@ -1,15 +1,16 @@ # License: BSD 3-Clause +from __future__ import annotations from abc import ABC, abstractmethod -from typing import Any +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from openml.extensions.execution import ModelExecutor + from openml.extensions.serialization import ModelSerializer -from openml.extensions.execution import ModelExecutor -from openml.extensions.serialization import ModelSerializer class OpenMLAPIConnector(ABC): - """ - Base class for OpenML API connectors. 
- """ + """Base class for OpenML API connectors.""" @abstractmethod def serializer(self) -> ModelSerializer: @@ -22,4 +23,4 @@ def executor(self) -> ModelExecutor: @classmethod @abstractmethod def supports(cls, model: Any) -> bool: - """High-level check if this connector supports the model.""" \ No newline at end of file + """High-level check if this connector supports the model.""" diff --git a/openml/extensions/execution/__init__.py b/openml/extensions/execution/__init__.py index c41c09ba7..245a16db4 100644 --- a/openml/extensions/execution/__init__.py +++ b/openml/extensions/execution/__init__.py @@ -2,4 +2,4 @@ """Base class for estimator executors.""" -from openml.extensions.execution.base import ModelExecutor \ No newline at end of file +from openml.extensions.execution.base import ModelExecutor diff --git a/openml/extensions/execution/base.py b/openml/extensions/execution/base.py index 148f123f2..018fe2964 100644 --- a/openml/extensions/execution/base.py +++ b/openml/extensions/execution/base.py @@ -2,15 +2,19 @@ """Base class for estimator executors.""" +from __future__ import annotations + from abc import ABC, abstractmethod -from typing import Any from collections import OrderedDict +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + import numpy as np + import scipy.sparse -import numpy as np -import scipy.sparse + from openml.runs.trace import OpenMLRunTrace + from openml.tasks.task import OpenMLTask -from openml.tasks.task import OpenMLTask -from openml.runs.trace import OpenMLRunTrace class ModelExecutor(ABC): """Define runtime execution semantics for a specific API type.""" @@ -106,4 +110,4 @@ def check_if_model_fitted(self, model: Any) -> bool: Returns ------- bool - """ \ No newline at end of file + """ diff --git a/openml/extensions/registry.py b/openml/extensions/registry.py index 2ab6cc785..0bdd9cd2f 100644 --- a/openml/extensions/registry.py +++ b/openml/extensions/registry.py @@ -1,28 +1,32 @@ # License: BSD 3-Clause +from __future__ import annotations -from typing import Any +from typing import TYPE_CHECKING, Any + +from openml_sklearn import SklearnExtension from openml.exceptions import OpenMLException -from openml.extensions.connectors import OpenMLAPIConnector -from openml_sklearn import SklearnExtension +if TYPE_CHECKING: + from openml.extensions.connectors import OpenMLAPIConnector API_CONNECTOR_REGISTRY: list[type[OpenMLAPIConnector]] = [ - SklearnExtension, # TODO: I need to refactor SklearnExtension + SklearnExtension, # TODO: I need to refactor SklearnExtension ] + def resolve_api_connector(estimator: Any) -> OpenMLAPIConnector: """ Identifies and returns the appropriate OpenML API connector for a given estimator. - This function iterates through the global ``API_CONNECTOR_REGISTRY`` to find - a connector class that supports the provided estimator object. If exactly one + This function iterates through the global ``API_CONNECTOR_REGISTRY`` to find + a connector class that supports the provided estimator object. If exactly one matching connector is found, it is instantiated and returned. Parameters ---------- estimator : Any - The estimator object (e.g., a scikit-learn estimator) for which an API + The estimator object (e.g., a scikit-learn estimator) for which an API connector is required. 
Returns @@ -33,25 +37,21 @@ def resolve_api_connector(estimator: Any) -> OpenMLAPIConnector: Raises ------ OpenMLException - If no connector is found in the registry that supports the provided - model, or if multiple connectors in the registry claim support for + If no connector is found in the registry that supports the provided + model, or if multiple connectors in the registry claim support for the provided model. """ candidates = [ - connector for connector in API_CONNECTOR_REGISTRY - if connector.supports(estimator) + connector for connector in API_CONNECTOR_REGISTRY if connector.supports(estimator) ] if not candidates: - raise OpenMLException( - "No OpenML API connector found for this estimator." - ) + raise OpenMLException("No OpenML API connector found for this estimator.") if len(candidates) > 1: names = [c.__name__ for c in candidates] raise OpenMLException( - "Multiple API connectors match this estimator:\n" - + "\n".join(f"- {n}" for n in names) + "Multiple API connectors match this estimator:\n" + "\n".join(f"- {n}" for n in names) ) return candidates[0]() diff --git a/openml/extensions/serialization/__init__.py b/openml/extensions/serialization/__init__.py index e8bf5d200..f21df84bd 100644 --- a/openml/extensions/serialization/__init__.py +++ b/openml/extensions/serialization/__init__.py @@ -2,4 +2,4 @@ """Base classes for estimator serializors.""" -from openml.extensions.serialization.base import ModelSerializer \ No newline at end of file +from openml.extensions.serialization.base import ModelSerializer diff --git a/openml/extensions/serialization/base.py b/openml/extensions/serialization/base.py index 2e275af2e..96026fc56 100644 --- a/openml/extensions/serialization/base.py +++ b/openml/extensions/serialization/base.py @@ -2,11 +2,14 @@ """Base class for estimator serializors.""" +from __future__ import annotations + from abc import ABC, abstractmethod -from typing import Any +from typing import TYPE_CHECKING, Any -from openml.flows import OpenMLFlow -from openml.runs.trace import OpenMLTraceIteration +if TYPE_CHECKING: + from openml.flows import OpenMLFlow + from openml.runs.trace import OpenMLTraceIteration class ModelSerializer(ABC): @@ -121,4 +124,4 @@ def instantiate_model_from_hpo_class( ------- Any """ - # TODO a trace belongs to a run and therefore a flow -> simplify this part of the interface! \ No newline at end of file + # TODO a trace belongs to a run and therefore a flow -> simplify this part of the interface! 
From 1365bf62701d274f50cc298b8ce297efe35cb3e8 Mon Sep 17 00:00:00 2001 From: Jigyasu Date: Fri, 2 Jan 2026 14:32:07 +0530 Subject: [PATCH 03/15] correct openml exception --- openml/extensions/registry.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/openml/extensions/registry.py b/openml/extensions/registry.py index 0bdd9cd2f..4cb1a2d84 100644 --- a/openml/extensions/registry.py +++ b/openml/extensions/registry.py @@ -5,7 +5,7 @@ from openml_sklearn import SklearnExtension -from openml.exceptions import OpenMLException +from openml.exceptions import PyOpenMLError if TYPE_CHECKING: from openml.extensions.connectors import OpenMLAPIConnector @@ -46,11 +46,11 @@ def resolve_api_connector(estimator: Any) -> OpenMLAPIConnector: ] if not candidates: - raise OpenMLException("No OpenML API connector found for this estimator.") + raise PyOpenMLError("No OpenML API connector found for this estimator.") if len(candidates) > 1: names = [c.__name__ for c in candidates] - raise OpenMLException( + raise PyOpenMLError( "Multiple API connectors match this estimator:\n" + "\n".join(f"- {n}" for n in names) ) From 67c0efb92a1b615f0e511ad4bbbb64c76b6d59d3 Mon Sep 17 00:00:00 2001 From: Jigyasu Date: Fri, 2 Jan 2026 14:51:42 +0530 Subject: [PATCH 04/15] use __all__ for imports in __init__ --- openml/extensions/connectors/__init__.py | 2 ++ openml/extensions/execution/__init__.py | 2 ++ openml/extensions/serialization/__init__.py | 2 ++ 3 files changed, 6 insertions(+) diff --git a/openml/extensions/connectors/__init__.py b/openml/extensions/connectors/__init__.py index d9ebb7fd8..31d358d70 100644 --- a/openml/extensions/connectors/__init__.py +++ b/openml/extensions/connectors/__init__.py @@ -3,3 +3,5 @@ """Base classes for OpenML API connectors.""" from openml.extensions.connectors.base import OpenMLAPIConnector + +__all__ = ["OpenMLAPIConnector"] diff --git a/openml/extensions/execution/__init__.py b/openml/extensions/execution/__init__.py index 245a16db4..0939998e8 100644 --- a/openml/extensions/execution/__init__.py +++ b/openml/extensions/execution/__init__.py @@ -3,3 +3,5 @@ """Base class for estimator executors.""" from openml.extensions.execution.base import ModelExecutor + +__all__ = ["ModelExecutor"] diff --git a/openml/extensions/serialization/__init__.py b/openml/extensions/serialization/__init__.py index f21df84bd..df618b60f 100644 --- a/openml/extensions/serialization/__init__.py +++ b/openml/extensions/serialization/__init__.py @@ -3,3 +3,5 @@ """Base classes for estimator serializors.""" from openml.extensions.serialization.base import ModelSerializer + +__all__ = ["ModelSerializer"] From e5850efc932e3f550b31270c31c3bec3dd5a36bb Mon Sep 17 00:00:00 2001 From: Jigyasu Date: Fri, 2 Jan 2026 16:08:00 +0530 Subject: [PATCH 05/15] update registry --- openml/extensions/connectors/base.py | 3 +++ openml/extensions/registry.py | 22 ++++++++-------------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/openml/extensions/connectors/base.py b/openml/extensions/connectors/base.py index df753c04e..398c3c656 100644 --- a/openml/extensions/connectors/base.py +++ b/openml/extensions/connectors/base.py @@ -1,4 +1,7 @@ # License: BSD 3-Clause + +"""Base class for OpenML API connectors.""" + from __future__ import annotations from abc import ABC, abstractmethod diff --git a/openml/extensions/registry.py b/openml/extensions/registry.py index 4cb1a2d84..23d220898 100644 --- a/openml/extensions/registry.py +++ b/openml/extensions/registry.py @@ -1,4 +1,7 @@ # License: BSD 
3-Clause + +"""Extension registry.""" + from __future__ import annotations from typing import TYPE_CHECKING, Any @@ -17,7 +20,7 @@ def resolve_api_connector(estimator: Any) -> OpenMLAPIConnector: """ - Identifies and returns the appropriate OpenML API connector for a given estimator. + Identify and return the appropriate OpenML API connector for a given estimator. This function iterates through the global ``API_CONNECTOR_REGISTRY`` to find a connector class that supports the provided estimator object. If exactly one @@ -41,17 +44,8 @@ def resolve_api_connector(estimator: Any) -> OpenMLAPIConnector: model, or if multiple connectors in the registry claim support for the provided model. """ - candidates = [ - connector for connector in API_CONNECTOR_REGISTRY if connector.supports(estimator) - ] - - if not candidates: - raise PyOpenMLError("No OpenML API connector found for this estimator.") - - if len(candidates) > 1: - names = [c.__name__ for c in candidates] - raise PyOpenMLError( - "Multiple API connectors match this estimator:\n" + "\n".join(f"- {n}" for n in names) - ) + for connector_cls in API_CONNECTOR_REGISTRY: + if connector_cls.supports(estimator): + return connector_cls() - return candidates[0]() + raise PyOpenMLError("No OpenML API connector supports this estimator.") From 00da7a9691add4f4c3ceaa099573de61779c8fab Mon Sep 17 00:00:00 2001 From: Jigyasu Date: Mon, 5 Jan 2026 12:17:56 +0530 Subject: [PATCH 06/15] update registry and file structure --- openml/extensions/base/__init__.py | 13 ++ .../base.py => base/_connector.py} | 7 +- .../{execution/base.py => base/_executor.py} | 72 +++++++--- openml/extensions/base/_serializer.py | 73 ++++++++++ openml/extensions/connectors/__init__.py | 7 - openml/extensions/execution/__init__.py | 7 - openml/extensions/registry.py | 15 +-- openml/extensions/serialization/__init__.py | 7 - openml/extensions/serialization/base.py | 127 ------------------ 9 files changed, 151 insertions(+), 177 deletions(-) create mode 100644 openml/extensions/base/__init__.py rename openml/extensions/{connectors/base.py => base/_connector.py} (69%) rename openml/extensions/{execution/base.py => base/_executor.py} (64%) create mode 100644 openml/extensions/base/_serializer.py delete mode 100644 openml/extensions/connectors/__init__.py delete mode 100644 openml/extensions/execution/__init__.py delete mode 100644 openml/extensions/serialization/__init__.py delete mode 100644 openml/extensions/serialization/base.py diff --git a/openml/extensions/base/__init__.py b/openml/extensions/base/__init__.py new file mode 100644 index 000000000..d85c0b268 --- /dev/null +++ b/openml/extensions/base/__init__.py @@ -0,0 +1,13 @@ +# License: BSD 3-Clause + +"""Base classes for OpenML extensions.""" + +from openml.extensions.base._connector import OpenMLAPIConnector +from openml.extensions.base._executor import ModelExecutor +from openml.extensions.base._serializer import ModelSerializer + +__all__ = [ + "ModelExecutor", + "ModelSerializer", + "OpenMLAPIConnector", +] diff --git a/openml/extensions/connectors/base.py b/openml/extensions/base/_connector.py similarity index 69% rename from openml/extensions/connectors/base.py rename to openml/extensions/base/_connector.py index 398c3c656..9ad66307a 100644 --- a/openml/extensions/connectors/base.py +++ b/openml/extensions/base/_connector.py @@ -8,8 +8,7 @@ from typing import TYPE_CHECKING, Any if TYPE_CHECKING: - from openml.extensions.execution import ModelExecutor - from openml.extensions.serialization import ModelSerializer + from 
openml.extensions.base import ModelExecutor, ModelSerializer class OpenMLAPIConnector(ABC): @@ -25,5 +24,5 @@ def executor(self) -> ModelExecutor: @classmethod @abstractmethod - def supports(cls, model: Any) -> bool: - """High-level check if this connector supports the model.""" + def supports(cls, estimator: Any) -> bool: + """High-level check if this connector supports the estimator instance or flow.""" diff --git a/openml/extensions/execution/base.py b/openml/extensions/base/_executor.py similarity index 64% rename from openml/extensions/execution/base.py rename to openml/extensions/base/_executor.py index 018fe2964..67184a3b3 100644 --- a/openml/extensions/execution/base.py +++ b/openml/extensions/base/_executor.py @@ -12,29 +12,14 @@ import numpy as np import scipy.sparse - from openml.runs.trace import OpenMLRunTrace + from openml.flows import OpenMLFlow + from openml.runs.trace import OpenMLRunTrace, OpenMLTraceIteration from openml.tasks.task import OpenMLTask class ModelExecutor(ABC): """Define runtime execution semantics for a specific API type.""" - @classmethod - @abstractmethod - def can_handle_model(cls, model: Any) -> bool: - """Check whether a model flow can be handled by this extension. - - This is typically done by checking the type of the model, or the package it belongs to. - - Parameters - ---------- - model : Any - - Returns - ------- - bool - """ - @abstractmethod def seed_model(self, model: Any, seed: int | None) -> Any: """Set the seed of all the unseeded components of a model and return the seeded model. @@ -111,3 +96,56 @@ def check_if_model_fitted(self, model: Any) -> bool: ------- bool """ + + @abstractmethod + def obtain_parameter_values( + self, + flow: OpenMLFlow, + model: Any = None, + ) -> list[dict[str, Any]]: + """Extracts all parameter settings required for the flow from the model. + + If no explicit model is provided, the parameters will be extracted from `flow.model` + instead. + + Parameters + ---------- + flow : OpenMLFlow + OpenMLFlow object (containing flow ids, i.e., it has to be downloaded from the server) + + model: Any, optional (default=None) + The model from which to obtain the parameter values. Must match the flow signature. + If None, use the model specified in ``OpenMLFlow.model``. + + Returns + ------- + list + A list of dicts, where each dict has the following entries: + - ``oml:name`` : str: The OpenML parameter name + - ``oml:value`` : mixed: A representation of the parameter value + - ``oml:component`` : int: flow id to which the parameter belongs + """ + + # Abstract methods for hyperparameter optimization + + @abstractmethod + def instantiate_model_from_hpo_class( + self, + model: Any, + trace_iteration: OpenMLTraceIteration, + ) -> Any: + """Instantiate a base model which can be searched over by the hyperparameter optimization + model. + + Parameters + ---------- + model : Any + A hyperparameter optimization model which defines the model to be instantiated. + trace_iteration : OpenMLTraceIteration + Describing the hyperparameter settings to instantiate. + + Returns + ------- + Any + """ + # TODO a trace belongs to a run and therefore a flow -> simplify this part of the interface! 
diff --git a/openml/extensions/base/_serializer.py b/openml/extensions/base/_serializer.py new file mode 100644 index 000000000..f2673d4c4 --- /dev/null +++ b/openml/extensions/base/_serializer.py @@ -0,0 +1,73 @@ +# License: BSD 3-Clause + +"""Base class for estimator serializors.""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from openml.flows import OpenMLFlow + + +class ModelSerializer(ABC): + """Handle the conversion between estimator instances and OpenML Flows.""" + + @classmethod + @abstractmethod + def can_handle_model(cls, model: Any) -> bool: + """Check whether a model flow can be handled by this extension. + + This is typically done by checking the type of the model, or the package it belongs to. + + Parameters + ---------- + model : Any + + Returns + ------- + bool + """ + + @abstractmethod + def model_to_flow(self, model: Any) -> OpenMLFlow: + """Transform a model to a flow for uploading it to OpenML. + + Parameters + ---------- + model : Any + + Returns + ------- + OpenMLFlow + """ + + @abstractmethod + def flow_to_model( + self, + flow: OpenMLFlow, + initialize_with_defaults: bool = False, # noqa: FBT001, FBT002 + strict_version: bool = True, # noqa: FBT002, FBT001 + ) -> Any: + """Instantiate a model from the flow representation. + + Parameters + ---------- + flow : OpenMLFlow + + initialize_with_defaults : bool, optional (default=False) + If this flag is set, the hyperparameter values of flows will be + ignored and a flow with its defaults is returned. + + strict_version : bool, default=True + Whether to fail if version requirements are not fulfilled. + + Returns + ------- + Any + """ + + @abstractmethod + def get_version_information(self) -> list[str]: + """Return dependency and version information.""" diff --git a/openml/extensions/connectors/__init__.py b/openml/extensions/connectors/__init__.py deleted file mode 100644 index 31d358d70..000000000 --- a/openml/extensions/connectors/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# License: BSD 3-Clause - -"""Base classes for OpenML API connectors.""" - -from openml.extensions.connectors.base import OpenMLAPIConnector - -__all__ = ["OpenMLAPIConnector"] diff --git a/openml/extensions/execution/__init__.py b/openml/extensions/execution/__init__.py deleted file mode 100644 index 0939998e8..000000000 --- a/openml/extensions/execution/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# License: BSD 3-Clause - -"""Base class for estimator executors.""" - -from openml.extensions.execution.base import ModelExecutor - -__all__ = ["ModelExecutor"] diff --git a/openml/extensions/registry.py b/openml/extensions/registry.py index 23d220898..e3bc0788e 100644 --- a/openml/extensions/registry.py +++ b/openml/extensions/registry.py @@ -6,15 +6,14 @@ from typing import TYPE_CHECKING, Any -from openml_sklearn import SklearnExtension - from openml.exceptions import PyOpenMLError +from openml.extensions.sklearn import SklearnAPIConnector if TYPE_CHECKING: - from openml.extensions.connectors import OpenMLAPIConnector + from openml.extensions.base import OpenMLAPIConnector API_CONNECTOR_REGISTRY: list[type[OpenMLAPIConnector]] = [ - SklearnExtension, # TODO: I need to refactor SklearnExtension + SklearnAPIConnector, ] @@ -23,14 +22,14 @@ def resolve_api_connector(estimator: Any) -> OpenMLAPIConnector: Identify and return the appropriate OpenML API connector for a given estimator. 
This function iterates through the global ``API_CONNECTOR_REGISTRY`` to find - a connector class that supports the provided estimator object. If exactly one - matching connector is found, it is instantiated and returned. + a connector class that supports the provided estimator instance or OpenML flow. + If a matching connector is found, it is instantiated and returned. Parameters ---------- estimator : Any - The estimator object (e.g., a scikit-learn estimator) for which an API - connector is required. + The estimator instance (e.g., a scikit-learn estimator) or OpenML flow for + which an API connector is required. Returns ------- diff --git a/openml/extensions/serialization/__init__.py b/openml/extensions/serialization/__init__.py deleted file mode 100644 index df618b60f..000000000 --- a/openml/extensions/serialization/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# License: BSD 3-Clause - -"""Base classes for estimator serializors.""" - -from openml.extensions.serialization.base import ModelSerializer - -__all__ = ["ModelSerializer"] diff --git a/openml/extensions/serialization/base.py b/openml/extensions/serialization/base.py deleted file mode 100644 index 96026fc56..000000000 --- a/openml/extensions/serialization/base.py +++ /dev/null @@ -1,127 +0,0 @@ -# License: BSD 3-Clause - -"""Base class for estimator serializors.""" - -from __future__ import annotations - -from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Any - -if TYPE_CHECKING: - from openml.flows import OpenMLFlow - from openml.runs.trace import OpenMLTraceIteration - - -class ModelSerializer(ABC): - """Handle the conversion between estimator instances and OpenML Flows.""" - - @classmethod - @abstractmethod - def can_handle_model(cls, model: Any) -> bool: - """Check whether a model flow can be handled by this extension. - - This is typically done by checking the type of the model, or the package it belongs to. - - Parameters - ---------- - model : Any - - Returns - ------- - bool - """ - - @abstractmethod - def model_to_flow(self, model: Any) -> OpenMLFlow: - """Transform a model to a flow for uploading it to OpenML. - - Parameters - ---------- - model : Any - - Returns - ------- - OpenMLFlow - """ - - @abstractmethod - def flow_to_model( - self, - flow: OpenMLFlow, - initialize_with_defaults: bool = False, # noqa: FBT001, FBT002 - strict_version: bool = True, # noqa: FBT002, FBT001 - ) -> Any: - """Instantiate a model from the flow representation. - - Parameters - ---------- - flow : OpenMLFlow - - initialize_with_defaults : bool, optional (default=False) - If this flag is set, the hyperparameter values of flows will be - ignored and a flow with its defaults is returned. - - strict_version : bool, default=True - Whether to fail if version requirements are not fulfilled. - - Returns - ------- - Any - """ - - @abstractmethod - def obtain_parameter_values( - self, - flow: OpenMLFlow, - model: Any = None, - ) -> list[dict[str, Any]]: - """Extracts all parameter settings required for the flow from the model. - - If no explicit model is provided, the parameters will be extracted from `flow.model` - instead. - - Parameters - ---------- - flow : OpenMLFlow - OpenMLFlow object (containing flow ids, i.e., it has to be downloaded from the server) - - model: Any, optional (default=None) - The model from which to obtain the parameter values. Must match the flow signature. - If None, use the model specified in ``OpenMLFlow.model``. 
- - Returns - ------- - list - A list of dicts, where each dict has the following entries: - - ``oml:name`` : str: The OpenML parameter name - - ``oml:value`` : mixed: A representation of the parameter value - - ``oml:component`` : int: flow id to which the parameter belongs - """ - - @abstractmethod - def get_version_information(self) -> list[str]: - """Return dependency and version information.""" - - # Abstract methods for hyperparameter optimization - - @abstractmethod - def instantiate_model_from_hpo_class( - self, - model: Any, - trace_iteration: OpenMLTraceIteration, - ) -> Any: - """Instantiate a base model which can be searched over by the hyperparameter optimization - model. - - Parameters - ---------- - model : Any - A hyperparameter optimization model which defines the model to be instantiated. - trace_iteration : OpenMLTraceIteration - Describing the hyperparameter settings to instantiate. - - Returns - ------- - Any - """ - # TODO a trace belongs to a run and therefore a flow -> simplify this part of the interface! From 373fa53697e00ecacfdb390f1d326a9a95f468df Mon Sep 17 00:00:00 2001 From: Jigyasu Date: Mon, 5 Jan 2026 12:30:32 +0530 Subject: [PATCH 07/15] [DO NOT MERGE] Refactor openml-sklearn back into openml-python --- openml/extensions/sklearn/__init__.py | 13 + openml/extensions/sklearn/connector.py | 68 + openml/extensions/sklearn/executor.py | 784 ++++++++++++ openml/extensions/sklearn/serializer.py | 1508 +++++++++++++++++++++++ 4 files changed, 2373 insertions(+) create mode 100644 openml/extensions/sklearn/__init__.py create mode 100644 openml/extensions/sklearn/connector.py create mode 100644 openml/extensions/sklearn/executor.py create mode 100644 openml/extensions/sklearn/serializer.py diff --git a/openml/extensions/sklearn/__init__.py b/openml/extensions/sklearn/__init__.py new file mode 100644 index 000000000..4e22407be --- /dev/null +++ b/openml/extensions/sklearn/__init__.py @@ -0,0 +1,13 @@ +# License: BSD 3-Clause + +"""OpenML extension for Scikit-learn.""" + +from openml.extensions.sklearn.connector import SklearnAPIConnector +from openml.extensions.sklearn.executor import SklearnExecutor +from openml.extensions.sklearn.serializer import SklearnSerializer + +__all__ = [ + "SklearnAPIConnector", + "SklearnExecutor", + "SklearnSerializer", +] diff --git a/openml/extensions/sklearn/connector.py b/openml/extensions/sklearn/connector.py new file mode 100644 index 000000000..795baf640 --- /dev/null +++ b/openml/extensions/sklearn/connector.py @@ -0,0 +1,68 @@ +# License: BSD 3-Clause + +"""Connector for the Scikit-learn extension.""" + +from __future__ import annotations + +from openml.extensions.base import OpenMLAPIConnector +from openml.extensions.sklearn.executor import SklearnExecutor +from openml.extensions.sklearn.serializer import SklearnSerializer +from openml.flows import OpenMLFlow + + +class SklearnAPIConnector(OpenMLAPIConnector): + """ + Connector for the Scikit-learn extension. + + This class provides the interface to connect Scikit-learn models and flows + to the OpenML API, handling both serialization and execution compatibility checks. + """ + + def serializer(self) -> SklearnSerializer: + """ + Return the serializer for Scikit-learn estimators. + + Returns + ------- + SklearnSerializer + The serializer instance capable of handling Scikit-learn estimator. + """ + return SklearnSerializer() + + def executor(self) -> SklearnExecutor: + """ + Return the executor for Scikit-learn estimators. 
+ + Returns + ------- + SklearnExecutor + The executor instance capable of running Scikit-learn estimators. + """ + return SklearnExecutor() + + @classmethod + def supports(cls, estimator) -> bool: + """ + Check if this connector supports the given model or flow. + + Parameters + ---------- + estimator : Any or OpenMLFlow + The Scikit-learn estimator instance or OpenMLFlow object. + + Returns + ------- + bool + True if both the serializer and executor can handle the provided + estimator or flow, False otherwise. + """ + serializer = SklearnSerializer() + SklearnExecutor() + + if isinstance(estimator, OpenMLFlow): + support = serializer.can_handle_flow(estimator) + + else: + support = serializer.can_handle_model(estimator) + + return support diff --git a/openml/extensions/sklearn/executor.py b/openml/extensions/sklearn/executor.py new file mode 100644 index 000000000..69d54a102 --- /dev/null +++ b/openml/extensions/sklearn/executor.py @@ -0,0 +1,784 @@ +# License: BSD 3-Clause +from __future__ import annotations + +import json +import logging +import time +import warnings +from collections import OrderedDict +from typing import TYPE_CHECKING, Any, List, cast + +import numpy as np +import pandas as pd +import sklearn.base +import sklearn.model_selection +import sklearn.pipeline + +import openml +from openml.exceptions import PyOpenMLError +from openml.extensions.base import ModelExecutor +from openml.flows import OpenMLFlow +from openml.runs.trace import PREFIX, OpenMLRunTrace, OpenMLTraceIteration +from openml.tasks import ( + OpenMLClassificationTask, + OpenMLClusteringTask, + OpenMLLearningCurveTask, + OpenMLRegressionTask, + OpenMLSupervisedTask, + OpenMLTask, +) + +if TYPE_CHECKING: + import scipy.sparse + +logger = logging.getLogger(__name__) + +SKLEARN_PIPELINE_STRING_COMPONENTS = ("drop", "passthrough") +COMPONENT_REFERENCE = "component_reference" +COMPOSITION_STEP_CONSTANT = "composition_step_constant" + + +class SklearnExecutor(ModelExecutor): + """Executor for Scikit-learn estimators.""" + + def seed_model(self, model: Any, seed: int | None = None) -> Any: # noqa: C901 + """Set the random state of all the unseeded components of a model and return the seeded + model. + + Required so that all seed information can be uploaded to OpenML for reproducible results. + + Models that are already seeded will maintain the seed. In this case, + only integer seeds are allowed (An exception is raised when a RandomState was used as + seed). + + Parameters + ---------- + model : sklearn model + The model to be seeded + seed : int + The seed to initialize the RandomState with. Unseeded subcomponents + will be seeded with a random number from the RandomState. + + Returns + ------- + Any + """ + + def _seed_current_object(current_value): + if isinstance(current_value, int): # acceptable behaviour + return False + + if isinstance(current_value, np.random.RandomState): + raise ValueError( + "Models initialized with a RandomState object are not " + "supported. Please seed with an integer. ", + ) + + if current_value is not None: + raise ValueError( + "Models should be seeded with int or None (this should never happen). 
", + ) + + return True + + rs = np.random.RandomState(seed) + model_params = model.get_params() + random_states = {} + for param_name in sorted(model_params): + if "random_state" in param_name: + current_value = model_params[param_name] + # important to draw the value at this point (and not in the if + # statement) this way we guarantee that if a different set of + # subflows is seeded, the same number of the random generator is + # used + new_value = rs.randint(0, 2**16) + if _seed_current_object(current_value): + random_states[param_name] = new_value + + # Also seed CV objects! + elif isinstance(model_params[param_name], sklearn.model_selection.BaseCrossValidator): + if not hasattr(model_params[param_name], "random_state"): + continue + + current_value = model_params[param_name].random_state + new_value = rs.randint(0, 2**16) + if _seed_current_object(current_value): + model_params[param_name].random_state = new_value + + model.set_params(**random_states) + return model + + def check_if_model_fitted(self, model: Any) -> bool: + """Returns True/False denoting if the model has already been fitted/trained + + Parameters + ---------- + model : Any + + Returns + ------- + bool + """ + from sklearn.exceptions import NotFittedError + from sklearn.utils.validation import check_is_fitted + + try: + # check if model is fitted + check_is_fitted(model) + + # Creating random dummy data of arbitrary size + dummy_data = np.random.uniform(size=(10, 3)) # noqa: NPY002 + # Using 'predict' instead of 'sklearn.utils.validation.check_is_fitted' for a more + # robust check that works across sklearn versions and models. Internally, 'predict' + # should call 'check_is_fitted' for every concerned attribute, thus offering a more + # assured check than explicit calls to 'check_is_fitted' + model.predict(dummy_data) + # Will reach here if the model was fit on a dataset with 3 features + return True + except NotFittedError: # needs to be the first exception to be caught + # Model is not fitted, as is required + return False + except ValueError: + # Will reach here if the model was fit on a dataset with more or less than 3 features + return True + + def _run_model_on_fold( # noqa: PLR0915, PLR0913, C901, PLR0912 + self, + model: Any, + task: OpenMLTask, + X_train: np.ndarray | scipy.sparse.spmatrix | pd.DataFrame, + rep_no: int, + fold_no: int, + y_train: np.ndarray | None = None, + X_test: np.ndarray | scipy.sparse.spmatrix | pd.DataFrame | None = None, + ) -> tuple[ + np.ndarray, + pd.DataFrame | None, + OrderedDict[str, float], + OpenMLRunTrace | None, + ]: + """Run a model on a repeat,fold,subsample triplet of the task and return prediction + information. + + Furthermore, it will measure run time measures in case multi-core behaviour allows this. + * exact user cpu time will be measured if the number of cores is set (recursive throughout + the model) exactly to 1 + * wall clock time will be measured if the number of cores is set (recursive throughout the + model) to any given number (but not when it is set to -1) + + Returns the data that is necessary to construct the OpenML Run object. Is used by + run_task_get_arff_content. Do not use this function unless you know what you are doing. + + Parameters + ---------- + model : Any + The UNTRAINED model to run. The model instance will be copied and not altered. + task : OpenMLTask + The task to run the model on. + X_train : array-like + Training data for the given repetition and fold. 
+ rep_no : int + The repeat of the experiment (0-based; in case of 1 time CV, always 0) + fold_no : int + The fold nr of the experiment (0-based; in case of holdout, always 0) + y_train : Optional[np.ndarray] (default=None) + Target attributes for supervised tasks. In case of classification, these are integer + indices to the potential classes specified by dataset. + X_test : Optional, array-like (default=None) + Test attributes to test for generalization in supervised tasks. + + Returns + ------- + pred_y : np.ndarray + Predictions on the training/test set, depending on the task type. + For supervised tasks, predictions are on the test set. + For unsupervised tasks, predictions are on the training set. + proba_y : pd.DataFrame, optional + Predicted probabilities for the test set. + None, if task is not Classification or Learning Curve prediction. + user_defined_measures : OrderedDict[str, float] + User defined measures that were generated on this fold + trace : OpenMLRunTrace, optional + arff trace object from a fitted model and the trace content obtained by + repeatedly calling ``run_model_on_task`` + """ + + def _prediction_to_probabilities( + y: np.ndarray | list, + model_classes: list[Any], + class_labels: list[str] | None, + ) -> pd.DataFrame: + """Transforms predicted probabilities to match with OpenML class indices. + + Parameters + ---------- + y : np.ndarray + Predicted probabilities (possibly omitting classes if they were not present in the + training data). + model_classes : list + List of classes known_predicted by the model, ordered by their index. + class_labels : list + List of classes as stored in the task object fetched from server. + + Returns + ------- + pd.DataFrame + """ + if class_labels is None: + raise ValueError("The task has no class labels") + + if isinstance(y_train, np.ndarray) and isinstance(class_labels[0], str): + # mapping (decoding) the predictions to the categories + # creating a separate copy to not change the expected pred_y type + y = [class_labels[pred] for pred in y] # list or numpy array of predictions + + # model_classes: sklearn classifier mapping from original array id to + # prediction index id + if not isinstance(model_classes, list): + raise ValueError("please convert model classes to list prior to calling this fn") + + # DataFrame allows more accurate mapping of classes as column names + result = pd.DataFrame( + 0, + index=np.arange(len(y)), + columns=model_classes, + dtype=np.float32, + ) + for obs, prediction in enumerate(y): + result.loc[obs, prediction] = 1.0 + return result + + if isinstance(task, OpenMLSupervisedTask): + if y_train is None: + raise TypeError("argument y_train must not be of type None") + if X_test is None: + raise TypeError("argument X_test must not be of type None") + + model_copy = sklearn.base.clone(model, safe=True) + # sanity check: prohibit users from optimizing n_jobs + self._prevent_optimize_n_jobs(model_copy) + # measures and stores runtimes + user_defined_measures = OrderedDict() # type: 'OrderedDict[str, float]' + try: + # for measuring runtime. 
Only available since Python 3.3 + modelfit_start_cputime = time.process_time() + modelfit_start_walltime = time.time() + + if isinstance(task, OpenMLSupervisedTask): + model_copy.fit(X_train, y_train) # type: ignore + elif isinstance(task, OpenMLClusteringTask): + model_copy.fit(X_train) # type: ignore + + modelfit_dur_cputime = (time.process_time() - modelfit_start_cputime) * 1000 + modelfit_dur_walltime = (time.time() - modelfit_start_walltime) * 1000 + + user_defined_measures["usercpu_time_millis_training"] = modelfit_dur_cputime + refit_time = model_copy.refit_time_ * 1000 if hasattr(model_copy, "refit_time_") else 0 # type: ignore + user_defined_measures["wall_clock_time_millis_training"] = modelfit_dur_walltime + + except AttributeError as e: + # typically happens when training a regressor on classification task + raise PyOpenMLError(str(e)) from e + + if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)): + # search for model classes_ (might differ depending on modeltype) + # first, pipelines are a special case (these don't have a classes_ + # object, but rather borrows it from the last step. We do this manually, + # because of the BaseSearch check) + if isinstance(model_copy, sklearn.pipeline.Pipeline): + used_estimator = model_copy.steps[-1][-1] + else: + used_estimator = model_copy + + if self._is_hpo_class(used_estimator): + model_classes = used_estimator.best_estimator_.classes_ + else: + model_classes = used_estimator.classes_ + + if not isinstance(model_classes, list): + model_classes = model_classes.tolist() + + # to handle the case when dataset is numpy and categories are encoded + # however the class labels stored in task are still categories + if isinstance(y_train, np.ndarray) and isinstance( + cast("List", task.class_labels)[0], + str, + ): + model_classes = [cast("List[str]", task.class_labels)[i] for i in model_classes] + + modelpredict_start_cputime = time.process_time() + modelpredict_start_walltime = time.time() + + # In supervised learning this returns the predictions for Y, in clustering + # it returns the clusters + if isinstance(task, OpenMLSupervisedTask): + pred_y = model_copy.predict(X_test) + elif isinstance(task, OpenMLClusteringTask): + pred_y = model_copy.predict(X_train) + else: + raise ValueError(task) + + modelpredict_duration_cputime = (time.process_time() - modelpredict_start_cputime) * 1000 + user_defined_measures["usercpu_time_millis_testing"] = modelpredict_duration_cputime + user_defined_measures["usercpu_time_millis"] = ( + modelfit_dur_cputime + modelpredict_duration_cputime + ) + modelpredict_duration_walltime = (time.time() - modelpredict_start_walltime) * 1000 + user_defined_measures["wall_clock_time_millis_testing"] = modelpredict_duration_walltime + user_defined_measures["wall_clock_time_millis"] = ( + modelfit_dur_walltime + modelpredict_duration_walltime + refit_time + ) + + if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)): + try: + proba_y = model_copy.predict_proba(X_test) + proba_y = pd.DataFrame(proba_y, columns=model_classes) # handles X_test as numpy + except AttributeError: # predict_proba is not available when probability=False + proba_y = _prediction_to_probabilities(pred_y, model_classes, task.class_labels) + + if task.class_labels is not None: + if proba_y.shape[1] != len(task.class_labels): + # Remap the probabilities in case there was a class missing + # at training time. By default, the classification targets + # are mapped to be zero-based indices to the actual classes. 
+ # Therefore, the model_classes contain the correct indices to + # the correct probability array. Example: + # classes in the dataset: 0, 1, 2, 3, 4, 5 + # classes in the training set: 0, 1, 2, 4, 5 + # then we need to add a column full of zeros into the probabilities + # for class 3 because the rest of the library expects that the + # probabilities are ordered the same way as the classes are ordered). + message = ( + f"Estimator only predicted for {proba_y.shape[1]}/{len(task.class_labels)}" + " classes!" + ) + warnings.warn(message, stacklevel=2) + openml.config.logger.warning(message) + + for _i, col in enumerate(task.class_labels): + # adding missing columns with 0 probability + if col not in model_classes: + proba_y[col] = 0 + # We re-order the columns to move possibly added missing columns into place. + proba_y = proba_y[task.class_labels] + else: + raise ValueError("The task has no class labels") + + if not np.all(set(proba_y.columns) == set(task.class_labels)): + missing_cols = list(set(task.class_labels) - set(proba_y.columns)) + raise ValueError("Predicted probabilities missing for the columns: ", missing_cols) + + elif isinstance(task, (OpenMLRegressionTask, OpenMLClusteringTask)): + proba_y = None + else: + raise TypeError(type(task)) + + if self._is_hpo_class(model_copy): + trace_data = self._extract_trace_data(model_copy, rep_no, fold_no) + trace: OpenMLRunTrace | None = self._obtain_arff_trace( + model_copy, + trace_data, + ) + else: + trace = None + + return pred_y, proba_y, user_defined_measures, trace + + def obtain_parameter_values( # noqa: C901, PLR0915 + self, + flow: OpenMLFlow, + model: Any = None, + ) -> list[dict[str, Any]]: + """Extracts all parameter settings required for the flow from the model. + + If no explicit model is provided, the parameters will be extracted from `flow.model` + instead. + + Parameters + ---------- + flow : OpenMLFlow + OpenMLFlow object (containing flow ids, i.e., it has to be downloaded from the server) + + model: Any, optional (default=None) + The model from which to obtain the parameter values. Must match the flow signature. + If None, use the model specified in ``OpenMLFlow.model``. + + Returns + ------- + list + A list of dicts, where each dict has the following entries: + - ``oml:name`` : str: The OpenML parameter name + - ``oml:value`` : mixed: A representation of the parameter value + - ``oml:component`` : int: flow id to which the parameter belongs + """ + openml.flows.functions._check_flow_for_server_id(flow) + + def get_flow_dict(_flow): + flow_map = {_flow.name: _flow.flow_id} + for subflow in _flow.components: + flow_map.update(get_flow_dict(_flow.components[subflow])) + return flow_map + + def extract_parameters( # noqa: PLR0915, PLR0912, C901 + _flow, + _flow_dict, + component_model, + _main_call=False, # noqa: FBT002 + main_id=None, + ): + def is_subcomponent_specification(values): + # checks whether the current value can be a specification of + # subcomponents, as for example the value for steps parameter + # (in Pipeline) or transformers parameter (in + # ColumnTransformer). + return ( + # Specification requires list/tuple of list/tuple with + # at least length 2. 
+ isinstance(values, (tuple, list)) + and all(isinstance(item, (tuple, list)) and len(item) > 1 for item in values) + # And each component needs to be a flow or interpretable string + and all( + isinstance(item[1], openml.flows.OpenMLFlow) + or ( + isinstance(item[1], str) + and item[1] in SKLEARN_PIPELINE_STRING_COMPONENTS + ) + for item in values + ) + ) + + # _flow is openml flow object, _param dict maps from flow name to flow + # id for the main call, the param dict can be overridden (useful for + # unit tests / sentinels) this way, for flows without subflows we do + # not have to rely on _flow_dict + exp_parameters = set(_flow.parameters) + if ( + isinstance(component_model, str) + and component_model in SKLEARN_PIPELINE_STRING_COMPONENTS + ): + model_parameters = set() + else: + model_parameters = set(component_model.get_params(deep=False)) + if len(exp_parameters.symmetric_difference(model_parameters)) != 0: + flow_params = sorted(exp_parameters) + model_params = sorted(model_parameters) + raise ValueError( + "Parameters of the model do not match the " + "parameters expected by the " + "flow:\nexpected flow parameters: " + f"{flow_params}\nmodel parameters: {model_params}", + ) + exp_components = set(_flow.components) + if ( + isinstance(component_model, str) + and component_model in SKLEARN_PIPELINE_STRING_COMPONENTS + ): + model_components = set() + else: + _ = set(component_model.get_params(deep=False)) + model_components = { + mp + for mp in component_model.get_params(deep=True) + if "__" not in mp and mp not in _ + } + if len(exp_components.symmetric_difference(model_components)) != 0: + is_problem = True + if len(exp_components - model_components) > 0: + # If an expected component is not returned as a component by get_params(), + # this means that it is also a parameter -> we need to check that this is + # actually the case + difference = exp_components - model_components + component_in_model_parameters = [] + for component in difference: + if component in model_parameters: + component_in_model_parameters.append(True) + else: + component_in_model_parameters.append(False) + is_problem = not all(component_in_model_parameters) + if is_problem: + flow_components = sorted(exp_components) + model_components = sorted(model_components) + raise ValueError( + "Subcomponents of the model do not match the " + "parameters expected by the " + "flow:\nexpected flow subcomponents: " + f"{flow_components}\nmodel subcomponents: {model_components}", + ) + + _params = [] + for _param_name in _flow.parameters: + _current = OrderedDict() + _current["oml:name"] = _param_name + + current_param_values = self.model_to_flow(component_model.get_params()[_param_name]) + + # Try to filter out components (a.k.a. subflows) which are + # handled further down in the code (by recursively calling + # this function)! + if isinstance(current_param_values, openml.flows.OpenMLFlow): + continue + + if is_subcomponent_specification(current_param_values): + # complex parameter value, with subcomponents + parsed_values = [] + for subcomponent in current_param_values: + # scikit-learn stores usually tuples in the form + # (name (str), subcomponent (mixed), argument + # (mixed)). OpenML replaces the subcomponent by an + # OpenMLFlow object. + if len(subcomponent) < 2 or len(subcomponent) > 3: + raise ValueError("Component reference should be size {2,3}. 
") + + subcomponent_identifier = subcomponent[0] + subcomponent_flow = subcomponent[1] + if not isinstance(subcomponent_identifier, str): + raise TypeError( + "Subcomponent identifier should be of type string, " + f"but is {type(subcomponent_identifier)}", + ) + if not isinstance(subcomponent_flow, (openml.flows.OpenMLFlow, str)): + if ( + isinstance(subcomponent_flow, str) + and subcomponent_flow in SKLEARN_PIPELINE_STRING_COMPONENTS + ): + pass + else: + raise TypeError( + "Subcomponent flow should be of type flow, but is" + f" {type(subcomponent_flow)}", + ) + + current = { + "oml-python:serialized_object": COMPONENT_REFERENCE, + "value": { + "key": subcomponent_identifier, + "step_name": subcomponent_identifier, + }, + } + if len(subcomponent) == 3: + if not isinstance(subcomponent[2], list) and not isinstance( + subcomponent[2], + OrderedDict, + ): + raise TypeError( + "Subcomponent argument should be list or OrderedDict", + ) + current["value"]["argument_1"] = subcomponent[2] + parsed_values.append(current) + parsed_values = json.dumps(parsed_values) + else: + # vanilla parameter value + parsed_values = json.dumps(current_param_values) + + _current["oml:value"] = parsed_values + if _main_call: + _current["oml:component"] = main_id + else: + _current["oml:component"] = _flow_dict[_flow.name] + _params.append(_current) + + for _identifier in _flow.components: + subcomponent_model = component_model.get_params()[_identifier] + _params.extend( + extract_parameters( + _flow.components[_identifier], + _flow_dict, + subcomponent_model, + ), + ) + return _params + + flow_dict = get_flow_dict(flow) + model = model if model is not None else flow.model + return extract_parameters(flow, flow_dict, model, _main_call=True, main_id=flow.flow_id) + + def _openml_param_name_to_sklearn( + self, + openml_parameter: openml.setups.OpenMLParameter, + flow: OpenMLFlow, + ) -> str: + """ + Converts the name of an OpenMLParameter into the sklean name, given a flow. + + Parameters + ---------- + openml_parameter: OpenMLParameter + The parameter under consideration + + flow: OpenMLFlow + The flow that provides context. + + Returns + ------- + sklearn_parameter_name: str + The name the parameter will have once used in scikit-learn + """ + if not isinstance(openml_parameter, openml.setups.OpenMLParameter): + raise ValueError("openml_parameter should be an instance of OpenMLParameter") + if not isinstance(flow, OpenMLFlow): + raise ValueError("flow should be an instance of OpenMLFlow") + + flow_structure = flow.get_structure("name") + if openml_parameter.flow_name not in flow_structure: + raise ValueError("Obtained OpenMLParameter and OpenMLFlow do not correspond. ") + name = openml_parameter.flow_name # for PEP8 + return "__".join(flow_structure[name] + [openml_parameter.parameter_name]) + + ################################################################################################ + # Methods for hyperparameter optimization + + def _is_hpo_class(self, model: Any) -> bool: + """Check whether the model performs hyperparameter optimization. + + Used to check whether an optimization trace can be extracted from the model after + running it. + + Parameters + ---------- + model : Any + + Returns + ------- + bool + """ + return isinstance(model, sklearn.model_selection._search.BaseSearchCV) + + def instantiate_model_from_hpo_class( + self, + model: Any, + trace_iteration: OpenMLTraceIteration, + ) -> Any: + """Instantiate a ``base_estimator`` which can be searched over by the hyperparameter + optimization model. 
+ + Parameters + ---------- + model : Any + A hyperparameter optimization model which defines the model to be instantiated. + trace_iteration : OpenMLTraceIteration + Describing the hyperparameter settings to instantiate. + + Returns + ------- + Any + """ + if not self._is_hpo_class(model): + raise AssertionError( + f"Flow model {model} is not an instance of" + " sklearn.model_selection._search.BaseSearchCV", + ) + base_estimator = model.estimator + base_estimator.set_params(**trace_iteration.get_parameters()) + return base_estimator + + def _extract_trace_data(self, model, rep_no, fold_no): + """Extracts data from a machine learning model's cross-validation results + and creates an ARFF (Attribute-Relation File Format) trace. + + Parameters + ---------- + model : Any + A fitted hyperparameter optimization model. + rep_no : int + The repetition number. + fold_no : int + The fold number. + + Returns + ------- + A list of ARFF tracecontent. + """ + arff_tracecontent = [] + for itt_no in range(len(model.cv_results_["mean_test_score"])): + # we use the string values for True and False, as it is defined in + # this way by the OpenML server + selected = "false" + if itt_no == model.best_index_: + selected = "true" + test_score = model.cv_results_["mean_test_score"][itt_no] + arff_line = [rep_no, fold_no, itt_no, test_score, selected] + for key in model.cv_results_: + if key.startswith("param_"): + value = model.cv_results_[key][itt_no] + # Built-in serializer does not convert all numpy types, + # these methods convert them to built-in types instead. + if isinstance(value, np.generic): + # For scalars it actually returns scalars, not a list + value = value.tolist() + serialized_value = json.dumps(value) if value is not np.ma.masked else np.nan + arff_line.append(serialized_value) + arff_tracecontent.append(arff_line) + return arff_tracecontent + + def _obtain_arff_trace( + self, + model: Any, + trace_content: list, + ) -> OpenMLRunTrace: + """Create arff trace object from a fitted model and the trace content obtained by + repeatedly calling ``run_model_on_task``. + + Parameters + ---------- + model : Any + A fitted hyperparameter optimization model. + + trace_content : List[List] + Trace content obtained by ``openml.runs.run_flow_on_task``. 
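+            Each row is expected to follow the layout produced by ``_extract_trace_data``,
+            i.e. ``[repeat, fold, iteration, evaluation, selected, <serialized parameters>]``.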
+ + Returns + ------- + OpenMLRunTrace + """ + if not self._is_hpo_class(model): + raise AssertionError( + f"Flow model {model} is not an instance of " + "sklearn.model_selection._search.BaseSearchCV", + ) + if not hasattr(model, "cv_results_"): + raise ValueError("model should contain `cv_results_`") + + # attributes that will be in trace arff, regardless of the model + trace_attributes = [ + ("repeat", "NUMERIC"), + ("fold", "NUMERIC"), + ("iteration", "NUMERIC"), + ("evaluation", "NUMERIC"), + ("selected", ["true", "false"]), + ] + + # model dependent attributes for trace arff + for key in model.cv_results_: + if key.startswith("param_"): + # supported types should include all types, including bool, + # int float + supported_basic_types = (bool, int, float, str) + for param_value in model.cv_results_[key]: + if isinstance(param_value, np.generic): + param_value = param_value.tolist() # noqa: PLW2901 + if ( + isinstance(param_value, supported_basic_types) + or param_value is None + or param_value is np.ma.masked + ): + # basic string values + type = "STRING" # noqa: A001 + elif isinstance(param_value, (list, tuple)) and all( + isinstance(i, int) for i in param_value + ): + # list of integers (usually for selecting features) + # hyperparameter layer_sizes of MLPClassifier + type = "STRING" # noqa: A001 + else: + raise TypeError(f"Unsupported param type in param grid: {key}") + + # renamed the attribute param to parameter, as this is a required + # OpenML convention - this also guards against name collisions + # with the required trace attributes + attribute = (PREFIX + key[6:], type) # type: ignore + trace_attributes.append(attribute) + + return OpenMLRunTrace.generate( + trace_attributes, + trace_content, + ) diff --git a/openml/extensions/sklearn/serializer.py b/openml/extensions/sklearn/serializer.py new file mode 100644 index 000000000..26413afec --- /dev/null +++ b/openml/extensions/sklearn/serializer.py @@ -0,0 +1,1508 @@ +# License: BSD 3-Clause + +"""Serializer for the Scikit-learn estimators.""" + +from __future__ import annotations + +import contextlib +import copy +import importlib +import inspect +import json +import logging +import re +import sys +import traceback +import warnings +from collections import OrderedDict +from json.decoder import JSONDecodeError +from re import IGNORECASE +from typing import Any, Callable, Sized + +import numpy as np +import scipy.stats +import sklearn.base +import sklearn.model_selection +import sklearn.pipeline +from packaging.version import Version + +import openml +from openml.exceptions import PyOpenMLError +from openml.extensions.base import ModelSerializer +from openml.flows import OpenMLFlow + +logger = logging.getLogger(__name__) + + +DEPENDENCIES_PATTERN = re.compile( + r"^(?P[\w\-]+)((?P==|>=|>)" + r"(?P(\d+\.)?(\d+\.)?(\d+)?(dev)?[0-9]*))?$", +) + +# NOTE(eddiebergman): This was imported before but became deprecated, +# as a result I just enumerated them manually by copy-ing and pasting, +# recommended solution in Numpy 2.0 guide was to explicitly list them. 
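+# These scalar types matter during serialization: hyperparameter values such as
+# np.int64(5) are converted to plain Python scalars via ``.item()`` before being
+# JSON-encoded, so SIMPLE_TYPES below has to cover the numpy scalars that
+# estimators commonly expose through get_params().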
+SIMPLE_NUMPY_TYPES = [ + np.int8, + np.int16, + np.int32, + np.int64, + np.longlong, + np.uint8, + np.uint16, + np.uint32, + np.uint64, + np.ulonglong, + np.float16, + np.float32, + np.float64, + np.longdouble, + np.complex64, + np.complex128, + np.clongdouble, +] +SIMPLE_TYPES = (bool, int, float, str, *SIMPLE_NUMPY_TYPES) + + +SKLEARN_PIPELINE_STRING_COMPONENTS = ("drop", "passthrough") +COMPONENT_REFERENCE = "component_reference" +COMPOSITION_STEP_CONSTANT = "composition_step_constant" + + +class SklearnSerializer(ModelSerializer): + """Serializer for Scikit-learn estimators.""" + + @classmethod + def can_handle_flow(cls, flow: OpenMLFlow) -> bool: + """Check whether a given describes a scikit-learn estimator. + + This is done by parsing the ``external_version`` field. + + Parameters + ---------- + flow : OpenMLFlow + + Returns + ------- + bool + """ + return cls._is_sklearn_flow(flow) + + @classmethod + def can_handle_model(cls, model: Any) -> bool: + """Check whether a model is an instance of ``sklearn.base.BaseEstimator``. + + Parameters + ---------- + model : Any + + Returns + ------- + bool + """ + return isinstance(model, sklearn.base.BaseEstimator) + + def flow_to_model( + self, + flow: OpenMLFlow, + initialize_with_defaults: bool = False, # noqa: FBT001, FBT002 + strict_version: bool = True, # noqa: FBT001, FBT002 + ) -> Any: + """Initializes a sklearn model based on a flow. + + Parameters + ---------- + flow : mixed + the object to deserialize (can be flow object, or any serialized + parameter value that is accepted by) + + initialize_with_defaults : bool, optional (default=False) + If this flag is set, the hyperparameter values of flows will be + ignored and a flow with its defaults is returned. + + strict_version : bool, default=True + Whether to fail if version requirements are not fulfilled. + + Returns + ------- + mixed + """ + return self._deserialize_sklearn( + flow, + initialize_with_defaults=initialize_with_defaults, + strict_version=strict_version, + ) + + def model_to_flow(self, model: Any) -> OpenMLFlow: + """Transform a scikit-learn model to a flow for uploading it to OpenML. + + Parameters + ---------- + model : Any + + Returns + ------- + OpenMLFlow + """ + # Necessary to make pypy not complain about all the different possible return types + return self._serialize_sklearn(model) + + @classmethod + def trim_flow_name( # noqa: C901 + cls, + long_name: str, + extra_trim_length: int = 100, + _outer: bool = True, # noqa: FBT001, FBT002 + ) -> str: + """Shorten generated sklearn flow name to at most ``max_length`` characters. + + Flows are assumed to have the following naming structure: + ``(model_selection)? (pipeline)? (steps)+`` + and will be shortened to: + ``sklearn.(selection.)?(pipeline.)?(steps)+`` + e.g. (white spaces and newlines added for readability) + + .. code :: + + sklearn.pipeline.Pipeline( + columntransformer=sklearn.compose._column_transformer.ColumnTransformer( + numeric=sklearn.pipeline.Pipeline( + imputer=sklearn.preprocessing.imputation.Imputer, + standardscaler=sklearn.preprocessing.data.StandardScaler), + nominal=sklearn.pipeline.Pipeline( + simpleimputer=sklearn.impute.SimpleImputer, + onehotencoder=sklearn.preprocessing._encoders.OneHotEncoder)), + variancethreshold=sklearn.feature_selection.variance_threshold.VarianceThreshold, + svc=sklearn.svm.classes.SVC) + + -> + ``sklearn.Pipeline(ColumnTransformer,VarianceThreshold,SVC)`` + + Parameters + ---------- + long_name : str + The full flow name generated by the scikit-learn extension. 
+ extra_trim_length: int (default=100) + If the trimmed name would exceed `extra_trim_length` characters, additional trimming + of the short name is performed. This reduces the produced short name length. + There is no guarantee the end result will not exceed `extra_trim_length`. + _outer : bool (default=True) + For internal use only. Specifies if the function is called recursively. + + Returns + ------- + str + + """ + + def remove_all_in_parentheses(string: str) -> str: + string, removals = re.subn(r"\([^()]*\)", "", string) + while removals > 0: + string, removals = re.subn(r"\([^()]*\)", "", string) + return string + + # Generally, we want to trim all hyperparameters, the exception to that is for model + # selection, as the `estimator` hyperparameter is very indicative of what is in the flow. + # So we first trim name of the `estimator` specified in mode selection. For reference, in + # the example below, we want to trim `sklearn.tree.tree.DecisionTreeClassifier`, and + # keep it in the final trimmed flow name: + # sklearn.pipeline.Pipeline(Imputer=sklearn.preprocessing.imputation.Imputer, + # VarianceThreshold=sklearn.feature_selection.variance_threshold.VarianceThreshold, # noqa: ERA001, E501 + # Estimator=sklearn.model_selection._search.RandomizedSearchCV(estimator= + # sklearn.tree.tree.DecisionTreeClassifier)) + if "sklearn.model_selection" in long_name: + start_index = long_name.index("sklearn.model_selection") + estimator_start = ( + start_index + long_name[start_index:].index("estimator=") + len("estimator=") + ) + + model_select_boilerplate = long_name[start_index:estimator_start] + # above is .g. "sklearn.model_selection._search.RandomizedSearchCV(estimator=" + model_selection_class = model_select_boilerplate.split("(")[0].split(".")[-1] + + # Now we want to also find and parse the `estimator`, for this we find the closing + # parenthesis to the model selection technique: + closing_parenthesis_expected = 1 + for char in long_name[estimator_start:]: + if char == "(": + closing_parenthesis_expected += 1 + if char == ")": + closing_parenthesis_expected -= 1 + if closing_parenthesis_expected == 0: + break + + _end: int = estimator_start + len(long_name[estimator_start:]) - 1 + model_select_pipeline = long_name[estimator_start:_end] + + trimmed_pipeline = cls.trim_flow_name(model_select_pipeline, _outer=False) + _, trimmed_pipeline = trimmed_pipeline.split(".", maxsplit=1) # trim module prefix + model_select_short = f"sklearn.{model_selection_class}[{trimmed_pipeline}]" + name = long_name[:start_index] + model_select_short + long_name[_end + 1 :] + else: + name = long_name + + module_name = long_name.split(".")[0] + short_name = module_name + ".{}" + + if name.startswith("sklearn.pipeline"): + full_pipeline_class, pipeline = name[:-1].split("(", maxsplit=1) + pipeline_class = full_pipeline_class.split(".")[-1] + # We don't want nested pipelines in the short name, so we trim all complicated + # subcomponents, i.e. those with parentheses: + pipeline = remove_all_in_parentheses(pipeline) + + # then the pipeline steps are formatted e.g.: + # step1name=sklearn.submodule.ClassName,step2name... + components = [component.split(".")[-1] for component in pipeline.split(",")] + pipeline = f"{pipeline_class}({','.join(components)})" + if len(short_name.format(pipeline)) > extra_trim_length: + pipeline = f"{pipeline_class}(...,{components[-1]})" + else: + # Just a simple component: e.g. 
sklearn.tree.DecisionTreeClassifier + pipeline = remove_all_in_parentheses(name).split(".")[-1] + + if not _outer: + # Anything from parenthesis in inner calls should not be culled, so we use brackets + pipeline = pipeline.replace("(", "[").replace(")", "]") + else: + # Square brackets may be introduced with nested model_selection + pipeline = pipeline.replace("[", "(").replace("]", ")") + + return short_name.format(pipeline) + + @classmethod + def _min_dependency_str(cls, sklearn_version: str) -> str: + """Returns a string containing the minimum dependencies for the sklearn version passed. + + Parameters + ---------- + sklearn_version : str + A version string of the xx.xx.xx + + Returns + ------- + str + """ + # This explicit check is necessary to support existing entities on the OpenML servers + # that used the fixed dependency string (in the else block) + if Version(openml.__version__) > Version("0.11"): + # OpenML v0.11 onwards supports sklearn>=0.24 + # assumption: 0.24 onwards sklearn should contain a _min_dependencies.py file with + # variables declared for extracting minimum dependency for that version + if Version(sklearn_version) >= Version("0.24"): + from sklearn import _min_dependencies as _mindep + + dependency_list = { + "numpy": f"{_mindep.NUMPY_MIN_VERSION}", + "scipy": f"{_mindep.SCIPY_MIN_VERSION}", + "joblib": f"{_mindep.JOBLIB_MIN_VERSION}", + "threadpoolctl": f"{_mindep.THREADPOOLCTL_MIN_VERSION}", + } + elif Version(sklearn_version) >= Version("0.23"): + dependency_list = { + "numpy": "1.13.3", + "scipy": "0.19.1", + "joblib": "0.11", + "threadpoolctl": "2.0.0", + } + if Version(sklearn_version).micro == 0: + dependency_list.pop("threadpoolctl") + elif Version(sklearn_version) >= Version("0.21"): + dependency_list = {"numpy": "1.11.0", "scipy": "0.17.0", "joblib": "0.11"} + elif Version(sklearn_version) >= Version("0.19"): + dependency_list = {"numpy": "1.8.2", "scipy": "0.13.3"} + else: + dependency_list = {"numpy": "1.6.1", "scipy": "0.9"} + else: + # this is INCORRECT for sklearn versions >= 0.19 and < 0.24 + # given that OpenML has existing flows uploaded with such dependency information, + # we change no behaviour for older sklearn version, however from 0.24 onwards + # the dependency list will be accurately updated for any flow uploaded to OpenML + dependency_list = {"numpy": "1.6.1", "scipy": "0.9"} + + sklearn_dep = f"sklearn=={sklearn_version}" + dep_str = "\n".join([f"{k}>={v}" for k, v in dependency_list.items()]) + return "\n".join([sklearn_dep, dep_str]) + + def _deserialize_sklearn( # noqa: PLR0915, C901, PLR0912 + self, + o: Any, + components: dict | None = None, + initialize_with_defaults: bool = False, # noqa: FBT001, FBT002 + recursion_depth: int = 0, + strict_version: bool = True, # noqa: FBT002, FBT001 + ) -> Any: + """Recursive function to deserialize a scikit-learn flow. + + This function inspects an object to deserialize and decides how to do so. This function + delegates all work to the respective functions to deserialize special data structures etc. + This function works on everything that has been serialized to OpenML: OpenMLFlow, + components (which are flows themselves), functions, hyperparameter distributions (for + random search) and the actual hyperparameter values themselves. + + Parameters + ---------- + o : mixed + the object to deserialize (can be flow object, or any serialized + parameter value that is accepted by) + + components : Optional[dict] + Components of the current flow being de-serialized. 
These will not be used when + de-serializing the actual flow, but when de-serializing a component reference. + + initialize_with_defaults : bool, optional (default=False) + If this flag is set, the hyperparameter values of flows will be + ignored and a flow with its defaults is returned. + + recursion_depth : int + The depth at which this flow is called, mostly for debugging + purposes + + strict_version : bool, default=True + Whether to fail if version requirements are not fulfilled. + + Returns + ------- + mixed + """ + logger.info( + "-{} flow_to_sklearn START o={}, components={}, init_defaults={}".format( + "-" * recursion_depth, o, components, initialize_with_defaults + ), + ) + depth_pp = recursion_depth + 1 # shortcut var, depth plus plus + + # First, we need to check whether the presented object is a json string. + # JSON strings are used to encoder parameter values. By passing around + # json strings for parameters, we make sure that we can flow_to_sklearn + # the parameter values to the correct type. + + if isinstance(o, str): + with contextlib.suppress(JSONDecodeError): + o = json.loads(o) + + if isinstance(o, dict): + # Check if the dict encodes a 'special' object, which could not + # easily converted into a string, but rather the information to + # re-create the object were stored in a dictionary. + if "oml-python:serialized_object" in o: + serialized_type = o["oml-python:serialized_object"] + value = o["value"] + if serialized_type == "type": + rval = self._deserialize_type(value) + elif serialized_type == "rv_frozen": + rval = self._deserialize_rv_frozen(value) + elif serialized_type == "function": + rval = self._deserialize_function(value) + elif serialized_type in (COMPOSITION_STEP_CONSTANT, COMPONENT_REFERENCE): + if serialized_type == COMPOSITION_STEP_CONSTANT: + pass + elif serialized_type == COMPONENT_REFERENCE: + value = self._deserialize_sklearn( + value, + recursion_depth=depth_pp, + strict_version=strict_version, + ) + else: + raise NotImplementedError(serialized_type) + assert components is not None # Necessary for mypy + step_name = value["step_name"] + key = value["key"] + component = self._deserialize_sklearn( + components[key], + initialize_with_defaults=initialize_with_defaults, + recursion_depth=depth_pp, + strict_version=strict_version, + ) + # The component is now added to where it should be used + # later. It should not be passed to the constructor of the + # main flow object. 
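+                        # Illustrative example (hypothetical names) of ``value`` at this
+                        # point: {"key": "scaler", "step_name": "scaler", "argument_1": [0, 1]}
+                        # becomes the tuple ("scaler", <deserialized flow>, [0, 1]) below,
+                        # i.e. a ready-to-use pipeline / ColumnTransformer step.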
+ del components[key] + if step_name is None: + rval = component + elif "argument_1" not in value: + rval = (step_name, component) + else: + rval = (step_name, component, value["argument_1"]) + elif serialized_type == "cv_object": + rval = self._deserialize_cross_validator( + value, + recursion_depth=recursion_depth, + strict_version=strict_version, + ) + else: + raise ValueError(f"Cannot flow_to_sklearn {serialized_type}") + + else: + rval = OrderedDict( + ( + self._deserialize_sklearn( + o=key, + components=components, + initialize_with_defaults=initialize_with_defaults, + recursion_depth=depth_pp, + strict_version=strict_version, + ), + self._deserialize_sklearn( + o=value, + components=components, + initialize_with_defaults=initialize_with_defaults, + recursion_depth=depth_pp, + strict_version=strict_version, + ), + ) + for key, value in sorted(o.items()) + ) + elif isinstance(o, (list, tuple)): + rval = [ + self._deserialize_sklearn( + o=element, + components=components, + initialize_with_defaults=initialize_with_defaults, + recursion_depth=depth_pp, + strict_version=strict_version, + ) + for element in o + ] + if isinstance(o, tuple): + rval = tuple(rval) + elif isinstance(o, (bool, int, float, str)) or o is None: + rval = o + elif isinstance(o, OpenMLFlow): + if not self._is_sklearn_flow(o): + raise ValueError("Only sklearn flows can be reinstantiated") + rval = self._deserialize_model( + flow=o, + keep_defaults=initialize_with_defaults, + recursion_depth=recursion_depth, + strict_version=strict_version, + ) + else: + raise TypeError(o) + logger.info(f"-{'-' * recursion_depth} flow_to_sklearn END o={o}, rval={rval}") + return rval + + def _serialize_sklearn(self, o: Any, parent_model: Any | None = None) -> Any: # noqa: PLR0912, C901 + rval = None # type: Any + + # TODO: assert that only on first recursion lvl `parent_model` can be None + if self.is_estimator(o): + # is the main model or a submodel + rval = self._serialize_model(o) + elif ( + isinstance(o, (list, tuple)) + and len(o) == 2 + and o[1] in SKLEARN_PIPELINE_STRING_COMPONENTS + and isinstance(parent_model, sklearn.pipeline._BaseComposition) + ): + rval = o + elif isinstance(o, (list, tuple)): + # TODO: explain what type of parameter is here + rval = [self._serialize_sklearn(element, parent_model) for element in o] + if isinstance(o, tuple): + rval = tuple(rval) + elif isinstance(o, SIMPLE_TYPES) or o is None: + if isinstance(o, tuple(SIMPLE_NUMPY_TYPES)): + o = o.item() # type: ignore + # base parameter values + rval = o + elif isinstance(o, dict): + # TODO: explain what type of parameter is here + if not isinstance(o, OrderedDict): + o = OrderedDict(sorted(o.items())) + + rval = OrderedDict() + for key, value in o.items(): + if not isinstance(key, str): + raise TypeError( + "Can only use string as keys, you passed " + f"type {type(key)} for value {key!s}.", + ) + _key = self._serialize_sklearn(key, parent_model) + rval[_key] = self._serialize_sklearn(value, parent_model) + elif isinstance(o, type): + # TODO: explain what type of parameter is here + rval = self._serialize_type(o) + elif isinstance(o, scipy.stats.distributions.rv_frozen): + rval = self._serialize_rv_frozen(o) + # This only works for user-defined functions (and not even partial). 
+ # I think this is exactly what we want here as there shouldn't be any + # built-in or functool.partials in a pipeline + elif inspect.isfunction(o): + # TODO: explain what type of parameter is here + rval = self._serialize_function(o) + elif self._is_cross_validator(o): + # TODO: explain what type of parameter is here + rval = self._serialize_cross_validator(o) + else: + raise TypeError(o, type(o)) + + return rval + + def is_estimator(self, model: Any) -> bool: + """Check whether the given model is a scikit-learn estimator. + + This function is only required for backwards compatibility and will be removed in the + near future. + + Parameters + ---------- + model : Any + + Returns + ------- + bool + """ + o = model + return hasattr(o, "fit") and hasattr(o, "get_params") and hasattr(o, "set_params") + + def get_version_information(self) -> list[str]: + """List versions of libraries required by the flow. + + Libraries listed are ``Python``, ``scikit-learn``, ``numpy`` and ``scipy``. + + Returns + ------- + List + """ + # This can possibly be done by a package such as pyxb, but I could not get + # it to work properly. + import numpy + import scipy + import sklearn + + major, minor, micro, _, _ = sys.version_info + python_version = f"Python_{'.'.join([str(major), str(minor), str(micro)])}." + sklearn_version = f"Sklearn_{sklearn.__version__}." + numpy_version = f"NumPy_{numpy.__version__}." # type: ignore + scipy_version = f"SciPy_{scipy.__version__}." + + return [python_version, sklearn_version, numpy_version, scipy_version] + + def create_setup_string(self, model: Any) -> str: # noqa: ARG002 + """Create a string which can be used to reinstantiate the given model. + + Parameters + ---------- + model : Any + + Returns + ------- + str + """ + return " ".join(self.get_version_information()) + + def _is_cross_validator(self, o: Any) -> bool: + return isinstance(o, sklearn.model_selection.BaseCrossValidator) + + @classmethod + def _is_sklearn_flow(cls, flow: OpenMLFlow) -> bool: + sklearn_dependency = isinstance(flow.dependencies, str) and "sklearn" in flow.dependencies + sklearn_as_external = isinstance(flow.external_version, str) and ( + flow.external_version.startswith("sklearn==") or ",sklearn==" in flow.external_version + ) + return sklearn_dependency or sklearn_as_external + + def _get_sklearn_description(self, model: Any, char_lim: int = 1024) -> str: + r"""Fetches the sklearn function docstring for the flow description + + Retrieves the sklearn docstring available and does the following: + * If length of docstring <= char_lim, then returns the complete docstring + * Else, trims the docstring till it encounters a 'Read more in the :ref:' + * Or till it encounters a 'Parameters\n----------\n' + The final string returned is at most of length char_lim with leading and + trailing whitespaces removed. + + Parameters + ---------- + model : sklearn model + char_lim : int + Specifying the max length of the returned string. + OpenML servers have a constraint of 1024 characters for the 'description' field. + + Returns + ------- + str + """ + + def match_format(s): + return f"{s}\n{len(s) * '-'}\n" + + s = inspect.getdoc(model) + if s is None: + return "" + try: + # trim till 'Read more' + pattern = "Read more in the :ref:" + index = s.index(pattern) + s = s[:index] + # trimming docstring to be within char_lim + if len(s) > char_lim: + s = f"{s[: char_lim - 3]}..." + return s.strip() + except ValueError: + logger.warning( + "'Read more' not found in descriptions. 
" + "Trying to trim till 'Parameters' if available in docstring.", + ) + try: + # if 'Read more' doesn't exist, trim till 'Parameters' + pattern = "Parameters" + index = s.index(match_format(pattern)) + except ValueError: + # returning full docstring + logger.warning("'Parameters' not found in docstring. Omitting docstring trimming.") + index = len(s) + s = s[:index] + # trimming docstring to be within char_lim + if len(s) > char_lim: + s = f"{s[: char_lim - 3]}..." + return s.strip() + + def _extract_sklearn_parameter_docstring(self, model) -> None | str: + """Extracts the part of sklearn docstring containing parameter information + + Fetches the entire docstring and trims just the Parameter section. + The assumption is that 'Parameters' is the first section in sklearn docstrings, + followed by other sections titled 'Attributes', 'See also', 'Note', 'References', + appearing in that order if defined. + Returns a None if no section with 'Parameters' can be found in the docstring. + + Parameters + ---------- + model : sklearn model + + Returns + ------- + str, or None + """ + + def match_format(s): + return f"{s}\n{len(s) * '-'}\n" + + s = inspect.getdoc(model) + if s is None: + return None + try: + index1 = s.index(match_format("Parameters")) + except ValueError as e: + # when sklearn docstring has no 'Parameters' section + logger.warning(f"{match_format('Parameters')} {e}") + return None + + headings = ["Attributes", "Notes", "See also", "Note", "References"] + for h in headings: + try: + # to find end of Parameters section + index2 = s.index(match_format(h)) + break + except ValueError: + logger.warning(f"{h} not available in docstring") + continue + else: + # in the case only 'Parameters' exist, trim till end of docstring + index2 = len(s) + s = s[index1:index2] + return s.strip() + + def _extract_sklearn_param_info(self, model, char_lim=1024) -> None | dict: + """Parses parameter type and description from sklearn dosctring + + Parameters + ---------- + model : sklearn model + char_lim : int + Specifying the max length of the returned string. + OpenML servers have a constraint of 1024 characters string fields. 
+ + Returns + ------- + Dict, or None + """ + docstring = self._extract_sklearn_parameter_docstring(model) + if docstring is None: + # when sklearn docstring has no 'Parameters' section + return None + + n = re.compile("[.]*\n", flags=IGNORECASE) + lines = n.split(docstring) + p = re.compile("[a-z0-9_ ]+ : [a-z0-9_']+[a-z0-9_ ]*", flags=IGNORECASE) + # The above regular expression is designed to detect sklearn parameter names and type + # in the format of [variable_name][space]:[space][type] + # The expectation is that the parameter description for this detected parameter will + # be all the lines in the docstring till the regex finds another parameter match + + # collecting parameters and their descriptions + description = [] # type: List + for s in lines: + param = p.findall(s) + if param != []: + # a parameter definition is found by regex + # creating placeholder when parameter found which will be a list of strings + # string descriptions will be appended in subsequent iterations + # till another parameter is found and a new placeholder is created + placeholder = [""] # type: List[str] + description.append(placeholder) + elif len(description) > 0: # description=[] means no parameters found yet + # appending strings to the placeholder created when parameter found + description[-1].append(s) + for i in range(len(description)): + # concatenating parameter description strings + description[i] = "\n".join(description[i]).strip() + # limiting all parameter descriptions to accepted OpenML string length + if len(description[i]) > char_lim: + description[i] = f"{description[i][: char_lim - 3]}..." + + # collecting parameters and their types + parameter_docs = OrderedDict() + matches = p.findall(docstring) + for i, param in enumerate(matches): + key, value = str(param).split(":") + parameter_docs[key.strip()] = [value.strip(), description[i]] + + # to avoid KeyError for missing parameters + param_list_true = list(model.get_params().keys()) + param_list_found = list(parameter_docs.keys()) + for param in list(set(param_list_true) - set(param_list_found)): + parameter_docs[param] = [None, None] + + return parameter_docs + + def _serialize_model(self, model: Any) -> OpenMLFlow: + """Create an OpenMLFlow. + + Calls `sklearn_to_flow` recursively to properly serialize the + parameters to strings and the components (other models) to OpenMLFlows. + + Parameters + ---------- + model : sklearn estimator + + Returns + ------- + OpenMLFlow + + """ + # Get all necessary information about the model objects itself + ( + parameters, + parameters_meta_info, + subcomponents, + subcomponents_explicit, + ) = self._extract_information_from_model(model) + + # Check that a component does not occur multiple times in a flow as this + # is not supported by OpenML + self._check_multiple_occurence_of_component_in_flow(model, subcomponents) + + # Create a flow name, which contains all components in brackets, e.g.: + # RandomizedSearchCV(Pipeline(StandardScaler,AdaBoostClassifier(DecisionTreeClassifier)), + # StandardScaler,AdaBoostClassifier(DecisionTreeClassifier)) + class_name = model.__module__ + "." 
+ model.__class__.__name__ + + # will be part of the name (in brackets) + sub_components_names = "" + for key in subcomponents: + name_thing = subcomponents[key] + if isinstance(name_thing, OpenMLFlow): + name = name_thing.name + elif ( + isinstance(name_thing, str) + and subcomponents[key] in SKLEARN_PIPELINE_STRING_COMPONENTS + ): + name = name_thing + else: + raise TypeError(type(subcomponents[key])) + + if key in subcomponents_explicit: + sub_components_names += "," + key + "=" + name + else: + sub_components_names += "," + name + + # slice operation on string in order to get rid of leading comma + name = f"{class_name}({sub_components_names[1:]})" if sub_components_names else class_name + short_name = self.trim_flow_name(name) + + # Get the external versions of all sub-components + external_version = self._get_external_version_string(model, subcomponents) + dependencies = self._get_dependencies() + tags = self._get_tags() + + sklearn_description = self._get_sklearn_description(model) + return OpenMLFlow( + name=name, + class_name=class_name, + custom_name=short_name, + description=sklearn_description, + model=model, + components=subcomponents, + parameters=parameters, + parameters_meta_info=parameters_meta_info, + external_version=external_version, + tags=tags, + extension=self, + language="English", + dependencies=dependencies, + ) + + def _get_dependencies(self) -> str: + return self._min_dependency_str(sklearn.__version__) # type: ignore + + def _get_tags(self) -> list[str]: + sklearn_version = self._format_external_version("sklearn", sklearn.__version__) # type: ignore + sklearn_version_formatted = sklearn_version.replace("==", "_") + return [ + "openml-python", + "sklearn", + "scikit-learn", + "python", + sklearn_version_formatted, + # TODO: add more tags based on the scikit-learn + # module a flow is in? For example automatically + # annotate a class of sklearn.svm.SVC() with the + # tag svm? + ] + + def _get_external_version_string( + self, + model: Any, + sub_components: dict[str, OpenMLFlow], + ) -> str: + # Create external version string for a flow, given the model and the + # already parsed dictionary of sub_components. Retrieves the external + # version of all subcomponents, which themselves already contain all + # requirements for their subcomponents. The external version string is a + # sorted concatenation of all modules which are present in this run. 
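+        # For example (version numbers illustrative only), a plain scikit-learn
+        # pipeline typically yields something like "openml==0.15.0,sklearn==1.4.2".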
+ + external_versions = set() + + # The model is None if the flow is a placeholder flow such as 'passthrough' or 'drop' + if model is not None: + model_package_name = model.__module__.split(".")[0] + module = importlib.import_module(model_package_name) + model_package_version_number = module.__version__ # type: ignore + external_version = self._format_external_version( + model_package_name, + model_package_version_number, + ) + external_versions.add(external_version) + + openml_version = self._format_external_version("openml", openml.__version__) + sklearn_version = self._format_external_version("sklearn", sklearn.__version__) # type: ignore + external_versions.add(openml_version) + external_versions.add(sklearn_version) + for visitee in sub_components.values(): + if isinstance(visitee, str) and visitee in SKLEARN_PIPELINE_STRING_COMPONENTS: + continue + for external_version in visitee.external_version.split(","): + external_versions.add(external_version) + return ",".join(sorted(external_versions)) + + def _check_multiple_occurence_of_component_in_flow( + self, + model: Any, + sub_components: dict[str, OpenMLFlow], + ) -> None: + to_visit_stack: list[OpenMLFlow] = [] + to_visit_stack.extend(sub_components.values()) + known_sub_components: set[str] = set() + + while len(to_visit_stack) > 0: + visitee = to_visit_stack.pop() + if isinstance(visitee, str) and visitee in SKLEARN_PIPELINE_STRING_COMPONENTS: + known_sub_components.add(visitee) + elif visitee.name in known_sub_components: + raise ValueError( + f"Found a second occurence of component {visitee.name} when " + f"trying to serialize {model}.", + ) + else: + known_sub_components.add(visitee.name) + to_visit_stack.extend(visitee.components.values()) + + def _extract_information_from_model( # noqa: PLR0915, C901, PLR0912 + self, + model: Any, + ) -> tuple[ + OrderedDict[str, str | None], + OrderedDict[str, dict | None], + OrderedDict[str, OpenMLFlow], + set, + ]: + # This function contains four "global" states and is quite long and + # complicated. If it gets to complicated to ensure it's correctness, + # it would be best to make it a class with the four "global" states being + # the class attributes and the if/elif/else in the for-loop calls to + # separate class methods + + # stores all entities that should become subcomponents + sub_components = OrderedDict() # type: OrderedDict[str, OpenMLFlow] + # stores the keys of all subcomponents that should become + sub_components_explicit = set() + parameters: OrderedDict[str, str | None] = OrderedDict() + parameters_meta_info: OrderedDict[str, dict | None] = OrderedDict() + parameters_docs = self._extract_sklearn_param_info(model) + + model_parameters = model.get_params(deep=False) + for k, v in sorted(model_parameters.items(), key=lambda t: t[0]): + rval = self._serialize_sklearn(v, model) + + def flatten_all(list_): + """Flattens arbitrary depth lists of lists (e.g. [[1,2],[3,[1]]] -> [1,2,3,1]).""" + for el in list_: + if isinstance(el, (list, tuple)) and len(el) > 0: + yield from flatten_all(el) + else: + yield el + + # In case rval is a list of lists (or tuples), we need to identify two situations: + # - sklearn pipeline steps, feature union or base classifiers in voting classifier. + # They look like e.g. [("imputer", Imputer()), ("classifier", SVC())] + # - a list of lists with simple types (e.g. 
int or str), such as for an OrdinalEncoder + # where all possible values for each feature are described: [[0,1,2], [1,2,5]] + is_non_empty_list_of_lists_with_same_type = ( + isinstance(rval, (list, tuple)) + and len(rval) > 0 + and isinstance(rval[0], (list, tuple)) + and all(isinstance(rval_i, type(rval[0])) for rval_i in rval) + ) + + # Check that all list elements are of simple types. + nested_list_of_simple_types = ( + is_non_empty_list_of_lists_with_same_type + and all(isinstance(el, SIMPLE_TYPES) for el in flatten_all(rval)) + and all( + len(rv) in (2, 3) and rv[1] not in SKLEARN_PIPELINE_STRING_COMPONENTS + for rv in rval + ) + ) + + if is_non_empty_list_of_lists_with_same_type and not nested_list_of_simple_types: + # If a list of lists is identified that include 'non-simple' types (e.g. objects), + # we assume they are steps in a pipeline, feature union, or base classifiers in + # a voting classifier. + parameter_value = [] # type: List + reserved_keywords = set(model.get_params(deep=False).keys()) + + for sub_component_tuple in rval: + identifier = sub_component_tuple[0] + sub_component = sub_component_tuple[1] + sub_component_type = type(sub_component_tuple) + if not 2 <= len(sub_component_tuple) <= 3: + # length 2 is for {VotingClassifier.estimators, + # Pipeline.steps, FeatureUnion.transformer_list} + # length 3 is for ColumnTransformer + raise ValueError( + f"Length of tuple of type {sub_component_type}" + " does not match assumptions" + ) + + if isinstance(sub_component, str): + if sub_component not in SKLEARN_PIPELINE_STRING_COMPONENTS: + msg = ( + "Second item of tuple does not match assumptions. " + "If string, can be only 'drop' or 'passthrough' but" + f"got {sub_component}" + ) + raise ValueError(msg) + elif sub_component is None: + msg = ( + "Cannot serialize objects of None type. Please use a valid " + "placeholder for None. Note that empty sklearn estimators can be " + "replaced with 'drop' or 'passthrough'." + ) + raise ValueError(msg) + elif not isinstance(sub_component, OpenMLFlow): + msg = ( + "Second item of tuple does not match assumptions. 
" + f"Expected OpenMLFlow, got {type(sub_component)}" + ) + raise TypeError(msg) + + if identifier in reserved_keywords: + parent_model = f"{model.__module__}.{model.__class__.__name__}" + msg = ( + "Found element shadowing official " + f"parameter for {parent_model}: {identifier}" + ) + raise PyOpenMLError(msg) + + # when deserializing the parameter + sub_components_explicit.add(identifier) + if isinstance(sub_component, str): + external_version = self._get_external_version_string(None, {}) + dependencies = self._get_dependencies() + tags = self._get_tags() + + sub_components[identifier] = OpenMLFlow( + name=sub_component, + description="Placeholder flow for scikit-learn's string pipeline " + "members", + components=OrderedDict(), + parameters=OrderedDict(), + parameters_meta_info=OrderedDict(), + external_version=external_version, + tags=tags, + language="English", + dependencies=dependencies, + model=None, + ) + component_reference: OrderedDict[str, str | dict] = OrderedDict() + component_reference["oml-python:serialized_object"] = ( + COMPOSITION_STEP_CONSTANT + ) + cr_value: dict[str, Any] = OrderedDict() + cr_value["key"] = identifier + cr_value["step_name"] = identifier + if len(sub_component_tuple) == 3: + cr_value["argument_1"] = sub_component_tuple[2] + component_reference["value"] = cr_value + else: + sub_components[identifier] = sub_component + component_reference = OrderedDict() + component_reference["oml-python:serialized_object"] = COMPONENT_REFERENCE + cr_value = OrderedDict() + cr_value["key"] = identifier + cr_value["step_name"] = identifier + if len(sub_component_tuple) == 3: + cr_value["argument_1"] = sub_component_tuple[2] + component_reference["value"] = cr_value + parameter_value.append(component_reference) + + # Here (and in the elif and else branch below) are the only + # places where we encode a value as json to make sure that all + # parameter values still have the same type after + # deserialization + if isinstance(rval, tuple): + parameter_json = json.dumps(tuple(parameter_value)) + else: + parameter_json = json.dumps(parameter_value) + parameters[k] = parameter_json + + elif isinstance(rval, OpenMLFlow): + # A subcomponent, for example the base model in + # AdaBoostClassifier + sub_components[k] = rval + sub_components_explicit.add(k) + component_reference = OrderedDict() + component_reference["oml-python:serialized_object"] = COMPONENT_REFERENCE + cr_value = OrderedDict() + cr_value["key"] = k + cr_value["step_name"] = None + component_reference["value"] = cr_value + cr = self._serialize_sklearn(component_reference, model) + parameters[k] = json.dumps(cr) + + elif not (hasattr(rval, "__len__") and len(rval) == 0): + rval = json.dumps(rval) + parameters[k] = rval + # a regular hyperparameter + else: + parameters[k] = None + + if parameters_docs is not None: + data_type, description = parameters_docs[k] + parameters_meta_info[k] = OrderedDict( + (("description", description), ("data_type", data_type)), + ) + else: + parameters_meta_info[k] = OrderedDict((("description", None), ("data_type", None))) + + return parameters, parameters_meta_info, sub_components, sub_components_explicit + + def _get_fn_arguments_with_defaults(self, fn_name: Callable) -> tuple[dict, set]: + """ + Returns + ------- + i) a dict with all parameter names that have a default value, and + ii) a set with all parameter names that do not have a default + + Parameters + ---------- + fn_name : callable + The function of which we want to obtain the defaults + + Returns + ------- + 
params_with_defaults: dict + a dict mapping parameter name to the default value + params_without_defaults: set + a set with all parameters that do not have a default value + """ + # parameters with defaults are optional, all others are required. + parameters = inspect.signature(fn_name).parameters + required_params = set() + optional_params = {} + for param in parameters: + parameter = parameters.get(param) + default_val = parameter.default # type: ignore + if default_val is inspect.Signature.empty: + required_params.add(param) + else: + optional_params[param] = default_val + return optional_params, required_params + + def _deserialize_model( # noqa: C901 + self, + flow: OpenMLFlow, + keep_defaults: bool, # noqa: FBT001 + recursion_depth: int, + strict_version: bool = True, # noqa: FBT002, FBT001 + ) -> Any: + logger.info(f"-{'-' * recursion_depth} deserialize {flow.name}") + model_name = flow.class_name + self._check_dependencies(flow.dependencies, strict_version=strict_version) + + parameters = flow.parameters + components = flow.components + parameter_dict: dict[str, Any] = OrderedDict() + + # Do a shallow copy of the components dictionary so we can remove the + # components from this copy once we added them into the pipeline. This + # allows us to not consider them any more when looping over the + # components, but keeping the dictionary of components untouched in the + # original components dictionary. + components_ = copy.copy(components) + + for name in parameters: + value = parameters.get(name) + logger.info(f"--{'-' * recursion_depth} flow_parameter={name}, value={value}") + rval = self._deserialize_sklearn( + value, + components=components_, + initialize_with_defaults=keep_defaults, + recursion_depth=recursion_depth + 1, + strict_version=strict_version, + ) + parameter_dict[name] = rval + + for name in components: + if name in parameter_dict: + continue + if name not in components_: + continue + value = components[name] + logger.info(f"--{'-' * recursion_depth} flow_component={name}, value={value}") + rval = self._deserialize_sklearn( + value, + recursion_depth=recursion_depth + 1, + strict_version=strict_version, + ) + parameter_dict[name] = rval + + if model_name is None and flow.name in SKLEARN_PIPELINE_STRING_COMPONENTS: + return flow.name + + assert model_name is not None + module_name = model_name.rsplit(".", 1) + model_class = getattr(importlib.import_module(module_name[0]), module_name[1]) + + if keep_defaults: + # obtain all params with a default + param_defaults, _ = self._get_fn_arguments_with_defaults(model_class.__init__) + + # delete the params that have a default from the dict, + # so they get initialized with their default value + # except [...] + for param in param_defaults: + # [...] the ones that also have a key in the components dict. + # As OpenML stores different flows for ensembles with different + # (base-)components, in OpenML terms, these are not considered + # hyperparameters but rather constants (i.e., changing them would + # result in a different flow) + if param not in components: + del parameter_dict[param] + + if not strict_version: + # Ignore incompatible parameters + allowed_parameter = list(inspect.signature(model_class.__init__).parameters) + for p in list(parameter_dict.keys()): + if p not in allowed_parameter: + warnings.warn( + f"While deserializing in a non-strict way, parameter {p} is not " + f"allowed for {model_class.__name__} likely due to a version mismatch. 
" + "We ignore the parameter.", + UserWarning, + stacklevel=2, + ) + del parameter_dict[p] + + return model_class(**parameter_dict) + + def _check_dependencies( + self, + dependencies: str, + strict_version: bool = True, # noqa: FBT001, FBT002 + ) -> None: + if not dependencies: + return + + dependencies_list = dependencies.split("\n") + for dependency_string in dependencies_list: + match = DEPENDENCIES_PATTERN.match(dependency_string) + if not match: + raise ValueError(f"Cannot parse dependency {dependency_string}") + + dependency_name = match.group("name") + operation = match.group("operation") + version = match.group("version") + + module = importlib.import_module(dependency_name) + required_version = Version(version) + installed_version = Version(module.__version__) # type: ignore + + if operation == "==": + check = required_version == installed_version + elif operation == ">": + check = installed_version > required_version + elif operation == ">=": + check = ( + installed_version > required_version or installed_version == required_version + ) + else: + raise NotImplementedError(f"operation '{operation}' is not supported") + message = ( + f"Trying to deserialize a model with dependency {dependency_string} not satisfied." + ) + if not check: + if strict_version: + raise ValueError(message) + + warnings.warn(message, category=UserWarning, stacklevel=2) + + def _serialize_type(self, o: Any) -> OrderedDict[str, str]: + mapping = { + float: "float", + np.float32: "np.float32", + np.float64: "np.float64", + int: "int", + np.int32: "np.int32", + np.int64: "np.int64", + } + if Version(np.__version__) < Version("1.24"): + mapping[float] = "np.float" + mapping[int] = "np.int" + + ret = OrderedDict() # type: 'OrderedDict[str, str]' + ret["oml-python:serialized_object"] = "type" + ret["value"] = mapping[o] + return ret + + def _deserialize_type(self, o: str) -> Any: + mapping = { + "float": float, + "np.float32": np.float32, + "np.float64": np.float64, + "int": int, + "np.int32": np.int32, + "np.int64": np.int64, + } + + # TODO(eddiebergman): Might be able to remove this + if Version(np.__version__) < Version("1.24"): + mapping["np.float"] = np.float # type: ignore # noqa: NPY001 + mapping["np.int"] = np.int # type: ignore # noqa: NPY001 + + return mapping[o] + + def _serialize_rv_frozen(self, o: Any) -> OrderedDict[str, str | dict]: + args = o.args + kwds = o.kwds + a = o.a + b = o.b + dist = o.dist.__class__.__module__ + "." + o.dist.__class__.__name__ + ret: OrderedDict[str, str | dict] = OrderedDict() + ret["oml-python:serialized_object"] = "rv_frozen" + ret["value"] = OrderedDict( + (("dist", dist), ("a", a), ("b", b), ("args", args), ("kwds", kwds)), + ) + return ret + + def _deserialize_rv_frozen(self, o: OrderedDict[str, str]) -> Any: + args = o["args"] + kwds = o["kwds"] + a = o["a"] + b = o["b"] + dist_name = o["dist"] + + module_name = dist_name.rsplit(".", 1) + try: + rv_class = getattr(importlib.import_module(module_name[0]), module_name[1]) + except AttributeError as e: + _tb = traceback.format_exc() + warnings.warn( + f"Cannot create model {dist_name} for flow. Reason is from error {type(e)}:{e}" + f"\nTraceback: {_tb}", + RuntimeWarning, + stacklevel=2, + ) + return None + + dist = scipy.stats.distributions.rv_frozen(rv_class(), *args, **kwds) # type: ignore + dist.a = a + dist.b = b + + return dist + + def _serialize_function(self, o: Callable) -> OrderedDict[str, str]: + name = o.__module__ + "." 
+ o.__name__ + ret = OrderedDict() # type: 'OrderedDict[str, str]' + ret["oml-python:serialized_object"] = "function" + ret["value"] = name + return ret + + def _deserialize_function(self, name: str) -> Callable: + module_name = name.rsplit(".", 1) + return getattr(importlib.import_module(module_name[0]), module_name[1]) + + def _serialize_cross_validator(self, o: Any) -> OrderedDict[str, str | dict]: + ret: OrderedDict[str, str | dict] = OrderedDict() + + parameters = OrderedDict() # type: 'OrderedDict[str, Any]' + + # XXX this is copied from sklearn.model_selection._split + cls = o.__class__ + init = getattr(cls.__init__, "deprecated_original", cls.__init__) + # Ignore varargs, kw and default values and pop self + init_signature = inspect.signature(init) # type: ignore + # Consider the constructor parameters excluding 'self' + if init is object.__init__: + args = [] # type: List + else: + args = sorted( + [ + p.name + for p in init_signature.parameters.values() + if p.name != "self" and p.kind != p.VAR_KEYWORD + ], + ) + + for key in args: + # We need deprecation warnings to always be on in order to + # catch deprecated param values. + # This is set in utils/__init__.py but it gets overwritten + # when running under python3 somehow. + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always", DeprecationWarning) + value = getattr(o, key, None) + if w is not None and len(w) and w[0].category is DeprecationWarning: + # if the parameter is deprecated, don't show it + continue + + if not (isinstance(value, Sized) and len(value) == 0): + value = json.dumps(value) + parameters[key] = value + else: + parameters[key] = None + + ret["oml-python:serialized_object"] = "cv_object" + name = o.__module__ + "." + o.__class__.__name__ + value = OrderedDict([("name", name), ("parameters", parameters)]) + ret["value"] = value + + return ret + + def _deserialize_cross_validator( + self, + value: OrderedDict[str, Any], + recursion_depth: int, + strict_version: bool = True, # noqa: FBT002, FBT001 + ) -> Any: + model_name = value["name"] + parameters = value["parameters"] + + module_name = model_name.rsplit(".", 1) + model_class = getattr(importlib.import_module(module_name[0]), module_name[1]) + for parameter in parameters: + parameters[parameter] = self._deserialize_sklearn( + parameters[parameter], + recursion_depth=recursion_depth + 1, + strict_version=strict_version, + ) + return model_class(**parameters) + + def _format_external_version( + self, + model_package_name: str, + model_package_version_number: str, + ) -> str: + return f"{model_package_name}=={model_package_version_number}" + + @staticmethod + def _get_parameter_values_recursive( + param_grid: dict | list[dict], + parameter_name: str, + ) -> list[Any]: + """ + Returns a list of values for a given hyperparameter, encountered + recursively throughout the flow. 
(e.g., n_jobs can be defined + for various flows) + + Parameters + ---------- + param_grid: Union[Dict, List[Dict]] + Dict mapping from hyperparameter list to value, to a list of + such dicts + + parameter_name: str + The hyperparameter that needs to be inspected + + Returns + ------- + List + A list of all values of hyperparameters with this name + """ + if isinstance(param_grid, dict): + return [ + value + for param, value in param_grid.items() + if param.split("__")[-1] == parameter_name + ] + + if isinstance(param_grid, list): + result = [] + for sub_grid in param_grid: + result.extend( + SklearnSerializer._get_parameter_values_recursive(sub_grid, parameter_name), + ) + return result + + raise ValueError("Param_grid should either be a dict or list of dicts") + + def _prevent_optimize_n_jobs(self, model): + """ + Ensures that HPO classes will not optimize the n_jobs hyperparameter + + Parameters + ---------- + model: + The model that will be fitted + """ + if self._is_hpo_class(model): + if isinstance(model, sklearn.model_selection.GridSearchCV): + param_distributions = model.param_grid + elif isinstance(model, sklearn.model_selection.RandomizedSearchCV): + param_distributions = model.param_distributions + else: + if hasattr(model, "param_distributions"): + param_distributions = model.param_distributions + else: + raise AttributeError( + "Using subclass BaseSearchCV other than " + "{GridSearchCV, RandomizedSearchCV}. " + "Could not find attribute " + "param_distributions.", + ) + logger.warning( + "Warning! Using subclass BaseSearchCV other than " + "{GridSearchCV, RandomizedSearchCV}. " + "Should implement param check. ", + ) + n_jobs_vals = self._get_parameter_values_recursive( + param_distributions, + "n_jobs", + ) + if len(n_jobs_vals) > 0: + raise PyOpenMLError( + "openml-python should not be used to optimize the n_jobs parameter.", + ) From e86fab7fdabebb456799a476e9575613aa4a5558 Mon Sep 17 00:00:00 2001 From: Jigyasu Date: Mon, 5 Jan 2026 16:25:41 +0530 Subject: [PATCH 08/15] add public function for serialisation and deserialisation --- openml/extensions/utils.py | 46 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 openml/extensions/utils.py diff --git a/openml/extensions/utils.py b/openml/extensions/utils.py new file mode 100644 index 000000000..e3480ef6b --- /dev/null +++ b/openml/extensions/utils.py @@ -0,0 +1,46 @@ +# License: BSD 3-Clause + +"""Utility functions for OpenML extensions.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +from openml.extensions.registry import resolve_api_connector + +if TYPE_CHECKING: + from openml.flows import OpenMLFlow + + +def flow_to_estimator(flow: OpenMLFlow) -> Any: + """Convert an OpenML flow to an estimator instance. + + Parameters + ---------- + flow : openml.flows.OpenMLFlow + The OpenML flow to convert. + + Returns + ------- + estimator_instance : Any + The corresponding estimator instance. + """ + connector = resolve_api_connector(flow) + return connector.serializer().flow_to_model(flow) + + +def estimator_to_flow(estimator_instance: Any) -> OpenMLFlow: + """Convert an estimator instance to an OpenML flow. + + Parameters + ---------- + estimator_instance : Any + The estimator instance to convert. + + Returns + ------- + flow : openml.flows.OpenMLFlow + The corresponding OpenML flow. 
+ """ + connector = resolve_api_connector(estimator_instance) + return connector.serializer().model_to_flow(estimator_instance) From e92156aa4b4d94bf41ed009a2261e1c25864cb70 Mon Sep 17 00:00:00 2001 From: Jigyasu Date: Mon, 5 Jan 2026 16:36:55 +0530 Subject: [PATCH 09/15] move the flow utils to flows/functions.py --- openml/extensions/utils.py | 46 -------------------------------------- openml/flows/functions.py | 35 +++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 46 deletions(-) delete mode 100644 openml/extensions/utils.py diff --git a/openml/extensions/utils.py b/openml/extensions/utils.py deleted file mode 100644 index e3480ef6b..000000000 --- a/openml/extensions/utils.py +++ /dev/null @@ -1,46 +0,0 @@ -# License: BSD 3-Clause - -"""Utility functions for OpenML extensions.""" - -from __future__ import annotations - -from typing import TYPE_CHECKING, Any - -from openml.extensions.registry import resolve_api_connector - -if TYPE_CHECKING: - from openml.flows import OpenMLFlow - - -def flow_to_estimator(flow: OpenMLFlow) -> Any: - """Convert an OpenML flow to an estimator instance. - - Parameters - ---------- - flow : openml.flows.OpenMLFlow - The OpenML flow to convert. - - Returns - ------- - estimator_instance : Any - The corresponding estimator instance. - """ - connector = resolve_api_connector(flow) - return connector.serializer().flow_to_model(flow) - - -def estimator_to_flow(estimator_instance: Any) -> OpenMLFlow: - """Convert an estimator instance to an OpenML flow. - - Parameters - ---------- - estimator_instance : Any - The estimator instance to convert. - - Returns - ------- - flow : openml.flows.OpenMLFlow - The corresponding OpenML flow. - """ - connector = resolve_api_connector(estimator_instance) - return connector.serializer().model_to_flow(estimator_instance) diff --git a/openml/flows/functions.py b/openml/flows/functions.py index 9906958e5..e51175acc 100644 --- a/openml/flows/functions.py +++ b/openml/flows/functions.py @@ -14,6 +14,7 @@ import openml._api_calls import openml.utils from openml.exceptions import OpenMLCacheException +from openml.extensions.registry import resolve_api_connector from . import OpenMLFlow @@ -552,3 +553,37 @@ def delete_flow(flow_id: int) -> bool: True if the deletion was successful. False otherwise. """ return openml.utils._delete_entity("flow", flow_id) + + +def flow_to_estimator(flow: OpenMLFlow) -> Any: + """Convert an OpenML flow to an estimator instance. + + Parameters + ---------- + flow : openml.flows.OpenMLFlow + The OpenML flow to convert. + + Returns + ------- + estimator_instance : Any + The corresponding estimator instance. + """ + connector = resolve_api_connector(flow) + return connector.serializer().flow_to_model(flow) + + +def estimator_to_flow(estimator_instance: Any) -> OpenMLFlow: + """Convert an estimator instance to an OpenML flow. + + Parameters + ---------- + estimator_instance : Any + The estimator instance to convert. + + Returns + ------- + flow : openml.flows.OpenMLFlow + The corresponding OpenML flow. 
+ """ + connector = resolve_api_connector(estimator_instance) + return connector.serializer().model_to_flow(estimator_instance) From 1945c58a456db0a684c27289026a71322f3b828b Mon Sep 17 00:00:00 2001 From: Jigyasu Date: Mon, 5 Jan 2026 16:40:18 +0530 Subject: [PATCH 10/15] update flows --- openml/flows/functions.py | 35 ----------------------------- openml/flows/utils.py | 46 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 35 deletions(-) create mode 100644 openml/flows/utils.py diff --git a/openml/flows/functions.py b/openml/flows/functions.py index e51175acc..9906958e5 100644 --- a/openml/flows/functions.py +++ b/openml/flows/functions.py @@ -14,7 +14,6 @@ import openml._api_calls import openml.utils from openml.exceptions import OpenMLCacheException -from openml.extensions.registry import resolve_api_connector from . import OpenMLFlow @@ -553,37 +552,3 @@ def delete_flow(flow_id: int) -> bool: True if the deletion was successful. False otherwise. """ return openml.utils._delete_entity("flow", flow_id) - - -def flow_to_estimator(flow: OpenMLFlow) -> Any: - """Convert an OpenML flow to an estimator instance. - - Parameters - ---------- - flow : openml.flows.OpenMLFlow - The OpenML flow to convert. - - Returns - ------- - estimator_instance : Any - The corresponding estimator instance. - """ - connector = resolve_api_connector(flow) - return connector.serializer().flow_to_model(flow) - - -def estimator_to_flow(estimator_instance: Any) -> OpenMLFlow: - """Convert an estimator instance to an OpenML flow. - - Parameters - ---------- - estimator_instance : Any - The estimator instance to convert. - - Returns - ------- - flow : openml.flows.OpenMLFlow - The corresponding OpenML flow. - """ - connector = resolve_api_connector(estimator_instance) - return connector.serializer().model_to_flow(estimator_instance) diff --git a/openml/flows/utils.py b/openml/flows/utils.py new file mode 100644 index 000000000..e3480ef6b --- /dev/null +++ b/openml/flows/utils.py @@ -0,0 +1,46 @@ +# License: BSD 3-Clause + +"""Utility functions for OpenML extensions.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +from openml.extensions.registry import resolve_api_connector + +if TYPE_CHECKING: + from openml.flows import OpenMLFlow + + +def flow_to_estimator(flow: OpenMLFlow) -> Any: + """Convert an OpenML flow to an estimator instance. + + Parameters + ---------- + flow : openml.flows.OpenMLFlow + The OpenML flow to convert. + + Returns + ------- + estimator_instance : Any + The corresponding estimator instance. + """ + connector = resolve_api_connector(flow) + return connector.serializer().flow_to_model(flow) + + +def estimator_to_flow(estimator_instance: Any) -> OpenMLFlow: + """Convert an estimator instance to an OpenML flow. + + Parameters + ---------- + estimator_instance : Any + The estimator instance to convert. + + Returns + ------- + flow : openml.flows.OpenMLFlow + The corresponding OpenML flow. 
+ """ + connector = resolve_api_connector(estimator_instance) + return connector.serializer().model_to_flow(estimator_instance) From 5a1ccd620bd4e9e2e112a303f2ac2d978106292b Mon Sep 17 00:00:00 2001 From: Jigyasu Date: Mon, 5 Jan 2026 16:56:36 +0530 Subject: [PATCH 11/15] expose parameters of flow_to_model --- openml/flows/utils.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/openml/flows/utils.py b/openml/flows/utils.py index e3480ef6b..0e2c2020f 100644 --- a/openml/flows/utils.py +++ b/openml/flows/utils.py @@ -12,13 +12,23 @@ from openml.flows import OpenMLFlow -def flow_to_estimator(flow: OpenMLFlow) -> Any: - """Convert an OpenML flow to an estimator instance. +def flow_to_estimator( + flow: OpenMLFlow, + initialize_with_defaults: bool = False, # noqa: FBT001, FBT002 + strict_version: bool = True, # noqa: FBT002, FBT001 +) -> Any: + """Instantiate a model from the flow representation. Parameters ---------- - flow : openml.flows.OpenMLFlow - The OpenML flow to convert. + flow : OpenMLFlow + + initialize_with_defaults : bool, optional (default=False) + If this flag is set, the hyperparameter values of flows will be + ignored and a flow with its defaults is returned. + + strict_version : bool, default=True + Whether to fail if version requirements are not fulfilled. Returns ------- @@ -26,7 +36,11 @@ def flow_to_estimator(flow: OpenMLFlow) -> Any: The corresponding estimator instance. """ connector = resolve_api_connector(flow) - return connector.serializer().flow_to_model(flow) + return connector.serializer().flow_to_model( + flow, + initialize_with_defaults=initialize_with_defaults, + strict_version=strict_version, + ) def estimator_to_flow(estimator_instance: Any) -> OpenMLFlow: From c7e52e1e6b3cfdd6f5197a4958cc20efb09696a7 Mon Sep 17 00:00:00 2001 From: Jigyasu Date: Mon, 5 Jan 2026 21:16:50 +0530 Subject: [PATCH 12/15] remove sklearn --- openml/extensions/.DS_Store | Bin 0 -> 10244 bytes openml/extensions/sklearn/__init__.py | 13 - openml/extensions/sklearn/connector.py | 68 - openml/extensions/sklearn/executor.py | 784 ------------ openml/extensions/sklearn/serializer.py | 1508 ----------------------- 5 files changed, 2373 deletions(-) create mode 100644 openml/extensions/.DS_Store delete mode 100644 openml/extensions/sklearn/__init__.py delete mode 100644 openml/extensions/sklearn/connector.py delete mode 100644 openml/extensions/sklearn/executor.py delete mode 100644 openml/extensions/sklearn/serializer.py diff --git a/openml/extensions/.DS_Store b/openml/extensions/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..0ee8fbd94f44119155f92a35af97354b7bcaca43 GIT binary patch literal 10244 zcmeHMTWl0n7(V~B&>1?=0SXk{i3^2r$x;dwT5e{0Q_8Ktwsc!A%k0iT2c|R2%L_ z9nS#_6AffMkOeLo6zAlp2MC}L95H~yaXd=ZQN{yV;F7}$a5#ZrWe83vz*a|Gl&KSl za~YN)10e$w8K7tPN_J*9i9PQ8KHOnBnY87kQG*6m3+B4NSi)Knl_kR@Lp03&#Gvz( zpstjl%1IivJz|X4gC!+;^KrHzyiG80S03)mL775nvp$y`V{01hrlDk~@NCIkGHT}# znL#X{$(sx(&{xWM3te3QIAT0Rx@alA*C<&{a7$1XsC|-Hh|~~ckGGS&fZCVFuvns< z9`v#S=hxF&%k=#E`maDJE1x#KqC%>utT>$LcLo!_=I6W?%{@q4U5;a=i*+r1z%&L^ zN_DGa`{b(#YvT0p? 
zEZVTKVRTdy=hd#+vOjgw8nB(SXaOb&Uc`iY(Aabi8zolhUdBq}E-9pB$nx^7+~BfA z(crHGgGFgZr94yZN%r;~lvB!_JX+S7^-ah24j8V7GNn{`$UsNNvG=+L9q-9FR&K!d zQp)U%W@U1g<{QnHl`u~m*jMFcT*tEZI-W_V=}r2^kWa@Z3?=(K$Msv;WJwd}=UPh6 zGxi%eshKz0aB8o5tm#+>>GG@v4=-J@dR^nzczeh0eYdKV*>mPr%W@x5ZTsdi({Q^7 zP2KPB)x3;h>!y9YCvSRY+A_LqZNQifF-f{rRkLu>LlISdkZLlQHc#bDe^{v%MYcwC zu__BLS6{YRRS(jUR@cB%4*)AysdATx#w%K&Shq%&yH28yC?4Gc zLf1FQYPY4O4NK7_zE0C7S?$Sl{*|3U=eBBDO=g^d0W{GI`kGr*^$1;dm*)AMS;NLH z-3-=#6vz6!e$6#H2hm4^-W}M>9x3)yP`hsW4jV)^_MjELbh>6;XErRVSy-he)NWDv zTudp7Y3EIh3w*3RJjB}Kqp6Lr;?>#@G6hy+MAydadAEeWJyf!ew2@upD8}pvIZvJ^ zm&jG}F}Xp$B;S&s$X)U~089r7W{zmo`Umm0iJ_b;8l1HF2S2{1+Kz1cn98xoA4=o24BD(_zu2@AK)(h0e`|h zVVbZ&s1X(lON5xPTBs8mg(hLMutnG-bP7p96HW+@FvJZIim`Z@w~M7PjJ1gY7rbGF z%aaFOjq_Z!VWtiWn@)bl!|b>O>4rMPN;NZDhTK zL8z4RW-QVui3;$>W_hcG;i#1JrYgrJNg=EeX^pgt7*dM(Px<<7a)8xj5$A%O?)Ykb#hakb#hakb$YiKp8hTO7H*MAN>FSsYOa? zZOFhBV*twA5^eN)uXv0swVz&$MKK@6OcT{BaLFK8P!-zGuH&&muj5w)eZeuCjmLw& f0+$SqI0i?_Z}b24e+Go-|1>_b@cci8&;Ne|0>mUZ literal 0 HcmV?d00001 diff --git a/openml/extensions/sklearn/__init__.py b/openml/extensions/sklearn/__init__.py deleted file mode 100644 index 4e22407be..000000000 --- a/openml/extensions/sklearn/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# License: BSD 3-Clause - -"""OpenML extension for Scikit-learn.""" - -from openml.extensions.sklearn.connector import SklearnAPIConnector -from openml.extensions.sklearn.executor import SklearnExecutor -from openml.extensions.sklearn.serializer import SklearnSerializer - -__all__ = [ - "SklearnAPIConnector", - "SklearnExecutor", - "SklearnSerializer", -] diff --git a/openml/extensions/sklearn/connector.py b/openml/extensions/sklearn/connector.py deleted file mode 100644 index 795baf640..000000000 --- a/openml/extensions/sklearn/connector.py +++ /dev/null @@ -1,68 +0,0 @@ -# License: BSD 3-Clause - -"""Connector for the Scikit-learn extension.""" - -from __future__ import annotations - -from openml.extensions.base import OpenMLAPIConnector -from openml.extensions.sklearn.executor import SklearnExecutor -from openml.extensions.sklearn.serializer import SklearnSerializer -from openml.flows import OpenMLFlow - - -class SklearnAPIConnector(OpenMLAPIConnector): - """ - Connector for the Scikit-learn extension. - - This class provides the interface to connect Scikit-learn models and flows - to the OpenML API, handling both serialization and execution compatibility checks. - """ - - def serializer(self) -> SklearnSerializer: - """ - Return the serializer for Scikit-learn estimators. - - Returns - ------- - SklearnSerializer - The serializer instance capable of handling Scikit-learn estimator. - """ - return SklearnSerializer() - - def executor(self) -> SklearnExecutor: - """ - Return the executor for Scikit-learn estimators. - - Returns - ------- - SklearnExecutor - The executor instance capable of running Scikit-learn estimators. - """ - return SklearnExecutor() - - @classmethod - def supports(cls, estimator) -> bool: - """ - Check if this connector supports the given model or flow. - - Parameters - ---------- - estimator : Any or OpenMLFlow - The Scikit-learn estimator instance or OpenMLFlow object. - - Returns - ------- - bool - True if both the serializer and executor can handle the provided - estimator or flow, False otherwise. 
- """ - serializer = SklearnSerializer() - SklearnExecutor() - - if isinstance(estimator, OpenMLFlow): - support = serializer.can_handle_flow(estimator) - - else: - support = serializer.can_handle_model(estimator) - - return support diff --git a/openml/extensions/sklearn/executor.py b/openml/extensions/sklearn/executor.py deleted file mode 100644 index 69d54a102..000000000 --- a/openml/extensions/sklearn/executor.py +++ /dev/null @@ -1,784 +0,0 @@ -# License: BSD 3-Clause -from __future__ import annotations - -import json -import logging -import time -import warnings -from collections import OrderedDict -from typing import TYPE_CHECKING, Any, List, cast - -import numpy as np -import pandas as pd -import sklearn.base -import sklearn.model_selection -import sklearn.pipeline - -import openml -from openml.exceptions import PyOpenMLError -from openml.extensions.base import ModelExecutor -from openml.flows import OpenMLFlow -from openml.runs.trace import PREFIX, OpenMLRunTrace, OpenMLTraceIteration -from openml.tasks import ( - OpenMLClassificationTask, - OpenMLClusteringTask, - OpenMLLearningCurveTask, - OpenMLRegressionTask, - OpenMLSupervisedTask, - OpenMLTask, -) - -if TYPE_CHECKING: - import scipy.sparse - -logger = logging.getLogger(__name__) - -SKLEARN_PIPELINE_STRING_COMPONENTS = ("drop", "passthrough") -COMPONENT_REFERENCE = "component_reference" -COMPOSITION_STEP_CONSTANT = "composition_step_constant" - - -class SklearnExecutor(ModelExecutor): - """Executor for Scikit-learn estimators.""" - - def seed_model(self, model: Any, seed: int | None = None) -> Any: # noqa: C901 - """Set the random state of all the unseeded components of a model and return the seeded - model. - - Required so that all seed information can be uploaded to OpenML for reproducible results. - - Models that are already seeded will maintain the seed. In this case, - only integer seeds are allowed (An exception is raised when a RandomState was used as - seed). - - Parameters - ---------- - model : sklearn model - The model to be seeded - seed : int - The seed to initialize the RandomState with. Unseeded subcomponents - will be seeded with a random number from the RandomState. - - Returns - ------- - Any - """ - - def _seed_current_object(current_value): - if isinstance(current_value, int): # acceptable behaviour - return False - - if isinstance(current_value, np.random.RandomState): - raise ValueError( - "Models initialized with a RandomState object are not " - "supported. Please seed with an integer. ", - ) - - if current_value is not None: - raise ValueError( - "Models should be seeded with int or None (this should never happen). ", - ) - - return True - - rs = np.random.RandomState(seed) - model_params = model.get_params() - random_states = {} - for param_name in sorted(model_params): - if "random_state" in param_name: - current_value = model_params[param_name] - # important to draw the value at this point (and not in the if - # statement) this way we guarantee that if a different set of - # subflows is seeded, the same number of the random generator is - # used - new_value = rs.randint(0, 2**16) - if _seed_current_object(current_value): - random_states[param_name] = new_value - - # Also seed CV objects! 
- elif isinstance(model_params[param_name], sklearn.model_selection.BaseCrossValidator): - if not hasattr(model_params[param_name], "random_state"): - continue - - current_value = model_params[param_name].random_state - new_value = rs.randint(0, 2**16) - if _seed_current_object(current_value): - model_params[param_name].random_state = new_value - - model.set_params(**random_states) - return model - - def check_if_model_fitted(self, model: Any) -> bool: - """Returns True/False denoting if the model has already been fitted/trained - - Parameters - ---------- - model : Any - - Returns - ------- - bool - """ - from sklearn.exceptions import NotFittedError - from sklearn.utils.validation import check_is_fitted - - try: - # check if model is fitted - check_is_fitted(model) - - # Creating random dummy data of arbitrary size - dummy_data = np.random.uniform(size=(10, 3)) # noqa: NPY002 - # Using 'predict' instead of 'sklearn.utils.validation.check_is_fitted' for a more - # robust check that works across sklearn versions and models. Internally, 'predict' - # should call 'check_is_fitted' for every concerned attribute, thus offering a more - # assured check than explicit calls to 'check_is_fitted' - model.predict(dummy_data) - # Will reach here if the model was fit on a dataset with 3 features - return True - except NotFittedError: # needs to be the first exception to be caught - # Model is not fitted, as is required - return False - except ValueError: - # Will reach here if the model was fit on a dataset with more or less than 3 features - return True - - def _run_model_on_fold( # noqa: PLR0915, PLR0913, C901, PLR0912 - self, - model: Any, - task: OpenMLTask, - X_train: np.ndarray | scipy.sparse.spmatrix | pd.DataFrame, - rep_no: int, - fold_no: int, - y_train: np.ndarray | None = None, - X_test: np.ndarray | scipy.sparse.spmatrix | pd.DataFrame | None = None, - ) -> tuple[ - np.ndarray, - pd.DataFrame | None, - OrderedDict[str, float], - OpenMLRunTrace | None, - ]: - """Run a model on a repeat,fold,subsample triplet of the task and return prediction - information. - - Furthermore, it will measure run time measures in case multi-core behaviour allows this. - * exact user cpu time will be measured if the number of cores is set (recursive throughout - the model) exactly to 1 - * wall clock time will be measured if the number of cores is set (recursive throughout the - model) to any given number (but not when it is set to -1) - - Returns the data that is necessary to construct the OpenML Run object. Is used by - run_task_get_arff_content. Do not use this function unless you know what you are doing. - - Parameters - ---------- - model : Any - The UNTRAINED model to run. The model instance will be copied and not altered. - task : OpenMLTask - The task to run the model on. - X_train : array-like - Training data for the given repetition and fold. - rep_no : int - The repeat of the experiment (0-based; in case of 1 time CV, always 0) - fold_no : int - The fold nr of the experiment (0-based; in case of holdout, always 0) - y_train : Optional[np.ndarray] (default=None) - Target attributes for supervised tasks. In case of classification, these are integer - indices to the potential classes specified by dataset. - X_test : Optional, array-like (default=None) - Test attributes to test for generalization in supervised tasks. - - Returns - ------- - pred_y : np.ndarray - Predictions on the training/test set, depending on the task type. - For supervised tasks, predictions are on the test set. 
- For unsupervised tasks, predictions are on the training set. - proba_y : pd.DataFrame, optional - Predicted probabilities for the test set. - None, if task is not Classification or Learning Curve prediction. - user_defined_measures : OrderedDict[str, float] - User defined measures that were generated on this fold - trace : OpenMLRunTrace, optional - arff trace object from a fitted model and the trace content obtained by - repeatedly calling ``run_model_on_task`` - """ - - def _prediction_to_probabilities( - y: np.ndarray | list, - model_classes: list[Any], - class_labels: list[str] | None, - ) -> pd.DataFrame: - """Transforms predicted probabilities to match with OpenML class indices. - - Parameters - ---------- - y : np.ndarray - Predicted probabilities (possibly omitting classes if they were not present in the - training data). - model_classes : list - List of classes known_predicted by the model, ordered by their index. - class_labels : list - List of classes as stored in the task object fetched from server. - - Returns - ------- - pd.DataFrame - """ - if class_labels is None: - raise ValueError("The task has no class labels") - - if isinstance(y_train, np.ndarray) and isinstance(class_labels[0], str): - # mapping (decoding) the predictions to the categories - # creating a separate copy to not change the expected pred_y type - y = [class_labels[pred] for pred in y] # list or numpy array of predictions - - # model_classes: sklearn classifier mapping from original array id to - # prediction index id - if not isinstance(model_classes, list): - raise ValueError("please convert model classes to list prior to calling this fn") - - # DataFrame allows more accurate mapping of classes as column names - result = pd.DataFrame( - 0, - index=np.arange(len(y)), - columns=model_classes, - dtype=np.float32, - ) - for obs, prediction in enumerate(y): - result.loc[obs, prediction] = 1.0 - return result - - if isinstance(task, OpenMLSupervisedTask): - if y_train is None: - raise TypeError("argument y_train must not be of type None") - if X_test is None: - raise TypeError("argument X_test must not be of type None") - - model_copy = sklearn.base.clone(model, safe=True) - # sanity check: prohibit users from optimizing n_jobs - self._prevent_optimize_n_jobs(model_copy) - # measures and stores runtimes - user_defined_measures = OrderedDict() # type: 'OrderedDict[str, float]' - try: - # for measuring runtime. 
Only available since Python 3.3 - modelfit_start_cputime = time.process_time() - modelfit_start_walltime = time.time() - - if isinstance(task, OpenMLSupervisedTask): - model_copy.fit(X_train, y_train) # type: ignore - elif isinstance(task, OpenMLClusteringTask): - model_copy.fit(X_train) # type: ignore - - modelfit_dur_cputime = (time.process_time() - modelfit_start_cputime) * 1000 - modelfit_dur_walltime = (time.time() - modelfit_start_walltime) * 1000 - - user_defined_measures["usercpu_time_millis_training"] = modelfit_dur_cputime - refit_time = model_copy.refit_time_ * 1000 if hasattr(model_copy, "refit_time_") else 0 # type: ignore - user_defined_measures["wall_clock_time_millis_training"] = modelfit_dur_walltime - - except AttributeError as e: - # typically happens when training a regressor on classification task - raise PyOpenMLError(str(e)) from e - - if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)): - # search for model classes_ (might differ depending on modeltype) - # first, pipelines are a special case (these don't have a classes_ - # object, but rather borrows it from the last step. We do this manually, - # because of the BaseSearch check) - if isinstance(model_copy, sklearn.pipeline.Pipeline): - used_estimator = model_copy.steps[-1][-1] - else: - used_estimator = model_copy - - if self._is_hpo_class(used_estimator): - model_classes = used_estimator.best_estimator_.classes_ - else: - model_classes = used_estimator.classes_ - - if not isinstance(model_classes, list): - model_classes = model_classes.tolist() - - # to handle the case when dataset is numpy and categories are encoded - # however the class labels stored in task are still categories - if isinstance(y_train, np.ndarray) and isinstance( - cast("List", task.class_labels)[0], - str, - ): - model_classes = [cast("List[str]", task.class_labels)[i] for i in model_classes] - - modelpredict_start_cputime = time.process_time() - modelpredict_start_walltime = time.time() - - # In supervised learning this returns the predictions for Y, in clustering - # it returns the clusters - if isinstance(task, OpenMLSupervisedTask): - pred_y = model_copy.predict(X_test) - elif isinstance(task, OpenMLClusteringTask): - pred_y = model_copy.predict(X_train) - else: - raise ValueError(task) - - modelpredict_duration_cputime = (time.process_time() - modelpredict_start_cputime) * 1000 - user_defined_measures["usercpu_time_millis_testing"] = modelpredict_duration_cputime - user_defined_measures["usercpu_time_millis"] = ( - modelfit_dur_cputime + modelpredict_duration_cputime - ) - modelpredict_duration_walltime = (time.time() - modelpredict_start_walltime) * 1000 - user_defined_measures["wall_clock_time_millis_testing"] = modelpredict_duration_walltime - user_defined_measures["wall_clock_time_millis"] = ( - modelfit_dur_walltime + modelpredict_duration_walltime + refit_time - ) - - if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)): - try: - proba_y = model_copy.predict_proba(X_test) - proba_y = pd.DataFrame(proba_y, columns=model_classes) # handles X_test as numpy - except AttributeError: # predict_proba is not available when probability=False - proba_y = _prediction_to_probabilities(pred_y, model_classes, task.class_labels) - - if task.class_labels is not None: - if proba_y.shape[1] != len(task.class_labels): - # Remap the probabilities in case there was a class missing - # at training time. By default, the classification targets - # are mapped to be zero-based indices to the actual classes. 
- # Therefore, the model_classes contain the correct indices to - # the correct probability array. Example: - # classes in the dataset: 0, 1, 2, 3, 4, 5 - # classes in the training set: 0, 1, 2, 4, 5 - # then we need to add a column full of zeros into the probabilities - # for class 3 because the rest of the library expects that the - # probabilities are ordered the same way as the classes are ordered). - message = ( - f"Estimator only predicted for {proba_y.shape[1]}/{len(task.class_labels)}" - " classes!" - ) - warnings.warn(message, stacklevel=2) - openml.config.logger.warning(message) - - for _i, col in enumerate(task.class_labels): - # adding missing columns with 0 probability - if col not in model_classes: - proba_y[col] = 0 - # We re-order the columns to move possibly added missing columns into place. - proba_y = proba_y[task.class_labels] - else: - raise ValueError("The task has no class labels") - - if not np.all(set(proba_y.columns) == set(task.class_labels)): - missing_cols = list(set(task.class_labels) - set(proba_y.columns)) - raise ValueError("Predicted probabilities missing for the columns: ", missing_cols) - - elif isinstance(task, (OpenMLRegressionTask, OpenMLClusteringTask)): - proba_y = None - else: - raise TypeError(type(task)) - - if self._is_hpo_class(model_copy): - trace_data = self._extract_trace_data(model_copy, rep_no, fold_no) - trace: OpenMLRunTrace | None = self._obtain_arff_trace( - model_copy, - trace_data, - ) - else: - trace = None - - return pred_y, proba_y, user_defined_measures, trace - - def obtain_parameter_values( # noqa: C901, PLR0915 - self, - flow: OpenMLFlow, - model: Any = None, - ) -> list[dict[str, Any]]: - """Extracts all parameter settings required for the flow from the model. - - If no explicit model is provided, the parameters will be extracted from `flow.model` - instead. - - Parameters - ---------- - flow : OpenMLFlow - OpenMLFlow object (containing flow ids, i.e., it has to be downloaded from the server) - - model: Any, optional (default=None) - The model from which to obtain the parameter values. Must match the flow signature. - If None, use the model specified in ``OpenMLFlow.model``. - - Returns - ------- - list - A list of dicts, where each dict has the following entries: - - ``oml:name`` : str: The OpenML parameter name - - ``oml:value`` : mixed: A representation of the parameter value - - ``oml:component`` : int: flow id to which the parameter belongs - """ - openml.flows.functions._check_flow_for_server_id(flow) - - def get_flow_dict(_flow): - flow_map = {_flow.name: _flow.flow_id} - for subflow in _flow.components: - flow_map.update(get_flow_dict(_flow.components[subflow])) - return flow_map - - def extract_parameters( # noqa: PLR0915, PLR0912, C901 - _flow, - _flow_dict, - component_model, - _main_call=False, # noqa: FBT002 - main_id=None, - ): - def is_subcomponent_specification(values): - # checks whether the current value can be a specification of - # subcomponents, as for example the value for steps parameter - # (in Pipeline) or transformers parameter (in - # ColumnTransformer). - return ( - # Specification requires list/tuple of list/tuple with - # at least length 2. 
- isinstance(values, (tuple, list)) - and all(isinstance(item, (tuple, list)) and len(item) > 1 for item in values) - # And each component needs to be a flow or interpretable string - and all( - isinstance(item[1], openml.flows.OpenMLFlow) - or ( - isinstance(item[1], str) - and item[1] in SKLEARN_PIPELINE_STRING_COMPONENTS - ) - for item in values - ) - ) - - # _flow is openml flow object, _param dict maps from flow name to flow - # id for the main call, the param dict can be overridden (useful for - # unit tests / sentinels) this way, for flows without subflows we do - # not have to rely on _flow_dict - exp_parameters = set(_flow.parameters) - if ( - isinstance(component_model, str) - and component_model in SKLEARN_PIPELINE_STRING_COMPONENTS - ): - model_parameters = set() - else: - model_parameters = set(component_model.get_params(deep=False)) - if len(exp_parameters.symmetric_difference(model_parameters)) != 0: - flow_params = sorted(exp_parameters) - model_params = sorted(model_parameters) - raise ValueError( - "Parameters of the model do not match the " - "parameters expected by the " - "flow:\nexpected flow parameters: " - f"{flow_params}\nmodel parameters: {model_params}", - ) - exp_components = set(_flow.components) - if ( - isinstance(component_model, str) - and component_model in SKLEARN_PIPELINE_STRING_COMPONENTS - ): - model_components = set() - else: - _ = set(component_model.get_params(deep=False)) - model_components = { - mp - for mp in component_model.get_params(deep=True) - if "__" not in mp and mp not in _ - } - if len(exp_components.symmetric_difference(model_components)) != 0: - is_problem = True - if len(exp_components - model_components) > 0: - # If an expected component is not returned as a component by get_params(), - # this means that it is also a parameter -> we need to check that this is - # actually the case - difference = exp_components - model_components - component_in_model_parameters = [] - for component in difference: - if component in model_parameters: - component_in_model_parameters.append(True) - else: - component_in_model_parameters.append(False) - is_problem = not all(component_in_model_parameters) - if is_problem: - flow_components = sorted(exp_components) - model_components = sorted(model_components) - raise ValueError( - "Subcomponents of the model do not match the " - "parameters expected by the " - "flow:\nexpected flow subcomponents: " - f"{flow_components}\nmodel subcomponents: {model_components}", - ) - - _params = [] - for _param_name in _flow.parameters: - _current = OrderedDict() - _current["oml:name"] = _param_name - - current_param_values = self.model_to_flow(component_model.get_params()[_param_name]) - - # Try to filter out components (a.k.a. subflows) which are - # handled further down in the code (by recursively calling - # this function)! - if isinstance(current_param_values, openml.flows.OpenMLFlow): - continue - - if is_subcomponent_specification(current_param_values): - # complex parameter value, with subcomponents - parsed_values = [] - for subcomponent in current_param_values: - # scikit-learn stores usually tuples in the form - # (name (str), subcomponent (mixed), argument - # (mixed)). OpenML replaces the subcomponent by an - # OpenMLFlow object. - if len(subcomponent) < 2 or len(subcomponent) > 3: - raise ValueError("Component reference should be size {2,3}. 
") - - subcomponent_identifier = subcomponent[0] - subcomponent_flow = subcomponent[1] - if not isinstance(subcomponent_identifier, str): - raise TypeError( - "Subcomponent identifier should be of type string, " - f"but is {type(subcomponent_identifier)}", - ) - if not isinstance(subcomponent_flow, (openml.flows.OpenMLFlow, str)): - if ( - isinstance(subcomponent_flow, str) - and subcomponent_flow in SKLEARN_PIPELINE_STRING_COMPONENTS - ): - pass - else: - raise TypeError( - "Subcomponent flow should be of type flow, but is" - f" {type(subcomponent_flow)}", - ) - - current = { - "oml-python:serialized_object": COMPONENT_REFERENCE, - "value": { - "key": subcomponent_identifier, - "step_name": subcomponent_identifier, - }, - } - if len(subcomponent) == 3: - if not isinstance(subcomponent[2], list) and not isinstance( - subcomponent[2], - OrderedDict, - ): - raise TypeError( - "Subcomponent argument should be list or OrderedDict", - ) - current["value"]["argument_1"] = subcomponent[2] - parsed_values.append(current) - parsed_values = json.dumps(parsed_values) - else: - # vanilla parameter value - parsed_values = json.dumps(current_param_values) - - _current["oml:value"] = parsed_values - if _main_call: - _current["oml:component"] = main_id - else: - _current["oml:component"] = _flow_dict[_flow.name] - _params.append(_current) - - for _identifier in _flow.components: - subcomponent_model = component_model.get_params()[_identifier] - _params.extend( - extract_parameters( - _flow.components[_identifier], - _flow_dict, - subcomponent_model, - ), - ) - return _params - - flow_dict = get_flow_dict(flow) - model = model if model is not None else flow.model - return extract_parameters(flow, flow_dict, model, _main_call=True, main_id=flow.flow_id) - - def _openml_param_name_to_sklearn( - self, - openml_parameter: openml.setups.OpenMLParameter, - flow: OpenMLFlow, - ) -> str: - """ - Converts the name of an OpenMLParameter into the sklean name, given a flow. - - Parameters - ---------- - openml_parameter: OpenMLParameter - The parameter under consideration - - flow: OpenMLFlow - The flow that provides context. - - Returns - ------- - sklearn_parameter_name: str - The name the parameter will have once used in scikit-learn - """ - if not isinstance(openml_parameter, openml.setups.OpenMLParameter): - raise ValueError("openml_parameter should be an instance of OpenMLParameter") - if not isinstance(flow, OpenMLFlow): - raise ValueError("flow should be an instance of OpenMLFlow") - - flow_structure = flow.get_structure("name") - if openml_parameter.flow_name not in flow_structure: - raise ValueError("Obtained OpenMLParameter and OpenMLFlow do not correspond. ") - name = openml_parameter.flow_name # for PEP8 - return "__".join(flow_structure[name] + [openml_parameter.parameter_name]) - - ################################################################################################ - # Methods for hyperparameter optimization - - def _is_hpo_class(self, model: Any) -> bool: - """Check whether the model performs hyperparameter optimization. - - Used to check whether an optimization trace can be extracted from the model after - running it. - - Parameters - ---------- - model : Any - - Returns - ------- - bool - """ - return isinstance(model, sklearn.model_selection._search.BaseSearchCV) - - def instantiate_model_from_hpo_class( - self, - model: Any, - trace_iteration: OpenMLTraceIteration, - ) -> Any: - """Instantiate a ``base_estimator`` which can be searched over by the hyperparameter - optimization model. 
- - Parameters - ---------- - model : Any - A hyperparameter optimization model which defines the model to be instantiated. - trace_iteration : OpenMLTraceIteration - Describing the hyperparameter settings to instantiate. - - Returns - ------- - Any - """ - if not self._is_hpo_class(model): - raise AssertionError( - f"Flow model {model} is not an instance of" - " sklearn.model_selection._search.BaseSearchCV", - ) - base_estimator = model.estimator - base_estimator.set_params(**trace_iteration.get_parameters()) - return base_estimator - - def _extract_trace_data(self, model, rep_no, fold_no): - """Extracts data from a machine learning model's cross-validation results - and creates an ARFF (Attribute-Relation File Format) trace. - - Parameters - ---------- - model : Any - A fitted hyperparameter optimization model. - rep_no : int - The repetition number. - fold_no : int - The fold number. - - Returns - ------- - A list of ARFF tracecontent. - """ - arff_tracecontent = [] - for itt_no in range(len(model.cv_results_["mean_test_score"])): - # we use the string values for True and False, as it is defined in - # this way by the OpenML server - selected = "false" - if itt_no == model.best_index_: - selected = "true" - test_score = model.cv_results_["mean_test_score"][itt_no] - arff_line = [rep_no, fold_no, itt_no, test_score, selected] - for key in model.cv_results_: - if key.startswith("param_"): - value = model.cv_results_[key][itt_no] - # Built-in serializer does not convert all numpy types, - # these methods convert them to built-in types instead. - if isinstance(value, np.generic): - # For scalars it actually returns scalars, not a list - value = value.tolist() - serialized_value = json.dumps(value) if value is not np.ma.masked else np.nan - arff_line.append(serialized_value) - arff_tracecontent.append(arff_line) - return arff_tracecontent - - def _obtain_arff_trace( - self, - model: Any, - trace_content: list, - ) -> OpenMLRunTrace: - """Create arff trace object from a fitted model and the trace content obtained by - repeatedly calling ``run_model_on_task``. - - Parameters - ---------- - model : Any - A fitted hyperparameter optimization model. - - trace_content : List[List] - Trace content obtained by ``openml.runs.run_flow_on_task``. 
- - Returns - ------- - OpenMLRunTrace - """ - if not self._is_hpo_class(model): - raise AssertionError( - f"Flow model {model} is not an instance of " - "sklearn.model_selection._search.BaseSearchCV", - ) - if not hasattr(model, "cv_results_"): - raise ValueError("model should contain `cv_results_`") - - # attributes that will be in trace arff, regardless of the model - trace_attributes = [ - ("repeat", "NUMERIC"), - ("fold", "NUMERIC"), - ("iteration", "NUMERIC"), - ("evaluation", "NUMERIC"), - ("selected", ["true", "false"]), - ] - - # model dependent attributes for trace arff - for key in model.cv_results_: - if key.startswith("param_"): - # supported types should include all types, including bool, - # int float - supported_basic_types = (bool, int, float, str) - for param_value in model.cv_results_[key]: - if isinstance(param_value, np.generic): - param_value = param_value.tolist() # noqa: PLW2901 - if ( - isinstance(param_value, supported_basic_types) - or param_value is None - or param_value is np.ma.masked - ): - # basic string values - type = "STRING" # noqa: A001 - elif isinstance(param_value, (list, tuple)) and all( - isinstance(i, int) for i in param_value - ): - # list of integers (usually for selecting features) - # hyperparameter layer_sizes of MLPClassifier - type = "STRING" # noqa: A001 - else: - raise TypeError(f"Unsupported param type in param grid: {key}") - - # renamed the attribute param to parameter, as this is a required - # OpenML convention - this also guards against name collisions - # with the required trace attributes - attribute = (PREFIX + key[6:], type) # type: ignore - trace_attributes.append(attribute) - - return OpenMLRunTrace.generate( - trace_attributes, - trace_content, - ) diff --git a/openml/extensions/sklearn/serializer.py b/openml/extensions/sklearn/serializer.py deleted file mode 100644 index 26413afec..000000000 --- a/openml/extensions/sklearn/serializer.py +++ /dev/null @@ -1,1508 +0,0 @@ -# License: BSD 3-Clause - -"""Serializer for the Scikit-learn estimators.""" - -from __future__ import annotations - -import contextlib -import copy -import importlib -import inspect -import json -import logging -import re -import sys -import traceback -import warnings -from collections import OrderedDict -from json.decoder import JSONDecodeError -from re import IGNORECASE -from typing import Any, Callable, Sized - -import numpy as np -import scipy.stats -import sklearn.base -import sklearn.model_selection -import sklearn.pipeline -from packaging.version import Version - -import openml -from openml.exceptions import PyOpenMLError -from openml.extensions.base import ModelSerializer -from openml.flows import OpenMLFlow - -logger = logging.getLogger(__name__) - - -DEPENDENCIES_PATTERN = re.compile( - r"^(?P[\w\-]+)((?P==|>=|>)" - r"(?P(\d+\.)?(\d+\.)?(\d+)?(dev)?[0-9]*))?$", -) - -# NOTE(eddiebergman): This was imported before but became deprecated, -# as a result I just enumerated them manually by copy-ing and pasting, -# recommended solution in Numpy 2.0 guide was to explicitly list them. 
-SIMPLE_NUMPY_TYPES = [ - np.int8, - np.int16, - np.int32, - np.int64, - np.longlong, - np.uint8, - np.uint16, - np.uint32, - np.uint64, - np.ulonglong, - np.float16, - np.float32, - np.float64, - np.longdouble, - np.complex64, - np.complex128, - np.clongdouble, -] -SIMPLE_TYPES = (bool, int, float, str, *SIMPLE_NUMPY_TYPES) - - -SKLEARN_PIPELINE_STRING_COMPONENTS = ("drop", "passthrough") -COMPONENT_REFERENCE = "component_reference" -COMPOSITION_STEP_CONSTANT = "composition_step_constant" - - -class SklearnSerializer(ModelSerializer): - """Serializer for Scikit-learn estimators.""" - - @classmethod - def can_handle_flow(cls, flow: OpenMLFlow) -> bool: - """Check whether a given describes a scikit-learn estimator. - - This is done by parsing the ``external_version`` field. - - Parameters - ---------- - flow : OpenMLFlow - - Returns - ------- - bool - """ - return cls._is_sklearn_flow(flow) - - @classmethod - def can_handle_model(cls, model: Any) -> bool: - """Check whether a model is an instance of ``sklearn.base.BaseEstimator``. - - Parameters - ---------- - model : Any - - Returns - ------- - bool - """ - return isinstance(model, sklearn.base.BaseEstimator) - - def flow_to_model( - self, - flow: OpenMLFlow, - initialize_with_defaults: bool = False, # noqa: FBT001, FBT002 - strict_version: bool = True, # noqa: FBT001, FBT002 - ) -> Any: - """Initializes a sklearn model based on a flow. - - Parameters - ---------- - flow : mixed - the object to deserialize (can be flow object, or any serialized - parameter value that is accepted by) - - initialize_with_defaults : bool, optional (default=False) - If this flag is set, the hyperparameter values of flows will be - ignored and a flow with its defaults is returned. - - strict_version : bool, default=True - Whether to fail if version requirements are not fulfilled. - - Returns - ------- - mixed - """ - return self._deserialize_sklearn( - flow, - initialize_with_defaults=initialize_with_defaults, - strict_version=strict_version, - ) - - def model_to_flow(self, model: Any) -> OpenMLFlow: - """Transform a scikit-learn model to a flow for uploading it to OpenML. - - Parameters - ---------- - model : Any - - Returns - ------- - OpenMLFlow - """ - # Necessary to make pypy not complain about all the different possible return types - return self._serialize_sklearn(model) - - @classmethod - def trim_flow_name( # noqa: C901 - cls, - long_name: str, - extra_trim_length: int = 100, - _outer: bool = True, # noqa: FBT001, FBT002 - ) -> str: - """Shorten generated sklearn flow name to at most ``max_length`` characters. - - Flows are assumed to have the following naming structure: - ``(model_selection)? (pipeline)? (steps)+`` - and will be shortened to: - ``sklearn.(selection.)?(pipeline.)?(steps)+`` - e.g. (white spaces and newlines added for readability) - - .. code :: - - sklearn.pipeline.Pipeline( - columntransformer=sklearn.compose._column_transformer.ColumnTransformer( - numeric=sklearn.pipeline.Pipeline( - imputer=sklearn.preprocessing.imputation.Imputer, - standardscaler=sklearn.preprocessing.data.StandardScaler), - nominal=sklearn.pipeline.Pipeline( - simpleimputer=sklearn.impute.SimpleImputer, - onehotencoder=sklearn.preprocessing._encoders.OneHotEncoder)), - variancethreshold=sklearn.feature_selection.variance_threshold.VarianceThreshold, - svc=sklearn.svm.classes.SVC) - - -> - ``sklearn.Pipeline(ColumnTransformer,VarianceThreshold,SVC)`` - - Parameters - ---------- - long_name : str - The full flow name generated by the scikit-learn extension. 
- extra_trim_length: int (default=100) - If the trimmed name would exceed `extra_trim_length` characters, additional trimming - of the short name is performed. This reduces the produced short name length. - There is no guarantee the end result will not exceed `extra_trim_length`. - _outer : bool (default=True) - For internal use only. Specifies if the function is called recursively. - - Returns - ------- - str - - """ - - def remove_all_in_parentheses(string: str) -> str: - string, removals = re.subn(r"\([^()]*\)", "", string) - while removals > 0: - string, removals = re.subn(r"\([^()]*\)", "", string) - return string - - # Generally, we want to trim all hyperparameters, the exception to that is for model - # selection, as the `estimator` hyperparameter is very indicative of what is in the flow. - # So we first trim name of the `estimator` specified in mode selection. For reference, in - # the example below, we want to trim `sklearn.tree.tree.DecisionTreeClassifier`, and - # keep it in the final trimmed flow name: - # sklearn.pipeline.Pipeline(Imputer=sklearn.preprocessing.imputation.Imputer, - # VarianceThreshold=sklearn.feature_selection.variance_threshold.VarianceThreshold, # noqa: ERA001, E501 - # Estimator=sklearn.model_selection._search.RandomizedSearchCV(estimator= - # sklearn.tree.tree.DecisionTreeClassifier)) - if "sklearn.model_selection" in long_name: - start_index = long_name.index("sklearn.model_selection") - estimator_start = ( - start_index + long_name[start_index:].index("estimator=") + len("estimator=") - ) - - model_select_boilerplate = long_name[start_index:estimator_start] - # above is .g. "sklearn.model_selection._search.RandomizedSearchCV(estimator=" - model_selection_class = model_select_boilerplate.split("(")[0].split(".")[-1] - - # Now we want to also find and parse the `estimator`, for this we find the closing - # parenthesis to the model selection technique: - closing_parenthesis_expected = 1 - for char in long_name[estimator_start:]: - if char == "(": - closing_parenthesis_expected += 1 - if char == ")": - closing_parenthesis_expected -= 1 - if closing_parenthesis_expected == 0: - break - - _end: int = estimator_start + len(long_name[estimator_start:]) - 1 - model_select_pipeline = long_name[estimator_start:_end] - - trimmed_pipeline = cls.trim_flow_name(model_select_pipeline, _outer=False) - _, trimmed_pipeline = trimmed_pipeline.split(".", maxsplit=1) # trim module prefix - model_select_short = f"sklearn.{model_selection_class}[{trimmed_pipeline}]" - name = long_name[:start_index] + model_select_short + long_name[_end + 1 :] - else: - name = long_name - - module_name = long_name.split(".")[0] - short_name = module_name + ".{}" - - if name.startswith("sklearn.pipeline"): - full_pipeline_class, pipeline = name[:-1].split("(", maxsplit=1) - pipeline_class = full_pipeline_class.split(".")[-1] - # We don't want nested pipelines in the short name, so we trim all complicated - # subcomponents, i.e. those with parentheses: - pipeline = remove_all_in_parentheses(pipeline) - - # then the pipeline steps are formatted e.g.: - # step1name=sklearn.submodule.ClassName,step2name... - components = [component.split(".")[-1] for component in pipeline.split(",")] - pipeline = f"{pipeline_class}({','.join(components)})" - if len(short_name.format(pipeline)) > extra_trim_length: - pipeline = f"{pipeline_class}(...,{components[-1]})" - else: - # Just a simple component: e.g. 
sklearn.tree.DecisionTreeClassifier - pipeline = remove_all_in_parentheses(name).split(".")[-1] - - if not _outer: - # Anything from parenthesis in inner calls should not be culled, so we use brackets - pipeline = pipeline.replace("(", "[").replace(")", "]") - else: - # Square brackets may be introduced with nested model_selection - pipeline = pipeline.replace("[", "(").replace("]", ")") - - return short_name.format(pipeline) - - @classmethod - def _min_dependency_str(cls, sklearn_version: str) -> str: - """Returns a string containing the minimum dependencies for the sklearn version passed. - - Parameters - ---------- - sklearn_version : str - A version string of the xx.xx.xx - - Returns - ------- - str - """ - # This explicit check is necessary to support existing entities on the OpenML servers - # that used the fixed dependency string (in the else block) - if Version(openml.__version__) > Version("0.11"): - # OpenML v0.11 onwards supports sklearn>=0.24 - # assumption: 0.24 onwards sklearn should contain a _min_dependencies.py file with - # variables declared for extracting minimum dependency for that version - if Version(sklearn_version) >= Version("0.24"): - from sklearn import _min_dependencies as _mindep - - dependency_list = { - "numpy": f"{_mindep.NUMPY_MIN_VERSION}", - "scipy": f"{_mindep.SCIPY_MIN_VERSION}", - "joblib": f"{_mindep.JOBLIB_MIN_VERSION}", - "threadpoolctl": f"{_mindep.THREADPOOLCTL_MIN_VERSION}", - } - elif Version(sklearn_version) >= Version("0.23"): - dependency_list = { - "numpy": "1.13.3", - "scipy": "0.19.1", - "joblib": "0.11", - "threadpoolctl": "2.0.0", - } - if Version(sklearn_version).micro == 0: - dependency_list.pop("threadpoolctl") - elif Version(sklearn_version) >= Version("0.21"): - dependency_list = {"numpy": "1.11.0", "scipy": "0.17.0", "joblib": "0.11"} - elif Version(sklearn_version) >= Version("0.19"): - dependency_list = {"numpy": "1.8.2", "scipy": "0.13.3"} - else: - dependency_list = {"numpy": "1.6.1", "scipy": "0.9"} - else: - # this is INCORRECT for sklearn versions >= 0.19 and < 0.24 - # given that OpenML has existing flows uploaded with such dependency information, - # we change no behaviour for older sklearn version, however from 0.24 onwards - # the dependency list will be accurately updated for any flow uploaded to OpenML - dependency_list = {"numpy": "1.6.1", "scipy": "0.9"} - - sklearn_dep = f"sklearn=={sklearn_version}" - dep_str = "\n".join([f"{k}>={v}" for k, v in dependency_list.items()]) - return "\n".join([sklearn_dep, dep_str]) - - def _deserialize_sklearn( # noqa: PLR0915, C901, PLR0912 - self, - o: Any, - components: dict | None = None, - initialize_with_defaults: bool = False, # noqa: FBT001, FBT002 - recursion_depth: int = 0, - strict_version: bool = True, # noqa: FBT002, FBT001 - ) -> Any: - """Recursive function to deserialize a scikit-learn flow. - - This function inspects an object to deserialize and decides how to do so. This function - delegates all work to the respective functions to deserialize special data structures etc. - This function works on everything that has been serialized to OpenML: OpenMLFlow, - components (which are flows themselves), functions, hyperparameter distributions (for - random search) and the actual hyperparameter values themselves. - - Parameters - ---------- - o : mixed - the object to deserialize (can be flow object, or any serialized - parameter value that is accepted by) - - components : Optional[dict] - Components of the current flow being de-serialized. 
These will not be used when - de-serializing the actual flow, but when de-serializing a component reference. - - initialize_with_defaults : bool, optional (default=False) - If this flag is set, the hyperparameter values of flows will be - ignored and a flow with its defaults is returned. - - recursion_depth : int - The depth at which this flow is called, mostly for debugging - purposes - - strict_version : bool, default=True - Whether to fail if version requirements are not fulfilled. - - Returns - ------- - mixed - """ - logger.info( - "-{} flow_to_sklearn START o={}, components={}, init_defaults={}".format( - "-" * recursion_depth, o, components, initialize_with_defaults - ), - ) - depth_pp = recursion_depth + 1 # shortcut var, depth plus plus - - # First, we need to check whether the presented object is a json string. - # JSON strings are used to encoder parameter values. By passing around - # json strings for parameters, we make sure that we can flow_to_sklearn - # the parameter values to the correct type. - - if isinstance(o, str): - with contextlib.suppress(JSONDecodeError): - o = json.loads(o) - - if isinstance(o, dict): - # Check if the dict encodes a 'special' object, which could not - # easily converted into a string, but rather the information to - # re-create the object were stored in a dictionary. - if "oml-python:serialized_object" in o: - serialized_type = o["oml-python:serialized_object"] - value = o["value"] - if serialized_type == "type": - rval = self._deserialize_type(value) - elif serialized_type == "rv_frozen": - rval = self._deserialize_rv_frozen(value) - elif serialized_type == "function": - rval = self._deserialize_function(value) - elif serialized_type in (COMPOSITION_STEP_CONSTANT, COMPONENT_REFERENCE): - if serialized_type == COMPOSITION_STEP_CONSTANT: - pass - elif serialized_type == COMPONENT_REFERENCE: - value = self._deserialize_sklearn( - value, - recursion_depth=depth_pp, - strict_version=strict_version, - ) - else: - raise NotImplementedError(serialized_type) - assert components is not None # Necessary for mypy - step_name = value["step_name"] - key = value["key"] - component = self._deserialize_sklearn( - components[key], - initialize_with_defaults=initialize_with_defaults, - recursion_depth=depth_pp, - strict_version=strict_version, - ) - # The component is now added to where it should be used - # later. It should not be passed to the constructor of the - # main flow object. 
- del components[key] - if step_name is None: - rval = component - elif "argument_1" not in value: - rval = (step_name, component) - else: - rval = (step_name, component, value["argument_1"]) - elif serialized_type == "cv_object": - rval = self._deserialize_cross_validator( - value, - recursion_depth=recursion_depth, - strict_version=strict_version, - ) - else: - raise ValueError(f"Cannot flow_to_sklearn {serialized_type}") - - else: - rval = OrderedDict( - ( - self._deserialize_sklearn( - o=key, - components=components, - initialize_with_defaults=initialize_with_defaults, - recursion_depth=depth_pp, - strict_version=strict_version, - ), - self._deserialize_sklearn( - o=value, - components=components, - initialize_with_defaults=initialize_with_defaults, - recursion_depth=depth_pp, - strict_version=strict_version, - ), - ) - for key, value in sorted(o.items()) - ) - elif isinstance(o, (list, tuple)): - rval = [ - self._deserialize_sklearn( - o=element, - components=components, - initialize_with_defaults=initialize_with_defaults, - recursion_depth=depth_pp, - strict_version=strict_version, - ) - for element in o - ] - if isinstance(o, tuple): - rval = tuple(rval) - elif isinstance(o, (bool, int, float, str)) or o is None: - rval = o - elif isinstance(o, OpenMLFlow): - if not self._is_sklearn_flow(o): - raise ValueError("Only sklearn flows can be reinstantiated") - rval = self._deserialize_model( - flow=o, - keep_defaults=initialize_with_defaults, - recursion_depth=recursion_depth, - strict_version=strict_version, - ) - else: - raise TypeError(o) - logger.info(f"-{'-' * recursion_depth} flow_to_sklearn END o={o}, rval={rval}") - return rval - - def _serialize_sklearn(self, o: Any, parent_model: Any | None = None) -> Any: # noqa: PLR0912, C901 - rval = None # type: Any - - # TODO: assert that only on first recursion lvl `parent_model` can be None - if self.is_estimator(o): - # is the main model or a submodel - rval = self._serialize_model(o) - elif ( - isinstance(o, (list, tuple)) - and len(o) == 2 - and o[1] in SKLEARN_PIPELINE_STRING_COMPONENTS - and isinstance(parent_model, sklearn.pipeline._BaseComposition) - ): - rval = o - elif isinstance(o, (list, tuple)): - # TODO: explain what type of parameter is here - rval = [self._serialize_sklearn(element, parent_model) for element in o] - if isinstance(o, tuple): - rval = tuple(rval) - elif isinstance(o, SIMPLE_TYPES) or o is None: - if isinstance(o, tuple(SIMPLE_NUMPY_TYPES)): - o = o.item() # type: ignore - # base parameter values - rval = o - elif isinstance(o, dict): - # TODO: explain what type of parameter is here - if not isinstance(o, OrderedDict): - o = OrderedDict(sorted(o.items())) - - rval = OrderedDict() - for key, value in o.items(): - if not isinstance(key, str): - raise TypeError( - "Can only use string as keys, you passed " - f"type {type(key)} for value {key!s}.", - ) - _key = self._serialize_sklearn(key, parent_model) - rval[_key] = self._serialize_sklearn(value, parent_model) - elif isinstance(o, type): - # TODO: explain what type of parameter is here - rval = self._serialize_type(o) - elif isinstance(o, scipy.stats.distributions.rv_frozen): - rval = self._serialize_rv_frozen(o) - # This only works for user-defined functions (and not even partial). 
- # I think this is exactly what we want here as there shouldn't be any - # built-in or functool.partials in a pipeline - elif inspect.isfunction(o): - # TODO: explain what type of parameter is here - rval = self._serialize_function(o) - elif self._is_cross_validator(o): - # TODO: explain what type of parameter is here - rval = self._serialize_cross_validator(o) - else: - raise TypeError(o, type(o)) - - return rval - - def is_estimator(self, model: Any) -> bool: - """Check whether the given model is a scikit-learn estimator. - - This function is only required for backwards compatibility and will be removed in the - near future. - - Parameters - ---------- - model : Any - - Returns - ------- - bool - """ - o = model - return hasattr(o, "fit") and hasattr(o, "get_params") and hasattr(o, "set_params") - - def get_version_information(self) -> list[str]: - """List versions of libraries required by the flow. - - Libraries listed are ``Python``, ``scikit-learn``, ``numpy`` and ``scipy``. - - Returns - ------- - List - """ - # This can possibly be done by a package such as pyxb, but I could not get - # it to work properly. - import numpy - import scipy - import sklearn - - major, minor, micro, _, _ = sys.version_info - python_version = f"Python_{'.'.join([str(major), str(minor), str(micro)])}." - sklearn_version = f"Sklearn_{sklearn.__version__}." - numpy_version = f"NumPy_{numpy.__version__}." # type: ignore - scipy_version = f"SciPy_{scipy.__version__}." - - return [python_version, sklearn_version, numpy_version, scipy_version] - - def create_setup_string(self, model: Any) -> str: # noqa: ARG002 - """Create a string which can be used to reinstantiate the given model. - - Parameters - ---------- - model : Any - - Returns - ------- - str - """ - return " ".join(self.get_version_information()) - - def _is_cross_validator(self, o: Any) -> bool: - return isinstance(o, sklearn.model_selection.BaseCrossValidator) - - @classmethod - def _is_sklearn_flow(cls, flow: OpenMLFlow) -> bool: - sklearn_dependency = isinstance(flow.dependencies, str) and "sklearn" in flow.dependencies - sklearn_as_external = isinstance(flow.external_version, str) and ( - flow.external_version.startswith("sklearn==") or ",sklearn==" in flow.external_version - ) - return sklearn_dependency or sklearn_as_external - - def _get_sklearn_description(self, model: Any, char_lim: int = 1024) -> str: - r"""Fetches the sklearn function docstring for the flow description - - Retrieves the sklearn docstring available and does the following: - * If length of docstring <= char_lim, then returns the complete docstring - * Else, trims the docstring till it encounters a 'Read more in the :ref:' - * Or till it encounters a 'Parameters\n----------\n' - The final string returned is at most of length char_lim with leading and - trailing whitespaces removed. - - Parameters - ---------- - model : sklearn model - char_lim : int - Specifying the max length of the returned string. - OpenML servers have a constraint of 1024 characters for the 'description' field. - - Returns - ------- - str - """ - - def match_format(s): - return f"{s}\n{len(s) * '-'}\n" - - s = inspect.getdoc(model) - if s is None: - return "" - try: - # trim till 'Read more' - pattern = "Read more in the :ref:" - index = s.index(pattern) - s = s[:index] - # trimming docstring to be within char_lim - if len(s) > char_lim: - s = f"{s[: char_lim - 3]}..." - return s.strip() - except ValueError: - logger.warning( - "'Read more' not found in descriptions. 
" - "Trying to trim till 'Parameters' if available in docstring.", - ) - try: - # if 'Read more' doesn't exist, trim till 'Parameters' - pattern = "Parameters" - index = s.index(match_format(pattern)) - except ValueError: - # returning full docstring - logger.warning("'Parameters' not found in docstring. Omitting docstring trimming.") - index = len(s) - s = s[:index] - # trimming docstring to be within char_lim - if len(s) > char_lim: - s = f"{s[: char_lim - 3]}..." - return s.strip() - - def _extract_sklearn_parameter_docstring(self, model) -> None | str: - """Extracts the part of sklearn docstring containing parameter information - - Fetches the entire docstring and trims just the Parameter section. - The assumption is that 'Parameters' is the first section in sklearn docstrings, - followed by other sections titled 'Attributes', 'See also', 'Note', 'References', - appearing in that order if defined. - Returns a None if no section with 'Parameters' can be found in the docstring. - - Parameters - ---------- - model : sklearn model - - Returns - ------- - str, or None - """ - - def match_format(s): - return f"{s}\n{len(s) * '-'}\n" - - s = inspect.getdoc(model) - if s is None: - return None - try: - index1 = s.index(match_format("Parameters")) - except ValueError as e: - # when sklearn docstring has no 'Parameters' section - logger.warning(f"{match_format('Parameters')} {e}") - return None - - headings = ["Attributes", "Notes", "See also", "Note", "References"] - for h in headings: - try: - # to find end of Parameters section - index2 = s.index(match_format(h)) - break - except ValueError: - logger.warning(f"{h} not available in docstring") - continue - else: - # in the case only 'Parameters' exist, trim till end of docstring - index2 = len(s) - s = s[index1:index2] - return s.strip() - - def _extract_sklearn_param_info(self, model, char_lim=1024) -> None | dict: - """Parses parameter type and description from sklearn dosctring - - Parameters - ---------- - model : sklearn model - char_lim : int - Specifying the max length of the returned string. - OpenML servers have a constraint of 1024 characters string fields. 
- - Returns - ------- - Dict, or None - """ - docstring = self._extract_sklearn_parameter_docstring(model) - if docstring is None: - # when sklearn docstring has no 'Parameters' section - return None - - n = re.compile("[.]*\n", flags=IGNORECASE) - lines = n.split(docstring) - p = re.compile("[a-z0-9_ ]+ : [a-z0-9_']+[a-z0-9_ ]*", flags=IGNORECASE) - # The above regular expression is designed to detect sklearn parameter names and type - # in the format of [variable_name][space]:[space][type] - # The expectation is that the parameter description for this detected parameter will - # be all the lines in the docstring till the regex finds another parameter match - - # collecting parameters and their descriptions - description = [] # type: List - for s in lines: - param = p.findall(s) - if param != []: - # a parameter definition is found by regex - # creating placeholder when parameter found which will be a list of strings - # string descriptions will be appended in subsequent iterations - # till another parameter is found and a new placeholder is created - placeholder = [""] # type: List[str] - description.append(placeholder) - elif len(description) > 0: # description=[] means no parameters found yet - # appending strings to the placeholder created when parameter found - description[-1].append(s) - for i in range(len(description)): - # concatenating parameter description strings - description[i] = "\n".join(description[i]).strip() - # limiting all parameter descriptions to accepted OpenML string length - if len(description[i]) > char_lim: - description[i] = f"{description[i][: char_lim - 3]}..." - - # collecting parameters and their types - parameter_docs = OrderedDict() - matches = p.findall(docstring) - for i, param in enumerate(matches): - key, value = str(param).split(":") - parameter_docs[key.strip()] = [value.strip(), description[i]] - - # to avoid KeyError for missing parameters - param_list_true = list(model.get_params().keys()) - param_list_found = list(parameter_docs.keys()) - for param in list(set(param_list_true) - set(param_list_found)): - parameter_docs[param] = [None, None] - - return parameter_docs - - def _serialize_model(self, model: Any) -> OpenMLFlow: - """Create an OpenMLFlow. - - Calls `sklearn_to_flow` recursively to properly serialize the - parameters to strings and the components (other models) to OpenMLFlows. - - Parameters - ---------- - model : sklearn estimator - - Returns - ------- - OpenMLFlow - - """ - # Get all necessary information about the model objects itself - ( - parameters, - parameters_meta_info, - subcomponents, - subcomponents_explicit, - ) = self._extract_information_from_model(model) - - # Check that a component does not occur multiple times in a flow as this - # is not supported by OpenML - self._check_multiple_occurence_of_component_in_flow(model, subcomponents) - - # Create a flow name, which contains all components in brackets, e.g.: - # RandomizedSearchCV(Pipeline(StandardScaler,AdaBoostClassifier(DecisionTreeClassifier)), - # StandardScaler,AdaBoostClassifier(DecisionTreeClassifier)) - class_name = model.__module__ + "." 
+ model.__class__.__name__ - - # will be part of the name (in brackets) - sub_components_names = "" - for key in subcomponents: - name_thing = subcomponents[key] - if isinstance(name_thing, OpenMLFlow): - name = name_thing.name - elif ( - isinstance(name_thing, str) - and subcomponents[key] in SKLEARN_PIPELINE_STRING_COMPONENTS - ): - name = name_thing - else: - raise TypeError(type(subcomponents[key])) - - if key in subcomponents_explicit: - sub_components_names += "," + key + "=" + name - else: - sub_components_names += "," + name - - # slice operation on string in order to get rid of leading comma - name = f"{class_name}({sub_components_names[1:]})" if sub_components_names else class_name - short_name = self.trim_flow_name(name) - - # Get the external versions of all sub-components - external_version = self._get_external_version_string(model, subcomponents) - dependencies = self._get_dependencies() - tags = self._get_tags() - - sklearn_description = self._get_sklearn_description(model) - return OpenMLFlow( - name=name, - class_name=class_name, - custom_name=short_name, - description=sklearn_description, - model=model, - components=subcomponents, - parameters=parameters, - parameters_meta_info=parameters_meta_info, - external_version=external_version, - tags=tags, - extension=self, - language="English", - dependencies=dependencies, - ) - - def _get_dependencies(self) -> str: - return self._min_dependency_str(sklearn.__version__) # type: ignore - - def _get_tags(self) -> list[str]: - sklearn_version = self._format_external_version("sklearn", sklearn.__version__) # type: ignore - sklearn_version_formatted = sklearn_version.replace("==", "_") - return [ - "openml-python", - "sklearn", - "scikit-learn", - "python", - sklearn_version_formatted, - # TODO: add more tags based on the scikit-learn - # module a flow is in? For example automatically - # annotate a class of sklearn.svm.SVC() with the - # tag svm? - ] - - def _get_external_version_string( - self, - model: Any, - sub_components: dict[str, OpenMLFlow], - ) -> str: - # Create external version string for a flow, given the model and the - # already parsed dictionary of sub_components. Retrieves the external - # version of all subcomponents, which themselves already contain all - # requirements for their subcomponents. The external version string is a - # sorted concatenation of all modules which are present in this run. 
- - external_versions = set() - - # The model is None if the flow is a placeholder flow such as 'passthrough' or 'drop' - if model is not None: - model_package_name = model.__module__.split(".")[0] - module = importlib.import_module(model_package_name) - model_package_version_number = module.__version__ # type: ignore - external_version = self._format_external_version( - model_package_name, - model_package_version_number, - ) - external_versions.add(external_version) - - openml_version = self._format_external_version("openml", openml.__version__) - sklearn_version = self._format_external_version("sklearn", sklearn.__version__) # type: ignore - external_versions.add(openml_version) - external_versions.add(sklearn_version) - for visitee in sub_components.values(): - if isinstance(visitee, str) and visitee in SKLEARN_PIPELINE_STRING_COMPONENTS: - continue - for external_version in visitee.external_version.split(","): - external_versions.add(external_version) - return ",".join(sorted(external_versions)) - - def _check_multiple_occurence_of_component_in_flow( - self, - model: Any, - sub_components: dict[str, OpenMLFlow], - ) -> None: - to_visit_stack: list[OpenMLFlow] = [] - to_visit_stack.extend(sub_components.values()) - known_sub_components: set[str] = set() - - while len(to_visit_stack) > 0: - visitee = to_visit_stack.pop() - if isinstance(visitee, str) and visitee in SKLEARN_PIPELINE_STRING_COMPONENTS: - known_sub_components.add(visitee) - elif visitee.name in known_sub_components: - raise ValueError( - f"Found a second occurence of component {visitee.name} when " - f"trying to serialize {model}.", - ) - else: - known_sub_components.add(visitee.name) - to_visit_stack.extend(visitee.components.values()) - - def _extract_information_from_model( # noqa: PLR0915, C901, PLR0912 - self, - model: Any, - ) -> tuple[ - OrderedDict[str, str | None], - OrderedDict[str, dict | None], - OrderedDict[str, OpenMLFlow], - set, - ]: - # This function contains four "global" states and is quite long and - # complicated. If it gets to complicated to ensure it's correctness, - # it would be best to make it a class with the four "global" states being - # the class attributes and the if/elif/else in the for-loop calls to - # separate class methods - - # stores all entities that should become subcomponents - sub_components = OrderedDict() # type: OrderedDict[str, OpenMLFlow] - # stores the keys of all subcomponents that should become - sub_components_explicit = set() - parameters: OrderedDict[str, str | None] = OrderedDict() - parameters_meta_info: OrderedDict[str, dict | None] = OrderedDict() - parameters_docs = self._extract_sklearn_param_info(model) - - model_parameters = model.get_params(deep=False) - for k, v in sorted(model_parameters.items(), key=lambda t: t[0]): - rval = self._serialize_sklearn(v, model) - - def flatten_all(list_): - """Flattens arbitrary depth lists of lists (e.g. [[1,2],[3,[1]]] -> [1,2,3,1]).""" - for el in list_: - if isinstance(el, (list, tuple)) and len(el) > 0: - yield from flatten_all(el) - else: - yield el - - # In case rval is a list of lists (or tuples), we need to identify two situations: - # - sklearn pipeline steps, feature union or base classifiers in voting classifier. - # They look like e.g. [("imputer", Imputer()), ("classifier", SVC())] - # - a list of lists with simple types (e.g. 
int or str), such as for an OrdinalEncoder - # where all possible values for each feature are described: [[0,1,2], [1,2,5]] - is_non_empty_list_of_lists_with_same_type = ( - isinstance(rval, (list, tuple)) - and len(rval) > 0 - and isinstance(rval[0], (list, tuple)) - and all(isinstance(rval_i, type(rval[0])) for rval_i in rval) - ) - - # Check that all list elements are of simple types. - nested_list_of_simple_types = ( - is_non_empty_list_of_lists_with_same_type - and all(isinstance(el, SIMPLE_TYPES) for el in flatten_all(rval)) - and all( - len(rv) in (2, 3) and rv[1] not in SKLEARN_PIPELINE_STRING_COMPONENTS - for rv in rval - ) - ) - - if is_non_empty_list_of_lists_with_same_type and not nested_list_of_simple_types: - # If a list of lists is identified that include 'non-simple' types (e.g. objects), - # we assume they are steps in a pipeline, feature union, or base classifiers in - # a voting classifier. - parameter_value = [] # type: List - reserved_keywords = set(model.get_params(deep=False).keys()) - - for sub_component_tuple in rval: - identifier = sub_component_tuple[0] - sub_component = sub_component_tuple[1] - sub_component_type = type(sub_component_tuple) - if not 2 <= len(sub_component_tuple) <= 3: - # length 2 is for {VotingClassifier.estimators, - # Pipeline.steps, FeatureUnion.transformer_list} - # length 3 is for ColumnTransformer - raise ValueError( - f"Length of tuple of type {sub_component_type}" - " does not match assumptions" - ) - - if isinstance(sub_component, str): - if sub_component not in SKLEARN_PIPELINE_STRING_COMPONENTS: - msg = ( - "Second item of tuple does not match assumptions. " - "If string, can be only 'drop' or 'passthrough' but" - f"got {sub_component}" - ) - raise ValueError(msg) - elif sub_component is None: - msg = ( - "Cannot serialize objects of None type. Please use a valid " - "placeholder for None. Note that empty sklearn estimators can be " - "replaced with 'drop' or 'passthrough'." - ) - raise ValueError(msg) - elif not isinstance(sub_component, OpenMLFlow): - msg = ( - "Second item of tuple does not match assumptions. 
" - f"Expected OpenMLFlow, got {type(sub_component)}" - ) - raise TypeError(msg) - - if identifier in reserved_keywords: - parent_model = f"{model.__module__}.{model.__class__.__name__}" - msg = ( - "Found element shadowing official " - f"parameter for {parent_model}: {identifier}" - ) - raise PyOpenMLError(msg) - - # when deserializing the parameter - sub_components_explicit.add(identifier) - if isinstance(sub_component, str): - external_version = self._get_external_version_string(None, {}) - dependencies = self._get_dependencies() - tags = self._get_tags() - - sub_components[identifier] = OpenMLFlow( - name=sub_component, - description="Placeholder flow for scikit-learn's string pipeline " - "members", - components=OrderedDict(), - parameters=OrderedDict(), - parameters_meta_info=OrderedDict(), - external_version=external_version, - tags=tags, - language="English", - dependencies=dependencies, - model=None, - ) - component_reference: OrderedDict[str, str | dict] = OrderedDict() - component_reference["oml-python:serialized_object"] = ( - COMPOSITION_STEP_CONSTANT - ) - cr_value: dict[str, Any] = OrderedDict() - cr_value["key"] = identifier - cr_value["step_name"] = identifier - if len(sub_component_tuple) == 3: - cr_value["argument_1"] = sub_component_tuple[2] - component_reference["value"] = cr_value - else: - sub_components[identifier] = sub_component - component_reference = OrderedDict() - component_reference["oml-python:serialized_object"] = COMPONENT_REFERENCE - cr_value = OrderedDict() - cr_value["key"] = identifier - cr_value["step_name"] = identifier - if len(sub_component_tuple) == 3: - cr_value["argument_1"] = sub_component_tuple[2] - component_reference["value"] = cr_value - parameter_value.append(component_reference) - - # Here (and in the elif and else branch below) are the only - # places where we encode a value as json to make sure that all - # parameter values still have the same type after - # deserialization - if isinstance(rval, tuple): - parameter_json = json.dumps(tuple(parameter_value)) - else: - parameter_json = json.dumps(parameter_value) - parameters[k] = parameter_json - - elif isinstance(rval, OpenMLFlow): - # A subcomponent, for example the base model in - # AdaBoostClassifier - sub_components[k] = rval - sub_components_explicit.add(k) - component_reference = OrderedDict() - component_reference["oml-python:serialized_object"] = COMPONENT_REFERENCE - cr_value = OrderedDict() - cr_value["key"] = k - cr_value["step_name"] = None - component_reference["value"] = cr_value - cr = self._serialize_sklearn(component_reference, model) - parameters[k] = json.dumps(cr) - - elif not (hasattr(rval, "__len__") and len(rval) == 0): - rval = json.dumps(rval) - parameters[k] = rval - # a regular hyperparameter - else: - parameters[k] = None - - if parameters_docs is not None: - data_type, description = parameters_docs[k] - parameters_meta_info[k] = OrderedDict( - (("description", description), ("data_type", data_type)), - ) - else: - parameters_meta_info[k] = OrderedDict((("description", None), ("data_type", None))) - - return parameters, parameters_meta_info, sub_components, sub_components_explicit - - def _get_fn_arguments_with_defaults(self, fn_name: Callable) -> tuple[dict, set]: - """ - Returns - ------- - i) a dict with all parameter names that have a default value, and - ii) a set with all parameter names that do not have a default - - Parameters - ---------- - fn_name : callable - The function of which we want to obtain the defaults - - Returns - ------- - 
params_with_defaults: dict - a dict mapping parameter name to the default value - params_without_defaults: set - a set with all parameters that do not have a default value - """ - # parameters with defaults are optional, all others are required. - parameters = inspect.signature(fn_name).parameters - required_params = set() - optional_params = {} - for param in parameters: - parameter = parameters.get(param) - default_val = parameter.default # type: ignore - if default_val is inspect.Signature.empty: - required_params.add(param) - else: - optional_params[param] = default_val - return optional_params, required_params - - def _deserialize_model( # noqa: C901 - self, - flow: OpenMLFlow, - keep_defaults: bool, # noqa: FBT001 - recursion_depth: int, - strict_version: bool = True, # noqa: FBT002, FBT001 - ) -> Any: - logger.info(f"-{'-' * recursion_depth} deserialize {flow.name}") - model_name = flow.class_name - self._check_dependencies(flow.dependencies, strict_version=strict_version) - - parameters = flow.parameters - components = flow.components - parameter_dict: dict[str, Any] = OrderedDict() - - # Do a shallow copy of the components dictionary so we can remove the - # components from this copy once we added them into the pipeline. This - # allows us to not consider them any more when looping over the - # components, but keeping the dictionary of components untouched in the - # original components dictionary. - components_ = copy.copy(components) - - for name in parameters: - value = parameters.get(name) - logger.info(f"--{'-' * recursion_depth} flow_parameter={name}, value={value}") - rval = self._deserialize_sklearn( - value, - components=components_, - initialize_with_defaults=keep_defaults, - recursion_depth=recursion_depth + 1, - strict_version=strict_version, - ) - parameter_dict[name] = rval - - for name in components: - if name in parameter_dict: - continue - if name not in components_: - continue - value = components[name] - logger.info(f"--{'-' * recursion_depth} flow_component={name}, value={value}") - rval = self._deserialize_sklearn( - value, - recursion_depth=recursion_depth + 1, - strict_version=strict_version, - ) - parameter_dict[name] = rval - - if model_name is None and flow.name in SKLEARN_PIPELINE_STRING_COMPONENTS: - return flow.name - - assert model_name is not None - module_name = model_name.rsplit(".", 1) - model_class = getattr(importlib.import_module(module_name[0]), module_name[1]) - - if keep_defaults: - # obtain all params with a default - param_defaults, _ = self._get_fn_arguments_with_defaults(model_class.__init__) - - # delete the params that have a default from the dict, - # so they get initialized with their default value - # except [...] - for param in param_defaults: - # [...] the ones that also have a key in the components dict. - # As OpenML stores different flows for ensembles with different - # (base-)components, in OpenML terms, these are not considered - # hyperparameters but rather constants (i.e., changing them would - # result in a different flow) - if param not in components: - del parameter_dict[param] - - if not strict_version: - # Ignore incompatible parameters - allowed_parameter = list(inspect.signature(model_class.__init__).parameters) - for p in list(parameter_dict.keys()): - if p not in allowed_parameter: - warnings.warn( - f"While deserializing in a non-strict way, parameter {p} is not " - f"allowed for {model_class.__name__} likely due to a version mismatch. 
" - "We ignore the parameter.", - UserWarning, - stacklevel=2, - ) - del parameter_dict[p] - - return model_class(**parameter_dict) - - def _check_dependencies( - self, - dependencies: str, - strict_version: bool = True, # noqa: FBT001, FBT002 - ) -> None: - if not dependencies: - return - - dependencies_list = dependencies.split("\n") - for dependency_string in dependencies_list: - match = DEPENDENCIES_PATTERN.match(dependency_string) - if not match: - raise ValueError(f"Cannot parse dependency {dependency_string}") - - dependency_name = match.group("name") - operation = match.group("operation") - version = match.group("version") - - module = importlib.import_module(dependency_name) - required_version = Version(version) - installed_version = Version(module.__version__) # type: ignore - - if operation == "==": - check = required_version == installed_version - elif operation == ">": - check = installed_version > required_version - elif operation == ">=": - check = ( - installed_version > required_version or installed_version == required_version - ) - else: - raise NotImplementedError(f"operation '{operation}' is not supported") - message = ( - f"Trying to deserialize a model with dependency {dependency_string} not satisfied." - ) - if not check: - if strict_version: - raise ValueError(message) - - warnings.warn(message, category=UserWarning, stacklevel=2) - - def _serialize_type(self, o: Any) -> OrderedDict[str, str]: - mapping = { - float: "float", - np.float32: "np.float32", - np.float64: "np.float64", - int: "int", - np.int32: "np.int32", - np.int64: "np.int64", - } - if Version(np.__version__) < Version("1.24"): - mapping[float] = "np.float" - mapping[int] = "np.int" - - ret = OrderedDict() # type: 'OrderedDict[str, str]' - ret["oml-python:serialized_object"] = "type" - ret["value"] = mapping[o] - return ret - - def _deserialize_type(self, o: str) -> Any: - mapping = { - "float": float, - "np.float32": np.float32, - "np.float64": np.float64, - "int": int, - "np.int32": np.int32, - "np.int64": np.int64, - } - - # TODO(eddiebergman): Might be able to remove this - if Version(np.__version__) < Version("1.24"): - mapping["np.float"] = np.float # type: ignore # noqa: NPY001 - mapping["np.int"] = np.int # type: ignore # noqa: NPY001 - - return mapping[o] - - def _serialize_rv_frozen(self, o: Any) -> OrderedDict[str, str | dict]: - args = o.args - kwds = o.kwds - a = o.a - b = o.b - dist = o.dist.__class__.__module__ + "." + o.dist.__class__.__name__ - ret: OrderedDict[str, str | dict] = OrderedDict() - ret["oml-python:serialized_object"] = "rv_frozen" - ret["value"] = OrderedDict( - (("dist", dist), ("a", a), ("b", b), ("args", args), ("kwds", kwds)), - ) - return ret - - def _deserialize_rv_frozen(self, o: OrderedDict[str, str]) -> Any: - args = o["args"] - kwds = o["kwds"] - a = o["a"] - b = o["b"] - dist_name = o["dist"] - - module_name = dist_name.rsplit(".", 1) - try: - rv_class = getattr(importlib.import_module(module_name[0]), module_name[1]) - except AttributeError as e: - _tb = traceback.format_exc() - warnings.warn( - f"Cannot create model {dist_name} for flow. Reason is from error {type(e)}:{e}" - f"\nTraceback: {_tb}", - RuntimeWarning, - stacklevel=2, - ) - return None - - dist = scipy.stats.distributions.rv_frozen(rv_class(), *args, **kwds) # type: ignore - dist.a = a - dist.b = b - - return dist - - def _serialize_function(self, o: Callable) -> OrderedDict[str, str]: - name = o.__module__ + "." 
+ o.__name__ - ret = OrderedDict() # type: 'OrderedDict[str, str]' - ret["oml-python:serialized_object"] = "function" - ret["value"] = name - return ret - - def _deserialize_function(self, name: str) -> Callable: - module_name = name.rsplit(".", 1) - return getattr(importlib.import_module(module_name[0]), module_name[1]) - - def _serialize_cross_validator(self, o: Any) -> OrderedDict[str, str | dict]: - ret: OrderedDict[str, str | dict] = OrderedDict() - - parameters = OrderedDict() # type: 'OrderedDict[str, Any]' - - # XXX this is copied from sklearn.model_selection._split - cls = o.__class__ - init = getattr(cls.__init__, "deprecated_original", cls.__init__) - # Ignore varargs, kw and default values and pop self - init_signature = inspect.signature(init) # type: ignore - # Consider the constructor parameters excluding 'self' - if init is object.__init__: - args = [] # type: List - else: - args = sorted( - [ - p.name - for p in init_signature.parameters.values() - if p.name != "self" and p.kind != p.VAR_KEYWORD - ], - ) - - for key in args: - # We need deprecation warnings to always be on in order to - # catch deprecated param values. - # This is set in utils/__init__.py but it gets overwritten - # when running under python3 somehow. - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always", DeprecationWarning) - value = getattr(o, key, None) - if w is not None and len(w) and w[0].category is DeprecationWarning: - # if the parameter is deprecated, don't show it - continue - - if not (isinstance(value, Sized) and len(value) == 0): - value = json.dumps(value) - parameters[key] = value - else: - parameters[key] = None - - ret["oml-python:serialized_object"] = "cv_object" - name = o.__module__ + "." + o.__class__.__name__ - value = OrderedDict([("name", name), ("parameters", parameters)]) - ret["value"] = value - - return ret - - def _deserialize_cross_validator( - self, - value: OrderedDict[str, Any], - recursion_depth: int, - strict_version: bool = True, # noqa: FBT002, FBT001 - ) -> Any: - model_name = value["name"] - parameters = value["parameters"] - - module_name = model_name.rsplit(".", 1) - model_class = getattr(importlib.import_module(module_name[0]), module_name[1]) - for parameter in parameters: - parameters[parameter] = self._deserialize_sklearn( - parameters[parameter], - recursion_depth=recursion_depth + 1, - strict_version=strict_version, - ) - return model_class(**parameters) - - def _format_external_version( - self, - model_package_name: str, - model_package_version_number: str, - ) -> str: - return f"{model_package_name}=={model_package_version_number}" - - @staticmethod - def _get_parameter_values_recursive( - param_grid: dict | list[dict], - parameter_name: str, - ) -> list[Any]: - """ - Returns a list of values for a given hyperparameter, encountered - recursively throughout the flow. 
(e.g., n_jobs can be defined - for various flows) - - Parameters - ---------- - param_grid: Union[Dict, List[Dict]] - Dict mapping from hyperparameter list to value, to a list of - such dicts - - parameter_name: str - The hyperparameter that needs to be inspected - - Returns - ------- - List - A list of all values of hyperparameters with this name - """ - if isinstance(param_grid, dict): - return [ - value - for param, value in param_grid.items() - if param.split("__")[-1] == parameter_name - ] - - if isinstance(param_grid, list): - result = [] - for sub_grid in param_grid: - result.extend( - SklearnSerializer._get_parameter_values_recursive(sub_grid, parameter_name), - ) - return result - - raise ValueError("Param_grid should either be a dict or list of dicts") - - def _prevent_optimize_n_jobs(self, model): - """ - Ensures that HPO classes will not optimize the n_jobs hyperparameter - - Parameters - ---------- - model: - The model that will be fitted - """ - if self._is_hpo_class(model): - if isinstance(model, sklearn.model_selection.GridSearchCV): - param_distributions = model.param_grid - elif isinstance(model, sklearn.model_selection.RandomizedSearchCV): - param_distributions = model.param_distributions - else: - if hasattr(model, "param_distributions"): - param_distributions = model.param_distributions - else: - raise AttributeError( - "Using subclass BaseSearchCV other than " - "{GridSearchCV, RandomizedSearchCV}. " - "Could not find attribute " - "param_distributions.", - ) - logger.warning( - "Warning! Using subclass BaseSearchCV other than " - "{GridSearchCV, RandomizedSearchCV}. " - "Should implement param check. ", - ) - n_jobs_vals = self._get_parameter_values_recursive( - param_distributions, - "n_jobs", - ) - if len(n_jobs_vals) > 0: - raise PyOpenMLError( - "openml-python should not be used to optimize the n_jobs parameter.", - ) From 12df955a3af5a9b3ec117f921207de73d34ba67a Mon Sep 17 00:00:00 2001 From: Jigyasu Date: Mon, 5 Jan 2026 21:21:02 +0530 Subject: [PATCH 13/15] remove .DS_Store --- openml/extensions/.DS_Store | Bin 10244 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 openml/extensions/.DS_Store diff --git a/openml/extensions/.DS_Store b/openml/extensions/.DS_Store deleted file mode 100644 index 0ee8fbd94f44119155f92a35af97354b7bcaca43..0000000000000000000000000000000000000000 GIT binary patch
From 9e5e752fe43f57c436c8b1f876bcb4bf7c924fc9 Mon Sep 17 00:00:00 2001 From: Jigyasu Date: Mon, 5 Jan 2026 21:24:51 +0530 Subject: [PATCH 14/15] add flow functions to __init__.py --- openml/flows/__init__.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/openml/flows/__init__.py b/openml/flows/__init__.py index ce32fec7d..e0bd979da 100644 --- a/openml/flows/__init__.py +++ b/openml/flows/__init__.py @@ -1,7 +1,7 @@ # License: BSD 3-Clause -from .flow import OpenMLFlow -from .functions import ( +from openml.flows.flow import OpenMLFlow +from openml.flows.functions import ( assert_flows_equal, delete_flow, flow_exists, @@ -9,6 +9,7 @@ get_flow_id, list_flows, ) +from openml.flows.utils import estimator_to_flow, flow_to_estimator __all__ = [ "OpenMLFlow", @@ -18,4 +19,6 @@ "flow_exists", "assert_flows_equal", "delete_flow", + "estimator_to_flow", + "flow_to_estimator", ] From bf9a0aa823e3e16654550e93683d887e4a98e279 Mon Sep 17 00:00:00 2001 From: Jigyasu Date: Tue, 6 Jan 2026 11:18:45 +0530 Subject: [PATCH 15/15] add tests for extension base classes and registry --- tests/test_extensions/test_base.py | 143 +++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 tests/test_extensions/test_base.py diff --git a/tests/test_extensions/test_base.py b/tests/test_extensions/test_base.py new file mode 100644 index 000000000..46abcf4ca --- /dev/null +++ b/tests/test_extensions/test_base.py @@ -0,0 +1,143 @@ +# License: BSD 3-Clause + +"""Test OpenML extension base classes and registry.""" + +import pytest +from collections import OrderedDict + +from openml.exceptions import PyOpenMLError +from openml.extensions.base import ( + ModelSerializer, + ModelExecutor, + OpenMLAPIConnector, +) +from openml.extensions.registry import resolve_api_connector + + +class TestModelSerializer: + """Test ModelSerializer abstract base class.""" + + def test_is_abstract(self): + """ModelSerializer should not be instantiable.""" + with pytest.raises(TypeError): + ModelSerializer() # noqa: B024 + + class DummySerializer(ModelSerializer): + @classmethod + def can_handle_model(cls, model): + return True + + def model_to_flow(self, model): + return "dummy_flow" + + def flow_to_model(self, flow, initialize_with_defaults=False, strict_version=True): + return "dummy_model" + + def get_version_information(self): + return ["dummy>=0.1"] + + def test_concrete_implementation(self): + serializer = self.DummySerializer() + + assert serializer.can_handle_model(object()) is True + assert
serializer.model_to_flow("model") == "dummy_flow" + assert serializer.flow_to_model("flow") == "dummy_model" + assert serializer.get_version_information() == ["dummy>=0.1"] + + +class TestModelExecutor: + """Test ModelExecutor abstract base class.""" + + def test_is_abstract(self): + """ModelExecutor should not be instantiable.""" + with pytest.raises(TypeError): + ModelExecutor() # noqa: B024 + + class DummyExecutor(ModelExecutor): + def seed_model(self, model, seed): + return model + + def _run_model_on_fold( + self, + model, + task, + X_train, + rep_no, + fold_no, + y_train=None, + X_test=None, + ): + return ( + [], # predictions + None, # probabilities + OrderedDict(), # user_defined_measures + None, # trace + ) + + def check_if_model_fitted(self, model): + return False + + def obtain_parameter_values(self, flow, model=None): + return [] + + def instantiate_model_from_hpo_class(self, model, trace_iteration): + return model + + def test_concrete_implementation(self): + executor = self.DummyExecutor() + + assert executor.seed_model("model", 42) == "model" + assert executor.check_if_model_fitted("model") is False + assert executor.obtain_parameter_values("flow") == [] + + +class TestOpenMLAPIConnector: + """Test OpenMLAPIConnector abstract base class.""" + + def test_is_abstract(self): + """OpenMLAPIConnector should not be instantiable.""" + with pytest.raises(TypeError): + OpenMLAPIConnector() # noqa: B024 + + class DummySerializer: + pass + + class DummyExecutor: + pass + + class DummyConnector(OpenMLAPIConnector): + def serializer(self): + return TestOpenMLAPIConnector.DummySerializer() + + def executor(self): + return TestOpenMLAPIConnector.DummyExecutor() + + @classmethod + def supports(cls, estimator): + return estimator == "supported" + + def test_concrete_implementation(self): + connector = self.DummyConnector() + + assert isinstance(connector.serializer(), self.DummySerializer) + assert isinstance(connector.executor(), self.DummyExecutor) + assert self.DummyConnector.supports("supported") is True + assert self.DummyConnector.supports("unsupported") is False + + def test_resolve_api_connector_success(self, monkeypatch): + monkeypatch.setattr( + "openml.extensions.registry.API_CONNECTOR_REGISTRY", + [self.DummyConnector], + ) + + connector = resolve_api_connector("supported") + assert isinstance(connector, self.DummyConnector) + + def test_resolve_api_connector_no_match(self, monkeypatch): + monkeypatch.setattr( + "openml.extensions.registry.API_CONNECTOR_REGISTRY", + [], + ) + + with pytest.raises(PyOpenMLError, match="No OpenML API connector supports"): + resolve_api_connector("anything")