diff --git a/openml/extensions/connectors/__init__.py b/openml/extensions/connectors/__init__.py new file mode 100644 index 000000000..31d358d70 --- /dev/null +++ b/openml/extensions/connectors/__init__.py @@ -0,0 +1,7 @@ +# License: BSD 3-Clause + +"""Base classes for OpenML API connectors.""" + +from openml.extensions.connectors.base import OpenMLAPIConnector + +__all__ = ["OpenMLAPIConnector"] diff --git a/openml/extensions/connectors/base.py b/openml/extensions/connectors/base.py new file mode 100644 index 000000000..398c3c656 --- /dev/null +++ b/openml/extensions/connectors/base.py @@ -0,0 +1,29 @@ +# License: BSD 3-Clause + +"""Base class for OpenML API connectors.""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from openml.extensions.execution import ModelExecutor + from openml.extensions.serialization import ModelSerializer + + +class OpenMLAPIConnector(ABC): + """Base class for OpenML API connectors.""" + + @abstractmethod + def serializer(self) -> ModelSerializer: + """Return the serializer for this API.""" + + @abstractmethod + def executor(self) -> ModelExecutor: + """Return the executor for this API.""" + + @classmethod + @abstractmethod + def supports(cls, model: Any) -> bool: + """High-level check if this connector supports the model.""" diff --git a/openml/extensions/execution/__init__.py b/openml/extensions/execution/__init__.py new file mode 100644 index 000000000..0939998e8 --- /dev/null +++ b/openml/extensions/execution/__init__.py @@ -0,0 +1,7 @@ +# License: BSD 3-Clause + +"""Base class for estimator executors.""" + +from openml.extensions.execution.base import ModelExecutor + +__all__ = ["ModelExecutor"] diff --git a/openml/extensions/execution/base.py b/openml/extensions/execution/base.py new file mode 100644 index 000000000..018fe2964 --- /dev/null +++ b/openml/extensions/execution/base.py @@ -0,0 +1,113 @@ +# License: BSD 3-Clause + +"""Base class for estimator executors.""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from collections import OrderedDict +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + import numpy as np + import scipy.sparse + + from openml.runs.trace import OpenMLRunTrace + from openml.tasks.task import OpenMLTask + + +class ModelExecutor(ABC): + """Define runtime execution semantics for a specific API type.""" + + @classmethod + @abstractmethod + def can_handle_model(cls, model: Any) -> bool: + """Check whether a model flow can be handled by this extension. + + This is typically done by checking the type of the model, or the package it belongs to. + + Parameters + ---------- + model : Any + + Returns + ------- + bool + """ + + @abstractmethod + def seed_model(self, model: Any, seed: int | None) -> Any: + """Set the seed of all the unseeded components of a model and return the seeded model. + + Required so that all seed information can be uploaded to OpenML for reproducible results. + + Parameters + ---------- + model : Any + The model to be seeded + seed : int + + Returns + ------- + model + """ + + @abstractmethod + def _run_model_on_fold( # noqa: PLR0913 + self, + model: Any, + task: OpenMLTask, + X_train: np.ndarray | scipy.sparse.spmatrix, + rep_no: int, + fold_no: int, + y_train: np.ndarray | None = None, + X_test: np.ndarray | scipy.sparse.spmatrix | None = None, + ) -> tuple[np.ndarray, np.ndarray | None, OrderedDict[str, float], OpenMLRunTrace | None]: + """Run a model on a repeat, fold, subsample triplet of the task. + + Returns the data that is necessary to construct the OpenML Run object. Is used by + :func:`openml.runs.run_flow_on_task`. + + Parameters + ---------- + model : Any + The UNTRAINED model to run. The model instance will be copied and not altered. + task : OpenMLTask + The task to run the model on. + X_train : array-like + Training data for the given repetition and fold. + rep_no : int + The repeat of the experiment (0-based; in case of 1 time CV, always 0) + fold_no : int + The fold nr of the experiment (0-based; in case of holdout, always 0) + y_train : Optional[np.ndarray] (default=None) + Target attributes for supervised tasks. In case of classification, these are integer + indices to the potential classes specified by dataset. + X_test : Optional, array-like (default=None) + Test attributes to test for generalization in supervised tasks. + + Returns + ------- + predictions : np.ndarray + Model predictions. + probabilities : Optional, np.ndarray + Predicted probabilities (only applicable for supervised classification tasks). + user_defined_measures : OrderedDict[str, float] + User defined measures that were generated on this fold + trace : Optional, OpenMLRunTrace + Hyperparameter optimization trace (only applicable for supervised tasks with + hyperparameter optimization). + """ + + @abstractmethod + def check_if_model_fitted(self, model: Any) -> bool: + """Returns True/False denoting if the model has already been fitted/trained. + + Parameters + ---------- + model : Any + + Returns + ------- + bool + """ diff --git a/openml/extensions/registry.py b/openml/extensions/registry.py new file mode 100644 index 000000000..23d220898 --- /dev/null +++ b/openml/extensions/registry.py @@ -0,0 +1,51 @@ +# License: BSD 3-Clause + +"""Extension registry.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +from openml_sklearn import SklearnExtension + +from openml.exceptions import PyOpenMLError + +if TYPE_CHECKING: + from openml.extensions.connectors import OpenMLAPIConnector + +API_CONNECTOR_REGISTRY: list[type[OpenMLAPIConnector]] = [ + SklearnExtension, # TODO: I need to refactor SklearnExtension +] + + +def resolve_api_connector(estimator: Any) -> OpenMLAPIConnector: + """ + Identify and return the appropriate OpenML API connector for a given estimator. + + This function iterates through the global ``API_CONNECTOR_REGISTRY`` to find + a connector class that supports the provided estimator object. If exactly one + matching connector is found, it is instantiated and returned. + + Parameters + ---------- + estimator : Any + The estimator object (e.g., a scikit-learn estimator) for which an API + connector is required. + + Returns + ------- + OpenMLAPIConnector + An instance of the matching API connector. + + Raises + ------ + OpenMLException + If no connector is found in the registry that supports the provided + model, or if multiple connectors in the registry claim support for + the provided model. + """ + for connector_cls in API_CONNECTOR_REGISTRY: + if connector_cls.supports(estimator): + return connector_cls() + + raise PyOpenMLError("No OpenML API connector supports this estimator.") diff --git a/openml/extensions/serialization/__init__.py b/openml/extensions/serialization/__init__.py new file mode 100644 index 000000000..df618b60f --- /dev/null +++ b/openml/extensions/serialization/__init__.py @@ -0,0 +1,7 @@ +# License: BSD 3-Clause + +"""Base classes for estimator serializors.""" + +from openml.extensions.serialization.base import ModelSerializer + +__all__ = ["ModelSerializer"] diff --git a/openml/extensions/serialization/base.py b/openml/extensions/serialization/base.py new file mode 100644 index 000000000..96026fc56 --- /dev/null +++ b/openml/extensions/serialization/base.py @@ -0,0 +1,127 @@ +# License: BSD 3-Clause + +"""Base class for estimator serializors.""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from openml.flows import OpenMLFlow + from openml.runs.trace import OpenMLTraceIteration + + +class ModelSerializer(ABC): + """Handle the conversion between estimator instances and OpenML Flows.""" + + @classmethod + @abstractmethod + def can_handle_model(cls, model: Any) -> bool: + """Check whether a model flow can be handled by this extension. + + This is typically done by checking the type of the model, or the package it belongs to. + + Parameters + ---------- + model : Any + + Returns + ------- + bool + """ + + @abstractmethod + def model_to_flow(self, model: Any) -> OpenMLFlow: + """Transform a model to a flow for uploading it to OpenML. + + Parameters + ---------- + model : Any + + Returns + ------- + OpenMLFlow + """ + + @abstractmethod + def flow_to_model( + self, + flow: OpenMLFlow, + initialize_with_defaults: bool = False, # noqa: FBT001, FBT002 + strict_version: bool = True, # noqa: FBT002, FBT001 + ) -> Any: + """Instantiate a model from the flow representation. + + Parameters + ---------- + flow : OpenMLFlow + + initialize_with_defaults : bool, optional (default=False) + If this flag is set, the hyperparameter values of flows will be + ignored and a flow with its defaults is returned. + + strict_version : bool, default=True + Whether to fail if version requirements are not fulfilled. + + Returns + ------- + Any + """ + + @abstractmethod + def obtain_parameter_values( + self, + flow: OpenMLFlow, + model: Any = None, + ) -> list[dict[str, Any]]: + """Extracts all parameter settings required for the flow from the model. + + If no explicit model is provided, the parameters will be extracted from `flow.model` + instead. + + Parameters + ---------- + flow : OpenMLFlow + OpenMLFlow object (containing flow ids, i.e., it has to be downloaded from the server) + + model: Any, optional (default=None) + The model from which to obtain the parameter values. Must match the flow signature. + If None, use the model specified in ``OpenMLFlow.model``. + + Returns + ------- + list + A list of dicts, where each dict has the following entries: + - ``oml:name`` : str: The OpenML parameter name + - ``oml:value`` : mixed: A representation of the parameter value + - ``oml:component`` : int: flow id to which the parameter belongs + """ + + @abstractmethod + def get_version_information(self) -> list[str]: + """Return dependency and version information.""" + + # Abstract methods for hyperparameter optimization + + @abstractmethod + def instantiate_model_from_hpo_class( + self, + model: Any, + trace_iteration: OpenMLTraceIteration, + ) -> Any: + """Instantiate a base model which can be searched over by the hyperparameter optimization + model. + + Parameters + ---------- + model : Any + A hyperparameter optimization model which defines the model to be instantiated. + trace_iteration : OpenMLTraceIteration + Describing the hyperparameter settings to instantiate. + + Returns + ------- + Any + """ + # TODO a trace belongs to a run and therefore a flow -> simplify this part of the interface!