diff --git a/openml/base.py b/openml/base.py index fbfb9dfc8..927ba8fed 100644 --- a/openml/base.py +++ b/openml/base.py @@ -1,26 +1,22 @@ # License: BSD 3-Clause from __future__ import annotations -import re import webbrowser from abc import ABC, abstractmethod -from typing import Iterable, Sequence +from typing import Sequence import xmltodict import openml._api_calls import openml.config +from openml.utils import ReprMixin from .utils import _get_rest_api_type_alias, _tag_openml_base -class OpenMLBase(ABC): +class OpenMLBase(ReprMixin, ABC): """Base object for functionality that is shared across entities.""" - def __repr__(self) -> str: - body_fields = self._get_repr_body_fields() - return self._apply_repr_template(body_fields) - @property @abstractmethod def id(self) -> int | None: @@ -60,34 +56,6 @@ def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | N """ # Should be implemented in the base class. - def _apply_repr_template( - self, - body_fields: Iterable[tuple[str, str | int | list[str] | None]], - ) -> str: - """Generates the header and formats the body for string representation of the object. - - Parameters - ---------- - body_fields: List[Tuple[str, str]] - A list of (name, value) pairs to display in the body of the __repr__. - """ - # We add spaces between capitals, e.g. ClassificationTask -> Classification Task - name_with_spaces = re.sub( - r"(\w)([A-Z])", - r"\1 \2", - self.__class__.__name__[len("OpenML") :], - ) - header_text = f"OpenML {name_with_spaces}" - header = f"{header_text}\n{'=' * len(header_text)}\n" - - _body_fields: list[tuple[str, str | int | list[str]]] = [ - (k, "None" if v is None else v) for k, v in body_fields - ] - longest_field_name_length = max(len(name) for name, _ in _body_fields) - field_line_format = f"{{:.<{longest_field_name_length}}}: {{}}" - body = "\n".join(field_line_format.format(name, value) for name, value in _body_fields) - return header + body - @abstractmethod def _to_dict(self) -> dict[str, dict]: """Creates a dictionary representation of self. diff --git a/openml/datasets/data_feature.py b/openml/datasets/data_feature.py index 218b0066d..5026d27e8 100644 --- a/openml/datasets/data_feature.py +++ b/openml/datasets/data_feature.py @@ -6,8 +6,10 @@ if TYPE_CHECKING: from IPython.lib import pretty +from openml.utils import ReprMixin -class OpenMLDataFeature: + +class OpenMLDataFeature(ReprMixin): """ Data Feature (a.k.a. Attribute) object. @@ -74,8 +76,20 @@ def __init__( # noqa: PLR0913 self.number_missing_values = number_missing_values self.ontologies = ontologies - def __repr__(self) -> str: - return "[%d - %s (%s)]" % (self.index, self.name, self.data_type) + def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]: + """Collect all information to display in the __repr__ body.""" + fields: dict[str, int | str | None] = { + "Index": self.index, + "Name": self.name, + "Data Type": self.data_type, + } + + order = [ + "Index", + "Name", + "Data Type", + ] + return [(key, fields[key]) for key in order if key in fields] def __eq__(self, other: Any) -> bool: return isinstance(other, OpenMLDataFeature) and self.__dict__ == other.__dict__ diff --git a/openml/setups/setup.py b/openml/setups/setup.py index 0960ad4c1..bf65cecf9 100644 --- a/openml/setups/setup.py +++ b/openml/setups/setup.py @@ -1,13 +1,14 @@ # License: BSD 3-Clause from __future__ import annotations -from typing import Any +from typing import Any, Sequence import openml.config import openml.flows +from openml.utils import ReprMixin -class OpenMLSetup: +class OpenMLSetup(ReprMixin): """Setup object (a.k.a. Configuration). Parameters @@ -43,30 +44,21 @@ def _to_dict(self) -> dict[str, Any]: else None, } - def __repr__(self) -> str: - header = "OpenML Setup" - header = f"{header}\n{'=' * len(header)}\n" - - fields = { + def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]: + """Collect all information to display in the __repr__ body.""" + fields: dict[str, int | str | None] = { "Setup ID": self.setup_id, "Flow ID": self.flow_id, "Flow URL": openml.flows.OpenMLFlow.url_for_id(self.flow_id), - "# of Parameters": ( - len(self.parameters) if self.parameters is not None else float("nan") - ), + "# of Parameters": (len(self.parameters) if self.parameters is not None else "nan"), } # determines the order in which the information will be printed order = ["Setup ID", "Flow ID", "Flow URL", "# of Parameters"] - _fields = [(key, fields[key]) for key in order if key in fields] - - longest_field_name_length = max(len(name) for name, _ in _fields) - field_line_format = f"{{:.<{longest_field_name_length}}}: {{}}" - body = "\n".join(field_line_format.format(name, value) for name, value in _fields) - return header + body + return [(key, fields[key]) for key in order if key in fields] -class OpenMLParameter: +class OpenMLParameter(ReprMixin): """Parameter object (used in setup). Parameters @@ -123,11 +115,9 @@ def _to_dict(self) -> dict[str, Any]: "value": self.value, } - def __repr__(self) -> str: - header = "OpenML Parameter" - header = f"{header}\n{'=' * len(header)}\n" - - fields = { + def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]: + """Collect all information to display in the __repr__ body.""" + fields: dict[str, int | str | None] = { "ID": self.id, "Flow ID": self.flow_id, # "Flow Name": self.flow_name, @@ -156,9 +146,4 @@ def __repr__(self) -> str: parameter_default, parameter_value, ] - _fields = [(key, fields[key]) for key in order if key in fields] - - longest_field_name_length = max(len(name) for name, _ in _fields) - field_line_format = f"{{:.<{longest_field_name_length}}}: {{}}" - body = "\n".join(field_line_format.format(name, value) for name, value in _fields) - return header + body + return [(key, fields[key]) for key in order if key in fields] diff --git a/openml/tasks/split.py b/openml/tasks/split.py index 4e781df35..ece41c2ac 100644 --- a/openml/tasks/split.py +++ b/openml/tasks/split.py @@ -4,12 +4,14 @@ import pickle from collections import OrderedDict from pathlib import Path -from typing import Any +from typing import Any, Sequence from typing_extensions import NamedTuple import arff # type: ignore import numpy as np +from openml.utils import ReprMixin + class Split(NamedTuple): """A single split of a dataset.""" @@ -18,7 +20,7 @@ class Split(NamedTuple): test: np.ndarray -class OpenMLSplit: +class OpenMLSplit(ReprMixin): """OpenML Split object. This class manages train-test splits for a dataset across multiple @@ -63,6 +65,22 @@ def __init__( self.folds = len(self.split[0]) self.samples = len(self.split[0][0]) + def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]: + """Collect all information to display in the __repr__ body.""" + fields = { + "Name": self.name, + "Description": ( + self.description if len(self.description) <= 80 else self.description[:77] + "..." + ), + "Repeats": self.repeats, + "Folds": self.folds, + "Samples": self.samples, + } + + order = ["Name", "Description", "Repeats", "Folds", "Samples"] + + return [(key, fields[key]) for key in order if key in fields] + def __eq__(self, other: Any) -> bool: if ( (not isinstance(self, type(other))) diff --git a/openml/utils.py b/openml/utils.py index 7e72e7aee..b19f9e698 100644 --- a/openml/utils.py +++ b/openml/utils.py @@ -2,11 +2,23 @@ from __future__ import annotations import contextlib +import re import shutil import warnings +from abc import ABC, abstractmethod from functools import wraps from pathlib import Path -from typing import TYPE_CHECKING, Any, Callable, Mapping, Sized, TypeVar, overload +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Iterable, + Mapping, + Sequence, + Sized, + TypeVar, + overload, +) from typing_extensions import Literal, ParamSpec import numpy as np @@ -469,3 +481,57 @@ def update(self, length: int) -> None: self._progress_bar.update(length) if self._progress_bar.total <= self._progress_bar.n: self._progress_bar.close() + + +class ReprMixin(ABC): + """A mixin class that provides a customizable string representation for OpenML objects. + + This mixin standardizes the __repr__ output format across OpenML classes. + Classes inheriting from this mixin should implement the + _get_repr_body_fields method to specify which fields to display. + """ + + def __repr__(self) -> str: + body_fields = self._get_repr_body_fields() + return self._apply_repr_template(body_fields) + + @abstractmethod + def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]: + """Collect all information to display in the __repr__ body. + + Returns + ------- + body_fields : List[Tuple[str, Union[str, int, List[str]]]] + A list of (name, value) pairs to display in the body of the __repr__. + E.g.: [('metric', 'accuracy'), ('dataset', 'iris')] + If value is a List of str, then each item of the list will appear in a separate row. + """ + # Should be implemented in the base class. + + def _apply_repr_template( + self, + body_fields: Iterable[tuple[str, str | int | list[str] | None]], + ) -> str: + """Generates the header and formats the body for string representation of the object. + + Parameters + ---------- + body_fields: List[Tuple[str, str]] + A list of (name, value) pairs to display in the body of the __repr__. + """ + # We add spaces between capitals, e.g. ClassificationTask -> Classification Task + name_with_spaces = re.sub( + r"(\w)([A-Z])", + r"\1 \2", + self.__class__.__name__[len("OpenML") :], + ) + header_text = f"OpenML {name_with_spaces}" + header = f"{header_text}\n{'=' * len(header_text)}\n" + + _body_fields: list[tuple[str, str | int | list[str]]] = [ + (k, "None" if v is None else v) for k, v in body_fields + ] + longest_field_name_length = max(len(name) for name, _ in _body_fields) + field_line_format = f"{{:.<{longest_field_name_length}}}: {{}}" + body = "\n".join(field_line_format.format(name, value) for name, value in _body_fields) + return header + body