8 changes: 8 additions & 0 deletions openml/_api/__init__.py
@@ -0,0 +1,8 @@
from openml._api.runtime.core import APIContext


def set_api_version(version: str, *, strict: bool = False) -> None:
    api_context.set_version(version=version, strict=strict)


api_context = APIContext()
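
A minimal usage sketch of the new entry point (APIContext.set_version is defined in runtime/core.py, which is not part of this section; the "v1"/"v2" labels are assumed from the config module below):

# Usage sketch, not part of the diff.
from openml._api import api_context, set_api_version

set_api_version("v2")                # route subsequent calls through the v2 backend
set_api_version("v1", strict=True)   # strict mode; exact semantics are defined by APIContext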
62 changes: 62 additions & 0 deletions openml/_api/config.py
@@ -0,0 +1,62 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import Literal

DelayMethod = Literal["human", "robot"]


@dataclass
class APIConfig:
    server: str
    base_url: str
    key: str
    timeout: int = 10  # seconds


@dataclass
class APISettings:
    v1: APIConfig
    v2: APIConfig


@dataclass
class ConnectionConfig:
    retries: int = 3
    delay_method: DelayMethod = "human"
    delay_time: int = 1  # seconds

    def __post_init__(self) -> None:
        if self.delay_method not in ("human", "robot"):
            raise ValueError(f"delay_method must be 'human' or 'robot', got {self.delay_method}")


@dataclass
class CacheConfig:
    dir: str = "~/.openml/cache"
    ttl: int = 60 * 60 * 24 * 7  # one week


@dataclass
class Settings:
    api: APISettings
    connection: ConnectionConfig
    cache: CacheConfig


settings = Settings(
    api=APISettings(
        v1=APIConfig(
            server="https://www.openml.org/",
            base_url="api/v1/xml/",
            key="...",
        ),
        v2=APIConfig(
            server="http://127.0.0.1:8001/",
            base_url="",
            key="...",
        ),
    ),
    connection=ConnectionConfig(),
    cache=CacheConfig(),
)
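
The settings object is a module-level singleton of plain dataclasses, so reading or overriding it at runtime could look like the sketch below (whether in-place mutation is the intended override mechanism is an assumption):

# Sketch, assuming settings is meant to be read and mutated in place.
from openml._api.config import ConnectionConfig, settings

print(settings.api.v1.server)   # "https://www.openml.org/"
settings.connection = ConnectionConfig(retries=5, delay_method="robot", delay_time=2)
settings.cache.ttl = 60 * 60    # shrink the cache TTL to one hour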
3 changes: 3 additions & 0 deletions openml/_api/http/__init__.py
@@ -0,0 +1,3 @@
from openml._api.http.client import HTTPClient

__all__ = ["HTTPClient"]
151 changes: 151 additions & 0 deletions openml/_api/http/client.py
@@ -0,0 +1,151 @@
from __future__ import annotations

from pathlib import Path
from typing import TYPE_CHECKING, Any
from urllib.parse import urlencode, urljoin, urlparse

import requests
from requests import Response

from openml.__version__ import __version__
from openml._api.config import settings

if TYPE_CHECKING:
    from openml._api.config import APIConfig


class CacheMixin:
    @property
    def dir(self) -> str:
        return settings.cache.dir

    @property
    def ttl(self) -> int:
        return settings.cache.ttl

    def _get_cache_dir(self, url: str, params: dict[str, Any]) -> Path:
        parsed_url = urlparse(url)
        netloc_parts = parsed_url.netloc.split(".")[::-1]  # reverse domain
        path_parts = parsed_url.path.strip("/").split("/")

        # remove api_key and serialize params if any
        filtered_params = {k: v for k, v in params.items() if k != "api_key"}
        params_part = [urlencode(filtered_params)] if filtered_params else []

        return Path(self.dir).joinpath(*netloc_parts, *path_parts, *params_part)

    def _get_cache_response(self, cache_dir: Path) -> Response:  # noqa: ARG002
        return Response()

    def _set_cache_response(self, cache_dir: Path, response: Response) -> None:  # noqa: ARG002
        return None


class HTTPClient(CacheMixin):
    def __init__(self, config: APIConfig) -> None:
        self.config = config
        self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"}

    @property
    def server(self) -> str:
        return self.config.server

    @property
    def base_url(self) -> str:
        return self.config.base_url

    @property
    def key(self) -> str:
        return self.config.key

    @property
    def timeout(self) -> int:
        return self.config.timeout

    def request(
        self,
        method: str,
        path: str,
        *,
        use_cache: bool = False,
        use_api_key: bool = False,
        **request_kwargs: Any,
    ) -> Response:
        url = urljoin(self.server, urljoin(self.base_url, path))

        params = request_kwargs.pop("params", {})
        params = params.copy()
        if use_api_key:
            params["api_key"] = self.key

        headers = request_kwargs.pop("headers", {})
        headers = headers.copy()
        headers.update(self.headers)

        timeout = request_kwargs.pop("timeout", self.timeout)
        cache_dir = self._get_cache_dir(url, params)

        if use_cache:
            try:
                return self._get_cache_response(cache_dir)
            # TODO: handle ttl expired error
            except Exception:
                raise

        response = requests.request(
            method=method,
            url=url,
            params=params,
            headers=headers,
            timeout=timeout,
            **request_kwargs,
        )

        if use_cache:
            self._set_cache_response(cache_dir, response)

        return response

    def get(
        self,
        path: str,
        *,
        use_cache: bool = False,
        use_api_key: bool = False,
        **request_kwargs: Any,
    ) -> Response:
        # TODO: remove override when cache is implemented
        use_cache = False
        return self.request(
            method="GET",
            path=path,
            use_cache=use_cache,
            use_api_key=use_api_key,
            **request_kwargs,
        )

    def post(
        self,
        path: str,
        **request_kwargs: Any,
    ) -> Response:
        return self.request(
            method="POST",
            path=path,
            use_cache=False,
            use_api_key=True,
            **request_kwargs,
        )

    def delete(
        self,
        path: str,
        **request_kwargs: Any,
    ) -> Response:
        return self.request(
            method="DELETE",
            path=path,
            use_cache=False,
            use_api_key=True,
            **request_kwargs,
        )
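
A usage sketch of the client (the endpoint path is illustrative; GET requests currently bypass the cache because get() forces use_cache to False):

# Sketch: resolve a v1 URL, send the request, note where a cached copy would live.
from openml._api.config import settings
from openml._api.http import HTTPClient

client = HTTPClient(settings.api.v1)
# GET https://www.openml.org/api/v1/xml/data/61 with the openml-python user-agent and default timeout
response = client.get("data/61")
print(response.status_code)
# _get_cache_dir would map that URL to ~/.openml/cache/org/openml/www/api/v1/xml/data/61
# post() and delete() always append the configured api_key to the query parameters.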
Empty file added openml/_api/http/utils.py
Empty file.
5 changes: 5 additions & 0 deletions openml/_api/resources/__init__.py
@@ -0,0 +1,5 @@
from openml._api.resources.datasets import DatasetsV1, DatasetsV2
from openml._api.resources.studies import StudiesV1, StudiesV2
from openml._api.resources.tasks import TasksV1, TasksV2

__all__ = ["DatasetsV1", "DatasetsV2", "StudiesV1", "StudiesV2", "TasksV1", "TasksV2"]
45 changes: 45 additions & 0 deletions openml/_api/resources/base.py
@@ -0,0 +1,45 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    import pandas as pd
    from requests import Response

    from openml._api.http import HTTPClient
    from openml.datasets.dataset import OpenMLDataset
    from openml.tasks.task import OpenMLTask


class ResourceAPI:
    def __init__(self, http: HTTPClient):
        self._http = http


class DatasetsAPI(ResourceAPI, ABC):
    @abstractmethod
    def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: ...


class TasksAPI(ResourceAPI, ABC):
    @abstractmethod
    def get(
        self,
        task_id: int,
        *,
        return_response: bool = False,
    ) -> OpenMLTask | tuple[OpenMLTask, Response]: ...


class StudiesAPI(ResourceAPI, ABC):
    @abstractmethod
    def list(  # noqa: PLR0913
        self,
        limit: int | None = None,
        offset: int | None = None,
        status: str | None = None,
        main_entity_type: str | None = None,
        uploader: list[int] | None = None,
        benchmark_suite: int | None = None,
    ) -> pd.DataFrame: ...
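
The concrete resources below implement these contracts and talk to the server through the HTTPClient injected via ResourceAPI; a wiring sketch (the actual version dispatch presumably lives in the runtime APIContext, which is not shown here):

# Wiring sketch, assuming callers pair each resource with a version-specific HTTPClient.
from openml._api.config import settings
from openml._api.http import HTTPClient
from openml._api.resources import DatasetsV1, StudiesV1

http_v1 = HTTPClient(settings.api.v1)
datasets = DatasetsV1(http_v1)   # DatasetsAPI.get is still a stub in this PR
studies = StudiesV1(http_v1)     # StudiesAPI.list is implemented against the v1 XML endpoint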
20 changes: 20 additions & 0 deletions openml/_api/resources/datasets.py
@@ -0,0 +1,20 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from openml._api.resources.base import DatasetsAPI

if TYPE_CHECKING:
    from requests import Response

    from openml.datasets.dataset import OpenMLDataset


class DatasetsV1(DatasetsAPI):
    def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
        raise NotImplementedError


class DatasetsV2(DatasetsAPI):
    def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
        raise NotImplementedError
80 changes: 80 additions & 0 deletions openml/_api/resources/studies.py
@@ -0,0 +1,80 @@
from __future__ import annotations

import pandas as pd
import xmltodict

from openml._api.resources.base import StudiesAPI


class StudiesV1(StudiesAPI):
    def list(  # noqa: PLR0913
        self,
        limit: int | None = None,
        offset: int | None = None,
        status: str | None = None,
        main_entity_type: str | None = None,
        uploader: list[int] | None = None,
        benchmark_suite: int | None = None,
    ) -> pd.DataFrame:
        api_call = "study/list"

        if limit is not None:
            api_call += f"/limit/{limit}"
        if offset is not None:
            api_call += f"/offset/{offset}"
        if status is not None:
            api_call += f"/status/{status}"
        if main_entity_type is not None:
            api_call += f"/main_entity_type/{main_entity_type}"
        if uploader is not None:
            api_call += f"/uploader/{','.join(str(u) for u in uploader)}"
        if benchmark_suite is not None:
            api_call += f"/benchmark_suite/{benchmark_suite}"

        response = self._http.get(api_call)
        xml_string = response.text

        # Parse XML and convert to DataFrame
        study_dict = xmltodict.parse(xml_string, force_list=("oml:study",))

        assert isinstance(study_dict["oml:study_list"]["oml:study"], list), type(
            study_dict["oml:study_list"],
        )
        assert study_dict["oml:study_list"]["@xmlns:oml"] == "http://openml.org/openml", study_dict[
            "oml:study_list"
        ]["@xmlns:oml"]

        studies = {}
        for study_ in study_dict["oml:study_list"]["oml:study"]:
            expected_fields = {
                "oml:id": ("id", int),
                "oml:alias": ("alias", str),
                "oml:main_entity_type": ("main_entity_type", str),
                "oml:benchmark_suite": ("benchmark_suite", int),
                "oml:name": ("name", str),
                "oml:status": ("status", str),
                "oml:creation_date": ("creation_date", str),
                "oml:creator": ("creator", int),
            }
            study_id = int(study_["oml:id"])
            current_study = {}
            for oml_field_name, (real_field_name, cast_fn) in expected_fields.items():
                if oml_field_name in study_:
                    current_study[real_field_name] = cast_fn(study_[oml_field_name])
            current_study["id"] = int(current_study["id"])
            studies[study_id] = current_study

        return pd.DataFrame.from_dict(studies, orient="index")


class StudiesV2(StudiesAPI):
    def list(  # noqa: PLR0913
        self,
        limit: int | None = None,
        offset: int | None = None,
        status: str | None = None,
        main_entity_type: str | None = None,
        uploader: list[int] | None = None,
        benchmark_suite: int | None = None,
    ) -> pd.DataFrame:
        raise NotImplementedError("V2 API implementation is not yet available")
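
A hedged example of the v1 listing (filter values are illustrative): the call below builds the path study/list/status/all/main_entity_type/task and returns a DataFrame indexed by study id, with columns taken from the expected_fields mapping (only fields present in the XML appear):

# Example call; filter values and selected columns are illustrative.
from openml._api.config import settings
from openml._api.http import HTTPClient
from openml._api.resources import StudiesV1

studies_api = StudiesV1(HTTPClient(settings.api.v1))
df = studies_api.list(status="all", main_entity_type="task")
# Possible columns: id, alias, main_entity_type, benchmark_suite, name, status, creation_date, creator
print(df[["name", "status", "creation_date"]].head())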