    @abstractmethod
    def list(  # noqa: PLR0913
        self,
        limit: int,
        offset: int,
        *,
        function: str,
        # NOTE(review): the concrete implementations annotate these with
        # ``builtins.list`` because a method named ``list`` shadows the builtin
        # inside the class namespace; consider aligning this signature — confirm
        # with the project's type-checker configuration.
        tasks: list | None = None,
        setups: list | None = None,
        flows: list | None = None,
        runs: list | None = None,
        uploaders: list | None = None,
        study: int | None = None,
        sort_order: str | None = None,
        **kwargs: Any,
    ) -> list[OpenMLEvaluation]:
        """List evaluations matching the given filters.

        Parameters
        ----------
        limit : int
            The number of evaluations to return.
        offset : int
            The number of evaluations to skip, starting from the first.
        function : str
            The evaluation function, e.g. ``predictive_accuracy``.
        tasks : list, optional
            Task IDs to filter on.
        setups : list, optional
            Setup IDs to filter on.
        flows : list, optional
            Flow IDs to filter on.
        runs : list, optional
            Run IDs to filter on.
        uploaders : list, optional
            Uploader IDs to filter on.
        study : int, optional
            Study ID to filter on.
        sort_order : str, optional
            Sort direction of the result.
        **kwargs : Any
            Additional single-value filters; which ones are accepted is up to
            the concrete implementation.

        Returns
        -------
        list of OpenMLEvaluation
        """
        ...
class EvaluationV1API(ResourceV1API, EvaluationAPI):
    """V1 API implementation for evaluations.

    Fetches evaluations from the v1 XML API endpoint.
    """

    def list(  # noqa: PLR0913
        self,
        limit: int,
        offset: int,
        *,
        function: str,
        tasks: builtins.list | None = None,
        setups: builtins.list | None = None,
        flows: builtins.list | None = None,
        runs: builtins.list | None = None,
        uploaders: builtins.list | None = None,
        study: int | None = None,
        sort_order: str | None = None,
        **kwargs: Any,
    ) -> builtins.list[OpenMLEvaluation]:
        """Retrieve evaluations from the OpenML v1 XML API.

        Builds the evaluation query path from the provided filters, requests
        it through the HTTP client, and parses the XML response into
        ``OpenMLEvaluation`` objects enriched with uploader usernames.

        Parameters
        ----------
        limit : int
            The number of evaluations to return.
        offset : int
            The number of evaluations to skip, starting from the first.
        function : str
            The evaluation function, e.g. ``predictive_accuracy``.
        tasks : list[int | str], optional
            The list of task IDs.
        setups : list[int | str], optional
            The list of setup IDs.
        flows : list[int | str], optional
            The list of flow IDs.
        runs : list[int | str], optional
            The list of run IDs.
        uploaders : list[int | str], optional
            The list of uploader IDs.
        study : int, optional
            The study ID to filter on.
        sort_order : str, optional
            Order of sorting evaluations, ascending ("asc") or descending
            ("desc").
        **kwargs : Any
            Single-value filters appended to the path as
            ``/<operator>/<value>``. Legal filter operators: tag, per_fold.

        Returns
        -------
        list of OpenMLEvaluation

        Notes
        -----
        This method performs two API calls:

        1. Fetches the evaluation data.
        2. Fetches user information for all uploaders found in that data, to
           map uploader IDs to usernames.
        """
        api_call = self._build_url(
            limit,
            offset,
            function=function,
            tasks=tasks,
            setups=setups,
            flows=flows,
            runs=runs,
            uploaders=uploaders,
            study=study,
            sort_order=sort_order,
            **kwargs,
        )
        eval_response = self._http.get(api_call)
        return self._parse_list_xml(eval_response.text)

    def _build_url(  # noqa: PLR0913
        self,
        limit: int,
        offset: int,
        *,
        function: str,
        tasks: builtins.list | None = None,
        setups: builtins.list | None = None,
        flows: builtins.list | None = None,
        runs: builtins.list | None = None,
        uploaders: builtins.list | None = None,
        study: int | None = None,
        sort_order: str | None = None,
        **kwargs: Any,
    ) -> str:
        """Construct the relative evaluation-list API path for the given filters.

        Path segments are emitted in a fixed order: function, limit, offset,
        the ``kwargs`` filters (in the order they were passed), task, setup,
        flow, run, uploader, study, sort_order. Filters whose value is
        ``None`` are omitted.

        Parameters
        ----------
        limit : int
            The number of evaluations to return.
        offset : int
            The number of evaluations to skip, starting from the first.
        function : str
            The evaluation function, e.g. ``predictive_accuracy``.
        tasks, setups, flows, runs, uploaders : list[int | str], optional
            ID lists; every entry is converted with ``int()`` and the entries
            are joined with commas.
        study : int, optional
            The study ID to filter on.
        sort_order : str, optional
            Order of sorting evaluations, ascending ("asc") or descending
            ("desc").
        **kwargs : Any
            Single-value filters. Legal filter operators: tag, per_fold.

        Returns
        -------
        str
            A relative API path such as
            ``evaluation/list/function/<function>/limit/<limit>/...``.

        Raises
        ------
        ValueError
            If an entry of an ID list cannot be converted with ``int()``.
        """
        segments = [f"evaluation/list/function/{function}"]
        if limit is not None:
            segments.append(f"limit/{limit}")
        if offset is not None:
            segments.append(f"offset/{offset}")

        # ``kwargs`` is always a dict, so only the individual values need a None check.
        segments.extend(
            f"{operator}/{value}" for operator, value in kwargs.items() if value is not None
        )

        # The tuple order defines the order of the segments in the path.
        id_filters = (
            ("task", tasks),
            ("setup", setups),
            ("flow", flows),
            ("run", runs),
            ("uploader", uploaders),
        )
        for name, ids in id_filters:
            if ids is not None:
                segments.append(f"{name}/{','.join(str(int(i)) for i in ids)}")

        if study is not None:
            segments.append(f"study/{study}")
        if sort_order is not None:
            segments.append(f"sort_order/{sort_order}")

        return "/".join(segments)

    def _parse_list_xml(self, xml_content: str) -> builtins.list[OpenMLEvaluation]:
        """Parse an evaluation-list XML response into ``OpenMLEvaluation`` objects.

        Performs one additional request (through ``_get_users``) to resolve the
        uploader IDs found in the response to usernames.

        Parameters
        ----------
        xml_content : str
            The XML body returned by the evaluation list endpoint.

        Returns
        -------
        list of OpenMLEvaluation

        Raises
        ------
        ValueError
            If the document has no ``oml:evaluations`` root element.
        """
        evals_dict: dict[str, Any] = xmltodict.parse(xml_content, force_list=("oml:evaluation",))
        # Minimalistic check if the XML is useful
        if "oml:evaluations" not in evals_dict:
            raise ValueError(
                f'Error in return XML, does not contain "oml:evaluations": {evals_dict!s}',
            )

        raw_evals = evals_dict["oml:evaluations"]["oml:evaluation"]
        # ``force_list`` makes xmltodict return a list here; the assert documents
        # that invariant and narrows the type for static checkers.
        assert isinstance(raw_evals, list), (
            "Expected 'oml:evaluation' to be a list, but got "
            f"{type(raw_evals).__name__}. "
        )

        # Sorted so the user-list request path is identical across interpreter
        # runs; iteration order of a set of strings is not stable between runs.
        uploader_ids = sorted({eval_["oml:uploader"] for eval_ in raw_evals})
        user_dict = self._get_users(uploader_ids)

        evals = []
        for eval_ in raw_evals:
            run_id = int(eval_["oml:run_id"])
            value = float(eval_["oml:value"]) if "oml:value" in eval_ else None
            values = json.loads(eval_["oml:values"]) if eval_.get("oml:values", None) else None
            array_data = eval_.get("oml:array_data")

            evals.append(
                OpenMLEvaluation(
                    run_id=run_id,
                    task_id=int(eval_["oml:task_id"]),
                    setup_id=int(eval_["oml:setup_id"]),
                    flow_id=int(eval_["oml:flow_id"]),
                    flow_name=eval_["oml:flow_name"],
                    data_id=int(eval_["oml:data_id"]),
                    data_name=eval_["oml:data_name"],
                    function=eval_["oml:function"],
                    upload_time=eval_["oml:upload_time"],
                    uploader=int(eval_["oml:uploader"]),
                    uploader_name=user_dict[eval_["oml:uploader"]],
                    value=value,
                    values=values,
                    array_data=array_data,
                )
            )

        return evals

    def _get_users(self, uploader_ids: builtins.list[str]) -> dict[str, str]:
        """Retrieve usernames for a list of OpenML user IDs.

        Parameters
        ----------
        uploader_ids : list[str]
            List of OpenML user IDs.

        Returns
        -------
        dict[str, str]
            A mapping from user ID to username. Empty when ``uploader_ids`` is
            empty; no request is made in that case.
        """
        if not uploader_ids:
            # Without IDs the path would end in a bare "user_id/" segment.
            return {}

        api_users = "user/list/user_id/" + ",".join(uploader_ids)
        user_response = self._http.get(api_users)

        users = xmltodict.parse(user_response.text, force_list=("oml:user",))
        return {user["oml:id"]: user["oml:username"] for user in users["oml:users"]["oml:user"]}
class EvaluationV2API(ResourceV2API, EvaluationAPI):
    """V2 API implementation for evaluations.

    Listing evaluations is not implemented for v2 yet; ``list`` only
    delegates to ``_not_supported``.
    """

    def list(  # noqa: PLR0913
        self,
        limit: int,  # noqa: ARG002
        offset: int,  # noqa: ARG002
        *,
        function: str,  # noqa: ARG002
        tasks: builtins.list | None = None,  # noqa: ARG002
        setups: builtins.list | None = None,  # noqa: ARG002
        flows: builtins.list | None = None,  # noqa: ARG002
        runs: builtins.list | None = None,  # noqa: ARG002
        uploaders: builtins.list | None = None,  # noqa: ARG002
        study: int | None = None,  # noqa: ARG002
        sort_order: str | None = None,  # noqa: ARG002
        **kwargs: Any,  # noqa: ARG002
    ) -> builtins.list[OpenMLEvaluation]:
        """Placeholder for listing evaluations through the OpenML v2 JSON API.

        All arguments are accepted only to satisfy the ``EvaluationAPI``
        signature and are ignored.

        Notes
        -----
        This method is not yet implemented. It calls
        ``self._not_supported(method="list")``; that helper is defined outside
        this file and presumably raises ``OpenMLNotSupportedError`` — confirm
        against the base class.
        """
        self._not_supported(method="list")
- **kwargs: Any, -) -> list[OpenMLEvaluation]: - """ - Perform API call ``/evaluation/function{function}/{filters}`` - - Parameters - ---------- - The arguments that are lists are separated from the single value - ones which are put into the kwargs. - - limit : int - the number of evaluations to return - offset : int - the number of evaluations to skip, starting from the first - function : str - the evaluation function. e.g., predictive_accuracy - - tasks : list[int,str], optional - the list of task IDs - setups: list[int,str], optional - the list of setup IDs - flows : list[int,str], optional - the list of flow IDs - runs :list[int,str], optional - the list of run IDs - uploaders : list[int,str], optional - the list of uploader IDs - - study : int, optional - - kwargs: dict, optional - Legal filter operators: tag, per_fold - - sort_order : str, optional - order of sorting evaluations, ascending ("asc") or descending ("desc") - - Returns - ------- - list of OpenMLEvaluation objects - """ - api_call = f"evaluation/list/function/{function}" - if limit is not None: - api_call += f"/limit/{limit}" - if offset is not None: - api_call += f"/offset/{offset}" - if kwargs is not None: - for operator, value in kwargs.items(): - if value is not None: - api_call += f"/{operator}/{value}" - if tasks is not None: - api_call += f"/task/{','.join([str(int(i)) for i in tasks])}" - if setups is not None: - api_call += f"/setup/{','.join([str(int(i)) for i in setups])}" - if flows is not None: - api_call += f"/flow/{','.join([str(int(i)) for i in flows])}" - if runs is not None: - api_call += f"/run/{','.join([str(int(i)) for i in runs])}" - if uploaders is not None: - api_call += f"/uploader/{','.join([str(int(i)) for i in uploaders])}" - if study is not None: - api_call += f"/study/{study}" - if sort_order is not None: - api_call += f"/sort_order/{sort_order}" - - return __list_evaluations(api_call) - - -def __list_evaluations(api_call: str) -> list[OpenMLEvaluation]: - """Helper 
function to parse API calls which are lists of runs""" - xml_string = openml._api_calls._perform_api_call(api_call, "get") - evals_dict = xmltodict.parse(xml_string, force_list=("oml:evaluation",)) - # Minimalistic check if the XML is useful - if "oml:evaluations" not in evals_dict: - raise ValueError( - f'Error in return XML, does not contain "oml:evaluations": {evals_dict!s}', - ) - - assert isinstance(evals_dict["oml:evaluations"]["oml:evaluation"], list), ( - "Expected 'oml:evaluation' to be a list, but got" - f"{type(evals_dict['oml:evaluations']['oml:evaluation']).__name__}. " - ) - - uploader_ids = list( - {eval_["oml:uploader"] for eval_ in evals_dict["oml:evaluations"]["oml:evaluation"]}, - ) - api_users = "user/list/user_id/" + ",".join(uploader_ids) - xml_string_user = openml._api_calls._perform_api_call(api_users, "get") - - users = xmltodict.parse(xml_string_user, force_list=("oml:user",)) - user_dict = {user["oml:id"]: user["oml:username"] for user in users["oml:users"]["oml:user"]} - - evals = [] - for eval_ in evals_dict["oml:evaluations"]["oml:evaluation"]: - run_id = int(eval_["oml:run_id"]) - value = float(eval_["oml:value"]) if "oml:value" in eval_ else None - values = json.loads(eval_["oml:values"]) if eval_.get("oml:values", None) else None - array_data = eval_.get("oml:array_data") - - evals.append( - OpenMLEvaluation( - run_id=run_id, - task_id=int(eval_["oml:task_id"]), - setup_id=int(eval_["oml:setup_id"]), - flow_id=int(eval_["oml:flow_id"]), - flow_name=eval_["oml:flow_name"], - data_id=int(eval_["oml:data_id"]), - data_name=eval_["oml:data_name"], - function=eval_["oml:function"], - upload_time=eval_["oml:upload_time"], - uploader=int(eval_["oml:uploader"]), - uploader_name=user_dict[eval_["oml:uploader"]], - value=value, - values=values, - array_data=array_data, - ) - ) - - return evals - - def list_evaluation_measures() -> list[str]: """Return list of evaluation measures available. 
diff --git a/tests/test_api/test_evaluation.py b/tests/test_api/test_evaluation.py new file mode 100644 index 000000000..14b655b2a --- /dev/null +++ b/tests/test_api/test_evaluation.py @@ -0,0 +1,39 @@ +# License: BSD 3-Clause +from __future__ import annotations + +import pytest +from openml._api import EvaluationV1API, EvaluationV2API +from openml.evaluations import OpenMLEvaluation +from openml.exceptions import OpenMLNotSupportedError + + +@pytest.fixture +def evaluation_v1(http_client_v1, minio_client) -> EvaluationV1API: + return EvaluationV1API(http=http_client_v1, minio=minio_client) + +@pytest.fixture +def evaluation_v2(http_client_v2, minio_client) -> EvaluationV2API: + return EvaluationV2API(http=http_client_v2, minio=minio_client) + + +@pytest.mark.test_server() +def test_v1_list(evaluation_v1): + evaluations = evaluation_v1.list( + function="predictive_accuracy", + limit=10, + offset=0, + ) + + assert isinstance(evaluations, list) + assert len(evaluations) == 10 + assert all(isinstance(e, OpenMLEvaluation) for e in evaluations) + + +@pytest.mark.test_server() +def test_v2_list(evaluation_v2): + with pytest.raises(OpenMLNotSupportedError): + evaluation_v2.list( + function="predictive_accuracy", + limit=10, + offset=0, + )