Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
291 changes: 170 additions & 121 deletions tests/routers/openml/task_list_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@

import httpx
import pytest
from sqlalchemy.ext.asyncio import AsyncConnection

from core.errors import NoResultsError
from routers.dependencies import Pagination
from routers.openml.tasks import TaskStatusFilter, list_tasks


async def test_list_tasks_default(py_api: httpx.AsyncClient) -> None:
Expand Down Expand Up @@ -36,60 +39,169 @@ async def test_list_tasks_get(py_api: httpx.AsyncClient) -> None:
assert isinstance(response.json(), list)


async def test_list_tasks_filter_type(py_api: httpx.AsyncClient) -> None:
@pytest.mark.parametrize(
    ("limit", "offset", "expected_status", "expected_max_results"),
    [
        (-10, 0, HTTPStatus.NOT_FOUND, 0),  # negative limit clamped to 0 -> No results
        (5, -10, HTTPStatus.OK, 5),  # negative offset clamped to 0 -> First 5 results
    ],
    ids=["negative_limit", "negative_offset"],
)
async def test_list_tasks_negative_pagination_safely_clamped(
    limit: int,
    offset: int,
    expected_status: int,
    expected_max_results: int,
    py_api: httpx.AsyncClient,
) -> None:
    """Negative pagination values are safely clamped to 0 instead of causing 500 errors.

    A limit clamped to 0 raises NoResultsError, which the API maps to HTTP 404.
    An offset clamped to 0 simply returns the first page of results (200 OK).

    Note: This remains an HTTP-level (py_api) test to ensure end-to-end safety is
    preserved.
    """
    response = await py_api.post(
        "/tasks/list",
        json={"pagination": {"limit": limit, "offset": offset}},
    )
    assert response.status_code == expected_status

    # Guard clause: the error path only needs the problem-type check.
    if expected_status != HTTPStatus.OK:
        assert response.json()["type"] == NoResultsError.uri
        return

    body = response.json()
    assert len(body) <= expected_max_results
    # Compare to a baseline with offset=0 to prove it was correctly clamped
    baseline = await py_api.post(
        "/tasks/list",
        json={"pagination": {"limit": limit, "offset": 0}},
    )
    assert baseline.status_code == HTTPStatus.OK
    assert [t["task_id"] for t in body] == [t["task_id"] for t in baseline.json()]


@pytest.mark.parametrize(
("pagination_override", "expected_field"),
[
({"limit": "abc", "offset": 0}, "limit"), # Invalid type
({"limit": 5, "offset": "xyz"}, "offset"), # Invalid type
],
ids=["bad_limit_type", "bad_offset_type"],
)
async def test_list_tasks_invalid_pagination_type(
pagination_override: dict[str, Any], expected_field: str, py_api: httpx.AsyncClient
) -> None:
"""Invalid pagination types return 422 Unprocessable Entity."""
response = await py_api.post(
"/tasks/list",
json={"pagination": pagination_override},
)
assert response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY
Comment on lines +93 to +101
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (testing): Strengthen the 422 tests by asserting on the validation error structure, not only the status code.

These tests only assert the 422 status. To better guard against regressions, also assert key fields in the error body (e.g. detail[0]['loc'] includes pagination.limit/pagination.offset and the error type is type_error.integer or equivalent). This ensures the request fails for the intended validation reason, not some other 422 path introduced later.

Suggested implementation:

@pytest.mark.parametrize(
    ("pagination_override", "expected_loc_suffix"),
    [
        ({"limit": "abc", "offset": 0}, ("pagination", "limit")),  # Invalid type
        ({"limit": 5, "offset": "xyz"}, ("pagination", "offset")),  # Invalid type
    ],
    ids=["bad_limit_type", "bad_offset_type"],
)
async def test_list_tasks_invalid_pagination_type(
    pagination_override: dict[str, Any],
    expected_loc_suffix: tuple[str, str],
    py_api: httpx.AsyncClient,
) -> None:
    """Invalid pagination types return 422 Unprocessable Entity with expected validation errors."""
    response = await py_api.post(
        "/tasks/list",
        json={"pagination": pagination_override},
    )
    assert response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY

    body = response.json()
    # FastAPI / Pydantic-style validation error structure
    assert "detail" in body
    assert isinstance(body["detail"], list)
    assert body["detail"], "Expected at least one validation error"

    error = body["detail"][0]
    # Ensure we are failing specifically on the pagination field we overrode
    assert "loc" in error
    assert error["loc"][-2:] == list(expected_loc_suffix)

    # Allow for common integer type error codes across Pydantic versions
    assert error.get("type") in {"type_error.integer", "int_parsing", "int_type"}

async def test_list_tasks_default(py_api: httpx.AsyncClient) -> None:

# Verify that the error points to the correct field
detail = response.json()["detail"][0]
assert detail["loc"][-2:] == ["pagination", expected_field]
assert detail["type"] in {"type_error.integer", "int_parsing", "int_type"}


@pytest.mark.parametrize(
    "value",
    ["1...2", "abc"],
    ids=["triple_dot", "non_numeric"],
)
async def test_list_tasks_invalid_range(value: str, py_api: httpx.AsyncClient) -> None:
    """Invalid number_instances format returns 422 Unprocessable Entity."""
    payload = {"number_instances": value}
    response = await py_api.post("/tasks/list", json=payload)
    assert response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY
    # The first validation error must point at the field we sent
    first_error = response.json()["detail"][0]
    assert first_error["loc"][-1] == "number_instances"


@pytest.mark.parametrize(
    "payload",
    [
        {"tag": "!@#$% "},  # SystemString64 regex mismatch
        {"data_name": "!@#$% "},  # CasualString128 regex mismatch
        {"task_id": []},  # min_length=1 violation
        {"data_id": []},  # min_length=1 violation
    ],
    ids=["bad_tag_format", "bad_data_name_format", "empty_task_ids", "empty_data_ids"],
)
async def test_list_tasks_invalid_inputs(
    payload: dict[str, Any], py_api: httpx.AsyncClient
) -> None:
    """Malformed inputs violating Pydantic/FastAPI constraints return 422."""
    response = await py_api.post("/tasks/list", json=payload)
    assert response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY
    # Each parametrized payload carries exactly one field; the validation
    # error must name that same field.
    (field_name,) = payload
    first_error = response.json()["detail"][0]
    assert first_error["loc"][-1] == field_name


async def test_list_tasks_no_results_api_mapping(py_api: httpx.AsyncClient) -> None:
    """Verify that a triggered NoResultsError is correctly mapped to 404/problem+json."""
    # This acts as the Happy Path verification for end-to-end error handling
    response = await py_api.post(
        "/tasks/list",
        json={"tag": "completely-nonexistent-tag-12345"},
    )
    assert response.status_code == HTTPStatus.NOT_FOUND
    assert response.headers["content-type"] == "application/problem+json"
    assert response.json()["type"] == NoResultsError.uri


# ── Direct call tests: list_tasks ──


async def test_list_tasks_filter_type(expdb_test: AsyncConnection) -> None:
"""Filter by task_type_id returns only tasks of that type."""
response = await py_api.post("/tasks/list", json={"task_type_id": 1})
assert response.status_code == HTTPStatus.OK
tasks = response.json()
tasks = await list_tasks(pagination=Pagination(), task_type_id=1, expdb=expdb_test)
assert len(tasks) > 0
assert all(t["task_type_id"] == 1 for t in tasks)


async def test_list_tasks_filter_tag(py_api: httpx.AsyncClient) -> None:
async def test_list_tasks_filter_tag(expdb_test: AsyncConnection) -> None:
"""Filter by tag returns only tasks with that tag."""
response = await py_api.post("/tasks/list", json={"tag": "OpenML100"})
assert response.status_code == HTTPStatus.OK
tasks = response.json()
tasks = await list_tasks(pagination=Pagination(), tag="OpenML100", expdb=expdb_test)
assert len(tasks) > 0
assert all("OpenML100" in t["tag"] for t in tasks)


@pytest.mark.parametrize("task_id", [1, 59, [1, 2, 3]])
async def test_list_tasks_filter_task_id(
task_id: int | list[int], py_api: httpx.AsyncClient
task_id: int | list[int], expdb_test: AsyncConnection
) -> None:
"""Filter by task_id returns only those tasks."""
"""Filter by task_id returns only those tasks (regardless of status)."""
ids = [task_id] if isinstance(task_id, int) else task_id
response = await py_api.post("/tasks/list", json={"task_id": ids})
assert response.status_code == HTTPStatus.OK
returned_ids = {t["task_id"] for t in response.json()}
assert returned_ids == set(ids)
tasks = await list_tasks(
pagination=Pagination(), task_id=ids, status=TaskStatusFilter.ALL, expdb=expdb_test
)
returned_ids = sorted(t["task_id"] for t in tasks)
assert returned_ids == sorted(ids)


async def test_list_tasks_filter_data_id(py_api: httpx.AsyncClient) -> None:
async def test_list_tasks_filter_data_id(expdb_test: AsyncConnection) -> None:
"""Filter by data_id returns only tasks that use that dataset."""
data_id = 10
response = await py_api.post("/tasks/list", json={"data_id": [data_id]})
assert response.status_code == HTTPStatus.OK
tasks = response.json()
tasks = await list_tasks(pagination=Pagination(), data_id=[data_id], expdb=expdb_test)
assert len(tasks) > 0
assert all(t["did"] == data_id for t in tasks)


async def test_list_tasks_filter_data_name(py_api: httpx.AsyncClient) -> None:
async def test_list_tasks_filter_data_name(expdb_test: AsyncConnection) -> None:
"""Filter by data_name returns only tasks whose dataset matches."""
response = await py_api.post("/tasks/list", json={"data_name": "mfeat-pixel"})
assert response.status_code == HTTPStatus.OK
tasks = response.json()
tasks = await list_tasks(pagination=Pagination(), data_name="mfeat-pixel", expdb=expdb_test)
assert len(tasks) > 0
assert all(t["name"] == "mfeat-pixel" for t in tasks)


async def test_list_tasks_filter_status_deactivated(py_api: httpx.AsyncClient) -> None:
async def test_list_tasks_filter_status_deactivated(expdb_test: AsyncConnection) -> None:
"""Filter by status='deactivated' returns tasks with that status."""
response = await py_api.post("/tasks/list", json={"status": "deactivated"})
assert response.status_code == HTTPStatus.OK
tasks = response.json()
tasks = await list_tasks(
pagination=Pagination(), status=TaskStatusFilter.DEACTIVATED, expdb=expdb_test
)
assert len(tasks) > 0
assert all(t["status"] == "deactivated" for t in tasks)

Expand All @@ -98,114 +210,65 @@ async def test_list_tasks_filter_status_deactivated(py_api: httpx.AsyncClient) -
("limit", "offset"),
[(5, 0), (10, 0), (5, 5)],
)
async def test_list_tasks_pagination(limit: int, offset: int, py_api: httpx.AsyncClient) -> None:
async def test_list_tasks_pagination(limit: int, offset: int, expdb_test: AsyncConnection) -> None:
"""Pagination limit and offset are respected."""
response = await py_api.post(
"/tasks/list",
json={"pagination": {"limit": limit, "offset": offset}},
tasks = await list_tasks(pagination=Pagination(limit=limit, offset=offset), expdb=expdb_test)
assert len(tasks) <= limit

# Precise verification: compare IDs against a corresponding slice from an offset=0 baseline
baseline = await list_tasks(
pagination=Pagination(limit=limit + offset, offset=0), expdb=expdb_test
)
assert response.status_code == HTTPStatus.OK
assert len(response.json()) <= limit
expected_ids = [t["task_id"] for t in baseline][offset : offset + limit]
assert [t["task_id"] for t in tasks] == expected_ids


async def test_list_tasks_pagination_order_stable(py_api: httpx.AsyncClient) -> None:
async def test_list_tasks_pagination_order_stable(expdb_test: AsyncConnection) -> None:
"""Results are ordered by task_id — consecutive pages are in ascending order."""
r1 = await py_api.post("/tasks/list", json={"pagination": {"limit": 5, "offset": 0}})
r2 = await py_api.post("/tasks/list", json={"pagination": {"limit": 5, "offset": 5}})
ids1 = [t["task_id"] for t in r1.json()]
ids2 = [t["task_id"] for t in r2.json()]
tasks1 = await list_tasks(pagination=Pagination(limit=5, offset=0), expdb=expdb_test)
tasks2 = await list_tasks(pagination=Pagination(limit=5, offset=5), expdb=expdb_test)
ids1 = [t["task_id"] for t in tasks1]
ids2 = [t["task_id"] for t in tasks2]
assert ids1 == sorted(ids1)
assert ids2 == sorted(ids2)
if ids1 and ids2:
assert max(ids1) < min(ids2)


async def test_list_tasks_number_instances_range(py_api: httpx.AsyncClient) -> None:
async def test_list_tasks_number_instances_range(expdb_test: AsyncConnection) -> None:
"""number_instances range filter returns tasks whose dataset matches."""
min_instances, max_instances = 100, 1000
response = await py_api.post(
"/tasks/list",
json={"number_instances": f"{min_instances}..{max_instances}"},
tasks = await list_tasks(
pagination=Pagination(),
number_instances=f"{min_instances}..{max_instances}",
expdb=expdb_test,
)
assert response.status_code == HTTPStatus.OK
tasks = response.json()
assert len(tasks) > 0
for task in tasks:
qualities = {q["name"]: q["value"] for q in task["quality"]}
if "NumberOfInstances" in qualities:
assert min_instances <= float(qualities["NumberOfInstances"]) <= max_instances
assert "NumberOfInstances" in qualities
assert min_instances <= float(qualities["NumberOfInstances"]) <= max_instances


async def test_list_tasks_inputs_are_basic_subset(py_api: httpx.AsyncClient) -> None:
async def test_list_tasks_inputs_are_basic_subset(expdb_test: AsyncConnection) -> None:
"""Input entries only contain the expected basic input names."""
basic_inputs = {"source_data", "target_feature", "estimation_procedure", "evaluation_measures"}
response = await py_api.post("/tasks/list", json={"pagination": {"limit": 5, "offset": 0}})
assert response.status_code == HTTPStatus.OK
for task in response.json():
tasks = await list_tasks(pagination=Pagination(limit=5, offset=0), expdb=expdb_test)
assert any(task["input"] for task in tasks), "Expected at least one task to have inputs"
for task in tasks:
for inp in task["input"]:
assert inp["name"] in basic_inputs


async def test_list_tasks_quality_values_are_strings(py_api: httpx.AsyncClient) -> None:
async def test_list_tasks_quality_values_are_strings(expdb_test: AsyncConnection) -> None:
"""Quality values must be strings (to match PHP API behaviour)."""
response = await py_api.post("/tasks/list", json={"pagination": {"limit": 5, "offset": 0}})
assert response.status_code == HTTPStatus.OK
for task in response.json():
tasks = await list_tasks(pagination=Pagination(limit=5, offset=0), expdb=expdb_test)
assert any(task["quality"] for task in tasks), "Expected at least one task to have qualities"
for task in tasks:
for quality in task["quality"]:
assert isinstance(quality["value"], str)


@pytest.mark.parametrize(
"pagination_override",
[
{"limit": "abc", "offset": 0}, # Invalid type
{"limit": 5, "offset": "xyz"}, # Invalid type
],
ids=["bad_limit_type", "bad_offset_type"],
)
async def test_list_tasks_invalid_pagination_type(
pagination_override: dict[str, Any], py_api: httpx.AsyncClient
) -> None:
"""Invalid pagination types return 422 Unprocessable Entity."""
response = await py_api.post(
"/tasks/list",
json={"pagination": pagination_override},
)
assert response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY


@pytest.mark.parametrize(
("limit", "offset", "expected_status", "expected_max_results"),
[
(-10, 0, HTTPStatus.NOT_FOUND, 0), # negative limit clamped to 0 -> No results
(5, -10, HTTPStatus.OK, 5), # negative offset clamped to 0 -> First 5 results
],
ids=["negative_limit", "negative_offset"],
)
async def test_list_tasks_negative_pagination_safely_clamped(
limit: int,
offset: int,
expected_status: int,
expected_max_results: int,
py_api: httpx.AsyncClient,
) -> None:
"""Negative pagination values are safely clamped to 0 instead of causing 500 errors.

A limit clamped to 0 returns a 482 NoResultsError (404 Not Found).
An offset clamped to 0 simply returns the first page of results (200 OK).
"""
response = await py_api.post(
"/tasks/list",
json={"pagination": {"limit": limit, "offset": offset}},
)
assert response.status_code == expected_status
if expected_status == HTTPStatus.OK:
assert len(response.json()) <= expected_max_results
else:
error = response.json()
assert error["type"] == NoResultsError.uri


@pytest.mark.parametrize(
"payload",
[
Expand All @@ -215,21 +278,7 @@ async def test_list_tasks_negative_pagination_safely_clamped(
],
ids=["bad_tag", "bad_task_id", "bad_data_name"],
)
async def test_list_tasks_no_results(payload: dict[str, Any], py_api: httpx.AsyncClient) -> None:
async def test_list_tasks_no_results(payload: dict[str, Any], expdb_test: AsyncConnection) -> None:
"""Filters matching nothing return 404 NoResultsError."""
response = await py_api.post("/tasks/list", json=payload)
assert response.status_code == HTTPStatus.NOT_FOUND
assert response.headers["content-type"] == "application/problem+json"
error = response.json()
assert error["type"] == NoResultsError.uri


@pytest.mark.parametrize(
"value",
["1...2", "abc"],
ids=["triple_dot", "non_numeric"],
)
async def test_list_tasks_invalid_range(value: str, py_api: httpx.AsyncClient) -> None:
"""Invalid number_instances format returns 422 Unprocessable Entity."""
response = await py_api.post("/tasks/list", json={"number_instances": value})
assert response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY
with pytest.raises(NoResultsError):
await list_tasks(pagination=Pagination(), expdb=expdb_test, **payload)
Comment on lines +281 to +284
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (testing): Consider keeping an HTTP-level py_api test for 404 NoResultsError mapping in addition to the direct-call exception test.

The previous version validated the full HTTP behavior (404 status, application/problem+json content type, and inclusion of NoResultsError.uri in the body). The new version only confirms that list_tasks raises the correct Python exception, but not that it’s mapped to the expected HTTP response. Please either reintroduce an HTTP-level test (e.g. test_list_tasks_no_results_http) or add a small py_api test that asserts 404 + problem+json + error type, and keep this direct-call test as-is.

Suggested implementation:

@pytest.mark.parametrize(
    "payload",
    [
        # A tag that does not exist
        {"tag": "nonexistent-tag-123"},
        # A task ID that does not exist
        {"task_id": 999999999},
        # A data_name that does not match any task
        {"data_name": "nonexistent-dataset-123"},
    ],
    ids=["bad_tag", "bad_task_id", "bad_data_name"],
)
async def test_list_tasks_no_results(payload: dict[str, Any], expdb_test: AsyncConnection) -> None:
    """Filters matching nothing raise NoResultsError at the Python API level."""
    # Direct-call test that ensures the router logic raises the domain exception.
    with pytest.raises(NoResultsError):
        await list_tasks(expdb_test, **payload)


@pytest.mark.parametrize(
    "payload",
    [
        {"tag": "nonexistent-tag-123"},
        {"task_id": 999999999},
        {"data_name": "nonexistent-dataset-123"},
    ],
    ids=["bad_tag", "bad_task_id", "bad_data_name"],
)
async def test_list_tasks_no_results_http(
    payload: dict[str, Any],
    async_client: AsyncClient,
) -> None:
    """Filters matching nothing are mapped to HTTP 404 problem+json with NoResultsError type."""
    response = await async_client.get("/openml/tasks", params=payload)

    assert response.status_code == status.HTTP_404_NOT_FOUND
    assert response.headers["content-type"].startswith("application/problem+json")

    body = response.json()
    # Error type should include the NoResultsError identifier/URI
    assert isinstance(body.get("type"), str)
    assert "NoResultsError" in body["type"]

To make this compile and follow your project’s conventions, you will likely need to:

  1. Ensure imports at the top of tests/routers/openml/task_list_test.py:
    • from httpx import AsyncClient (or whatever async client type your async_client fixture uses).
    • from starlette import status or from fastapi import status (whichever you already use elsewhere).
    • from ...routers.openml.task_list import list_tasks (adjust module path to the actual router function).
    • from ...exceptions import NoResultsError (or your actual domain error module).
  2. If your test client fixture is named differently (e.g. client instead of async_client) or is sync-only, update the test_list_tasks_no_results_http signature and call style accordingly.
  3. If you already have shared “no-results” payload constants or fixtures in this test module, you may want to reuse them rather than hard-coding the example payloads in both parametrizations.

Comment on lines +281 to +284
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Docstring still describes HTTP behavior.

This test now calls list_tasks(...) directly and asserts NoResultsError, so “return 404” is no longer accurate. Please update the wording to match the direct-call path.

✏️ Suggested wording
-    """Filters matching nothing return 404 NoResultsError."""
+    """Filters matching nothing raise NoResultsError on direct calls."""
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@tests/routers/openml/task_list_test.py` around lines 281 - 284, Update the
test docstring to reflect the direct-call behavior: replace the HTTP wording
"return 404" with a description that the call to list_tasks(...) raises
NoResultsError when filters match nothing (e.g., "Filters matching nothing raise
NoResultsError"). Keep reference to the test function name
test_list_tasks_no_results and the call signature using Pagination(),
expdb_test, and payload to ensure clarity.

Loading