diff --git a/tests/routers/openml/task_list_test.py b/tests/routers/openml/task_list_test.py index b2c2338..54b7af7 100644 --- a/tests/routers/openml/task_list_test.py +++ b/tests/routers/openml/task_list_test.py @@ -3,8 +3,11 @@ import httpx import pytest +from sqlalchemy.ext.asyncio import AsyncConnection from core.errors import NoResultsError +from routers.dependencies import Pagination +from routers.openml.tasks import TaskStatusFilter, list_tasks async def test_list_tasks_default(py_api: httpx.AsyncClient) -> None: @@ -36,60 +39,169 @@ async def test_list_tasks_get(py_api: httpx.AsyncClient) -> None: assert isinstance(response.json(), list) -async def test_list_tasks_filter_type(py_api: httpx.AsyncClient) -> None: +@pytest.mark.parametrize( + ("limit", "offset", "expected_status", "expected_max_results"), + [ + (-10, 0, HTTPStatus.NOT_FOUND, 0), # negative limit clamped to 0 -> No results + (5, -10, HTTPStatus.OK, 5), # negative offset clamped to 0 -> First 5 results + ], + ids=["negative_limit", "negative_offset"], +) +async def test_list_tasks_negative_pagination_safely_clamped( + limit: int, + offset: int, + expected_status: int, + expected_max_results: int, + py_api: httpx.AsyncClient, +) -> None: + """Negative pagination values are safely clamped to 0 instead of causing 500 errors. + + A limit clamped to 0 raises NoResultsError, which the API maps to HTTP 404. + An offset clamped to 0 simply returns the first page of results (200 OK). + + Note: This remains an HTTP-level (py_api) test to ensure end-to-end safety is + preserved. + """ + response = await py_api.post( + "/tasks/list", + json={"pagination": {"limit": limit, "offset": offset}}, + ) + assert response.status_code == expected_status + if expected_status == HTTPStatus.OK: + body = response.json() + assert len(body) <= expected_max_results + # Compare to a baseline with offset=0 to prove it was correctly clamped + baseline = await py_api.post( + "/tasks/list", + json={"pagination": {"limit": limit, "offset": 0}}, + ) + assert baseline.status_code == HTTPStatus.OK + assert [t["task_id"] for t in body] == [t["task_id"] for t in baseline.json()] + else: + error = response.json() + assert error["type"] == NoResultsError.uri + + +@pytest.mark.parametrize( + ("pagination_override", "expected_field"), + [ + ({"limit": "abc", "offset": 0}, "limit"), # Invalid type + ({"limit": 5, "offset": "xyz"}, "offset"), # Invalid type + ], + ids=["bad_limit_type", "bad_offset_type"], +) +async def test_list_tasks_invalid_pagination_type( + pagination_override: dict[str, Any], expected_field: str, py_api: httpx.AsyncClient +) -> None: + """Invalid pagination types return 422 Unprocessable Entity.""" + response = await py_api.post( + "/tasks/list", + json={"pagination": pagination_override}, + ) + assert response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY + # Verify that the error points to the correct field + detail = response.json()["detail"][0] + assert detail["loc"][-2:] == ["pagination", expected_field] + assert detail["type"] in {"type_error.integer", "int_parsing", "int_type"} + + +@pytest.mark.parametrize( + "value", + ["1...2", "abc"], + ids=["triple_dot", "non_numeric"], +) +async def test_list_tasks_invalid_range(value: str, py_api: httpx.AsyncClient) -> None: + """Invalid number_instances format returns 422 Unprocessable Entity.""" + response = await py_api.post("/tasks/list", json={"number_instances": value}) + assert response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY + # Verify the error is for the correct field + detail = response.json()["detail"][0] + assert detail["loc"][-1] == "number_instances" + + +@pytest.mark.parametrize( + "payload", + [ + {"tag": "!@#$% "}, # SystemString64 regex mismatch + {"data_name": "!@#$% "}, # CasualString128 regex mismatch + {"task_id": []}, # min_length=1 violation + {"data_id": []}, # min_length=1 violation + ], + ids=["bad_tag_format", "bad_data_name_format", "empty_task_ids", "empty_data_ids"], +) +async def test_list_tasks_invalid_inputs( + payload: dict[str, Any], py_api: httpx.AsyncClient +) -> None: + """Malformed inputs violating Pydantic/FastAPI constraints return 422.""" + response = await py_api.post("/tasks/list", json=payload) + assert response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY + # Ensure we are failing for the field we provided + detail = response.json()["detail"][0] + expected_field = next(iter(payload)) + assert detail["loc"][-1] == expected_field + + +async def test_list_tasks_no_results_api_mapping(py_api: httpx.AsyncClient) -> None: + """Verify that a triggered NoResultsError is correctly mapped to 404/problem+json.""" + # This acts as the Happy Path verification for end-to-end error handling + payload = {"tag": "completely-nonexistent-tag-12345"} + response = await py_api.post("/tasks/list", json=payload) + assert response.status_code == HTTPStatus.NOT_FOUND + assert response.headers["content-type"] == "application/problem+json" + error = response.json() + assert error["type"] == NoResultsError.uri + + +# ── Direct call tests: list_tasks ── + + +async def test_list_tasks_filter_type(expdb_test: AsyncConnection) -> None: """Filter by task_type_id returns only tasks of that type.""" - response = await py_api.post("/tasks/list", json={"task_type_id": 1}) - assert response.status_code == HTTPStatus.OK - tasks = response.json() + tasks = await list_tasks(pagination=Pagination(), task_type_id=1, expdb=expdb_test) assert len(tasks) > 0 assert all(t["task_type_id"] == 1 for t in tasks) -async def test_list_tasks_filter_tag(py_api: httpx.AsyncClient) -> None: +async def test_list_tasks_filter_tag(expdb_test: AsyncConnection) -> None: """Filter by tag returns only tasks with that tag.""" - response = await py_api.post("/tasks/list", json={"tag": "OpenML100"}) - assert response.status_code == HTTPStatus.OK - tasks = response.json() + tasks = await list_tasks(pagination=Pagination(), tag="OpenML100", expdb=expdb_test) assert len(tasks) > 0 assert all("OpenML100" in t["tag"] for t in tasks) @pytest.mark.parametrize("task_id", [1, 59, [1, 2, 3]]) async def test_list_tasks_filter_task_id( - task_id: int | list[int], py_api: httpx.AsyncClient + task_id: int | list[int], expdb_test: AsyncConnection ) -> None: - """Filter by task_id returns only those tasks.""" + """Filter by task_id returns only those tasks (regardless of status).""" ids = [task_id] if isinstance(task_id, int) else task_id - response = await py_api.post("/tasks/list", json={"task_id": ids}) - assert response.status_code == HTTPStatus.OK - returned_ids = {t["task_id"] for t in response.json()} - assert returned_ids == set(ids) + tasks = await list_tasks( + pagination=Pagination(), task_id=ids, status=TaskStatusFilter.ALL, expdb=expdb_test + ) + returned_ids = sorted(t["task_id"] for t in tasks) + assert returned_ids == sorted(ids) -async def test_list_tasks_filter_data_id(py_api: httpx.AsyncClient) -> None: +async def test_list_tasks_filter_data_id(expdb_test: AsyncConnection) -> None: """Filter by data_id returns only tasks that use that dataset.""" data_id = 10 - response = await py_api.post("/tasks/list", json={"data_id": [data_id]}) - assert response.status_code == HTTPStatus.OK - tasks = response.json() + tasks = await list_tasks(pagination=Pagination(), data_id=[data_id], expdb=expdb_test) assert len(tasks) > 0 assert all(t["did"] == data_id for t in tasks) -async def test_list_tasks_filter_data_name(py_api: httpx.AsyncClient) -> None: +async def test_list_tasks_filter_data_name(expdb_test: AsyncConnection) -> None: """Filter by data_name returns only tasks whose dataset matches.""" - response = await py_api.post("/tasks/list", json={"data_name": "mfeat-pixel"}) - assert response.status_code == HTTPStatus.OK - tasks = response.json() + tasks = await list_tasks(pagination=Pagination(), data_name="mfeat-pixel", expdb=expdb_test) assert len(tasks) > 0 assert all(t["name"] == "mfeat-pixel" for t in tasks) -async def test_list_tasks_filter_status_deactivated(py_api: httpx.AsyncClient) -> None: +async def test_list_tasks_filter_status_deactivated(expdb_test: AsyncConnection) -> None: """Filter by status='deactivated' returns tasks with that status.""" - response = await py_api.post("/tasks/list", json={"status": "deactivated"}) - assert response.status_code == HTTPStatus.OK - tasks = response.json() + tasks = await list_tasks( + pagination=Pagination(), status=TaskStatusFilter.DEACTIVATED, expdb=expdb_test + ) assert len(tasks) > 0 assert all(t["status"] == "deactivated" for t in tasks) @@ -98,114 +210,65 @@ async def test_list_tasks_filter_status_deactivated(py_api: httpx.AsyncClient) - ("limit", "offset"), [(5, 0), (10, 0), (5, 5)], ) -async def test_list_tasks_pagination(limit: int, offset: int, py_api: httpx.AsyncClient) -> None: +async def test_list_tasks_pagination(limit: int, offset: int, expdb_test: AsyncConnection) -> None: """Pagination limit and offset are respected.""" - response = await py_api.post( - "/tasks/list", - json={"pagination": {"limit": limit, "offset": offset}}, + tasks = await list_tasks(pagination=Pagination(limit=limit, offset=offset), expdb=expdb_test) + assert len(tasks) <= limit + + # Precise verification: compare IDs against a corresponding slice from an offset=0 baseline + baseline = await list_tasks( + pagination=Pagination(limit=limit + offset, offset=0), expdb=expdb_test ) - assert response.status_code == HTTPStatus.OK - assert len(response.json()) <= limit + expected_ids = [t["task_id"] for t in baseline][offset : offset + limit] + assert [t["task_id"] for t in tasks] == expected_ids -async def test_list_tasks_pagination_order_stable(py_api: httpx.AsyncClient) -> None: +async def test_list_tasks_pagination_order_stable(expdb_test: AsyncConnection) -> None: """Results are ordered by task_id — consecutive pages are in ascending order.""" - r1 = await py_api.post("/tasks/list", json={"pagination": {"limit": 5, "offset": 0}}) - r2 = await py_api.post("/tasks/list", json={"pagination": {"limit": 5, "offset": 5}}) - ids1 = [t["task_id"] for t in r1.json()] - ids2 = [t["task_id"] for t in r2.json()] + tasks1 = await list_tasks(pagination=Pagination(limit=5, offset=0), expdb=expdb_test) + tasks2 = await list_tasks(pagination=Pagination(limit=5, offset=5), expdb=expdb_test) + ids1 = [t["task_id"] for t in tasks1] + ids2 = [t["task_id"] for t in tasks2] assert ids1 == sorted(ids1) assert ids2 == sorted(ids2) if ids1 and ids2: assert max(ids1) < min(ids2) -async def test_list_tasks_number_instances_range(py_api: httpx.AsyncClient) -> None: +async def test_list_tasks_number_instances_range(expdb_test: AsyncConnection) -> None: """number_instances range filter returns tasks whose dataset matches.""" min_instances, max_instances = 100, 1000 - response = await py_api.post( - "/tasks/list", - json={"number_instances": f"{min_instances}..{max_instances}"}, + tasks = await list_tasks( + pagination=Pagination(), + number_instances=f"{min_instances}..{max_instances}", + expdb=expdb_test, ) - assert response.status_code == HTTPStatus.OK - tasks = response.json() assert len(tasks) > 0 for task in tasks: qualities = {q["name"]: q["value"] for q in task["quality"]} - if "NumberOfInstances" in qualities: - assert min_instances <= float(qualities["NumberOfInstances"]) <= max_instances + assert "NumberOfInstances" in qualities + assert min_instances <= float(qualities["NumberOfInstances"]) <= max_instances -async def test_list_tasks_inputs_are_basic_subset(py_api: httpx.AsyncClient) -> None: +async def test_list_tasks_inputs_are_basic_subset(expdb_test: AsyncConnection) -> None: """Input entries only contain the expected basic input names.""" basic_inputs = {"source_data", "target_feature", "estimation_procedure", "evaluation_measures"} - response = await py_api.post("/tasks/list", json={"pagination": {"limit": 5, "offset": 0}}) - assert response.status_code == HTTPStatus.OK - for task in response.json(): + tasks = await list_tasks(pagination=Pagination(limit=5, offset=0), expdb=expdb_test) + assert any(task["input"] for task in tasks), "Expected at least one task to have inputs" + for task in tasks: for inp in task["input"]: assert inp["name"] in basic_inputs -async def test_list_tasks_quality_values_are_strings(py_api: httpx.AsyncClient) -> None: +async def test_list_tasks_quality_values_are_strings(expdb_test: AsyncConnection) -> None: """Quality values must be strings (to match PHP API behaviour).""" - response = await py_api.post("/tasks/list", json={"pagination": {"limit": 5, "offset": 0}}) - assert response.status_code == HTTPStatus.OK - for task in response.json(): + tasks = await list_tasks(pagination=Pagination(limit=5, offset=0), expdb=expdb_test) + assert any(task["quality"] for task in tasks), "Expected at least one task to have qualities" + for task in tasks: for quality in task["quality"]: assert isinstance(quality["value"], str) -@pytest.mark.parametrize( - "pagination_override", - [ - {"limit": "abc", "offset": 0}, # Invalid type - {"limit": 5, "offset": "xyz"}, # Invalid type - ], - ids=["bad_limit_type", "bad_offset_type"], -) -async def test_list_tasks_invalid_pagination_type( - pagination_override: dict[str, Any], py_api: httpx.AsyncClient -) -> None: - """Invalid pagination types return 422 Unprocessable Entity.""" - response = await py_api.post( - "/tasks/list", - json={"pagination": pagination_override}, - ) - assert response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY - - -@pytest.mark.parametrize( - ("limit", "offset", "expected_status", "expected_max_results"), - [ - (-10, 0, HTTPStatus.NOT_FOUND, 0), # negative limit clamped to 0 -> No results - (5, -10, HTTPStatus.OK, 5), # negative offset clamped to 0 -> First 5 results - ], - ids=["negative_limit", "negative_offset"], -) -async def test_list_tasks_negative_pagination_safely_clamped( - limit: int, - offset: int, - expected_status: int, - expected_max_results: int, - py_api: httpx.AsyncClient, -) -> None: - """Negative pagination values are safely clamped to 0 instead of causing 500 errors. - - A limit clamped to 0 returns a 482 NoResultsError (404 Not Found). - An offset clamped to 0 simply returns the first page of results (200 OK). - """ - response = await py_api.post( - "/tasks/list", - json={"pagination": {"limit": limit, "offset": offset}}, - ) - assert response.status_code == expected_status - if expected_status == HTTPStatus.OK: - assert len(response.json()) <= expected_max_results - else: - error = response.json() - assert error["type"] == NoResultsError.uri - - @pytest.mark.parametrize( "payload", [ @@ -215,21 +278,7 @@ async def test_list_tasks_negative_pagination_safely_clamped( ], ids=["bad_tag", "bad_task_id", "bad_data_name"], ) -async def test_list_tasks_no_results(payload: dict[str, Any], py_api: httpx.AsyncClient) -> None: +async def test_list_tasks_no_results(payload: dict[str, Any], expdb_test: AsyncConnection) -> None: """Filters matching nothing return 404 NoResultsError.""" - response = await py_api.post("/tasks/list", json=payload) - assert response.status_code == HTTPStatus.NOT_FOUND - assert response.headers["content-type"] == "application/problem+json" - error = response.json() - assert error["type"] == NoResultsError.uri - - -@pytest.mark.parametrize( - "value", - ["1...2", "abc"], - ids=["triple_dot", "non_numeric"], -) -async def test_list_tasks_invalid_range(value: str, py_api: httpx.AsyncClient) -> None: - """Invalid number_instances format returns 422 Unprocessable Entity.""" - response = await py_api.post("/tasks/list", json={"number_instances": value}) - assert response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY + with pytest.raises(NoResultsError): + await list_tasks(pagination=Pagination(), expdb=expdb_test, **payload)