From 5ae0a223e84541ee38db813abdd5f4879677c0b2 Mon Sep 17 00:00:00 2001
From: Ritoban Dutta
Date: Sun, 1 Mar 2026 02:02:38 +0530
Subject: [PATCH] [FEAT] Add POST /datasets/untag endpoint (#20)

---
Review notes (commentary only; nothing in this region is part of the change):

* What the patch adds
  * database.datasets.untag(id_, tag_, *, connection): executes a
    parameterized DELETE on dataset_tag for the row(s) whose `id` and `tag`
    equal the given values. Returns None.
  * POST /datasets/untag (untag_dataset): raises the authentication-failed
    error when no user is resolved, then loads the dataset's tags, raises
    create_tag_not_found_error when the tag is absent (compared with
    str.casefold on both sides), otherwise calls database.datasets.untag and
    returns {"data_untag": {"id": "<data_id as string>"}}.
  * create_tag_not_found_error: HTTPException with status
    INTERNAL_SERVER_ERROR and detail code "474"; the neighbouring
    create_tag_exists_error in the hunk context uses the same status.
  * Tests for: unauthenticated/invalid key, successful untag for three API
    keys, untagging a tag that is not present, and invalid tag strings.

* NOTE(review): the membership check is case-insensitive (casefold), but the
  DELETE compares `tag` = :tag with the tag exactly as supplied. Whether a
  stored tag that differs only in case is actually removed depends on the
  column collation, which is not visible in this patch -- confirm.
* NOTE(review): no per-tag ownership check is visible; test_dataset_untag
  expects "administrator", "non-owner" and "owner" keys to all succeed.
  Confirm this is the intended permission model.
* NOTE(review): test_dataset_untag_rejects_unauthorized annotates `key` as
  ApiKey but is parametrized with None; `ApiKey | None` would describe the
  parameter accurately.
* NOTE(review): the line structure of this patch was restored from a
  whitespace-collapsed copy; line counts match every hunk header and the
  diffstat. Indentation inside the SQL string literal is a best guess.

 src/database/datasets.py                 | 15 ++++++
 src/routers/openml/datasets.py           | 33 ++++++++++++
 tests/routers/openml/dataset_tag_test.py | 68 ++++++++++++++++++++++++
 3 files changed, 116 insertions(+)

diff --git a/src/database/datasets.py b/src/database/datasets.py
index f69a035a..f9db094d 100644
--- a/src/database/datasets.py
+++ b/src/database/datasets.py
@@ -66,6 +66,21 @@ def tag(id_: int, tag_: str, *, user_id: int, connection: Connection) -> None:
     )
 
 
+def untag(id_: int, tag_: str, *, connection: Connection) -> None:
+    connection.execute(
+        text(
+            """
+    DELETE FROM dataset_tag
+    WHERE `id` = :dataset_id AND `tag` = :tag
+    """,
+        ),
+        parameters={
+            "dataset_id": id_,
+            "tag": tag_,
+        },
+    )
+
+
 def get_description(
     id_: int,
     connection: Connection,
diff --git a/src/routers/openml/datasets.py b/src/routers/openml/datasets.py
index dda25117..0ed9f46f 100644
--- a/src/routers/openml/datasets.py
+++ b/src/routers/openml/datasets.py
@@ -48,6 +48,28 @@ def tag_dataset(
     }
 
 
+@router.post(
+    path="/untag",
+)
+def untag_dataset(
+    data_id: Annotated[int, Body()],
+    tag: Annotated[str, SystemString64],
+    user: Annotated[User | None, Depends(fetch_user)] = None,
+    expdb_db: Annotated[Connection, Depends(expdb_connection)] = None,
+) -> dict[str, dict[str, Any]]:
+    if user is None:
+        raise create_authentication_failed_error()
+
+    tags = database.datasets.get_tags_for(data_id, expdb_db)
+    if tag.casefold() not in [t.casefold() for t in tags]:
+        raise create_tag_not_found_error(data_id, tag)
+
+    database.datasets.untag(data_id, tag, connection=expdb_db)
+    return {
+        "data_untag": {"id": str(data_id)},
+    }
+
+
 def create_authentication_failed_error() -> HTTPException:
     return HTTPException(
         status_code=HTTPStatus.PRECONDITION_FAILED,
@@ -55,6 +77,17 @@ def create_authentication_failed_error() -> HTTPException:
     )
 
 
+def create_tag_not_found_error(data_id: int, tag: str) -> HTTPException:
+    return HTTPException(
+        status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+        detail={
+            "code": "474",
+            "message": "Entity not tagged by this tag.",
+            "additional_information": f"id={data_id}; tag={tag}",
+        },
+    )
+
+
 def create_tag_exists_error(data_id: int, tag: str) -> HTTPException:
     return HTTPException(
         status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
diff --git a/tests/routers/openml/dataset_tag_test.py b/tests/routers/openml/dataset_tag_test.py
index 5449862a..0867598d 100644
--- a/tests/routers/openml/dataset_tag_test.py
+++ b/tests/routers/openml/dataset_tag_test.py
@@ -85,3 +85,71 @@ def test_dataset_tag_invalid_tag_is_rejected(
 
     assert new.status_code == HTTPStatus.UNPROCESSABLE_ENTITY
     assert new.json()["detail"][0]["loc"] == ["body", "tag"]
+
+
+@pytest.mark.parametrize(
+    "key",
+    [None, ApiKey.INVALID],
+    ids=["no authentication", "invalid key"],
+)
+def test_dataset_untag_rejects_unauthorized(key: ApiKey, py_api: TestClient) -> None:
+    apikey = "" if key is None else f"?api_key={key}"
+    response = py_api.post(
+        f"/datasets/untag{apikey}",
+        json={"data_id": 1, "tag": "study_14"},
+    )
+    assert response.status_code == HTTPStatus.PRECONDITION_FAILED
+    assert response.json()["detail"] == {"code": "103", "message": "Authentication failed"}
+
+
+@pytest.mark.parametrize(
+    "key",
+    [ApiKey.ADMIN, ApiKey.SOME_USER, ApiKey.OWNER_USER],
+    ids=["administrator", "non-owner", "owner"],
+)
+def test_dataset_untag(key: ApiKey, expdb_test: Connection, py_api: TestClient) -> None:
+    dataset_id, tag = 1, "study_14"  # Dataset 1 already has tag 'study_14'
+    response = py_api.post(
+        f"/datasets/untag?api_key={key}",
+        json={"data_id": dataset_id, "tag": tag},
+    )
+    assert response.status_code == HTTPStatus.OK
+    assert response.json() == {"data_untag": {"id": str(dataset_id)}}
+
+    tags = get_tags_for(id_=dataset_id, connection=expdb_test)
+    assert tag not in tags
+
+
+def test_dataset_untag_fails_if_tag_does_not_exist(py_api: TestClient) -> None:
+    dataset_id, tag = 1, "nonexistent_tag"
+    response = py_api.post(
+        f"/datasets/untag?api_key={ApiKey.ADMIN}",
+        json={"data_id": dataset_id, "tag": tag},
+    )
+    assert response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR
+    expected = {
+        "detail": {
+            "code": "474",
+            "message": "Entity not tagged by this tag.",
+            "additional_information": f"id={dataset_id}; tag={tag}",
+        },
+    }
+    assert expected == response.json()
+
+
+@pytest.mark.parametrize(
+    "tag",
+    ["", "h@", " a", "a" * 65],
+    ids=["too short", "@", "space", "too long"],
+)
+def test_dataset_untag_invalid_tag_is_rejected(
+    tag: str,
+    py_api: TestClient,
+) -> None:
+    response = py_api.post(
+        f"/datasets/untag?api_key={ApiKey.ADMIN}",
+        json={"data_id": 1, "tag": tag},
+    )
+
+    assert response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY
+    assert response.json()["detail"][0]["loc"] == ["body", "tag"]