openml · ritoban23 · Feb 28, 2026 · sourcery-ai · Feb 28, 2026 · coderabbitai
diff --git a/src/database/datasets.py b/src/database/datasets.py
@@ -66,6 +66,21 @@ def tag(id_: int, tag_: str, *, user_id: int, connection: Connection) -> None:
     )
 
 
+def untag(id_: int, tag_: str, *, connection: Connection) -> None:
+    """Remove the association between dataset `id_` and tag `tag_`.
+
+    Issues a single parameterized DELETE against `dataset_tag`.
+    """
+    statement = text(
+        """
+    DELETE FROM dataset_tag
+    WHERE `id` = :dataset_id AND `tag` = :tag
+    """,
+    )
+    bound = {"dataset_id": id_, "tag": tag_}
+    connection.execute(statement, parameters=bound)
+
+
 def get_description(
     id_: int,
     connection: Connection,

diff --git a/src/routers/openml/datasets.py b/src/routers/openml/datasets.py
@@ -48,13 +48,46 @@ def tag_dataset(
     }
 
 
+@router.post(path="/untag")
+def untag_dataset(
+    data_id: Annotated[int, Body()],
+    tag: Annotated[str, SystemString64],
+    user: Annotated[User | None, Depends(fetch_user)] = None,
+    expdb_db: Annotated[Connection, Depends(expdb_connection)] = None,
+) -> dict[str, dict[str, Any]]:
+    """Remove `tag` from `data_id` (case-insensitive match); fails with 103 or 474."""
+    if user is None:
+        raise create_authentication_failed_error()
+
+    tags = database.datasets.get_tags_for(data_id, expdb_db)
+    folded = tag.casefold()
+    matching_tag = next((t for t in tags if t.casefold() == folded), None)
+    if matching_tag is None:
+        raise create_tag_not_found_error(data_id, tag)
+
+    # Delete by the stored spelling so the row is matched under any collation.
+    database.datasets.untag(data_id, matching_tag, connection=expdb_db)
+    return {"data_untag": {"id": str(data_id)}}
+
+
 def create_authentication_failed_error() -> HTTPException:
     return HTTPException(
         status_code=HTTPStatus.PRECONDITION_FAILED,
         detail={"code": "103", "message": "Authentication failed"},
     )
 
 
+def create_tag_not_found_error(data_id: int, tag: str) -> HTTPException:
+    """Build the error (code 474) raised when `tag` is not attached to `data_id`."""
+    detail = {
+        "code": "474",
+        "message": "Entity not tagged by this tag.",
+        "additional_information": f"id={data_id}; tag={tag}",
+    }
+    # Reported with the same 500 status that `create_tag_exists_error` uses.
+    return HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=detail)
+
+
 def create_tag_exists_error(data_id: int, tag: str) -> HTTPException:
     return HTTPException(
         status_code=HTTPStatus.INTERNAL_SERVER_ERROR,

diff --git a/tests/routers/openml/dataset_tag_test.py b/tests/routers/openml/dataset_tag_test.py
@@ -85,3 +85,71 @@ def test_dataset_tag_invalid_tag_is_rejected(
 
     assert new.status_code == HTTPStatus.UNPROCESSABLE_ENTITY
     assert new.json()["detail"][0]["loc"] == ["body", "tag"]
+
+
+@pytest.mark.parametrize(
+    "key",
+    [None, ApiKey.INVALID],
+    ids=["no authentication", "invalid key"],
+)
+def test_dataset_untag_rejects_unauthorized(key: ApiKey, py_api: TestClient) -> None:
+    """Untagging without a key, or with an invalid one, fails with code 103."""
+    query = f"?api_key={key}" if key is not None else ""
+    payload = {"data_id": 1, "tag": "study_14"}
+    response = py_api.post(f"/datasets/untag{query}", json=payload)
+
+    assert response.status_code == HTTPStatus.PRECONDITION_FAILED
+    assert response.json()["detail"] == {"code": "103", "message": "Authentication failed"}
+
+
+@pytest.mark.parametrize(
+    "key",
+    [ApiKey.ADMIN, ApiKey.SOME_USER, ApiKey.OWNER_USER],
+    ids=["administrator", "non-owner", "owner"],
+)
+def test_dataset_untag(key: ApiKey, expdb_test: Connection, py_api: TestClient) -> None:
+    """Admins, owners and non-owners alike can remove an existing tag."""
+    dataset_id = 1
+    tag = "study_14"  # Dataset 1 already has tag 'study_14'
+    payload = {"data_id": dataset_id, "tag": tag}
+    response = py_api.post(f"/datasets/untag?api_key={key}", json=payload)
+
+    assert response.status_code == HTTPStatus.OK
+    assert response.json() == {"data_untag": {"id": str(dataset_id)}}
+    remaining = get_tags_for(id_=dataset_id, connection=expdb_test)
+    assert tag not in remaining
+
+
+def test_dataset_untag_fails_if_tag_does_not_exist(py_api: TestClient) -> None:
+    """Removing a tag the dataset does not carry is reported as error 474."""
+    dataset_id = 1
+    tag = "nonexistent_tag"
+    payload = {"data_id": dataset_id, "tag": tag}
+    response = py_api.post(f"/datasets/untag?api_key={ApiKey.ADMIN}", json=payload)
+
+    assert response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR
+    assert response.json() == {
+        "detail": {
+            "code": "474",
+            "message": "Entity not tagged by this tag.",
+            "additional_information": f"id={dataset_id}; tag={tag}",
+        },
+    }
+
+
+@pytest.mark.parametrize(
+    "tag",
+    ["", "h@", " a", "a" * 65],
+    ids=["too short", "@", "space", "too long"],
+)
+def test_dataset_untag_invalid_tag_is_rejected(
+    tag: str,
+    py_api: TestClient,
+) -> None:
+    """Malformed tags are rejected with a 422 whose first error points at `tag`."""
+    payload = {"data_id": 1, "tag": tag}
+    response = py_api.post(f"/datasets/untag?api_key={ApiKey.ADMIN}", json=payload)
+
+    assert response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY
+    first_error = response.json()["detail"][0]
+    assert first_error["loc"] == ["body", "tag"]