Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions src/database/setups.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from sqlalchemy import Connection, text
from sqlalchemy.engine import Row


def get(id_: int, connection: Connection) -> Row | None:
    """Fetch the `algorithm_setup` row whose `sid` equals `id_`.

    Returns the matching row, or `None` when no row matches.
    """
    statement = text(
        """
            SELECT *
            FROM algorithm_setup
            WHERE sid = :setup_id
            """,
    )
    result = connection.execute(statement, parameters={"setup_id": id_})
    return result.one_or_none()


def get_tags_for(id_: int, connection: Connection) -> list[str]:
    """Get all tags for a specific setup, in a deterministic order.

    Args:
        id_: Identifier of the setup; matched against `setup_tag.id`.
        connection: Open database connection used to run the query.

    Returns:
        The setup's tags as ordered by the database's `ORDER BY tag`
        (empty list when the setup has no tags).
    """
    rows = connection.execute(
        text(
            """
            SELECT tag
            FROM setup_tag
            WHERE id = :setup_id
            ORDER BY tag
            """,
        ),
        parameters={"setup_id": id_},
    )
    # Without the ORDER BY the database is free to return rows in any order,
    # which would make API responses and tests unstable.
    return [row.tag for row in rows]
Comment on lines +20 to +32
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Return tags in a deterministic order and avoid over-fetching.

SELECT * without ordering can yield unstable tag ordering in responses and tests. Fetch only tag and add an explicit ORDER BY.

💡 Suggested query refinement
 def get_tags_for(id_: int, connection: Connection) -> list[str]:
     """Get all tags for a specific setup."""
     rows = connection.execute(
         text(
             """
-            SELECT *
+            SELECT tag
             FROM setup_tag
             WHERE id = :setup_id
+            ORDER BY tag
             """,
         ),
         parameters={"setup_id": id_},
     )
     return [row.tag for row in rows]
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/database/setups.py` around lines 20 - 32, The get_tags_for function
currently selects all columns and lacks deterministic ordering; update the SQL
in connection.execute to select only the tag column (e.g., SELECT tag FROM
setup_tag) and add an explicit ORDER BY (for example ORDER BY tag ASC) while
keeping the parameterized filter (WHERE id = :setup_id) so you return a
stable, minimal list of tags for the given setup_id.



def tag(id_: int, tag_: str, *, user_id: int, connection: Connection) -> None:
    """Insert a `setup_tag` row linking `tag_` to setup `id_`, uploaded by `user_id`."""
    insert_statement = text(
        """
            INSERT INTO setup_tag(`id`, `tag`, `uploader`)
            VALUES (:setup_id, :tag, :user_id)
            """,
    )
    bound_values = {"setup_id": id_, "user_id": user_id, "tag": tag_}
    connection.execute(insert_statement, parameters=bound_values)
2 changes: 2 additions & 0 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from routers.openml.evaluations import router as evaluationmeasures_router
from routers.openml.flows import router as flows_router
from routers.openml.qualities import router as qualities_router
from routers.openml.setups import router as setups_router
from routers.openml.study import router as study_router
from routers.openml.tasks import router as task_router
from routers.openml.tasktype import router as ttype_router
Expand Down Expand Up @@ -47,6 +48,7 @@ def create_api() -> FastAPI:
app = FastAPI(**fastapi_kwargs)

app.include_router(datasets_router)
app.include_router(setups_router)
app.include_router(qualities_router)
app.include_router(mldcat_ap_router)
app.include_router(ttype_router)
Expand Down
68 changes: 68 additions & 0 deletions src/routers/openml/setups.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from http import HTTPStatus
from typing import Annotated, Any

from fastapi import APIRouter, Body, Depends, HTTPException
from sqlalchemy import Connection

import database.setups
from database.users import User, UserGroup
from routers.dependencies import expdb_connection, fetch_user
from routers.types import SystemString64

# Router for the setup endpoints: paths are prefixed with "/setup" and tagged "setups".
router = APIRouter(prefix="/setup", tags=["setups"])


def create_authentication_failed_error() -> HTTPException:
    """Build the HTTP 412 (Precondition Failed) error with code 103."""
    detail = {"code": "103", "message": "Authentication failed"}
    return HTTPException(status_code=HTTPStatus.PRECONDITION_FAILED, detail=detail)


def create_tag_exists_error(setup_id: int, tag: str) -> HTTPException:
    """Build the HTTP 409 (Conflict) error for a setup that already has `tag`."""
    detail = {
        "code": "473",
        "message": "Entity already tagged by this tag.",
        "additional_information": f"id={setup_id}; tag={tag}",
    }
    # Reported as CONFLICT (409) rather than INTERNAL_SERVER_ERROR (500).
    return HTTPException(status_code=HTTPStatus.CONFLICT, detail=detail)


@router.post("/tag")
def tag_setup(
setup_id: Annotated[int, Body()],
tag: Annotated[str, Body(..., embed=False), SystemString64],
user: Annotated[User | None, Depends(fetch_user)] = None,
expdb_db: Annotated[Connection, Depends(expdb_connection)] = None,
) -> dict[str, dict[str, Any]]:
# POST /setup/tag: attach `tag` to the setup `setup_id` for the requesting user.
#
# Raises HTTPException with:
#   412 (code 103) when no user was resolved by `fetch_user`,
#   404 when no setup with `setup_id` exists,
#   403 when the user is neither an admin nor the setup's uploader,
#   409 (code 473) when the setup already has this tag (case-insensitive match).
#
# Kept as comments rather than a docstring so the endpoint's generated API
# description is unchanged.

# 1. AUTHENTICATE FIRST
# Done before any database access so unauthenticated callers learn nothing
# about which setups exist.
if user is None:
raise create_authentication_failed_error()

# 2. VERIFY EXISTENCE
setup = database.setups.get(setup_id, expdb_db)
if not setup:
raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail="Setup not found")

# 3. VERIFY OWNERSHIP / PERMISSIONS
# Access is granted to admins and to the user recorded as the setup's uploader.
# getattr falls back to None when the row exposes no `uploader` attribute, in
# which case only admins pass (assuming user.user_id is never None - TODO confirm).
is_admin = UserGroup.ADMIN in user.groups
is_owner = getattr(setup, "uploader", None) == user.user_id

if not (is_admin or is_owner):
raise HTTPException(status_code=HTTPStatus.FORBIDDEN, detail="No access granted")

# 4. CHECK IF TAG EXISTS
# Comparison is case-insensitive via casefold().
# NOTE(review): this check and the insert below are separate statements, so two
# concurrent requests could both pass the check before either inserts.
tags = database.setups.get_tags_for(setup_id, expdb_db)
if tag.casefold() in [t.casefold() for t in tags]:
raise create_tag_exists_error(setup_id, tag)

# 5. APPLY THE TAG
# The tag is stored as submitted and attributed to the requesting user.
database.setups.tag(setup_id, tag, user_id=user.user_id, connection=expdb_db)

Comment on lines +59 to +65
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Duplicate prevention is race-prone under concurrent requests.

The current check-then-insert flow can still insert duplicates when two requests run at the same time. Enforce uniqueness in the DB (e.g., unique key on setup/tag) and map duplicate-write failure to the existing “already tagged” error.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/routers/openml/setups.py` around lines 58 - 64, The current
check-then-insert in the route using database.setups.get_tags_for and
database.setups.tag is race-prone; add a UNIQUE constraint on setup_tag (id, tag) at
the DB schema level and update the tag write path to handle duplicate-key errors
by translating them into the existing create_tag_exists_error(setup_id, tag)
response. Concretely, add the unique index in migrations, then catch the
DB-specific unique-violation exception either inside database.setups.tag or
around its call in this router and on that exception raise/create the same
"already tagged" error instead of propagating the DB error.

return {
"setup_tag": {"id": str(setup_id), "tag": [*tags, tag]},
}