Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions src/database/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,23 @@ def get_features(dataset_id: int, connection: Connection) -> list[Feature]:
return [Feature(**row, nominal_values=None) for row in rows.mappings()]


def get_feature_ontologies(dataset_id: int, connection: Connection) -> dict[int, list[str]]:
    """Collect the ontology entries recorded for the features of a dataset.

    Selects the rows of ``data_feature_description`` whose ``did`` equals
    ``dataset_id`` and whose ``description_type`` is ``'ontology'``, then
    groups the ``value`` column by the ``index`` column.

    Returns:
        A mapping from ``index`` to the list of ``value`` entries found for
        it. Values appear in the order the database returned the rows (the
        query has no ORDER BY). An index with no matching rows is absent
        from the mapping, so callers should use ``.get``.
    """
    query = text(
        """
        SELECT `index`, `value`
        FROM data_feature_description
        WHERE `did` = :dataset_id AND `description_type` = 'ontology'
        """,
    )
    result = connection.execute(query, parameters={"dataset_id": dataset_id})

    by_index: dict[int, list[str]] = {}
    for record in result.mappings():
        feature_index = record["index"]
        # Start a fresh bucket the first time an index is encountered.
        if feature_index not in by_index:
            by_index[feature_index] = []
        by_index[feature_index].append(record["value"])
    return by_index


def get_feature_values(dataset_id: int, *, feature_index: int, connection: Connection) -> list[str]:
rows = connection.execute(
text(
Expand Down
6 changes: 6 additions & 0 deletions src/routers/openml/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,12 @@ def get_dataset_features(
) -> list[Feature]:
_get_dataset_raise_otherwise(dataset_id, user, expdb)
features = database.datasets.get_features(dataset_id, expdb)

# Attach ontologies from data_feature_description
ontologies = database.datasets.get_feature_ontologies(dataset_id, expdb)
for feature in features:
feature.ontology = ontologies.get(feature.index)

for feature in [f for f in features if f.data_type == FeatureType.NOMINAL]:
feature.nominal_values = database.datasets.get_feature_values(
dataset_id,
Expand Down
1 change: 1 addition & 0 deletions src/schemas/datasets/openml.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class Feature(BaseModel):
index: int
name: str
data_type: FeatureType
ontology: list[str] | None = None
is_target: bool
is_ignore: bool
is_row_identifier: bool
Expand Down