From 2f98934f73e99d1ca51032a3844e9a1992935e26 Mon Sep 17 00:00:00 2001 From: Prtm2110 Date: Tue, 24 Feb 2026 15:23:57 +0530 Subject: [PATCH 1/2] added ontology --- src/database/datasets.py | 17 +++++++++++++++++ src/routers/openml/datasets.py | 7 +++++++ src/schemas/datasets/openml.py | 1 + 3 files changed, 25 insertions(+) diff --git a/src/database/datasets.py b/src/database/datasets.py index f011a651..aff4042a 100644 --- a/src/database/datasets.py +++ b/src/database/datasets.py @@ -131,6 +131,23 @@ def get_features(dataset_id: int, connection: Connection) -> list[Feature]: return [Feature(**row, nominal_values=None) for row in rows.mappings()] +def get_feature_ontologies(dataset_id: int, connection: Connection) -> dict[int, list[str]]: + rows = connection.execute( + text( + """ + SELECT `index`, `value` + FROM data_feature_description + WHERE `did` = :dataset_id AND `description_type` = 'ontology' + """, + ), + parameters={"dataset_id": dataset_id}, + ) + ontologies: dict[int, list[str]] = {} + for row in rows: + ontologies.setdefault(row.index, []).append(row.value) + return ontologies + + def get_feature_values(dataset_id: int, *, feature_index: int, connection: Connection) -> list[str]: rows = connection.execute( text( diff --git a/src/routers/openml/datasets.py b/src/routers/openml/datasets.py index dda25117..856d6ba1 100644 --- a/src/routers/openml/datasets.py +++ b/src/routers/openml/datasets.py @@ -287,6 +287,13 @@ def get_dataset_features( ) -> list[Feature]: _get_dataset_raise_otherwise(dataset_id, user, expdb) features = database.datasets.get_features(dataset_id, expdb) + + # Attach ontologies from data_feature_description + ontologies = database.datasets.get_feature_ontologies(dataset_id, expdb) + for feature in features: + if feature.index in ontologies: + feature.ontology = ontologies[feature.index] + for feature in [f for f in features if f.data_type == FeatureType.NOMINAL]: feature.nominal_values = database.datasets.get_feature_values( dataset_id, diff --git a/src/schemas/datasets/openml.py b/src/schemas/datasets/openml.py index 8edb373c..b1f51574 100644 --- a/src/schemas/datasets/openml.py +++ b/src/schemas/datasets/openml.py @@ -40,6 +40,7 @@ class Feature(BaseModel): index: int name: str data_type: FeatureType + ontology: list[str] | None = None is_target: bool is_ignore: bool is_row_identifier: bool From 732b32622af39fe22920fa35518c8532a7a2d3c7 Mon Sep 17 00:00:00 2001 From: Prtm2110 Date: Tue, 24 Feb 2026 15:45:58 +0530 Subject: [PATCH 2/2] changed to use .mappings() --- src/database/datasets.py | 4 ++-- src/routers/openml/datasets.py | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/database/datasets.py b/src/database/datasets.py index aff4042a..f18b21e7 100644 --- a/src/database/datasets.py +++ b/src/database/datasets.py @@ -143,8 +143,8 @@ def get_feature_ontologies(dataset_id: int, connection: Connection) -> dict[int, parameters={"dataset_id": dataset_id}, ) ontologies: dict[int, list[str]] = {} - for row in rows: - ontologies.setdefault(row.index, []).append(row.value) + for row in rows.mappings(): + ontologies.setdefault(row["index"], []).append(row["value"]) return ontologies diff --git a/src/routers/openml/datasets.py b/src/routers/openml/datasets.py index 856d6ba1..1072296b 100644 --- a/src/routers/openml/datasets.py +++ b/src/routers/openml/datasets.py @@ -291,8 +291,7 @@ def get_dataset_features( # Attach ontologies from data_feature_description ontologies = database.datasets.get_feature_ontologies(dataset_id, expdb) for feature in features: - if feature.index in ontologies: - feature.ontology = ontologies[feature.index] + feature.ontology = ontologies.get(feature.index) for feature in [f for f in features if f.data_type == FeatureType.NOMINAL]: feature.nominal_values = database.datasets.get_feature_values(