Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 16 additions & 3 deletions docs/migration.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,11 @@ in which case using the generated REST API documentation is recommended.

The first iteration of the new server has nearly identical responses to the old JSON
endpoints, but there are exceptions. Most exceptions are either bug fixes, or arise from
technical limitations.
technical limitations. This list covers the most important changes, but there may
be some undocumented changes for edge cases. The PHP API was underspecified, and we
decided that reverse-engineering the specifications that mostly arise from
implementation details was not worth the effort. If there is a behavioral change which
was not documented but affects you, please [open a bug report](https://github.com/openml/server-api/issues/new?assignees=&labels=bug%2C+triage&projects=&template=bug-report.md&title=).

### All Endpoints
The following changes affect all endpoints.
Expand All @@ -35,7 +39,7 @@ and JSON content will be different.
+ {"detail":[{"loc":["query","_dataset_id"],"msg":"value is not a valid integer","type":"type_error.integer"}]}
```

!!! Bug "Input validation has been added to many end points"
!!! warning "Input validation has been added to many end points"

There are endpoints which previously did not do any input validation.
These endpoints now do enforce stricter input constraints.
Expand Down Expand Up @@ -75,7 +79,16 @@ Python-V1 will always return JSON.


## V1 to V2
Most of the changes are focused on standardizing responses.
Most of the changes are focused on standardizing responses, working on:

* using JSON types.
* removing levels of nesting for endpoints which return single-field JSON.
* always returning lists for fields which may contain multiple values, even if
  the field contains only one element or no elements.
* restricting or expanding input types as appropriate.
* standardizing authentication and access messages, and consistently executing those
  checks before fetching data or providing error messages about the data.


### Datasets

Expand Down
14 changes: 14 additions & 0 deletions src/database/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,20 @@
from database.meta import get_column_names


def list_all_qualities(connection: Connection) -> list[str]:
    """Return the names of all dataset qualities present in the database.

    Only qualities that are actually *used* (i.e. measured for at least one
    dataset) are returned; to list every registered quality instead, you
    would query: SELECT `name` FROM `quality` WHERE `type`='DataQuality'
    """
    used_qualities = connection.execute(
        text(
            """
            SELECT DISTINCT(`quality`)
            FROM data_quality
            """,
        ),
    )
    return [row.quality for row in used_qualities]


def get_dataset(dataset_id: int, connection: Connection) -> dict[str, Any] | None:
columns = get_column_names(connection, "dataset")
row = connection.execute(
Expand Down
6 changes: 4 additions & 2 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
import uvicorn
from fastapi import FastAPI
from routers.mldcat_ap.dataset import router as mldcat_ap_router
from routers.v1.datasets import router as datasets_router_old_format
from routers.v1.datasets import router as datasets_router_v1_format
from routers.v1.qualities import router as qualities_router
from routers.v2.datasets import router as datasets_router


Expand Down Expand Up @@ -37,7 +38,8 @@ def create_api() -> FastAPI:
app = FastAPI()

app.include_router(datasets_router)
app.include_router(datasets_router_old_format)
app.include_router(datasets_router_v1_format)
app.include_router(qualities_router)
app.include_router(mldcat_ap_router)

return app
Expand Down
21 changes: 21 additions & 0 deletions src/routers/v1/qualities.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from typing import Annotated, Literal

from database.datasets import list_all_qualities
from fastapi import APIRouter, Depends
from sqlalchemy import Connection

from routers.dependencies import expdb_connection

router = APIRouter(prefix="/v1/datasets", tags=["datasets"])


@router.get("/qualities/list")
def list_qualities(
    expdb: Annotated[Connection, Depends(expdb_connection)],
) -> dict[Literal["data_qualities_list"], dict[Literal["quality"], list[str]]]:
    """Return the names of all dataset qualities in use, wrapped in the
    nested single-key envelope that the legacy v1 JSON API produced."""
    quality_names = list_all_qualities(connection=expdb)
    return {"data_qualities_list": {"quality": quality_names}}
162 changes: 162 additions & 0 deletions tests/routers/v1/qualities_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
import http.client

import httpx
import pytest
from sqlalchemy import Connection, text
from starlette.testclient import TestClient


def _remove_quality_from_database(quality_name: str, expdb_test: Connection) -> None:
    """Delete a quality from the test database.

    Removes both its measured values (`data_quality` rows) and its
    definition (`quality` row).
    """
    parameters = {"deleted_quality": quality_name}
    delete_measured_values = text(
        """
            DELETE FROM data_quality
            WHERE `quality`=:deleted_quality
            """,
    )
    delete_definition = text(
        """
            DELETE FROM quality
            WHERE `name`=:deleted_quality
            """,
    )
    expdb_test.execute(delete_measured_values, parameters=parameters)
    expdb_test.execute(delete_definition, parameters=parameters)


@pytest.mark.php()
def test_list_qualities_identical(api_client: TestClient) -> None:
    """The new endpoint must mirror the legacy PHP endpoint exactly."""
    # NOTE: to keep this test idempotent we do not mutate the database,
    # so we cannot verify that both servers react identically to changes.
    php_response = httpx.get("http://server-api-php-api-1:80/api/v1/json/data/qualities/list")
    new_response = api_client.get("/v1/datasets/qualities/list")
    assert php_response.status_code == new_response.status_code
    assert php_response.json() == new_response.json()


def test_list_qualities(api_client: TestClient, expdb_test: Connection) -> None:
    """The endpoint lists every used quality name and reflects database changes."""
    response = api_client.get("/v1/datasets/qualities/list")
    assert response.status_code == http.client.OK
    quality_names = [
        "AutoCorrelation",
        "CfsSubsetEval_DecisionStumpAUC",
        "CfsSubsetEval_DecisionStumpErrRate",
        "CfsSubsetEval_DecisionStumpKappa",
        "CfsSubsetEval_NaiveBayesAUC",
        "CfsSubsetEval_NaiveBayesErrRate",
        "CfsSubsetEval_NaiveBayesKappa",
        "CfsSubsetEval_kNN1NAUC",
        "CfsSubsetEval_kNN1NErrRate",
        "CfsSubsetEval_kNN1NKappa",
        "ClassEntropy",
        "DecisionStumpAUC",
        "DecisionStumpErrRate",
        "DecisionStumpKappa",
        "Dimensionality",
        "EquivalentNumberOfAtts",
        "J48.00001.AUC",
        "J48.00001.ErrRate",
        "J48.00001.Kappa",
        "J48.0001.AUC",
        "J48.0001.ErrRate",
        "J48.0001.Kappa",
        "J48.001.AUC",
        "J48.001.ErrRate",
        "J48.001.Kappa",
        "MajorityClassPercentage",
        "MajorityClassSize",
        "MaxAttributeEntropy",
        "MaxKurtosisOfNumericAtts",
        "MaxMeansOfNumericAtts",
        "MaxMutualInformation",
        "MaxNominalAttDistinctValues",
        "MaxSkewnessOfNumericAtts",
        "MaxStdDevOfNumericAtts",
        "MeanAttributeEntropy",
        "MeanKurtosisOfNumericAtts",
        "MeanMeansOfNumericAtts",
        "MeanMutualInformation",
        "MeanNoiseToSignalRatio",
        "MeanNominalAttDistinctValues",
        "MeanSkewnessOfNumericAtts",
        "MeanStdDevOfNumericAtts",
        "MinAttributeEntropy",
        "MinKurtosisOfNumericAtts",
        "MinMeansOfNumericAtts",
        "MinMutualInformation",
        "MinNominalAttDistinctValues",
        "MinSkewnessOfNumericAtts",
        "MinStdDevOfNumericAtts",
        "MinorityClassPercentage",
        "MinorityClassSize",
        "NaiveBayesAUC",
        "NaiveBayesErrRate",
        "NaiveBayesKappa",
        "NumberOfBinaryFeatures",
        "NumberOfClasses",
        "NumberOfFeatures",
        "NumberOfInstances",
        "NumberOfInstancesWithMissingValues",
        "NumberOfMissingValues",
        "NumberOfNumericFeatures",
        "NumberOfSymbolicFeatures",
        "PercentageOfBinaryFeatures",
        "PercentageOfInstancesWithMissingValues",
        "PercentageOfMissingValues",
        "PercentageOfNumericFeatures",
        "PercentageOfSymbolicFeatures",
        "Quartile1AttributeEntropy",
        "Quartile1KurtosisOfNumericAtts",
        "Quartile1MeansOfNumericAtts",
        "Quartile1MutualInformation",
        "Quartile1SkewnessOfNumericAtts",
        "Quartile1StdDevOfNumericAtts",
        "Quartile2AttributeEntropy",
        "Quartile2KurtosisOfNumericAtts",
        "Quartile2MeansOfNumericAtts",
        "Quartile2MutualInformation",
        "Quartile2SkewnessOfNumericAtts",
        "Quartile2StdDevOfNumericAtts",
        "Quartile3AttributeEntropy",
        "Quartile3KurtosisOfNumericAtts",
        "Quartile3MeansOfNumericAtts",
        "Quartile3MutualInformation",
        "Quartile3SkewnessOfNumericAtts",
        "Quartile3StdDevOfNumericAtts",
        "REPTreeDepth1AUC",
        "REPTreeDepth1ErrRate",
        "REPTreeDepth1Kappa",
        "REPTreeDepth2AUC",
        "REPTreeDepth2ErrRate",
        "REPTreeDepth2Kappa",
        "REPTreeDepth3AUC",
        "REPTreeDepth3ErrRate",
        "REPTreeDepth3Kappa",
        "RandomTreeDepth1AUC",
        "RandomTreeDepth1ErrRate",
        "RandomTreeDepth1Kappa",
        "RandomTreeDepth2AUC",
        "RandomTreeDepth2ErrRate",
        "RandomTreeDepth2Kappa",
        "RandomTreeDepth3AUC",
        "RandomTreeDepth3ErrRate",
        "RandomTreeDepth3Kappa",
        "StdvNominalAttDistinctValues",
        "kNN1NAUC",
        "kNN1NErrRate",
        "kNN1NKappa",
    ]
    expected = {"data_qualities_list": {"quality": quality_names}}
    assert expected == response.json()

    # `expected` aliases `quality_names`, so popping here updates both —
    # the removed quality must disappear from the endpoint's response too.
    removed_quality = quality_names.pop()
    _remove_quality_from_database(quality_name=removed_quality, expdb_test=expdb_test)

    response = api_client.get("/v1/datasets/qualities/list")
    assert response.status_code == http.client.OK
    assert expected == response.json()