Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 16 additions & 3 deletions docs/migration.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,11 @@ in which case using the generated REST API documentation is recommended.

The first iteration of the new server has nearly identical responses to the old JSON
endpoints, but there are exceptions. Most exceptions are either bug fixes, or arise from
technical limitations.
technical limitations. This list covers the most important changes, but there may
be some undocumented changes for edge cases. The PHP API was underspecified, and we
decided that reverse-engineering the specifications that mostly arise from
implementation details was not worth the effort. If there is a behavioral change which
was not documented but affects you, please [open a bug report](https://github.com/openml/server-api/issues/new?assignees=&labels=bug%2C+triage&projects=&template=bug-report.md&title=).

### All Endpoints
The following changes affect all endpoints.
Expand All @@ -35,7 +39,7 @@ and JSON content will be different.
+ {"detail":[{"loc":["query","_dataset_id"],"msg":"value is not a valid integer","type":"type_error.integer"}]}
```

!!! Bug "Input validation has been added to many end points"
!!! warning "Input validation has been added to many end points"

There are endpoints which previously did not do any input validation.
These endpoints now do enforce stricter input constraints.
Expand Down Expand Up @@ -75,7 +79,16 @@ Python-V1 will always return JSON.


## V1 to V2
Most of the changes are focused on standardizing responses.
Most of the changes are focused on standardizing responses, working on:

* using JSON types.
* removing levels of nesting for endpoints which return single-field JSON.
* always returning lists for fields which may contain multiple values, even if
  the field contains only one element or no elements.
* restricting or expanding input types as appropriate.
* standardizing authentication and access messages, and consistently executing those
  checks before fetching data or providing error messages about the data.


### Datasets

Expand Down
14 changes: 14 additions & 0 deletions src/database/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,20 @@
from database.meta import get_column_names


def list_all_qualities(connection: Connection) -> list[str]:
    """Return the names of all dataset qualities present in the database.

    Only qualities that are actually *used* (i.e. measured for at least one
    dataset) are returned; to list every registered quality instead, you
    would query: SELECT `name` FROM `quality` WHERE `type`='DataQuality'
    """
    used_qualities = connection.execute(
        text(
            """
            SELECT DISTINCT(`quality`)
            FROM data_quality
            """,
        ),
    )
    return [row.quality for row in used_qualities]


def get_dataset(dataset_id: int, connection: Connection) -> dict[str, Any] | None:
columns = get_column_names(connection, "dataset")
row = connection.execute(
Expand Down
6 changes: 4 additions & 2 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
import uvicorn
from fastapi import FastAPI
from routers.mldcat_ap.dataset import router as mldcat_ap_router
from routers.v1.datasets import router as datasets_router_old_format
from routers.v1.datasets import router as datasets_router_v1_format
from routers.v1.qualities import router as qualities_router
from routers.v2.datasets import router as datasets_router


Expand Down Expand Up @@ -37,7 +38,8 @@ def create_api() -> FastAPI:
app = FastAPI()

app.include_router(datasets_router)
app.include_router(datasets_router_old_format)
app.include_router(datasets_router_v1_format)
app.include_router(qualities_router)
app.include_router(mldcat_ap_router)

return app
Expand Down
21 changes: 21 additions & 0 deletions src/routers/v1/qualities.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from typing import Annotated, Literal

from database.datasets import list_all_qualities
from fastapi import APIRouter, Depends
from sqlalchemy import Connection

from routers.dependencies import expdb_connection

router = APIRouter(prefix="/v1/datasets", tags=["datasets"])


@router.get("/qualities/list")
def list_qualities(
    expdb: Annotated[Connection, Depends(expdb_connection)],
) -> dict[Literal["data_qualities_list"], dict[Literal["quality"], list[str]]]:
    """Return the names of all dataset qualities in use, wrapped in the
    nested single-key envelope that the legacy v1 JSON API produced."""
    quality_names = list_all_qualities(connection=expdb)
    return {"data_qualities_list": {"quality": quality_names}}
162 changes: 162 additions & 0 deletions tests/routers/v1/qualities_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
import http.client

import httpx
import pytest
from sqlalchemy import Connection, text
from starlette.testclient import TestClient


def _remove_quality_from_database(quality_name: str, expdb_test: Connection) -> None:
    """Delete a quality from the test database.

    Removes both its measured values (`data_quality` rows) and its
    definition (`quality` row).
    """
    parameters = {"deleted_quality": quality_name}
    delete_measured_values = text(
        """
            DELETE FROM data_quality
            WHERE `quality`=:deleted_quality
            """,
    )
    delete_definition = text(
        """
            DELETE FROM quality
            WHERE `name`=:deleted_quality
            """,
    )
    expdb_test.execute(delete_measured_values, parameters=parameters)
    expdb_test.execute(delete_definition, parameters=parameters)


@pytest.mark.php()
def test_list_qualities_identical(api_client: TestClient) -> None:
    """The new endpoint must mirror the legacy PHP endpoint exactly."""
    # NOTE: to keep this test idempotent we do not mutate the database,
    # so we cannot verify that both servers react identically to changes.
    php_response = httpx.get("http://server-api-php-api-1:80/api/v1/json/data/qualities/list")
    new_response = api_client.get("/v1/datasets/qualities/list")
    assert php_response.status_code == new_response.status_code
    assert php_response.json() == new_response.json()


def test_list_qualities(api_client: TestClient, expdb_test: Connection) -> None:
    """The endpoint lists every used quality name and reflects database changes."""
    response = api_client.get("/v1/datasets/qualities/list")
    assert response.status_code == http.client.OK
    quality_names = [
        "AutoCorrelation",
        "CfsSubsetEval_DecisionStumpAUC",
        "CfsSubsetEval_DecisionStumpErrRate",
        "CfsSubsetEval_DecisionStumpKappa",
        "CfsSubsetEval_NaiveBayesAUC",
        "CfsSubsetEval_NaiveBayesErrRate",
        "CfsSubsetEval_NaiveBayesKappa",
        "CfsSubsetEval_kNN1NAUC",
        "CfsSubsetEval_kNN1NErrRate",
        "CfsSubsetEval_kNN1NKappa",
        "ClassEntropy",
        "DecisionStumpAUC",
        "DecisionStumpErrRate",
        "DecisionStumpKappa",
        "Dimensionality",
        "EquivalentNumberOfAtts",
        "J48.00001.AUC",
        "J48.00001.ErrRate",
        "J48.00001.Kappa",
        "J48.0001.AUC",
        "J48.0001.ErrRate",
        "J48.0001.Kappa",
        "J48.001.AUC",
        "J48.001.ErrRate",
        "J48.001.Kappa",
        "MajorityClassPercentage",
        "MajorityClassSize",
        "MaxAttributeEntropy",
        "MaxKurtosisOfNumericAtts",
        "MaxMeansOfNumericAtts",
        "MaxMutualInformation",
        "MaxNominalAttDistinctValues",
        "MaxSkewnessOfNumericAtts",
        "MaxStdDevOfNumericAtts",
        "MeanAttributeEntropy",
        "MeanKurtosisOfNumericAtts",
        "MeanMeansOfNumericAtts",
        "MeanMutualInformation",
        "MeanNoiseToSignalRatio",
        "MeanNominalAttDistinctValues",
        "MeanSkewnessOfNumericAtts",
        "MeanStdDevOfNumericAtts",
        "MinAttributeEntropy",
        "MinKurtosisOfNumericAtts",
        "MinMeansOfNumericAtts",
        "MinMutualInformation",
        "MinNominalAttDistinctValues",
        "MinSkewnessOfNumericAtts",
        "MinStdDevOfNumericAtts",
        "MinorityClassPercentage",
        "MinorityClassSize",
        "NaiveBayesAUC",
        "NaiveBayesErrRate",
        "NaiveBayesKappa",
        "NumberOfBinaryFeatures",
        "NumberOfClasses",
        "NumberOfFeatures",
        "NumberOfInstances",
        "NumberOfInstancesWithMissingValues",
        "NumberOfMissingValues",
        "NumberOfNumericFeatures",
        "NumberOfSymbolicFeatures",
        "PercentageOfBinaryFeatures",
        "PercentageOfInstancesWithMissingValues",
        "PercentageOfMissingValues",
        "PercentageOfNumericFeatures",
        "PercentageOfSymbolicFeatures",
        "Quartile1AttributeEntropy",
        "Quartile1KurtosisOfNumericAtts",
        "Quartile1MeansOfNumericAtts",
        "Quartile1MutualInformation",
        "Quartile1SkewnessOfNumericAtts",
        "Quartile1StdDevOfNumericAtts",
        "Quartile2AttributeEntropy",
        "Quartile2KurtosisOfNumericAtts",
        "Quartile2MeansOfNumericAtts",
        "Quartile2MutualInformation",
        "Quartile2SkewnessOfNumericAtts",
        "Quartile2StdDevOfNumericAtts",
        "Quartile3AttributeEntropy",
        "Quartile3KurtosisOfNumericAtts",
        "Quartile3MeansOfNumericAtts",
        "Quartile3MutualInformation",
        "Quartile3SkewnessOfNumericAtts",
        "Quartile3StdDevOfNumericAtts",
        "REPTreeDepth1AUC",
        "REPTreeDepth1ErrRate",
        "REPTreeDepth1Kappa",
        "REPTreeDepth2AUC",
        "REPTreeDepth2ErrRate",
        "REPTreeDepth2Kappa",
        "REPTreeDepth3AUC",
        "REPTreeDepth3ErrRate",
        "REPTreeDepth3Kappa",
        "RandomTreeDepth1AUC",
        "RandomTreeDepth1ErrRate",
        "RandomTreeDepth1Kappa",
        "RandomTreeDepth2AUC",
        "RandomTreeDepth2ErrRate",
        "RandomTreeDepth2Kappa",
        "RandomTreeDepth3AUC",
        "RandomTreeDepth3ErrRate",
        "RandomTreeDepth3Kappa",
        "StdvNominalAttDistinctValues",
        "kNN1NAUC",
        "kNN1NErrRate",
        "kNN1NKappa",
    ]
    expected = {"data_qualities_list": {"quality": quality_names}}
    assert expected == response.json()

    # `expected` aliases `quality_names`, so popping here updates both —
    # the removed quality must disappear from the endpoint's response too.
    removed_quality = quality_names.pop()
    _remove_quality_from_database(quality_name=removed_quality, expdb_test=expdb_test)

    response = api_client.get("/v1/datasets/qualities/list")
    assert response.status_code == http.client.OK
    assert expected == response.json()