From c90a26b2f34870cb52cde14b94ff8ad5aa30e17b Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 12 Oct 2023 14:17:39 +0200
Subject: [PATCH 1/7] Read database configuration from file
---
src/config.py | 29 +++++++++++++++++++++++++++++
src/config.toml | 11 +++++++++++
src/database/datasets.py | 16 ++++++++++++++--
3 files changed, 54 insertions(+), 2 deletions(-)
create mode 100644 src/config.py
create mode 100644 src/config.toml
diff --git a/src/config.py b/src/config.py
new file mode 100644
index 00000000..8081cb6d
--- /dev/null
+++ b/src/config.py
@@ -0,0 +1,29 @@
+from __future__ import annotations
+
+import tomllib
+import typing
+from pathlib import Path
+
+TomlTable = dict[str, typing.Any]
+
+
+def apply_defaults_to_subtables(configuration: TomlTable, table: str) -> TomlTable:
+ defaults = configuration[table]["defaults"]
+ return {
+ subtable: defaults | overrides
+ for subtable, overrides in configuration[table].items()
+ if subtable != "defaults"
+ }
+
+
+def load_configuration(file: Path) -> TomlTable:
+ configuration = tomllib.loads(file.read_text())
+ configuration["databases"] = apply_defaults_to_subtables(
+ configuration,
+ table="databases",
+ )
+ return configuration
+
+
+_configuration = load_configuration(Path(__file__).parent / "config.toml")
+DATABASE_CONFIGURATION = _configuration["databases"]
diff --git a/src/config.toml b/src/config.toml
new file mode 100644
index 00000000..f85d9a06
--- /dev/null
+++ b/src/config.toml
@@ -0,0 +1,11 @@
+[databases.defaults]
+host="127.0.0.1"
+port="3306"
+# SQLAlchemy `dialect` and `driver`: https://docs.sqlalchemy.org/en/20/dialects/index.html
+drivername="mysql"
+
+[databases.expdb]
+database="openml_expdb"
+
+[databases.openml]
+database="openml"
diff --git a/src/database/datasets.py b/src/database/datasets.py
index f794aeae..f9723084 100644
--- a/src/database/datasets.py
+++ b/src/database/datasets.py
@@ -1,17 +1,29 @@
""" Translation from https://github.com/openml/OpenML/blob/c19c9b99568c0fabb001e639ff6724b9a754bbc9/openml_OS/models/api/v1/Api_data.php#L707"""
from typing import Any
+from config import DATABASE_CONFIGURATION
from sqlalchemy import create_engine, text
+from sqlalchemy.engine import URL
from database.meta import get_column_names
+expdb_url = URL.create(
+ username="root",
+ password="ok",
+ **DATABASE_CONFIGURATION["expdb"],
+)
expdb = create_engine(
- "mysql://root:ok@127.0.0.1:3306/openml_expdb",
+ expdb_url,
echo=True,
pool_recycle=3600,
)
+openml_url = URL.create(
+ username="root",
+ password="ok",
+ **DATABASE_CONFIGURATION["openml"],
+)
openml = create_engine(
- "mysql://root:ok@127.0.0.1:3306/openml",
+ openml_url,
echo=True,
pool_recycle=3600,
)
From 30a9a3dcb2ff1f83bea45074b15a947e631981b5 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 12 Oct 2023 14:45:36 +0200
Subject: [PATCH 2/7] Add python_dotenv to load environment variables from .env
files
---
pyproject.toml | 1 +
1 file changed, 1 insertion(+)
diff --git a/pyproject.toml b/pyproject.toml
index fe7ecdba..898eae5a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,6 +17,7 @@ dependencies = [
"uvicorn",
"sqlalchemy",
"mysqlclient",
+ "python_dotenv",
]
[project.optional-dependencies]
From af8650eb1089236f6ad2ef398b735031c121cab6 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 12 Oct 2023 14:46:18 +0200
Subject: [PATCH 3/7] Load database credentials from environment variables
---
src/config.py | 21 +++++++++++++++++++++
src/database/datasets.py | 12 ++----------
2 files changed, 23 insertions(+), 10 deletions(-)
diff --git a/src/config.py b/src/config.py
index 8081cb6d..57e4220b 100644
--- a/src/config.py
+++ b/src/config.py
@@ -1,9 +1,12 @@
from __future__ import annotations
+import os
import tomllib
import typing
from pathlib import Path
+from dotenv import load_dotenv
+
TomlTable = dict[str, typing.Any]
@@ -25,5 +28,23 @@ def load_configuration(file: Path) -> TomlTable:
return configuration
+load_dotenv()
_configuration = load_configuration(Path(__file__).parent / "config.toml")
+
DATABASE_CONFIGURATION = _configuration["databases"]
+DATABASE_CONFIGURATION["openml"]["username"] = os.environ.get(
+ "OPENML_DATABASES_OPENML_USERNAME",
+ "root",
+)
+DATABASE_CONFIGURATION["openml"]["password"] = os.environ.get(
+ "OPENML_DATABASES_OPENML_PASSWORD",
+ "ok",
+)
+DATABASE_CONFIGURATION["expdb"]["username"] = os.environ.get(
+ "OPENML_DATABASES_EXPDB_USERNAME",
+ "root",
+)
+DATABASE_CONFIGURATION["expdb"]["password"] = os.environ.get(
+ "OPENML_DATABASES_EXPDB_PASSWORD",
+ "ok",
+)
diff --git a/src/database/datasets.py b/src/database/datasets.py
index f9723084..c4a78f3c 100644
--- a/src/database/datasets.py
+++ b/src/database/datasets.py
@@ -7,21 +7,13 @@
from database.meta import get_column_names
-expdb_url = URL.create(
- username="root",
- password="ok",
- **DATABASE_CONFIGURATION["expdb"],
-)
+expdb_url = URL.create(**DATABASE_CONFIGURATION["expdb"])
expdb = create_engine(
expdb_url,
echo=True,
pool_recycle=3600,
)
-openml_url = URL.create(
- username="root",
- password="ok",
- **DATABASE_CONFIGURATION["openml"],
-)
+openml_url = URL.create(**DATABASE_CONFIGURATION["openml"])
openml = create_engine(
openml_url,
echo=True,
From 9c2999d9c15f21739ec7b87bd9d3cff73192b2cf Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 12 Oct 2023 16:06:08 +0200
Subject: [PATCH 4/7] Only start app if needed, add config file path fixture
---
tests/config_test.py | 0
tests/conftest.py | 13 ++++++++++---
2 files changed, 10 insertions(+), 3 deletions(-)
create mode 100644 tests/config_test.py
diff --git a/tests/config_test.py b/tests/config_test.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/conftest.py b/tests/conftest.py
index fead0652..45c68f1e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,20 +1,27 @@
import json
-import pathlib
+from pathlib import Path
from typing import Any, Generator
import pytest
from fastapi import FastAPI
from fastapi.testclient import TestClient
-from main import app
@pytest.fixture()
def api_client() -> Generator[FastAPI, None, None]:
+ # We want to avoid starting a test client app if tests don't need it.
+ from main import app
+
return TestClient(app)
@pytest.fixture()
def dataset_130() -> Generator[dict[str, Any], None, None]:
- json_path = pathlib.Path(__file__).parent / "resources" / "datasets" / "dataset_130.json"
+ json_path = Path(__file__).parent / "resources" / "datasets" / "dataset_130.json"
with json_path.open("r") as dataset_file:
yield json.load(dataset_file)
+
+
+@pytest.fixture()
+def default_configuration_file() -> Path:
+ return Path().parent.parent / "src" / "config.toml"
From a3aaa71cdb480b87658f1d13660161afd118d476 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 12 Oct 2023 16:06:51 +0200
Subject: [PATCH 5/7] Don't automatically load configuration on import
---
src/config.py | 52 +++++++++++++++++++---------------------
src/database/datasets.py | 7 +++---
2 files changed, 28 insertions(+), 31 deletions(-)
diff --git a/src/config.py b/src/config.py
index 57e4220b..7f7f1f01 100644
--- a/src/config.py
+++ b/src/config.py
@@ -1,5 +1,4 @@
-from __future__ import annotations
-
+import functools
import os
import tomllib
import typing
@@ -10,7 +9,7 @@
TomlTable = dict[str, typing.Any]
-def apply_defaults_to_subtables(configuration: TomlTable, table: str) -> TomlTable:
+def _apply_defaults_to_subtables(configuration: TomlTable, table: str) -> TomlTable:
defaults = configuration[table]["defaults"]
return {
subtable: defaults | overrides
@@ -19,32 +18,29 @@ def apply_defaults_to_subtables(configuration: TomlTable, table: str) -> TomlTab
}
-def load_configuration(file: Path) -> TomlTable:
+@functools.cache
+def load_database_configuration(file: Path = Path(__file__).parent / "config.toml") -> TomlTable:
configuration = tomllib.loads(file.read_text())
- configuration["databases"] = apply_defaults_to_subtables(
+
+ database_configuration = _apply_defaults_to_subtables(
configuration,
table="databases",
)
- return configuration
-
-
-load_dotenv()
-_configuration = load_configuration(Path(__file__).parent / "config.toml")
-
-DATABASE_CONFIGURATION = _configuration["databases"]
-DATABASE_CONFIGURATION["openml"]["username"] = os.environ.get(
- "OPENML_DATABASES_OPENML_USERNAME",
- "root",
-)
-DATABASE_CONFIGURATION["openml"]["password"] = os.environ.get(
- "OPENML_DATABASES_OPENML_PASSWORD",
- "ok",
-)
-DATABASE_CONFIGURATION["expdb"]["username"] = os.environ.get(
- "OPENML_DATABASES_EXPDB_USERNAME",
- "root",
-)
-DATABASE_CONFIGURATION["expdb"]["password"] = os.environ.get(
- "OPENML_DATABASES_EXPDB_PASSWORD",
- "ok",
-)
+ load_dotenv()
+ database_configuration["openml"]["username"] = os.environ.get(
+ "OPENML_DATABASES_OPENML_USERNAME",
+ "root",
+ )
+ database_configuration["openml"]["password"] = os.environ.get(
+ "OPENML_DATABASES_OPENML_PASSWORD",
+ "ok",
+ )
+ database_configuration["expdb"]["username"] = os.environ.get(
+ "OPENML_DATABASES_EXPDB_USERNAME",
+ "root",
+ )
+ database_configuration["expdb"]["password"] = os.environ.get(
+ "OPENML_DATABASES_EXPDB_PASSWORD",
+ "ok",
+ )
+ return database_configuration
diff --git a/src/database/datasets.py b/src/database/datasets.py
index c4a78f3c..1e534396 100644
--- a/src/database/datasets.py
+++ b/src/database/datasets.py
@@ -1,19 +1,20 @@
""" Translation from https://github.com/openml/OpenML/blob/c19c9b99568c0fabb001e639ff6724b9a754bbc9/openml_OS/models/api/v1/Api_data.php#L707"""
from typing import Any
-from config import DATABASE_CONFIGURATION
+from config import load_database_configuration
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL
from database.meta import get_column_names
-expdb_url = URL.create(**DATABASE_CONFIGURATION["expdb"])
+_database_configuration = load_database_configuration()
+expdb_url = URL.create(**_database_configuration["expdb"])
expdb = create_engine(
expdb_url,
echo=True,
pool_recycle=3600,
)
-openml_url = URL.create(**DATABASE_CONFIGURATION["openml"])
+openml_url = URL.create(**_database_configuration["openml"])
openml = create_engine(
openml_url,
echo=True,
From d99170b3586b3e8906bcc8b505f5d12a114f09c6 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 12 Oct 2023 16:08:58 +0200
Subject: [PATCH 6/7] Add minimal tests for configuration loading
---
tests/config_test.py | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
diff --git a/tests/config_test.py b/tests/config_test.py
index e69de29b..50bcc573 100644
--- a/tests/config_test.py
+++ b/tests/config_test.py
@@ -0,0 +1,28 @@
+import os
+from pathlib import Path
+
+from config import _apply_defaults_to_subtables, load_database_configuration
+
+
+def test_apply_defaults_to_subtables_applies_defaults() -> None:
+ input_ = {"foo": {"defaults": {1: 1}, "other": {}}}
+ expected = {"other": {1: 1}}
+ output = _apply_defaults_to_subtables(input_, table="foo")
+ assert expected == output
+
+
+def test_apply_defaults_to_subtables_does_not_override() -> None:
+ input_ = {"foo": {"defaults": {1: 1}, "other": {1: 2}}}
+ expected = {"other": {1: 2}}
+ output = _apply_defaults_to_subtables(input_, table="foo")
+ assert expected == output
+
+
+def test_load_configuration_adds_environment_variables(default_configuration_file: Path) -> None:
+ database_configuration = load_database_configuration(default_configuration_file)
+ assert database_configuration["openml"]["username"] == "root"
+
+ load_database_configuration.cache_clear()
+ os.environ["OPENML_DATABASES_OPENML_USERNAME"] = "foo"
+ database_configuration = load_database_configuration(default_configuration_file)
+ assert database_configuration["openml"]["username"] == "foo"
From edfabae928631c4ae8d80d8ea0f2ce32af92e346 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 12 Oct 2023 16:30:59 +0200
Subject: [PATCH 7/7] Refactor to work with sibligns to make function more
intuitive
---
src/config.py | 13 ++++++-------
tests/config_test.py | 21 ++++++++++++++-------
2 files changed, 20 insertions(+), 14 deletions(-)
diff --git a/src/config.py b/src/config.py
index 7f7f1f01..ae425f96 100644
--- a/src/config.py
+++ b/src/config.py
@@ -9,11 +9,11 @@
TomlTable = dict[str, typing.Any]
-def _apply_defaults_to_subtables(configuration: TomlTable, table: str) -> TomlTable:
- defaults = configuration[table]["defaults"]
+def _apply_defaults_to_siblings(configuration: TomlTable) -> TomlTable:
+ defaults = configuration["defaults"]
return {
- subtable: defaults | overrides
- for subtable, overrides in configuration[table].items()
+ subtable: (defaults | overrides) if isinstance(overrides, dict) else overrides
+ for subtable, overrides in configuration.items()
if subtable != "defaults"
}
@@ -22,9 +22,8 @@ def _apply_defaults_to_subtables(configuration: TomlTable, table: str) -> TomlTa
def load_database_configuration(file: Path = Path(__file__).parent / "config.toml") -> TomlTable:
configuration = tomllib.loads(file.read_text())
- database_configuration = _apply_defaults_to_subtables(
- configuration,
- table="databases",
+ database_configuration = _apply_defaults_to_siblings(
+ configuration["databases"],
)
load_dotenv()
database_configuration["openml"]["username"] = os.environ.get(
diff --git a/tests/config_test.py b/tests/config_test.py
index 50bcc573..412606d1 100644
--- a/tests/config_test.py
+++ b/tests/config_test.py
@@ -1,20 +1,27 @@
import os
from pathlib import Path
-from config import _apply_defaults_to_subtables, load_database_configuration
+from config import _apply_defaults_to_siblings, load_database_configuration
-def test_apply_defaults_to_subtables_applies_defaults() -> None:
- input_ = {"foo": {"defaults": {1: 1}, "other": {}}}
+def test_apply_defaults_to_siblings_applies_defaults() -> None:
+ input_ = {"defaults": {1: 1}, "other": {}}
expected = {"other": {1: 1}}
- output = _apply_defaults_to_subtables(input_, table="foo")
+ output = _apply_defaults_to_siblings(input_)
assert expected == output
-def test_apply_defaults_to_subtables_does_not_override() -> None:
- input_ = {"foo": {"defaults": {1: 1}, "other": {1: 2}}}
+def test_apply_defaults_to_siblings_does_not_override() -> None:
+ input_ = {"defaults": {1: 1}, "other": {1: 2}}
expected = {"other": {1: 2}}
- output = _apply_defaults_to_subtables(input_, table="foo")
+ output = _apply_defaults_to_siblings(input_)
+ assert expected == output
+
+
+def test_apply_defaults_to_siblings_ignores_nontables() -> None:
+ input_ = {"defaults": {1: 1}, "other": {1: 2}, "not-a-table": 3}
+ expected = {"other": {1: 2}, "not-a-table": 3}
+ output = _apply_defaults_to_siblings(input_)
assert expected == output