From 83e1531850e763feddb526f2749b5691d48bd015 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Tue, 20 Jan 2026 12:35:18 +0100 Subject: [PATCH 01/67] Use the correct path to the cache directory for the task --- openml/tasks/functions.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py index 3df2861c0..2bf1a40f4 100644 --- a/openml/tasks/functions.py +++ b/openml/tasks/functions.py @@ -415,9 +415,10 @@ def get_task( if not isinstance(task_id, int): raise TypeError(f"Task id should be integer, is {type(task_id)}") - cache_key_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id) - tid_cache_dir = cache_key_dir / str(task_id) - tid_cache_dir_existed = tid_cache_dir.exists() + task_cache_directory = openml.utils._create_cache_directory_for_id( + TASKS_CACHE_DIR_NAME, task_id + ) + task_cache_directory_existed = task_cache_directory.exists() try: task = _get_task_description(task_id) dataset = get_dataset(task.dataset_id, **get_dataset_kwargs) @@ -431,8 +432,8 @@ def get_task( if download_splits and isinstance(task, OpenMLSupervisedTask): task.download_split() except Exception as e: - if not tid_cache_dir_existed: - openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, tid_cache_dir) + if not task_cache_directory_existed: + openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, task_cache_directory) raise e return task From f90036debbf81fc3fd6452263d9b80e786ac2806 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 21 Jan 2026 16:50:09 +0100 Subject: [PATCH 02/67] Push configuration of test server URL exclusively to config.py --- openml/cli.py | 2 +- openml/config.py | 4 +++- openml/testing.py | 2 +- tests/conftest.py | 2 +- tests/test_datasets/test_dataset_functions.py | 14 +++++--------- tests/test_flows/test_flow_functions.py | 15 +++++---------- tests/test_openml/test_config.py | 2 +- tests/test_runs/test_run_functions.py | 9 +++------ 
tests/test_tasks/test_task_functions.py | 12 ++++-------- 9 files changed, 24 insertions(+), 38 deletions(-) diff --git a/openml/cli.py b/openml/cli.py index 0afb089c2..18192a7db 100644 --- a/openml/cli.py +++ b/openml/cli.py @@ -109,7 +109,7 @@ def check_server(server: str) -> str: def replace_shorthand(server: str) -> str: if server == "test": - return "https://test.openml.org/api/v1/xml" + return f"{config.TEST_SERVER_URL}/api/v1/xml" if server == "production": return "https://www.openml.org/api/v1/xml" return server diff --git a/openml/config.py b/openml/config.py index e6104fd7f..5b2d69067 100644 --- a/openml/config.py +++ b/openml/config.py @@ -27,6 +27,8 @@ OPENML_SKIP_PARQUET_ENV_VAR = "OPENML_SKIP_PARQUET" _TEST_SERVER_NORMAL_USER_KEY = "normaluser" +TEST_SERVER_URL = "https://test.openml.org" + class _Config(TypedDict): apikey: str @@ -213,7 +215,7 @@ class ConfigurationForExamples: _last_used_server = None _last_used_key = None _start_last_called = False - _test_server = "https://test.openml.org/api/v1/xml" + _test_server = f"{TEST_SERVER_URL}/api/v1/xml" _test_apikey = _TEST_SERVER_NORMAL_USER_KEY @classmethod diff --git a/openml/testing.py b/openml/testing.py index 8d3bbbd5b..9ee555a91 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -47,7 +47,7 @@ class TestBase(unittest.TestCase): "user": [], } flow_name_tracker: ClassVar[list[str]] = [] - test_server = "https://test.openml.org/api/v1/xml" + test_server = f"{openml.config.TEST_SERVER_URL}/api/v1/xml" admin_key = "abc" user_key = openml.config._TEST_SERVER_NORMAL_USER_KEY diff --git a/tests/conftest.py b/tests/conftest.py index bd974f3f3..29366ce37 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -277,7 +277,7 @@ def with_server(request): openml.config.apikey = None yield return - openml.config.server = "https://test.openml.org/api/v1/xml" + openml.config.server = f"{openml.config.TEST_SERVER_URL}/api/v1/xml" openml.config.apikey = TestBase.user_key yield diff --git 
a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index c41664ba7..74faa73ea 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -1730,7 +1730,6 @@ def test_delete_dataset(self): @mock.patch.object(requests.Session, "delete") def test_delete_dataset_not_owned(mock_delete, test_files_directory, test_api_key): - openml.config.start_using_configuration_for_example() content_file = ( test_files_directory / "mock_responses" / "datasets" / "data_delete_not_owned.xml" ) @@ -1745,14 +1744,13 @@ def test_delete_dataset_not_owned(mock_delete, test_files_directory, test_api_ke ): openml.datasets.delete_dataset(40_000) - dataset_url = "https://test.openml.org/api/v1/xml/data/40000" + dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/40000" assert dataset_url == mock_delete.call_args.args[0] assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") @mock.patch.object(requests.Session, "delete") def test_delete_dataset_with_run(mock_delete, test_files_directory, test_api_key): - openml.config.start_using_configuration_for_example() content_file = ( test_files_directory / "mock_responses" / "datasets" / "data_delete_has_tasks.xml" ) @@ -1767,14 +1765,13 @@ def test_delete_dataset_with_run(mock_delete, test_files_directory, test_api_key ): openml.datasets.delete_dataset(40_000) - dataset_url = "https://test.openml.org/api/v1/xml/data/40000" + dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/40000" assert dataset_url == mock_delete.call_args.args[0] assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") @mock.patch.object(requests.Session, "delete") def test_delete_dataset_success(mock_delete, test_files_directory, test_api_key): - openml.config.start_using_configuration_for_example() content_file = ( test_files_directory / "mock_responses" / "datasets" / "data_delete_successful.xml" ) @@ 
-1786,14 +1783,13 @@ def test_delete_dataset_success(mock_delete, test_files_directory, test_api_key) success = openml.datasets.delete_dataset(40000) assert success - dataset_url = "https://test.openml.org/api/v1/xml/data/40000" + dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/40000" assert dataset_url == mock_delete.call_args.args[0] assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") @mock.patch.object(requests.Session, "delete") def test_delete_unknown_dataset(mock_delete, test_files_directory, test_api_key): - openml.config.start_using_configuration_for_example() content_file = ( test_files_directory / "mock_responses" / "datasets" / "data_delete_not_exist.xml" ) @@ -1808,7 +1804,7 @@ def test_delete_unknown_dataset(mock_delete, test_files_directory, test_api_key) ): openml.datasets.delete_dataset(9_999_999) - dataset_url = "https://test.openml.org/api/v1/xml/data/9999999" + dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/9999999" assert dataset_url == mock_delete.call_args.args[0] assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") @@ -2010,7 +2006,7 @@ def test_get_dataset_parquet(requests_mock, test_files_directory): test_files_directory / "mock_responses" / "datasets" / "data_description_61.xml" ) # While the mocked example is from production, unit tests by default connect to the test server. 
- requests_mock.get("https://test.openml.org/api/v1/xml/data/61", text=content_file.read_text()) + requests_mock.get(f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/61", text=content_file.read_text()) dataset = openml.datasets.get_dataset(61, download_data=True) assert dataset._parquet_url is not None assert dataset.parquet_file is not None diff --git a/tests/test_flows/test_flow_functions.py b/tests/test_flows/test_flow_functions.py index 2339b27c8..790686d94 100644 --- a/tests/test_flows/test_flow_functions.py +++ b/tests/test_flows/test_flow_functions.py @@ -453,7 +453,6 @@ def test_delete_flow(self): @mock.patch.object(requests.Session, "delete") def test_delete_flow_not_owned(mock_delete, test_files_directory, test_api_key): - openml.config.start_using_configuration_for_example() content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_not_owned.xml" mock_delete.return_value = create_request_response( status_code=412, @@ -466,14 +465,13 @@ def test_delete_flow_not_owned(mock_delete, test_files_directory, test_api_key): ): openml.flows.delete_flow(40_000) - flow_url = "https://test.openml.org/api/v1/xml/flow/40000" + flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/40000" assert flow_url == mock_delete.call_args.args[0] assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") @mock.patch.object(requests.Session, "delete") def test_delete_flow_with_run(mock_delete, test_files_directory, test_api_key): - openml.config.start_using_configuration_for_example() content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_has_runs.xml" mock_delete.return_value = create_request_response( status_code=412, @@ -486,14 +484,13 @@ def test_delete_flow_with_run(mock_delete, test_files_directory, test_api_key): ): openml.flows.delete_flow(40_000) - flow_url = "https://test.openml.org/api/v1/xml/flow/40000" + flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/40000" assert flow_url == 
mock_delete.call_args.args[0] assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") @mock.patch.object(requests.Session, "delete") def test_delete_subflow(mock_delete, test_files_directory, test_api_key): - openml.config.start_using_configuration_for_example() content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_is_subflow.xml" mock_delete.return_value = create_request_response( status_code=412, @@ -506,14 +503,13 @@ def test_delete_subflow(mock_delete, test_files_directory, test_api_key): ): openml.flows.delete_flow(40_000) - flow_url = "https://test.openml.org/api/v1/xml/flow/40000" + flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/40000" assert flow_url == mock_delete.call_args.args[0] assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") @mock.patch.object(requests.Session, "delete") def test_delete_flow_success(mock_delete, test_files_directory, test_api_key): - openml.config.start_using_configuration_for_example() content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_successful.xml" mock_delete.return_value = create_request_response( status_code=200, @@ -523,7 +519,7 @@ def test_delete_flow_success(mock_delete, test_files_directory, test_api_key): success = openml.flows.delete_flow(33364) assert success - flow_url = "https://test.openml.org/api/v1/xml/flow/33364" + flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/33364" assert flow_url == mock_delete.call_args.args[0] assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") @@ -531,7 +527,6 @@ def test_delete_flow_success(mock_delete, test_files_directory, test_api_key): @mock.patch.object(requests.Session, "delete") @pytest.mark.xfail(reason="failures_issue_1544", strict=False) def test_delete_unknown_flow(mock_delete, test_files_directory, test_api_key): - openml.config.start_using_configuration_for_example() content_file = 
test_files_directory / "mock_responses" / "flows" / "flow_delete_not_exist.xml" mock_delete.return_value = create_request_response( status_code=412, @@ -544,6 +539,6 @@ def test_delete_unknown_flow(mock_delete, test_files_directory, test_api_key): ): openml.flows.delete_flow(9_999_999) - flow_url = "https://test.openml.org/api/v1/xml/flow/9999999" + flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/9999999" assert flow_url == mock_delete.call_args.args[0] assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index 7ef223504..3ff7bd55e 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -78,7 +78,7 @@ def test_get_config_as_dict(self): config = openml.config.get_config_as_dict() _config = {} _config["apikey"] = TestBase.user_key - _config["server"] = "https://test.openml.org/api/v1/xml" + _config["server"] = f"{openml.config.TEST_SERVER_URL}/api/v1/xml" _config["cachedir"] = self.workdir _config["avoid_duplicate_runs"] = False _config["connection_n_retries"] = 20 diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 8f2c505b7..b8bd6abd7 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -1813,7 +1813,6 @@ def test_initialize_model_from_run_nonstrict(self): @mock.patch.object(requests.Session, "delete") def test_delete_run_not_owned(mock_delete, test_files_directory, test_api_key): - openml.config.start_using_configuration_for_example() content_file = test_files_directory / "mock_responses" / "runs" / "run_delete_not_owned.xml" mock_delete.return_value = create_request_response( status_code=412, @@ -1826,14 +1825,13 @@ def test_delete_run_not_owned(mock_delete, test_files_directory, test_api_key): ): openml.runs.delete_run(40_000) - run_url = "https://test.openml.org/api/v1/xml/run/40000" + run_url = 
f"{openml.config.TEST_SERVER_URL}/api/v1/xml/run/40000" assert run_url == mock_delete.call_args.args[0] assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") @mock.patch.object(requests.Session, "delete") def test_delete_run_success(mock_delete, test_files_directory, test_api_key): - openml.config.start_using_configuration_for_example() content_file = test_files_directory / "mock_responses" / "runs" / "run_delete_successful.xml" mock_delete.return_value = create_request_response( status_code=200, @@ -1843,14 +1841,13 @@ def test_delete_run_success(mock_delete, test_files_directory, test_api_key): success = openml.runs.delete_run(10591880) assert success - run_url = "https://test.openml.org/api/v1/xml/run/10591880" + run_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/run/10591880" assert run_url == mock_delete.call_args.args[0] assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") @mock.patch.object(requests.Session, "delete") def test_delete_unknown_run(mock_delete, test_files_directory, test_api_key): - openml.config.start_using_configuration_for_example() content_file = test_files_directory / "mock_responses" / "runs" / "run_delete_not_exist.xml" mock_delete.return_value = create_request_response( status_code=412, @@ -1863,7 +1860,7 @@ def test_delete_unknown_run(mock_delete, test_files_directory, test_api_key): ): openml.runs.delete_run(9_999_999) - run_url = "https://test.openml.org/api/v1/xml/run/9999999" + run_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/run/9999999" assert run_url == mock_delete.call_args.args[0] assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py index d44717177..af143a26b 100644 --- a/tests/test_tasks/test_task_functions.py +++ b/tests/test_tasks/test_task_functions.py @@ -244,7 +244,6 @@ def test_deletion_of_cache_dir(self): 
@mock.patch.object(requests.Session, "delete") def test_delete_task_not_owned(mock_delete, test_files_directory, test_api_key): - openml.config.start_using_configuration_for_example() content_file = test_files_directory / "mock_responses" / "tasks" / "task_delete_not_owned.xml" mock_delete.return_value = create_request_response( status_code=412, @@ -257,14 +256,13 @@ def test_delete_task_not_owned(mock_delete, test_files_directory, test_api_key): ): openml.tasks.delete_task(1) - task_url = "https://test.openml.org/api/v1/xml/task/1" + task_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/task/1" assert task_url == mock_delete.call_args.args[0] assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") @mock.patch.object(requests.Session, "delete") def test_delete_task_with_run(mock_delete, test_files_directory, test_api_key): - openml.config.start_using_configuration_for_example() content_file = test_files_directory / "mock_responses" / "tasks" / "task_delete_has_runs.xml" mock_delete.return_value = create_request_response( status_code=412, @@ -277,14 +275,13 @@ def test_delete_task_with_run(mock_delete, test_files_directory, test_api_key): ): openml.tasks.delete_task(3496) - task_url = "https://test.openml.org/api/v1/xml/task/3496" + task_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/task/3496" assert task_url == mock_delete.call_args.args[0] assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") @mock.patch.object(requests.Session, "delete") def test_delete_success(mock_delete, test_files_directory, test_api_key): - openml.config.start_using_configuration_for_example() content_file = test_files_directory / "mock_responses" / "tasks" / "task_delete_successful.xml" mock_delete.return_value = create_request_response( status_code=200, @@ -294,14 +291,13 @@ def test_delete_success(mock_delete, test_files_directory, test_api_key): success = openml.tasks.delete_task(361323) assert success - task_url = 
"https://test.openml.org/api/v1/xml/task/361323" + task_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/task/361323" assert task_url == mock_delete.call_args.args[0] assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") @mock.patch.object(requests.Session, "delete") def test_delete_unknown_task(mock_delete, test_files_directory, test_api_key): - openml.config.start_using_configuration_for_example() content_file = test_files_directory / "mock_responses" / "tasks" / "task_delete_not_exist.xml" mock_delete.return_value = create_request_response( status_code=412, @@ -314,6 +310,6 @@ def test_delete_unknown_task(mock_delete, test_files_directory, test_api_key): ): openml.tasks.delete_task(9_999_999) - task_url = "https://test.openml.org/api/v1/xml/task/9999999" + task_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/task/9999999" assert task_url == mock_delete.call_args.args[0] assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key") From 3a257abea627f9a37d00feb7766cf1a49b82dbd5 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 28 Jan 2026 12:12:02 +0100 Subject: [PATCH 03/67] Update the test to use a dataset which does not have a parquet file Locally, MinIO already has more parquet files than on the test server. --- tests/test_datasets/test_dataset_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 74faa73ea..fe5939d7a 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -886,7 +886,7 @@ def test_create_invalid_dataset(self): @pytest.mark.uses_test_server() def test_get_online_dataset_arff(self): - dataset_id = 100 # Australian + dataset_id = 128 # iris -- one of the few datasets with parquet file # lazy loading not used as arff file is checked. 
dataset = openml.datasets.get_dataset(dataset_id, download_data=True) decoder = arff.ArffDecoder() From 3b79017a48da3ba9f002de813160ff60cb2159db Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 28 Jan 2026 12:32:30 +0100 Subject: [PATCH 04/67] Replace hard-coded cache directory by configured one --- tests/test_datasets/test_dataset_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index fe5939d7a..9df7e3879 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -527,7 +527,7 @@ def test_deletion_of_cache_dir(self): def test_deletion_of_cache_dir_faulty_download(self, patch): patch.side_effect = Exception("Boom!") self.assertRaisesRegex(Exception, "Boom!", openml.datasets.get_dataset, dataset_id=1) - datasets_cache_dir = os.path.join(self.workdir, "org", "openml", "test", "datasets") + datasets_cache_dir = os.path.join(openml.config.get_cache_directory(), "datasets") assert len(os.listdir(datasets_cache_dir)) == 0 @pytest.mark.uses_test_server() From f524d756964ecb03e77f9e932022a446bd1c5a35 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 28 Jan 2026 16:04:26 +0100 Subject: [PATCH 05/67] Update test to use dataset file that is already in cache Note that the previous strategy didn't work anymore if the server returned a parquet file, which is the case for the new local setup. 
--- tests/test_datasets/test_dataset_functions.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 9df7e3879..27d3075fd 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -532,14 +532,7 @@ def test_deletion_of_cache_dir_faulty_download(self, patch): @pytest.mark.uses_test_server() def test_publish_dataset(self): - # lazy loading not possible as we need the arff-file. - openml.datasets.get_dataset(3, download_data=True) - file_path = os.path.join( - openml.config.get_cache_directory(), - "datasets", - "3", - "dataset.arff", - ) + arff_file_path = Path(__file__).parent.parent / "files" / "org" / "openml" / "test" / "datasets" / "2" / "dataset.arff" dataset = OpenMLDataset( "anneal", "test", @@ -547,7 +540,7 @@ def test_publish_dataset(self): version=1, licence="public", default_target_attribute="class", - data_file=file_path, + data_file=arff_file_path, ) dataset.publish() TestBase._mark_entity_for_removal("data", dataset.dataset_id) From 7ef12c25b8c83ff102fac9b2606e7386dbd57a11 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Thu, 29 Jan 2026 11:02:54 +0530 Subject: [PATCH 06/67] Windows test --- .github/workflows/test.yml | 10 ++++++- docker-compose.yml | 53 ++++++++++++++++++++++++++++++++++++++ docker/update.sh | 31 ++++++++++++++++++++++ pytest.ini | 4 +++ tests/conftest.py | 42 ++++++++++++++++++++++++++++++ 5 files changed, 139 insertions(+), 1 deletion(-) create mode 100644 docker-compose.yml create mode 100644 docker/update.sh create mode 100644 pytest.ini diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d65cc3796..c52486d0a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -74,7 +74,15 @@ jobs: with: python-version: ${{ matrix.python-version }} - - name: Install test 
dependencies and scikit-learn + - name: Checkout server-api and patch Docker path + if: runner.os == 'Linux' + shell: bash + run: | + git clone --depth 1 https://github.com/openml/server-api.git server-api + sed -i 's|\.\./server-api|./server-api|g' docker-compose.yml + + - name: Install test dependencies, scikit-learn, and optional pandas + shell: bash run: | python -m pip install --upgrade pip pip install -e .[test] scikit-learn==${{ matrix.scikit-learn }} diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 000000000..20fcef863 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,53 @@ +services: + database: + image: "openml/test-database:20240105" + container_name: "openml-test-db-ci" + environment: + MYSQL_ROOT_PASSWORD: ok + ports: + - "33060:3306" + healthcheck: + test: ["CMD", "mysqladmin" ,"ping", "-h", "localhost"] + start_period: 30s + interval: 5s + retries: 10 + + # SETUP WORKER + database-setup: + image: mysql + container_name: "openml-test-setup-ci" + volumes: + # You MUST save the update.sh content you shared earlier to this path + - ./docker/update.sh:/database-update.sh + command: /bin/sh -c "/database-update.sh" + depends_on: + database: + condition: service_healthy + + php-api: + image: "openml/php-rest-api:v1.2.2" + container_name: "openml-php-api-ci" + ports: + - "9002:80" + depends_on: + database: + condition: service_started + environment: + - DB_HOST_OPENML=database:3306 + - DB_HOST_EXPDB=database:3306 + - BASE_URL=http://localhost:9002/ + - INDEX_ES_DURING_STARTUP=false + + # V2 API (PYTHON) + python-api: + container_name: "openml-python-api-ci" + build: + # TODO: replace with image when available + context: ../server-api + dockerfile: docker/python/Dockerfile + ports: + - "9001:8000" + depends_on: + - database + environment: + - DATABASE_URL=mysql://root:ok@database:3306/openml \ No newline at end of file diff --git a/docker/update.sh b/docker/update.sh new file mode 100644 index 000000000..7e9864742 --- /dev/null 
+++ b/docker/update.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# Change the filepath of openml.file +# from "https://www.openml.org/data/download/1666876/phpFsFYVN" +# to "http://minio:9000/datasets/0000/0001/phpFsFYVN" +mysql -hdatabase -uroot -pok -e 'UPDATE openml.file SET filepath = CONCAT("http://minio:9000/datasets/0000/", LPAD(id, 4, "0"), "/", SUBSTRING_INDEX(filepath, "/", -1)) WHERE extension="arff";' + +# Update openml.expdb.dataset with the same url +mysql -hdatabase -uroot -pok -e 'UPDATE openml_expdb.dataset DS, openml.file FL SET DS.url = FL.filepath WHERE DS.did = FL.id;' + + + + + +# Create the data_feature_description TABLE. TODO: can we make sure this table exists already? +mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'CREATE TABLE IF NOT EXISTS `data_feature_description` ( + `did` int unsigned NOT NULL, + `index` int unsigned NOT NULL, + `uploader` mediumint unsigned NOT NULL, + `date` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, + `description_type` enum("plain", "ontology") NOT NULL, + `value` varchar(256) NOT NULL, + KEY `did` (`did`,`index`), + CONSTRAINT `data_feature_description_ibfk_1` FOREIGN KEY (`did`, `index`) REFERENCES `data_feature` (`did`, `index`) ON DELETE CASCADE ON UPDATE CASCADE +)' + +# SET dataset 1 to active (used in unittests java) +mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'INSERT IGNORE INTO dataset_status VALUES (1, "active", "2024-01-01 00:00:00", 1)' +mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'DELETE FROM dataset_status WHERE did = 2 AND status = "deactivated";' + +# Temporary fix in case the database missed the kaggle table. The PHP Rest API expects the table to be there, while indexing. 
+mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'CREATE TABLE IF NOT EXISTS `kaggle` (`dataset_id` int(11) DEFAULT NULL, `kaggle_link` varchar(500) DEFAULT NULL)' \ No newline at end of file diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 000000000..69fbd903f --- /dev/null +++ b/pytest.ini @@ -0,0 +1,4 @@ +[pytest] +env = + OPENML_SERVER = http://localhost:9001/api/v2 + OPENML_API_KEY = AD000000000000000000000000000000 \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index bd974f3f3..890978558 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -24,6 +24,7 @@ from __future__ import annotations import multiprocessing +import sys multiprocessing.set_start_method("spawn", force=True) @@ -35,6 +36,9 @@ import pytest import openml_sklearn +import time +import subprocess +import requests import openml from openml.testing import TestBase @@ -296,6 +300,44 @@ def with_test_cache(test_files_directory, request): if tmp_cache.exists(): shutil.rmtree(tmp_cache) +# This starts the entire stack once for the whole test run +@pytest.fixture(scope="session", autouse=True) +def openml_docker_stack(): + # if sys.platform == "win32": + # yield + # return + # 1. Start the containers defined in your final docker-compose.yml + subprocess.run(["docker", "compose", "up", "-d"], check=True) + + # 2. Wait for the database setup worker to finish its tasks + # This ensures update.sh has finished before we hit the APIs + subprocess.run(["docker", "wait", "openml-test-setup-ci"], check=True) + + # 3. Quick health check: Wait for the Python API to respond on port 9001 + timeout = 30 + start = time.time() + while time.time() - start < timeout: + try: + if requests.get("http://localhost:9001/api/v2/").status_code == 200: + break + except requests.exceptions.ConnectionError: + time.sleep(1) + + yield # Tests run here + + # 4. 
Tear everything down after tests finish to keep the machine clean + subprocess.run(["docker", "compose", "down", "-v"], check=True) + +# This resets the database state before every single test to prevent race conditions +@pytest.fixture(scope="function", autouse=True) +def reset_db_state(): + # if sys.platform == "win32": + # yield + # return + # Fast restart of the database container to return to the 'baked-in' state + subprocess.run(["docker", "compose", "restart", "database"], check=True) + # Re-run the setup worker to ensure paths are still correct + subprocess.run(["docker", "compose", "up", "database-setup"], check=True) @pytest.fixture def static_cache_dir(): From a5601e3dc849ac4c8759c14292960d624d774ff0 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Thu, 29 Jan 2026 10:05:28 +0100 Subject: [PATCH 07/67] relax assumptions on local file structure --- tests/test_datasets/test_dataset_functions.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 27d3075fd..49b13e4b8 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -1457,8 +1457,9 @@ def test_data_edit_critical_field(self): raise e time.sleep(10) # Delete the cache dir to get the newer version of the dataset + shutil.rmtree( - os.path.join(self.workdir, "org", "openml", "test", "datasets", str(did)), + os.path.join(openml.config.get_cache_directory(), "datasets", str(did)), ) @pytest.mark.uses_test_server() @@ -1892,9 +1893,8 @@ def _dataset_features_is_downloaded(did: int): def _dataset_data_file_is_downloaded(did: int): - parquet_present = _dataset_file_is_downloaded(did, "dataset.pq") - arff_present = _dataset_file_is_downloaded(did, "dataset.arff") - return parquet_present or arff_present + cache_directory = Path(openml.config.get_cache_directory()) / "datasets" / str(did) + return any(f.suffix in (".pq", ".arff") for f 
in cache_directory.iterdir()) def _assert_datasets_retrieved_successfully( From d862be2de5ddc4d551efad22dff1fdefb7db3854 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Thu, 29 Jan 2026 10:47:51 +0100 Subject: [PATCH 08/67] Do not use static cache directory --- tests/test_tasks/test_task_functions.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py index af143a26b..08811add5 100644 --- a/tests/test_tasks/test_task_functions.py +++ b/tests/test_tasks/test_task_functions.py @@ -156,13 +156,13 @@ def test_get_task(self): task = openml.tasks.get_task(1, download_data=True) # anneal; crossvalidation assert isinstance(task, OpenMLTask) assert os.path.exists( - os.path.join(self.workdir, "org", "openml", "test", "tasks", "1", "task.xml") + os.path.join(openml.config.get_cache_directory(), "tasks", "1", "task.xml") ) assert not os.path.exists( - os.path.join(self.workdir, "org", "openml", "test", "tasks", "1", "datasplits.arff") + os.path.join(openml.config.get_cache_directory(), "tasks", "1", "datasplits.arff") ) assert os.path.exists( - os.path.join(self.workdir, "org", "openml", "test", "datasets", "1", "dataset.arff") + os.path.join(openml.config.get_cache_directory(), "datasets", "1", "dataset_1.pq") ) @pytest.mark.uses_test_server() @@ -170,21 +170,21 @@ def test_get_task_lazy(self): task = openml.tasks.get_task(2, download_data=False) # anneal; crossvalidation assert isinstance(task, OpenMLTask) assert os.path.exists( - os.path.join(self.workdir, "org", "openml", "test", "tasks", "2", "task.xml") + os.path.join(openml.config.get_cache_directory(), "tasks", "2", "task.xml") ) assert task.class_labels == ["1", "2", "3", "4", "5", "U"] assert not os.path.exists( - os.path.join(self.workdir, "org", "openml", "test", "tasks", "2", "datasplits.arff") + os.path.join(openml.config.get_cache_directory(), "tasks", "2", "datasplits.arff") ) # Since the 
download_data=False is propagated to get_dataset assert not os.path.exists( - os.path.join(self.workdir, "org", "openml", "test", "datasets", "2", "dataset.arff") + os.path.join(openml.config.get_cache_directory(), "datasets", "2", "dataset.arff") ) task.download_split() assert os.path.exists( - os.path.join(self.workdir, "org", "openml", "test", "tasks", "2", "datasplits.arff") + os.path.join(openml.config.get_cache_directory(), "tasks", "2", "datasplits.arff") ) @mock.patch("openml.tasks.functions.get_dataset") @@ -228,7 +228,7 @@ def test_download_split(self): split = task.download_split() assert type(split) == OpenMLSplit assert os.path.exists( - os.path.join(self.workdir, "org", "openml", "test", "tasks", "1", "datasplits.arff") + os.path.join(openml.config.get_cache_directory(), "tasks", "1", "datasplits.arff") ) def test_deletion_of_cache_dir(self): From 16699e6871f6b242fbd4fae1e2893dc78930bf1e Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Thu, 29 Jan 2026 11:18:40 +0100 Subject: [PATCH 09/67] Update expected number to match initial server state This means it is not reliant on the evaluation engine processing the dataset. Interestingly, the database state purposely seems to keep the last task's dataset in preparation explicitly (by having processing marked as done but having no dataset_status entry). 
--- tests/test_tasks/test_task_functions.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py index 08811add5..6951bf36f 100644 --- a/tests/test_tasks/test_task_functions.py +++ b/tests/test_tasks/test_task_functions.py @@ -96,7 +96,9 @@ def test_list_tasks_empty(self): @pytest.mark.uses_test_server() def test_list_tasks_by_tag(self): - num_basic_tasks = 100 # number is flexible, check server if fails + # Server starts with 99 active tasks with the tag, and one 'in_preparation', + # so depending on the processing of the last dataset, there may be 99 or 100 matches. + num_basic_tasks = 99 tasks = openml.tasks.list_tasks(tag="OpenML100") assert len(tasks) >= num_basic_tasks for task in tasks.to_dict(orient="index").values(): From 7c14c684d35eb409562b590fd225a315f7108ce0 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Thu, 29 Jan 2026 16:35:22 +0530 Subject: [PATCH 10/67] bug fixing --- .github/workflows/test.yml | 2 +- pyproject.toml | 15 --------------- pytest.ini | 14 ++++++++++++++ tests/conftest.py | 16 ++++------------ tests/test_1.py | 14 ++++++++++++++ 5 files changed, 33 insertions(+), 28 deletions(-) create mode 100644 tests/test_1.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index c52486d0a..c2b05a6be 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -75,7 +75,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Checkout server-api and patch Docker path - if: runner.os == 'Linux' + # if: matrix.os == 'Linux' shell: bash run: | git clone --depth 1 https://github.com/openml/server-api.git server-api diff --git a/pyproject.toml b/pyproject.toml index 93a6ffbfa..0627d0901 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -124,21 +124,6 @@ openml = ["*.txt", "*.md", "py.typed"] [tool.setuptools.dynamic] version = {attr = 
"openml.__version__.__version__"} -# https://docs.pytest.org/en/7.2.x/reference/reference.html#ini-options-ref -[tool.pytest.ini_options] -testpaths = ["tests"] -minversion = "7.0" -xfail_strict = true -filterwarnings=[ - "ignore:the matrix subclass:PendingDeprecationWarning" -] -markers = [ - "server: anything that connects to a server", - "upload: anything that uploads to a server", - "production: any interaction with the production server", - "cache: anything that interacts with the (test) cache", -] - # https://github.com/charliermarsh/ruff [tool.ruff] target-version = "py310" diff --git a/pytest.ini b/pytest.ini index 69fbd903f..12d9fe136 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,18 @@ [pytest] +minversion = 7.0 +testpaths = tests +xfail_strict = true + +filterwarnings = + ignore:the matrix subclass:PendingDeprecationWarning + +markers = + server: anything that connects to a server + upload: anything that uploads to a server + production: any interaction with the production server + cache: anything that interacts with the (test) cache + uses_test_server: tests that use the local docker stack + env = OPENML_SERVER = http://localhost:9001/api/v2 OPENML_API_KEY = AD000000000000000000000000000000 \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index 890978558..7ea9257f6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -306,37 +306,29 @@ def openml_docker_stack(): # if sys.platform == "win32": # yield # return - # 1. Start the containers defined in your final docker-compose.yml subprocess.run(["docker", "compose", "up", "-d"], check=True) - - # 2. Wait for the database setup worker to finish its tasks - # This ensures update.sh has finished before we hit the APIs subprocess.run(["docker", "wait", "openml-test-setup-ci"], check=True) - # 3. 
Quick health check: Wait for the Python API to respond on port 9001 - timeout = 30 + timeout = 10 start = time.time() while time.time() - start < timeout: try: - if requests.get("http://localhost:9001/api/v2/").status_code == 200: + response = requests.get("http://localhost:9001/api/v2/") + if response.status_code in [200, 404, 405]: break except requests.exceptions.ConnectionError: time.sleep(1) - yield # Tests run here + yield - # 4. Tear everything down after tests finish to keep the machine clean subprocess.run(["docker", "compose", "down", "-v"], check=True) -# This resets the database state before every single test to prevent race conditions @pytest.fixture(scope="function", autouse=True) def reset_db_state(): # if sys.platform == "win32": # yield # return - # Fast restart of the database container to return to the 'baked-in' state subprocess.run(["docker", "compose", "restart", "database"], check=True) - # Re-run the setup worker to ensure paths are still correct subprocess.run(["docker", "compose", "up", "database-setup"], check=True) @pytest.fixture diff --git a/tests/test_1.py b/tests/test_1.py new file mode 100644 index 000000000..169ebbd03 --- /dev/null +++ b/tests/test_1.py @@ -0,0 +1,14 @@ +import pytest +import requests + +# Requesting the 'openml_docker_stack' fixture forces it to run! +def test_can_connect_to_local_docker(openml_docker_stack): + print("\n🐳 Docker Stack is UP! Checking connection...") + + # Try to talk to the V2 API we just built + response = requests.get("http://localhost:9001/api/v2") + + # If we get a 200 OK or 404 (Not Found), the server is alive. + # If it fails, this line will crash the test. 
+ assert response.status_code in [200, 404] + print("āœ… Successfully connected to Local V2 API on port 9001") \ No newline at end of file From 16ceeaab9f2cb65eb9a9025704c4e31204a6fb57 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Fri, 30 Jan 2026 02:06:38 +0530 Subject: [PATCH 11/67] remove db refresh every test --- .github/workflows/test.yml | 1 - tests/conftest.py | 8 -------- tests/test_1.py | 6 ++---- 3 files changed, 2 insertions(+), 13 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 65ebcbe4a..228500278 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -83,7 +83,6 @@ jobs: python-version: ${{ matrix.python-version }} - name: Checkout server-api and patch Docker path - # if: matrix.os == 'Linux' shell: bash run: | git clone --depth 1 https://github.com/openml/server-api.git server-api diff --git a/tests/conftest.py b/tests/conftest.py index 7ea9257f6..e9bb08013 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -323,14 +323,6 @@ def openml_docker_stack(): subprocess.run(["docker", "compose", "down", "-v"], check=True) -@pytest.fixture(scope="function", autouse=True) -def reset_db_state(): - # if sys.platform == "win32": - # yield - # return - subprocess.run(["docker", "compose", "restart", "database"], check=True) - subprocess.run(["docker", "compose", "up", "database-setup"], check=True) - @pytest.fixture def static_cache_dir(): return Path(__file__).parent / "files" diff --git a/tests/test_1.py b/tests/test_1.py index 169ebbd03..318fa83c1 100644 --- a/tests/test_1.py +++ b/tests/test_1.py @@ -3,12 +3,10 @@ # Requesting the 'openml_docker_stack' fixture forces it to run! def test_can_connect_to_local_docker(openml_docker_stack): - print("\n🐳 Docker Stack is UP! 
Checking connection...") # Try to talk to the V2 API we just built - response = requests.get("http://localhost:9001/api/v2") + response = requests.get("http://localhost:9001/docs") # If we get a 200 OK or 404 (Not Found), the server is alive. # If it fails, this line will crash the test. - assert response.status_code in [200, 404] - print("āœ… Successfully connected to Local V2 API on port 9001") \ No newline at end of file + assert response.status_code in [200] From 015acf46330c5604824b30d9c28a0538a54dd120 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Fri, 30 Jan 2026 02:18:32 +0530 Subject: [PATCH 12/67] bug fixing --- .github/workflows/test.yml | 8 ++++---- pyproject.toml | 19 +++++++++++++++++++ pytest.ini | 18 ------------------ tests/conftest.py | 9 ++------- 4 files changed, 25 insertions(+), 29 deletions(-) delete mode 100644 pytest.ini diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 228500278..686440234 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -83,10 +83,10 @@ jobs: python-version: ${{ matrix.python-version }} - name: Checkout server-api and patch Docker path - shell: bash - run: | - git clone --depth 1 https://github.com/openml/server-api.git server-api - sed -i 's|\.\./server-api|./server-api|g' docker-compose.yml + shell: bash + run: | + git clone --depth 1 https://github.com/openml/server-api.git server-api + sed -i 's|\.\./server-api|./server-api|g' docker-compose.yml - name: Install test dependencies, scikit-learn, and optional pandas shell: bash diff --git a/pyproject.toml b/pyproject.toml index 0627d0901..6165f9497 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -124,6 +124,25 @@ openml = ["*.txt", "*.md", "py.typed"] [tool.setuptools.dynamic] version = {attr = "openml.__version__.__version__"} +# https://docs.pytest.org/en/7.2.x/reference/reference.html#ini-options-ref +[tool.pytest.ini_options] +testpaths = ["tests"] +minversion = 
"7.0" +xfail_strict = true +filterwarnings=[ + "ignore:the matrix subclass:PendingDeprecationWarning" +] +markers = [ + "server: anything that connects to a server", + "upload: anything that uploads to a server", + "production: any interaction with the production server", + "cache: anything that interacts with the (test) cache", +] +env = [ + "OPENML_SERVER=http://localhost:9001/api/v2", + "OPENML_API_KEY=AD000000000000000000000000000000", +] + # https://github.com/charliermarsh/ruff [tool.ruff] target-version = "py310" diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index 12d9fe136..000000000 --- a/pytest.ini +++ /dev/null @@ -1,18 +0,0 @@ -[pytest] -minversion = 7.0 -testpaths = tests -xfail_strict = true - -filterwarnings = - ignore:the matrix subclass:PendingDeprecationWarning - -markers = - server: anything that connects to a server - upload: anything that uploads to a server - production: any interaction with the production server - cache: anything that interacts with the (test) cache - uses_test_server: tests that use the local docker stack - -env = - OPENML_SERVER = http://localhost:9001/api/v2 - OPENML_API_KEY = AD000000000000000000000000000000 \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index e9bb08013..a2c29a6ad 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -300,12 +300,8 @@ def with_test_cache(test_files_directory, request): if tmp_cache.exists(): shutil.rmtree(tmp_cache) -# This starts the entire stack once for the whole test run @pytest.fixture(scope="session", autouse=True) def openml_docker_stack(): - # if sys.platform == "win32": - # yield - # return subprocess.run(["docker", "compose", "up", "-d"], check=True) subprocess.run(["docker", "wait", "openml-test-setup-ci"], check=True) @@ -313,9 +309,8 @@ def openml_docker_stack(): start = time.time() while time.time() - start < timeout: try: - response = requests.get("http://localhost:9001/api/v2/") - if response.status_code in [200, 404, 
405]: - break + requests.get("http://localhost:9001/api/v2/") + break except requests.exceptions.ConnectionError: time.sleep(1) From 937fc770adf8a618851e7cc602b2a87e23f504fe Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Fri, 30 Jan 2026 02:50:32 +0530 Subject: [PATCH 13/67] bug fixing --- .github/workflows/test.yml | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 686440234..107494bf0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -82,12 +82,6 @@ jobs: with: python-version: ${{ matrix.python-version }} - - name: Checkout server-api and patch Docker path - shell: bash - run: | - git clone --depth 1 https://github.com/openml/server-api.git server-api - sed -i 's|\.\./server-api|./server-api|g' docker-compose.yml - - name: Install test dependencies, scikit-learn, and optional pandas shell: bash run: | @@ -107,6 +101,12 @@ jobs: echo "BEFORE=$git_status" >> $GITHUB_ENV echo "Repository status before tests: $git_status" + - name: Checkout server-api and patch Docker path + shell: bash + run: | + git clone --depth 1 https://github.com/openml/server-api.git server-api + sed -i 's|\.\./server-api|./server-api|g' docker-compose.yml + - name: Show installed dependencies run: python -m pip list @@ -145,6 +145,13 @@ jobs: run: | # we need a separate step because of the bash-specific if-statement in the previous one. 
pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server" + - name: Cleanup Docker setup + if: always() + shell: bash + run: | + rm -rf server-api + git checkout docker-compose.yml + - name: Check for files left behind by test if: matrix.os != 'windows-latest' && always() run: | From 30972f8d7c7249f64fc605a17ca006351a1d6149 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Fri, 30 Jan 2026 02:53:36 +0530 Subject: [PATCH 14/67] bug fixing --- .github/workflows/test.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 107494bf0..f3d16aeeb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -146,10 +146,10 @@ jobs: pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server" - name: Cleanup Docker setup - if: always() - shell: bash - run: | - rm -rf server-api + if: always() + shell: bash + run: | + rm -rf server-api git checkout docker-compose.yml - name: Check for files left behind by test From 775dcf722f95aa0f78b4dbef16fe8177cec2a6f0 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 30 Jan 2026 10:30:18 +0100 Subject: [PATCH 15/67] Add symlink to regular test cache directory --- tests/files/localhost:8080 | 1 + 1 file changed, 1 insertion(+) create mode 120000 tests/files/localhost:8080 diff --git a/tests/files/localhost:8080 b/tests/files/localhost:8080 new file mode 120000 index 000000000..5a469fa32 --- /dev/null +++ b/tests/files/localhost:8080 @@ -0,0 +1 @@ +/Users/pietergijsbers/repositories/openml-python/tests/files/org/openml/test \ No newline at end of file From 319cb355c7b4488f83e223e3a9b0d9d20e080771 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 30 Jan 2026 10:47:29 +0100 Subject: [PATCH 16/67] Skip test for 1.8 since expected results differ too much --- tests/test_runs/test_run_functions.py | 4 ++++ 1 file changed, 4 
insertions(+) diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index b8bd6abd7..dda940e4d 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -1870,6 +1870,10 @@ def test_delete_unknown_run(mock_delete, test_files_directory, test_api_key): Version(sklearn.__version__) < Version("0.21"), reason="couldn't perform local tests successfully w/o bloating RAM", ) +@unittest.skipIf( + Version(sklearn.__version__) >= Version("1.8"), + reason="predictions differ significantly", + ) @mock.patch("openml_sklearn.SklearnExtension._prevent_optimize_n_jobs") @pytest.mark.uses_test_server() def test__run_task_get_arffcontent_2(parallel_mock): From a680ebe1648ec2bd549259eab164c62e66bb7151 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 30 Jan 2026 11:08:50 +0100 Subject: [PATCH 17/67] Simplify path to static cache directory --- tests/test_datasets/test_dataset_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 49b13e4b8..2654721bd 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -532,7 +532,7 @@ def test_deletion_of_cache_dir_faulty_download(self, patch): @pytest.mark.uses_test_server() def test_publish_dataset(self): - arff_file_path = Path(__file__).parent.parent / "files" / "org" / "openml" / "test" / "datasets" / "2" / "dataset.arff" + arff_file_path = self.static_cache_dir / "org" / "openml" / "test" / "datasets" / "2" / "dataset.arff" dataset = OpenMLDataset( "anneal", "test", From b161b3b8ce5d92d31f4564ae60cb836ae5793d57 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 30 Jan 2026 11:26:24 +0100 Subject: [PATCH 18/67] Update symbolic link to be relative --- tests/files/localhost:8080 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/files/localhost:8080 
b/tests/files/localhost:8080 index 5a469fa32..334c709ef 120000 --- a/tests/files/localhost:8080 +++ b/tests/files/localhost:8080 @@ -1 +1 @@ -/Users/pietergijsbers/repositories/openml-python/tests/files/org/openml/test \ No newline at end of file +org/openml/test \ No newline at end of file From 0b989d151e45899c0cba0f7981938b293668ad82 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 30 Jan 2026 11:27:52 +0100 Subject: [PATCH 19/67] Fix typo --- tests/test_datasets/test_dataset_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 2654721bd..d8a9d80b9 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -879,7 +879,7 @@ def test_create_invalid_dataset(self): @pytest.mark.uses_test_server() def test_get_online_dataset_arff(self): - dataset_id = 128 # iris -- one of the few datasets with parquet file + dataset_id = 128 # iris -- one of the few datasets without parquet file # lazy loading not used as arff file is checked. 
dataset = openml.datasets.get_dataset(dataset_id, download_data=True) decoder = arff.ArffDecoder() From 892ea6c85c7eecd5ae0541ad46b2a0f459786b5 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Sat, 31 Jan 2026 20:41:45 +0530 Subject: [PATCH 20/67] trying to fix multiple threads issue --- tests/conftest.py | 45 +++++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index a2c29a6ad..262ba2ccb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -26,6 +26,8 @@ import multiprocessing import sys +import fasteners + multiprocessing.set_start_method("spawn", force=True) from collections.abc import Iterator @@ -300,23 +302,38 @@ def with_test_cache(test_files_directory, request): if tmp_cache.exists(): shutil.rmtree(tmp_cache) -@pytest.fixture(scope="session", autouse=True) -def openml_docker_stack(): - subprocess.run(["docker", "compose", "up", "-d"], check=True) +def _is_server_responding(): + """Check if the Docker API is already listening.""" + try: + requests.get("http://localhost:9001/api/v2/", timeout=1) + return True + except (requests.exceptions.ConnectionError, requests.exceptions.Timeout): + return False + +def _start_docker(): + """Logic to spin up the containers and wait for initialization.""" + subprocess.run(["docker", "compose", "up", "-d"], check=True, capture_output=True, text=True) subprocess.run(["docker", "wait", "openml-test-setup-ci"], check=True) + +@pytest.fixture(scope="session", autouse=True) +def openml_docker_stack(tmp_path_factory, worker_id): + # For local development, single-process runs + if worker_id == "master": + _start_docker() + yield + subprocess.run(["docker", "compose", "down", "-v"], check=True) + return + + # Case 2: Running in CI with multiple workers (xdist) + root_tmp_dir = tmp_path_factory.getbasetemp().parent + lock_file = root_tmp_dir / "docker_setup.lock" - timeout = 10 - start = 
time.time() - while time.time() - start < timeout: - try: - requests.get("http://localhost:9001/api/v2/") - break - except requests.exceptions.ConnectionError: - time.sleep(1) - + lock = fasteners.InterProcessLock(str(lock_file)) + with lock: + if not _is_server_responding(): + _start_docker() + yield - - subprocess.run(["docker", "compose", "down", "-v"], check=True) @pytest.fixture def static_cache_dir(): From ae3befb71a66ec5db5ffda3473ef08e53ff62a81 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Sat, 31 Jan 2026 20:42:35 +0530 Subject: [PATCH 21/67] removed test file --- tests/test_1.py | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 tests/test_1.py diff --git a/tests/test_1.py b/tests/test_1.py deleted file mode 100644 index 318fa83c1..000000000 --- a/tests/test_1.py +++ /dev/null @@ -1,12 +0,0 @@ -import pytest -import requests - -# Requesting the 'openml_docker_stack' fixture forces it to run! -def test_can_connect_to_local_docker(openml_docker_stack): - - # Try to talk to the V2 API we just built - response = requests.get("http://localhost:9001/docs") - - # If we get a 200 OK or 404 (Not Found), the server is alive. - # If it fails, this line will crash the test. - assert response.status_code in [200] From 5f396a020e1c40a5e1814b2dd02f48f21200f969 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Sat, 31 Jan 2026 21:20:51 +0530 Subject: [PATCH 22/67] removed unnecessary code (?) 
--- pyproject.toml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6165f9497..93a6ffbfa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -138,10 +138,6 @@ markers = [ "production: any interaction with the production server", "cache: anything that interacts with the (test) cache", ] -env = [ - "OPENML_SERVER=http://localhost:9001/api/v2", - "OPENML_API_KEY=AD000000000000000000000000000000", -] # https://github.com/charliermarsh/ruff [tool.ruff] From 8a319cd6c057ad27084ab90099ac526913fa3b05 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Sat, 31 Jan 2026 21:26:54 +0530 Subject: [PATCH 23/67] Trigger Build From 4ba4239242d40b916843a10aa298a9fa1c97c55b Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Sun, 1 Feb 2026 17:18:00 +0530 Subject: [PATCH 24/67] Clean up code --- docker-compose.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 20fcef863..2db258741 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,18 +12,17 @@ services: interval: 5s retries: 10 - # SETUP WORKER database-setup: image: mysql container_name: "openml-test-setup-ci" volumes: - # You MUST save the update.sh content you shared earlier to this path - ./docker/update.sh:/database-update.sh command: /bin/sh -c "/database-update.sh" depends_on: database: condition: service_healthy +# V1 API (PHP) php-api: image: "openml/php-rest-api:v1.2.2" container_name: "openml-php-api-ci" From 02924041dbbe65dbf1068189e109c0839539e531 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Sun, 1 Feb 2026 17:30:22 +0530 Subject: [PATCH 25/67] comment fixing --- tests/conftest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 262ba2ccb..25adf5d53 100644 --- a/tests/conftest.py +++ 
b/tests/conftest.py @@ -317,14 +317,14 @@ def _start_docker(): @pytest.fixture(scope="session", autouse=True) def openml_docker_stack(tmp_path_factory, worker_id): - # For local development, single-process runs + # For local development with single worker if worker_id == "master": _start_docker() yield subprocess.run(["docker", "compose", "down", "-v"], check=True) return - # Case 2: Running in CI with multiple workers (xdist) + # For CI with multiple workers (xdist) root_tmp_dir = tmp_path_factory.getbasetemp().parent lock_file = root_tmp_dir / "docker_setup.lock" From a7b5d767714da63f87e652d824dc8cecf0df49f0 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Sun, 1 Feb 2026 21:26:02 +0530 Subject: [PATCH 26/67] attempted bug fixing --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f3d16aeeb..d27f861e7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -143,7 +143,7 @@ jobs: - name: Run tests on Windows if: matrix.os == 'windows-latest' run: | # we need a separate step because of the bash-specific if-statement in the previous one. 
- pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server" + pytest -n auto --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server" - name: Cleanup Docker setup if: always() From 9b0f3d71f4d87921f666ef48e4b404d874cd0b02 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Sun, 1 Feb 2026 21:40:50 +0530 Subject: [PATCH 27/67] attempted bug fixing --- .github/workflows/test.yml | 2 +- tests/conftest.py | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d27f861e7..f3d16aeeb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -143,7 +143,7 @@ jobs: - name: Run tests on Windows if: matrix.os == 'windows-latest' run: | # we need a separate step because of the bash-specific if-statement in the previous one. - pytest -n auto --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server" + pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server" - name: Cleanup Docker setup if: always() diff --git a/tests/conftest.py b/tests/conftest.py index 25adf5d53..e203cbd1e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -330,10 +330,14 @@ def openml_docker_stack(tmp_path_factory, worker_id): lock = fasteners.InterProcessLock(str(lock_file)) with lock: - if not _is_server_responding(): - _start_docker() - - yield + import socket + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + result = sock.connect_ex(('localhost', 33060)) + is_port_open = (result == 0) + sock.close() + + if not is_port_open: + _start_docker() @pytest.fixture def static_cache_dir(): From 630f240f36477932a647c261e6d2854b35876671 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Sun, 1 Feb 2026 21:50:19 +0530 Subject: [PATCH 28/67] attempted bug fixing --- 
tests/conftest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/conftest.py b/tests/conftest.py index e203cbd1e..a1f542a07 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -338,6 +338,7 @@ def openml_docker_stack(tmp_path_factory, worker_id): if not is_port_open: _start_docker() + yield @pytest.fixture def static_cache_dir(): From c61d4109cadc4e4fd19d61df347e57cb25f501c1 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Mon, 2 Feb 2026 00:06:11 +0530 Subject: [PATCH 29/67] attempted bug fixing reverts --- tests/conftest.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index a1f542a07..25adf5d53 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -330,14 +330,9 @@ def openml_docker_stack(tmp_path_factory, worker_id): lock = fasteners.InterProcessLock(str(lock_file)) with lock: - import socket - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - result = sock.connect_ex(('localhost', 33060)) - is_port_open = (result == 0) - sock.close() - - if not is_port_open: - _start_docker() + if not _is_server_responding(): + _start_docker() + yield @pytest.fixture From 1ab42b7f6ce6b43fa0e6af3ff9d133ad4e495e80 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Mon, 2 Feb 2026 00:36:35 +0530 Subject: [PATCH 30/67] disabling parallel runs --- .github/workflows/test.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f3d16aeeb..8177e53db 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -123,7 +123,7 @@ jobs: marks="not production and not uses_test_server" fi - pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" + pytest --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" - name: Run tests on Ubuntu Production if: matrix.os == 
'ubuntu-latest' @@ -138,12 +138,12 @@ jobs: marks="production and not uses_test_server" fi - pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" + pytest --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" - name: Run tests on Windows if: matrix.os == 'windows-latest' run: | # we need a separate step because of the bash-specific if-statement in the previous one. - pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server" + pytest --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server" - name: Cleanup Docker setup if: always() From 06405c8e8b4b7170b793ea64014b0e3f504dbded Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Mon, 2 Feb 2026 13:37:17 +0530 Subject: [PATCH 31/67] disabling parallel runs --- .github/workflows/test.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8177e53db..4b34e74f4 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -123,7 +123,7 @@ jobs: marks="not production and not uses_test_server" fi - pytest --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" + pytest -n 0 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" - name: Run tests on Ubuntu Production if: matrix.os == 'ubuntu-latest' @@ -138,12 +138,12 @@ jobs: marks="production and not uses_test_server" fi - pytest --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" + pytest -n 0 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" - name: Run tests on Windows if: matrix.os == 'windows-latest' run: | # we need a separate step because of the bash-specific if-statement in the previous one. 
- pytest --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server" + pytest -n 0 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server" - name: Cleanup Docker setup if: always() From e22b7ca82bbc1443dc011cde714eda0de3ae3467 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Mon, 2 Feb 2026 17:17:18 +0530 Subject: [PATCH 32/67] disabling windows CI --- .github/workflows/test.yml | 6 +++--- tests/conftest.py | 8 ++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4b34e74f4..f3d16aeeb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -123,7 +123,7 @@ jobs: marks="not production and not uses_test_server" fi - pytest -n 0 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" + pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" - name: Run tests on Ubuntu Production if: matrix.os == 'ubuntu-latest' @@ -138,12 +138,12 @@ jobs: marks="production and not uses_test_server" fi - pytest -n 0 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" + pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" - name: Run tests on Windows if: matrix.os == 'windows-latest' run: | # we need a separate step because of the bash-specific if-statement in the previous one. 
- pytest -n 0 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server" + pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server" - name: Cleanup Docker setup if: always() diff --git a/tests/conftest.py b/tests/conftest.py index 25adf5d53..c1420527d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -317,6 +317,14 @@ def _start_docker(): @pytest.fixture(scope="session", autouse=True) def openml_docker_stack(tmp_path_factory, worker_id): + # Skip Docker setup in CI on Windows given docker images are for Linux + is_ci = os.environ.get("CI") == "true" + is_windows = sys.platform == "win32" or os.name == "nt" + + if is_ci and is_windows: + yield + return + # For local development with single worker if worker_id == "master": _start_docker() From 1b00a7fb35ca57b4ff14a865865983aa336b790e Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Fri, 6 Feb 2026 14:30:58 +0530 Subject: [PATCH 33/67] removed docker from pytest default --- .github/workflows/test.yml | 9 ++++++ docker-compose.yml | 5 ++++ tests/conftest.py | 56 ++++++++------------------------------ 3 files changed, 25 insertions(+), 45 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f3d16aeeb..a62562b52 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -107,6 +107,15 @@ jobs: git clone --depth 1 https://github.com/openml/server-api.git server-api sed -i 's|\.\./server-api|./server-api|g' docker-compose.yml + - name: Start Docker Test Environment + if: matrix.os == 'ubuntu-latest' + shell: bash + run: | + sed -i 's/\r$//' docker/update.sh + docker compose up -d + docker wait openml-test-setup-ci + echo "OPENML_TEST_SERVER=local" >> $GITHUB_ENV + - name: Show installed dependencies run: python -m pip list diff --git a/docker-compose.yml b/docker-compose.yml index 2db258741..4122f0e18 100644 --- a/docker-compose.yml +++ 
b/docker-compose.yml @@ -11,6 +11,11 @@ services: start_period: 30s interval: 5s retries: 10 + networks: + default: + aliases: + - openml-test-database + - elasticsearch database-setup: image: mysql diff --git a/tests/conftest.py b/tests/conftest.py index c1420527d..a64e6d2d0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -24,9 +24,6 @@ from __future__ import annotations import multiprocessing -import sys - -import fasteners multiprocessing.set_start_method("spawn", force=True) @@ -38,9 +35,6 @@ import pytest import openml_sklearn -import time -import subprocess -import requests import openml from openml.testing import TestBase @@ -302,46 +296,18 @@ def with_test_cache(test_files_directory, request): if tmp_cache.exists(): shutil.rmtree(tmp_cache) -def _is_server_responding(): - """Check if the Docker API is already listening.""" - try: - requests.get("http://localhost:9001/api/v2/", timeout=1) - return True - except (requests.exceptions.ConnectionError, requests.exceptions.Timeout): - return False - -def _start_docker(): - """Logic to spin up the containers and wait for initialization.""" - subprocess.run(["docker", "compose", "up", "-d"], check=True, capture_output=True, text=True) - subprocess.run(["docker", "wait", "openml-test-setup-ci"], check=True) - -@pytest.fixture(scope="session", autouse=True) -def openml_docker_stack(tmp_path_factory, worker_id): - # Skip Docker setup in CI on Windows given docker images are for Linux - is_ci = os.environ.get("CI") == "true" - is_windows = sys.platform == "win32" or os.name == "nt" - - if is_ci and is_windows: - yield - return - - # For local development with single worker - if worker_id == "master": - _start_docker() - yield - subprocess.run(["docker", "compose", "down", "-v"], check=True) - return - - # For CI with multiple workers (xdist) - root_tmp_dir = tmp_path_factory.getbasetemp().parent - lock_file = root_tmp_dir / "docker_setup.lock" +@pytest.fixture(scope="session") +def openml_test_config(): + """ 
+ Returns the URL for the test server. + """ + if os.environ.get("OPENML_TEST_SERVER") == "local": + return { + "v1": "http://localhost:9002/api/v1/", + "v2": "http://localhost:9001/" + } - lock = fasteners.InterProcessLock(str(lock_file)) - with lock: - if not _is_server_responding(): - _start_docker() - - yield + raise ValueError("Use the environment variable OPENML_TEST_SERVER=local before running docker to run tests against a local OpenML server.") @pytest.fixture def static_cache_dir(): From cc6e673852c06fd4e00afee0198046a9bfb58c89 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Fri, 6 Feb 2026 16:00:31 +0530 Subject: [PATCH 34/67] change mysql port --- docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 4122f0e18..a47a10106 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,7 +5,7 @@ services: environment: MYSQL_ROOT_PASSWORD: ok ports: - - "33060:3306" + - "33069:3306" healthcheck: test: ["CMD", "mysqladmin" ,"ping", "-h", "localhost"] start_period: 30s From c1bf5589a92358d78eed01dfcb8568e534875636 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Fri, 6 Feb 2026 16:40:09 +0530 Subject: [PATCH 35/67] Change order of ci flow --- .github/workflows/test.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a62562b52..2a1f4e9ae 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -154,6 +154,15 @@ jobs: run: | # we need a separate step because of the bash-specific if-statement in the previous one. 
pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server" + - name: Upload coverage + if: matrix.code-cov && always() + uses: codecov/codecov-action@v4 + with: + files: coverage.xml + token: ${{ secrets.CODECOV_TOKEN }} + fail_ci_if_error: true + verbose: true + - name: Cleanup Docker setup if: always() shell: bash @@ -173,15 +182,6 @@ jobs: exit 1 fi - - name: Upload coverage - if: matrix.code-cov && always() - uses: codecov/codecov-action@v4 - with: - files: coverage.xml - token: ${{ secrets.CODECOV_TOKEN }} - fail_ci_if_error: true - verbose: true - dummy_windows_py_sk024: name: (windows-latest, Py, sk0.24.*, sk-only:false) runs-on: ubuntu-latest From 1a794feb545caec924be3bee062a9d123cafa02a Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Wed, 11 Feb 2026 13:26:16 +0530 Subject: [PATCH 36/67] CI testing --- .github/workflows/test.yml | 17 ++++-------- docker-compose.yml | 57 -------------------------------------- docker/update.sh | 31 --------------------- tests/conftest.py | 13 --------- 4 files changed, 6 insertions(+), 112 deletions(-) delete mode 100644 docker-compose.yml delete mode 100644 docker/update.sh diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2a1f4e9ae..30b36a0bf 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -101,20 +101,15 @@ jobs: echo "BEFORE=$git_status" >> $GITHUB_ENV echo "Repository status before tests: $git_status" - - name: Checkout server-api and patch Docker path - shell: bash - run: | - git clone --depth 1 https://github.com/openml/server-api.git server-api - sed -i 's|\.\./server-api|./server-api|g' docker-compose.yml + - name: Clone Services + run: git clone --depth 1 https://github.com/openml/services.git - - name: Start Docker Test Environment - if: matrix.os == 'ubuntu-latest' - shell: bash + - name: Start Docker Services + working-directory: ./services run: | - sed -i 's/\r$//' 
docker/update.sh - docker compose up -d + sudo systemctl stop mysql.service + docker compose --profile rest-api --profile minio --profile evaluation-engine up -d docker wait openml-test-setup-ci - echo "OPENML_TEST_SERVER=local" >> $GITHUB_ENV - name: Show installed dependencies run: python -m pip list diff --git a/docker-compose.yml b/docker-compose.yml deleted file mode 100644 index a47a10106..000000000 --- a/docker-compose.yml +++ /dev/null @@ -1,57 +0,0 @@ -services: - database: - image: "openml/test-database:20240105" - container_name: "openml-test-db-ci" - environment: - MYSQL_ROOT_PASSWORD: ok - ports: - - "33069:3306" - healthcheck: - test: ["CMD", "mysqladmin" ,"ping", "-h", "localhost"] - start_period: 30s - interval: 5s - retries: 10 - networks: - default: - aliases: - - openml-test-database - - elasticsearch - - database-setup: - image: mysql - container_name: "openml-test-setup-ci" - volumes: - - ./docker/update.sh:/database-update.sh - command: /bin/sh -c "/database-update.sh" - depends_on: - database: - condition: service_healthy - -# V1 API (PHP) - php-api: - image: "openml/php-rest-api:v1.2.2" - container_name: "openml-php-api-ci" - ports: - - "9002:80" - depends_on: - database: - condition: service_started - environment: - - DB_HOST_OPENML=database:3306 - - DB_HOST_EXPDB=database:3306 - - BASE_URL=http://localhost:9002/ - - INDEX_ES_DURING_STARTUP=false - - # V2 API (PYTHON) - python-api: - container_name: "openml-python-api-ci" - build: - # TODO: replace with image when available - context: ../server-api - dockerfile: docker/python/Dockerfile - ports: - - "9001:8000" - depends_on: - - database - environment: - - DATABASE_URL=mysql://root:ok@database:3306/openml \ No newline at end of file diff --git a/docker/update.sh b/docker/update.sh deleted file mode 100644 index 7e9864742..000000000 --- a/docker/update.sh +++ /dev/null @@ -1,31 +0,0 @@ -#/bin/bash -# Change the filepath of openml.file -# from 
"https://www.openml.org/data/download/1666876/phpFsFYVN" -# to "http://minio:9000/datasets/0000/0001/phpFsFYVN" -mysql -hdatabase -uroot -pok -e 'UPDATE openml.file SET filepath = CONCAT("http://minio:9000/datasets/0000/", LPAD(id, 4, "0"), "/", SUBSTRING_INDEX(filepath, "/", -1)) WHERE extension="arff";' - -# Update openml.expdb.dataset with the same url -mysql -hdatabase -uroot -pok -e 'UPDATE openml_expdb.dataset DS, openml.file FL SET DS.url = FL.filepath WHERE DS.did = FL.id;' - - - - - -# Create the data_feature_description TABLE. TODO: can we make sure this table exists already? -mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'CREATE TABLE IF NOT EXISTS `data_feature_description` ( - `did` int unsigned NOT NULL, - `index` int unsigned NOT NULL, - `uploader` mediumint unsigned NOT NULL, - `date` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, - `description_type` enum("plain", "ontology") NOT NULL, - `value` varchar(256) NOT NULL, - KEY `did` (`did`,`index`), - CONSTRAINT `data_feature_description_ibfk_1` FOREIGN KEY (`did`, `index`) REFERENCES `data_feature` (`did`, `index`) ON DELETE CASCADE ON UPDATE CASCADE -)' - -# SET dataset 1 to active (used in unittests java) -mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'INSERT IGNORE INTO dataset_status VALUES (1, "active", "2024-01-01 00:00:00", 1)' -mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'DELETE FROM dataset_status WHERE did = 2 AND status = "deactivated";' - -# Temporary fix in case the database missed the kaggle table. The PHP Rest API expects the table to be there, while indexing. 
-mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'CREATE TABLE IF NOT EXISTS `kaggle` (`dataset_id` int(11) DEFAULT NULL, `kaggle_link` varchar(500) DEFAULT NULL)' \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index a64e6d2d0..08db800df 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -296,19 +296,6 @@ def with_test_cache(test_files_directory, request): if tmp_cache.exists(): shutil.rmtree(tmp_cache) -@pytest.fixture(scope="session") -def openml_test_config(): - """ - Returns the URL for the test server. - """ - if os.environ.get("OPENML_TEST_SERVER") == "local": - return { - "v1": "http://localhost:9002/api/v1/", - "v2": "http://localhost:9001/" - } - - raise ValueError("Use the environment variable OPENML_TEST_SERVER=local before running docker to run tests against a local OpenML server.") - @pytest.fixture def static_cache_dir(): return Path(__file__).parent / "files" From dbe77827401b802fc47887dc07c9c9b486e2aa57 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Wed, 11 Feb 2026 13:27:50 +0530 Subject: [PATCH 37/67] CI testing --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 30b36a0bf..8b857a435 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -162,7 +162,7 @@ jobs: if: always() shell: bash run: | - rm -rf server-api + rm -rf services git checkout docker-compose.yml - name: Check for files left behind by test From d8be5f12a47e520fa1f2697b299a0d6c5e1e0856 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Wed, 11 Feb 2026 13:28:13 +0530 Subject: [PATCH 38/67] CI testing --- tests/conftest.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 08db800df..0fa4b959a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -295,7 +295,8 @@ def 
with_test_cache(test_files_directory, request): openml.config.set_root_cache_directory(_root_cache_directory) if tmp_cache.exists(): shutil.rmtree(tmp_cache) - + + @pytest.fixture def static_cache_dir(): return Path(__file__).parent / "files" From b20484521e24eced3e456e17c3424ee76f98e11e Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Wed, 11 Feb 2026 13:44:47 +0530 Subject: [PATCH 39/67] CI testing --- .github/workflows/test.yml | 14 ++++++++------ tests/conftest.py | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8b857a435..ea8a22c26 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,16 +1,13 @@ ---- name: Tests on: workflow_dispatch: - push: branches: - main - develop tags: - "v*.*.*" - pull_request: branches: - main @@ -102,20 +99,24 @@ jobs: echo "Repository status before tests: $git_status" - name: Clone Services + if: matrix.os == 'ubuntu-latest' run: git clone --depth 1 https://github.com/openml/services.git - name: Start Docker Services + if: matrix.os == 'ubuntu-latest' working-directory: ./services run: | sudo systemctl stop mysql.service docker compose --profile rest-api --profile minio --profile evaluation-engine up -d - docker wait openml-test-setup-ci + docker wait openml-test-database-setup - name: Show installed dependencies run: python -m pip list - name: Run tests on Ubuntu Test if: matrix.os == 'ubuntu-latest' + env: + TEST_SERVER_URL: "http://localhost:8000" run: | if [ "${{ matrix.code-cov }}" = "true" ]; then codecov="--cov=openml --long --cov-report=xml" @@ -131,6 +132,8 @@ jobs: - name: Run tests on Ubuntu Production if: matrix.os == 'ubuntu-latest' + env: + TEST_SERVER_URL: "http://localhost:8000" run: | if [ "${{ matrix.code-cov }}" = "true" ]; then codecov="--cov=openml --long --cov-report=xml" @@ -162,8 +165,7 @@ jobs: if: always() shell: bash run: | - rm -rf services - git 
checkout docker-compose.yml + sudo rm -rf services - name: Check for files left behind by test if: matrix.os != 'windows-latest' && always() diff --git a/tests/conftest.py b/tests/conftest.py index 0fa4b959a..ae67f2f43 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -296,7 +296,7 @@ def with_test_cache(test_files_directory, request): if tmp_cache.exists(): shutil.rmtree(tmp_cache) - + @pytest.fixture def static_cache_dir(): return Path(__file__).parent / "files" From 54725fa2d0b95855e1b329d34b5921f28253a9e8 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Wed, 11 Feb 2026 13:52:20 +0530 Subject: [PATCH 40/67] Windows CI bugfixing --- .github/workflows/test.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ea8a22c26..a21992474 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -162,8 +162,7 @@ jobs: verbose: true - name: Cleanup Docker setup - if: always() - shell: bash + if: matrix.os == 'ubuntu-latest' run: | sudo rm -rf services From abc44a5493e2a8f0210cd29da27e45e7b369eccc Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Wed, 11 Feb 2026 20:52:45 +0530 Subject: [PATCH 41/67] merging 2 branches --- .github/workflows/test.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a21992474..8778dc33c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -100,7 +100,13 @@ jobs: - name: Clone Services if: matrix.os == 'ubuntu-latest' - run: git clone --depth 1 https://github.com/openml/services.git + run: | + git clone --depth 1 https://github.com/openml/services.git + git fetch origin setup-test-locally:setup-test-locally + git fetch origin add/python-rest-api:add/python-rest-api + + git merge setup-test-locally + git merge add/python-rest-api - name: 
Start Docker Services if: matrix.os == 'ubuntu-latest' From b034687ff0ba29195fd49001eec53bd2462e0361 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Wed, 11 Feb 2026 20:55:05 +0530 Subject: [PATCH 42/67] merging 2 branches --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8778dc33c..748798856 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -102,8 +102,8 @@ jobs: if: matrix.os == 'ubuntu-latest' run: | git clone --depth 1 https://github.com/openml/services.git - git fetch origin setup-test-locally:setup-test-locally - git fetch origin add/python-rest-api:add/python-rest-api + git fetch origin setup-test-locally + git fetch origin add/python-rest-api git merge setup-test-locally git merge add/python-rest-api From b8826f5f5fd18b89593dbbfe20bd3a9b8dec8134 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Wed, 11 Feb 2026 21:01:12 +0530 Subject: [PATCH 43/67] merging 2 branches --- .github/workflows/test.yml | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 748798856..7c0136d5b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -102,11 +102,17 @@ jobs: if: matrix.os == 'ubuntu-latest' run: | git clone --depth 1 https://github.com/openml/services.git - git fetch origin setup-test-locally - git fetch origin add/python-rest-api + git clone --depth 1 https://github.com/openml/services.git + cd services - git merge setup-test-locally - git merge add/python-rest-api + git config user.email "ci@openml.org" + git config user.name "CI" + + git fetch origin pull/13/head:pr-13 + git merge pr-13 --no-edit + + git fetch origin pull/15/head:pr-15 + git merge pr-15 --no-edit - name: Start Docker Services if: matrix.os == 'ubuntu-latest' 
From 445cbe807a9859421f38d4d8642694d2a5bcce87 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Wed, 11 Feb 2026 21:04:30 +0530 Subject: [PATCH 44/67] merging 2 branches --- .github/workflows/test.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7c0136d5b..43264c913 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -101,7 +101,6 @@ jobs: - name: Clone Services if: matrix.os == 'ubuntu-latest' run: | - git clone --depth 1 https://github.com/openml/services.git git clone --depth 1 https://github.com/openml/services.git cd services From 295ef9339f4e09627be1e6c1a4fbbe4afc7f05b8 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Wed, 11 Feb 2026 21:20:57 +0530 Subject: [PATCH 45/67] curl to verify server is running --- .github/workflows/test.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 43264c913..ad08a477a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -121,6 +121,12 @@ jobs: docker compose --profile rest-api --profile minio --profile evaluation-engine up -d docker wait openml-test-database-setup + - name: Verify API is Reachable + if: matrix.os == 'ubuntu-latest' + run: | + timeout 20s bash -c 'until curl -sSf http://localhost:8000/api/v1/xml/data/1 > /dev/null; do sleep 3; done' + curl -I http://localhost:8000/api/v1/xml/data/1 + - name: Show installed dependencies run: python -m pip list From 488f40934267cfea6d44e954568922f7cd4ba68a Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Wed, 11 Feb 2026 21:42:04 +0530 Subject: [PATCH 46/67] path fix --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ad08a477a..b229cb6a9 100644 --- 
a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -133,7 +133,7 @@ jobs: - name: Run tests on Ubuntu Test if: matrix.os == 'ubuntu-latest' env: - TEST_SERVER_URL: "http://localhost:8000" + TEST_SERVER_URL: "http://localhost:8000/" run: | if [ "${{ matrix.code-cov }}" = "true" ]; then codecov="--cov=openml --long --cov-report=xml" @@ -150,7 +150,7 @@ jobs: - name: Run tests on Ubuntu Production if: matrix.os == 'ubuntu-latest' env: - TEST_SERVER_URL: "http://localhost:8000" + TEST_SERVER_URL: "http://localhost:8000/" run: | if [ "${{ matrix.code-cov }}" = "true" ]; then codecov="--cov=openml --long --cov-report=xml" From 45e72578d6c1cb4faee5aa940430bd4db82fc5f5 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Wed, 11 Feb 2026 23:52:12 +0530 Subject: [PATCH 47/67] run all test server tests --- .github/workflows/test.yml | 15 +++++++++------ tests/files/localhost:8080 | 1 - 2 files changed, 9 insertions(+), 7 deletions(-) delete mode 120000 tests/files/localhost:8080 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b229cb6a9..5b608d501 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,13 +1,16 @@ +--- name: Tests on: workflow_dispatch: + push: branches: - main - develop tags: - "v*.*.*" + pull_request: branches: - main @@ -125,7 +128,7 @@ jobs: if: matrix.os == 'ubuntu-latest' run: | timeout 20s bash -c 'until curl -sSf http://localhost:8000/api/v1/xml/data/1 > /dev/null; do sleep 3; done' - curl -I http://localhost:8000/api/v1/xml/data/1 + curl -I http://localhost:8000/api/v1/task/1 - name: Show installed dependencies run: python -m pip list @@ -140,9 +143,9 @@ jobs: fi if [ "${{ matrix.sklearn-only }}" = "true" ]; then - marks="sklearn and not production and not uses_test_server" + marks="sklearn and not production" else - marks="not production and not uses_test_server" + marks="not production" fi pytest -n 4 --durations=20 --dist load -sv $codecov -o 
log_cli=true -m "$marks" @@ -157,9 +160,9 @@ jobs: fi if [ "${{ matrix.sklearn-only }}" = "true" ]; then - marks="sklearn and production and not uses_test_server" + marks="sklearn and production" else - marks="production and not uses_test_server" + marks="production" fi pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" @@ -167,7 +170,7 @@ jobs: - name: Run tests on Windows if: matrix.os == 'windows-latest' run: | # we need a separate step because of the bash-specific if-statement in the previous one. - pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server" + pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 - name: Upload coverage if: matrix.code-cov && always() diff --git a/tests/files/localhost:8080 b/tests/files/localhost:8080 deleted file mode 120000 index 334c709ef..000000000 --- a/tests/files/localhost:8080 +++ /dev/null @@ -1 +0,0 @@ -org/openml/test \ No newline at end of file From 7fcf039fb215c840faa4bc6d0607eb30d133cf67 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Thu, 12 Feb 2026 00:29:40 +0530 Subject: [PATCH 48/67] fix 'Cleanup Docker setup' --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5b608d501..78db57bdc 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -182,7 +182,7 @@ jobs: verbose: true - name: Cleanup Docker setup - if: matrix.os == 'ubuntu-latest' + if: matrix.os == 'ubuntu-latest' && always() run: | sudo rm -rf services From 37cfb2eea805f42181f61c7a6246ba8f598cdca4 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Thu, 12 Feb 2026 00:33:28 +0530 Subject: [PATCH 49/67] skipping windows given docker binaries do not match --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.github/workflows/test.yml b/.github/workflows/test.yml index 78db57bdc..fabad7757 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -170,7 +170,7 @@ jobs: - name: Run tests on Windows if: matrix.os == 'windows-latest' run: | # we need a separate step because of the bash-specific if-statement in the previous one. - pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 + pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server" - name: Upload coverage if: matrix.code-cov && always() From 9290010e8ad897c25cccf4e39330d9b1a1b339a0 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Thu, 12 Feb 2026 14:47:26 +0530 Subject: [PATCH 50/67] testing out locally --- .github/workflows/test.yml | 4 ---- openml/config.py | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index fabad7757..219f01e70 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -135,8 +135,6 @@ jobs: - name: Run tests on Ubuntu Test if: matrix.os == 'ubuntu-latest' - env: - TEST_SERVER_URL: "http://localhost:8000/" run: | if [ "${{ matrix.code-cov }}" = "true" ]; then codecov="--cov=openml --long --cov-report=xml" @@ -152,8 +150,6 @@ jobs: - name: Run tests on Ubuntu Production if: matrix.os == 'ubuntu-latest' - env: - TEST_SERVER_URL: "http://localhost:8000/" run: | if [ "${{ matrix.code-cov }}" = "true" ]; then codecov="--cov=openml --long --cov-report=xml" diff --git a/openml/config.py b/openml/config.py index 5b2d69067..3f46c7480 100644 --- a/openml/config.py +++ b/openml/config.py @@ -27,7 +27,7 @@ OPENML_SKIP_PARQUET_ENV_VAR = "OPENML_SKIP_PARQUET" _TEST_SERVER_NORMAL_USER_KEY = "normaluser" -TEST_SERVER_URL = "https://test.openml.org" +TEST_SERVER_URL = "http://localhost:8000" class _Config(TypedDict): From bbfa193afaaa90ca77f7adddf77f9b4b58edbe2a Mon Sep 17 00:00:00 2001 From: 
Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Thu, 12 Feb 2026 17:07:02 +0530 Subject: [PATCH 51/67] replacing with 8080 --- openml/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openml/config.py b/openml/config.py index 3f46c7480..0e8d21618 100644 --- a/openml/config.py +++ b/openml/config.py @@ -27,7 +27,7 @@ OPENML_SKIP_PARQUET_ENV_VAR = "OPENML_SKIP_PARQUET" _TEST_SERVER_NORMAL_USER_KEY = "normaluser" -TEST_SERVER_URL = "http://localhost:8000" +TEST_SERVER_URL = "http://localhost:8080" class _Config(TypedDict): From 4531cbc4afb14c5a9e01e1c2c062c17756b18da0 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Thu, 12 Feb 2026 21:34:44 +0530 Subject: [PATCH 52/67] test --- .github/workflows/test.yml | 15 ++++++++++++++- openml/config.py | 2 +- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 219f01e70..328045554 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -121,8 +121,21 @@ jobs: working-directory: ./services run: | sudo systemctl stop mysql.service - docker compose --profile rest-api --profile minio --profile evaluation-engine up -d + docker compose --profile rest-api --profile minio up -d + + echo "1. Waiting for Database population..." docker wait openml-test-database-setup + + echo "2. Waiting for Elasticsearch (this is the slow part)..." + # Wait up to 5 minutes for ES to go green + timeout 300s bash -c 'until [ "$(docker inspect -f {{.State.Health.Status}} openml-elasticsearch)" == "healthy" ]; do sleep 5; done' + + echo "3. Waiting for PHP API..." + # Wait up to 5 minutes for PHP to accept connections + timeout 300s bash -c 'until [ "$(docker inspect -f {{.State.Health.Status}} openml-php-rest-api)" == "healthy" ]; do sleep 5; done' + + echo "4. Docker Stack is Healthy!" 
+ docker ps - name: Verify API is Reachable if: matrix.os == 'ubuntu-latest' diff --git a/openml/config.py b/openml/config.py index 0e8d21618..3f46c7480 100644 --- a/openml/config.py +++ b/openml/config.py @@ -27,7 +27,7 @@ OPENML_SKIP_PARQUET_ENV_VAR = "OPENML_SKIP_PARQUET" _TEST_SERVER_NORMAL_USER_KEY = "normaluser" -TEST_SERVER_URL = "http://localhost:8080" +TEST_SERVER_URL = "http://localhost:8000" class _Config(TypedDict): From d90615a30c53ada2b1b84caaea975f87ac21634c Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Thu, 12 Feb 2026 21:46:48 +0530 Subject: [PATCH 53/67] test --- .github/workflows/test.yml | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 328045554..7f832d982 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -140,8 +140,38 @@ jobs: - name: Verify API is Reachable if: matrix.os == 'ubuntu-latest' run: | - timeout 20s bash -c 'until curl -sSf http://localhost:8000/api/v1/xml/data/1 > /dev/null; do sleep 3; done' - curl -I http://localhost:8000/api/v1/task/1 + echo "Waiting for API to be ready (Handling 412 Sync Errors)..." + + # Helper function to check status + check_api() { + # Fetch HTTP code + code=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/api/v1/xml/data/1) + if [ "$code" == "200" ]; then + return 0 + else + return 1 + fi + } + + # Loop for up to 60 seconds + count=0 + while [ $count -lt 12 ]; do + if check_api; then + echo "API is Ready (200 OK)!" + exit 0 + fi + echo "API responded with status $(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/api/v1/xml/data/1). Retrying in 5s..." + sleep 5 + count=$((count+1)) + done + + echo "API failed to initialize. 
Printing last response body for debugging:" + curl -v http://localhost:8000/api/v1/xml/data/1 + + # Also print PHP logs to see the specific OpenML Exception + echo "=== PHP API LOGS ===" + docker logs openml-php-rest-api + exit 1 - name: Show installed dependencies run: python -m pip list From 9b12d6fb1376eea87d7e27e890b39ed1c116483c Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Thu, 12 Feb 2026 23:59:08 +0530 Subject: [PATCH 54/67] test --- .github/workflows/test.yml | 43 ++++++++++---------------------------- 1 file changed, 11 insertions(+), 32 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7f832d982..deb4620f2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -137,41 +137,20 @@ jobs: echo "4. Docker Stack is Healthy!" docker ps - - name: Verify API is Reachable + - name: Verify API and Splits if: matrix.os == 'ubuntu-latest' run: | - echo "Waiting for API to be ready (Handling 412 Sync Errors)..." + echo "Checking Data API..." + timeout 60s bash -c 'until [ "$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/api/v1/xml/data/1)" == "200" ]; do sleep 5; done' + + echo "Checking Task Splits (The 412 Killer)..." + # If this fails, the evaluation engine is broken + timeout 120s bash -c 'until [ "$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/api_splits/get/1/Task_1_splits.arff)" == "200" ]; do + echo "Splits not ready yet. Waiting..." + sleep 5 + done' - # Helper function to check status - check_api() { - # Fetch HTTP code - code=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/api/v1/xml/data/1) - if [ "$code" == "200" ]; then - return 0 - else - return 1 - fi - } - - # Loop for up to 60 seconds - count=0 - while [ $count -lt 12 ]; do - if check_api; then - echo "API is Ready (200 OK)!" 
- exit 0 - fi - echo "API responded with status $(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/api/v1/xml/data/1). Retrying in 5s..." - sleep 5 - count=$((count+1)) - done - - echo "API failed to initialize. Printing last response body for debugging:" - curl -v http://localhost:8000/api/v1/xml/data/1 - - # Also print PHP logs to see the specific OpenML Exception - echo "=== PHP API LOGS ===" - docker logs openml-php-rest-api - exit 1 + echo "System is fully operational." - name: Show installed dependencies run: python -m pip list From 45d34234015dd999f1de178b69f1fde55549c9ba Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Thu, 12 Feb 2026 23:59:34 +0530 Subject: [PATCH 55/67] test --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index deb4620f2..8207a9b78 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -168,7 +168,7 @@ jobs: marks="not production" fi - pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" + pytest --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" - name: Run tests on Ubuntu Production if: matrix.os == 'ubuntu-latest' @@ -183,7 +183,7 @@ jobs: marks="production" fi - pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" + pytest --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" - name: Run tests on Windows if: matrix.os == 'windows-latest' From 16f22b12d1e6a46802b6140c3a0bfbdfd67a8c71 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Fri, 13 Feb 2026 00:18:58 +0530 Subject: [PATCH 56/67] test --- .github/workflows/test.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8207a9b78..4a8a983c4 100644 --- a/.github/workflows/test.yml +++ 
b/.github/workflows/test.yml @@ -121,7 +121,7 @@ jobs: working-directory: ./services run: | sudo systemctl stop mysql.service - docker compose --profile rest-api --profile minio up -d + docker compose --profile rest-api --profile minio --profile evaluation-engine up -d echo "1. Waiting for Database population..." docker wait openml-test-database-setup @@ -145,7 +145,7 @@ jobs: echo "Checking Task Splits (The 412 Killer)..." # If this fails, the evaluation engine is broken - timeout 120s bash -c 'until [ "$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/api_splits/get/1/Task_1_splits.arff)" == "200" ]; do + timeout 180s bash -c 'until [ "$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/api_splits/get/1/Task_1_splits.arff)" == "200" ]; do echo "Splits not ready yet. Waiting..." sleep 5 done' @@ -168,7 +168,7 @@ jobs: marks="not production" fi - pytest --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" + pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" - name: Run tests on Ubuntu Production if: matrix.os == 'ubuntu-latest' @@ -183,7 +183,7 @@ jobs: marks="production" fi - pytest --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" + pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" - name: Run tests on Windows if: matrix.os == 'windows-latest' From dd2ce686e1e6d8cdb9d07a705d034e89ca010e93 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Fri, 13 Feb 2026 00:52:09 +0530 Subject: [PATCH 57/67] test --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4a8a983c4..d80fb14db 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -121,7 +121,7 @@ jobs: working-directory: ./services run: | sudo systemctl stop mysql.service - docker compose --profile rest-api --profile minio --profile 
evaluation-engine up -d + docker compose --profile rest-api --profile minio up -d --build echo "1. Waiting for Database population..." docker wait openml-test-database-setup @@ -145,7 +145,7 @@ jobs: echo "Checking Task Splits (The 412 Killer)..." # If this fails, the evaluation engine is broken - timeout 180s bash -c 'until [ "$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/api_splits/get/1/Task_1_splits.arff)" == "200" ]; do + timeout 120s bash -c 'until [ "$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/api_splits/get/1/Task_1_splits.arff)" == "200" ]; do echo "Splits not ready yet. Waiting..." sleep 5 done' From ebecceaf8a6c9f7bff7cb63024eaea3581250328 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Fri, 13 Feb 2026 00:59:36 +0530 Subject: [PATCH 58/67] test --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d80fb14db..33f96a592 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -121,7 +121,7 @@ jobs: working-directory: ./services run: | sudo systemctl stop mysql.service - docker compose --profile rest-api --profile minio up -d --build + docker compose --profile rest-api --profile minio --profile evaluation-engine up -d --build echo "1. Waiting for Database population..." docker wait openml-test-database-setup From a0ac6b99126ff48b84cce26d3b476c13b68b8ffe Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Fri, 13 Feb 2026 01:06:49 +0530 Subject: [PATCH 59/67] test --- .github/workflows/test.yml | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 33f96a592..da689aecf 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -136,7 +136,41 @@ jobs: echo "4. Docker Stack is Healthy!" 
docker ps + - name: Error + working-directory: ./services + run: | + echo "---------------------------------------------------" + echo "1. PROBING: Can we reach the API at all?" + timeout 60s bash -c 'until curl -sSf http://localhost:8000/api/v1/xml/data/1 > /dev/null; do sleep 5; done' || echo "WARNING: Main API is slow/down" + + echo "---------------------------------------------------" + echo "2. PROBING: Waiting for Task 119 Splits (The Failure Point)..." + # We wait 60s. If it works, great. If not, we want the logs. + timeout 60s bash -c 'until curl -sSf http://localhost:8000/api_splits/get/119/Task_119_splits.arff > /dev/null; do + echo " ... file not ready yet" + sleep 5 + done' || echo "FAILURE: Task 119 splits were NOT generated." + + echo "---------------------------------------------------" + echo "3.DUMPING EVALUATION ENGINE LOGS (STDOUT)" + docker logs openml-evaluation-engine + + echo "---------------------------------------------------" + echo "4.DUMPING INTERNAL CRON LOGS (The Hidden Logs)" + # The engine runs via cron, so the real errors are often in this file, NOT in docker logs + docker exec openml-evaluation-engine cat /cron.log || echo "Could not read /cron.log" + + echo "---------------------------------------------------" + echo "5.DUMPING PHP API LOGS (Why did it throw 412?)" + docker logs openml-php-rest-api | grep "412" -B 5 -A 5 || echo "No 412 errors found in logs?" 
+ + echo "---------------------------------------------------" + echo "6.CHECKING NETWORK (Can the container see Nginx?)" + # This checks if the container can actually resolve 'localhost' to the host machine + docker exec openml-evaluation-engine curl -v http://localhost:8000/api/v1/xml/data/1 || echo "Container cannot connect to localhost:8000" + # Force fail so you see the red X and check logs + exit 1 - name: Verify API and Splits if: matrix.os == 'ubuntu-latest' run: | From 439e683d9ba19f7820d660e879fe6a5b2c0d89db Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Fri, 13 Feb 2026 01:14:50 +0530 Subject: [PATCH 60/67] test --- .github/workflows/test.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index da689aecf..2e592a8a7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -115,6 +115,10 @@ jobs: git fetch origin pull/15/head:pr-15 git merge pr-15 --no-edit + sed -i 's/localhost:8000/172.28.0.2:8000/g' config/database/update.sh + + # Verify the change + grep "172.28.0.2" config/database/update.sh || echo "Patch failed!" - name: Start Docker Services if: matrix.os == 'ubuntu-latest' @@ -136,6 +140,7 @@ jobs: echo "4. Docker Stack is Healthy!" 
docker ps + - name: Error working-directory: ./services run: | From f87051bdec7513698e0a7c114027b8c06c718a53 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Fri, 13 Feb 2026 01:22:59 +0530 Subject: [PATCH 61/67] test --- .github/workflows/test.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2e592a8a7..d2926a790 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -117,8 +117,13 @@ jobs: git merge pr-15 --no-edit sed -i 's/localhost:8000/172.28.0.2:8000/g' config/database/update.sh - # Verify the change - grep "172.28.0.2" config/database/update.sh || echo "Patch failed!" + # === PATCH 2: Fix MinIO Path Mismatch === + # The PR uses '/minio/' but Nginx usually expects '/data/' for MinIO + # We replace '/minio/' with '/data/' in the URL rewrite script + sed -i 's|/minio/|/data/|g' config/database/update.sh + + echo "=== Patched Update Script ===" + cat config/database/update.sh | grep "172.28.0.2" - name: Start Docker Services if: matrix.os == 'ubuntu-latest' From 4077a5628aff3192abbe0181e4a8ad010e2100d0 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Fri, 13 Feb 2026 01:31:04 +0530 Subject: [PATCH 62/67] test --- .github/workflows/test.yml | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d2926a790..9cda74f35 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -101,29 +101,32 @@ jobs: echo "BEFORE=$git_status" >> $GITHUB_ENV echo "Repository status before tests: $git_status" - - name: Clone Services + - name: Configure Host Network (The "Magic" Step) + run: | + # Map 'nginx' to localhost so the Host machine can resolve the URLs in the database + echo "127.0.0.1 nginx" | sudo tee -a /etc/hosts + ping -c 1 nginx + + - name: Clone 
Services & Apply Universal Patch if: matrix.os == 'ubuntu-latest' run: | git clone --depth 1 https://github.com/openml/services.git cd services - git config user.email "ci@openml.org" git config user.name "CI" - - git fetch origin pull/13/head:pr-13 - git merge pr-13 --no-edit - - git fetch origin pull/15/head:pr-15 - git merge pr-15 --no-edit - sed -i 's/localhost:8000/172.28.0.2:8000/g' config/database/update.sh + git fetch origin pull/13/head:pr-13 && git merge pr-13 --no-edit + git fetch origin pull/15/head:pr-15 && git merge pr-15 --no-edit + + # === PATCH 1: Use 'nginx' hostname === + # This works inside Docker (DNS) and on Host (via /etc/hosts hack above) + sed -i 's/localhost:8000/nginx:8000/g' config/database/update.sh - # === PATCH 2: Fix MinIO Path Mismatch === - # The PR uses '/minio/' but Nginx usually expects '/data/' for MinIO - # We replace '/minio/' with '/data/' in the URL rewrite script + # === PATCH 2: Fix Path Mismatch === + # Ensure we use /data/ which Nginx recognizes sed -i 's|/minio/|/data/|g' config/database/update.sh echo "=== Patched Update Script ===" - cat config/database/update.sh | grep "172.28.0.2" + cat config/database/update.sh | grep "nginx" - name: Start Docker Services if: matrix.os == 'ubuntu-latest' From fad1ee7dbe052f824706dafbcdc974ff49d6cd5e Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Fri, 13 Feb 2026 02:07:20 +0530 Subject: [PATCH 63/67] test --- .github/workflows/test.yml | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9cda74f35..4c4fac0e7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -105,7 +105,6 @@ jobs: run: | # Map 'nginx' to localhost so the Host machine can resolve the URLs in the database echo "127.0.0.1 nginx" | sudo tee -a /etc/hosts - ping -c 1 nginx - name: Clone Services & Apply Universal Patch if: matrix.os == 
'ubuntu-latest' @@ -145,9 +144,34 @@ jobs: echo "3. Waiting for PHP API..." # Wait up to 5 minutes for PHP to accept connections timeout 300s bash -c 'until [ "$(docker inspect -f {{.State.Health.Status}} openml-php-rest-api)" == "healthy" ]; do sleep 5; done' + + - name: Finalize Setup & Verify Splits + if: matrix.os == 'ubuntu-latest' + run: | + echo "1. Forcing Elasticsearch Indexing Sync..." + # This helps clear the 412 errors the Engine is hitting + curl -s http://nginx:8000/api/v1/xml/admin/index/sync || echo "Sync endpoint not found, skipping..." + + echo "2. Waiting for Evaluation Engine to process Task 119..." + echo "Targeting Task 119 (The primary failure point)." + + # Give it 5 minutes (300s). Java + indexing + splits generation is heavy. + count=0 + while [ $count -lt 30 ]; do + code=$(curl -s -o /dev/null -w "%{http_code}" http://nginx:8000/api_splits/get/119/Task_119_splits.arff) + if [ "$code" == "200" ]; then + echo "āœ… SUCCESS: Task 119 splits are ready!" + exit 0 + fi + echo " ... waiting for split generation (Current Status: $code)" + sleep 10 + count=$((count+1)) + done - echo "4. Docker Stack is Healthy!" - docker ps + echo "āŒ ERROR: Evaluation Engine timed out." + # Dump logs only if this step fails + docker exec openml-evaluation-engine cat /logs/evaluation.log || echo "Could not read log file" + exit 1 - name: Error working-directory: ./services From 4086730d5c206c416cea8ff2ec6cd9cf4850a481 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Fri, 13 Feb 2026 02:18:12 +0530 Subject: [PATCH 64/67] test --- .github/workflows/test.yml | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4c4fac0e7..991caf076 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -148,14 +148,14 @@ jobs: - name: Finalize Setup & Verify Splits if: matrix.os == 'ubuntu-latest' run: | - echo "1. 
Forcing Elasticsearch Indexing Sync..." - # This helps clear the 412 errors the Engine is hitting - curl -s http://nginx:8000/api/v1/xml/admin/index/sync || echo "Sync endpoint not found, skipping..." + echo "1. Forcing Elasticsearch Indexing Sync (With Auth)..." + # We append the default test API Key (AD0...0) to authorize the admin action + curl -s "http://nginx:8000/api/v1/xml/admin/index/sync?api_key=AD000000000000000000000000000000" || echo "Sync request failed" echo "2. Waiting for Evaluation Engine to process Task 119..." echo "Targeting Task 119 (The primary failure point)." - # Give it 5 minutes (300s). Java + indexing + splits generation is heavy. + # We give it 5 minutes (300s) to handle the queue. count=0 while [ $count -lt 30 ]; do code=$(curl -s -o /dev/null -w "%{http_code}" http://nginx:8000/api_splits/get/119/Task_119_splits.arff) @@ -169,8 +169,17 @@ done echo "āŒ ERROR: Evaluation Engine timed out." - # Dump logs only if this step fails - docker exec openml-evaluation-engine cat /logs/evaluation.log || echo "Could not read log file" + + echo "=== DEBUG: LISTING LOG DIR ===" + docker exec openml-evaluation-engine ls -R /logs/ || echo "Dir empty" + + echo "=== DEBUG: DUMPING ALL LOGS ===" + # Use wildcard to catch whatever the filename actually is + docker exec openml-evaluation-engine sh -c "cat /logs/*.log" || echo "Could not read logs" + + # Also check the cron log again, just in case + docker exec openml-evaluation-engine cat /cron.log || echo "Cron log empty" + exit 1 - name: Error From fecebbccd71bebe8a9d4e7538a5c27d67237ee91 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Fri, 13 Feb 2026 02:37:54 +0530 Subject: [PATCH 65/67] windows fix?
--- openml/config.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/openml/config.py b/openml/config.py index 3f46c7480..233fbcf24 100644 --- a/openml/config.py +++ b/openml/config.py @@ -9,6 +9,7 @@ import os import platform import shutil +import sys import warnings from collections.abc import Iterator from contextlib import contextmanager @@ -27,7 +28,10 @@ OPENML_SKIP_PARQUET_ENV_VAR = "OPENML_SKIP_PARQUET" _TEST_SERVER_NORMAL_USER_KEY = "normaluser" -TEST_SERVER_URL = "http://localhost:8000" +if sys.platform.startswith("win"): + TEST_SERVER_URL = "http://localhost" +else: + TEST_SERVER_URL = "http://localhost:8000" class _Config(TypedDict): From 4845a1ed259a48caf9291a2d8eeafa33048ec5e4 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Fri, 13 Feb 2026 02:42:15 +0530 Subject: [PATCH 66/67] windows fix? --- .github/workflows/test.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 991caf076..bb666cbdc 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -101,7 +101,8 @@ jobs: echo "BEFORE=$git_status" >> $GITHUB_ENV echo "Repository status before tests: $git_status" - - name: Configure Host Network (The "Magic" Step) + - name: Configure Host Network + if: matrix.os == 'ubuntu-latest' run: | # Map 'nginx' to localhost so the Host machine can resolve the URLs in the database echo "127.0.0.1 nginx" | sudo tee -a /etc/hosts From a2470507a570ef4582017dadf392f72f172e6200 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Fri, 13 Feb 2026 02:46:28 +0530 Subject: [PATCH 67/67] windows fix? 
--- .github/workflows/test.yml | 50 -------------------------------------- 1 file changed, 50 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index bb666cbdc..f8319300d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -183,56 +183,6 @@ jobs: exit 1 - - name: Error - working-directory: ./services - run: | - echo "---------------------------------------------------" - echo "1. PROBING: Can we reach the API at all?" - timeout 60s bash -c 'until curl -sSf http://localhost:8000/api/v1/xml/data/1 > /dev/null; do sleep 5; done' || echo "WARNING: Main API is slow/down" - - echo "---------------------------------------------------" - echo "2. PROBING: Waiting for Task 119 Splits (The Failure Point)..." - # We wait 60s. If it works, great. If not, we want the logs. - timeout 60s bash -c 'until curl -sSf http://localhost:8000/api_splits/get/119/Task_119_splits.arff > /dev/null; do - echo " ... file not ready yet" - sleep 5 - done' || echo "FAILURE: Task 119 splits were NOT generated." - - echo "---------------------------------------------------" - echo "3.DUMPING EVALUATION ENGINE LOGS (STDOUT)" - docker logs openml-evaluation-engine - - echo "---------------------------------------------------" - echo "4.DUMPING INTERNAL CRON LOGS (The Hidden Logs)" - # The engine runs via cron, so the real errors are often in this file, NOT in docker logs - docker exec openml-evaluation-engine cat /cron.log || echo "Could not read /cron.log" - - echo "---------------------------------------------------" - echo "5.DUMPING PHP API LOGS (Why did it throw 412?)" - docker logs openml-php-rest-api | grep "412" -B 5 -A 5 || echo "No 412 errors found in logs?" 
- - echo "---------------------------------------------------" - echo "6.CHECKING NETWORK (Can the container see Nginx?)" - # This checks if the container can actually resolve 'localhost' to the host machine - docker exec openml-evaluation-engine curl -v http://localhost:8000/api/v1/xml/data/1 || echo "Container cannot connect to localhost:8000" - - # Force fail so you see the red X and check logs - exit 1 - - name: Verify API and Splits - if: matrix.os == 'ubuntu-latest' - run: | - echo "Checking Data API..." - timeout 60s bash -c 'until [ "$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/api/v1/xml/data/1)" == "200" ]; do sleep 5; done' - - echo "Checking Task Splits (The 412 Killer)..." - # If this fails, the evaluation engine is broken - timeout 120s bash -c 'until [ "$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/api_splits/get/1/Task_1_splits.arff)" == "200" ]; do - echo "Splits not ready yet. Waiting..." - sleep 5 - done' - - echo "System is fully operational." - - name: Show installed dependencies run: python -m pip list