From c1fcb4fbd647024d92f1d4f4eeb8eed6fbee0e9f Mon Sep 17 00:00:00 2001 From: Daniel Song Date: Sun, 1 Mar 2026 12:39:50 -0800 Subject: [PATCH 1/3] fix: remove hardcoded TrustServerCertificate and drop vertica from CI Remove the debug `TrustServerCertificate=yes` override in the MSSQL connector that disabled TLS validation for all connections (MITM risk). Users can still opt in via URI params or TOML config. Drop vertica from CI docker compose since the `vertica/vertica-ce` image is no longer publicly pullable, which was failing all CI jobs. Vertica tests are already gated on DATADIFF_VERTICA_URI being set. Closes #2 Co-Authored-By: Claude Opus 4.6 --- .github/workflows/ci.yml | 3 +-- .github/workflows/ci_full.yml | 3 +-- data_diff/databases/mssql.py | 3 --- tests/test_mssql.py | 37 +++++++++++++++++++++++++++++++++++ tests/waiting_for_stack_up.sh | 16 +-------------- 5 files changed, 40 insertions(+), 22 deletions(-) create mode 100644 tests/test_mssql.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d5baa15f9..0e615c335 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -42,12 +42,11 @@ jobs: run: uv tool run ty check --python-version 3.10 - name: Build the stack - run: docker compose up -d mysql postgres presto trino clickhouse vertica + run: docker compose up -d mysql postgres presto trino clickhouse - name: Run tests env: DATADIFF_CLICKHOUSE_URI: "clickhouse://clickhouse:Password1@localhost:9000/clickhouse" - DATADIFF_VERTICA_URI: "vertica://vertica:Password1@localhost:5433/vertica" run: | chmod +x tests/waiting_for_stack_up.sh ./tests/waiting_for_stack_up.sh && uv run pytest tests/ diff --git a/.github/workflows/ci_full.yml b/.github/workflows/ci_full.yml index b1061a71e..797ce0ee0 100644 --- a/.github/workflows/ci_full.yml +++ b/.github/workflows/ci_full.yml @@ -34,12 +34,11 @@ jobs: run: uv sync --frozen - name: Build the stack - run: docker compose up -d mysql postgres presto trino clickhouse vertica + run: docker compose up -d mysql postgres presto trino clickhouse - name: Run tests env: DATADIFF_CLICKHOUSE_URI: "clickhouse://clickhouse:Password1@localhost:9000/clickhouse" - DATADIFF_VERTICA_URI: "vertica://vertica:Password1@localhost:5433/vertica" run: | chmod +x tests/waiting_for_stack_up.sh ./tests/waiting_for_stack_up.sh && uv run pytest tests/ diff --git a/data_diff/databases/mssql.py b/data_diff/databases/mssql.py index 917e00cee..b1c6cff32 100644 --- a/data_diff/databases/mssql.py +++ b/data_diff/databases/mssql.py @@ -178,9 +178,6 @@ def __init__(self, host, port, user, password, *, database, thread_count, **kw) self._args = {k: v for k, v in args.items() if v is not None} self._args["driver"] = "{ODBC Driver 18 for SQL Server}" - # TODO temp dev debug - self._args["TrustServerCertificate"] = "yes" - try: self.default_database = self._args["database"] self.default_schema = self._args["schema"] diff --git a/tests/test_mssql.py b/tests/test_mssql.py new file mode 100644 index 000000000..91a525d30 --- /dev/null +++ b/tests/test_mssql.py @@ -0,0 +1,37 @@ +from unittest.mock import patch + +from data_diff.databases.mssql import MsSQL + + +def _make_mssql(**extra_kw): + """Create an MsSQL instance with mocked threading and pyodbc.""" + with patch.object(MsSQL, "__attrs_post_init__", lambda self: None): + return MsSQL( + host="localhost", + port=1433, + user="sa", + password="secret", + database="testdb", + schema="dbo", + thread_count=1, + **extra_kw, + ) + + +class TestMsSQLConnectionArgs: + def test_trust_server_certificate_not_set_by_default(self): + db = _make_mssql() + assert "TrustServerCertificate" not in db._args + + def test_user_supplied_trust_server_certificate_preserved(self): + db = _make_mssql(TrustServerCertificate="yes") + assert db._args["TrustServerCertificate"] == "yes" + + def test_driver_is_set(self): + db = _make_mssql() + assert db._args["driver"] == "{ODBC Driver 18 for SQL Server}" + + def test_none_values_filtered_from_args(self): + db = _make_mssql() + for v in db._args.values(): + assert v is not None diff --git a/tests/waiting_for_stack_up.sh b/tests/waiting_for_stack_up.sh index 762138ded..6b933f8fd 100644 --- a/tests/waiting_for_stack_up.sh +++ b/tests/waiting_for_stack_up.sh @@ -1,17 +1,3 @@ #!/bin/bash -if [ -n "$DATADIFF_VERTICA_URI" ] - then - echo "Check Vertica DB running..." - while true - do - if docker logs dd-vertica | tail -n 100 | grep -q -i "vertica is now running" - then - echo "Vertica DB is ready"; - break; - else - echo "Waiting for Vertica DB starting..."; - sleep 10; - fi - done -fi +# Placeholder: add service readiness checks here as needed. From 5d96990bb5b51af0d116058279f2cbf2eccdfb7c Mon Sep 17 00:00:00 2001 From: Daniel Song Date: Sun, 1 Mar 2026 12:46:15 -0800 Subject: [PATCH 2/3] fix: add actionable SSL error message and use docker compose --wait in CI Add a targeted error message when MSSQL connections fail due to SSL/certificate issues, guiding users to pass TrustServerCertificate explicitly. This helps users who previously relied on the removed hardcoded override. Use `docker compose up -d --wait` in CI workflows so services are healthy before pytest runs, replacing the now-empty wait script. Co-Authored-By: Claude Opus 4.6 --- .github/workflows/ci.yml | 2 +- .github/workflows/ci_full.yml | 2 +- data_diff/databases/mssql.py | 10 ++++++++++ tests/test_mssql.py | 29 ++++++++++++++++++++++++++++- 4 files changed, 40 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0e615c335..e4fe63262 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -42,7 +42,7 @@ jobs: run: uv tool run ty check --python-version 3.10 - name: Build the stack - run: docker compose up -d mysql postgres presto trino clickhouse + run: docker compose up -d --wait mysql postgres presto trino clickhouse - name: Run tests env: diff --git a/.github/workflows/ci_full.yml b/.github/workflows/ci_full.yml index 797ce0ee0..567889305 100644 --- a/.github/workflows/ci_full.yml +++ b/.github/workflows/ci_full.yml @@ -34,7 +34,7 @@ jobs: run: uv sync --frozen - name: Build the stack - run: docker compose up -d mysql postgres presto trino clickhouse + run: docker compose up -d --wait mysql postgres presto trino clickhouse - name: Run tests env: diff --git a/data_diff/databases/mssql.py b/data_diff/databases/mssql.py index b1c6cff32..61cba0f17 100644 --- a/data_diff/databases/mssql.py +++ b/data_diff/databases/mssql.py @@ -192,6 +192,16 @@ def create_connection(self): connection = self._mssql.connect(**self._args) return connection except self._mssql.Error as error: + msg = str(error) + if "SSL" in msg or "certificate" in msg.lower(): + raise ConnectError( + f"TLS certificate validation failed connecting to " + f"{self._args.get('server', 'unknown host')}. " + f"ODBC Driver 18 requires encrypted connections by default. " + f"If using a self-signed certificate, pass " + f"TrustServerCertificate='yes' in your connection URI or TOML config. " + f"Original error: {error}" + ) from error raise ConnectError(*error.args) from error def select_table_schema(self, path: DbPath) -> str: diff --git a/tests/test_mssql.py b/tests/test_mssql.py index 91a525d30..855492115 100644 --- a/tests/test_mssql.py +++ b/tests/test_mssql.py @@ -1,5 +1,8 @@ -from unittest.mock import patch +from unittest.mock import MagicMock, patch +import pytest + +from data_diff.databases.base import ConnectError from data_diff.databases.mssql import MsSQL @@ -35,3 +38,27 @@ def test_none_values_filtered_from_args(self): db = _make_mssql() for v in db._args.values(): assert v is not None + + +class TestMsSQLConnectionErrors: + def test_ssl_error_provides_actionable_message(self): + db = _make_mssql() + mock_mssql = MagicMock() + mock_mssql.Error = type("Error", (Exception,), {}) + mock_mssql.connect.side_effect = mock_mssql.Error( + "[SSL Provider] The certificate chain was issued by an untrusted authority" + ) + + with patch("data_diff.databases.mssql.import_mssql", return_value=mock_mssql): + with pytest.raises(ConnectError, match="TrustServerCertificate"): + db.create_connection() + + def test_non_ssl_error_passes_through(self): + db = _make_mssql() + mock_mssql = MagicMock() + mock_mssql.Error = type("Error", (Exception,), {}) + mock_mssql.connect.side_effect = mock_mssql.Error("Login failed for user") + + with patch("data_diff.databases.mssql.import_mssql", return_value=mock_mssql): + with pytest.raises(ConnectError, match="Login failed"): + db.create_connection() From e7377ce0259052988c46bf7a62bd93f4763544a7 Mon Sep 17 00:00:00 2001 From: Daniel Song Date: Sun, 1 Mar 2026 16:23:23 -0800 Subject: [PATCH 3/3] fix: make SSL detection case-insensitive and remove dead wait script from CI Use case-insensitive matching for both "ssl" and "certificate" in the connection error detection, covering all ODBC driver message variants. Remove the now-empty waiting_for_stack_up.sh invocation from CI workflows since docker compose --wait already handles service readiness. Co-Authored-By: Claude Opus 4.6 --- .github/workflows/ci.yml | 4 +--- .github/workflows/ci_full.yml | 4 +--- data_diff/databases/mssql.py | 4 ++-- tests/test_mssql.py | 12 ++++++++++++ 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e4fe63262..9bd2562ae 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -47,6 +47,4 @@ jobs: - name: Run tests env: DATADIFF_CLICKHOUSE_URI: "clickhouse://clickhouse:Password1@localhost:9000/clickhouse" - run: | - chmod +x tests/waiting_for_stack_up.sh - ./tests/waiting_for_stack_up.sh && uv run pytest tests/ + run: uv run pytest tests/ diff --git a/.github/workflows/ci_full.yml b/.github/workflows/ci_full.yml index 567889305..9808270f7 100644 --- a/.github/workflows/ci_full.yml +++ b/.github/workflows/ci_full.yml @@ -39,6 +39,4 @@ jobs: - name: Run tests env: DATADIFF_CLICKHOUSE_URI: "clickhouse://clickhouse:Password1@localhost:9000/clickhouse" - run: | - chmod +x tests/waiting_for_stack_up.sh - ./tests/waiting_for_stack_up.sh && uv run pytest tests/ + run: uv run pytest tests/ diff --git a/data_diff/databases/mssql.py b/data_diff/databases/mssql.py index 61cba0f17..d84eab94d 100644 --- a/data_diff/databases/mssql.py +++ b/data_diff/databases/mssql.py @@ -192,8 +192,8 @@ def create_connection(self): connection = self._mssql.connect(**self._args) return connection except self._mssql.Error as error: - msg = str(error) - if "SSL" in msg or "certificate" in msg.lower(): + lower_msg = str(error).lower() + if "ssl" in lower_msg or "certificate" in lower_msg: raise ConnectError( f"TLS certificate validation failed connecting to " f"{self._args.get('server', 'unknown host')}. " diff --git a/tests/test_mssql.py b/tests/test_mssql.py index 855492115..b263cbea4 100644 --- a/tests/test_mssql.py +++ b/tests/test_mssql.py @@ -53,6 +53,18 @@ def test_ssl_error_provides_actionable_message(self): with pytest.raises(ConnectError, match="TrustServerCertificate"): db.create_connection() + def test_certificate_only_keyword_triggers_actionable_message(self): + db = _make_mssql() + mock_mssql = MagicMock() + mock_mssql.Error = type("Error", (Exception,), {}) + mock_mssql.connect.side_effect = mock_mssql.Error( + "The remote certificate was rejected by the verification procedure" + ) + + with patch("data_diff.databases.mssql.import_mssql", return_value=mock_mssql): + with pytest.raises(ConnectError, match="TrustServerCertificate"): + db.create_connection() + def test_non_ssl_error_passes_through(self): db = _make_mssql() mock_mssql = MagicMock()