From 919a7534798aae105bdc9dbb76712eeb14236a47 Mon Sep 17 00:00:00 2001 From: Themis Valtinos <73662635+themisvaltinos@users.noreply.github.com> Date: Fri, 10 Oct 2025 17:21:49 +0300 Subject: [PATCH 1/2] Fix: Mask credentials in duckdb attach for postgres, mysql logging --- sqlmesh/core/config/connection.py | 13 ++++-- tests/core/test_connection_config.py | 64 +++++++++++++++++++++++----- 2 files changed, 63 insertions(+), 14 deletions(-) diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py index dbda66614e..e5526469e9 100644 --- a/sqlmesh/core/config/connection.py +++ b/sqlmesh/core/config/connection.py @@ -58,6 +58,7 @@ "clickhouse", } MOTHERDUCK_TOKEN_REGEX = re.compile(r"(\?|\&)(motherduck_token=)(\S*)") +PASSWORD_REGEX = re.compile(r"(password=)(\S+)") def _get_engine_import_validator( @@ -479,13 +480,13 @@ def create_engine_adapter( adapter = BaseDuckDBConnectionConfig._data_file_to_adapter.get(key) if adapter is not None: logger.info( - f"Using existing DuckDB adapter due to overlapping data file: {self._mask_motherduck_token(key)}" + f"Using existing DuckDB adapter due to overlapping data file: {self._mask_sensitive_data(key)}" ) return adapter if data_files: masked_files = { - self._mask_motherduck_token(file if isinstance(file, str) else file.path) + self._mask_sensitive_data(file if isinstance(file, str) else file.path) for file in data_files } logger.info(f"Creating new DuckDB adapter for data files: {masked_files}") @@ -507,10 +508,14 @@ def get_catalog(self) -> t.Optional[str]: return list(self.catalogs)[0] return None - def _mask_motherduck_token(self, string: str) -> str: - return MOTHERDUCK_TOKEN_REGEX.sub( + def _mask_sensitive_data(self, string: str) -> str: + # Mask MotherDuck tokens + result = MOTHERDUCK_TOKEN_REGEX.sub( lambda m: f"{m.group(1)}{m.group(2)}{'*' * len(m.group(3))}", string ) + # Mask PostgreSQL and MySQL passwords + result = PASSWORD_REGEX.sub(lambda m: f"{m.group(1)}{'*' * len(m.group(2))}", result) + return result class MotherDuckConnectionConfig(BaseDuckDBConnectionConfig): diff --git a/tests/core/test_connection_config.py b/tests/core/test_connection_config.py index 4e71e18148..286a7ab9c1 100644 --- a/tests/core/test_connection_config.py +++ b/tests/core/test_connection_config.py @@ -944,42 +944,86 @@ def test_motherduck_token_mask(make_config): assert isinstance(config_1, MotherDuckConnectionConfig) assert isinstance(config_2, MotherDuckConnectionConfig) assert isinstance(config_3, MotherDuckConnectionConfig) - assert config_1._mask_motherduck_token(config_1.database) == "whodunnit" + + # motherduck format + assert config_1._mask_sensitive_data(config_1.database) == "whodunnit" assert ( - config_1._mask_motherduck_token(f"md:{config_1.database}?motherduck_token={config_1.token}") + config_1._mask_sensitive_data(f"md:{config_1.database}?motherduck_token={config_1.token}") == "md:whodunnit?motherduck_token=*****" ) assert ( - config_1._mask_motherduck_token( + config_1._mask_sensitive_data( f"md:{config_1.database}?attach_mode=single&motherduck_token={config_1.token}" ) == "md:whodunnit?attach_mode=single&motherduck_token=*****" ) assert ( - config_2._mask_motherduck_token(f"md:{config_2.database}?motherduck_token={config_2.token}") + config_2._mask_sensitive_data(f"md:{config_2.database}?motherduck_token={config_2.token}") == "md:whodunnit?motherduck_token=******************" ) assert ( - config_3._mask_motherduck_token(f"md:?motherduck_token={config_3.token}") + config_3._mask_sensitive_data(f"md:?motherduck_token={config_3.token}") == "md:?motherduck_token=**********" ) assert ( - config_1._mask_motherduck_token("?motherduck_token=secret1235") + config_1._mask_sensitive_data("?motherduck_token=secret1235") == "?motherduck_token=**********" ) assert ( - config_1._mask_motherduck_token("md:whodunnit?motherduck_token=short") + config_1._mask_sensitive_data("md:whodunnit?motherduck_token=short") == "md:whodunnit?motherduck_token=*****" ) assert ( - config_1._mask_motherduck_token("md:whodunnit?motherduck_token=longtoken123456789") + config_1._mask_sensitive_data("md:whodunnit?motherduck_token=longtoken123456789") == "md:whodunnit?motherduck_token=******************" ) assert ( - config_1._mask_motherduck_token("md:whodunnit?motherduck_token=") + config_1._mask_sensitive_data("md:whodunnit?motherduck_token=") == "md:whodunnit?motherduck_token=" ) - assert config_1._mask_motherduck_token(":memory:") == ":memory:" + assert config_1._mask_sensitive_data(":memory:") == ":memory:" + + # postgres format + assert ( + config_1._mask_sensitive_data( + "postgres:dbname=mydb user=myuser password=secret123 host=localhost" + ) + == "postgres:dbname=mydb user=myuser password=********* host=localhost" + ) + + assert ( + config_1._mask_sensitive_data( + "dbname=postgres user=postgres password=pg_secret host=127.0.0.1" + ) + == "dbname=postgres user=postgres password=********* host=127.0.0.1" + ) + assert ( + config_1._mask_sensitive_data( + "postgres:dbname=testdb password=verylongpassword123 user=admin" + ) + == "postgres:dbname=testdb password=******************* user=admin" + ) + assert config_1._mask_sensitive_data("postgres:password=short") == "postgres:password=*****" + assert ( + config_1._mask_sensitive_data("postgres:host=localhost password=p@ssw0rd! dbname=db") + == "postgres:host=localhost password=********* dbname=db" + ) + + assert ( + config_1._mask_sensitive_data("postgres:dbname=mydb user=myuser host=localhost") + == "postgres:dbname=mydb user=myuser host=localhost" + ) + + assert ( + config_1._mask_sensitive_data("md:db?motherduck_token=token123 postgres:password=secret") + == "md:db?motherduck_token=******** postgres:password=******" + ) + + # MySQL format + assert ( + config_1._mask_sensitive_data("host=localhost user=root password=mysql123 database=mydb") + == "host=localhost user=root password=******** database=mydb" + ) def test_bigquery(make_config): From b62287649d1ff72522392365bf4c9e03596a149a Mon Sep 17 00:00:00 2001 From: Themis Valtinos <73662635+themisvaltinos@users.noreply.github.com> Date: Fri, 10 Oct 2025 17:52:11 +0300 Subject: [PATCH 2/2] pr feedback --- sqlmesh/core/config/connection.py | 8 ++++---- tests/core/test_connection_config.py | 26 +++++++++++++------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py index e5526469e9..8341f8466f 100644 --- a/sqlmesh/core/config/connection.py +++ b/sqlmesh/core/config/connection.py @@ -509,12 +509,12 @@ def get_catalog(self) -> t.Optional[str]: return None def _mask_sensitive_data(self, string: str) -> str: - # Mask MotherDuck tokens + # Mask MotherDuck tokens with fixed number of asterisks result = MOTHERDUCK_TOKEN_REGEX.sub( - lambda m: f"{m.group(1)}{m.group(2)}{'*' * len(m.group(3))}", string + lambda m: f"{m.group(1)}{m.group(2)}{'*' * 8 if m.group(3) else ''}", string ) - # Mask PostgreSQL and MySQL passwords - result = PASSWORD_REGEX.sub(lambda m: f"{m.group(1)}{'*' * len(m.group(2))}", result) + # Mask PostgreSQL/MySQL passwords with fixed number of asterisks + result = PASSWORD_REGEX.sub(lambda m: f"{m.group(1)}{'*' * 8}", result) return result diff --git a/tests/core/test_connection_config.py b/tests/core/test_connection_config.py index 286a7ab9c1..4e1397b7f1 100644 --- a/tests/core/test_connection_config.py +++ b/tests/core/test_connection_config.py @@ -949,33 +949,33 @@ def test_motherduck_token_mask(make_config): assert config_1._mask_sensitive_data(config_1.database) == "whodunnit" assert ( config_1._mask_sensitive_data(f"md:{config_1.database}?motherduck_token={config_1.token}") - == "md:whodunnit?motherduck_token=*****" + == "md:whodunnit?motherduck_token=********" ) assert ( config_1._mask_sensitive_data( f"md:{config_1.database}?attach_mode=single&motherduck_token={config_1.token}" ) - == "md:whodunnit?attach_mode=single&motherduck_token=*****" + == "md:whodunnit?attach_mode=single&motherduck_token=********" ) assert ( config_2._mask_sensitive_data(f"md:{config_2.database}?motherduck_token={config_2.token}") - == "md:whodunnit?motherduck_token=******************" + == "md:whodunnit?motherduck_token=********" ) assert ( config_3._mask_sensitive_data(f"md:?motherduck_token={config_3.token}") - == "md:?motherduck_token=**********" + == "md:?motherduck_token=********" ) assert ( config_1._mask_sensitive_data("?motherduck_token=secret1235") - == "?motherduck_token=**********" + == "?motherduck_token=********" ) assert ( config_1._mask_sensitive_data("md:whodunnit?motherduck_token=short") - == "md:whodunnit?motherduck_token=*****" + == "md:whodunnit?motherduck_token=********" ) assert ( config_1._mask_sensitive_data("md:whodunnit?motherduck_token=longtoken123456789") - == "md:whodunnit?motherduck_token=******************" + == "md:whodunnit?motherduck_token=********" ) assert ( config_1._mask_sensitive_data("md:whodunnit?motherduck_token=") @@ -988,25 +988,25 @@ def test_motherduck_token_mask(make_config): config_1._mask_sensitive_data( "postgres:dbname=mydb user=myuser password=secret123 host=localhost" ) - == "postgres:dbname=mydb user=myuser password=********* host=localhost" + == "postgres:dbname=mydb user=myuser password=******** host=localhost" ) assert ( config_1._mask_sensitive_data( "dbname=postgres user=postgres password=pg_secret host=127.0.0.1" ) - == "dbname=postgres user=postgres password=********* host=127.0.0.1" + == "dbname=postgres user=postgres password=******** host=127.0.0.1" ) assert ( config_1._mask_sensitive_data( "postgres:dbname=testdb password=verylongpassword123 user=admin" ) - == "postgres:dbname=testdb password=******************* user=admin" + == "postgres:dbname=testdb password=******** user=admin" ) - assert config_1._mask_sensitive_data("postgres:password=short") == "postgres:password=*****" + assert config_1._mask_sensitive_data("postgres:password=short") == "postgres:password=********" assert ( config_1._mask_sensitive_data("postgres:host=localhost password=p@ssw0rd! dbname=db") - == "postgres:host=localhost password=********* dbname=db" + == "postgres:host=localhost password=******** dbname=db" ) assert ( @@ -1016,7 +1016,7 @@ def test_motherduck_token_mask(make_config): assert ( config_1._mask_sensitive_data("md:db?motherduck_token=token123 postgres:password=secret") - == "md:db?motherduck_token=******** postgres:password=******" + == "md:db?motherduck_token=******** postgres:password=********" ) # MySQL format