diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py index dbda66614e..8341f8466f 100644 --- a/sqlmesh/core/config/connection.py +++ b/sqlmesh/core/config/connection.py @@ -58,6 +58,7 @@ "clickhouse", } MOTHERDUCK_TOKEN_REGEX = re.compile(r"(\?|\&)(motherduck_token=)(\S*)") +PASSWORD_REGEX = re.compile(r"(password=)(\S+)") def _get_engine_import_validator( @@ -479,13 +480,13 @@ def create_engine_adapter( adapter = BaseDuckDBConnectionConfig._data_file_to_adapter.get(key) if adapter is not None: logger.info( - f"Using existing DuckDB adapter due to overlapping data file: {self._mask_motherduck_token(key)}" + f"Using existing DuckDB adapter due to overlapping data file: {self._mask_sensitive_data(key)}" ) return adapter if data_files: masked_files = { - self._mask_motherduck_token(file if isinstance(file, str) else file.path) + self._mask_sensitive_data(file if isinstance(file, str) else file.path) for file in data_files } logger.info(f"Creating new DuckDB adapter for data files: {masked_files}") @@ -507,10 +508,14 @@ def get_catalog(self) -> t.Optional[str]: return list(self.catalogs)[0] return None - def _mask_motherduck_token(self, string: str) -> str: - return MOTHERDUCK_TOKEN_REGEX.sub( - lambda m: f"{m.group(1)}{m.group(2)}{'*' * len(m.group(3))}", string + def _mask_sensitive_data(self, string: str) -> str: + # Mask MotherDuck tokens with fixed number of asterisks + result = MOTHERDUCK_TOKEN_REGEX.sub( + lambda m: f"{m.group(1)}{m.group(2)}{'*' * 8 if m.group(3) else ''}", string ) + # Mask PostgreSQL/MySQL passwords with fixed number of asterisks + result = PASSWORD_REGEX.sub(lambda m: f"{m.group(1)}{'*' * 8}", result) + return result class MotherDuckConnectionConfig(BaseDuckDBConnectionConfig): diff --git a/tests/core/test_connection_config.py b/tests/core/test_connection_config.py index 4e71e18148..4e1397b7f1 100644 --- a/tests/core/test_connection_config.py +++ b/tests/core/test_connection_config.py @@ -944,42 +944,86 @@ def test_motherduck_token_mask(make_config): assert isinstance(config_1, MotherDuckConnectionConfig) assert isinstance(config_2, MotherDuckConnectionConfig) assert isinstance(config_3, MotherDuckConnectionConfig) - assert config_1._mask_motherduck_token(config_1.database) == "whodunnit" + + # motherduck format + assert config_1._mask_sensitive_data(config_1.database) == "whodunnit" assert ( - config_1._mask_motherduck_token(f"md:{config_1.database}?motherduck_token={config_1.token}") - == "md:whodunnit?motherduck_token=*****" + config_1._mask_sensitive_data(f"md:{config_1.database}?motherduck_token={config_1.token}") + == "md:whodunnit?motherduck_token=********" ) assert ( - config_1._mask_motherduck_token( + config_1._mask_sensitive_data( f"md:{config_1.database}?attach_mode=single&motherduck_token={config_1.token}" ) - == "md:whodunnit?attach_mode=single&motherduck_token=*****" + == "md:whodunnit?attach_mode=single&motherduck_token=********" ) assert ( - config_2._mask_motherduck_token(f"md:{config_2.database}?motherduck_token={config_2.token}") - == "md:whodunnit?motherduck_token=******************" + config_2._mask_sensitive_data(f"md:{config_2.database}?motherduck_token={config_2.token}") + == "md:whodunnit?motherduck_token=********" ) assert ( - config_3._mask_motherduck_token(f"md:?motherduck_token={config_3.token}") - == "md:?motherduck_token=**********" + config_3._mask_sensitive_data(f"md:?motherduck_token={config_3.token}") + == "md:?motherduck_token=********" ) assert ( - config_1._mask_motherduck_token("?motherduck_token=secret1235") - == "?motherduck_token=**********" + config_1._mask_sensitive_data("?motherduck_token=secret1235") + == "?motherduck_token=********" ) assert ( - config_1._mask_motherduck_token("md:whodunnit?motherduck_token=short") - == "md:whodunnit?motherduck_token=*****" + config_1._mask_sensitive_data("md:whodunnit?motherduck_token=short") + == "md:whodunnit?motherduck_token=********" ) assert ( - config_1._mask_motherduck_token("md:whodunnit?motherduck_token=longtoken123456789") - == "md:whodunnit?motherduck_token=******************" + config_1._mask_sensitive_data("md:whodunnit?motherduck_token=longtoken123456789") + == "md:whodunnit?motherduck_token=********" ) assert ( - config_1._mask_motherduck_token("md:whodunnit?motherduck_token=") + config_1._mask_sensitive_data("md:whodunnit?motherduck_token=") == "md:whodunnit?motherduck_token=" ) - assert config_1._mask_motherduck_token(":memory:") == ":memory:" + assert config_1._mask_sensitive_data(":memory:") == ":memory:" + + # postgres format + assert ( + config_1._mask_sensitive_data( + "postgres:dbname=mydb user=myuser password=secret123 host=localhost" + ) + == "postgres:dbname=mydb user=myuser password=******** host=localhost" + ) + + assert ( + config_1._mask_sensitive_data( + "dbname=postgres user=postgres password=pg_secret host=127.0.0.1" + ) + == "dbname=postgres user=postgres password=******** host=127.0.0.1" + ) + assert ( + config_1._mask_sensitive_data( + "postgres:dbname=testdb password=verylongpassword123 user=admin" + ) + == "postgres:dbname=testdb password=******** user=admin" + ) + assert config_1._mask_sensitive_data("postgres:password=short") == "postgres:password=********" + assert ( + config_1._mask_sensitive_data("postgres:host=localhost password=p@ssw0rd! dbname=db") + == "postgres:host=localhost password=******** dbname=db" + ) + + assert ( + config_1._mask_sensitive_data("postgres:dbname=mydb user=myuser host=localhost") + == "postgres:dbname=mydb user=myuser host=localhost" + ) + + assert ( + config_1._mask_sensitive_data("md:db?motherduck_token=token123 postgres:password=secret") + == "md:db?motherduck_token=******** postgres:password=********" + ) + + # MySQL format + assert ( + config_1._mask_sensitive_data("host=localhost user=root password=mysql123 database=mydb") + == "host=localhost user=root password=******** database=mydb" + ) def test_bigquery(make_config):