From b1e3086bfddf1ee494571f3fff7c0f07f4f2501f Mon Sep 17 00:00:00 2001 From: vaggelisd Date: Fri, 16 May 2025 12:46:04 +0300 Subject: [PATCH 1/2] Feat: Introduce runtime check for identifier limits per engine --- sqlmesh/core/engine_adapter/base.py | 31 ++++++++++++++----- sqlmesh/core/engine_adapter/mysql.py | 1 + sqlmesh/core/engine_adapter/postgres.py | 1 + .../integration/test_integration.py | 14 +++++++++ 4 files changed, 39 insertions(+), 8 deletions(-) diff --git a/sqlmesh/core/engine_adapter/base.py b/sqlmesh/core/engine_adapter/base.py index 8bdc3147d1..86efb4530a 100644 --- a/sqlmesh/core/engine_adapter/base.py +++ b/sqlmesh/core/engine_adapter/base.py @@ -106,6 +106,7 @@ class EngineAdapter: SUPPORTS_REPLACE_TABLE = True DEFAULT_CATALOG_TYPE = DIALECT QUOTE_IDENTIFIERS_IN_VIEWS = True + MAX_IDENTIFIER_LENGTH: t.Optional[int] = None def __init__( self, @@ -2138,14 +2139,12 @@ def execute( ) with self.transaction(): for e in ensure_list(expressions): - sql = t.cast( - str, - ( - self._to_sql(e, quote=quote_identifiers, **to_sql_kwargs) - if isinstance(e, exp.Expression) - else e - ), - ) + if isinstance(e, exp.Expression): + self._check_identifier_length(e, check_only_ddl=True) + sql = self._to_sql(e, quote=quote_identifiers, **to_sql_kwargs) + else: + sql = t.cast(str, e) + self._log_sql( sql, expression=e if isinstance(e, exp.Expression) else None, @@ -2516,6 +2515,22 @@ def ping(self) -> None: def _select_columns(cls, columns: t.Iterable[str]) -> exp.Select: return exp.select(*(exp.column(c, quoted=True) for c in columns)) + def _check_identifier_length( + self, expression: exp.Expression, check_only_ddl: bool = True + ) -> None: + if self.MAX_IDENTIFIER_LENGTH is None or ( + check_only_ddl and not isinstance(expression, exp.DDL) + ): + return + + for identifier in expression.find_all(exp.Identifier): + name = identifier.name + name_length = len(name) + if name_length > self.MAX_IDENTIFIER_LENGTH: + raise SQLMeshError( + f"Identifier name {name} (length {name_length}) exceeds {self.dialect.capitalize()}'s max identifier limit of {self.MAX_IDENTIFIER_LENGTH} characters" + ) + class EngineAdapterWithIndexSupport(EngineAdapter): SUPPORTS_INDEXES = True diff --git a/sqlmesh/core/engine_adapter/mysql.py b/sqlmesh/core/engine_adapter/mysql.py index 8fb3b8fcee..298dc18903 100644 --- a/sqlmesh/core/engine_adapter/mysql.py +++ b/sqlmesh/core/engine_adapter/mysql.py @@ -39,6 +39,7 @@ class MySQLEngineAdapter( MAX_TABLE_COMMENT_LENGTH = 2048 MAX_COLUMN_COMMENT_LENGTH = 1024 SUPPORTS_REPLACE_TABLE = False + MAX_IDENTIFIER_LENGTH = 64 SCHEMA_DIFFER = SchemaDiffer( parameterized_type_defaults={ exp.DataType.build("BIT", dialect=DIALECT).this: [(1,)], diff --git a/sqlmesh/core/engine_adapter/postgres.py b/sqlmesh/core/engine_adapter/postgres.py index eac239ff39..bd7faef289 100644 --- a/sqlmesh/core/engine_adapter/postgres.py +++ b/sqlmesh/core/engine_adapter/postgres.py @@ -34,6 +34,7 @@ class PostgresEngineAdapter( HAS_VIEW_BINDING = True CURRENT_CATALOG_EXPRESSION = exp.column("current_catalog") SUPPORTS_REPLACE_TABLE = False + MAX_IDENTIFIER_LENGTH = 63 SCHEMA_DIFFER = SchemaDiffer( parameterized_type_defaults={ # DECIMAL without precision is "up to 131072 digits before the decimal point; up to 16383 digits after the decimal point" diff --git a/tests/core/engine_adapter/integration/test_integration.py b/tests/core/engine_adapter/integration/test_integration.py index 89a6e2acae..95f13c912a 100644 --- a/tests/core/engine_adapter/integration/test_integration.py +++ b/tests/core/engine_adapter/integration/test_integration.py @@ -2652,3 +2652,17 @@ def execute( {"id": 1, "name": "foo"} if ctx.dialect != "snowflake" else {"ID": 1, "NAME": "foo"} ) assert df.iloc[0].to_dict() == expected_result + + +def test_identifier_length_limit(ctx: TestContext): + adapter = ctx.engine_adapter + if adapter.MAX_IDENTIFIER_LENGTH is None: + pytest.skip(f"Engine {adapter.dialect} does not have identifier length limits set.") + + long_table_name = "a" * (adapter.MAX_IDENTIFIER_LENGTH + 1) + + with pytest.raises( + SQLMeshError, + match=f"Identifier name {long_table_name} (length {len(long_table_name)}) exceeds {adapter.dialect.capitalize()}'s max identifier limit of {adapter.MAX_IDENTIFIER_LENGTH} characters", + ): + adapter.create_table(long_table_name, {"col": exp.DataType.build("int")}) From 3e06f0a4fbebc59c2daba2036f9010a8214797e2 Mon Sep 17 00:00:00 2001 From: vaggelisd Date: Fri, 16 May 2025 18:14:01 +0300 Subject: [PATCH 2/2] PR Feedback 1 --- sqlmesh/core/engine_adapter/base.py | 12 ++++-------- sqlmesh/core/engine_adapter/risingwave.py | 1 + .../engine_adapter/integration/test_integration.py | 6 +++--- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/sqlmesh/core/engine_adapter/base.py b/sqlmesh/core/engine_adapter/base.py index 86efb4530a..5586418c79 100644 --- a/sqlmesh/core/engine_adapter/base.py +++ b/sqlmesh/core/engine_adapter/base.py @@ -2140,7 +2140,7 @@ def execute( with self.transaction(): for e in ensure_list(expressions): if isinstance(e, exp.Expression): - self._check_identifier_length(e, check_only_ddl=True) + self._check_identifier_length(e) sql = self._to_sql(e, quote=quote_identifiers, **to_sql_kwargs) else: sql = t.cast(str, e) @@ -2515,12 +2515,8 @@ def ping(self) -> None: def _select_columns(cls, columns: t.Iterable[str]) -> exp.Select: return exp.select(*(exp.column(c, quoted=True) for c in columns)) - def _check_identifier_length( - self, expression: exp.Expression, check_only_ddl: bool = True - ) -> None: - if self.MAX_IDENTIFIER_LENGTH is None or ( - check_only_ddl and not isinstance(expression, exp.DDL) - ): + def _check_identifier_length(self, expression: exp.Expression) -> None: + if self.MAX_IDENTIFIER_LENGTH is None or not isinstance(expression, exp.DDL): return for identifier in expression.find_all(exp.Identifier): @@ -2528,7 +2524,7 @@ def _check_identifier_length( name_length = len(name) if name_length > self.MAX_IDENTIFIER_LENGTH: raise SQLMeshError( - f"Identifier name {name} (length {name_length}) exceeds {self.dialect.capitalize()}'s max identifier limit of {self.MAX_IDENTIFIER_LENGTH} characters" + f"Identifier name '{name}' (length {name_length}) exceeds {self.dialect.capitalize()}'s max identifier limit of {self.MAX_IDENTIFIER_LENGTH} characters" ) diff --git a/sqlmesh/core/engine_adapter/risingwave.py b/sqlmesh/core/engine_adapter/risingwave.py index c55d709ef3..f32ce2f457 100644 --- a/sqlmesh/core/engine_adapter/risingwave.py +++ b/sqlmesh/core/engine_adapter/risingwave.py @@ -30,6 +30,7 @@ class RisingwaveEngineAdapter(PostgresEngineAdapter): COMMENT_CREATION_VIEW = CommentCreationView.UNSUPPORTED SUPPORTS_MATERIALIZED_VIEWS = True SUPPORTS_TRANSACTIONS = False + MAX_IDENTIFIER_LENGTH = None def _truncate_table(self, table_name: TableName) -> None: return self.execute(exp.Delete(this=exp.to_table(table_name))) diff --git a/tests/core/engine_adapter/integration/test_integration.py b/tests/core/engine_adapter/integration/test_integration.py index 95f13c912a..67aa8d6ad4 100644 --- a/tests/core/engine_adapter/integration/test_integration.py +++ b/tests/core/engine_adapter/integration/test_integration.py @@ -3,6 +3,7 @@ import os import pathlib +import re import sys import typing as t import shutil @@ -1571,7 +1572,6 @@ def test_init_project(ctx: TestContext, tmp_path_factory: pytest.TempPathFactory # normalize object names for snowflake if ctx.dialect == "snowflake": - import re def _normalize_snowflake(name: str, prefix_regex: str = "(sqlmesh__)(.*)"): match = re.search(prefix_regex, name) @@ -1789,7 +1789,6 @@ def test_to_time_column( # Clickhouse does not have natively timezone-aware types and does not accept timestrings # with UTC offset "+XX:XX". Therefore, we remove the timezone offset and set a timezone- # specific data type to validate what is returned. - import re time_column = re.match(r"^(.*?)\+", time_column).group(1) time_column_type = exp.DataType.build("TIMESTAMP('UTC')", dialect="clickhouse") @@ -2661,8 +2660,9 @@ def test_identifier_length_limit(ctx: TestContext): long_table_name = "a" * (adapter.MAX_IDENTIFIER_LENGTH + 1) + match = f"Identifier name '{long_table_name}' (length {len(long_table_name)}) exceeds {adapter.dialect.capitalize()}'s max identifier limit of {adapter.MAX_IDENTIFIER_LENGTH} characters" with pytest.raises( SQLMeshError, - match=f"Identifier name {long_table_name} (length {len(long_table_name)}) exceeds {adapter.dialect.capitalize()}'s max identifier limit of {adapter.MAX_IDENTIFIER_LENGTH} characters", + match=re.escape(match), ): adapter.create_table(long_table_name, {"col": exp.DataType.build("int")})