diff --git a/sqlmesh/core/engine_adapter/base.py b/sqlmesh/core/engine_adapter/base.py index 8bdc3147d1..5586418c79 100644 --- a/sqlmesh/core/engine_adapter/base.py +++ b/sqlmesh/core/engine_adapter/base.py @@ -106,6 +106,7 @@ class EngineAdapter: SUPPORTS_REPLACE_TABLE = True DEFAULT_CATALOG_TYPE = DIALECT QUOTE_IDENTIFIERS_IN_VIEWS = True + MAX_IDENTIFIER_LENGTH: t.Optional[int] = None def __init__( self, @@ -2138,14 +2139,12 @@ def execute( ) with self.transaction(): for e in ensure_list(expressions): - sql = t.cast( - str, - ( - self._to_sql(e, quote=quote_identifiers, **to_sql_kwargs) - if isinstance(e, exp.Expression) - else e - ), - ) + if isinstance(e, exp.Expression): + self._check_identifier_length(e) + sql = self._to_sql(e, quote=quote_identifiers, **to_sql_kwargs) + else: + sql = t.cast(str, e) + self._log_sql( sql, expression=e if isinstance(e, exp.Expression) else None, @@ -2516,6 +2515,18 @@ def ping(self) -> None: def _select_columns(cls, columns: t.Iterable[str]) -> exp.Select: return exp.select(*(exp.column(c, quoted=True) for c in columns)) + def _check_identifier_length(self, expression: exp.Expression) -> None: + if self.MAX_IDENTIFIER_LENGTH is None or not isinstance(expression, exp.DDL): + return + + for identifier in expression.find_all(exp.Identifier): + name = identifier.name + name_length = len(name) + if name_length > self.MAX_IDENTIFIER_LENGTH: + raise SQLMeshError( + f"Identifier name '{name}' (length {name_length}) exceeds {self.dialect.capitalize()}'s max identifier limit of {self.MAX_IDENTIFIER_LENGTH} characters" + ) + class EngineAdapterWithIndexSupport(EngineAdapter): SUPPORTS_INDEXES = True diff --git a/sqlmesh/core/engine_adapter/mysql.py b/sqlmesh/core/engine_adapter/mysql.py index 8fb3b8fcee..298dc18903 100644 --- a/sqlmesh/core/engine_adapter/mysql.py +++ b/sqlmesh/core/engine_adapter/mysql.py @@ -39,6 +39,7 @@ class MySQLEngineAdapter( MAX_TABLE_COMMENT_LENGTH = 2048 MAX_COLUMN_COMMENT_LENGTH = 1024 SUPPORTS_REPLACE_TABLE = False + MAX_IDENTIFIER_LENGTH = 64 SCHEMA_DIFFER = SchemaDiffer( parameterized_type_defaults={ exp.DataType.build("BIT", dialect=DIALECT).this: [(1,)], diff --git a/sqlmesh/core/engine_adapter/postgres.py b/sqlmesh/core/engine_adapter/postgres.py index eac239ff39..bd7faef289 100644 --- a/sqlmesh/core/engine_adapter/postgres.py +++ b/sqlmesh/core/engine_adapter/postgres.py @@ -34,6 +34,7 @@ class PostgresEngineAdapter( HAS_VIEW_BINDING = True CURRENT_CATALOG_EXPRESSION = exp.column("current_catalog") SUPPORTS_REPLACE_TABLE = False + MAX_IDENTIFIER_LENGTH = 63 SCHEMA_DIFFER = SchemaDiffer( parameterized_type_defaults={ # DECIMAL without precision is "up to 131072 digits before the decimal point; up to 16383 digits after the decimal point" diff --git a/sqlmesh/core/engine_adapter/risingwave.py b/sqlmesh/core/engine_adapter/risingwave.py index c55d709ef3..f32ce2f457 100644 --- a/sqlmesh/core/engine_adapter/risingwave.py +++ b/sqlmesh/core/engine_adapter/risingwave.py @@ -30,6 +30,7 @@ class RisingwaveEngineAdapter(PostgresEngineAdapter): COMMENT_CREATION_VIEW = CommentCreationView.UNSUPPORTED SUPPORTS_MATERIALIZED_VIEWS = True SUPPORTS_TRANSACTIONS = False + MAX_IDENTIFIER_LENGTH = None def _truncate_table(self, table_name: TableName) -> None: return self.execute(exp.Delete(this=exp.to_table(table_name))) diff --git a/tests/core/engine_adapter/integration/test_integration.py b/tests/core/engine_adapter/integration/test_integration.py index 89a6e2acae..67aa8d6ad4 100644 --- a/tests/core/engine_adapter/integration/test_integration.py +++ b/tests/core/engine_adapter/integration/test_integration.py @@ -3,6 +3,7 @@ import os import pathlib +import re import sys import typing as t import shutil @@ -1571,7 +1572,6 @@ def test_init_project(ctx: TestContext, tmp_path_factory: pytest.TempPathFactory # normalize object names for snowflake if ctx.dialect == "snowflake": - import re def _normalize_snowflake(name: str, prefix_regex: str = "(sqlmesh__)(.*)"): match = re.search(prefix_regex, name) @@ -1789,7 +1789,6 @@ def test_to_time_column( # Clickhouse does not have natively timezone-aware types and does not accept timestrings # with UTC offset "+XX:XX". Therefore, we remove the timezone offset and set a timezone- # specific data type to validate what is returned. - import re time_column = re.match(r"^(.*?)\+", time_column).group(1) time_column_type = exp.DataType.build("TIMESTAMP('UTC')", dialect="clickhouse") @@ -2652,3 +2651,18 @@ def execute( {"id": 1, "name": "foo"} if ctx.dialect != "snowflake" else {"ID": 1, "NAME": "foo"} ) assert df.iloc[0].to_dict() == expected_result + + +def test_identifier_length_limit(ctx: TestContext): + adapter = ctx.engine_adapter + if adapter.MAX_IDENTIFIER_LENGTH is None: + pytest.skip(f"Engine {adapter.dialect} does not have identifier length limits set.") + + long_table_name = "a" * (adapter.MAX_IDENTIFIER_LENGTH + 1) + + match = f"Identifier name '{long_table_name}' (length {len(long_table_name)}) exceeds {adapter.dialect.capitalize()}'s max identifier limit of {adapter.MAX_IDENTIFIER_LENGTH} characters" + with pytest.raises( + SQLMeshError, + match=re.escape(match), + ): + adapter.create_table(long_table_name, {"col": exp.DataType.build("int")})