From fee780f641e72d7dc196829a8dd3cbeca0d8f5f4 Mon Sep 17 00:00:00 2001 From: Erin Drummond Date: Thu, 17 Jul 2025 05:38:29 +0000 Subject: [PATCH 1/4] Feat: Allow some control of table naming at the physical layer --- docs/guides/configuration.md | 91 ++++++++++- docs/reference/configuration.md | 5 +- sqlmesh/core/config/__init__.py | 5 +- sqlmesh/core/config/common.py | 28 ++++ sqlmesh/core/config/root.py | 4 +- sqlmesh/core/context.py | 6 +- sqlmesh/core/snapshot/definition.py | 46 +++++- sqlmesh/utils/hashing.py | 4 +- tests/core/test_config.py | 29 ++++ tests/core/test_integration.py | 72 +++++++++ tests/core/test_snapshot.py | 229 +++++++++++++++++++++++++++- 11 files changed, 505 insertions(+), 14 deletions(-) diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md index 52ebdf7793..069c14fe76 100644 --- a/docs/guides/configuration.md +++ b/docs/guides/configuration.md @@ -320,10 +320,14 @@ The cache directory is automatically created if it doesn't exist. You can clear SQLMesh creates schemas, physical tables, and views in the data warehouse/engine. Learn more about why and how SQLMesh creates schema in the ["Why does SQLMesh create schemas?" FAQ](../faq/faq.md#schema-question). -The default SQLMesh behavior described in the FAQ is appropriate for most deployments, but you can override where SQLMesh creates physical tables and views with the `physical_schema_mapping`, `environment_suffix_target`, and `environment_catalog_mapping` configuration options. These options are in the [environments](../reference/configuration.md#environments) section of the configuration reference page. +The default SQLMesh behavior described in the FAQ is appropriate for most deployments, but you can override *where* SQLMesh creates physical tables and views with the `physical_schema_mapping`, `environment_suffix_target`, and `environment_catalog_mapping` configuration options. 
+ +You can also override *what* the physical tables are called by using the `physical_table_naming_convention` option. + +These options are in the [environments](../reference/configuration.md#environments) section of the configuration reference page. #### Physical table schemas -By default, SQLMesh creates physical tables for a model with a naming convention of `sqlmesh__[model schema]`. +By default, SQLMesh creates physical schemas for a model with a naming convention of `sqlmesh__[model schema]`. This can be overridden on a per-schema basis using the `physical_schema_mapping` option, which removes the `sqlmesh__` prefix and uses the [regex pattern](https://docs.python.org/3/library/re.html#regular-expression-syntax) you provide to map the schemas defined in your model to their corresponding physical schemas. @@ -436,6 +440,89 @@ Given the example of a model called `my_schema.users` with a default catalog of - Using `environment_suffix_target: catalog` only works on engines that support querying across different catalogs. If your engine does not support cross-catalog queries then you will need to use `environment_suffix_target: schema` or `environment_suffix_target: table` instead. - Automatic catalog creation is not supported on all engines even if they support cross-catalog queries. For engines where it is not supported, the catalogs must be managed externally from SQLMesh and exist prior to invoking SQLMesh. 
+#### Physical table naming convention + +Out of the box, SQLMesh has the following defaults set: + + - `environment_suffix_target: schema` + - `physical_table_naming_convention: schema_and_table` + +Given a catalog of `warehouse` and a model named `finance_mart.transaction_events_over_threshold`, this causes SQLMesh to create physical tables using the following convention: + +``` +# <catalog>.sqlmesh__<schema>.<schema>__<table>__<fingerprint> + +warehouse.sqlmesh__finance_mart.finance_mart__transaction_events_over_threshold__<fingerprint> +``` + +This deliberately contains some redundancy with the *model* schema as it's repeated at the physical layer in both the physical schema name as well as the physical table name. + +##### Table only + +Some engines have object name length limitations which cause them to [silently truncate](https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS) table and view names that exceed this limit. This behaviour breaks SQLMesh, so we raise a runtime error if we detect the engine would silently truncate the name of the table we are trying to create. + +Having redundancy in the physical table names does reduce the number of characters that can be utilised in model names. To increase the number of characters available to model names, you can use `physical_table_naming_convention` like so: + +=== "YAML" + + ```yaml linenums="1" + physical_table_naming_convention: table_only + ``` + +=== "Python" + + ```python linenums="1" + from sqlmesh.core.config import Config, ModelDefaultsConfig, TableNamingConvention + + config = Config( + model_defaults=ModelDefaultsConfig(dialect=<dialect>), + physical_table_naming_convention=TableNamingConvention.TABLE_ONLY, + ) + ``` + +This will cause SQLMesh to omit the model schema from the table name and generate physical names that look like (using the above example): +```
+# <catalog>.sqlmesh__<schema>.<table>__<fingerprint> + +warehouse.sqlmesh__finance_mart.transaction_events_over_threshold__<fingerprint> +``` + +Notice that the model schema name is no longer part of the physical table name. This allows for slightly longer model names on engines with low identifier length limits, which may be useful for your project. + +##### MD5 hash + +If you *still* need more characters, you can set `physical_table_naming_convention: hash_md5` like so: + +=== "YAML" + + ```yaml linenums="1" + physical_table_naming_convention: hash_md5 + ``` + +=== "Python" + + ```python linenums="1" + from sqlmesh.core.config import Config, ModelDefaultsConfig, TableNamingConvention + + config = Config( + model_defaults=ModelDefaultsConfig(dialect=<dialect>), + physical_table_naming_convention=TableNamingConvention.HASH_MD5, + ) + ``` + +This will cause SQLMesh to generate physical names that are always 45-50 characters in length and look something like: + +``` +# sqlmesh_md5__<md5 hash of the schema_and_table name> + +sqlmesh_md5__d3b07384d113edec49eaa6238ad5ff00 + +# or, for a dev preview +sqlmesh_md5__d3b07384d113edec49eaa6238ad5ff00__dev +``` + +This has a downside that now it's much more difficult to determine which table corresponds to which model by just looking at the database with a SQL client. However, the table names now have a predictable length so there are no longer any surprises with identifiers exceeding the max length at the physical layer. + #### Environment view catalogs By default, SQLMesh creates an environment view in the same [catalog](../concepts/glossary.md#catalog) as the physical table the view points to. The physical table's catalog is determined by either the catalog specified in the model name or the default catalog defined in the connection. diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index 40d0eeb26b..e44f650bf0 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -36,8 +36,9 @@ Configuration options for SQLMesh environment creation and promotion. 
| `physical_schema_override` | (Deprecated) Use `physical_schema_mapping` instead. A mapping from model schema names to names of schemas in which physical tables for the corresponding models will be placed. | dict[string, string] | N | | `physical_schema_mapping` | A mapping from regular expressions to names of schemas in which physical tables for the corresponding models [will be placed](../guides/configuration.md#physical-table-schemas). (Default physical schema name: `sqlmesh__[model schema]`) | dict[string, string] | N | | `environment_suffix_target` | Whether SQLMesh views should append their environment name to the `schema` or `table` - [additional details](../guides/configuration.md#view-schema-override). (Default: `schema`) | string | N | -| `gateway_managed_virtual_layer` | Whether SQLMesh views of the virtual layer will be created by the default gateway or model specified gateways - [additional details](../guides/multi_engine.md#gateway-managed-virtual-layer). (Default: False) | boolean | N | -| `infer_python_dependencies` | Whether SQLMesh will statically analyze Python code to automatically infer Python package requirements. (Default: True) | boolean | N | +| `physical_table_naming_convention`| Sets which parts of the model name are included in the physical table names. Options are `schema_and_table`, `table_only` or `hash_md5` - [additional details](../guides/configuration.md#physical-table-naming-convention). (Default: `schema_and_table`) | string | N | +| `gateway_managed_virtual_layer` | Whether SQLMesh views of the virtual layer will be created by the default gateway or model specified gateways - [additional details](../guides/multi_engine.md#gateway-managed-virtual-layer). (Default: False) | boolean | N | +| `infer_python_dependencies` | Whether SQLMesh will statically analyze Python code to automatically infer Python package requirements. (Default: True) | boolean | N | | `environment_catalog_mapping` | A mapping from regular expressions to catalog names. 
The catalog name is used to determine the target catalog for a given environment. | dict[string, string] | N | | `log_limit` | The default number of logs to keep (Default: `20`) | int | N | diff --git a/sqlmesh/core/config/__init__.py b/sqlmesh/core/config/__init__.py index af84818858..d8c7607d51 100644 --- a/sqlmesh/core/config/__init__.py +++ b/sqlmesh/core/config/__init__.py @@ -2,7 +2,10 @@ AutoCategorizationMode as AutoCategorizationMode, CategorizerConfig as CategorizerConfig, ) -from sqlmesh.core.config.common import EnvironmentSuffixTarget as EnvironmentSuffixTarget +from sqlmesh.core.config.common import ( + EnvironmentSuffixTarget as EnvironmentSuffixTarget, + TableNamingConvention as TableNamingConvention, +) from sqlmesh.core.config.connection import ( AthenaConnectionConfig as AthenaConnectionConfig, BaseDuckDBConnectionConfig as BaseDuckDBConnectionConfig, diff --git a/sqlmesh/core/config/common.py b/sqlmesh/core/config/common.py index d7be902713..770c1f5daf 100644 --- a/sqlmesh/core/config/common.py +++ b/sqlmesh/core/config/common.py @@ -49,6 +49,34 @@ def __repr__(self) -> str: return str(self) +class TableNamingConvention(str, Enum): + # Causes table names at the physical layer to follow the convention: + # <schema-name>__<table-name>__<fingerprint> + SCHEMA_AND_TABLE = "schema_and_table" + + # Causes table names at the physical layer to follow the convention: + # <table-name>__<fingerprint> + TABLE_ONLY = "table_only" + + # Takes the table name that would be returned from SCHEMA_AND_TABLE and wraps it in md5() + # to generate a hash and prefixes the hash with `sqlmesh_md5__`, for the following reasons: + # - at a glance, you can still see it's managed by sqlmesh and that md5 was used to generate the hash + # - unquoted identifiers that start with numbers can trip up DB engine parsers, so having a text prefix prevents this + # This causes table names at the physical layer to follow the convention: + # sqlmesh_md5__3b07384d113edec49eaa6238ad5ff00d + HASH_MD5 = "hash_md5" + + @classproperty + def default(cls) -> 
TableNamingConvention: + return TableNamingConvention.SCHEMA_AND_TABLE + + def __str__(self) -> str: + return self.name + + def __repr__(self) -> str: + return str(self) + + def _concurrent_tasks_validator(v: t.Any) -> int: if isinstance(v, str): v = int(v) diff --git a/sqlmesh/core/config/root.py b/sqlmesh/core/config/root.py index cd92ff8467..6cedfbe9f6 100644 --- a/sqlmesh/core/config/root.py +++ b/sqlmesh/core/config/root.py @@ -14,7 +14,7 @@ from sqlmesh.cicd.config import CICDBotConfig from sqlmesh.core import constants as c from sqlmesh.core.console import get_console -from sqlmesh.core.config import EnvironmentSuffixTarget +from sqlmesh.core.config import EnvironmentSuffixTarget, TableNamingConvention from sqlmesh.core.config.base import BaseConfig, UpdateStrategy from sqlmesh.core.config.common import variables_validator, compile_regex_mapping from sqlmesh.core.config.connection import ( @@ -106,6 +106,7 @@ class Config(BaseConfig): model_defaults: Default values for model definitions. physical_schema_mapping: A mapping from regular expressions to names of schemas in which physical tables for corresponding models will be placed. environment_suffix_target: Indicates whether to append the environment name to the schema or table name. + physical_table_naming_convention: Indicates how tables should be named at the physical layer gateway_managed_virtual_layer: Whether the models' views in the virtual layer are created by the model-specific gateway rather than the default gateway. infer_python_dependencies: Whether to statically analyze Python code to automatically infer Python package requirements. environment_catalog_mapping: A mapping from regular expressions to catalog names. The catalog name is used to determine the target catalog for a given environment. 
@@ -147,6 +148,7 @@ class Config(BaseConfig): environment_suffix_target: EnvironmentSuffixTarget = Field( default=EnvironmentSuffixTarget.default ) + physical_table_naming_convention: t.Optional[TableNamingConvention] = None gateway_managed_virtual_layer: bool = False infer_python_dependencies: bool = True environment_catalog_mapping: RegexKeyDict = {} diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py index 1ba241f69f..5a0531209a 100644 --- a/sqlmesh/core/context.py +++ b/sqlmesh/core/context.py @@ -2904,9 +2904,11 @@ def _nodes_to_snapshots(self, nodes: t.Dict[str, Node]) -> t.Dict[str, Snapshot] fingerprint_cache: t.Dict[str, SnapshotFingerprint] = {} for node in nodes.values(): - kwargs = {} + kwargs: t.Dict[str, t.Any] = {} if node.project in self._projects: - kwargs["ttl"] = self.config_for_node(node).snapshot_ttl + config = self.config_for_node(node) + kwargs["ttl"] = config.snapshot_ttl + kwargs["table_naming_convention"] = config.physical_table_naming_convention snapshot = Snapshot.from_node( node, diff --git a/sqlmesh/core/snapshot/definition.py b/sqlmesh/core/snapshot/definition.py index 1a284aadfd..6d8e25e7dc 100644 --- a/sqlmesh/core/snapshot/definition.py +++ b/sqlmesh/core/snapshot/definition.py @@ -13,6 +13,7 @@ from sqlglot import exp from sqlglot.optimizer.normalize_identifiers import normalize_identifiers +from sqlmesh.core.config import TableNamingConvention from sqlmesh.core import constants as c from sqlmesh.core.audit import StandaloneAudit from sqlmesh.core.environment import EnvironmentSuffixTarget @@ -44,7 +45,7 @@ format_evaluated_code_exception, Executable, ) -from sqlmesh.utils.hashing import hash_data +from sqlmesh.utils.hashing import hash_data, md5 from sqlmesh.utils.pydantic import PydanticModel, field_validator if t.TYPE_CHECKING: @@ -333,6 +334,7 @@ class SnapshotInfoMixin(ModelKindMixin): # This can be removed from this model once Pydantic 1 support is dropped (must remain in `Snapshot` though) 
base_table_name_override: t.Optional[str] dev_table_suffix: str + table_naming_convention: t.Optional[TableNamingConvention] = None @cached_property def identifier(self) -> str: @@ -451,6 +453,7 @@ def _table_name(self, version: str, is_deployable: bool) -> str: version, catalog=self.fully_qualified_table.catalog, suffix=self.dev_table_suffix if is_dev_table else None, + naming_convention=self.table_naming_convention, ) @property @@ -580,6 +583,7 @@ class Snapshot(PydanticModel, SnapshotInfoMixin): migrated: Whether or not this snapshot has been created as a result of migration. unrestorable: Whether or not this snapshot can be used to revert its model to a previous version. next_auto_restatement_ts: The timestamp which indicates when is the next time this snapshot should be restated. + table_naming_convention: Convention to follow when generating the physical table name """ name: str @@ -605,6 +609,9 @@ class Snapshot(PydanticModel, SnapshotInfoMixin): base_table_name_override: t.Optional[str] = None next_auto_restatement_ts: t.Optional[int] = None dev_table_suffix: str = "dev" + table_naming_convention_: t.Optional[TableNamingConvention] = Field( + default=None, alias="table_naming_convention" + ) @field_validator("ttl") @classmethod @@ -656,6 +663,7 @@ def from_node( ttl: str = c.DEFAULT_SNAPSHOT_TTL, version: t.Optional[str] = None, cache: t.Optional[t.Dict[str, SnapshotFingerprint]] = None, + table_naming_convention: t.Optional[TableNamingConvention] = None, ) -> Snapshot: """Creates a new snapshot for a node. @@ -666,6 +674,7 @@ def from_node( ttl: A TTL to determine how long orphaned (snapshots that are not promoted anywhere) should live. version: The version that a snapshot is associated with. Usually set during the planning phase. cache: Cache of node name to fingerprints. + table_naming_convention: Convention to follow when generating the physical table name Returns: The newly created snapshot. 
@@ -697,6 +706,7 @@ def from_node( updated_ts=created_ts, ttl=ttl, version=version, + table_naming_convention=table_naming_convention, ) def __eq__(self, other: t.Any) -> bool: @@ -1206,6 +1216,7 @@ def table_info(self) -> SnapshotTableInfo: custom_materialization=custom_materialization, dev_table_suffix=self.dev_table_suffix, model_gateway=self.model_gateway, + table_naming_convention=self.table_naming_convention, # type: ignore ) @property @@ -1568,14 +1579,41 @@ def table_name( version: str, catalog: t.Optional[str] = None, suffix: t.Optional[str] = None, + naming_convention: t.Optional[TableNamingConvention] = None, ) -> str: table = exp.to_table(name) - # bigquery projects usually have "-" in them which is illegal in the table name, so we aggressively prune - name = "__".join(sanitize_name(part.name) for part in table.parts) + naming_convention = naming_convention or TableNamingConvention.default + + if naming_convention == TableNamingConvention.HASH_MD5: + # just take a MD5 hash of what we would have generated anyway using SCHEMA_AND_TABLE + value_to_hash = table_name( + physical_schema=physical_schema, + name=name, + version=version, + catalog=catalog, + suffix=suffix, + naming_convention=TableNamingConvention.SCHEMA_AND_TABLE, + ) + full_name = f"{c.SQLMESH}_md5__{md5(value_to_hash)}" + else: + # note: Snapshot._table_name() already strips the catalog from the model name before calling this function + # Therefore, a model with 3-part naming like "foo.bar.baz" gets passed as (name="bar.baz", catalog="foo") to this function + # This is why there is no TableNamingConvention.CATALOG_AND_SCHEMA_AND_TABLE + table_parts = table.parts + parts_to_consider = 2 if naming_convention == TableNamingConvention.SCHEMA_AND_TABLE else 1 + + # in case the parsed table name has less parts than what the naming convention says we should be considering + parts_to_consider = min(len(table_parts), parts_to_consider) + + # bigquery projects usually have "-" in them which is illegal 
in the table name, so we aggressively prune + name = "__".join(sanitize_name(part.name) for part in table_parts[-parts_to_consider:]) + + full_name = f"{name}__{version}" + suffix = f"__{suffix}" if suffix else "" - table.set("this", exp.to_identifier(f"{name}__{version}{suffix}")) + table.set("this", exp.to_identifier(f"{full_name}{suffix}")) table.set("db", exp.to_identifier(physical_schema)) if not table.catalog and catalog: table.set("catalog", exp.to_identifier(catalog)) diff --git a/sqlmesh/utils/hashing.py b/sqlmesh/utils/hashing.py index 1bccd987bc..a166d36bec 100644 --- a/sqlmesh/utils/hashing.py +++ b/sqlmesh/utils/hashing.py @@ -9,7 +9,9 @@ def crc32(data: t.Iterable[t.Optional[str]]) -> str: return str(zlib.crc32(_safe_concat(data))) -def md5(data: t.Iterable[t.Optional[str]]) -> str: +def md5(data: t.Union[str, t.Iterable[t.Optional[str]]]) -> str: + if isinstance(data, str): + data = [data] return hashlib.md5(_safe_concat(data)).hexdigest() diff --git a/tests/core/test_config.py b/tests/core/test_config.py index 6c3eb6e361..854809e1de 100644 --- a/tests/core/test_config.py +++ b/tests/core/test_config.py @@ -3,6 +3,7 @@ import re from pathlib import Path from unittest import mock +import typing as t import pytest from pytest_mock import MockerFixture @@ -17,6 +18,7 @@ MotherDuckConnectionConfig, BuiltInSchedulerConfig, EnvironmentSuffixTarget, + TableNamingConvention, ) from sqlmesh.core.config.connection import DuckDBAttachOptions, RedshiftConnectionConfig from sqlmesh.core.config.feature_flag import DbtFeatureFlag, FeatureFlag @@ -1412,3 +1414,30 @@ def test_load_yaml_config_custom_dotenv_path(tmp_path_factory): default_gateway="test_gateway", model_defaults=ModelDefaultsConfig(dialect="postgres"), ) + + +@pytest.mark.parametrize( + "convention_str, expected", + [ + (None, None), + ("schema_and_table", TableNamingConvention.SCHEMA_AND_TABLE), + ("table_only", TableNamingConvention.TABLE_ONLY), + ("hash_md5", TableNamingConvention.HASH_MD5), + ], +) 
+def test_physical_table_naming_convention( + convention_str: t.Optional[str], expected: t.Optional[TableNamingConvention], tmp_path: Path +): + config_part = f"physical_table_naming_convention: {convention_str}" if convention_str else "" + (tmp_path / "config.yaml").write_text(f""" +gateways: + test_gateway: + connection: + type: duckdb +model_defaults: + dialect: duckdb +{config_part} + """) + + config = load_config_from_paths(Config, project_paths=[tmp_path / "config.yaml"]) + assert config.physical_table_naming_convention == expected diff --git a/tests/core/test_integration.py b/tests/core/test_integration.py index 0717ba11aa..ab6150ee4e 100644 --- a/tests/core/test_integration.py +++ b/tests/core/test_integration.py @@ -35,6 +35,7 @@ GatewayConfig, ModelDefaultsConfig, DuckDBConnectionConfig, + TableNamingConvention, ) from sqlmesh.core.config.common import EnvironmentSuffixTarget from sqlmesh.core.console import Console, get_console @@ -7115,3 +7116,74 @@ def test_engine_adapters_multi_repo_all_gateways_gathered(copy_to_temp_path): gathered_gateways = context.engine_adapters.keys() expected_gateways = {"local", "memory", "extra"} assert gathered_gateways == expected_gateways + + +def test_physical_table_naming_strategy_table_only(copy_to_temp_path: t.Callable): + sushi_context = Context( + paths=copy_to_temp_path("examples/sushi"), + config=Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + default_connection=DuckDBConnectionConfig(), + physical_table_naming_convention=TableNamingConvention.TABLE_ONLY, + ), + ) + + assert sushi_context.config.physical_table_naming_convention == TableNamingConvention.TABLE_ONLY + sushi_context.plan(auto_apply=True) + + adapter = sushi_context.engine_adapter + + snapshot_tables = [ + dict(catalog=str(r[0]), schema=str(r[1]), table=str(r[2])) + for r in adapter.fetchall( + "select table_catalog, table_schema, table_name from information_schema.tables where table_type='BASE TABLE'" + ) + ] + + assert all([not 
t["table"].startswith("sushi") for t in snapshot_tables]) + + prod_env = sushi_context.state_reader.get_environment("prod") + assert prod_env + + prod_env_snapshots = sushi_context.state_reader.get_snapshots(prod_env.snapshots) + + assert all( + s.table_naming_convention == TableNamingConvention.TABLE_ONLY + for s in prod_env_snapshots.values() + ) + + +def test_physical_table_naming_strategy_hash_md5(copy_to_temp_path: t.Callable): + sushi_context = Context( + paths=copy_to_temp_path("examples/sushi"), + config=Config( + model_defaults=ModelDefaultsConfig(dialect="duckdb"), + default_connection=DuckDBConnectionConfig(), + physical_table_naming_convention=TableNamingConvention.HASH_MD5, + ), + ) + + assert sushi_context.config.physical_table_naming_convention == TableNamingConvention.HASH_MD5 + sushi_context.plan(auto_apply=True) + + adapter = sushi_context.engine_adapter + + snapshot_tables = [ + dict(catalog=str(r[0]), schema=str(r[1]), table=str(r[2])) + for r in adapter.fetchall( + "select table_catalog, table_schema, table_name from information_schema.tables where table_type='BASE TABLE'" + ) + ] + + assert all([not t["table"].startswith("sushi") for t in snapshot_tables]) + assert all([t["table"].startswith("sqlmesh_md5") for t in snapshot_tables]) + + prod_env = sushi_context.state_reader.get_environment("prod") + assert prod_env + + prod_env_snapshots = sushi_context.state_reader.get_snapshots(prod_env.snapshots) + + assert all( + s.table_naming_convention == TableNamingConvention.HASH_MD5 + for s in prod_env_snapshots.values() + ) diff --git a/tests/core/test_snapshot.py b/tests/core/test_snapshot.py index ffaee9be74..66ba6613be 100644 --- a/tests/core/test_snapshot.py +++ b/tests/core/test_snapshot.py @@ -61,11 +61,14 @@ get_next_model_interval_start, check_ready_intervals, _contiguous_intervals, + table_name, + TableNamingConvention, ) from sqlmesh.utils import AttributeDict from sqlmesh.utils.date import DatetimeRanges, to_date, to_datetime, 
to_timestamp from sqlmesh.utils.errors import SQLMeshError, SignalEvalError from sqlmesh.utils.jinja import JinjaMacroRegistry, MacroInfo +from sqlmesh.utils.hashing import md5 from sqlmesh.core.console import get_console @@ -1131,7 +1134,7 @@ def test_stamp(model: Model): assert original_fingerprint != stamped_fingerprint -def test_table_name(snapshot: Snapshot, make_snapshot: t.Callable): +def test_snapshot_table_name(snapshot: Snapshot, make_snapshot: t.Callable): # Mimic a direct breaking change. snapshot.fingerprint = SnapshotFingerprint( data_hash="1", metadata_hash="1", parent_data_hash="1" @@ -1186,6 +1189,59 @@ def test_table_name(snapshot: Snapshot, make_snapshot: t.Callable): ) +def test_table_name_naming_convention_table_only(make_snapshot: t.Callable[..., Snapshot]): + # 3-part naming + snapshot = make_snapshot( + SqlModel(name='"foo"."bar"."baz"', query=parse_one("select 1")), + table_naming_convention=TableNamingConvention.TABLE_ONLY, + ) + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + + assert snapshot.table_name(is_deployable=True) == f"foo.sqlmesh__bar.baz__{snapshot.version}" + assert ( + snapshot.table_name(is_deployable=False) == f"foo.sqlmesh__bar.baz__{snapshot.version}__dev" + ) + + # 2-part naming + snapshot = make_snapshot( + SqlModel(name='"foo"."bar"', query=parse_one("select 1")), + table_naming_convention=TableNamingConvention.TABLE_ONLY, + ) + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + + assert snapshot.table_name(is_deployable=True) == f"sqlmesh__foo.bar__{snapshot.version}" + assert snapshot.table_name(is_deployable=False) == f"sqlmesh__foo.bar__{snapshot.version}__dev" + + +def test_table_name_naming_convention_hash_md5(make_snapshot: t.Callable[..., Snapshot]): + # 3-part naming + snapshot = make_snapshot( + SqlModel(name='"foo"."bar"."baz"', query=parse_one("select 1")), + table_naming_convention=TableNamingConvention.HASH_MD5, + ) + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + + hash = 
md5(f"foo.sqlmesh__bar.bar__baz__{snapshot.version}") + assert snapshot.table_name(is_deployable=True) == f"foo.sqlmesh__bar.sqlmesh_md5__{hash}" + hash_dev = md5(f"foo.sqlmesh__bar.bar__baz__{snapshot.version}__dev") + assert ( + snapshot.table_name(is_deployable=False) == f"foo.sqlmesh__bar.sqlmesh_md5__{hash_dev}__dev" + ) + + # 2-part naming + snapshot = make_snapshot( + SqlModel(name='"foo"."bar"', query=parse_one("select 1")), + table_naming_convention=TableNamingConvention.HASH_MD5, + ) + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + + hash = md5(f"sqlmesh__foo.foo__bar__{snapshot.version}") + assert snapshot.table_name(is_deployable=True) == f"sqlmesh__foo.sqlmesh_md5__{hash}" + + hash_dev = md5(f"sqlmesh__foo.foo__bar__{snapshot.version}__dev") + assert snapshot.table_name(is_deployable=False) == f"sqlmesh__foo.sqlmesh_md5__{hash_dev}__dev" + + def test_table_name_view(make_snapshot: t.Callable): # Mimic a direct breaking change. snapshot = make_snapshot(SqlModel(name="name", query=parse_one("select 1"), kind="VIEW")) @@ -2133,6 +2189,177 @@ def test_deployability_index_missing_parent(make_snapshot): assert not deplyability_index.is_deployable(snapshot_a) +@pytest.mark.parametrize( + "call_kwargs, expected", + [ + ######################################## + # TableNamingConvention.SCHEMA_AND_TABLE + ( + dict(physical_schema="sqlmesh__foo", name="bar", version="1234"), + "sqlmesh__foo.bar__1234", + ), + ( + dict(physical_schema="sqlmesh__foo", name="foo.bar", version="1234"), + "sqlmesh__foo.foo__bar__1234", + ), + ( + dict(physical_schema="sqlmesh__foo", name="bar", version="1234", catalog="foo"), + "foo.sqlmesh__foo.bar__1234", + ), + ( + dict(physical_schema="sqlmesh__foo", name="bar.baz", version="1234", catalog="foo"), + "foo.sqlmesh__foo.bar__baz__1234", + ), + ( + dict(physical_schema="sqlmesh__foo", name="bar.baz", version="1234", suffix="dev"), + "sqlmesh__foo.bar__baz__1234__dev", + ), + ( + dict( + physical_schema="sqlmesh__foo", + 
name="bar.baz", + version="1234", + catalog="foo", + suffix="dev", + ), + "foo.sqlmesh__foo.bar__baz__1234__dev", + ), + ################################## + # TableNamingConvention.TABLE_ONLY + ( + dict( + physical_schema="sqlmesh__foo", + name="bar", + version="1234", + naming_convention=TableNamingConvention.TABLE_ONLY, + ), + "sqlmesh__foo.bar__1234", + ), + ( + dict( + physical_schema="sqlmesh__foo", + name="foo.bar", + version="1234", + naming_convention=TableNamingConvention.TABLE_ONLY, + ), + "sqlmesh__foo.bar__1234", + ), + ( + dict( + physical_schema="sqlmesh__foo", + name="bar", + version="1234", + catalog="foo", + naming_convention=TableNamingConvention.TABLE_ONLY, + ), + "foo.sqlmesh__foo.bar__1234", + ), + ( + dict( + physical_schema="sqlmesh__bar", + name="bar.baz", + version="1234", + catalog="foo", + naming_convention=TableNamingConvention.TABLE_ONLY, + ), + "foo.sqlmesh__bar.baz__1234", + ), + ( + dict( + physical_schema="sqlmesh__bar", + name="bar.baz", + version="1234", + suffix="dev", + naming_convention=TableNamingConvention.TABLE_ONLY, + ), + "sqlmesh__bar.baz__1234__dev", + ), + ( + dict( + physical_schema="sqlmesh__bar", + name="bar.baz", + version="1234", + catalog="foo", + suffix="dev", + naming_convention=TableNamingConvention.TABLE_ONLY, + ), + "foo.sqlmesh__bar.baz__1234__dev", + ), + ################################# + # TableNamingConvention.HASH_MD5 + ( + dict( + physical_schema="sqlmesh__foo", + name="bar", + version="1234", + naming_convention=TableNamingConvention.HASH_MD5, + ), + f"sqlmesh__foo.sqlmesh_md5__{md5('sqlmesh__foo.bar__1234')}", + ), + ( + dict( + physical_schema="sqlmesh__foo", + name="foo.bar", + version="1234", + naming_convention=TableNamingConvention.HASH_MD5, + ), + f"sqlmesh__foo.sqlmesh_md5__{md5('sqlmesh__foo.foo__bar__1234')}", + ), + ( + dict( + physical_schema="sqlmesh__foo", + name="bar", + version="1234", + catalog="foo", + naming_convention=TableNamingConvention.HASH_MD5, + ), + 
f"foo.sqlmesh__foo.sqlmesh_md5__{md5('foo.sqlmesh__foo.bar__1234')}", + ), + ( + dict( + physical_schema="sqlmesh__foo", + name="bar.baz", + version="1234", + catalog="foo", + naming_convention=TableNamingConvention.HASH_MD5, + ), + f"foo.sqlmesh__foo.sqlmesh_md5__{md5('foo.sqlmesh__foo.bar__baz__1234')}", + ), + ( + dict( + physical_schema="sqlmesh__foo", + name="bar.baz", + version="1234", + suffix="dev", + naming_convention=TableNamingConvention.HASH_MD5, + ), + f"sqlmesh__foo.sqlmesh_md5__{md5('sqlmesh__foo.bar__baz__1234__dev')}__dev", + ), + ( + dict( + physical_schema="sqlmesh__foo", + name="bar.baz", + version="1234", + catalog="foo", + suffix="dev", + naming_convention=TableNamingConvention.HASH_MD5, + ), + f"foo.sqlmesh__foo.sqlmesh_md5__{md5('foo.sqlmesh__foo.bar__baz__1234__dev')}__dev", + ), + ], +) +def test_table_name(call_kwargs: t.Dict[str, t.Any], expected: str): + """ + physical_schema: str + name: str + version: str + catalog: t.Optional[str] + suffix: t.Optional[str] + naming_convention: t.Optional[TableNamingConvention] + """ + assert table_name(**call_kwargs) == expected + + @pytest.mark.parametrize( "model_name, environment_naming_info, default_catalog, dialect, expected", ( From c194506944336af44bd320342bf726179a1ea281 Mon Sep 17 00:00:00 2001 From: Erin Drummond Date: Wed, 23 Jul 2025 21:05:27 +0000 Subject: [PATCH 2/4] docs feedback --- docs/guides/configuration.md | 19 +++++++++++++++++-- tests/core/test_integration.py | 2 +- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md index 069c14fe76..6e14d1f605 100644 --- a/docs/guides/configuration.md +++ b/docs/guides/configuration.md @@ -446,8 +446,9 @@ Out of the box, SQLMesh has the following defaults set: - `environment_suffix_target: schema` - `physical_table_naming_convention: schema_and_table` + - no `physical_schema_mapping` overrides, so a `sqlmesh__` physical schema will be created for each model schema -Given a 
catalog of `warehouse` and a model named `finance_mart.transaction_events_over_threshold`, this causes SQLMesh to create physical tables using the following convention: +This means that given a catalog of `warehouse` and a model named `finance_mart.transaction_events_over_threshold`, SQLMesh will create physical tables using the following convention: ``` # .sqlmesh__.__
__ @@ -457,6 +458,8 @@ warehouse.sqlmesh__finance_mart.finance_mart__transaction_events_over_threshold_ This deliberately contains some redundancy with the *model* schema as it's repeated at the physical layer in both the physical schema name as well as the physical table name. +This default exists to make the physical table names portable between different configurations. If you were to define a `physical_schema_mapping` that maps all models to the same physical schema, since the model schema is included in the table name as well, there are no naming conflicts. + ##### Table only Some engines have object name length limitations which cause them to [silently truncate](https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS) table and view names that exceed this limit. This behaviour breaks SQLMesh, so we raise a runtime error if we detect the engine would silently truncate the name of the table we are trying to create. @@ -489,6 +492,18 @@ warehouse.sqlmesh__finance_mart.transaction_events_over_threshold__ Notice that the model schema name is no longer part of the physical table name. This allows for slightly longer model names on engines with low identifier length limits, which may be useful for your project. +In this configuration, it is your responsibility to ensure that any schema overrides in `physical_schema_mapping` result in each model schema getting mapped to a unique physical schema. + +For example, the following configuration will cause **data corruption**: + +```yaml +physical_table_naming_convention: table_only +physical_schema_mapping: + '.*': sqlmesh +``` + +This is because every model schema is mapped to the same physical schema but the model schema name is omitted from the physical table name. 
+ ##### MD5 hash If you *still* need more characters, you can set `physical_table_naming_convention: hash_md5` like so: @@ -521,7 +536,7 @@ sqlmesh_md5__d3b07384d113edec49eaa6238ad5ff00 sqlmesh_md5__d3b07384d113edec49eaa6238ad5ff00__dev ``` -This has a downside that now it's much more difficult to determine which table corresponds to which model by just looking at the database with a SQL client. However, the table names now have a predictable length so there are no longer any surprises with identfiers exceeding the max length at the physical layer. +This has a downside that now it's much more difficult to determine which table corresponds to which model by just looking at the database with a SQL client. However, the table names have a predictable length so there are no longer any surprises with identifiers exceeding the max length at the physical layer. #### Environment view catalogs diff --git a/tests/core/test_integration.py b/tests/core/test_integration.py index ab6150ee4e..5bc7b44cd4 100644 --- a/tests/core/test_integration.py +++ b/tests/core/test_integration.py @@ -7117,7 +7117,7 @@ def test_engine_adapters_multi_repo_all_gateways_gathered(copy_to_temp_path): expected_gateways = {"local", "memory", "extra"} assert gathered_gateways == expected_gateways - + def test_physical_table_naming_strategy_table_only(copy_to_temp_path: t.Callable): sushi_context = Context( paths=copy_to_temp_path("examples/sushi"), From 366f3fe8dce87902f1f6de7a75d7abba0424645e Mon Sep 17 00:00:00 2001 From: Erin Drummond Date: Fri, 25 Jul 2025 00:01:06 +0000 Subject: [PATCH 3/4] Update docs --- docs/reference/configuration.md | 40 +++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index e44f650bf0..06aed36b53 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -16,33 +16,44 @@ This section describes the other root level configuration 
parameters. Configuration options for SQLMesh project directories. -| Option | Description | Type | Required | -| ------------------ | ------------------------------------------------------------------------------------------------------------------ | :----------: | :------: | -| `ignore_patterns` | Files that match glob patterns specified in this list are ignored when scanning the project folder (Default: `[]`) | list[string] | N | -| `project` | The project name of this config. Used for [multi-repo setups](../guides/multi_repo.md). | string | N | +| Option | Description | Type | Required | +| ------------------ | --------------------------------------------------------------------------------------------------------------------------- | :----------: | :------: | +| `ignore_patterns` | Files that match glob patterns specified in this list are ignored when scanning the project folder (Default: `[]`) | list[string] | N | +| `project` | The project name of this config. Used for [multi-repo setups](../guides/multi_repo.md). | string | N | | `cache_dir` | The directory to store the SQLMesh cache. Can be an absolute path or relative to the project directory. (Default: `.cache`) | string | N | +| `log_limit` | The default number of historical log files to keep (Default: `20`) | int | N | -### Environments +### Database (Physical Layer) -Configuration options for SQLMesh environment creation and promotion. +Configuration options for how SQLMesh manages database objects in the [physical layer](../concepts/glossary.md#physical-layer). 
| Option | Description | Type | Required | |-------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------:|:--------:| | `snapshot_ttl` | The period of time that a model snapshot not a part of any environment should exist before being deleted. This is defined as a string with the default `in 1 week`. Other [relative dates](https://dateparser.readthedocs.io/en/latest/) can be used, such as `in 30 days`. (Default: `in 1 week`) | string | N | +| `physical_schema_override` | (Deprecated) Use `physical_schema_mapping` instead. A mapping from model schema names to names of schemas in which physical tables for the corresponding models will be placed. | dict[string, string] | N | +| `physical_schema_mapping` | A mapping from regular expressions to names of schemas in which physical tables for the corresponding models [will be placed](../guides/configuration.md#physical-table-schemas). (Default physical schema name: `sqlmesh__[model schema]`) | dict[string, string] | N | +| `physical_table_naming_convention`| Sets which parts of the model name are included in the physical table names. Options are `schema_and_table`, `table_only` or `hash_md5` - [additional details](../guides/configuration.md#physical-table-naming-convention). (Default: `schema_and_table`) | string | N | + +### Environments (Virtual Layer) + +Configuration options for how SQLMesh manages environment creation and promotion in the [virtual layer](../concepts/glossary.md#virtual-layer). 
+ +| Option | Description | Type | Required | +|-------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------:|:--------:| | `environment_ttl` | The period of time that a development environment should exist before being deleted. This is defined as a string with the default `in 1 week`. Other [relative dates](https://dateparser.readthedocs.io/en/latest/) can be used, such as `in 30 days`. (Default: `in 1 week`) | string | N | | `pinned_environments` | The list of development environments that are exempt from deletion due to expiration | list[string] | N | -| `time_column_format` | The default format to use for all model time columns. This time format uses [python format codes](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) (Default: `%Y-%m-%d`) | string | N | | `default_target_environment` | The name of the environment that will be the default target for the `sqlmesh plan` and `sqlmesh run` commands. (Default: `prod`) | string | N | -| `physical_schema_override` | (Deprecated) Use `physical_schema_mapping` instead. A mapping from model schema names to names of schemas in which physical tables for the corresponding models will be placed. | dict[string, string] | N | -| `physical_schema_mapping` | A mapping from regular expressions to names of schemas in which physical tables for the corresponding models [will be placed](../guides/configuration.md#physical-table-schemas). (Default physical schema name: `sqlmesh__[model schema]`) | dict[string, string] | N | -| `environment_suffix_target` | Whether SQLMesh views should append their environment name to the `schema` or `table` - [additional details](../guides/configuration.md#view-schema-override). 
(Default: `schema`) | string | N | -| `physical_table_naming_convention`| Sets which parts of the model name are included in the physical table names. Options are `schema_and_table` or `table_only` - [additional details](../guides/configuration.md#physical-table-naming-convention). (Default: `schema_and_table`) | string | N | +| `environment_suffix_target` | Whether SQLMesh views should append their environment name to the `schema`, `table` or `catalog` - [additional details](../guides/configuration.md#view-schema-override). (Default: `schema`) | string | N | | `gateway_managed_virtual_layer` | Whether SQLMesh views of the virtual layer will be created by the default gateway or model specified gateways - [additional details](../guides/multi_engine.md#gateway-managed-virtual-layer). (Default: False) | boolean | N | -| `infer_python_dependencies` | Whether SQLMesh will statically analyze Python code to automatically infer Python package requirements. (Default: True) | boolean | N | | `environment_catalog_mapping` | A mapping from regular expressions to catalog names. The catalog name is used to determine the target catalog for a given environment. | dict[string, string] | N | -| `log_limit` | The default number of logs to keep (Default: `20`) | int | N | -### Model defaults +### Models + +| Option | Description | Type | Required | +|-------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------:|:--------:| +| `time_column_format` | The default format to use for all model time columns. 
This time format uses [python format codes](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) (Default: `%Y-%m-%d`) | string | N | +| `infer_python_dependencies` | Whether SQLMesh will statically analyze Python code to automatically infer Python package requirements. (Default: True) | boolean | N | +| `model_defaults` | Default [properties](./model_configuration.md#model-defaults) to set on each model. At a minimum, `dialect` must be set. | dict[string, any] | Y | The `model_defaults` key is **required** and must contain a value for the `dialect` key. @@ -83,6 +94,7 @@ Configuration for the `sqlmesh plan` command. | `no_diff` | Don't show diffs for changed models (Default: False) | boolean | N | | `no_prompts` | Disables interactive prompts in CLI (Default: True) | boolean | N | | `always_recreate_environment` | Always recreates the target environment from the environment specified in `create_from` (by default `prod`) (Default: False) | boolean | N | + ## Run Configuration for the `sqlmesh run` command. Please note that this is only applicable when configured with the [builtin](#builtin) scheduler. 
From a5496e36a5fd9684c28a9b7595ea646f282b0771 Mon Sep 17 00:00:00 2001 From: Erin Drummond Date: Fri, 25 Jul 2025 01:14:47 +0000 Subject: [PATCH 4/4] PR feedback --- sqlmesh/core/config/root.py | 4 ++- sqlmesh/core/snapshot/definition.py | 11 ++++++--- tests/core/test_config.py | 2 +- tests/core/test_snapshot.py | 38 +++++++++++++++++++++++++++++ 4 files changed, 49 insertions(+), 6 deletions(-) diff --git a/sqlmesh/core/config/root.py b/sqlmesh/core/config/root.py index 6cedfbe9f6..4dd28f97a5 100644 --- a/sqlmesh/core/config/root.py +++ b/sqlmesh/core/config/root.py @@ -148,7 +148,9 @@ class Config(BaseConfig): environment_suffix_target: EnvironmentSuffixTarget = Field( default=EnvironmentSuffixTarget.default ) - physical_table_naming_convention: t.Optional[TableNamingConvention] = None + physical_table_naming_convention: TableNamingConvention = Field( + default=TableNamingConvention.default + ) gateway_managed_virtual_layer: bool = False infer_python_dependencies: bool = True environment_catalog_mapping: RegexKeyDict = {} diff --git a/sqlmesh/core/snapshot/definition.py b/sqlmesh/core/snapshot/definition.py index 6d8e25e7dc..1331dd72f7 100644 --- a/sqlmesh/core/snapshot/definition.py +++ b/sqlmesh/core/snapshot/definition.py @@ -228,6 +228,7 @@ class SnapshotDataVersion(PydanticModel, frozen=True): change_category: t.Optional[SnapshotChangeCategory] = None physical_schema_: t.Optional[str] = Field(default=None, alias="physical_schema") dev_table_suffix: str + table_naming_convention: TableNamingConvention = Field(default=TableNamingConvention.default) def snapshot_id(self, name: str) -> SnapshotId: return SnapshotId(name=name, identifier=self.fingerprint.to_identifier()) @@ -334,7 +335,7 @@ class SnapshotInfoMixin(ModelKindMixin): # This can be removed from this model once Pydantic 1 support is dropped (must remain in `Snapshot` though) base_table_name_override: t.Optional[str] dev_table_suffix: str - table_naming_convention: t.Optional[TableNamingConvention] = 
None + table_naming_convention: TableNamingConvention = Field(default=TableNamingConvention.default) @cached_property def identifier(self) -> str: @@ -609,8 +610,8 @@ class Snapshot(PydanticModel, SnapshotInfoMixin): base_table_name_override: t.Optional[str] = None next_auto_restatement_ts: t.Optional[int] = None dev_table_suffix: str = "dev" - table_naming_convention_: t.Optional[TableNamingConvention] = Field( - default=None, alias="table_naming_convention" + table_naming_convention_: TableNamingConvention = Field( + default=TableNamingConvention.default, alias="table_naming_convention" ) @field_validator("ttl") @@ -663,7 +664,7 @@ def from_node( ttl: str = c.DEFAULT_SNAPSHOT_TTL, version: t.Optional[str] = None, cache: t.Optional[t.Dict[str, SnapshotFingerprint]] = None, - table_naming_convention: t.Optional[TableNamingConvention] = None, + table_naming_convention: TableNamingConvention = TableNamingConvention.default, ) -> Snapshot: """Creates a new snapshot for a node. @@ -1023,6 +1024,7 @@ def categorize_as(self, category: SnapshotChangeCategory) -> None: previous_version = self.previous_version self.version = previous_version.data_version.version self.physical_schema_ = previous_version.physical_schema + self.table_naming_convention = previous_version.table_naming_convention if self.is_materialized and (category.is_indirect_non_breaking or category.is_metadata): # Reuse the dev table for indirect non-breaking changes. 
self.dev_version_ = ( @@ -1229,6 +1231,7 @@ def data_version(self) -> SnapshotDataVersion: change_category=self.change_category, physical_schema=self.physical_schema, dev_table_suffix=self.dev_table_suffix, + table_naming_convention=self.table_naming_convention, ) @property diff --git a/tests/core/test_config.py b/tests/core/test_config.py index 854809e1de..9277fc6902 100644 --- a/tests/core/test_config.py +++ b/tests/core/test_config.py @@ -1419,7 +1419,7 @@ def test_load_yaml_config_custom_dotenv_path(tmp_path_factory): @pytest.mark.parametrize( "convention_str, expected", [ - (None, None), + (None, TableNamingConvention.SCHEMA_AND_TABLE), ("schema_and_table", TableNamingConvention.SCHEMA_AND_TABLE), ("table_only", TableNamingConvention.TABLE_ONLY), ("hash_md5", TableNamingConvention.HASH_MD5), diff --git a/tests/core/test_snapshot.py b/tests/core/test_snapshot.py index 66ba6613be..cab5e17fff 100644 --- a/tests/core/test_snapshot.py +++ b/tests/core/test_snapshot.py @@ -166,6 +166,7 @@ def test_json(snapshot: Snapshot): "name": '"name"', "parents": [{"name": '"parent"."tbl"', "identifier": snapshot.parents[0].identifier}], "previous_versions": [], + "table_naming_convention": "schema_and_table", "updated_ts": 1663891973000, "version": snapshot.fingerprint.to_version(), "migrated": False, @@ -1140,6 +1141,9 @@ def test_snapshot_table_name(snapshot: Snapshot, make_snapshot: t.Callable): data_hash="1", metadata_hash="1", parent_data_hash="1" ) snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + assert snapshot.table_naming_convention == TableNamingConvention.SCHEMA_AND_TABLE + assert snapshot.data_version.table_naming_convention == TableNamingConvention.SCHEMA_AND_TABLE + snapshot.previous_versions = () assert snapshot.table_name(is_deployable=True) == "sqlmesh__default.name__3078928823" assert snapshot.table_name(is_deployable=False) == "sqlmesh__default.name__3078928823__dev" @@ -1196,6 +1200,8 @@ def 
test_table_name_naming_convention_table_only(make_snapshot: t.Callable[..., table_naming_convention=TableNamingConvention.TABLE_ONLY, ) snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + assert snapshot.table_naming_convention == TableNamingConvention.TABLE_ONLY + assert snapshot.data_version.table_naming_convention == TableNamingConvention.TABLE_ONLY assert snapshot.table_name(is_deployable=True) == f"foo.sqlmesh__bar.baz__{snapshot.version}" assert ( @@ -1220,6 +1226,8 @@ def test_table_name_naming_convention_hash_md5(make_snapshot: t.Callable[..., Sn table_naming_convention=TableNamingConvention.HASH_MD5, ) snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + assert snapshot.table_naming_convention == TableNamingConvention.HASH_MD5 + assert snapshot.data_version.table_naming_convention == TableNamingConvention.HASH_MD5 hash = md5(f"foo.sqlmesh__bar.bar__baz__{snapshot.version}") assert snapshot.table_name(is_deployable=True) == f"foo.sqlmesh__bar.sqlmesh_md5__{hash}" @@ -1273,6 +1281,36 @@ def test_table_name_view(make_snapshot: t.Callable): assert new_snapshot.dev_version != snapshot.dev_version +def test_table_naming_convention_change_reuse_previous_version(make_snapshot): + # Ensure that snapshots that trigger "reuse previous version" inherit the naming convention of the previous snapshot + original_snapshot: Snapshot = make_snapshot( + SqlModel(name="a", query=parse_one("select 1, ds")), + table_naming_convention=TableNamingConvention.SCHEMA_AND_TABLE, + ) + original_snapshot.categorize_as(SnapshotChangeCategory.BREAKING) + + assert original_snapshot.table_naming_convention == TableNamingConvention.SCHEMA_AND_TABLE + assert original_snapshot.table_name() == "sqlmesh__default.a__4145234055" + + changed_snapshot: Snapshot = make_snapshot( + SqlModel(name="a", query=parse_one("select 1, 'forward_only' as a, ds")), + table_naming_convention=TableNamingConvention.HASH_MD5, + ) + changed_snapshot.previous_versions = original_snapshot.all_versions + + 
assert changed_snapshot.previous_version == original_snapshot.data_version + + changed_snapshot.categorize_as(SnapshotChangeCategory.FORWARD_ONLY) + + # inherited from previous version even though changed_snapshot was created with TableNamingConvention.HASH_MD5 + assert changed_snapshot.table_naming_convention == TableNamingConvention.SCHEMA_AND_TABLE + assert ( + changed_snapshot.previous_version.table_naming_convention + == TableNamingConvention.SCHEMA_AND_TABLE + ) + assert changed_snapshot.table_name() == "sqlmesh__default.a__4145234055" + + def test_categorize_change_sql(make_snapshot): old_snapshot = make_snapshot(SqlModel(name="a", query=parse_one("select 1, ds")))