From 63ae19c286d4b2d622ba90b351530b81a2067898 Mon Sep 17 00:00:00 2001
From: "michael.wallis"
Date: Mon, 2 Dec 2024 09:15:48 +0000
Subject: [PATCH 1/7] fix: consider sqlmesh major/minor for migrate and rollback

---
 sqlmesh/core/state_sync/engine_adapter.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/sqlmesh/core/state_sync/engine_adapter.py b/sqlmesh/core/state_sync/engine_adapter.py
index 9afb0aabf7..72561720dc 100644
--- a/sqlmesh/core/state_sync/engine_adapter.py
+++ b/sqlmesh/core/state_sync/engine_adapter.py
@@ -1308,7 +1308,13 @@ def _apply_migrations(
         versions = self.get_versions(validate=False)
         migrations = MIGRATIONS[versions.schema_version :]

-        migrate_rows = migrations or major_minor(SQLGLOT_VERSION) != versions.minor_sqlglot_version
+        migrate_rows = any(
+            [
+                migrations,
+                major_minor(SQLGLOT_VERSION) != versions.minor_sqlglot_version,
+                major_minor(SQLMESH_VERSION) != versions.minor_sqlmesh_version,
+            ]
+        )
         if not skip_backup and migrate_rows:
             self._backup_state()


From f9eb9fa35750c792ed43c3fd2b97f757749f1598 Mon Sep 17 00:00:00 2001
From: "michael.wallis"
Date: Mon, 2 Dec 2024 10:19:56 +0000
Subject: [PATCH 2/7] bugfix account for breaking change in dbt-fabric

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index f44976afaf..bae57b89fa 100644
--- a/setup.py
+++ b/setup.py
@@ -106,6 +106,7 @@
             "dbt-databricks",
             "dbt-redshift",
             "dbt-sqlserver>=1.7.0",
+            "dbt-fabric!=1.8.8",  # Broken API, https://github.com/microsoft/dbt-fabric/compare/v1.8.7...v1.8.8
             "dbt-trino",
         ],
         "dbt": [

From 6520ffd3838cd5bc814507f113ef5899e462c856 Mon Sep 17 00:00:00 2001
From: "michael.wallis"
Date: Mon, 2 Dec 2024 12:05:45 +0000
Subject: [PATCH 3/7] bugfix pinning bitnami image due to pyspark pandas 3.12 issue

---
 tests/core/engine_adapter/integration/docker/spark/Dockerfile | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/core/engine_adapter/integration/docker/spark/Dockerfile b/tests/core/engine_adapter/integration/docker/spark/Dockerfile
index 7fb39b840c..8458d9e6d6 100644
--- a/tests/core/engine_adapter/integration/docker/spark/Dockerfile
+++ b/tests/core/engine_adapter/integration/docker/spark/Dockerfile
@@ -1,4 +1,6 @@
-FROM docker.io/bitnami/spark:3.5
+# Pinned to a specific version due to PySpark Pandas requiring distutils, which disappears in Python 3.12
+# Should be fixed in v4 https://issues.apache.org/jira/browse/SPARK-45390
+FROM docker.io/bitnami/spark:3.5.2-debian-12-r0
 USER root
 RUN install_packages curl
 USER 1001

From db65e002cbd549a20a23d25ff3929873c88226b9 Mon Sep 17 00:00:00 2001
From: "michael.wallis"
Date: Mon, 2 Dec 2024 20:46:11 +0000
Subject: [PATCH 4/7] bugfix remove impact to snapshot and environment migration process

---
 sqlmesh/core/state_sync/engine_adapter.py | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/sqlmesh/core/state_sync/engine_adapter.py b/sqlmesh/core/state_sync/engine_adapter.py
index 72561720dc..7e837a95d5 100644
--- a/sqlmesh/core/state_sync/engine_adapter.py
+++ b/sqlmesh/core/state_sync/engine_adapter.py
@@ -1223,15 +1223,18 @@ def migrate(
         migration_start_ts = time.perf_counter()

         try:
-            migrate_rows = self._apply_migrations(default_catalog, skip_backup)
+            migrate_snapshots_and_environments = self._apply_migrations(
+                default_catalog, skip_backup
+            )

-            if not migrate_rows and major_minor(SQLMESH_VERSION) == versions.minor_sqlmesh_version:
+            if not migrate_snapshots_and_environments:
+                if major_minor(SQLMESH_VERSION) != versions.minor_sqlmesh_version:
+                    self._update_versions()
                 return

-            if migrate_rows:
-                self._migrate_rows(promoted_snapshots_only)
-                # Cleanup plan DAGs since we currently don't migrate snapshot records that are in there.
-                self.engine_adapter.delete_from(self.plan_dags_table, "TRUE")
+            self._migrate_rows(promoted_snapshots_only)
+            # Cleanup plan DAGs since we currently don't migrate snapshot records that are in there.
+            self.engine_adapter.delete_from(self.plan_dags_table, "TRUE")

             self._update_versions()
             analytics.collector.on_migration_end(
@@ -1307,22 +1310,24 @@ def _apply_migrations(
     ) -> bool:
         versions = self.get_versions(validate=False)
         migrations = MIGRATIONS[versions.schema_version :]
-
-        migrate_rows = any(
+        should_backup = any(
             [
                 migrations,
                 major_minor(SQLGLOT_VERSION) != versions.minor_sqlglot_version,
                 major_minor(SQLMESH_VERSION) != versions.minor_sqlmesh_version,
             ]
         )
-        if not skip_backup and migrate_rows:
+        if not skip_backup and should_backup:
             self._backup_state()

         for migration in migrations:
             logger.info(f"Applying migration {migration}")
             migration.migrate(self, default_catalog=default_catalog)

-        return bool(migrate_rows)
+        migrate_snapshots_and_environments = (
+            bool(migrations) or major_minor(SQLGLOT_VERSION) != versions.minor_sqlglot_version
+        )
+        return migrate_snapshots_and_environments

     def _migrate_rows(self, promoted_snapshots_only: bool) -> None:
         logger.info("Fetching environments")

From 633257f7274e49e08441907c9cdf17ee8f441d81 Mon Sep 17 00:00:00 2001
From: "michael.wallis"
Date: Tue, 3 Dec 2024 06:25:12 +0000
Subject: [PATCH 5/7] revert migrate function changes

---
 sqlmesh/core/state_sync/engine_adapter.py | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/sqlmesh/core/state_sync/engine_adapter.py b/sqlmesh/core/state_sync/engine_adapter.py
index 7e837a95d5..3c477d7ae6 100644
--- a/sqlmesh/core/state_sync/engine_adapter.py
+++ b/sqlmesh/core/state_sync/engine_adapter.py
@@ -1223,18 +1223,15 @@ def migrate(
         migration_start_ts = time.perf_counter()

         try:
-            migrate_snapshots_and_environments = self._apply_migrations(
-                default_catalog, skip_backup
-            )
+            migrate_rows = self._apply_migrations(default_catalog, skip_backup)

-            if not migrate_snapshots_and_environments:
-                if major_minor(SQLMESH_VERSION) != versions.minor_sqlmesh_version:
-                    self._update_versions()
+            if not migrate_rows and major_minor(SQLMESH_VERSION) == versions.minor_sqlmesh_version:
                 return

-            self._migrate_rows(promoted_snapshots_only)
-            # Cleanup plan DAGs since we currently don't migrate snapshot records that are in there.
-            self.engine_adapter.delete_from(self.plan_dags_table, "TRUE")
+            if migrate_rows:
+                self._migrate_rows(promoted_snapshots_only)
+                # Cleanup plan DAGs since we currently don't migrate snapshot records that are in there.
+                self.engine_adapter.delete_from(self.plan_dags_table, "TRUE")

             self._update_versions()
             analytics.collector.on_migration_end(

From 08c426d05d75a0b516b1175888ae2d7a3eed43f7 Mon Sep 17 00:00:00 2001
From: "michael.wallis"
Date: Tue, 3 Dec 2024 06:25:24 +0000
Subject: [PATCH 6/7] Revert "bugfix pinning bitnami image due to pyspark pandas 3.12 issue"

This reverts commit 511cb966885681d64b4d098c6d41d050abbfbc00.
---
 tests/core/engine_adapter/integration/docker/spark/Dockerfile | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/core/engine_adapter/integration/docker/spark/Dockerfile b/tests/core/engine_adapter/integration/docker/spark/Dockerfile
index 8458d9e6d6..7fb39b840c 100644
--- a/tests/core/engine_adapter/integration/docker/spark/Dockerfile
+++ b/tests/core/engine_adapter/integration/docker/spark/Dockerfile
@@ -1,6 +1,4 @@
-# Pinned to a specific version due to PySpark Pandas requiring distutils, which disappears in Python 3.12
-# Should be fixed in v4 https://issues.apache.org/jira/browse/SPARK-45390
-FROM docker.io/bitnami/spark:3.5.2-debian-12-r0
+FROM docker.io/bitnami/spark:3.5
 USER root
 RUN install_packages curl
 USER 1001

From 9554f2642287f84dce946c6e6a0047605ab72dc9 Mon Sep 17 00:00:00 2001
From: "michael.wallis"
Date: Wed, 4 Dec 2024 09:06:52 +0000
Subject: [PATCH 7/7] Revert "bugfix account for breaking change in dbt-fabric"

This reverts commit f9eb9fa35750c792ed43c3fd2b97f757749f1598.
---
 setup.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/setup.py b/setup.py
index bae57b89fa..f44976afaf 100644
--- a/setup.py
+++ b/setup.py
@@ -106,7 +106,6 @@
             "dbt-databricks",
             "dbt-redshift",
             "dbt-sqlserver>=1.7.0",
-            "dbt-fabric!=1.8.8",  # Broken API, https://github.com/microsoft/dbt-fabric/compare/v1.8.7...v1.8.8
             "dbt-trino",
         ],
         "dbt": [