From d4a3acbd7b46e929d51a4852fbe8c92b95ea39fc Mon Sep 17 00:00:00 2001 From: Elena Felder <41136058+elefeint@users.noreply.github.com> Date: Thu, 8 Jan 2026 12:38:17 -0500 Subject: [PATCH 1/8] Chore(deps): update the minimum required duckdb version for motherduck (#5650) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 97c190a290..2c140d4770 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -111,7 +111,7 @@ duckdb = [] fabric = ["pyodbc>=5.0.0"] gcppostgres = ["cloud-sql-python-connector[pg8000]>=1.8.0"] github = ["PyGithub>=2.6.0"] -motherduck = ["duckdb>=1.2.0"] +motherduck = ["duckdb>=1.3.2"] mssql = ["pymssql"] mssql-odbc = ["pyodbc>=5.0.0"] mysql = ["pymysql"] From c668eef01ce844827099a046c054909efc2f6c72 Mon Sep 17 00:00:00 2001 From: Trey Spiller <1831878+treysp@users.noreply.github.com> Date: Fri, 9 Jan 2026 12:14:22 -0600 Subject: [PATCH 2/8] Fix: ignore non-key "dialect" in MODEL/AUDIT block (#5651) --- sqlmesh/core/dialect.py | 12 +++- tests/core/test_model.py | 150 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 160 insertions(+), 2 deletions(-) diff --git a/sqlmesh/core/dialect.py b/sqlmesh/core/dialect.py index 332550d57c..72115fc4a3 100644 --- a/sqlmesh/core/dialect.py +++ b/sqlmesh/core/dialect.py @@ -803,8 +803,15 @@ def text_diff( return "\n".join(unified_diff(a_sql, b_sql)) +WS_OR_COMMENT = r"(?:\s|--[^\n]*\n|/\*.*?\*/)" +HEADER = r"\b(?:model|audit)\b(?=\s*\()" +KEY_BOUNDARY = r"(?:\(|,)" # key is preceded by either '(' or ',' +DIALECT_VALUE = r"['\"]?(?P<dialect>[a-z][a-z0-9]*)['\"]?" 
+VALUE_BOUNDARY = r"(?=,|\))" # value is followed by comma or closing paren + DIALECT_PATTERN = re.compile( - r"(model|audit).*?\(.*?dialect\s+'?([a-z]*)", re.IGNORECASE | re.DOTALL + rf"{HEADER}.*?{KEY_BOUNDARY}{WS_OR_COMMENT}*dialect{WS_OR_COMMENT}+{DIALECT_VALUE}{WS_OR_COMMENT}*{VALUE_BOUNDARY}", + re.IGNORECASE | re.DOTALL, ) @@ -895,7 +902,8 @@ def parse( A list of the parsed expressions: [Model, *Statements, Query, *Statements] """ match = match_dialect and DIALECT_PATTERN.search(sql[:MAX_MODEL_DEFINITION_SIZE]) - dialect = Dialect.get_or_raise(match.group(2) if match else default_dialect) + dialect_str = match.group("dialect") if match else None + dialect = Dialect.get_or_raise(dialect_str or default_dialect) tokens = dialect.tokenize(sql) chunks: t.List[t.Tuple[t.List[Token], ChunkType]] = [([], ChunkType.SQL)] diff --git a/tests/core/test_model.py b/tests/core/test_model.py index 3e0f6d40b9..f9ef97ecc0 100644 --- a/tests/core/test_model.py +++ b/tests/core/test_model.py @@ -2727,6 +2727,156 @@ def test_parse(assert_exp_eq): ) +def test_dialect_pattern(): + def make_test_sql(text: str) -> str: + return f""" + MODEL ( + name test_model, + kind INCREMENTAL_BY_TIME_RANGE( + time_column ds + ), + {text} + ); + + SELECT 1; + """ + + def assert_match(test_sql: str, expected_value: t.Optional[str] = "duckdb"): + match = d.DIALECT_PATTERN.search(test_sql) + + dialect_str: t.Optional[str] = None + if expected_value is not None: + assert match + dialect_str = match.group("dialect") + + assert dialect_str == expected_value + + # single-quoted dialect + assert_match( + make_test_sql( + """ + dialect 'duckdb', + description 'there's a dialect foo in here too!' + """ + ) + ) + + # bare dialect + assert_match( + make_test_sql( + """ + dialect duckdb, + description 'there's a dialect foo in here too!' + """ + ) + ) + + # double-quoted dialect (allowed in BQ) + assert_match( + make_test_sql( + """ + dialect "duckdb", + description 'there's a dialect foo in here too!' 
+ """ + ) + ) + + # no dialect specified, "dialect" in description + test_sql = make_test_sql( + """ + description 'there's a dialect foo in here too!' + """ + ) + + matches = list(d.DIALECT_PATTERN.finditer(test_sql)) + assert not matches + + # line comment between properties + assert_match( + make_test_sql( + """ + tag my_tag, -- comment + dialect duckdb + """ + ) + ) + + # block comment between properties + assert_match( + make_test_sql( + """ + tag my_tag, /* comment */ + dialect duckdb + """ + ) + ) + + # quoted empty dialect + assert_match( + make_test_sql( + """ + dialect '', + tag my_tag + """ + ), + None, + ) + + # double-quoted empty dialect + assert_match( + make_test_sql( + """ + dialect "", + tag my_tag + """ + ), + None, + ) + + # trailing comment after dialect value + assert_match( + make_test_sql( + """ + dialect duckdb -- trailing comment + """ + ) + ) + + # dialect value isn't terminated by ',' or ')' + test_sql = make_test_sql( + """ + dialect duckdb -- trailing comment + tag my_tag + """ + ) + + matches = list(d.DIALECT_PATTERN.finditer(test_sql)) + assert not matches + + # dialect first + assert_match( + """ + MODEL( + dialect duckdb, + name my_name + ); + """ + ) + + # full parse + sql = """ + MODEL ( + name test_model, + description 'this text mentions dialect foo but is not a property' + ); + + SELECT 1; + """ + expressions = d.parse(sql, default_dialect="duckdb") + model = load_sql_based_model(expressions) + assert model.dialect == "" + + CONST = "bar" From 529ed0050c3e940834cd8833c7c747e86575a9d8 Mon Sep 17 00:00:00 2001 From: Erin Drummond Date: Mon, 12 Jan 2026 12:42:28 +1300 Subject: [PATCH 3/8] Chore: update databricks and snowflake auth in integration tests (#5652) --- .circleci/continue_config.yml | 6 +++- .circleci/manage-test-db.sh | 16 +--------- .github/workflows/pr.yaml | 2 ++ Makefile | 4 +-- sqlmesh/core/engine_adapter/databricks.py | 32 +++++++++---------- .../engine_adapter/integration/__init__.py | 5 ++- 
.../engine_adapter/integration/config.yaml | 7 ++-- .../engine_adapter/integration/conftest.py | 1 - .../integration/test_freshness.py | 10 ++++++ 9 files changed, 45 insertions(+), 38 deletions(-) diff --git a/.circleci/continue_config.yml b/.circleci/continue_config.yml index d5ad6d5ee1..bf27e03f47 100644 --- a/.circleci/continue_config.yml +++ b/.circleci/continue_config.yml @@ -258,6 +258,10 @@ jobs: echo "export REDSHIFT_DATABASE='$TEST_DB_NAME'" >> "$BASH_ENV" echo "export GCP_POSTGRES_DATABASE='$TEST_DB_NAME'" >> "$BASH_ENV" echo "export FABRIC_DATABASE='$TEST_DB_NAME'" >> "$BASH_ENV" + + # Make snowflake private key available + echo $SNOWFLAKE_PRIVATE_KEY_RAW | base64 -d > /tmp/snowflake-keyfile.p8 + echo "export SNOWFLAKE_PRIVATE_KEY_FILE='/tmp/snowflake-keyfile.p8'" >> "$BASH_ENV" - run: name: Create test database command: ./.circleci/manage-test-db.sh << parameters.engine >> "$TEST_DB_NAME" up @@ -309,7 +313,7 @@ workflows: matrix: parameters: engine: - #- snowflake + - snowflake - databricks - redshift - bigquery diff --git a/.circleci/manage-test-db.sh b/.circleci/manage-test-db.sh index f90b567ce8..b6e9c265c9 100755 --- a/.circleci/manage-test-db.sh +++ b/.circleci/manage-test-db.sh @@ -25,7 +25,7 @@ function_exists() { # Snowflake snowflake_init() { echo "Installing Snowflake CLI" - pip install "snowflake-cli-labs<3.8.0" + pip install "snowflake-cli" } snowflake_up() { @@ -40,20 +40,6 @@ snowflake_down() { databricks_init() { echo "Installing Databricks CLI" curl -fsSL https://raw.githubusercontent.com/databricks/setup-cli/main/install.sh | sudo sh || true - - echo "Writing out Databricks CLI config file" - echo -e "[DEFAULT]\nhost = $DATABRICKS_SERVER_HOSTNAME\ntoken = $DATABRICKS_ACCESS_TOKEN" > ~/.databrickscfg - - # this takes a path like 'sql/protocolv1/o/2934659247569/0723-005339-foobar' and extracts '0723-005339-foobar' from it - CLUSTER_ID=${DATABRICKS_HTTP_PATH##*/} - - echo "Extracted cluster id: $CLUSTER_ID from '$DATABRICKS_HTTP_PATH'" - 
- # Note: the cluster doesnt need to be running to create / drop catalogs, but it does need to be running to run the integration tests - echo "Ensuring cluster is running" - # the || true is to prevent the following error from causing an abort: - # > Error: is in unexpected state Running. - databricks clusters start $CLUSTER_ID || true } databricks_up() { diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 08ac729206..69e93635dc 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -30,6 +30,8 @@ jobs: test-vscode-e2e: runs-on: labels: [ubuntu-2204-8] + # As at 2026-01-12 this job flakes 100% of the time. It needs investigation + if: false steps: - uses: actions/checkout@v5 - uses: actions/setup-node@v6 diff --git a/Makefile b/Makefile index 2b3e10cb1b..611b179eba 100644 --- a/Makefile +++ b/Makefile @@ -212,14 +212,14 @@ risingwave-test: engine-risingwave-up # Cloud Engines # ################# -snowflake-test: guard-SNOWFLAKE_ACCOUNT guard-SNOWFLAKE_WAREHOUSE guard-SNOWFLAKE_DATABASE guard-SNOWFLAKE_USER guard-SNOWFLAKE_PASSWORD engine-snowflake-install +snowflake-test: guard-SNOWFLAKE_ACCOUNT guard-SNOWFLAKE_WAREHOUSE guard-SNOWFLAKE_DATABASE guard-SNOWFLAKE_USER engine-snowflake-install pytest -n auto -m "snowflake" --reruns 3 --junitxml=test-results/junit-snowflake.xml bigquery-test: guard-BIGQUERY_KEYFILE engine-bigquery-install $(PIP) install -e ".[bigframes]" pytest -n auto -m "bigquery" --reruns 3 --junitxml=test-results/junit-bigquery.xml -databricks-test: guard-DATABRICKS_CATALOG guard-DATABRICKS_SERVER_HOSTNAME guard-DATABRICKS_HTTP_PATH guard-DATABRICKS_ACCESS_TOKEN guard-DATABRICKS_CONNECT_VERSION engine-databricks-install +databricks-test: guard-DATABRICKS_CATALOG guard-DATABRICKS_SERVER_HOSTNAME guard-DATABRICKS_HTTP_PATH guard-DATABRICKS_CONNECT_VERSION engine-databricks-install $(PIP) install 'databricks-connect==${DATABRICKS_CONNECT_VERSION}' pytest -n auto -m "databricks" --reruns 3 
--junitxml=test-results/junit-databricks.xml diff --git a/sqlmesh/core/engine_adapter/databricks.py b/sqlmesh/core/engine_adapter/databricks.py index 97190492f2..870b946e7d 100644 --- a/sqlmesh/core/engine_adapter/databricks.py +++ b/sqlmesh/core/engine_adapter/databricks.py @@ -78,21 +78,21 @@ def can_access_databricks_connect(cls, disable_databricks_connect: bool) -> bool def _use_spark_session(self) -> bool: if self.can_access_spark_session(bool(self._extra_config.get("disable_spark_session"))): return True - return ( - self.can_access_databricks_connect( - bool(self._extra_config.get("disable_databricks_connect")) - ) - and ( - { - "databricks_connect_server_hostname", - "databricks_connect_access_token", - }.issubset(self._extra_config) - ) - and ( - "databricks_connect_cluster_id" in self._extra_config - or "databricks_connect_use_serverless" in self._extra_config - ) - ) + + if self.can_access_databricks_connect( + bool(self._extra_config.get("disable_databricks_connect")) + ): + if self._extra_config.get("databricks_connect_use_serverless"): + return True + + if { + "databricks_connect_cluster_id", + "databricks_connect_server_hostname", + "databricks_connect_access_token", + }.issubset(self._extra_config): + return True + + return False @property def is_spark_session_connection(self) -> bool: @@ -108,7 +108,7 @@ def _set_spark_engine_adapter_if_needed(self) -> None: connect_kwargs = dict( host=self._extra_config["databricks_connect_server_hostname"], - token=self._extra_config["databricks_connect_access_token"], + token=self._extra_config.get("databricks_connect_access_token"), ) if "databricks_connect_use_serverless" in self._extra_config: connect_kwargs["serverless"] = True diff --git a/tests/core/engine_adapter/integration/__init__.py b/tests/core/engine_adapter/integration/__init__.py index 49624154e4..4ad6a17944 100644 --- a/tests/core/engine_adapter/integration/__init__.py +++ b/tests/core/engine_adapter/integration/__init__.py @@ -756,7 +756,10 @@ 
def _get_create_user_or_role( return username, f"CREATE ROLE {username}" if self.dialect == "databricks": # Creating an account-level group in Databricks requires making REST API calls so we are going to - # use a pre-created group instead. We assume the suffix on the name is the unique id + # use a pre-created group instead. We assume the suffix on the name is the unique id. + # In the Databricks UI, Workspace Settings -> Identity and Access, create the following groups: + # - test_user, test_analyst, test_etl_user, test_reader, test_writer, test_admin + # (there do not need to be any users assigned to these groups) return "_".join(username.split("_")[:-1]), None if self.dialect == "bigquery": # BigQuery uses IAM service accounts that need to be pre-created diff --git a/tests/core/engine_adapter/integration/config.yaml b/tests/core/engine_adapter/integration/config.yaml index 8e87b2c3c8..0b1ecd8193 100644 --- a/tests/core/engine_adapter/integration/config.yaml +++ b/tests/core/engine_adapter/integration/config.yaml @@ -128,7 +128,7 @@ gateways: warehouse: {{ env_var('SNOWFLAKE_WAREHOUSE') }} database: {{ env_var('SNOWFLAKE_DATABASE') }} user: {{ env_var('SNOWFLAKE_USER') }} - password: {{ env_var('SNOWFLAKE_PASSWORD') }} + private_key_path: {{ env_var('SNOWFLAKE_PRIVATE_KEY_FILE', 'tests/fixtures/snowflake/rsa_key_no_pass.p8') }} check_import: false state_connection: type: duckdb @@ -139,7 +139,10 @@ gateways: catalog: {{ env_var('DATABRICKS_CATALOG') }} server_hostname: {{ env_var('DATABRICKS_SERVER_HOSTNAME') }} http_path: {{ env_var('DATABRICKS_HTTP_PATH') }} - access_token: {{ env_var('DATABRICKS_ACCESS_TOKEN') }} + auth_type: {{ env_var('DATABRICKS_AUTH_TYPE', 'databricks-oauth') }} + oauth_client_id: {{ env_var('DATABRICKS_CLIENT_ID') }} + oauth_client_secret: {{ env_var('DATABRICKS_CLIENT_SECRET') }} + databricks_connect_use_serverless: true check_import: false inttest_redshift: diff --git a/tests/core/engine_adapter/integration/conftest.py 
b/tests/core/engine_adapter/integration/conftest.py index 308819b671..3fb4bc15f1 100644 --- a/tests/core/engine_adapter/integration/conftest.py +++ b/tests/core/engine_adapter/integration/conftest.py @@ -7,7 +7,6 @@ import logging from pytest import FixtureRequest - from sqlmesh import Config, EngineAdapter from sqlmesh.core.constants import SQLMESH_PATH from sqlmesh.core.config.connection import ( diff --git a/tests/core/engine_adapter/integration/test_freshness.py b/tests/core/engine_adapter/integration/test_freshness.py index 5e4c4cf439..e5ee574e7e 100644 --- a/tests/core/engine_adapter/integration/test_freshness.py +++ b/tests/core/engine_adapter/integration/test_freshness.py @@ -25,6 +25,16 @@ EVALUATION_SPY = None +@pytest.fixture(autouse=True) +def _skip_snowflake(ctx: TestContext): + if ctx.dialect == "snowflake": + # these tests use callbacks that need to run db queries within a time_travel context that changes the system time to be in the future + # this causes invalid JWT's to be generated when the callbacks try to run a db query + pytest.skip( + "snowflake.connector generates an invalid JWT when time_travel changes the system time" + ) + + # Mock the snapshot evaluator's evaluate function to count the number of times it is called @pytest.fixture(autouse=True, scope="function") def _install_evaluation_spy(mocker: MockerFixture): From d5ceeb27c489875857d7c644bff5dd60417cc4bc Mon Sep 17 00:00:00 2001 From: Vaggelis Danias Date: Tue, 13 Jan 2026 10:23:33 +0200 Subject: [PATCH 4/8] Chore(cicd_bot): Make console printing optional (#5656) --- sqlmesh/integrations/github/cicd/command.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/sqlmesh/integrations/github/cicd/command.py b/sqlmesh/integrations/github/cicd/command.py index f1b611150a..5506d4917b 100644 --- a/sqlmesh/integrations/github/cicd/command.py +++ b/sqlmesh/integrations/github/cicd/command.py @@ -25,12 +25,21 @@ envvar="GITHUB_TOKEN", help="The Github Token to be 
used. Pass in `${{ secrets.GITHUB_TOKEN }}` if you want to use the one created by Github actions", ) +@click.option( + "--full-logs", + is_flag=True, + help="Whether to print all logs in the Github Actions output or only in their relevant GA check", +) @click.pass_context -def github(ctx: click.Context, token: str) -> None: +def github(ctx: click.Context, token: str, full_logs: bool = False) -> None: """Github Action CI/CD Bot. See https://sqlmesh.readthedocs.io/en/stable/integrations/github/ for details""" # set a larger width because if none is specified, it auto-detects 80 characters when running in GitHub Actions # which can result in surprise newlines when outputting dates to backfill - set_console(MarkdownConsole(width=1000, warning_capture_only=True, error_capture_only=True)) + set_console( + MarkdownConsole( + width=1000, warning_capture_only=not full_logs, error_capture_only=not full_logs + ) + ) ctx.obj["github"] = GithubController( paths=ctx.obj["paths"], token=token, From 2e5587700a7028824f7581dfb18fc6547b524b8e Mon Sep 17 00:00:00 2001 From: Jesse Hodges Date: Tue, 20 Jan 2026 11:56:11 -0600 Subject: [PATCH 5/8] expose a source option for trino (#5672) --- docs/integrations/engines/trino.md | 1 + sqlmesh/core/config/connection.py | 4 ++- tests/core/engine_adapter/test_trino.py | 4 +++ tests/core/test_config.py | 33 +++++++++++++++++++++++++ 4 files changed, 41 insertions(+), 1 deletion(-) diff --git a/docs/integrations/engines/trino.md b/docs/integrations/engines/trino.md index ec1139e20d..db732f0cc1 100644 --- a/docs/integrations/engines/trino.md +++ b/docs/integrations/engines/trino.md @@ -90,6 +90,7 @@ hive.metastore.glue.default-warehouse-dir=s3://my-bucket/ | `http_scheme` | The HTTP scheme to use when connecting to your cluster. By default, it's `https` and can only be `http` for no-auth or basic auth. | string | N | | `port` | The port to connect to your cluster. 
By default, it's `443` for `https` scheme and `80` for `http` | int | N | | `roles` | Mapping of catalog name to a role | dict | N | +| `source` | Value to send as Trino's `source` field for query attribution / auditing. Default: `sqlmesh`. | string | N | | `http_headers` | Additional HTTP headers to send with each request. | dict | N | | `session_properties` | Trino session properties. Run `SHOW SESSION` to see all options. | dict | N | | `retries` | Number of retries to attempt when a request fails. Default: `3` | int | N | diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py index 638f0c28c8..4e11fc626f 100644 --- a/sqlmesh/core/config/connection.py +++ b/sqlmesh/core/config/connection.py @@ -1888,6 +1888,7 @@ class TrinoConnectionConfig(ConnectionConfig): client_certificate: t.Optional[str] = None client_private_key: t.Optional[str] = None cert: t.Optional[str] = None + source: str = "sqlmesh" # SQLMesh options schema_location_mapping: t.Optional[dict[re.Pattern, str]] = None @@ -1984,6 +1985,7 @@ def _connection_kwargs_keys(self) -> t.Set[str]: "port", "catalog", "roles", + "source", "http_scheme", "http_headers", "session_properties", @@ -2041,7 +2043,7 @@ def _static_connection_kwargs(self) -> t.Dict[str, t.Any]: "user": self.impersonation_user or self.user, "max_attempts": self.retries, "verify": self.cert if self.cert is not None else self.verify, - "source": "sqlmesh", + "source": self.source, } @property diff --git a/tests/core/engine_adapter/test_trino.py b/tests/core/engine_adapter/test_trino.py index a3c67eb023..1bfe82b858 100644 --- a/tests/core/engine_adapter/test_trino.py +++ b/tests/core/engine_adapter/test_trino.py @@ -412,6 +412,8 @@ def test_timestamp_mapping(): catalog="catalog", ) + assert config._connection_factory_with_kwargs.keywords["source"] == "sqlmesh" + adapter = config.create_engine_adapter() assert adapter.timestamp_mapping is None @@ -419,11 +421,13 @@ def test_timestamp_mapping(): user="user", 
host="host", catalog="catalog", + source="my_source", timestamp_mapping={ "TIMESTAMP": "TIMESTAMP(6)", "TIMESTAMP(3)": "TIMESTAMP WITH TIME ZONE", }, ) + assert config._connection_factory_with_kwargs.keywords["source"] == "my_source" adapter = config.create_engine_adapter() assert adapter.timestamp_mapping is not None assert adapter.timestamp_mapping[exp.DataType.build("TIMESTAMP")] == exp.DataType.build( diff --git a/tests/core/test_config.py b/tests/core/test_config.py index d0fad16e76..f3a0de6672 100644 --- a/tests/core/test_config.py +++ b/tests/core/test_config.py @@ -862,6 +862,39 @@ def test_trino_schema_location_mapping_syntax(tmp_path): assert len(conn.schema_location_mapping) == 2 +def test_trino_source_option(tmp_path): + config_path = tmp_path / "config_trino_source.yaml" + with open(config_path, "w", encoding="utf-8") as fd: + fd.write( + """ + gateways: + trino: + connection: + type: trino + user: trino + host: trino + catalog: trino + source: my_sqlmesh_source + + default_gateway: trino + + model_defaults: + dialect: trino + """ + ) + + config = load_config_from_paths( + Config, + project_paths=[config_path], + ) + + from sqlmesh.core.config.connection import TrinoConnectionConfig + + conn = config.gateways["trino"].connection + assert isinstance(conn, TrinoConnectionConfig) + assert conn.source == "my_sqlmesh_source" + + def test_gcp_postgres_ip_and_scopes(tmp_path): config_path = tmp_path / "config_gcp_postgres.yaml" with open(config_path, "w", encoding="utf-8") as fd: From 6ddca26e92c6a515d9fc742f39d3d5a7589ffb9c Mon Sep 17 00:00:00 2001 From: Ryan Eakman <6326532+eakmanrq@users.noreply.github.com> Date: Tue, 20 Jan 2026 15:36:29 -0800 Subject: [PATCH 6/8] feat: add ability to disable blocked check merge pr (#5676) --- docs/integrations/github.md | 31 ++++++++++--------- sqlmesh/integrations/github/cicd/config.py | 1 + .../integrations/github/cicd/controller.py | 4 +-- .../github/cicd/test_github_controller.py | 12 +++++++ 4 files changed, 31 
insertions(+), 17 deletions(-) diff --git a/docs/integrations/github.md b/docs/integrations/github.md index a11d90d044..923714888e 100644 --- a/docs/integrations/github.md +++ b/docs/integrations/github.md @@ -286,21 +286,22 @@ Below is an example of how to define the default config for the bot in either YA ### Configuration Properties -| Option | Description | Type | Required | -|---------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------:|:--------:| -| `invalidate_environment_after_deploy` | Indicates if the PR environment created should be automatically invalidated after changes are deployed. Invalidated environments are cleaned up automatically by the Janitor. Default: `True` | bool | N | -| `merge_method` | The merge method to use when automatically merging a PR after deploying to prod. Defaults to `None` meaning automatic merge is not done. Options: `merge`, `squash`, `rebase` | string | N | -| `enable_deploy_command` | Indicates if the `/deploy` command should be enabled in order to allowed synchronized deploys to production. Default: `False` | bool | N | -| `command_namespace` | The namespace to use for SQLMesh commands. For example if you provide `#SQLMesh` as a value then commands will be expected in the format of `#SQLMesh/`. Default: `None` meaning no namespace is used. | string | N | -| `auto_categorize_changes` | Auto categorization behavior to use for the bot. If not provided then the project-wide categorization behavior is used. See [Auto-categorize model changes](https://sqlmesh.readthedocs.io/en/stable/guides/configuration/#auto-categorize-model-changes) for details. 
| dict | N | -| `default_pr_start` | Default start when creating PR environment plans. If running in a mode where the bot automatically backfills models (based on `auto_categorize_changes` behavior) then this can be used to limit the amount of data backfilled. Defaults to `None` meaning the start date is set to the earliest model's start or to 1 day ago if [data previews](../concepts/plans.md#data-preview) need to be computed.| str | N | -| `pr_min_intervals` | Intended for use when `default_pr_start` is set to a relative time, eg `1 week ago`. This ensures that at least this many intervals across every model are included for backfill in the PR environment. Without this, models with an interval unit wider than `default_pr_start` (such as `@monthly` models if `default_pr_start` was set to `1 week ago`) will be excluded from backfill entirely. | int | N | -| `skip_pr_backfill` | Indicates if the bot should skip backfilling models in the PR environment. Default: `True` | bool | N | -| `pr_include_unmodified` | Indicates whether to include unmodified models in the PR environment. Default to the project's config value (which defaults to `False`) | bool | N | -| `run_on_deploy_to_prod` | Indicates whether to run latest intervals when deploying to prod. If set to false, the deployment will backfill only the changed models up to the existing latest interval in production, ignoring any missing intervals beyond this point. Default: `False` | bool | N | -| `pr_environment_name` | The name of the PR environment to create for which a PR number will be appended to. Defaults to the repo name if not provided. Note: The name will be normalized to alphanumeric + underscore and lowercase. | str | N | -| `prod_branch_name` | The name of the git branch associated with production. Ex: `prod`. 
Default: `main` or `master` is considered prod | str | N | -| `forward_only_branch_suffix` | If the git branch has this suffix, trigger a [forward-only](../concepts/plans.md#forward-only-plans) plan instead of a normal plan. Default: `-forward-only` | str | N | +| Option | Description | Type | Required | +|---------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------:|:--------:| +| `invalidate_environment_after_deploy` | Indicates if the PR environment created should be automatically invalidated after changes are deployed. Invalidated environments are cleaned up automatically by the Janitor. Default: `True` | bool | N | +| `merge_method` | The merge method to use when automatically merging a PR after deploying to prod. Defaults to `None` meaning automatic merge is not done. Options: `merge`, `squash`, `rebase` | string | N | +| `enable_deploy_command` | Indicates if the `/deploy` command should be enabled in order to allowed synchronized deploys to production. Default: `False` | bool | N | +| `command_namespace` | The namespace to use for SQLMesh commands. For example if you provide `#SQLMesh` as a value then commands will be expected in the format of `#SQLMesh/`. Default: `None` meaning no namespace is used. | string | N | +| `auto_categorize_changes` | Auto categorization behavior to use for the bot. If not provided then the project-wide categorization behavior is used. See [Auto-categorize model changes](https://sqlmesh.readthedocs.io/en/stable/guides/configuration/#auto-categorize-model-changes) for details. 
| dict | N | +| `default_pr_start` | Default start when creating PR environment plans. If running in a mode where the bot automatically backfills models (based on `auto_categorize_changes` behavior) then this can be used to limit the amount of data backfilled. Defaults to `None` meaning the start date is set to the earliest model's start or to 1 day ago if [data previews](../concepts/plans.md#data-preview) need to be computed. | str | N | +| `pr_min_intervals` | Intended for use when `default_pr_start` is set to a relative time, eg `1 week ago`. This ensures that at least this many intervals across every model are included for backfill in the PR environment. Without this, models with an interval unit wider than `default_pr_start` (such as `@monthly` models if `default_pr_start` was set to `1 week ago`) will be excluded from backfill entirely. | int | N | +| `skip_pr_backfill` | Indicates if the bot should skip backfilling models in the PR environment. Default: `True` | bool | N | +| `pr_include_unmodified` | Indicates whether to include unmodified models in the PR environment. Default to the project's config value (which defaults to `False`) | bool | N | +| `run_on_deploy_to_prod` | Indicates whether to run latest intervals when deploying to prod. If set to false, the deployment will backfill only the changed models up to the existing latest interval in production, ignoring any missing intervals beyond this point. Default: `False` | bool | N | +| `pr_environment_name` | The name of the PR environment to create for which a PR number will be appended to. Defaults to the repo name if not provided. Note: The name will be normalized to alphanumeric + underscore and lowercase. | str | N | +| `prod_branch_name` | The name of the git branch associated with production. Ex: `prod`. 
Default: `main` or `master` is considered prod | str | N | +| `forward_only_branch_suffix` | If the git branch has this suffix, trigger a [forward-only](../concepts/plans.md#forward-only-plans) plan instead of a normal plan. Default: `-forward-only` | str | N | +| `check_if_blocked_on_deploy_to_prod` | The bot normally checks if a PR is blocked from merging before deploying to production. Setting this to `False` will skip that check. Default: `True` | bool | N | Example with all properties defined: diff --git a/sqlmesh/integrations/github/cicd/config.py b/sqlmesh/integrations/github/cicd/config.py index a287bf1af5..7fb3a0f5b6 100644 --- a/sqlmesh/integrations/github/cicd/config.py +++ b/sqlmesh/integrations/github/cicd/config.py @@ -36,6 +36,7 @@ class GithubCICDBotConfig(BaseConfig): forward_only_branch_suffix_: t.Optional[str] = Field( default=None, alias="forward_only_branch_suffix" ) + check_if_blocked_on_deploy_to_prod: bool = True @model_validator(mode="before") @classmethod diff --git a/sqlmesh/integrations/github/cicd/controller.py b/sqlmesh/integrations/github/cicd/controller.py index b27be4070b..40102b97e8 100644 --- a/sqlmesh/integrations/github/cicd/controller.py +++ b/sqlmesh/integrations/github/cicd/controller.py @@ -772,10 +772,10 @@ def deploy_to_prod(self) -> None: "PR is already merged and this event was triggered prior to the merge." ) merge_status = self._get_merge_state_status() - if merge_status.is_blocked: + if self.bot_config.check_if_blocked_on_deploy_to_prod and merge_status.is_blocked: raise CICDBotError( "Branch protection or ruleset requirement is likely not satisfied, e.g. missing CODEOWNERS approval. " - "Please check PR and resolve any issues." + "Please check PR and resolve any issues. To disable this check, set `check_if_blocked_on_deploy_to_prod` to false in the bot configuration." 
) if merge_status.is_dirty: raise CICDBotError( diff --git a/tests/integrations/github/cicd/test_github_controller.py b/tests/integrations/github/cicd/test_github_controller.py index baa0fb9ad2..e4fe10e321 100644 --- a/tests/integrations/github/cicd/test_github_controller.py +++ b/tests/integrations/github/cicd/test_github_controller.py @@ -476,6 +476,18 @@ def test_deploy_to_prod_blocked_pr(github_client, make_controller): controller.deploy_to_prod() +def test_deploy_to_prod_not_blocked_pr_if_config_set(github_client, make_controller): + mock_pull_request = github_client.get_repo().get_pull() + mock_pull_request.merged = False + controller = make_controller( + "tests/fixtures/github/pull_request_synchronized.json", + github_client, + merge_state_status=MergeStateStatus.BLOCKED, + bot_config=GithubCICDBotConfig(check_if_blocked_on_deploy_to_prod=False), + ) + controller.deploy_to_prod() + + def test_deploy_to_prod_dirty_pr(github_client, make_controller): mock_pull_request = github_client.get_repo().get_pull() mock_pull_request.merged = False From a5544e273171fb3b5e43b310d000e2977f3f7bb9 Mon Sep 17 00:00:00 2001 From: Ryan Eakman <6326532+eakmanrq@users.noreply.github.com> Date: Wed, 21 Jan 2026 16:56:36 -0800 Subject: [PATCH 7/8] fix: exclude pandas 3 (#5681) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2c140d4770..1a674dea72 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ dependencies = [ "ipywidgets", "jinja2", "packaging", - "pandas", + "pandas<3.0.0", "pydantic>=2.0.0", "python-dotenv", "requests", From 4f833af9f3ad5cec0cbc8b4aca5dcf20548c9f8c Mon Sep 17 00:00:00 2001 From: Jesse Hodges Date: Mon, 26 Jan 2026 14:05:18 -0600 Subject: [PATCH 8/8] document query_label session property (#5683) --- docs/integrations/engines/bigquery.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/docs/integrations/engines/bigquery.md 
b/docs/integrations/engines/bigquery.md index a454996ecd..b93d6837ed 100644 --- a/docs/integrations/engines/bigquery.md +++ b/docs/integrations/engines/bigquery.md @@ -193,6 +193,23 @@ If the `impersonated_service_account` argument is set, SQLMesh will: The user account must have [sufficient permissions to impersonate the service account](https://cloud.google.com/docs/authentication/use-service-account-impersonation). +## Query Label + +BigQuery supports a `query_label` session variable which is attached to query jobs and can be used for auditing / attribution. + +SQLMesh supports setting it via `session_properties.query_label` on a model, as an array (or tuple) of key/value tuples. + +Example: +```sql +MODEL ( + name my_project.my_dataset.my_model, + dialect 'bigquery', + session_properties ( + query_label = [('team', 'data_platform'), ('env', 'prod')] + ) +); +``` + ## Permissions Required With any of the above connection methods, ensure these BigQuery permissions are enabled to allow SQLMesh to work correctly.