From e9de3c50d99bcc166a606f034d385015abea0906 Mon Sep 17 00:00:00 2001 From: Erin Drummond Date: Fri, 9 Jan 2026 02:42:40 +0000 Subject: [PATCH] Chore: update databricks and snowflake auth in integration tests --- .circleci/continue_config.yml | 6 +++- .circleci/manage-test-db.sh | 16 +--------- .github/workflows/pr.yaml | 2 ++ Makefile | 4 +-- sqlmesh/core/engine_adapter/databricks.py | 32 +++++++++---------- .../engine_adapter/integration/__init__.py | 5 ++- .../engine_adapter/integration/config.yaml | 7 ++-- .../engine_adapter/integration/conftest.py | 1 - .../integration/test_freshness.py | 10 ++++++ 9 files changed, 45 insertions(+), 38 deletions(-) diff --git a/.circleci/continue_config.yml b/.circleci/continue_config.yml index d5ad6d5ee1..bf27e03f47 100644 --- a/.circleci/continue_config.yml +++ b/.circleci/continue_config.yml @@ -258,6 +258,10 @@ jobs: echo "export REDSHIFT_DATABASE='$TEST_DB_NAME'" >> "$BASH_ENV" echo "export GCP_POSTGRES_DATABASE='$TEST_DB_NAME'" >> "$BASH_ENV" echo "export FABRIC_DATABASE='$TEST_DB_NAME'" >> "$BASH_ENV" + + # Make snowflake private key available + echo $SNOWFLAKE_PRIVATE_KEY_RAW | base64 -d > /tmp/snowflake-keyfile.p8 + echo "export SNOWFLAKE_PRIVATE_KEY_FILE='/tmp/snowflake-keyfile.p8'" >> "$BASH_ENV" - run: name: Create test database command: ./.circleci/manage-test-db.sh << parameters.engine >> "$TEST_DB_NAME" up @@ -309,7 +313,7 @@ workflows: matrix: parameters: engine: - #- snowflake + - snowflake - databricks - redshift - bigquery diff --git a/.circleci/manage-test-db.sh b/.circleci/manage-test-db.sh index f90b567ce8..b6e9c265c9 100755 --- a/.circleci/manage-test-db.sh +++ b/.circleci/manage-test-db.sh @@ -25,7 +25,7 @@ function_exists() { # Snowflake snowflake_init() { echo "Installing Snowflake CLI" - pip install "snowflake-cli-labs<3.8.0" + pip install "snowflake-cli" } snowflake_up() { @@ -40,20 +40,6 @@ snowflake_down() { databricks_init() { echo "Installing Databricks CLI" curl -fsSL 
https://raw.githubusercontent.com/databricks/setup-cli/main/install.sh | sudo sh || true - - echo "Writing out Databricks CLI config file" - echo -e "[DEFAULT]\nhost = $DATABRICKS_SERVER_HOSTNAME\ntoken = $DATABRICKS_ACCESS_TOKEN" > ~/.databrickscfg - - # this takes a path like 'sql/protocolv1/o/2934659247569/0723-005339-foobar' and extracts '0723-005339-foobar' from it - CLUSTER_ID=${DATABRICKS_HTTP_PATH##*/} - - echo "Extracted cluster id: $CLUSTER_ID from '$DATABRICKS_HTTP_PATH'" - - # Note: the cluster doesnt need to be running to create / drop catalogs, but it does need to be running to run the integration tests - echo "Ensuring cluster is running" - # the || true is to prevent the following error from causing an abort: - # > Error: is in unexpected state Running. - databricks clusters start $CLUSTER_ID || true } databricks_up() { diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 08ac729206..69e93635dc 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -30,6 +30,8 @@ jobs: test-vscode-e2e: runs-on: labels: [ubuntu-2204-8] + # As at 2026-01-12 this job flakes 100% of the time. 
It needs investigation + if: false steps: - uses: actions/checkout@v5 - uses: actions/setup-node@v6 diff --git a/Makefile b/Makefile index 2b3e10cb1b..611b179eba 100644 --- a/Makefile +++ b/Makefile @@ -212,14 +212,14 @@ risingwave-test: engine-risingwave-up # Cloud Engines # ################# -snowflake-test: guard-SNOWFLAKE_ACCOUNT guard-SNOWFLAKE_WAREHOUSE guard-SNOWFLAKE_DATABASE guard-SNOWFLAKE_USER guard-SNOWFLAKE_PASSWORD engine-snowflake-install +snowflake-test: guard-SNOWFLAKE_ACCOUNT guard-SNOWFLAKE_WAREHOUSE guard-SNOWFLAKE_DATABASE guard-SNOWFLAKE_USER engine-snowflake-install pytest -n auto -m "snowflake" --reruns 3 --junitxml=test-results/junit-snowflake.xml bigquery-test: guard-BIGQUERY_KEYFILE engine-bigquery-install $(PIP) install -e ".[bigframes]" pytest -n auto -m "bigquery" --reruns 3 --junitxml=test-results/junit-bigquery.xml -databricks-test: guard-DATABRICKS_CATALOG guard-DATABRICKS_SERVER_HOSTNAME guard-DATABRICKS_HTTP_PATH guard-DATABRICKS_ACCESS_TOKEN guard-DATABRICKS_CONNECT_VERSION engine-databricks-install +databricks-test: guard-DATABRICKS_CATALOG guard-DATABRICKS_SERVER_HOSTNAME guard-DATABRICKS_HTTP_PATH guard-DATABRICKS_CONNECT_VERSION engine-databricks-install $(PIP) install 'databricks-connect==${DATABRICKS_CONNECT_VERSION}' pytest -n auto -m "databricks" --reruns 3 --junitxml=test-results/junit-databricks.xml diff --git a/sqlmesh/core/engine_adapter/databricks.py b/sqlmesh/core/engine_adapter/databricks.py index 97190492f2..870b946e7d 100644 --- a/sqlmesh/core/engine_adapter/databricks.py +++ b/sqlmesh/core/engine_adapter/databricks.py @@ -78,21 +78,21 @@ def can_access_databricks_connect(cls, disable_databricks_connect: bool) -> bool def _use_spark_session(self) -> bool: if self.can_access_spark_session(bool(self._extra_config.get("disable_spark_session"))): return True - return ( - self.can_access_databricks_connect( - bool(self._extra_config.get("disable_databricks_connect")) - ) - and ( - { - 
"databricks_connect_server_hostname", - "databricks_connect_access_token", - }.issubset(self._extra_config) - ) - and ( - "databricks_connect_cluster_id" in self._extra_config - or "databricks_connect_use_serverless" in self._extra_config - ) - ) + + if self.can_access_databricks_connect( + bool(self._extra_config.get("disable_databricks_connect")) + ): + if self._extra_config.get("databricks_connect_use_serverless"): + return True + + if { + "databricks_connect_cluster_id", + "databricks_connect_server_hostname", + "databricks_connect_access_token", + }.issubset(self._extra_config): + return True + + return False @property def is_spark_session_connection(self) -> bool: @@ -108,7 +108,7 @@ def _set_spark_engine_adapter_if_needed(self) -> None: connect_kwargs = dict( host=self._extra_config["databricks_connect_server_hostname"], - token=self._extra_config["databricks_connect_access_token"], + token=self._extra_config.get("databricks_connect_access_token"), ) if "databricks_connect_use_serverless" in self._extra_config: connect_kwargs["serverless"] = True diff --git a/tests/core/engine_adapter/integration/__init__.py b/tests/core/engine_adapter/integration/__init__.py index 49624154e4..4ad6a17944 100644 --- a/tests/core/engine_adapter/integration/__init__.py +++ b/tests/core/engine_adapter/integration/__init__.py @@ -756,7 +756,10 @@ def _get_create_user_or_role( return username, f"CREATE ROLE {username}" if self.dialect == "databricks": # Creating an account-level group in Databricks requires making REST API calls so we are going to - # use a pre-created group instead. We assume the suffix on the name is the unique id + # use a pre-created group instead. We assume the suffix on the name is the unique id. 
+ # In the Databricks UI, Workspace Settings -> Identity and Access, create the following groups: + # - test_user, test_analyst, test_etl_user, test_reader, test_writer, test_admin + # (there do not need to be any users assigned to these groups) return "_".join(username.split("_")[:-1]), None if self.dialect == "bigquery": # BigQuery uses IAM service accounts that need to be pre-created diff --git a/tests/core/engine_adapter/integration/config.yaml b/tests/core/engine_adapter/integration/config.yaml index 8e87b2c3c8..0b1ecd8193 100644 --- a/tests/core/engine_adapter/integration/config.yaml +++ b/tests/core/engine_adapter/integration/config.yaml @@ -128,7 +128,7 @@ gateways: warehouse: {{ env_var('SNOWFLAKE_WAREHOUSE') }} database: {{ env_var('SNOWFLAKE_DATABASE') }} user: {{ env_var('SNOWFLAKE_USER') }} - password: {{ env_var('SNOWFLAKE_PASSWORD') }} + private_key_path: {{ env_var('SNOWFLAKE_PRIVATE_KEY_FILE', 'tests/fixtures/snowflake/rsa_key_no_pass.p8') }} check_import: false state_connection: type: duckdb @@ -139,7 +139,10 @@ gateways: catalog: {{ env_var('DATABRICKS_CATALOG') }} server_hostname: {{ env_var('DATABRICKS_SERVER_HOSTNAME') }} http_path: {{ env_var('DATABRICKS_HTTP_PATH') }} - access_token: {{ env_var('DATABRICKS_ACCESS_TOKEN') }} + auth_type: {{ env_var('DATABRICKS_AUTH_TYPE', 'databricks-oauth') }} + oauth_client_id: {{ env_var('DATABRICKS_CLIENT_ID') }} + oauth_client_secret: {{ env_var('DATABRICKS_CLIENT_SECRET') }} + databricks_connect_use_serverless: true check_import: false inttest_redshift: diff --git a/tests/core/engine_adapter/integration/conftest.py b/tests/core/engine_adapter/integration/conftest.py index 308819b671..3fb4bc15f1 100644 --- a/tests/core/engine_adapter/integration/conftest.py +++ b/tests/core/engine_adapter/integration/conftest.py @@ -7,7 +7,6 @@ import logging from pytest import FixtureRequest - from sqlmesh import Config, EngineAdapter from sqlmesh.core.constants import SQLMESH_PATH from sqlmesh.core.config.connection 
import ( diff --git a/tests/core/engine_adapter/integration/test_freshness.py b/tests/core/engine_adapter/integration/test_freshness.py index 5e4c4cf439..e5ee574e7e 100644 --- a/tests/core/engine_adapter/integration/test_freshness.py +++ b/tests/core/engine_adapter/integration/test_freshness.py @@ -25,6 +25,16 @@ EVALUATION_SPY = None +@pytest.fixture(autouse=True) +def _skip_snowflake(ctx: TestContext): + if ctx.dialect == "snowflake": + # these tests use callbacks that need to run db queries within a time_travel context that changes the system time to be in the future + # this causes invalid JWTs to be generated when the callbacks try to run a db query + pytest.skip( + "snowflake.connector generates an invalid JWT when time_travel changes the system time" + ) + + # Mock the snapshot evaluator's evaluate function to count the number of times it is called @pytest.fixture(autouse=True, scope="function") def _install_evaluation_spy(mocker: MockerFixture):