From 986ca14db2942f5e02588aa39299ce752b3b01d9 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Thu, 30 May 2024 13:57:33 +0200 Subject: [PATCH] Implement per-provider tests with lowest-direct dependency resolution With this change we are running tests of downgrading Airflow dependencies to "lowest-direct" ones - separately for "core" tests and for each provider (and run corresponding tests with it). This should allow us to determine what are the lowest bounds for the dependencies - for Airflow and for individual providers and continue doing it while Airflow evolves in the future. Fixes: #35549 Related: #39100 --- .github/workflows/ci.yml | 5 + .github/workflows/run-unit-tests.yml | 12 ++ .github/workflows/special-tests.yml | 31 ++++ Dockerfile.ci | 37 +++- airflow/providers/amazon/provider.yaml | 16 +- airflow/providers/apache/flink/provider.yaml | 2 +- airflow/providers/apache/hdfs/provider.yaml | 6 +- airflow/providers/apache/hive/provider.yaml | 7 +- airflow/providers/apache/kafka/provider.yaml | 4 +- airflow/providers/apache/kylin/provider.yaml | 2 +- airflow/providers/apache/spark/provider.yaml | 2 +- airflow/providers/cloudant/provider.yaml | 2 +- .../providers/cncf/kubernetes/provider.yaml | 2 +- airflow/providers/common/sql/provider.yaml | 4 +- airflow/providers/databricks/provider.yaml | 3 + airflow/providers/exasol/provider.yaml | 4 +- airflow/providers/fab/provider.yaml | 2 +- airflow/providers/facebook/provider.yaml | 2 +- airflow/providers/github/provider.yaml | 4 +- airflow/providers/google/provider.yaml | 24 +-- airflow/providers/grpc/provider.yaml | 2 +- .../providers/microsoft/azure/provider.yaml | 6 +- .../providers/microsoft/mssql/provider.yaml | 2 +- airflow/providers/mongo/provider.yaml | 4 +- airflow/providers/mysql/provider.yaml | 2 +- airflow/providers/odbc/provider.yaml | 2 +- airflow/providers/oracle/provider.yaml | 2 +- airflow/providers/pagerduty/provider.yaml | 2 +- airflow/providers/papermill/provider.yaml | 3 + 
airflow/providers/pinecone/provider.yaml | 2 +- airflow/providers/postgres/provider.yaml | 2 +- airflow/providers/presto/provider.yaml | 4 +- airflow/providers/salesforce/provider.yaml | 4 +- airflow/providers/sftp/provider.yaml | 2 +- airflow/providers/snowflake/provider.yaml | 3 +- airflow/providers/ssh/provider.yaml | 2 +- airflow/providers/tableau/provider.yaml | 2 +- airflow/providers/trino/provider.yaml | 4 +- airflow/providers/vertica/provider.yaml | 2 +- airflow/providers/weaviate/provider.yaml | 4 +- contributing-docs/testing/unit_tests.rst | 125 +++++++++++++ dev/breeze/doc/images/output_shell.svg | 164 ++++++++++++------ dev/breeze/doc/images/output_shell.txt | 2 +- .../doc/images/output_testing_db-tests.svg | 96 +++++----- .../doc/images/output_testing_db-tests.txt | 2 +- .../images/output_testing_non-db-tests.svg | 96 +++++----- .../images/output_testing_non-db-tests.txt | 2 +- .../doc/images/output_testing_tests.svg | 96 +++++----- .../doc/images/output_testing_tests.txt | 2 +- .../airflow_breeze/commands/common_options.py | 7 + .../commands/developer_commands.py | 10 ++ .../commands/developer_commands_config.py | 2 + .../commands/testing_commands.py | 6 + .../commands/testing_commands_config.py | 3 + .../src/airflow_breeze/params/shell_params.py | 2 + .../templates/pyproject_TEMPLATE.toml.jinja2 | 2 +- .../src/airflow_breeze/utils/packages.py | 5 +- .../src/airflow_breeze/utils/parallel.py | 2 +- .../airflow_breeze/utils/selective_checks.py | 31 +++- dev/breeze/tests/test_packages.py | 10 +- dev/breeze/tests/test_selective_checks.py | 20 +++ generated/provider_dependencies.json | 123 +++++++------ hatch_build.py | 41 +++-- scripts/docker/entrypoint_ci.sh | 37 +++- scripts/in_container/run_ci_tests.sh | 10 ++ 65 files changed, 787 insertions(+), 336 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f11487bdd06ee..9060cfb7cb019 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -79,6 +79,8 @@ jobs: 
${{ steps.selective-checks.outputs.parallel-test-types-list-as-string }} providers-test-types-list-as-string: >- ${{ steps.selective-checks.outputs.providers-test-types-list-as-string }} + separate-test-types-list-as-string: >- + ${{ steps.selective-checks.outputs.separate-test-types-list-as-string }} include-success-outputs: ${{ steps.selective-checks.outputs.include-success-outputs }} postgres-exclude: ${{ steps.selective-checks.outputs.postgres-exclude }} mysql-exclude: ${{ steps.selective-checks.outputs.mysql-exclude }} @@ -455,8 +457,11 @@ jobs: runs-on-as-json-default: ${{ needs.build-info.outputs.runs-on-as-json-default }} image-tag: ${{ needs.build-info.outputs.image-tag }} parallel-test-types-list-as-string: ${{ needs.build-info.outputs.parallel-test-types-list-as-string }} + separate-test-types-list-as-string: >- + ${{ needs.build-info.outputs.separate-test-types-list-as-string }} run-coverage: ${{ needs.build-info.outputs.run-coverage }} default-python-version: ${{ needs.build-info.outputs.default-python-version }} + python-versions: ${{ needs.build-info.outputs.python-versions }} default-postgres-version: ${{ needs.build-info.outputs.default-postgres-version }} canary-run: ${{ needs.build-info.outputs.canary-run }} upgrade-to-newer-dependencies: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} diff --git a/.github/workflows/run-unit-tests.yml b/.github/workflows/run-unit-tests.yml index 5ff40b033c3c4..d10032241259f 100644 --- a/.github/workflows/run-unit-tests.yml +++ b/.github/workflows/run-unit-tests.yml @@ -104,6 +104,16 @@ on: # yamllint disable-line rule:truthy required: false default: "true" type: string + force-lowest-dependencies: + description: "Whether to force lowest dependencies for the tests or not (true/false)" + required: false + default: "false" + type: string + monitor-delay-time-in-seconds: + description: "How much time to wait between printing parallel monitor summary" + required: false + default: 20 + type: number jobs: 
tests: timeout-minutes: 120 @@ -128,6 +138,7 @@ jobs: DOWNGRADE_SQLALCHEMY: "${{ inputs.downgrade-sqlalchemy }}" DOWNGRADE_PENDULUM: "${{ inputs.downgrade-pendulum }}" ENABLE_COVERAGE: "${{ inputs.run-coverage }}" + FORCE_LOWEST_DEPENDENCIES: "${{ inputs.force-lowest-dependencies }}" GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} @@ -140,6 +151,7 @@ jobs: PYDANTIC: "${{ inputs.pydantic }}" PYTHON_MAJOR_MINOR_VERSION: "${{ matrix.python-version }}" UPGRADE_BOTO: "${{ inputs.upgrade-boto }}" + AIRFLOW_MONITOR_DELAY_TIME_IN_SECONDS: "${{inputs.monitor-delay-time-in-seconds}}" VERBOSE: "true" steps: - name: "Cleanup repo" diff --git a/.github/workflows/special-tests.yml b/.github/workflows/special-tests.yml index c9d2d6f3d1a19..f4d626c6017d2 100644 --- a/.github/workflows/special-tests.yml +++ b/.github/workflows/special-tests.yml @@ -32,6 +32,10 @@ on: # yamllint disable-line rule:truthy description: "The list of parallel test types to run separated by spaces" required: true type: string + separate-test-types-list-as-string: + description: "The list of separate provider test types to run separated by spaces" + required: true + type: string run-coverage: description: "Whether to run coverage or not (true/false)" required: true @@ -40,6 +44,10 @@ on: # yamllint disable-line rule:truthy description: "Which version of python should be used by default" required: true type: string + python-versions: + description: "The list of python versions (stringified JSON array) to run the tests on." 
+ required: true + type: string default-postgres-version: description: "The default version of the postgres to use" required: true @@ -189,6 +197,29 @@ jobs: run-coverage: ${{ inputs.run-coverage }} debug-resources: ${{ inputs.debug-resources }} + tests-with-lowest-direct-resolution: + name: "Lowest direct dependency resolution tests" + uses: ./.github/workflows/run-unit-tests.yml + permissions: + contents: read + packages: read + secrets: inherit + with: + runs-on-as-json-default: ${{ inputs.runs-on-as-json-default }} + test-name: "LowestDeps-Postgres" + force-lowest-dependencies: "true" + test-scope: "All" + backend: "postgres" + image-tag: ${{ inputs.image-tag }} + python-versions: ${{ inputs.python-versions }} + backend-versions: "['${{ inputs.default-postgres-version }}']" + excludes: "[]" + parallel-test-types-list-as-string: ${{ inputs.separate-test-types-list-as-string }} + include-success-outputs: ${{ needs.build-info.outputs.include-success-outputs }} + run-coverage: ${{ inputs.run-coverage }} + debug-resources: ${{ inputs.debug-resources }} + monitor-delay-time-in-seconds: 120 + tests-quarantined: name: "Quarantined test" uses: ./.github/workflows/run-unit-tests.yml diff --git a/Dockerfile.ci b/Dockerfile.ci index 1c9d9db3dd40b..cc06ef923bffd 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -1072,15 +1072,15 @@ function check_downgrade_sqlalchemy() { } function check_downgrade_pendulum() { - if [[ ${DOWNGRADE_PENDULUM=} != "true" ]]; then + if [[ ${DOWNGRADE_PENDULUM=} != "true" || ${PYTHON_MAJOR_MINOR_VERSION} == "3.12" ]]; then return fi - min_pendulum_version=$(grep "\"pendulum>=" hatch_build.py | sed "s/.*>=\([0-9\.]*\).*/\1/" | xargs) + local MIN_PENDULUM_VERSION="2.1.2" echo - echo "${COLOR_BLUE}Downgrading pendulum to minimum supported version: ${min_pendulum_version}${COLOR_RESET}" + echo "${COLOR_BLUE}Downgrading pendulum to minimum supported version: ${MIN_PENDULUM_VERSION}${COLOR_RESET}" echo # shellcheck disable=SC2086 - 
${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} "pendulum==${min_pendulum_version}" + ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} "pendulum==${MIN_PENDULUM_VERSION}" pip check } @@ -1109,12 +1109,41 @@ function check_run_tests() { fi } +function check_force_lowest_dependencies() { + if [[ ${FORCE_LOWEST_DEPENDENCIES=} != "true" ]]; then + return + fi + EXTRA="" + if [[ ${TEST_TYPE=} =~ Providers\[.*\] ]]; then + # shellcheck disable=SC2001 + EXTRA=$(echo "[${TEST_TYPE}]" | sed 's/Providers\[\(.*\)\]/\1/') + echo + echo "${COLOR_BLUE}Forcing dependencies to lowest versions for provider: ${EXTRA}${COLOR_RESET}" + echo + else + echo + echo "${COLOR_BLUE}Forcing dependencies to lowest versions for Airflow.${COLOR_RESET}" + echo + fi + set -x + # TODO: hard-code explicitly papermill on 3.12 but we should automate it + if [[ ${EXTRA} == "[papermill]" && ${PYTHON_MAJOR_MINOR_VERSION} == "3.12" ]]; then + echo + echo "Skipping papermill check on Python 3.12!" + echo + exit 0 + fi + uv pip install --python "$(which python)" --resolution lowest-direct --upgrade --editable ".${EXTRA}" + set +x +} + determine_airflow_to_use environment_initialization check_boto_upgrade check_pydantic check_downgrade_sqlalchemy check_downgrade_pendulum +check_force_lowest_dependencies check_run_tests "${@}" exec /bin/bash "${@}" diff --git a/airflow/providers/amazon/provider.yaml b/airflow/providers/amazon/provider.yaml index 41aba21c36b10..9789ccf6a00bc 100644 --- a/airflow/providers/amazon/provider.yaml +++ b/airflow/providers/amazon/provider.yaml @@ -92,17 +92,17 @@ dependencies: # We should update minimum version of boto3 and here regularly to avoid `pip` backtracking with the number # of candidates to consider. Make sure to configure boto3 version here as well as in all the tools below # in the `devel-dependencies` section to be the same minimum version. 
- - boto3>=1.33.0 - - botocore>=1.33.0 + - boto3>=1.34.0 + - botocore>=1.34.0 - inflection>=0.5.1 # Allow a wider range of watchtower versions for flexibility among users - - watchtower>=2.0.1,<4 + - watchtower>=3.0.0,<4 - jsonpath_ng>=1.5.3 - redshift_connector>=2.0.918 - sqlalchemy_redshift>=0.8.6 - - asgiref + - asgiref>=2.3.0 - PyAthena>=3.0.10 - - jmespath + - jmespath>=0.7.0 additional-extras: - name: pandas @@ -111,13 +111,15 @@ additional-extras: # https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#increased-minimum-versions-for-dependencies # However Airflow not fully supports it yet: https://github.com/apache/airflow/issues/28723 # In addition FAB also limit sqlalchemy to < 2.0 - - pandas>=1.2.5,<2.2 + - pandas>=1.5.3,<2.2;python_version<"3.12" + - pandas>=2.1.1,<2.2;python_version>="3.12" + # There is conflict between boto3 and aiobotocore dependency botocore. # TODO: We can remove it once boto3 and aiobotocore both have compatible botocore version or # boto3 have native async support and we move away from aio aiobotocore - name: aiobotocore dependencies: - - aiobotocore[boto3]>=2.5.3 + - aiobotocore[boto3]>=2.10.0 - name: cncf.kubernetes dependencies: - apache-airflow-providers-cncf-kubernetes>=7.2.0 diff --git a/airflow/providers/apache/flink/provider.yaml b/airflow/providers/apache/flink/provider.yaml index 67b6def47621e..6d02277f23ccf 100644 --- a/airflow/providers/apache/flink/provider.yaml +++ b/airflow/providers/apache/flink/provider.yaml @@ -38,7 +38,7 @@ versions: dependencies: - apache-airflow>=2.7.0 - - cryptography>=2.0.0 + - cryptography>=41.0.0 - apache-airflow-providers-cncf-kubernetes>=5.1.0 integrations: diff --git a/airflow/providers/apache/hdfs/provider.yaml b/airflow/providers/apache/hdfs/provider.yaml index 05cc13b449161..a74189578f276 100644 --- a/airflow/providers/apache/hdfs/provider.yaml +++ b/airflow/providers/apache/hdfs/provider.yaml @@ -53,7 +53,11 @@ versions: dependencies: - apache-airflow>=2.7.0 - - 
hdfs[avro,dataframe,kerberos]>=2.0.4 + - hdfs[avro,dataframe,kerberos]>=2.5.4;python_version<"3.12" + - hdfs[avro,dataframe,kerberos]>=2.7.3;python_version>="3.12" + - pandas>=1.5.3,<2.2;python_version<"3.12" + - pandas>=2.1.1,<2.2;python_version>="3.12" + integrations: - integration-name: Hadoop Distributed File System (HDFS) diff --git a/airflow/providers/apache/hive/provider.yaml b/airflow/providers/apache/hive/provider.yaml index dd38bbfc8587a..945c78c6eadba 100644 --- a/airflow/providers/apache/hive/provider.yaml +++ b/airflow/providers/apache/hive/provider.yaml @@ -77,9 +77,12 @@ dependencies: # https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#increased-minimum-versions-for-dependencies # However Airflow not fully supports it yet: https://github.com/apache/airflow/issues/28723 # In addition FAB also limit sqlalchemy to < 2.0 - - pandas>=1.2.5,<2.2 + - pandas>=1.5.3,<2.2;python_version<"3.12" + - pandas>=2.1.1,<2.2;python_version>="3.12" + - pyhive[hive_pure_sasl]>=0.7.0 - - thrift>=0.9.2 + - thrift>=0.11.0 + - jmespath>=0.7.0 integrations: - integration-name: Apache Hive diff --git a/airflow/providers/apache/kafka/provider.yaml b/airflow/providers/apache/kafka/provider.yaml index dfceef55fb134..19ff29bd4f172 100644 --- a/airflow/providers/apache/kafka/provider.yaml +++ b/airflow/providers/apache/kafka/provider.yaml @@ -37,8 +37,8 @@ versions: dependencies: - apache-airflow>=2.7.0 - - asgiref - - confluent-kafka>=1.8.2 + - asgiref>=2.3.0 + - confluent-kafka>=2.3.0 integrations: - integration-name: Apache Kafka diff --git a/airflow/providers/apache/kylin/provider.yaml b/airflow/providers/apache/kylin/provider.yaml index f3ee9f92e5b85..5fd624da9e5be 100644 --- a/airflow/providers/apache/kylin/provider.yaml +++ b/airflow/providers/apache/kylin/provider.yaml @@ -44,7 +44,7 @@ versions: dependencies: - apache-airflow>=2.7.0 - - kylinpy>=2.6 + - kylinpy>=2.7.0 integrations: - integration-name: Apache Kylin diff --git a/airflow/providers/apache/spark/provider.yaml 
b/airflow/providers/apache/spark/provider.yaml index 0f81797c35d72..e778ee94a2c43 100644 --- a/airflow/providers/apache/spark/provider.yaml +++ b/airflow/providers/apache/spark/provider.yaml @@ -59,7 +59,7 @@ versions: dependencies: - apache-airflow>=2.7.0 - - pyspark + - pyspark>=3.1.3 - grpcio-status>=1.59.0 additional-extras: diff --git a/airflow/providers/cloudant/provider.yaml b/airflow/providers/cloudant/provider.yaml index 5ad2f6cfba409..8d991559f8e22 100644 --- a/airflow/providers/cloudant/provider.yaml +++ b/airflow/providers/cloudant/provider.yaml @@ -44,7 +44,7 @@ versions: dependencies: - apache-airflow>=2.7.0 - - cloudant>=2.0 + - cloudant>=2.13.0 integrations: - integration-name: IBM Cloudant diff --git a/airflow/providers/cncf/kubernetes/provider.yaml b/airflow/providers/cncf/kubernetes/provider.yaml index 29554b1061f41..1abd182a0191d 100644 --- a/airflow/providers/cncf/kubernetes/provider.yaml +++ b/airflow/providers/cncf/kubernetes/provider.yaml @@ -87,7 +87,7 @@ dependencies: - aiofiles>=23.2.0 - apache-airflow>=2.7.0 - asgiref>=3.5.2 - - cryptography>=2.0.0 + - cryptography>=41.0.0 # The Kubernetes API is known to introduce problems when upgraded to a MAJOR version. Airflow Core # Uses Kubernetes for Kubernetes executor, and we also know that Kubernetes Python client follows SemVer # (https://github.com/kubernetes-client/python#compatibility). 
This is a crucial component of Airflow diff --git a/airflow/providers/common/sql/provider.yaml b/airflow/providers/common/sql/provider.yaml index 4426651d4fd43..f96562b052f06 100644 --- a/airflow/providers/common/sql/provider.yaml +++ b/airflow/providers/common/sql/provider.yaml @@ -66,7 +66,9 @@ additional-extras: # https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#increased-minimum-versions-for-dependencies # However Airflow not fully supports it yet: https://github.com/apache/airflow/issues/28723 # In addition FAB also limit sqlalchemy to < 2.0 - - pandas>=1.2.5,<2.2 + - pandas>=1.5.3,<2.2;python_version<"3.12" + - pandas>=2.1.1,<2.2;python_version>="3.12" + integrations: - integration-name: Common SQL diff --git a/airflow/providers/databricks/provider.yaml b/airflow/providers/databricks/provider.yaml index 80506dc16c15d..7a0ed2f9a65b5 100644 --- a/airflow/providers/databricks/provider.yaml +++ b/airflow/providers/databricks/provider.yaml @@ -73,6 +73,9 @@ dependencies: - databricks-sql-connector>=2.0.0, <3.0.0, !=2.9.0 - aiohttp>=3.9.2, <4 - mergedeep>=1.3.4 + - pandas>=1.5.3,<2.2;python_version<"3.12" + - pandas>=2.1.1,<2.2;python_version>="3.12" + - pyarrow>=14.0.1 additional-extras: # pip install apache-airflow-providers-databricks[sdk] diff --git a/airflow/providers/exasol/provider.yaml b/airflow/providers/exasol/provider.yaml index 8d01afa96b59c..9d0755c7c2b1a 100644 --- a/airflow/providers/exasol/provider.yaml +++ b/airflow/providers/exasol/provider.yaml @@ -64,7 +64,9 @@ dependencies: # https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#increased-minimum-versions-for-dependencies # However Airflow not fully supports it yet: https://github.com/apache/airflow/issues/28723 # In addition FAB also limit sqlalchemy to < 2.0 - - pandas>=1.2.5,<2.2 + - pandas>=1.5.3,<2.2;python_version<"3.12" + - pandas>=2.1.1,<2.2;python_version>="3.12" + integrations: - integration-name: Exasol diff --git a/airflow/providers/fab/provider.yaml 
b/airflow/providers/fab/provider.yaml index b787f810e343a..a38eef6732def 100644 --- a/airflow/providers/fab/provider.yaml +++ b/airflow/providers/fab/provider.yaml @@ -51,7 +51,7 @@ dependencies: - flask-appbuilder==4.5.0 - flask-login>=0.6.2 - google-re2>=1.0 - - jmespath + - jmespath>=0.7.0 config: fab: diff --git a/airflow/providers/facebook/provider.yaml b/airflow/providers/facebook/provider.yaml index 26b4470655d50..25ec2e8f364aa 100644 --- a/airflow/providers/facebook/provider.yaml +++ b/airflow/providers/facebook/provider.yaml @@ -48,7 +48,7 @@ versions: dependencies: - apache-airflow>=2.7.0 - - facebook-business>=6.0.2 + - facebook-business>=15.0.2 integrations: - integration-name: Facebook Ads diff --git a/airflow/providers/github/provider.yaml b/airflow/providers/github/provider.yaml index 0c132525410bb..4012be4e35092 100644 --- a/airflow/providers/github/provider.yaml +++ b/airflow/providers/github/provider.yaml @@ -24,9 +24,7 @@ description: | dependencies: - apache-airflow>=2.7.0 - # There was a change introduced in version 1.58 which breaks `pickle` serialization out of the box. - # See https://github.com/PyGithub/PyGithub/issues/2436. - - PyGithub!=1.58 + - PyGithub>=2.1.1 state: ready source-date-epoch: 1716287833 diff --git a/airflow/providers/google/provider.yaml b/airflow/providers/google/provider.yaml index 8a48f9481168c..8cdf5967f1bfb 100644 --- a/airflow/providers/google/provider.yaml +++ b/airflow/providers/google/provider.yaml @@ -93,6 +93,7 @@ dependencies: - apache-airflow>=2.7.0 - apache-airflow-providers-common-sql>=1.7.2 - asgiref>=3.5.2 + - dill>=0.2.3 # When upgrading the major version of gcloud-aio-auth we want to make sure to # 1. use at least version 5.2, which uses offset-aware datetime internally # 2. 
override Token's new `refresh` method instead of `acquire_access_token`, which allows us to avoid @@ -104,12 +105,12 @@ dependencies: - gcloud-aio-storage>=9.0.0 - gcsfs>=2023.10.0 - google-ads>=23.1.0 - - google-analytics-admin + - google-analytics-admin>=0.9.0 # Google-api-core 2.16.0 back-compat issue: # - https://github.com/googleapis/python-api-core/issues/576 # - https://github.com/apache/airflow/issues/39394 - google-api-core>=2.11.0,!=2.16.0,!=2.18.0 - - google-api-python-client>=1.6.0 + - google-api-python-client>=2.0.2 - google-auth>=1.0.0 - google-auth-httplib2>=0.0.1 - google-cloud-aiplatform>=1.42.1 @@ -117,7 +118,7 @@ dependencies: # google-cloud-bigquery version 3.21.0 introduced a performance enhancement in QueryJob.result(), # which has led to backward compatibility issues # - https://github.com/apache/airflow/issues/39541 - - google-cloud-bigquery<3.21.0,>=3.0.1 + - google-cloud-bigquery<3.21.0,>=3.4.0 - google-cloud-bigquery-datatransfer>=3.13.0 - google-cloud-bigtable>=2.17.0 - google-cloud-build>=3.22.0 @@ -150,25 +151,28 @@ dependencies: - google-cloud-videointelligence>=2.11.0 - google-cloud-vision>=3.4.0 - google-cloud-workflows>=1.10.0 - - google-cloud-run>=0.9.0 + - google-cloud-run>=0.10.0 - google-cloud-batch>=0.13.0 - grpcio-gcp>=0.2.2 - - httpx + - httpx>=0.18.0 - json-merge-patch>=0.2 - - looker-sdk>=22.2.0 - - pandas-gbq + - looker-sdk>=22.4.0 + - pandas-gbq>=0.7.0 # In pandas 2.2 minimal version of the sqlalchemy is 2.0 # https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#increased-minimum-versions-for-dependencies # However Airflow not fully supports it yet: https://github.com/apache/airflow/issues/28723 # In addition FAB also limit sqlalchemy to < 2.0 - - pandas>=1.2.5,<2.2 + - pandas>=1.5.3,<2.2;python_version<"3.12" + - pandas>=2.1.1,<2.2;python_version>="3.12" + # A transient dependency of google-cloud-bigquery-datatransfer, but we # further constrain it since older versions are buggy. 
- proto-plus>=1.19.6 - - PyOpenSSL + - python-slugify>=7.0.0 + - PyOpenSSL>=23.0.0 - sqlalchemy-bigquery>=1.2.1 - sqlalchemy-spanner>=1.6.2 - - python-slugify>=5.0 + - tenacity>=8.1.0 additional-extras: - name: apache.beam diff --git a/airflow/providers/grpc/provider.yaml b/airflow/providers/grpc/provider.yaml index c94a736ae1345..468a931162d07 100644 --- a/airflow/providers/grpc/provider.yaml +++ b/airflow/providers/grpc/provider.yaml @@ -52,7 +52,7 @@ dependencies: # https://github.com/googleapis/google-cloud-python/issues/10566 - google-auth>=1.0.0, <3.0.0 - google-auth-httplib2>=0.0.1 - - grpcio>=1.15.0 + - grpcio>=1.59.0 integrations: - integration-name: gRPC diff --git a/airflow/providers/microsoft/azure/provider.yaml b/airflow/providers/microsoft/azure/provider.yaml index d2c309718fe5e..45e1cf82e1a29 100644 --- a/airflow/providers/microsoft/azure/provider.yaml +++ b/airflow/providers/microsoft/azure/provider.yaml @@ -82,7 +82,7 @@ dependencies: - adlfs>=2023.10.0 - azure-batch>=8.0.0 - azure-cosmos>=4.6.0 - - azure-mgmt-cosmosdb + - azure-mgmt-cosmosdb>=3.0.0 - azure-datalake-store>=0.0.45 - azure-identity>=1.3.1 - azure-keyvault-secrets>=4.1.0 @@ -90,9 +90,9 @@ dependencies: - azure-mgmt-resource>=2.2.0 - azure-storage-blob>=12.14.0 - azure-mgmt-storage>=16.0.0 - - azure-storage-file-share + - azure-storage-file-share>=12.7.0 - azure-servicebus>=7.12.1 - - azure-synapse-spark + - azure-synapse-spark>=0.2.0 - azure-synapse-artifacts>=0.17.0 - adal>=1.2.7 - azure-storage-file-datalake>=12.9.1 diff --git a/airflow/providers/microsoft/mssql/provider.yaml b/airflow/providers/microsoft/mssql/provider.yaml index dad7714c483cd..4ce15eee0f3a2 100644 --- a/airflow/providers/microsoft/mssql/provider.yaml +++ b/airflow/providers/microsoft/mssql/provider.yaml @@ -53,7 +53,7 @@ versions: dependencies: - apache-airflow>=2.7.0 - apache-airflow-providers-common-sql>=1.3.1 - - pymssql>=2.1.8 + - pymssql>=2.3.0 integrations: - integration-name: Microsoft SQL Server (MSSQL) 
diff --git a/airflow/providers/mongo/provider.yaml b/airflow/providers/mongo/provider.yaml index 8276351601528..b05bf76e339e7 100644 --- a/airflow/providers/mongo/provider.yaml +++ b/airflow/providers/mongo/provider.yaml @@ -51,10 +51,10 @@ versions: dependencies: - apache-airflow>=2.7.0 - dnspython>=1.13.0 - - pymongo>=3.6.0 + - pymongo>=4.0.0 devel-dependencies: - - mongomock + - mongomock>=4.0.0 integrations: - integration-name: MongoDB diff --git a/airflow/providers/mysql/provider.yaml b/airflow/providers/mysql/provider.yaml index bf76a0a6138c9..f03b75e825c5f 100644 --- a/airflow/providers/mysql/provider.yaml +++ b/airflow/providers/mysql/provider.yaml @@ -64,7 +64,7 @@ versions: dependencies: - apache-airflow>=2.7.0 - apache-airflow-providers-common-sql>=1.3.1 - - mysqlclient>=1.3.6 + - mysqlclient>=1.4.0 - mysql-connector-python>=8.0.29 additional-extras: diff --git a/airflow/providers/odbc/provider.yaml b/airflow/providers/odbc/provider.yaml index b16a5b69a186b..7e059d3d3c328 100644 --- a/airflow/providers/odbc/provider.yaml +++ b/airflow/providers/odbc/provider.yaml @@ -52,7 +52,7 @@ versions: dependencies: - apache-airflow>=2.7.0 - apache-airflow-providers-common-sql>=1.10.0 - - pyodbc + - pyodbc>=5.0.0 integrations: - integration-name: ODBC diff --git a/airflow/providers/oracle/provider.yaml b/airflow/providers/oracle/provider.yaml index 05e93d0d58daf..e7c3e240af8b4 100644 --- a/airflow/providers/oracle/provider.yaml +++ b/airflow/providers/oracle/provider.yaml @@ -58,7 +58,7 @@ versions: dependencies: - apache-airflow>=2.7.0 - apache-airflow-providers-common-sql>=1.3.1 - - oracledb>=1.0.0 + - oracledb>=2.0.0 additional-extras: - name: numpy diff --git a/airflow/providers/pagerduty/provider.yaml b/airflow/providers/pagerduty/provider.yaml index e9bd27753f241..7cb041eec8257 100644 --- a/airflow/providers/pagerduty/provider.yaml +++ b/airflow/providers/pagerduty/provider.yaml @@ -49,7 +49,7 @@ versions: dependencies: - apache-airflow>=2.7.0 - - 
pdpyras>=4.1.2 + - pdpyras>=4.2.0 integrations: - integration-name: Pagerduty diff --git a/airflow/providers/papermill/provider.yaml b/airflow/providers/papermill/provider.yaml index 9cfdc16d54c44..9c303d30c86d4 100644 --- a/airflow/providers/papermill/provider.yaml +++ b/airflow/providers/papermill/provider.yaml @@ -58,6 +58,9 @@ dependencies: - papermill[all]>=2.4.0 - scrapbook[all] - ipykernel + - pandas>=1.5.3,<2.2;python_version<"3.12" + - pandas>=2.1.1,<2.2;python_version>="3.12" + integrations: - integration-name: Papermill diff --git a/airflow/providers/pinecone/provider.yaml b/airflow/providers/pinecone/provider.yaml index 8fe24a996f507..9e6eb6356eec2 100644 --- a/airflow/providers/pinecone/provider.yaml +++ b/airflow/providers/pinecone/provider.yaml @@ -43,7 +43,7 @@ integrations: dependencies: - apache-airflow>=2.7.0 - - pinecone-client>=3.0.0 + - pinecone-client>=3.1.0 hooks: - integration-name: Pinecone diff --git a/airflow/providers/postgres/provider.yaml b/airflow/providers/postgres/provider.yaml index 78d471fb40b02..66441aa0d413b 100644 --- a/airflow/providers/postgres/provider.yaml +++ b/airflow/providers/postgres/provider.yaml @@ -63,7 +63,7 @@ versions: dependencies: - apache-airflow>=2.7.0 - apache-airflow-providers-common-sql>=1.3.1 - - psycopg2-binary>=2.8.0 + - psycopg2-binary>=2.9.4 additional-extras: - name: amazon diff --git a/airflow/providers/presto/provider.yaml b/airflow/providers/presto/provider.yaml index e3c0b4ce0aa30..6c47c520c5eb1 100644 --- a/airflow/providers/presto/provider.yaml +++ b/airflow/providers/presto/provider.yaml @@ -66,7 +66,9 @@ dependencies: # https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#increased-minimum-versions-for-dependencies # However Airflow not fully supports it yet: https://github.com/apache/airflow/issues/28723 # In addition FAB also limit sqlalchemy to < 2.0 - - pandas>=1.2.5,<2.2 + - pandas>=1.5.3,<2.2;python_version<"3.12" + - pandas>=2.1.1,<2.2;python_version>="3.12" + integrations: - 
integration-name: Presto diff --git a/airflow/providers/salesforce/provider.yaml b/airflow/providers/salesforce/provider.yaml index 54717cb032e73..7b33238af8a7b 100644 --- a/airflow/providers/salesforce/provider.yaml +++ b/airflow/providers/salesforce/provider.yaml @@ -62,7 +62,9 @@ dependencies: # https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#increased-minimum-versions-for-dependencies # However Airflow not fully supports it yet: https://github.com/apache/airflow/issues/28723 # In addition FAB also limit sqlalchemy to < 2.0 - - pandas>=1.2.5,<2.2 + - pandas>=1.5.3,<2.2;python_version<"3.12" + - pandas>=2.1.1,<2.2;python_version>="3.12" + integrations: - integration-name: Salesforce diff --git a/airflow/providers/sftp/provider.yaml b/airflow/providers/sftp/provider.yaml index 3f5823e27f5f2..e97f8bb86c17d 100644 --- a/airflow/providers/sftp/provider.yaml +++ b/airflow/providers/sftp/provider.yaml @@ -65,7 +65,7 @@ versions: dependencies: - apache-airflow>=2.7.0 - apache-airflow-providers-ssh>=2.1.0 - - paramiko>=2.8.0 + - paramiko>=2.9.0 - asyncssh>=2.12.0 integrations: diff --git a/airflow/providers/snowflake/provider.yaml b/airflow/providers/snowflake/provider.yaml index 9a7b7db5dcf7c..cfa66909c3749 100644 --- a/airflow/providers/snowflake/provider.yaml +++ b/airflow/providers/snowflake/provider.yaml @@ -75,8 +75,9 @@ versions: dependencies: - apache-airflow>=2.7.0 - apache-airflow-providers-common-sql>=1.10.0 - - snowflake-connector-python>=2.7.11 + - snowflake-connector-python>=3.7.1 - snowflake-sqlalchemy>=1.4.0 + - pyarrow>=14.0.1 integrations: - integration-name: Snowflake diff --git a/airflow/providers/ssh/provider.yaml b/airflow/providers/ssh/provider.yaml index bbd8eddee6bef..c8c71d93f5a29 100644 --- a/airflow/providers/ssh/provider.yaml +++ b/airflow/providers/ssh/provider.yaml @@ -60,7 +60,7 @@ versions: dependencies: - apache-airflow>=2.7.0 - - paramiko>=2.6.0 + - paramiko>=2.9.0 - sshtunnel>=0.3.2 integrations: diff --git 
a/airflow/providers/tableau/provider.yaml b/airflow/providers/tableau/provider.yaml index f58ee5476497f..bdc445e9b6dff 100644 --- a/airflow/providers/tableau/provider.yaml +++ b/airflow/providers/tableau/provider.yaml @@ -52,7 +52,7 @@ versions: dependencies: - apache-airflow>=2.7.0 - - tableauserverclient + - tableauserverclient>=0.25 integrations: - integration-name: Tableau diff --git a/airflow/providers/trino/provider.yaml b/airflow/providers/trino/provider.yaml index 619afb6c73053..894bd75f585e4 100644 --- a/airflow/providers/trino/provider.yaml +++ b/airflow/providers/trino/provider.yaml @@ -67,7 +67,9 @@ dependencies: # https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#increased-minimum-versions-for-dependencies # However Airflow not fully supports it yet: https://github.com/apache/airflow/issues/28723 # In addition FAB also limit sqlalchemy to < 2.0 - - pandas>=1.2.5,<2.2 + - pandas>=1.5.3,<2.2;python_version<"3.12" + - pandas>=2.1.1,<2.2;python_version>="3.12" + - trino>=0.318.0 integrations: diff --git a/airflow/providers/vertica/provider.yaml b/airflow/providers/vertica/provider.yaml index 4341ac845f628..6cdde00b49449 100644 --- a/airflow/providers/vertica/provider.yaml +++ b/airflow/providers/vertica/provider.yaml @@ -53,7 +53,7 @@ versions: dependencies: - apache-airflow>=2.7.0 - apache-airflow-providers-common-sql>=1.3.1 - - vertica-python>=0.5.1 + - vertica-python>=0.6.0 integrations: - integration-name: Vertica diff --git a/airflow/providers/weaviate/provider.yaml b/airflow/providers/weaviate/provider.yaml index 0e30097012289..9c1b5643d100c 100644 --- a/airflow/providers/weaviate/provider.yaml +++ b/airflow/providers/weaviate/provider.yaml @@ -53,7 +53,9 @@ dependencies: # https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#increased-minimum-versions-for-dependencies # However Airflow not fully supports it yet: https://github.com/apache/airflow/issues/28723 # In addition FAB also limit sqlalchemy to < 2.0 - - pandas>=1.2.5,<2.2 + - 
pandas>=1.5.3,<2.2;python_version<"3.12" + - pandas>=2.1.1,<2.2;python_version>="3.12" + hooks: - integration-name: Weaviate diff --git a/contributing-docs/testing/unit_tests.rst b/contributing-docs/testing/unit_tests.rst index 6b9004a633605..3342585b8cfc3 100644 --- a/contributing-docs/testing/unit_tests.rst +++ b/contributing-docs/testing/unit_tests.rst @@ -1251,6 +1251,131 @@ Rebuilding single provider package can be done using this command: breeze release-management prepare-provider-packages \ --version-suffix-for-pypi dev0 --package-format wheel +Lowest direct dependency resolution tests +----------------------------------------- + +We have special tests that run with the lowest direct resolution of dependencies for Airflow and providers. +These tests are run in order to check that we are not using a feature that is not available in an +older version of some dependencies. + +Tests with lowest-direct dependency resolution for Airflow +---------------------------------------------------------- + +You can test minimum dependencies that are installed by Airflow by running (for example to run "Core" tests): + +.. code-block:: bash + + breeze testing tests --force-lowest-dependencies --test-type "Core" + +You can also iterate on the tests and versions of the dependencies by entering breeze shell and +running the tests from there: + +.. code-block:: bash + + breeze shell --force-lowest-dependencies + +The way it works - when you run breeze with the ``--force-lowest-dependencies`` flag, breeze will +attempt (with the help of ``uv``) to downgrade the dependencies to the lowest version that is compatible +with the dependencies specified in airflow dependencies. You will see it in the output of the breeze +command as a sequence of downgrades like this: + +..
code-block:: diff + + - aiohttp==3.9.5 + + aiohttp==3.9.2 + - anyio==4.4.0 + + anyio==3.7.1 + + +Tests with lowest-direct dependency resolution for a Provider +------------------------------------------------------------- + +Similarly, we can test whether the provider tests work with the lowest dependencies of a specific provider. + +Those tests can be easily run locally with breeze (replace PROVIDER_ID with the id of the provider): + +.. code-block:: bash + + breeze testing tests --force-lowest-dependencies --test-type "Providers[PROVIDER_ID]" + +If you find that the tests are failing for some dependencies, make sure to add a minimum version for +the dependency in the provider.yaml file of the appropriate provider and re-run the tests. + +You can also iterate on the tests and versions of the dependencies by entering breeze shell and +running the tests from there: + +.. code-block:: bash + + breeze shell --force-lowest-dependencies --test-type "Providers[PROVIDER_ID]" + +Similarly to the case of "Core" tests, the dependencies will be downgraded to the lowest version that is +compatible with the dependencies specified in the provider dependencies and you will see the list of +downgrades in the output of the breeze command. Note that these will be the combined downgrades of both +Airflow and the selected provider dependencies, so the list will be longer than in the case of "Core" tests +and longer than **just** the dependencies of the provider. For example, for the ``google`` provider, part of the +downgraded dependencies will contain both Airflow and Google Provider dependencies: + +..
code-block:: diff + + - flask-login==0.6.3 + + flask-login==0.6.2 + - flask-session==0.5.0 + + flask-session==0.4.0 + - flask-wtf==1.2.1 + + flask-wtf==1.1.0 + - fsspec==2023.12.2 + + fsspec==2023.10.0 + - gcloud-aio-bigquery==7.1.0 + + gcloud-aio-bigquery==6.1.2 + - gcloud-aio-storage==9.2.0 + + +How to fix failing lowest-direct dependency resolution tests +------------------------------------------------------------ + +When your tests pass in regular tests, but fail in "lowest-direct" dependency resolution tests, you need +to figure out the lower bounds missing in ``hatch_build.py`` (for Airflow core dependencies) or +in the corresponding provider's ``provider.yaml`` file. This is usually easy and takes only a little +bit of time to figure out, especially if you just added a new feature from a library that you use: just check in +the release notes what is the minimum version of the library that you can use and set it as the +``>=VERSION`` in the ``hatch_build.py`` or ``provider.yaml`` file. For ``hatch_build.py`` changes you do not +need to do anything else; for the ``provider.yaml`` file you need to regenerate the generated dependencies +by running ``pre-commit run`` in the provider directory after adding the file to git, or by just letting +pre-commit do its job if you already have pre-commit installed via ``pre-commit install`` - then just +committing the change will regenerate the dependencies automatically. + +After that, re-run the ``breeze shell --force-lowest-dependencies`` command and see if the tests pass. + +.. code-block:: bash + + breeze shell --force-lowest-dependencies --test-type "Providers[PROVIDER_ID]" + +Sometimes it might get a bit tricky to know what is the minimum version of the library you should be using +but in this case you can easily find it by looking at the error and list of downgraded packages and +guessing which one is the one that is causing the problem.
You can then look at the release notes of the +library and find the minimum version, but you can also resort to a technique known as bisecting, which allows +you to quickly figure out the right version without knowing the root cause of the problem. + +Assume you suspect library "foo" that was downgraded from 1.0.0 to 0.1.0 is causing the problem. The bisecting +technique looks as follows: + +* enter breeze with the ``--force-lowest-dependencies`` flag (the ``foo`` library is downgraded to 0.1.0). Your + test should fail. +* upgrade just the ``foo`` library to 1.0.0 -> re-run the failing test (with ``pytest <test>``) + and see that it passes. +* downgrade the ``foo`` library to 0.1.0 -> re-run the failing test (with ``pytest <test>``) and see that it + fails. +* look at the list of versions available for the library between 0.1.0 and 1.0.0 (for example via + the `<https://pypi.org/project/foo/#history>`_ link - where ``foo`` is your library). +* find a middle version between the 1.0.0 and 0.1.0 and upgrade the library to this version - see if the + test passes or fails - if it passes, continue with finding the middle version between the current version + and lower version, if it fails, continue with finding the middle version between the current version and + higher version. +* continue that way until you find the version that is the lowest version that passes the test. +* set this version in the ``hatch_build.py`` or ``provider.yaml`` file, regenerate the generated + dependencies file and re-start breeze with the ``--force-lowest-dependencies`` flag and see that the + library has been downgraded to the version you set and the test passes. + Other Settings -------------- diff --git a/dev/breeze/doc/images/output_shell.svg b/dev/breeze/doc/images/output_shell.svg index 5d4bf9f03f5f5..c397fe3e38f7b 100644 --- a/dev/breeze/doc/images/output_shell.svg +++ b/dev/breeze/doc/images/output_shell.svg @@ -1,4 +1,4 @@ - +