From e3cdfbc756ed8fc64b63bbe225dd79e8e2491df6 Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Tue, 15 Dec 2020 21:12:14 +0530 Subject: [PATCH 01/36] add breeze config for mssql, fix db init on mssql --- BREEZE.rst | 4 +- Dockerfile.ci | 21 +- airflow/jobs/scheduler_job.py | 3 +- ..._change_ts_columns_to_datetime_on_mssql.py | 185 ++++++++++++++++++ ...3f6d53_add_unique_constraint_to_conn_id.py | 2 +- ..._add_scheduling_decision_to_dagrun_and_.py | 28 ++- ...bbf4a7ad0465_remove_id_column_from_xcom.py | 64 +++++- airflow/models/dag.py | 7 +- airflow/models/dagrun.py | 5 +- airflow/models/serialized_dag.py | 16 +- breeze | 7 + breeze-complete | 11 +- scripts/ci/docker-compose/backend-mssql.yml | 33 ++++ scripts/ci/libraries/_initialization.sh | 4 + scripts/in_container/check_environment.sh | 2 + setup.py | 3 +- 16 files changed, 368 insertions(+), 27 deletions(-) create mode 100644 airflow/migrations/versions/83f031fd9f1c_change_ts_columns_to_datetime_on_mssql.py create mode 100644 scripts/ci/docker-compose/backend-mssql.yml diff --git a/BREEZE.rst b/BREEZE.rst index ed2b4fda352d2..2f0510a015ca2 100644 --- a/BREEZE.rst +++ b/BREEZE.rst @@ -1883,7 +1883,7 @@ This is the current syntax for `./breeze <./breeze>`_: Backend to use for tests - it determines which database is used. One of: - sqlite mysql postgres + sqlite mysql postgres mssql Default: sqlite @@ -2349,7 +2349,7 @@ This is the current syntax for `./breeze <./breeze>`_: Backend to use for tests - it determines which database is used. One of: - sqlite mysql postgres + sqlite mysql postgres mssql Default: sqlite diff --git a/Dockerfile.ci b/Dockerfile.ci index 92b6a456b0269..5a02a8927f86e 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -160,8 +160,25 @@ RUN mkdir -pv /usr/share/man/man1 \ ${ADDITIONAL_RUNTIME_APT_DEPS} \ && apt-get autoremove -yqq --purge \ && apt-get clean \ - && rm -rf /var/lib/apt/lists/* \ - && curl https://download.docker.com/linux/static/stable/x86_64/docker-${DOCKER_CLI_VERSION}.tgz \ + && rm -rf /var/lib/apt/lists/* + +#Install MS SQL Drivers +RUN curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - \ + && curl https://packages.microsoft.com/config/debian/9/prod.list > /etc/apt/sources.list.d/mssql-release.list \ + && apt-get update -yqq \ + && apt-get upgrade -yqq \ + && ACCEPT_EULA=Y apt-get -yqq install -y --no-install-recommends \ + gcc \ + unixodbc-dev \ + g++ \ + msodbcsql17 \ + mssql-tools \ + && rm -rf /var/lib/apt/lists/* + +ARG DOCKER_CLI_VERSION=19.03.9 +ENV DOCKER_CLI_VERSION=${DOCKER_CLI_VERSION} + +RUN curl https://download.docker.com/linux/static/stable/x86_64/docker-${DOCKER_CLI_VERSION}.tgz \ | tar -C /usr/bin --strip-components=1 -xvzf - docker/docker WORKDIR ${AIRFLOW_SOURCES} diff --git a/airflow/jobs/scheduler_job.py b/airflow/jobs/scheduler_job.py index b400ae92e6c29..cdfec4c5cb814 100644 --- a/airflow/jobs/scheduler_job.py +++ b/airflow/jobs/scheduler_job.py @@ -37,6 +37,7 @@ from sqlalchemy.exc import OperationalError from sqlalchemy.orm import load_only, selectinload from sqlalchemy.orm.session import Session, make_transient +from sqlalchemy.sql import expression from airflow import models, settings from airflow.configuration import conf @@ -1644,7 +1645,7 @@ def _update_dag_next_dagruns(self, dag_models: Iterable[DagModel], session: Sess .filter( DagRun.dag_id.in_([o.dag_id for o in dag_models]), DagRun.state == State.RUNNING, # pylint: disable=comparison-with-callable - DagRun.external_trigger.is_(False), + DagRun.external_trigger == expression.false(), ) 
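+            # A note on the comparison style used above: `.is_(False)` compiles
+            # to an IS comparison, which T-SQL accepts only for NULL tests,
+            # while `== expression.false()` compiles to a portable `= 0`; that
+            # is why this patch series swaps the operator in several queries.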
            .group_by(DagRun.dag_id)
            .all()
diff --git a/airflow/migrations/versions/83f031fd9f1c_change_ts_columns_to_datetime_on_mssql.py b/airflow/migrations/versions/83f031fd9f1c_change_ts_columns_to_datetime_on_mssql.py
new file mode 100644
index 0000000000000..bff3a453c4e14
--- /dev/null
+++ b/airflow/migrations/versions/83f031fd9f1c_change_ts_columns_to_datetime_on_mssql.py
@@ -0,0 +1,185 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""change ts/datetime columns to datetime/datetime2 on mssql
+
+Revision ID: 83f031fd9f1c
+Revises: 2e42bb497a22
+Create Date: 2021-03-21 12:22:02.197726
+
+"""
+
+from collections import defaultdict
+
+import sqlalchemy as sa
+from alembic import op
+from sqlalchemy.dialects import mssql
+
+# revision identifiers, used by Alembic.
+revision = '83f031fd9f1c'
+down_revision = '2e42bb497a22'
+branch_labels = None
+depends_on = None
+
+
+def is_table_empty(conn, table_name):
+    """
+    This function checks if the MSSQL table is empty
+    :param conn: sql connection object
+    :param table_name: table name
+    :return: Boolean indicating if the table is empty
+    """
+    return conn.execute(f'select TOP 1 * from {table_name}').first() is None
+
+
+def get_table_constraints(conn, table_name):
+    """
+    This function returns primary and unique constraints
+    along with the column names. Some tables, such as task_instance,
+    are missing the primary key constraint name, which is then
+    auto-generated by SQL Server, so this function helps to
+    retrieve any primary or unique constraint name.
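+
+    For example, for a table whose primary key is named, say, PK_xcom and
+    covers (dag_id, task_id), the returned mapping looks like
+    {('PK_xcom', 'PRIMARY KEY'): ['dag_id', 'task_id']}.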
+ + :param conn: sql connection object + :param table_name: table name + :return: a dictionary of ((constraint name, constraint type), column name) of table + :rtype: defaultdict(list) + """ + query = """SELECT tc.CONSTRAINT_NAME , tc.CONSTRAINT_TYPE, ccu.COLUMN_NAME + FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS AS tc + JOIN INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE AS ccu ON ccu.CONSTRAINT_NAME = tc.CONSTRAINT_NAME + WHERE tc.TABLE_NAME = '{table_name}' AND + (tc.CONSTRAINT_TYPE = 'PRIMARY KEY' or UPPER(tc.CONSTRAINT_TYPE) = 'UNIQUE') + """.format( + table_name=table_name + ) + result = conn.execute(query).fetchall() + constraint_dict = defaultdict(list) + for constraint, constraint_type, column in result: + constraint_dict[(constraint, constraint_type)].append(column) + return constraint_dict + + +def drop_column_constraints(operator, column_name, constraint_dict): + """ + Drop a primary key or unique constraint + + :param operator: batch_alter_table for the table + :param constraint_dict: a dictionary of ((constraint name, constraint type), column name) of table + """ + for constraint, columns in constraint_dict.items(): + if column_name in columns: + if constraint[1].lower().startswith("primary"): + operator.drop_constraint(constraint[0], type_='primary') + elif constraint[1].lower().startswith("unique"): + operator.drop_constraint(constraint[0], type_='unique') + + +def create_constraints(operator, column_name, constraint_dict): + """ + Create a primary key or unique constraint + + :param operator: batch_alter_table for the table + :param constraint_dict: a dictionary of ((constraint name, constraint type), column name) of table + """ + for constraint, columns in constraint_dict.items(): + if column_name in columns: + if constraint[1].lower().startswith("primary"): + operator.create_primary_key(constraint_name=constraint[0], columns=columns) + elif constraint[1].lower().startswith("unique"): + operator.create_unique_constraint(constraint_name=constraint[0], columns=columns) + + +def __use_date_time2(conn): + result = conn.execute( + """SELECT CASE WHEN CONVERT(VARCHAR(128), SERVERPROPERTY ('productversion')) + like '8%' THEN '2000' WHEN CONVERT(VARCHAR(128), SERVERPROPERTY ('productversion')) + like '9%' THEN '2005' ELSE '2005Plus' END AS MajorVersion""" + ).fetchone() + mssql_version = result[0] + return mssql_version not in ("2000", "2005") + + +def __is_timestamp(conn, table_name, column_name): + query = f"""SELECT + TYPE_NAME(C.USER_TYPE_ID) AS DATA_TYPE + FROM SYS.COLUMNS C + JOIN SYS.TYPES T + ON C.USER_TYPE_ID=T.USER_TYPE_ID + WHERE C.OBJECT_ID=OBJECT_ID('{table_name}') and C.NAME='{column_name}'; + """ + column_type = conn.execute(query).fetchone()[0] + return column_type == "timestamp" + + +def recreate_mssql_ts_column(conn, op, table_name, column_name): + """ + Drop the timestamp column and recreate it as + datetime or datetime2(6) + """ + if __is_timestamp(conn, table_name, column_name) and is_table_empty(conn, table_name): + with op.batch_alter_table(table_name) as batch_op: + constraint_dict = get_table_constraints(conn, table_name) + drop_column_constraints(batch_op, column_name, constraint_dict) + batch_op.drop_column(column_name=column_name) + if __use_date_time2(conn): + batch_op.add_column(sa.Column(column_name, mssql.DATETIME2(precision=6), nullable=False)) + else: + batch_op.add_column(sa.Column(column_name, mssql.DATETIME, nullable=False)) + create_constraints(batch_op, column_name, constraint_dict) + + +def alter_mssql_datetime_column(conn, op, table_name, 
column_name, nullable): + """Update the datetime column to datetime2(6)""" + if __use_date_time2(conn): + op.alter_column( + table_name=table_name, + column_name=column_name, + type_=mssql.DATETIME2(precision=6), + nullable=nullable, + ) + + +def alter_mssql_datetime2_column(conn, op, table_name, column_name, nullable): + """Update the datetime2(6) column to datetime""" + if __use_date_time2(conn): + op.alter_column( + table_name=table_name, column_name=column_name, type_=mssql.DATETIME, nullable=nullable + ) + + +def upgrade(): + """Change timestamp and datetime to datetime2/datetime when using MSSQL as backend""" + conn = op.get_bind() + if conn.dialect.name == 'mssql': + recreate_mssql_ts_column(conn, op, 'dag_code', 'last_updated') + recreate_mssql_ts_column(conn, op, 'rendered_task_instance_fields', 'execution_date') + # alter_mssql_datetime_column(conn, op, 'chart', 'last_modified', False) + alter_mssql_datetime_column(conn, op, 'serialized_dag', 'last_updated', False) + # alter_mssql_datetime_column(conn, op, 'known_event', 'start_date', True) + # alter_mssql_datetime_column(conn, op, 'known_event', 'end_date', True) + + +def downgrade(): + """Change datetime2(6) columns back to datetime when using MSSQL as backend""" + conn = op.get_bind() + if conn.dialect.name == 'mssql': + alter_mssql_datetime2_column(conn, op, 'chart', 'last_modified', False) + alter_mssql_datetime2_column(conn, op, 'serialized_dag', 'last_updated', False) + alter_mssql_datetime2_column(conn, op, 'known_event', 'start_date', True) + alter_mssql_datetime2_column(conn, op, 'known_event', 'end_date', True) diff --git a/airflow/migrations/versions/8d48763f6d53_add_unique_constraint_to_conn_id.py b/airflow/migrations/versions/8d48763f6d53_add_unique_constraint_to_conn_id.py index 2c743e4813185..44be9880ec5ee 100644 --- a/airflow/migrations/versions/8d48763f6d53_add_unique_constraint_to_conn_id.py +++ b/airflow/migrations/versions/8d48763f6d53_add_unique_constraint_to_conn_id.py @@ -38,9 +38,9 @@ def upgrade(): """Apply add unique constraint to conn_id and set it as non-nullable""" try: with op.batch_alter_table('connection') as batch_op: + batch_op.alter_column("conn_id", nullable=False, existing_type=sa.String(250)) batch_op.create_unique_constraint(constraint_name="unique_conn_id", columns=["conn_id"]) - batch_op.alter_column("conn_id", nullable=False, existing_type=sa.String(250)) except sa.exc.IntegrityError: raise Exception("Make sure there are no duplicate connections with the same conn_id or null values") diff --git a/airflow/migrations/versions/98271e7606e2_add_scheduling_decision_to_dagrun_and_.py b/airflow/migrations/versions/98271e7606e2_add_scheduling_decision_to_dagrun_and_.py index 8019aa2d91be5..a18b72b96525f 100644 --- a/airflow/migrations/versions/98271e7606e2_add_scheduling_decision_to_dagrun_and_.py +++ b/airflow/migrations/versions/98271e7606e2_add_scheduling_decision_to_dagrun_and_.py @@ -26,7 +26,7 @@ import sqlalchemy as sa from alembic import op -from sqlalchemy.dialects import mysql +from sqlalchemy.dialects import mssql, mysql # revision identifiers, used by Alembic. 
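+# Note on the helpers added below: SERVERPROPERTY('productversion') starts
+# with '8' on SQL Server 2000 and '9' on SQL Server 2005, neither of which
+# supports DATETIME2, so those versions get plain DATETIME while anything
+# newer gets the higher-precision DATETIME2(6). A standalone sketch of the
+# dispatch, assuming `conn` is the connection Alembic provides:
+#
+#     if conn.dialect.name == "mssql":
+#         version = conn.execute(
+#             "SELECT CONVERT(VARCHAR(128), SERVERPROPERTY('productversion'))"
+#         ).fetchone()[0]
+#         timestamp_type = (
+#             mssql.DATETIME if version.split('.')[0] in ('8', '9')
+#             else mssql.DATETIME2(precision=6)
+#         )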
revision = '98271e7606e2' @@ -35,12 +35,32 @@ depends_on = None +def __use_date_time2(conn): + result = conn.execute( + """SELECT CASE WHEN CONVERT(VARCHAR(128), SERVERPROPERTY ('productversion')) + like '8%' THEN '2000' WHEN CONVERT(VARCHAR(128), SERVERPROPERTY ('productversion')) + like '9%' THEN '2005' ELSE '2005Plus' END AS MajorVersion""" + ).fetchone() + mssql_version = result[0] + return mssql_version not in ("2000", "2005") + + +def __get_timestamp(conn): + dialect_name = conn.dialect.name + if dialect_name == "mssql": + return mssql.DATETIME2(precision=6) if __use_date_time2(conn) else mssql.DATETIME + elif dialect_name != "mysql": + return sa.TIMESTAMP(timezone=True) + else: + return mysql.TIMESTAMP(fsp=6, timezone=True) + + def upgrade(): """Apply Add scheduling_decision to DagRun and DAG""" conn = op.get_bind() # pylint: disable=no-member - is_mysql = bool(conn.dialect.name == "mysql") is_sqlite = bool(conn.dialect.name == "sqlite") - timestamp = sa.TIMESTAMP(timezone=True) if not is_mysql else mysql.TIMESTAMP(fsp=6, timezone=True) + is_mssql = bool(conn.dialect.name == "mssql") + timestamp = __get_timestamp(conn) if is_sqlite: op.execute("PRAGMA foreign_keys=off") @@ -71,7 +91,7 @@ def upgrade(): op.execute( "UPDATE dag SET concurrency={}, has_task_concurrency_limits={} where concurrency IS NULL".format( - concurrency, 1 if is_sqlite else sa.true() + concurrency, 1 if is_sqlite or is_mssql else sa.true() ) ) diff --git a/airflow/migrations/versions/bbf4a7ad0465_remove_id_column_from_xcom.py b/airflow/migrations/versions/bbf4a7ad0465_remove_id_column_from_xcom.py index f063aeafd227d..c1ec278dc87f7 100644 --- a/airflow/migrations/versions/bbf4a7ad0465_remove_id_column_from_xcom.py +++ b/airflow/migrations/versions/bbf4a7ad0465_remove_id_column_from_xcom.py @@ -24,6 +24,8 @@ """ +from collections import defaultdict + from alembic import op from sqlalchemy import Column, Integer from sqlalchemy.engine.reflection import Inspector @@ -35,6 +37,61 @@ depends_on = None +def get_table_constraints(conn, table_name): + """ + This function return primary and unique constraint + along with column name. some tables like task_instance + is missing primary key constraint name and the name is + auto-generated by sql server. so this function helps to + retrieve any primary or unique constraint name. 
+ :param conn: sql connection object + :param table_name: table name + :return: a dictionary of ((constraint name, constraint type), column name) of table + :rtype: defaultdict(list) + """ + query = """SELECT tc.CONSTRAINT_NAME , tc.CONSTRAINT_TYPE, ccu.COLUMN_NAME + FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS AS tc + JOIN INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE AS ccu ON ccu.CONSTRAINT_NAME = tc.CONSTRAINT_NAME + WHERE tc.TABLE_NAME = '{table_name}' AND + (tc.CONSTRAINT_TYPE = 'PRIMARY KEY' or UPPER(tc.CONSTRAINT_TYPE) = 'UNIQUE') + """.format( + table_name=table_name + ) + result = conn.execute(query).fetchall() + constraint_dict = defaultdict(list) + for constraint, constraint_type, column in result: + constraint_dict[(constraint, constraint_type)].append(column) + return constraint_dict + + +def drop_column_constraints(operator, column_name, constraint_dict): + """ + Drop a primary key or unique constraint + :param operator: batch_alter_table for the table + :param constraint_dict: a dictionary of ((constraint name, constraint type), column name) of table + """ + for constraint, columns in constraint_dict.items(): + if column_name in columns: + if constraint[1].lower().startswith("primary"): + operator.drop_constraint(constraint[0], type_='primary') + elif constraint[1].lower().startswith("unique"): + operator.drop_constraint(constraint[0], type_='unique') + + +def create_constraints(operator, column_name, constraint_dict): + """ + Create a primary key or unique constraint + :param operator: batch_alter_table for the table + :param constraint_dict: a dictionary of ((constraint name, constraint type), column name) of table + """ + for constraint, columns in constraint_dict.items(): + if column_name in columns: + if constraint[1].lower().startswith("primary"): + operator.create_primary_key(constraint_name=constraint[0], columns=columns) + elif constraint[1].lower().startswith("unique"): + operator.create_unique_constraint(constraint_name=constraint[0], columns=columns) + + def upgrade(): """Apply Remove id column from xcom""" conn = op.get_bind() @@ -43,9 +100,14 @@ def upgrade(): with op.batch_alter_table('xcom') as bop: xcom_columns = [col.get('name') for col in inspector.get_columns("xcom")] if "id" in xcom_columns: + if conn.dialect.name == 'mssql': + constraint_dict = get_table_constraints(conn, "xcom") + drop_column_constraints(bop, 'id', constraint_dict) bop.drop_column('id') bop.drop_index('idx_xcom_dag_task_date') - bop.create_primary_key('pk_xcom', ['dag_id', 'task_id', 'key', 'execution_date']) + # mssql doesn't allow primary keys with nullable columns + if conn.dialect.name != 'mssql': + bop.create_primary_key('pk_xcom', ['dag_id', 'task_id', 'key', 'execution_date']) def downgrade(): diff --git a/airflow/models/dag.py b/airflow/models/dag.py index 9f929bf2d264a..bd6c79d8fe6d0 100644 --- a/airflow/models/dag.py +++ b/airflow/models/dag.py @@ -52,6 +52,7 @@ from sqlalchemy import Boolean, Column, ForeignKey, Index, Integer, String, Text, func, or_ from sqlalchemy.orm import backref, joinedload, relationship from sqlalchemy.orm.session import Session +from sqlalchemy.sql import expression import airflow.templates from airflow import settings, utils @@ -1872,7 +1873,7 @@ def bulk_write_to_db(cls, dags: Collection["DAG"], session=None): .filter( DagRun.dag_id.in_(existing_dag_ids), DagRun.state == State.RUNNING, # pylint: disable=comparison-with-callable - DagRun.external_trigger.is_(False), + DagRun.external_trigger == expression.false(), ) .group_by(DagRun.dag_id) .all() @@ 
-2270,8 +2271,8 @@ def dags_needing_dagruns(cls, session: Session): query = ( session.query(cls) .filter( - cls.is_paused.is_(False), - cls.is_active.is_(True), + cls.is_paused == expression.false(), + cls.is_active == expression.true(), cls.next_dagrun_create_after <= func.now(), ) .order_by(cls.next_dagrun_create_after) diff --git a/airflow/models/dagrun.py b/airflow/models/dagrun.py index 6bd96f3aa7eee..e18351d3c1fb8 100644 --- a/airflow/models/dagrun.py +++ b/airflow/models/dagrun.py @@ -35,6 +35,7 @@ from sqlalchemy.ext.declarative import declared_attr from sqlalchemy.orm import backref, relationship, synonym from sqlalchemy.orm.session import Session +from sqlalchemy.sql import expression from airflow import settings from airflow.configuration import conf as airflow_conf @@ -211,8 +212,8 @@ def next_dagruns_to_examine( DagModel.dag_id == cls.dag_id, ) .filter( - DagModel.is_paused.is_(False), - DagModel.is_active.is_(True), + DagModel.is_paused == expression.false(), + DagModel.is_active == expression.true(), ) .order_by( nulls_first(cls.last_scheduling_decision, session=session), diff --git a/airflow/models/serialized_dag.py b/airflow/models/serialized_dag.py index 81448b6e64b8d..3118871fc262b 100644 --- a/airflow/models/serialized_dag.py +++ b/airflow/models/serialized_dag.py @@ -26,8 +26,7 @@ import sqlalchemy_jsonfield from sqlalchemy import BigInteger, Column, Index, String, and_ from sqlalchemy.orm import Session, backref, foreign, relationship -from sqlalchemy.sql import exists -from sqlalchemy.sql.expression import func +from sqlalchemy.sql.expression import literal from airflow.models.base import ID_LEN, Base from airflow.models.dag import DAG, DagModel @@ -117,15 +116,18 @@ def write_dag(cls, dag: DAG, min_update_interval: Optional[int] = None, session: # If Yes, does nothing # If No or the DAG does not exists, updates / writes Serialized DAG to DB if min_update_interval is not None: - if session.query( - exists().where( + if ( + session.query(literal(True)) + .filter( and_( cls.dag_id == dag.dag_id, (timezone.utcnow() - timedelta(seconds=min_update_interval)) < cls.last_updated, ) ) - ).scalar(): - return False + .first() + is not None + ): + return log.debug("Checking if DAG (%s) changed", dag.dag_id) new_serialized_dag = cls(dag) @@ -217,7 +219,7 @@ def has_dag(cls, dag_id: str, session: Session = None) -> bool: :param dag_id: the DAG to check :param session: ORM Session """ - return session.query(exists().where(cls.dag_id == dag_id)).scalar() + return session.query(literal(True)).filter(cls.dag_id == dag_id).first() is not None @classmethod @provide_session diff --git a/breeze b/breeze index 50f9e5510aba1..a838d65036109 100755 --- a/breeze +++ b/breeze @@ -3069,6 +3069,9 @@ function breeze::read_saved_environment_variables() { MYSQL_VERSION="${MYSQL_VERSION:=$(parameters::read_from_file MYSQL_VERSION)}" MYSQL_VERSION=${MYSQL_VERSION:=${_breeze_default_mysql_version}} + MSSQL_VERSION="${MSSQL_VERSION:=$(parameters::read_from_file MSSQL_VERSION)}" + MSSQL_VERSION=${MSSQL_VERSION:=${_breeze_default_mssql_version}} + # Here you read DockerHub user/account that you use # You can populate your own images in DockerHub this way and work with the, # You can override it with "--dockerhub-user" option and it will be stored in .build directory @@ -3102,6 +3105,7 @@ function breeze::read_saved_environment_variables() { # EXECUTOR # POSTGRES_VERSION # MYSQL_VERSION +# MSSQL_VERSION # DOCKERHUB_USER # DOCKERHUB_REPO # @@ -3135,6 +3139,7 @@ function breeze::check_and_save_all_params() 
{
     parameters::check_and_save_allowed_param "EXECUTOR" "Executors" "--executor"
     parameters::check_and_save_allowed_param "POSTGRES_VERSION" "Postgres version" "--postgres-version"
     parameters::check_and_save_allowed_param "MYSQL_VERSION" "Mysql version" "--mysql-version"
+    parameters::check_and_save_allowed_param "MSSQL_VERSION" "MSSql version" "--mssql-version"
     parameters::check_and_save_allowed_param "GITHUB_REGISTRY" "GitHub Registry" "--github-registry"

     parameters::check_allowed_param TEST_TYPE "Type of tests" "--test-type"
@@ -3156,6 +3161,7 @@ function breeze::check_and_save_all_params() {
 # WEBSERVER_HOST_PORT
 # POSTGRES_HOST_PORT
 # MYSQL_HOST_PORT
+# MSSQL_HOST_PORT
 #
 #######################################################################################################
 function breeze::print_cheatsheet() {
@@ -3188,6 +3194,7 @@ function breeze::print_cheatsheet() {
         echo "  * ${FLOWER_HOST_PORT} -> forwarded to Flower dashboard -> airflow:5555"
         echo "  * ${POSTGRES_HOST_PORT} -> forwarded to Postgres database -> postgres:5432"
         echo "  * ${MYSQL_HOST_PORT} -> forwarded to MySQL database -> mysql:3306"
+        echo "  * ${MSSQL_HOST_PORT} -> forwarded to MSSQL database -> mssql:1433"
         echo "  * ${REDIS_HOST_PORT} -> forwarded to Redis broker -> redis:6379"
         echo
         echo " Here are links to those services that you can use on host:"
diff --git a/breeze-complete b/breeze-complete
index 6126ca4228c33..e19a79791224b 100644
--- a/breeze-complete
+++ b/breeze-complete
@@ -23,8 +23,8 @@
 # by the BATS tests automatically during pre-commit and CI
 # Those cannot be made read-only as the breeze-complete must be re-sourceable
 _breeze_allowed_python_major_minor_versions="3.6 3.7 3.8"
-_breeze_allowed_backends="sqlite mysql postgres"
+_breeze_allowed_backends="sqlite mysql postgres mssql"
 _breeze_allowed_integrations="cassandra kerberos mongo openldap pinot rabbitmq redis statsd trino all"
 _breeze_allowed_generate_constraints_modes="source-providers pypi-providers no-providers"
 # registrys is good here even if it is not correct english. We are adding s automatically to all variables
@@ -34,6 +34,7 @@ _breeze_allowed_kubernetes_versions="v1.20.2 v1.19.7 v1.18.15"
 _breeze_allowed_helm_versions="v3.2.4"
 _breeze_allowed_kind_versions="v0.10.0"
 _breeze_allowed_mysql_versions="5.7 8"
+_breeze_allowed_mssql_versions="2017-latest 2019-latest"
 _breeze_allowed_postgres_versions="9.6 10 11 12 13"
 _breeze_allowed_kind_operations="start stop restart status deploy test shell k9s"
 _breeze_allowed_executors="KubernetesExecutor CeleryExecutor LocalExecutor CeleryKubernetesExecutor"
@@ -54,6 +55,7 @@ _breeze_allowed_installation_methods=".
apache-airflow" _breeze_default_executor=$(echo "${_breeze_allowed_executors}" | awk '{print $1}') _breeze_default_postgres_version=$(echo "${_breeze_allowed_postgres_versions}" | awk '{print $1}') _breeze_default_mysql_version=$(echo "${_breeze_allowed_mysql_versions}" | awk '{print $1}') + _breeze_default_mssql_version=$(echo "${_breeze_allowed_mssql_versions}" | awk '{print $1}') _breeze_default_test_type=$(echo "${_breeze_allowed_test_types}" | awk '{print $1}') _breeze_default_package_format=$(echo "${_breeze_allowed_package_formats}" | awk '{print $1}') } @@ -175,7 +177,7 @@ verbose assume-yes assume-no assume-quit forward-credentials init-script: force-build-images force-pull-images force-pull-base-python-image production-image extras: force-clean-images skip-rebuild-check build-cache-local build-cache-pulled build-cache-disabled disable-pip-cache dockerhub-user: dockerhub-repo: use-github-registry github-registry: github-repository: github-image-id: generate-constraints-mode: -postgres-version: mysql-version: +postgres-version: mysql-version: mssql-version: version-suffix-for-pypi: version-suffix-for-svn: additional-extras: additional-python-deps: additional-dev-deps: additional-runtime-deps: image-tag: disable-mysql-client-installation constraints-location: disable-pip-cache install-from-docker-context-files @@ -287,6 +289,9 @@ function breeze_complete::get_known_values_breeze() { --mysql-version) _breeze_known_values=${_breeze_allowed_mysql_versions} ;; + --mssql-version) + _breeze_known_values=${_breeze_allowed_mssql_versions} + ;; -D | --dockerhub-user) _breeze_known_values="${_breeze_default_dockerhub_user}" ;; diff --git a/scripts/ci/docker-compose/backend-mssql.yml b/scripts/ci/docker-compose/backend-mssql.yml new file mode 100644 index 0000000000000..4fabc1817d599 --- /dev/null +++ b/scripts/ci/docker-compose/backend-mssql.yml @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+--- +version: "2.2" +services: + airflow: + environment: + - BACKEND=mssql + - AIRFLOW__CORE__SQL_ALCHEMY_CONN=mssql+pyodbc://sa:Airflow123@mssql:1433/master?driver=ODBC+Driver+17+for+SQL+Server + - AIRFLOW__CELERY__RESULT_BACKEND=db+mssql+pyodbc://sa:Airflow123@mssql:1433/master?driver=ODBC+Driver+17+for+SQL+Server + depends_on: + - mssql + mssql: + image: mcr.microsoft.com/mssql/server:${MSSQL_VERSION} + environment: + - ACCEPT_EULA=Y + - SA_PASSWORD=Airflow123 + ports: + - "${MSSQL_HOST_PORT}:1433" diff --git a/scripts/ci/libraries/_initialization.sh b/scripts/ci/libraries/_initialization.sh index c734ccd14d088..7129c435149de 100644 --- a/scripts/ci/libraries/_initialization.sh +++ b/scripts/ci/libraries/_initialization.sh @@ -74,6 +74,7 @@ function initialization::initialize_base_variables() { export WEBSERVER_HOST_PORT=${WEBSERVER_HOST_PORT:="28080"} export POSTGRES_HOST_PORT=${POSTGRES_HOST_PORT:="25433"} export MYSQL_HOST_PORT=${MYSQL_HOST_PORT:="23306"} + export MSSQL_HOST_PORT=${MSSQL_HOST_PORT:="21433"} export FLOWER_HOST_PORT=${FLOWER_HOST_PORT:="25555"} export REDIS_HOST_PORT=${REDIS_HOST_PORT:="26379"} @@ -112,6 +113,9 @@ function initialization::initialize_base_variables() { # Default MySQL versions export MYSQL_VERSION=${MYSQL_VERSION:=${CURRENT_MYSQL_VERSIONS[0]}} + #Default MS SQL version + export MSSQL_VERSION=${MSSQL_VERSION:="2017-latest"} + # If set to true, the database will be reset at entry. Works for Postgres and MySQL export DB_RESET=${DB_RESET:="false"} diff --git a/scripts/in_container/check_environment.sh b/scripts/in_container/check_environment.sh index 22c6fe58d2092..3237bc9db1935 100755 --- a/scripts/in_container/check_environment.sh +++ b/scripts/in_container/check_environment.sh @@ -98,6 +98,8 @@ function check_db_backend { check_service "PostgreSQL" "run_nc postgres 5432" "${MAX_CHECK}" elif [[ ${BACKEND} == "mysql" ]]; then check_service "MySQL" "run_nc mysql 3306" "${MAX_CHECK}" + elif [[ ${BACKEND} == "mssql" ]]; then + check_service "MSSQL" "run_nc mssql 1433" "${MAX_CHECK}" elif [[ ${BACKEND} == "sqlite" ]]; then return else diff --git a/setup.py b/setup.py index f92a42269c93d..d949c0c235c78 100644 --- a/setup.py +++ b/setup.py @@ -522,7 +522,8 @@ def get_sphinx_theme_version() -> str: 'paramiko', 'pipdeptree', 'pre-commit', - 'pylint~=2.8.1', + 'pylint>=2.7.0', + 'pyodbc', 'pysftp', 'pytest~=6.0', 'pytest-cov', From f2b36b3488d4d6fe39f70fe0162bcec4abc0e506 Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Mon, 22 Mar 2021 15:59:17 +0530 Subject: [PATCH 02/36] fix more incompatibilities --- airflow/models/dag.py | 2 +- airflow/models/dagcode.py | 5 +++-- tests/models/test_renderedtifields.py | 5 ++++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/airflow/models/dag.py b/airflow/models/dag.py index bd6c79d8fe6d0..b708295cfc8cf 100644 --- a/airflow/models/dag.py +++ b/airflow/models/dag.py @@ -2187,7 +2187,7 @@ def get_paused_dag_ids(dag_ids: List[str], session: Session = None) -> Set[str]: """ paused_dag_ids = ( session.query(DagModel.dag_id) - .filter(DagModel.is_paused.is_(True)) + .filter(DagModel.is_paused == expression.true()) .filter(DagModel.dag_id.in_(dag_ids)) .all() ) diff --git a/airflow/models/dagcode.py b/airflow/models/dagcode.py index ce65fc2f159ec..e47f269c7eff4 100644 --- a/airflow/models/dagcode.py +++ b/airflow/models/dagcode.py @@ -20,7 +20,8 @@ from datetime import datetime from typing import Iterable, List, Optional -from sqlalchemy import BigInteger, Column, String, Text, exists +from sqlalchemy import 
BigInteger, Column, String, Text +from sqlalchemy.sql.expression import literal from airflow.exceptions import AirflowException, DagCodeNotFound from airflow.models.base import Base @@ -147,7 +148,7 @@ def has_dag(cls, fileloc: str, session=None) -> bool: :param session: ORM Session """ fileloc_hash = cls.dag_fileloc_hash(fileloc) - return session.query(exists().where(cls.fileloc_hash == fileloc_hash)).scalar() + return session.query(literal(True)).filter(cls.fileloc_hash == fileloc_hash).first() is not None @classmethod def get_code_by_fileloc(cls, fileloc: str) -> str: diff --git a/tests/models/test_renderedtifields.py b/tests/models/test_renderedtifields.py index d83f542f875b9..0132ca5dff372 100644 --- a/tests/models/test_renderedtifields.py +++ b/tests/models/test_renderedtifields.py @@ -172,7 +172,10 @@ def test_delete_old_records(self, rtif_num, num_to_keep, remaining_rtifs, expect assert rtif_num == len(result) # Verify old records are deleted and only 'num_to_keep' records are kept - with assert_queries_count(expected_query_count): + expected_query_count_based_on_db = (expected_query_count + 1, expected_query_count)[ + session.bind.dialect.name in ["postgresql", "sqlite", "mysql"] or expected_query_count == 0 + ] + with assert_queries_count(expected_query_count_based_on_db): RTIF.delete_old_records(task_id=task.task_id, dag_id=task.dag_id, num_to_keep=num_to_keep) result = session.query(RTIF).filter(RTIF.dag_id == dag.dag_id, RTIF.task_id == task.task_id).all() assert remaining_rtifs == len(result) From 66f69a91113b5e8efbf053d0e327a79001ce8b1b Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Mon, 22 Mar 2021 18:18:57 +0530 Subject: [PATCH 03/36] fix failing mssql tests --- airflow/jobs/scheduler_job.py | 37 ++++++++++++++++++----------------- airflow/models/dag.py | 6 ++++-- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/airflow/jobs/scheduler_job.py b/airflow/jobs/scheduler_job.py index cdfec4c5cb814..a99a7335ad901 100644 --- a/airflow/jobs/scheduler_job.py +++ b/airflow/jobs/scheduler_job.py @@ -33,7 +33,7 @@ from typing import DefaultDict, Dict, Iterable, List, Optional, Set, Tuple from setproctitle import setproctitle -from sqlalchemy import and_, func, not_, or_, tuple_ +from sqlalchemy import and_, func, not_, or_ from sqlalchemy.exc import OperationalError from sqlalchemy.orm import load_only, selectinload from sqlalchemy.orm.session import Session, make_transient @@ -1068,15 +1068,15 @@ def _executable_task_instances_to_queued(self, max_tis: int, session: Session = task_instance_str = "\n\t".join(repr(x) for x in executable_tis) self.log.info("Setting the following tasks to queued state:\n\t%s", task_instance_str) - - # set TIs to queued state - filter_for_tis = TI.filter_for_tis(executable_tis) - session.query(TI).filter(filter_for_tis).update( - # TODO[ha]: should we use func.now()? How does that work with DB timezone on mysql when it's not - # UTC? - {TI.state: State.QUEUED, TI.queued_dttm: timezone.utcnow(), TI.queued_by_job_id: self.id}, - synchronize_session=False, - ) + if len(executable_tis) > 0: + # set TIs to queued state + filter_for_tis = TI.filter_for_tis(executable_tis) + session.query(TI).filter(filter_for_tis).update( + # TODO[ha]: should we use func.now()? How does that work with DB timezone + # on mysql when it's not UTC? 
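+                # The new empty-list guard above most likely exists so that no
+                # UPDATE is emitted at all when nothing was scheduled: with an
+                # empty `executable_tis`, TI.filter_for_tis produces no usable
+                # predicate to constrain the statement.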
+ {TI.state: State.QUEUED, TI.queued_dttm: timezone.utcnow(), TI.queued_by_job_id: self.id}, + synchronize_session=False, + ) for ti in executable_tis: make_transient(ti) @@ -1581,15 +1581,16 @@ def _create_dag_runs(self, dag_models: Iterable[DagModel], session: Session) -> # as DagModel.dag_id and DagModel.next_dagrun # This list is used to verify if the DagRun already exist so that we don't attempt to create # duplicate dag runs - active_dagruns = ( - session.query(DagRun.dag_id, DagRun.execution_date) - .filter( - tuple_(DagRun.dag_id, DagRun.execution_date).in_( - [(dm.dag_id, dm.next_dagrun) for dm in dag_models] - ) + + filter_dms = [ + and_( + DagRun.dag_id == dm.dag_id, + DagRun.execution_date == dm.next_dagrun, ) - .all() - ) + for dm in dag_models + ] + + active_dagruns = session.query(DagRun.dag_id, DagRun.execution_date).filter(or_(*filter_dms)).all() for dag_model in dag_models: try: diff --git a/airflow/models/dag.py b/airflow/models/dag.py index b708295cfc8cf..d1c8d576c1e6b 100644 --- a/airflow/models/dag.py +++ b/airflow/models/dag.py @@ -101,7 +101,9 @@ def get_last_dagrun(dag_id, session, include_externally_triggered=False): DR = DagRun query = session.query(DR).filter(DR.dag_id == dag_id) if not include_externally_triggered: - query = query.filter(DR.external_trigger == False) # noqa pylint: disable=singleton-comparison + query = query.filter( + DR.external_trigger == expression.false() + ) # noqa pylint: disable=singleton-comparison query = query.order_by(DR.execution_date.desc()) return query.first() @@ -898,7 +900,7 @@ def get_num_active_runs(self, external_trigger=None, session=None): ) if external_trigger is not None: - query = query.filter(DagRun.external_trigger == external_trigger) + query = query.filter(DagRun.external_trigger == expression.literal(external_trigger)) return query.scalar() From 4f0775d48ec9219a9ad6032a94f31c8ae8fd9a06 Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Mon, 22 Mar 2021 19:21:25 +0530 Subject: [PATCH 04/36] Update airflow/models/dag.py Co-authored-by: Ash Berlin-Taylor --- airflow/models/dag.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airflow/models/dag.py b/airflow/models/dag.py index d1c8d576c1e6b..99413401a0dd1 100644 --- a/airflow/models/dag.py +++ b/airflow/models/dag.py @@ -103,7 +103,7 @@ def get_last_dagrun(dag_id, session, include_externally_triggered=False): if not include_externally_triggered: query = query.filter( DR.external_trigger == expression.false() - ) # noqa pylint: disable=singleton-comparison + ) query = query.order_by(DR.execution_date.desc()) return query.first() From 8054f097584b452de2ad656317672246dc381ad1 Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Mon, 22 Mar 2021 20:11:49 +0530 Subject: [PATCH 05/36] use special query for MSSQL only --- airflow/jobs/scheduler_job.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/airflow/jobs/scheduler_job.py b/airflow/jobs/scheduler_job.py index a99a7335ad901..bf4ad42c23b3f 100644 --- a/airflow/jobs/scheduler_job.py +++ b/airflow/jobs/scheduler_job.py @@ -33,7 +33,7 @@ from typing import DefaultDict, Dict, Iterable, List, Optional, Set, Tuple from setproctitle import setproctitle -from sqlalchemy import and_, func, not_, or_ +from sqlalchemy import and_, func, not_, or_, tuple_ from sqlalchemy.exc import OperationalError from sqlalchemy.orm import load_only, selectinload from sqlalchemy.orm.session import Session, make_transient @@ -1582,15 +1582,24 @@ def _create_dag_runs(self, dag_models: 
Iterable[DagModel], session: Session) -> # This list is used to verify if the DagRun already exist so that we don't attempt to create # duplicate dag runs - filter_dms = [ - and_( - DagRun.dag_id == dm.dag_id, - DagRun.execution_date == dm.next_dagrun, + if session.bind.dialect.name == 'mssql': + active_dagruns_filter = or_( + *[ + and_( + DagRun.dag_id == dm.dag_id, + DagRun.execution_date == dm.next_dagrun, + ) + for dm in dag_models + ] + ) + else: + active_dagruns_filter = tuple_(DagRun.dag_id, DagRun.execution_date).in_( + [(dm.dag_id, dm.next_dagrun) for dm in dag_models] ) - for dm in dag_models - ] - active_dagruns = session.query(DagRun.dag_id, DagRun.execution_date).filter(or_(*filter_dms)).all() + active_dagruns = ( + session.query(DagRun.dag_id, DagRun.execution_date).filter(active_dagruns_filter).all() + ) for dag_model in dag_models: try: From c639eb84222c3b2700219948381a62465a29e85b Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Mon, 22 Mar 2021 20:51:39 +0530 Subject: [PATCH 06/36] fix count tests --- tests/models/test_renderedtifields.py | 1 + tests/models/test_taskinstance.py | 17 ++++++++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/tests/models/test_renderedtifields.py b/tests/models/test_renderedtifields.py index 0132ca5dff372..19f26fb539ddb 100644 --- a/tests/models/test_renderedtifields.py +++ b/tests/models/test_renderedtifields.py @@ -172,6 +172,7 @@ def test_delete_old_records(self, rtif_num, num_to_keep, remaining_rtifs, expect assert rtif_num == len(result) # Verify old records are deleted and only 'num_to_keep' records are kept + # For other DBs,an extra query is fired in RenderedTaskInstanceFields.delete_old_records expected_query_count_based_on_db = (expected_query_count + 1, expected_query_count)[ session.bind.dialect.name in ["postgresql", "sqlite", "mysql"] or expected_query_count == 0 ] diff --git a/tests/models/test_taskinstance.py b/tests/models/test_taskinstance.py index 35c00fe8368cf..df0f855520939 100644 --- a/tests/models/test_taskinstance.py +++ b/tests/models/test_taskinstance.py @@ -2098,8 +2098,15 @@ def test_execute_queries_count(self, expected_query_count, mark_success): run_type=DagRunType.SCHEDULED, session=session, ) + # an extra query is fired in RenderedTaskInstanceFields.delete_old_records + # for other DBs. 
delete_old_records is called only when mark_success is False + expected_query_count_based_on_db = (expected_query_count + 1, expected_query_count)[ + session.bind.dialect.name in ["postgresql", "sqlite", "mysql"] + or expected_query_count == 0 + or mark_success + ] - with assert_queries_count(expected_query_count): + with assert_queries_count(expected_query_count_based_on_db): ti._run_raw_task(mark_success=mark_success) def test_execute_queries_count_store_serialized(self): @@ -2116,8 +2123,12 @@ def test_execute_queries_count_store_serialized(self): run_type=DagRunType.SCHEDULED, session=session, ) - - with assert_queries_count(12): + # an extra query is fired in RenderedTaskInstanceFields.delete_old_records + # for other DBs + expected_query_count_based_on_db = (11, 10)[ + session.bind.dialect.name in ["postgresql", "sqlite", "mysql"] + ] + with assert_queries_count(expected_query_count_based_on_db): ti._run_raw_task() def test_operator_field_with_serialization(self): From 670255532a2a4cb19e72d992fe06d62775632639 Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Mon, 22 Mar 2021 20:54:29 +0530 Subject: [PATCH 07/36] fix style checks --- airflow/models/dag.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/airflow/models/dag.py b/airflow/models/dag.py index 99413401a0dd1..fb5242f13431d 100644 --- a/airflow/models/dag.py +++ b/airflow/models/dag.py @@ -101,9 +101,7 @@ def get_last_dagrun(dag_id, session, include_externally_triggered=False): DR = DagRun query = session.query(DR).filter(DR.dag_id == dag_id) if not include_externally_triggered: - query = query.filter( - DR.external_trigger == expression.false() - ) + query = query.filter(DR.external_trigger == expression.false()) query = query.order_by(DR.execution_date.desc()) return query.first() From eee6c4b24749f3d9a183d1da7aaf292269739764 Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Mon, 22 Mar 2021 22:57:39 +0530 Subject: [PATCH 08/36] Update airflow/models/dagcode.py Co-authored-by: Ash Berlin-Taylor --- airflow/models/dagcode.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airflow/models/dagcode.py b/airflow/models/dagcode.py index e47f269c7eff4..4295a5bffe095 100644 --- a/airflow/models/dagcode.py +++ b/airflow/models/dagcode.py @@ -148,7 +148,7 @@ def has_dag(cls, fileloc: str, session=None) -> bool: :param session: ORM Session """ fileloc_hash = cls.dag_fileloc_hash(fileloc) - return session.query(literal(True)).filter(cls.fileloc_hash == fileloc_hash).first() is not None + return session.query(literal(True)).filter(cls.fileloc_hash == fileloc_hash).one_or_none() is not None @classmethod def get_code_by_fileloc(cls, fileloc: str) -> str: From 98252820ac4399d5a4d9403aecea13319b108021 Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Tue, 23 Mar 2021 00:13:30 +0530 Subject: [PATCH 09/36] make xcom fields non-nullable --- ...1f0_make_xcom_pkey_columns_non_nullable.py | 76 +++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 airflow/migrations/versions/e9304a3141f0_make_xcom_pkey_columns_non_nullable.py diff --git a/airflow/migrations/versions/e9304a3141f0_make_xcom_pkey_columns_non_nullable.py b/airflow/migrations/versions/e9304a3141f0_make_xcom_pkey_columns_non_nullable.py new file mode 100644 index 0000000000000..7e9203bb55315 --- /dev/null +++ b/airflow/migrations/versions/e9304a3141f0_make_xcom_pkey_columns_non_nullable.py @@ -0,0 +1,76 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""make xcom pkey columns non-nullable + +Revision ID: e9304a3141f0 +Revises: 83f031fd9f1c +Create Date: 2021-03-22 17:35:01.107500 + +""" +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import mssql, mysql + +from airflow.models.base import COLLATION_ARGS + +# revision identifiers, used by Alembic. +revision = 'e9304a3141f0' +down_revision = '83f031fd9f1c' +branch_labels = None +depends_on = None + + +def __use_date_time2(conn): + result = conn.execute( + """SELECT CASE WHEN CONVERT(VARCHAR(128), SERVERPROPERTY ('productversion')) + like '8%' THEN '2000' WHEN CONVERT(VARCHAR(128), SERVERPROPERTY ('productversion')) + like '9%' THEN '2005' ELSE '2005Plus' END AS MajorVersion""" + ).fetchone() + mssql_version = result[0] + return mssql_version not in ("2000", "2005") + + +def __get_timestamp(conn): + dialect_name = conn.dialect.name + if dialect_name == "mssql": + return mssql.DATETIME2(precision=6) if __use_date_time2(conn) else mssql.DATETIME + elif dialect_name != "mysql": + return sa.TIMESTAMP(timezone=True) + else: + return mysql.TIMESTAMP(fsp=6, timezone=True) + + +def upgrade(): + """Apply make xcom pkey columns non-nullable""" + conn = op.get_bind() + with op.batch_alter_table('xcom') as bop: + bop.alter_column("key", type_=sa.String(length=512, **COLLATION_ARGS), nullable=False) + bop.alter_column("execution_date", type_=__get_timestamp(conn), nullable=False) + if conn.dialect.name == 'mssql': + bop.create_primary_key('pk_xcom', ['dag_id', 'task_id', 'key', 'execution_date']) + + +def downgrade(): + """Unapply make xcom pkey columns non-nullable""" + conn = op.get_bind() + with op.batch_alter_table('xcom') as bop: + if conn.dialect.name == 'mssql': + bop.drop_constraint('pk_xcom', 'primary') + bop.alter_column("key", type_=sa.String(length=512, **COLLATION_ARGS), nullable=True) + bop.alter_column("execution_date", type_=__get_timestamp(conn), nullable=True) From d8901dad2be024d7eb77722655fcb9dca8ea93a2 Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Tue, 23 Mar 2021 07:51:43 +0530 Subject: [PATCH 10/36] exclude MSSQL default tables --- tests/utils/test_db.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/utils/test_db.py b/tests/utils/test_db.py index e608ff1734c73..601dc6f9fe9da 100644 --- a/tests/utils/test_db.py +++ b/tests/utils/test_db.py @@ -68,6 +68,12 @@ def test_database_schema_and_sqlalchemy_model_are_in_sync(self): lambda t: (t[0] == 'remove_index' and t[1].name == 'permission_view_id'), # from test_security unit test lambda t: (t[0] == 'remove_table' and t[1].name == 'some_model'), + # MSSQL default tables + lambda t: (t[0] == 'remove_table' and t[1].name == 'spt_monitor'), + lambda t: (t[0] == 'remove_table' and t[1].name == 'spt_fallback_db'), + lambda t: (t[0] == 'remove_table' and t[1].name == 'spt_fallback_usg'), + lambda t: (t[0] == 
'remove_table' and t[1].name == 'MSreplication_options'), + lambda t: (t[0] == 'remove_table' and t[1].name == 'spt_fallback_dev'), ] for ignore in ignores: diff = [d for d in diff if not ignore(d)] From cf7612404a479c00c3e7e1fc365ae30ca5497481 Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Tue, 23 Mar 2021 18:13:21 +0530 Subject: [PATCH 11/36] make mssql tables in sync with SQLA models --- ...b1_make_mssql_tables_in_sync_with_sqla_.py | 111 ++++++++++++++++++ .../ci/docker-compose/backend-mssql-port.yml | 22 ++++ 2 files changed, 133 insertions(+) create mode 100644 airflow/migrations/versions/5ccc55a461b1_make_mssql_tables_in_sync_with_sqla_.py create mode 100644 scripts/ci/docker-compose/backend-mssql-port.yml diff --git a/airflow/migrations/versions/5ccc55a461b1_make_mssql_tables_in_sync_with_sqla_.py b/airflow/migrations/versions/5ccc55a461b1_make_mssql_tables_in_sync_with_sqla_.py new file mode 100644 index 0000000000000..e6adb3eea0850 --- /dev/null +++ b/airflow/migrations/versions/5ccc55a461b1_make_mssql_tables_in_sync_with_sqla_.py @@ -0,0 +1,111 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""make mssql tables in sync with SQLA models + +Revision ID: 5ccc55a461b1 +Revises: e9304a3141f0 +Create Date: 2021-03-23 11:50:44.914282 + +""" + +from alembic import op +from sqlalchemy.dialects import mssql + +# revision identifiers, used by Alembic. 
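+#
+# A note on the batch operations below: SQL Server refuses to ALTER a column
+# while an index references it, so the pattern is to drop the index, retype
+# the column, then recreate the index (see the task_fail block), e.g.:
+#
+#     with op.batch_alter_table('task_fail') as batch_op:
+#         batch_op.drop_index('idx_task_fail_dag_task_date')
+#         batch_op.alter_column(column_name='execution_date', type_=..., nullable=False)
+#         batch_op.create_index('idx_task_fail_dag_task_date', [...], unique=False)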
+revision = '5ccc55a461b1' +down_revision = 'e9304a3141f0' +branch_labels = None +depends_on = None + + +def __get_timestamp(conn): + result = conn.execute( + """SELECT CASE WHEN CONVERT(VARCHAR(128), SERVERPROPERTY ('productversion')) + like '8%' THEN '2000' WHEN CONVERT(VARCHAR(128), SERVERPROPERTY ('productversion')) + like '9%' THEN '2005' ELSE '2005Plus' END AS MajorVersion""" + ).fetchone() + mssql_version = result[0] + if mssql_version not in ("2000", "2005"): + return mssql.DATETIME2(precision=6) + else: + return mssql.DATETIME + + +def upgrade(): + """Apply make mssql tables in sync with SQLA models""" + conn = op.get_bind() + if conn.dialect.name == "mssql": + op.alter_column( + table_name="xcom", column_name="timestamp", type_=__get_timestamp(conn), nullable=False + ) + with op.batch_alter_table('task_reschedule') as task_reschedule_batch_op: + task_reschedule_batch_op.alter_column( + column_name='end_date', type_=__get_timestamp(conn), nullable=False + ) + task_reschedule_batch_op.alter_column( + column_name='reschedule_date', type_=__get_timestamp(conn), nullable=False + ) + task_reschedule_batch_op.alter_column( + column_name='start_date', type_=__get_timestamp(conn), nullable=False + ) + with op.batch_alter_table('task_fail') as task_fail_batch_op: + task_fail_batch_op.drop_index('idx_task_fail_dag_task_date') + task_fail_batch_op.alter_column( + column_name="execution_date", type_=__get_timestamp(conn), nullable=False + ) + task_fail_batch_op.create_index( + 'idx_task_fail_dag_task_date', ['dag_id', 'task_id', 'execution_date'], unique=False + ) + with op.batch_alter_table('task_instance') as task_instance_batch_op: + task_instance_batch_op.drop_index('ti_state_lkp') + task_instance_batch_op.create_index( + 'ti_state_lkp', ['dag_id', 'task_id', 'execution_date', 'state'], unique=False + ) + + +def downgrade(): + """Unapply make mssql tables in sync with SQLA models""" + conn = op.get_bind() + if conn.dialect.name == "mssql": + op.alter_column( + table_name="xcom", column_name="timestamp", type_=__get_timestamp(conn), nullable=True + ) + with op.batch_alter_table('task_reschedule') as task_reschedule_batch_op: + task_reschedule_batch_op.alter_column( + column_name='end_date', type_=__get_timestamp(conn), nullable=True + ) + task_reschedule_batch_op.alter_column( + column_name='reschedule_date', type_=__get_timestamp(conn), nullable=True + ) + task_reschedule_batch_op.alter_column( + column_name='start_date', type_=__get_timestamp(conn), nullable=True + ) + with op.batch_alter_table('task_fail') as task_fail_batch_op: + task_fail_batch_op.drop_index('idx_task_fail_dag_task_date') + task_fail_batch_op.alter_column( + column_name="execution_date", type_=__get_timestamp(conn), nullable=False + ) + task_fail_batch_op.create_index( + 'idx_task_fail_dag_task_date', ['dag_id', 'task_id', 'execution_date'], unique=False + ) + with op.batch_alter_table('task_instance') as task_instance_batch_op: + task_instance_batch_op.drop_index('ti_state_lkp') + task_instance_batch_op.create_index( + 'ti_state_lkp', ['dag_id', 'task_id', 'execution_date'], unique=False + ) diff --git a/scripts/ci/docker-compose/backend-mssql-port.yml b/scripts/ci/docker-compose/backend-mssql-port.yml new file mode 100644 index 0000000000000..42fd4185da3b6 --- /dev/null +++ b/scripts/ci/docker-compose/backend-mssql-port.yml @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+---
+version: "2.2"
+services:
+  mssql:
+    ports:
+      - "${MSSQL_HOST_PORT}:1433"

From 7f2ccd022ab1633b60c8b02c9361506111f44223 Mon Sep 17 00:00:00 2001
From: Aneesh Joseph
Date: Wed, 24 Mar 2021 01:07:25 +0530
Subject: [PATCH 12/36] fix www tests

---
 airflow/www/security.py | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/airflow/www/security.py b/airflow/www/security.py
index 8768587fdb769..34c5bf3d2207e 100644
--- a/airflow/www/security.py
+++ b/airflow/www/security.py
@@ -347,10 +347,12 @@ def can_read_dag(self, dag_id, user=None) -> bool:
         """Determines whether a user has DAG read access."""
         if not user:
             user = g.user
-        dag_resource_name = permissions.resource_name_for_dag(dag_id)
-        return self._has_view_access(
-            user, permissions.ACTION_CAN_READ, permissions.RESOURCE_DAG
-        ) or self._has_view_access(user, permissions.ACTION_CAN_READ, dag_resource_name)
+        prefixed_dag_id = self.prefixed_dag_id(dag_id)
+        return (
+            self._has_view_access(user, permissions.ACTION_CAN_READ, permissions.RESOURCE_DAG)
+            or self._has_view_access(user, permissions.ACTION_CAN_READ, prefixed_dag_id)
+            or False
+        )

     def can_edit_dag(self, dag_id, user=None) -> bool:
         """Determines whether a user has DAG edit access."""
@@ -358,9 +360,11 @@ def can_edit_dag(self, dag_id, user=None) -> bool:
             user = g.user
         dag_resource_name = permissions.resource_name_for_dag(dag_id)

-        return self._has_view_access(
-            user, permissions.ACTION_CAN_EDIT, permissions.RESOURCE_DAG
-        ) or self._has_view_access(user, permissions.ACTION_CAN_EDIT, dag_resource_name)
+        return (
+            self._has_view_access(user, permissions.ACTION_CAN_EDIT, permissions.RESOURCE_DAG)
+            or self._has_view_access(user, permissions.ACTION_CAN_EDIT, dag_resource_name)
+            or False
+        )

     def prefixed_dag_id(self, dag_id):
         """Returns the permission name for a DAG id."""
@@ -398,7 +402,7 @@ def has_access(self, permission, resource, user=None) -> bool:
         if user.is_anonymous:
             user.roles = self.get_user_roles(user)

-        has_access = self._has_view_access(user, permission, resource)
+        has_access = self._has_view_access(user, permission, resource) or False
         # FAB built-in view access method. Won't work for AllDag access.
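+        # The trailing `or False` added above appears to normalise whatever
+        # FAB's _has_view_access returns into a plain bool, matching the
+        # `-> bool` annotations touched elsewhere in this patch.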
if self.is_dag_resource(resource): From 3d3cb007ce280c619c60f0a82aaabd14fc32c4bb Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Wed, 24 Mar 2021 01:34:41 +0530 Subject: [PATCH 13/36] relax constraints on dag_run to fix tests --- ...1d8_relax_dag_run_constraints_for_mssql.py | 105 ++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 airflow/migrations/versions/e70ba651e1d8_relax_dag_run_constraints_for_mssql.py diff --git a/airflow/migrations/versions/e70ba651e1d8_relax_dag_run_constraints_for_mssql.py b/airflow/migrations/versions/e70ba651e1d8_relax_dag_run_constraints_for_mssql.py new file mode 100644 index 0000000000000..757231a0e685c --- /dev/null +++ b/airflow/migrations/versions/e70ba651e1d8_relax_dag_run_constraints_for_mssql.py @@ -0,0 +1,105 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""relax dag_run constraints for mssql + +Revision ID: e70ba651e1d8 +Revises: 5ccc55a461b1 +Create Date: 2021-03-23 19:39:24.129388 + +""" +from collections import defaultdict + +from alembic import op + +# revision identifiers, used by Alembic. +revision = 'e70ba651e1d8' +down_revision = '5ccc55a461b1' +branch_labels = None +depends_on = None + + +def drop_constraint(operator, constraint_dict): + """ + Drop a primary key or unique constraint + + :param operator: batch_alter_table for the table + :param constraint_dict: a dictionary of ((constraint name, constraint type), column name) of table + """ + + +def get_table_constraints(conn, table_name): + """ + This function return primary and unique constraint + along with column name. some tables like task_instance + is missing primary key constraint name and the name is + auto-generated by sql server. so this function helps to + retrieve any primary or unique constraint name. 
+ + :param conn: sql connection object + :param table_name: table name + :return: a dictionary of ((constraint name, constraint type), column name) of table + :rtype: defaultdict(list) + """ + query = """SELECT tc.CONSTRAINT_NAME , tc.CONSTRAINT_TYPE, ccu.COLUMN_NAME + FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS AS tc + JOIN INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE AS ccu ON ccu.CONSTRAINT_NAME = tc.CONSTRAINT_NAME + WHERE tc.TABLE_NAME = '{table_name}' AND + (tc.CONSTRAINT_TYPE = 'PRIMARY KEY' or UPPER(tc.CONSTRAINT_TYPE) = 'UNIQUE') + """.format( + table_name=table_name + ) + result = conn.execute(query).fetchall() + constraint_dict = defaultdict(list) + for constraint, constraint_type, column in result: + constraint_dict[(constraint, constraint_type)].append(column) + return constraint_dict + + +def upgrade(): + """Apply relax dag_run constraints for mssql""" + conn = op.get_bind() + if conn.dialect.name == "mssql": + constraint_dict = get_table_constraints(conn, 'dag_run') + for constraint, columns in constraint_dict.items(): + if 'dag_id' in columns: + if constraint[1].lower().startswith("unique"): + op.drop_constraint(constraint[0], 'dag_run', type_='unique') + # create filtered indexes + conn.execute( + """CREATE UNIQUE NONCLUSTERED INDEX idx_not_null_dag_id_execution_date + ON dag_run(dag_id,execution_date) + WHERE dag_id IS NOT NULL and execution_date is not null""" + ) + conn.execute( + """CREATE UNIQUE NONCLUSTERED INDEX idx_not_null_dag_id_run_id + ON dag_run(dag_id,run_id) + WHERE dag_id IS NOT NULL and run_id is not null""" + ) + + +def downgrade(): + """Unapply relax dag_run constraints for mssql""" + conn = op.get_bind() + if conn.dialect.name == "mssql": + op.create_unique_constraint('UQ__dag_run__dag_id_run_id', 'dag_run', ['dag_id', 'run_id']) + op.create_unique_constraint( + 'UQ__dag_run__dag_id_execution_date', 'dag_run', ['dag_id', 'execution_date'] + ) + op.drop_index('idx_not_null_dag_id_execution_date', table_name='dag_run') + op.drop_index('idx_not_null_dag_id_run_id', table_name='dag_run') From 44d207d6c594f630b868e4ccad4eb776c01f71ea Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Wed, 7 Apr 2021 09:00:58 +0530 Subject: [PATCH 14/36] base the migration on top of latest master --- .../5ccc55a461b1_make_mssql_tables_in_sync_with_sqla_.py | 2 +- .../83f031fd9f1c_change_ts_columns_to_datetime_on_mssql.py | 6 +++--- .../e70ba651e1d8_relax_dag_run_constraints_for_mssql.py | 2 +- .../e9304a3141f0_make_xcom_pkey_columns_non_nullable.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/airflow/migrations/versions/5ccc55a461b1_make_mssql_tables_in_sync_with_sqla_.py b/airflow/migrations/versions/5ccc55a461b1_make_mssql_tables_in_sync_with_sqla_.py index e6adb3eea0850..70fa52fe978ef 100644 --- a/airflow/migrations/versions/5ccc55a461b1_make_mssql_tables_in_sync_with_sqla_.py +++ b/airflow/migrations/versions/5ccc55a461b1_make_mssql_tables_in_sync_with_sqla_.py @@ -20,7 +20,7 @@ Revision ID: 5ccc55a461b1 Revises: e9304a3141f0 -Create Date: 2021-03-23 11:50:44.914282 +Create Date: 2021-04-06 14:22:02.197726 """ diff --git a/airflow/migrations/versions/83f031fd9f1c_change_ts_columns_to_datetime_on_mssql.py b/airflow/migrations/versions/83f031fd9f1c_change_ts_columns_to_datetime_on_mssql.py index bff3a453c4e14..c8194606cae1a 100644 --- a/airflow/migrations/versions/83f031fd9f1c_change_ts_columns_to_datetime_on_mssql.py +++ b/airflow/migrations/versions/83f031fd9f1c_change_ts_columns_to_datetime_on_mssql.py @@ -19,8 +19,8 @@ """change ts/datetime columns 
to datetime/datetime2 on mssql Revision ID: 83f031fd9f1c -Revises: 2e42bb497a22 -Create Date: 2021-03-21 12:22:02.197726 +Revises: 90d1635d7b86 +Create Date: 2021-04-06 12:22:02.197726 """ @@ -32,7 +32,7 @@ # revision identifiers, used by Alembic. revision = '83f031fd9f1c' -down_revision = '2e42bb497a22' +down_revision = '90d1635d7b86' branch_labels = None depends_on = None diff --git a/airflow/migrations/versions/e70ba651e1d8_relax_dag_run_constraints_for_mssql.py b/airflow/migrations/versions/e70ba651e1d8_relax_dag_run_constraints_for_mssql.py index 757231a0e685c..86432aa71df3c 100644 --- a/airflow/migrations/versions/e70ba651e1d8_relax_dag_run_constraints_for_mssql.py +++ b/airflow/migrations/versions/e70ba651e1d8_relax_dag_run_constraints_for_mssql.py @@ -20,7 +20,7 @@ Revision ID: e70ba651e1d8 Revises: 5ccc55a461b1 -Create Date: 2021-03-23 19:39:24.129388 +Create Date: 2021-04-06 15:22:02.197726 """ from collections import defaultdict diff --git a/airflow/migrations/versions/e9304a3141f0_make_xcom_pkey_columns_non_nullable.py b/airflow/migrations/versions/e9304a3141f0_make_xcom_pkey_columns_non_nullable.py index 7e9203bb55315..48e1134d51f8e 100644 --- a/airflow/migrations/versions/e9304a3141f0_make_xcom_pkey_columns_non_nullable.py +++ b/airflow/migrations/versions/e9304a3141f0_make_xcom_pkey_columns_non_nullable.py @@ -20,7 +20,7 @@ Revision ID: e9304a3141f0 Revises: 83f031fd9f1c -Create Date: 2021-03-22 17:35:01.107500 +Create Date: 2021-04-06 13:22:02.197726 """ import sqlalchemy as sa From d1679a4c5a745d00d870e0f3291d420b350ddbde Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Sun, 11 Apr 2021 18:27:31 +0530 Subject: [PATCH 15/36] incorporate review suggestions --- ...b1_make_mssql_tables_in_sync_with_sqla_.py | 102 +++++++++--------- ..._change_ts_columns_to_datetime_on_mssql.py | 20 ++-- 2 files changed, 58 insertions(+), 64 deletions(-) diff --git a/airflow/migrations/versions/5ccc55a461b1_make_mssql_tables_in_sync_with_sqla_.py b/airflow/migrations/versions/5ccc55a461b1_make_mssql_tables_in_sync_with_sqla_.py index 70fa52fe978ef..9a6adc0f24238 100644 --- a/airflow/migrations/versions/5ccc55a461b1_make_mssql_tables_in_sync_with_sqla_.py +++ b/airflow/migrations/versions/5ccc55a461b1_make_mssql_tables_in_sync_with_sqla_.py @@ -50,62 +50,60 @@ def __get_timestamp(conn): def upgrade(): """Apply make mssql tables in sync with SQLA models""" conn = op.get_bind() - if conn.dialect.name == "mssql": - op.alter_column( - table_name="xcom", column_name="timestamp", type_=__get_timestamp(conn), nullable=False + if conn.dialect.name != "mssql": + return + op.alter_column(table_name="xcom", column_name="timestamp", type_=__get_timestamp(conn), nullable=False) + with op.batch_alter_table('task_reschedule') as task_reschedule_batch_op: + task_reschedule_batch_op.alter_column( + column_name='end_date', type_=__get_timestamp(conn), nullable=False + ) + task_reschedule_batch_op.alter_column( + column_name='reschedule_date', type_=__get_timestamp(conn), nullable=False + ) + task_reschedule_batch_op.alter_column( + column_name='start_date', type_=__get_timestamp(conn), nullable=False + ) + with op.batch_alter_table('task_fail') as task_fail_batch_op: + task_fail_batch_op.drop_index('idx_task_fail_dag_task_date') + task_fail_batch_op.alter_column( + column_name="execution_date", type_=__get_timestamp(conn), nullable=False + ) + task_fail_batch_op.create_index( + 'idx_task_fail_dag_task_date', ['dag_id', 'task_id', 'execution_date'], unique=False + ) + with op.batch_alter_table('task_instance') 
as task_instance_batch_op: + task_instance_batch_op.drop_index('ti_state_lkp') + task_instance_batch_op.create_index( + 'ti_state_lkp', ['dag_id', 'task_id', 'execution_date', 'state'], unique=False ) - with op.batch_alter_table('task_reschedule') as task_reschedule_batch_op: - task_reschedule_batch_op.alter_column( - column_name='end_date', type_=__get_timestamp(conn), nullable=False - ) - task_reschedule_batch_op.alter_column( - column_name='reschedule_date', type_=__get_timestamp(conn), nullable=False - ) - task_reschedule_batch_op.alter_column( - column_name='start_date', type_=__get_timestamp(conn), nullable=False - ) - with op.batch_alter_table('task_fail') as task_fail_batch_op: - task_fail_batch_op.drop_index('idx_task_fail_dag_task_date') - task_fail_batch_op.alter_column( - column_name="execution_date", type_=__get_timestamp(conn), nullable=False - ) - task_fail_batch_op.create_index( - 'idx_task_fail_dag_task_date', ['dag_id', 'task_id', 'execution_date'], unique=False - ) - with op.batch_alter_table('task_instance') as task_instance_batch_op: - task_instance_batch_op.drop_index('ti_state_lkp') - task_instance_batch_op.create_index( - 'ti_state_lkp', ['dag_id', 'task_id', 'execution_date', 'state'], unique=False - ) def downgrade(): """Unapply make mssql tables in sync with SQLA models""" conn = op.get_bind() - if conn.dialect.name == "mssql": - op.alter_column( - table_name="xcom", column_name="timestamp", type_=__get_timestamp(conn), nullable=True + if conn.dialect.name != "mssql": + return + op.alter_column(table_name="xcom", column_name="timestamp", type_=__get_timestamp(conn), nullable=True) + with op.batch_alter_table('task_reschedule') as task_reschedule_batch_op: + task_reschedule_batch_op.alter_column( + column_name='end_date', type_=__get_timestamp(conn), nullable=True + ) + task_reschedule_batch_op.alter_column( + column_name='reschedule_date', type_=__get_timestamp(conn), nullable=True + ) + task_reschedule_batch_op.alter_column( + column_name='start_date', type_=__get_timestamp(conn), nullable=True + ) + with op.batch_alter_table('task_fail') as task_fail_batch_op: + task_fail_batch_op.drop_index('idx_task_fail_dag_task_date') + task_fail_batch_op.alter_column( + column_name="execution_date", type_=__get_timestamp(conn), nullable=False + ) + task_fail_batch_op.create_index( + 'idx_task_fail_dag_task_date', ['dag_id', 'task_id', 'execution_date'], unique=False + ) + with op.batch_alter_table('task_instance') as task_instance_batch_op: + task_instance_batch_op.drop_index('ti_state_lkp') + task_instance_batch_op.create_index( + 'ti_state_lkp', ['dag_id', 'task_id', 'execution_date'], unique=False ) - with op.batch_alter_table('task_reschedule') as task_reschedule_batch_op: - task_reschedule_batch_op.alter_column( - column_name='end_date', type_=__get_timestamp(conn), nullable=True - ) - task_reschedule_batch_op.alter_column( - column_name='reschedule_date', type_=__get_timestamp(conn), nullable=True - ) - task_reschedule_batch_op.alter_column( - column_name='start_date', type_=__get_timestamp(conn), nullable=True - ) - with op.batch_alter_table('task_fail') as task_fail_batch_op: - task_fail_batch_op.drop_index('idx_task_fail_dag_task_date') - task_fail_batch_op.alter_column( - column_name="execution_date", type_=__get_timestamp(conn), nullable=False - ) - task_fail_batch_op.create_index( - 'idx_task_fail_dag_task_date', ['dag_id', 'task_id', 'execution_date'], unique=False - ) - with op.batch_alter_table('task_instance') as task_instance_batch_op: - 
task_instance_batch_op.drop_index('ti_state_lkp') - task_instance_batch_op.create_index( - 'ti_state_lkp', ['dag_id', 'task_id', 'execution_date'], unique=False - ) diff --git a/airflow/migrations/versions/83f031fd9f1c_change_ts_columns_to_datetime_on_mssql.py b/airflow/migrations/versions/83f031fd9f1c_change_ts_columns_to_datetime_on_mssql.py index c8194606cae1a..c71c6b6a313d0 100644 --- a/airflow/migrations/versions/83f031fd9f1c_change_ts_columns_to_datetime_on_mssql.py +++ b/airflow/migrations/versions/83f031fd9f1c_change_ts_columns_to_datetime_on_mssql.py @@ -166,20 +166,16 @@ def alter_mssql_datetime2_column(conn, op, table_name, column_name, nullable): def upgrade(): """Change timestamp and datetime to datetime2/datetime when using MSSQL as backend""" conn = op.get_bind() - if conn.dialect.name == 'mssql': - recreate_mssql_ts_column(conn, op, 'dag_code', 'last_updated') - recreate_mssql_ts_column(conn, op, 'rendered_task_instance_fields', 'execution_date') - # alter_mssql_datetime_column(conn, op, 'chart', 'last_modified', False) - alter_mssql_datetime_column(conn, op, 'serialized_dag', 'last_updated', False) - # alter_mssql_datetime_column(conn, op, 'known_event', 'start_date', True) - # alter_mssql_datetime_column(conn, op, 'known_event', 'end_date', True) + if conn.dialect.name != 'mssql': + return + recreate_mssql_ts_column(conn, op, 'dag_code', 'last_updated') + recreate_mssql_ts_column(conn, op, 'rendered_task_instance_fields', 'execution_date') + alter_mssql_datetime_column(conn, op, 'serialized_dag', 'last_updated', False) def downgrade(): """Change datetime2(6) columns back to datetime when using MSSQL as backend""" conn = op.get_bind() - if conn.dialect.name == 'mssql': - alter_mssql_datetime2_column(conn, op, 'chart', 'last_modified', False) - alter_mssql_datetime2_column(conn, op, 'serialized_dag', 'last_updated', False) - alter_mssql_datetime2_column(conn, op, 'known_event', 'start_date', True) - alter_mssql_datetime2_column(conn, op, 'known_event', 'end_date', True) + if conn.dialect.name != 'mssql': + return + alter_mssql_datetime2_column(conn, op, 'serialized_dag', 'last_updated', False) From 50ac2799539538753dcb7da1169193f31b7ea9be Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Tue, 13 Apr 2021 08:28:32 +0530 Subject: [PATCH 16/36] fix pylint errors --- airflow/providers/apache/hive/transfers/mssql_to_hive.py | 6 +++--- tests/providers/apache/hive/transfers/test_mssql_to_hive.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/airflow/providers/apache/hive/transfers/mssql_to_hive.py b/airflow/providers/apache/hive/transfers/mssql_to_hive.py index 1c404edecd0b7..7a0edd6d035fb 100644 --- a/airflow/providers/apache/hive/transfers/mssql_to_hive.py +++ b/airflow/providers/apache/hive/transfers/mssql_to_hive.py @@ -104,9 +104,9 @@ def __init__( def type_map(cls, mssql_type: int) -> str: """Maps MsSQL type to Hive type.""" map_dict = { - pymssql.BINARY.value: 'INT', - pymssql.DECIMAL.value: 'FLOAT', - pymssql.NUMBER.value: 'INT', + pymssql.BINARY.value: 'INT', # pylint: disable=c-extension-no-member,no-member + pymssql.DECIMAL.value: 'FLOAT', # pylint: disable=c-extension-no-member,no-member + pymssql.NUMBER.value: 'INT', # pylint: disable=c-extension-no-member,no-member } return map_dict.get(mssql_type, 'STRING') diff --git a/tests/providers/apache/hive/transfers/test_mssql_to_hive.py b/tests/providers/apache/hive/transfers/test_mssql_to_hive.py index 99455dab0fb17..003188d0645b8 100644 --- 
a/tests/providers/apache/hive/transfers/test_mssql_to_hive.py +++ b/tests/providers/apache/hive/transfers/test_mssql_to_hive.py @@ -41,19 +41,19 @@ def setUp(self): self.kwargs = dict(sql='sql', hive_table='table', task_id='test_mssql_to_hive', dag=None) def test_type_map_binary(self): - # pylint: disable=c-extension-no-member, no-member + # pylint: disable=c-extension-no-member,no-member mapped_type = MsSqlToHiveOperator(**self.kwargs).type_map(pymssql.BINARY.value) assert mapped_type == 'INT' def test_type_map_decimal(self): - # pylint: disable=c-extension-no-member, no-member + # pylint: disable=c-extension-no-member,no-member mapped_type = MsSqlToHiveOperator(**self.kwargs).type_map(pymssql.DECIMAL.value) assert mapped_type == 'FLOAT' def test_type_map_number(self): - # pylint: disable=c-extension-no-member, no-member + # pylint: disable=c-extension-no-member,no-member mapped_type = MsSqlToHiveOperator(**self.kwargs).type_map(pymssql.NUMBER.value) assert mapped_type == 'INT' From f97e1132fcba0e916809fad6c3d43765dd4d9d68 Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Tue, 13 Apr 2021 12:33:56 +0530 Subject: [PATCH 17/36] move migration on top of latest master --- .../83f031fd9f1c_change_ts_columns_to_datetime_on_mssql.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airflow/migrations/versions/83f031fd9f1c_change_ts_columns_to_datetime_on_mssql.py b/airflow/migrations/versions/83f031fd9f1c_change_ts_columns_to_datetime_on_mssql.py index c71c6b6a313d0..52a3777c0af1c 100644 --- a/airflow/migrations/versions/83f031fd9f1c_change_ts_columns_to_datetime_on_mssql.py +++ b/airflow/migrations/versions/83f031fd9f1c_change_ts_columns_to_datetime_on_mssql.py @@ -19,7 +19,7 @@ """change ts/datetime columns to datetime/datetime2 on mssql Revision ID: 83f031fd9f1c -Revises: 90d1635d7b86 +Revises: a13f7613ad25 Create Date: 2021-04-06 12:22:02.197726 """ @@ -32,7 +32,7 @@ # revision identifiers, used by Alembic. 
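The three tests above pin down type_map's behavior. The same mapping can be sketched in isolation, mirroring the operator code shown earlier, including its STRING fallback for unrecognized pymssql type codes:

    import pymssql

    MAP_DICT = {
        pymssql.BINARY.value: 'INT',
        pymssql.DECIMAL.value: 'FLOAT',
        pymssql.NUMBER.value: 'INT',
    }

    def type_map(mssql_type: int) -> str:
        # anything outside the explicit map falls back to STRING
        return MAP_DICT.get(mssql_type, 'STRING')

    assert type_map(pymssql.DECIMAL.value) == 'FLOAT'
    assert type_map(-1) == 'STRING'  # unknown type codes default to STRING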
revision = '83f031fd9f1c' -down_revision = '90d1635d7b86' +down_revision = 'a13f7613ad25' branch_labels = None depends_on = None From e2bc6a1738724519ad043f055262c0abc1741be1 Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Tue, 13 Apr 2021 12:43:54 +0530 Subject: [PATCH 18/36] fix pylint errors --- airflow/providers/microsoft/mssql/hooks/mssql.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/airflow/providers/microsoft/mssql/hooks/mssql.py b/airflow/providers/microsoft/mssql/hooks/mssql.py index eebce5eb466cd..d61d5539d97f7 100644 --- a/airflow/providers/microsoft/mssql/hooks/mssql.py +++ b/airflow/providers/microsoft/mssql/hooks/mssql.py @@ -43,8 +43,8 @@ def get_conn( conn = self.get_connection( self.mssql_conn_id # type: ignore[attr-defined] # pylint: disable=no-member ) - # pylint: disable=c-extension-no-member - conn = pymssql.connect( # pylint: disable=no-member + # pylint: disable=c-extension-no-member,no-member + conn = pymssql.connect( server=conn.host, user=conn.login, password=conn.password, @@ -55,10 +55,10 @@ def get_conn( def set_autocommit( self, - conn: pymssql.connect, # pylint: disable=c-extension-no-member, no-member + conn: pymssql.connect, # pylint: disable=c-extension-no-member,no-member autocommit: bool, ) -> None: conn.autocommit(autocommit) - def get_autocommit(self, conn: pymssql.connect): # pylint: disable=c-extension-no-member, no-member + def get_autocommit(self, conn: pymssql.connect): # pylint: disable=c-extension-no-member,no-member return conn.autocommit_state From 78b9666d90c39f2fbbbf4055d6e61eb571d2a06e Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Wed, 14 Apr 2021 09:30:32 +0530 Subject: [PATCH 19/36] add mssql to CI --- .github/workflows/ci.yml | 63 +++++++++++++++++++++ scripts/ci/libraries/_initialization.sh | 8 ++- scripts/ci/selective_ci_checks.sh | 13 ++++- tests/bats/breeze/test_breeze_complete.bats | 16 ++++++ 4 files changed, 98 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b0268404cf479..3203fa3109ed0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -145,6 +145,7 @@ jobs: postgresVersions: ${{ steps.selective-checks.outputs.postgres-versions }} defaultPostgresVersion: ${{ steps.selective-checks.outputs.default-postgres-version }} mysqlVersions: ${{ steps.selective-checks.outputs.mysql-versions }} + mssqlVersions: ${{ steps.selective-checks.outputs.mssql-versions }} defaultMySQLVersion: ${{ steps.selective-checks.outputs.default-mysql-version }} helmVersions: ${{ steps.selective-checks.outputs.helm-versions }} defaultHelmVersion: ${{ steps.selective-checks.outputs.default-helm-version }} @@ -153,6 +154,7 @@ jobs: testTypes: ${{ steps.selective-checks.outputs.test-types }} postgresExclude: ${{ steps.selective-checks.outputs.postgres-exclude }} mysqlExclude: ${{ steps.selective-checks.outputs.mysql-exclude }} + mssqlExclude: ${{ steps.selective-checks.outputs.mssql-exclude }} sqliteExclude: ${{ steps.selective-checks.outputs.sqlite-exclude }} run-tests: ${{ steps.selective-checks.outputs.run-tests }} run-ui-tests: ${{ steps.selective-checks.outputs.run-ui-tests }} @@ -762,6 +764,62 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" path: "./files/coverage*.xml" retention-days: 7 + tests-mssql: + timeout-minutes: 130 + name: > + MSSQL${{matrix.mssql-version}}, Py${{matrix.python-version}}: ${{needs.build-info.outputs.testTypes}} + runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} + needs: [build-info, ci-images] + strategy: + 
matrix: + python-version: ${{ fromJson(needs.build-info.outputs.pythonVersions) }} + mssql-version: ${{ fromJson(needs.build-info.outputs.mssqlVersions) }} + exclude: ${{ fromJson(needs.build-info.outputs.mssqlExclude) }} + fail-fast: false + env: + RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} + BACKEND: mssql + PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} + MSSQL_VERSION: ${{ matrix.mssql-version }} + TEST_TYPES: "${{needs.build-info.outputs.testTypes}}" + GITHUB_REGISTRY: ${{ needs.ci-images.outputs.githubRegistry }} + if: needs.build-info.outputs.run-tests == 'true' + steps: + - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" + uses: actions/checkout@v2 + with: + persist-credentials: false + - name: "Setup python" + uses: actions/setup-python@v2 + with: + python-version: ${{ env.PYTHON_MAJOR_MINOR_VERSION }} + - name: "Free space" + run: ./scripts/ci/tools/ci_free_space_on_ci.sh + - name: "Prepare CI image ${{env.PYTHON_MAJOR_MINOR_VERSION}}:${{ env.GITHUB_REGISTRY_PULL_IMAGE_TAG }}" + run: ./scripts/ci/images/ci_prepare_ci_image_on_ci.sh + - name: "Tests: ${{needs.build-info.outputs.testTypes}}" + run: ./scripts/ci/testing/ci_run_airflow_testing.sh + - name: "Upload airflow logs" + uses: actions/upload-artifact@v2 + if: failure() + with: + name: airflow-logs-${{matrix.python-version}}-${{matrix.mssql-version}} + path: "./files/airflow_logs*" + retention-days: 7 + - name: "Upload container logs" + uses: actions/upload-artifact@v2 + if: failure() + with: + name: container-logs-mssql-${{matrix.python-version}}-${{matrix.mssql-version}} + path: "./files/container_logs*" + retention-days: 7 + - name: "Upload artifact for coverage" + uses: actions/upload-artifact@v2 + with: + name: coverage-mssql-${{matrix.python-version}}-${{matrix.mssql-version}} + path: "./files/coverage*.xml" + retention-days: 7 + tests-sqlite: timeout-minutes: 130 name: > @@ -898,6 +956,7 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" - tests-postgres - tests-sqlite - tests-mysql + - tests-mssql - tests-quarantined env: RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} @@ -1046,6 +1105,7 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" - tests-sqlite - tests-postgres - tests-mysql + - tests-mssql - tests-kubernetes - prod-images - docs @@ -1107,6 +1167,7 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" - tests-sqlite - tests-postgres - tests-mysql + - tests-mssql - tests-kubernetes - ci-images - docs @@ -1151,6 +1212,7 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" - static-checks-pylint - tests-sqlite - tests-mysql + - tests-mssql - tests-postgres - tests-kubernetes env: @@ -1221,6 +1283,7 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" - tests-sqlite - tests-postgres - tests-mysql + - tests-mssql - tests-kubernetes - constraints - prepare-test-provider-packages-wheel diff --git a/scripts/ci/libraries/_initialization.sh b/scripts/ci/libraries/_initialization.sh index 7129c435149de..e824963a0c8f4 100644 --- a/scripts/ci/libraries/_initialization.sh +++ b/scripts/ci/libraries/_initialization.sh @@ -23,6 +23,7 @@ CURRENT_KUBERNETES_VERSIONS=() CURRENT_KUBERNETES_MODES=() CURRENT_POSTGRES_VERSIONS=() CURRENT_MYSQL_VERSIONS=() +CURRENT_MSSQL_VERSIONS=() CURRENT_KIND_VERSIONS=() CURRENT_HELM_VERSIONS=() CURRENT_EXECUTOR=() @@ -104,6 +105,10 @@ function initialization::initialize_base_variables() { CURRENT_MYSQL_VERSIONS+=("5.7" "8") export CURRENT_MYSQL_VERSIONS + # Currently supported versions of MSSQL + CURRENT_MSSQL_VERSIONS+=("2017-latest" "2019-latest") + export 
CURRENT_MSSQL_VERSIONS + BACKEND=${BACKEND:="sqlite"} export BACKEND @@ -114,7 +119,7 @@ function initialization::initialize_base_variables() { export MYSQL_VERSION=${MYSQL_VERSION:=${CURRENT_MYSQL_VERSIONS[0]}} #Default MS SQL version - export MSSQL_VERSION=${MSSQL_VERSION:="2017-latest"} + export MSSQL_VERSION=${MSSQL_VERSION:=${CURRENT_MSSQL_VERSIONS[0]}} # If set to true, the database will be reset at entry. Works for Postgres and MySQL export DB_RESET=${DB_RESET:="false"} @@ -901,6 +906,7 @@ function initialization::make_constants_read_only() { readonly CURRENT_KUBERNETES_MODES readonly CURRENT_POSTGRES_VERSIONS readonly CURRENT_MYSQL_VERSIONS + readonly CURRENT_MSSQL_VERSIONS readonly CURRENT_KIND_VERSIONS readonly CURRENT_HELM_VERSIONS readonly CURRENT_EXECUTOR diff --git a/scripts/ci/selective_ci_checks.sh b/scripts/ci/selective_ci_checks.sh index 7e77eddec8c7c..5d40d60645a81 100755 --- a/scripts/ci/selective_ci_checks.sh +++ b/scripts/ci/selective_ci_checks.sh @@ -103,9 +103,19 @@ function output_all_basic_variables() { initialization::ga_output mysql-versions \ "$(initialization::parameters_to_json "${MYSQL_VERSION}")" fi - initialization::ga_output default-mysql-version "${MYSQL_VERSION}" + if [[ ${FULL_TESTS_NEEDED_LABEL} == "true" ]]; then + initialization::ga_output mssql-versions \ + "$(initialization::parameters_to_json "${CURRENT_MSSQL_VERSIONS[@]}")" + else + initialization::ga_output mssql-versions \ + "$(initialization::parameters_to_json "${MSSQL_VERSION}")" + fi + initialization::ga_output default-mssql-version "${MSSQL_VERSION}" + + + initialization::ga_output kind-versions \ "$(initialization::parameters_to_json "${CURRENT_KIND_VERSIONS[@]}")" initialization::ga_output default-kind-version "${KIND_VERSION}" @@ -117,6 +127,7 @@ function output_all_basic_variables() { if [[ ${FULL_TESTS_NEEDED_LABEL} == "true" ]]; then initialization::ga_output postgres-exclude '[{ "python-version": "3.6" }]' initialization::ga_output mysql-exclude '[{ "python-version": "3.7" }]' + initialization::ga_output mssql-exclude '[{ "python-version": "3.7" }]' initialization::ga_output sqlite-exclude '[{ "python-version": "3.8" }]' else initialization::ga_output postgres-exclude '[]' diff --git a/tests/bats/breeze/test_breeze_complete.bats b/tests/bats/breeze/test_breeze_complete.bats index c1dfed1659c43..249a493f80095 100644 --- a/tests/bats/breeze/test_breeze_complete.bats +++ b/tests/bats/breeze/test_breeze_complete.bats @@ -239,6 +239,22 @@ assert_equal "${_breeze_default_mysql_version}" "${MYSQL_VERSION}" } +@test "Test allowed MSSQL versions same as CURRENT" { + load ../bats_utils + #shellcheck source=breeze-complete + source "${AIRFLOW_SOURCES}/breeze-complete" + + assert_equal "${_breeze_allowed_mssql_versions}" "${CURRENT_MSSQL_VERSIONS[*]}" +} + +@test "Test default MSSQL version same as MSSQL_VERSION" { + load ../bats_utils + #shellcheck source=breeze-complete + source "${AIRFLOW_SOURCES}/breeze-complete" + + assert_equal "${_breeze_default_mssql_version}" "${MSSQL_VERSION}" +} + @test "Test allowed Postgres versions same as CURRENT" { load ../bats_utils #shellcheck source=breeze-complete From 9fd1bee5e6d1df48b0a97102780d1b8ac382259d Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Wed, 14 Apr 2021 12:54:31 +0530 Subject: [PATCH 20/36] fix port conflict --- scripts/ci/docker-compose/backend-mssql.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/ci/docker-compose/backend-mssql.yml b/scripts/ci/docker-compose/backend-mssql.yml index 4fabc1817d599..d4132b51e33c0 100644 
--- a/scripts/ci/docker-compose/backend-mssql.yml +++ b/scripts/ci/docker-compose/backend-mssql.yml @@ -29,5 +29,3 @@ services: environment: - ACCEPT_EULA=Y - SA_PASSWORD=Airflow123 - ports: - - "${MSSQL_HOST_PORT}:1433" From b941b202eb4fc6ffa5b81be19adf7dac4abf08a5 Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Fri, 16 Apr 2021 00:44:14 +0530 Subject: [PATCH 21/36] simplify db based count check conditionals --- tests/models/test_renderedtifields.py | 7 ++++--- tests/models/test_taskinstance.py | 15 +++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/models/test_renderedtifields.py b/tests/models/test_renderedtifields.py index 19f26fb539ddb..c4eca2c5bbb1e 100644 --- a/tests/models/test_renderedtifields.py +++ b/tests/models/test_renderedtifields.py @@ -173,9 +173,10 @@ def test_delete_old_records(self, rtif_num, num_to_keep, remaining_rtifs, expect # Verify old records are deleted and only 'num_to_keep' records are kept # For other DBs,an extra query is fired in RenderedTaskInstanceFields.delete_old_records - expected_query_count_based_on_db = (expected_query_count + 1, expected_query_count)[ - session.bind.dialect.name in ["postgresql", "sqlite", "mysql"] or expected_query_count == 0 - ] + expected_query_count_based_on_db = ( + expected_query_count + 1 if session.bind.dialect.name == "mssql" else expected_query_count + ) + with assert_queries_count(expected_query_count_based_on_db): RTIF.delete_old_records(task_id=task.task_id, dag_id=task.dag_id, num_to_keep=num_to_keep) result = session.query(RTIF).filter(RTIF.dag_id == dag.dag_id, RTIF.task_id == task.task_id).all() diff --git a/tests/models/test_taskinstance.py b/tests/models/test_taskinstance.py index df0f855520939..90c7bd65951f1 100644 --- a/tests/models/test_taskinstance.py +++ b/tests/models/test_taskinstance.py @@ -2100,11 +2100,11 @@ def test_execute_queries_count(self, expected_query_count, mark_success): ) # an extra query is fired in RenderedTaskInstanceFields.delete_old_records # for other DBs. 
delete_old_records is called only when mark_success is False - expected_query_count_based_on_db = (expected_query_count + 1, expected_query_count)[ - session.bind.dialect.name in ["postgresql", "sqlite", "mysql"] - or expected_query_count == 0 - or mark_success - ] + expected_query_count_based_on_db = ( + expected_query_count + 1 + if session.bind.dialect.name == "mssql" and expected_query_count > 0 and not mark_success + else expected_query_count + ) with assert_queries_count(expected_query_count_based_on_db): ti._run_raw_task(mark_success=mark_success) @@ -2125,9 +2125,8 @@ def test_execute_queries_count_store_serialized(self): ) # an extra query is fired in RenderedTaskInstanceFields.delete_old_records # for other DBs - expected_query_count_based_on_db = (11, 10)[ - session.bind.dialect.name in ["postgresql", "sqlite", "mysql"] - ] + expected_query_count_based_on_db = 11 if session.bind.dialect.name == "mssql" else 10 + with assert_queries_count(expected_query_count_based_on_db): ti._run_raw_task() From 88a4bf9c8a14c01fd9c4f47b48730b623f7ef0b1 Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Fri, 7 May 2021 17:05:26 +0530 Subject: [PATCH 22/36] fix pylint errors --- airflow/models/serialized_dag.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airflow/models/serialized_dag.py b/airflow/models/serialized_dag.py index 3118871fc262b..6cdcc053dfff1 100644 --- a/airflow/models/serialized_dag.py +++ b/airflow/models/serialized_dag.py @@ -127,7 +127,7 @@ def write_dag(cls, dag: DAG, min_update_interval: Optional[int] = None, session: .first() is not None ): - return + return False log.debug("Checking if DAG (%s) changed", dag.dag_id) new_serialized_dag = cls(dag) From ac3cefcbdf37d1a17974c7fe8fa8c4845d250e37 Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Sat, 8 May 2021 10:56:24 +0530 Subject: [PATCH 23/36] fix rebase and mssql tests --- Dockerfile.ci | 14 ++++---------- .../api_connexion/endpoints/event_log_endpoint.py | 2 +- .../endpoints/import_error_endpoint.py | 2 +- airflow/api_connexion/endpoints/user_endpoint.py | 4 ++-- airflow/api_connexion/parameters.py | 13 ++++++------- airflow/models/serialized_dag.py | 6 ++++-- .../apache/hive/transfers/mssql_to_hive.py | 6 +++--- airflow/providers/microsoft/mssql/hooks/mssql.py | 8 ++++---- airflow/www/security.py | 14 ++++++-------- airflow/www/views.py | 8 ++++---- breeze-complete | 2 +- setup.py | 3 +-- tests/models/test_renderedtifields.py | 4 +++- tests/models/test_taskinstance.py | 2 +- .../apache/hive/transfers/test_mssql_to_hive.py | 6 +++--- 15 files changed, 44 insertions(+), 50 deletions(-) diff --git a/Dockerfile.ci b/Dockerfile.ci index 5a02a8927f86e..14e4294c6b029 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -160,10 +160,8 @@ RUN mkdir -pv /usr/share/man/man1 \ ${ADDITIONAL_RUNTIME_APT_DEPS} \ && apt-get autoremove -yqq --purge \ && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -#Install MS SQL Drivers -RUN curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - \ + && rm -rf /var/lib/apt/lists/* \ + && curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - \ && curl https://packages.microsoft.com/config/debian/9/prod.list > /etc/apt/sources.list.d/mssql-release.list \ && apt-get update -yqq \ && apt-get upgrade -yqq \ @@ -173,12 +171,8 @@ RUN curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - \ g++ \ msodbcsql17 \ mssql-tools \ - && rm -rf /var/lib/apt/lists/* - -ARG DOCKER_CLI_VERSION=19.03.9 -ENV DOCKER_CLI_VERSION=${DOCKER_CLI_VERSION} - 
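The reworked count assertions in the two test modules reduce to a single rule. A condensed, illustrative helper (not part of the patch) makes that rule explicit:

    def expected_queries(base_count: int, dialect: str, mark_success: bool = False) -> int:
        # MSSQL fires one extra query in RenderedTaskInstanceFields.delete_old_records;
        # it only applies when the task actually runs (not mark_success) and
        # the baseline count is non-zero, matching the test conditions above
        if dialect == "mssql" and base_count > 0 and not mark_success:
            return base_count + 1
        return base_count

    assert expected_queries(10, "postgresql") == 10
    assert expected_queries(10, "mssql") == 11
    assert expected_queries(10, "mssql", mark_success=True) == 10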
-RUN curl https://download.docker.com/linux/static/stable/x86_64/docker-${DOCKER_CLI_VERSION}.tgz \ + && rm -rf /var/lib/apt/lists/* \ + && curl https://download.docker.com/linux/static/stable/x86_64/docker-${DOCKER_CLI_VERSION}.tgz \ | tar -C /usr/bin --strip-components=1 -xvzf - docker/docker WORKDIR ${AIRFLOW_SOURCES} diff --git a/airflow/api_connexion/endpoints/event_log_endpoint.py b/airflow/api_connexion/endpoints/event_log_endpoint.py index a263c5ccb3148..bc53ff57672c5 100644 --- a/airflow/api_connexion/endpoints/event_log_endpoint.py +++ b/airflow/api_connexion/endpoints/event_log_endpoint.py @@ -44,7 +44,7 @@ def get_event_log(event_log_id, session): @security.requires_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_AUDIT_LOG)]) @format_parameters({'limit': check_limit}) @provide_session -def get_event_logs(session, limit, offset=None, order_by='event_log_id'): +def get_event_logs(session, limit, offset=0, order_by='event_log_id'): """Get all log entries from event log""" to_replace = {"event_log_id": "id", "when": "dttm"} allowed_filter_attrs = [ diff --git a/airflow/api_connexion/endpoints/import_error_endpoint.py b/airflow/api_connexion/endpoints/import_error_endpoint.py index a0a09b8c485c1..280cc3ecbb23d 100644 --- a/airflow/api_connexion/endpoints/import_error_endpoint.py +++ b/airflow/api_connexion/endpoints/import_error_endpoint.py @@ -47,7 +47,7 @@ def get_import_error(import_error_id, session): @security.requires_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_IMPORT_ERROR)]) @format_parameters({'limit': check_limit}) @provide_session -def get_import_errors(session, limit, offset=None, order_by='import_error_id'): +def get_import_errors(session, limit, offset=0, order_by='import_error_id'): """Get all import errors""" to_replace = {"import_error_id": 'id'} allowed_filter_attrs = ['import_error_id', "timestamp", "filename"] diff --git a/airflow/api_connexion/endpoints/user_endpoint.py b/airflow/api_connexion/endpoints/user_endpoint.py index 53ce0e9cfb929..29b480205e077 100644 --- a/airflow/api_connexion/endpoints/user_endpoint.py +++ b/airflow/api_connexion/endpoints/user_endpoint.py @@ -41,7 +41,7 @@ def get_user(username): @security.requires_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_USER)]) @format_parameters({'limit': check_limit}) -def get_users(limit, order_by='id', offset=None): +def get_users(limit, order_by='id', offset=0): """Get users""" appbuilder = current_app.appbuilder session = appbuilder.get_session @@ -49,7 +49,7 @@ def get_users(limit, order_by='id', offset=None): to_replace = {"user_id": "id"} allowed_filter_attrs = [ "user_id", - 'id', + "id", "first_name", "last_name", "user_name", diff --git a/airflow/api_connexion/parameters.py b/airflow/api_connexion/parameters.py index 8e06301676900..1dac50b351ebf 100644 --- a/airflow/api_connexion/parameters.py +++ b/airflow/api_connexion/parameters.py @@ -18,7 +18,7 @@ from typing import Callable, Dict, TypeVar, cast from pendulum.parsing import ParserError -from sqlalchemy import asc, desc +from sqlalchemy import text from airflow.api_connexion.exceptions import BadRequest from airflow.configuration import conf @@ -97,11 +97,10 @@ def apply_sorting(query, order_by, to_replace=None, allowed_attrs=None): detail=f"Ordering with '{lstriped_orderby}' is disallowed or " f"the attribute does not exist on the model" ) + if to_replace: + lstriped_orderby = to_replace.get(lstriped_orderby, lstriped_orderby) if order_by[0] == "-": - func = desc - order_by = lstriped_orderby + order_by 
= f"{lstriped_orderby} desc" else: - func = asc - if to_replace: - order_by = to_replace.get(order_by, order_by) - return query.order_by(func(order_by)) + order_by = f"{lstriped_orderby} asc" + return query.order_by(text(order_by)) diff --git a/airflow/models/serialized_dag.py b/airflow/models/serialized_dag.py index 6cdcc053dfff1..2b600612addc7 100644 --- a/airflow/models/serialized_dag.py +++ b/airflow/models/serialized_dag.py @@ -26,7 +26,7 @@ import sqlalchemy_jsonfield from sqlalchemy import BigInteger, Column, Index, String, and_ from sqlalchemy.orm import Session, backref, foreign, relationship -from sqlalchemy.sql.expression import literal +from sqlalchemy.sql.expression import func, literal from airflow.models.base import ID_LEN, Base from airflow.models.dag import DAG, DagModel @@ -315,7 +315,9 @@ def get_dag_dependencies(cls, session: Session = None) -> Dict[str, List['DagDep if session.bind.dialect.name in ["sqlite", "mysql"]: for row in session.query(cls.dag_id, func.json_extract(cls.data, "$.dag.dag_dependencies")).all(): dependencies[row[0]] = [DagDependency(**d) for d in json.loads(row[1])] - + elif session.bind.dialect.name in ["mssql"]: + for row in session.query(cls.dag_id, func.json_query(cls.data, "$.dag.dag_dependencies")).all(): + dependencies[row[0]] = [DagDependency(**d) for d in json.loads(row[1])] else: for row in session.query( cls.dag_id, func.json_extract_path(cls.data, "dag", "dag_dependencies") diff --git a/airflow/providers/apache/hive/transfers/mssql_to_hive.py b/airflow/providers/apache/hive/transfers/mssql_to_hive.py index 7a0edd6d035fb..1c404edecd0b7 100644 --- a/airflow/providers/apache/hive/transfers/mssql_to_hive.py +++ b/airflow/providers/apache/hive/transfers/mssql_to_hive.py @@ -104,9 +104,9 @@ def __init__( def type_map(cls, mssql_type: int) -> str: """Maps MsSQL type to Hive type.""" map_dict = { - pymssql.BINARY.value: 'INT', # pylint: disable=c-extension-no-member,no-member - pymssql.DECIMAL.value: 'FLOAT', # pylint: disable=c-extension-no-member,no-member - pymssql.NUMBER.value: 'INT', # pylint: disable=c-extension-no-member,no-member + pymssql.BINARY.value: 'INT', + pymssql.DECIMAL.value: 'FLOAT', + pymssql.NUMBER.value: 'INT', } return map_dict.get(mssql_type, 'STRING') diff --git a/airflow/providers/microsoft/mssql/hooks/mssql.py b/airflow/providers/microsoft/mssql/hooks/mssql.py index d61d5539d97f7..eebce5eb466cd 100644 --- a/airflow/providers/microsoft/mssql/hooks/mssql.py +++ b/airflow/providers/microsoft/mssql/hooks/mssql.py @@ -43,8 +43,8 @@ def get_conn( conn = self.get_connection( self.mssql_conn_id # type: ignore[attr-defined] # pylint: disable=no-member ) - # pylint: disable=c-extension-no-member,no-member - conn = pymssql.connect( + # pylint: disable=c-extension-no-member + conn = pymssql.connect( # pylint: disable=no-member server=conn.host, user=conn.login, password=conn.password, @@ -55,10 +55,10 @@ def get_conn( def set_autocommit( self, - conn: pymssql.connect, # pylint: disable=c-extension-no-member,no-member + conn: pymssql.connect, # pylint: disable=c-extension-no-member, no-member autocommit: bool, ) -> None: conn.autocommit(autocommit) - def get_autocommit(self, conn: pymssql.connect): # pylint: disable=c-extension-no-member,no-member + def get_autocommit(self, conn: pymssql.connect): # pylint: disable=c-extension-no-member, no-member return conn.autocommit_state diff --git a/airflow/www/security.py b/airflow/www/security.py index 34c5bf3d2207e..a60447936027e 100644 --- a/airflow/www/security.py +++ 
b/airflow/www/security.py @@ -347,11 +347,10 @@ def can_read_dag(self, dag_id, user=None) -> bool: """Determines whether a user has DAG read access.""" if not user: user = g.user - prefixed_dag_id = self.prefixed_dag_id(dag_id) - return ( + dag_resource_name = permissions.resource_name_for_dag(dag_id) + return bool( self._has_view_access(user, permissions.ACTION_CAN_READ, permissions.RESOURCE_DAG) - or self._has_view_access(user, permissions.ACTION_CAN_READ, prefixed_dag_id) - or False + or self._has_view_access(user, permissions.ACTION_CAN_READ, dag_resource_name) ) def can_edit_dag(self, dag_id, user=None) -> bool: @@ -360,10 +359,9 @@ def can_edit_dag(self, dag_id, user=None) -> bool: user = g.user dag_resource_name = permissions.resource_name_for_dag(dag_id) - return ( + return bool( self._has_view_access(user, permissions.ACTION_CAN_EDIT, permissions.RESOURCE_DAG) - or self._has_view_access(user, permissions.ACTION_CAN_EDIT, prefixed_dag_id) - or False + or self._has_view_access(user, permissions.ACTION_CAN_EDIT, dag_resource_name) ) def prefixed_dag_id(self, dag_id): @@ -402,7 +400,7 @@ def has_access(self, permission, resource, user=None) -> bool: if user.is_anonymous: user.roles = self.get_user_roles(user) - has_access = self._has_view_access(user, permission, resource) or False + has_access = bool(self._has_view_access(user, permission, resource)) # FAB built-in view access method. Won't work for AllDag access. if self.is_dag_resource(resource): diff --git a/airflow/www/views.py b/airflow/www/views.py index 78dca2f010eb6..05af4f92c2686 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -76,7 +76,7 @@ from pendulum.datetime import DateTime from pygments import highlight, lexers from pygments.formatters import HtmlFormatter # noqa pylint: disable=no-name-in-module -from sqlalchemy import and_, desc, func, or_, union_all +from sqlalchemy import Date, and_, desc, func, or_, union_all from sqlalchemy.orm import joinedload from wtforms import SelectField, validators from wtforms.validators import InputRequired @@ -2103,13 +2103,13 @@ def calendar(self): with create_session() as session: dag_states = ( session.query( - func.date(DagRun.execution_date).label('date'), + (DagRun.execution_date.cast(Date)).label('date'), DagRun.state, func.count('*').label('count'), ) .filter(DagRun.dag_id == dag.dag_id) - .group_by(func.date(DagRun.execution_date), DagRun.state) - .order_by(func.date(DagRun.execution_date).asc()) + .group_by(DagRun.execution_date.cast(Date), DagRun.state) + .order_by(DagRun.execution_date.cast(Date).asc()) .all() ) diff --git a/breeze-complete b/breeze-complete index e19a79791224b..b0985eb95c0e1 100644 --- a/breeze-complete +++ b/breeze-complete @@ -23,7 +23,7 @@ # by the BATS tests automatically during pre-commit and CI # Those cannot be made read-only as the breeze-complete must be re-sourceable -_breeze_allowed_python_major_minor_versions="2.7 3.5 3.6 3.7 3.8" +_breeze_allowed_python_major_minor_versions="3.6 3.7 3.8" _breeze_allowed_backends="sqlite mysql postgres mssql" _breeze_allowed_integrations="cassandra kerberos mongo openldap pinot rabbitmq redis statsd trino all" _breeze_allowed_generate_constraints_modes="source-providers pypi-providers no-providers" diff --git a/setup.py b/setup.py index d949c0c235c78..f92a42269c93d 100644 --- a/setup.py +++ b/setup.py @@ -522,8 +522,7 @@ def get_sphinx_theme_version() -> str: 'paramiko', 'pipdeptree', 'pre-commit', - 'pylint>=2.7.0', - 'pyodbc', + 'pylint~=2.8.1', 'pysftp', 'pytest~=6.0', 'pytest-cov', diff 
--git a/tests/models/test_renderedtifields.py b/tests/models/test_renderedtifields.py index c4eca2c5bbb1e..bd88d5dfa1468 100644 --- a/tests/models/test_renderedtifields.py +++ b/tests/models/test_renderedtifields.py @@ -174,7 +174,9 @@ def test_delete_old_records(self, rtif_num, num_to_keep, remaining_rtifs, expect # Verify old records are deleted and only 'num_to_keep' records are kept # For other DBs,an extra query is fired in RenderedTaskInstanceFields.delete_old_records expected_query_count_based_on_db = ( - expected_query_count + 1 if session.bind.dialect.name == "mssql" else expected_query_count + expected_query_count + 1 + if session.bind.dialect.name == "mssql" and expected_query_count != 0 + else expected_query_count ) with assert_queries_count(expected_query_count_based_on_db): diff --git a/tests/models/test_taskinstance.py b/tests/models/test_taskinstance.py index 90c7bd65951f1..9c2beb6cce87f 100644 --- a/tests/models/test_taskinstance.py +++ b/tests/models/test_taskinstance.py @@ -2125,7 +2125,7 @@ def test_execute_queries_count_store_serialized(self): ) # an extra query is fired in RenderedTaskInstanceFields.delete_old_records # for other DBs - expected_query_count_based_on_db = 11 if session.bind.dialect.name == "mssql" else 10 + expected_query_count_based_on_db = 13 if session.bind.dialect.name == "mssql" else 12 with assert_queries_count(expected_query_count_based_on_db): ti._run_raw_task() diff --git a/tests/providers/apache/hive/transfers/test_mssql_to_hive.py b/tests/providers/apache/hive/transfers/test_mssql_to_hive.py index 003188d0645b8..99455dab0fb17 100644 --- a/tests/providers/apache/hive/transfers/test_mssql_to_hive.py +++ b/tests/providers/apache/hive/transfers/test_mssql_to_hive.py @@ -41,19 +41,19 @@ def setUp(self): self.kwargs = dict(sql='sql', hive_table='table', task_id='test_mssql_to_hive', dag=None) def test_type_map_binary(self): - # pylint: disable=c-extension-no-member,no-member + # pylint: disable=c-extension-no-member, no-member mapped_type = MsSqlToHiveOperator(**self.kwargs).type_map(pymssql.BINARY.value) assert mapped_type == 'INT' def test_type_map_decimal(self): - # pylint: disable=c-extension-no-member,no-member + # pylint: disable=c-extension-no-member, no-member mapped_type = MsSqlToHiveOperator(**self.kwargs).type_map(pymssql.DECIMAL.value) assert mapped_type == 'FLOAT' def test_type_map_number(self): - # pylint: disable=c-extension-no-member,no-member + # pylint: disable=c-extension-no-member, no-member mapped_type = MsSqlToHiveOperator(**self.kwargs).type_map(pymssql.NUMBER.value) assert mapped_type == 'INT' From b16933d55a66d9ea3da696fdd6850fd0bb917ba8 Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Sun, 9 May 2021 07:32:47 +0530 Subject: [PATCH 24/36] fix sqlite test --- airflow/www/views.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/airflow/www/views.py b/airflow/www/views.py index 05af4f92c2686..e90ec6bc821b3 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -2090,6 +2090,14 @@ def tree(self): @action_logging # pylint: disable=too-many-locals def calendar(self): """Get DAG runs as calendar""" + + def _convert_to_date(session, column): + """Convert column to date.""" + if session.bind.dialect.name == 'mssql': + return column.cast(Date) + else: + return func.date(column) + dag_id = request.args.get('dag_id') dag = current_app.dag_bag.get_dag(dag_id) if not dag: @@ -2103,13 +2111,13 @@ def calendar(self): with create_session() as session: dag_states = ( session.query( - 
(DagRun.execution_date.cast(Date)).label('date'), + (_convert_to_date(session, DagRun.execution_date)).label('date'), DagRun.state, func.count('*').label('count'), ) .filter(DagRun.dag_id == dag.dag_id) - .group_by(DagRun.execution_date.cast(Date), DagRun.state) - .order_by(DagRun.execution_date.cast(Date).asc()) + .group_by(_convert_to_date(session, DagRun.execution_date), DagRun.state) + .order_by(_convert_to_date(session, DagRun.execution_date).asc()) .all() ) From a9e1a04e2bea0da3d71910df2a23564491872eda Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Sun, 9 May 2021 10:35:18 +0530 Subject: [PATCH 25/36] combine mssql specific migrations --- ...b1_make_mssql_tables_in_sync_with_sqla_.py | 109 ------------------ ...f031fd9f1c_improve_mssql_compatibility.py} | 99 ++++++++++++++-- ...1d8_relax_dag_run_constraints_for_mssql.py | 105 ----------------- ...1f0_make_xcom_pkey_columns_non_nullable.py | 10 +- 4 files changed, 95 insertions(+), 228 deletions(-) delete mode 100644 airflow/migrations/versions/5ccc55a461b1_make_mssql_tables_in_sync_with_sqla_.py rename airflow/migrations/versions/{83f031fd9f1c_change_ts_columns_to_datetime_on_mssql.py => 83f031fd9f1c_improve_mssql_compatibility.py} (58%) delete mode 100644 airflow/migrations/versions/e70ba651e1d8_relax_dag_run_constraints_for_mssql.py diff --git a/airflow/migrations/versions/5ccc55a461b1_make_mssql_tables_in_sync_with_sqla_.py b/airflow/migrations/versions/5ccc55a461b1_make_mssql_tables_in_sync_with_sqla_.py deleted file mode 100644 index 9a6adc0f24238..0000000000000 --- a/airflow/migrations/versions/5ccc55a461b1_make_mssql_tables_in_sync_with_sqla_.py +++ /dev/null @@ -1,109 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""make mssql tables in sync with SQLA models - -Revision ID: 5ccc55a461b1 -Revises: e9304a3141f0 -Create Date: 2021-04-06 14:22:02.197726 - -""" - -from alembic import op -from sqlalchemy.dialects import mssql - -# revision identifiers, used by Alembic. 
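For intuition on the calendar change earlier in this patch, the two branches of _convert_to_date compile to different SQL; a short sketch using stock SQLAlchemy dialects (the table definition is illustrative):

    from sqlalchemy import Column, Date, DateTime, MetaData, Table, cast, func
    from sqlalchemy.dialects import mssql, postgresql

    dag_run = Table('dag_run', MetaData(), Column('execution_date', DateTime))

    # what non-MSSQL backends get from func.date(...):
    print(func.date(dag_run.c.execution_date).compile(dialect=postgresql.dialect()))
    # date(dag_run.execution_date)

    # MSSQL has no date() function, hence the CAST:
    print(cast(dag_run.c.execution_date, Date).compile(dialect=mssql.dialect()))
    # CAST(dag_run.execution_date AS DATE)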
-revision = '5ccc55a461b1' -down_revision = 'e9304a3141f0' -branch_labels = None -depends_on = None - - -def __get_timestamp(conn): - result = conn.execute( - """SELECT CASE WHEN CONVERT(VARCHAR(128), SERVERPROPERTY ('productversion')) - like '8%' THEN '2000' WHEN CONVERT(VARCHAR(128), SERVERPROPERTY ('productversion')) - like '9%' THEN '2005' ELSE '2005Plus' END AS MajorVersion""" - ).fetchone() - mssql_version = result[0] - if mssql_version not in ("2000", "2005"): - return mssql.DATETIME2(precision=6) - else: - return mssql.DATETIME - - -def upgrade(): - """Apply make mssql tables in sync with SQLA models""" - conn = op.get_bind() - if conn.dialect.name != "mssql": - return - op.alter_column(table_name="xcom", column_name="timestamp", type_=__get_timestamp(conn), nullable=False) - with op.batch_alter_table('task_reschedule') as task_reschedule_batch_op: - task_reschedule_batch_op.alter_column( - column_name='end_date', type_=__get_timestamp(conn), nullable=False - ) - task_reschedule_batch_op.alter_column( - column_name='reschedule_date', type_=__get_timestamp(conn), nullable=False - ) - task_reschedule_batch_op.alter_column( - column_name='start_date', type_=__get_timestamp(conn), nullable=False - ) - with op.batch_alter_table('task_fail') as task_fail_batch_op: - task_fail_batch_op.drop_index('idx_task_fail_dag_task_date') - task_fail_batch_op.alter_column( - column_name="execution_date", type_=__get_timestamp(conn), nullable=False - ) - task_fail_batch_op.create_index( - 'idx_task_fail_dag_task_date', ['dag_id', 'task_id', 'execution_date'], unique=False - ) - with op.batch_alter_table('task_instance') as task_instance_batch_op: - task_instance_batch_op.drop_index('ti_state_lkp') - task_instance_batch_op.create_index( - 'ti_state_lkp', ['dag_id', 'task_id', 'execution_date', 'state'], unique=False - ) - - -def downgrade(): - """Unapply make mssql tables in sync with SQLA models""" - conn = op.get_bind() - if conn.dialect.name != "mssql": - return - op.alter_column(table_name="xcom", column_name="timestamp", type_=__get_timestamp(conn), nullable=True) - with op.batch_alter_table('task_reschedule') as task_reschedule_batch_op: - task_reschedule_batch_op.alter_column( - column_name='end_date', type_=__get_timestamp(conn), nullable=True - ) - task_reschedule_batch_op.alter_column( - column_name='reschedule_date', type_=__get_timestamp(conn), nullable=True - ) - task_reschedule_batch_op.alter_column( - column_name='start_date', type_=__get_timestamp(conn), nullable=True - ) - with op.batch_alter_table('task_fail') as task_fail_batch_op: - task_fail_batch_op.drop_index('idx_task_fail_dag_task_date') - task_fail_batch_op.alter_column( - column_name="execution_date", type_=__get_timestamp(conn), nullable=False - ) - task_fail_batch_op.create_index( - 'idx_task_fail_dag_task_date', ['dag_id', 'task_id', 'execution_date'], unique=False - ) - with op.batch_alter_table('task_instance') as task_instance_batch_op: - task_instance_batch_op.drop_index('ti_state_lkp') - task_instance_batch_op.create_index( - 'ti_state_lkp', ['dag_id', 'task_id', 'execution_date'], unique=False - ) diff --git a/airflow/migrations/versions/83f031fd9f1c_change_ts_columns_to_datetime_on_mssql.py b/airflow/migrations/versions/83f031fd9f1c_improve_mssql_compatibility.py similarity index 58% rename from airflow/migrations/versions/83f031fd9f1c_change_ts_columns_to_datetime_on_mssql.py rename to airflow/migrations/versions/83f031fd9f1c_improve_mssql_compatibility.py index 52a3777c0af1c..97a89cb15e404 100644 --- 
a/airflow/migrations/versions/83f031fd9f1c_change_ts_columns_to_datetime_on_mssql.py +++ b/airflow/migrations/versions/83f031fd9f1c_improve_mssql_compatibility.py @@ -16,7 +16,7 @@ # specific language governing permissions and limitations # under the License. -"""change ts/datetime columns to datetime/datetime2 on mssql +"""improve mssql compatibility Revision ID: 83f031fd9f1c Revises: a13f7613ad25 @@ -105,7 +105,7 @@ def create_constraints(operator, column_name, constraint_dict): operator.create_unique_constraint(constraint_name=constraint[0], columns=columns) -def __use_date_time2(conn): +def _use_date_time2(conn): result = conn.execute( """SELECT CASE WHEN CONVERT(VARCHAR(128), SERVERPROPERTY ('productversion')) like '8%' THEN '2000' WHEN CONVERT(VARCHAR(128), SERVERPROPERTY ('productversion')) @@ -115,7 +115,7 @@ def __use_date_time2(conn): return mssql_version not in ("2000", "2005") -def __is_timestamp(conn, table_name, column_name): +def _is_timestamp(conn, table_name, column_name): query = f"""SELECT TYPE_NAME(C.USER_TYPE_ID) AS DATA_TYPE FROM SYS.COLUMNS C @@ -132,12 +132,12 @@ def recreate_mssql_ts_column(conn, op, table_name, column_name): Drop the timestamp column and recreate it as datetime or datetime2(6) """ - if __is_timestamp(conn, table_name, column_name) and is_table_empty(conn, table_name): + if _is_timestamp(conn, table_name, column_name) and is_table_empty(conn, table_name): with op.batch_alter_table(table_name) as batch_op: constraint_dict = get_table_constraints(conn, table_name) drop_column_constraints(batch_op, column_name, constraint_dict) batch_op.drop_column(column_name=column_name) - if __use_date_time2(conn): + if _use_date_time2(conn): batch_op.add_column(sa.Column(column_name, mssql.DATETIME2(precision=6), nullable=False)) else: batch_op.add_column(sa.Column(column_name, mssql.DATETIME, nullable=False)) @@ -146,7 +146,7 @@ def recreate_mssql_ts_column(conn, op, table_name, column_name): def alter_mssql_datetime_column(conn, op, table_name, column_name, nullable): """Update the datetime column to datetime2(6)""" - if __use_date_time2(conn): + if _use_date_time2(conn): op.alter_column( table_name=table_name, column_name=column_name, @@ -157,25 +157,106 @@ def alter_mssql_datetime_column(conn, op, table_name, column_name, nullable): def alter_mssql_datetime2_column(conn, op, table_name, column_name, nullable): """Update the datetime2(6) column to datetime""" - if __use_date_time2(conn): + if _use_date_time2(conn): op.alter_column( table_name=table_name, column_name=column_name, type_=mssql.DATETIME, nullable=nullable ) +def _get_timestamp(conn): + result = conn.execute( + """SELECT CASE WHEN CONVERT(VARCHAR(128), SERVERPROPERTY ('productversion')) + like '8%' THEN '2000' WHEN CONVERT(VARCHAR(128), SERVERPROPERTY ('productversion')) + like '9%' THEN '2005' ELSE '2005Plus' END AS MajorVersion""" + ).fetchone() + mssql_version = result[0] + if mssql_version not in ("2000", "2005"): + return mssql.DATETIME2(precision=6) + else: + return mssql.DATETIME + + def upgrade(): - """Change timestamp and datetime to datetime2/datetime when using MSSQL as backend""" + """Improve compatibility with MSSQL backend""" conn = op.get_bind() if conn.dialect.name != 'mssql': return recreate_mssql_ts_column(conn, op, 'dag_code', 'last_updated') recreate_mssql_ts_column(conn, op, 'rendered_task_instance_fields', 'execution_date') alter_mssql_datetime_column(conn, op, 'serialized_dag', 'last_updated', False) + op.alter_column(table_name="xcom", column_name="timestamp", 
type_=_get_timestamp(conn), nullable=False) + with op.batch_alter_table('task_reschedule') as task_reschedule_batch_op: + task_reschedule_batch_op.alter_column( + column_name='end_date', type_=_get_timestamp(conn), nullable=False + ) + task_reschedule_batch_op.alter_column( + column_name='reschedule_date', type_=_get_timestamp(conn), nullable=False + ) + task_reschedule_batch_op.alter_column( + column_name='start_date', type_=_get_timestamp(conn), nullable=False + ) + with op.batch_alter_table('task_fail') as task_fail_batch_op: + task_fail_batch_op.drop_index('idx_task_fail_dag_task_date') + task_fail_batch_op.alter_column( + column_name="execution_date", type_=_get_timestamp(conn), nullable=False + ) + task_fail_batch_op.create_index( + 'idx_task_fail_dag_task_date', ['dag_id', 'task_id', 'execution_date'], unique=False + ) + with op.batch_alter_table('task_instance') as task_instance_batch_op: + task_instance_batch_op.drop_index('ti_state_lkp') + task_instance_batch_op.create_index( + 'ti_state_lkp', ['dag_id', 'task_id', 'execution_date', 'state'], unique=False + ) + constraint_dict = get_table_constraints(conn, 'dag_run') + for constraint, columns in constraint_dict.items(): + if 'dag_id' in columns: + if constraint[1].lower().startswith("unique"): + op.drop_constraint(constraint[0], 'dag_run', type_='unique') + # create filtered indexes + conn.execute( + """CREATE UNIQUE NONCLUSTERED INDEX idx_not_null_dag_id_execution_date + ON dag_run(dag_id,execution_date) + WHERE dag_id IS NOT NULL and execution_date is not null""" + ) + conn.execute( + """CREATE UNIQUE NONCLUSTERED INDEX idx_not_null_dag_id_run_id + ON dag_run(dag_id,run_id) + WHERE dag_id IS NOT NULL and run_id is not null""" + ) def downgrade(): - """Change datetime2(6) columns back to datetime when using MSSQL as backend""" + """Reverse MSSQL backend compatibility improvements""" conn = op.get_bind() if conn.dialect.name != 'mssql': return alter_mssql_datetime2_column(conn, op, 'serialized_dag', 'last_updated', False) + op.alter_column(table_name="xcom", column_name="timestamp", type_=_get_timestamp(conn), nullable=True) + with op.batch_alter_table('task_reschedule') as task_reschedule_batch_op: + task_reschedule_batch_op.alter_column( + column_name='end_date', type_=_get_timestamp(conn), nullable=True + ) + task_reschedule_batch_op.alter_column( + column_name='reschedule_date', type_=_get_timestamp(conn), nullable=True + ) + task_reschedule_batch_op.alter_column( + column_name='start_date', type_=_get_timestamp(conn), nullable=True + ) + with op.batch_alter_table('task_fail') as task_fail_batch_op: + task_fail_batch_op.drop_index('idx_task_fail_dag_task_date') + task_fail_batch_op.alter_column( + column_name="execution_date", type_=_get_timestamp(conn), nullable=False + ) + task_fail_batch_op.create_index( + 'idx_task_fail_dag_task_date', ['dag_id', 'task_id', 'execution_date'], unique=False + ) + with op.batch_alter_table('task_instance') as task_instance_batch_op: + task_instance_batch_op.drop_index('ti_state_lkp') + task_instance_batch_op.create_index( + 'ti_state_lkp', ['dag_id', 'task_id', 'execution_date'], unique=False + ) + op.create_unique_constraint('UQ__dag_run__dag_id_run_id', 'dag_run', ['dag_id', 'run_id']) + op.create_unique_constraint('UQ__dag_run__dag_id_execution_date', 'dag_run', ['dag_id', 'execution_date']) + op.drop_index('idx_not_null_dag_id_execution_date', table_name='dag_run') + op.drop_index('idx_not_null_dag_id_run_id', table_name='dag_run') diff --git 
a/airflow/migrations/versions/e70ba651e1d8_relax_dag_run_constraints_for_mssql.py b/airflow/migrations/versions/e70ba651e1d8_relax_dag_run_constraints_for_mssql.py deleted file mode 100644 index 86432aa71df3c..0000000000000 --- a/airflow/migrations/versions/e70ba651e1d8_relax_dag_run_constraints_for_mssql.py +++ /dev/null @@ -1,105 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""relax dag_run constraints for mssql - -Revision ID: e70ba651e1d8 -Revises: 5ccc55a461b1 -Create Date: 2021-04-06 15:22:02.197726 - -""" -from collections import defaultdict - -from alembic import op - -# revision identifiers, used by Alembic. -revision = 'e70ba651e1d8' -down_revision = '5ccc55a461b1' -branch_labels = None -depends_on = None - - -def drop_constraint(operator, constraint_dict): - """ - Drop a primary key or unique constraint - - :param operator: batch_alter_table for the table - :param constraint_dict: a dictionary of ((constraint name, constraint type), column name) of table - """ - - -def get_table_constraints(conn, table_name): - """ - This function return primary and unique constraint - along with column name. some tables like task_instance - is missing primary key constraint name and the name is - auto-generated by sql server. so this function helps to - retrieve any primary or unique constraint name. 
- - :param conn: sql connection object - :param table_name: table name - :return: a dictionary of ((constraint name, constraint type), column name) of table - :rtype: defaultdict(list) - """ - query = """SELECT tc.CONSTRAINT_NAME , tc.CONSTRAINT_TYPE, ccu.COLUMN_NAME - FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS AS tc - JOIN INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE AS ccu ON ccu.CONSTRAINT_NAME = tc.CONSTRAINT_NAME - WHERE tc.TABLE_NAME = '{table_name}' AND - (tc.CONSTRAINT_TYPE = 'PRIMARY KEY' or UPPER(tc.CONSTRAINT_TYPE) = 'UNIQUE') - """.format( - table_name=table_name - ) - result = conn.execute(query).fetchall() - constraint_dict = defaultdict(list) - for constraint, constraint_type, column in result: - constraint_dict[(constraint, constraint_type)].append(column) - return constraint_dict - - -def upgrade(): - """Apply relax dag_run constraints for mssql""" - conn = op.get_bind() - if conn.dialect.name == "mssql": - constraint_dict = get_table_constraints(conn, 'dag_run') - for constraint, columns in constraint_dict.items(): - if 'dag_id' in columns: - if constraint[1].lower().startswith("unique"): - op.drop_constraint(constraint[0], 'dag_run', type_='unique') - # create filtered indexes - conn.execute( - """CREATE UNIQUE NONCLUSTERED INDEX idx_not_null_dag_id_execution_date - ON dag_run(dag_id,execution_date) - WHERE dag_id IS NOT NULL and execution_date is not null""" - ) - conn.execute( - """CREATE UNIQUE NONCLUSTERED INDEX idx_not_null_dag_id_run_id - ON dag_run(dag_id,run_id) - WHERE dag_id IS NOT NULL and run_id is not null""" - ) - - -def downgrade(): - """Unapply relax dag_run constraints for mssql""" - conn = op.get_bind() - if conn.dialect.name == "mssql": - op.create_unique_constraint('UQ__dag_run__dag_id_run_id', 'dag_run', ['dag_id', 'run_id']) - op.create_unique_constraint( - 'UQ__dag_run__dag_id_execution_date', 'dag_run', ['dag_id', 'execution_date'] - ) - op.drop_index('idx_not_null_dag_id_execution_date', table_name='dag_run') - op.drop_index('idx_not_null_dag_id_run_id', table_name='dag_run') diff --git a/airflow/migrations/versions/e9304a3141f0_make_xcom_pkey_columns_non_nullable.py b/airflow/migrations/versions/e9304a3141f0_make_xcom_pkey_columns_non_nullable.py index 48e1134d51f8e..bcfc2bbcc473d 100644 --- a/airflow/migrations/versions/e9304a3141f0_make_xcom_pkey_columns_non_nullable.py +++ b/airflow/migrations/versions/e9304a3141f0_make_xcom_pkey_columns_non_nullable.py @@ -36,7 +36,7 @@ depends_on = None -def __use_date_time2(conn): +def _use_date_time2(conn): result = conn.execute( """SELECT CASE WHEN CONVERT(VARCHAR(128), SERVERPROPERTY ('productversion')) like '8%' THEN '2000' WHEN CONVERT(VARCHAR(128), SERVERPROPERTY ('productversion')) @@ -46,10 +46,10 @@ def __use_date_time2(conn): return mssql_version not in ("2000", "2005") -def __get_timestamp(conn): +def _get_timestamp(conn): dialect_name = conn.dialect.name if dialect_name == "mssql": - return mssql.DATETIME2(precision=6) if __use_date_time2(conn) else mssql.DATETIME + return mssql.DATETIME2(precision=6) if _use_date_time2(conn) else mssql.DATETIME elif dialect_name != "mysql": return sa.TIMESTAMP(timezone=True) else: @@ -61,7 +61,7 @@ def upgrade(): conn = op.get_bind() with op.batch_alter_table('xcom') as bop: bop.alter_column("key", type_=sa.String(length=512, **COLLATION_ARGS), nullable=False) - bop.alter_column("execution_date", type_=__get_timestamp(conn), nullable=False) + bop.alter_column("execution_date", type_=_get_timestamp(conn), nullable=False) if conn.dialect.name == 'mssql': 
bop.create_primary_key('pk_xcom', ['dag_id', 'task_id', 'key', 'execution_date']) @@ -73,4 +73,4 @@ def downgrade(): if conn.dialect.name == 'mssql': bop.drop_constraint('pk_xcom', 'primary') bop.alter_column("key", type_=sa.String(length=512, **COLLATION_ARGS), nullable=True) - bop.alter_column("execution_date", type_=__get_timestamp(conn), nullable=True) + bop.alter_column("execution_date", type_=_get_timestamp(conn), nullable=True) From 823cd1b79339373e41696520016328902f83fffa Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Sun, 9 May 2021 10:48:55 +0530 Subject: [PATCH 26/36] revert offset changes --- airflow/api_connexion/endpoints/event_log_endpoint.py | 2 +- airflow/api_connexion/endpoints/import_error_endpoint.py | 2 +- airflow/api_connexion/endpoints/user_endpoint.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/airflow/api_connexion/endpoints/event_log_endpoint.py b/airflow/api_connexion/endpoints/event_log_endpoint.py index bc53ff57672c5..a263c5ccb3148 100644 --- a/airflow/api_connexion/endpoints/event_log_endpoint.py +++ b/airflow/api_connexion/endpoints/event_log_endpoint.py @@ -44,7 +44,7 @@ def get_event_log(event_log_id, session): @security.requires_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_AUDIT_LOG)]) @format_parameters({'limit': check_limit}) @provide_session -def get_event_logs(session, limit, offset=0, order_by='event_log_id'): +def get_event_logs(session, limit, offset=None, order_by='event_log_id'): """Get all log entries from event log""" to_replace = {"event_log_id": "id", "when": "dttm"} allowed_filter_attrs = [ diff --git a/airflow/api_connexion/endpoints/import_error_endpoint.py b/airflow/api_connexion/endpoints/import_error_endpoint.py index 280cc3ecbb23d..a0a09b8c485c1 100644 --- a/airflow/api_connexion/endpoints/import_error_endpoint.py +++ b/airflow/api_connexion/endpoints/import_error_endpoint.py @@ -47,7 +47,7 @@ def get_import_error(import_error_id, session): @security.requires_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_IMPORT_ERROR)]) @format_parameters({'limit': check_limit}) @provide_session -def get_import_errors(session, limit, offset=0, order_by='import_error_id'): +def get_import_errors(session, limit, offset=None, order_by='import_error_id'): """Get all import errors""" to_replace = {"import_error_id": 'id'} allowed_filter_attrs = ['import_error_id', "timestamp", "filename"] diff --git a/airflow/api_connexion/endpoints/user_endpoint.py b/airflow/api_connexion/endpoints/user_endpoint.py index 29b480205e077..01dae3dd8e746 100644 --- a/airflow/api_connexion/endpoints/user_endpoint.py +++ b/airflow/api_connexion/endpoints/user_endpoint.py @@ -41,7 +41,7 @@ def get_user(username): @security.requires_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_USER)]) @format_parameters({'limit': check_limit}) -def get_users(limit, order_by='id', offset=0): +def get_users(limit, order_by='id', offset=None): """Get users""" appbuilder = current_app.appbuilder session = appbuilder.get_session From 4af7bbeef4fd2fab75a54ada81f433899cfd9fce Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Sun, 9 May 2021 16:12:37 +0530 Subject: [PATCH 27/36] fix smart sensor for mssql --- airflow/sensors/smart_sensor.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/airflow/sensors/smart_sensor.py b/airflow/sensors/smart_sensor.py index 6c17beb8912ad..6f26ee9c23c7b 100644 --- a/airflow/sensors/smart_sensor.py +++ b/airflow/sensors/smart_sensor.py @@ -24,7 +24,7 @@ from 
logging.config import DictConfigurator # type: ignore from time import sleep -from sqlalchemy import and_, or_, tuple_ +from sqlalchemy import and_, or_ from airflow.exceptions import AirflowException, AirflowTaskTimeout from airflow.models import BaseOperator, SensorInstance, SkipMixin, TaskInstance @@ -391,13 +391,21 @@ def _update_ti_hostname(self, sensor_works, session=None): :param session: The sqlalchemy session. """ TI = TaskInstance - ti_keys = [(x.dag_id, x.task_id, x.execution_date) for x in sensor_works] def update_ti_hostname_with_count(count, ti_keys): # Using or_ instead of in_ here to prevent from full table scan. tis = ( session.query(TI) - .filter(or_(tuple_(TI.dag_id, TI.task_id, TI.execution_date) == ti_key for ti_key in ti_keys)) + .filter( + or_( + and_( + TI.dag_id == ti_key.dag_id, + TI.task_id == ti_key.task_id, + TI.execution_date == ti_key.execution_date, + ) + for ti_key in ti_keys + ) + ) .all() ) @@ -407,7 +415,9 @@ def update_ti_hostname_with_count(count, ti_keys): return count + len(ti_keys) - count = helpers.reduce_in_chunks(update_ti_hostname_with_count, ti_keys, 0, self.max_tis_per_query) + count = helpers.reduce_in_chunks( + update_ti_hostname_with_count, sensor_works, 0, self.max_tis_per_query + ) if count: self.log.info("Updated hostname on %s tis.", count) From 39223e4e1c192ede671e7350b6bdabc03aeec0fd Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Sun, 9 May 2021 21:12:57 +0530 Subject: [PATCH 28/36] fix mssql CI trigger --- scripts/ci/selective_ci_checks.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/ci/selective_ci_checks.sh b/scripts/ci/selective_ci_checks.sh index 5d40d60645a81..9a115a2f768a5 100755 --- a/scripts/ci/selective_ci_checks.sh +++ b/scripts/ci/selective_ci_checks.sh @@ -132,6 +132,7 @@ function output_all_basic_variables() { else initialization::ga_output postgres-exclude '[]' initialization::ga_output mysql-exclude '[]' + initialization::ga_output mssql-exclude '[]' initialization::ga_output sqlite-exclude '[]' fi From 23c35668e8c0385a205dbece0934e177f40c7772 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Sun, 9 May 2021 17:23:59 +0200 Subject: [PATCH 29/36] fixup! 
fix smart sensor for mssql --- scripts/ci/docker-compose/backend-mssql.yml | 2 ++ scripts/ci/docker-compose/base.yml | 1 + 2 files changed, 3 insertions(+) diff --git a/scripts/ci/docker-compose/backend-mssql.yml b/scripts/ci/docker-compose/backend-mssql.yml index d4132b51e33c0..93d2da179548b 100644 --- a/scripts/ci/docker-compose/backend-mssql.yml +++ b/scripts/ci/docker-compose/backend-mssql.yml @@ -29,3 +29,5 @@ services: environment: - ACCEPT_EULA=Y - SA_PASSWORD=Airflow123 + volumes: + - mssql-db-volume:/var/opt/mssql diff --git a/scripts/ci/docker-compose/base.yml b/scripts/ci/docker-compose/base.yml index eab6425f5513a..a3d184282cd0b 100644 --- a/scripts/ci/docker-compose/base.yml +++ b/scripts/ci/docker-compose/base.yml @@ -38,3 +38,4 @@ volumes: sqlite-db-volume: postgres-db-volume: mysql-db-volume: + mssql-db-volume: From f00e4d60dcda31dae3f514395a38e5ce85b052a2 Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Mon, 10 May 2021 19:18:33 +0530 Subject: [PATCH 30/36] add experimental MSSQL support details into docs --- README.md | 15 ++++++++------- docs/apache-airflow/installation.rst | 1 + 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index c9a8c866373d5..d0d2e173a0fa8 100644 --- a/README.md +++ b/README.md @@ -97,13 +97,14 @@ We **highly** recommend upgrading to the latest Airflow major release at the ear Apache Airflow is tested with: -| | Master version (dev) | Stable version (2.0.2) | Previous version (1.10.15) | -| ------------ | ------------------------- | ------------------------ | ------------------------- | -| Python | 3.6, 3.7, 3.8 | 3.6, 3.7, 3.8 | 2.7, 3.5, 3.6, 3.7, 3.8 | -| Kubernetes | 1.20, 1.19, 1.18 | 1.20, 1.19, 1.18 | 1.18, 1.17, 1.16 | -| PostgreSQL | 9.6, 10, 11, 12, 13 | 9.6, 10, 11, 12, 13 | 9.6, 10, 11, 12, 13 | -| MySQL | 5.7, 8 | 5.7, 8 | 5.6, 5.7 | -| SQLite | 3.15.0+ | 3.15.0+ | 3.15.0+ | +| | Master version (dev) | Stable version (2.0.2) | Previous version (1.10.15) | +| -------------------- | ------------------------- | ------------------------ | ------------------------- | +| Python | 3.6, 3.7, 3.8 | 3.6, 3.7, 3.8 | 2.7, 3.5, 3.6, 3.7, 3.8 | +| Kubernetes | 1.20, 1.19, 1.18 | 1.20, 1.19, 1.18 | 1.18, 1.17, 1.16 | +| PostgreSQL | 9.6, 10, 11, 12, 13 | 9.6, 10, 11, 12, 13 | 9.6, 10, 11, 12, 13 | +| MySQL | 5.7, 8 | 5.7, 8 | 5.6, 5.7 | +| SQLite | 3.15.0+ | 3.15.0+ | 3.15.0+ | +| MSSQL(Experimental) | 2017,2019 | | | **Note:** MySQL 5.x versions are unable to or have limitations with running multiple schedulers -- please see the [Scheduler docs](https://airflow.apache.org/docs/apache-airflow/stable/scheduler.html). 
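A note on the smart-sensor fix in PATCH 27 above: it works around a SQL Server limitation. The dialect has no row-value constructor, so SQLAlchemy's tuple_(TI.dag_id, TI.task_id, TI.execution_date) == ti_key comparison renders SQL that MSSQL rejects, and the composite-key filter must instead be expanded into an OR of per-key AND clauses. Below is a minimal sketch of the pattern, assuming a TaskInstance-like model TI and plain (dag_id, task_id, execution_date) tuples; the helper name composite_key_filter is illustrative, not part of the patch:

    from sqlalchemy import and_, or_, tuple_


    def composite_key_filter(dialect_name, TI, ti_keys):
        """Build a filter matching composite (dag_id, task_id, execution_date) keys."""
        if dialect_name == 'mssql':
            # MSSQL cannot compare row values, so expand each key into ANDed columns.
            return or_(
                and_(TI.dag_id == d, TI.task_id == t, TI.execution_date == e)
                for d, t, e in ti_keys
            )
        # Dialects with row-value support can compare the tuple directly.
        return or_(
            tuple_(TI.dag_id, TI.task_id, TI.execution_date) == key for key in ti_keys
        )

PATCH 31 below settles on the same shape, choosing the branch at runtime from session.bind.dialect.name so that the cheaper tuple_ form is kept for the other backends.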
diff --git a/docs/apache-airflow/installation.rst b/docs/apache-airflow/installation.rst index e8fb3e7bdc349..ace814e0a2368 100644 --- a/docs/apache-airflow/installation.rst +++ b/docs/apache-airflow/installation.rst @@ -65,6 +65,7 @@ Airflow is tested with: * PostgreSQL: 9.6, 10, 11, 12, 13 * MySQL: 5.7, 8 * SQLite: 3.15.0+ + * MSSQL(Experimental): 2017, 2019 * Kubernetes: 1.18.15 1.19.7 1.20.2 From 6b90ccbbb01954e0f804335ca3e19c6cdc810790 Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Mon, 10 May 2021 19:58:44 +0530 Subject: [PATCH 31/36] incorporate review comments --- .../api_connexion/endpoints/user_endpoint.py | 2 +- ...3f031fd9f1c_improve_mssql_compatibility.py | 8 +---- airflow/models/dag.py | 4 ++- airflow/models/serialized_dag.py | 5 ++- airflow/sensors/smart_sensor.py | 31 ++++++++++--------- 5 files changed, 25 insertions(+), 25 deletions(-) diff --git a/airflow/api_connexion/endpoints/user_endpoint.py b/airflow/api_connexion/endpoints/user_endpoint.py index 01dae3dd8e746..53ce0e9cfb929 100644 --- a/airflow/api_connexion/endpoints/user_endpoint.py +++ b/airflow/api_connexion/endpoints/user_endpoint.py @@ -49,7 +49,7 @@ def get_users(limit, order_by='id', offset=None): to_replace = {"user_id": "id"} allowed_filter_attrs = [ "user_id", - "id", + 'id', "first_name", "last_name", "user_name", diff --git a/airflow/migrations/versions/83f031fd9f1c_improve_mssql_compatibility.py b/airflow/migrations/versions/83f031fd9f1c_improve_mssql_compatibility.py index 97a89cb15e404..eaf78c113d088 100644 --- a/airflow/migrations/versions/83f031fd9f1c_improve_mssql_compatibility.py +++ b/airflow/migrations/versions/83f031fd9f1c_improve_mssql_compatibility.py @@ -164,13 +164,7 @@ def alter_mssql_datetime2_column(conn, op, table_name, column_name, nullable): def _get_timestamp(conn): - result = conn.execute( - """SELECT CASE WHEN CONVERT(VARCHAR(128), SERVERPROPERTY ('productversion')) - like '8%' THEN '2000' WHEN CONVERT(VARCHAR(128), SERVERPROPERTY ('productversion')) - like '9%' THEN '2005' ELSE '2005Plus' END AS MajorVersion""" - ).fetchone() - mssql_version = result[0] - if mssql_version not in ("2000", "2005"): + if _use_date_time2(conn): return mssql.DATETIME2(precision=6) else: return mssql.DATETIME diff --git a/airflow/models/dag.py b/airflow/models/dag.py index fb5242f13431d..1cf3797d79c17 100644 --- a/airflow/models/dag.py +++ b/airflow/models/dag.py @@ -898,7 +898,9 @@ def get_num_active_runs(self, external_trigger=None, session=None): ) if external_trigger is not None: - query = query.filter(DagRun.external_trigger == expression.literal(external_trigger)) + query = query.filter( + DagRun.external_trigger == (expression.true() if external_trigger else expression.false()) + ) return query.scalar() diff --git a/airflow/models/serialized_dag.py b/airflow/models/serialized_dag.py index 2b600612addc7..d0db6e8b5b981 100644 --- a/airflow/models/serialized_dag.py +++ b/airflow/models/serialized_dag.py @@ -127,6 +127,9 @@ def write_dag(cls, dag: DAG, min_update_interval: Optional[int] = None, session: .first() is not None ): + # TODO: .first() is not None can be changed to .scalar() once we update to sqlalchemy 1.4+ + # as the associated sqlalchemy bug for MySQL was fixed + # related issue : https://github.com/sqlalchemy/sqlalchemy/issues/5481 return False log.debug("Checking if DAG (%s) changed", dag.dag_id) @@ -315,7 +318,7 @@ def get_dag_dependencies(cls, session: Session = None) -> Dict[str, List['DagDep if session.bind.dialect.name in ["sqlite", "mysql"]: for row in 
session.query(cls.dag_id, func.json_extract(cls.data, "$.dag.dag_dependencies")).all(): dependencies[row[0]] = [DagDependency(**d) for d in json.loads(row[1])] - elif session.bind.dialect.name in ["mssql"]: + elif session.bind.dialect.name == "mssql": for row in session.query(cls.dag_id, func.json_query(cls.data, "$.dag.dag_dependencies")).all(): dependencies[row[0]] = [DagDependency(**d) for d in json.loads(row[1])] else: diff --git a/airflow/sensors/smart_sensor.py b/airflow/sensors/smart_sensor.py index 6f26ee9c23c7b..9d0a28c65ae00 100644 --- a/airflow/sensors/smart_sensor.py +++ b/airflow/sensors/smart_sensor.py @@ -24,7 +24,7 @@ from logging.config import DictConfigurator # type: ignore from time import sleep -from sqlalchemy import and_, or_ +from sqlalchemy import and_, or_, tuple_ from airflow.exceptions import AirflowException, AirflowTaskTimeout from airflow.models import BaseOperator, SensorInstance, SkipMixin, TaskInstance @@ -392,28 +392,29 @@ def _update_ti_hostname(self, sensor_works, session=None): """ TI = TaskInstance - def update_ti_hostname_with_count(count, ti_keys): + def update_ti_hostname_with_count(count, sensor_works): # Using or_ instead of in_ here to prevent from full table scan. - tis = ( - session.query(TI) - .filter( - or_( - and_( - TI.dag_id == ti_key.dag_id, - TI.task_id == ti_key.task_id, - TI.execution_date == ti_key.execution_date, - ) - for ti_key in ti_keys + if session.bind.dialect.name == 'mssql': + ti_filter = or_( + and_( + TI.dag_id == ti_key.dag_id, + TI.task_id == ti_key.task_id, + TI.execution_date == ti_key.execution_date, ) + for ti_key in sensor_works ) - .all() - ) + else: + ti_keys = [(x.dag_id, x.task_id, x.execution_date) for x in sensor_works] + ti_filter = or_( + tuple_(TI.dag_id, TI.task_id, TI.execution_date) == ti_key for ti_key in ti_keys + ) + tis = session.query(TI).filter(ti_filter).all() for ti in tis: ti.hostname = self.hostname session.commit() - return count + len(ti_keys) + return count + len(sensor_works) count = helpers.reduce_in_chunks( update_ti_hostname_with_count, sensor_works, 0, self.max_tis_per_query From ff76c41aba5e78bbfb997f7ecc7d8b281b65ed14 Mon Sep 17 00:00:00 2001 From: Aneesh Joseph Date: Mon, 10 May 2021 22:58:25 +0530 Subject: [PATCH 32/36] incorporate review comments --- airflow/www/security.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/airflow/www/security.py b/airflow/www/security.py index a60447936027e..9b391f79cc840 100644 --- a/airflow/www/security.py +++ b/airflow/www/security.py @@ -348,10 +348,9 @@ def can_read_dag(self, dag_id, user=None) -> bool: if not user: user = g.user dag_resource_name = permissions.resource_name_for_dag(dag_id) - return bool( - self._has_view_access(user, permissions.ACTION_CAN_READ, permissions.RESOURCE_DAG) - or self._has_view_access(user, permissions.ACTION_CAN_READ, dag_resource_name) - ) + return self._has_view_access( + user, permissions.ACTION_CAN_READ, permissions.RESOURCE_DAG + ) or self._has_view_access(user, permissions.ACTION_CAN_READ, dag_resource_name) def can_edit_dag(self, dag_id, user=None) -> bool: """Determines whether a user has DAG edit access.""" @@ -359,10 +358,9 @@ def can_edit_dag(self, dag_id, user=None) -> bool: user = g.user dag_resource_name = permissions.resource_name_for_dag(dag_id) - return bool( - self._has_view_access(user, permissions.ACTION_CAN_EDIT, permissions.RESOURCE_DAG) - or self._has_view_access(user, permissions.ACTION_CAN_EDIT, dag_resource_name) - ) + return 
self._has_view_access(
+            user, permissions.ACTION_CAN_EDIT, permissions.RESOURCE_DAG
+        ) or self._has_view_access(user, permissions.ACTION_CAN_EDIT, dag_resource_name)
 
     def prefixed_dag_id(self, dag_id):
         """Returns the permission name for a DAG id."""
@@ -380,6 +378,14 @@ def is_dag_resource(self, resource_name):
             return True
         return resource_name.startswith(permissions.RESOURCE_DAG_PREFIX)
 
+    def _has_view_access(self, user, action, resource) -> bool:
+        """
+        Override the method to ensure that it always returns a bool.
+        The parent's _has_view_access can return None, which causes
+        issues downstream; this fixes that.
+        """
+        return bool(super()._has_view_access(user, action, resource))
+
     def has_access(self, permission, resource, user=None) -> bool:
         """
         Verify whether a given user could perform certain permission
@@ -400,7 +406,7 @@ def has_access(self, permission, resource, user=None) -> bool:
         if user.is_anonymous:
             user.roles = self.get_user_roles(user)
 
-        has_access = bool(self._has_view_access(user, permission, resource))
+        has_access = self._has_view_access(user, permission, resource)
 
         # FAB built-in view access method. Won't work for AllDag access.
         if self.is_dag_resource(resource):

From cfb21946e37312cb1ba498bd995c187b46c9772c Mon Sep 17 00:00:00 2001
From: Kaxil Naik
Date: Mon, 10 May 2021 22:39:49 +0100
Subject: [PATCH 33/36] Apply suggestions from code review

---
 .../versions/bbf4a7ad0465_remove_id_column_from_xcom.py | 9 ++++++---
 .../e9304a3141f0_make_xcom_pkey_columns_non_nullable.py | 6 +++---
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/airflow/migrations/versions/bbf4a7ad0465_remove_id_column_from_xcom.py b/airflow/migrations/versions/bbf4a7ad0465_remove_id_column_from_xcom.py
index c1ec278dc87f7..045f5a63179fd 100644
--- a/airflow/migrations/versions/bbf4a7ad0465_remove_id_column_from_xcom.py
+++ b/airflow/migrations/versions/bbf4a7ad0465_remove_id_column_from_xcom.py
@@ -40,10 +40,11 @@ def get_table_constraints(conn, table_name):
     """
     This function return primary and unique constraint
-    along with column name. some tables like task_instance
-    is missing primary key constraint name and the name is
-    auto-generated by sql server. so this function helps to
+    along with column name. Some tables like `task_instance`
+    are missing the primary key constraint name, and the name is
+    auto-generated by the SQL server, so this function helps to
     retrieve any primary or unique constraint name.
+
     :param conn: sql connection object
     :param table_name: table name
     :return: a dictionary of ((constraint name, constraint type), column name) of table
@@ -67,6 +68,7 @@ def get_table_constraints(conn, table_name):
 def drop_column_constraints(operator, column_name, constraint_dict):
     """
     Drop a primary key or unique constraint
+
     :param operator: batch_alter_table for the table
     :param constraint_dict: a dictionary of ((constraint name, constraint type), column name) of table
     """
@@ -81,6 +83,7 @@ def drop_column_constraints(operator, column_name, constraint_dict):
 def create_constraints(operator, column_name, constraint_dict):
     """
     Create a primary key or unique constraint
+
     :param operator: batch_alter_table for the table
     :param constraint_dict: a dictionary of ((constraint name, constraint type), column name) of table
     """
diff --git a/airflow/migrations/versions/e9304a3141f0_make_xcom_pkey_columns_non_nullable.py b/airflow/migrations/versions/e9304a3141f0_make_xcom_pkey_columns_non_nullable.py
index bcfc2bbcc473d..bde065b3e1ef4 100644
--- a/airflow/migrations/versions/e9304a3141f0_make_xcom_pkey_columns_non_nullable.py
+++ b/airflow/migrations/versions/e9304a3141f0_make_xcom_pkey_columns_non_nullable.py
@@ -50,10 +50,10 @@ def _get_timestamp(conn):
     dialect_name = conn.dialect.name
     if dialect_name == "mssql":
         return mssql.DATETIME2(precision=6) if _use_date_time2(conn) else mssql.DATETIME
-    elif dialect_name != "mysql":
-        return sa.TIMESTAMP(timezone=True)
-    else:
+    elif dialect_name == "mysql":
         return mysql.TIMESTAMP(fsp=6, timezone=True)
+    else:
+        return sa.TIMESTAMP(timezone=True)
 
 
 def upgrade():

From 51b33de32c51ea6f8e4c35acf79001766df5b95b Mon Sep 17 00:00:00 2001
From: Kaxil Naik
Date: Mon, 10 May 2021 22:43:50 +0100
Subject: [PATCH 34/36] Apply suggestions from code review

---
 .../versions/83f031fd9f1c_improve_mssql_compatibility.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/airflow/migrations/versions/83f031fd9f1c_improve_mssql_compatibility.py b/airflow/migrations/versions/83f031fd9f1c_improve_mssql_compatibility.py
index eaf78c113d088..ce5f3fc57b06b 100644
--- a/airflow/migrations/versions/83f031fd9f1c_improve_mssql_compatibility.py
+++ b/airflow/migrations/versions/83f031fd9f1c_improve_mssql_compatibility.py
@@ -39,8 +39,9 @@
 
 def is_table_empty(conn, table_name):
     """
-    This function checks if the mssql table is empty
-    :param conn: sql connection object
+    This function checks if the MS SQL table is empty
+
+    :param conn: SQL connection object
     :param table_name: table name
     :return: Boolean indicating if the table is empty
     """
@@ -171,7 +172,7 @@ def _get_timestamp(conn):
 
 def upgrade():
-    """Improve compatibility with MSSQL backend"""
+    """Improve compatibility with MSSQL backend"""
     conn = op.get_bind()
     if conn.dialect.name != 'mssql':
         return
@@ -221,7 +222,7 @@ def upgrade():
 
 def downgrade():
-    """Reverse MSSQL backend compatibility improvements"""
+    """Reverse MSSQL backend compatibility improvements"""
     conn = op.get_bind()
     if conn.dialect.name != 'mssql':
         return

From cfb162ec0996ebc4726456cf41fe25de093beb9b Mon Sep 17 00:00:00 2001
From: Aneesh Joseph
Date: Tue, 11 May 2021 15:01:11 +0530
Subject: [PATCH 35/36] fixup method names

---
 ...98271e7606e2_add_scheduling_decision_to_dagrun_and_.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/airflow/migrations/versions/98271e7606e2_add_scheduling_decision_to_dagrun_and_.py b/airflow/migrations/versions/98271e7606e2_add_scheduling_decision_to_dagrun_and_.py
index
a18b72b96525f..d220d32b1653f 100644
--- a/airflow/migrations/versions/98271e7606e2_add_scheduling_decision_to_dagrun_and_.py
+++ b/airflow/migrations/versions/98271e7606e2_add_scheduling_decision_to_dagrun_and_.py
@@ -35,7 +35,7 @@
 depends_on = None
 
 
-def __use_date_time2(conn):
+def _use_date_time2(conn):
     result = conn.execute(
         """SELECT CASE WHEN CONVERT(VARCHAR(128), SERVERPROPERTY ('productversion'))
         like '8%' THEN '2000' WHEN CONVERT(VARCHAR(128), SERVERPROPERTY ('productversion'))
@@ -45,10 +45,10 @@
     return mssql_version not in ("2000", "2005")
 
 
-def __get_timestamp(conn):
+def _get_timestamp(conn):
     dialect_name = conn.dialect.name
     if dialect_name == "mssql":
-        return mssql.DATETIME2(precision=6) if __use_date_time2(conn) else mssql.DATETIME
+        return mssql.DATETIME2(precision=6) if _use_date_time2(conn) else mssql.DATETIME
     elif dialect_name != "mysql":
         return sa.TIMESTAMP(timezone=True)
     else:
@@ -60,7 +60,7 @@ def upgrade():
     conn = op.get_bind()  # pylint: disable=no-member
     is_sqlite = bool(conn.dialect.name == "sqlite")
     is_mssql = bool(conn.dialect.name == "mssql")
-    timestamp = __get_timestamp(conn)
+    timestamp = _get_timestamp(conn)
 
     if is_sqlite:
         op.execute("PRAGMA foreign_keys=off")

From f12e0077675cc79ef5495fd7ea425cea60a49f28 Mon Sep 17 00:00:00 2001
From: Aneesh Joseph
Date: Wed, 26 May 2021 07:26:13 +0530
Subject: [PATCH 36/36] add back potiuk's fix which I removed in error with a force push

---
 scripts/ci/testing/ci_run_airflow_testing.sh | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/scripts/ci/testing/ci_run_airflow_testing.sh b/scripts/ci/testing/ci_run_airflow_testing.sh
index fa8c0448267e3..9368b873087e1 100755
--- a/scripts/ci/testing/ci_run_airflow_testing.sh
+++ b/scripts/ci/testing/ci_run_airflow_testing.sh
@@ -96,6 +96,12 @@ function run_all_test_types_in_parallel() {
             # Remove Integration from list of tests to run in parallel
             test_types_to_run="${test_types_to_run//Integration/}"
             run_integration_tests_separately="true"
+            if [[ ${BACKEND} == "mssql" ]]; then
+                # Skip running "Integration" tests for mssql due to low memory conditions
+                run_integration_tests_separately="false"
+            else
+                run_integration_tests_separately="true"
+            fi
         fi
     fi
     set +e
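The helpers renamed in PATCH 35 encode the other recurring MSSQL quirk in this series: DATETIME2 only exists on SQL Server 2008 and later, so the migrations probe the server's product version before picking a column type. DATETIME2(6) stores microseconds and therefore round-trips Python datetime values, while the legacy DATETIME type is accurate only to roughly 3 milliseconds. A condensed sketch of the version gate, assuming a plain SQLAlchemy connection; the name mssql_timestamp_type is illustrative, not from the patches:

    import sqlalchemy as sa
    from sqlalchemy.dialects import mssql


    def mssql_timestamp_type(conn):
        """Pick the most precise timestamp type the connected server supports."""
        version = conn.execute(
            sa.text("SELECT CONVERT(VARCHAR(128), SERVERPROPERTY('productversion'))")
        ).fetchone()[0]
        # productversion starts with '8' on SQL Server 2000 and '9' on 2005,
        # the only releases handled here that lack the DATETIME2 type.
        if version.split('.')[0] in ('8', '9'):
            return mssql.DATETIME  # ~3.33 ms accuracy is the best available here
        return mssql.DATETIME2(precision=6)  # microsecond precision, matches Python

Relatedly, recreate_mssql_ts_column in the consolidated migration only acts on empty tables: an MSSQL TIMESTAMP (rowversion) column cannot be altered to a datetime type in place, so it is dropped and re-added, which is only safe when the table holds no rows.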