From 9a18ce65338c09daf4d9c8a36374c2151e6c1b75 Mon Sep 17 00:00:00 2001 From: cjames23 Date: Tue, 27 Jun 2023 21:34:01 -0700 Subject: [PATCH 01/30] Create Chime Notifier --- .../amazon/aws/notifications/chime.py | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 tests/providers/amazon/aws/notifications/chime.py diff --git a/tests/providers/amazon/aws/notifications/chime.py b/tests/providers/amazon/aws/notifications/chime.py new file mode 100644 index 0000000000000..5cf6f2cadd11a --- /dev/null +++ b/tests/providers/amazon/aws/notifications/chime.py @@ -0,0 +1,74 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +from unittest import mock + +from airflow.models import Connection +from airflow.operators.empty import EmptyOperator +from airflow.providers.amazon.aws.hooks.chime import ChimeWebhookHook +from airflow.providers.amazon.aws.notifications.chime import ChimeNotifier, send_chime_notification +from airflow.utils import db + + +class TestChimeNotifier: + # Chime webhooks can't really have a default connection, so we need to create one for tests. 
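+    # For reference: these dummy fixture values roughly compose the webhook URL the hook
+    # calls, i.e. {schema}://{host}{password} ->
+    # https://hooks.chime.aws/incomingwebhooks/abcd-1134-ZeDA?token=somechimetoken111
+    # (none of these are real credentials).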
+ def setup_method(self): + db.merge_conn( + Connection( + conn_id="default-chime-webhook", + conn_type="chime", + host="hooks.chime.aws/incomingwebhooks/", + password="abcd-1134-ZeDA?token=somechimetoken111", + schema="https", + ) + ) + + @mock.patch.object(ChimeWebhookHook, "send_message") + def test_chime_notifier(self, mock_chime_hook, dag_maker): + with dag_maker("test_chime_notifier") as dag: + EmptyOperator(task_id="task1") + + notifier = send_chime_notification( + chime_conn_id="default-chime-webhook", message="Chime Test Message" + ) + notifier({"dag": dag}) + mock_chime_hook.assert_called_once_with(message="Chime Test Message") + + @mock.patch.object(ChimeWebhookHook, "send_message") + def test_chime_notifier_with_notifier_class(self, mock_chime_hook, dag_maker): + with dag_maker("test_chime_notifier") as dag: + EmptyOperator(task_id="task1") + + notifier = ChimeNotifier( + chime_conn_id="default-chime-webhook", message="Test Chime Message for Class" + ) + notifier({"dag": dag}) + mock_chime_hook.assert_called_once_with(message="Test Chime Message for Class") + + @mock.patch.object(ChimeWebhookHook, "send_message") + def test_chime_notifier_templated(self, mock_chime_hook, dag_maker): + with dag_maker("test_chime_notifier") as dag: + EmptyOperator(task_id="task1") + + notifier = send_chime_notification( + chime_conn_id="default-chime-webhook", message="test {{ username }}" + ) + context = {"dag": dag} + notifier(context) + mock_chime_hook.return_value.call.assert_called_once_with(message="test {{ username }}") From 6e452611b847b5d6c81129424eae8a2bde2e53db Mon Sep 17 00:00:00 2001 From: cjames23 Date: Tue, 27 Jun 2023 21:39:58 -0700 Subject: [PATCH 02/30] Rename chime notifier test file --- .../amazon/aws/notifications/chime.py | 74 ------------------- 1 file changed, 74 deletions(-) delete mode 100644 tests/providers/amazon/aws/notifications/chime.py diff --git a/tests/providers/amazon/aws/notifications/chime.py b/tests/providers/amazon/aws/notifications/chime.py deleted file mode 100644 index 5cf6f2cadd11a..0000000000000 --- a/tests/providers/amazon/aws/notifications/chime.py +++ /dev/null @@ -1,74 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from __future__ import annotations - -from unittest import mock - -from airflow.models import Connection -from airflow.operators.empty import EmptyOperator -from airflow.providers.amazon.aws.hooks.chime import ChimeWebhookHook -from airflow.providers.amazon.aws.notifications.chime import ChimeNotifier, send_chime_notification -from airflow.utils import db - - -class TestChimeNotifier: - # Chime webhooks can't really have a default connection, so we need to create one for tests. 
- def setup_method(self): - db.merge_conn( - Connection( - conn_id="default-chime-webhook", - conn_type="chime", - host="hooks.chime.aws/incomingwebhooks/", - password="abcd-1134-ZeDA?token=somechimetoken111", - schema="https", - ) - ) - - @mock.patch.object(ChimeWebhookHook, "send_message") - def test_chime_notifier(self, mock_chime_hook, dag_maker): - with dag_maker("test_chime_notifier") as dag: - EmptyOperator(task_id="task1") - - notifier = send_chime_notification( - chime_conn_id="default-chime-webhook", message="Chime Test Message" - ) - notifier({"dag": dag}) - mock_chime_hook.assert_called_once_with(message="Chime Test Message") - - @mock.patch.object(ChimeWebhookHook, "send_message") - def test_chime_notifier_with_notifier_class(self, mock_chime_hook, dag_maker): - with dag_maker("test_chime_notifier") as dag: - EmptyOperator(task_id="task1") - - notifier = ChimeNotifier( - chime_conn_id="default-chime-webhook", message="Test Chime Message for Class" - ) - notifier({"dag": dag}) - mock_chime_hook.assert_called_once_with(message="Test Chime Message for Class") - - @mock.patch.object(ChimeWebhookHook, "send_message") - def test_chime_notifier_templated(self, mock_chime_hook, dag_maker): - with dag_maker("test_chime_notifier") as dag: - EmptyOperator(task_id="task1") - - notifier = send_chime_notification( - chime_conn_id="default-chime-webhook", message="test {{ username }}" - ) - context = {"dag": dag} - notifier(context) - mock_chime_hook.return_value.call.assert_called_once_with(message="test {{ username }}") From 5df0196085017595c3d6900973b87bfb8a3c4052 Mon Sep 17 00:00:00 2001 From: cjames23 Date: Fri, 29 Sep 2023 20:18:55 -0700 Subject: [PATCH 03/30] Create open search hook and operators for search, index, and add document. --- .../providers/amazon/aws/hooks/opensearch.py | 126 ++++++++++++ .../amazon/aws/operators/opensearch.py | 189 ++++++++++++++++++ airflow/providers/amazon/provider.yaml | 38 ++-- .../operators/opensearch.rst | 86 ++++++++ .../aws/Amazon-OpenSearch-light.png | Bin 0 -> 1955 bytes generated/provider_dependencies.json | 6 + .../amazon/aws/hooks/test_opensearch.py | 53 +++++ .../amazon/aws/example_opensearch.py | 148 ++++++++++++++ 8 files changed, 632 insertions(+), 14 deletions(-) create mode 100644 airflow/providers/amazon/aws/hooks/opensearch.py create mode 100644 airflow/providers/amazon/aws/operators/opensearch.py create mode 100644 docs/apache-airflow-providers-amazon/operators/opensearch.rst create mode 100644 docs/integration-logos/aws/Amazon-OpenSearch-light.png create mode 100644 tests/providers/amazon/aws/hooks/test_opensearch.py create mode 100644 tests/system/providers/amazon/aws/example_opensearch.py diff --git a/airflow/providers/amazon/aws/hooks/opensearch.py b/airflow/providers/amazon/aws/hooks/opensearch.py new file mode 100644 index 0000000000000..8306bf0c13ef6 --- /dev/null +++ b/airflow/providers/amazon/aws/hooks/opensearch.py @@ -0,0 +1,126 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import json +from typing import Any + +from opensearchpy import AWSV4SignerAuth, OpenSearch, RequestsHttpConnection + +from airflow.exceptions import AirflowException +from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook + + +class OpenSearchHook(AwsBaseHook): + """ + This Hook provides a thin wrapper around the OpenSearch client. + + :param: open_search_conn_id: AWS Connection to use with Open Search + :param: log_query: Whether to log the query used for Open Search + """ + + def __init__(self, *args: Any, open_search_conn_id: str, log_query: bool, **kwargs: Any): + super().__init__(*args, **kwargs) + self.conn_id = open_search_conn_id + self.log_query = log_query + + conn = self.get_connection(self.conn_id) + self.use_ssl = conn.extra_dejson.get("use_ssl", False) + self.verify_certs = conn.extra_dejson.get("verify_certs", False) + + self.__SERVICE = "es" + self._credentials = self.get_credentials(self.region_name) + self._auth = AWSV4SignerAuth(self._credentials, self.region_name, self.__SERVICE) + + self.client = OpenSearch( + hosts=[{"host": conn.host, "port": conn.port}], + http_auth=self._auth, + use_ssl=self.use_ssl, + verify_certs=self.verify_certs, + connection_class=RequestsHttpConnection, + ) + + def get_client(self) -> OpenSearch: + """ + + This function is intended for Operators that will take in arguments and use the high level + OpenSearch client which allows using Python objects to perform searches. + + """ + return self.client + + def search(self, query: dict, index_name: str, **kwargs: Any) -> Any: + """ + Runs a search query against the connected OpenSearch cluster. + + :param: query: The query for the search against OpenSearch. + :param: index_name: The name of the index to search against + """ + if self.log_query: + self.log.info("Searching %s with Query: %s", index_name, query) + return self.client.search(body=query, index=index_name, **kwargs) + + def index(self, document: dict, index_name: str, doc_id: int, **kwargs: Any) -> Any: + """ + Index a document on open search. + + :param: document: A dictionary representation of the document + :param: index_name: the name of the index that this document will be associated with + :param: doc_id: the numerical identifier that will be used to identify the document on the index. + """ + return self.client.index(index=index_name, id=doc_id, body=document, **kwargs) + + def delete(self, index_name: str, query: dict | None = None, doc_id: int | None = None): + """ + Delete from an index by either a query or by the document id. + + :param: index_name: the name of the index to delete from + :param: query: If deleting by query a dict representation of the query to run to + identify documents to delete. + :param: doc_id: The identifier of the document to delete. 
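+
+        A minimal usage sketch, assuming ``hook`` is an instance of this class (the index
+        name, query, and document id below are illustrative only)::
+
+            hook.delete(index_name="example-index", doc_id=1)
+            hook.delete(index_name="example-index", query={"query": {"match": {"media_type": "Movie"}}})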
+ """ + if query is not None: + if self.log_query: + self.log.info("Deleting from %s using Query: %s", index_name, query) + return self.client.delete_by_query(index=index_name, body=query) + elif doc_id is not None: + return self.client.delete(index=index_name, id=doc_id) + else: + AirflowException("To delete a document you must include one of either a query or a document id. ") + + @staticmethod + def get_ui_field_behaviour() -> dict[str, Any]: + """Returns custom UI field behaviour for Amazon Open Search Connection.""" + return { + "hidden_fields": ["schema"], + "relabeling": { + "host": "OpenSearch Cluster Endpoint", + "login": "AWS Access Key ID", + "password": "AWS Secret Access Key", + "extra": "Open Search Configuration", + }, + "placeholders": { + "extra": json.dumps( + { + "use_ssl": True, + "verify_certs": True, + }, + indent=2, + ), + }, + } diff --git a/airflow/providers/amazon/aws/operators/opensearch.py b/airflow/providers/amazon/aws/operators/opensearch.py new file mode 100644 index 0000000000000..c8548cd57a81e --- /dev/null +++ b/airflow/providers/amazon/aws/operators/opensearch.py @@ -0,0 +1,189 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +from functools import cached_property +from typing import TYPE_CHECKING, Any, Sequence + +from airflow.exceptions import AirflowException +from airflow.models import BaseOperator +from airflow.providers.amazon.aws.hooks.opensearch import OpenSearchHook + +if TYPE_CHECKING: + from opensearch_dsl.document import Document + from opensearch_dsl.search import Search + + from airflow.utils.context import Context + + +class OpenSearchQueryOperator(BaseOperator): + """ + Runs a query search against a given index on an AWS OpenSearch cluster and returns results. + + .. seealso:: + For more information on how to use this operator, take a look at the guide: + :ref:`howto/operator:OpenSearchSearchOperator` + + :param: query: A Dictionary Open Search DSL query. + :param: search_object: A Search object from opensearch-dsl. + :param: index_name: The name of the index to search for documents. + :param: aws_conn_id: aws connection to use + :param: log_query: Whether to log the query used. Defaults to True and logs query used. 
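+
+        A minimal usage sketch (the index name and query below are illustrative only):
+
+        .. code-block:: python
+
+            search_task = OpenSearchQueryOperator(
+                task_id="search_example",
+                index_name="example-index",
+                query={"size": 5, "query": {"match": {"title": "example"}}},
+            )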
+ """ + + template_fields: Sequence[str] = ["query"] + + def __init__( + self, + *, + query: dict | None = None, + search_object: Search | None = None, + index_name: str | None = None, + aws_conn_id: str = "aws_default", + log_query: bool = True, + **kwargs, + ) -> None: + super().__init__(**kwargs) + self.query = query + self.index_name = index_name + self.aws_conn_id = aws_conn_id + self.log_query = log_query + self.search_object = search_object + + @cached_property + def hook(self) -> OpenSearchHook: + """Gets an instance of an OpenSearchHook.""" + return OpenSearchHook(open_search_conn_id=self.aws_conn_id, log_query=self.log_query) + + def execute(self, context: Context) -> Any: + """Executes a search against a given index or a Search object on an AWS OpenSearch Cluster.""" + result = None + + if self.query is not None: + if not self.query.get("query"): + raise AirflowException("Query input is missing required field Query in dictionary") + if self.index_name is None: + raise AirflowException("Index name is required when using the query input.") + try: + result = self.hook.search(index_name=self.index_name, query=self.query) + except Exception as e: + raise AirflowException(e) + elif self.search_object is not None: + try: + result = self.search_object.using(self.hook.get_client()).execute() + except Exception as e: + raise AirflowException(e) + else: + raise AirflowException( + """Input missing required input of query or search_object. + Either query or search_object is required.""" + ) + return result + + +class OpenSearchCreateIndexOperator(BaseOperator): + """ + Creates a new index on an AWS Open Search cluster with a given index name. + + .. seealso:: + For more information on how to use this operator, take a look at the guide: + :ref:`howto/operator:OpenSearchCreateIndexOperator` + + :param: index_name: The name of the index to be created. + :param: index_body: A dictionary that defines index settings + :param: aws_conn_id: aws connection to use + """ + + def __init__( + self, *, index_name: str, index_body: dict[str, Any], aws_conn_id: str = "aws_default", **kwargs + ) -> None: + super().__init__(**kwargs) + self.index_name = index_name + self.index_body = index_body + self.aws_conn_id = aws_conn_id + + @cached_property + def hook(self) -> OpenSearchHook: + """Gets an instance of an OpenSearchHook.""" + return OpenSearchHook(open_search_conn_id=self.aws_conn_id, log_query=False) + + def execute(self, context: Context) -> Any: + """Creates an index on an AWS Open Search cluster.""" + try: + self.hook.get_client().indices.create(index=self.index_name, body=self.index_body) + except Exception as e: + raise AirflowException(e) + + +class OpenSearchAddDocumentOperator(BaseOperator): + """ + Runs a query search against a given index on an AWS OpenSearch cluster and returns results. + + .. seealso:: + For more information on how to use this operator, take a look at the guide: + :ref:`howto/operator:OpenSearchAddDocumentOperator` + + :param: index_name: The name of the index to put the document. + :param: document: A dictionary representation of the document. + :param: document_id: The id for the document in the index. 
+ :param: doc_class: A Document subclassed object using opensearch-dsl + :param: aws_conn_id: aws connection to use + """ + + def __init__( + self, + *, + index_name: str | None = None, + document: dict[str, Any] | None = None, + doc_id: int | None = None, + doc_class: Document | None = None, + aws_conn_id: str = "aws_default", + **kwargs, + ) -> None: + super().__init__(**kwargs) + self.index_name = index_name + self.document = document + self.doc_id = doc_id + self.doc_class = doc_class + self.aws_conn_id = aws_conn_id + + @cached_property + def hook(self) -> OpenSearchHook: + """Gets an instance of an OpenSearchHook.""" + return OpenSearchHook(open_search_conn_id=self.aws_conn_id, log_query=False) + + def execute(self, context: Context) -> Any: + """Saves a document to a given index on an AWS OpenSearch cluster.""" + if self.doc_class is not None: + try: + result = self.doc_class.save(using=self.hook.get_client()) + except Exception as e: + raise AirflowException(e) + elif self.index_name is not None and self.document is not None and self.doc_id is not None: + try: + result = self.hook.index( + index_name=self.index_name, document=self.document, doc_id=self.doc_id + ) + except Exception as e: + raise AirflowException(e) + else: + raise AirflowException( + "Index name, document dictionary and doc_id or a Document subclassed object is required." + ) + + return result diff --git a/airflow/providers/amazon/provider.yaml b/airflow/providers/amazon/provider.yaml index b9e2e03131622..c8b7811cb8068 100644 --- a/airflow/providers/amazon/provider.yaml +++ b/airflow/providers/amazon/provider.yaml @@ -23,14 +23,6 @@ description: | suspended: false versions: - - 8.7.1 - - 8.7.0 - - 8.6.0 - - 8.5.1 - - 8.5.0 - - 8.4.0 - - 8.3.1 - - 8.3.0 - 8.2.0 - 8.1.0 - 8.0.0 @@ -72,9 +64,9 @@ dependencies: - apache-airflow-providers-http # We should update minimum version of boto3 and here regularly to avoid `pip` backtracking with the number # of candidates to consider. We should also make sure that all the below related packages have also the - # same minimum version specified. Boto3 1.28.0 has been released on July 6 2023. We should also make sure we - # set it to the version that `aiobotocore` supports (see `aiobotocore` optional dependency at the end - # of this file). Currently we set aiobotocore as minimum 2.5.3 - as this is was the first version + # same minimum version specified. Boto3 1.28.0 has been released on July 6, 2023. We should also make + # sure we set it to the version that `aiobotocore` supports (see `aiobotocore` optional dependency at + # the end of this file). Currently, we set aiobotocore as minimum 2.5.3 - as this it was the first version # that supported boto3 1.28. NOTE!!! BOTOCORE VERSIONS ARE SHIFTED BY 3 MINOR VERSIONS # NOTE!!! 
Make sure to update _MIN_BOTO3_VERSION in setup.py when you update it here - boto3>=1.28.0 @@ -89,7 +81,14 @@ dependencies: - jsonpath_ng>=1.5.3 - redshift_connector>=2.0.888 - sqlalchemy_redshift>=0.8.6 + - mypy-boto3-rds>=1.24.0 + - mypy-boto3-redshift-data>=1.24.0 + - mypy-boto3-appflow>=1.24.0 - asgiref + - mypy-boto3-s3>=1.24.0 + - opensearch-py>=2.2.0 + - opensearch-dsl>=2.1.0 + integrations: - integration-name: Amazon Athena @@ -292,6 +291,12 @@ integrations: how-to-guide: - /docs/apache-airflow-providers-amazon/operators/appflow.rst tags: [aws] + - integration-name: AWS Open Search + external-doc-url: https://aws.amazon.com/opensearch-service/ + logo: /integration-logos/aws/Amazon-OpenSearch-light.png + how-to-guide: + - /docs/apache-airflow-providers-amazon/operators/opensearch.rst + tags: [aws] operators: - integration-name: Amazon Athena @@ -365,6 +370,9 @@ operators: - integration-name: Amazon Appflow python-modules: - airflow.providers.amazon.aws.operators.appflow + - integration-name: AWS Open Search + python-modules: + - airflow.providers.amazon.aws.operators.opensearch sensors: - integration-name: Amazon Athena @@ -538,6 +546,9 @@ hooks: - integration-name: Amazon Appflow python-modules: - airflow.providers.amazon.aws.hooks.appflow + - integration-name: Amazon Open Search + python-modules: + - airflow.providers.amazon.aws.hooks.opensearch triggers: - integration-name: Amazon Web Services @@ -552,9 +563,6 @@ triggers: - integration-name: Amazon EC2 python-modules: - airflow.providers.amazon.aws.triggers.ec2 - - integration-name: AWS Lambda - python-modules: - - airflow.providers.amazon.aws.triggers.lambda_function - integration-name: Amazon Redshift python-modules: - airflow.providers.amazon.aws.triggers.redshift_cluster @@ -685,6 +693,8 @@ connection-types: connection-type: emr - hook-class-name: airflow.providers.amazon.aws.hooks.redshift_sql.RedshiftSQLHook connection-type: redshift + - hook-class-name: airflow.providers.amazon.aws.hooks.opensearch.OpenSearchHook + connection-type: opensearch notifications: - airflow.providers.amazon.aws.notifications.chime.ChimeNotifier diff --git a/docs/apache-airflow-providers-amazon/operators/opensearch.rst b/docs/apache-airflow-providers-amazon/operators/opensearch.rst new file mode 100644 index 0000000000000..bfd663b79ff2b --- /dev/null +++ b/docs/apache-airflow-providers-amazon/operators/opensearch.rst @@ -0,0 +1,86 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +==================== +Amazon OpenSearch +==================== + +`Amazon Open Search `__ Amazon OpenSearch Service makes it +easy for you to perform interactive log analytics, real-time application monitoring, website search, and more. +OpenSearch is an open source, distributed search and analytics suite derived from Elasticsearch. 
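The operators on this page authenticate through an Airflow connection. A minimal sketch of such a
connection, built from the fields the hook exposes (the endpoint and credential values below are
placeholders, not real ones):

.. code-block:: python

    from airflow.models import Connection

    # "host" is the OpenSearch cluster endpoint; the extras mirror the hook's
    # "Open Search Configuration" placeholder (use_ssl / verify_certs).
    conn = Connection(
        conn_id="aws_default",
        conn_type="opensearch",
        host="my-domain.us-east-1.es.amazonaws.com",
        login="<AWS Access Key ID>",
        password="<AWS Secret Access Key>",
        extra={"use_ssl": True, "verify_certs": True},
    )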
+
Prerequisite Tasks
------------------

.. include:: ../../_partials/prerequisite_tasks.rst

Operators
---------

.. _howto/operator:OpenSearchCreateIndexOperator:

Create an Index on an Open Search Domain
=========================================

Use the :class:`OpenSearchCreateIndexOperator <airflow.providers.amazon.aws.operators.opensearch.OpenSearchCreateIndexOperator>`
to create a new index on an Amazon OpenSearch cluster.


.. exampleinclude:: /../../tests/system/providers/amazon/aws/example_opensearch.py
    :language: python
    :dedent: 4
    :start-after: [START howto_operator_opensearch_index]
    :end-before: [END howto_operator_opensearch_index]

.. _howto/operator:OpenSearchAddDocumentOperator:

Add a document to an Index on an Open Search Domain
====================================================

Use the :class:`OpenSearchAddDocumentOperator <airflow.providers.amazon.aws.operators.opensearch.OpenSearchAddDocumentOperator>`
to add a new document to a specified index on an Amazon OpenSearch cluster.


.. exampleinclude:: /../../tests/system/providers/amazon/aws/example_opensearch.py
    :language: python
    :dedent: 4
    :start-after: [START howto_operator_opensearch_document]
    :end-before: [END howto_operator_opensearch_document]


.. _howto/operator:OpenSearchSearchOperator:

Run a query on an Amazon OpenSearch cluster
============================================

Use the :class:`OpenSearchQueryOperator <airflow.providers.amazon.aws.operators.opensearch.OpenSearchQueryOperator>`
to run search queries against a given index on an Amazon OpenSearch cluster.


.. exampleinclude:: /../../tests/system/providers/amazon/aws/example_opensearch.py
    :language: python
    :dedent: 4
    :start-after: [START howto_operator_opensearch_search]
    :end-before: [END howto_operator_opensearch_search]


Reference
---------
* `Open Search High Level Client `__
* `Open Search Low Level Client `__
diff --git a/docs/integration-logos/aws/Amazon-OpenSearch-light.png b/docs/integration-logos/aws/Amazon-OpenSearch-light.png
new file mode 100644
index 0000000000000000000000000000000000000000..a51d300b3bb8339ba6585bf0051b79c9cc47bc69
GIT binary patch
literal 1955
zcmV;U2VD4xP)BvF-BtgrL%zvD!9OZ_w7;RA_zx6GbaXW!d8KYIuuVg2p&-zjBq%
zj;PikW4fZa-AjDLV~onX$>rMMfPj+muEUw9ivWGPTWcHC<_NxS0-?}}JKPAXR
bJqiTD?=VGdI<18Qsm*Z`%!O3QsmKZXR<_AW$Iqb#48Cwcbyc3t0&R)tE(mZr52ID>49358Py|x68+T#@Rb{Mw>n=0Kh$J09Z1MSgq{SWiwrjTIh^2mS`9Z&{~+B>U7@xu>C&TG`<-}HWmzJ+xY4H
@xquL^ljri!_?UTOd?5g3cu=&hU-5~Cb=jB`@arA!Lzi(!{PCUZ(Wz&Q8`*S~AxvAwz|;gf{CudMvF|Dxs~=K!$=-X|E76P(tyf
rsRG+j1VS84pSf;JGeuF61w{h@ItpzikysZD1Q)?$UuRetsb1Inrc?h`H-J(NYa^{
zHF7$m->RMLrJf81A*cwBJ5-ch?oTRs@r)k;kH~o=i94B+%?d?i{#Q@>&aAX7iO!gjxH~jwDch6-^fZlIM$JIjH6X2
zyY1iH-_-rx&Mj3}ae5eXax$)14vTy{@o?`*ocV2!Mz3iiEhl>etA(6dZiL6RCpxoa
zJ0*GnN|FlUzbm!DO~Vz<2v&|zs-1C+=^xbR&08ZbVS_>35ZpDUPqaAVhgF6r?woBL+Rl6T4{dT{+}^C^x40v3
zbXR2@Tk6a1gMNkN5%Q{^L{w_PPO)FwWH)y<4D9L)!a_a^Y(K0>l02f+;v%({G#VJJ
zanXLbP?2LD7tv`$Nu#04GMW}?5rwn67>z`RhoX$EN-u_NK%?+D#V;-QY-RbD-*$ur
z4Sq0b5b#)eSu&@=lD}({45Gr)n#cti@7gk5DCZ_?)Q64af{dR`OnzK9#_eNK7jj|Q
zyY(QklbJbtzzsk8W+2wXuh?IM~<=FD`+KR6$SEu@2${aeNaacNG^?QD;|^UYRhWX
3%E-P1Uxxa)DYOV=yPL()8iV?2J4=yllEpFXxewjC1h!6kQBi`Z9gIXK#ha|i4>e7JFt0=SPoxE0n*;9-R*zCEMc0Yq5QnC!st6ajfOAdd
+^-Lv9#hzs=U{mj@2t7LHvp(~e^S6LJH8hLIQffU58ij0%HOp3P-?X*`v
KJq@{5ZAit#)YxLO89}6fND2aP|;AZcb@xy`8<`*AVpJU9^Z*;JyEj37Z0LHLkNX&
p^kicT;n5$2!O@Rb>%{41>i-dwevl6;o0b3o002ovPDHLkV1iGC!-xO?
literal 0 HcmV?d00001 diff --git a/generated/provider_dependencies.json b/generated/provider_dependencies.json index 1eecaa04b958a..254ecb13e45bb 100644 --- a/generated/provider_dependencies.json +++ b/generated/provider_dependencies.json @@ -28,6 +28,12 @@ "boto3>=1.28.0", "botocore>=1.31.0", "jsonpath_ng>=1.5.3", + "mypy-boto3-appflow>=1.24.0", + "mypy-boto3-rds>=1.24.0", + "mypy-boto3-redshift-data>=1.24.0", + "mypy-boto3-s3>=1.24.0", + "opensearch-dsl>=2.1.0", + "opensearch-py>=2.2.0", "redshift_connector>=2.0.888", "sqlalchemy_redshift>=0.8.6", "watchtower~=2.0.1" diff --git a/tests/providers/amazon/aws/hooks/test_opensearch.py b/tests/providers/amazon/aws/hooks/test_opensearch.py new file mode 100644 index 0000000000000..0ae5562e22699 --- /dev/null +++ b/tests/providers/amazon/aws/hooks/test_opensearch.py @@ -0,0 +1,53 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import pytest + +from airflow.models import Connection +from airflow.providers.amazon.aws.hooks.opensearch import OpenSearchHook +from airflow.utils import db + + +class TestOpenSearchHook: + def setup_method(self): + db.merge_conn( + Connection( + conn_id="open_search_conn", + conn_type="open_search", + host="myhost.opensearch.com", + login="MyAWSSecretID", + password="MyAccessKey", + ) + ) + + @pytest.fixture() + def mock_search(self, monkeypatch): + def mock_return(): + return {"status": "test"} + + monkeypatch.setattr(OpenSearchHook, "search", mock_return) + + def test_hook_search(self, mock_search): + hook = OpenSearchHook(open_search_conn_id="open_search_conn", log_query=True) + + result = hook.search( + index_name="testIndex", + query={"size": 1, "query": {"multi_match": {"query": "test", "fields": ["testField"]}}}, + ) + + assert result diff --git a/tests/system/providers/amazon/aws/example_opensearch.py b/tests/system/providers/amazon/aws/example_opensearch.py new file mode 100644 index 0000000000000..3dfc92e53361c --- /dev/null +++ b/tests/system/providers/amazon/aws/example_opensearch.py @@ -0,0 +1,148 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +from datetime import datetime + +import boto3 +from opensearch_dsl import Keyword, Search, Text +from opensearch_dsl.document import Document + +from airflow import DAG +from airflow.decorators import task +from airflow.models.baseoperator import chain +from airflow.providers.amazon.aws.operators.opensearch import ( + OpenSearchAddDocumentOperator, + OpenSearchCreateIndexOperator, + OpenSearchQueryOperator, +) +from airflow.utils.trigger_rule import TriggerRule +from tests.system.providers.amazon.aws.utils import ENV_ID_KEY, SystemTestContextBuilder + +DAG_ID = "example_opensearch" +INDEX_NAME = "example-index" + +sys_test_context_task = SystemTestContextBuilder().build() + + +class TestDoc(Document): + title = Text(fields={"raw": Keyword()}) + media_type = Text() + + class Index: + name = INDEX_NAME + + def save(self, **kwargs): + return super().save(**kwargs) + + +@task +def create_open_search_cluster(): + env_id = test_context[ENV_ID_KEY] + opensearch = boto3.client("opensearch") + opensearch.create_domain( + DomainName=f"{env_id}-opensearch-cluster", + EngineVersion="2.7", + ClusterConfig={ + "InstanceType": "t3.small.search", + "InstanceCount": 1, + "DedicatedMasterEnabled": False, + "ZoneAwarenessEnabled": False, + }, + ) + + +@task(trigger_rule=TriggerRule.ALL_DONE) +def delete_os_cluster(env_id: str): + boto3.client("opensearch").delete_domain(DomainName=f"{env_id}-opensearch-cluster") + + +with DAG( + dag_id=DAG_ID, + start_date=datetime(2023, 9, 1), + schedule="@once", + catchup=False, + tags=["example"], +) as dag: + test_context = sys_test_context_task() + create_cluster = create_open_search_cluster() + # [START howto_operator_opensearch_index] + create_index = OpenSearchCreateIndexOperator( + task_id="create_index_example", + index_name=INDEX_NAME, + index_body={"settings": {"index": {"number_of_shards": 1}}}, + ) + # [END howto_operator_opensearch_index] + + # [START howto_operator_opensearch_document] + add_low_doc = OpenSearchAddDocumentOperator( + task_id="add_low_level_document", + index_name=INDEX_NAME, + document={"title": "MontyPython", "media_type": "Movie"}, + doc_id=1, + ) + add_high_doc = OpenSearchAddDocumentOperator( + task_id="add_high_level_document", + doc_class=TestDoc(meta={"id": 2}, title="Top Gun", media_type="Movie"), + ) + + # [END howto_operator_opensearch_document] + + # [START howto_operator_opensearch_search] + search_low_docs = OpenSearchQueryOperator( + task_id="search_low_level", + index_name=INDEX_NAME, + query={ + "size": 5, + "query": {"multi_match": {"query": "MontyPython", "fields": ["title^2", "media_type"]}}, + }, + ) + + search_high_docs = OpenSearchQueryOperator( + task_id="search_high", + search_object=Search(index=INDEX_NAME) + .filter("term", media_type="Movie") + .query("match", title="Top Gun"), + ) + + # [END howto_operator_opensearch_search] + + remove_cluster = delete_os_cluster(env_id=test_context[ENV_ID_KEY]) + + chain( + # TEST SETUP + test_context, + create_cluster, + # TEST BODY + create_index, + add_low_doc, + add_high_doc, + search_low_docs, + search_high_docs, + # TEST TEAR DOWN + remove_cluster, + ) + from tests.system.utils.watcher import watcher + + # This test needs watcher in order to properly mark success/failure + # when "tearDown" task with trigger rule is part of the DAG + list(dag.tasks) >> watcher() + +from tests.system.utils import get_test_run # noqa: E402 + +# Needed 
to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest) +test_run = get_test_run(dag) From 851c3ab4a9456871b75e9aea4197be3c8c90de6b Mon Sep 17 00:00:00 2001 From: cjames23 Date: Fri, 29 Sep 2023 22:15:51 -0700 Subject: [PATCH 04/30] Fix system test, connection name, missing provider.yaml info and add opensearchqueryoperator unit test --- .../providers/amazon/aws/hooks/opensearch.py | 4 ++ .../amazon/aws/operators/opensearch.py | 24 +++---- airflow/providers/amazon/provider.yaml | 11 +++ .../amazon/aws/operators/test_opensearch.py | 67 +++++++++++++++++++ .../amazon/aws/example_opensearch.py | 3 +- 5 files changed, 96 insertions(+), 13 deletions(-) create mode 100644 tests/providers/amazon/aws/operators/test_opensearch.py diff --git a/airflow/providers/amazon/aws/hooks/opensearch.py b/airflow/providers/amazon/aws/hooks/opensearch.py index 8306bf0c13ef6..174904f3823d1 100644 --- a/airflow/providers/amazon/aws/hooks/opensearch.py +++ b/airflow/providers/amazon/aws/hooks/opensearch.py @@ -33,6 +33,10 @@ class OpenSearchHook(AwsBaseHook): :param: open_search_conn_id: AWS Connection to use with Open Search :param: log_query: Whether to log the query used for Open Search """ + conn_name_attr = "opensearch_conn_id" + default_conn_name = "opensearch_default" + conn_type = "opensearch" + hook_name = "AWS Open Search Hook" def __init__(self, *args: Any, open_search_conn_id: str, log_query: bool, **kwargs: Any): super().__init__(*args, **kwargs) diff --git a/airflow/providers/amazon/aws/operators/opensearch.py b/airflow/providers/amazon/aws/operators/opensearch.py index c8548cd57a81e..3bfd43450f9d6 100644 --- a/airflow/providers/amazon/aws/operators/opensearch.py +++ b/airflow/providers/amazon/aws/operators/opensearch.py @@ -42,7 +42,7 @@ class OpenSearchQueryOperator(BaseOperator): :param: query: A Dictionary Open Search DSL query. :param: search_object: A Search object from opensearch-dsl. :param: index_name: The name of the index to search for documents. - :param: aws_conn_id: aws connection to use + :param: opensearch_conn_id: opensearch connection to use :param: log_query: Whether to log the query used. Defaults to True and logs query used. """ @@ -54,21 +54,21 @@ def __init__( query: dict | None = None, search_object: Search | None = None, index_name: str | None = None, - aws_conn_id: str = "aws_default", + opensearch_conn_id: str = "opensearch_default", log_query: bool = True, **kwargs, ) -> None: super().__init__(**kwargs) self.query = query self.index_name = index_name - self.aws_conn_id = aws_conn_id + self.opensearch_conn_id = opensearch_conn_id self.log_query = log_query self.search_object = search_object @cached_property def hook(self) -> OpenSearchHook: """Gets an instance of an OpenSearchHook.""" - return OpenSearchHook(open_search_conn_id=self.aws_conn_id, log_query=self.log_query) + return OpenSearchHook(open_search_conn_id=self.opensearch_conn_id, log_query=self.log_query) def execute(self, context: Context) -> Any: """Executes a search against a given index or a Search object on an AWS OpenSearch Cluster.""" @@ -106,21 +106,21 @@ class OpenSearchCreateIndexOperator(BaseOperator): :param: index_name: The name of the index to be created. 
:param: index_body: A dictionary that defines index settings - :param: aws_conn_id: aws connection to use + :param: opensearch_conn_id: opensearch connection to use """ def __init__( - self, *, index_name: str, index_body: dict[str, Any], aws_conn_id: str = "aws_default", **kwargs + self, *, index_name: str, index_body: dict[str, Any], opensearch_conn_id: str = "opensearch_default", **kwargs ) -> None: super().__init__(**kwargs) self.index_name = index_name self.index_body = index_body - self.aws_conn_id = aws_conn_id + self.opensearch_conn_id = opensearch_conn_id @cached_property def hook(self) -> OpenSearchHook: """Gets an instance of an OpenSearchHook.""" - return OpenSearchHook(open_search_conn_id=self.aws_conn_id, log_query=False) + return OpenSearchHook(open_search_conn_id=self.opensearch_conn_id, log_query=False) def execute(self, context: Context) -> Any: """Creates an index on an AWS Open Search cluster.""" @@ -142,7 +142,7 @@ class OpenSearchAddDocumentOperator(BaseOperator): :param: document: A dictionary representation of the document. :param: document_id: The id for the document in the index. :param: doc_class: A Document subclassed object using opensearch-dsl - :param: aws_conn_id: aws connection to use + :param: opensearch_conn_id: opensearch connection to use """ def __init__( @@ -152,7 +152,7 @@ def __init__( document: dict[str, Any] | None = None, doc_id: int | None = None, doc_class: Document | None = None, - aws_conn_id: str = "aws_default", + opensearch_conn_id: str = "opensearch_default", **kwargs, ) -> None: super().__init__(**kwargs) @@ -160,12 +160,12 @@ def __init__( self.document = document self.doc_id = doc_id self.doc_class = doc_class - self.aws_conn_id = aws_conn_id + self.opensearch_conn_id = opensearch_conn_id @cached_property def hook(self) -> OpenSearchHook: """Gets an instance of an OpenSearchHook.""" - return OpenSearchHook(open_search_conn_id=self.aws_conn_id, log_query=False) + return OpenSearchHook(open_search_conn_id=self.opensearch_conn_id, log_query=False) def execute(self, context: Context) -> Any: """Saves a document to a given index on an AWS OpenSearch cluster.""" diff --git a/airflow/providers/amazon/provider.yaml b/airflow/providers/amazon/provider.yaml index c8b7811cb8068..0561e3b525137 100644 --- a/airflow/providers/amazon/provider.yaml +++ b/airflow/providers/amazon/provider.yaml @@ -23,6 +23,14 @@ description: | suspended: false versions: + - 8.7.1 + - 8.7.0 + - 8.6.0 + - 8.5.1 + - 8.5.0 + - 8.4.0 + - 8.3.1 + - 8.3.0 - 8.2.0 - 8.1.0 - 8.0.0 @@ -563,6 +571,9 @@ triggers: - integration-name: Amazon EC2 python-modules: - airflow.providers.amazon.aws.triggers.ec2 + - integration-name: AWS Lambda + python-modules: + - airflow.providers.amazon.aws.triggers.lambda_function - integration-name: Amazon Redshift python-modules: - airflow.providers.amazon.aws.triggers.redshift_cluster diff --git a/tests/providers/amazon/aws/operators/test_opensearch.py b/tests/providers/amazon/aws/operators/test_opensearch.py new file mode 100644 index 0000000000000..ecf69a328c162 --- /dev/null +++ b/tests/providers/amazon/aws/operators/test_opensearch.py @@ -0,0 +1,67 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+from unittest import mock
+
+import pytest
+
+from airflow.models import DAG, DagRun, TaskInstance
+from airflow.providers.amazon.aws.hooks.opensearch import OpenSearchHook
+from airflow.providers.amazon.aws.operators.opensearch import OpenSearchQueryOperator, \
+    OpenSearchAddDocumentOperator, OpenSearchCreateIndexOperator
+from airflow.utils import timezone
+from airflow.utils.timezone import datetime
+
+
+TEST_DAG_ID = "unit_tests"
+DEFAULT_DATE = datetime(2018, 1, 1)
+MOCK_TEST_DATA = {
+    "result": "success"
+}
+
+
+class TestOpenSearchQueryOperator:
+    def setup_method(self):
+        args = {
+            "owner": "airflow",
+            "start_date": DEFAULT_DATE,
+        }
+
+        self.dag = DAG(f"{TEST_DAG_ID}test_schedule_dag_once", default_args=args, schedule="@once")
+
+        self.open_search = OpenSearchQueryOperator(
+            task_id="test_opensearch_query_operator",
+            index_name="test_index",
+            query={
+                "size": 5,
+                "query": {"multi_match": {"query": "test", "fields": ["test_title^2", "test_type"]}},
+            },
+        )
+
+    def test_init(self):
+        assert self.open_search.task_id == "test_opensearch_query_operator"
+        assert self.open_search.opensearch_conn_id == "opensearch_default"
+        assert self.open_search.query["size"] == 5
+
+    @mock.patch.object(OpenSearchHook, "search", return_value=MOCK_TEST_DATA)
+    @mock.patch.object(OpenSearchHook, "get_conn")
+    def test_search_query(self, mock_get_conn, mock_search):
+        self.open_search.execute({})
+        mock_search.assert_called_once_with(
+            index_name="test_index",
+            query={"size": 5, "query": {"multi_match": {"query": "test", "fields": ["test_title^2", "test_type"]}}},
+        )
diff --git a/tests/system/providers/amazon/aws/example_opensearch.py b/tests/system/providers/amazon/aws/example_opensearch.py
index 3dfc92e53361c..9c1af7769caf6 100644
--- a/tests/system/providers/amazon/aws/example_opensearch.py
+++ b/tests/system/providers/amazon/aws/example_opensearch.py
@@ -40,6 +40,7 @@


 class TestDoc(Document):
+    __test__ = False
     title = Text(fields={"raw": Keyword()})
     media_type = Text()

@@ -49,7 +50,6 @@ class Index:

     def save(self, **kwargs):
         return super().save(**kwargs)
-
 @task
 def create_open_search_cluster():
From e595762a7534d1b41022392159f892939bd9d996 Mon Sep 17 00:00:00 2001
From: cjames23 
Date: Sat, 30 Sep 2023 09:18:40 -0700
Subject: [PATCH 05/30] Create opensearch provider and hook as base for amazon open search hook
---
 .../providers/amazon/aws/hooks/opensearch.py | 74 +++------
 airflow/providers/amazon/provider.yaml | 3 +-
 airflow/providers/opensearch/CHANGELOG.rst | 27 ++++
 airflow/providers/opensearch/__init__.py | 0
 .../providers/opensearch/hooks/__init__.py | 0
 .../providers/opensearch/hooks/opensearch.py | 127 ++++++++++++++++++
 airflow/providers/opensearch/provider.yaml | 46 +++++++
 .../amazon/aws/hooks/test_opensearch.py | 29 +++-
.../amazon/aws/operators/test_opensearch.py | 35 ++++- 9 files changed, 272 insertions(+), 69 deletions(-) create mode 100644 airflow/providers/opensearch/CHANGELOG.rst create mode 100644 airflow/providers/opensearch/__init__.py create mode 100644 airflow/providers/opensearch/hooks/__init__.py create mode 100644 airflow/providers/opensearch/hooks/opensearch.py create mode 100644 airflow/providers/opensearch/provider.yaml diff --git a/airflow/providers/amazon/aws/hooks/opensearch.py b/airflow/providers/amazon/aws/hooks/opensearch.py index 174904f3823d1..3315e3823fb83 100644 --- a/airflow/providers/amazon/aws/hooks/opensearch.py +++ b/airflow/providers/amazon/aws/hooks/opensearch.py @@ -23,41 +23,28 @@ from opensearchpy import AWSV4SignerAuth, OpenSearch, RequestsHttpConnection from airflow.exceptions import AirflowException +from airflow.providers.opensearch.hooks.opensearch import OpenSearchHook from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook -class OpenSearchHook(AwsBaseHook): +class AwsOpenSearchHook(OpenSearchHook, AwsBaseHook): """ This Hook provides a thin wrapper around the OpenSearch client. :param: open_search_conn_id: AWS Connection to use with Open Search :param: log_query: Whether to log the query used for Open Search """ - conn_name_attr = "opensearch_conn_id" - default_conn_name = "opensearch_default" + conn_name_attr = "aws_opensearch_conn_id" + default_conn_name = "aws_opensearch_default" conn_type = "opensearch" hook_name = "AWS Open Search Hook" def __init__(self, *args: Any, open_search_conn_id: str, log_query: bool, **kwargs: Any): super().__init__(*args, **kwargs) - self.conn_id = open_search_conn_id - self.log_query = log_query + self.region = self.conn.extra_dejson.get("region_name", self.region_name) - conn = self.get_connection(self.conn_id) - self.use_ssl = conn.extra_dejson.get("use_ssl", False) - self.verify_certs = conn.extra_dejson.get("verify_certs", False) - - self.__SERVICE = "es" - self._credentials = self.get_credentials(self.region_name) - self._auth = AWSV4SignerAuth(self._credentials, self.region_name, self.__SERVICE) - - self.client = OpenSearch( - hosts=[{"host": conn.host, "port": conn.port}], - http_auth=self._auth, - use_ssl=self.use_ssl, - verify_certs=self.verify_certs, - connection_class=RequestsHttpConnection, - ) + self._credentials = self.get_credentials(self.region) + self._auth = AWSV4SignerAuth(self._credentials, self.region, self.__SERVICE) def get_client(self) -> OpenSearch: """ @@ -66,47 +53,15 @@ def get_client(self) -> OpenSearch: OpenSearch client which allows using Python objects to perform searches. """ + self.client = OpenSearch( + hosts=[{"host": self.conn.host, "port": self.conn.port}], + http_auth=self._auth, + use_ssl=self.use_ssl, + verify_certs=self.verify_certs, + connection_class=RequestsHttpConnection, + ) return self.client - def search(self, query: dict, index_name: str, **kwargs: Any) -> Any: - """ - Runs a search query against the connected OpenSearch cluster. - - :param: query: The query for the search against OpenSearch. - :param: index_name: The name of the index to search against - """ - if self.log_query: - self.log.info("Searching %s with Query: %s", index_name, query) - return self.client.search(body=query, index=index_name, **kwargs) - - def index(self, document: dict, index_name: str, doc_id: int, **kwargs: Any) -> Any: - """ - Index a document on open search. 
- - :param: document: A dictionary representation of the document - :param: index_name: the name of the index that this document will be associated with - :param: doc_id: the numerical identifier that will be used to identify the document on the index. - """ - return self.client.index(index=index_name, id=doc_id, body=document, **kwargs) - - def delete(self, index_name: str, query: dict | None = None, doc_id: int | None = None): - """ - Delete from an index by either a query or by the document id. - - :param: index_name: the name of the index to delete from - :param: query: If deleting by query a dict representation of the query to run to - identify documents to delete. - :param: doc_id: The identifier of the document to delete. - """ - if query is not None: - if self.log_query: - self.log.info("Deleting from %s using Query: %s", index_name, query) - return self.client.delete_by_query(index=index_name, body=query) - elif doc_id is not None: - return self.client.delete(index=index_name, id=doc_id) - else: - AirflowException("To delete a document you must include one of either a query or a document id. ") - @staticmethod def get_ui_field_behaviour() -> dict[str, Any]: """Returns custom UI field behaviour for Amazon Open Search Connection.""" @@ -123,6 +78,7 @@ def get_ui_field_behaviour() -> dict[str, Any]: { "use_ssl": True, "verify_certs": True, + "region_name": "us-east-1" }, indent=2, ), diff --git a/airflow/providers/amazon/provider.yaml b/airflow/providers/amazon/provider.yaml index 0561e3b525137..767169f51ea4d 100644 --- a/airflow/providers/amazon/provider.yaml +++ b/airflow/providers/amazon/provider.yaml @@ -70,6 +70,7 @@ dependencies: - apache-airflow>=2.4.0 - apache-airflow-providers-common-sql>=1.3.1 - apache-airflow-providers-http + - apache-airflow-providers-opensearch # We should update minimum version of boto3 and here regularly to avoid `pip` backtracking with the number # of candidates to consider. We should also make sure that all the below related packages have also the # same minimum version specified. Boto3 1.28.0 has been released on July 6, 2023. We should also make @@ -94,8 +95,6 @@ dependencies: - mypy-boto3-appflow>=1.24.0 - asgiref - mypy-boto3-s3>=1.24.0 - - opensearch-py>=2.2.0 - - opensearch-dsl>=2.1.0 integrations: diff --git a/airflow/providers/opensearch/CHANGELOG.rst b/airflow/providers/opensearch/CHANGELOG.rst new file mode 100644 index 0000000000000..4cb4526149fa0 --- /dev/null +++ b/airflow/providers/opensearch/CHANGELOG.rst @@ -0,0 +1,27 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +.. NOTE TO CONTRIBUTORS: + Please, only add notes to the Changelog just below the "Changelog" header when there are some breaking changes + and you want to add an explanation to the users on how they are supposed to deal with them. 
+ The changelog is updated and maintained semi-automatically by release manager. + +``apache-airflow-providers-opensearch`` + +Changelog +--------- diff --git a/airflow/providers/opensearch/__init__.py b/airflow/providers/opensearch/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/airflow/providers/opensearch/hooks/__init__.py b/airflow/providers/opensearch/hooks/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/airflow/providers/opensearch/hooks/opensearch.py b/airflow/providers/opensearch/hooks/opensearch.py new file mode 100644 index 0000000000000..db6435c791483 --- /dev/null +++ b/airflow/providers/opensearch/hooks/opensearch.py @@ -0,0 +1,127 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import json +from functools import cached_property +from typing import Any + +from opensearchpy import OpenSearch, RequestsHttpConnection + +from airflow.exceptions import AirflowException +from airflow.hooks.base import BaseHook + + +class OpenSearchHook(BaseHook): + """ + This Hook provides a thin wrapper around the OpenSearch client. + + :param: open_search_conn_id: Connection to use with Open Search + :param: log_query: Whether to log the query used for Open Search + """ + conn_name_attr = "opensearch_conn_id" + default_conn_name = "opensearch_default" + conn_type = "opensearch" + hook_name = "OpenSearch Hook" + + def __init__(self, *args: Any, open_search_conn_id: str, log_query: bool, **kwargs: Any): + super().__init__(*args, **kwargs) + self.client = None + self.conn_id = open_search_conn_id + self.log_query = log_query + + self.conn = self.get_connection(self.conn_id) + self.use_ssl = self.conn.extra_dejson.get("use_ssl", False) + self.verify_certs = self.conn.extra_dejson.get("verify_certs", False) + self.__SERVICE = "es" + + @cached_property + def get_client(self) -> OpenSearch: + """ + + This function is intended for Operators that will take in arguments and use the high level + OpenSearch client which allows using Python objects to perform searches. + + """ + auth = (self.conn.login, self.conn.password) + self.client = OpenSearch( + hosts=[{"host": self.conn.host, "port": self.conn.port}], + http_auth=auth, + use_ssl=self.use_ssl, + verify_certs=self.verify_certs, + connection_class=RequestsHttpConnection, + ) + return self.client + + def search(self, query: dict, index_name: str, **kwargs: Any) -> Any: + """ + Runs a search query against the connected OpenSearch cluster. + + :param: query: The query for the search against OpenSearch. 
+        :param: index_name: The name of the index to search against
+        """
+        if self.log_query:
+            self.log.info("Searching %s with Query: %s", index_name, query)
+        return self.client.search(body=query, index=index_name, **kwargs)
+
+    def index(self, document: dict, index_name: str, doc_id: int, **kwargs: Any) -> Any:
+        """
+        Index a document on open search.
+
+        :param: document: A dictionary representation of the document
+        :param: index_name: the name of the index that this document will be associated with
+        :param: doc_id: the numerical identifier that will be used to identify the document on the index.
+        """
+        return self.client.index(index=index_name, id=doc_id, body=document, **kwargs)
+
+    def delete(self, index_name: str, query: dict | None = None, doc_id: int | None = None):
+        """
+        Delete from an index by either a query or by the document id.
+
+        :param: index_name: the name of the index to delete from
+        :param: query: If deleting by query a dict representation of the query to run to
+            identify documents to delete.
+        :param: doc_id: The identifier of the document to delete.
+        """
+        if query is not None:
+            if self.log_query:
+                self.log.info("Deleting from %s using Query: %s", index_name, query)
+            return self.client.delete_by_query(index=index_name, body=query)
+        elif doc_id is not None:
+            return self.client.delete(index=index_name, id=doc_id)
+        else:
+            raise AirflowException("To delete a document you must include one of either a query or a document id.")
+
+    @staticmethod
+    def get_ui_field_behaviour() -> dict[str, Any]:
+        """Returns custom UI field behaviour for Open Search Connection."""
+        return {
+            "hidden_fields": ["schema"],
+            "relabeling": {
+                "extra": "Open Search Configuration",
+            },
+            "placeholders": {
+                "extra": json.dumps(
+                    {
+                        "use_ssl": True,
+                        "verify_certs": True
+                    },
+                    indent=2,
+                ),
+            },
+        }
diff --git a/airflow/providers/opensearch/provider.yaml b/airflow/providers/opensearch/provider.yaml
new file mode 100644
index 0000000000000..a8e8e958de5be
--- /dev/null
+++ b/airflow/providers/opensearch/provider.yaml
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+---
+package-name: apache-airflow-providers-opensearch
+name: Opensearch
+description: |
+    `Opensearch <https://opensearch.org/>`__
+
+suspended: false
+versions:
+  - 1.0.0
+
+dependencies:
+  - apache-airflow>=2.4.3
+  - opensearch-py>=2.2.0
+  - opensearch-dsl>=2.1.0
+
+integrations:
+  - integration-name: Opensearch
+    external-doc-url: https://opensearch.org/
+    logo: /integration-logos/opensearch/opensearch.png
+    tags: [software]
+
+hooks:
+  - integration-name: Opensearch
+    python-modules:
+      - airflow.providers.opensearch.hooks.opensearch
+
+connection-types:
+  - hook-class-name: airflow.providers.opensearch.hooks.opensearch.OpenSearchHook
+    connection-type: opensearch
diff --git a/tests/providers/amazon/aws/hooks/test_opensearch.py b/tests/providers/amazon/aws/hooks/test_opensearch.py
index 0ae5562e22699..46d928a72c925 100644
--- a/tests/providers/amazon/aws/hooks/test_opensearch.py
+++ b/tests/providers/amazon/aws/hooks/test_opensearch.py
@@ -17,6 +17,8 @@
 from __future__ import annotations

 import pytest
+from moto import mock_opensearch
+import boto3

 from airflow.models import Connection
 from airflow.providers.amazon.aws.hooks.opensearch import OpenSearchHook
@@ -24,26 +26,43 @@


 class TestOpenSearchHook:
+    @mock_opensearch
+    def create_domain(self):
+        client = boto3.client("opensearch")
+        response = client.create_domain(DomainName=f"test-opensearch-cluster",
+                                        EngineVersion="2.7",
+                                        ClusterConfig={
+                                            "InstanceType": "t3.small.search",
+                                            "InstanceCount": 1,
+                                            "DedicatedMasterEnabled": False,
+                                            "ZoneAwarenessEnabled": False,
+                                        },)
+        return response["endpoint"]
+
     def setup_method(self):
         db.merge_conn(
             Connection(
-                conn_id="open_search_conn",
-                conn_type="open_search",
-                host="myhost.opensearch.com",
+                conn_id="opensearch_default",
+                conn_type="opensearch",
+                host=self.create_domain(),
                 login="MyAWSSecretID",
                 password="MyAccessKey",
+                extra={
+                    "region_name": "us-east-1"
+                }
             )
         )

     @pytest.fixture()
     def mock_search(self, monkeypatch):
-        def mock_return():
+        def mock_return(index_name: str):
             return {"status": "test"}

         monkeypatch.setattr(OpenSearchHook, "search", mock_return)

     def test_hook_search(self, mock_search):
-        hook = OpenSearchHook(open_search_conn_id="open_search_conn", log_query=True)
+        hook = OpenSearchHook(open_search_conn_id="opensearch_default",
+                              log_query=True)

         result = hook.search(
             index_name="testIndex",
diff --git a/tests/providers/amazon/aws/operators/test_opensearch.py b/tests/providers/amazon/aws/operators/test_opensearch.py
index ecf69a328c162..c27747be65ec9 100644
--- a/tests/providers/amazon/aws/operators/test_opensearch.py
+++ b/tests/providers/amazon/aws/operators/test_opensearch.py
@@ -19,14 +19,17 @@
 from unittest import mock

 import pytest
+import boto3
+from moto import mock_opensearch

-from airflow.models import DAG, DagRun, TaskInstance
+from airflow.models import DAG, DagRun, TaskInstance, Connection
 from airflow.providers.amazon.aws.hooks.opensearch import OpenSearchHook
 from airflow.providers.amazon.aws.operators.opensearch import OpenSearchQueryOperator, \
     OpenSearchAddDocumentOperator, OpenSearchCreateIndexOperator
-from airflow.utils import timezone
+from airflow.utils import timezone, db
 from airflow.utils.timezone import datetime

+
 TEST_DAG_ID = "unit_tests"
 DEFAULT_DATE = datetime(2018, 1, 1)
 MOCK_TEST_DATA = {
@@ -35,7 +38,33 @@


 class TestOpenSearchQueryOperator:
+
+    @mock_opensearch
+    def create_domain(self):
+        client = boto3.client("opensearch")
+        response = client.create_domain(DomainName=f"test-opensearch-cluster",
+                                        EngineVersion="2.7",
+                                        ClusterConfig={
+                                            "InstanceType": "t3.small.search",
+                                            "InstanceCount": 1,
+                                            "DedicatedMasterEnabled": False,
+                                            "ZoneAwarenessEnabled": False,
+                                        }, )
+        return response["endpoint"]
+
     def setup_method(self):
+        db.merge_conn(
+            Connection(
+                conn_id="opensearch_default",
+                conn_type="open_search",
+                host=self.create_domain(),
+                login="MyAWSSecretID",
+                password="MyAccessKey",
+                extra={
+                    "region_name": "us-east-1"
+                }
+            )
+        )
         args = {
             "owner": "airflow",
             "start_date": DEFAULT_DATE,
@@ -56,9 +85,9 @@ def test_init(self):
         assert self.open_search.task_id == "test_opensearch_query_operator"
         assert self.open_search.opensearch_conn_id == "opensearch_default"
         assert self.open_search.query["size"] == 5
+        assert self.open_search.hook.region == "us-east-1"

     @mock.patch.object(OpenSearchHook, "search", return_value=MOCK_TEST_DATA)
-    @mock.patch.object(OpenSearchHook, "get_conn")
     def test_search_query(self, mock_search):
         self.open_search.execute({})
         mock_search.assert_called_once_with(

From 1abf3a5ef31776d58f7ff72f38afb338245ce469 Mon Sep 17 00:00:00 2001
From: cjames23
Date: Sun, 1 Oct 2023 21:37:09 -0700
Subject: [PATCH 06/30] Create OpenSearch provider to enable AWS OpenSearch
 functionality, see PR #34693

---
 CONTRIBUTING.rst                              |   8 +-
 INSTALL                                       |   8 +-
 .../providers/opensearch/hooks/opensearch.py  |  25 ++--
 .../opensearch/operators/__init__.py          |  16 +++
 .../operators/opensearch.py                   |  29 ++--
 airflow/providers/opensearch/provider.yaml    |   8 +-
 .../commits.rst                               |  30 ++++
 .../index.rst                                 | 104 ++++++++++++++
 .../installing-providers-from-sources.rst     |  18 +++
 .../operators/opensearch.rst                  |  72 ++++++++++
 generated/provider_dependencies.json          |  11 +-
 tests/providers/opensearch/__init__.py        |  16 +++
 tests/providers/opensearch/conftest.py        |  60 ++++++++
 tests/providers/opensearch/hooks/__init__.py  |  16 +++
 .../opensearch/hooks/test_opensearch.py       |  37 +++++
 .../opensearch/operators/__init__.py          |  16 +++
 .../opensearch/operators/test_opensearch.py   | 126 +++++++++++++++++
 tests/system/providers/opensearch/__init__.py |  16 +++
 .../opensearch/example_opensearch.py          | 131 ++++++++++++++++++
 19 files changed, 709 insertions(+), 38 deletions(-)
 create mode 100644 airflow/providers/opensearch/operators/__init__.py
 rename airflow/providers/{amazon/aws => opensearch}/operators/opensearch.py (88%)
 create mode 100644 docs/apache-airflow-providers-opensearch/commits.rst
 create mode 100644 docs/apache-airflow-providers-opensearch/index.rst
 create mode 100644 docs/apache-airflow-providers-opensearch/installing-providers-from-sources.rst
 create mode 100644 docs/apache-airflow-providers-opensearch/operators/opensearch.rst
 create mode 100644 tests/providers/opensearch/__init__.py
 create mode 100644 tests/providers/opensearch/conftest.py
 create mode 100644 tests/providers/opensearch/hooks/__init__.py
 create mode 100644 tests/providers/opensearch/hooks/test_opensearch.py
 create mode 100644 tests/providers/opensearch/operators/__init__.py
 create mode 100644 tests/providers/opensearch/operators/test_opensearch.py
 create mode 100644 tests/system/providers/opensearch/__init__.py
 create mode 100644 tests/system/providers/opensearch/example_opensearch.py
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index b9858abfa99ed..aab84e7083ffd 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -676,10 +676,10 @@ deprecated_api, devel, devel_all, devel_ci, devel_hadoop, dingding, discord, doc
 druid, elasticsearch, exasol, facebook, ftp, gcp, gcp_api, github, github_enterprise, google,
 google_auth, grpc, hashicorp, hdfs, hive, http, imap, influxdb, jdbc, jenkins, kerberos, kubernetes, ldap,
 leveldb, microsoft.azure, microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, mssql,
-mysql, neo4j, odbc, openfaas, openlineage, opsgenie, oracle, otel, pagerduty, pandas, papermill,
-password, pinot, plexus, postgres, presto, rabbitmq, redis, s3, salesforce, samba, segment,
-sendgrid, sentry, sftp, singularity, slack, smtp, snowflake, spark, sqlite, ssh, statsd, tableau,
-tabular, telegram, trino, vertica, virtualenv, webhdfs, winrm, yandex, zendesk
+mysql, neo4j, odbc, openfaas, openlineage, opensearch, opsgenie, oracle, otel, pagerduty, pandas,
+papermill, password, pinot, plexus, postgres, presto, rabbitmq, redis, s3, salesforce, samba,
+segment, sendgrid, sentry, sftp, singularity, slack, smtp, snowflake, spark, sqlite, ssh, statsd,
+tableau, tabular, telegram, trino, vertica, virtualenv, webhdfs, winrm, yandex, zendesk

 .. END EXTRAS HERE

 Provider packages
diff --git a/INSTALL b/INSTALL
index 26351cf4254e6..027d4a621c000 100644
--- a/INSTALL
+++ b/INSTALL
@@ -103,10 +103,10 @@ deprecated_api, devel, devel_all, devel_ci, devel_hadoop, dingding, discord, doc
 druid, elasticsearch, exasol, facebook, ftp, gcp, gcp_api, github, github_enterprise, google,
 google_auth, grpc, hashicorp, hdfs, hive, http, imap, influxdb, jdbc, jenkins, kerberos, kubernetes, ldap,
 leveldb, microsoft.azure, microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, mssql,
-mysql, neo4j, odbc, openfaas, openlineage, opsgenie, oracle, otel, pagerduty, pandas, papermill,
-password, pinot, plexus, postgres, presto, rabbitmq, redis, s3, salesforce, samba, segment,
-sendgrid, sentry, sftp, singularity, slack, smtp, snowflake, spark, sqlite, ssh, statsd, tableau,
-tabular, telegram, trino, vertica, virtualenv, webhdfs, winrm, yandex, zendesk
+mysql, neo4j, odbc, openfaas, openlineage, opensearch, opsgenie, oracle, otel, pagerduty, pandas,
+papermill, password, pinot, plexus, postgres, presto, rabbitmq, redis, s3, salesforce, samba,
+segment, sendgrid, sentry, sftp, singularity, slack, smtp, snowflake, spark, sqlite, ssh, statsd,
+tableau, tabular, telegram, trino, vertica, virtualenv, webhdfs, winrm, yandex, zendesk

 # END EXTRAS HERE

 # For installing Airflow in development environments - see CONTRIBUTING.rst
diff --git a/airflow/providers/opensearch/hooks/opensearch.py b/airflow/providers/opensearch/hooks/opensearch.py
index db6435c791483..2e8b9237b332a 100644
--- a/airflow/providers/opensearch/hooks/opensearch.py
+++ b/airflow/providers/opensearch/hooks/opensearch.py
@@ -34,14 +34,14 @@ class OpenSearchHook(BaseHook):
     :param open_search_conn_id: Connection to use with Open Search
     :param log_query: Whether to log the query used for Open Search
     """
+
     conn_name_attr = "opensearch_conn_id"
     default_conn_name = "opensearch_default"
     conn_type = "opensearch"
     hook_name = "OpenSearch Hook"

-    def __init__(self, *args: Any, open_search_conn_id: str, log_query: bool, **kwargs: Any):
-        super().__init__(*args, **kwargs)
-        self.client = None
+    def __init__(self, open_search_conn_id: str, log_query: bool, **kwargs: Any):
+        super().__init__(**kwargs)
         self.conn_id = open_search_conn_id
         self.log_query = log_query

@@ -59,14 +59,14 @@ def get_client(self) -> OpenSearch:
         """
         auth = (self.conn.login, self.conn.password)
-        self.client = OpenSearch(
+        client = OpenSearch(
             hosts=[{"host": self.conn.host, "port": self.conn.port}],
             http_auth=auth,
             use_ssl=self.use_ssl,
             verify_certs=self.verify_certs,
             connection_class=RequestsHttpConnection,
         )
-        return self.client
+        return client

     def search(self, query: dict, index_name: str, **kwargs: Any) -> Any:
         """
@@ -77,7 +77,7 @@ def search(self, query: dict, index_name: str, **kwargs: Any) -> Any:
         """
         if self.log_query:
             self.log.info("Searching %s with Query: %s", index_name, query)
-        return self.client.search(body=query, index=index_name, **kwargs)
+        return self.get_client.search(body=query, index=index_name, **kwargs)

     def index(self, document: dict, index_name: str, doc_id: int, **kwargs: Any) -> Any:
         """
@@ -87,9 +87,9 @@ def index(self, document: dict, index_name: str, doc_id: int, **kwargs: Any) -> Any:
         :param index_name: the name of the index that this document will be associated with
         :param doc_id: the numerical identifier that will be used to identify the document on the index.
         """
-        return self.client.index(index=index_name, id=doc_id, body=document, **kwargs)
+        return self.get_client.index(index=index_name, id=doc_id, body=document, **kwargs)

-    def delete(self, index_name: str, query: dict | None = None, doc_id: int | None = None):
+    def delete(self, index_name: str, query: dict | None = None, doc_id: int | None = None) -> Any:
         """
         Delete from an index by either a query or by the document id.
@@ -101,9 +101,9 @@ def delete(self, index_name: str, query: dict | None = None, doc_id: int | None
         if query is not None:
             if self.log_query:
                 self.log.info("Deleting from %s using Query: %s", index_name, query)
-            return self.client.delete_by_query(index=index_name, body=query)
+            return self.get_client.delete_by_query(index=index_name, body=query)
         elif doc_id is not None:
-            return self.client.delete(index=index_name, id=doc_id)
+            return self.get_client.delete(index=index_name, id=doc_id)
         else:
             raise AirflowException("To delete a document you must include one of either a query or a document id.")

@@ -117,10 +117,7 @@ def get_ui_field_behaviour() -> dict[str, Any]:
             },
             "placeholders": {
                 "extra": json.dumps(
-                    {
-                        "use_ssl": True,
-                        "verify_certs": True
-                    },
+                    {"use_ssl": True, "verify_certs": True},
                     indent=2,
                 ),
             },
diff --git a/airflow/providers/opensearch/operators/__init__.py b/airflow/providers/opensearch/operators/__init__.py
new file mode 100644
index 0000000000000..13a83393a9124
--- /dev/null
+++ b/airflow/providers/opensearch/operators/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
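Note for reviewers: the hunks above turn ``get_client`` into a ``cached_property``, so it is now reached without parentheses and the hook methods all go through ``self.get_client``. A minimal usage sketch of the hook after this patch (not part of the diff itself; it assumes an ``opensearch_default`` connection pointing at a reachable cluster, and the index name and document below are illustrative only):

    from airflow.providers.opensearch.hooks.opensearch import OpenSearchHook

    # Assumes an "opensearch_default" connection exists in the Airflow metadata DB.
    hook = OpenSearchHook(open_search_conn_id="opensearch_default", log_query=True)

    # Index a document, then search for it.
    hook.index(
        document={"title": "Monty Python", "media_type": "Movie"},
        index_name="example-index",
        doc_id=1,
    )
    hits = hook.search(
        query={"size": 1, "query": {"match": {"title": "Monty Python"}}},
        index_name="example-index",
    )

    # The low-level client is accessed without parentheses after this patch.
    client = hook.get_client

    # Clean up either by document id or by query (exactly one must be given).
    hook.delete(index_name="example-index", doc_id=1)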
diff --git a/airflow/providers/amazon/aws/operators/opensearch.py b/airflow/providers/opensearch/operators/opensearch.py
similarity index 88%
rename from airflow/providers/amazon/aws/operators/opensearch.py
rename to airflow/providers/opensearch/operators/opensearch.py
index 3bfd43450f9d6..c17b35995cb15 100644
--- a/airflow/providers/amazon/aws/operators/opensearch.py
+++ b/airflow/providers/opensearch/operators/opensearch.py
@@ -22,18 +22,18 @@
 from airflow.exceptions import AirflowException
 from airflow.models import BaseOperator
-from airflow.providers.amazon.aws.hooks.opensearch import OpenSearchHook
+from airflow.providers.opensearch.hooks.opensearch import OpenSearchHook

 if TYPE_CHECKING:
-    from opensearch_dsl.document import Document
     from opensearch_dsl.search import Search
+    from opensearchpy import Document

     from airflow.utils.context import Context


 class OpenSearchQueryOperator(BaseOperator):
     """
-    Runs a query search against a given index on an AWS OpenSearch cluster and returns results.
+    Runs a query search against a given index on an OpenSearch cluster and returns results.

     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -71,7 +71,7 @@ def hook(self) -> OpenSearchHook:
         return OpenSearchHook(open_search_conn_id=self.opensearch_conn_id, log_query=self.log_query)

     def execute(self, context: Context) -> Any:
-        """Executes a search against a given index or a Search object on an AWS OpenSearch Cluster."""
+        """Executes a search against a given index or a Search object on an OpenSearch Cluster."""
         result = None

         if self.query is not None:
@@ -85,7 +85,7 @@ def execute(self, context: Context) -> Any:
             raise AirflowException(e)
         elif self.search_object is not None:
             try:
-                result = self.search_object.using(self.hook.get_client()).execute()
+                result = self.search_object.using(self.hook.get_client).execute()
             except Exception as e:
                 raise AirflowException(e)
         else:
@@ -98,7 +98,7 @@ def execute(self, context: Context) -> Any:

 class OpenSearchCreateIndexOperator(BaseOperator):
     """
-    Creates a new index on an AWS Open Search cluster with a given index name.
+    Creates a new index on an Open Search cluster with a given index name.

     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -110,7 +110,12 @@ class OpenSearchCreateIndexOperator(BaseOperator):
     """

     def __init__(
-        self, *, index_name: str, index_body: dict[str, Any], opensearch_conn_id: str = "opensearch_default", **kwargs
+        self,
+        *,
+        index_name: str,
+        index_body: dict[str, Any],
+        opensearch_conn_id: str = "opensearch_default",
+        **kwargs,
     ) -> None:
         super().__init__(**kwargs)
         self.index_name = index_name
@@ -123,16 +128,16 @@ def hook(self) -> OpenSearchHook:
         return OpenSearchHook(open_search_conn_id=self.opensearch_conn_id, log_query=False)

     def execute(self, context: Context) -> Any:
-        """Creates an index on an AWS Open Search cluster."""
+        """Creates an index on an Open Search cluster."""
         try:
-            self.hook.get_client().indices.create(index=self.index_name, body=self.index_body)
+            self.hook.get_client.indices.create(index=self.index_name, body=self.index_body)
         except Exception as e:
             raise AirflowException(e)


 class OpenSearchAddDocumentOperator(BaseOperator):
     """
-    Runs a query search against a given index on an AWS OpenSearch cluster and returns results.
+    Adds a new document to a given index on an OpenSearch cluster.

     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -168,10 +173,10 @@ def hook(self) -> OpenSearchHook:
         return OpenSearchHook(open_search_conn_id=self.opensearch_conn_id, log_query=False)

     def execute(self, context: Context) -> Any:
-        """Saves a document to a given index on an AWS OpenSearch cluster."""
+        """Saves a document to a given index on an OpenSearch cluster."""
         if self.doc_class is not None:
             try:
-                result = self.doc_class.save(using=self.hook.get_client())
+                result = self.doc_class.save(using=self.hook.get_client)
             except Exception as e:
                 raise AirflowException(e)
         elif self.index_name is not None and self.document is not None and self.doc_id is not None:
diff --git a/airflow/providers/opensearch/provider.yaml b/airflow/providers/opensearch/provider.yaml
index a8e8e958de5be..1a864720cd8a9 100644
--- a/airflow/providers/opensearch/provider.yaml
+++ b/airflow/providers/opensearch/provider.yaml
@@ -26,9 +26,8 @@ versions:
   - 1.0.0

 dependencies:
-  - apache-airflow>=2.4.3
+  - apache-airflow>=2.5.0
   - opensearch-py>=2.2.0
-  - opensearch-dsl>=2.1.0

 integrations:
   - integration-name: Opensearch
@@ -41,6 +40,11 @@ hooks:
     python-modules:
       - airflow.providers.opensearch.hooks.opensearch

+operators:
+  - integration-name: Opensearch
+    python-modules:
+      - airflow.providers.opensearch.operators.opensearch
+
 connection-types:
   - hook-class-name: airflow.providers.opensearch.hooks.opensearch.OpenSearchHook
     connection-type: opensearch
diff --git a/docs/apache-airflow-providers-opensearch/commits.rst b/docs/apache-airflow-providers-opensearch/commits.rst
new file mode 100644
index 0000000000000..d36d55467eec3
--- /dev/null
+++ b/docs/apache-airflow-providers-opensearch/commits.rst
@@ -0,0 +1,30 @@
+
+ .. Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+ ..   http://www.apache.org/licenses/LICENSE-2.0
+
+ .. Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+
+Package apache-airflow-providers-opensearch
+------------------------------------------------------
+
+`OpenSearch <https://opensearch.org/>`__
+
+
+This is a detailed commit list of changes for the ``opensearch`` provider package.
+For the high-level changelog, see :doc:`package information including changelog <index>`.
+
+1.0.0
+.....
diff --git a/docs/apache-airflow-providers-opensearch/index.rst b/docs/apache-airflow-providers-opensearch/index.rst
new file mode 100644
index 0000000000000..c89306086b5cf
--- /dev/null
+++ b/docs/apache-airflow-providers-opensearch/index.rst
@@ -0,0 +1,104 @@
+ .. Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+ ..   http://www.apache.org/licenses/LICENSE-2.0
+
+ .. Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+``apache-airflow-providers-opensearch``
+=======================================
+
+
+.. toctree::
+    :hidden:
+    :maxdepth: 1
+    :caption: Basics
+
+    Home <self>
+    Changelog <changelog>
+    Security <security>
+
+.. toctree::
+    :hidden:
+    :maxdepth: 1
+    :caption: Guides
+
+    Connection types <connections/opensearch>
+    Operators <operators/opensearch>
+    Logging for Tasks <logging/index>
+    Configuration <configurations-ref>
+
+.. toctree::
+    :hidden:
+    :maxdepth: 1
+    :caption: References
+
+    Python API <_api/airflow/providers/opensearch/index>
+
+.. toctree::
+    :hidden:
+    :maxdepth: 1
+    :caption: System tests
+
+    System Tests <_api/tests/system/providers/opensearch/index>
+
+.. toctree::
+    :hidden:
+    :maxdepth: 1
+    :caption: Resources
+
+    Example DAGs <https://github.com/apache/airflow/tree/main/tests/system/providers/opensearch>
+    PyPI Repository <https://pypi.org/project/apache-airflow-providers-opensearch/>
+    Installing from sources <installing-providers-from-sources>
+
+.. THE REMAINDER OF THE FILE IS AUTOMATICALLY GENERATED. IT WILL BE OVERWRITTEN AT RELEASE TIME!
+
+
+.. toctree::
+    :hidden:
+    :maxdepth: 1
+    :caption: Commits
+
+    Detailed list of commits <commits>
+
+
+Package apache-airflow-providers-opensearch
+------------------------------------------------------
+
+`OpenSearch <https://opensearch.org/>`__
+
+Release: 1.0.0
+
+Provider package
+----------------
+
+This is a provider package for the ``opensearch`` provider. All classes for this provider package
+are in the ``airflow.providers.opensearch`` Python package.
+
+Installation
+------------
+
+You can install this package on top of an existing Airflow 2 installation (see ``Requirements`` below
+for the minimum Airflow version supported) via
+``pip install apache-airflow-providers-opensearch``
+
+Requirements
+------------
+
+The minimum Apache Airflow version supported by this provider package is ``2.5.0``.
+
+===================  ==================
+PIP package          Version required
+===================  ==================
+``apache-airflow``   ``>=2.5.0``
+``opensearch-py``    ``>=2.2.0``
+===================  ==================
diff --git a/docs/apache-airflow-providers-opensearch/installing-providers-from-sources.rst b/docs/apache-airflow-providers-opensearch/installing-providers-from-sources.rst
new file mode 100644
index 0000000000000..b4e730f4ff21a
--- /dev/null
+++ b/docs/apache-airflow-providers-opensearch/installing-providers-from-sources.rst
@@ -0,0 +1,18 @@
+ .. Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+ ..   http://www.apache.org/licenses/LICENSE-2.0
+
+ .. Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+.. include:: ../exts/includes/installing-providers-from-sources.rst
diff --git a/docs/apache-airflow-providers-opensearch/operators/opensearch.rst b/docs/apache-airflow-providers-opensearch/operators/opensearch.rst
new file mode 100644
index 0000000000000..09e6dc5825d9f
--- /dev/null
+++ b/docs/apache-airflow-providers-opensearch/operators/opensearch.rst
@@ -0,0 +1,72 @@
+ .. Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+ ..   http://www.apache.org/licenses/LICENSE-2.0
+
+ .. Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+=============
+OpenSearch
+=============
+
+`OpenSearch <https://opensearch.org/>`__
+
+
+Operators
+---------
+
+.. _howto/operator:OpenSearchCreateIndexOperator:
+
+Create an Index in OpenSearch
+==============================
+
+Use the :class:`~airflow.providers.opensearch.operators.opensearch.OpenSearchCreateIndexOperator`
+to create a new index in an OpenSearch domain.
+
+.. exampleinclude:: /../../tests/system/providers/opensearch/example_opensearch.py
+    :language: python
+    :start-after: [START howto_operator_opensearch_create_index]
+    :dedent: 4
+    :end-before: [END howto_operator_opensearch_create_index]
+
+
+.. _howto/operator:OpenSearchAddDocumentOperator:
+
+Add a Document to an Index on OpenSearch
+=========================================
+
+Use the :class:`~airflow.providers.opensearch.operators.opensearch.OpenSearchAddDocumentOperator`
+to add single documents to an OpenSearch index.
+
+.. exampleinclude:: /../../tests/system/providers/opensearch/example_opensearch.py
+    :language: python
+    :start-after: [START howto_operator_opensearch_add_document]
+    :dedent: 4
+    :end-before: [END howto_operator_opensearch_add_document]
+
+
+.. _howto/operator:OpenSearchQueryOperator:
+
+Run a query against an OpenSearch Index
+=========================================
+
+Use the :class:`~airflow.providers.opensearch.operators.opensearch.OpenSearchQueryOperator`
+to run a query against an OpenSearch index.
+
+..
exampleinclude:: /../../tests/system/providers/opensearch/example_opensearch.py + :language: python + :start-after: [START howto_operator_opensearch_query] + :dedent: 4 + :end-before: [END howto_operator_opensearch_query] diff --git a/generated/provider_dependencies.json b/generated/provider_dependencies.json index 128ccba43eadf..56f9bcb507115 100644 --- a/generated/provider_dependencies.json +++ b/generated/provider_dependencies.json @@ -23,6 +23,7 @@ "deps": [ "apache-airflow-providers-common-sql>=1.3.1", "apache-airflow-providers-http", + "apache-airflow-providers-opensearch", "apache-airflow>=2.4.0", "asgiref", "boto3>=1.28.0", @@ -32,8 +33,6 @@ "mypy-boto3-rds>=1.24.0", "mypy-boto3-redshift-data>=1.24.0", "mypy-boto3-s3>=1.24.0", - "opensearch-dsl>=2.1.0", - "opensearch-py>=2.2.0", "redshift_connector>=2.0.888", "sqlalchemy_redshift>=0.8.6", "watchtower~=2.0.1" @@ -673,6 +672,14 @@ ], "excluded-python-versions": [] }, + "opensearch": { + "deps": [ + "apache-airflow>=2.5.0", + "opensearch-py>=2.2.0" + ], + "cross-providers-deps": [], + "excluded-python-versions": [] + }, "opsgenie": { "deps": [ "apache-airflow>=2.4.0", diff --git a/tests/providers/opensearch/__init__.py b/tests/providers/opensearch/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/tests/providers/opensearch/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/tests/providers/opensearch/conftest.py b/tests/providers/opensearch/conftest.py new file mode 100644 index 0000000000000..e95af6687a422 --- /dev/null +++ b/tests/providers/opensearch/conftest.py @@ -0,0 +1,60 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+
+from airflow.models import Connection
+from airflow.providers.opensearch.hooks.opensearch import OpenSearchHook
+from airflow.utils import db
+
+MOCK_RETURN = {"status": "test"}
+
+
+class MockSearch(OpenSearchHook):
+    # Mock class to override the Hook for monkeypatching
+    def get_client(self) -> None:
+        return None
+
+    def search(self, query: dict, index_name: str, **kwargs: Any) -> Any:
+        return MOCK_RETURN
+
+    def index(self, document: dict, index_name: str, doc_id: int, **kwargs: Any) -> Any:
+        return doc_id
+
+
+@pytest.fixture
+def mock_hook(monkeypatch):
+    monkeypatch.setattr(OpenSearchHook, "search", MockSearch.search)
+    monkeypatch.setattr(OpenSearchHook, "get_client", MockSearch.get_client)
+    monkeypatch.setattr(OpenSearchHook, "index", MockSearch.index)
+
+
+@pytest.fixture(autouse=True)
+def setup_connection():
+    # We need to set up a Connection in the database for all tests.
+    db.merge_conn(
+        Connection(
+            conn_id="opensearch_default",
+            conn_type="opensearch",
+            host="myopensearch.com",
+            login="test_user",
+            password="test",
+        )
+    )
diff --git a/tests/providers/opensearch/hooks/__init__.py b/tests/providers/opensearch/hooks/__init__.py
new file mode 100644
index 0000000000000..13a83393a9124
--- /dev/null
+++ b/tests/providers/opensearch/hooks/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/tests/providers/opensearch/hooks/test_opensearch.py b/tests/providers/opensearch/hooks/test_opensearch.py
new file mode 100644
index 0000000000000..9470c54639649
--- /dev/null
+++ b/tests/providers/opensearch/hooks/test_opensearch.py
@@ -0,0 +1,37 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations + +from airflow.providers.opensearch.hooks.opensearch import OpenSearchHook + +MOCK_SEARCH_RETURN = {"status": "test"} + + +class TestOpenSearchHook: + def test_hook_search(self, mock_hook): + result = OpenSearchHook(open_search_conn_id="opensearch_default", log_query=True).search( + index_name="testIndex", + query={"size": 1, "query": {"multi_match": {"query": "test", "fields": ["testField"]}}}, + ) + + assert result == MOCK_SEARCH_RETURN + + def test_hook_index(self, mock_hook): + result = OpenSearchHook(open_search_conn_id="opensearch_default", log_query=True).index( + index_name="test_index", document={"title": "Monty Python"}, doc_id=3 + ) + assert result == 3 diff --git a/tests/providers/opensearch/operators/__init__.py b/tests/providers/opensearch/operators/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/tests/providers/opensearch/operators/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/tests/providers/opensearch/operators/test_opensearch.py b/tests/providers/opensearch/operators/test_opensearch.py new file mode 100644 index 0000000000000..1eb0739a104e4 --- /dev/null +++ b/tests/providers/opensearch/operators/test_opensearch.py @@ -0,0 +1,126 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +from opensearchpy import Document, Keyword, Text + +from airflow.models import DAG +from airflow.providers.opensearch.operators.opensearch import ( + OpenSearchAddDocumentOperator, + OpenSearchCreateIndexOperator, + OpenSearchQueryOperator, +) +from airflow.utils.timezone import datetime + +TEST_DAG_ID = "unit_tests" +DEFAULT_DATE = datetime(2018, 1, 1) +MOCK_SEARCH_RETURN = {"status": "test"} + + +class FakeDocument(Document): + title = Text(fields={"raw": Keyword()}) + author = Text() + published = Text() + + def save(self, **kwargs): + return super().save(**kwargs) + + +class TestOpenSearchQueryOperator: + def setup_method(self): + args = { + "owner": "airflow", + "start_date": DEFAULT_DATE, + } + + self.dag = DAG(f"{TEST_DAG_ID}test_schedule_dag_once", default_args=args, schedule="@once") + + self.open_search = OpenSearchQueryOperator( + task_id="test_opensearch_query_operator", + index_name="test_index", + query={ + "size": 5, + "query": {"multi_match": {"query": "test", "fields": ["test_title^2", "test_type"]}}, + }, + ) + + def test_init(self): + assert self.open_search.task_id == "test_opensearch_query_operator" + assert self.open_search.opensearch_conn_id == "opensearch_default" + assert self.open_search.query["size"] == 5 + + def test_search_query(self, mock_hook): + result = self.open_search.execute({}) + assert result == MOCK_SEARCH_RETURN + + +class TestOpenSearchCreateIndexOperator: + # This test does not test execute logic because there is only a redirect to the OpenSearch + # client. + def setup_method(self): + args = { + "owner": "airflow", + "start_date": DEFAULT_DATE, + } + + self.dag = DAG(f"{TEST_DAG_ID}test_schedule_dag_once", default_args=args, schedule="@once") + + self.open_search = OpenSearchCreateIndexOperator( + task_id="test_opensearch_query_operator", index_name="test_index", index_body={"test": 1} + ) + + def test_init(self): + assert self.open_search.task_id == "test_opensearch_query_operator" + assert self.open_search.opensearch_conn_id == "opensearch_default" + assert self.open_search.index_name == "test_index" + + +class TestOpenSearchAddDocumentOperator: + def setup_method(self): + args = { + "owner": "airflow", + "start_date": DEFAULT_DATE, + } + + self.dag = DAG(f"{TEST_DAG_ID}test_schedule_dag_once", default_args=args, schedule="@once") + + self.open_search = OpenSearchAddDocumentOperator( + task_id="test_opensearch_doc_operator", + index_name="test_index", + document={"title": "Monty Python"}, + doc_id=1, + ) + + self.open_search_with_doc_class = OpenSearchAddDocumentOperator( + task_id="test_opensearch_doc_class_operator", + doc_class=FakeDocument(meta={"id": 2}, title="Hamlet", author="Shakespeare", published="1299"), + ) + + def test_init_with_args(self): + assert self.open_search.task_id == "test_opensearch_doc_operator" + assert self.open_search.opensearch_conn_id == "opensearch_default" + assert self.open_search.index_name == "test_index" + + def test_init_with_class(self): + # This operator uses the OpenSearch client method directly, testing here just + # confirming that the object is an instance of the class. 
+ assert isinstance(self.open_search_with_doc_class.doc_class, FakeDocument) + assert self.open_search_with_doc_class.task_id == "test_opensearch_doc_class_operator" + + def test_add_document_using_args(self, mock_hook): + result = self.open_search.execute({}) + assert result == 1 diff --git a/tests/system/providers/opensearch/__init__.py b/tests/system/providers/opensearch/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/tests/system/providers/opensearch/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/tests/system/providers/opensearch/example_opensearch.py b/tests/system/providers/opensearch/example_opensearch.py new file mode 100644 index 0000000000000..23e6d8a7398eb --- /dev/null +++ b/tests/system/providers/opensearch/example_opensearch.py @@ -0,0 +1,131 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+from __future__ import annotations
+
+from datetime import datetime, timedelta
+
+from opensearchpy import Integer, Search, Text
+from opensearchpy.helpers.document import Document
+
+from airflow.models.baseoperator import chain
+from airflow.models.dag import DAG
+from airflow.providers.opensearch.operators.opensearch import (
+    OpenSearchAddDocumentOperator,
+    OpenSearchCreateIndexOperator,
+    OpenSearchQueryOperator,
+)
+
+DAG_ID = "example_opensearch"
+INDEX_NAME = "example_index"
+
+default_args = {
+    "owner": "airflow",
+    "depends_on_past": False,
+    "email_on_failure": False,
+    "email_on_retry": False,
+    "retries": 1,
+    "retry_delay": timedelta(minutes=5),
+}
+
+
+class LogDocument(Document):
+    log_group_id = Integer()
+    logger = Text()
+    message = Text()
+
+    class Index:
+        name = INDEX_NAME
+
+    def save(self, **kwargs):
+        super().save(**kwargs)
+
+
+def load_connections():
+    # Connections needed for this example dag to finish
+    from airflow.models import Connection
+    from airflow.utils import db
+
+    db.merge_conn(
+        Connection(
+            conn_id="opensearch_test", conn_type="opensearch", host="127.0.0.1", login="test", password="test"
+        )
+    )
+
+
+with DAG(
+    dag_id=DAG_ID,
+    start_date=datetime(2021, 1, 1),
+    schedule="@once",
+    catchup=False,
+    tags=["example"],
+    default_args=default_args,
+    description="Examples of OpenSearch Operators",
+) as dag:
+
+    # [START howto_operator_opensearch_create_index]
+    create_index = OpenSearchCreateIndexOperator(
+        task_id="create_index",
+        index_name=INDEX_NAME,
+        index_body={"settings": {"index": {"number_of_shards": 1}}},
+    )
+    # [END howto_operator_opensearch_create_index]
+
+    # [START howto_operator_opensearch_add_document]
+    add_document_by_args = OpenSearchAddDocumentOperator(
+        task_id="add_document_with_args",
+        index_name=INDEX_NAME,
+        doc_id=1,
+        document={"log_group_id": 1, "logger": "python", "message": "hello world"},
+    )
+
+    add_document_by_class = OpenSearchAddDocumentOperator(
+        task_id="add_document_by_class",
+        doc_class=LogDocument(meta={"id": 2}, log_group_id=2, logger="airflow", message="Hello Airflow"),
+    )
+    # [END howto_operator_opensearch_add_document]
+
+    # [START howto_operator_opensearch_query]
+    search_low_level = OpenSearchQueryOperator(
+        task_id="low_level_query",
+        index_name="system_test",
+        query={"query": {"bool": {"must": {"match": {"message": "hello world"}}}}},
+    )
+
+    search_object = (
+        Search(index=INDEX_NAME)
+        .filter("term", logger="airflow")
+        .query("match", message="Hello Airflow")
+    )
+
+    search_high_level = OpenSearchQueryOperator(task_id="high_level_query", search_object=search_object)
+    # [END howto_operator_opensearch_query]
+
+    chain(create_index, add_document_by_class, add_document_by_args, search_high_level, search_low_level)
+
+    from tests.system.utils.watcher import watcher
+
+    # This test needs watcher in order to properly mark success/failure
+    # when "tearDown" task with trigger rule is part of the DAG
+    list(dag.tasks) >> watcher()
+
+
+from tests.system.utils import get_test_run  # noqa: E402
+
+# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest)
+test_run = get_test_run(dag)

From ec03b37ec10a74ce22230a28fb1c1db4b2c2a48e Mon Sep 17 00:00:00 2001
From: cjames23
Date: Sun, 1 Oct 2023 21:44:11 -0700
Subject: [PATCH 07/30] Remove AWS OpenSearch files

---
 .../providers/amazon/aws/hooks/opensearch.py  |  86 ----------
 .../operators/opensearch.rst                  |  86 ----------
 .../amazon/aws/hooks/test_opensearch.py       |  72 ---------
.../amazon/aws/operators/test_opensearch.py | 96 ----------- .../amazon/aws/example_opensearch.py | 149 ------------------ 5 files changed, 489 deletions(-) delete mode 100644 airflow/providers/amazon/aws/hooks/opensearch.py delete mode 100644 docs/apache-airflow-providers-amazon/operators/opensearch.rst delete mode 100644 tests/providers/amazon/aws/hooks/test_opensearch.py delete mode 100644 tests/providers/amazon/aws/operators/test_opensearch.py delete mode 100644 tests/system/providers/amazon/aws/example_opensearch.py diff --git a/airflow/providers/amazon/aws/hooks/opensearch.py b/airflow/providers/amazon/aws/hooks/opensearch.py deleted file mode 100644 index 3315e3823fb83..0000000000000 --- a/airflow/providers/amazon/aws/hooks/opensearch.py +++ /dev/null @@ -1,86 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -from __future__ import annotations - -import json -from typing import Any - -from opensearchpy import AWSV4SignerAuth, OpenSearch, RequestsHttpConnection - -from airflow.exceptions import AirflowException -from airflow.providers.opensearch.hooks.opensearch import OpenSearchHook -from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook - - -class AwsOpenSearchHook(OpenSearchHook, AwsBaseHook): - """ - This Hook provides a thin wrapper around the OpenSearch client. - - :param: open_search_conn_id: AWS Connection to use with Open Search - :param: log_query: Whether to log the query used for Open Search - """ - conn_name_attr = "aws_opensearch_conn_id" - default_conn_name = "aws_opensearch_default" - conn_type = "opensearch" - hook_name = "AWS Open Search Hook" - - def __init__(self, *args: Any, open_search_conn_id: str, log_query: bool, **kwargs: Any): - super().__init__(*args, **kwargs) - self.region = self.conn.extra_dejson.get("region_name", self.region_name) - - self._credentials = self.get_credentials(self.region) - self._auth = AWSV4SignerAuth(self._credentials, self.region, self.__SERVICE) - - def get_client(self) -> OpenSearch: - """ - - This function is intended for Operators that will take in arguments and use the high level - OpenSearch client which allows using Python objects to perform searches. 
- - """ - self.client = OpenSearch( - hosts=[{"host": self.conn.host, "port": self.conn.port}], - http_auth=self._auth, - use_ssl=self.use_ssl, - verify_certs=self.verify_certs, - connection_class=RequestsHttpConnection, - ) - return self.client - - @staticmethod - def get_ui_field_behaviour() -> dict[str, Any]: - """Returns custom UI field behaviour for Amazon Open Search Connection.""" - return { - "hidden_fields": ["schema"], - "relabeling": { - "host": "OpenSearch Cluster Endpoint", - "login": "AWS Access Key ID", - "password": "AWS Secret Access Key", - "extra": "Open Search Configuration", - }, - "placeholders": { - "extra": json.dumps( - { - "use_ssl": True, - "verify_certs": True, - "region_name": "us-east-1" - }, - indent=2, - ), - }, - } diff --git a/docs/apache-airflow-providers-amazon/operators/opensearch.rst b/docs/apache-airflow-providers-amazon/operators/opensearch.rst deleted file mode 100644 index bfd663b79ff2b..0000000000000 --- a/docs/apache-airflow-providers-amazon/operators/opensearch.rst +++ /dev/null @@ -1,86 +0,0 @@ - .. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - .. http://www.apache.org/licenses/LICENSE-2.0 - - .. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -==================== -Amazon OpenSearch -==================== - -`Amazon Open Search `__ Amazon OpenSearch Service makes it -easy for you to perform interactive log analytics, real-time application monitoring, website search, and more. -OpenSearch is an open source, distributed search and analytics suite derived from Elasticsearch. - -Prerequisite Tasks ------------------- - -.. include:: ../../_partials/prerequisite_tasks.rst - -Operators ---------- - -.. _howto/operator:OpenSearchCreateIndexOperator: - -Create an Index on an Open Search Domain -================================================= - -Use the :class:`OpenSearchAddDocumentOperator ` to add -a new document to a specified Index on an Amazon OpenSearch cluster. - - -.. exampleinclude:: /../../tests/system/providers/amazon/aws/example_opensearch.py - :language: python - :dedent: 4 - :start-after: [START howto_operator_opensearch_document] - :end-before: [END howto_operator_opensearch_document] - -.. _howto/operator:OpenSearchAddDocumentOperator: - -Add a document to an Index on an Open Search Domain -================================================= - -Use the :class:`OpenSearchCreateIndexOperator ` to create a new -index on an Open Search Cluster - - -.. exampleinclude:: /../../tests/system/providers/amazon/aws/example_opensearch.py - :language: python - :dedent: 4 - :start-after: [START howto_operator_opensearch_index] - :end-before: [END howto_operator_opensearch_index] - - -.. 
_howto/operator:OpenSearchSearchOperator: - -Run a query on an Amazon OpenSearch cluster -================================================= - -Use the :class:`OpenSearchSearchOperator ` to run -search queries against an Amazon OpenSearch cluster on a given index. - - -.. exampleinclude:: /../../tests/system/providers/amazon/aws/example_opensearch.py - :language: python - :dedent: 4 - :start-after: [START howto_operator_opensearch_search] - :end-before: [END howto_operator_opensearch_search] - - - - -Reference ---------- -* `Open Search High Level Client `__ -* `Open Search Low Level Client `__ diff --git a/tests/providers/amazon/aws/hooks/test_opensearch.py b/tests/providers/amazon/aws/hooks/test_opensearch.py deleted file mode 100644 index 46d928a72c925..0000000000000 --- a/tests/providers/amazon/aws/hooks/test_opensearch.py +++ /dev/null @@ -1,72 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -from __future__ import annotations - -import pytest -from moto import mock_opensearch -import boto3 - -from airflow.models import Connection -from airflow.providers.amazon.aws.hooks.opensearch import OpenSearchHook -from airflow.utils import db - - -class TestOpenSearchHook: - @mock_opensearch - def create_domain(self): - client = boto3.client("opensearch") - response = client.create_domain(DomainName=f"test-opensearch-cluster", - EngineVersion="2.7", - ClusterConfig={ - "InstanceType": "t3.small.search", - "InstanceCount": 1, - "DedicatedMasterEnabled": False, - "ZoneAwarenessEnabled": False, - },) - return response["endpoint"] - - def setup_method(self): - db.merge_conn( - Connection( - conn_id="opensearch_default", - conn_type="opensearch", - host=self.create_domain(), - login="MyAWSSecretID", - password="MyAccessKey", - extra={ - "region_name": "us-east-1" - } - ) - ) - - @pytest.fixture() - def mock_search(self, monkeypatch): - def mock_return(index_name: str): - return {"status": "test"} - - monkeypatch.setattr(OpenSearchHook, "search", mock_return) - - def test_hook_search(self, mock_search): - hook = OpenSearchHook(open_search_conn_id="opensearch_default", - log_query=True) - - result = hook.search( - index_name="testIndex", - query={"size": 1, "query": {"multi_match": {"query": "test", "fields": ["testField"]}}}, - ) - - assert result diff --git a/tests/providers/amazon/aws/operators/test_opensearch.py b/tests/providers/amazon/aws/operators/test_opensearch.py deleted file mode 100644 index c27747be65ec9..0000000000000 --- a/tests/providers/amazon/aws/operators/test_opensearch.py +++ /dev/null @@ -1,96 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -from __future__ import annotations - -from unittest import mock - -import pytest -import boto3 -from moto import mock_opensearch - -from airflow.models import DAG, DagRun, TaskInstance, Connection -from airflow.providers.amazon.aws.hooks.opensearch import OpenSearchHook -from airflow.providers.amazon.aws.operators.opensearch import OpenSearchQueryOperator, \ - OpenSearchAddDocumentOperator, OpenSearchCreateIndexOperator -from airflow.utils import timezone, db -from airflow.utils.timezone import datetime - - -TEST_DAG_ID = "unit_tests" -DEFAULT_DATE = datetime(2018, 1, 1) -MOCK_TEST_DATA = { - "result": "success" -} - - -class TestOpenSearchQueryOperator: - - @mock_opensearch - def create_domain(self): - client = boto3.client("opensearch") - response = client.create_domain(DomainName=f"test-opensearch-cluster", - EngineVersion="2.7", - ClusterConfig={ - "InstanceType": "t3.small.search", - "InstanceCount": 1, - "DedicatedMasterEnabled": False, - "ZoneAwarenessEnabled": False, - }, ) - return response["endpoint"] - - def setup_method(self): - db.merge_conn( - Connection( - conn_id="opensearch_default", - conn_type="open_search", - host=self.create_domain(), - login="MyAWSSecretID", - password="MyAccessKey", - extra={ - "region_name": "us-east-1" - } - ) - ) - args = { - "owner": "airflow", - "start_date": DEFAULT_DATE, - } - - self.dag = DAG(f"{TEST_DAG_ID}test_schedule_dag_once", default_args=args, schedule="@once") - - self.open_search = OpenSearchQueryOperator( - task_id="test_opensearch_query_operator", - index_name="test_index", - query={ - "size": 5, - "query": {"multi_match": {"query": "test", "fields": ["test_title^2", "test_type"]}}, - }, - ) - - def test_init(self): - assert self.open_search.task_id == "test_opensearch_query_operator" - assert self.open_search.opensearch_conn_id == "opensearch_default" - assert self.open_search.query["size"] == 5 - assert self.open_search.hook.region == "us-east-1" - - @mock.patch.object(OpenSearchHook, "search", return_value=MOCK_TEST_DATA) - def test_search_query(self, mock_search): - self.open_search.execute({}) - mock_search.assert_called_once_with( - {"size": 5, "query": {"multi_match": {"query": "test", "fields": ["test_title^2", "test_type"]}}}, - "test_index" - ) diff --git a/tests/system/providers/amazon/aws/example_opensearch.py b/tests/system/providers/amazon/aws/example_opensearch.py deleted file mode 100644 index 9c1af7769caf6..0000000000000 --- a/tests/system/providers/amazon/aws/example_opensearch.py +++ /dev/null @@ -1,149 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -from __future__ import annotations - -from datetime import datetime - -import boto3 -from opensearch_dsl import Keyword, Search, Text -from opensearch_dsl.document import Document - -from airflow import DAG -from airflow.decorators import task -from airflow.models.baseoperator import chain -from airflow.providers.amazon.aws.operators.opensearch import ( - OpenSearchAddDocumentOperator, - OpenSearchCreateIndexOperator, - OpenSearchQueryOperator, -) -from airflow.utils.trigger_rule import TriggerRule -from tests.system.providers.amazon.aws.utils import ENV_ID_KEY, SystemTestContextBuilder - -DAG_ID = "example_opensearch" -INDEX_NAME = "example-index" - -sys_test_context_task = SystemTestContextBuilder().build() - - -class TestDoc(Document): - __test__ = False - title = Text(fields={"raw": Keyword()}) - media_type = Text() - - class Index: - name = INDEX_NAME - - def save(self, **kwargs): - return super().save(**kwargs) - -@task -def create_open_search_cluster(): - env_id = test_context[ENV_ID_KEY] - opensearch = boto3.client("opensearch") - opensearch.create_domain( - DomainName=f"{env_id}-opensearch-cluster", - EngineVersion="2.7", - ClusterConfig={ - "InstanceType": "t3.small.search", - "InstanceCount": 1, - "DedicatedMasterEnabled": False, - "ZoneAwarenessEnabled": False, - }, - ) - - -@task(trigger_rule=TriggerRule.ALL_DONE) -def delete_os_cluster(env_id: str): - boto3.client("opensearch").delete_domain(DomainName=f"{env_id}-opensearch-cluster") - - -with DAG( - dag_id=DAG_ID, - start_date=datetime(2023, 9, 1), - schedule="@once", - catchup=False, - tags=["example"], -) as dag: - test_context = sys_test_context_task() - create_cluster = create_open_search_cluster() - - # [START howto_operator_opensearch_index] - create_index = OpenSearchCreateIndexOperator( - task_id="create_index_example", - index_name=INDEX_NAME, - index_body={"settings": {"index": {"number_of_shards": 1}}}, - ) - # [END howto_operator_opensearch_index] - - # [START howto_operator_opensearch_document] - add_low_doc = OpenSearchAddDocumentOperator( - task_id="add_low_level_document", - index_name=INDEX_NAME, - document={"title": "MontyPython", "media_type": "Movie"}, - doc_id=1, - ) - add_high_doc = OpenSearchAddDocumentOperator( - task_id="add_high_level_document", - doc_class=TestDoc(meta={"id": 2}, title="Top Gun", media_type="Movie"), - ) - - # [END howto_operator_opensearch_document] - - # [START howto_operator_opensearch_search] - search_low_docs = OpenSearchQueryOperator( - task_id="search_low_level", - index_name=INDEX_NAME, - query={ - "size": 5, - "query": {"multi_match": {"query": "MontyPython", "fields": ["title^2", "media_type"]}}, - }, - ) - - search_high_docs = OpenSearchQueryOperator( - task_id="search_high", - search_object=Search(index=INDEX_NAME) - .filter("term", media_type="Movie") - .query("match", title="Top Gun"), - ) - - # [END howto_operator_opensearch_search] - - remove_cluster = delete_os_cluster(env_id=test_context[ENV_ID_KEY]) - - chain( - # TEST SETUP - test_context, - create_cluster, - # TEST BODY - create_index, - add_low_doc, - add_high_doc, - search_low_docs, - 
search_high_docs, - # TEST TEAR DOWN - remove_cluster, - ) - from tests.system.utils.watcher import watcher - - # This test needs watcher in order to properly mark success/failure - # when "tearDown" task with trigger rule is part of the DAG - list(dag.tasks) >> watcher() - -from tests.system.utils import get_test_run # noqa: E402 - -# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest) -test_run = get_test_run(dag) From 48d951154bea52c6dcbea2245ac74d814dd5a4bf Mon Sep 17 00:00:00 2001 From: cjames23 Date: Sun, 1 Oct 2023 21:47:59 -0700 Subject: [PATCH 08/30] Remove reference to opensearch provider --- airflow/providers/amazon/provider.yaml | 1 - generated/provider_dependencies.json | 1 - 2 files changed, 2 deletions(-) diff --git a/airflow/providers/amazon/provider.yaml b/airflow/providers/amazon/provider.yaml index 767169f51ea4d..62b7fa2ed8e0c 100644 --- a/airflow/providers/amazon/provider.yaml +++ b/airflow/providers/amazon/provider.yaml @@ -70,7 +70,6 @@ dependencies: - apache-airflow>=2.4.0 - apache-airflow-providers-common-sql>=1.3.1 - apache-airflow-providers-http - - apache-airflow-providers-opensearch # We should update minimum version of boto3 and here regularly to avoid `pip` backtracking with the number # of candidates to consider. We should also make sure that all the below related packages have also the # same minimum version specified. Boto3 1.28.0 has been released on July 6, 2023. We should also make diff --git a/generated/provider_dependencies.json b/generated/provider_dependencies.json index 56f9bcb507115..c5cdd0b152097 100644 --- a/generated/provider_dependencies.json +++ b/generated/provider_dependencies.json @@ -23,7 +23,6 @@ "deps": [ "apache-airflow-providers-common-sql>=1.3.1", "apache-airflow-providers-http", - "apache-airflow-providers-opensearch", "apache-airflow>=2.4.0", "asgiref", "boto3>=1.28.0", From 94cd4688867b98881242553e0ee5a96f6ffead0b Mon Sep 17 00:00:00 2001 From: cjames23 Date: Sun, 1 Oct 2023 21:51:16 -0700 Subject: [PATCH 09/30] Fix docs for OpenSearch --- airflow/providers/amazon/provider.yaml | 17 +++-------------- airflow/providers/opensearch/provider.yaml | 2 ++ 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/airflow/providers/amazon/provider.yaml b/airflow/providers/amazon/provider.yaml index 62b7fa2ed8e0c..0aed5312eb0e5 100644 --- a/airflow/providers/amazon/provider.yaml +++ b/airflow/providers/amazon/provider.yaml @@ -297,12 +297,7 @@ integrations: how-to-guide: - /docs/apache-airflow-providers-amazon/operators/appflow.rst tags: [aws] - - integration-name: AWS Open Search - external-doc-url: https://aws.amazon.com/opensearch-service/ - logo: /integration-logos/aws/Amazon-OpenSearch-light.png - how-to-guide: - - /docs/apache-airflow-providers-amazon/operators/opensearch.rst - tags: [aws] + operators: - integration-name: Amazon Athena @@ -376,9 +371,6 @@ operators: - integration-name: Amazon Appflow python-modules: - airflow.providers.amazon.aws.operators.appflow - - integration-name: AWS Open Search - python-modules: - - airflow.providers.amazon.aws.operators.opensearch sensors: - integration-name: Amazon Athena @@ -552,9 +544,7 @@ hooks: - integration-name: Amazon Appflow python-modules: - airflow.providers.amazon.aws.hooks.appflow - - integration-name: Amazon Open Search - python-modules: - - airflow.providers.amazon.aws.hooks.opensearch + triggers: - integration-name: Amazon Web Services @@ -702,8 +692,7 @@ connection-types: connection-type: emr - hook-class-name: 
airflow.providers.amazon.aws.hooks.redshift_sql.RedshiftSQLHook connection-type: redshift - - hook-class-name: airflow.providers.amazon.aws.hooks.opensearch.OpenSearchHook - connection-type: opensearch + notifications: - airflow.providers.amazon.aws.notifications.chime.ChimeNotifier diff --git a/airflow/providers/opensearch/provider.yaml b/airflow/providers/opensearch/provider.yaml index 1a864720cd8a9..270d7cd2085dd 100644 --- a/airflow/providers/opensearch/provider.yaml +++ b/airflow/providers/opensearch/provider.yaml @@ -32,6 +32,8 @@ dependencies: integrations: - integration-name: Opensearch external-doc-url: https://opensearch.org/ + how-to-guide: + - /docs/apache-airflow-providers-opensearch/operators/opensearch.rst logo: /integration-logos/opensearch/opensearch.png tags: [software] From 830a49dc1daa543c54eed694caa66d75ba5659bb Mon Sep 17 00:00:00 2001 From: cjames23 Date: Mon, 2 Oct 2023 15:16:44 -0700 Subject: [PATCH 10/30] Fix type for OpenSearchAddDocumentOperator --- airflow/providers/opensearch/operators/opensearch.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/airflow/providers/opensearch/operators/opensearch.py b/airflow/providers/opensearch/operators/opensearch.py index c17b35995cb15..dee76571b471b 100644 --- a/airflow/providers/opensearch/operators/opensearch.py +++ b/airflow/providers/opensearch/operators/opensearch.py @@ -25,8 +25,6 @@ from airflow.providers.opensearch.hooks.opensearch import OpenSearchHook if TYPE_CHECKING: - from opensearch_dsl.search import Search - from opensearchpy import Document from airflow.utils.context import Context @@ -52,7 +50,7 @@ def __init__( self, *, query: dict | None = None, - search_object: Search | None = None, + search_object: Any | None = None, index_name: str | None = None, opensearch_conn_id: str = "opensearch_default", log_query: bool = True, @@ -137,7 +135,7 @@ def execute(self, context: Context) -> Any: class OpenSearchAddDocumentOperator(BaseOperator): """ - Runs a query search against a given index on an OpenSearch cluster and returns results. + Adds a new document to a given Index. It will either add or overwrite an existing document. .. seealso:: For more information on how to use this operator, take a look at the guide: @@ -156,7 +154,7 @@ def __init__( index_name: str | None = None, document: dict[str, Any] | None = None, doc_id: int | None = None, - doc_class: Document | None = None, + doc_class: Any | None = None, opensearch_conn_id: str = "opensearch_default", **kwargs, ) -> None: @@ -176,7 +174,8 @@ def execute(self, context: Context) -> Any: """Saves a document to a given index on an OpenSearch cluster.""" if self.doc_class is not None: try: - result = self.doc_class.save(using=self.hook.get_client) + doc = self.doc_class.init(using=self.hook.get_client) + result = doc.save(using=self.hook.get_client) except Exception as e: raise AirflowException(e) elif self.index_name is not None and self.document is not None and self.doc_id is not None: From fa721c60498f1c447f6e34a5fcd98a4162c142f9 Mon Sep 17 00:00:00 2001 From: cjames23 Date: Mon, 2 Oct 2023 17:45:27 -0700 Subject: [PATCH 11/30] Fix ruff errors for documentation, amazon provider linter issue. 
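The ``doc_class`` path adjusted in the patch above accepts an opensearch-dsl ``Document`` instance. A minimal usage sketch, with an illustrative document class modeled on the ``TestDoc`` class in the example DAG earlier in this series (the operator import path reflects this stage of the series, before the code moved to the opensearch provider):

from opensearch_dsl import Keyword, Text
from opensearch_dsl.document import Document

from airflow.providers.amazon.aws.operators.opensearch import OpenSearchAddDocumentOperator


class MediaDoc(Document):
    # Illustrative mapping, mirroring the TestDoc class in the example DAG.
    title = Text(fields={"raw": Keyword()})
    media_type = Text()

    class Index:
        name = "example-index"


add_doc = OpenSearchAddDocumentOperator(
    task_id="add_high_level_document",
    doc_class=MediaDoc(meta={"id": 1}, title="Top Gun", media_type="Movie"),
)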
Add opensearch to extra-packages-ref.rst --- airflow/providers/amazon/provider.yaml | 2 +- airflow/providers/opensearch/hooks/opensearch.py | 7 +------ airflow/providers/opensearch/operators/opensearch.py | 1 - docs/apache-airflow/extra-packages-ref.rst | 2 ++ 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/airflow/providers/amazon/provider.yaml b/airflow/providers/amazon/provider.yaml index 0aed5312eb0e5..f0532b41bbbcb 100644 --- a/airflow/providers/amazon/provider.yaml +++ b/airflow/providers/amazon/provider.yaml @@ -561,7 +561,7 @@ triggers: - airflow.providers.amazon.aws.triggers.ec2 - integration-name: AWS Lambda python-modules: - - airflow.providers.amazon.aws.triggers.lambda_function + - airflow.providers.amazon.aws.triggers.lambda_function - integration-name: Amazon Redshift python-modules: - airflow.providers.amazon.aws.triggers.redshift_cluster diff --git a/airflow/providers/opensearch/hooks/opensearch.py b/airflow/providers/opensearch/hooks/opensearch.py index 2e8b9237b332a..7a6da8cb4e5ea 100644 --- a/airflow/providers/opensearch/hooks/opensearch.py +++ b/airflow/providers/opensearch/hooks/opensearch.py @@ -52,12 +52,7 @@ def __init__(self, open_search_conn_id: str, log_query: bool, **kwargs: Any): @cached_property def get_client(self) -> OpenSearch: - """ - - This function is intended for Operators that will take in arguments and use the high level - OpenSearch client which allows using Python objects to perform searches. - - """ + """This function is intended for Operators that forward high level client objects.""" auth = (self.conn.login, self.conn.password) client = OpenSearch( hosts=[{"host": self.conn.host, "port": self.conn.port}], diff --git a/airflow/providers/opensearch/operators/opensearch.py b/airflow/providers/opensearch/operators/opensearch.py index dee76571b471b..3f329441729d5 100644 --- a/airflow/providers/opensearch/operators/opensearch.py +++ b/airflow/providers/opensearch/operators/opensearch.py @@ -25,7 +25,6 @@ from airflow.providers.opensearch.hooks.opensearch import OpenSearchHook if TYPE_CHECKING: - from airflow.utils.context import Context diff --git a/docs/apache-airflow/extra-packages-ref.rst b/docs/apache-airflow/extra-packages-ref.rst index c24fe2d437278..1fd5e157f6a60 100644 --- a/docs/apache-airflow/extra-packages-ref.rst +++ b/docs/apache-airflow/extra-packages-ref.rst @@ -301,6 +301,8 @@ These are extras that provide support for integration with external systems via +---------------------+-----------------------------------------------------+--------------------------------------+--------------+ | openlineage | ``pip install 'apache-airflow[openlineage]'`` | Sending OpenLineage events | | +---------------------+-----------------------------------------------------+--------------------------------------+--------------+ +| opensearch | ``pip install 'apache-airflow[opensearch]'`` | Opensearch hooks and operators | | ++---------------------+-----------------------------------------------------+--------------------------------------+--------------+ | papermill | ``pip install 'apache-airflow[papermill]'`` | Papermill hooks and operators | | +---------------------+-----------------------------------------------------+--------------------------------------+--------------+ | sftp | ``pip install 'apache-airflow[sftp]'`` | SFTP hooks, operators and sensors | | From 19da2e0b3e4602275cddbe3ec2997c247f08d2e3 Mon Sep 17 00:00:00 2001 From: cjames23 Date: Mon, 2 Oct 2023 21:34:20 -0700 Subject: [PATCH 12/30] Fix documentation for OpenSearch 
provider --- .../opensearch/operators/opensearch.py | 2 +- airflow/providers/opensearch/provider.yaml | 2 +- .../changelog.rst | 19 +++ .../connections/index.rst | 29 +++++ .../connections/opensearch.rst | 37 ++++++ .../index.rst | 4 +- .../operators/index.rst | 29 +++++ .../operators/opensearch.rst | 4 +- .../security.rst | 38 ++++++ docs/spelling_wordlist.txt | 2 + generated/provider_dependencies.json | 2 +- images/breeze/output-commands-hash.txt | 18 +-- images/breeze/output-commands.svg | 108 +++++++++--------- images/breeze/output_build-docs.svg | 8 +- ...release-management_add-back-references.svg | 10 +- ...ement_generate-issue-content-providers.svg | 12 +- ...agement_prepare-provider-documentation.svg | 10 +- ...e-management_prepare-provider-packages.svg | 12 +- ...output_release-management_publish-docs.svg | 10 +- images/breeze/output_sbom.svg | 20 ++-- ...t_sbom_generate-providers-requirements.svg | 82 +++++++------ 21 files changed, 308 insertions(+), 150 deletions(-) create mode 100644 docs/apache-airflow-providers-opensearch/changelog.rst create mode 100644 docs/apache-airflow-providers-opensearch/connections/index.rst create mode 100644 docs/apache-airflow-providers-opensearch/connections/opensearch.rst create mode 100644 docs/apache-airflow-providers-opensearch/operators/index.rst create mode 100644 docs/apache-airflow-providers-opensearch/security.rst diff --git a/airflow/providers/opensearch/operators/opensearch.py b/airflow/providers/opensearch/operators/opensearch.py index 3f329441729d5..1af8e32900804 100644 --- a/airflow/providers/opensearch/operators/opensearch.py +++ b/airflow/providers/opensearch/operators/opensearch.py @@ -34,7 +34,7 @@ class OpenSearchQueryOperator(BaseOperator): .. seealso:: For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:OpenSearchSearchOperator` + :ref:`howto/operator:OpenSearchQueryOperator` :param: query: A Dictionary Open Search DSL query. :param: search_object: A Search object from opensearch-dsl. diff --git a/airflow/providers/opensearch/provider.yaml b/airflow/providers/opensearch/provider.yaml index 270d7cd2085dd..11a59e6c327c4 100644 --- a/airflow/providers/opensearch/provider.yaml +++ b/airflow/providers/opensearch/provider.yaml @@ -26,7 +26,7 @@ versions: - 1.0.0 dependencies: - - apache-airflow>=2.5.0 + - apache-airflow>=2.4.3 - opensearch-py>=2.2.0 integrations: diff --git a/docs/apache-airflow-providers-opensearch/changelog.rst b/docs/apache-airflow-providers-opensearch/changelog.rst new file mode 100644 index 0000000000000..d1bb0e4735929 --- /dev/null +++ b/docs/apache-airflow-providers-opensearch/changelog.rst @@ -0,0 +1,19 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +.. 
include:: ../../airflow/providers/opensearch/CHANGELOG.rst diff --git a/docs/apache-airflow-providers-opensearch/connections/index.rst b/docs/apache-airflow-providers-opensearch/connections/index.rst new file mode 100644 index 0000000000000..72d2846638b3a --- /dev/null +++ b/docs/apache-airflow-providers-opensearch/connections/index.rst @@ -0,0 +1,29 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + + +OpenSearch Connections +======================= + + +.. toctree:: + :maxdepth: 1 + :glob: + + * diff --git a/docs/apache-airflow-providers-opensearch/connections/opensearch.rst b/docs/apache-airflow-providers-opensearch/connections/opensearch.rst new file mode 100644 index 0000000000000..382629fd0cbc8 --- /dev/null +++ b/docs/apache-airflow-providers-opensearch/connections/opensearch.rst @@ -0,0 +1,37 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + + +OpenSearch Connection +===================== +The Open Search connection provides credentials for an OpenSearch instance. + +Configuring the Connection +-------------------------- +Host (required) + The host address of the Open Search instance. +Login (required) + The login user. +Password (required) + The password for the login user. +Extra (optional) + Specifying the extra parameters as a (json dictionary) that can be used in the Open Search connection. + The following parameters are all optional: + + * ``use_ssl``: Boolean on requiring an ssl connection. + * ``verify_certs``: Boolean indicating to verify certs for ssl. diff --git a/docs/apache-airflow-providers-opensearch/index.rst b/docs/apache-airflow-providers-opensearch/index.rst index c89306086b5cf..d122ba3831577 100644 --- a/docs/apache-airflow-providers-opensearch/index.rst +++ b/docs/apache-airflow-providers-opensearch/index.rst @@ -1,3 +1,4 @@ + .. Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information @@ -35,8 +36,6 @@ Connection types Operators - Logging for Tasks - Configuration .. 
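The connection documented above can also be seeded programmatically, as the provider tests do; a minimal sketch, assuming the ``opensearch`` connection type registered by this provider and an illustrative host:

.. code-block:: python

    from airflow.models import Connection
    from airflow.utils import db

    db.merge_conn(
        Connection(
            conn_id="opensearch_default",
            conn_type="opensearch",
            host="search-example.us-east-1.es.amazonaws.com",  # illustrative
            login="my-user",
            password="my-password",
            # Optional extras described above.
            extra={"use_ssl": True, "verify_certs": True},
        )
    )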
toctree:: :hidden: @@ -57,7 +56,6 @@ :maxdepth: 1 :caption: Resources - Example DAGs PyPI Repository Installing from sources diff --git a/docs/apache-airflow-providers-opensearch/operators/index.rst b/docs/apache-airflow-providers-opensearch/operators/index.rst new file mode 100644 index 0000000000000..bc0f38e1108f6 --- /dev/null +++ b/docs/apache-airflow-providers-opensearch/operators/index.rst @@ -0,0 +1,29 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + + +Open Search Operators +====================== + + +.. toctree:: + :maxdepth: 1 + :glob: + + * diff --git a/docs/apache-airflow-providers-opensearch/operators/opensearch.rst b/docs/apache-airflow-providers-opensearch/operators/opensearch.rst index 09e6dc5825d9f..ade44acd5e515 100644 --- a/docs/apache-airflow-providers-opensearch/operators/opensearch.rst +++ b/docs/apache-airflow-providers-opensearch/operators/opensearch.rst @@ -42,7 +42,7 @@ to create a new index in an Open Search domain. :end-before: [END howto_operator_opensearch_create_index] -.. _howto/operator:OpenSearchAddDocumentOperator +.. _howto/operator:OpenSearchAddDocumentOperator: Add a Document to an Index on OpenSearch ========================================= @@ -57,7 +57,7 @@ to add single documents to an Open Search Index :end-before: [END howto_operator_opensearch_add_document] -.. _howto/operator:OpenSearchQueryOperator +.. _howto/operator:OpenSearchQueryOperator: Run a query against an Open Search Index ========================================= diff --git a/docs/apache-airflow-providers-opensearch/security.rst b/docs/apache-airflow-providers-opensearch/security.rst new file mode 100644 index 0000000000000..66c6f79a4ecfc --- /dev/null +++ b/docs/apache-airflow-providers-opensearch/security.rst @@ -0,0 +1,38 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Releasing security patches +-------------------------- + +Airflow providers are released independently from Airflow itself and the information about vulnerabilities +is published separately. 
You can upgrade providers independently from Airflow itself, following the +instructions found in :doc:`apache-airflow:installation/installing-from-pypi`. + +When we release a provider version, the development is always done from the ``main`` branch where we prepare +the next version. The provider uses strict `SemVer <https://semver.org/>`_ versioning policy. Depending on +the scope of the change, the provider will get ``MAJOR`` version upgrade when there are +breaking changes, ``MINOR`` version upgrade when there are new features or ``PATCHLEVEL`` version upgrade +when there are only bug fixes (including security bugfixes) - and this is the only version that receives +security fixes by default, so you should upgrade to the latest version of the provider if you want to receive +all released security fixes. + +The only exception to that rule is when we have a critical security fix and good reason to provide an +out-of-band release for the provider, in which case stakeholders in the provider might decide to cherry-pick +and prepare a branch for an older version of the provider following the +`mixed governance model `_, +which requires interested parties to cherry-pick and test the fixes. diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index 549492a1133b5..9f1525ec94314 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -1053,6 +1053,8 @@ Oozie openapi openfaas openlineage +OpenSearch +opensearch oper OperatorLineage Opsgenie diff --git a/generated/provider_dependencies.json b/generated/provider_dependencies.json index c5cdd0b152097..aa7f403fba9bd 100644 --- a/generated/provider_dependencies.json +++ b/generated/provider_dependencies.json @@ -673,7 +673,7 @@ }, "opensearch": { "deps": [ - "apache-airflow>=2.5.0", + "apache-airflow>=2.4.3", "opensearch-py>=2.2.0" ], "cross-providers-deps": [], diff --git a/images/breeze/output-commands-hash.txt b/images/breeze/output-commands-hash.txt index a13a0f434728b..b68a1b025d2ef 100644 --- a/images/breeze/output-commands-hash.txt +++ b/images/breeze/output-commands-hash.txt @@ -2,7 +2,7 @@ # Please do not solve it but run `breeze setup regenerate-command-images`. # This command should fix the conflict and regenerate help images that you have conflict with. 
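# The dependency pins recorded above in generated/provider_dependencies.json
# can be inspected directly; a small sketch, assuming it runs from the
# repository root where that generated file lives:
import json
from pathlib import Path

deps = json.loads(Path("generated/provider_dependencies.json").read_text())
# Prints: ['apache-airflow>=2.4.3', 'opensearch-py>=2.2.0']
print(deps["opensearch"]["deps"])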
main:d78f3561b8937512f5d76c60e5fb1b7a -build-docs:c5da2956cfff3989a9699c32aee63a13 +build-docs:27f59f285448615a47ee2ee22645ef2b ci:find-backtracking-candidates:17fe56b867a745e5032a08dfcd3f73ee ci:fix-ownership:3e5a73533cc96045e72cb258783cfc96 ci:free-space:49af17b032039c05c41a7a8283f365cc @@ -36,25 +36,25 @@ prod-image:build:20f84ddadc2fe4ae2723b7ccdde0197f prod-image:pull:3817ef211b023b76df84ee1110ef64dd prod-image:verify:bd2b78738a7c388dbad6076c41a9f906 prod-image:e9ecd759e51ebd926df3170b29d1d2dc -release-management:add-back-references:6da27012538a7cc79ddd4ec650470470 +release-management:add-back-references:298fd8d5dd342a076037e996f39ba371 release-management:create-minor-branch:a3834afc4aa5d1e98002c9e9e7a9931d release-management:generate-constraints:01aef235b11e59ed7f10c970a5cdaba7 -release-management:generate-issue-content-providers:756d5dfd135c3e473756022ca6150c24 +release-management:generate-issue-content-providers:19c4c98c30b820685e5775a5396aebdf release-management:generate-providers-metadata:d4e8e5cfaa024e3963af02d7a873048d release-management:install-provider-packages:34c38aca17d23dbb454fe7a6bfd8e630 release-management:prepare-airflow-package:85d01c57e5b5ee0fb9e5f9d9706ed3b5 -release-management:prepare-provider-documentation:a53cd338bd719c77108b53bc8d45b634 -release-management:prepare-provider-packages:fc69d2ab8abdcbafcaaa63da380f4b76 -release-management:publish-docs:45a6ea090bfcf564ea0dd8fc61655d8a +release-management:prepare-provider-documentation:d0dbef40ff62a19c966889f3735e747c +release-management:prepare-provider-packages:5a87973bd373d20b6881f6fd62fbeb05 +release-management:publish-docs:130b14797fc4cd5f33d39c8277584f1c release-management:release-prod-images:cfbfe8b19fee91fd90718f98ef2fd078 release-management:start-rc-process:b27bd524dd3c89f50a747b60a7e892c1 release-management:start-release:419f48f6a4ff4457cb9de7ff496aebbe release-management:update-constraints:02ec4b119150e3fdbac52026e94820ef release-management:verify-provider-packages:96dce5644aad6b37080acf77b3d8de3a -release-management:885a5fe8a39a3773011cf1f9bd2983ad -sbom:generate-providers-requirements:f8328b801efa7908d5b14b25a0097c4d +release-management:856c346e8ec1cf320f656bcfe0af7451 +sbom:generate-providers-requirements:c09b5d10014dda99ff76e45cdd742fcd sbom:update-sbom-information:653be48be70b4b7ff5172d491aadc694 -sbom:104afc4ac8c007dcd99218d1b040047e +sbom:d77b8267b8b7e99c09c0eabd467b3d54 setup:autocomplete:fffcd49e102e09ccd69b3841a9e3ea8e setup:check-all-params-in-groups:7aa55fa1b0f17a6f7b7ca225c6b82574 setup:config:fd32471ee31894decf91984615771add diff --git a/images/breeze/output-commands.svg b/images/breeze/output-commands.svg index 19b8fd9d54dd5..9594f35caf6a6 100644 --- a/images/breeze/output-commands.svg +++ b/images/breeze/output-commands.svg @@ -35,8 +35,8 @@ .breeze-help-r1 { fill: #c5c8c6;font-weight: bold } .breeze-help-r2 { fill: #c5c8c6 } .breeze-help-r3 { fill: #d0b344;font-weight: bold } -.breeze-help-r4 { fill: #868887 } -.breeze-help-r5 { fill: #68a0b3;font-weight: bold } +.breeze-help-r4 { fill: #68a0b3;font-weight: bold } +.breeze-help-r5 { fill: #868887 } .breeze-help-r6 { fill: #98a84b;font-weight: bold } .breeze-help-r7 { fill: #8d7b39 } @@ -217,59 +217,59 @@ -Usage: breeze [OPTIONS] COMMAND [ARGS]... +Usage: breeze [OPTIONSCOMMAND [ARGS]... 
-╭─ Basic flags ────────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---python-pPython major/minor version used in Airflow image for images.(>3.8< | 3.9 | 3.10 | 3.11) -[default: 3.8]                                               ---backend-bDatabase backend to use.(>sqlite< | mysql | postgres | mssql)[default: sqlite] ---postgres-version-PVersion of Postgres used.(>11< | 12 | 13 | 14 | 15)[default: 11] ---mysql-version-MVersion of MySQL used.(>5.7< | 8.0 | 8.1)[default: 5.7] ---mssql-version-SVersion of MsSQL used.(>2017-latest< | 2019-latest)[default: 2017-latest] ---integrationIntegration(s) to enable when running (can be more than one).                             -(all | all-testable | cassandra | celery | kafka | kerberos | mongo | openlineage | otel  -| pinot | statsd | trino)                                                                 ---forward-credentials-fForward local credentials to container when running. ---db-reset-dReset DB when entering the container. ---max-timeMaximum time that the command should take - if it takes longer, the command will fail. -(INTEGER RANGE)                                                                        ---github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow] ---builderBuildx builder used to perform `docker buildx build` commands.(TEXT) -[default: autodetect]                                          -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---verbose-vPrint verbose information about performed steps. ---dry-run-DIf dry-run is set, commands are only printed, not executed. ---answer-aForce answer to questions.(y | n | q | yes | no | quit) ---help-hShow this message and exit. -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Developer commands ─────────────────────────────────────────────────────────────────────────────────────────────────╮ -start-airflow     Enter breeze environment and starts all Airflow components in the tmux session. Compile assets   -if contents of www directory changed.                                                            -static-checks     Run static checks.                                                                               -build-docs        Build documents.                                                                                 -down              Stop running breeze environment.                                                                 -shell             Enter breeze environment. this is the default command use when no other is selected.             -exec              Joins the interactive shell of running airflow container.                                        -compile-www-assetsCompiles www assets.                                                                             -cleanup           Cleans the cache of parameters, docker cache and optionally built CI/PROD images.                
-╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Testing commands ───────────────────────────────────────────────────────────────────────────────────────────────────╮ -testing        Tools that developers can use to run tests                                                          -k8s            Tools that developers use to run Kubernetes tests                                                   -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Image commands ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ -ci-image         Tools that developers can use to manually manage CI images                                        -prod-image       Tools that developers can use to manually manage PROD images                                      -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Release management commands ────────────────────────────────────────────────────────────────────────────────────────╮ -release-management     Tools that release managers can use to prepare and manage Airflow releases                  -sbom                   Tools that release managers can use to prepare sbom information                             -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Other commands ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ -setup     Tools that developers can use to configure Breeze                                                        -ci        Tools that CI workflows use to cleanup/manage CI environment                                             -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Basic flags ────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +--python-pPython major/minor version used in Airflow image for images.(>3.8< | 3.9 | 3.10 | 3.11) +[default: 3.8]                                               +--backend-bDatabase backend to use.(>sqlite< | mysql | postgres | mssql)[default: sqlite] +--postgres-version-PVersion of Postgres used.(>11< | 12 | 13 | 14 | 15)[default: 11] +--mysql-version-MVersion of MySQL used.(>5.7< | 8.0 | 8.1)[default: 5.7] +--mssql-version-SVersion of MsSQL used.(>2017-latest< | 2019-latest)[default: 2017-latest] +--integrationIntegration(s) to enable when running (can be more than one).                             +(all | all-testable | cassandra | celery | kafka | kerberos | mongo | openlineage | otel  +| pinot | statsd | trino)                                                                 +--forward-credentials-fForward local credentials to container when running. +--db-reset-dReset DB when entering the container. +--max-timeMaximum time that the command should take - if it takes longer, the command will fail. 
+(INTEGER RANGE)                                                                        +--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow] +--builderBuildx builder used to perform `docker buildx build` commands.(TEXT) +[default: autodetect]                                          +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ +--verbose-vPrint verbose information about performed steps. +--dry-run-DIf dry-run is set, commands are only printed, not executed. +--answer-aForce answer to questions.(y | n | q | yes | no | quit) +--help-hShow this message and exit. +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Developer commands ─────────────────────────────────────────────────────────────────────────────────────────────────╮ +start-airflow     Enter breeze environment and starts all Airflow components in the tmux session. Compile assets   +if contents of www directory changed.                                                            +static-checks     Run static checks.                                                                               +build-docs        Build documents.                                                                                 +down              Stop running breeze environment.                                                                 +shell             Enter breeze environment. this is the default command use when no other is selected.             +exec              Joins the interactive shell of running airflow container.                                        +compile-www-assetsCompiles www assets.                                                                             +cleanup           Cleans the cache of parameters, docker cache and optionally built CI/PROD images.                
+╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Testing commands ───────────────────────────────────────────────────────────────────────────────────────────────────╮ +testing        Tools that developers can use to run tests                                                          +k8s            Tools that developers use to run Kubernetes tests                                                   +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Image commands ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ +ci-image         Tools that developers can use to manually manage CI images                                        +prod-image       Tools that developers can use to manually manage PROD images                                      +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Release management commands ────────────────────────────────────────────────────────────────────────────────────────╮ +release-management     Tools that release managers can use to prepare and manage Airflow releases                  +sbom                   Tools that release managers can use to prepare sbom information                             +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Other commands ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ +setup     Tools that developers can use to configure Breeze                                                        +ci        Tools that CI workflows use to cleanup/manage CI environment                                             +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/images/breeze/output_build-docs.svg b/images/breeze/output_build-docs.svg index 96a78ce2025dc..606151a8d49b6 100644 --- a/images/breeze/output_build-docs.svg +++ b/images/breeze/output_build-docs.svg @@ -165,10 +165,10 @@                          | datadog | dbt.cloud | dingding | discord | docker | elasticsearch | exasol | facebook | ftp                          | github | google | grpc | hashicorp | http | imap | influxdb | jdbc | jenkins |                          microsoft.azure | microsoft.mssql | microsoft.psrp | microsoft.winrm | mongo | mysql | neo4j -                         | odbc | openfaas | openlineage | opsgenie | oracle | pagerduty | papermill | plexus | -                         postgres | presto | redis | salesforce | samba | segment | sendgrid | sftp | singularity | -                         slack | smtp | snowflake | sqlite | ssh | tableau | tabular | telegram | trino | vertica | -                         yandex | zendesk | providers-index]... +                         | odbc | openfaas | openlineage | opensearch | opsgenie | oracle | pagerduty | papermill | +                         plexus | postgres | presto | redis | salesforce | samba | segment | sendgrid | sftp | +                         singularity | slack | smtp | snowflake | sqlite | ssh | tableau | tabular | telegram | trino +                         | vertica | yandex | zendesk | providers-index]... Build documents. 
diff --git a/images/breeze/output_release-management_add-back-references.svg b/images/breeze/output_release-management_add-back-references.svg index 6948f038b3a81..733266369ae72 100644 --- a/images/breeze/output_release-management_add-back-references.svg +++ b/images/breeze/output_release-management_add-back-references.svg @@ -156,11 +156,11 @@                                                      github | google | grpc | hashicorp | http | imap | influxdb |                                                      jdbc | jenkins | microsoft.azure | microsoft.mssql |                                                      microsoft.psrp | microsoft.winrm | mongo | mysql | neo4j | odbc | -                                                     openfaas | openlineage | opsgenie | oracle | pagerduty | -                                                     papermill | plexus | postgres | presto | redis | salesforce | -                                                     samba | segment | sendgrid | sftp | singularity | slack | smtp | -                                                     snowflake | sqlite | ssh | tableau | tabular | telegram | trino | -                                                     vertica | yandex | zendesk]... +                                                     openfaas | openlineage | opensearch | opsgenie | oracle | +                                                     pagerduty | papermill | plexus | postgres | presto | redis | +                                                     salesforce | samba | segment | sendgrid | sftp | singularity | +                                                     slack | smtp | snowflake | sqlite | ssh | tableau | tabular | +                                                     telegram | trino | vertica | yandex | zendesk]... Command to add back references for documentation to make it backward compatible. diff --git a/images/breeze/output_release-management_generate-issue-content-providers.svg b/images/breeze/output_release-management_generate-issue-content-providers.svg index 85e734f89ae2e..193564c97acba 100644 --- a/images/breeze/output_release-management_generate-issue-content-providers.svg +++ b/images/breeze/output_release-management_generate-issue-content-providers.svg @@ -184,12 +184,12 @@                                                                   imap | influxdb | jdbc | jenkins | microsoft.azure |                                                                   microsoft.mssql | microsoft.psrp | microsoft.winrm |                                                                   mongo | mysql | neo4j | odbc | openfaas | -                                                                  openlineage | opsgenie | oracle | pagerduty | -                                                                  papermill | plexus | postgres | presto | redis | -                                                                  salesforce | samba | segment | sendgrid | sftp | -                                                                  singularity | slack | smtp | snowflake | sqlite | -                                                                  ssh | tableau | tabular | telegram | trino | vertica -                                                                  | yandex | zendesk]... 
+                                                                  openlineage | opensearch | opsgenie | oracle | +                                                                  pagerduty | papermill | plexus | postgres | presto | +                                                                  redis | salesforce | samba | segment | sendgrid | +                                                                  sftp | singularity | slack | smtp | snowflake | +                                                                  sqlite | ssh | tableau | tabular | telegram | trino +                                                                  | vertica | yandex | zendesk]... Generates content for issue to test the release. diff --git a/images/breeze/output_release-management_prepare-provider-documentation.svg b/images/breeze/output_release-management_prepare-provider-documentation.svg index 7bd0169b95d94..56451a3061494 100644 --- a/images/breeze/output_release-management_prepare-provider-documentation.svg +++ b/images/breeze/output_release-management_prepare-provider-documentation.svg @@ -192,11 +192,11 @@                                                                 google | grpc | hashicorp | http | imap | influxdb |                                                                 jdbc | jenkins | microsoft.azure | microsoft.mssql |                                                                 microsoft.psrp | microsoft.winrm | mongo | mysql | -                                                                neo4j | odbc | openfaas | openlineage | opsgenie | -                                                                oracle | pagerduty | papermill | plexus | postgres | -                                                                presto | redis | salesforce | samba | segment | -                                                                sendgrid | sftp | singularity | slack | smtp | -                                                                snowflake | sqlite | ssh | tableau | tabular | +                                                                neo4j | odbc | openfaas | openlineage | opensearch | +                                                                opsgenie | oracle | pagerduty | papermill | plexus | +                                                                postgres | presto | redis | salesforce | samba | +                                                                segment | sendgrid | sftp | singularity | slack | smtp +                                                                | snowflake | sqlite | ssh | tableau | tabular |                                                                 telegram | trino | vertica | yandex | zendesk]... Prepare CHANGELOGREADME and COMMITS information for providers. 
diff --git a/images/breeze/output_release-management_prepare-provider-packages.svg b/images/breeze/output_release-management_prepare-provider-packages.svg index c6e542c6dbbf1..1950e7ab89aea 100644 --- a/images/breeze/output_release-management_prepare-provider-packages.svg +++ b/images/breeze/output_release-management_prepare-provider-packages.svg @@ -169,12 +169,12 @@                                                            ftp | github | google | grpc | hashicorp | http | imap |                                                            influxdb | jdbc | jenkins | microsoft.azure |                                                            microsoft.mssql | microsoft.psrp | microsoft.winrm | mongo -                                                           | mysql | neo4j | odbc | openfaas | openlineage | opsgenie -                                                           | oracle | pagerduty | papermill | plexus | postgres | -                                                           presto | redis | salesforce | samba | segment | sendgrid | -                                                           sftp | singularity | slack | smtp | snowflake | sqlite | -                                                           ssh | tableau | tabular | telegram | trino | vertica | -                                                           yandex | zendesk]... +                                                           | mysql | neo4j | odbc | openfaas | openlineage | +                                                           opensearch | opsgenie | oracle | pagerduty | papermill | +                                                           plexus | postgres | presto | redis | salesforce | samba | +                                                           segment | sendgrid | sftp | singularity | slack | smtp | +                                                           snowflake | sqlite | ssh | tableau | tabular | telegram | +                                                           trino | vertica | yandex | zendesk]... Prepare sdist/whl packages of Airflow Providers. diff --git a/images/breeze/output_release-management_publish-docs.svg b/images/breeze/output_release-management_publish-docs.svg index cab90ff6e2382..00a8dc505a604 100644 --- a/images/breeze/output_release-management_publish-docs.svg +++ b/images/breeze/output_release-management_publish-docs.svg @@ -193,11 +193,11 @@                                               docker | elasticsearch | exasol | facebook | ftp | github | google |                                               grpc | hashicorp | http | imap | influxdb | jdbc | jenkins |                                               microsoft.azure | microsoft.mssql | microsoft.psrp | microsoft.winrm | -                                              mongo | mysql | neo4j | odbc | openfaas | openlineage | opsgenie | -                                              oracle | pagerduty | papermill | plexus | postgres | presto | redis | -                                              salesforce | samba | segment | sendgrid | sftp | singularity | slack | -                                              smtp | snowflake | sqlite | ssh | tableau | tabular | telegram | trino | -                                              vertica | yandex | zendesk | providers-index]... 
+                                              mongo | mysql | neo4j | odbc | openfaas | openlineage | opensearch | +                                              opsgenie | oracle | pagerduty | papermill | plexus | postgres | presto | +                                              redis | salesforce | samba | segment | sendgrid | sftp | singularity | +                                              slack | smtp | snowflake | sqlite | ssh | tableau | tabular | telegram | +                                              trino | vertica | yandex | zendesk | providers-index]... Command to publish generated documentation to airflow-site diff --git a/images/breeze/output_sbom.svg b/images/breeze/output_sbom.svg index 7210a7cb2f686..03d2956956aee 100644 --- a/images/breeze/output_sbom.svg +++ b/images/breeze/output_sbom.svg @@ -35,8 +35,8 @@ .breeze-sbom-r1 { fill: #c5c8c6;font-weight: bold } .breeze-sbom-r2 { fill: #c5c8c6 } .breeze-sbom-r3 { fill: #d0b344;font-weight: bold } -.breeze-sbom-r4 { fill: #868887 } -.breeze-sbom-r5 { fill: #68a0b3;font-weight: bold } +.breeze-sbom-r4 { fill: #68a0b3;font-weight: bold } +.breeze-sbom-r5 { fill: #868887 } .breeze-sbom-r6 { fill: #98a84b;font-weight: bold } @@ -90,17 +90,17 @@ -Usage: breeze sbom [OPTIONS] COMMAND [ARGS]... +Usage: breeze sbom [OPTIONSCOMMAND [ARGS]... Tools that release managers can use to prepare sbom information -╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---help-hShow this message and exit. -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ SBOM commands ──────────────────────────────────────────────────────────────────────────────────────────────────────╮ -update-sbom-information                      Update SBOM information in airflow-site project.                      -generate-providers-requirements              Generate requirements for selected provider.                          -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ +--help-hShow this message and exit. +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ SBOM commands ──────────────────────────────────────────────────────────────────────────────────────────────────────╮ +update-sbom-information                      Update SBOM information in airflow-site project.                      +generate-providers-requirements              Generate requirements for selected provider.                          
diff --git a/images/breeze/output_sbom_generate-providers-requirements.svg b/images/breeze/output_sbom_generate-providers-requirements.svg
index e9f8758c4640e..ad06ddc249908 100644
[SVG text residue: old and new renderings of the `breeze sbom generate-providers-requirements` help screen. Recoverable text: "Usage: breeze sbom generate-providers-requirements [OPTIONS]" / "Generate requirements for selected provider."; a "Generate provider requirements flags" box with --airflow-version ("Airflow version to use to generate the requirements", TEXT), --python ("Python version to generate the requirements for", 3.6 | 3.7 | 3.8 | 3.9 | 3.10 | 3.11) and --provider-id ("Provider to generate the requirements for"); a "Parallel running" box with --run-in-parallel, --parallelism (INTEGER RANGE, [default: 4; 1<=x<=8]), --skip-cleanup, --debug-resources and --include-success-outputs; a "Common options" box with --verbose / -v, --dry-run / -D, --answer / -a (y | n | q | yes | no | quit) and --help / -h. In the new rendering the --provider-id choices gain opensearch, the flag is marked * and [required], and the old --force flag ("Force update providers requirements even if they already exist.") is dropped.]
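An aside on usage: the regenerated help screens above document the CLI surface this series extends. The command could be driven from Python roughly as follows; this is a minimal sketch, assuming breeze is installed and on PATH, with the provider id and Python version being simply the values relevant to this series:

import subprocess

# Hypothetical driver for the command documented above; flags are taken
# from the regenerated help screen.
subprocess.run(
    [
        "breeze",
        "sbom",
        "generate-providers-requirements",
        "--provider-id",
        "opensearch",
        "--python",
        "3.8",
    ],
    check=True,  # raise CalledProcessError if breeze exits non-zero
)

This is equivalent to running the same command directly in a shell.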
From 1f92e166e2c2db3b40d93f9182c20b072b9d6598 Mon Sep 17 00:00:00 2001
From: cjames23
Date: Mon, 2 Oct 2023 21:39:14 -0700
Subject: [PATCH 13/30] Fix breeze cmds regenerated.

---
 images/breeze/output-commands-hash.txt        |  4 +-
 ...t_sbom_generate-providers-requirements.svg | 42 +++++++++----------
 2 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/images/breeze/output-commands-hash.txt b/images/breeze/output-commands-hash.txt
index b68a1b025d2ef..d58adc4edc625 100644
--- a/images/breeze/output-commands-hash.txt
+++ b/images/breeze/output-commands-hash.txt
@@ -52,9 +52,9 @@ release-management:start-release:419f48f6a4ff4457cb9de7ff496aebbe
 release-management:update-constraints:02ec4b119150e3fdbac52026e94820ef
 release-management:verify-provider-packages:96dce5644aad6b37080acf77b3d8de3a
 release-management:856c346e8ec1cf320f656bcfe0af7451
-sbom:generate-providers-requirements:c09b5d10014dda99ff76e45cdd742fcd
+sbom:generate-providers-requirements:51e0c660a5f7846ada099e871ebd4c67
 sbom:update-sbom-information:653be48be70b4b7ff5172d491aadc694
-sbom:d77b8267b8b7e99c09c0eabd467b3d54
+sbom:b98ded4bbb76f0cb205cea32f3102aee
 setup:autocomplete:fffcd49e102e09ccd69b3841a9e3ea8e
 setup:check-all-params-in-groups:7aa55fa1b0f17a6f7b7ca225c6b82574
 setup:config:fd32471ee31894decf91984615771add
diff --git a/images/breeze/output_sbom_generate-providers-requirements.svg b/images/breeze/output_sbom_generate-providers-requirements.svg
index ad06ddc249908..b5536101c9019 100644
@@ -38,9 +38,7 @@
 .breeze-sbom-generate-providers-requirements-r4 { fill: #68a0b3;font-weight: bold }
 .breeze-sbom-generate-providers-requirements-r5 { fill: #868887 }
 .breeze-sbom-generate-providers-requirements-r6 { fill: #8d7b39 }
-.breeze-sbom-generate-providers-requirements-r7 { fill: #cc555a }
-.breeze-sbom-generate-providers-requirements-r8 { fill: #8a4346 }
-.breeze-sbom-generate-providers-requirements-r9 { fill: #98a84b;font-weight: bold }
+.breeze-sbom-generate-providers-requirements-r7 { fill: #98a84b;font-weight: bold }
@@ -173,21 +171,21 @@
[SVG text residue: the `breeze sbom generate-providers-requirements` help screen re-rendered once more. Recoverable text: the * and [required] markers on --provider-id are reverted, the --force flag ("Force update providers requirements even if they already exist.") is restored, and the provider list, still including opensearch, is re-wrapped; the "Parallel running" box (--run-in-parallel, --parallelism, --skip-cleanup, --debug-resources, --include-success-outputs) and the "Common options" box (--verbose / -v, --dry-run / -D, --answer / -a, --help / -h) are re-rendered with unchanged content.]
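The output-commands-hash.txt entries above are how pre-commit detects stale help images: each breeze command's help text is hashed, and a changed hash means the corresponding SVG screenshot must be regenerated. A minimal sketch of that idea; the file format is taken from the diff above, but the exact hashing scheme here is an assumption, not Breeze's actual implementation:

import hashlib

def help_hash(help_text: str) -> str:
    # Produce a 32-hex digest comparable to the entries in output-commands-hash.txt.
    return hashlib.md5(help_text.encode("utf-8")).hexdigest()

current_help_text = "..."  # stand-in for the command's rendered --help output
stored = "51e0c660a5f7846ada099e871ebd4c67"  # sbom:generate-providers-requirements, per the diff
if help_hash(current_help_text) != stored:
    print("help text changed: run `breeze setup regenerate-command-images`")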
From 09d6969fd5309be7c72ad254f5047cf5706aa29e Mon Sep 17 00:00:00 2001
From: cjames23
Date: Mon, 2 Oct 2023 22:31:44 -0700
Subject: [PATCH 14/30] Fix opensearch example dag spelling error

---
 tests/system/providers/opensearch/example_opensearch.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tests/system/providers/opensearch/example_opensearch.py b/tests/system/providers/opensearch/example_opensearch.py
index 23e6d8a7398eb..5f483eb80443f 100644
--- a/tests/system/providers/opensearch/example_opensearch.py
+++ b/tests/system/providers/opensearch/example_opensearch.py
@@ -96,7 +96,7 @@ def load_connections():
     add_document_by_class = OpenSearchAddDocumentOperator(
         task_id="add_document_by_class",
-        doc_class=LogDocument(meta={"id": 2}, log_group_id=2, logger="airflow", message="Hello Airflow"),
+        doc_class=LogDocument(meta={"id": 2}, log_group_id=2, logger="airflow", message="hello airflow"),
     )
     # [END howto_operator_opensearch_add_document]
@@ -108,9 +108,7 @@ def load_connections():
     )
     search_object = (
-        Search(index_name=INDEX_NAME)
-        .filter("term", logger="Airflow")
-        .query("match", message="Hellow Airflow")
+        Search(index_name=INDEX_NAME).filter("term", logger="airflow").query("match", message="hello airflow")
     )
     search_high_level = OpenSearchQueryOperator(task_id="high_level_query", search_object=search_object)

From 0fa59c1a0247bd8efcbaeb67d8da9ce0de3809e6 Mon Sep 17 00:00:00 2001
From: cjames23
Date: Tue, 3 Oct 2023 17:28:30 -0700
Subject: [PATCH 15/30] Fix old amazon providers.yaml ref, req 2.5 airflow for
 open search

---
 airflow/providers/amazon/provider.yaml     | 4 ----
 airflow/providers/opensearch/provider.yaml | 2 +-
 generated/provider_dependencies.json       | 6 +-----
 3 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/airflow/providers/amazon/provider.yaml b/airflow/providers/amazon/provider.yaml
index f0532b41bbbcb..7eecbcd8cf794 100644
--- a/airflow/providers/amazon/provider.yaml
+++ b/airflow/providers/amazon/provider.yaml
@@ -89,11 +89,7 @@ dependencies:
   - jsonpath_ng>=1.5.3
   - redshift_connector>=2.0.888
   - sqlalchemy_redshift>=0.8.6
-  - mypy-boto3-rds>=1.24.0
-  - mypy-boto3-redshift-data>=1.24.0
-  - mypy-boto3-appflow>=1.24.0
   - asgiref
-  - mypy-boto3-s3>=1.24.0

 integrations:
diff --git a/airflow/providers/opensearch/provider.yaml b/airflow/providers/opensearch/provider.yaml
index 11a59e6c327c4..270d7cd2085dd 100644
--- a/airflow/providers/opensearch/provider.yaml
+++ b/airflow/providers/opensearch/provider.yaml
@@ -26,7 +26,7 @@ versions:
   - 1.0.0

 dependencies:
-  - apache-airflow>=2.4.3
+  - apache-airflow>=2.5.0
   - opensearch-py>=2.2.0

 integrations:
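The provider.yaml manifest above declares the provider's pins; the bump to apache-airflow>=2.5.0 is what gates installation on older Airflow versions. As a small illustration, the manifest is plain YAML and can be inspected directly; a sketch assuming a checkout of the airflow repository and PyYAML installed:

import yaml

# Read the opensearch provider manifest shown in the diff above; the path
# assumes the current directory is an airflow repository checkout.
with open("airflow/providers/opensearch/provider.yaml") as f:
    manifest = yaml.safe_load(f)

print(manifest["dependencies"])
# per the diff above: ['apache-airflow>=2.5.0', 'opensearch-py>=2.2.0']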
diff --git a/generated/provider_dependencies.json b/generated/provider_dependencies.json
index aa7f403fba9bd..696b969914129 100644
--- a/generated/provider_dependencies.json
+++ b/generated/provider_dependencies.json
@@ -28,10 +28,6 @@
         "boto3>=1.28.0",
         "botocore>=1.31.0",
         "jsonpath_ng>=1.5.3",
-        "mypy-boto3-appflow>=1.24.0",
-        "mypy-boto3-rds>=1.24.0",
-        "mypy-boto3-redshift-data>=1.24.0",
-        "mypy-boto3-s3>=1.24.0",
         "redshift_connector>=2.0.888",
         "sqlalchemy_redshift>=0.8.6",
         "watchtower~=2.0.1"
@@ -673,7 +669,7 @@
   },
   "opensearch": {
     "deps": [
-      "apache-airflow>=2.4.3",
+      "apache-airflow>=2.5.0",
       "opensearch-py>=2.2.0"
     ],
     "cross-providers-deps": [],

From 566da2262f84b3ad07d361c212c28e705c2c766f Mon Sep 17 00:00:00 2001
From: cjames23
Date: Tue, 3 Oct 2023 18:30:54 -0700
Subject: [PATCH 16/30] Fix open search logo location

---
 .../opensearch.png}                           | Bin
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename docs/integration-logos/{aws/Amazon-OpenSearch-light.png => opensearch/opensearch.png} (100%)

diff --git a/docs/integration-logos/aws/Amazon-OpenSearch-light.png b/docs/integration-logos/opensearch/opensearch.png
similarity index 100%
rename from docs/integration-logos/aws/Amazon-OpenSearch-light.png
rename to docs/integration-logos/opensearch/opensearch.png

From 681ca93e187159de9d87637fca3574a42c87e972 Mon Sep 17 00:00:00 2001
From: cjames23
Date: Tue, 3 Oct 2023 19:53:06 -0700
Subject: [PATCH 17/30] Add open search to bug report. Fix open search example
 dag.

---
 .github/ISSUE_TEMPLATE/airflow_providers_bug_report.yml | 1 +
 tests/system/providers/opensearch/example_opensearch.py | 6 +++---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/.github/ISSUE_TEMPLATE/airflow_providers_bug_report.yml b/.github/ISSUE_TEMPLATE/airflow_providers_bug_report.yml
index 02f6b38a852c5..87ff85308d15d 100644
--- a/.github/ISSUE_TEMPLATE/airflow_providers_bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/airflow_providers_bug_report.yml
@@ -80,6 +80,7 @@ body:
         - odbc
         - openfaas
         - openlineage
+        - opensearch
         - opsgenie
         - oracle
         - pagerduty
diff --git a/tests/system/providers/opensearch/example_opensearch.py b/tests/system/providers/opensearch/example_opensearch.py
index 5f483eb80443f..b6e82291b1bdc 100644
--- a/tests/system/providers/opensearch/example_opensearch.py
+++ b/tests/system/providers/opensearch/example_opensearch.py
@@ -20,8 +20,9 @@

 from datetime import datetime, timedelta

-from opensearchpy import Integer, Search, Text
+from opensearchpy import Integer, Text
 from opensearchpy.helpers.document import Document
+from opensearchpy.helpers.search import Search

 from airflow.models.baseoperator import chain
 from airflow.models.dag import DAG
@@ -106,9 +107,8 @@ def load_connections():
         index_name="system_test",
         query={"query": {"bool": {"must": {"match": {"message": "hello world"}}}}},
     )
-
     search_object = (
-        Search(index_name=INDEX_NAME).filter("term", logger="airflow").query("match", message="hello airflow")
+        Search().index(INDEX_NAME).filter("term", logger="airflow").query("match", message="hello airflow")
     )
     search_high_level = OpenSearchQueryOperator(task_id="high_level_query", search_object=search_object)
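Patch 17's change to search_object reflects how the opensearch-py DSL builds queries: the index is attached with .index() rather than passed as a constructor argument. A minimal runnable sketch of the same pattern; the index name and field values mirror the example DAG:

from opensearchpy.helpers.search import Search

# Build the query body locally; filter() adds a non-scoring term clause,
# query() a scored full-text match.
search_object = (
    Search()
    .index("system_test")
    .filter("term", logger="airflow")
    .query("match", message="hello airflow")
)
print(search_object.to_dict())  # the JSON body the operator would execute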
From 61aca328c2572aec9765e30fba6a2f183b7649a7 Mon Sep 17 00:00:00 2001
From: cjames23
Date: Tue, 3 Oct 2023 20:56:05 -0700
Subject: [PATCH 18/30] Add licenses to inits in open search provider

---
 airflow/providers/opensearch/__init__.py       | 16 ++++++++++++++++
 airflow/providers/opensearch/hooks/__init__.py | 16 ++++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/airflow/providers/opensearch/__init__.py b/airflow/providers/opensearch/__init__.py
index e69de29bb2d1d..13a83393a9124 100644
--- a/airflow/providers/opensearch/__init__.py
+++ b/airflow/providers/opensearch/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/airflow/providers/opensearch/hooks/__init__.py b/airflow/providers/opensearch/hooks/__init__.py
index e69de29bb2d1d..13a83393a9124 100644
--- a/airflow/providers/opensearch/hooks/__init__.py
+++ b/airflow/providers/opensearch/hooks/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.

From 1aa8726a6c8b6427702392239dab6f3f616d110b Mon Sep 17 00:00:00 2001
From: cjames23
Date: Wed, 4 Oct 2023 16:47:39 -0700
Subject: [PATCH 19/30] Use opensearch base exception class in operators. Fix
 documentation consistency in naming

---
 airflow/providers/opensearch/hooks/opensearch.py |  4 ++--
 .../providers/opensearch/operators/opensearch.py | 16 +++++++++-------
 .../connections/opensearch.rst                   |  8 ++++----
 .../operators/opensearch.rst                     |  6 +++---
 4 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/airflow/providers/opensearch/hooks/opensearch.py b/airflow/providers/opensearch/hooks/opensearch.py
index 7a6da8cb4e5ea..fa5ddbce9dcc9 100644
--- a/airflow/providers/opensearch/hooks/opensearch.py
+++ b/airflow/providers/opensearch/hooks/opensearch.py
@@ -65,7 +65,7 @@ def get_client(self) -> OpenSearch:

     def search(self, query: dict, index_name: str, **kwargs: Any) -> Any:
         """
-        Runs a search query against the connected OpenSearch cluster.
+        Run a search query against the connected OpenSearch cluster.

         :param: query: The query for the search against OpenSearch.
         :param: index_name: The name of the index to search against
@@ -76,7 +76,7 @@

     def index(self, document: dict, index_name: str, doc_id: int, **kwargs: Any) -> Any:
         """
-        Index a document on open search.
+        Index a document on OpenSearch.

         :param: document: A dictionary representation of the document
         :param: index_name: the name of the index that this document will be associated with
diff --git a/airflow/providers/opensearch/operators/opensearch.py b/airflow/providers/opensearch/operators/opensearch.py
index 1af8e32900804..865c25a001649 100644
--- a/airflow/providers/opensearch/operators/opensearch.py
+++ b/airflow/providers/opensearch/operators/opensearch.py
@@ -20,6 +20,8 @@
 from functools import cached_property
 from typing import TYPE_CHECKING, Any, Sequence

+from opensearchpy.exceptions import OpenSearchException
+
 from airflow.exceptions import AirflowException
 from airflow.models import BaseOperator
 from airflow.providers.opensearch.hooks.opensearch import OpenSearchHook
@@ -78,12 +80,12 @@ def execute(self, context: Context) -> Any:
                 raise AirflowException("Index name is required when using the query input.")
             try:
                 result = self.hook.search(index_name=self.index_name, query=self.query)
-            except Exception as e:
+            except OpenSearchException as e:
                 raise AirflowException(e)
         elif self.search_object is not None:
             try:
                 result = self.search_object.using(self.hook.get_client).execute()
-            except Exception as e:
+            except OpenSearchException as e:
                 raise AirflowException(e)
         else:
             raise AirflowException(
@@ -95,7 +97,7 @@

 class OpenSearchCreateIndexOperator(BaseOperator):
     """
-    Creates a new index on an Open Search cluster with a given index name.
+    Create a new index on an Open Search cluster with a given index name.

     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -128,13 +130,13 @@ def execute(self, context: Context) -> Any:
         """Creates an index on an Open Search cluster."""
         try:
             self.hook.get_client.indices.create(index=self.index_name, body=self.index_body)
-        except Exception as e:
+        except OpenSearchException as e:
             raise AirflowException(e)


 class OpenSearchAddDocumentOperator(BaseOperator):
     """
-    Adds a new document to a given Index. It will either add or overwrite an existing document.
+    Add a new document to a given Index or overwrite an existing one.

     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -175,14 +177,14 @@ def execute(self, context: Context) -> Any:
         try:
             doc = self.doc_class.init(using=self.hook.get_client)
             result = doc.save(using=self.hook.get_client)
-        except Exception as e:
+        except OpenSearchException as e:
             raise AirflowException(e)
         elif self.index_name is not None and self.document is not None and self.doc_id is not None:
             try:
                 result = self.hook.index(
                     index_name=self.index_name, document=self.document, doc_id=self.doc_id
                 )
-            except Exception as e:
+            except OpenSearchException as e:
                 raise AirflowException(e)
         else:
             raise AirflowException(
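The switch from bare Exception to OpenSearchException above narrows error handling to client failures while letting unrelated bugs surface. A minimal sketch of the same pattern outside the operator; the helper name is illustrative:

from airflow.exceptions import AirflowException
from airflow.providers.opensearch.hooks.opensearch import OpenSearchHook
from opensearchpy.exceptions import OpenSearchException

def run_query(hook: OpenSearchHook, index_name: str, query: dict):
    # Only errors raised by the OpenSearch client are wrapped for Airflow;
    # anything else, such as a bug in our own code, propagates unchanged.
    try:
        return hook.search(index_name=index_name, query=query)
    except OpenSearchException as e:
        raise AirflowException(e)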
diff --git a/docs/apache-airflow-providers-opensearch/connections/opensearch.rst b/docs/apache-airflow-providers-opensearch/connections/opensearch.rst
index 382629fd0cbc8..781f5da502cd4 100644
--- a/docs/apache-airflow-providers-opensearch/connections/opensearch.rst
+++ b/docs/apache-airflow-providers-opensearch/connections/opensearch.rst
@@ -19,7 +19,7 @@
 OpenSearch Connection
 =====================

-The Open Search connection provides credentials for an OpenSearch instance.
+The OpenSearch connection provides credentials for an OpenSearch instance.

 Configuring the Connection
 --------------------------
@@ -30,8 +30,8 @@ Login (required)
 Password (required)
     The password for the login user.
 Extra (optional)
-    Specifying the extra parameters as a (json dictionary) that can be used in the Open Search connection.
+    Specifying the extra parameters as a (json dictionary) that can be used in the OpenSearch connection.
     The following parameters are all optional:

-    * ``use_ssl``: Boolean on requiring an ssl connection.
-    * ``verify_certs``: Boolean indicating to verify certs for ssl.
+    * ``use_ssl``: Boolean on requiring an ssl connection. Default is false.
+    * ``verify_certs``: Boolean indicating to verify certs for ssl. Default is false.
diff --git a/docs/apache-airflow-providers-opensearch/operators/opensearch.rst b/docs/apache-airflow-providers-opensearch/operators/opensearch.rst
index ade44acd5e515..77bb8a270e84f 100644
--- a/docs/apache-airflow-providers-opensearch/operators/opensearch.rst
+++ b/docs/apache-airflow-providers-opensearch/operators/opensearch.rst
@@ -30,7 +30,7 @@ Operators
 Create an Index in Open Search
 ==============================

-Use the :class:`~airflow.providers.opensearch.operators.opensearch.OpenSearchCreateIndexOperator`
+Use :class:`~airflow.providers.opensearch.operators.opensearch.OpenSearchCreateIndexOperator`
 to create a new index in an Open Search domain.

@@ -47,7 +47,7 @@ to create a new index in an Open Search domain.
 Add a Document to an Index on OpenSearch
 =========================================

-Use the :class:`~airflow.providers.opensearch.operators.opensearch.OpenSearchAddDocumentOperator`
+Use :class:`~airflow.providers.opensearch.operators.opensearch.OpenSearchAddDocumentOperator`
 to add single documents to an Open Search Index

 .. exampleinclude:: /../../tests/system/providers/opensearch/example_opensearch.py
@@ -62,7 +62,7 @@ to add single documents to an Open Search Index
 Run a query against an Open Search Index
 =========================================

-Use the :class:`~airflow.providers.opensearch.operators.opensearch.OpenSearchQueryOperator`
+Use :class:`~airflow.providers.opensearch.operators.opensearch.OpenSearchQueryOperator`
 to run a query against an Open Search index.

 .. exampleinclude:: /../../tests/system/providers/opensearch/example_opensearch.py

From b071121857895e1c81b3a1729b8f453fc90df67b Mon Sep 17 00:00:00 2001
From: Cary
Date: Wed, 4 Oct 2023 16:53:29 -0700
Subject: [PATCH 20/30] Update airflow/providers/opensearch/hooks/opensearch.py
 doc string.

Co-authored-by: D. Ferruzzi

---
 airflow/providers/opensearch/hooks/opensearch.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/airflow/providers/opensearch/hooks/opensearch.py b/airflow/providers/opensearch/hooks/opensearch.py
index fa5ddbce9dcc9..06389a27ad870 100644
--- a/airflow/providers/opensearch/hooks/opensearch.py
+++ b/airflow/providers/opensearch/hooks/opensearch.py
@@ -29,7 +29,7 @@

 class OpenSearchHook(BaseHook):
     """
-    This Hook provides a thin wrapper around the OpenSearch client.
+    Provide a thin wrapper around the OpenSearch client.
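The connection documentation above maps directly onto Airflow's Connection model. A minimal sketch of registering a matching connection for local testing; the host and credentials are the same illustrative values the provider's test conftest uses, and the extras are the two documented optional booleans:

from airflow.models import Connection
from airflow.utils import db

db.merge_conn(
    Connection(
        conn_id="opensearch_default",
        conn_type="opensearch",
        host="myopensearch.com",
        login="test_user",
        password="test",
        extra='{"use_ssl": true, "verify_certs": true}',
    )
)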

     :param: open_search_conn_id: Connection to use with Open Search
     :param: log_query: Whether to log the query used for Open Search

From 5de3a334a019c2f4af01b686132de24381649f08 Mon Sep 17 00:00:00 2001
From: cjames23
Date: Wed, 4 Oct 2023 16:56:54 -0700
Subject: [PATCH 21/30] Turn connection into cached property in opensearch hook

---
 airflow/providers/opensearch/hooks/opensearch.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/airflow/providers/opensearch/hooks/opensearch.py b/airflow/providers/opensearch/hooks/opensearch.py
index 06389a27ad870..ed5e052a20008 100644
--- a/airflow/providers/opensearch/hooks/opensearch.py
+++ b/airflow/providers/opensearch/hooks/opensearch.py
@@ -45,11 +45,14 @@ def __init__(self, open_search_conn_id: str, log_query: bool, **kwargs: Any):

         self.conn_id = open_search_conn_id
         self.log_query = log_query
-        self.conn = self.get_connection(self.conn_id)
         self.use_ssl = self.conn.extra_dejson.get("use_ssl", False)
         self.verify_certs = self.conn.extra_dejson.get("verify_certs", False)
         self.__SERVICE = "es"

+    @cached_property
+    def conn(self):
+        return self.get_connection(self.conn_id)
+
     @cached_property
     def get_client(self) -> OpenSearch:
         """This function is intended for Operators that forward high level client objects."""

From fa7266e0186d98814989b8bcaf72cd1348e4bf6b Mon Sep 17 00:00:00 2001
From: Cary
Date: Thu, 5 Oct 2023 10:09:01 -0700
Subject: [PATCH 22/30] Update tests/providers/opensearch/hooks/test_opensearch.py
 move hook init for readability.

Co-authored-by: D. Ferruzzi

---
 tests/providers/opensearch/hooks/test_opensearch.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/providers/opensearch/hooks/test_opensearch.py b/tests/providers/opensearch/hooks/test_opensearch.py
index 9470c54639649..938be19393f96 100644
--- a/tests/providers/opensearch/hooks/test_opensearch.py
+++ b/tests/providers/opensearch/hooks/test_opensearch.py
@@ -23,8 +23,9 @@

 class TestOpenSearchHook:
     def test_hook_search(self, mock_hook):
-        result = OpenSearchHook(open_search_conn_id="opensearch_default", log_query=True).search(
-            index_name="testIndex",
+        hook = OpenSearchHook(open_search_conn_id="opensearch_default", log_query=True)
+        result = hook.search(
+            index_name="testIndex",
             query={"size": 1, "query": {"multi_match": {"query": "test", "fields": ["testField"]}}},
         )
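The cached_property conversion in patch 21 defers the connection lookup until it is first needed and then reuses the result. A small self-contained sketch of the pattern; the class here is illustrative, with a print standing in for get_connection():

from functools import cached_property

class LazyHook:
    def __init__(self, conn_id: str):
        self.conn_id = conn_id  # nothing is fetched yet

    @cached_property
    def conn(self):
        print("fetching connection")  # runs only on first access
        return {"conn_id": self.conn_id}  # stand-in for get_connection()

hook = LazyHook("opensearch_default")
hook.conn  # prints "fetching connection"
hook.conn  # served from the instance cache, no second fetch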
From d191184aab74cfab3e0542b764a6143746a7931e Mon Sep 17 00:00:00 2001
From: cjames23
Date: Thu, 5 Oct 2023 10:16:41 -0700
Subject: [PATCH 23/30] Fix misspelling in connection, add dag fixture to
 tests/providers/operators/test_opensearch.py

---
 tests/providers/opensearch/conftest.py        |  2 +-
 .../opensearch/operators/test_opensearch.py   | 39 +++++++++----------
 2 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/tests/providers/opensearch/conftest.py b/tests/providers/opensearch/conftest.py
index e95af6687a422..53c58ca85aacf 100644
--- a/tests/providers/opensearch/conftest.py
+++ b/tests/providers/opensearch/conftest.py
@@ -54,7 +54,7 @@ def setup_connection():
             conn_id="opensearch_default",
             conn_type="opensearch",
             host="myopensearch.com",
-            login="test_usser",
+            login="test_user",
             password="test",
         )
     )
diff --git a/tests/providers/opensearch/operators/test_opensearch.py b/tests/providers/opensearch/operators/test_opensearch.py
index 1eb0739a104e4..f854ac1bf7ac9 100644
--- a/tests/providers/opensearch/operators/test_opensearch.py
+++ b/tests/providers/opensearch/operators/test_opensearch.py
@@ -16,6 +16,7 @@
 # under the License.
 from __future__ import annotations

+import pytest
 from opensearchpy import Document, Keyword, Text

 from airflow.models import DAG
@@ -28,7 +29,7 @@

 TEST_DAG_ID = "unit_tests"
 DEFAULT_DATE = datetime(2018, 1, 1)
-MOCK_SEARCH_RETURN = {"status": "test"}
+EXPECTED_SEARCH_RETURN = {"status": "test"}


 class FakeDocument(Document):
@@ -40,14 +41,21 @@ def save(self, **kwargs):
         return super().save(**kwargs)


-class TestOpenSearchQueryOperator:
-    def setup_method(self):
-        args = {
+@pytest.fixture
+def dag_setup():
+    return DAG(
+        f"{TEST_DAG_ID}test_schedule_dag_once",
+        default_args={
             "owner": "airflow",
             "start_date": DEFAULT_DATE,
-        }
+        },
+        schedule="@once",
+    )
+

-        self.dag = DAG(f"{TEST_DAG_ID}test_schedule_dag_once", default_args=args, schedule="@once")
+class TestOpenSearchQueryOperator:
+    def setup_method(self, dag_setup):
+        self.dag = dag_setup

         self.open_search = OpenSearchQueryOperator(
             task_id="test_opensearch_query_operator",
@@ -65,19 +73,15 @@ def test_init(self):
     def test_search_query(self, mock_hook):
         result = self.open_search.execute({})

-        assert result == MOCK_SEARCH_RETURN
+        assert result == EXPECTED_SEARCH_RETURN


 class TestOpenSearchCreateIndexOperator:
     # This test does not test execute logic because there is only a redirect to the OpenSearch
     # client.
-    def setup_method(self):
-        args = {
-            "owner": "airflow",
-            "start_date": DEFAULT_DATE,
-        }
+    def setup_method(self, dag_setup):

-        self.dag = DAG(f"{TEST_DAG_ID}test_schedule_dag_once", default_args=args, schedule="@once")
+        self.dag = dag_setup

         self.open_search = OpenSearchCreateIndexOperator(
             task_id="test_opensearch_query_operator", index_name="test_index", index_body={"test": 1}
@@ -90,13 +94,8 @@ def test_init(self):


 class TestOpenSearchAddDocumentOperator:
-    def setup_method(self):
-        args = {
-            "owner": "airflow",
-            "start_date": DEFAULT_DATE,
-        }
-
-        self.dag = DAG(f"{TEST_DAG_ID}test_schedule_dag_once", default_args=args, schedule="@once")
+    def setup_method(self, dag_setup):
+        self.dag = dag_setup

         self.open_search = OpenSearchAddDocumentOperator(
             task_id="test_opensearch_doc_operator",
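Patch 23 moves the DAG construction into a shared pytest fixture. For reference, fixtures are injected by parameter name into test functions (plain setup_method hooks do not receive fixtures, so the test-function form is shown here); a minimal sketch with the same DAG settings, and an illustrative dag id:

import pytest
from datetime import datetime

from airflow.models import DAG

@pytest.fixture
def dag_setup():
    return DAG(
        "unit_tests_test_schedule_dag_once",
        default_args={"owner": "airflow", "start_date": datetime(2018, 1, 1)},
        schedule="@once",
    )

def test_dag_fixture(dag_setup):
    # pytest matches the parameter name to the fixture and injects its value
    assert dag_setup.dag_id == "unit_tests_test_schedule_dag_once"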
From 7449ba200f3d5975c711760178e6327a34e0fc2f Mon Sep 17 00:00:00 2001
From: cjames23
Date: Thu, 5 Oct 2023 15:00:09 -0700
Subject: [PATCH 24/30] Fix formatting of opensearch hook tests

---
 tests/providers/opensearch/hooks/test_opensearch.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/tests/providers/opensearch/hooks/test_opensearch.py b/tests/providers/opensearch/hooks/test_opensearch.py
index 938be19393f96..0a39e9db3b129 100644
--- a/tests/providers/opensearch/hooks/test_opensearch.py
+++ b/tests/providers/opensearch/hooks/test_opensearch.py
@@ -23,16 +23,15 @@

 class TestOpenSearchHook:
     def test_hook_search(self, mock_hook):
-        hook = OpenSearchHook(open_search_conn_id="opensearch_default", log_query=True)
-        result = hook.search(
-            index_name="testIndex",
+        self.hook = OpenSearchHook(open_search_conn_id="opensearch_default", log_query=True)
+        result = self.hook.search(
+            index_name="testIndex",
             query={"size": 1, "query": {"multi_match": {"query": "test", "fields": ["testField"]}}},
         )
         assert result == MOCK_SEARCH_RETURN

     def test_hook_index(self, mock_hook):
-        result = OpenSearchHook(open_search_conn_id="opensearch_default", log_query=True).index(
-            index_name="test_index", document={"title": "Monty Python"}, doc_id=3
-        )
+        self.hook = OpenSearchHook(open_search_conn_id="opensearch_default", log_query=True)
+        result = self.hook.index(index_name="test_index", document={"title": "Monty Python"}, doc_id=3)

         assert result == 3

From a6f0b1bbac96f6b062f6fac922a711fc2e15bcfb Mon Sep 17 00:00:00 2001
From: cjames23
Date: Thu, 12 Oct 2023 17:16:07 -0700
Subject: [PATCH 25/30] Fix output-commands-hash.txt

---
 images/breeze/output-commands-hash.txt | 4 +---
 images/breeze/output-commands.svg      | 2 +-
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/images/breeze/output-commands-hash.txt b/images/breeze/output-commands-hash.txt
index f8f8b8584d13b..18bd1a776e473 100644
--- a/images/breeze/output-commands-hash.txt
+++ b/images/breeze/output-commands-hash.txt
@@ -1,10 +1,8 @@
 # This file is automatically generated by pre-commit. If you have a conflict with this file
 # Please do not solve it but run `breeze setup regenerate-command-images`.
 # This command should fix the conflict and regenerate help images that you have conflict with.
-
-main:d78f3561b8937512f5d76c60e5fb1b7a
+main:c97d8a728bb02c2be73002c4b39d0829
 build-docs:27f59f285448615a47ee2ee22645ef2b
-
 ci:find-backtracking-candidates:17fe56b867a745e5032a08dfcd3f73ee
 ci:fix-ownership:3e5a73533cc96045e72cb258783cfc96
 ci:free-space:49af17b032039c05c41a7a8283f365cc
diff --git a/images/breeze/output-commands.svg b/images/breeze/output-commands.svg
index 9594f35caf6a6..847db703d23cd 100644
--- a/images/breeze/output-commands.svg
+++ b/images/breeze/output-commands.svg
@@ -223,7 +223,7 @@
[SVG text residue: the main `breeze` help screen. Recoverable text: the --postgres-version / -P choices gain 16 (now >11< | 12 | 13 | 14 | 15 | 16, [default: 11]); the surrounding flags (--python / -p, --backend / -b, --mysql-version / -M, --mssql-version / -S, --integration) are unchanged context.]

From 9ae9807266c3e84eff494ca520ca88ba4447b9f0 Mon Sep 17 00:00:00 2001
From: cjames23
Date: Thu, 12 Oct 2023 17:22:22 -0700
Subject: [PATCH 26/30] rename OpenSearchHook get_client to client.
---
 airflow/providers/opensearch/hooks/opensearch.py     | 10 +++++-----
 airflow/providers/opensearch/operators/opensearch.py |  8 ++++----
 tests/providers/opensearch/conftest.py               |  2 +-
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/airflow/providers/opensearch/hooks/opensearch.py b/airflow/providers/opensearch/hooks/opensearch.py
index ed5e052a20008..97907e54d52ca 100644
--- a/airflow/providers/opensearch/hooks/opensearch.py
+++ b/airflow/providers/opensearch/hooks/opensearch.py
@@ -54,7 +54,7 @@ def conn(self):
         return self.get_connection(self.conn_id)

     @cached_property
-    def get_client(self) -> OpenSearch:
+    def client(self) -> OpenSearch:
         """This function is intended for Operators that forward high level client objects."""
         auth = (self.conn.login, self.conn.password)
         client = OpenSearch(
@@ -75,7 +75,7 @@
         """
         if self.log_query:
             self.log.info("Searching %s with Query: %s", index_name, query)
-        return self.get_client.search(body=query, index=index_name, **kwargs)
+        return self.client.search(body=query, index=index_name, **kwargs)

     def index(self, document: dict, index_name: str, doc_id: int, **kwargs: Any) -> Any:
         """
@@ -85,7 +85,7 @@
         :param: index_name: the name of the index that this document will be associated with
         :param: doc_id: the numerical identifier that will be used to identify the document on the index.
         """
-        return self.get_client.index(index=index_name, id=doc_id, body=document, **kwargs)
+        return self.client.index(index=index_name, id=doc_id, body=document, **kwargs)

     def delete(self, index_name: str, query: dict | None = None, doc_id: int | None = None) -> Any:
         """
@@ -99,9 +99,9 @@
         if query is not None:
             if self.log_query:
                 self.log.info("Deleting from %s using Query: %s", index_name, query)
-            return self.get_client.delete_by_query(index=index_name, body=query)
+            return self.client.delete_by_query(index=index_name, body=query)
         elif doc_id is not None:
-            return self.get_client.delete(index=index_name, id=doc_id)
+            return self.client.delete(index=index_name, id=doc_id)
         else:
             AirflowException("To delete a document you must include one of either a query or a document id.")
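With the rename above, callers reach the low-level client through a single cached property. A short sketch of that access path; this assumes an "opensearch_default" connection is configured, as in the provider tests, and the index settings are illustrative:

from airflow.providers.opensearch.hooks.opensearch import OpenSearchHook

hook = OpenSearchHook(open_search_conn_id="opensearch_default", log_query=True)

# hook.client is the cached opensearch-py OpenSearch client; the same object
# backs the hook.search(), hook.index() and hook.delete() helpers above.
hook.client.indices.create(
    index="test_index",
    body={"settings": {"index": {"number_of_shards": 1}}},
)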
") diff --git a/airflow/providers/opensearch/operators/opensearch.py b/airflow/providers/opensearch/operators/opensearch.py index 865c25a001649..08ab1dc0ce21b 100644 --- a/airflow/providers/opensearch/operators/opensearch.py +++ b/airflow/providers/opensearch/operators/opensearch.py @@ -84,7 +84,7 @@ def execute(self, context: Context) -> Any: raise AirflowException(e) elif self.search_object is not None: try: - result = self.search_object.using(self.hook.get_client).execute() + result = self.search_object.using(self.hook.client).execute() except OpenSearchException as e: raise AirflowException(e) else: @@ -129,7 +129,7 @@ def hook(self) -> OpenSearchHook: def execute(self, context: Context) -> Any: """Creates an index on an Open Search cluster.""" try: - self.hook.get_client.indices.create(index=self.index_name, body=self.index_body) + self.hook.client.indices.create(index=self.index_name, body=self.index_body) except OpenSearchException as e: raise AirflowException(e) @@ -175,8 +175,8 @@ def execute(self, context: Context) -> Any: """Saves a document to a given index on an OpenSearch cluster.""" if self.doc_class is not None: try: - doc = self.doc_class.init(using=self.hook.get_client) - result = doc.save(using=self.hook.get_client) + doc = self.doc_class.init(using=self.hook.client) + result = doc.save(using=self.hook.client) except OpenSearchException as e: raise AirflowException(e) elif self.index_name is not None and self.document is not None and self.doc_id is not None: diff --git a/tests/providers/opensearch/conftest.py b/tests/providers/opensearch/conftest.py index 53c58ca85aacf..79f6fefa98f27 100644 --- a/tests/providers/opensearch/conftest.py +++ b/tests/providers/opensearch/conftest.py @@ -29,7 +29,7 @@ class MockSearch(OpenSearchHook): # Mock class to override the Hook for monkeypatching - def get_client(self) -> None: + def client(self) -> None: return None def search(self, query: dict, index_name: str, **kwargs: Any) -> Any: From 6f40db43c72a31692446c5941cac680a15ef48de Mon Sep 17 00:00:00 2001 From: cjames23 Date: Fri, 13 Oct 2023 23:51:16 -0700 Subject: [PATCH 27/30] Remove provider.yaml changes from PR --- airflow/providers/amazon/provider.yaml | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/airflow/providers/amazon/provider.yaml b/airflow/providers/amazon/provider.yaml index d33843853ebdd..74ada2335556b 100644 --- a/airflow/providers/amazon/provider.yaml +++ b/airflow/providers/amazon/provider.yaml @@ -73,9 +73,9 @@ dependencies: - apache-airflow-providers-http # We should update minimum version of boto3 and here regularly to avoid `pip` backtracking with the number # of candidates to consider. We should also make sure that all the below related packages have also the - # same minimum version specified. Boto3 1.28.0 has been released on July 6, 2023. We should also make - # sure we set it to the version that `aiobotocore` supports (see `aiobotocore` optional dependency at - # the end of this file). Currently, we set aiobotocore as minimum 2.5.3 - as this it was the first version + # same minimum version specified. Boto3 1.28.0 has been released on July 6 2023. We should also make sure we + # set it to the version that `aiobotocore` supports (see `aiobotocore` optional dependency at the end + # of this file). Currently we set aiobotocore as minimum 2.5.3 - as this is was the first version # that supported boto3 1.28. NOTE!!! BOTOCORE VERSIONS ARE SHIFTED BY 3 MINOR VERSIONS # NOTE!!! 
From 6f40db43c72a31692446c5941cac680a15ef48de Mon Sep 17 00:00:00 2001
From: cjames23
Date: Fri, 13 Oct 2023 23:51:16 -0700
Subject: [PATCH 27/30] Remove provider.yaml changes from PR

---
 airflow/providers/amazon/provider.yaml | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/airflow/providers/amazon/provider.yaml b/airflow/providers/amazon/provider.yaml
index d33843853ebdd..74ada2335556b 100644
--- a/airflow/providers/amazon/provider.yaml
+++ b/airflow/providers/amazon/provider.yaml
@@ -73,9 +73,9 @@ dependencies:
   - apache-airflow-providers-http
   # We should update minimum version of boto3 and here regularly to avoid `pip` backtracking with the number
   # of candidates to consider. We should also make sure that all the below related packages have also the
-  # same minimum version specified. Boto3 1.28.0 has been released on July 6, 2023. We should also make
-  # sure we set it to the version that `aiobotocore` supports (see `aiobotocore` optional dependency at
-  # the end of this file). Currently, we set aiobotocore as minimum 2.5.3 - as this it was the first version
+  # same minimum version specified. Boto3 1.28.0 has been released on July 6 2023. We should also make sure we
+  # set it to the version that `aiobotocore` supports (see `aiobotocore` optional dependency at the end
+  # of this file). Currently we set aiobotocore as minimum 2.5.3 - as this is was the first version
   # that supported boto3 1.28. NOTE!!! BOTOCORE VERSIONS ARE SHIFTED BY 3 MINOR VERSIONS
   # NOTE!!! Make sure to update _MIN_BOTO3_VERSION in setup.py when you update it here
   - boto3>=1.28.0
@@ -88,7 +88,6 @@ dependencies:
   - sqlalchemy_redshift>=0.8.6
   - asgiref
-
 integrations:
   - integration-name: Amazon Athena
     external-doc-url: https://aws.amazon.com/athena/
@@ -291,7 +290,6 @@ integrations:
       - /docs/apache-airflow-providers-amazon/operators/appflow.rst
     tags: [aws]
-
 operators:
   - integration-name: Amazon Athena
     python-modules:
@@ -544,7 +542,6 @@ hooks:
     python-modules:
       - airflow.providers.amazon.aws.hooks.appflow
-
 triggers:
   - integration-name: Amazon Web Services
     python-modules:
@@ -692,7 +689,6 @@ connection-types:
   - hook-class-name: airflow.providers.amazon.aws.hooks.redshift_sql.RedshiftSQLHook
     connection-type: redshift
-
 notifications:
   - airflow.providers.amazon.aws.notifications.chime.ChimeNotifier
   - airflow.providers.amazon.aws.notifications.sns.SnsNotifier

From 475232cd3532bbbefb648a5de874cafb064486a4 Mon Sep 17 00:00:00 2001
From: cjames23
Date: Sat, 14 Oct 2023 01:07:14 -0700
Subject: [PATCH 28/30] Fix OpenSearch provider conf.test client ref

---
 tests/providers/opensearch/conftest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/providers/opensearch/conftest.py b/tests/providers/opensearch/conftest.py
index 79f6fefa98f27..f6f5f027c6c80 100644
--- a/tests/providers/opensearch/conftest.py
+++ b/tests/providers/opensearch/conftest.py
@@ -42,7 +42,7 @@
 @pytest.fixture
 def mock_hook(monkeypatch):
     monkeypatch.setattr(OpenSearchHook, "search", MockSearch.search)
-    monkeypatch.setattr(OpenSearchHook, "get_client", MockSearch.get_client)
+    monkeypatch.setattr(OpenSearchHook, "get_client", MockSearch.client)
     monkeypatch.setattr(OpenSearchHook, "index", MockSearch.index)

From dd1a47fcf95bd5383631fdb1056cf5659e25c739 Mon Sep 17 00:00:00 2001
From: cjames23
Date: Sat, 14 Oct 2023 14:41:34 -0700
Subject: [PATCH 29/30] Fix breeze command images

---
 images/breeze/output_build-docs.svg           | 59 ++++++++--------
 ...release-management_add-back-references.svg |  8 +--
 ...ement_generate-issue-content-providers.svg |  8 +--
 ...agement_prepare-provider-documentation.svg |  7 +-
 ...e-management_prepare-provider-packages.svg |  7 +-
 ...output_release-management_publish-docs.svg | 68 ++++++++++---------
 images/breeze/output_sbom.svg                 |  6 +-
 ...t_sbom_generate-providers-requirements.svg | 41 +++++------
 8 files changed, 101 insertions(+), 103 deletions(-)

diff --git a/images/breeze/output_build-docs.svg b/images/breeze/output_build-docs.svg
index 651072d97cc2d..71e474fd5caf4 100644
--- a/images/breeze/output_build-docs.svg
+++ b/images/breeze/output_build-docs.svg
@@ -1,4 +1,4 @@
[SVG markup residue: regenerated <svg> dimension attributes]