From bbf5b758598f99d79b42b4851f6f3ad61b397f3d Mon Sep 17 00:00:00 2001 From: Ambika Garg Date: Tue, 18 Jun 2024 18:36:12 -0400 Subject: [PATCH 01/22] Add Power BI operator that refreshes the powerbi dataset Add Power BI integration to the provider.yaml --- .../microsoft/azure/hooks/powerbi.py | 323 ++++++++++++++++++ .../microsoft/azure/operators/powerbi.py | 200 +++++++++++ .../providers/microsoft/azure/provider.yaml | 12 + .../microsoft/azure/hooks/test_powerbi.py | 277 +++++++++++++++ .../microsoft/azure/operators/test_powerbi.py | 261 ++++++++++++++ .../azure/example_dataset_refresh.py | 87 +++++ 6 files changed, 1160 insertions(+) create mode 100644 airflow/providers/microsoft/azure/hooks/powerbi.py create mode 100644 airflow/providers/microsoft/azure/operators/powerbi.py create mode 100644 tests/providers/microsoft/azure/hooks/test_powerbi.py create mode 100644 tests/providers/microsoft/azure/operators/test_powerbi.py create mode 100644 tests/system/providers/microsoft/azure/example_dataset_refresh.py diff --git a/airflow/providers/microsoft/azure/hooks/powerbi.py b/airflow/providers/microsoft/azure/hooks/powerbi.py new file mode 100644 index 0000000000000..d547958efef0e --- /dev/null +++ b/airflow/providers/microsoft/azure/hooks/powerbi.py @@ -0,0 +1,323 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+import time
+from enum import Enum
+from typing import Any, Callable
+
+import requests
+from azure.identity import ClientSecretCredential
+
+from airflow.exceptions import AirflowException
+from airflow.hooks.base import BaseHook
+
+
+class PowerBIDatasetRefreshFields(Enum):
+    """Keys of the normalized dataset-refresh detail dicts built by the hook."""
+
+    REQUEST_ID = "request_id"
+    STATUS = "status"
+    END_TIME = "end_time"
+    ERROR = "error"
+
+
+class PowerBIDatasetRefreshStatus:
+    """Power BI refresh dataset statuses."""
+
+    # The API reports "Unknown" while the completion state is unknown or a
+    # refresh is in progress; ``raw_to_refresh_details`` normalizes that to
+    # IN_PROGRESS.
+    IN_PROGRESS = "In Progress"
+    FAILED = "Failed"
+    COMPLETED = "Completed"
+    DISABLED = "Disabled"
+
+    TERMINAL_STATUSES = {FAILED, COMPLETED}
+
+
+class PowerBIDatasetRefreshException(AirflowException):
+    """An exception that indicates a dataset refresh failed to complete."""
+
+
+class PowerBIHook(BaseHook):
+    """
+    A hook to interact with Power BI.
+
+    :param powerbi_conn_id: Airflow Connection ID that contains the connection
+        information for the Power BI account used for authentication.
+    """
+
+    conn_type: str = "powerbi"
+    conn_name_attr: str = "powerbi_conn_id"
+    default_conn_name: str = "powerbi_default"
+    hook_name: str = "Power BI"
+
+    @classmethod
+    def get_connection_form_widgets(cls) -> dict[str, Any]:
+        """Return connection widgets to add to connection form."""
+        from flask_appbuilder.fieldwidgets import BS3TextFieldWidget
+        from flask_babel import lazy_gettext
+        from wtforms import StringField
+
+        return {
+            "tenant_id": StringField(lazy_gettext("Tenant ID"), widget=BS3TextFieldWidget()),
+        }
+
+    @classmethod
+    def get_ui_field_behaviour(cls) -> dict[str, Any]:
+        """Return custom field behaviour for the connection form."""
+        return {
+            "hidden_fields": ["schema", "port", "host", "extra"],
+            "relabeling": {
+                "login": "Client ID",
+                "password": "Secret",
+            },
+        }
+
+    def __init__(
+        self,
+        *,
+        powerbi_conn_id: str = default_conn_name,
+    ):
+        self.conn_id = powerbi_conn_id
+        self._api_version = "v1.0"
+        self._base_url = "https://api.powerbi.com"
+        super().__init__()
+
+    def refresh_dataset(self, dataset_id: str, group_id: str) -> str:
+        """
+        Trigger a refresh for the specified dataset from the given group id.
+
+        :param dataset_id: The dataset id.
+        :param group_id: The workspace id.
+        :return: Request id of the dataset refresh request.
+        :raises PowerBIDatasetRefreshException: If the API rejects the refresh request.
+        """
+        url = (
+            f"{self._base_url}/{self._api_version}/myorg"
+            f"/groups/{group_id}/datasets/{dataset_id}/refreshes"
+        )
+
+        response = self._send_request("POST", url=url)
+
+        if response.ok:
+            # Power BI returns the refresh request identifier in a response header.
+            return response.headers["RequestId"]
+
+        # BUGFIX: interpolate the status code into the message; AirflowException
+        # does not %-format positional arguments, so the old two-argument form
+        # produced an unformatted tuple-like message.
+        raise PowerBIDatasetRefreshException(
+            f"Failed to trigger dataset refresh. Status code: {response.status_code}"
+        )
+
+    def _get_token(self) -> str:
+        """Retrieve the access token used to authenticate against the API."""
+        conn = self.get_connection(self.conn_id)
+        # BUGFIX: removed a leftover debug ``print(extras)`` that leaked
+        # connection extras to stdout.
+        extras = conn.extra_dejson
+        tenant = extras.get("tenant_id")
+
+        if not conn.login or not conn.password:
+            raise ValueError("A Client ID and Secret is required to authenticate with Power BI.")
+
+        if not tenant:
+            raise ValueError("A Tenant ID is required when authenticating with Client ID and Secret.")
+
+        credential = ClientSecretCredential(
+            client_id=conn.login, client_secret=conn.password, tenant_id=tenant
+        )
+
+        resource = "https://analysis.windows.net/powerbi/api"
+
+        access_token = credential.get_token(f"{resource}/.default")
+
+        return access_token.token
+
+    def get_refresh_history(
+        self,
+        dataset_id: str,
+        group_id: str,
+    ) -> list[dict[str, str]]:
+        """
+        Retrieve the refresh history of the specified dataset from the given group ID.
+
+        :param dataset_id: The dataset ID.
+        :param group_id: The workspace ID.
+        :return: List of normalized refresh details, in the order returned by the API.
+        :raises PowerBIDatasetRefreshException: If the API call fails.
+        """
+        url = (
+            f"{self._base_url}/{self._api_version}/myorg"
+            f"/groups/{group_id}/datasets/{dataset_id}/refreshes"
+        )
+
+        raw_response = self._send_request("GET", url=url)
+
+        if raw_response.ok:
+            refresh_histories = raw_response.json().get("value")
+            return [self.raw_to_refresh_details(refresh_history) for refresh_history in refresh_histories]
+
+        # BUGFIX: the failure branch referenced the undefined name ``response``
+        # (bound only in the success branch), raising NameError instead of the
+        # intended exception. Report the status code from ``raw_response``.
+        raise PowerBIDatasetRefreshException(
+            f"Failed to retrieve refresh history. Status code: {raw_response.status_code}"
+        )
+
+    def raw_to_refresh_details(self, refresh_details: dict) -> dict[str, str]:
+        """
+        Convert raw refresh details into a dictionary containing required fields.
+
+        :param refresh_details: Raw object of refresh details.
+        """
+        return {
+            PowerBIDatasetRefreshFields.REQUEST_ID.value: str(refresh_details.get("requestId")),
+            PowerBIDatasetRefreshFields.STATUS.value: (
+                # The API reports "Unknown" for in-progress refreshes.
+                "In Progress"
+                if str(refresh_details.get("status")) == "Unknown"
+                else str(refresh_details.get("status"))
+            ),
+            PowerBIDatasetRefreshFields.END_TIME.value: str(refresh_details.get("endTime")),
+            PowerBIDatasetRefreshFields.ERROR.value: str(refresh_details.get("serviceExceptionJson")),
+        }
+
+    def get_latest_refresh_details(self, dataset_id: str, group_id: str) -> dict[str, str] | None:
+        """
+        Get the refresh details of the most recent dataset refresh in the refresh history.
+
+        :return: Dictionary of refresh details if refresh history exists, otherwise None.
+        """
+        history = self.get_refresh_history(dataset_id=dataset_id, group_id=group_id)
+
+        if not history:
+            return None
+
+        return history[0]
+
+    def get_refresh_details_by_request_id(
+        self, dataset_id: str, group_id: str, request_id: str
+    ) -> dict[str, str]:
+        """
+        Get the refresh details of the given request Id.
+
+        :param request_id: Request Id of the Dataset refresh.
+        :raises PowerBIDatasetRefreshException: If no refresh with the given id is found.
+        """
+        refresh_histories = self.get_refresh_history(dataset_id=dataset_id, group_id=group_id)
+
+        # A single scan replaces the previous build-a-list-then-index lookup;
+        # an empty history and a missing id both fall through to the raise.
+        for refresh_details in refresh_histories:
+            if refresh_details.get(PowerBIDatasetRefreshFields.REQUEST_ID.value) == request_id:
+                return refresh_details
+
+        raise PowerBIDatasetRefreshException(
+            f"Unable to fetch the details of dataset refresh with Request Id: {request_id}"
+        )
+
+    def wait_for_dataset_refresh_status(
+        self,
+        *,
+        expected_status: str,
+        request_id: str,
+        dataset_id: str,
+        group_id: str,
+        check_interval: int = 60,
+        timeout: int = 60 * 60 * 24 * 7,
+    ) -> bool:
+        """
+        Wait until the dataset refresh of given request ID has reached the expected status.
+
+        :param expected_status: The desired status to check against a dataset refresh's current status.
+        :param request_id: Request id for the dataset refresh request.
+        :param check_interval: Time in seconds to check on a dataset refresh's status.
+        :param timeout: Time in seconds to wait for a dataset to reach a terminal status or the expected status.
+        :return: True if the refresh reached ``expected_status`` before the timeout.
+        :raises PowerBIDatasetRefreshException: If the timeout elapses before a terminal
+            or expected status is seen.
+        """
+        deadline = time.monotonic() + timeout
+
+        while True:
+            dataset_refresh_details = self.get_refresh_details_by_request_id(
+                dataset_id=dataset_id, group_id=group_id, request_id=request_id
+            )
+            dataset_refresh_status = dataset_refresh_details.get(PowerBIDatasetRefreshFields.STATUS.value)
+
+            if (
+                dataset_refresh_status in PowerBIDatasetRefreshStatus.TERMINAL_STATUSES
+                or dataset_refresh_status == expected_status
+            ):
+                return dataset_refresh_status == expected_status
+
+            if time.monotonic() > deadline:
+                raise PowerBIDatasetRefreshException(
+                    f"Dataset refresh has not reached a terminal status after {timeout} seconds"
+                )
+
+            time.sleep(check_interval)
+
+    def trigger_dataset_refresh(self, *, dataset_id: str, group_id: str) -> str:
+        """
+        Triggers the Power BI dataset refresh.
+
+        :param dataset_id: The dataset ID.
+        :param group_id: The workspace ID.
+        :return: Request ID of the dataset refresh request.
+        """
+        self.log.info("Starting dataset refresh.")
+        return self.refresh_dataset(dataset_id=dataset_id, group_id=group_id)
+
+    def _send_request(self, request_type: str, url: str, **kwargs) -> requests.Response:
+        """
+        Send an authenticated request to the Power BI REST API.
+
+        :param request_type: The type of the request ("GET" or "POST").
+        :param url: The URL against which the request needs to be made.
+        :param kwargs: Additional keyword arguments to be passed to the request function.
+        :return: The response object returned by the request. Note that no status
+            check is performed here; callers must inspect ``response.ok``.
+        :raises KeyError: If ``request_type`` is not a supported HTTP method.
+        """
+        # Removed an unused ``self.header`` assignment that nothing ever read.
+        request_funcs: dict[str, Callable[..., requests.Response]] = {
+            "GET": requests.get,
+            "POST": requests.post,
+        }
+
+        func = request_funcs[request_type.upper()]
+
+        return func(url=url, headers={"Authorization": f"Bearer {self._get_token()}"}, **kwargs)
diff --git a/airflow/providers/microsoft/azure/operators/powerbi.py b/airflow/providers/microsoft/azure/operators/powerbi.py
new file mode 100644
index 0000000000000..9c26a4b6048c5
--- /dev/null
+++ b/airflow/providers/microsoft/azure/operators/powerbi.py
@@ -0,0 +1,200 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+from functools import cached_property
+from typing import TYPE_CHECKING, Sequence
+
+from airflow.configuration import conf
+from airflow.models import BaseOperator, BaseOperatorLink
+from airflow.providers.microsoft.azure.hooks.powerbi import (
+    PowerBIDatasetRefreshException,
+    PowerBIDatasetRefreshFields,
+    PowerBIDatasetRefreshStatus,
+    PowerBIHook,
+)
+
+if TYPE_CHECKING:
+    from airflow.models.taskinstancekey import TaskInstanceKey
+    from airflow.utils.context import Context
+
+
+class PowerBILink(BaseOperatorLink):
+    """Construct a link to monitor a dataset in Power BI."""
+
+    name = "Monitor PowerBI Dataset"
+
+    def get_link(self, operator: BaseOperator, *, ti_key: TaskInstanceKey):
+        """Return the Power BI portal URL for the operator's dataset."""
+        return (
+            "https://app.powerbi.com"
+            f"/groups/{operator.group_id}/datasets/{operator.dataset_id}"  # type: ignore[attr-defined]
+            "/details?experience=power-bi"
+        )
+
+
+class PowerBIDatasetRefreshOperator(BaseOperator):
+    """
+    Refreshes a Power BI dataset.
+
+    By default the operator will wait until the refresh has completed before
+    exiting. The refresh status is checked every 60 seconds as a default. This
+    can be changed by specifying a new value for `check_interval`.
+
+    :param dataset_id: The dataset id.
+    :param group_id: The workspace id.
+    :param wait_for_termination: Wait until the pre-existing or current triggered refresh
+        completes before exiting.
+    :param force_refresh: Force refresh if pre-existing refresh found.
+    :param powerbi_conn_id: Airflow Connection ID that contains the connection
+        information for the Power BI account used for authentication.
+    :param timeout: Time in seconds to wait for a dataset to reach a terminal status
+        for non-asynchronous waits. Used only if ``wait_for_termination`` is True.
+    :param check_interval: Number of seconds to wait before rechecking the
+        refresh status.
+    :param deferrable: Accepted for API compatibility; deferral is not implemented
+        in this module yet.
+    """
+
+    template_fields: Sequence[str] = (
+        "dataset_id",
+        "group_id",
+    )
+    template_fields_renderers = {"parameters": "json"}
+
+    operator_extra_links = (PowerBILink(),)
+
+    def __init__(
+        self,
+        *,  # All parameters below must be passed as keyword arguments.
+        dataset_id: str,
+        group_id: str,
+        wait_for_termination: bool = True,
+        force_refresh: bool = False,
+        powerbi_conn_id: str = PowerBIHook.default_conn_name,
+        timeout: int = 60 * 60 * 24 * 7,
+        check_interval: int = 60,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
+        self.dataset_id = dataset_id
+        self.group_id = group_id
+        self.wait_for_termination = wait_for_termination
+        self.force_refresh = force_refresh
+        self.powerbi_conn_id = powerbi_conn_id
+        self.timeout = timeout
+        self.check_interval = check_interval
+        # NOTE(review): ``deferrable`` is stored but never acted on in this
+        # module -- confirm whether a trigger-based deferral path is planned.
+        self.deferrable = deferrable
+
+    @cached_property
+    def hook(self) -> PowerBIHook:
+        """Create and return a PowerBIHook (cached)."""
+        return PowerBIHook(powerbi_conn_id=self.powerbi_conn_id)
+
+    def _wait_or_raise(self, request_id: str, *, pre_existing: bool = False) -> None:
+        """
+        Block until refresh ``request_id`` completes; raise if it fails or is cancelled.
+
+        BUGFIX: the user-configured ``check_interval``/``timeout`` are now
+        forwarded on every wait -- two of the three original call sites dropped
+        them and silently fell back to the hook defaults.
+        """
+        prefix = "Pre-existing dataset refresh" if pre_existing else "Dataset refresh"
+        self.log.info("Waiting for dataset refresh %s to terminate.", request_id)
+        if self.hook.wait_for_dataset_refresh_status(
+            request_id=request_id,
+            dataset_id=self.dataset_id,
+            group_id=self.group_id,
+            expected_status=PowerBIDatasetRefreshStatus.COMPLETED,
+            check_interval=self.check_interval,
+            timeout=self.timeout,
+        ):
+            self.log.info("%s %s has completed successfully.", prefix, request_id)
+        else:
+            # Typo fix: message previously read "Pre-exisintg".
+            raise PowerBIDatasetRefreshException(
+                f"{prefix} {request_id} has failed or has been cancelled."
+            )
+
+    def execute(self, context: Context):
+        """Refresh the Power BI Dataset."""
+        self.log.info("Check if a refresh is already in progress.")
+        refresh_details = self.hook.get_latest_refresh_details(
+            dataset_id=self.dataset_id, group_id=self.group_id
+        )
+
+        if (
+            refresh_details is None
+            or refresh_details.get(PowerBIDatasetRefreshFields.STATUS.value)
+            in PowerBIDatasetRefreshStatus.TERMINAL_STATUSES
+        ):
+            self.log.info("No pre-existing refresh found.")
+            request_id = self.hook.trigger_dataset_refresh(
+                dataset_id=self.dataset_id,
+                group_id=self.group_id,
+            )
+
+            if self.wait_for_termination:
+                self._wait_or_raise(request_id)
+        else:
+            # A pre-existing, non-terminal refresh was found.
+            # BUGFIX: bind ``request_id`` unconditionally; the original only set
+            # it inside the "In Progress" branch, so any other non-terminal
+            # status (e.g. "Disabled") raised NameError at the lookup below.
+            request_id = str(refresh_details.get(PowerBIDatasetRefreshFields.REQUEST_ID.value))
+
+            if (
+                refresh_details.get(PowerBIDatasetRefreshFields.STATUS.value)
+                == PowerBIDatasetRefreshStatus.IN_PROGRESS
+            ):
+                self.log.info("Found pre-existing dataset refresh request: %s.", request_id)
+
+                if self.force_refresh or self.wait_for_termination:
+                    self._wait_or_raise(request_id, pre_existing=True)
+
+            if self.force_refresh:
+                self.log.info("Starting new refresh.")
+                request_id = self.hook.trigger_dataset_refresh(
+                    dataset_id=self.dataset_id,
+                    group_id=self.group_id,
+                )
+
+                if self.wait_for_termination:
+                    self._wait_or_raise(request_id)
+
+        # Retrieve refresh details after triggering refresh
+        refresh_details = self.hook.get_refresh_details_by_request_id(
+            dataset_id=self.dataset_id, group_id=self.group_id, request_id=request_id
+        )
+
+        request_id = str(refresh_details.get(PowerBIDatasetRefreshFields.REQUEST_ID.value))
+        status = str(refresh_details.get(PowerBIDatasetRefreshFields.STATUS.value))
+        end_time = str(refresh_details.get(PowerBIDatasetRefreshFields.END_TIME.value))
+        error = str(refresh_details.get(PowerBIDatasetRefreshFields.ERROR.value))
+
+        # Xcom Integration
+        context["ti"].xcom_push(key="powerbi_dataset_refresh_id", value=request_id)
+        context["ti"].xcom_push(key="powerbi_dataset_refresh_status", value=status)
+        context["ti"].xcom_push(key="powerbi_dataset_refresh_end_time", value=end_time)
+        context["ti"].xcom_push(key="powerbi_dataset_refresh_error", value=error)
diff --git a/airflow/providers/microsoft/azure/provider.yaml b/airflow/providers/microsoft/azure/provider.yaml
index 3700227b9b160..44f5515566691 100644
--- a/airflow/providers/microsoft/azure/provider.yaml
+++ b/airflow/providers/microsoft/azure/provider.yaml
@@ -175,6 +175,9 @@ integrations:
     how-to-guide:
       - /docs/apache-airflow-providers-microsoft-azure/operators/msgraph.rst
     tags: [azure]
+  - integration-name: Microsoft Power BI
+    external-doc-url: https://learn.microsoft.com/en-us/rest/api/power-bi/
+    tags: [azure]
 
 operators:
   - integration-name: Microsoft Azure Data Lake Storage
@@ -207,6 +210,9 @@ operators:
   - integration-name: Microsoft Graph API
     python-modules:
      - airflow.providers.microsoft.azure.operators.msgraph
+  - integration-name: Microsoft Power BI
+    python-modules:
+      - airflow.providers.microsoft.azure.operators.powerbi
 
 sensors:
   - integration-name: Microsoft Azure Cosmos DB
@@ -267,6 +273,9 @@ hooks:
   - integration-name: Microsoft Graph API
     python-modules:
       - airflow.providers.microsoft.azure.hooks.msgraph
+  - integration-name: Microsoft Power BI
+    python-modules:
+      -
airflow.providers.microsoft.azure.hooks.powerbi triggers: - integration-name: Microsoft Azure Data Factory @@ -333,6 +342,8 @@ connection-types: connection-type: azure_synapse - hook-class-name: airflow.providers.microsoft.azure.hooks.data_lake.AzureDataLakeStorageV2Hook connection-type: adls + - hook-class-name: airflow.providers.microsoft.azure.hooks.powerbi.PowerBIHook + connection-type: powerbi secrets-backends: - airflow.providers.microsoft.azure.secrets.key_vault.AzureKeyVaultBackend @@ -343,6 +354,7 @@ logging: extra-links: - airflow.providers.microsoft.azure.operators.data_factory.AzureDataFactoryPipelineRunLink - airflow.providers.microsoft.azure.operators.synapse.AzureSynapsePipelineRunLink + - airflow.providers.microsoft.azure.operators.powerbi.PowerBILink config: azure_remote_logging: diff --git a/tests/providers/microsoft/azure/hooks/test_powerbi.py b/tests/providers/microsoft/azure/hooks/test_powerbi.py new file mode 100644 index 0000000000000..9036243aac6ee --- /dev/null +++ b/tests/providers/microsoft/azure/hooks/test_powerbi.py @@ -0,0 +1,277 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations
+
+from unittest import mock
+from unittest.mock import MagicMock
+
+import pytest
+
+from airflow.models.connection import Connection
+from airflow.providers.microsoft.azure.hooks.powerbi import (
+    PowerBIDatasetRefreshException,
+    PowerBIDatasetRefreshFields,
+    PowerBIDatasetRefreshStatus,
+    PowerBIHook,
+)
+
+DEFAULT_CONNECTION_CLIENT_SECRET = "powerbi_conn_id"
+MODULE = "airflow.providers.microsoft.azure.hooks.powerbi"
+CLIENT_ID = "client_id"
+CLIENT_SECRET = "client_secret"
+TENANT_ID = "tenant_id"
+BASE_URL = "https://api.powerbi.com"
+API_VERSION = "v1.0"
+GROUP_ID = "group_id"
+DATASET_ID = "dataset_id"
+
+# Raw payload as returned by the Power BI "Get Refresh History" endpoint.
+API_RAW_RESPONSE = {
+    "value": [
+        # Completed refresh
+        {
+            "requestId": "5e2d9921-e91b-491f-b7e1-e7d8db49194c",
+            "status": "Completed",
+            "endTime": "2024-04-15T20:14:08.1458221Z",
+            # serviceExceptionJson is not present when status is not "Failed"
+        },
+        # In-progress refresh
+        {
+            "requestId": "6b6536c1-cfcb-4148-9c21-402c3f5241e4",
+            "status": "Unknown",  # endTime is not available while in progress.
+        },
+        # Failed refresh
+        {
+            "requestId": "11bf290a-346b-48b7-8973-c5df149337ff",
+            "status": "Failed",
+            "endTime": "2024-04-15T20:14:08.1458221Z",
+            "serviceExceptionJson": '{"errorCode":"ModelRefreshFailed_CredentialsNotSpecified"}',
+        },
+    ]
+}
+
+# The same refreshes after normalization by ``PowerBIHook.raw_to_refresh_details``.
+FORMATTED_RESPONSE = [
+    # Completed refresh
+    {
+        PowerBIDatasetRefreshFields.REQUEST_ID.value: "5e2d9921-e91b-491f-b7e1-e7d8db49194c",
+        PowerBIDatasetRefreshFields.STATUS.value: PowerBIDatasetRefreshStatus.COMPLETED,
+        PowerBIDatasetRefreshFields.END_TIME.value: "2024-04-15T20:14:08.1458221Z",
+        PowerBIDatasetRefreshFields.ERROR.value: "None",
+    },
+    # In-progress refresh
+    {
+        PowerBIDatasetRefreshFields.REQUEST_ID.value: "6b6536c1-cfcb-4148-9c21-402c3f5241e4",
+        PowerBIDatasetRefreshFields.STATUS.value: PowerBIDatasetRefreshStatus.IN_PROGRESS,
+        PowerBIDatasetRefreshFields.END_TIME.value: "None",
+        PowerBIDatasetRefreshFields.ERROR.value: "None",
+    },
+    # Failed refresh
+    {
+        PowerBIDatasetRefreshFields.REQUEST_ID.value: "11bf290a-346b-48b7-8973-c5df149337ff",
+        PowerBIDatasetRefreshFields.STATUS.value: PowerBIDatasetRefreshStatus.FAILED,
+        PowerBIDatasetRefreshFields.END_TIME.value: "2024-04-15T20:14:08.1458221Z",
+        PowerBIDatasetRefreshFields.ERROR.value: '{"errorCode":"ModelRefreshFailed_CredentialsNotSpecified"}',
+    },
+]
+
+
+@pytest.fixture
+def powerbi_hook():
+    """Hook instance wired to the test connection id."""
+    return PowerBIHook(powerbi_conn_id=DEFAULT_CONNECTION_CLIENT_SECRET)
+
+
+@pytest.fixture
+def get_token(powerbi_hook):
+    """Stub out token acquisition and yield the fake token value."""
+    powerbi_hook._get_token = MagicMock(return_value="access_token")
+    return powerbi_hook._get_token()
+
+
+def test_get_token_with_missing_credentials(powerbi_hook):
+    # Mock the get_connection method to return a connection with missing credentials
+    powerbi_hook.get_connection = MagicMock(
+        return_value=Connection(
+            conn_id=DEFAULT_CONNECTION_CLIENT_SECRET,
+            conn_type="powerbi",
+            login=None,
+            password=None,
+            extra={
+                "tenant_id": TENANT_ID,
+            },
+        )
+    )
+
+    with pytest.raises(ValueError):
+        powerbi_hook._get_token()
+
+
+def test_get_token_with_missing_tenant_id(powerbi_hook):
+    # Mock the get_connection method to return a connection with missing tenant ID
+    powerbi_hook.get_connection = MagicMock(
+        return_value=Connection(
+            conn_id=DEFAULT_CONNECTION_CLIENT_SECRET,
+            conn_type="powerbi",
+            login=CLIENT_ID,
+            password=CLIENT_SECRET,
+            extra={},
+        )
+    )
+
+    with pytest.raises(ValueError):
+        powerbi_hook._get_token()
+
+
+@mock.patch(f"{MODULE}.ClientSecretCredential")
+def test_get_token_with_valid_credentials(mock_credential, powerbi_hook):
+    # Mock the get_connection method to return a connection with valid credentials
+    powerbi_hook.get_connection = MagicMock(
+        return_value=Connection(
+            conn_id=DEFAULT_CONNECTION_CLIENT_SECRET,
+            conn_type="powerbi",
+            login=CLIENT_ID,
+            password=CLIENT_SECRET,
+            extra={
+                "tenant_id": TENANT_ID,
+            },
+        )
+    )
+
+    token = powerbi_hook._get_token()
+    mock_credential.assert_called()
+
+    assert token is not None
+
+
+def test_refresh_dataset(powerbi_hook, requests_mock, get_token):
+    request_id = "request_id"
+
+    # Mock the request in _send_request method to return a successful response
+    requests_mock.post(
+        f"{BASE_URL}/{API_VERSION}/myorg/groups/{GROUP_ID}/datasets/{DATASET_ID}/refreshes",
+        status_code=202,
+        headers={"Authorization": f"Bearer {get_token}", "RequestId": request_id},
+    )
+
+    result = powerbi_hook.refresh_dataset(dataset_id=DATASET_ID, group_id=GROUP_ID)
+
+    assert result == request_id
+
+
+def test_get_refresh_history_success(powerbi_hook, requests_mock, get_token):
+    url = f"{BASE_URL}/{API_VERSION}/myorg/groups/{GROUP_ID}/datasets/{DATASET_ID}/refreshes"
+
+    requests_mock.get(
+        url, json=API_RAW_RESPONSE, headers={"Authorization": f"Bearer {get_token}"}, status_code=200
+    )
+
+    result = powerbi_hook.get_refresh_history(DATASET_ID, GROUP_ID)
+
+    assert len(result) == 3
+    assert result == FORMATTED_RESPONSE
+
+
+def test_get_latest_refresh_details_with_no_history(powerbi_hook):
+    # Mock the get_refresh_history method to return an empty list
+    powerbi_hook.get_refresh_history = MagicMock(return_value=[])
+
+    result = powerbi_hook.get_latest_refresh_details(dataset_id=DATASET_ID, group_id=GROUP_ID)
+
+    assert result is None
+
+
+def test_get_latest_refresh_details_with_history(powerbi_hook):
+    # Mock the get_refresh_history method to return a list with refresh details
+    powerbi_hook.get_refresh_history = MagicMock(return_value=FORMATTED_RESPONSE)
+
+    result = powerbi_hook.get_latest_refresh_details(dataset_id=DATASET_ID, group_id=GROUP_ID)
+
+    assert result == FORMATTED_RESPONSE[0]
+
+
+def test_get_refresh_details_by_request_id(powerbi_hook):
+    # Mock the get_refresh_history method to return a list of refresh histories
+    powerbi_hook.get_refresh_history = MagicMock(return_value=FORMATTED_RESPONSE)
+
+    # Call the function with a valid request ID
+    request_id = "5e2d9921-e91b-491f-b7e1-e7d8db49194c"
+    result = powerbi_hook.get_refresh_details_by_request_id(
+        dataset_id=DATASET_ID, group_id=GROUP_ID, request_id=request_id
+    )
+
+    # Assert that the correct refresh details are returned
+    assert result == {
+        PowerBIDatasetRefreshFields.REQUEST_ID.value: "5e2d9921-e91b-491f-b7e1-e7d8db49194c",
+        PowerBIDatasetRefreshFields.STATUS.value: "Completed",
+        PowerBIDatasetRefreshFields.END_TIME.value: "2024-04-15T20:14:08.1458221Z",
+        PowerBIDatasetRefreshFields.ERROR.value: "None",
+    }
+
+    # An unknown request ID must raise, not return a wrong entry.
+    invalid_request_id = "invalid_request_id"
+    with pytest.raises(PowerBIDatasetRefreshException):
+        powerbi_hook.get_refresh_details_by_request_id(
+            dataset_id=DATASET_ID, group_id=GROUP_ID, request_id=invalid_request_id
+        )
+
+
+_wait_for_dataset_refresh_status_test_args = [
+    (PowerBIDatasetRefreshStatus.COMPLETED, PowerBIDatasetRefreshStatus.COMPLETED, True),
+    (PowerBIDatasetRefreshStatus.FAILED, PowerBIDatasetRefreshStatus.COMPLETED, False),
+    (PowerBIDatasetRefreshStatus.IN_PROGRESS, PowerBIDatasetRefreshStatus.COMPLETED, "timeout"),
+]
+
+
+@pytest.mark.parametrize(
+    argnames=("dataset_refresh_status", "expected_status", "expected_result"),
+    argvalues=_wait_for_dataset_refresh_status_test_args,
+    ids=[
+        f"refresh_status_{argval[0]}_expected_{argval[1]}"
+        for argval in _wait_for_dataset_refresh_status_test_args
+    ],
+)
+def test_wait_for_dataset_refresh_status(
+    powerbi_hook, dataset_refresh_status, expected_status, expected_result
+):
+    config = {
+        "dataset_id": DATASET_ID,
+        "group_id": GROUP_ID,
+        "request_id": "5e2d9921-e91b-491f-b7e1-e7d8db49194c",
+        "timeout": 3,
+        "check_interval": 1,
+        "expected_status": expected_status,
+    }
+
+    # Mock the get_refresh_details_by_request_id method to return a dataset refresh details
+    dataset_refresh_details = {PowerBIDatasetRefreshFields.STATUS.value: dataset_refresh_status}
+    powerbi_hook.get_refresh_details_by_request_id = MagicMock(return_value=dataset_refresh_details)
+
+    if expected_result != "timeout":
+        assert powerbi_hook.wait_for_dataset_refresh_status(**config) == expected_result
+    else:
+        # A refresh stuck in progress must time out with an exception.
+        with pytest.raises(PowerBIDatasetRefreshException):
+            powerbi_hook.wait_for_dataset_refresh_status(**config)
+
+
+def test_trigger_dataset_refresh(powerbi_hook):
+    # Mock the refresh_dataset method to return a request ID
+    powerbi_hook.refresh_dataset = MagicMock(return_value="request_id")
+
+    # Comment fix: this test asserts the happy path -- the request id returned
+    # by refresh_dataset is passed through (the old comment claimed it raised).
+    response = powerbi_hook.trigger_dataset_refresh(dataset_id=DATASET_ID, group_id=GROUP_ID)
+
+    assert response == "request_id"
diff --git a/tests/providers/microsoft/azure/operators/test_powerbi.py b/tests/providers/microsoft/azure/operators/test_powerbi.py
new file mode 100644
index 0000000000000..d7b01c056ed57
--- /dev/null
+++ b/tests/providers/microsoft/azure/operators/test_powerbi.py
@@ -0,0 +1,261 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +from unittest.mock import MagicMock, call + +import pytest + +from airflow.providers.microsoft.azure.hooks.powerbi import ( + PowerBIDatasetRefreshException, + PowerBIDatasetRefreshFields, + PowerBIDatasetRefreshStatus, + PowerBIHook, +) +from airflow.providers.microsoft.azure.operators.powerbi import PowerBIDatasetRefreshOperator + +DEFAULT_CONNECTION_CLIENT_SECRET = "powerbi_conn_id" +TASK_ID = "run_powerbi_operator" +GROUP_ID = "group_id" +DATASET_ID = "dataset_id" +CONFIG = { + "task_id": TASK_ID, + "powerbi_conn_id": DEFAULT_CONNECTION_CLIENT_SECRET, + "group_id": GROUP_ID, + "dataset_id": DATASET_ID, + "check_interval": 1, + "timeout": 3, +} + +# Sample responses from PowerBI API +COMPLETED_REFRESH_DETAILS = { + PowerBIDatasetRefreshFields.REQUEST_ID.value: "5e2d9921-e91b-491f-b7e1-e7d8db49194c", + PowerBIDatasetRefreshFields.STATUS.value: PowerBIDatasetRefreshStatus.COMPLETED, + PowerBIDatasetRefreshFields.END_TIME.value: "2024-04-15T20:14:08.1458221Z", + # serviceExceptionJson is not present when status is not "Failed" +} + +FAILED_REFRESH_DETAILS = { + PowerBIDatasetRefreshFields.REQUEST_ID.value: "11bf290a-346b-48b7-8973-c5df149337ff", + PowerBIDatasetRefreshFields.STATUS.value: PowerBIDatasetRefreshStatus.FAILED, + 
PowerBIDatasetRefreshFields.END_TIME.value: "2024-04-15T20:14:08.1458221Z", + PowerBIDatasetRefreshFields.ERROR.value: '{"errorCode":"ModelRefreshFailed_CredentialsNotSpecified"}', +} + +IN_PROGRESS_REFRESH_DETAILS = { + PowerBIDatasetRefreshFields.REQUEST_ID.value: "6b6536c1-cfcb-4148-9c21-402c3f5241e4", + PowerBIDatasetRefreshFields.STATUS.value: PowerBIDatasetRefreshStatus.IN_PROGRESS, # endtime is not available. +} + + +@pytest.fixture +def mock_powerbi_hook(): + hook = PowerBIHook() + return hook + + +# Test cases: refresh_details returns None, Terminal Status, In-progress Status +_get_latest_refresh_details_args = [ + (None), + COMPLETED_REFRESH_DETAILS, + FAILED_REFRESH_DETAILS, + IN_PROGRESS_REFRESH_DETAILS, +] + + +@pytest.mark.parametrize( + argnames=("latest_refresh_details"), + argvalues=_get_latest_refresh_details_args, + ids=[ + ( + f"latest_refresh_status_{argval[PowerBIDatasetRefreshFields.STATUS.value]}_no_wait_for_termination" + if argval is not None + else "latest_refresh_status_None_no_wait_for_termination" + ) + for argval in _get_latest_refresh_details_args + ], +) +def test_execute_no_wait_for_termination(mock_powerbi_hook, latest_refresh_details): + operator = PowerBIDatasetRefreshOperator( + wait_for_termination=False, + force_refresh=False, + **CONFIG, + ) + operator.hook = mock_powerbi_hook + context = {"ti": MagicMock()} + new_refresh_request_id = "5e2d9921-e91b-491f-b7e1-e7d8db49194c" + mock_powerbi_hook.get_latest_refresh_details = MagicMock(return_value=latest_refresh_details) + mock_powerbi_hook.trigger_dataset_refresh = MagicMock(return_value=new_refresh_request_id) + mock_powerbi_hook.get_refresh_details_by_request_id = MagicMock( + return_value={ + PowerBIDatasetRefreshFields.REQUEST_ID.value: new_refresh_request_id, + PowerBIDatasetRefreshFields.STATUS.value: PowerBIDatasetRefreshStatus.COMPLETED, + PowerBIDatasetRefreshFields.END_TIME.value: "2024-04-15T20:14:08.1458221Z", + # serviceExceptionJson is not present when status is 
not "Failed" + } + ) + mock_powerbi_hook.wait_for_dataset_refresh_status = MagicMock(return_value=True) + operator.execute(context) + + if ( + latest_refresh_details is None + or latest_refresh_details[PowerBIDatasetRefreshFields.STATUS.value] + in PowerBIDatasetRefreshStatus.TERMINAL_STATUSES + ): + assert mock_powerbi_hook.get_latest_refresh_details.called + assert mock_powerbi_hook.trigger_dataset_refresh.called + else: + assert not mock_powerbi_hook.trigger_dataset_refresh.called + + assert not mock_powerbi_hook.wait_for_dataset_refresh_status.called + assert mock_powerbi_hook.get_refresh_details_by_request_id.called + assert context["ti"].xcom_push.call_count == 4 + assert context["ti"].xcom_push.call_args_list == [ + call(key="powerbi_dataset_refresh_id", value=new_refresh_request_id), + call(key="powerbi_dataset_refresh_status", value=PowerBIDatasetRefreshStatus.COMPLETED), + call(key="powerbi_dataset_refresh_end_time", value="2024-04-15T20:14:08.1458221Z"), + call(key="powerbi_dataset_refresh_error", value="None"), + ] + + +_get_wait_for_status_args = [(True), (False)] + + +@pytest.mark.parametrize( + argnames=("wait_for_status_return_value"), + argvalues=_get_wait_for_status_args, + ids=[f"wait_for_status_return_value_{argval}" for argval in _get_wait_for_status_args], +) +def test_execute_wait_for_termination_preexisting_refresh_going_on( + mock_powerbi_hook, wait_for_status_return_value +): + operator = PowerBIDatasetRefreshOperator( + wait_for_termination=True, + force_refresh=True, + **CONFIG, + ) + preexisting_refresh_request_id = "6b6536c1-cfcb-4148-9c21-402c3f5241e4" + new_refresh_request_id = "5e2d9921-e91b-491f-b7e1-e7d8db49194c" + operator.hook = mock_powerbi_hook + context = {"ti": MagicMock()} + mock_powerbi_hook.get_latest_refresh_details = MagicMock( + return_value={ + PowerBIDatasetRefreshFields.REQUEST_ID.value: preexisting_refresh_request_id, + PowerBIDatasetRefreshFields.STATUS.value: PowerBIDatasetRefreshStatus.IN_PROGRESS, # endtime is 
not available. + } + ) + mock_powerbi_hook.trigger_dataset_refresh = MagicMock(return_value=new_refresh_request_id) + mock_powerbi_hook.get_refresh_details_by_request_id = MagicMock( + return_value={ + PowerBIDatasetRefreshFields.REQUEST_ID.value: new_refresh_request_id, + PowerBIDatasetRefreshFields.STATUS.value: PowerBIDatasetRefreshStatus.COMPLETED, + PowerBIDatasetRefreshFields.END_TIME.value: "2024-04-15T20:14:08.1458221Z", + # serviceExceptionJson is not present when status is not "Failed" + } + ) + mock_powerbi_hook.wait_for_dataset_refresh_status = MagicMock(return_value=wait_for_status_return_value) + + if wait_for_status_return_value is False: + with pytest.raises(PowerBIDatasetRefreshException): + operator.execute(context) + assert not mock_powerbi_hook.trigger_dataset_refresh.called + else: + operator.execute(context) + assert mock_powerbi_hook.trigger_dataset_refresh.called + assert mock_powerbi_hook.get_refresh_details_by_request_id.called + assert mock_powerbi_hook.wait_for_dataset_refresh_status.call_count == 2 + assert context["ti"].xcom_push.call_count == 4 + assert context["ti"].xcom_push.call_args_list == [ + call(key="powerbi_dataset_refresh_id", value=new_refresh_request_id), + call(key="powerbi_dataset_refresh_status", value=PowerBIDatasetRefreshStatus.COMPLETED), + call(key="powerbi_dataset_refresh_end_time", value="2024-04-15T20:14:08.1458221Z"), + call(key="powerbi_dataset_refresh_error", value="None"), + ] + + +_get_wait_for_status_and_latest_refresh_details_args = [ + (True, None), + (False, None), + (True, COMPLETED_REFRESH_DETAILS), + (False, COMPLETED_REFRESH_DETAILS), + (True, FAILED_REFRESH_DETAILS), + (False, FAILED_REFRESH_DETAILS), +] + + +@pytest.mark.parametrize( + argnames=("wait_for_status_return_value", "latest_refresh_details"), + argvalues=_get_wait_for_status_and_latest_refresh_details_args, + ids=[ + ( + f"wait_for_status_detail_{argval[1][PowerBIDatasetRefreshFields.STATUS.value]}_return_value_{argval[0]}" + if 
argval[1] is not None + else f"wait_for_status_detail_None_return_value_{argval[0]}" + ) + for argval in _get_wait_for_status_and_latest_refresh_details_args + ], +) +def test_execute_wait_for_termination_no_preexisting_refresh( + mock_powerbi_hook, wait_for_status_return_value, latest_refresh_details +): + operator = PowerBIDatasetRefreshOperator( + wait_for_termination=True, + force_refresh=True, + **CONFIG, + ) + operator.hook = mock_powerbi_hook + context = {"ti": MagicMock()} + new_refresh_request_id = "11bf290a-346b-48b7-8973-c5df149337ff" + + # Magic mock the hook methods + mock_powerbi_hook.get_latest_refresh_details = MagicMock(return_value=latest_refresh_details) + mock_powerbi_hook.trigger_dataset_refresh = MagicMock(return_value=new_refresh_request_id) + mock_powerbi_hook.get_refresh_details_by_request_id = MagicMock( + return_value={ + PowerBIDatasetRefreshFields.REQUEST_ID.value: new_refresh_request_id, + PowerBIDatasetRefreshFields.STATUS.value: PowerBIDatasetRefreshStatus.COMPLETED, + PowerBIDatasetRefreshFields.END_TIME.value: "2024-04-15T20:14:08.1458221Z", + # serviceExceptionJson is not present when status is not "Failed" + } + ) + mock_powerbi_hook.wait_for_dataset_refresh_status = MagicMock(return_value=wait_for_status_return_value) + + # Act and assert + if wait_for_status_return_value is False: + with pytest.raises(PowerBIDatasetRefreshException): + operator.execute(context) + else: + operator.execute(context) + assert mock_powerbi_hook.trigger_dataset_refresh.called + assert mock_powerbi_hook.get_refresh_details_by_request_id.called + mock_powerbi_hook.wait_for_dataset_refresh_status.assert_called_once_with( + request_id=new_refresh_request_id, + dataset_id=DATASET_ID, + group_id=GROUP_ID, + expected_status=PowerBIDatasetRefreshStatus.COMPLETED, + ) + assert context["ti"].xcom_push.call_count == 4 + assert context["ti"].xcom_push.call_args_list == [ + call( + key="powerbi_dataset_refresh_id", + value=new_refresh_request_id, + ), + 
call(key="powerbi_dataset_refresh_status", value=PowerBIDatasetRefreshStatus.COMPLETED), + call(key="powerbi_dataset_refresh_end_time", value="2024-04-15T20:14:08.1458221Z"), + call(key="powerbi_dataset_refresh_error", value="None"), + ] diff --git a/tests/system/providers/microsoft/azure/example_dataset_refresh.py b/tests/system/providers/microsoft/azure/example_dataset_refresh.py new file mode 100644 index 0000000000000..6c04d90d4c07d --- /dev/null +++ b/tests/system/providers/microsoft/azure/example_dataset_refresh.py @@ -0,0 +1,87 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +from datetime import datetime, timedelta + +from airflow.models import DAG + +# Ignore missing args provided by default_args +# mypy: disable-error-code="call-arg" +from airflow.operators.empty import EmptyOperator +from airflow.providers.microsoft.azure.operators.powerbi import PowerBIDatasetRefreshOperator +from airflow.utils.edgemodifier import Label + +DAG_ID = "example_powerbi_dataset_refresh" + +with DAG( + dag_id=DAG_ID, + start_date=datetime(2021, 8, 13), + schedule="@daily", + catchup=False, + default_args={ + "retries": 1, + "retry_delay": timedelta(minutes=3), + }, + default_view="graph", +) as dag: + begin = EmptyOperator(task_id="begin") + end = EmptyOperator(task_id="end") + + # [START howto_operator_powerbi_refresh_dataset] + dataset_refresh = PowerBIDatasetRefreshOperator( + powerbi_conn_id="powerbi_default", + task_id="dataset_refresh", + dataset_id="dataset-id", + group_id="group-id", + ) + # [END howto_operator_powerbi_refresh_dataset] + + # [START howto_operator_powerbi_refresh_dataset_async] + dataset_refresh2 = PowerBIDatasetRefreshOperator( + powerbi_conn_id="powerbi_default", + task_id="dataset_refresh_async", + dataset_id="dataset-id", + group_id="group-id", + wait_for_termination=False, + ) + # [END howto_operator_powerbi_refresh_dataset_async] + + # [START howto_operator_powerbi_refresh_dataset_force_refresh] + dataset_refresh3 = PowerBIDatasetRefreshOperator( + powerbi_conn_id="powerbi_default", + task_id="dataset_refresh_force_refresh", + dataset_id="dataset-id", + group_id="group-id", + force_refresh=True, + ) + # [END howto_operator_powerbi_refresh_dataset_force_refresh] + + begin >> Label("No async wait") >> dataset_refresh + begin >> Label("Do async wait with force refresh") >> dataset_refresh2 + begin >> Label("Do async wait") >> dataset_refresh3 >> end + + from tests.system.utils.watcher import watcher + + # This test needs watcher in order to properly mark success/failure + # when "tearDown" 
task with trigger rule is part of the DAG + list(dag.tasks) >> watcher() + +from tests.system.utils import get_test_run # noqa: E402 + +# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest) +test_run = get_test_run(dag) From b0251cadc2e2d0a58ad6e26d697bf843dacb44f8 Mon Sep 17 00:00:00 2001 From: Ambika Garg Date: Mon, 8 Jul 2024 17:25:06 -0400 Subject: [PATCH 02/22] Extend Power BI Operator to support async mode * Extend PowerBIHook call to msgraph operator * Add the trigger class to enable deffering * Enable cache token --- .../microsoft/azure/hooks/powerbi.py | 139 +++++++++++++--- .../microsoft/azure/operators/powerbi.py | 117 ++++++-------- .../providers/microsoft/azure/provider.yaml | 3 + .../microsoft/azure/triggers/powerbi.py | 150 ++++++++++++++++++ .../microsoft/azure/hooks/test_powerbi.py | 3 - .../microsoft/azure/operators/test_powerbi.py | 5 - 6 files changed, 317 insertions(+), 100 deletions(-) create mode 100644 airflow/providers/microsoft/azure/triggers/powerbi.py diff --git a/airflow/providers/microsoft/azure/hooks/powerbi.py b/airflow/providers/microsoft/azure/hooks/powerbi.py index d547958efef0e..053ae5babc7f8 100644 --- a/airflow/providers/microsoft/azure/hooks/powerbi.py +++ b/airflow/providers/microsoft/azure/hooks/powerbi.py @@ -17,15 +17,22 @@ from __future__ import annotations +import logging import time from enum import Enum -from typing import Any, Callable +from typing import TYPE_CHECKING, Any, Callable import requests from azure.identity import ClientSecretCredential from airflow.exceptions import AirflowException from airflow.hooks.base import BaseHook +from airflow.providers.microsoft.azure.hooks.msgraph import KiotaRequestAdapterHook + +if TYPE_CHECKING: + from azure.core.credentials import AccessToken + +logger = logging.getLogger(__name__) class PowerBIDatasetRefreshFields(Enum): @@ -33,7 +40,6 @@ class PowerBIDatasetRefreshFields(Enum): REQUEST_ID = "request_id" STATUS = "status" - END_TIME = 
"end_time" ERROR = "error" @@ -96,6 +102,7 @@ def __init__( self.conn_id = powerbi_conn_id self._api_version = "v1.0" self._base_url = "https://api.powerbi.com" + self.cached_access_token: dict[str, str | None | int] = {"access_token": None, "expiry_time": 0} super().__init__() def refresh_dataset(self, dataset_id: str, group_id: str) -> str: @@ -127,9 +134,14 @@ def refresh_dataset(self, dataset_id: str, group_id: str) -> str: def _get_token(self) -> str: """Retrieve the access token used to authenticate against the API.""" + access_token = self.cached_access_token.get("access_token") + expiry_time = self.cached_access_token.get("expiry_time") + + if access_token and isinstance(expiry_time, int) and expiry_time > time.time(): + return str(access_token) + conn = self.get_connection(self.conn_id) extras = conn.extra_dejson - print(extras) tenant = extras.get("tenant_id", None) if not conn.login or not conn.password: @@ -138,15 +150,19 @@ def _get_token(self) -> str: if not tenant: raise ValueError("A Tenant ID is required when authenticating with Client ID and Secret.") - credential = ClientSecretCredential( + credentials = ClientSecretCredential( client_id=conn.login, client_secret=conn.password, tenant_id=tenant ) - resource = "https://analysis.windows.net/powerbi/api" + resource = "https://analysis.windows.net/powerbi/api/.default" - access_token = credential.get_token(f"{resource}/.default") + raw_access_token: AccessToken = credentials.get_token(resource) - return access_token.token + self.cached_access_token = { + "access_token": raw_access_token.token, + "expiry_time": raw_access_token.expires_on, + } + return raw_access_token.token def get_refresh_history( self, @@ -193,7 +209,6 @@ def raw_to_refresh_details(self, refresh_details: dict) -> dict[str, str]: if str(refresh_details.get("status")) == "Unknown" else str(refresh_details.get("status")) ), - PowerBIDatasetRefreshFields.END_TIME.value: str(refresh_details.get("endTime")), 
PowerBIDatasetRefreshFields.ERROR.value: str(refresh_details.get("serviceExceptionJson")), } @@ -258,31 +273,26 @@ def wait_for_dataset_refresh_status( :param timeout: Time in seconds to wait for a dataset to reach a terminal status or the expected status. :return: Boolean indicating if the dataset refresh has reached the ``expected_status`` before the timeout. """ - dataset_refresh_details = self.get_refresh_details_by_request_id( - dataset_id=dataset_id, group_id=group_id, request_id=request_id - ) - dataset_refresh_status = dataset_refresh_details.get(PowerBIDatasetRefreshFields.STATUS.value) - start_time = time.monotonic() - while ( - dataset_refresh_status not in PowerBIDatasetRefreshStatus.TERMINAL_STATUSES - and dataset_refresh_status != expected_status - ): - # Check if the dataset-refresh duration has exceeded the ``timeout`` configured. - if start_time + timeout < time.monotonic(): - raise PowerBIDatasetRefreshException( - f"Dataset refresh has not reached a terminal status after {timeout} seconds" - ) - - time.sleep(check_interval) - + while start_time + timeout > time.monotonic(): dataset_refresh_details = self.get_refresh_details_by_request_id( dataset_id=dataset_id, group_id=group_id, request_id=request_id ) dataset_refresh_status = dataset_refresh_details.get(PowerBIDatasetRefreshFields.STATUS.value) - return dataset_refresh_status == expected_status + if dataset_refresh_status in PowerBIDatasetRefreshStatus.TERMINAL_STATUSES: + return dataset_refresh_status == expected_status + + logger.info( + "Current dataset refresh status is %s. 
Sleeping for %s", + dataset_refresh_status, + check_interval, + ) + time.sleep(check_interval) + + # Timeout reached + return False def trigger_dataset_refresh(self, *, dataset_id: str, group_id: str) -> str: """ @@ -321,3 +331,80 @@ def _send_request(self, request_type: str, url: str, **kwargs) -> requests.Respo response = func(url=url, headers={"Authorization": f"Bearer {self._get_token()}"}, **kwargs) return response + + +class PowerBIAsyncHook(PowerBIHook): + """ + A hook to interact with Power BI asynchronously. + + :param powerbi_conn_id: Airflow Connection ID that contains the connection + information for the Power BI account used for authentication. + """ + + default_conn_name: str = "powerbi_default" + + def __init__( + self, + *, + powerbi_conn_id: str = default_conn_name, + ): + self.powerbi_conn_id = powerbi_conn_id + self._api_version = "v1.0" + self.helper_hook = KiotaRequestAdapterHook( + conn_id=self.powerbi_conn_id, api_version=self._api_version + ) + super().__init__() + + async def get_dataset_refresh_status( + self, dataset_id: str, group_id: str, dataset_refresh_id: str + ) -> str: + """ + Retrieve the refresh status for the specified dataset refresh from the given group id. + + :param dataset_id: The dataset Id. + :param group_id: The workspace Id. + :param dataset_refresh_id: The dataset refresh Id. + + :return: Dataset refresh status. + """ + response = await self.helper_hook.run( + url="myorg/groups/{group_id}/datasets/{dataset_id}/refreshes", + response_type=None, + path_parameters={ + "group_id": group_id, + "dataset_id": dataset_id, + }, + method="GET", + ) + # clean the raw refresh histories fetched from the API. 
+ raw_refresh_histories = response.get("value") + clean_refresh_histories = [ + self.raw_to_refresh_details(refresh_history) for refresh_history in raw_refresh_histories + ] + + for refresh_history in clean_refresh_histories: + if refresh_history.get(PowerBIDatasetRefreshFields.REQUEST_ID.value) == dataset_refresh_id: + return str(refresh_history.get(PowerBIDatasetRefreshFields.STATUS.value, "Unknown")) + + raise PowerBIDatasetRefreshException( + f"Failed to retrieve the status of dataset refresh with Id: {dataset_refresh_id}" + ) + + async def cancel_dataset_refresh(self, dataset_id: str, group_id: str, dataset_refresh_id: str) -> None: + """ + Cancel the dataset refresh. + + :param dataset_id: The dataset Id. + :param group_id: The workspace Id. + :param dataset_refresh_id: The dataset refresh Id. + """ + await self.helper_hook.run( + url="myorg/groups/{group_id}/datasets/{dataset_id}/refreshes/{dataset_refresh_id}", + response_type=None, + path_parameters={ + "group_id": group_id, + "dataset_id": dataset_id, + "dataset_refresh_id": dataset_refresh_id, + }, + method="DELETE", + ) diff --git a/airflow/providers/microsoft/azure/operators/powerbi.py b/airflow/providers/microsoft/azure/operators/powerbi.py index 9c26a4b6048c5..ffd4fc21bd038 100644 --- a/airflow/providers/microsoft/azure/operators/powerbi.py +++ b/airflow/providers/microsoft/azure/operators/powerbi.py @@ -17,10 +17,13 @@ from __future__ import annotations +import time +import warnings from functools import cached_property -from typing import TYPE_CHECKING, Sequence +from typing import TYPE_CHECKING, Any, Sequence from airflow.configuration import conf +from airflow.exceptions import AirflowException from airflow.models import BaseOperator, BaseOperatorLink from airflow.providers.microsoft.azure.hooks.powerbi import ( PowerBIDatasetRefreshException, @@ -28,6 +31,7 @@ PowerBIDatasetRefreshStatus, PowerBIHook, ) +from airflow.providers.microsoft.azure.triggers.powerbi import PowerBITrigger if 
TYPE_CHECKING: from airflow.models.taskinstancekey import TaskInstanceKey @@ -106,29 +110,39 @@ def hook(self) -> PowerBIHook: def execute(self, context: Context): """Refresh the Power BI Dataset.""" - self.log.info("Check if a refresh is already in progress.") - refresh_details = self.hook.get_latest_refresh_details( - dataset_id=self.dataset_id, group_id=self.group_id + self.log.info("Executing Dataset refresh.") + request_id = self.hook.trigger_dataset_refresh( + dataset_id=self.dataset_id, + group_id=self.group_id, ) - if ( - refresh_details is None - or refresh_details.get(PowerBIDatasetRefreshFields.STATUS.value) - in PowerBIDatasetRefreshStatus.TERMINAL_STATUSES - ): - self.log.info("No pre-existing refresh found.") - request_id = self.hook.trigger_dataset_refresh( - dataset_id=self.dataset_id, - group_id=self.group_id, - ) - - if self.wait_for_termination: + # Push Dataset Refresh ID to Xcom regardless of what happens during the refresh + context["ti"].xcom_push(key="powerbi_dataset_refresh_id", value=request_id) + + if self.wait_for_termination: + if self.deferrable: + end_time = time.time() + self.timeout + self.defer( + trigger=PowerBITrigger( + powerbi_conn_id=self.powerbi_conn_id, + group_id=self.group_id, + dataset_id=self.dataset_id, + dataset_refresh_id=request_id, + end_time=end_time, + check_interval=self.check_interval, + wait_for_termination=self.wait_for_termination, + ), + method_name=self.execute_complete.__name__, + ) + else: self.log.info("Waiting for dataset refresh to terminate.") if self.hook.wait_for_dataset_refresh_status( request_id=request_id, dataset_id=self.dataset_id, group_id=self.group_id, expected_status=PowerBIDatasetRefreshStatus.COMPLETED, + check_interval=self.check_interval, + timeout=self.timeout, ): self.log.info("Dataset refresh %s has completed successfully.", request_id) else: @@ -136,65 +150,36 @@ def execute(self, context: Context): f"Dataset refresh {request_id} has failed or has been cancelled." 
) else: - # If in-progress pre-existing refresh is found. - if ( - refresh_details.get(PowerBIDatasetRefreshFields.STATUS.value) - == PowerBIDatasetRefreshStatus.IN_PROGRESS - ): - request_id = str(refresh_details.get(PowerBIDatasetRefreshFields.REQUEST_ID.value)) - self.log.info("Found pre-existing dataset refresh request: %s.", request_id) - - if self.force_refresh or self.wait_for_termination: - self.log.info("Waiting for dataset refresh %s to terminate.", request_id) - if self.hook.wait_for_dataset_refresh_status( - request_id=request_id, - dataset_id=self.dataset_id, - group_id=self.group_id, - expected_status=PowerBIDatasetRefreshStatus.COMPLETED, - check_interval=self.check_interval, - timeout=self.timeout, - ): - self.log.info( - "Pre-existing dataset refresh %s has completed successfully.", request_id - ) - else: - raise PowerBIDatasetRefreshException( - f"Pre-exisintg dataset refresh {request_id} has failed or has been cancelled." - ) - - if self.force_refresh: - self.log.info("Starting new refresh.") - request_id = self.hook.trigger_dataset_refresh( - dataset_id=self.dataset_id, - group_id=self.group_id, - ) - - if self.wait_for_termination: - self.log.info("Waiting for dataset refresh to terminate.") - if self.hook.wait_for_dataset_refresh_status( - request_id=request_id, - dataset_id=self.dataset_id, - group_id=self.group_id, - expected_status=PowerBIDatasetRefreshStatus.COMPLETED, - ): - self.log.info("Dataset refresh %s has completed successfully.", request_id) - else: - raise PowerBIDatasetRefreshException( - f"Dataset refresh {request_id} has failed or has been cancelled." 
- ) + if self.deferrable is True: + warnings.warn( + "Argument `wait_for_termination` is False and `deferrable` is True , hence " + "`deferrable` parameter doesn't have any effect", + UserWarning, + stacklevel=2, + ) # Retrieve refresh details after triggering refresh refresh_details = self.hook.get_refresh_details_by_request_id( dataset_id=self.dataset_id, group_id=self.group_id, request_id=request_id ) - request_id = str(refresh_details.get(PowerBIDatasetRefreshFields.REQUEST_ID.value)) status = str(refresh_details.get(PowerBIDatasetRefreshFields.STATUS.value)) - end_time = str(refresh_details.get(PowerBIDatasetRefreshFields.END_TIME.value)) error = str(refresh_details.get(PowerBIDatasetRefreshFields.ERROR.value)) # Xcom Integration - context["ti"].xcom_push(key="powerbi_dataset_refresh_id", value=request_id) context["ti"].xcom_push(key="powerbi_dataset_refresh_status", value=status) - context["ti"].xcom_push(key="powerbi_dataset_refresh_end_time", value=end_time) context["ti"].xcom_push(key="powerbi_dataset_refresh_error", value=error) + + def execute_complete(self, context: Context, event: dict[str, str]) -> Any: + """ + Return immediately - callback for when the trigger fires. + + Relies on trigger to throw an exception, otherwise it assumes execution was successful. 
+ """ + if event: + if event["status"] == "error": + raise AirflowException(event["message"]) + else: + # Push Dataset refresh status to Xcom + context["ti"].xcom_push(key="powerbi_dataset_refresh_status", value=event["status"]) + self.log.info(event["message"]) diff --git a/airflow/providers/microsoft/azure/provider.yaml b/airflow/providers/microsoft/azure/provider.yaml index 44f5515566691..bdaa1216727a7 100644 --- a/airflow/providers/microsoft/azure/provider.yaml +++ b/airflow/providers/microsoft/azure/provider.yaml @@ -287,6 +287,9 @@ triggers: - integration-name: Microsoft Graph API python-modules: - airflow.providers.microsoft.azure.triggers.msgraph + - integration-name: Microsoft Power BI + python-modules: + - airflow.providers.microsoft.azure.triggers.powerbi transfers: - source-integration-name: Local diff --git a/airflow/providers/microsoft/azure/triggers/powerbi.py b/airflow/providers/microsoft/azure/triggers/powerbi.py new file mode 100644 index 0000000000000..70ef2b8bb9acb --- /dev/null +++ b/airflow/providers/microsoft/azure/triggers/powerbi.py @@ -0,0 +1,150 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from __future__ import annotations + +import asyncio +import time +from typing import AsyncIterator + +from airflow.providers.microsoft.azure.hooks.powerbi import ( + PowerBIAsyncHook, + PowerBIDatasetRefreshStatus, +) +from airflow.triggers.base import BaseTrigger, TriggerEvent + + +class PowerBITrigger(BaseTrigger): + """ + Triggers when Power BI dataset refresh is completed. + + Wait for termination will always be True. + + :param powerbi_conn_id: The connection Id to connect to PowerBI. + :param dataset_id: The dataset Id to refresh. + :param group_id: The workspace Id where dataset is located. + :param dataset_refresh_id: The dataset refresh Id. + :param end_time: Time in seconds when trigger should stop polling. + :param check_interval: Time in seconds to wait between each poll. + :param wait_for_termination: Wait for the dataset refresh to complete or fail. + """ + + def __init__( + self, + powerbi_conn_id: str, + dataset_id: str, + group_id: str, + dataset_refresh_id: str, + end_time: float, + check_interval: int = 60, + wait_for_termination: bool = True, + ): + super().__init__() + self.powerbi_conn_id = powerbi_conn_id + self.dataset_id = dataset_id + self.group_id = group_id + self.dataset_refresh_id = dataset_refresh_id + self.end_time = end_time + self.check_interval = check_interval + self.wait_for_termination = wait_for_termination + + def serialize(self): + """Serialize the trigger instance.""" + return ( + "airflow.providers.microsoft.azure.triggers.powerbi.PowerBITrigger", + { + "powerbi_conn_id": self.powerbi_conn_id, + "dataset_id": self.dataset_id, + "group_id": self.group_id, + "dataset_refresh_id": self.dataset_refresh_id, + "end_time": self.end_time, + "check_interval": self.check_interval, + "wait_for_termination": self.wait_for_termination, + }, + ) + + async def run(self) -> AsyncIterator[TriggerEvent]: + """Make async connection to the PowerBI and polls for the dataset refresh status.""" + hook = 
PowerBIAsyncHook(powerbi_conn_id=self.powerbi_conn_id) + try: + while self.end_time > time.time(): + dataset_refresh_status = await hook.get_dataset_refresh_status( + dataset_id=self.dataset_id, + group_id=self.group_id, + dataset_refresh_id=self.dataset_refresh_id, + ) + if dataset_refresh_status == PowerBIDatasetRefreshStatus.COMPLETED: + yield TriggerEvent( + { + "status": dataset_refresh_status, + "message": f"The dataset refresh {self.dataset_refresh_id} has {dataset_refresh_status}.", + "dataset_refresh_id": self.dataset_refresh_id, + } + ) + return + elif dataset_refresh_status == PowerBIDatasetRefreshStatus.FAILED: + yield TriggerEvent( + { + "status": dataset_refresh_status, + "message": f"The dataset refresh {self.dataset_refresh_id} has {dataset_refresh_status}.", + "dataset_refresh_id": self.dataset_refresh_id, + } + ) + return + self.log.info( + "Sleeping for %s. The dataset refresh status is %s.", + self.check_interval, + dataset_refresh_status, + ) + await asyncio.sleep(self.check_interval) + + yield TriggerEvent( + { + "status": "error", + "message": f"Timeout: The dataset refresh {self.dataset_refresh_id} has {dataset_refresh_status}.", + "dataset_refresh_id": self.dataset_refresh_id, + } + ) + return + except Exception as error: + if self.dataset_refresh_id: + try: + self.log.info( + "Unexpected error %s caught. 
Cancel pipeline run %s", error, self.dataset_refresh_id + ) + await hook.cancel_dataset_refresh( + dataset_id=self.dataset_id, + group_id=self.group_id, + dataset_refresh_id=self.dataset_refresh_id, + ) + except Exception as e: + yield TriggerEvent( + { + "status": "error", + "message": f"An error occurred while canceling pipeline: {e}", + "dataset_refresh_id": self.dataset_refresh_id, + } + ) + return + yield TriggerEvent( + { + "status": "error", + "message": f"An error occurred: {error}", + "dataset_refresh_id": self.dataset_refresh_id, + } + ) + return diff --git a/tests/providers/microsoft/azure/hooks/test_powerbi.py b/tests/providers/microsoft/azure/hooks/test_powerbi.py index 9036243aac6ee..15dcc3b56cb34 100644 --- a/tests/providers/microsoft/azure/hooks/test_powerbi.py +++ b/tests/providers/microsoft/azure/hooks/test_powerbi.py @@ -68,21 +68,18 @@ { PowerBIDatasetRefreshFields.REQUEST_ID.value: "5e2d9921-e91b-491f-b7e1-e7d8db49194c", PowerBIDatasetRefreshFields.STATUS.value: PowerBIDatasetRefreshStatus.COMPLETED, - PowerBIDatasetRefreshFields.END_TIME.value: "2024-04-15T20:14:08.1458221Z", PowerBIDatasetRefreshFields.ERROR.value: "None", }, # In-progress refresh { PowerBIDatasetRefreshFields.REQUEST_ID.value: "6b6536c1-cfcb-4148-9c21-402c3f5241e4", PowerBIDatasetRefreshFields.STATUS.value: PowerBIDatasetRefreshStatus.IN_PROGRESS, - PowerBIDatasetRefreshFields.END_TIME.value: "None", PowerBIDatasetRefreshFields.ERROR.value: "None", }, # Failed refresh { PowerBIDatasetRefreshFields.REQUEST_ID.value: "11bf290a-346b-48b7-8973-c5df149337ff", PowerBIDatasetRefreshFields.STATUS.value: PowerBIDatasetRefreshStatus.FAILED, - PowerBIDatasetRefreshFields.END_TIME.value: "2024-04-15T20:14:08.1458221Z", PowerBIDatasetRefreshFields.ERROR.value: '{"errorCode":"ModelRefreshFailed_CredentialsNotSpecified"}', }, ] diff --git a/tests/providers/microsoft/azure/operators/test_powerbi.py b/tests/providers/microsoft/azure/operators/test_powerbi.py index 
d7b01c056ed57..bd64065ee276b 100644 --- a/tests/providers/microsoft/azure/operators/test_powerbi.py +++ b/tests/providers/microsoft/azure/operators/test_powerbi.py @@ -46,14 +46,12 @@ COMPLETED_REFRESH_DETAILS = { PowerBIDatasetRefreshFields.REQUEST_ID.value: "5e2d9921-e91b-491f-b7e1-e7d8db49194c", PowerBIDatasetRefreshFields.STATUS.value: PowerBIDatasetRefreshStatus.COMPLETED, - PowerBIDatasetRefreshFields.END_TIME.value: "2024-04-15T20:14:08.1458221Z", # serviceExceptionJson is not present when status is not "Failed" } FAILED_REFRESH_DETAILS = { PowerBIDatasetRefreshFields.REQUEST_ID.value: "11bf290a-346b-48b7-8973-c5df149337ff", PowerBIDatasetRefreshFields.STATUS.value: PowerBIDatasetRefreshStatus.FAILED, - PowerBIDatasetRefreshFields.END_TIME.value: "2024-04-15T20:14:08.1458221Z", PowerBIDatasetRefreshFields.ERROR.value: '{"errorCode":"ModelRefreshFailed_CredentialsNotSpecified"}', } @@ -105,7 +103,6 @@ def test_execute_no_wait_for_termination(mock_powerbi_hook, latest_refresh_detai return_value={ PowerBIDatasetRefreshFields.REQUEST_ID.value: new_refresh_request_id, PowerBIDatasetRefreshFields.STATUS.value: PowerBIDatasetRefreshStatus.COMPLETED, - PowerBIDatasetRefreshFields.END_TIME.value: "2024-04-15T20:14:08.1458221Z", # serviceExceptionJson is not present when status is not "Failed" } ) @@ -164,7 +161,6 @@ def test_execute_wait_for_termination_preexisting_refresh_going_on( return_value={ PowerBIDatasetRefreshFields.REQUEST_ID.value: new_refresh_request_id, PowerBIDatasetRefreshFields.STATUS.value: PowerBIDatasetRefreshStatus.COMPLETED, - PowerBIDatasetRefreshFields.END_TIME.value: "2024-04-15T20:14:08.1458221Z", # serviceExceptionJson is not present when status is not "Failed" } ) @@ -229,7 +225,6 @@ def test_execute_wait_for_termination_no_preexisting_refresh( return_value={ PowerBIDatasetRefreshFields.REQUEST_ID.value: new_refresh_request_id, PowerBIDatasetRefreshFields.STATUS.value: PowerBIDatasetRefreshStatus.COMPLETED, - 
PowerBIDatasetRefreshFields.END_TIME.value: "2024-04-15T20:14:08.1458221Z", # serviceExceptionJson is not present when status is not "Failed" } ) From 32b2bde546e3224d82eca8f79a10013ea9bcb27b Mon Sep 17 00:00:00 2001 From: David Blain Date: Thu, 11 Jul 2024 15:32:20 +0200 Subject: [PATCH 03/22] refactor: Refactored PowerBIHook based on the KiotaRequestAdapterHook into one hook, also take into account proxies. This is how I would do it, it isn't finished of course but that should put you in right direction. As there is a lot of polling involved, I would just like the MSGraphOperator, make it a pure async operator but that's my opinion. --- .../microsoft/azure/hooks/msgraph.py | 8 +- .../microsoft/azure/hooks/powerbi.py | 200 +++++------------- .../microsoft/azure/operators/powerbi.py | 44 ++-- .../microsoft/azure/triggers/powerbi.py | 52 ++++- 4 files changed, 133 insertions(+), 171 deletions(-) diff --git a/airflow/providers/microsoft/azure/hooks/msgraph.py b/airflow/providers/microsoft/azure/hooks/msgraph.py index 56abfa155da7c..76a2afaa2310c 100644 --- a/airflow/providers/microsoft/azure/hooks/msgraph.py +++ b/airflow/providers/microsoft/azure/hooks/msgraph.py @@ -110,14 +110,17 @@ def __init__( conn_id: str = default_conn_name, timeout: float | None = None, proxies: dict | None = None, + host: str = NationalClouds.Global.value, api_version: APIVersion | str | None = None, ): super().__init__() self.conn_id = conn_id self.timeout = timeout self.proxies = proxies + self.host = host self._api_version = self.resolve_api_version_from_value(api_version) + @property def api_version(self) -> APIVersion: self.get_conn() # Make sure config has been loaded through get_conn to have correct api version! 
@@ -141,11 +144,10 @@ def get_api_version(self, config: dict) -> APIVersion: ) return self._api_version - @staticmethod - def get_host(connection: Connection) -> str: + def get_host(self, connection: Connection) -> str: if connection.schema and connection.host: return f"{connection.schema}://{connection.host}" - return NationalClouds.Global.value + return self.host @staticmethod def format_no_proxy_url(url: str) -> str: diff --git a/airflow/providers/microsoft/azure/hooks/powerbi.py b/airflow/providers/microsoft/azure/hooks/powerbi.py index 053ae5babc7f8..d641cd3656ae6 100644 --- a/airflow/providers/microsoft/azure/hooks/powerbi.py +++ b/airflow/providers/microsoft/azure/hooks/powerbi.py @@ -20,17 +20,15 @@ import logging import time from enum import Enum -from typing import TYPE_CHECKING, Any, Callable +from typing import TYPE_CHECKING, Any -import requests -from azure.identity import ClientSecretCredential +from msgraph_core import APIVersion from airflow.exceptions import AirflowException -from airflow.hooks.base import BaseHook from airflow.providers.microsoft.azure.hooks.msgraph import KiotaRequestAdapterHook if TYPE_CHECKING: - from azure.core.credentials import AccessToken + pass logger = logging.getLogger(__name__) @@ -59,19 +57,33 @@ class PowerBIDatasetRefreshException(AirflowException): """An exception that indicates a dataset refresh failed to complete.""" -class PowerBIHook(BaseHook): +class PowerBIHook(KiotaRequestAdapterHook): """ A hook to interact with Power BI. - :param powerbi_conn_id: Airflow Connection ID that contains the connection + :param conn_id: Airflow Connection ID that contains the connection information for the Power BI account used for authentication. 
""" conn_type: str = "powerbi" - conn_name_attr: str = "powerbi_conn_id" default_conn_name: str = "powerbi_default" hook_name: str = "Power BI" + def __init__( + self, + conn_id: str = default_conn_name, + timeout: float | None = None, + proxies: dict | None = None, + api_version: APIVersion | str | None = None, + ): + super().__init__( + conn_id=conn_id, + timeout=timeout, + proxies=proxies, + host="https://api.powerbi.com", + api_version=api_version, + ) + @classmethod def get_connection_form_widgets(cls) -> dict[str, Any]: """Return connection widgets to add to connection form.""" @@ -94,18 +106,7 @@ def get_ui_field_behaviour(cls) -> dict[str, Any]: }, } - def __init__( - self, - *, - powerbi_conn_id: str = default_conn_name, - ): - self.conn_id = powerbi_conn_id - self._api_version = "v1.0" - self._base_url = "https://api.powerbi.com" - self.cached_access_token: dict[str, str | None | int] = {"access_token": None, "expiry_time": 0} - super().__init__() - - def refresh_dataset(self, dataset_id: str, group_id: str) -> str: + async def refresh_dataset(self, dataset_id: str, group_id: str) -> str: """ Triggers a refresh for the specified dataset from the given group id. @@ -114,57 +115,22 @@ def refresh_dataset(self, dataset_id: str, group_id: str) -> str: :return: Request id of the dataset refresh request. 
""" - url = f"{self._base_url}/{self._api_version}/myorg" - - # add the group id if it is specified - url += f"/groups/{group_id}" - - # add the dataset key - url += f"/datasets/{dataset_id}/refreshes" - - response = self._send_request("POST", url=url) + try: + response = await self.run( + url="myorg/groups/{group_id}/datasets/{dataset_id}/refreshes", + method="POST", + path_parameters={ + "group_id": group_id, + "dataset_id": dataset_id, + }, + ) - if response.ok: request_id = response.headers["RequestId"] return request_id + except AirflowException: + raise PowerBIDatasetRefreshException("Failed to trigger dataset refresh") - raise PowerBIDatasetRefreshException( - "Failed to trigger dataset refresh. Status code: %s", str(response.status_code) - ) - - def _get_token(self) -> str: - """Retrieve the access token used to authenticate against the API.""" - access_token = self.cached_access_token.get("access_token") - expiry_time = self.cached_access_token.get("expiry_time") - - if access_token and isinstance(expiry_time, int) and expiry_time > time.time(): - return str(access_token) - - conn = self.get_connection(self.conn_id) - extras = conn.extra_dejson - tenant = extras.get("tenant_id", None) - - if not conn.login or not conn.password: - raise ValueError("A Client ID and Secret is required to authenticate with Power BI.") - - if not tenant: - raise ValueError("A Tenant ID is required when authenticating with Client ID and Secret.") - - credentials = ClientSecretCredential( - client_id=conn.login, client_secret=conn.password, tenant_id=tenant - ) - - resource = "https://analysis.windows.net/powerbi/api/.default" - - raw_access_token: AccessToken = credentials.get_token(resource) - - self.cached_access_token = { - "access_token": raw_access_token.token, - "expiry_time": raw_access_token.expires_on, - } - return raw_access_token.token - - def get_refresh_history( + async def get_refresh_history( self, dataset_id: str, group_id: str, @@ -177,26 +143,22 @@ def 
get_refresh_history( :return: Dictionary containing all the refresh histories of the dataset. """ - url = f"{self._base_url}/{self._api_version}/myorg" - - # add the group id - url += f"/groups/{group_id}" - - # add the dataset id - url += f"/datasets/{dataset_id}/refreshes" - - raw_response = self._send_request("GET", url=url) + try: + response = await self.run( + url="myorg/groups/{group_id}/datasets/{dataset_id}/refreshes", + path_parameters={ + "group_id": group_id, + "dataset_id": dataset_id, + }, + ) - if raw_response.ok: - response = raw_response.json() - refresh_histories = response.get("value") + refresh_histories = response.json().get("value") return [self.raw_to_refresh_details(refresh_history) for refresh_history in refresh_histories] + except AirflowException: + raise PowerBIDatasetRefreshException("Failed to retrieve refresh history") - raise PowerBIDatasetRefreshException( - "Failed to retrieve refresh history. Status code: %s", str(response.status_code) - ) - - def raw_to_refresh_details(self, refresh_details: dict) -> dict[str, str]: + @classmethod + def raw_to_refresh_details(cls, refresh_details: dict) -> dict[str, str]: """ Convert raw refresh details into a dictionary containing required fields. @@ -212,13 +174,13 @@ def raw_to_refresh_details(self, refresh_details: dict) -> dict[str, str]: PowerBIDatasetRefreshFields.ERROR.value: str(refresh_details.get("serviceExceptionJson")), } - def get_latest_refresh_details(self, dataset_id: str, group_id: str) -> dict[str, str] | None: + async def get_latest_refresh_details(self, dataset_id: str, group_id: str) -> dict[str, str] | None: """ Get the refresh details of the most recent dataset refresh in the refresh history of the data source. :return: Dictionary containing refresh status and end time if refresh history exists, otherwise None. 
""" - history = self.get_refresh_history(dataset_id=dataset_id, group_id=group_id) + history = await self.get_refresh_history(dataset_id=dataset_id, group_id=group_id) if len(history) == 0: return None @@ -226,13 +188,13 @@ def get_latest_refresh_details(self, dataset_id: str, group_id: str) -> dict[str refresh_details = history[0] return refresh_details - def get_refresh_details_by_request_id(self, dataset_id: str, group_id: str, request_id) -> dict[str, str]: + async def get_refresh_details_by_request_id(self, dataset_id: str, group_id: str, request_id) -> dict[str, str]: """ Get the refresh details of the given request Id. :param request_id: Request Id of the Dataset refresh. """ - refresh_histories = self.get_refresh_history(dataset_id=dataset_id, group_id=group_id) + refresh_histories = await self.get_refresh_history(dataset_id=dataset_id, group_id=group_id) if len(refresh_histories) == 0: raise PowerBIDatasetRefreshException( @@ -254,7 +216,8 @@ def get_refresh_details_by_request_id(self, dataset_id: str, group_id: str, requ return refresh_details - def wait_for_dataset_refresh_status( + # TODO: This should definitely always be executed through a trigger, as this will be blocking otherwise + async def wait_for_dataset_refresh_status( self, *, expected_status: str, @@ -276,7 +239,7 @@ def wait_for_dataset_refresh_status( start_time = time.monotonic() while start_time + timeout > time.monotonic(): - dataset_refresh_details = self.get_refresh_details_by_request_id( + dataset_refresh_details = await self.get_refresh_details_by_request_id( dataset_id=dataset_id, group_id=group_id, request_id=request_id ) dataset_refresh_status = dataset_refresh_details.get(PowerBIDatasetRefreshFields.STATUS.value) @@ -294,7 +257,8 @@ def wait_for_dataset_refresh_status( # Timeout reached return False - def trigger_dataset_refresh(self, *, dataset_id: str, group_id: str) -> str: + # TODO: Why have a method with another name doing the same thing? 
+ async def trigger_dataset_refresh(self, *, dataset_id: str, group_id: str) -> str: """ Triggers the Power BI dataset refresh. @@ -305,55 +269,7 @@ def trigger_dataset_refresh(self, *, dataset_id: str, group_id: str) -> str: """ # Start dataset refresh self.log.info("Starting dataset refresh.") - request_id = self.refresh_dataset(dataset_id=dataset_id, group_id=group_id) - - return request_id - - def _send_request(self, request_type: str, url: str, **kwargs) -> requests.Response: - """ - Send a request to the Power BI REST API. - - :param request_type: The type of the request (GET, POST, PUT, etc.). - :param url: The URL against which the request needs to be made. - :param kwargs: Additional keyword arguments to be passed to the request function. - :return: The response object returned by the request. - :raises requests.HTTPError: If the request fails (e.g., non-2xx status code). - """ - self.header: dict[str, str] = {} - - request_funcs: dict[str, Callable[..., requests.Response]] = { - "GET": requests.get, - "POST": requests.post, - } - - func: Callable[..., requests.Response] = request_funcs[request_type.upper()] - - response = func(url=url, headers={"Authorization": f"Bearer {self._get_token()}"}, **kwargs) - - return response - - -class PowerBIAsyncHook(PowerBIHook): - """ - A hook to interact with Power BI asynchronously. - - :param powerbi_conn_id: Airflow Connection ID that contains the connection - information for the Power BI account used for authentication. 
- """ - - default_conn_name: str = "powerbi_default" - - def __init__( - self, - *, - powerbi_conn_id: str = default_conn_name, - ): - self.powerbi_conn_id = powerbi_conn_id - self._api_version = "v1.0" - self.helper_hook = KiotaRequestAdapterHook( - conn_id=self.powerbi_conn_id, api_version=self._api_version - ) - super().__init__() + return await self.refresh_dataset(dataset_id=dataset_id, group_id=group_id) async def get_dataset_refresh_status( self, dataset_id: str, group_id: str, dataset_refresh_id: str @@ -367,7 +283,7 @@ async def get_dataset_refresh_status( :return: Dataset refresh status. """ - response = await self.helper_hook.run( + response = await self.run( url="myorg/groups/{group_id}/datasets/{dataset_id}/refreshes", response_type=None, path_parameters={ @@ -398,7 +314,7 @@ async def cancel_dataset_refresh(self, dataset_id: str, group_id: str, dataset_r :param group_id: The workspace Id. :param dataset_refresh_id: The dataset refresh Id. """ - await self.helper_hook.run( + await self.run( url="myorg/groups/{group_id}/datasets/{dataset_id}/refreshes/{dataset_refresh_id}", response_type=None, path_parameters={ diff --git a/airflow/providers/microsoft/azure/operators/powerbi.py b/airflow/providers/microsoft/azure/operators/powerbi.py index ffd4fc21bd038..e82a158d4260f 100644 --- a/airflow/providers/microsoft/azure/operators/powerbi.py +++ b/airflow/providers/microsoft/azure/operators/powerbi.py @@ -17,11 +17,14 @@ from __future__ import annotations +import asyncio import time import warnings from functools import cached_property from typing import TYPE_CHECKING, Any, Sequence +from msgraph_core import APIVersion + from airflow.configuration import conf from airflow.exceptions import AirflowException from airflow.models import BaseOperator, BaseOperatorLink @@ -87,44 +90,51 @@ def __init__( group_id: str, wait_for_termination: bool = True, force_refresh: bool = False, - powerbi_conn_id: str = PowerBIHook.default_conn_name, + conn_id: str = 
PowerBIHook.default_conn_name, timeout: int = 60 * 60 * 24 * 7, + proxies: dict | None = None, + api_version: APIVersion | None = None, check_interval: int = 60, deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False), **kwargs, ) -> None: super().__init__(**kwargs) + self.hook = PowerBIHook( + conn_id=conn_id, + timeout=timeout, + proxies=proxies, + api_version=api_version, + ) self.dataset_id = dataset_id self.group_id = group_id self.wait_for_termination = wait_for_termination self.force_refresh = force_refresh - self.powerbi_conn_id = powerbi_conn_id + self.conn_id = conn_id self.timeout = timeout self.check_interval = check_interval self.deferrable = deferrable - @cached_property - def hook(self) -> PowerBIHook: - """Create and return an PowerBIHook (cached).""" - return PowerBIHook(powerbi_conn_id=self.powerbi_conn_id) + @classmethod + def run_async(cls, future: Any) -> Any: + return asyncio.get_event_loop().run_until_complete(future) def execute(self, context: Context): """Refresh the Power BI Dataset.""" self.log.info("Executing Dataset refresh.") - request_id = self.hook.trigger_dataset_refresh( + request_id = self.run_async(self.hook.trigger_dataset_refresh( dataset_id=self.dataset_id, group_id=self.group_id, - ) + )) # Push Dataset Refresh ID to Xcom regardless of what happen durinh the refresh - context["ti"].xcom_push(key="powerbi_dataset_refresh_id", value=request_id) + self.xcom_push(context=context, key="powerbi_dataset_refresh_id", value=request_id) if self.wait_for_termination: if self.deferrable: end_time = time.time() + self.timeout self.defer( trigger=PowerBITrigger( - powerbi_conn_id=self.powerbi_conn_id, + powerbi_conn_id=self.conn_id, group_id=self.group_id, dataset_id=self.dataset_id, dataset_refresh_id=request_id, @@ -136,14 +146,14 @@ def execute(self, context: Context): ) else: self.log.info("Waiting for dataset refresh to terminate.") - if self.hook.wait_for_dataset_refresh_status( + if 
self.run_async(self.hook.wait_for_dataset_refresh_status( request_id=request_id, dataset_id=self.dataset_id, group_id=self.group_id, expected_status=PowerBIDatasetRefreshStatus.COMPLETED, check_interval=self.check_interval, timeout=self.timeout, - ): + )): self.log.info("Dataset refresh %s has completed successfully.", request_id) else: raise PowerBIDatasetRefreshException( @@ -159,16 +169,16 @@ def execute(self, context: Context): ) # Retrieve refresh details after triggering refresh - refresh_details = self.hook.get_refresh_details_by_request_id( + refresh_details = self.run_async(self.hook.get_refresh_details_by_request_id( dataset_id=self.dataset_id, group_id=self.group_id, request_id=request_id - ) + )) status = str(refresh_details.get(PowerBIDatasetRefreshFields.STATUS.value)) error = str(refresh_details.get(PowerBIDatasetRefreshFields.ERROR.value)) # Xcom Integration - context["ti"].xcom_push(key="powerbi_dataset_refresh_status", value=status) - context["ti"].xcom_push(key="powerbi_dataset_refresh_error", value=error) + self.xcom_push(context=context, key="powerbi_dataset_refresh_status", value=status) + self.xcom_push(context=context, key="powerbi_dataset_refresh_error", value=error) def execute_complete(self, context: Context, event: dict[str, str]) -> Any: """ @@ -181,5 +191,5 @@ def execute_complete(self, context: Context, event: dict[str, str]) -> Any: raise AirflowException(event["message"]) else: # Push Dataset refresh status to Xcom - context["ti"].xcom_push(key="powerbi_dataset_refresh_status", value=event["status"]) + self.xcom_push(context=context, key="powerbi_dataset_refresh_status", value=event["status"]) self.log.info(event["message"]) diff --git a/airflow/providers/microsoft/azure/triggers/powerbi.py b/airflow/providers/microsoft/azure/triggers/powerbi.py index 70ef2b8bb9acb..67634b2922476 100644 --- a/airflow/providers/microsoft/azure/triggers/powerbi.py +++ b/airflow/providers/microsoft/azure/triggers/powerbi.py @@ -21,9 +21,11 @@ import 
time from typing import AsyncIterator +from msgraph_core import APIVersion + from airflow.providers.microsoft.azure.hooks.powerbi import ( PowerBIAsyncHook, - PowerBIDatasetRefreshStatus, + PowerBIDatasetRefreshStatus, PowerBIHook, ) from airflow.triggers.base import BaseTrigger, TriggerEvent @@ -34,7 +36,13 @@ class PowerBITrigger(BaseTrigger): Wait for termination will always be True. - :param powerbi_conn_id: The connection Id to connect to PowerBI. + :param conn_id: The connection Id to connect to PowerBI. + :param timeout: The HTTP timeout being used by the `KiotaRequestAdapter` (default is None). + When no timeout is specified or set to None then there is no HTTP timeout on each request. + :param proxies: A dict defining the HTTP proxies to be used (default is None). + :param api_version: The API version of the Microsoft Graph API to be used (default is v1). + You can pass an enum named APIVersion which has 2 possible members v1 and beta, + or you can pass a string as `v1.0` or `beta`. :param dataset_id: The dataset Id to refresh. :param group_id: The workspace Id where dataset is located. :param dataset_refresh_id: The dataset refresh Id. 
@@ -45,16 +53,24 @@ class PowerBITrigger(BaseTrigger): def __init__( self, - powerbi_conn_id: str, + conn_id: str, dataset_id: str, group_id: str, dataset_refresh_id: str, end_time: float, + timeout: float | None = None, + proxies: dict | None = None, + api_version: APIVersion | None = None, check_interval: int = 60, wait_for_termination: bool = True, ): super().__init__() - self.powerbi_conn_id = powerbi_conn_id + self.hook = PowerBIHook( + conn_id=conn_id, + timeout=timeout, + proxies=proxies, + api_version=api_version, + ) self.dataset_id = dataset_id self.group_id = group_id self.dataset_refresh_id = dataset_refresh_id @@ -64,10 +80,14 @@ def __init__( def serialize(self): """Serialize the trigger instance.""" + api_version = self.api_version.value if self.api_version else None return ( "airflow.providers.microsoft.azure.triggers.powerbi.PowerBITrigger", { - "powerbi_conn_id": self.powerbi_conn_id, + "conn_id": self.conn_id, + "timeout": self.timeout, + "proxies": self.proxies, + "api_version": api_version, "dataset_id": self.dataset_id, "group_id": self.group_id, "dataset_refresh_id": self.dataset_refresh_id, @@ -77,12 +97,27 @@ def serialize(self): }, ) + @property + def conn_id(self) -> str: + return self.hook.conn_id + + @property + def timeout(self) -> float | None: + return self.hook.timeout + + @property + def proxies(self) -> dict | None: + return self.hook.proxies + + @property + def api_version(self) -> APIVersion: + return self.hook.api_version + async def run(self) -> AsyncIterator[TriggerEvent]: """Make async connection to the PowerBI and polls for the dataset refresh status.""" - hook = PowerBIAsyncHook(powerbi_conn_id=self.powerbi_conn_id) try: while self.end_time > time.time(): - dataset_refresh_status = await hook.get_dataset_refresh_status( + dataset_refresh_status = await self.hook.get_dataset_refresh_status( dataset_id=self.dataset_id, group_id=self.group_id, dataset_refresh_id=self.dataset_refresh_id, @@ -126,7 +161,7 @@ async def run(self) 
-> AsyncIterator[TriggerEvent]: self.log.info( "Unexpected error %s caught. Cancel pipeline run %s", error, self.dataset_refresh_id ) - await hook.cancel_dataset_refresh( + await self.hook.cancel_dataset_refresh( dataset_id=self.dataset_id, group_id=self.group_id, dataset_refresh_id=self.dataset_refresh_id, @@ -147,4 +182,3 @@ async def run(self) -> AsyncIterator[TriggerEvent]: "dataset_refresh_id": self.dataset_refresh_id, } ) - return From 7f3d3f077b7b0d6d88872140fc5da17e31915f82 Mon Sep 17 00:00:00 2001 From: Ambika Garg Date: Mon, 15 Jul 2024 20:00:06 -0400 Subject: [PATCH 04/22] Refactor: To support operator's async behavior --- .../microsoft/azure/hooks/msgraph.py | 5 +- .../microsoft/azure/hooks/powerbi.py | 198 ++++++------------ .../microsoft/azure/operators/powerbi.py | 94 +++------ .../microsoft/azure/triggers/powerbi.py | 27 ++- 4 files changed, 117 insertions(+), 207 deletions(-) diff --git a/airflow/providers/microsoft/azure/hooks/msgraph.py b/airflow/providers/microsoft/azure/hooks/msgraph.py index 76a2afaa2310c..53c2708cababa 100644 --- a/airflow/providers/microsoft/azure/hooks/msgraph.py +++ b/airflow/providers/microsoft/azure/hooks/msgraph.py @@ -111,6 +111,7 @@ def __init__( timeout: float | None = None, proxies: dict | None = None, host: str = NationalClouds.Global.value, + scopes: list[str] = ["https://graph.microsoft.com/.default"], # noqa: B006 api_version: APIVersion | str | None = None, ): super().__init__() @@ -118,9 +119,9 @@ def __init__( self.timeout = timeout self.proxies = proxies self.host = host + self.scopes = scopes self._api_version = self.resolve_api_version_from_value(api_version) - @property def api_version(self) -> APIVersion: self.get_conn() # Make sure config has been loaded through get_conn to have correct api version! 
@@ -200,7 +201,7 @@ def get_conn(self) -> RequestAdapter: proxies = self.proxies or config.get("proxies", {}) msal_proxies = self.to_msal_proxies(authority=authority, proxies=proxies) httpx_proxies = self.to_httpx_proxies(proxies=proxies) - scopes = config.get("scopes", ["https://graph.microsoft.com/.default"]) + scopes = config.get("scopes", self.scopes) verify = config.get("verify", True) trust_env = config.get("trust_env", False) disable_instance_discovery = config.get("disable_instance_discovery", False) diff --git a/airflow/providers/microsoft/azure/hooks/powerbi.py b/airflow/providers/microsoft/azure/hooks/powerbi.py index d641cd3656ae6..c955232b75f87 100644 --- a/airflow/providers/microsoft/azure/hooks/powerbi.py +++ b/airflow/providers/microsoft/azure/hooks/powerbi.py @@ -17,20 +17,14 @@ from __future__ import annotations -import logging -import time from enum import Enum from typing import TYPE_CHECKING, Any -from msgraph_core import APIVersion - from airflow.exceptions import AirflowException from airflow.providers.microsoft.azure.hooks.msgraph import KiotaRequestAdapterHook if TYPE_CHECKING: - pass - -logger = logging.getLogger(__name__) + from msgraph_core import APIVersion class PowerBIDatasetRefreshFields(Enum): @@ -59,28 +53,27 @@ class PowerBIDatasetRefreshException(AirflowException): class PowerBIHook(KiotaRequestAdapterHook): """ - A hook to interact with Power BI. + A async hook to interact with Power BI. - :param conn_id: Airflow Connection ID that contains the connection - information for the Power BI account used for authentication. + :param conn_id: The Power BI connection id. 
""" conn_type: str = "powerbi" + conn_name_attr: str = "conn_id" default_conn_name: str = "powerbi_default" hook_name: str = "Power BI" def __init__( self, conn_id: str = default_conn_name, - timeout: float | None = None, proxies: dict | None = None, api_version: APIVersion | str | None = None, ): super().__init__( conn_id=conn_id, - timeout=timeout, proxies=proxies, host="https://api.powerbi.com", + scopes=["https://analysis.windows.net/powerbi/api/.default"], api_version=api_version, ) @@ -102,34 +95,10 @@ def get_ui_field_behaviour(cls) -> dict[str, Any]: "hidden_fields": ["schema", "port", "host", "extra"], "relabeling": { "login": "Client ID", - "password": "Secret", + "password": "Client Secret", }, } - async def refresh_dataset(self, dataset_id: str, group_id: str) -> str: - """ - Triggers a refresh for the specified dataset from the given group id. - - :param dataset_id: The dataset id. - :param group_id: The workspace id. - - :return: Request id of the dataset refresh request. - """ - try: - response = await self.run( - url="myorg/groups/{group_id}/datasets/{dataset_id}/refreshes", - method="POST", - path_parameters={ - "group_id": group_id, - "dataset_id": dataset_id, - }, - ) - - request_id = response.headers["RequestId"] - return request_id - except AirflowException: - raise PowerBIDatasetRefreshException("Failed to trigger dataset refresh") - async def get_refresh_history( self, dataset_id: str, @@ -152,8 +121,9 @@ async def get_refresh_history( }, ) - refresh_histories = response.json().get("value") + refresh_histories = response.get("value") return [self.raw_to_refresh_details(refresh_history) for refresh_history in refresh_histories] + except AirflowException: raise PowerBIDatasetRefreshException("Failed to retrieve refresh history") @@ -188,123 +158,93 @@ async def get_latest_refresh_details(self, dataset_id: str, group_id: str) -> di refresh_details = history[0] return refresh_details - async def get_refresh_details_by_request_id(self, dataset_id: 
str, group_id: str, request_id) -> dict[str, str]: + async def get_refresh_details_by_refresh_id( + self, dataset_id: str, group_id: str, refresh_id: str + ) -> dict[str, str]: """ Get the refresh details of the given request Id. - :param request_id: Request Id of the Dataset refresh. + :param refresh_id: Request Id of the Dataset refresh. """ refresh_histories = await self.get_refresh_history(dataset_id=dataset_id, group_id=group_id) if len(refresh_histories) == 0: raise PowerBIDatasetRefreshException( - f"Unable to fetch the details of dataset refresh with Request Id: {request_id}" + f"Unable to fetch the details of dataset refresh with Request Id: {refresh_id}" ) - request_ids = [ + refresh_ids = [ refresh_history.get(PowerBIDatasetRefreshFields.REQUEST_ID.value) for refresh_history in refresh_histories ] - if request_id not in request_ids: + if refresh_id not in refresh_ids: raise PowerBIDatasetRefreshException( - f"Unable to fetch the details of dataset refresh with Request Id: {request_id}" + f"Unable to fetch the details of dataset refresh with Request Id: {refresh_id}" ) - request_id_index = request_ids.index(request_id) - refresh_details = refresh_histories[request_id_index] + refresh_details = refresh_histories[refresh_ids.index(refresh_id)] return refresh_details - # TODO: This should definitely always be executed through a trigger, as this will be blocking otherwise - async def wait_for_dataset_refresh_status( - self, - *, - expected_status: str, - request_id: str, - dataset_id: str, - group_id: str, - check_interval: int = 60, - timeout: int = 60 * 60 * 24 * 7, - ) -> bool: - """ - Wait until the dataset refresh of given request ID has reached the expected status. - - :param expected_status: The desired status to check against a dataset refresh's current status. - :param request_id: Request id for the dataset refresh request. - :param check_interval: Time in seconds to check on a dataset refresh's status. 
- :param timeout: Time in seconds to wait for a dataset to reach a terminal status or the expected status. - :return: Boolean indicating if the dataset refresh has reached the ``expected_status`` before the timeout. - """ - start_time = time.monotonic() - - while start_time + timeout > time.monotonic(): - dataset_refresh_details = await self.get_refresh_details_by_request_id( - dataset_id=dataset_id, group_id=group_id, request_id=request_id - ) - dataset_refresh_status = dataset_refresh_details.get(PowerBIDatasetRefreshFields.STATUS.value) - - if dataset_refresh_status in PowerBIDatasetRefreshStatus.TERMINAL_STATUSES: - return dataset_refresh_status == expected_status - - logger.info( - "Current dataset refresh status is %s. Sleeping for %s", - dataset_refresh_status, - check_interval, - ) - time.sleep(check_interval) - - # Timeout reached - return False - - # TODO: Why have a method with another name doing the same thing? async def trigger_dataset_refresh(self, *, dataset_id: str, group_id: str) -> str: """ - Triggers the Power BI dataset refresh. - - :param dataset_id: The dataset ID. - :param group_id: The workspace ID. - - :return: Request ID of the dataset refresh request. - """ - # Start dataset refresh - self.log.info("Starting dataset refresh.") - return await self.refresh_dataset(dataset_id=dataset_id, group_id=group_id) - - async def get_dataset_refresh_status( - self, dataset_id: str, group_id: str, dataset_refresh_id: str - ) -> str: - """ - Retrieve the refresh status for the specified dataset refresh from the given group id. + Triggers a refresh for the specified dataset from the given group id. - :param dataset_id: The dataset Id. - :param group_id: The workspace Id. - :param dataset_refresh_id: The dataset refresh Id. + :param dataset_id: The dataset id. + :param group_id: The workspace id. - :return: Dataset refresh status. + :return: Request id of the dataset refresh request. 
""" - response = await self.run( - url="myorg/groups/{group_id}/datasets/{dataset_id}/refreshes", - response_type=None, - path_parameters={ - "group_id": group_id, - "dataset_id": dataset_id, - }, - method="GET", - ) - # clean the raw refresh histories fetched from the API. - raw_refresh_histories = response.get("value") - clean_refresh_histories = [ - self.raw_to_refresh_details(refresh_history) for refresh_history in raw_refresh_histories - ] - - for refresh_history in clean_refresh_histories: - if refresh_history.get(PowerBIDatasetRefreshFields.REQUEST_ID.value) == dataset_refresh_id: - return str(refresh_history.get(PowerBIDatasetRefreshFields.STATUS.value, "Unknown")) + try: + response = await self.run( + url="myorg/groups/{group_id}/datasets/{dataset_id}/refreshes", + method="POST", + path_parameters={ + "group_id": group_id, + "dataset_id": dataset_id, + }, + ) - raise PowerBIDatasetRefreshException( - f"Failed to retrieve the status of dataset refresh with Id: {dataset_refresh_id}" - ) + request_id = response.get("requestid") + return request_id + except AirflowException: + raise PowerBIDatasetRefreshException("Failed to trigger dataset refresh.") + + # async def get_dataset_refresh_status( + # self, dataset_id: str, group_id: str, dataset_refresh_id: str + # ) -> str: + # """ + # Retrieve the refresh status for the specified dataset refresh from the given group id. + + # :param dataset_id: The dataset Id. + # :param group_id: The workspace Id. + # :param dataset_refresh_id: The dataset refresh Id. + + # :return: Dataset refresh status. + # """ + # response = await self.run( + # url="myorg/groups/{group_id}/datasets/{dataset_id}/refreshes", + # response_type=None, + # path_parameters={ + # "group_id": group_id, + # "dataset_id": dataset_id, + # }, + # method="GET", + # ) + # # clean the raw refresh histories fetched from the API. 
+ # raw_refresh_histories = response.get("value") + # clean_refresh_histories = [ + # self.raw_to_refresh_details(refresh_history) for refresh_history in raw_refresh_histories + # ] + + # for refresh_history in clean_refresh_histories: + # if refresh_history.get(PowerBIDatasetRefreshFields.REQUEST_ID.value) == dataset_refresh_id: + # return str(refresh_history.get(PowerBIDatasetRefreshFields.STATUS.value, "Unknown")) + + # raise PowerBIDatasetRefreshException( + # f"Failed to retrieve the status of dataset refresh with Id: {dataset_refresh_id}" + # ) async def cancel_dataset_refresh(self, dataset_id: str, group_id: str, dataset_refresh_id: str) -> None: """ diff --git a/airflow/providers/microsoft/azure/operators/powerbi.py b/airflow/providers/microsoft/azure/operators/powerbi.py index e82a158d4260f..3bd1724fe5dae 100644 --- a/airflow/providers/microsoft/azure/operators/powerbi.py +++ b/airflow/providers/microsoft/azure/operators/powerbi.py @@ -19,24 +19,19 @@ import asyncio import time -import warnings -from functools import cached_property from typing import TYPE_CHECKING, Any, Sequence -from msgraph_core import APIVersion - -from airflow.configuration import conf from airflow.exceptions import AirflowException from airflow.models import BaseOperator, BaseOperatorLink from airflow.providers.microsoft.azure.hooks.powerbi import ( - PowerBIDatasetRefreshException, PowerBIDatasetRefreshFields, - PowerBIDatasetRefreshStatus, PowerBIHook, ) from airflow.providers.microsoft.azure.triggers.powerbi import PowerBITrigger if TYPE_CHECKING: + from msgraph_core import APIVersion + from airflow.models.taskinstancekey import TaskInstanceKey from airflow.utils.context import Context @@ -60,17 +55,11 @@ class PowerBIDatasetRefreshOperator(BaseOperator): """ Refreshes a Power BI dataset. - By default the operator will wait until the refresh has completed before - exiting. The refresh status is checked every 60 seconds as a default. 
This - can be changed by specifying a new value for `check_interval`. - :param dataset_id: The dataset id. :param group_id: The workspace id. :param wait_for_termination: Wait until the pre-existing or current triggered refresh completes before exiting. - :param force_refresh: Force refresh if pre-existing refresh found. - :param powerbi_conn_id: Airflow Connection ID that contains the connection - information for the Power BI account used for authentication. - :param timeout: Time in seconds to wait for a dataset to reach a terminal status for non-asynchronous waits. Used only if ``wait_for_termination`` is True. + :param conn_id: Airflow Connection ID that contains the connection information for the Power BI account used for authentication. + :param timeout: Time in seconds to wait for a dataset to reach a terminal status for asynchronous waits. Used only if ``wait_for_termination`` is True. :param check_interval: Number of seconds to wait before rechecking the refresh status. """ @@ -85,34 +74,29 @@ class PowerBIDatasetRefreshOperator(BaseOperator): def __init__( self, - *, # Indicates all the following parameters must be specified using keyword arguments. 
+ *, dataset_id: str, group_id: str, wait_for_termination: bool = True, - force_refresh: bool = False, conn_id: str = PowerBIHook.default_conn_name, timeout: int = 60 * 60 * 24 * 7, proxies: dict | None = None, api_version: APIVersion | None = None, check_interval: int = 60, - deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False), **kwargs, ) -> None: super().__init__(**kwargs) self.hook = PowerBIHook( conn_id=conn_id, - timeout=timeout, proxies=proxies, api_version=api_version, ) self.dataset_id = dataset_id self.group_id = group_id self.wait_for_termination = wait_for_termination - self.force_refresh = force_refresh self.conn_id = conn_id self.timeout = timeout self.check_interval = check_interval - self.deferrable = deferrable @classmethod def run_async(cls, future: Any) -> Any: @@ -121,57 +105,37 @@ def run_async(cls, future: Any) -> Any: def execute(self, context: Context): """Refresh the Power BI Dataset.""" self.log.info("Executing Dataset refresh.") - request_id = self.run_async(self.hook.trigger_dataset_refresh( - dataset_id=self.dataset_id, - group_id=self.group_id, - )) + refresh_id = self.run_async( + self.hook.trigger_dataset_refresh( + dataset_id=self.dataset_id, + group_id=self.group_id, + ) + ) - # Push Dataset Refresh ID to Xcom regardless of what happen durinh the refresh - self.xcom_push(context=context, key="powerbi_dataset_refresh_id", value=request_id) + # Push Dataset Refresh ID to Xcom regardless of what happen during the refresh + self.xcom_push(context=context, key="powerbi_dataset_refresh_id", value=refresh_id) if self.wait_for_termination: - if self.deferrable: - end_time = time.time() + self.timeout - self.defer( - trigger=PowerBITrigger( - powerbi_conn_id=self.conn_id, - group_id=self.group_id, - dataset_id=self.dataset_id, - dataset_refresh_id=request_id, - end_time=end_time, - check_interval=self.check_interval, - wait_for_termination=self.wait_for_termination, - ), - 
method_name=self.execute_complete.__name__, - ) - else: - self.log.info("Waiting for dataset refresh to terminate.") - if self.run_async(self.hook.wait_for_dataset_refresh_status( - request_id=request_id, - dataset_id=self.dataset_id, + end_time = time.time() + self.timeout + self.defer( + trigger=PowerBITrigger( + conn_id=self.conn_id, group_id=self.group_id, - expected_status=PowerBIDatasetRefreshStatus.COMPLETED, + dataset_id=self.dataset_id, + dataset_refresh_id=refresh_id, + end_time=end_time, check_interval=self.check_interval, - timeout=self.timeout, - )): - self.log.info("Dataset refresh %s has completed successfully.", request_id) - else: - raise PowerBIDatasetRefreshException( - f"Dataset refresh {request_id} has failed or has been cancelled." - ) - else: - if self.deferrable is True: - warnings.warn( - "Argument `wait_for_termination` is False and `deferrable` is True , hence " - "`deferrable` parameter doesn't have any effect", - UserWarning, - stacklevel=2, - ) + wait_for_termination=self.wait_for_termination, + ), + method_name=self.execute_complete.__name__, + ) # Retrieve refresh details after triggering refresh - refresh_details = self.run_async(self.hook.get_refresh_details_by_request_id( - dataset_id=self.dataset_id, group_id=self.group_id, request_id=request_id - )) + refresh_details = self.run_async( + self.hook.get_refresh_details_by_refresh_id( + dataset_id=self.dataset_id, group_id=self.group_id, refresh_id=refresh_id + ) + ) status = str(refresh_details.get(PowerBIDatasetRefreshFields.STATUS.value)) error = str(refresh_details.get(PowerBIDatasetRefreshFields.ERROR.value)) diff --git a/airflow/providers/microsoft/azure/triggers/powerbi.py b/airflow/providers/microsoft/azure/triggers/powerbi.py index 67634b2922476..ca3beae26c513 100644 --- a/airflow/providers/microsoft/azure/triggers/powerbi.py +++ b/airflow/providers/microsoft/azure/triggers/powerbi.py @@ -19,16 +19,18 @@ import asyncio import time -from typing import AsyncIterator - -from 
msgraph_core import APIVersion +from typing import TYPE_CHECKING, AsyncIterator from airflow.providers.microsoft.azure.hooks.powerbi import ( - PowerBIAsyncHook, - PowerBIDatasetRefreshStatus, PowerBIHook, + PowerBIDatasetRefreshFields, + PowerBIDatasetRefreshStatus, + PowerBIHook, ) from airflow.triggers.base import BaseTrigger, TriggerEvent +if TYPE_CHECKING: + from msgraph_core import APIVersion + class PowerBITrigger(BaseTrigger): """ @@ -67,7 +69,6 @@ def __init__( super().__init__() self.hook = PowerBIHook( conn_id=conn_id, - timeout=timeout, proxies=proxies, api_version=api_version, ) @@ -117,11 +118,13 @@ async def run(self) -> AsyncIterator[TriggerEvent]: """Make async connection to the PowerBI and polls for the dataset refresh status.""" try: while self.end_time > time.time(): - dataset_refresh_status = await self.hook.get_dataset_refresh_status( + refresh_details = await self.hook.get_refresh_details_by_refresh_id( dataset_id=self.dataset_id, group_id=self.group_id, - dataset_refresh_id=self.dataset_refresh_id, + refresh_id=self.dataset_refresh_id, ) + dataset_refresh_status = refresh_details.get(PowerBIDatasetRefreshFields.STATUS.value) + if dataset_refresh_status == PowerBIDatasetRefreshStatus.COMPLETED: yield TriggerEvent( { @@ -150,7 +153,7 @@ async def run(self) -> AsyncIterator[TriggerEvent]: yield TriggerEvent( { "status": "error", - "message": f"Timeout: The dataset refresh {self.dataset_refresh_id} has {dataset_refresh_status}.", + "message": f"Timeout occurred while waiting for dataset refresh to complete: The dataset refresh {self.dataset_refresh_id} has status {dataset_refresh_status}.", "dataset_refresh_id": self.dataset_refresh_id, } ) @@ -159,7 +162,9 @@ async def run(self) -> AsyncIterator[TriggerEvent]: if self.dataset_refresh_id: try: self.log.info( - "Unexpected error %s caught. Cancel pipeline run %s", error, self.dataset_refresh_id + "Unexpected error %s caught. 
Canceling dataset refresh %s", + error, + self.dataset_refresh_id, ) await self.hook.cancel_dataset_refresh( dataset_id=self.dataset_id, @@ -170,7 +175,7 @@ async def run(self) -> AsyncIterator[TriggerEvent]: yield TriggerEvent( { "status": "error", - "message": f"An error occurred while canceling pipeline: {e}", + "message": f"An error occurred while canceling dataset: {e}", "dataset_refresh_id": self.dataset_refresh_id, } ) From eea4470e6ce2e070b057a6d44ac2a7f849dfb7ea Mon Sep 17 00:00:00 2001 From: Ambika Garg Date: Tue, 16 Jul 2024 13:33:20 -0400 Subject: [PATCH 05/22] Add unit tests for the power bi trigger and refactor the code --- .../microsoft/azure/hooks/powerbi.py | 37 +--- .../microsoft/azure/operators/powerbi.py | 9 +- .../microsoft/azure/triggers/powerbi.py | 20 +- .../microsoft/azure/hooks/test_powerbi.py | 10 - .../microsoft/azure/operators/test_powerbi.py | 139 ++++--------- .../microsoft/azure/triggers/test_powerbi.py | 186 ++++++++++++++++++ 6 files changed, 230 insertions(+), 171 deletions(-) create mode 100644 tests/providers/microsoft/azure/triggers/test_powerbi.py diff --git a/airflow/providers/microsoft/azure/hooks/powerbi.py b/airflow/providers/microsoft/azure/hooks/powerbi.py index c955232b75f87..779cb873a2815 100644 --- a/airflow/providers/microsoft/azure/hooks/powerbi.py +++ b/airflow/providers/microsoft/azure/hooks/powerbi.py @@ -67,11 +67,13 @@ def __init__( self, conn_id: str = default_conn_name, proxies: dict | None = None, + timeout: float | None = None, api_version: APIVersion | str | None = None, ): super().__init__( conn_id=conn_id, proxies=proxies, + timeout=timeout, host="https://api.powerbi.com", scopes=["https://analysis.windows.net/powerbi/api/.default"], api_version=api_version, @@ -211,41 +213,6 @@ async def trigger_dataset_refresh(self, *, dataset_id: str, group_id: str) -> st except AirflowException: raise PowerBIDatasetRefreshException("Failed to trigger dataset refresh.") - # async def get_dataset_refresh_status( - # 
self, dataset_id: str, group_id: str, dataset_refresh_id: str - # ) -> str: - # """ - # Retrieve the refresh status for the specified dataset refresh from the given group id. - - # :param dataset_id: The dataset Id. - # :param group_id: The workspace Id. - # :param dataset_refresh_id: The dataset refresh Id. - - # :return: Dataset refresh status. - # """ - # response = await self.run( - # url="myorg/groups/{group_id}/datasets/{dataset_id}/refreshes", - # response_type=None, - # path_parameters={ - # "group_id": group_id, - # "dataset_id": dataset_id, - # }, - # method="GET", - # ) - # # clean the raw refresh histories fetched from the API. - # raw_refresh_histories = response.get("value") - # clean_refresh_histories = [ - # self.raw_to_refresh_details(refresh_history) for refresh_history in raw_refresh_histories - # ] - - # for refresh_history in clean_refresh_histories: - # if refresh_history.get(PowerBIDatasetRefreshFields.REQUEST_ID.value) == dataset_refresh_id: - # return str(refresh_history.get(PowerBIDatasetRefreshFields.STATUS.value, "Unknown")) - - # raise PowerBIDatasetRefreshException( - # f"Failed to retrieve the status of dataset refresh with Id: {dataset_refresh_id}" - # ) - async def cancel_dataset_refresh(self, dataset_id: str, group_id: str, dataset_refresh_id: str) -> None: """ Cancel the dataset refresh. 
diff --git a/airflow/providers/microsoft/azure/operators/powerbi.py b/airflow/providers/microsoft/azure/operators/powerbi.py index 3bd1724fe5dae..577679c1ea0b5 100644 --- a/airflow/providers/microsoft/azure/operators/powerbi.py +++ b/airflow/providers/microsoft/azure/operators/powerbi.py @@ -79,18 +79,14 @@ def __init__( group_id: str, wait_for_termination: bool = True, conn_id: str = PowerBIHook.default_conn_name, - timeout: int = 60 * 60 * 24 * 7, + timeout: float = 60 * 60 * 24 * 7, proxies: dict | None = None, api_version: APIVersion | None = None, check_interval: int = 60, **kwargs, ) -> None: super().__init__(**kwargs) - self.hook = PowerBIHook( - conn_id=conn_id, - proxies=proxies, - api_version=api_version, - ) + self.hook = PowerBIHook(conn_id=conn_id, proxies=proxies, api_version=api_version, timeout=timeout) self.dataset_id = dataset_id self.group_id = group_id self.wait_for_termination = wait_for_termination @@ -124,6 +120,7 @@ def execute(self, context: Context): dataset_id=self.dataset_id, dataset_refresh_id=refresh_id, end_time=end_time, + timeout=self.timeout, check_interval=self.check_interval, wait_for_termination=self.wait_for_termination, ), diff --git a/airflow/providers/microsoft/azure/triggers/powerbi.py b/airflow/providers/microsoft/azure/triggers/powerbi.py index ca3beae26c513..65c3b287531a2 100644 --- a/airflow/providers/microsoft/azure/triggers/powerbi.py +++ b/airflow/providers/microsoft/azure/triggers/powerbi.py @@ -22,7 +22,6 @@ from typing import TYPE_CHECKING, AsyncIterator from airflow.providers.microsoft.azure.hooks.powerbi import ( - PowerBIDatasetRefreshFields, PowerBIDatasetRefreshStatus, PowerBIHook, ) @@ -67,11 +66,7 @@ def __init__( wait_for_termination: bool = True, ): super().__init__() - self.hook = PowerBIHook( - conn_id=conn_id, - proxies=proxies, - api_version=api_version, - ) + self.hook = PowerBIHook(conn_id=conn_id, proxies=proxies, api_version=api_version, timeout=timeout) self.dataset_id = dataset_id self.group_id 
= group_id self.dataset_refresh_id = dataset_refresh_id @@ -86,7 +81,6 @@ def serialize(self): "airflow.providers.microsoft.azure.triggers.powerbi.PowerBITrigger", { "conn_id": self.conn_id, - "timeout": self.timeout, "proxies": self.proxies, "api_version": api_version, "dataset_id": self.dataset_id, @@ -102,10 +96,6 @@ def serialize(self): def conn_id(self) -> str: return self.hook.conn_id - @property - def timeout(self) -> float | None: - return self.hook.timeout - @property def proxies(self) -> dict | None: return self.hook.proxies @@ -117,18 +107,20 @@ def api_version(self) -> APIVersion: async def run(self) -> AsyncIterator[TriggerEvent]: """Make async connection to the PowerBI and polls for the dataset refresh status.""" try: + dataset_refresh_status = None while self.end_time > time.time(): refresh_details = await self.hook.get_refresh_details_by_refresh_id( dataset_id=self.dataset_id, group_id=self.group_id, refresh_id=self.dataset_refresh_id, ) - dataset_refresh_status = refresh_details.get(PowerBIDatasetRefreshFields.STATUS.value) + + dataset_refresh_status = refresh_details.get("status") if dataset_refresh_status == PowerBIDatasetRefreshStatus.COMPLETED: yield TriggerEvent( { - "status": {dataset_refresh_status}, + "status": dataset_refresh_status, "message": f"The dataset refresh {self.dataset_refresh_id} has {dataset_refresh_status}.", "dataset_refresh_id": self.dataset_refresh_id, } @@ -137,7 +129,7 @@ async def run(self) -> AsyncIterator[TriggerEvent]: elif dataset_refresh_status == PowerBIDatasetRefreshStatus.FAILED: yield TriggerEvent( { - "status": {dataset_refresh_status}, + "status": dataset_refresh_status, "message": f"The dataset refresh {self.dataset_refresh_id} has {dataset_refresh_status}.", "dataset_refresh_id": self.dataset_refresh_id, } diff --git a/tests/providers/microsoft/azure/hooks/test_powerbi.py b/tests/providers/microsoft/azure/hooks/test_powerbi.py index 15dcc3b56cb34..973f5c494b14e 100644 --- 
a/tests/providers/microsoft/azure/hooks/test_powerbi.py +++ b/tests/providers/microsoft/azure/hooks/test_powerbi.py @@ -262,13 +262,3 @@ def test_wait_for_dataset_refresh_status( else: with pytest.raises(PowerBIDatasetRefreshException): powerbi_hook.wait_for_dataset_refresh_status(**config) - - -def test_trigger_dataset_refresh(powerbi_hook): - # Mock the refresh_dataset method to return a request ID - powerbi_hook.refresh_dataset = MagicMock(return_value="request_id") - - # Assert trigger_dataset_refresh raises an exception. - response = powerbi_hook.trigger_dataset_refresh(dataset_id=DATASET_ID, group_id=GROUP_ID) - - assert response == "request_id" diff --git a/tests/providers/microsoft/azure/operators/test_powerbi.py b/tests/providers/microsoft/azure/operators/test_powerbi.py index bd64065ee276b..8e3f566721a59 100644 --- a/tests/providers/microsoft/azure/operators/test_powerbi.py +++ b/tests/providers/microsoft/azure/operators/test_powerbi.py @@ -17,7 +17,7 @@ from __future__ import annotations -from unittest.mock import MagicMock, call +from unittest.mock import AsyncMock, MagicMock, call import pytest @@ -35,28 +35,29 @@ DATASET_ID = "dataset_id" CONFIG = { "task_id": TASK_ID, - "powerbi_conn_id": DEFAULT_CONNECTION_CLIENT_SECRET, + "conn_id": DEFAULT_CONNECTION_CLIENT_SECRET, "group_id": GROUP_ID, "dataset_id": DATASET_ID, "check_interval": 1, "timeout": 3, } +NEW_REFRESH_REQUEST_ID = "5e2d9921-e91b-491f-b7e1-e7d8db49194c" # Sample responses from PowerBI API COMPLETED_REFRESH_DETAILS = { - PowerBIDatasetRefreshFields.REQUEST_ID.value: "5e2d9921-e91b-491f-b7e1-e7d8db49194c", + PowerBIDatasetRefreshFields.REQUEST_ID.value: NEW_REFRESH_REQUEST_ID, PowerBIDatasetRefreshFields.STATUS.value: PowerBIDatasetRefreshStatus.COMPLETED, # serviceExceptionJson is not present when status is not "Failed" } FAILED_REFRESH_DETAILS = { - PowerBIDatasetRefreshFields.REQUEST_ID.value: "11bf290a-346b-48b7-8973-c5df149337ff", + PowerBIDatasetRefreshFields.REQUEST_ID.value: 
NEW_REFRESH_REQUEST_ID, PowerBIDatasetRefreshFields.STATUS.value: PowerBIDatasetRefreshStatus.FAILED, PowerBIDatasetRefreshFields.ERROR.value: '{"errorCode":"ModelRefreshFailed_CredentialsNotSpecified"}', } IN_PROGRESS_REFRESH_DETAILS = { - PowerBIDatasetRefreshFields.REQUEST_ID.value: "6b6536c1-cfcb-4148-9c21-402c3f5241e4", + PowerBIDatasetRefreshFields.REQUEST_ID.value: NEW_REFRESH_REQUEST_ID, PowerBIDatasetRefreshFields.STATUS.value: PowerBIDatasetRefreshStatus.IN_PROGRESS, # endtime is not available. } @@ -69,7 +70,6 @@ def mock_powerbi_hook(): # Test cases: refresh_details returns None, Terminal Status, In-progress Status _get_latest_refresh_details_args = [ - (None), COMPLETED_REFRESH_DETAILS, FAILED_REFRESH_DETAILS, IN_PROGRESS_REFRESH_DETAILS, @@ -91,99 +91,37 @@ def mock_powerbi_hook(): def test_execute_no_wait_for_termination(mock_powerbi_hook, latest_refresh_details): operator = PowerBIDatasetRefreshOperator( wait_for_termination=False, - force_refresh=False, **CONFIG, ) operator.hook = mock_powerbi_hook context = {"ti": MagicMock()} - new_refresh_request_id = "5e2d9921-e91b-491f-b7e1-e7d8db49194c" - mock_powerbi_hook.get_latest_refresh_details = MagicMock(return_value=latest_refresh_details) - mock_powerbi_hook.trigger_dataset_refresh = MagicMock(return_value=new_refresh_request_id) - mock_powerbi_hook.get_refresh_details_by_request_id = MagicMock( - return_value={ - PowerBIDatasetRefreshFields.REQUEST_ID.value: new_refresh_request_id, - PowerBIDatasetRefreshFields.STATUS.value: PowerBIDatasetRefreshStatus.COMPLETED, - # serviceExceptionJson is not present when status is not "Failed" - } - ) - mock_powerbi_hook.wait_for_dataset_refresh_status = MagicMock(return_value=True) - operator.execute(context) - if ( - latest_refresh_details is None - or latest_refresh_details[PowerBIDatasetRefreshFields.STATUS.value] - in PowerBIDatasetRefreshStatus.TERMINAL_STATUSES - ): - assert mock_powerbi_hook.get_latest_refresh_details.called - assert 
mock_powerbi_hook.trigger_dataset_refresh.called - else: - assert not mock_powerbi_hook.trigger_dataset_refresh.called + mock_powerbi_hook.trigger_dataset_refresh = AsyncMock(return_value=NEW_REFRESH_REQUEST_ID) + mock_powerbi_hook.get_refresh_details_by_refresh_id = AsyncMock(return_value=latest_refresh_details) - assert not mock_powerbi_hook.wait_for_dataset_refresh_status.called - assert mock_powerbi_hook.get_refresh_details_by_request_id.called - assert context["ti"].xcom_push.call_count == 4 + operator.execute(context) + + assert mock_powerbi_hook.get_refresh_details_by_refresh_id.called + assert context["ti"].xcom_push.call_count == 3 assert context["ti"].xcom_push.call_args_list == [ - call(key="powerbi_dataset_refresh_id", value=new_refresh_request_id), - call(key="powerbi_dataset_refresh_status", value=PowerBIDatasetRefreshStatus.COMPLETED), - call(key="powerbi_dataset_refresh_end_time", value="2024-04-15T20:14:08.1458221Z"), - call(key="powerbi_dataset_refresh_error", value="None"), + call( + key="powerbi_dataset_refresh_id", + value="5e2d9921-e91b-491f-b7e1-e7d8db49194c", + execution_date=None, + ), + call( + key="powerbi_dataset_refresh_status", + value=latest_refresh_details.get("status"), + execution_date=None, + ), + call( + key="powerbi_dataset_refresh_error", + value=latest_refresh_details.get("error", "None"), + execution_date=None, + ), ] -_get_wait_for_status_args = [(True), (False)] - - -@pytest.mark.parametrize( - argnames=("wait_for_status_return_value"), - argvalues=_get_wait_for_status_args, - ids=[f"wait_for_status_return_value_{argval}" for argval in _get_wait_for_status_args], -) -def test_execute_wait_for_termination_preexisting_refresh_going_on( - mock_powerbi_hook, wait_for_status_return_value -): - operator = PowerBIDatasetRefreshOperator( - wait_for_termination=True, - force_refresh=True, - **CONFIG, - ) - preexisting_refresh_request_id = "6b6536c1-cfcb-4148-9c21-402c3f5241e4" - new_refresh_request_id = 
"5e2d9921-e91b-491f-b7e1-e7d8db49194c" - operator.hook = mock_powerbi_hook - context = {"ti": MagicMock()} - mock_powerbi_hook.get_latest_refresh_details = MagicMock( - return_value={ - PowerBIDatasetRefreshFields.REQUEST_ID.value: preexisting_refresh_request_id, - PowerBIDatasetRefreshFields.STATUS.value: PowerBIDatasetRefreshStatus.IN_PROGRESS, # endtime is not available. - } - ) - mock_powerbi_hook.trigger_dataset_refresh = MagicMock(return_value=new_refresh_request_id) - mock_powerbi_hook.get_refresh_details_by_request_id = MagicMock( - return_value={ - PowerBIDatasetRefreshFields.REQUEST_ID.value: new_refresh_request_id, - PowerBIDatasetRefreshFields.STATUS.value: PowerBIDatasetRefreshStatus.COMPLETED, - # serviceExceptionJson is not present when status is not "Failed" - } - ) - mock_powerbi_hook.wait_for_dataset_refresh_status = MagicMock(return_value=wait_for_status_return_value) - - if wait_for_status_return_value is False: - with pytest.raises(PowerBIDatasetRefreshException): - operator.execute(context) - assert not mock_powerbi_hook.trigger_dataset_refresh.called - else: - operator.execute(context) - assert mock_powerbi_hook.trigger_dataset_refresh.called - assert mock_powerbi_hook.get_refresh_details_by_request_id.called - assert mock_powerbi_hook.wait_for_dataset_refresh_status.call_count == 2 - assert context["ti"].xcom_push.call_count == 4 - assert context["ti"].xcom_push.call_args_list == [ - call(key="powerbi_dataset_refresh_id", value=new_refresh_request_id), - call(key="powerbi_dataset_refresh_status", value=PowerBIDatasetRefreshStatus.COMPLETED), - call(key="powerbi_dataset_refresh_end_time", value="2024-04-15T20:14:08.1458221Z"), - call(key="powerbi_dataset_refresh_error", value="None"), - ] - - _get_wait_for_status_and_latest_refresh_details_args = [ (True, None), (False, None), @@ -206,29 +144,19 @@ def test_execute_wait_for_termination_preexisting_refresh_going_on( for argval in _get_wait_for_status_and_latest_refresh_details_args ], ) -def 
test_execute_wait_for_termination_no_preexisting_refresh( +def test_execute_wait_for_termination_with_Deferrable( mock_powerbi_hook, wait_for_status_return_value, latest_refresh_details ): operator = PowerBIDatasetRefreshOperator( wait_for_termination=True, - force_refresh=True, **CONFIG, ) operator.hook = mock_powerbi_hook context = {"ti": MagicMock()} - new_refresh_request_id = "11bf290a-346b-48b7-8973-c5df149337ff" # Magic mock the hook methods - mock_powerbi_hook.get_latest_refresh_details = MagicMock(return_value=latest_refresh_details) - mock_powerbi_hook.trigger_dataset_refresh = MagicMock(return_value=new_refresh_request_id) - mock_powerbi_hook.get_refresh_details_by_request_id = MagicMock( - return_value={ - PowerBIDatasetRefreshFields.REQUEST_ID.value: new_refresh_request_id, - PowerBIDatasetRefreshFields.STATUS.value: PowerBIDatasetRefreshStatus.COMPLETED, - # serviceExceptionJson is not present when status is not "Failed" - } - ) - mock_powerbi_hook.wait_for_dataset_refresh_status = MagicMock(return_value=wait_for_status_return_value) + mock_powerbi_hook.trigger_dataset_refresh = AsyncMock(return_value=NEW_REFRESH_REQUEST_ID) + mock_powerbi_hook.get_refresh_details_by_request_id = MagicMock(return_value=latest_refresh_details) # Act and assert if wait_for_status_return_value is False: @@ -239,18 +167,17 @@ def test_execute_wait_for_termination_no_preexisting_refresh( assert mock_powerbi_hook.trigger_dataset_refresh.called assert mock_powerbi_hook.get_refresh_details_by_request_id.called mock_powerbi_hook.wait_for_dataset_refresh_status.assert_called_once_with( - request_id=new_refresh_request_id, + request_id=NEW_REFRESH_REQUEST_ID, dataset_id=DATASET_ID, group_id=GROUP_ID, expected_status=PowerBIDatasetRefreshStatus.COMPLETED, ) - assert context["ti"].xcom_push.call_count == 4 + assert context["ti"].xcom_push.call_count == 3 assert context["ti"].xcom_push.call_args_list == [ call( key="powerbi_dataset_refresh_id", - value=new_refresh_request_id, + 
value=NEW_REFRESH_REQUEST_ID, ), call(key="powerbi_dataset_refresh_status", value=PowerBIDatasetRefreshStatus.COMPLETED), - call(key="powerbi_dataset_refresh_end_time", value="2024-04-15T20:14:08.1458221Z"), call(key="powerbi_dataset_refresh_error", value="None"), ] diff --git a/tests/providers/microsoft/azure/triggers/test_powerbi.py b/tests/providers/microsoft/azure/triggers/test_powerbi.py new file mode 100644 index 0000000000000..1c17753158bca --- /dev/null +++ b/tests/providers/microsoft/azure/triggers/test_powerbi.py @@ -0,0 +1,186 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from __future__ import annotations + +import asyncio +import logging +import time +from unittest import mock +from unittest.mock import patch + +import pytest + +from airflow.providers.microsoft.azure.hooks.powerbi import PowerBIDatasetRefreshStatus +from airflow.providers.microsoft.azure.triggers.powerbi import PowerBITrigger +from airflow.triggers.base import TriggerEvent +from tests.providers.microsoft.conftest import get_airflow_connection + +logging.basicConfig(level=logging.DEBUG) +mylogger = logging.getLogger() + +POWERBI_CONN_ID = "powerbi_default" +DATASET_ID = "dataset_id" +GROUP_ID = "group_id" +DATASET_REFRESH_ID = "dataset_refresh_id" +POWERBI_DATASET_END_TIME = time.time() + 10 +MODULE = "airflow.providers.microsoft.azure" +TIMEOUT = 3 +CHECK_INTERVAL = 3 +API_VERSION = "v1.0" + + +@pytest.fixture +def powerbi_trigger(): + trigger = PowerBITrigger( + conn_id=POWERBI_CONN_ID, + proxies=None, + api_version=API_VERSION, + dataset_id=DATASET_ID, + group_id=GROUP_ID, + dataset_refresh_id=DATASET_REFRESH_ID, + end_time=POWERBI_DATASET_END_TIME, + check_interval=CHECK_INTERVAL, + wait_for_termination=True, + timeout=TIMEOUT, + ) + + return trigger + + +def test_powerbi_trigger_serialization(): + """Asserts that the PowerBI Trigger correctly serializes its arguments and classpath.""" + + with patch( + "airflow.hooks.base.BaseHook.get_connection", + side_effect=get_airflow_connection, + ): + powerbi_trigger = PowerBITrigger( + conn_id=POWERBI_CONN_ID, + proxies=None, + api_version=API_VERSION, + dataset_id=DATASET_ID, + group_id=GROUP_ID, + dataset_refresh_id=DATASET_REFRESH_ID, + end_time=POWERBI_DATASET_END_TIME, + check_interval=CHECK_INTERVAL, + wait_for_termination=True, + timeout=TIMEOUT, + ) + + classpath, kwargs = powerbi_trigger.serialize() + assert classpath == f"{MODULE}.triggers.powerbi.PowerBITrigger" + assert kwargs == { + "conn_id": POWERBI_CONN_ID, + "dataset_id": DATASET_ID, + "group_id": GROUP_ID, + "dataset_refresh_id": 
DATASET_REFRESH_ID, + "end_time": POWERBI_DATASET_END_TIME, + "proxies": None, + "api_version": API_VERSION, + "check_interval": CHECK_INTERVAL, + "wait_for_termination": True, + } + + +@pytest.mark.asyncio +@mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.get_refresh_details_by_refresh_id") +async def test_powerbi_trigger_run_inprogress(mock_get_refresh_details_by_refresh_id, powerbi_trigger): + mock_get_refresh_details_by_refresh_id.return_value = {"status": PowerBIDatasetRefreshStatus.IN_PROGRESS} + task = asyncio.create_task(powerbi_trigger.run().__anext__()) + await asyncio.sleep(0.5) + + # TriggerEvent was not returned + assert task.done() is False + asyncio.get_event_loop().stop() + + +@pytest.mark.asyncio +@mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.get_refresh_details_by_refresh_id") +async def test_powerbi_trigger_run_failed(mock_get_refresh_details_by_refresh_id, powerbi_trigger): + mock_get_refresh_details_by_refresh_id.return_value = {"status": PowerBIDatasetRefreshStatus.FAILED} + + generator = powerbi_trigger.run() + actual = await generator.asend(None) + expected = TriggerEvent( + { + "status": "Failed", + "message": f"The dataset refresh {DATASET_REFRESH_ID} has " + f"{PowerBIDatasetRefreshStatus.FAILED}.", + "dataset_refresh_id": DATASET_REFRESH_ID, + } + ) + assert expected == actual + + +@pytest.mark.asyncio +@mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.get_refresh_details_by_refresh_id") +async def test_powerbi_trigger_run_completed(mock_get_refresh_details_by_refresh_id, powerbi_trigger): + mock_get_refresh_details_by_refresh_id.return_value = {"status": PowerBIDatasetRefreshStatus.COMPLETED} + + generator = powerbi_trigger.run() + actual = await generator.asend(None) + expected = TriggerEvent( + { + "status": "Completed", + "message": f"The dataset refresh {DATASET_REFRESH_ID} has " + f"{PowerBIDatasetRefreshStatus.COMPLETED}.", + "dataset_refresh_id": DATASET_REFRESH_ID, + } + ) + assert expected == actual + + +@pytest.mark.asyncio 
+@mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.cancel_dataset_refresh") +@mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.get_refresh_details_by_refresh_id") +async def test_powerbi_trigger_run_exception( + mock_get_refresh_details_by_refresh_id, mock_cancel_dataset_refresh, powerbi_trigger +): + """Assert that run catch exception if Power BI API throw exception""" + mock_get_refresh_details_by_refresh_id.side_effect = Exception("Test exception") + + task = [i async for i in powerbi_trigger.run()] + response = TriggerEvent( + { + "status": "error", + "message": "An error occurred: Test exception", + "dataset_refresh_id": DATASET_REFRESH_ID, + } + ) + assert len(task) == 1 + assert response in task + mock_cancel_dataset_refresh.assert_called_once() + + +@pytest.mark.asyncio +@mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.get_refresh_details_by_refresh_id") +async def test_powerbi_trigger_run_timeout(mock_get_refresh_details_by_refresh_id, powerbi_trigger): + """Assert that powerbi run timesout after end_time elapses""" + mock_get_refresh_details_by_refresh_id.return_value = {"status": PowerBIDatasetRefreshStatus.IN_PROGRESS} + + generator = powerbi_trigger.run() + actual = await generator.asend(None) + expected = TriggerEvent( + { + "status": "error", + "message": f"Timeout occurred while waiting for dataset refresh to complete: The dataset refresh {DATASET_REFRESH_ID} has status In Progress.", + "dataset_refresh_id": DATASET_REFRESH_ID, + } + ) + + assert expected == actual From 483bbf66d6f88e2b454cf44cac4f4f62244f305d Mon Sep 17 00:00:00 2001 From: Ambika Garg Date: Tue, 16 Jul 2024 15:07:53 -0400 Subject: [PATCH 06/22] unit tests for powerbi operator --- .../microsoft/azure/hooks/powerbi.py | 14 -- .../microsoft/azure/hooks/test_powerbi.py | 228 ++++-------------- .../microsoft/azure/operators/test_powerbi.py | 90 ++++--- .../microsoft/azure/triggers/test_powerbi.py | 7 +- 4 files changed, 92 insertions(+), 247 deletions(-) diff --git 
a/airflow/providers/microsoft/azure/hooks/powerbi.py b/airflow/providers/microsoft/azure/hooks/powerbi.py index 779cb873a2815..bd0d2d5cf310a 100644 --- a/airflow/providers/microsoft/azure/hooks/powerbi.py +++ b/airflow/providers/microsoft/azure/hooks/powerbi.py @@ -146,20 +146,6 @@ def raw_to_refresh_details(cls, refresh_details: dict) -> dict[str, str]: PowerBIDatasetRefreshFields.ERROR.value: str(refresh_details.get("serviceExceptionJson")), } - async def get_latest_refresh_details(self, dataset_id: str, group_id: str) -> dict[str, str] | None: - """ - Get the refresh details of the most recent dataset refresh in the refresh history of the data source. - - :return: Dictionary containing refresh status and end time if refresh history exists, otherwise None. - """ - history = await self.get_refresh_history(dataset_id=dataset_id, group_id=group_id) - - if len(history) == 0: - return None - - refresh_details = history[0] - return refresh_details - async def get_refresh_details_by_refresh_id( self, dataset_id: str, group_id: str, refresh_id: str ) -> dict[str, str]: diff --git a/tests/providers/microsoft/azure/hooks/test_powerbi.py b/tests/providers/microsoft/azure/hooks/test_powerbi.py index 973f5c494b14e..6bb141e2cf242 100644 --- a/tests/providers/microsoft/azure/hooks/test_powerbi.py +++ b/tests/providers/microsoft/azure/hooks/test_powerbi.py @@ -17,11 +17,10 @@ from __future__ import annotations from unittest import mock -from unittest.mock import MagicMock import pytest -from airflow.models.connection import Connection +from airflow.providers.microsoft.azure.hooks.msgraph import KiotaRequestAdapterHook from airflow.providers.microsoft.azure.hooks.powerbi import ( PowerBIDatasetRefreshException, PowerBIDatasetRefreshFields, @@ -29,40 +28,6 @@ PowerBIHook, ) -DEFAULT_CONNECTION_CLIENT_SECRET = "powerbi_conn_id" -MODULE = "airflow.providers.microsoft.azure.hooks.powerbi" -CLIENT_ID = "client_id" -CLIENT_SECRET = "client_secret" -TENANT_ID = "tenant_id" -BASE_URL = 
"https://api.powerbi.com" -API_VERSION = "v1.0" -GROUP_ID = "group_id" -DATASET_ID = "dataset_id" - -API_RAW_RESPONSE = { - "value": [ - # Completed refresh - { - "requestId": "5e2d9921-e91b-491f-b7e1-e7d8db49194c", - "status": "Completed", - "endTime": "2024-04-15T20:14:08.1458221Z", - # serviceExceptionJson is not present when status is not "Failed" - }, - # In-progress refresh - { - "requestId": "6b6536c1-cfcb-4148-9c21-402c3f5241e4", - "status": "Unknown", # endtime is not available. - }, - # Failed refresh - { - "requestId": "11bf290a-346b-48b7-8973-c5df149337ff", - "status": "Failed", - "endTime": "2024-04-15T20:14:08.1458221Z", - "serviceExceptionJson": '{"errorCode":"ModelRefreshFailed_CredentialsNotSpecified"}', - }, - ] -} - FORMATTED_RESPONSE = [ # Completed refresh { @@ -84,181 +49,82 @@ }, ] +DEFAULT_CONNECTION_CLIENT_SECRET = "powerbi_conn_id" +GROUP_ID = "group_id" +DATASET_ID = "dataset_id" -@pytest.fixture -def powerbi_hook(): - client = PowerBIHook(powerbi_conn_id=DEFAULT_CONNECTION_CLIENT_SECRET) - return client +CONFIG = {"conn_id": DEFAULT_CONNECTION_CLIENT_SECRET, "timeout": 3, "api_version": "v1.0"} @pytest.fixture -def get_token(powerbi_hook): - powerbi_hook._get_token = MagicMock(return_value="access_token") - return powerbi_hook._get_token() - - -def test_get_token_with_missing_credentials(powerbi_hook): - # Mock the get_connection method to return a connection with missing credentials - powerbi_hook.get_connection = MagicMock( - return_value=Connection( - conn_id=DEFAULT_CONNECTION_CLIENT_SECRET, - conn_type="powerbi", - login=None, - password=None, - extra={ - "tenant_id": TENANT_ID, - }, - ) - ) - - with pytest.raises(ValueError): - powerbi_hook._get_token() - - -def test_get_token_with_missing_tenant_id(powerbi_hook): - # Mock the get_connection method to return a connection with missing tenant ID - powerbi_hook.get_connection = MagicMock( - return_value=Connection( - conn_id=DEFAULT_CONNECTION_CLIENT_SECRET, - conn_type="powerbi", - 
login=CLIENT_ID, - password=CLIENT_SECRET, - extra={}, - ) - ) - - with pytest.raises(ValueError): - powerbi_hook._get_token() - - -@mock.patch(f"{MODULE}.ClientSecretCredential") -def test_get_token_with_valid_credentials(mock_credential, powerbi_hook): - # Mock the get_connection method to return a connection with valid credentials - powerbi_hook.get_connection = MagicMock( - return_value=Connection( - conn_id=DEFAULT_CONNECTION_CLIENT_SECRET, - conn_type="powerbi", - login=CLIENT_ID, - password=CLIENT_SECRET, - extra={ - "tenant_id": TENANT_ID, - }, - ) - ) - - token = powerbi_hook._get_token() - mock_credential.assert_called() - - assert token is not None - - -def test_refresh_dataset(powerbi_hook, requests_mock, get_token): - request_id = "request_id" - - # Mock the request in _send_request method to return a successful response - requests_mock.post( - f"{BASE_URL}/{API_VERSION}/myorg/groups/{GROUP_ID}/datasets/{DATASET_ID}/refreshes", - status_code=202, - headers={"Authorization": f"Bearer {get_token}", "RequestId": request_id}, - ) - - result = powerbi_hook.refresh_dataset(dataset_id=DATASET_ID, group_id=GROUP_ID) - - assert result == request_id - - -def test_get_refresh_history_success(powerbi_hook, requests_mock, get_token): - url = f"{BASE_URL}/{API_VERSION}/myorg/groups/{GROUP_ID}/datasets/{DATASET_ID}/refreshes" - - requests_mock.get( - url, json=API_RAW_RESPONSE, headers={"Authorization": f"Bearer {get_token}"}, status_code=200 - ) - - result = powerbi_hook.get_refresh_history(DATASET_ID, GROUP_ID) - - assert len(result) == 3 - assert result == FORMATTED_RESPONSE - - -def test_get_latest_refresh_details_with_no_history(powerbi_hook): - # Mock the get_refresh_history method to return an empty list - powerbi_hook.get_refresh_history = MagicMock(return_value=[]) - - result = powerbi_hook.get_latest_refresh_details(dataset_id=DATASET_ID, group_id=GROUP_ID) - - assert result is None +def powerbi_hook(): + return PowerBIHook(**CONFIG) -def 
test_get_latest_refresh_details_with_history(powerbi_hook): - # Mock the get_refresh_history method to return a list with refresh details - refresh_history = FORMATTED_RESPONSE - powerbi_hook.get_refresh_history = MagicMock(return_value=refresh_history) +@pytest.mark.asyncio +async def test_get_refresh_history(powerbi_hook): + response_data = {"value": [{"requestId": "1234", "status": "Completed", "serviceExceptionJson": ""}]} - result = powerbi_hook.get_latest_refresh_details(dataset_id=DATASET_ID, group_id=GROUP_ID) + with mock.patch.object(KiotaRequestAdapterHook, "run", new_callable=mock.AsyncMock) as mock_run: + mock_run.return_value = response_data + result = await powerbi_hook.get_refresh_history(DATASET_ID, GROUP_ID) - assert result == FORMATTED_RESPONSE[0] + expected = [{"request_id": "1234", "status": "Completed", "error": ""}] + assert result == expected -def test_get_refresh_details_by_request_id(powerbi_hook): +@pytest.mark.asyncio +async def test_get_refresh_details_by_refresh_id(powerbi_hook): # Mock the get_refresh_history method to return a list of refresh histories refresh_histories = FORMATTED_RESPONSE - powerbi_hook.get_refresh_history = MagicMock(return_value=refresh_histories) + powerbi_hook.get_refresh_history = mock.AsyncMock(return_value=refresh_histories) # Call the function with a valid request ID - request_id = "5e2d9921-e91b-491f-b7e1-e7d8db49194c" - result = powerbi_hook.get_refresh_details_by_request_id( - dataset_id=DATASET_ID, group_id=GROUP_ID, request_id=request_id + refresh_id = "5e2d9921-e91b-491f-b7e1-e7d8db49194c" + result = await powerbi_hook.get_refresh_details_by_refresh_id( + dataset_id=DATASET_ID, group_id=GROUP_ID, refresh_id=refresh_id ) # Assert that the correct refresh details are returned assert result == { PowerBIDatasetRefreshFields.REQUEST_ID.value: "5e2d9921-e91b-491f-b7e1-e7d8db49194c", PowerBIDatasetRefreshFields.STATUS.value: "Completed", - PowerBIDatasetRefreshFields.END_TIME.value: 
"2024-04-15T20:14:08.1458221Z", PowerBIDatasetRefreshFields.ERROR.value: "None", } # Call the function with an invalid request ID invalid_request_id = "invalid_request_id" with pytest.raises(PowerBIDatasetRefreshException): - powerbi_hook.get_refresh_details_by_request_id( - dataset_id=DATASET_ID, group_id=GROUP_ID, request_id=invalid_request_id + await powerbi_hook.get_refresh_details_by_refresh_id( + dataset_id=DATASET_ID, group_id=GROUP_ID, refresh_id=invalid_request_id ) -_wait_for_dataset_refresh_status_test_args = [ - (PowerBIDatasetRefreshStatus.COMPLETED, PowerBIDatasetRefreshStatus.COMPLETED, True), - (PowerBIDatasetRefreshStatus.FAILED, PowerBIDatasetRefreshStatus.COMPLETED, False), - (PowerBIDatasetRefreshStatus.IN_PROGRESS, PowerBIDatasetRefreshStatus.COMPLETED, "timeout"), -] +@pytest.mark.asyncio +async def test_trigger_dataset_refresh(powerbi_hook): + response_data = {"requestid": "5e2d9921-e91b-491f-b7e1-e7d8db49194c"} + with mock.patch.object(KiotaRequestAdapterHook, "run", new_callable=mock.AsyncMock) as mock_run: + mock_run.return_value = response_data + result = await powerbi_hook.trigger_dataset_refresh(dataset_id=DATASET_ID, group_id=GROUP_ID) + + assert result == "5e2d9921-e91b-491f-b7e1-e7d8db49194c" -@pytest.mark.parametrize( - argnames=("dataset_refresh_status", "expected_status", "expected_result"), - argvalues=_wait_for_dataset_refresh_status_test_args, - ids=[ - f"refresh_status_{argval[0]}_expected_{argval[1]}" - for argval in _wait_for_dataset_refresh_status_test_args - ], -) -def test_wait_for_dataset_refresh_status( - powerbi_hook, dataset_refresh_status, expected_status, expected_result -): - config = { - "dataset_id": DATASET_ID, - "group_id": GROUP_ID, - "request_id": "5e2d9921-e91b-491f-b7e1-e7d8db49194c", - "timeout": 3, - "check_interval": 1, - "expected_status": expected_status, - } - # Mock the get_refresh_details_by_request_id method to return a dataset refresh details - dataset_refresh_details = 
{PowerBIDatasetRefreshFields.STATUS.value: dataset_refresh_status} - powerbi_hook.get_refresh_details_by_request_id = MagicMock(return_value=dataset_refresh_details) +@pytest.mark.asyncio +async def test_cancel_dataset_refresh(powerbi_hook): + dataset_refresh_id = "5e2d9921-e91b-491f-b7e1-e7d8db49194c" - if expected_result != "timeout": - assert powerbi_hook.wait_for_dataset_refresh_status(**config) == expected_result - else: - with pytest.raises(PowerBIDatasetRefreshException): - powerbi_hook.wait_for_dataset_refresh_status(**config) + with mock.patch.object(KiotaRequestAdapterHook, "run", new_callable=mock.AsyncMock) as mock_run: + await powerbi_hook.cancel_dataset_refresh(DATASET_ID, GROUP_ID, dataset_refresh_id) + + mock_run.assert_called_once_with( + url="myorg/groups/{group_id}/datasets/{dataset_id}/refreshes/{dataset_refresh_id}", + response_type=None, + path_parameters={ + "group_id": GROUP_ID, + "dataset_id": DATASET_ID, + "dataset_refresh_id": dataset_refresh_id, + }, + method="DELETE", + ) diff --git a/tests/providers/microsoft/azure/operators/test_powerbi.py b/tests/providers/microsoft/azure/operators/test_powerbi.py index 8e3f566721a59..b6ab90b8173a2 100644 --- a/tests/providers/microsoft/azure/operators/test_powerbi.py +++ b/tests/providers/microsoft/azure/operators/test_powerbi.py @@ -17,17 +17,19 @@ from __future__ import annotations +from unittest import mock from unittest.mock import AsyncMock, MagicMock, call import pytest +from airflow.exceptions import AirflowException, TaskDeferred from airflow.providers.microsoft.azure.hooks.powerbi import ( - PowerBIDatasetRefreshException, PowerBIDatasetRefreshFields, PowerBIDatasetRefreshStatus, PowerBIHook, ) from airflow.providers.microsoft.azure.operators.powerbi import PowerBIDatasetRefreshOperator +from airflow.providers.microsoft.azure.triggers.powerbi import PowerBITrigger DEFAULT_CONNECTION_CLIENT_SECRET = "powerbi_conn_id" TASK_ID = "run_powerbi_operator" @@ -122,31 +124,7 @@ def 
test_execute_no_wait_for_termination(mock_powerbi_hook, latest_refresh_detai ] -_get_wait_for_status_and_latest_refresh_details_args = [ - (True, None), - (False, None), - (True, COMPLETED_REFRESH_DETAILS), - (False, COMPLETED_REFRESH_DETAILS), - (True, FAILED_REFRESH_DETAILS), - (False, FAILED_REFRESH_DETAILS), -] - - -@pytest.mark.parametrize( - argnames=("wait_for_status_return_value", "latest_refresh_details"), - argvalues=_get_wait_for_status_and_latest_refresh_details_args, - ids=[ - ( - f"wait_for_status_detail_{argval[1][PowerBIDatasetRefreshFields.STATUS.value]}_return_value_{argval[0]}" - if argval[1] is not None - else f"wait_for_status_detail_None_return_value_{argval[0]}" - ) - for argval in _get_wait_for_status_and_latest_refresh_details_args - ], -) -def test_execute_wait_for_termination_with_Deferrable( - mock_powerbi_hook, wait_for_status_return_value, latest_refresh_details -): +def test_execute_wait_for_termination_with_Deferrable(mock_powerbi_hook): operator = PowerBIDatasetRefreshOperator( wait_for_termination=True, **CONFIG, @@ -156,28 +134,44 @@ def test_execute_wait_for_termination_with_Deferrable( # Magic mock the hook methods mock_powerbi_hook.trigger_dataset_refresh = AsyncMock(return_value=NEW_REFRESH_REQUEST_ID) - mock_powerbi_hook.get_refresh_details_by_request_id = MagicMock(return_value=latest_refresh_details) - # Act and assert - if wait_for_status_return_value is False: - with pytest.raises(PowerBIDatasetRefreshException): - operator.execute(context) - else: + with pytest.raises(TaskDeferred) as exc: operator.execute(context) - assert mock_powerbi_hook.trigger_dataset_refresh.called - assert mock_powerbi_hook.get_refresh_details_by_request_id.called - mock_powerbi_hook.wait_for_dataset_refresh_status.assert_called_once_with( - request_id=NEW_REFRESH_REQUEST_ID, - dataset_id=DATASET_ID, - group_id=GROUP_ID, - expected_status=PowerBIDatasetRefreshStatus.COMPLETED, + + assert mock_powerbi_hook.trigger_dataset_refresh.called + assert 
isinstance(exc.value.trigger, PowerBITrigger), "Trigger is not a PowerBITriiger" + + assert context["ti"].xcom_push.call_count == 1 + assert context["ti"].xcom_push.call_args_list == [ + call(key="powerbi_dataset_refresh_id", value=NEW_REFRESH_REQUEST_ID, execution_date=None), + ] + + +def test_powerbi_operator_async_execute_complete_success(): + """Assert that execute_complete log success message""" + operator = PowerBIDatasetRefreshOperator( + wait_for_termination=True, + **CONFIG, + ) + context = {"ti": MagicMock()} + with mock.patch.object(operator.log, "info") as mock_log_info: + operator.execute_complete( + context=context, + event={"status": "success", "message": "success", "dataset_refresh_id": "1234"}, + ) + mock_log_info.assert_called_with("success") + assert context["ti"].xcom_push.call_count == 1 + + +def test_powerbi_operator_async_execute_complete_fail(): + """Assert that execute_complete raise exception on error""" + operator = PowerBIDatasetRefreshOperator( + wait_for_termination=True, + **CONFIG, + ) + context = {"ti": MagicMock()} + with pytest.raises(AirflowException): + operator.execute_complete( + context=context, + event={"status": "error", "message": "error", "dataset_refresh_id": "1234"}, ) - assert context["ti"].xcom_push.call_count == 3 - assert context["ti"].xcom_push.call_args_list == [ - call( - key="powerbi_dataset_refresh_id", - value=NEW_REFRESH_REQUEST_ID, - ), - call(key="powerbi_dataset_refresh_status", value=PowerBIDatasetRefreshStatus.COMPLETED), - call(key="powerbi_dataset_refresh_error", value="None"), - ] diff --git a/tests/providers/microsoft/azure/triggers/test_powerbi.py b/tests/providers/microsoft/azure/triggers/test_powerbi.py index 1c17753158bca..b45558fa72772 100644 --- a/tests/providers/microsoft/azure/triggers/test_powerbi.py +++ b/tests/providers/microsoft/azure/triggers/test_powerbi.py @@ -18,7 +18,6 @@ from __future__ import annotations import asyncio -import logging import time from unittest import mock from 
unittest.mock import patch @@ -30,9 +29,6 @@ from airflow.triggers.base import TriggerEvent from tests.providers.microsoft.conftest import get_airflow_connection -logging.basicConfig(level=logging.DEBUG) -mylogger = logging.getLogger() - POWERBI_CONN_ID = "powerbi_default" DATASET_ID = "dataset_id" GROUP_ID = "group_id" @@ -100,6 +96,7 @@ def test_powerbi_trigger_serialization(): @pytest.mark.asyncio @mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.get_refresh_details_by_refresh_id") async def test_powerbi_trigger_run_inprogress(mock_get_refresh_details_by_refresh_id, powerbi_trigger): + """Assert task isn't completed until timeout if dataset refresh is in progress.""" mock_get_refresh_details_by_refresh_id.return_value = {"status": PowerBIDatasetRefreshStatus.IN_PROGRESS} task = asyncio.create_task(powerbi_trigger.run().__anext__()) await asyncio.sleep(0.5) @@ -112,6 +109,7 @@ async def test_powerbi_trigger_run_inprogress(mock_get_refresh_details_by_refres @pytest.mark.asyncio @mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.get_refresh_details_by_refresh_id") async def test_powerbi_trigger_run_failed(mock_get_refresh_details_by_refresh_id, powerbi_trigger): + """Assert event is triggered upon failed dataset refresh.""" mock_get_refresh_details_by_refresh_id.return_value = {"status": PowerBIDatasetRefreshStatus.FAILED} generator = powerbi_trigger.run() @@ -130,6 +128,7 @@ async def test_powerbi_trigger_run_failed(mock_get_refresh_details_by_refresh_id @pytest.mark.asyncio @mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.get_refresh_details_by_refresh_id") async def test_powerbi_trigger_run_completed(mock_get_refresh_details_by_refresh_id, powerbi_trigger): + """Assert event is triggered upon successful dataset refresh.""" mock_get_refresh_details_by_refresh_id.return_value = {"status": PowerBIDatasetRefreshStatus.COMPLETED} generator = powerbi_trigger.run() From 2b943713247eb9c8db555490dfc0f8f0455416c3 Mon Sep 17 00:00:00 2001 From: David Blain Date: Thu, 18 Jul 
2024 16:09:58 +0200 Subject: [PATCH 07/22] refactor: Did some small changes to PowerBIOperator, removed unnecessary logging statements (don't just log info statements to log them, those can have performance/cost implications) --- .../providers/microsoft/azure/operators/powerbi.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/airflow/providers/microsoft/azure/operators/powerbi.py b/airflow/providers/microsoft/azure/operators/powerbi.py index 577679c1ea0b5..ccdf23d4df77a 100644 --- a/airflow/providers/microsoft/azure/operators/powerbi.py +++ b/airflow/providers/microsoft/azure/operators/powerbi.py @@ -43,9 +43,9 @@ class PowerBILink(BaseOperatorLink): def get_link(self, operator: BaseOperator, *, ti_key: TaskInstanceKey): url = ( - f"https://app.powerbi.com" # type: ignore[attr-defined] + "https://app.powerbi.com" # type: ignore[attr-defined] f"/groups/{operator.group_id}/datasets/{operator.dataset_id}" # type: ignore[attr-defined] - f"/details?experience=power-bi" + "/details?experience=power-bi" ) return url @@ -100,7 +100,7 @@ def run_async(cls, future: Any) -> Any: def execute(self, context: Context): """Refresh the Power BI Dataset.""" - self.log.info("Executing Dataset refresh.") + # TODO: You should use the deferrable mechanism more here instead of running async code in main thread refresh_id = self.run_async( self.hook.trigger_dataset_refresh( dataset_id=self.dataset_id, @@ -150,7 +150,6 @@ def execute_complete(self, context: Context, event: dict[str, str]) -> Any: if event: if event["status"] == "error": raise AirflowException(event["message"]) - else: - # Push Dataset refresh status to Xcom - self.xcom_push(context=context, key="powerbi_dataset_refresh_status", value=event["status"]) - self.log.info(event["message"]) + + # Push Dataset refresh status to Xcom + self.xcom_push(context=context, key="powerbi_dataset_refresh_status", value=event["status"]) From 2a3db173fcb319fdd725b85aaa84ec6ff7462c03 Mon Sep 17 00:00:00 2001 
From: Ambika Garg Date: Mon, 29 Jul 2024 09:12:16 +0530 Subject: [PATCH 08/22] Fixed the unit test --- .../microsoft/azure/operators/test_powerbi.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tests/providers/microsoft/azure/operators/test_powerbi.py b/tests/providers/microsoft/azure/operators/test_powerbi.py index b6ab90b8173a2..c373db1e2e9b3 100644 --- a/tests/providers/microsoft/azure/operators/test_powerbi.py +++ b/tests/providers/microsoft/azure/operators/test_powerbi.py @@ -17,7 +17,6 @@ from __future__ import annotations -from unittest import mock from unittest.mock import AsyncMock, MagicMock, call import pytest @@ -154,12 +153,10 @@ def test_powerbi_operator_async_execute_complete_success(): **CONFIG, ) context = {"ti": MagicMock()} - with mock.patch.object(operator.log, "info") as mock_log_info: - operator.execute_complete( - context=context, - event={"status": "success", "message": "success", "dataset_refresh_id": "1234"}, - ) - mock_log_info.assert_called_with("success") + operator.execute_complete( + context=context, + event={"status": "success", "message": "success", "dataset_refresh_id": "1234"}, + ) assert context["ti"].xcom_push.call_count == 1 From efbb02ae52897c4782551304969d5243d6e53ab8 Mon Sep 17 00:00:00 2001 From: Ambika Garg Date: Mon, 29 Jul 2024 14:31:17 +0530 Subject: [PATCH 09/22] Added more tests for full code coverage --- .../microsoft/azure/hooks/test_powerbi.py | 101 +++++++++++++++++- .../microsoft/azure/operators/test_powerbi.py | 43 ++++++++ .../microsoft/azure/triggers/test_powerbi.py | 49 ++++++++- 3 files changed, 190 insertions(+), 3 deletions(-) diff --git a/tests/providers/microsoft/azure/hooks/test_powerbi.py b/tests/providers/microsoft/azure/hooks/test_powerbi.py index 6bb141e2cf242..a3a521b45e820 100644 --- a/tests/providers/microsoft/azure/hooks/test_powerbi.py +++ b/tests/providers/microsoft/azure/hooks/test_powerbi.py @@ -20,6 +20,7 @@ import pytest +from airflow.exceptions import 
AirflowException from airflow.providers.microsoft.azure.hooks.msgraph import KiotaRequestAdapterHook from airflow.providers.microsoft.azure.hooks.powerbi import ( PowerBIDatasetRefreshException, @@ -73,6 +74,60 @@ async def test_get_refresh_history(powerbi_hook): assert result == expected +@pytest.mark.asyncio +async def test_get_refresh_history_airflow_exception(powerbi_hook): + """Test handling of AirflowException in get_refresh_history.""" + + with mock.patch.object(KiotaRequestAdapterHook, "run", new_callable=mock.AsyncMock) as mock_run: + mock_run.side_effect = AirflowException("Test exception") + + with pytest.raises(PowerBIDatasetRefreshException, match="Failed to retrieve refresh history"): + await powerbi_hook.get_refresh_history(DATASET_ID, GROUP_ID) + + +@pytest.mark.parametrize( + "input_data, expected_output", + [ + ( + {"requestId": "1234", "status": "Completed", "serviceExceptionJson": ""}, + { + PowerBIDatasetRefreshFields.REQUEST_ID.value: "1234", + PowerBIDatasetRefreshFields.STATUS.value: "Completed", + PowerBIDatasetRefreshFields.ERROR.value: "", + }, + ), + ( + {"requestId": "5678", "status": "Unknown", "serviceExceptionJson": "Some error"}, + { + PowerBIDatasetRefreshFields.REQUEST_ID.value: "5678", + PowerBIDatasetRefreshFields.STATUS.value: "In Progress", + PowerBIDatasetRefreshFields.ERROR.value: "Some error", + }, + ), + ( + {"requestId": None, "status": None, "serviceExceptionJson": None}, + { + PowerBIDatasetRefreshFields.REQUEST_ID.value: "None", + PowerBIDatasetRefreshFields.STATUS.value: "None", + PowerBIDatasetRefreshFields.ERROR.value: "None", + }, + ), + ( + {}, # Empty input dictionary + { + PowerBIDatasetRefreshFields.REQUEST_ID.value: "None", + PowerBIDatasetRefreshFields.STATUS.value: "None", + PowerBIDatasetRefreshFields.ERROR.value: "None", + }, + ), + ], +) +def test_raw_to_refresh_details(input_data, expected_output): + """Test raw_to_refresh_details method.""" + result = PowerBIHook.raw_to_refresh_details(input_data) + 
assert result == expected_output + + @pytest.mark.asyncio async def test_get_refresh_details_by_refresh_id(powerbi_hook): # Mock the get_refresh_history method to return a list of refresh histories @@ -101,7 +156,41 @@ async def test_get_refresh_details_by_refresh_id(powerbi_hook): @pytest.mark.asyncio -async def test_trigger_dataset_refresh(powerbi_hook): +async def test_get_refresh_details_by_refresh_id_empty_history(powerbi_hook): + """Test exception when refresh history is empty.""" + # Mock the get_refresh_history method to return an empty list + powerbi_hook.get_refresh_history = mock.AsyncMock(return_value=[]) + + # Call the function with a request ID + refresh_id = "any_request_id" + with pytest.raises( + PowerBIDatasetRefreshException, + match=f"Unable to fetch the details of dataset refresh with Request Id: {refresh_id}", + ): + await powerbi_hook.get_refresh_details_by_refresh_id( + dataset_id=DATASET_ID, group_id=GROUP_ID, refresh_id=refresh_id + ) + + +@pytest.mark.asyncio +async def test_get_refresh_details_by_refresh_id_not_found(powerbi_hook): + """Test exception when the refresh ID is not found in the refresh history.""" + # Mock the get_refresh_history method to return a list of refresh histories without the specified ID + powerbi_hook.get_refresh_history = mock.AsyncMock(return_value=FORMATTED_RESPONSE) + + # Call the function with an invalid request ID + invalid_request_id = "invalid_request_id" + with pytest.raises( + PowerBIDatasetRefreshException, + match=f"Unable to fetch the details of dataset refresh with Request Id: {invalid_request_id}", + ): + await powerbi_hook.get_refresh_details_by_refresh_id( + dataset_id=DATASET_ID, group_id=GROUP_ID, refresh_id=invalid_request_id + ) + + +@pytest.mark.asyncio +async def test_trigger_dataset_refresh_success(powerbi_hook): response_data = {"requestid": "5e2d9921-e91b-491f-b7e1-e7d8db49194c"} with mock.patch.object(KiotaRequestAdapterHook, "run", new_callable=mock.AsyncMock) as mock_run: @@ -111,6 
+200,16 @@ async def test_trigger_dataset_refresh(powerbi_hook): assert result == "5e2d9921-e91b-491f-b7e1-e7d8db49194c" +@pytest.mark.asyncio +async def test_trigger_dataset_refresh_failure(powerbi_hook): + """Test failure to trigger dataset refresh due to AirflowException.""" + with mock.patch.object(KiotaRequestAdapterHook, "run", new_callable=mock.AsyncMock) as mock_run: + mock_run.side_effect = AirflowException("Test exception") + + with pytest.raises(PowerBIDatasetRefreshException, match="Failed to trigger dataset refresh."): + await powerbi_hook.trigger_dataset_refresh(dataset_id=DATASET_ID, group_id=GROUP_ID) + + @pytest.mark.asyncio async def test_cancel_dataset_refresh(powerbi_hook): dataset_refresh_id = "5e2d9921-e91b-491f-b7e1-e7d8db49194c" diff --git a/tests/providers/microsoft/azure/operators/test_powerbi.py b/tests/providers/microsoft/azure/operators/test_powerbi.py index c373db1e2e9b3..e8c1bf92be368 100644 --- a/tests/providers/microsoft/azure/operators/test_powerbi.py +++ b/tests/providers/microsoft/azure/operators/test_powerbi.py @@ -29,6 +29,7 @@ ) from airflow.providers.microsoft.azure.operators.powerbi import PowerBIDatasetRefreshOperator from airflow.providers.microsoft.azure.triggers.powerbi import PowerBITrigger +from airflow.utils import timezone DEFAULT_CONNECTION_CLIENT_SECRET = "powerbi_conn_id" TASK_ID = "run_powerbi_operator" @@ -43,6 +44,8 @@ "timeout": 3, } NEW_REFRESH_REQUEST_ID = "5e2d9921-e91b-491f-b7e1-e7d8db49194c" +DEFAULT_DATE = timezone.datetime(2021, 1, 1) + # Sample responses from PowerBI API COMPLETED_REFRESH_DETAILS = { @@ -172,3 +175,43 @@ def test_powerbi_operator_async_execute_complete_fail(): context=context, event={"status": "error", "message": "error", "dataset_refresh_id": "1234"}, ) + assert context["ti"].xcom_push.call_count == 0 + + +def test_execute_complete_no_event(create_task_instance_of_operator): + """Test execute_complete when event is None or empty.""" + operator = PowerBIDatasetRefreshOperator( + 
wait_for_termination=True, + **CONFIG, + ) + context = {"ti": MagicMock()} + operator.execute_complete( + context=context, + event=None, + ) + assert context["ti"].xcom_push.call_count == 0 + + +def test_powerbilink(create_task_instance_of_operator): + """Assert Power BI Extra link matches the expected URL.""" + ti = create_task_instance_of_operator( + PowerBIDatasetRefreshOperator, + dag_id="test_powerbi_refresh_op_link", + execution_date=DEFAULT_DATE, + task_id=TASK_ID, + conn_id=DEFAULT_CONNECTION_CLIENT_SECRET, + group_id=GROUP_ID, + dataset_id=DATASET_ID, + check_interval=1, + timeout=3, + ) + + ti.xcom_push(key="powerbi_dataset_refresh_id", value=NEW_REFRESH_REQUEST_ID) + url = ti.task.get_extra_links(ti, "Monitor PowerBI Dataset") + EXPECTED_ITEM_RUN_OP_EXTRA_LINK = ( + "https://app.powerbi.com", + f"/groups/{GROUP_ID}/datasets/{DATASET_ID}", + "/details?experience=power-bi", + ) + + assert url == EXPECTED_ITEM_RUN_OP_EXTRA_LINK diff --git a/tests/providers/microsoft/azure/triggers/test_powerbi.py b/tests/providers/microsoft/azure/triggers/test_powerbi.py index b45558fa72772..a17449dd74bea 100644 --- a/tests/providers/microsoft/azure/triggers/test_powerbi.py +++ b/tests/providers/microsoft/azure/triggers/test_powerbi.py @@ -35,7 +35,7 @@ DATASET_REFRESH_ID = "dataset_refresh_id" POWERBI_DATASET_END_TIME = time.time() + 10 MODULE = "airflow.providers.microsoft.azure" -TIMEOUT = 3 +TIMEOUT = 60 CHECK_INTERVAL = 3 API_VERSION = "v1.0" @@ -147,7 +147,7 @@ async def test_powerbi_trigger_run_completed(mock_get_refresh_details_by_refresh @pytest.mark.asyncio @mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.cancel_dataset_refresh") @mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.get_refresh_details_by_refresh_id") -async def test_powerbi_trigger_run_exception( +async def test_powerbi_trigger_run_exception_during_refresh_check_loop( mock_get_refresh_details_by_refresh_id, mock_cancel_dataset_refresh, powerbi_trigger ): """Assert that run catch exception if Power BI 
API throw exception""" @@ -166,6 +166,51 @@ async def test_powerbi_trigger_run_exception( mock_cancel_dataset_refresh.assert_called_once() +@pytest.mark.asyncio +@mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.cancel_dataset_refresh") +@mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.get_refresh_details_by_refresh_id") +async def test_powerbi_trigger_run_exception_during_refresh_cancellation( + mock_get_refresh_details_by_refresh_id, mock_cancel_dataset_refresh, powerbi_trigger +): + """Assert that run catch exception if Power BI API throw exception""" + mock_get_refresh_details_by_refresh_id.side_effect = Exception("Test exception") + mock_cancel_dataset_refresh.side_effect = Exception("Exception caused by cancel_dataset_refresh") + + task = [i async for i in powerbi_trigger.run()] + response = TriggerEvent( + { + "status": "error", + "message": "An error occurred while canceling dataset: Exception caused by cancel_dataset_refresh", + "dataset_refresh_id": DATASET_REFRESH_ID, + } + ) + + assert len(task) == 1 + assert response in task + mock_cancel_dataset_refresh.assert_called_once() + + +@pytest.mark.asyncio +@mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.get_refresh_details_by_refresh_id") +async def test_powerbi_trigger_run_exception_without_refresh_id( + mock_get_refresh_details_by_refresh_id, powerbi_trigger +): + """Assert handling of exception when there is no dataset_refresh_id""" + powerbi_trigger.dataset_refresh_id = None + mock_get_refresh_details_by_refresh_id.side_effect = Exception("Test exception for no dataset_refresh_id") + + task = [i async for i in powerbi_trigger.run()] + response = TriggerEvent( + { + "status": "error", + "message": "An error occurred: Test exception for no dataset_refresh_id", + "dataset_refresh_id": None, + } + ) + assert len(task) == 1 + assert response in task + + @pytest.mark.asyncio @mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.get_refresh_details_by_refresh_id") async def 
test_powerbi_trigger_run_timeout(mock_get_refresh_details_by_refresh_id, powerbi_trigger): From 2f8f46f47728acd9156520ba8997effc0114523d Mon Sep 17 00:00:00 2001 From: Ambika Garg Date: Mon, 29 Jul 2024 15:32:30 +0530 Subject: [PATCH 10/22] Added system test for operator --- .../microsoft/azure/operators/test_powerbi.py | 6 +-- .../azure/example_refresh_dataset_powerbi.py | 48 +++++++++++++++++++ 2 files changed, 51 insertions(+), 3 deletions(-) create mode 100644 tests/system/providers/microsoft/azure/example_refresh_dataset_powerbi.py diff --git a/tests/providers/microsoft/azure/operators/test_powerbi.py b/tests/providers/microsoft/azure/operators/test_powerbi.py index e8c1bf92be368..e1f396bd27ba2 100644 --- a/tests/providers/microsoft/azure/operators/test_powerbi.py +++ b/tests/providers/microsoft/azure/operators/test_powerbi.py @@ -209,9 +209,9 @@ def test_powerbilink(create_task_instance_of_operator): ti.xcom_push(key="powerbi_dataset_refresh_id", value=NEW_REFRESH_REQUEST_ID) url = ti.task.get_extra_links(ti, "Monitor PowerBI Dataset") EXPECTED_ITEM_RUN_OP_EXTRA_LINK = ( - "https://app.powerbi.com", - f"/groups/{GROUP_ID}/datasets/{DATASET_ID}", - "/details?experience=power-bi", + "https://app.powerbi.com" # type: ignore[attr-defined] + f"/groups/{GROUP_ID}/datasets/{DATASET_ID}" # type: ignore[attr-defined] + "/details?experience=power-bi" ) assert url == EXPECTED_ITEM_RUN_OP_EXTRA_LINK diff --git a/tests/system/providers/microsoft/azure/example_refresh_dataset_powerbi.py b/tests/system/providers/microsoft/azure/example_refresh_dataset_powerbi.py new file mode 100644 index 0000000000000..dad55f3a29084 --- /dev/null +++ b/tests/system/providers/microsoft/azure/example_refresh_dataset_powerbi.py @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +from datetime import datetime + +from airflow import DAG +from airflow.providers.microsoft.azure.operators.powerbi import PowerBIDatasetRefreshOperator + +DAG_ID = "example_refresh_powerbi_dataset" + +with DAG( + dag_id=DAG_ID, + start_date=datetime(2021, 1, 1), + schedule=None, + tags=["example"], +) as dag: + # [START howto_operator_powerbi_refresh] + refresh_powerbi_dataset = PowerBIDatasetRefreshOperator( + conn_id="powerbi_default", + task_id="refresh_powerbi_dataset", + dataset_id="7bacf905-be8a-4f67-9512-71f4dc0c42dc", + group_id="aaaebfa6-194a-4edd-8a36-a5e42200df2e", + check_interval=30, + wait_for_termination=True, + ) + # [END howto_operator_powerbi_refresh] + + refresh_powerbi_dataset + +from tests.system.utils import get_test_run # noqa: E402 + +# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest) +test_run = get_test_run(dag) From d1996d5fd6c029e5d7ace8e603f2c42dee8266dc Mon Sep 17 00:00:00 2001 From: Ambika Garg Date: Thu, 1 Aug 2024 01:44:24 +0530 Subject: [PATCH 11/22] Fix system test --- .../azure/example_refresh_dataset_powerbi.py | 52 ++++++++++++++++--- 1 file changed, 45 insertions(+), 7 deletions(-) diff --git a/tests/system/providers/microsoft/azure/example_refresh_dataset_powerbi.py b/tests/system/providers/microsoft/azure/example_refresh_dataset_powerbi.py index dad55f3a29084..5be2026538c77 100644 
--- a/tests/system/providers/microsoft/azure/example_refresh_dataset_powerbi.py +++ b/tests/system/providers/microsoft/azure/example_refresh_dataset_powerbi.py @@ -16,12 +16,37 @@ # under the License. from __future__ import annotations +import os from datetime import datetime -from airflow import DAG +from airflow import DAG, settings +from airflow.decorators import task +from airflow.models import Connection +from airflow.models.baseoperator import chain from airflow.providers.microsoft.azure.operators.powerbi import PowerBIDatasetRefreshOperator DAG_ID = "example_refresh_powerbi_dataset" +CONN_ID = "powerbi_default" +DATASET_ID = os.environ.get("DATASET_ID", "None") +GROUP_ID = os.environ.get("GROUP_ID", "None") +CLIENT_ID = os.environ.get("CLIENT_ID", None) +CLIENT_SECRET = os.environ.get("CLIENT_SECRET", None) +TENANT_ID = os.environ.get("TENANT_ID", None) + + +@task +def create_connection(conn_id_name: str): + conn = Connection( + conn_id=conn_id_name, + conn_type="powerbi", + login=CLIENT_ID, + password=CLIENT_SECRET, + extra={"tenant_id": TENANT_ID}, + ) + session = settings.Session() + session.add(conn) + session.commit() + with DAG( dag_id=DAG_ID, @@ -29,18 +54,31 @@ schedule=None, tags=["example"], ) as dag: - # [START howto_operator_powerbi_refresh] + set_up_connection = create_connection(CONN_ID) + + # [START howto_operator_powerbi_refresh_no_wait_for_termination] refresh_powerbi_dataset = PowerBIDatasetRefreshOperator( conn_id="powerbi_default", task_id="refresh_powerbi_dataset", - dataset_id="7bacf905-be8a-4f67-9512-71f4dc0c42dc", - group_id="aaaebfa6-194a-4edd-8a36-a5e42200df2e", + dataset_id=DATASET_ID, + group_id=GROUP_ID, check_interval=30, - wait_for_termination=True, + wait_for_termination=False, + ) + # [END howto_operator_powerbi_refresh_no_wait_for_termination] + + chain( + # TEST SETUP + set_up_connection, + # TEST BODY + refresh_powerbi_dataset, ) - # [END howto_operator_powerbi_refresh] - refresh_powerbi_dataset + from 
tests.system.utils.watcher import watcher + + # This test needs watcher in order to properly mark success/failure + # when "tearDown" task with trigger rule is part of the DAG + list(dag.tasks) >> watcher() from tests.system.utils import get_test_run # noqa: E402 From f7939d371660f28aaebed6b9c0558eb7b14174a6 Mon Sep 17 00:00:00 2001 From: Ambika Garg Date: Wed, 7 Aug 2024 22:15:57 +0530 Subject: [PATCH 12/22] Refactor: To use more of defferable mechanism, shifted all the async code in trigger --- .../microsoft/azure/operators/powerbi.py | 33 +++---------------- .../microsoft/azure/triggers/powerbi.py | 11 +++++-- 2 files changed, 13 insertions(+), 31 deletions(-) diff --git a/airflow/providers/microsoft/azure/operators/powerbi.py b/airflow/providers/microsoft/azure/operators/powerbi.py index ccdf23d4df77a..b8754fb6dad77 100644 --- a/airflow/providers/microsoft/azure/operators/powerbi.py +++ b/airflow/providers/microsoft/azure/operators/powerbi.py @@ -24,7 +24,6 @@ from airflow.exceptions import AirflowException from airflow.models import BaseOperator, BaseOperatorLink from airflow.providers.microsoft.azure.hooks.powerbi import ( - PowerBIDatasetRefreshFields, PowerBIHook, ) from airflow.providers.microsoft.azure.triggers.powerbi import PowerBITrigger @@ -100,17 +99,6 @@ def run_async(cls, future: Any) -> Any: def execute(self, context: Context): """Refresh the Power BI Dataset.""" - # TODO: You should use the deferrable mechanism more here instead of running async code in main thread - refresh_id = self.run_async( - self.hook.trigger_dataset_refresh( - dataset_id=self.dataset_id, - group_id=self.group_id, - ) - ) - - # Push Dataset Refresh ID to Xcom regardless of what happen during the refresh - self.xcom_push(context=context, key="powerbi_dataset_refresh_id", value=refresh_id) - if self.wait_for_termination: end_time = time.time() + self.timeout self.defer( @@ -118,7 +106,7 @@ def execute(self, context: Context): conn_id=self.conn_id, group_id=self.group_id, 
dataset_id=self.dataset_id, - dataset_refresh_id=refresh_id, + # dataset_refresh_id=refresh_id, end_time=end_time, timeout=self.timeout, check_interval=self.check_interval, @@ -127,20 +115,6 @@ def execute(self, context: Context): method_name=self.execute_complete.__name__, ) - # Retrieve refresh details after triggering refresh - refresh_details = self.run_async( - self.hook.get_refresh_details_by_refresh_id( - dataset_id=self.dataset_id, group_id=self.group_id, refresh_id=refresh_id - ) - ) - - status = str(refresh_details.get(PowerBIDatasetRefreshFields.STATUS.value)) - error = str(refresh_details.get(PowerBIDatasetRefreshFields.ERROR.value)) - - # Xcom Integration - self.xcom_push(context=context, key="powerbi_dataset_refresh_status", value=status) - self.xcom_push(context=context, key="powerbi_dataset_refresh_error", value=error) - def execute_complete(self, context: Context, event: dict[str, str]) -> Any: """ Return immediately - callback for when the trigger fires. @@ -151,5 +125,8 @@ def execute_complete(self, context: Context, event: dict[str, str]) -> Any: if event["status"] == "error": raise AirflowException(event["message"]) - # Push Dataset refresh status to Xcom + # Push Dataset refresh Id and status to Xcom + self.xcom_push( + context=context, key="powerbi_dataset_refresh_Id", value=event["dataset_refresh_id"] + ) self.xcom_push(context=context, key="powerbi_dataset_refresh_status", value=event["status"]) diff --git a/airflow/providers/microsoft/azure/triggers/powerbi.py b/airflow/providers/microsoft/azure/triggers/powerbi.py index 65c3b287531a2..c488c1fef7568 100644 --- a/airflow/providers/microsoft/azure/triggers/powerbi.py +++ b/airflow/providers/microsoft/azure/triggers/powerbi.py @@ -57,7 +57,7 @@ def __init__( conn_id: str, dataset_id: str, group_id: str, - dataset_refresh_id: str, + # dataset_refresh_id: str, end_time: float, timeout: float | None = None, proxies: dict | None = None, @@ -69,7 +69,7 @@ def __init__( self.hook = 
PowerBIHook(conn_id=conn_id, proxies=proxies, api_version=api_version, timeout=timeout) self.dataset_id = dataset_id self.group_id = group_id - self.dataset_refresh_id = dataset_refresh_id + # self.dataset_refresh_id = dataset_refresh_id self.end_time = end_time self.check_interval = check_interval self.wait_for_termination = wait_for_termination @@ -85,7 +85,7 @@ def serialize(self): "api_version": api_version, "dataset_id": self.dataset_id, "group_id": self.group_id, - "dataset_refresh_id": self.dataset_refresh_id, + # "dataset_refresh_id": self.dataset_refresh_id, "end_time": self.end_time, "check_interval": self.check_interval, "wait_for_termination": self.wait_for_termination, @@ -107,6 +107,11 @@ def api_version(self) -> APIVersion: async def run(self) -> AsyncIterator[TriggerEvent]: """Make async connection to the PowerBI and polls for the dataset refresh status.""" try: + self.dataset_refresh_id = await self.hook.trigger_dataset_refresh( + dataset_id=self.dataset_id, + group_id=self.group_id, + ) + dataset_refresh_status = None while self.end_time > time.time(): refresh_details = await self.hook.get_refresh_details_by_refresh_id( From 977b102da08c55799dfe1c43e31ee73a30d4f65b Mon Sep 17 00:00:00 2001 From: Ambika Garg Date: Thu, 8 Aug 2024 08:10:07 +0530 Subject: [PATCH 13/22] Fix unit tests and remove unnecessary parameters --- .../microsoft/azure/operators/powerbi.py | 9 +- .../microsoft/azure/triggers/powerbi.py | 13 +-- .../microsoft/azure/operators/test_powerbi.py | 83 +++---------------- .../microsoft/azure/triggers/test_powerbi.py | 58 ++++++++++--- .../azure/example_refresh_dataset_powerbi.py | 2 +- 5 files changed, 65 insertions(+), 100 deletions(-) diff --git a/airflow/providers/microsoft/azure/operators/powerbi.py b/airflow/providers/microsoft/azure/operators/powerbi.py index b8754fb6dad77..2b6ac5cfe56ad 100644 --- a/airflow/providers/microsoft/azure/operators/powerbi.py +++ b/airflow/providers/microsoft/azure/operators/powerbi.py @@ -17,7 +17,6 
@@ from __future__ import annotations -import asyncio import time from typing import TYPE_CHECKING, Any, Sequence @@ -56,7 +55,6 @@ class PowerBIDatasetRefreshOperator(BaseOperator): :param dataset_id: The dataset id. :param group_id: The workspace id. - :param wait_for_termination: Wait until the pre-existing or current triggered refresh completes before exiting. :param conn_id: Airflow Connection ID that contains the connection information for the Power BI account used for authentication. :param timeout: Time in seconds to wait for a dataset to reach a terminal status for asynchronous waits. Used only if ``wait_for_termination`` is True. :param check_interval: Number of seconds to wait before rechecking the @@ -76,7 +74,6 @@ def __init__( *, dataset_id: str, group_id: str, - wait_for_termination: bool = True, conn_id: str = PowerBIHook.default_conn_name, timeout: float = 60 * 60 * 24 * 7, proxies: dict | None = None, @@ -88,15 +85,11 @@ def __init__( self.hook = PowerBIHook(conn_id=conn_id, proxies=proxies, api_version=api_version, timeout=timeout) self.dataset_id = dataset_id self.group_id = group_id - self.wait_for_termination = wait_for_termination + self.wait_for_termination = True self.conn_id = conn_id self.timeout = timeout self.check_interval = check_interval - @classmethod - def run_async(cls, future: Any) -> Any: - return asyncio.get_event_loop().run_until_complete(future) - def execute(self, context: Context): """Refresh the Power BI Dataset.""" if self.wait_for_termination: diff --git a/airflow/providers/microsoft/azure/triggers/powerbi.py b/airflow/providers/microsoft/azure/triggers/powerbi.py index c488c1fef7568..fba4b2c6317fe 100644 --- a/airflow/providers/microsoft/azure/triggers/powerbi.py +++ b/airflow/providers/microsoft/azure/triggers/powerbi.py @@ -46,7 +46,6 @@ class PowerBITrigger(BaseTrigger): or you can pass a string as `v1.0` or `beta`. :param dataset_id: The dataset Id to refresh. 
:param group_id: The workspace Id where dataset is located. - :param dataset_refresh_id: The dataset refresh Id. :param end_time: Time in seconds when trigger should stop polling. :param check_interval: Time in seconds to wait between each poll. :param wait_for_termination: Wait for the dataset refresh to complete or fail. @@ -57,7 +56,6 @@ def __init__( conn_id: str, dataset_id: str, group_id: str, - # dataset_refresh_id: str, end_time: float, timeout: float | None = None, proxies: dict | None = None, @@ -69,7 +67,6 @@ def __init__( self.hook = PowerBIHook(conn_id=conn_id, proxies=proxies, api_version=api_version, timeout=timeout) self.dataset_id = dataset_id self.group_id = group_id - # self.dataset_refresh_id = dataset_refresh_id self.end_time = end_time self.check_interval = check_interval self.wait_for_termination = wait_for_termination @@ -85,7 +82,6 @@ def serialize(self): "api_version": api_version, "dataset_id": self.dataset_id, "group_id": self.group_id, - # "dataset_refresh_id": self.dataset_refresh_id, "end_time": self.end_time, "check_interval": self.check_interval, "wait_for_termination": self.wait_for_termination, @@ -106,12 +102,11 @@ def api_version(self) -> APIVersion: async def run(self) -> AsyncIterator[TriggerEvent]: """Make async connection to the PowerBI and polls for the dataset refresh status.""" + self.dataset_refresh_id = await self.hook.trigger_dataset_refresh( + dataset_id=self.dataset_id, + group_id=self.group_id, + ) try: - self.dataset_refresh_id = await self.hook.trigger_dataset_refresh( - dataset_id=self.dataset_id, - group_id=self.group_id, - ) - dataset_refresh_status = None while self.end_time > time.time(): refresh_details = await self.hook.get_refresh_details_by_refresh_id( diff --git a/tests/providers/microsoft/azure/operators/test_powerbi.py b/tests/providers/microsoft/azure/operators/test_powerbi.py index e1f396bd27ba2..14560dcf135ed 100644 --- a/tests/providers/microsoft/azure/operators/test_powerbi.py +++ 
b/tests/providers/microsoft/azure/operators/test_powerbi.py @@ -17,7 +17,7 @@ from __future__ import annotations -from unittest.mock import AsyncMock, MagicMock, call +from unittest.mock import MagicMock import pytest @@ -44,6 +44,13 @@ "timeout": 3, } NEW_REFRESH_REQUEST_ID = "5e2d9921-e91b-491f-b7e1-e7d8db49194c" + +SUCCESS_TRIGGER_EVENT = { + "status": "success", + "message": "success", + "dataset_refresh_id": NEW_REFRESH_REQUEST_ID, +} + DEFAULT_DATE = timezone.datetime(2021, 1, 1) @@ -72,101 +79,36 @@ def mock_powerbi_hook(): return hook -# Test cases: refresh_details returns None, Terminal Status, In-progress Status -_get_latest_refresh_details_args = [ - COMPLETED_REFRESH_DETAILS, - FAILED_REFRESH_DETAILS, - IN_PROGRESS_REFRESH_DETAILS, -] - - -@pytest.mark.parametrize( - argnames=("latest_refresh_details"), - argvalues=_get_latest_refresh_details_args, - ids=[ - ( - f"latest_refresh_status_{argval[PowerBIDatasetRefreshFields.STATUS.value]}_no_wait_for_termination" - if argval is not None - else "latest_refresh_status_None_no_wait_for_termination" - ) - for argval in _get_latest_refresh_details_args - ], -) -def test_execute_no_wait_for_termination(mock_powerbi_hook, latest_refresh_details): - operator = PowerBIDatasetRefreshOperator( - wait_for_termination=False, - **CONFIG, - ) - operator.hook = mock_powerbi_hook - context = {"ti": MagicMock()} - - mock_powerbi_hook.trigger_dataset_refresh = AsyncMock(return_value=NEW_REFRESH_REQUEST_ID) - mock_powerbi_hook.get_refresh_details_by_refresh_id = AsyncMock(return_value=latest_refresh_details) - - operator.execute(context) - - assert mock_powerbi_hook.get_refresh_details_by_refresh_id.called - assert context["ti"].xcom_push.call_count == 3 - assert context["ti"].xcom_push.call_args_list == [ - call( - key="powerbi_dataset_refresh_id", - value="5e2d9921-e91b-491f-b7e1-e7d8db49194c", - execution_date=None, - ), - call( - key="powerbi_dataset_refresh_status", - value=latest_refresh_details.get("status"), - 
execution_date=None, - ), - call( - key="powerbi_dataset_refresh_error", - value=latest_refresh_details.get("error", "None"), - execution_date=None, - ), - ] - - def test_execute_wait_for_termination_with_Deferrable(mock_powerbi_hook): operator = PowerBIDatasetRefreshOperator( - wait_for_termination=True, **CONFIG, ) operator.hook = mock_powerbi_hook context = {"ti": MagicMock()} - # Magic mock the hook methods - mock_powerbi_hook.trigger_dataset_refresh = AsyncMock(return_value=NEW_REFRESH_REQUEST_ID) - with pytest.raises(TaskDeferred) as exc: operator.execute(context) - assert mock_powerbi_hook.trigger_dataset_refresh.called - assert isinstance(exc.value.trigger, PowerBITrigger), "Trigger is not a PowerBITriiger" - - assert context["ti"].xcom_push.call_count == 1 - assert context["ti"].xcom_push.call_args_list == [ - call(key="powerbi_dataset_refresh_id", value=NEW_REFRESH_REQUEST_ID, execution_date=None), - ] + # assert mock_powerbi_hook.trigger_dataset_refresh.called + assert isinstance(exc.value.trigger, PowerBITrigger) def test_powerbi_operator_async_execute_complete_success(): """Assert that execute_complete log success message""" operator = PowerBIDatasetRefreshOperator( - wait_for_termination=True, **CONFIG, ) context = {"ti": MagicMock()} operator.execute_complete( context=context, - event={"status": "success", "message": "success", "dataset_refresh_id": "1234"}, + event=SUCCESS_TRIGGER_EVENT, ) - assert context["ti"].xcom_push.call_count == 1 + assert context["ti"].xcom_push.call_count == 2 def test_powerbi_operator_async_execute_complete_fail(): """Assert that execute_complete raise exception on error""" operator = PowerBIDatasetRefreshOperator( - wait_for_termination=True, **CONFIG, ) context = {"ti": MagicMock()} @@ -181,7 +123,6 @@ def test_powerbi_operator_async_execute_complete_fail(): def test_execute_complete_no_event(create_task_instance_of_operator): """Test execute_complete when event is None or empty.""" operator = 
PowerBIDatasetRefreshOperator( - wait_for_termination=True, **CONFIG, ) context = {"ti": MagicMock()} diff --git a/tests/providers/microsoft/azure/triggers/test_powerbi.py b/tests/providers/microsoft/azure/triggers/test_powerbi.py index a17449dd74bea..59957c1643897 100644 --- a/tests/providers/microsoft/azure/triggers/test_powerbi.py +++ b/tests/providers/microsoft/azure/triggers/test_powerbi.py @@ -24,7 +24,7 @@ import pytest -from airflow.providers.microsoft.azure.hooks.powerbi import PowerBIDatasetRefreshStatus +from airflow.providers.microsoft.azure.hooks.powerbi import PowerBIDatasetRefreshStatus, PowerBIHook from airflow.providers.microsoft.azure.triggers.powerbi import PowerBITrigger from airflow.triggers.base import TriggerEvent from tests.providers.microsoft.conftest import get_airflow_connection @@ -48,7 +48,6 @@ def powerbi_trigger(): api_version=API_VERSION, dataset_id=DATASET_ID, group_id=GROUP_ID, - dataset_refresh_id=DATASET_REFRESH_ID, end_time=POWERBI_DATASET_END_TIME, check_interval=CHECK_INTERVAL, wait_for_termination=True, @@ -58,6 +57,17 @@ def powerbi_trigger(): return trigger +@pytest.fixture +def mock_powerbi_hook(): + hook = PowerBIHook() + return hook + + +# @pytest.fixture +# def mock_trigger_dataset_refresh(mock_powerbi_hook): +# mock_powerbi_hook.trigger_dataset_refresh = AsyncMock(return_value=DATASET_REFRESH_ID) + + def test_powerbi_trigger_serialization(): """Asserts that the PowerBI Trigger correctly serializes its arguments and classpath.""" @@ -71,7 +81,6 @@ def test_powerbi_trigger_serialization(): api_version=API_VERSION, dataset_id=DATASET_ID, group_id=GROUP_ID, - dataset_refresh_id=DATASET_REFRESH_ID, end_time=POWERBI_DATASET_END_TIME, check_interval=CHECK_INTERVAL, wait_for_termination=True, @@ -84,7 +93,6 @@ def test_powerbi_trigger_serialization(): "conn_id": POWERBI_CONN_ID, "dataset_id": DATASET_ID, "group_id": GROUP_ID, - "dataset_refresh_id": DATASET_REFRESH_ID, "end_time": POWERBI_DATASET_END_TIME, "proxies": None, 
"api_version": API_VERSION, @@ -95,9 +103,13 @@ def test_powerbi_trigger_serialization(): @pytest.mark.asyncio @mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.get_refresh_details_by_refresh_id") -async def test_powerbi_trigger_run_inprogress(mock_get_refresh_details_by_refresh_id, powerbi_trigger): +@mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.trigger_dataset_refresh") +async def test_powerbi_trigger_run_inprogress( + mock_trigger_dataset_refresh, mock_get_refresh_details_by_refresh_id, powerbi_trigger +): """Assert task isn't completed until timeout if dataset refresh is in progress.""" mock_get_refresh_details_by_refresh_id.return_value = {"status": PowerBIDatasetRefreshStatus.IN_PROGRESS} + mock_trigger_dataset_refresh.return_value = DATASET_REFRESH_ID task = asyncio.create_task(powerbi_trigger.run().__anext__()) await asyncio.sleep(0.5) @@ -108,9 +120,13 @@ async def test_powerbi_trigger_run_inprogress(mock_get_refresh_details_by_refres @pytest.mark.asyncio @mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.get_refresh_details_by_refresh_id") -async def test_powerbi_trigger_run_failed(mock_get_refresh_details_by_refresh_id, powerbi_trigger): +@mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.trigger_dataset_refresh") +async def test_powerbi_trigger_run_failed( + mock_trigger_dataset_refresh, mock_get_refresh_details_by_refresh_id, powerbi_trigger +): """Assert event is triggered upon failed dataset refresh.""" mock_get_refresh_details_by_refresh_id.return_value = {"status": PowerBIDatasetRefreshStatus.FAILED} + mock_trigger_dataset_refresh.return_value = DATASET_REFRESH_ID generator = powerbi_trigger.run() actual = await generator.asend(None) @@ -127,9 +143,13 @@ async def test_powerbi_trigger_run_failed(mock_get_refresh_details_by_refresh_id @pytest.mark.asyncio @mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.get_refresh_details_by_refresh_id") -async def test_powerbi_trigger_run_completed(mock_get_refresh_details_by_refresh_id, powerbi_trigger): 
+@mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.trigger_dataset_refresh") +async def test_powerbi_trigger_run_completed( + mock_trigger_dataset_refresh, mock_get_refresh_details_by_refresh_id, powerbi_trigger +): """Assert event is triggered upon successful dataset refresh.""" mock_get_refresh_details_by_refresh_id.return_value = {"status": PowerBIDatasetRefreshStatus.COMPLETED} + mock_trigger_dataset_refresh.return_value = DATASET_REFRESH_ID generator = powerbi_trigger.run() actual = await generator.asend(None) @@ -147,11 +167,16 @@ async def test_powerbi_trigger_run_completed(mock_get_refresh_details_by_refresh @pytest.mark.asyncio @mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.cancel_dataset_refresh") @mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.get_refresh_details_by_refresh_id") +@mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.trigger_dataset_refresh") async def test_powerbi_trigger_run_exception_during_refresh_check_loop( - mock_get_refresh_details_by_refresh_id, mock_cancel_dataset_refresh, powerbi_trigger + mock_trigger_dataset_refresh, + mock_get_refresh_details_by_refresh_id, + mock_cancel_dataset_refresh, + powerbi_trigger, ): """Assert that run catch exception if Power BI API throw exception""" mock_get_refresh_details_by_refresh_id.side_effect = Exception("Test exception") + mock_trigger_dataset_refresh.return_value = DATASET_REFRESH_ID task = [i async for i in powerbi_trigger.run()] response = TriggerEvent( @@ -169,12 +194,17 @@ async def test_powerbi_trigger_run_exception_during_refresh_check_loop( @pytest.mark.asyncio @mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.cancel_dataset_refresh") @mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.get_refresh_details_by_refresh_id") +@mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.trigger_dataset_refresh") async def test_powerbi_trigger_run_exception_during_refresh_cancellation( - mock_get_refresh_details_by_refresh_id, mock_cancel_dataset_refresh, powerbi_trigger + mock_trigger_dataset_refresh, + 
mock_get_refresh_details_by_refresh_id, + mock_cancel_dataset_refresh, + powerbi_trigger, ): """Assert that run catch exception if Power BI API throw exception""" mock_get_refresh_details_by_refresh_id.side_effect = Exception("Test exception") mock_cancel_dataset_refresh.side_effect = Exception("Exception caused by cancel_dataset_refresh") + mock_trigger_dataset_refresh.return_value = DATASET_REFRESH_ID task = [i async for i in powerbi_trigger.run()] response = TriggerEvent( @@ -192,12 +222,14 @@ async def test_powerbi_trigger_run_exception_during_refresh_cancellation( @pytest.mark.asyncio @mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.get_refresh_details_by_refresh_id") +@mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.trigger_dataset_refresh") async def test_powerbi_trigger_run_exception_without_refresh_id( - mock_get_refresh_details_by_refresh_id, powerbi_trigger + mock_trigger_dataset_refresh, mock_get_refresh_details_by_refresh_id, powerbi_trigger ): """Assert handling of exception when there is no dataset_refresh_id""" powerbi_trigger.dataset_refresh_id = None mock_get_refresh_details_by_refresh_id.side_effect = Exception("Test exception for no dataset_refresh_id") + mock_trigger_dataset_refresh.return_value = None task = [i async for i in powerbi_trigger.run()] response = TriggerEvent( @@ -213,9 +245,13 @@ async def test_powerbi_trigger_run_exception_without_refresh_id( @pytest.mark.asyncio @mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.get_refresh_details_by_refresh_id") -async def test_powerbi_trigger_run_timeout(mock_get_refresh_details_by_refresh_id, powerbi_trigger): +@mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.trigger_dataset_refresh") +async def test_powerbi_trigger_run_timeout( + mock_trigger_dataset_refresh, mock_get_refresh_details_by_refresh_id, powerbi_trigger +): """Assert that powerbi run timesout after end_time elapses""" mock_get_refresh_details_by_refresh_id.return_value = {"status": PowerBIDatasetRefreshStatus.IN_PROGRESS} + 
mock_trigger_dataset_refresh.return_value = DATASET_REFRESH_ID generator = powerbi_trigger.run() actual = await generator.asend(None) diff --git a/tests/system/providers/microsoft/azure/example_refresh_dataset_powerbi.py b/tests/system/providers/microsoft/azure/example_refresh_dataset_powerbi.py index 5be2026538c77..f48ecb2bc6039 100644 --- a/tests/system/providers/microsoft/azure/example_refresh_dataset_powerbi.py +++ b/tests/system/providers/microsoft/azure/example_refresh_dataset_powerbi.py @@ -63,7 +63,7 @@ def create_connection(conn_id_name: str): dataset_id=DATASET_ID, group_id=GROUP_ID, check_interval=30, - wait_for_termination=False, + timeout=120, ) # [END howto_operator_powerbi_refresh_no_wait_for_termination] From 1966a7d02995e517b35a1eafe52d455437a671b4 Mon Sep 17 00:00:00 2001 From: David Blain Date: Thu, 8 Aug 2024 08:39:01 +0200 Subject: [PATCH 14/22] refactor: Initialize hosts within constructor to make sure it's initialized correctly and immutable --- airflow/providers/microsoft/azure/hooks/msgraph.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airflow/providers/microsoft/azure/hooks/msgraph.py b/airflow/providers/microsoft/azure/hooks/msgraph.py index 53c2708cababa..8410d8d7077cd 100644 --- a/airflow/providers/microsoft/azure/hooks/msgraph.py +++ b/airflow/providers/microsoft/azure/hooks/msgraph.py @@ -111,7 +111,7 @@ def __init__( timeout: float | None = None, proxies: dict | None = None, host: str = NationalClouds.Global.value, - scopes: list[str] = ["https://graph.microsoft.com/.default"], # noqa: B006 + scopes: list[str] | None = None, api_version: APIVersion | str | None = None, ): super().__init__() @@ -119,7 +119,7 @@ def __init__( self.timeout = timeout self.proxies = proxies self.host = host - self.scopes = scopes + self.scopes = scopes or ["https://graph.microsoft.com/.default"] self._api_version = self.resolve_api_version_from_value(api_version) @property From fb2d832e7803ce8d5b7995840c539a5efc32a795 Mon Sep 17 
00:00:00 2001 From: David Blain Date: Fri, 9 Aug 2024 14:02:14 +0200 Subject: [PATCH 15/22] fix: Changed the 'powerbi_conn_id' parameter to 'conn_id' for the dataset refresh example in PowerBI --- .../providers/microsoft/azure/example_dataset_refresh.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/system/providers/microsoft/azure/example_dataset_refresh.py b/tests/system/providers/microsoft/azure/example_dataset_refresh.py index 6c04d90d4c07d..92308f958a847 100644 --- a/tests/system/providers/microsoft/azure/example_dataset_refresh.py +++ b/tests/system/providers/microsoft/azure/example_dataset_refresh.py @@ -44,7 +44,7 @@ # [START howto_operator_powerbi_refresh_dataset] dataset_refresh = PowerBIDatasetRefreshOperator( - powerbi_conn_id="powerbi_default", + conn_id="powerbi_default", task_id="dataset_refresh", dataset_id="dataset-id", group_id="group-id", @@ -53,7 +53,7 @@ # [START howto_operator_powerbi_refresh_dataset_async] dataset_refresh2 = PowerBIDatasetRefreshOperator( - powerbi_conn_id="powerbi_default", + conn_id="powerbi_default", task_id="dataset_refresh_async", dataset_id="dataset-id", group_id="group-id", @@ -63,7 +63,7 @@ # [START howto_operator_powerbi_refresh_dataset_force_refresh] dataset_refresh3 = PowerBIDatasetRefreshOperator( - powerbi_conn_id="powerbi_default", + conn_id="powerbi_default", task_id="dataset_refresh_force_refresh", dataset_id="dataset-id", group_id="group-id", From 4032275f147ab1dbafa0a62238e61c130d9f97d6 Mon Sep 17 00:00:00 2001 From: Ambika Garg Date: Sat, 10 Aug 2024 00:48:05 +0530 Subject: [PATCH 16/22] Remove redundant system test for powerbi dataset refresh operator and rename the existing test more meaningfully --- ..._dataset_powerbi.py => example_powerbi_dataset_refresh.py} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename tests/system/providers/microsoft/azure/{example_refresh_dataset_powerbi.py => example_powerbi_dataset_refresh.py} (95%) diff --git 
a/tests/system/providers/microsoft/azure/example_refresh_dataset_powerbi.py b/tests/system/providers/microsoft/azure/example_powerbi_dataset_refresh.py similarity index 95% rename from tests/system/providers/microsoft/azure/example_refresh_dataset_powerbi.py rename to tests/system/providers/microsoft/azure/example_powerbi_dataset_refresh.py index f48ecb2bc6039..cbc5035d4b066 100644 --- a/tests/system/providers/microsoft/azure/example_refresh_dataset_powerbi.py +++ b/tests/system/providers/microsoft/azure/example_powerbi_dataset_refresh.py @@ -56,7 +56,7 @@ def create_connection(conn_id_name: str): ) as dag: set_up_connection = create_connection(CONN_ID) - # [START howto_operator_powerbi_refresh_no_wait_for_termination] + # [START howto_operator_powerbi_refresh_async] refresh_powerbi_dataset = PowerBIDatasetRefreshOperator( conn_id="powerbi_default", task_id="refresh_powerbi_dataset", @@ -65,7 +65,7 @@ def create_connection(conn_id_name: str): check_interval=30, timeout=120, ) - # [END howto_operator_powerbi_refresh_no_wait_for_termination] + # [END howto_operator_powerbi_refresh_async] chain( # TEST SETUP From 09268eadbba98c99aa4da454a5dd9e1be2b27252 Mon Sep 17 00:00:00 2001 From: Ambika Garg Date: Mon, 12 Aug 2024 13:07:08 +0530 Subject: [PATCH 17/22] remove extra comments --- .../microsoft/azure/operators/test_powerbi.py | 1 - .../microsoft/azure/triggers/test_powerbi.py | 12 +-- .../azure/example_dataset_refresh.py | 87 ------------------- .../azure/example_powerbi_dataset_refresh.py | 2 + 4 files changed, 9 insertions(+), 93 deletions(-) delete mode 100644 tests/system/providers/microsoft/azure/example_dataset_refresh.py diff --git a/tests/providers/microsoft/azure/operators/test_powerbi.py b/tests/providers/microsoft/azure/operators/test_powerbi.py index 14560dcf135ed..170d0a1051cf2 100644 --- a/tests/providers/microsoft/azure/operators/test_powerbi.py +++ b/tests/providers/microsoft/azure/operators/test_powerbi.py @@ -89,7 +89,6 @@ def 
test_execute_wait_for_termination_with_Deferrable(mock_powerbi_hook): with pytest.raises(TaskDeferred) as exc: operator.execute(context) - # assert mock_powerbi_hook.trigger_dataset_refresh.called assert isinstance(exc.value.trigger, PowerBITrigger) diff --git a/tests/providers/microsoft/azure/triggers/test_powerbi.py b/tests/providers/microsoft/azure/triggers/test_powerbi.py index 59957c1643897..43b07eb1fce4d 100644 --- a/tests/providers/microsoft/azure/triggers/test_powerbi.py +++ b/tests/providers/microsoft/azure/triggers/test_powerbi.py @@ -27,6 +27,7 @@ from airflow.providers.microsoft.azure.hooks.powerbi import PowerBIDatasetRefreshStatus, PowerBIHook from airflow.providers.microsoft.azure.triggers.powerbi import PowerBITrigger from airflow.triggers.base import TriggerEvent +from tests.providers.microsoft.azure.base import Base from tests.providers.microsoft.conftest import get_airflow_connection POWERBI_CONN_ID = "powerbi_default" @@ -63,9 +64,10 @@ def mock_powerbi_hook(): return hook -# @pytest.fixture -# def mock_trigger_dataset_refresh(mock_powerbi_hook): -# mock_powerbi_hook.trigger_dataset_refresh = AsyncMock(return_value=DATASET_REFRESH_ID) +@pytest.fixture +def base_functions(): + base = Base() + return base def test_powerbi_trigger_serialization(): @@ -113,7 +115,7 @@ async def test_powerbi_trigger_run_inprogress( task = asyncio.create_task(powerbi_trigger.run().__anext__()) await asyncio.sleep(0.5) - # TriggerEvent was not returned + # Assert TriggerEvent was not returned assert task.done() is False asyncio.get_event_loop().stop() @@ -122,7 +124,7 @@ async def test_powerbi_trigger_run_inprogress( @mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.get_refresh_details_by_refresh_id") @mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.trigger_dataset_refresh") async def test_powerbi_trigger_run_failed( - mock_trigger_dataset_refresh, mock_get_refresh_details_by_refresh_id, powerbi_trigger + mock_trigger_dataset_refresh, 
mock_get_refresh_details_by_refresh_id, powerbi_trigger, base_functions ): """Assert event is triggered upon failed dataset refresh.""" mock_get_refresh_details_by_refresh_id.return_value = {"status": PowerBIDatasetRefreshStatus.FAILED} diff --git a/tests/system/providers/microsoft/azure/example_dataset_refresh.py b/tests/system/providers/microsoft/azure/example_dataset_refresh.py deleted file mode 100644 index 92308f958a847..0000000000000 --- a/tests/system/providers/microsoft/azure/example_dataset_refresh.py +++ /dev/null @@ -1,87 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-from __future__ import annotations - -from datetime import datetime, timedelta - -from airflow.models import DAG - -# Ignore missing args provided by default_args -# mypy: disable-error-code="call-arg" -from airflow.operators.empty import EmptyOperator -from airflow.providers.microsoft.azure.operators.powerbi import PowerBIDatasetRefreshOperator -from airflow.utils.edgemodifier import Label - -DAG_ID = "example_powerbi_dataset_refresh" - -with DAG( - dag_id=DAG_ID, - start_date=datetime(2021, 8, 13), - schedule="@daily", - catchup=False, - default_args={ - "retries": 1, - "retry_delay": timedelta(minutes=3), - }, - default_view="graph", -) as dag: - begin = EmptyOperator(task_id="begin") - end = EmptyOperator(task_id="end") - - # [START howto_operator_powerbi_refresh_dataset] - dataset_refresh = PowerBIDatasetRefreshOperator( - conn_id="powerbi_default", - task_id="dataset_refresh", - dataset_id="dataset-id", - group_id="group-id", - ) - # [END howto_operator_powerbi_refresh_dataset] - - # [START howto_operator_powerbi_refresh_dataset_async] - dataset_refresh2 = PowerBIDatasetRefreshOperator( - conn_id="powerbi_default", - task_id="dataset_refresh_async", - dataset_id="dataset-id", - group_id="group-id", - wait_for_termination=False, - ) - # [END howto_operator_powerbi_refresh_dataset_async] - - # [START howto_operator_powerbi_refresh_dataset_force_refresh] - dataset_refresh3 = PowerBIDatasetRefreshOperator( - conn_id="powerbi_default", - task_id="dataset_refresh_force_refresh", - dataset_id="dataset-id", - group_id="group-id", - force_refresh=True, - ) - # [END howto_operator_powerbi_refresh_dataset_force_refresh] - - begin >> Label("No async wait") >> dataset_refresh - begin >> Label("Do async wait with force refresh") >> dataset_refresh2 - begin >> Label("Do async wait") >> dataset_refresh3 >> end - - from tests.system.utils.watcher import watcher - - # This test needs watcher in order to properly mark success/failure - # when "tearDown" task with trigger rule 
is part of the DAG - list(dag.tasks) >> watcher() - -from tests.system.utils import get_test_run # noqa: E402 - -# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest) -test_run = get_test_run(dag) diff --git a/tests/system/providers/microsoft/azure/example_powerbi_dataset_refresh.py b/tests/system/providers/microsoft/azure/example_powerbi_dataset_refresh.py index cbc5035d4b066..52f1f001e9988 100644 --- a/tests/system/providers/microsoft/azure/example_powerbi_dataset_refresh.py +++ b/tests/system/providers/microsoft/azure/example_powerbi_dataset_refresh.py @@ -27,6 +27,8 @@ DAG_ID = "example_refresh_powerbi_dataset" CONN_ID = "powerbi_default" + +# Before running this system test, you should set following environment variables: DATASET_ID = os.environ.get("DATASET_ID", "None") GROUP_ID = os.environ.get("GROUP_ID", "None") CLIENT_ID = os.environ.get("CLIENT_ID", None) From 226b5b81a604066f7cf0a095d289cdec06e92b3a Mon Sep 17 00:00:00 2001 From: Ambika Garg Date: Mon, 12 Aug 2024 14:05:19 +0530 Subject: [PATCH 18/22] Fix msgraph hook tests --- tests/providers/microsoft/azure/hooks/test_msgraph.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/providers/microsoft/azure/hooks/test_msgraph.py b/tests/providers/microsoft/azure/hooks/test_msgraph.py index 71d280a1971da..390be17ba7f35 100644 --- a/tests/providers/microsoft/azure/hooks/test_msgraph.py +++ b/tests/providers/microsoft/azure/hooks/test_msgraph.py @@ -82,13 +82,15 @@ def test_get_api_version_when_api_version_in_config_dict(self): def test_get_host_when_connection_has_scheme_and_host(self): connection = mock_connection(schema="https", host="graph.microsoft.de") - actual = KiotaRequestAdapterHook.get_host(connection) + hook = KiotaRequestAdapterHook() + actual = hook.get_host(connection) assert actual == NationalClouds.Germany.value def test_get_host_when_connection_has_no_scheme_or_host(self): connection = mock_connection() - actual = 
KiotaRequestAdapterHook.get_host(connection) + hook = KiotaRequestAdapterHook() + actual = hook.get_host(connection) assert actual == NationalClouds.Global.value From 0ea609a80ce1e93370edbf266c76ce847e69bbb1 Mon Sep 17 00:00:00 2001 From: Ambika Garg Date: Mon, 12 Aug 2024 17:20:19 +0530 Subject: [PATCH 19/22] Fix powerbi trigger tests --- tests/providers/microsoft/azure/operators/test_powerbi.py | 1 + tests/providers/microsoft/azure/triggers/test_powerbi.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/providers/microsoft/azure/operators/test_powerbi.py b/tests/providers/microsoft/azure/operators/test_powerbi.py index 170d0a1051cf2..2562a04077ba9 100644 --- a/tests/providers/microsoft/azure/operators/test_powerbi.py +++ b/tests/providers/microsoft/azure/operators/test_powerbi.py @@ -132,6 +132,7 @@ def test_execute_complete_no_event(create_task_instance_of_operator): assert context["ti"].xcom_push.call_count == 0 +@pytest.mark.db_test def test_powerbilink(create_task_instance_of_operator): """Assert Power BI Extra link matches the expected URL.""" ti = create_task_instance_of_operator( diff --git a/tests/providers/microsoft/azure/triggers/test_powerbi.py b/tests/providers/microsoft/azure/triggers/test_powerbi.py index 43b07eb1fce4d..23187943563ec 100644 --- a/tests/providers/microsoft/azure/triggers/test_powerbi.py +++ b/tests/providers/microsoft/azure/triggers/test_powerbi.py @@ -34,9 +34,9 @@ DATASET_ID = "dataset_id" GROUP_ID = "group_id" DATASET_REFRESH_ID = "dataset_refresh_id" -POWERBI_DATASET_END_TIME = time.time() + 10 -MODULE = "airflow.providers.microsoft.azure" TIMEOUT = 60 +POWERBI_DATASET_END_TIME = time.time() + TIMEOUT +MODULE = "airflow.providers.microsoft.azure" CHECK_INTERVAL = 3 API_VERSION = "v1.0" From 3e688ff38197001577f129f91370571ea328e834 Mon Sep 17 00:00:00 2001 From: Ambika Garg Date: Tue, 13 Aug 2024 00:38:22 +0530 Subject: [PATCH 20/22] Refactor to pass the provider[microsoft.azure] tests --- 
airflow/providers/microsoft/azure/hooks/powerbi.py | 2 +- airflow/providers/microsoft/azure/triggers/powerbi.py | 9 ++++++--- tests/providers/microsoft/azure/triggers/test_powerbi.py | 5 +++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/airflow/providers/microsoft/azure/hooks/powerbi.py b/airflow/providers/microsoft/azure/hooks/powerbi.py index bd0d2d5cf310a..18a98cab70066 100644 --- a/airflow/providers/microsoft/azure/hooks/powerbi.py +++ b/airflow/providers/microsoft/azure/hooks/powerbi.py @@ -67,7 +67,7 @@ def __init__( self, conn_id: str = default_conn_name, proxies: dict | None = None, - timeout: float | None = None, + timeout: float = 60 * 60 * 24 * 7, api_version: APIVersion | str | None = None, ): super().__init__( diff --git a/airflow/providers/microsoft/azure/triggers/powerbi.py b/airflow/providers/microsoft/azure/triggers/powerbi.py index fba4b2c6317fe..5cebe0529c836 100644 --- a/airflow/providers/microsoft/azure/triggers/powerbi.py +++ b/airflow/providers/microsoft/azure/triggers/powerbi.py @@ -57,7 +57,7 @@ def __init__( dataset_id: str, group_id: str, end_time: float, - timeout: float | None = None, + timeout: float = 60 * 60 * 24 * 7, proxies: dict | None = None, api_version: APIVersion | None = None, check_interval: int = 60, @@ -66,6 +66,7 @@ def __init__( super().__init__() self.hook = PowerBIHook(conn_id=conn_id, proxies=proxies, api_version=api_version, timeout=timeout) self.dataset_id = dataset_id + self.timeout = timeout self.group_id = group_id self.end_time = end_time self.check_interval = check_interval @@ -83,6 +84,7 @@ def serialize(self): "dataset_id": self.dataset_id, "group_id": self.group_id, "end_time": self.end_time, + "timeout": self.timeout, "check_interval": self.check_interval, "wait_for_termination": self.wait_for_termination, }, @@ -108,13 +110,13 @@ async def run(self) -> AsyncIterator[TriggerEvent]: ) try: dataset_refresh_status = None - while self.end_time > time.time(): + start_time = time.monotonic() + 
while start_time + self.timeout > time.monotonic(): refresh_details = await self.hook.get_refresh_details_by_refresh_id( dataset_id=self.dataset_id, group_id=self.group_id, refresh_id=self.dataset_refresh_id, ) - dataset_refresh_status = refresh_details.get("status") if dataset_refresh_status == PowerBIDatasetRefreshStatus.COMPLETED: @@ -135,6 +137,7 @@ async def run(self) -> AsyncIterator[TriggerEvent]: } ) return + self.log.info( "Sleeping for %s. The dataset refresh status is %s.", self.check_interval, diff --git a/tests/providers/microsoft/azure/triggers/test_powerbi.py b/tests/providers/microsoft/azure/triggers/test_powerbi.py index 23187943563ec..ef5efd66dc5e2 100644 --- a/tests/providers/microsoft/azure/triggers/test_powerbi.py +++ b/tests/providers/microsoft/azure/triggers/test_powerbi.py @@ -34,10 +34,10 @@ DATASET_ID = "dataset_id" GROUP_ID = "group_id" DATASET_REFRESH_ID = "dataset_refresh_id" -TIMEOUT = 60 +TIMEOUT = 30 POWERBI_DATASET_END_TIME = time.time() + TIMEOUT MODULE = "airflow.providers.microsoft.azure" -CHECK_INTERVAL = 3 +CHECK_INTERVAL = 10 API_VERSION = "v1.0" @@ -94,6 +94,7 @@ def test_powerbi_trigger_serialization(): assert kwargs == { "conn_id": POWERBI_CONN_ID, "dataset_id": DATASET_ID, + "timeout": TIMEOUT, "group_id": GROUP_ID, "end_time": POWERBI_DATASET_END_TIME, "proxies": None, From a018bdebb7c31c32bccd4639f821869bc187da85 Mon Sep 17 00:00:00 2001 From: David Blain Date: Tue, 13 Aug 2024 15:56:06 +0200 Subject: [PATCH 21/22] refactor: Removed commented out (dead) code --- airflow/providers/microsoft/azure/operators/powerbi.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/airflow/providers/microsoft/azure/operators/powerbi.py b/airflow/providers/microsoft/azure/operators/powerbi.py index 2b6ac5cfe56ad..73995f7e5fedc 100644 --- a/airflow/providers/microsoft/azure/operators/powerbi.py +++ b/airflow/providers/microsoft/azure/operators/powerbi.py @@ -99,7 +99,6 @@ def execute(self, context: Context): conn_id=self.conn_id, 
group_id=self.group_id, dataset_id=self.dataset_id, - # dataset_refresh_id=refresh_id, end_time=end_time, timeout=self.timeout, check_interval=self.check_interval, @@ -118,7 +117,6 @@ def execute_complete(self, context: Context, event: dict[str, str]) -> Any: if event["status"] == "error": raise AirflowException(event["message"]) - # Push Dataset refresh Id and status to Xcom self.xcom_push( context=context, key="powerbi_dataset_refresh_Id", value=event["dataset_refresh_id"] ) From 45e4ae791f299b5fad74e481314033fa3b691a2b Mon Sep 17 00:00:00 2001 From: Ambika Garg Date: Wed, 14 Aug 2024 09:27:31 +0530 Subject: [PATCH 22/22] Refactor: Remove unused parameters and dead code --- airflow/providers/microsoft/azure/hooks/powerbi.py | 1 - .../providers/microsoft/azure/operators/powerbi.py | 3 --- .../providers/microsoft/azure/triggers/powerbi.py | 3 --- .../microsoft/azure/operators/test_powerbi.py | 3 +-- .../microsoft/azure/triggers/test_powerbi.py | 14 +------------- 5 files changed, 2 insertions(+), 22 deletions(-) diff --git a/airflow/providers/microsoft/azure/hooks/powerbi.py b/airflow/providers/microsoft/azure/hooks/powerbi.py index 18a98cab70066..04326f4fecee2 100644 --- a/airflow/providers/microsoft/azure/hooks/powerbi.py +++ b/airflow/providers/microsoft/azure/hooks/powerbi.py @@ -38,7 +38,6 @@ class PowerBIDatasetRefreshFields(Enum): class PowerBIDatasetRefreshStatus: """Power BI refresh dataset statuses.""" - # If the completion state is unknown or a refresh is in progress. 
IN_PROGRESS = "In Progress" FAILED = "Failed" COMPLETED = "Completed" diff --git a/airflow/providers/microsoft/azure/operators/powerbi.py b/airflow/providers/microsoft/azure/operators/powerbi.py index 73995f7e5fedc..e54ad250bde74 100644 --- a/airflow/providers/microsoft/azure/operators/powerbi.py +++ b/airflow/providers/microsoft/azure/operators/powerbi.py @@ -17,7 +17,6 @@ from __future__ import annotations -import time from typing import TYPE_CHECKING, Any, Sequence from airflow.exceptions import AirflowException @@ -93,13 +92,11 @@ def __init__( def execute(self, context: Context): """Refresh the Power BI Dataset.""" if self.wait_for_termination: - end_time = time.time() + self.timeout self.defer( trigger=PowerBITrigger( conn_id=self.conn_id, group_id=self.group_id, dataset_id=self.dataset_id, - end_time=end_time, timeout=self.timeout, check_interval=self.check_interval, wait_for_termination=self.wait_for_termination, diff --git a/airflow/providers/microsoft/azure/triggers/powerbi.py b/airflow/providers/microsoft/azure/triggers/powerbi.py index 5cebe0529c836..d25802b84fb74 100644 --- a/airflow/providers/microsoft/azure/triggers/powerbi.py +++ b/airflow/providers/microsoft/azure/triggers/powerbi.py @@ -56,7 +56,6 @@ def __init__( conn_id: str, dataset_id: str, group_id: str, - end_time: float, timeout: float = 60 * 60 * 24 * 7, proxies: dict | None = None, api_version: APIVersion | None = None, @@ -68,7 +67,6 @@ def __init__( self.dataset_id = dataset_id self.timeout = timeout self.group_id = group_id - self.end_time = end_time self.check_interval = check_interval self.wait_for_termination = wait_for_termination @@ -83,7 +81,6 @@ def serialize(self): "api_version": api_version, "dataset_id": self.dataset_id, "group_id": self.group_id, - "end_time": self.end_time, "timeout": self.timeout, "check_interval": self.check_interval, "wait_for_termination": self.wait_for_termination, diff --git a/tests/providers/microsoft/azure/operators/test_powerbi.py 
b/tests/providers/microsoft/azure/operators/test_powerbi.py index 2562a04077ba9..2ee5ee723d7a7 100644 --- a/tests/providers/microsoft/azure/operators/test_powerbi.py +++ b/tests/providers/microsoft/azure/operators/test_powerbi.py @@ -58,7 +58,6 @@ COMPLETED_REFRESH_DETAILS = { PowerBIDatasetRefreshFields.REQUEST_ID.value: NEW_REFRESH_REQUEST_ID, PowerBIDatasetRefreshFields.STATUS.value: PowerBIDatasetRefreshStatus.COMPLETED, - # serviceExceptionJson is not present when status is not "Failed" } FAILED_REFRESH_DETAILS = { @@ -119,7 +118,7 @@ def test_powerbi_operator_async_execute_complete_fail(): assert context["ti"].xcom_push.call_count == 0 -def test_execute_complete_no_event(create_task_instance_of_operator): +def test_execute_complete_no_event(): """Test execute_complete when event is None or empty.""" operator = PowerBIDatasetRefreshOperator( **CONFIG, diff --git a/tests/providers/microsoft/azure/triggers/test_powerbi.py b/tests/providers/microsoft/azure/triggers/test_powerbi.py index ef5efd66dc5e2..5b44a84149501 100644 --- a/tests/providers/microsoft/azure/triggers/test_powerbi.py +++ b/tests/providers/microsoft/azure/triggers/test_powerbi.py @@ -18,7 +18,6 @@ from __future__ import annotations import asyncio -import time from unittest import mock from unittest.mock import patch @@ -27,7 +26,6 @@ from airflow.providers.microsoft.azure.hooks.powerbi import PowerBIDatasetRefreshStatus, PowerBIHook from airflow.providers.microsoft.azure.triggers.powerbi import PowerBITrigger from airflow.triggers.base import TriggerEvent -from tests.providers.microsoft.azure.base import Base from tests.providers.microsoft.conftest import get_airflow_connection POWERBI_CONN_ID = "powerbi_default" @@ -35,7 +33,6 @@ GROUP_ID = "group_id" DATASET_REFRESH_ID = "dataset_refresh_id" TIMEOUT = 30 -POWERBI_DATASET_END_TIME = time.time() + TIMEOUT MODULE = "airflow.providers.microsoft.azure" CHECK_INTERVAL = 10 API_VERSION = "v1.0" @@ -49,7 +46,6 @@ def powerbi_trigger(): 
api_version=API_VERSION, dataset_id=DATASET_ID, group_id=GROUP_ID, - end_time=POWERBI_DATASET_END_TIME, check_interval=CHECK_INTERVAL, wait_for_termination=True, timeout=TIMEOUT, @@ -64,12 +60,6 @@ def mock_powerbi_hook(): return hook -@pytest.fixture -def base_functions(): - base = Base() - return base - - def test_powerbi_trigger_serialization(): """Asserts that the PowerBI Trigger correctly serializes its arguments and classpath.""" @@ -83,7 +73,6 @@ def test_powerbi_trigger_serialization(): api_version=API_VERSION, dataset_id=DATASET_ID, group_id=GROUP_ID, - end_time=POWERBI_DATASET_END_TIME, check_interval=CHECK_INTERVAL, wait_for_termination=True, timeout=TIMEOUT, @@ -96,7 +85,6 @@ def test_powerbi_trigger_serialization(): "dataset_id": DATASET_ID, "timeout": TIMEOUT, "group_id": GROUP_ID, - "end_time": POWERBI_DATASET_END_TIME, "proxies": None, "api_version": API_VERSION, "check_interval": CHECK_INTERVAL, @@ -125,7 +113,7 @@ async def test_powerbi_trigger_run_inprogress( @mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.get_refresh_details_by_refresh_id") @mock.patch(f"{MODULE}.hooks.powerbi.PowerBIHook.trigger_dataset_refresh") async def test_powerbi_trigger_run_failed( - mock_trigger_dataset_refresh, mock_get_refresh_details_by_refresh_id, powerbi_trigger, base_functions + mock_trigger_dataset_refresh, mock_get_refresh_details_by_refresh_id, powerbi_trigger ): """Assert event is triggered upon failed dataset refresh.""" mock_get_refresh_details_by_refresh_id.return_value = {"status": PowerBIDatasetRefreshStatus.FAILED}