diff --git a/python/mypy.ini b/python/mypy.ini index a2e5681ee78d..7fb15a21e5be 100644 --- a/python/mypy.ini +++ b/python/mypy.ini @@ -29,14 +29,6 @@ ignore_errors = true ignore_errors = true # TODO (eavanvalkenburg): remove this: https://github.com/microsoft/semantic-kernel/issues/7134 -[mypy-semantic_kernel.connectors.utils.*] -ignore_errors = true -# TODO (eavanvalkenburg): remove this: https://github.com/microsoft/semantic-kernel/issues/7136 - -[mypy-semantic_kernel.connectors.search_engine.*] -ignore_errors = true -# TODO (eavanvalkenburg): remove this: https://github.com/microsoft/semantic-kernel/issues/7136 - [mypy-semantic_kernel.connectors.ai.function_choice_behavior.*] ignore_errors = true # TODO (eavanvalkenburg): remove this: https://github.com/microsoft/semantic-kernel/issues/7137 diff --git a/python/semantic_kernel/connectors/search_engine/bing_connector.py b/python/semantic_kernel/connectors/search_engine/bing_connector.py index 03925ea96708..93dea06217b1 100644 --- a/python/semantic_kernel/connectors/search_engine/bing_connector.py +++ b/python/semantic_kernel/connectors/search_engine/bing_connector.py @@ -3,11 +3,12 @@ import logging import urllib -import aiohttp +from httpx import AsyncClient, HTTPStatusError, RequestError +from pydantic import ValidationError from semantic_kernel.connectors.search_engine.bing_connector_settings import BingSettings from semantic_kernel.connectors.search_engine.connector import ConnectorBase -from semantic_kernel.exceptions import ServiceInvalidRequestError +from semantic_kernel.exceptions import ServiceInitializationError, ServiceInvalidRequestError logger: logging.Logger = logging.getLogger(__name__) @@ -35,12 +36,15 @@ def __init__( the settings are read from this file path location. env_file_encoding (str | None): The optional encoding of the .env file. 
""" - self._settings = BingSettings.create( - api_key=api_key, - custom_config=custom_config, - env_file_path=env_file_path, - env_file_encoding=env_file_encoding, - ) + try: + self._settings = BingSettings.create( + api_key=api_key, + custom_config=custom_config, + env_file_path=env_file_path, + env_file_encoding=env_file_encoding, + ) + except ValidationError as ex: + raise ServiceInitializationError("Failed to create Bing settings.") from ex async def search(self, query: str, num_results: int = 1, offset: int = 0) -> list[str]: """Returns the search results of the query provided by pinging the Bing web search API.""" @@ -60,38 +64,33 @@ async def search(self, query: str, num_results: int = 1, offset: int = 0) -> lis params:\nquery: {query}\nnum_results: {num_results}\noffset: {offset}" ) - _base_url = ( + base_url = ( "https://api.bing.microsoft.com/v7.0/custom/search" if self._settings.custom_config else "https://api.bing.microsoft.com/v7.0/search" ) - _request_url = ( - f"{_base_url}?q={urllib.parse.quote_plus(query)}&count={num_results}&offset={offset}" - + ( - f"&customConfig={self._settings.custom_config}" - if self._settings.custom_config - else "" - ) + request_url = f"{base_url}?q={urllib.parse.quote_plus(query)}&count={num_results}&offset={offset}" + ( + f"&customConfig={self._settings.custom_config}" if self._settings.custom_config else "" ) - logger.info(f"Sending GET request to {_request_url}") + logger.info(f"Sending GET request to {request_url}") - headers = {"Ocp-Apim-Subscription-Key": self._settings.api_key.get_secret_value()} + if self._settings.api_key is not None: + headers = {"Ocp-Apim-Subscription-Key": self._settings.api_key.get_secret_value()} try: - async with aiohttp.ClientSession() as session, session.get(_request_url, headers=headers) as response: + async with AsyncClient() as client: + response = await client.get(request_url, headers=headers) response.raise_for_status() - if response.status == 200: - data = await response.json() - 
pages = data.get("webPages", {}).get("value") - if pages: - return list(map(lambda x: x["snippet"], pages)) or [] - return None + data = response.json() + pages = data.get("webPages", {}).get("value") + if pages: + return [page["snippet"] for page in pages] return [] - except aiohttp.ClientResponseError as ex: + except HTTPStatusError as ex: logger.error(f"Failed to get search results: {ex}") raise ServiceInvalidRequestError("Failed to get search results.") from ex - except aiohttp.ClientError as ex: + except RequestError as ex: logger.error(f"Client error occurred: {ex}") raise ServiceInvalidRequestError("A client error occurred while getting search results.") from ex except Exception as ex: diff --git a/python/semantic_kernel/connectors/search_engine/bing_connector_settings.py b/python/semantic_kernel/connectors/search_engine/bing_connector_settings.py index 45443df7409d..508993e35641 100644 --- a/python/semantic_kernel/connectors/search_engine/bing_connector_settings.py +++ b/python/semantic_kernel/connectors/search_engine/bing_connector_settings.py @@ -23,5 +23,5 @@ class BingSettings(KernelBaseSettings): env_prefix: ClassVar[str] = "BING_" - api_key: SecretStr | None = None + api_key: SecretStr custom_config: str | None = None diff --git a/python/semantic_kernel/connectors/search_engine/google_connector.py b/python/semantic_kernel/connectors/search_engine/google_connector.py index b0e13988ac4a..a0b286e20819 100644 --- a/python/semantic_kernel/connectors/search_engine/google_connector.py +++ b/python/semantic_kernel/connectors/search_engine/google_connector.py @@ -3,9 +3,11 @@ import logging import urllib -import aiohttp +from httpx import AsyncClient, HTTPStatusError, RequestError +from pydantic import ValidationError from semantic_kernel.connectors.search_engine.connector import ConnectorBase +from semantic_kernel.connectors.search_engine.google_search_settings import GoogleSearchSettings from semantic_kernel.exceptions import ServiceInitializationError, 
ServiceInvalidRequestError logger: logging.Logger = logging.getLogger(__name__) @@ -14,22 +16,50 @@ class GoogleConnector(ConnectorBase): """A search engine connector that uses the Google Custom Search API to perform a web search.""" - _api_key: str - _search_engine_id: str - - def __init__(self, api_key: str, search_engine_id: str) -> None: - """Initializes a new instance of the GoogleConnector class.""" - self._api_key = api_key - self._search_engine_id = search_engine_id - - if not self._api_key: - raise ServiceInitializationError("Google Custom Search API key cannot be null.") - - if not self._search_engine_id: + _settings: GoogleSearchSettings + + def __init__( + self, + api_key: str | None = None, + search_engine_id: str | None = None, + env_file_path: str | None = None, + env_file_encoding: str | None = None, + ) -> None: + """Initializes a new instance of the GoogleConnector class. + + Args: + api_key (str | None): The Google Custom Search API key. If provided, will override + the value in the env vars or .env file. + search_engine_id (str | None): The Google search engine ID. If provided, will override + the value in the env vars or .env file. + env_file_path (str | None): The optional path to the .env file. If provided, + the settings are read from this file path location. + env_file_encoding (str | None): The optional encoding of the .env file. 
+ """ + try: + self._settings = GoogleSearchSettings.create( + api_key=api_key, + search_engine_id=search_engine_id, + env_file_path=env_file_path, + env_file_encoding=env_file_encoding, + ) + except ValidationError as ex: + raise ServiceInitializationError("Failed to create Google Search settings.") from ex + + if not self._settings.search_engine_id: raise ServiceInitializationError("Google search engine ID cannot be null.") async def search(self, query: str, num_results: int = 1, offset: int = 0) -> list[str]: - """Returns the search results of the query provided by pinging the Google Custom search API.""" + """Returns the search results of the query provided by pinging the Google Custom search API. + + Args: + query (str): The search query. + num_results (int): The number of search results to return. Default is 1. + offset (int): The offset of the search results. Default is 0. + + Returns: + list[str]: A list of search results snippets. + """ if not query: raise ServiceInvalidRequestError("query cannot be 'None' or empty.") @@ -46,20 +76,31 @@ async def search(self, query: str, num_results: int = 1, offset: int = 0) -> lis params:\nquery: {query}\nnum_results: {num_results}\noffset: {offset}" ) - _base_url = "https://www.googleapis.com/customsearch/v1" - _request_url = ( - f"{_base_url}?q={urllib.parse.quote_plus(query)}" - f"&key={self._api_key}&cx={self._search_engine_id}" + base_url = "https://www.googleapis.com/customsearch/v1" + request_url = ( + f"{base_url}?q={urllib.parse.quote_plus(query)}" + f"&key={self._settings.search_api_key.get_secret_value()}&cx={self._settings.search_engine_id}" f"&num={num_results}&start={offset}" ) logger.info("Sending GET request to Google Search API.") - async with aiohttp.ClientSession() as session, session.get(_request_url, raise_for_status=True) as response: - if response.status == 200: - data = await response.json() + logger.info("Sending GET request to Google Search API.") + + try: + async with AsyncClient() as client: 
+ response = await client.get(request_url) + response.raise_for_status() + data = response.json() logger.info("Request successful.") logger.info(f"API Response: {data}") - return [x["snippet"] for x in data["items"]] - logger.error(f"Request to Google Search API failed with status code: {response.status}.") - return [] + return [x["snippet"] for x in data.get("items", [])] + except HTTPStatusError as ex: + logger.error(f"Failed to get search results: {ex}") + raise ServiceInvalidRequestError("Failed to get search results.") from ex + except RequestError as ex: + logger.error(f"Client error occurred: {ex}") + raise ServiceInvalidRequestError("A client error occurred while getting search results.") from ex + except Exception as ex: + logger.error(f"An unexpected error occurred: {ex}") + raise ServiceInvalidRequestError("An unexpected error occurred while getting search results.") from ex diff --git a/python/semantic_kernel/connectors/search_engine/google_search_settings.py b/python/semantic_kernel/connectors/search_engine/google_search_settings.py new file mode 100644 index 000000000000..e715e6e84e61 --- /dev/null +++ b/python/semantic_kernel/connectors/search_engine/google_search_settings.py @@ -0,0 +1,30 @@ +# Copyright (c) Microsoft. All rights reserved. + +from typing import ClassVar + +from pydantic import SecretStr + +from semantic_kernel.kernel_pydantic import KernelBaseSettings + + +class GoogleSearchSettings(KernelBaseSettings): + """Google Search Connector settings. + + The settings are first loaded from environment variables with the prefix 'GOOGLE_'. If the + environment variables are not found, the settings can be loaded from a .env file with the + encoding 'utf-8'. If the settings are not found in the .env file, the settings are ignored; + however, validation will fail alerting that the settings are missing. 
+ + Required settings for prefix 'GOOGLE_' are: + - search_api_key: SecretStr - The Google Search API key (Env var GOOGLE_SEARCH_API_KEY) + + Optional settings for prefix 'GOOGLE_' are: + - search_engine_id: str - The Google search engine ID (Env var GOOGLE_SEARCH_ENGINE_ID) + - env_file_path: str | None - if provided, the .env settings are read from this file path location + - env_file_encoding: str - if provided, the .env file encoding used. Defaults to "utf-8". + """ + + env_prefix: ClassVar[str] = "GOOGLE_" + + search_api_key: SecretStr + search_engine_id: str | None = None diff --git a/python/semantic_kernel/connectors/utils/document_loader.py b/python/semantic_kernel/connectors/utils/document_loader.py index 616ea6d83b46..74a0190b8bb1 100644 --- a/python/semantic_kernel/connectors/utils/document_loader.py +++ b/python/semantic_kernel/connectors/utils/document_loader.py @@ -1,34 +1,48 @@ # Copyright (c) Microsoft. All rights reserved. import logging -from collections.abc import Callable -from typing import Any +from collections.abc import Awaitable, Callable +from inspect import isawaitable -import httpx +from httpx import AsyncClient, HTTPStatusError, RequestError from semantic_kernel.connectors.telemetry import HTTP_USER_AGENT +from semantic_kernel.exceptions import ServiceInvalidRequestError logger: logging.Logger = logging.getLogger(__name__) class DocumentLoader: - @staticmethod async def from_uri( url: str, - http_client: httpx.AsyncClient, - auth_callback: Callable[[Any], None] | None, + http_client: AsyncClient, + auth_callback: Callable[..., None | Awaitable[dict[str, str]]] | None, user_agent: str | None = HTTP_USER_AGENT, ): """Load the manifest from the given URL.""" - headers = {"User-Agent": user_agent} - async with http_client as client: - if auth_callback: - await auth_callback(client, url) - - logger.info(f"Importing document from {url}") + if user_agent is None: + user_agent = HTTP_USER_AGENT - response = await client.get(url, headers=headers) - 
response.raise_for_status() - - return response.text + headers = {"User-Agent": user_agent} + try: + async with http_client as client: + if auth_callback: + callback = auth_callback(client, url) + if isawaitable(callback): + await callback + + logger.info(f"Importing document from {url}") + + response = await client.get(url, headers=headers) + response.raise_for_status() + return response.text + except HTTPStatusError as ex: + logger.error(f"Failed to get document: {ex}") + raise ServiceInvalidRequestError("Failed to get document.") from ex + except RequestError as ex: + logger.error(f"Client error occurred: {ex}") + raise ServiceInvalidRequestError("A client error occurred while getting the document.") from ex + except Exception as ex: + logger.error(f"An unexpected error occurred: {ex}") + raise ServiceInvalidRequestError("An unexpected error occurred while getting the document.") from ex diff --git a/python/tests/conftest.py b/python/tests/conftest.py index e5481f1cb445..f58dde8744bf 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -258,10 +258,7 @@ def mistralai_unit_test_env(monkeypatch, exclude_list, override_env_param_dict): if override_env_param_dict is None: override_env_param_dict = {} - env_vars = { - "MISTRALAI_CHAT_MODEL_ID": "test_chat_model_id", - "MISTRALAI_API_KEY": "test_api_key" - } + env_vars = {"MISTRALAI_CHAT_MODEL_ID": "test_chat_model_id", "MISTRALAI_API_KEY": "test_api_key"} env_vars.update(override_env_param_dict) @@ -322,3 +319,53 @@ def azure_ai_search_unit_test_env(monkeypatch, exclude_list, override_env_param_ monkeypatch.delenv(key, raising=False) return env_vars + + +@pytest.fixture() +def bing_unit_test_env(monkeypatch, exclude_list, override_env_param_dict): + """Fixture to set environment variables for BingConnector.""" + if exclude_list is None: + exclude_list = [] + + if override_env_param_dict is None: + override_env_param_dict = {} + + env_vars = { + "BING_API_KEY": "test_api_key", + 
"BING_CUSTOM_CONFIG": "test_org_id", + } + + env_vars.update(override_env_param_dict) + + for key, value in env_vars.items(): + if key not in exclude_list: + monkeypatch.setenv(key, value) + else: + monkeypatch.delenv(key, raising=False) + + return env_vars + + +@pytest.fixture() +def google_search_unit_test_env(monkeypatch, exclude_list, override_env_param_dict): + """Fixture to set environment variables for the Google Search Connector.""" + if exclude_list is None: + exclude_list = [] + + if override_env_param_dict is None: + override_env_param_dict = {} + + env_vars = { + "GOOGLE_SEARCH_API_KEY": "test_api_key", + "GOOGLE_SEARCH_ENGINE_ID": "test_id", + } + + env_vars.update(override_env_param_dict) + + for key, value in env_vars.items(): + if key not in exclude_list: + monkeypatch.setenv(key, value) + else: + monkeypatch.delenv(key, raising=False) + + return env_vars diff --git a/python/tests/unit/connectors/search_engine/test_bing_search_connector.py b/python/tests/unit/connectors/search_engine/test_bing_search_connector.py new file mode 100644 index 000000000000..e13c02c0f70e --- /dev/null +++ b/python/tests/unit/connectors/search_engine/test_bing_search_connector.py @@ -0,0 +1,138 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +from unittest.mock import AsyncMock, patch + +import pytest +from httpx import HTTPStatusError, Request, RequestError, Response + +from semantic_kernel.connectors.search_engine.bing_connector import BingConnector +from semantic_kernel.exceptions import ServiceInitializationError, ServiceInvalidRequestError + + +@pytest.fixture +def bing_connector(bing_unit_test_env): + """Set up the fixture to configure the Bing connector for these tests.""" + return BingConnector() + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "status_code, response_data, expected_result", + [ + (200, {"webPages": {"value": [{"snippet": "test snippet"}]}}, ["test snippet"]), + (201, {"webPages": {"value": [{"snippet": "test snippet"}]}}, ["test snippet"]), + (202, {"webPages": {"value": [{"snippet": "test snippet"}]}}, ["test snippet"]), + (204, {}, []), + (200, {}, []), + ], +) +@patch("httpx.AsyncClient.get") +async def test_search_success(mock_get, bing_connector, status_code, response_data, expected_result): + query = "test query" + num_results = 1 + offset = 0 + + mock_request = Request(method="GET", url="https://api.bing.microsoft.com/v7.0/search") + + mock_response = Response( + status_code=status_code, + json=response_data, + request=mock_request, + ) + + mock_get.return_value = mock_response + + results = await bing_connector.search(query, num_results, offset) + assert results == expected_result + mock_get.assert_awaited_once() + + +@pytest.mark.parametrize("exclude_list", [["BING_API_KEY"]], indirect=True) +def test_bing_search_connector_init_with_empty_api_key(bing_unit_test_env) -> None: + with pytest.raises(ServiceInitializationError): + BingConnector( + env_file_path="test.env", + ) + + +@pytest.mark.asyncio +@patch("httpx.AsyncClient.get") +async def test_search_http_status_error(mock_get, bing_connector): + query = "test query" + num_results = 1 + offset = 0 + + mock_get.side_effect = HTTPStatusError("error", request=AsyncMock(), response=AsyncMock(status_code=500)) + + 
with pytest.raises(ServiceInvalidRequestError, match="Failed to get search results."): + await bing_connector.search(query, num_results, offset) + mock_get.assert_awaited_once() + + +@pytest.mark.asyncio +@patch("httpx.AsyncClient.get") +async def test_search_request_error(mock_get, bing_connector): + query = "test query" + num_results = 1 + offset = 0 + + mock_get.side_effect = RequestError("error", request=AsyncMock()) + + with pytest.raises(ServiceInvalidRequestError, match="A client error occurred while getting search results."): + await bing_connector.search(query, num_results, offset) + mock_get.assert_awaited_once() + + +@pytest.mark.asyncio +@patch("httpx.AsyncClient.get") +async def test_search_general_exception(mock_get, bing_connector): + query = "test query" + num_results = 1 + offset = 0 + + mock_get.side_effect = Exception("Unexpected error") + + with pytest.raises(ServiceInvalidRequestError, match="An unexpected error occurred while getting search results."): + await bing_connector.search(query, num_results, offset) + mock_get.assert_awaited_once() + + +@pytest.mark.asyncio +async def test_search_empty_query(bing_connector): + with pytest.raises(ServiceInvalidRequestError) as excinfo: + await bing_connector.search("", 1, 0) + assert str(excinfo.value) == "query cannot be 'None' or empty." + + +@pytest.mark.asyncio +async def test_search_invalid_num_results(bing_connector): + with pytest.raises(ServiceInvalidRequestError) as excinfo: + await bing_connector.search("test", 0, 0) + assert str(excinfo.value) == "num_results value must be greater than 0." + + with pytest.raises(ServiceInvalidRequestError) as excinfo: + await bing_connector.search("test", 51, 0) + assert str(excinfo.value) == "num_results value must be less than 50." 
+ + +@pytest.mark.asyncio +async def test_search_invalid_offset(bing_connector): + with pytest.raises(ServiceInvalidRequestError) as excinfo: + await bing_connector.search("test", 1, -1) + assert str(excinfo.value) == "offset must be greater than 0." + + +@pytest.mark.asyncio +async def test_search_api_failure(bing_connector): + query = "test query" + num_results = 1 + offset = 0 + + async def mock_get(*args, **kwargs): + raise HTTPStatusError("error", request=AsyncMock(), response=AsyncMock(status_code=500)) + + with ( + patch("httpx.AsyncClient.get", new=mock_get), + pytest.raises(ServiceInvalidRequestError, match="Failed to get search results."), + ): + await bing_connector.search(query, num_results, offset) diff --git a/python/tests/unit/connectors/search_engine/test_google_search_connector.py b/python/tests/unit/connectors/search_engine/test_google_search_connector.py new file mode 100644 index 000000000000..8638b05bab23 --- /dev/null +++ b/python/tests/unit/connectors/search_engine/test_google_search_connector.py @@ -0,0 +1,131 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +from unittest.mock import AsyncMock, patch + +import pytest +from httpx import HTTPStatusError, Request, RequestError, Response + +from semantic_kernel.connectors.search_engine.google_connector import GoogleConnector +from semantic_kernel.exceptions import ServiceInitializationError, ServiceInvalidRequestError + + +@pytest.fixture +def google_connector(google_search_unit_test_env): + return GoogleConnector() + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "status_code, response_data, expected_result", + [ + (200, {"items": [{"snippet": "test snippet"}]}, ["test snippet"]), + (201, {"items": [{"snippet": "test snippet"}]}, ["test snippet"]), + (202, {"items": [{"snippet": "test snippet"}]}, ["test snippet"]), + (204, {}, []), + (200, {}, []), + ], +) +@patch("httpx.AsyncClient.get") +async def test_search_success(mock_get, google_connector, status_code, response_data, expected_result): + query = "test query" + num_results = 1 + offset = 0 + + mock_request = Request(method="GET", url="https://www.googleapis.com/customsearch/v1") + + mock_response = Response( + status_code=status_code, + json=response_data, + request=mock_request, + ) + + mock_get.return_value = mock_response + + results = await google_connector.search(query, num_results, offset) + assert results == expected_result + mock_get.assert_awaited_once() + + +@pytest.mark.parametrize("exclude_list", [["GOOGLE_SEARCH_API_KEY"]], indirect=True) +def test_google_search_connector_init_with_empty_api_key(google_search_unit_test_env) -> None: + with pytest.raises(ServiceInitializationError): + GoogleConnector( + env_file_path="test.env", + ) + + +@pytest.mark.parametrize("exclude_list", [["GOOGLE_SEARCH_ENGINE_ID"]], indirect=True) +def test_google_search_connector_init_with_empty_search_id(google_search_unit_test_env) -> None: + with pytest.raises(ServiceInitializationError): + GoogleConnector( + env_file_path="test.env", + ) + + +@pytest.mark.asyncio +@patch("httpx.AsyncClient.get") +async def 
test_search_http_status_error(mock_get, google_connector): + query = "test query" + num_results = 1 + offset = 0 + + mock_get.side_effect = HTTPStatusError("error", request=AsyncMock(), response=AsyncMock(status_code=500)) + + with pytest.raises(ServiceInvalidRequestError, match="Failed to get search results."): + await google_connector.search(query, num_results, offset) + mock_get.assert_awaited_once() + + +@pytest.mark.asyncio +@patch("httpx.AsyncClient.get") +async def test_search_request_error(mock_get, google_connector): + query = "test query" + num_results = 1 + offset = 0 + + mock_get.side_effect = RequestError("error", request=AsyncMock()) + + with pytest.raises(ServiceInvalidRequestError, match="A client error occurred while getting search results."): + await google_connector.search(query, num_results, offset) + mock_get.assert_awaited_once() + + +@pytest.mark.asyncio +@patch("httpx.AsyncClient.get") +async def test_search_general_exception(mock_get, google_connector): + query = "test query" + num_results = 1 + offset = 0 + + mock_get.side_effect = Exception("Unexpected error") + + with pytest.raises(ServiceInvalidRequestError, match="An unexpected error occurred while getting search results."): + await google_connector.search(query, num_results, offset) + mock_get.assert_awaited_once() + + +@pytest.mark.asyncio +async def test_search_invalid_query(google_connector): + with pytest.raises(ServiceInvalidRequestError, match="query cannot be 'None' or empty."): + await google_connector.search(query="") + + +@pytest.mark.asyncio +async def test_search_num_results_less_than_or_equal_to_zero(google_connector): + with pytest.raises(ServiceInvalidRequestError, match="num_results value must be greater than 0."): + await google_connector.search(query="test query", num_results=0) + + with pytest.raises(ServiceInvalidRequestError, match="num_results value must be greater than 0."): + await google_connector.search(query="test query", num_results=-1) + + 
+@pytest.mark.asyncio +async def test_search_num_results_greater_than_ten(google_connector): + with pytest.raises(ServiceInvalidRequestError, match="num_results value must be less than or equal to 10."): + await google_connector.search(query="test query", num_results=11) + + +@pytest.mark.asyncio +async def test_search_offset_less_than_zero(google_connector): + with pytest.raises(ServiceInvalidRequestError, match="offset must be greater than 0."): + await google_connector.search(query="test query", offset=-1) diff --git a/python/tests/unit/connectors/utils/test_document_loader.py b/python/tests/unit/connectors/utils/test_document_loader.py new file mode 100644 index 000000000000..a7ca87e6cd18 --- /dev/null +++ b/python/tests/unit/connectors/utils/test_document_loader.py @@ -0,0 +1,108 @@ +# Copyright (c) Microsoft. All rights reserved. + +from unittest.mock import AsyncMock, patch + +import pytest +from httpx import AsyncClient, HTTPStatusError, RequestError + +from semantic_kernel.connectors.telemetry import HTTP_USER_AGENT +from semantic_kernel.connectors.utils.document_loader import DocumentLoader +from semantic_kernel.exceptions import ServiceInvalidRequestError + + +@pytest.fixture +def http_client(): + return AsyncClient() + + +@pytest.mark.parametrize( + ("user_agent", "expected_user_agent"), + [(None, HTTP_USER_AGENT), (HTTP_USER_AGENT, HTTP_USER_AGENT), ("Custom-Agent", "Custom-Agent")], +) +@pytest.mark.asyncio +async def test_from_uri_success(http_client, user_agent, expected_user_agent): + url = "https://example.com/document" + response_text = "Document content" + + mock_response = AsyncMock() + mock_response.status_code = 200 + mock_response.text = response_text + mock_response.raise_for_status = AsyncMock() + + http_client.get = AsyncMock(return_value=mock_response) + + result = await DocumentLoader.from_uri(url, http_client, None, user_agent) + assert result == response_text + http_client.get.assert_awaited_once_with(url, headers={"User-Agent": 
expected_user_agent}) + + +@pytest.mark.asyncio +async def test_from_uri_default_user_agent(http_client): + url = "https://example.com/document" + response_text = "Document content" + + mock_response = AsyncMock() + mock_response.status_code = 200 + mock_response.text = response_text + mock_response.raise_for_status = AsyncMock() + + http_client.get = AsyncMock(return_value=mock_response) + + result = await DocumentLoader.from_uri(url, http_client, None) + assert result == response_text + http_client.get.assert_awaited_once_with(url, headers={"User-Agent": HTTP_USER_AGENT}) + + +@pytest.mark.asyncio +async def test_from_uri_with_auth_callback(http_client): + url = "https://example.com/document" + response_text = "Document content" + + async def auth_callback(client, url): + return {"Authorization": "Bearer token"} + + mock_response = AsyncMock() + mock_response.status_code = 200 + mock_response.text = response_text + mock_response.raise_for_status = AsyncMock() + + http_client.get = AsyncMock(return_value=mock_response) + + result = await DocumentLoader.from_uri(url, http_client, auth_callback) + assert result == response_text + http_client.get.assert_awaited_once_with(url, headers={"User-Agent": HTTP_USER_AGENT}) + + +@pytest.mark.asyncio +async def test_from_uri_request_error(http_client): + url = "https://example.com/document" + + http_client.get = AsyncMock(side_effect=RequestError("error", request=None)) + + with pytest.raises(ServiceInvalidRequestError): + await DocumentLoader.from_uri(url, http_client, None) + http_client.get.assert_awaited_once_with(url, headers={"User-Agent": HTTP_USER_AGENT}) + + +@pytest.mark.asyncio +@patch("httpx.AsyncClient.get") +async def test_from_uri_http_status_error(mock_get, http_client): + url = "https://example.com/document" + + mock_get.side_effect = HTTPStatusError("error", request=AsyncMock(), response=AsyncMock(status_code=500)) + + with pytest.raises(ServiceInvalidRequestError, match="Failed to get document."): + await 
DocumentLoader.from_uri(url, http_client, None) + mock_get.assert_awaited_once_with(url, headers={"User-Agent": HTTP_USER_AGENT}) + + +@pytest.mark.asyncio +@patch("httpx.AsyncClient.get") +async def test_from_uri_general_exception(mock_get, http_client): + url = "https://example.com/document" + + mock_get.side_effect = Exception("Unexpected error") + + with pytest.raises(ServiceInvalidRequestError, match="An unexpected error occurred while getting the document."): + await DocumentLoader.from_uri(url, http_client, None) + mock_get.assert_awaited_once_with(url, headers={"User-Agent": HTTP_USER_AGENT})