Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions python/mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,6 @@ ignore_errors = true
ignore_errors = true
# TODO (eavanvalkenburg): remove this: https://github.com/microsoft/semantic-kernel/issues/7134

[mypy-semantic_kernel.connectors.utils.*]
ignore_errors = true
# TODO (eavanvalkenburg): remove this: https://github.com/microsoft/semantic-kernel/issues/7136

[mypy-semantic_kernel.connectors.search_engine.*]
ignore_errors = true
# TODO (eavanvalkenburg): remove this: https://github.com/microsoft/semantic-kernel/issues/7136

[mypy-semantic_kernel.connectors.ai.function_choice_behavior.*]
ignore_errors = true
# TODO (eavanvalkenburg): remove this: https://github.com/microsoft/semantic-kernel/issues/7137
Expand Down
53 changes: 26 additions & 27 deletions python/semantic_kernel/connectors/search_engine/bing_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@
import logging
import urllib

import aiohttp
from httpx import AsyncClient, HTTPStatusError, RequestError
from pydantic import ValidationError

from semantic_kernel.connectors.search_engine.bing_connector_settings import BingSettings
from semantic_kernel.connectors.search_engine.connector import ConnectorBase
from semantic_kernel.exceptions import ServiceInvalidRequestError
from semantic_kernel.exceptions import ServiceInitializationError, ServiceInvalidRequestError

logger: logging.Logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -35,12 +36,15 @@ def __init__(
the settings are read from this file path location.
env_file_encoding (str | None): The optional encoding of the .env file.
"""
self._settings = BingSettings.create(
api_key=api_key,
custom_config=custom_config,
env_file_path=env_file_path,
env_file_encoding=env_file_encoding,
)
try:
self._settings = BingSettings.create(
api_key=api_key,
custom_config=custom_config,
env_file_path=env_file_path,
env_file_encoding=env_file_encoding,
)
except ValidationError as ex:
raise ServiceInitializationError("Failed to create Bing settings.") from ex

async def search(self, query: str, num_results: int = 1, offset: int = 0) -> list[str]:
"""Returns the search results of the query provided by pinging the Bing web search API."""
Expand All @@ -60,38 +64,33 @@ async def search(self, query: str, num_results: int = 1, offset: int = 0) -> lis
params:\nquery: {query}\nnum_results: {num_results}\noffset: {offset}"
)

_base_url = (
base_url = (
"https://api.bing.microsoft.com/v7.0/custom/search"
if self._settings.custom_config
else "https://api.bing.microsoft.com/v7.0/search"
)
_request_url = (
f"{_base_url}?q={urllib.parse.quote_plus(query)}&count={num_results}&offset={offset}"
+ (
f"&customConfig={self._settings.custom_config}"
if self._settings.custom_config
else ""
)
request_url = f"{base_url}?q={urllib.parse.quote_plus(query)}&count={num_results}&offset={offset}" + (
f"&customConfig={self._settings.custom_config}" if self._settings.custom_config else ""
)

logger.info(f"Sending GET request to {_request_url}")
logger.info(f"Sending GET request to {request_url}")

headers = {"Ocp-Apim-Subscription-Key": self._settings.api_key.get_secret_value()}
if self._settings.api_key is not None:
headers = {"Ocp-Apim-Subscription-Key": self._settings.api_key.get_secret_value()}

try:
async with aiohttp.ClientSession() as session, session.get(_request_url, headers=headers) as response:
async with AsyncClient() as client:
response = await client.get(request_url, headers=headers)
response.raise_for_status()
if response.status == 200:
data = await response.json()
pages = data.get("webPages", {}).get("value")
if pages:
return list(map(lambda x: x["snippet"], pages)) or []
return None
data = response.json()
pages = data.get("webPages", {}).get("value")
if pages:
return [page["snippet"] for page in pages]
return []
except aiohttp.ClientResponseError as ex:
except HTTPStatusError as ex:
logger.error(f"Failed to get search results: {ex}")
raise ServiceInvalidRequestError("Failed to get search results.") from ex
except aiohttp.ClientError as ex:
except RequestError as ex:
logger.error(f"Client error occurred: {ex}")
raise ServiceInvalidRequestError("A client error occurred while getting search results.") from ex
except Exception as ex:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,5 @@ class BingSettings(KernelBaseSettings):

env_prefix: ClassVar[str] = "BING_"

api_key: SecretStr | None = None
api_key: SecretStr
custom_config: str | None = None
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
import logging
import urllib

import aiohttp
from httpx import AsyncClient, HTTPStatusError, RequestError
from pydantic import ValidationError

from semantic_kernel.connectors.search_engine.connector import ConnectorBase
from semantic_kernel.connectors.search_engine.google_search_settings import GoogleSearchSettings
from semantic_kernel.exceptions import ServiceInitializationError, ServiceInvalidRequestError

logger: logging.Logger = logging.getLogger(__name__)
Expand All @@ -14,22 +16,50 @@
class GoogleConnector(ConnectorBase):
"""A search engine connector that uses the Google Custom Search API to perform a web search."""

_api_key: str
_search_engine_id: str

def __init__(self, api_key: str, search_engine_id: str) -> None:
"""Initializes a new instance of the GoogleConnector class."""
self._api_key = api_key
self._search_engine_id = search_engine_id

if not self._api_key:
raise ServiceInitializationError("Google Custom Search API key cannot be null.")

if not self._search_engine_id:
_settings: GoogleSearchSettings

def __init__(
self,
api_key: str | None = None,
search_engine_id: str | None = None,
env_file_path: str | None = None,
env_file_encoding: str | None = None,
) -> None:
"""Initializes a new instance of the GoogleConnector class.

Args:
api_key (str | None): The Google Custom Search API key. If provided, will override
the value in the env vars or .env file.
search_engine_id (str | None): The Google search engine ID. If provided, will override
the value in the env vars or .env file.
env_file_path (str | None): The optional path to the .env file. If provided,
the settings are read from this file path location.
env_file_encoding (str | None): The optional encoding of the .env file.
"""
try:
self._settings = GoogleSearchSettings.create(
api_key=api_key,
search_engine_id=search_engine_id,
env_file_path=env_file_path,
env_file_encoding=env_file_encoding,
)
except ValidationError as ex:
raise ServiceInitializationError("Failed to create Google Search settings.") from ex

if not self._settings.search_engine_id:
raise ServiceInitializationError("Google search engine ID cannot be null.")

async def search(self, query: str, num_results: int = 1, offset: int = 0) -> list[str]:
"""Returns the search results of the query provided by pinging the Google Custom search API."""
"""Returns the search results of the query provided by pinging the Google Custom search API.

Args:
query (str): The search query.
num_results (int): The number of search results to return. Default is 1.
offset (int): The offset of the search results. Default is 0.

Returns:
list[str]: A list of search results snippets.
"""
if not query:
raise ServiceInvalidRequestError("query cannot be 'None' or empty.")

Expand All @@ -46,20 +76,31 @@ async def search(self, query: str, num_results: int = 1, offset: int = 0) -> lis
params:\nquery: {query}\nnum_results: {num_results}\noffset: {offset}"
)

_base_url = "https://www.googleapis.com/customsearch/v1"
_request_url = (
f"{_base_url}?q={urllib.parse.quote_plus(query)}"
f"&key={self._api_key}&cx={self._search_engine_id}"
base_url = "https://www.googleapis.com/customsearch/v1"
request_url = (
f"{base_url}?q={urllib.parse.quote_plus(query)}"
f"&key={self._settings.search_api_key.get_secret_value()}&cx={self._settings.search_engine_id}"
f"&num={num_results}&start={offset}"
)

logger.info("Sending GET request to Google Search API.")

async with aiohttp.ClientSession() as session, session.get(_request_url, raise_for_status=True) as response:
if response.status == 200:
data = await response.json()
logger.info("Sending GET request to Google Search API.")

try:
async with AsyncClient() as client:
response = await client.get(request_url)
response.raise_for_status()
data = response.json()
logger.info("Request successful.")
logger.info(f"API Response: {data}")
return [x["snippet"] for x in data["items"]]
logger.error(f"Request to Google Search API failed with status code: {response.status}.")
return []
return [x["snippet"] for x in data.get("items", [])]
except HTTPStatusError as ex:
logger.error(f"Failed to get search results: {ex}")
raise ServiceInvalidRequestError("Failed to get search results.") from ex
except RequestError as ex:
logger.error(f"Client error occurred: {ex}")
raise ServiceInvalidRequestError("A client error occurred while getting search results.") from ex
except Exception as ex:
logger.error(f"An unexpected error occurred: {ex}")
raise ServiceInvalidRequestError("An unexpected error occurred while getting search results.") from ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright (c) Microsoft. All rights reserved.

from typing import ClassVar

from pydantic import SecretStr

from semantic_kernel.kernel_pydantic import KernelBaseSettings


class GoogleSearchSettings(KernelBaseSettings):
    """Google Search Connector settings.

    The settings are first loaded from environment variables with the prefix 'GOOGLE_'. If the
    environment variables are not found, the settings can be loaded from a .env file with the
    encoding 'utf-8'. If the settings are not found in the .env file, the settings are ignored;
    however, validation will fail alerting that the settings are missing.

    Required settings for prefix 'GOOGLE_' are:
    - search_api_key: SecretStr - The Google Search API key (Env var GOOGLE_SEARCH_API_KEY)

    Optional settings for prefix 'GOOGLE_' are:
    - search_engine_id: str | None - The Google search engine ID (Env var GOOGLE_SEARCH_ENGINE_ID)
    - env_file_path: str | None - if provided, the .env settings are read from this file path location
    - env_file_encoding: str - if provided, the .env file encoding used. Defaults to "utf-8".
    """

    # Prefix applied to each field name to form its environment variable name.
    # NOTE(review): the prefix + field-name mapping is presumably implemented by
    # KernelBaseSettings, which is not visible here; confirm against that base class.
    env_prefix: ClassVar[str] = "GOOGLE_"

    # Required; wrapped in SecretStr so the key is not shown in reprs.
    search_api_key: SecretStr
    # Optional at the settings level; defaults to None when not supplied.
    search_engine_id: str | None = None
46 changes: 30 additions & 16 deletions python/semantic_kernel/connectors/utils/document_loader.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,48 @@
# Copyright (c) Microsoft. All rights reserved.

import logging
from collections.abc import Callable
from typing import Any
from collections.abc import Awaitable, Callable
from inspect import isawaitable

import httpx
from httpx import AsyncClient, HTTPStatusError, RequestError

from semantic_kernel.connectors.telemetry import HTTP_USER_AGENT
from semantic_kernel.exceptions import ServiceInvalidRequestError

logger: logging.Logger = logging.getLogger(__name__)


class DocumentLoader:

@staticmethod
async def from_uri(
url: str,
http_client: httpx.AsyncClient,
auth_callback: Callable[[Any], None] | None,
http_client: AsyncClient,
auth_callback: Callable[..., None | Awaitable[dict[str, str]]] | None,
user_agent: str | None = HTTP_USER_AGENT,
):
"""Load the manifest from the given URL."""
headers = {"User-Agent": user_agent}
async with http_client as client:
if auth_callback:
await auth_callback(client, url)

logger.info(f"Importing document from {url}")
if user_agent is None:
user_agent = HTTP_USER_AGENT

response = await client.get(url, headers=headers)
response.raise_for_status()

return response.text
headers = {"User-Agent": user_agent}
try:
async with http_client as client:
if auth_callback:
callback = auth_callback(client, url)
if isawaitable(callback):
await callback

logger.info(f"Importing document from {url}")

response = await client.get(url, headers=headers)
response.raise_for_status()
return response.text
except HTTPStatusError as ex:
logger.error(f"Failed to get document: {ex}")
raise ServiceInvalidRequestError("Failed to get document.") from ex
except RequestError as ex:
logger.error(f"Client error occurred: {ex}")
raise ServiceInvalidRequestError("A client error occurred while getting the document.") from ex
except Exception as ex:
logger.error(f"An unexpected error occurred: {ex}")
raise ServiceInvalidRequestError("An unexpected error occurred while getting the document.") from ex
55 changes: 51 additions & 4 deletions python/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,10 +258,7 @@ def mistralai_unit_test_env(monkeypatch, exclude_list, override_env_param_dict):
if override_env_param_dict is None:
override_env_param_dict = {}

env_vars = {
"MISTRALAI_CHAT_MODEL_ID": "test_chat_model_id",
"MISTRALAI_API_KEY": "test_api_key"
}
env_vars = {"MISTRALAI_CHAT_MODEL_ID": "test_chat_model_id", "MISTRALAI_API_KEY": "test_api_key"}

env_vars.update(override_env_param_dict)

Expand Down Expand Up @@ -322,3 +319,53 @@ def azure_ai_search_unit_test_env(monkeypatch, exclude_list, override_env_param_
monkeypatch.delenv(key, raising=False)

return env_vars


@pytest.fixture()
def bing_unit_test_env(monkeypatch, exclude_list, override_env_param_dict):
    """Prepare the BING_* environment variables used by BingConnector unit tests.

    Args:
        monkeypatch: The pytest monkeypatch fixture used to set or delete env vars.
        exclude_list: Names of variables to remove from the environment instead
            of setting them; ``None`` is treated as an empty list.
        override_env_param_dict: Values that replace or extend the defaults;
            ``None`` is treated as an empty dict.

    Returns:
        dict: The defaults merged with the overrides. Excluded names are still
        present in the returned dict even though they are deleted from the
        environment.
    """
    excluded = [] if exclude_list is None else exclude_list
    overrides = {} if override_env_param_dict is None else override_env_param_dict

    env_vars = {
        "BING_API_KEY": "test_api_key",
        "BING_CUSTOM_CONFIG": "test_org_id",
    }
    env_vars.update(overrides)

    for name, value in env_vars.items():
        if name in excluded:
            # raising=False: deleting a variable that is not set is not an error.
            monkeypatch.delenv(name, raising=False)
            continue
        monkeypatch.setenv(name, value)

    return env_vars


@pytest.fixture()
def google_search_unit_test_env(monkeypatch, exclude_list, override_env_param_dict):
    """Prepare the GOOGLE_* environment variables used by Google Search Connector unit tests.

    Args:
        monkeypatch: The pytest monkeypatch fixture used to set or delete env vars.
        exclude_list: Names of variables to remove from the environment instead
            of setting them; ``None`` is treated as an empty list.
        override_env_param_dict: Values that replace or extend the defaults;
            ``None`` is treated as an empty dict.

    Returns:
        dict: The defaults merged with the overrides. Excluded names are still
        present in the returned dict even though they are deleted from the
        environment.
    """
    excluded = [] if exclude_list is None else exclude_list
    overrides = {} if override_env_param_dict is None else override_env_param_dict

    env_vars = {
        "GOOGLE_SEARCH_API_KEY": "test_api_key",
        "GOOGLE_SEARCH_ENGINE_ID": "test_id",
    }
    env_vars.update(overrides)

    for name, value in env_vars.items():
        if name in excluded:
            # raising=False: deleting a variable that is not set is not an error.
            monkeypatch.delenv(name, raising=False)
            continue
        monkeypatch.setenv(name, value)

    return env_vars
Loading