diff --git a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py
index 037972ff516c..b2f3f8f75d16 100644
--- a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py
+++ b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py
@@ -36,6 +36,25 @@ async def get_chat_message_contents(
         """
         pass
 
+    async def get_chat_message_content(
+        self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", **kwargs: Any
+    ) -> "ChatMessageContent | None":
+        """This is the method that is called from the kernel to get a response from a chat-optimized LLM.
+
+        Args:
+            chat_history (ChatHistory): The chat history, which can be rendered into a
+                set of messages, from system, user, assistant and function.
+            settings (PromptExecutionSettings): Settings for the request.
+            kwargs (Dict[str, Any]): The optional arguments.
+
+        Returns:
+            ChatMessageContent | None: The first response from the LLM, or None when there is none.
+        """
+        results = await self.get_chat_message_contents(chat_history, settings, **kwargs)
+        if results:
+            return results[0]
+        return None
+
     @abstractmethod
     def get_streaming_chat_message_contents(
         self,
@@ -56,6 +75,31 @@ def get_streaming_chat_message_contents(
         """
         ...
 
+    async def get_streaming_chat_message_content(
+        self,
+        chat_history: "ChatHistory",
+        settings: "PromptExecutionSettings",
+        **kwargs: Any,
+    ) -> AsyncGenerator["StreamingChatMessageContent | None", Any]:
+        """This is the method that is called from the kernel to get a stream response from a chat-optimized LLM.
+
+        Args:
+            chat_history (ChatHistory): The chat history, which can be rendered into a
+                set of messages, from system, user, assistant and function.
+            settings (PromptExecutionSettings): Settings for the request.
+            kwargs (Dict[str, Any]): The optional arguments.
+
+        Yields:
+            StreamingChatMessageContent | None: The first item of each streamed update, or None if it is empty.
+        """
+        async for streaming_chat_message_contents in self.get_streaming_chat_message_contents(
+            chat_history, settings, **kwargs
+        ):
+            if streaming_chat_message_contents:
+                yield streaming_chat_message_contents[0]
+            else:
+                yield None
+
     def _prepare_chat_history_for_request(
         self,
         chat_history: "ChatHistory",
diff --git a/python/semantic_kernel/connectors/ai/embeddings/embedding_generator_base.py b/python/semantic_kernel/connectors/ai/embeddings/embedding_generator_base.py
index cd915cccfde5..3342d96baa02 100644
--- a/python/semantic_kernel/connectors/ai/embeddings/embedding_generator_base.py
+++ b/python/semantic_kernel/connectors/ai/embeddings/embedding_generator_base.py
@@ -9,18 +9,42 @@
 if TYPE_CHECKING:
     from numpy import ndarray
 
+    from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
+
 
 @experimental_class
 class EmbeddingGeneratorBase(AIServiceClientBase, ABC):
     """Base class for embedding generators."""
 
     @abstractmethod
-    async def generate_embeddings(self, texts: list[str], **kwargs: Any) -> "ndarray":
+    async def generate_embeddings(
+        self,
+        texts: list[str],
+        settings: "PromptExecutionSettings | None" = None,
+        **kwargs: Any,
+    ) -> "ndarray":
         """Returns embeddings for the given texts as ndarray.
 
         Args:
             texts (List[str]): The texts to generate embeddings for.
-            batch_size (Optional[int]): The batch size to use for the request.
-            kwargs (Dict[str, Any]): Additional arguments to pass to the request.
+            settings (PromptExecutionSettings): The settings to use for the request, optional.
+            kwargs (Any): Additional arguments to pass to the request.
         """
         pass
+
+    async def generate_raw_embeddings(
+        self,
+        texts: list[str],
+        settings: "PromptExecutionSettings | None" = None,
+        **kwargs: Any,
+    ) -> Any:
+        """Returns embeddings for the given texts in the unedited format.
+
+        Services that do not override this method fall back to the generate_embeddings method.
+
+        Args:
+            texts (List[str]): The texts to generate embeddings for.
+            settings (PromptExecutionSettings): The settings to use for the request, optional.
+            kwargs (Any): Additional arguments to pass to the request.
+        """
+        return await self.generate_embeddings(texts, settings, **kwargs)
diff --git a/python/semantic_kernel/connectors/ai/hugging_face/services/hf_text_embedding.py b/python/semantic_kernel/connectors/ai/hugging_face/services/hf_text_embedding.py
index 057ec5be46dd..553e48fabf2e 100644
--- a/python/semantic_kernel/connectors/ai/hugging_face/services/hf_text_embedding.py
+++ b/python/semantic_kernel/connectors/ai/hugging_face/services/hf_text_embedding.py
@@ -2,7 +2,7 @@
 
 import logging
 import sys
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 if sys.version_info >= (3, 12):
     from typing import override  # pragma: no cover
@@ -11,12 +11,17 @@
 
 import sentence_transformers
 import torch
-from numpy import array, ndarray
+from numpy import ndarray
 
 from semantic_kernel.connectors.ai.embeddings.embedding_generator_base import EmbeddingGeneratorBase
 from semantic_kernel.exceptions import ServiceResponseException
 from semantic_kernel.utils.experimental_decorator import experimental_class
 
+if TYPE_CHECKING:
+    from torch import Tensor
+
+    from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
+
 logger: logging.Logger = logging.getLogger(__name__)
 
 
@@ -50,10 +55,27 @@ def __init__(
         )
 
     @override
-    async def generate_embeddings(self, texts: list[str], **kwargs: Any) -> ndarray:
+    async def generate_embeddings(
+        self,
+        texts: list[str],
+        settings: "PromptExecutionSettings | None" = None,
+        **kwargs: Any,
+    ) -> ndarray:
+        try:
+            logger.info(f"Generating embeddings for {len(texts)} texts.")
+            return self.generator.encode(sentences=texts, convert_to_numpy=True, **kwargs)
+        except Exception as e:
+            raise ServiceResponseException("Hugging Face embeddings failed", e) from e
+
+    @override
+    async def generate_raw_embeddings(
+        self,
+        texts: list[str],
+        settings: "PromptExecutionSettings | None" = None,
+        **kwargs: Any,
+    ) -> "list[Tensor] | ndarray | Tensor":
         try:
-            logger.info(f"Generating embeddings for {len(texts)} texts")
-            embeddings = self.generator.encode(texts, **kwargs)
-            return array(embeddings)
+            logger.info(f"Generating raw embeddings for {len(texts)} texts.")
+            return self.generator.encode(sentences=texts, **kwargs)
         except Exception as e:
             raise ServiceResponseException("Hugging Face embeddings failed", e) from e
diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_handler.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_handler.py
index 937b6b8cd427..61df57d7fa4f 100644
--- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_handler.py
+++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_handler.py
@@ -2,8 +2,9 @@
 
 import logging
 from abc import ABC
+from typing import Any
 
-from numpy import array, ndarray
+from numpy import array
 from openai import AsyncOpenAI, AsyncStream, BadRequestError
 from openai.types import Completion, CreateEmbeddingResponse
 from openai.types.chat import ChatCompletion, ChatCompletionChunk
@@ -57,7 +58,7 @@ async def _send_request(
                 ex,
             ) from ex
 
-    async def _send_embedding_request(self, settings: OpenAIEmbeddingPromptExecutionSettings) -> list[ndarray]:
+    async def _send_embedding_request(self, settings: OpenAIEmbeddingPromptExecutionSettings) -> list[Any]:
         try:
             response = await self.client.embeddings.create(**settings.prepare_settings_dict())
             self.store_usage(response)
diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_embedding_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_embedding_base.py
index 718c4873afb9..81601912ab58 100644
--- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_embedding_base.py
+++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_embedding_base.py
@@ -1,7 +1,7 @@
 # Copyright (c) Microsoft. All rights reserved.
 
 import sys
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 from numpy import array, ndarray
 
@@ -15,34 +15,59 @@
     OpenAIEmbeddingPromptExecutionSettings,
 )
 from semantic_kernel.connectors.ai.open_ai.services.open_ai_handler import OpenAIHandler
-from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
 from semantic_kernel.utils.experimental_decorator import experimental_class
 
+if TYPE_CHECKING:
+    from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
+
 
 @experimental_class
 class OpenAITextEmbeddingBase(OpenAIHandler, EmbeddingGeneratorBase):
     @override
-    async def generate_embeddings(self, texts: list[str], batch_size: int | None = None, **kwargs: Any) -> ndarray:
-        settings: OpenAIEmbeddingPromptExecutionSettings | None = kwargs.pop("settings", None)
-        if settings:
-            for key, value in kwargs.items():
-                setattr(settings, key, value)
+    async def generate_embeddings(
+        self,
+        texts: list[str],
+        settings: "PromptExecutionSettings | None" = None,
+        batch_size: int | None = None,
+        **kwargs: Any,
+    ) -> ndarray:
+        raw_embeddings = await self.generate_raw_embeddings(texts, settings, batch_size, **kwargs)
+        return array([array(emb) for emb in raw_embeddings])
+
+    @override
+    async def generate_raw_embeddings(
+        self,
+        texts: list[str],
+        settings: "PromptExecutionSettings | None" = None,
+        batch_size: int | None = None,
+        **kwargs: Any,
+    ) -> Any:
+        """Returns embeddings for the given texts in the unedited format.
+
+        Args:
+            texts (List[str]): The texts to generate embeddings for.
+            settings (PromptExecutionSettings): The settings to use for the request.
+            batch_size (int): The batch size to use for the request.
+            kwargs (Dict[str, Any]): Additional arguments to pass to the request.
+        """
+        if not settings:
+            settings = OpenAIEmbeddingPromptExecutionSettings(ai_model_id=self.ai_model_id)
         else:
-            settings = OpenAIEmbeddingPromptExecutionSettings(
-                **kwargs,
-            )
+            if not isinstance(settings, OpenAIEmbeddingPromptExecutionSettings):
+                settings = self.get_prompt_execution_settings_from_settings(settings)
+        assert isinstance(settings, OpenAIEmbeddingPromptExecutionSettings)  # nosec
         if settings.ai_model_id is None:
             settings.ai_model_id = self.ai_model_id
+        for key, value in kwargs.items():
+            setattr(settings, key, value)
         raw_embeddings = []
-        batch_size = batch_size or len(texts)
+        batch_size = batch_size or len(texts) or 1  # range() rejects a zero step when texts is empty
         for i in range(0, len(texts), batch_size):
             batch = texts[i : i + batch_size]
             settings.input = batch
-            raw_embedding = await self._send_embedding_request(
-                settings=settings,
-            )
+            raw_embedding = await self._send_embedding_request(settings=settings)
             raw_embeddings.extend(raw_embedding)
-        return array(raw_embeddings)
+        return raw_embeddings
 
     @override
     def get_prompt_execution_settings_class(self) -> type["PromptExecutionSettings"]:
diff --git a/python/semantic_kernel/connectors/ai/text_completion_client_base.py b/python/semantic_kernel/connectors/ai/text_completion_client_base.py
index 272a46a80e40..3eaa602e4406 100644
--- a/python/semantic_kernel/connectors/ai/text_completion_client_base.py
+++ b/python/semantic_kernel/connectors/ai/text_completion_client_base.py
@@ -30,6 +30,19 @@ async def get_text_contents(
             list[TextContent]: A string or list of strings representing the response(s) from the LLM.
         """
 
+    async def get_text_content(self, prompt: str, settings: "PromptExecutionSettings") -> "TextContent | None":
+        """This is the method that is called from the kernel to get a response from a text-optimized LLM.
+
+        Args:
+            prompt (str): The prompt to send to the LLM.
+            settings (PromptExecutionSettings): Settings for the request.
+
+        Returns:
+            TextContent | None: The first response from the LLM, or None when there is none.
+        """
+        contents = await self.get_text_contents(prompt, settings)
+        return contents[0] if contents else None
+
     @abstractmethod
     def get_streaming_text_contents(
         self,
@@ -46,3 +59,21 @@ def get_streaming_text_contents(
             list[StreamingTextContent]: A stream representing the response(s) from the LLM.
         """
         ...
+
+    async def get_streaming_text_content(
+        self, prompt: str, settings: "PromptExecutionSettings"
+    ) -> "AsyncGenerator[StreamingTextContent | None, Any]":
+        """This is the method that is called from the kernel to get a stream response from a text-optimized LLM.
+
+        Args:
+            prompt (str): The prompt to send to the LLM.
+            settings (PromptExecutionSettings): Settings for the request.
+
+        Yields:
+            StreamingTextContent | None: The first item of each streamed update, or None if it is empty.
+        """
+        async for contents in self.get_streaming_text_contents(prompt, settings):
+            if isinstance(contents, list):
+                yield contents[0] if contents else None
+            else:
+                yield contents
diff --git a/python/tests/unit/connectors/open_ai/services/test_openai_text_embedding.py b/python/tests/unit/connectors/open_ai/services/test_openai_text_embedding.py
index 8202a066c50a..bf6c2cb09a47 100644
--- a/python/tests/unit/connectors/open_ai/services/test_openai_text_embedding.py
+++ b/python/tests/unit/connectors/open_ai/services/test_openai_text_embedding.py
@@ -10,6 +10,7 @@
     OpenAIEmbeddingPromptExecutionSettings,
 )
 from semantic_kernel.connectors.ai.open_ai.services.open_ai_text_embedding import OpenAITextEmbedding
+from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
 from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError, ServiceResponseException
 
 
@@ -108,3 +109,22 @@ async def test_embedding_fail(mock_create, openai_unit_test_env) -> None:
     )
     with pytest.raises(ServiceResponseException):
         await openai_text_embedding.generate_embeddings(texts, dimensions=embedding_dimensions)
+
+
+@pytest.mark.asyncio
+@patch.object(AsyncEmbeddings, "create", new_callable=AsyncMock)
+async def test_embedding_pes(mock_create, openai_unit_test_env) -> None:
+    ai_model_id = "test_model_id"
+    texts = ["hello world", "goodbye world"]
+    embedding_dimensions = 1536
+    pes = PromptExecutionSettings(ai_model_id=ai_model_id, dimensions=embedding_dimensions)
+
+    openai_text_embedding = OpenAITextEmbedding(ai_model_id=ai_model_id)
+
+    await openai_text_embedding.generate_raw_embeddings(texts, pes)
+
+    mock_create.assert_awaited_once_with(
+        input=texts,
+        model=ai_model_id,
+        dimensions=embedding_dimensions,
+    )