@@ -36,6 +36,25 @@ async def get_chat_message_contents(
"""
pass

async def get_chat_message_content(
self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", **kwargs: Any
) -> "ChatMessageContent | None":
"""This is the method that is called from the kernel to get a response from a chat-optimized LLM.

Args:
chat_history (ChatHistory): The chat history to send to the LLM; it can be
rendered into a set of messages from the system, user, assistant and function roles.
settings (PromptExecutionSettings): Settings for the request.
kwargs (Any): The optional arguments.

Returns:
The first ChatMessageContent returned by the LLM, or None if the LLM returned no results.
"""
results = await self.get_chat_message_contents(chat_history, settings, **kwargs)
if results:
return results[0]
return None
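
For illustration, a minimal usage sketch of this new wrapper. The service construction, model id, and prompt below are illustrative assumptions, not part of this diff:

import asyncio

from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion
from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
from semantic_kernel.contents import ChatHistory

async def main() -> None:
    # Illustrative setup: assumes OPENAI_API_KEY is available in the environment.
    service = OpenAIChatCompletion(ai_model_id="gpt-4o")
    history = ChatHistory()
    history.add_user_message("Summarize the benefits of streaming APIs.")
    # The wrapper returns the first ChatMessageContent, or None if the
    # service produced no results.
    message = await service.get_chat_message_content(history, PromptExecutionSettings())
    if message is not None:
        print(message.content)

asyncio.run(main())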

@abstractmethod
def get_streaming_chat_message_contents(
self,
@@ -56,6 +75,31 @@ def get_streaming_chat_message_contents(
"""
...

async def get_streaming_chat_message_content(
self,
chat_history: "ChatHistory",
settings: "PromptExecutionSettings",
**kwargs: Any,
) -> AsyncGenerator["StreamingChatMessageContent | None", Any]:
"""This is the method that is called from the kernel to get a stream response from a chat-optimized LLM.

Args:
chat_history (ChatHistory): The chat history to send to the LLM; it can be
rendered into a set of messages from the system, user, assistant and function roles.
settings (PromptExecutionSettings): Settings for the request.
kwargs (Any): The optional arguments.

Yields:
The first StreamingChatMessageContent of each chunk streamed by the LLM, or None when a chunk is empty.
"""
async for streaming_chat_message_contents in self.get_streaming_chat_message_contents(
chat_history, settings, **kwargs
):
if streaming_chat_message_contents:
yield streaming_chat_message_contents[0]
else:
yield None
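
The streaming counterpart, sketched under the same assumptions as the example above:

async def stream(service: OpenAIChatCompletion, history: ChatHistory) -> None:
    # Prints partial message content as updates arrive; empty updates yield None.
    async for chunk in service.get_streaming_chat_message_content(history, PromptExecutionSettings()):
        if chunk is not None:
            print(chunk.content, end="")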

def _prepare_chat_history_for_request(
self,
chat_history: "ChatHistory",
@@ -9,18 +9,42 @@
if TYPE_CHECKING:
from numpy import ndarray

from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings


@experimental_class
class EmbeddingGeneratorBase(AIServiceClientBase, ABC):
"""Base class for embedding generators."""

@abstractmethod
async def generate_embeddings(
self,
texts: list[str],
settings: "PromptExecutionSettings | None" = None,
**kwargs: Any,
) -> "ndarray":
"""Returns embeddings for the given texts as ndarray.

Args:
texts (List[str]): The texts to generate embeddings for.
settings (PromptExecutionSettings): The settings to use for the request, optional.
kwargs (Any): Additional arguments to pass to the request.
"""
pass

async def generate_raw_embeddings(
self,
texts: list[str],
settings: "PromptExecutionSettings | None" = None,
**kwargs: Any,
) -> Any:
"""Returns embeddings for the given texts in the unedited format.

Not all embedding services implement this method; the default implementation falls back to generate_embeddings.

Args:
texts (List[str]): The texts to generate embeddings for.
settings (PromptExecutionSettings): The settings to use for the request, optional.
kwargs (Any): Additional arguments to pass to the request.
"""
return await self.generate_embeddings(texts, settings, **kwargs)
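
To illustrate the contract, a hypothetical minimal connector (not part of this diff): a subclass only has to implement generate_embeddings, and inherits generate_raw_embeddings as a fallback unless the backend offers a richer native format:

from typing import Any

from numpy import array, ndarray

from semantic_kernel.connectors.ai.embeddings.embedding_generator_base import EmbeddingGeneratorBase
from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings

class FakeEmbeddingGenerator(EmbeddingGeneratorBase):
    """Toy generator: embeds each text as a single dimension, its length."""

    async def generate_embeddings(
        self,
        texts: list[str],
        settings: PromptExecutionSettings | None = None,
        **kwargs: Any,
    ) -> ndarray:
        return array([[float(len(text))] for text in texts])

# generate_raw_embeddings is inherited, so it returns the same ndarray here:
# await FakeEmbeddingGenerator(ai_model_id="fake").generate_raw_embeddings(["hi"])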
@@ -2,7 +2,7 @@

import logging
import sys
from typing import TYPE_CHECKING, Any

if sys.version_info >= (3, 12):
from typing import override # pragma: no cover
@@ -11,12 +11,17 @@

import sentence_transformers
import torch
from numpy import ndarray

from semantic_kernel.connectors.ai.embeddings.embedding_generator_base import EmbeddingGeneratorBase
from semantic_kernel.exceptions import ServiceResponseException
from semantic_kernel.utils.experimental_decorator import experimental_class

if TYPE_CHECKING:
from torch import Tensor

from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings

logger: logging.Logger = logging.getLogger(__name__)


@@ -50,10 +55,27 @@ def __init__(
)

@override
async def generate_embeddings(
self,
texts: list[str],
settings: "PromptExecutionSettings | None" = None,
**kwargs: Any,
) -> ndarray:
try:
logger.info(f"Generating embeddings for {len(texts)} texts.")
return self.generator.encode(sentences=texts, convert_to_numpy=True, **kwargs)
except Exception as e:
raise ServiceResponseException("Hugging Face embeddings failed", e) from e

@override
async def generate_raw_embeddings(
self,
texts: list[str],
settings: "PromptExecutionSettings | None" = None,
**kwargs: Any,
) -> "list[Tensor] | ndarray | Tensor":
try:
logger.info(f"Generating embeddings for {len(texts)} texts")
embeddings = self.generator.encode(texts, **kwargs)
return array(embeddings)
logger.info(f"Generating raw embeddings for {len(texts)} texts.")
return self.generator.encode(sentences=texts, **kwargs)
except Exception as e:
raise ServiceResponseException("Hugging Face embeddings failed", e) from e
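
A usage sketch for the Hugging Face connector. The model id is an illustrative assumption, and convert_to_tensor is a sentence-transformers encode() flag passed through **kwargs:

from semantic_kernel.connectors.ai.hugging_face import HuggingFaceTextEmbedding

async def embed() -> None:
    # Model id is illustrative; any sentence-transformers model should work.
    service = HuggingFaceTextEmbedding(ai_model_id="sentence-transformers/all-MiniLM-L6-v2")
    # Normalized path: always an ndarray.
    vectors = await service.generate_embeddings(["hello", "world"])
    print(vectors.shape)
    # Raw path: whatever encode() returns, e.g. a torch Tensor here.
    tensors = await service.generate_raw_embeddings(["hello", "world"], convert_to_tensor=True)
    print(type(tensors))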
@@ -2,8 +2,9 @@

import logging
from abc import ABC
from typing import Any

from numpy import array
from openai import AsyncOpenAI, AsyncStream, BadRequestError
from openai.types import Completion, CreateEmbeddingResponse
from openai.types.chat import ChatCompletion, ChatCompletionChunk
@@ -57,7 +58,7 @@ async def _send_request(
ex,
) from ex

async def _send_embedding_request(self, settings: OpenAIEmbeddingPromptExecutionSettings) -> list[Any]:
try:
response = await self.client.embeddings.create(**settings.prepare_settings_dict())
self.store_usage(response)
@@ -1,7 +1,7 @@
# Copyright (c) Microsoft. All rights reserved.

import sys
from typing import TYPE_CHECKING, Any

from numpy import array, ndarray

@@ -15,34 +15,59 @@
OpenAIEmbeddingPromptExecutionSettings,
)
from semantic_kernel.connectors.ai.open_ai.services.open_ai_handler import OpenAIHandler
from semantic_kernel.utils.experimental_decorator import experimental_class

if TYPE_CHECKING:
from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings


@experimental_class
class OpenAITextEmbeddingBase(OpenAIHandler, EmbeddingGeneratorBase):
@override
async def generate_embeddings(
self,
texts: list[str],
settings: "PromptExecutionSettings | None" = None,
batch_size: int | None = None,
**kwargs: Any,
) -> ndarray:
raw_embeddings = await self.generate_raw_embeddings(texts, settings, batch_size, **kwargs)
return array([array(emb) for emb in raw_embeddings])

@override
async def generate_raw_embeddings(
self,
texts: list[str],
settings: "PromptExecutionSettings | None" = None,
batch_size: int | None = None,
**kwargs: Any,
) -> Any:
"""Returns embeddings for the given texts in the unedited format.

Args:
texts (List[str]): The texts to generate embeddings for.
settings (PromptExecutionSettings): The settings to use for the request.
batch_size (int): The batch size to use for the request.
kwargs (Dict[str, Any]): Additional arguments to pass to the request.
"""
if not settings:
settings = OpenAIEmbeddingPromptExecutionSettings(ai_model_id=self.ai_model_id)
if not isinstance(settings, OpenAIEmbeddingPromptExecutionSettings):
settings = self.get_prompt_execution_settings_from_settings(settings)
assert isinstance(settings, OpenAIEmbeddingPromptExecutionSettings) # nosec
if settings.ai_model_id is None:
settings.ai_model_id = self.ai_model_id
for key, value in kwargs.items():
setattr(settings, key, value)
raw_embeddings = []
batch_size = batch_size or len(texts)
for i in range(0, len(texts), batch_size):
batch = texts[i : i + batch_size]
settings.input = batch
raw_embedding = await self._send_embedding_request(settings=settings)
raw_embeddings.extend(raw_embedding)
return raw_embeddings
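
A sketch of how the batching parameters compose. The model id, dimensions value, and texts are illustrative assumptions:

from semantic_kernel.connectors.ai.open_ai import OpenAITextEmbedding

async def embed_openai() -> None:
    # Illustrative model id; assumes OPENAI_API_KEY is set in the environment.
    service = OpenAITextEmbedding(ai_model_id="text-embedding-3-small")
    texts = [f"document {i}" for i in range(10)]
    # batch_size=4 sends ceil(10 / 4) = 3 requests; dimensions is forwarded
    # onto the execution settings via **kwargs.
    raw = await service.generate_raw_embeddings(texts, batch_size=4, dimensions=256)
    print(len(raw))  # one raw embedding per input text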

@override
def get_prompt_execution_settings_class(self) -> type["PromptExecutionSettings"]:
@@ -30,6 +30,18 @@ async def get_text_contents(
list[TextContent]: The response(s) from the LLM as a list of TextContent objects.
"""

async def get_text_content(self, prompt: str, settings: "PromptExecutionSettings") -> "TextContent":
"""This is the method that is called from the kernel to get a response from a text-optimized LLM.

Args:
prompt (str): The prompt to send to the LLM.
settings (PromptExecutionSettings): Settings for the request.

Returns:
TextContent: The first text content returned by the LLM.
"""
return (await self.get_text_contents(prompt, settings))[0]
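
A minimal sketch of this wrapper in use; the service and model id are illustrative assumptions, not part of this diff:

from semantic_kernel.connectors.ai.open_ai import OpenAITextCompletion
from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings

async def complete() -> None:
    # Illustrative model id; assumes OPENAI_API_KEY is set in the environment.
    service = OpenAITextCompletion(ai_model_id="gpt-3.5-turbo-instruct")
    result = await service.get_text_content("1 + 1 =", PromptExecutionSettings())
    print(result.text)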

@abstractmethod
def get_streaming_text_contents(
self,
@@ -46,3 +58,21 @@ def get_streaming_text_contents(
list[StreamingTextContent]: A stream representing the response(s) from the LLM.
"""
...

async def get_streaming_text_content(
self, prompt: str, settings: "PromptExecutionSettings"
) -> "StreamingTextContent | Any":
"""This is the method that is called from the kernel to get a stream response from a text-optimized LLM.

Args:
prompt (str): The prompt to send to the LLM.
settings (PromptExecutionSettings): Settings for the request.

Yields:
StreamingTextContent: The first streaming text content of each chunk returned by the LLM.
"""
async for contents in self.get_streaming_text_contents(prompt, settings):
if isinstance(contents, list):
yield contents[0]
else:
yield contents
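
And its streaming counterpart, under the same assumptions as the completion sketch above:

async def stream_completion(service: OpenAITextCompletion) -> None:
    # Prints partial completions as they arrive.
    async for chunk in service.get_streaming_text_content("Write a haiku:", PromptExecutionSettings()):
        if chunk is not None:
            print(chunk.text, end="")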
@@ -10,6 +10,7 @@
OpenAIEmbeddingPromptExecutionSettings,
)
from semantic_kernel.connectors.ai.open_ai.services.open_ai_text_embedding import OpenAITextEmbedding
from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError, ServiceResponseException


@@ -108,3 +109,22 @@ async def test_embedding_fail(mock_create, openai_unit_test_env) -> None:
)
with pytest.raises(ServiceResponseException):
await openai_text_embedding.generate_embeddings(texts, dimensions=embedding_dimensions)


@pytest.mark.asyncio
@patch.object(AsyncEmbeddings, "create", new_callable=AsyncMock)
async def test_embedding_pes(mock_create, openai_unit_test_env) -> None:
ai_model_id = "test_model_id"
texts = ["hello world", "goodbye world"]
embedding_dimensions = 1536
pes = PromptExecutionSettings(ai_model_id=ai_model_id, dimensions=embedding_dimensions)

openai_text_embedding = OpenAITextEmbedding(ai_model_id=ai_model_id)

await openai_text_embedding.generate_raw_embeddings(texts, pes)

mock_create.assert_awaited_once_with(
input=texts,
model=ai_model_id,
dimensions=embedding_dimensions,
)