Merged
4 changes: 0 additions & 4 deletions python/mypy.ini
@@ -13,10 +13,6 @@ warn_untyped_fields = true
[mypy-semantic_kernel]
no_implicit_reexport = true

[mypy-semantic_kernel.connectors.ai.open_ai.*]
ignore_errors = true
# TODO (eavanvalkenburg): remove this: https://github.com/microsoft/semantic-kernel/issues/7131

[mypy-semantic_kernel.connectors.ai.azure_ai_inference.*]
ignore_errors = true
# TODO (eavanvalkenburg): remove this: https://github.com/microsoft/semantic-kernel/issues/7132
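For context: a per-module "ignore_errors = true" override suppresses every mypy error in the matched package, so deleting these blocks re-enables type checking there. A minimal sketch (hypothetical code, not from this repo) of the kind of error that then surfaces, and the explicit narrowing that fixes it:

def lookup(d: dict[str, str]) -> str:
    # With ignore_errors removed, mypy rejects `name: str = d.get("name")`:
    # error: Incompatible types in assignment (expression has type
    # "str | None", variable has type "str")
    name: str | None = d.get("name")
    return name if name is not None else "default"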
@@ -130,9 +130,8 @@ async def get_chat_message_contents(
):
return await self._send_chat_request(chat_history, settings)

kernel: Kernel = kwargs.get("kernel")
arguments: KernelArguments = kwargs.get("arguments")
self._verify_function_choice_behavior(settings, kernel, arguments)
kernel = kwargs.get("kernel", None)
self._verify_function_choice_behavior(settings, kernel)
self._configure_function_choice_behavior(settings, kernel)

for request_index in range(settings.function_choice_behavior.maximum_auto_invoke_attempts):
@@ -146,7 +145,7 @@
function_calls=function_calls,
chat_history=chat_history,
kernel=kernel,
arguments=arguments,
arguments=kwargs.get("arguments", None),
function_call_count=fc_count,
request_index=request_index,
function_behavior=settings.function_choice_behavior,
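The practical effect of the hunks above: callers can now omit "arguments" when auto function calling is enabled; only the kernel remains mandatory. A usage sketch under assumptions — the constructor parameters and import paths here are not taken from this diff:

import asyncio

from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.azure_ai_inference import AzureAIInferenceChatCompletion
from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
from semantic_kernel.contents.chat_history import ChatHistory


async def main() -> None:
    kernel = Kernel()
    # Illustrative values; a real deployment supplies its own credentials.
    service = AzureAIInferenceChatCompletion(
        ai_model_id="my-deployment",
        api_key="...",
        endpoint="https://example.eastus.models.ai.azure.com",
    )
    settings = service.get_prompt_execution_settings_class()(
        function_choice_behavior=FunctionChoiceBehavior.Auto()
    )
    history = ChatHistory()
    history.add_user_message("What is the weather in Seattle?")
    # After this change, `arguments` may be omitted for auto tool calls.
    results = await service.get_chat_message_contents(history, settings, kernel=kernel)
    print(results[0].content)


asyncio.run(main())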
@@ -250,9 +249,8 @@ async def _get_streaming_chat_message_contents_auto_invoke(
**kwargs: Any,
) -> AsyncGenerator[list[StreamingChatMessageContent], Any]:
"""Get streaming chat message contents from the Azure AI Inference service with auto invoking functions."""
kernel: Kernel = kwargs.get("kernel")
arguments: KernelArguments = kwargs.get("arguments")
self._verify_function_choice_behavior(settings, kernel, arguments)
kernel: Kernel = kwargs.get("kernel", None)
self._verify_function_choice_behavior(settings, kernel)
self._configure_function_choice_behavior(settings, kernel)
request_attempts = settings.function_choice_behavior.maximum_auto_invoke_attempts

@@ -279,7 +277,7 @@ async def _get_streaming_chat_message_contents_auto_invoke(
function_calls=function_calls,
chat_history=chat_history,
kernel=kernel,
arguments=arguments,
arguments=kwargs.get("arguments", None),
function_call_count=len(function_calls),
request_index=request_index,
function_behavior=settings.function_choice_behavior,
@@ -396,14 +394,11 @@ def _verify_function_choice_behavior(
self,
settings: AzureAIInferenceChatPromptExecutionSettings,
kernel: Kernel,
arguments: KernelArguments,
):
"""Verify the function choice behavior."""
if settings.function_choice_behavior is not None:
if kernel is None:
raise ServiceInvalidExecutionSettingsError("Kernel is required for tool calls.")
if arguments is None and settings.function_choice_behavior.auto_invoke_kernel_functions:
raise ServiceInvalidExecutionSettingsError("Kernel arguments are required for auto tool calls.")
if settings.extra_parameters is not None and settings.extra_parameters.get("n", 1) > 1:
# Currently only OpenAI models allow multiple completions but the Azure AI Inference service
# does not expose the functionality directly. If users want more than one response, they
@@ -425,7 +420,7 @@ async def _invoke_function_calls(
function_calls: list[FunctionCallContent],
chat_history: ChatHistory,
kernel: Kernel,
arguments: KernelArguments,
arguments: KernelArguments | None,
function_call_count: int,
request_index: int,
function_behavior: FunctionChoiceBehavior,
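Taken together, this file's changes relax the contract: the kernel is still mandatory for tool calls, while kernel arguments become optional all the way down to function invocation, where they are typed KernelArguments | None. The verification step as a standalone sketch (the exception import path is an assumption):

from semantic_kernel.exceptions import ServiceInvalidExecutionSettingsError


def verify_function_choice_behavior(settings, kernel) -> None:
    # `arguments` is no longer validated here; it is passed through as
    # `KernelArguments | None` and resolved at invocation time instead.
    if settings.function_choice_behavior is not None and kernel is None:
        raise ServiceInvalidExecutionSettingsError("Kernel is required for tool calls.")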
@@ -14,23 +14,25 @@


class ChatCompletionClientBase(AIServiceClientBase, ABC):
"""Base class for chat completion AI services."""

@abstractmethod
async def get_chat_message_contents(
self,
chat_history: "ChatHistory",
settings: "PromptExecutionSettings",
**kwargs: Any,
) -> list["ChatMessageContent"]:
"""This is the method that is called from the kernel to get a response from a chat-optimized LLM.
"""Create chat message contents, in the number specified by the settings.

Args:
chat_history (ChatHistory): A list of chats in a chat_history object, that can be
rendered into messages from system, user, assistant and tools.
settings (PromptExecutionSettings): Settings for the request.
kwargs (Dict[str, Any]): The optional arguments.
**kwargs (Any): The optional arguments.

Returns:
Union[str, List[str]]: A string or list of strings representing the response(s) from the LLM.
A list of chat message contents representing the response(s) from the LLM.
"""
pass

@@ -41,7 +43,7 @@ def get_streaming_chat_message_contents(
settings: "PromptExecutionSettings",
**kwargs: Any,
) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]:
"""This is the method that is called from the kernel to get a stream response from a chat-optimized LLM.
"""Create streaming chat message contents, in the number specified by the settings.

Args:
chat_history (ChatHistory): A list of chats in a chat_history object, that can be rendered into a
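A minimal subclass sketch of the contract these docstrings now describe (the echo behavior and class name are invented; import paths are assumptions inferred from the diff):

from collections.abc import AsyncGenerator
from typing import Any

from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
from semantic_kernel.contents.chat_message_content import ChatMessageContent
from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
from semantic_kernel.contents.utils.author_role import AuthorRole


class EchoChatCompletion(ChatCompletionClientBase):
    """Toy connector: echoes the last message back as a single completion."""

    async def get_chat_message_contents(self, chat_history, settings, **kwargs: Any) -> list[ChatMessageContent]:
        last = chat_history.messages[-1].content if chat_history.messages else ""
        return [ChatMessageContent(role=AuthorRole.ASSISTANT, content=last)]

    async def get_streaming_chat_message_contents(self, chat_history, settings, **kwargs: Any) -> AsyncGenerator[list[StreamingChatMessageContent], Any]:
        contents = await self.get_chat_message_contents(chat_history, settings, **kwargs)
        yield [StreamingChatMessageContent(role=AuthorRole.ASSISTANT, choice_index=0, content=contents[0].content)]


# usage: service = EchoChatCompletion(ai_model_id="echo")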
@@ -12,6 +12,8 @@

@experimental_class
class EmbeddingGeneratorBase(AIServiceClientBase, ABC):
"""Base class for embedding generators."""

@abstractmethod
async def generate_embeddings(self, texts: list[str], **kwargs: Any) -> "ndarray":
"""Returns embeddings for the given texts as ndarray.
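A minimal implementer sketch for this base class (the toy vectors are invented; real connectors call an embedding model service):

from typing import Any

from numpy import array, ndarray

from semantic_kernel.connectors.ai.embeddings.embedding_generator_base import EmbeddingGeneratorBase


class ToyEmbeddingGenerator(EmbeddingGeneratorBase):
    """Deterministic toy embeddings, one fixed-size vector per input text."""

    async def generate_embeddings(self, texts: list[str], **kwargs: Any) -> ndarray:
        # Length and a character checksum stand in for real semantics.
        return array([[float(len(t)), float(sum(map(ord, t)) % 997)] for t in texts])


# usage: vectors = await ToyEmbeddingGenerator(ai_model_id="toy").generate_embeddings(["hello"])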
@@ -50,7 +50,7 @@ class ContentFilterAIException(ServiceContentFilterException):
"""AI exception for an error from Azure OpenAI's content filter."""

# The parameter that caused the error.
param: str
param: str | None

# The error code specific to the content filter.
content_filter_code: ContentFilterCodes
@@ -72,12 +72,12 @@ def __init__(
super().__init__(message)

self.param = inner_exception.param

inner_error = inner_exception.body.get("innererror", {})
self.content_filter_code = ContentFilterCodes(
inner_error.get("code", ContentFilterCodes.RESPONSIBLE_AI_POLICY_VIOLATION.value)
)
self.content_filter_result = {
key: ContentFilterResult.from_inner_error_result(values)
for key, values in inner_error.get("content_filter_result", {}).items()
}
if inner_exception.body is not None and isinstance(inner_exception.body, dict):
inner_error = inner_exception.body.get("innererror", {})
self.content_filter_code = ContentFilterCodes(
inner_error.get("code", ContentFilterCodes.RESPONSIBLE_AI_POLICY_VIOLATION.value)
)
self.content_filter_result = {
key: ContentFilterResult.from_inner_error_result(values)
for key, values in inner_error.get("content_filter_result", {}).items()
}
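The hardening pattern above in isolation: the wrapped openai error's body is typed loosely, so it may be None or a non-dict payload, and the handler now only digs into it when it is actually a dictionary. A self-contained sketch:

from typing import Any


def parse_inner_error(body: Any) -> dict[str, Any]:
    # Mirrors the guarded access above: only a dict payload is inspected.
    if body is not None and isinstance(body, dict):
        return body.get("innererror", {})
    return {}


print(parse_inner_error(None))                                   # {}
print(parse_inner_error({"innererror": {"code": "jailbreak"}}))  # {'code': 'jailbreak'}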
@@ -91,7 +91,7 @@ def validate_function_calling_behaviors(cls, data) -> Any:

if isinstance(data, dict) and "function_call_behavior" in data.get("extension_data", {}):
data["function_choice_behavior"] = FunctionChoiceBehavior.from_function_call_behavior(
data.get("extension_data").get("function_call_behavior")
data.get("extension_data", {}).get("function_call_behavior")
)
return data

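The one-line change above gives the chained lookup an explicit default. The pattern in isolation:

from typing import Any


def migrate(data: dict[str, Any]) -> Any:
    # Without the `{}` default, a missing "extension_data" key would make the
    # chained .get raise AttributeError on None; with it, the lookup is safe.
    return data.get("extension_data", {}).get("function_call_behavior")


print(migrate({}))                                                      # None
print(migrate({"extension_data": {"function_call_behavior": "auto"}}))  # auto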
@@ -3,7 +3,7 @@
import logging
from collections.abc import Mapping
from copy import deepcopy
from typing import Any
from typing import Any, TypeVar
from uuid import uuid4

from openai import AsyncAzureOpenAI
@@ -29,10 +29,11 @@
from semantic_kernel.contents.text_content import TextContent
from semantic_kernel.contents.utils.finish_reason import FinishReason
from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError
from semantic_kernel.kernel_pydantic import HttpsUrl

logger: logging.Logger = logging.getLogger(__name__)

TChatMessageContent = TypeVar("TChatMessageContent", ChatMessageContent, StreamingChatMessageContent)


class AzureChatCompletion(AzureOpenAIConfigBase, OpenAIChatCompletionBase, OpenAITextCompletionBase):
"""Azure Chat completion class."""
@@ -93,13 +94,6 @@ def __init__(
if not azure_openai_settings.api_key and not ad_token and not ad_token_provider:
raise ServiceInitializationError("Please provide either api_key, ad_token or ad_token_provider")

if not azure_openai_settings.base_url and not azure_openai_settings.endpoint:
raise ServiceInitializationError("At least one of base_url or endpoint must be provided.")

if azure_openai_settings.endpoint and azure_openai_settings.chat_deployment_name:
azure_openai_settings.base_url = HttpsUrl(
f"{str(azure_openai_settings.endpoint).rstrip('/')}/openai/deployments/{azure_openai_settings.chat_deployment_name}"
)
super().__init__(
deployment_name=azure_openai_settings.chat_deployment_name,
endpoint=azure_openai_settings.endpoint,
@@ -111,11 +105,11 @@
ad_token_provider=ad_token_provider,
default_headers=default_headers,
ai_model_type=OpenAIModelTypes.CHAT,
async_client=async_client,
client=async_client,
)

@classmethod
def from_dict(cls, settings: dict[str, str]) -> "AzureChatCompletion":
def from_dict(cls, settings: dict[str, Any]) -> "AzureChatCompletion":
"""Initialize an Azure OpenAI service from a dictionary of settings.

Args:
@@ -136,7 +130,7 @@ def from_dict(cls, settings: dict[str, str]) -> "AzureChatCompletion":
env_file_path=settings.get("env_file_path"),
)

def get_prompt_execution_settings_class(self) -> "PromptExecutionSettings":
def get_prompt_execution_settings_class(self) -> type["PromptExecutionSettings"]:
"""Create a request settings object."""
return AzureChatPromptExecutionSettings

@@ -155,37 +149,41 @@ def _create_streaming_chat_message_content(
) -> "StreamingChatMessageContent":
"""Create an Azure streaming chat message content object from a choice."""
content = super()._create_streaming_chat_message_content(chunk, choice, chunk_metadata)
assert isinstance(content, StreamingChatMessageContent) and isinstance(choice, ChunkChoice) # nosec
return self._add_tool_message_to_chat_message_content(content, choice)

def _add_tool_message_to_chat_message_content(
self, content: ChatMessageContent | StreamingChatMessageContent, choice: Choice
) -> "ChatMessageContent | StreamingChatMessageContent":
self,
content: TChatMessageContent,
choice: Choice | ChunkChoice,
) -> TChatMessageContent:
if tool_message := self._get_tool_message_from_chat_choice(choice=choice):
try:
tool_message_dict = json.loads(tool_message)
except json.JSONDecodeError:
logger.error("Failed to parse tool message JSON: %s", tool_message)
tool_message_dict = {"citations": tool_message}

if not isinstance(tool_message, dict):
# try to json, to ensure it is a dictionary
try:
tool_message = json.loads(tool_message)
except json.JSONDecodeError:
logger.warning("Tool message is not a dictionary, ignoring context.")
return content
function_call = FunctionCallContent(
id=str(uuid4()),
name="Azure-OnYourData",
arguments=json.dumps({"query": tool_message_dict.get("intent", [])}),
arguments=json.dumps({"query": tool_message.get("intent", [])}),
)
result = FunctionResultContent.from_function_call_content_and_result(
result=tool_message_dict["citations"], function_call_content=function_call
result=tool_message["citations"], function_call_content=function_call
)
content.items.insert(0, function_call)
content.items.insert(1, result)
return content

def _get_tool_message_from_chat_choice(self, choice: Choice | ChunkChoice) -> str | None:
def _get_tool_message_from_chat_choice(self, choice: Choice | ChunkChoice) -> dict[str, Any] | None:
"""Get the tool message from a choice."""
content = choice.message if isinstance(choice, Choice) else choice.delta
if content.model_extra is not None and "context" in content.model_extra:
return json.dumps(content.model_extra["context"])

return None
if content.model_extra is not None:
return content.model_extra.get("context", None)
# openai allows extra content, so model_extra will be a dict; we still check to be safe, but there is no way to test this branch.
return None # pragma: no cover

@staticmethod
def split_message(message: "ChatMessageContent") -> list["ChatMessageContent"]:
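The TypeVar introduced at the top of this file is what lets _add_tool_message_to_chat_message_content return the same content type it receives; the old ChatMessageContent | StreamingChatMessageContent union lost that link. The pattern in isolation, with stand-in classes:

from typing import TypeVar


class ChatMessageContent:  # stand-in for the real content class
    pass


class StreamingChatMessageContent(ChatMessageContent):  # stand-in subclass
    pass


TChatMessageContent = TypeVar("TChatMessageContent", ChatMessageContent, StreamingChatMessageContent)


def add_tool_message(content: TChatMessageContent) -> TChatMessageContent:
    # A value-constrained TypeVar ties the return type to the argument type:
    # streaming in, streaming out; non-streaming in, non-streaming out.
    return content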
@@ -2,6 +2,7 @@

import logging
from collections.abc import Awaitable, Callable, Mapping
from copy import copy

from openai import AsyncAzureOpenAI
from pydantic import ConfigDict, validate_call
@@ -32,7 +33,7 @@ def __init__(
ad_token: str | None = None,
ad_token_provider: Callable[[], str | Awaitable[str]] | None = None,
default_headers: Mapping[str, str] | None = None,
async_client: AsyncAzureOpenAI | None = None,
client: AsyncAzureOpenAI | None = None,
) -> None:
"""Internal class for configuring a connection to an Azure OpenAI service.

@@ -42,51 +43,44 @@
Args:
deployment_name (str): Name of the deployment.
ai_model_type (OpenAIModelTypes): The type of OpenAI model to deploy.
endpoint (Optional[HttpsUrl]): The specific endpoint URL for the deployment. (Optional)
base_url (Optional[HttpsUrl]): The base URL for Azure services. (Optional)
endpoint (HttpsUrl): The specific endpoint URL for the deployment. (Optional)
base_url (HttpsUrl): The base URL for Azure services. (Optional)
api_version (str): Azure API version. Defaults to the defined DEFAULT_AZURE_API_VERSION.
service_id (Optional[str]): Service ID for the deployment. (Optional)
api_key (Optional[str]): API key for Azure services. (Optional)
ad_token (Optional[str]): Azure AD token for authentication. (Optional)
ad_token_provider (Optional[Callable[[], Union[str, Awaitable[str]]]]): A callable
service_id (str): Service ID for the deployment. (Optional)
api_key (str): API key for Azure services. (Optional)
ad_token (str): Azure AD token for authentication. (Optional)
ad_token_provider (Callable[[], Union[str, Awaitable[str]]]): A callable
or coroutine function providing Azure AD tokens. (Optional)
default_headers (Union[Mapping[str, str], None]): Default headers for HTTP requests. (Optional)
async_client (Optional[AsyncAzureOpenAI]): An existing client to use. (Optional)
client (AsyncAzureOpenAI): An existing client to use. (Optional)

"""
# Merge APP_INFO into the headers if it exists
merged_headers = default_headers.copy() if default_headers else {}
merged_headers = dict(copy(default_headers)) if default_headers else {}
if APP_INFO:
merged_headers.update(APP_INFO)
merged_headers = prepend_semantic_kernel_to_user_agent(merged_headers)

if not async_client:
if not client:
if not api_key and not ad_token and not ad_token_provider:
raise ServiceInitializationError("Please provide either api_key, ad_token or ad_token_provider")
if base_url:
async_client = AsyncAzureOpenAI(
base_url=str(base_url),
api_version=api_version,
api_key=api_key,
azure_ad_token=ad_token,
azure_ad_token_provider=ad_token_provider,
default_headers=merged_headers,
raise ServiceInitializationError(
"Please provide either api_key, ad_token or ad_token_provider or a client."
)
else:
if not base_url:
if not endpoint:
raise ServiceInitializationError("Please provide either base_url or endpoint")
async_client = AsyncAzureOpenAI(
azure_endpoint=str(endpoint).rstrip("/"),
azure_deployment=deployment_name,
api_version=api_version,
api_key=api_key,
azure_ad_token=ad_token,
azure_ad_token_provider=ad_token_provider,
default_headers=merged_headers,
)
raise ServiceInitializationError("Please provide an endpoint or a base_url")
base_url = HttpsUrl(f"{str(endpoint).rstrip('/')}/openai/deployments/{deployment_name}")
client = AsyncAzureOpenAI(
base_url=str(base_url),
api_version=api_version,
api_key=api_key,
azure_ad_token=ad_token,
azure_ad_token_provider=ad_token_provider,
default_headers=merged_headers,
)
args = {
"ai_model_id": deployment_name,
"client": async_client,
"client": client,
"ai_model_type": ai_model_type,
}
if service_id:
@@ -99,8 +93,8 @@ def to_dict(self) -> dict[str, str]:
"base_url": str(self.client.base_url),
"api_version": self.client._custom_query["api-version"],
"api_key": self.client.api_key,
"ad_token": self.client._azure_ad_token,
"ad_token_provider": self.client._azure_ad_token_provider,
"ad_token": getattr(self.client, "_azure_ad_token", None),
"ad_token_provider": getattr(self.client, "_azure_ad_token_provider", None),
"default_headers": {k: v for k, v in self.client.default_headers.items() if k != USER_AGENT},
}
base = self.model_dump(
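Two patterns from this file are worth isolating. First, endpoint handling is now funneled into a single client construction path: when only an endpoint is given, the deployment-scoped base_url is derived from it. A pure-string sketch of that normalization:

def build_base_url(endpoint: str, deployment_name: str) -> str:
    # Azure OpenAI clients address one deployment under the service endpoint.
    return f"{endpoint.rstrip('/')}/openai/deployments/{deployment_name}"


print(build_base_url("https://example.openai.azure.com/", "gpt-4o"))
# https://example.openai.azure.com/openai/deployments/gpt-4o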
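Second, to_dict now reads the private Azure AD fields with getattr, so that client objects which never set those attributes serialize cleanly instead of raising AttributeError. A sketch of the defensive read:

class BareClient:  # stand-in for a client built without Azure AD credentials
    pass


def serialize(client: object) -> dict:
    # getattr with a default tolerates clients lacking the private attributes.
    return {
        "ad_token": getattr(client, "_azure_ad_token", None),
        "ad_token_provider": getattr(client, "_azure_ad_token_provider", None),
    }


print(serialize(BareClient()))  # {'ad_token': None, 'ad_token_provider': None}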