From 70a1f66848b814a01b0a5411f9b193e3e437b892 Mon Sep 17 00:00:00 2001
From: Tao Chen <taochen@microsoft.com>
Date: Fri, 28 Jun 2024 08:53:34 -0700
Subject: [PATCH 01/10] Add function calling to non-streaming, next streaming
 and sample

---
 .../azure_ai_inference_chat_completion.py     |  50 +-----
 .../azure_ai_inference_conversion_utils.py    | 158 ++++++++++++++++++
 .../completions/test_chat_completions.py      |  52 ++++++
 3 files changed, 215 insertions(+), 45 deletions(-)
 create mode 100644 python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_conversion_utils.py

diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
index 5d39d3953e65..58879f0eec58 100644
--- a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
+++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
@@ -10,13 +10,8 @@
     AsyncStreamingChatCompletions,
     ChatChoice,
     ChatCompletions,
-    ChatRequestMessage,
-    ImageContentItem,
-    ImageDetailLevel,
-    ImageUrl,
     StreamingChatChoiceUpdate,
     SystemMessage,
-    TextContentItem,
     ToolMessage,
     UserMessage,
 )
@@ -28,11 +23,13 @@
     AzureAIInferenceSettings,
 )
 from semantic_kernel.connectors.ai.azure_ai_inference.services.azure_ai_inference_base import AzureAIInferenceBase
+from semantic_kernel.connectors.ai.azure_ai_inference.services.azure_ai_inference_conversion_utils import (
+    format_chat_history,
+)
 from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
 from semantic_kernel.contents.chat_history import ChatHistory
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
-from semantic_kernel.contents.image_content import ImageContent
 from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
 from semantic_kernel.contents.streaming_text_content import StreamingTextContent
 from semantic_kernel.contents.text_content import TextContent
@@ -123,7 +120,7 @@ async def get_chat_message_contents(
             A list of chat message contents.
         """
         response: ChatCompletions = await self.client.complete(
-            messages=self._format_chat_history(chat_history),
+            messages=format_chat_history(chat_history),
             model_extras=settings.extra_parameters,
             **settings.prepare_settings_dict(),
         )
@@ -149,7 +146,7 @@ async def get_streaming_chat_message_contents(
         """
         response: AsyncStreamingChatCompletions = await self.client.complete(
             stream=True,
-            messages=self._format_chat_history(chat_history),
+            messages=format_chat_history(chat_history),
             model_extras=settings.extra_parameters,
             **settings.prepare_settings_dict(),
         )
@@ -264,43 +261,6 @@ def _create_streaming_chat_message_content(
             metadata=metadata,
         )
 
-    def _format_chat_history(self, chat_history: ChatHistory) -> list[ChatRequestMessage]:
-        """Format the chat history to the expected objects for the client.
-
-        Args:
-            chat_history: The chat history.
-
-        Returns:
-            A list of formatted chat history.
-        """
-        chat_request_messages: list[ChatRequestMessage] = []
-
-        for message in chat_history.messages:
-            if message.role != AuthorRole.USER or not any(isinstance(item, ImageContent) for item in message.items):
-                chat_request_messages.append(_MESSAGE_CONVERTER[message.role](content=message.content))
-                continue
-
-            # If it's a user message and there are any image items in the message, we need to create a list of
-            # content items, otherwise we need to just pass in the content as a string or it will error.
-            contentItems = []
-            for item in message.items:
-                if isinstance(item, TextContent):
-                    contentItems.append(TextContentItem(text=item.text))
-                elif isinstance(item, ImageContent) and (item.data_uri or item.uri):
-                    contentItems.append(
-                        ImageContentItem(
-                            image_url=ImageUrl(url=item.data_uri or str(item.uri), detail=ImageDetailLevel.Auto)
-                        )
-                    )
-                else:
-                    logger.warning(
-                        "Unsupported item type in User message while formatting chat history for Azure AI"
-                        f" Inference: {type(item)}"
-                    )
-            chat_request_messages.append(_MESSAGE_CONVERTER[message.role](content=contentItems))
-
-        return chat_request_messages
-
     def get_prompt_execution_settings_class(
         self,
     ) -> AzureAIInferenceChatPromptExecutionSettings:
diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_conversion_utils.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_conversion_utils.py
new file mode 100644
index 000000000000..286b28b22cc8
--- /dev/null
+++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_conversion_utils.py
@@ -0,0 +1,158 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import logging
+from collections.abc import Callable
+
+from azure.ai.inference.models import (
+    AssistantMessage,
+    ChatCompletionsFunctionToolCall,
+    ChatRequestMessage,
+    FunctionCall,
+    ImageContentItem,
+    ImageDetailLevel,
+    ImageUrl,
+    SystemMessage,
+    TextContentItem,
+    ToolMessage,
+    UserMessage,
+)
+
+from semantic_kernel.contents.chat_history import ChatHistory
+from semantic_kernel.contents.chat_message_content import ChatMessageContent
+from semantic_kernel.contents.function_call_content import FunctionCallContent
+from semantic_kernel.contents.function_result_content import FunctionResultContent
+from semantic_kernel.contents.image_content import ImageContent
+from semantic_kernel.contents.text_content import TextContent
+from semantic_kernel.contents.utils.author_role import AuthorRole
+
+
+def _format_system_message(message: ChatMessageContent) -> SystemMessage:
+    """Format a system message to the expected object for the client.
+
+    Args:
+        message: The system message.
+
+    Returns:
+        The formatted system message.
+    """
+    return SystemMessage(content=message.content)
+
+
+def _format_user_message(message: ChatMessageContent) -> UserMessage:
+    """Format a user message to the expected object for the client.
+
+    If there are any image items in the message, we need to create a list of content items,
+    otherwise we need to just pass in the content as a string or it will error.
+
+    Args:
+        message: The user message.
+
+    Returns:
+        The formatted user message.
+    """
+    if not any(isinstance(item, (ImageContent)) for item in message.items):
+        return UserMessage(content=message.content)
+
+    contentItems = []
+    for item in message.items:
+        if isinstance(item, TextContent):
+            contentItems.append(TextContentItem(text=item.text))
+        elif isinstance(item, ImageContent) and (item.data_uri or item.uri):
+            contentItems.append(
+                ImageContentItem(image_url=ImageUrl(url=item.data_uri or str(item.uri), detail=ImageDetailLevel.Auto))
+            )
+        else:
+            logger.warning(
+                "Unsupported item type in User message while formatting chat history for Azure AI"
+                f" Inference: {type(item)}"
+            )
+
+    return UserMessage(content=contentItems)
+
+
+def _format_assistant_message(message: ChatMessageContent) -> AssistantMessage:
+    """Format an assistant message to the expected object for the client.
+
+    Args:
+        message: The assistant message.
+
+    Returns:
+        The formatted assistant message.
+    """
+    contentItems = []
+    toolCalls = []
+
+    for item in message.items:
+        if isinstance(item, TextContent):
+            contentItems.append(TextContentItem(text=item.text))
+        elif isinstance(item, FunctionCallContent):
+            toolCalls.append(
+                ChatCompletionsFunctionToolCall(
+                    id=item.id, function=FunctionCall(name=item.name, arguments=item.arguments)
+                )
+            )
+        else:
+            logger.warning(
+                "Unsupported item type in Assistant message while formatting chat history for Azure AI"
+                f" Inference: {type(item)}"
+            )
+
+    # toolCalls cannot be an empty list, so we need to set it to None if it is empty
+    return AssistantMessage(content=contentItems, tool_calls=toolCalls if toolCalls else None)
+
+
+def _format_tool_message(message: ChatMessageContent) -> ToolMessage:
+    """Format a tool message to the expected object for the client.
+
+    Args:
+        message: The tool message.
+
+    Returns:
+        The formatted tool message.
+    """
+    if len(message.items) != 1:
+        logger.warning(
+            "Unsupported number of items in Tool message while formatting chat history for Azure AI"
+            f" Inference: {len(message.items)}"
+        )
+
+    if not isinstance(message.items[0], FunctionResultContent):
+        logger.warning(
+            "Unsupported item type in Tool message while formatting chat history for Azure AI"
+            f" Inference: {type(message.items[0])}"
+        )
+
+    return ToolMessage(content=message.items[0].result, tool_call_id=message.items[0].id)
+
+
+_MESSAGE_CONVERTER: dict[AuthorRole, Callable[[ChatMessageContent], ChatRequestMessage]] = {
+    AuthorRole.SYSTEM: _format_system_message,
+    AuthorRole.USER: _format_user_message,
+    AuthorRole.ASSISTANT: _format_assistant_message,
+    AuthorRole.TOOL: _format_tool_message,
+}
+
+logger: logging.Logger = logging.getLogger(__name__)
+
+
+def format_chat_history(chat_history: ChatHistory) -> list[ChatRequestMessage]:
+    """Convert each chat history message with the converter for its role; skip unsupported roles.
+
+    Args:
+        chat_history: The chat history whose messages are converted in order.
+
+    Returns:
+        The converted request messages; a message with an unsupported role is logged and left out.
+    """
+    chat_request_messages: list[ChatRequestMessage] = []
+
+    for message in chat_history.messages:
+        if message.role not in _MESSAGE_CONVERTER:
+            logger.warning(
+                f"Unsupported author role in chat history while formatting for Azure AI Inference: {message.role}"
+            )
+            continue
+
+        chat_request_messages.append(_MESSAGE_CONVERTER[message.role](message))
+
+    return chat_request_messages
diff --git a/python/tests/integration/completions/test_chat_completions.py b/python/tests/integration/completions/test_chat_completions.py
index c70e548910bf..8bf1f56dab1c 100644
--- a/python/tests/integration/completions/test_chat_completions.py
+++ b/python/tests/integration/completions/test_chat_completions.py
@@ -383,6 +383,58 @@ def services() -> dict[str, tuple[ChatCompletionClientBase, type[PromptExecution
             ["house", "germany"],
             id="azure_ai_inference_image_input_file",
         ),
+        pytest.param(
+            "azure_ai_inference",
+            {
+                "function_choice_behavior": FunctionChoiceBehavior.Auto(
+                    auto_invoke=True, filters={"excluded_plugins": ["chat"]}
+                )
+            },
+            [
+                ChatMessageContent(role=AuthorRole.USER, items=[TextContent(text="What is 3+345?")]),
+            ],
+            ["348"],
+            id="azure_ai_inference_tool_call_auto",
+        ),
+        pytest.param(
+            "azure_ai_inference",
+            {
+                "function_choice_behavior": FunctionChoiceBehavior.Auto(
+                    auto_invoke=False, filters={"excluded_plugins": ["chat"]}
+                )
+            },
+            [
+                ChatMessageContent(role=AuthorRole.USER, items=[TextContent(text="What is 3+345?")]),
+            ],
+            ["348"],
+            id="azure_ai_inference_tool_call_non_auto",
+        ),
+        pytest.param(
+            "azure_ai_inference",
+            {},
+            [
+                [
+                    ChatMessageContent(
+                        role=AuthorRole.USER,
+                        items=[TextContent(text="What was our 2024 revenue?")],
+                    ),
+                    ChatMessageContent(
+                        role=AuthorRole.ASSISTANT,
+                        items=[
+                            FunctionCallContent(
+                                id="fin", name="finance-search", arguments='{"company": "contoso", "year": 2024}'
+                            )
+                        ],
+                    ),
+                    ChatMessageContent(
+                        role=AuthorRole.TOOL,
+                        items=[FunctionResultContent(id="fin", name="finance-search", result="1.2B")],
+                    ),
+                ],
+            ],
+            ["1.2"],
+            id="azure_ai_inference_tool_call_flow",
+        ),
     ],
 )
 

From 3df66836857e10ea3cca5ed0629d4bfc474c3f2b Mon Sep 17 00:00:00 2001
From: Tao Chen <taochen@microsoft.com>
Date: Mon, 1 Jul 2024 13:39:40 -0700
Subject: [PATCH 02/10] streaming

---
 .../azure_ai_inference_chat_completion.py     | 260 ++++++++++++++----
 .../services/open_ai_chat_completion_base.py  |  13 +-
 2 files changed, 204 insertions(+), 69 deletions(-)

diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
index 58879f0eec58..72ce92582f04 100644
--- a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
+++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
@@ -1,19 +1,17 @@
 # Copyright (c) Microsoft. All rights reserved.
 
+import asyncio
 import logging
 from collections.abc import AsyncGenerator
+from functools import reduce
 from typing import Any
 
 from azure.ai.inference.aio import ChatCompletionsClient
 from azure.ai.inference.models import (
-    AssistantMessage,
     AsyncStreamingChatCompletions,
     ChatChoice,
     ChatCompletions,
     StreamingChatChoiceUpdate,
-    SystemMessage,
-    ToolMessage,
-    UserMessage,
 )
 from azure.core.credentials import AzureKeyCredential
 from pydantic import ValidationError
@@ -27,6 +25,7 @@
     format_chat_history,
 )
 from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
+from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
 from semantic_kernel.contents.chat_history import ChatHistory
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
@@ -35,16 +34,14 @@
 from semantic_kernel.contents.text_content import TextContent
 from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.contents.utils.finish_reason import FinishReason
-from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError
+from semantic_kernel.exceptions.service_exceptions import (
+    ServiceInitializationError,
+    ServiceInvalidExecutionSettingsError,
+)
+from semantic_kernel.functions.kernel_arguments import KernelArguments
+from semantic_kernel.kernel import Kernel
 from semantic_kernel.utils.experimental_decorator import experimental_class
 
-_MESSAGE_CONVERTER: dict[AuthorRole, Any] = {
-    AuthorRole.SYSTEM: SystemMessage,
-    AuthorRole.USER: UserMessage,
-    AuthorRole.ASSISTANT: AssistantMessage,
-    AuthorRole.TOOL: ToolMessage,
-}
-
 logger: logging.Logger = logging.getLogger(__name__)
 
 
@@ -103,6 +100,7 @@ def __init__(
             client=client,
         )
 
+    # region Non-streaming
     async def get_chat_message_contents(
         self,
         chat_history: ChatHistory,
@@ -119,6 +117,46 @@ async def get_chat_message_contents(
         Returns:
             A list of chat message contents.
         """
+        if (
+            settings.function_choice_behavior is None
+            or not settings.function_choice_behavior.auto_invoke_kernel_functions
+        ):
+            return await self._send_chat_request(chat_history, settings)
+
+        self._verify_function_choice_behavior(settings, **kwargs)
+        kernel: Kernel = kwargs.get("kernel")
+        arguments: KernelArguments = kwargs.get("arguments")
+
+        for request_index in range(settings.function_choice_behavior.maximum_auto_invoke_attempts):
+            completions = await self._send_chat_request(chat_history, settings)
+            # TODO(<TaoChen>): make sure there is only one message.
+            # Currently only OpenAI models allow multiple messages but the Azure AI Inference service
+            # requires a special handling for multiple messages.
+            chat_history.add_message(message=completions[0])
+            function_calls = [item for item in chat_history.messages[-1].items if isinstance(item, FunctionCallContent)]
+            if (fc_count := len(function_calls)) == 0:
+                return completions
+
+            results = await self._process_function_calls(
+                function_calls=function_calls,
+                chat_history=chat_history,
+                kernel=kernel,
+                arguments=arguments,
+                function_call_count=fc_count,
+                request_index=request_index,
+                function_behavior=settings.function_choice_behavior,
+            )
+
+            if any(result.terminate for result in results if result is not None):
+                return completions
+        else:
+            # do a final call without auto function calling
+            return await self._send_chat_request(chat_history, settings)
+
+    async def _send_chat_request(
+        self, chat_history: ChatHistory, settings: AzureAIInferenceChatPromptExecutionSettings
+    ) -> list[ChatMessageContent]:
+        """Send a chat request to the Azure AI Inference service."""
         response: ChatCompletions = await self.client.complete(
             messages=format_chat_history(chat_history),
             model_extras=settings.extra_parameters,
@@ -128,53 +166,6 @@ async def get_chat_message_contents(
 
         return [self._create_chat_message_content(response, choice, response_metadata) for choice in response.choices]
 
-    async def get_streaming_chat_message_contents(
-        self,
-        chat_history: ChatHistory,
-        settings: AzureAIInferenceChatPromptExecutionSettings,
-        **kwargs: Any,
-    ) -> AsyncGenerator[list[StreamingChatMessageContent], Any]:
-        """Get streaming chat message contents from the Azure AI Inference service.
-
-        Args:
-            chat_history: A list of chats in a chat_history object.
-            settings: Settings for the request.
-            kwargs: Optional arguments.
-
-        Returns:
-            A list of chat message contents.
-        """
-        response: AsyncStreamingChatCompletions = await self.client.complete(
-            stream=True,
-            messages=format_chat_history(chat_history),
-            model_extras=settings.extra_parameters,
-            **settings.prepare_settings_dict(),
-        )
-
-        async for chunk in response:
-            if len(chunk.choices) == 0:
-                continue
-            chunk_metadata = self._get_metadata_from_response(chunk)
-            yield [
-                self._create_streaming_chat_message_content(chunk, choice, chunk_metadata) for choice in chunk.choices
-            ]
-
-    def _get_metadata_from_response(self, response: ChatCompletions | AsyncStreamingChatCompletions) -> dict[str, Any]:
-        """Get metadata from the response.
-
-        Args:
-            response: The response from the service.
-
-        Returns:
-            A dictionary containing metadata.
-        """
-        return {
-            "id": response.id,
-            "model": response.model,
-            "created": response.created,
-            "usage": response.usage,
-        }
-
     def _create_chat_message_content(
         self, response: ChatCompletions, choice: ChatChoice, metadata: dict[str, Any]
     ) -> ChatMessageContent:
@@ -215,6 +206,101 @@ def _create_chat_message_content(
             metadata=metadata,
         )
 
+    # endregion
+
+    # region Streaming
+    async def get_streaming_chat_message_contents(
+        self,
+        chat_history: ChatHistory,
+        settings: AzureAIInferenceChatPromptExecutionSettings,
+        **kwargs: Any,
+    ) -> AsyncGenerator[list[StreamingChatMessageContent], Any]:
+        """Get streaming chat message contents from the Azure AI Inference service.
+
+        Args:
+            chat_history: A list of chats in a chat_history object.
+            settings: Settings for the request.
+            kwargs: Optional arguments.
+
+        Returns:
+            A list of chat message contents.
+        """
+        if (
+            settings.function_choice_behavior is None
+            or not settings.function_choice_behavior.auto_invoke_kernel_functions
+        ):
+            # No auto invoke is required.
+            async_generator = self._send_chat_streaming_request(chat_history, settings)
+        else:
+            # Auto invoke is required.
+            async_generator = self._get_streaming_chat_message_contents_auto_invoke(chat_history, settings, **kwargs)
+
+        async for messages in async_generator:
+            yield messages
+
+    async def _get_streaming_chat_message_contents_auto_invoke(
+        self,
+        chat_history: ChatHistory,
+        settings: AzureAIInferenceChatPromptExecutionSettings,
+        **kwargs: Any,
+    ) -> AsyncGenerator[list[StreamingChatMessageContent], Any]:
+        """Get streaming chat message contents from the Azure AI Inference service with auto invoking functions."""
+        self._verify_function_choice_behavior(settings, **kwargs)
+        kernel: Kernel = kwargs.get("kernel")
+        arguments: KernelArguments = kwargs.get("arguments")
+        request_attempts = settings.function_choice_behavior.maximum_auto_invoke_attempts
+
+        for request_index in range(request_attempts):
+            all_messages: list[StreamingChatMessageContent] = []
+            function_call_returned = False
+            async for messages in self._send_chat_streaming_request(chat_history, settings):
+                for message in messages:
+                    if message:
+                        all_messages.append(message)
+                        if any(isinstance(item, FunctionCallContent) for item in message.items):
+                            function_call_returned = True
+                yield messages
+
+            if not function_call_returned:
+                # Response doesn't contain any function calls. No need to proceed to the next request.
+                return
+
+            full_completion: StreamingChatMessageContent = reduce(lambda x, y: x + y, all_messages)
+            function_calls = [item for item in full_completion.items if isinstance(item, FunctionCallContent)]
+            chat_history.add_message(message=full_completion)
+
+            results = await self._process_function_calls(
+                function_calls=function_calls,
+                chat_history=chat_history,
+                kernel=kernel,
+                arguments=arguments,
+                function_call_count=len(function_calls),
+                request_index=request_index,
+                function_behavior=settings.function_choice_behavior,
+            )
+
+            if any(result.terminate for result in results if result is not None):
+                return
+
+    async def _send_chat_streaming_request(
+        self, chat_history: ChatHistory, settings: AzureAIInferenceChatPromptExecutionSettings
+    ) -> AsyncGenerator[list[StreamingChatMessageContent], Any]:
+        """Send a streaming chat request to the Azure AI Inference service."""
+        response: AsyncStreamingChatCompletions = await self.client.complete(
+            stream=True,
+            messages=format_chat_history(chat_history),
+            model_extras=settings.extra_parameters,
+            **settings.prepare_settings_dict(),
+        )
+
+        async for chunk in response:
+            if len(chunk.choices) == 0:
+                continue
+            chunk_metadata = self._get_metadata_from_response(chunk)
+            yield [
+                self._create_streaming_chat_message_content(chunk, choice, chunk_metadata) for choice in chunk.choices
+            ]
+
     def _create_streaming_chat_message_content(
         self,
         chunk: AsyncStreamingChatCompletions,
@@ -261,6 +347,62 @@ def _create_streaming_chat_message_content(
             metadata=metadata,
         )
 
+    # endregion
+
+    def _get_metadata_from_response(self, response: ChatCompletions | AsyncStreamingChatCompletions) -> dict[str, Any]:
+        """Get metadata from the response.
+
+        Args:
+            response: The response from the service.
+
+        Returns:
+            A dictionary containing metadata.
+        """
+        return {
+            "id": response.id,
+            "model": response.model,
+            "created": response.created,
+            "usage": response.usage,
+        }
+
+    def _verify_function_choice_behavior(self, settings: AzureAIInferenceChatPromptExecutionSettings, **kwargs: Any):
+        """Verify the function choice behavior."""
+        kernel = kwargs.get("kernel", None)
+        arguments = kwargs.get("arguments", None)
+
+        if settings.function_choice_behavior is not None:
+            if kernel is None:
+                raise ServiceInvalidExecutionSettingsError("Kernel is required for tool calls.")
+            if arguments is None and settings.function_choice_behavior.auto_invoke_kernel_functions:
+                raise ServiceInvalidExecutionSettingsError("Kernel arguments are required for auto tool calls.")
+
+    async def _process_function_calls(
+        self,
+        function_calls: list[FunctionCallContent],
+        chat_history: ChatHistory,
+        kernel: Kernel,
+        arguments: KernelArguments,
+        function_call_count: int,
+        request_index: int,
+        function_behavior: FunctionChoiceBehavior,
+    ):
+        """Invoke the requested function calls concurrently via the kernel and return the gathered results."""
+        logger.info(f"processing {function_call_count} tool calls in parallel.")
+
+        return await asyncio.gather(
+            *[
+                kernel.invoke_function_call(
+                    function_call=function_call,
+                    chat_history=chat_history,
+                    arguments=arguments,
+                    function_call_count=function_call_count,
+                    request_index=request_index,
+                    function_behavior=function_behavior,
+                )
+                for function_call in function_calls
+            ],
+        )
+
     def get_prompt_execution_settings_class(
         self,
     ) -> AzureAIInferenceChatPromptExecutionSettings:
diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py
index 5047b1c0901b..4bdb95b8d62b 100644
--- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py
+++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py
@@ -14,12 +14,8 @@
 
 from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
 from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior
-from semantic_kernel.connectors.ai.function_calling_utils import (
-    update_settings_from_function_call_configuration,
-)
-from semantic_kernel.connectors.ai.function_choice_behavior import (
-    FunctionChoiceBehavior,
-)
+from semantic_kernel.connectors.ai.function_calling_utils import update_settings_from_function_call_configuration
+from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
 from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import (
     OpenAIChatPromptExecutionSettings,
 )
@@ -33,10 +29,7 @@
 from semantic_kernel.contents.text_content import TextContent
 from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.contents.utils.finish_reason import FinishReason
-from semantic_kernel.exceptions import (
-    ServiceInvalidExecutionSettingsError,
-    ServiceInvalidResponseError,
-)
+from semantic_kernel.exceptions import ServiceInvalidExecutionSettingsError, ServiceInvalidResponseError
 from semantic_kernel.filters.auto_function_invocation.auto_function_invocation_context import (
     AutoFunctionInvocationContext,
 )

From 20a7a7a5db1e6877a3b3e74c308653203a0d58bc Mon Sep 17 00:00:00 2001
From: Tao Chen <taochen@microsoft.com>
Date: Mon, 1 Jul 2024 15:17:21 -0700
Subject: [PATCH 03/10] Configure function call behavior

---
 ..._ai_inference_prompt_execution_settings.py |  5 +-
 .../azure_ai_inference_chat_completion.py     | 50 +++++++++++++++----
 .../azure_ai_inference_conversion_utils.py    |  3 +-
 .../connectors/ai/function_calling_utils.py   | 30 +----------
 .../services/open_ai_chat_completion_base.py  | 23 +++++++--
 5 files changed, 68 insertions(+), 43 deletions(-)

diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/azure_ai_inference_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/azure_ai_inference_prompt_execution_settings.py
index f64646dcf0c7..804ddfd80267 100644
--- a/python/semantic_kernel/connectors/ai/azure_ai_inference/azure_ai_inference_prompt_execution_settings.py
+++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/azure_ai_inference_prompt_execution_settings.py
@@ -1,6 +1,6 @@
 # Copyright (c) Microsoft. All rights reserved.
 
-from typing import Literal
+from typing import Any, Literal
 
 from pydantic import Field
 
@@ -30,6 +30,9 @@ class AzureAIInferencePromptExecutionSettings(PromptExecutionSettings):
 class AzureAIInferenceChatPromptExecutionSettings(AzureAIInferencePromptExecutionSettings):
     """Azure AI Inference Chat Prompt Execution Settings."""
 
+    tools: list[dict[str, Any]] | None = Field(None, max_length=64)
+    tool_choice: str | None = None
+
 
 @experimental_class
 class AzureAIInferenceEmbeddingPromptExecutionSettings(PromptExecutionSettings):
diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
index 72ce92582f04..0d25990e2b94 100644
--- a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
+++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
@@ -11,6 +11,7 @@
     AsyncStreamingChatCompletions,
     ChatChoice,
     ChatCompletions,
+    ChatCompletionsFunctionToolCall,
     StreamingChatChoiceUpdate,
 )
 from azure.core.credentials import AzureKeyCredential
@@ -25,7 +26,11 @@
     format_chat_history,
 )
 from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
-from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
+from semantic_kernel.connectors.ai.function_calling_utils import kernel_function_metadata_to_function_call_format
+from semantic_kernel.connectors.ai.function_choice_behavior import (
+    FunctionCallChoiceConfiguration,
+    FunctionChoiceBehavior,
+)
 from semantic_kernel.contents.chat_history import ChatHistory
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
@@ -126,6 +131,7 @@ async def get_chat_message_contents(
         self._verify_function_choice_behavior(settings, **kwargs)
         kernel: Kernel = kwargs.get("kernel")
         arguments: KernelArguments = kwargs.get("arguments")
+        self._configure_function_choice_behavior(settings, kernel)
 
         for request_index in range(settings.function_choice_behavior.maximum_auto_invoke_attempts):
             completions = await self._send_chat_request(chat_history, settings)
@@ -248,6 +254,7 @@ async def _get_streaming_chat_message_contents_auto_invoke(
         self._verify_function_choice_behavior(settings, **kwargs)
         kernel: Kernel = kwargs.get("kernel")
         arguments: KernelArguments = kwargs.get("arguments")
+        self._configure_function_choice_behavior(settings, kernel)
         request_attempts = settings.function_choice_behavior.maximum_auto_invoke_attempts
 
         for request_index in range(request_attempts):
@@ -329,14 +336,15 @@ def _create_streaming_chat_message_content(
             )
         if choice.delta.tool_calls:
             for tool_call in choice.delta.tool_calls:
-                items.append(
-                    FunctionCallContent(
-                        id=tool_call.id,
-                        index=choice.index,
-                        name=tool_call.function.name,
-                        arguments=tool_call.function.arguments,
+                if isinstance(tool_call, ChatCompletionsFunctionToolCall):
+                    items.append(
+                        FunctionCallContent(
+                            id=tool_call.id,
+                            index=choice.index,
+                            name=tool_call.function.name,
+                            arguments=tool_call.function.arguments,
+                        )
                     )
-                )
 
         return StreamingChatMessageContent(
             role=AuthorRole(choice.delta.role) if choice.delta.role else AuthorRole.ASSISTANT,
@@ -376,6 +384,30 @@ def _verify_function_choice_behavior(self, settings: AzureAIInferenceChatPromptE
             if arguments is None and settings.function_choice_behavior.auto_invoke_kernel_functions:
                 raise ServiceInvalidExecutionSettingsError("Kernel arguments are required for auto tool calls.")
 
+    def _configure_function_choice_behavior(
+        self, settings: AzureAIInferenceChatPromptExecutionSettings, kernel: Kernel
+    ):
+        """Configure the function choice behavior to include the kernel functions."""
+
+        def _config_call_back(
+            function_choice_configuration: FunctionCallChoiceConfiguration,
+            settings: AzureAIInferenceChatPromptExecutionSettings,
+            type: str,
+        ):
+            """Update the settings from a FunctionChoiceConfiguration."""
+            if function_choice_configuration.available_functions:
+                settings.tool_choice = type
+                # The list of tool objects will be initialized with the JSON string returned by
+                # `kernel_function_metadata_to_function_call_format`.
+                settings.tools = [
+                    kernel_function_metadata_to_function_call_format(f)
+                    for f in function_choice_configuration.available_functions
+                ]
+
+        settings.function_choice_behavior.configure(
+            kernel=kernel, update_settings_callback=_config_call_back, settings=settings
+        )
+
     async def _process_function_calls(
         self,
         function_calls: list[FunctionCallContent],
@@ -391,7 +423,7 @@ async def _process_function_calls(
 
         return await asyncio.gather(
             *[
-                await kernel.invoke_function_call(
+                kernel.invoke_function_call(
                     function_call=function_call,
                     chat_history=chat_history,
                     arguments=arguments,
diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_conversion_utils.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_conversion_utils.py
index 286b28b22cc8..8a222ae160aa 100644
--- a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_conversion_utils.py
+++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_conversion_utils.py
@@ -122,7 +122,8 @@ def _format_tool_message(message: ChatMessageContent) -> ToolMessage:
             f" Inference: {type(message.items[0])}"
         )
 
-    return ToolMessage(content=message.items[0].result, tool_call_id=message.items[0].id)
+    # The API expects the result to be a string, so we need to convert it to a string
+    return ToolMessage(content=str(message.items[0].result), tool_call_id=message.items[0].id)
 
 
 _MESSAGE_CONVERTER: dict[AuthorRole, Callable[[ChatMessageContent], ChatRequestMessage]] = {
diff --git a/python/semantic_kernel/connectors/ai/function_calling_utils.py b/python/semantic_kernel/connectors/ai/function_calling_utils.py
index 70704093141f..9e46a01b6278 100644
--- a/python/semantic_kernel/connectors/ai/function_calling_utils.py
+++ b/python/semantic_kernel/connectors/ai/function_calling_utils.py
@@ -1,37 +1,9 @@
 # Copyright (c) Microsoft. All rights reserved.
 
-import logging
-from typing import TYPE_CHECKING, Any
+from typing import Any
 
-from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import (
-    OpenAIChatPromptExecutionSettings,
-)
 from semantic_kernel.functions.kernel_function_metadata import KernelFunctionMetadata
 
-if TYPE_CHECKING:
-    from semantic_kernel.connectors.ai.function_choice_behavior import (
-        FunctionCallChoiceConfiguration,
-    )
-    from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import (
-        OpenAIChatPromptExecutionSettings,
-    )
-
-logger = logging.getLogger(__name__)
-
-
-def update_settings_from_function_call_configuration(
-    function_choice_configuration: "FunctionCallChoiceConfiguration",
-    settings: "OpenAIChatPromptExecutionSettings",
-    type: str,
-) -> None:
-    """Update the settings from a FunctionChoiceConfiguration."""
-    if function_choice_configuration.available_functions:
-        settings.tool_choice = type
-        settings.tools = [
-            kernel_function_metadata_to_function_call_format(f)
-            for f in function_choice_configuration.available_functions
-        ]
-
 
 def kernel_function_metadata_to_function_call_format(
     metadata: KernelFunctionMetadata,
diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py
index 4bdb95b8d62b..0ba5427cc41a 100644
--- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py
+++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py
@@ -14,8 +14,11 @@
 
 from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
 from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior
-from semantic_kernel.connectors.ai.function_calling_utils import update_settings_from_function_call_configuration
-from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
+from semantic_kernel.connectors.ai.function_calling_utils import kernel_function_metadata_to_function_call_format
+from semantic_kernel.connectors.ai.function_choice_behavior import (
+    FunctionCallChoiceConfiguration,
+    FunctionChoiceBehavior,
+)
 from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import (
     OpenAIChatPromptExecutionSettings,
 )
@@ -411,11 +414,25 @@ def _update_settings(
         kernel: "Kernel | None" = None,
     ) -> None:
         """Update the settings with the chat history."""
+
+        def _config_call_back(
+            function_choice_configuration: FunctionCallChoiceConfiguration,
+            settings: OpenAIChatPromptExecutionSettings,
+            type: str,
+        ):
+            """Update the settings from a FunctionChoiceConfiguration."""
+            if function_choice_configuration.available_functions:
+                settings.tool_choice = type
+                settings.tools = [
+                    kernel_function_metadata_to_function_call_format(f)
+                    for f in function_choice_configuration.available_functions
+                ]
+
         settings.messages = self._prepare_chat_history_for_request(chat_history)
         if settings.function_choice_behavior and kernel:
             settings.function_choice_behavior.configure(
                 kernel=kernel,
-                update_settings_callback=update_settings_from_function_call_configuration,
+                update_settings_callback=_config_call_back,
                 settings=settings,
             )
 

From 11af91ee36ef469b412c142918ebe0d91c094394 Mon Sep 17 00:00:00 2001
From: Tao Chen <taochen@microsoft.com>
Date: Mon, 1 Jul 2024 15:35:30 -0700
Subject: [PATCH 04/10] Comments

---
 .../services/azure_ai_inference_chat_completion.py     | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
index 0d25990e2b94..3f5af1f072d3 100644
--- a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
+++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
@@ -135,9 +135,6 @@ async def get_chat_message_contents(
 
         for request_index in range(settings.function_choice_behavior.maximum_auto_invoke_attempts):
             completions = await self._send_chat_request(chat_history, settings)
-            # TODO(<TaoChen>): make sure there is only one message.
-            # Currently only OpenAI models allow multiple messages but the Azure AI Inference service
-            # requires a special handling for multiple messages.
             chat_history.add_message(message=completions[0])
             function_calls = [item for item in chat_history.messages[-1].items if isinstance(item, FunctionCallContent)]
             if (fc_count := len(function_calls)) == 0:
@@ -383,6 +380,13 @@ def _verify_function_choice_behavior(self, settings: AzureAIInferenceChatPromptE
                 raise ServiceInvalidExecutionSettingsError("Kernel is required for tool calls.")
             if arguments is None and settings.function_choice_behavior.auto_invoke_kernel_functions:
                 raise ServiceInvalidExecutionSettingsError("Kernel arguments are required for auto tool calls.")
+            if settings.extra_parameters is not None and settings.extra_parameters.get("n", 1) > 1:
+                # Only OpenAI models currently support multiple completions, and the Azure AI Inference service
+                # does not expose that option directly. To request more than one response, users must set
+                # `extra_parameters` with a key of "n" and a value greater than 1.
+                raise ServiceInvalidExecutionSettingsError(
+                    "Auto invocation of tool calls may only be used with a single completion."
+                )
 
     def _configure_function_choice_behavior(
         self, settings: AzureAIInferenceChatPromptExecutionSettings, kernel: Kernel

From e8dfbb1145a3cb385975410588a3aa7468ec874c Mon Sep 17 00:00:00 2001
From: Tao Chen <taochen@microsoft.com>
Date: Mon, 1 Jul 2024 15:42:41 -0700
Subject: [PATCH 05/10] Fix unit test

---
 .../open_ai/services/test_open_ai_chat_completion_base.py       | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/tests/unit/connectors/open_ai/services/test_open_ai_chat_completion_base.py b/python/tests/unit/connectors/open_ai/services/test_open_ai_chat_completion_base.py
index 38ac7313a121..a1eef6d81831 100644
--- a/python/tests/unit/connectors/open_ai/services/test_open_ai_chat_completion_base.py
+++ b/python/tests/unit/connectors/open_ai/services/test_open_ai_chat_completion_base.py
@@ -243,7 +243,7 @@ async def test_process_tool_calls_with_continuation_on_malformed_arguments():
         ai_model_id="test_model_id", service_id="test", client=MagicMock(spec=AsyncOpenAI)
     )
 
-    with patch("semantic_kernel.connectors.ai.function_calling_utils.logger", autospec=True):
+    with patch("semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.logger", autospec=True):
         await chat_completion_base._process_function_call(
             tool_call_mock,
             chat_history_mock,

From a97fca81f4e936344b957670cf0553566bdf4e38 Mon Sep 17 00:00:00 2001
From: Tao Chen <taochen@microsoft.com>
Date: Tue, 2 Jul 2024 14:15:26 -0700
Subject: [PATCH 06/10] Address comments

---
 .../services/azure_ai_inference_chat_completion.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
index 3f5af1f072d3..65e33f40bf6f 100644
--- a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
+++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
@@ -128,9 +128,9 @@ async def get_chat_message_contents(
         ):
             return await self._send_chat_request(chat_history, settings)
 
-        self._verify_function_choice_behavior(settings, **kwargs)
         kernel: Kernel = kwargs.get("kernel")
         arguments: KernelArguments = kwargs.get("arguments")
+        self._verify_function_choice_behavior(settings, kernel, arguments)
         self._configure_function_choice_behavior(settings, kernel)
 
         for request_index in range(settings.function_choice_behavior.maximum_auto_invoke_attempts):
@@ -248,9 +248,9 @@ async def _get_streaming_chat_message_contents_auto_invoke(
         **kwargs: Any,
     ) -> AsyncGenerator[list[StreamingChatMessageContent], Any]:
         """Get streaming chat message contents from the Azure AI Inference service with auto invoking functions."""
-        self._verify_function_choice_behavior(settings, **kwargs)
         kernel: Kernel = kwargs.get("kernel")
         arguments: KernelArguments = kwargs.get("arguments")
+        self._verify_function_choice_behavior(settings, kernel, arguments)
         self._configure_function_choice_behavior(settings, kernel)
         request_attempts = settings.function_choice_behavior.maximum_auto_invoke_attempts
 
@@ -370,11 +370,13 @@ def _get_metadata_from_response(self, response: ChatCompletions | AsyncStreaming
             "usage": response.usage,
         }
 
-    def _verify_function_choice_behavior(self, settings: AzureAIInferenceChatPromptExecutionSettings, **kwargs: Any):
+    def _verify_function_choice_behavior(
+        self,
+        settings: AzureAIInferenceChatPromptExecutionSettings,
+        kernel: Kernel,
+        arguments: KernelArguments,
+    ):
         """Verify the function choice behavior."""
-        kernel = kwargs.get("kernel", None)
-        arguments = kwargs.get("arguments", None)
-
         if settings.function_choice_behavior is not None:
             if kernel is None:
                 raise ServiceInvalidExecutionSettingsError("Kernel is required for tool calls.")

From 44ec18e2aa4e9129db176ebd14124c0c91dc5564 Mon Sep 17 00:00:00 2001
From: Tao Chen <taochen@microsoft.com>
Date: Fri, 5 Jul 2024 10:58:15 -0700
Subject: [PATCH 07/10] Override _prepare_chat_history_for_request

---
 .../azure_ai_inference_chat_completion.py     | 33 +++++++++++++++++--
 .../azure_ai_inference_conversion_utils.py    | 30 ++---------------
 .../ai/chat_completion_client_base.py         |  6 ++--
 3 files changed, 37 insertions(+), 32 deletions(-)

diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
index 65e33f40bf6f..616c024faf86 100644
--- a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
+++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
@@ -2,16 +2,23 @@
 
 import asyncio
 import logging
+import sys
 from collections.abc import AsyncGenerator
 from functools import reduce
 from typing import Any
 
+if sys.version >= "3.12":
+    from typing import override  # pragma: no cover
+else:
+    from typing_extensions import override  # pragma: no cover
+
 from azure.ai.inference.aio import ChatCompletionsClient
 from azure.ai.inference.models import (
     AsyncStreamingChatCompletions,
     ChatChoice,
     ChatCompletions,
     ChatCompletionsFunctionToolCall,
+    ChatRequestMessage,
     StreamingChatChoiceUpdate,
 )
 from azure.core.credentials import AzureKeyCredential
@@ -23,7 +30,7 @@
 )
 from semantic_kernel.connectors.ai.azure_ai_inference.services.azure_ai_inference_base import AzureAIInferenceBase
 from semantic_kernel.connectors.ai.azure_ai_inference.services.azure_ai_inference_conversion_utils import (
-    format_chat_history,
+    MESSAGE_CONVERTERS,
 )
 from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
 from semantic_kernel.connectors.ai.function_calling_utils import kernel_function_metadata_to_function_call_format
@@ -161,7 +168,7 @@ async def _send_chat_request(
     ) -> list[ChatMessageContent]:
         """Send a chat request to the Azure AI Inference service."""
         response: ChatCompletions = await self.client.complete(
-            messages=format_chat_history(chat_history),
+            messages=self._prepare_chat_history_for_request(chat_history),
             model_extras=settings.extra_parameters,
             **settings.prepare_settings_dict(),
         )
@@ -292,7 +299,7 @@ async def _send_chat_streaming_request(
         """Send a streaming chat request to the Azure AI Inference service."""
         response: AsyncStreamingChatCompletions = await self.client.complete(
             stream=True,
-            messages=format_chat_history(chat_history),
+            messages=self._prepare_chat_history_for_request(chat_history),
             model_extras=settings.extra_parameters,
             **settings.prepare_settings_dict(),
         )
@@ -354,6 +361,26 @@ def _create_streaming_chat_message_content(
 
     # endregion
 
+    @override
+    def _prepare_chat_history_for_request(
+        self,
+        chat_history: ChatHistory,
+        role_key: str = "role",
+        content_key: str = "content",
+    ) -> list[ChatRequestMessage]:
+        """Convert the chat history into Azure AI Inference request messages, skipping unsupported roles."""
+        # `role_key` and `content_key` exist only to match the base-class signature; they are not used here.
+        chat_request_messages: list[ChatRequestMessage] = []
+        for message in chat_history.messages:
+            if message.role not in MESSAGE_CONVERTERS:
+                logger.warning(
+                    f"Unsupported author role in chat history while formatting for Azure AI Inference: {message.role}"
+                )
+                continue
+            chat_request_messages.append(MESSAGE_CONVERTERS[message.role](message))
+
+        return chat_request_messages
+
     def _get_metadata_from_response(self, response: ChatCompletions | AsyncStreamingChatCompletions) -> dict[str, Any]:
         """Get metadata from the response.
 
diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_conversion_utils.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_conversion_utils.py
index 8a222ae160aa..33b1b04d631b 100644
--- a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_conversion_utils.py
+++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_conversion_utils.py
@@ -17,7 +17,6 @@
     UserMessage,
 )
 
-from semantic_kernel.contents.chat_history import ChatHistory
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
 from semantic_kernel.contents.function_result_content import FunctionResultContent
@@ -25,6 +24,8 @@
 from semantic_kernel.contents.text_content import TextContent
 from semantic_kernel.contents.utils.author_role import AuthorRole
 
+logger: logging.Logger = logging.getLogger(__name__)
+
 
 def _format_system_message(message: ChatMessageContent) -> SystemMessage:
     """Format a system message to the expected object for the client.
@@ -126,34 +127,9 @@ def _format_tool_message(message: ChatMessageContent) -> ToolMessage:
     return ToolMessage(content=str(message.items[0].result), tool_call_id=message.items[0].id)
 
 
-_MESSAGE_CONVERTER: dict[AuthorRole, Callable[[ChatMessageContent], ChatRequestMessage]] = {
+MESSAGE_CONVERTERS: dict[AuthorRole, Callable[[ChatMessageContent], ChatRequestMessage]] = {
     AuthorRole.SYSTEM: _format_system_message,
     AuthorRole.USER: _format_user_message,
     AuthorRole.ASSISTANT: _format_assistant_message,
     AuthorRole.TOOL: _format_tool_message,
 }
-
-logger: logging.Logger = logging.getLogger(__name__)
-
-
-def format_chat_history(chat_history: ChatHistory) -> list[ChatRequestMessage]:
-    """Format the chat history to the expected objects for the client.
-
-    Args:
-        chat_history: The chat history.
-
-    Returns:
-        A list of formatted chat history.
-    """
-    chat_request_messages: list[ChatRequestMessage] = []
-
-    for message in chat_history.messages:
-        if message.role not in _MESSAGE_CONVERTER:
-            logger.warning(
-                "Unsupported author role in chat history while formatting for Azure AI Inference: {message.role}"
-            )
-            continue
-
-        chat_request_messages.append(_MESSAGE_CONVERTER[message.role](message))
-
-    return chat_request_messages
diff --git a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py
index ab92d29fd65f..21332e7359b7 100644
--- a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py
+++ b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py
@@ -59,7 +59,7 @@ def _prepare_chat_history_for_request(
         chat_history: "ChatHistory",
         role_key: str = "role",
         content_key: str = "content",
-    ) -> list[dict[str, str | None]]:
+    ) -> Any:
         """Prepare the chat history for a request.
 
         Allowing customization of the key names for role/author, and optionally overriding the role.
@@ -68,12 +68,14 @@ def _prepare_chat_history_for_request(
             They require a "tool_call_id" and (function) "name" key, and the "metadata" key should
             be removed. The "encoding" key should also be removed.
 
+        Override this method to customize the formatting of the chat history for a request.
+
         Args:
             chat_history (ChatHistory): The chat history to prepare.
             role_key (str): The key name for the role/author.
             content_key (str): The key name for the content/message.
 
         Returns:
-            List[Dict[str, Optional[str]]]: The prepared chat history.
+            prepared_chat_history (Any): The prepared chat history for a request.
         """
         return [message.to_dict(role_key=role_key, content_key=content_key) for message in chat_history.messages]

From 6cb10fb68202d85df4863b7bd77767c30d949d79 Mon Sep 17 00:00:00 2001
From: Tao Chen <taochen@microsoft.com>
Date: Fri, 5 Jul 2024 11:43:45 -0700
Subject: [PATCH 08/10] update_settings_from_function_call_configuration

---
 .../azure_ai_inference_chat_completion.py     | 26 +++----------------
 .../connectors/ai/function_calling_utils.py   | 20 ++++++++++++++
 .../services/open_ai_chat_completion_base.py  | 23 +++-------------
 3 files changed, 27 insertions(+), 42 deletions(-)

diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
index 616c024faf86..a7b5416bf9d3 100644
--- a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
+++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
@@ -7,6 +7,8 @@
 from functools import reduce
 from typing import Any
 
+from semantic_kernel.connectors.ai.function_calling_utils import update_settings_from_function_call_configuration
+
 if sys.version >= "3.12":
     from typing import override  # pragma: no cover
 else:
@@ -33,11 +35,7 @@
     MESSAGE_CONVERTERS,
 )
 from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
-from semantic_kernel.connectors.ai.function_calling_utils import kernel_function_metadata_to_function_call_format
-from semantic_kernel.connectors.ai.function_choice_behavior import (
-    FunctionCallChoiceConfiguration,
-    FunctionChoiceBehavior,
-)
+from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
 from semantic_kernel.contents.chat_history import ChatHistory
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
@@ -421,24 +419,8 @@ def _configure_function_choice_behavior(
         self, settings: AzureAIInferenceChatPromptExecutionSettings, kernel: Kernel
     ):
         """Configure the function choice behavior to include the kernel functions."""
-
-        def _config_call_back(
-            function_choice_configuration: FunctionCallChoiceConfiguration,
-            settings: AzureAIInferenceChatPromptExecutionSettings,
-            type: str,
-        ):
-            """Update the settings from a FunctionChoiceConfiguration."""
-            if function_choice_configuration.available_functions:
-                settings.tool_choice = type
-                # The list of tool objects will be initialized with the JSON string returned by
-                # `kernel_function_metadata_to_function_call_format`.
-                settings.tools = [
-                    kernel_function_metadata_to_function_call_format(f)
-                    for f in function_choice_configuration.available_functions
-                ]
-
         settings.function_choice_behavior.configure(
-            kernel=kernel, update_settings_callback=_config_call_back, settings=settings
+            kernel=kernel, update_settings_callback=update_settings_from_function_call_configuration, settings=settings
         )
 
     async def _process_function_calls(
diff --git a/python/semantic_kernel/connectors/ai/function_calling_utils.py b/python/semantic_kernel/connectors/ai/function_calling_utils.py
index 9e46a01b6278..e9ebb64d6f35 100644
--- a/python/semantic_kernel/connectors/ai/function_calling_utils.py
+++ b/python/semantic_kernel/connectors/ai/function_calling_utils.py
@@ -2,9 +2,29 @@
 
 from typing import Any
 
+from semantic_kernel.connectors.ai.function_choice_behavior import FunctionCallChoiceConfiguration
+from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
 from semantic_kernel.functions.kernel_function_metadata import KernelFunctionMetadata
 
 
+def update_settings_from_function_call_configuration(
+    function_choice_configuration: FunctionCallChoiceConfiguration,
+    settings: PromptExecutionSettings,
+    type: str,
+) -> None:
+    """Update the settings from a FunctionChoiceConfiguration."""
+    if (
+        function_choice_configuration.available_functions
+        and hasattr(settings, "tool_choice")
+        and hasattr(settings, "tools")
+    ):
+        settings.tool_choice = type
+        settings.tools = [
+            kernel_function_metadata_to_function_call_format(f)
+            for f in function_choice_configuration.available_functions
+        ]
+
+
 def kernel_function_metadata_to_function_call_format(
     metadata: KernelFunctionMetadata,
 ) -> dict[str, Any]:
diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py
index 0ba5427cc41a..4bdb95b8d62b 100644
--- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py
+++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py
@@ -14,11 +14,8 @@
 
 from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
 from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior
-from semantic_kernel.connectors.ai.function_calling_utils import kernel_function_metadata_to_function_call_format
-from semantic_kernel.connectors.ai.function_choice_behavior import (
-    FunctionCallChoiceConfiguration,
-    FunctionChoiceBehavior,
-)
+from semantic_kernel.connectors.ai.function_calling_utils import update_settings_from_function_call_configuration
+from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
 from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import (
     OpenAIChatPromptExecutionSettings,
 )
@@ -414,25 +411,11 @@ def _update_settings(
         kernel: "Kernel | None" = None,
     ) -> None:
         """Update the settings with the chat history."""
-
-        def _config_call_back(
-            function_choice_configuration: FunctionCallChoiceConfiguration,
-            settings: OpenAIChatPromptExecutionSettings,
-            type: str,
-        ):
-            """Update the settings from a FunctionChoiceConfiguration."""
-            if function_choice_configuration.available_functions:
-                settings.tool_choice = type
-                settings.tools = [
-                    kernel_function_metadata_to_function_call_format(f)
-                    for f in function_choice_configuration.available_functions
-                ]
-
         settings.messages = self._prepare_chat_history_for_request(chat_history)
         if settings.function_choice_behavior and kernel:
             settings.function_choice_behavior.configure(
                 kernel=kernel,
-                update_settings_callback=_config_call_back,
+                update_settings_callback=update_settings_from_function_call_configuration,
                 settings=settings,
             )
 

From 6438c84fb6c9581d4879e10ba7a648603a7c14d1 Mon Sep 17 00:00:00 2001
From: Tao Chen <taochen@microsoft.com>
Date: Mon, 8 Jul 2024 12:49:43 -0700
Subject: [PATCH 09/10] Address comments

---
 .../services/azure_ai_inference_chat_completion.py         | 7 ++-----
 .../{azure_ai_inference_conversion_utils.py => utils.py}   | 0
 2 files changed, 2 insertions(+), 5 deletions(-)
 rename python/semantic_kernel/connectors/ai/azure_ai_inference/services/{azure_ai_inference_conversion_utils.py => utils.py} (100%)

diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
index a7b5416bf9d3..220989d62015 100644
--- a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
+++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
@@ -7,8 +7,6 @@
 from functools import reduce
 from typing import Any
 
-from semantic_kernel.connectors.ai.function_calling_utils import update_settings_from_function_call_configuration
-
 if sys.version >= "3.12":
     from typing import override  # pragma: no cover
 else:
@@ -31,10 +29,9 @@
     AzureAIInferenceSettings,
 )
 from semantic_kernel.connectors.ai.azure_ai_inference.services.azure_ai_inference_base import AzureAIInferenceBase
-from semantic_kernel.connectors.ai.azure_ai_inference.services.azure_ai_inference_conversion_utils import (
-    MESSAGE_CONVERTERS,
-)
+from semantic_kernel.connectors.ai.azure_ai_inference.services.utils import MESSAGE_CONVERTERS
 from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
+from semantic_kernel.connectors.ai.function_calling_utils import update_settings_from_function_call_configuration
 from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
 from semantic_kernel.contents.chat_history import ChatHistory
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_conversion_utils.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/utils.py
similarity index 100%
rename from python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_conversion_utils.py
rename to python/semantic_kernel/connectors/ai/azure_ai_inference/services/utils.py

From b51c21dbd2b9b56f4d49037058e3c419506e6315 Mon Sep 17 00:00:00 2001
From: Tao Chen <taochen@microsoft.com>
Date: Tue, 9 Jul 2024 08:30:16 -0700
Subject: [PATCH 10/10] _process_function_calls -> _invoke_function_calls

---
 .../services/azure_ai_inference_chat_completion.py        | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
index 220989d62015..4ebf2bbc7d19 100644
--- a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
+++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py
@@ -142,7 +142,7 @@ async def get_chat_message_contents(
             if (fc_count := len(function_calls)) == 0:
                 return completions
 
-            results = await self._process_function_calls(
+            results = await self._invoke_function_calls(
                 function_calls=function_calls,
                 chat_history=chat_history,
                 kernel=kernel,
@@ -275,7 +275,7 @@ async def _get_streaming_chat_message_contents_auto_invoke(
             function_calls = [item for item in full_completion.items if isinstance(item, FunctionCallContent)]
             chat_history.add_message(message=full_completion)
 
-            results = await self._process_function_calls(
+            results = await self._invoke_function_calls(
                 function_calls=function_calls,
                 chat_history=chat_history,
                 kernel=kernel,
@@ -420,7 +420,7 @@ def _configure_function_choice_behavior(
             kernel=kernel, update_settings_callback=update_settings_from_function_call_configuration, settings=settings
         )
 
-    async def _process_function_calls(
+    async def _invoke_function_calls(
         self,
         function_calls: list[FunctionCallContent],
         chat_history: ChatHistory,
@@ -430,7 +430,7 @@ async def _process_function_calls(
         request_index: int,
         function_behavior: FunctionChoiceBehavior,
     ):
-        """Process function calls."""
+        """Invoke function calls."""
         logger.info(f"processing {function_call_count} tool calls in parallel.")
 
         return await asyncio.gather(