Python: Verify local models in Ollama and LM Studio are compatible with the OpenAI connector #6973
Merged: TaoChenOSU merged 15 commits into microsoft:main from TaoChenOSU:taochen/local-models-with-openai-connector-2 on Jul 5, 2024.

Commits (15, all by TaoChenOSU):
- ebd79a4 Add samples to use local models
- 13bfe5c Setup Ollama for sample tests
- 918f4d8 Mark skip
- d32628a Misc
- d403cc5 Misc 2
- b524999 Merge branch 'main' into taochen/local-models-with-openai-connector-2
- 793418e Merge branch 'main' into taochen/local-models-with-openai-connector-2
- 85e51f7 Update readme
- dff067c Merge branch 'main' into taochen/local-models-with-openai-connector-2
- 8793d3d Merge branch 'main' into taochen/local-models-with-openai-connector-2
- a213a83 Merge branch 'main' into taochen/local-models-with-openai-connector-2
- b45f1a7 Make api key optional
- ad63f4b Merge branch 'main' into taochen/local-models-with-openai-connector-2
- ace8c0e Merge branch 'main' into taochen/local-models-with-openai-connector-2
- 8f96c18 Fix integration test
python/samples/concepts/local_models/lm_studio_chat_completion.py (new file, 83 additions):

```python
# Copyright (c) Microsoft. All rights reserved.

import asyncio

from openai import AsyncOpenAI

from semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion import OpenAIChatCompletion
from semantic_kernel.contents.chat_history import ChatHistory
from semantic_kernel.functions.kernel_arguments import KernelArguments
from semantic_kernel.kernel import Kernel

# This concept sample shows how to use the OpenAI connector to create a
# chat experience with a local model running in LM Studio: https://lmstudio.ai/
# Please follow the instructions here: https://lmstudio.ai/docs/local-server to set up LM Studio.
# The default model used in this sample is phi3 due to its compact size.

system_message = """
You are a chat bot. Your name is Mosscap and
you have one goal: figure out what people need.
Your full name, should you need to know it, is
Splendid Speckled Mosscap. You communicate
effectively, but you tend to answer with long
flowery prose.
"""

kernel = Kernel()

service_id = "local-gpt"

openAIClient: AsyncOpenAI = AsyncOpenAI(
    api_key="fake-key",  # This cannot be an empty string; use a fake key
    base_url="http://localhost:1234/v1",
)
kernel.add_service(OpenAIChatCompletion(service_id=service_id, ai_model_id="phi3", async_client=openAIClient))

settings = kernel.get_prompt_execution_settings_from_service_id(service_id)
settings.max_tokens = 2000
settings.temperature = 0.7
settings.top_p = 0.8

chat_function = kernel.add_function(
    plugin_name="ChatBot",
    function_name="Chat",
    prompt="{{$chat_history}}{{$user_input}}",
    template_format="semantic-kernel",
    prompt_execution_settings=settings,
)

chat_history = ChatHistory(system_message=system_message)
chat_history.add_user_message("Hi there, who are you?")
chat_history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need")


async def chat() -> bool:
    try:
        user_input = input("User:> ")
    except (KeyboardInterrupt, EOFError):
        print("\n\nExiting chat...")
        return False

    if user_input == "exit":
        print("\n\nExiting chat...")
        return False

    answer = await kernel.invoke(chat_function, KernelArguments(user_input=user_input, chat_history=chat_history))
    chat_history.add_user_message(user_input)
    chat_history.add_assistant_message(str(answer))
    print(f"Mosscap:> {answer}")
    return True


async def main() -> None:
    chatting = True
    while chatting:
        chatting = await chat()


if __name__ == "__main__":
    asyncio.run(main())
```
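The sample above routes everything through the kernel. A quicker way to confirm that LM Studio's local server actually speaks the OpenAI protocol is a bare AsyncOpenAI round trip. The sketch below is not part of the PR; it assumes LM Studio is serving phi3 at http://localhost:1234/v1, as the sample does.

```python
# Minimal sketch (not part of the PR): verify the LM Studio server answers
# an OpenAI-style chat/completions request before wiring it into the kernel.
import asyncio

from openai import AsyncOpenAI


async def smoke_test() -> None:
    # Same fake key and base_url as the sample; adjust if your server differs.
    client = AsyncOpenAI(api_key="fake-key", base_url="http://localhost:1234/v1")
    response = await client.chat.completions.create(
        model="phi3",
        messages=[{"role": "user", "content": "Say hello in one sentence."}],
        max_tokens=50,
    )
    print(response.choices[0].message.content)


if __name__ == "__main__":
    asyncio.run(smoke_test())
```

If this single call succeeds, the OpenAIChatCompletion connector in the sample should work against the same endpoint, since it drives the same client underneath.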
python/samples/concepts/local_models/lm_studio_text_embedding.py (new file, 62 additions):

```python
# Copyright (c) Microsoft. All rights reserved.

import asyncio

from openai import AsyncOpenAI

from semantic_kernel.connectors.ai.open_ai.services.open_ai_text_embedding import OpenAITextEmbedding
from semantic_kernel.core_plugins.text_memory_plugin import TextMemoryPlugin
from semantic_kernel.kernel import Kernel
from semantic_kernel.memory.semantic_text_memory import SemanticTextMemory
from semantic_kernel.memory.volatile_memory_store import VolatileMemoryStore

# This concept sample shows how to use the OpenAI connector to add memory
# to applications with a local embedding model running in LM Studio: https://lmstudio.ai/
# Please follow the instructions here: https://lmstudio.ai/docs/local-server to set up LM Studio.
# The default model used in this sample is from nomic.ai due to its compact size.

kernel = Kernel()

service_id = "local-gpt"

openAIClient: AsyncOpenAI = AsyncOpenAI(
    api_key="fake_key",  # This cannot be an empty string; use a fake key
    base_url="http://localhost:1234/v1",
)
kernel.add_service(
    OpenAITextEmbedding(
        service_id=service_id, ai_model_id="Nomic-embed-text-v1.5-Embedding-GGUF", async_client=openAIClient
    )
)

memory = SemanticTextMemory(storage=VolatileMemoryStore(), embeddings_generator=kernel.get_service(service_id))
kernel.add_plugin(TextMemoryPlugin(memory), "TextMemoryPlugin")


async def populate_memory(memory: SemanticTextMemory, collection_id: str = "generic") -> None:
    # Add some documents to the semantic memory
    await memory.save_information(collection=collection_id, id="info1", text="Your budget for 2024 is $100,000")
    await memory.save_information(collection=collection_id, id="info2", text="Your savings from 2023 are $50,000")
    await memory.save_information(collection=collection_id, id="info3", text="Your investments are $80,000")


async def search_memory_examples(memory: SemanticTextMemory, collection_id: str = "generic") -> None:
    questions = [
        "What is my budget for 2024?",
        "What are my savings from 2023?",
        "What are my investments?",
    ]

    for question in questions:
        print(f"Question: {question}")
        result = await memory.search(collection_id, question)
        print(f"Answer: {result[0].text}\n")


async def main() -> None:
    await populate_memory(memory)
    await search_memory_examples(memory)


if __name__ == "__main__":
    asyncio.run(main())
```
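Before involving SemanticTextMemory, it can help to confirm that the local server's embeddings endpoint responds at all. The sketch below is not part of the PR; the model name and port are assumptions copied from the sample and must match whatever LM Studio is actually serving.

```python
# Minimal sketch (not part of the PR): hit the OpenAI-style /v1/embeddings
# endpoint directly to confirm the local embedding model is reachable.
import asyncio

from openai import AsyncOpenAI


async def embedding_smoke_test() -> None:
    # Fake key and base_url mirror the sample; adjust to your setup.
    client = AsyncOpenAI(api_key="fake_key", base_url="http://localhost:1234/v1")
    response = await client.embeddings.create(
        model="Nomic-embed-text-v1.5-Embedding-GGUF",
        input=["Your budget for 2024 is $100,000"],
    )
    # A compatible server returns one embedding vector per input string.
    print(len(response.data), len(response.data[0].embedding))


if __name__ == "__main__":
    asyncio.run(embedding_smoke_test())
```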
python/samples/concepts/local_models/ollama_chat_completion.py (new file, 87 additions):

```python
# Copyright (c) Microsoft. All rights reserved.

import asyncio

from openai import AsyncOpenAI

from semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion import OpenAIChatCompletion
from semantic_kernel.contents.chat_history import ChatHistory
from semantic_kernel.functions.kernel_arguments import KernelArguments
from semantic_kernel.kernel import Kernel

# This concept sample shows how to use the OpenAI connector with
# a local model running in Ollama: https://github.com/ollama/ollama
# A docker image is also available: https://hub.docker.com/r/ollama/ollama
# The default model used in this sample is phi3 due to its compact size.
# At the time of creating this sample, Ollama only provides experimental
# compatibility with the `chat/completions` endpoint:
# https://github.com/ollama/ollama/blob/main/docs/openai.md
# Please follow the instructions in the Ollama repository to set up Ollama.

system_message = """
You are a chat bot. Your name is Mosscap and
you have one goal: figure out what people need.
Your full name, should you need to know it, is
Splendid Speckled Mosscap. You communicate
effectively, but you tend to answer with long
flowery prose.
"""

kernel = Kernel()

service_id = "local-gpt"

openAIClient: AsyncOpenAI = AsyncOpenAI(
    api_key="fake-key",  # This cannot be an empty string; use a fake key
    base_url="http://localhost:11434/v1",
)
kernel.add_service(OpenAIChatCompletion(service_id=service_id, ai_model_id="phi3", async_client=openAIClient))

settings = kernel.get_prompt_execution_settings_from_service_id(service_id)
settings.max_tokens = 2000
settings.temperature = 0.7
settings.top_p = 0.8

chat_function = kernel.add_function(
    plugin_name="ChatBot",
    function_name="Chat",
    prompt="{{$chat_history}}{{$user_input}}",
    template_format="semantic-kernel",
    prompt_execution_settings=settings,
)

chat_history = ChatHistory(system_message=system_message)
chat_history.add_user_message("Hi there, who are you?")
chat_history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need")


async def chat() -> bool:
    try:
        user_input = input("User:> ")
    except (KeyboardInterrupt, EOFError):
        print("\n\nExiting chat...")
        return False

    if user_input == "exit":
        print("\n\nExiting chat...")
        return False

    answer = await kernel.invoke(chat_function, KernelArguments(user_input=user_input, chat_history=chat_history))
    chat_history.add_user_message(user_input)
    chat_history.add_assistant_message(str(answer))
    print(f"Mosscap:> {answer}")
    return True


async def main() -> None:
    chatting = True
    while chatting:
        chatting = await chat()


if __name__ == "__main__":
    asyncio.run(main())
```
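Because the sample's comments flag Ollama's `chat/completions` support as experimental, streaming is a useful extra compatibility check beyond the single-shot calls the sample makes. The following is a hedged sketch, not part of the PR; it assumes `ollama serve` is running locally and that the phi3 model has been pulled with `ollama pull phi3`.

```python
# Hedged sketch (not part of the PR): stream tokens from Ollama's experimental
# OpenAI-compatible endpoint to check delta-based streaming works as expected.
import asyncio

from openai import AsyncOpenAI


async def stream_smoke_test() -> None:
    # Same fake key and Ollama port as the sample above.
    client = AsyncOpenAI(api_key="fake-key", base_url="http://localhost:11434/v1")
    stream = await client.chat.completions.create(
        model="phi3",
        messages=[{"role": "user", "content": "Name three kinds of moss."}],
        stream=True,
    )
    # Chunks carry incremental deltas; content may be None on the final chunk.
    async for chunk in stream:
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="")
    print()


if __name__ == "__main__":
    asyncio.run(stream_smoke_test())
```

If the deltas arrive incrementally here, the endpoint behaves like the OpenAI streaming API that the connector's streaming methods expect.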