diff --git a/.github/workflows/python-samples-tests.yml b/.github/workflows/python-samples-tests.yml
deleted file mode 100644
index ed442503c9f7..000000000000
--- a/.github/workflows/python-samples-tests.yml
+++ /dev/null
@@ -1,55 +0,0 @@
-#
-# This workflow will run all python samples tests.
-#
-
-name: Python Samples Tests
-
-on:
-  workflow_dispatch:
-  schedule:
-    - cron: "0 1 * * 0" # Run at 1AM UTC daily on Sunday
-
-jobs:
-  python-samples-tests:
-    runs-on: ${{ matrix.os }}
-    strategy:
-      max-parallel: 1
-      fail-fast: true
-      matrix:
-        python-version: ["3.10", "3.11", "3.12"]
-        os: [ubuntu-latest, windows-latest, macos-latest]
-        service: ['AzureOpenAI']
-    steps:
-      - uses: actions/checkout@v4
-      - name: Install poetry
-        run: pipx install poetry
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python-version }}
-          cache: "poetry"
-      - name: Run samples Tests
-        id: run_tests
-        shell: bash
-        env: # Set Azure credentials secret as an input
-          GLOBAL_LLM_SERVICE: ${{ matrix.service }}
-          AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME: ${{ vars.AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME }}
-          AZURE_OPENAI_CHAT_DEPLOYMENT_NAME: ${{ vars.AZURE_OPENAI_CHAT_DEPLOYMENT_NAME }}
-          AZURE_OPENAI_TEXT_DEPLOYMENT_NAME: ${{ vars.AZURE_OPENAI_TEXT_DEPLOYMENT_NAME }}
-          AZURE_OPENAI_API_VERSION: ${{ vars.AZURE_OPENAI_API_VERSION }}
-          AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
-          AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
-          BING_API_KEY: ${{ secrets.BING_API_KEY }}
-          OPENAI_CHAT_MODEL_ID: ${{ vars.OPENAI_CHAT_MODEL_ID }}
-          OPENAI_TEXT_MODEL_ID: ${{ vars.OPENAI_TEXT_MODEL_ID }}
-          OPENAI_EMBEDDING_MODEL_ID: ${{ vars.OPENAI_EMBEDDING_MODEL_ID }}
-          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-          PINECONE_API_KEY: ${{ secrets.PINECONE__APIKEY }}
-          POSTGRES_CONNECTION_STRING: ${{secrets.POSTGRES__CONNECTIONSTR}}
-          AZURE_AI_SEARCH_API_KEY: ${{secrets.AZURE_AI_SEARCH_API_KEY}}
-          AZURE_AI_SEARCH_ENDPOINT: ${{secrets.AZURE_AI_SEARCH_ENDPOINT}}
-          MONGODB_ATLAS_CONNECTION_STRING: ${{secrets.MONGODB_ATLAS_CONNECTION_STRING}}
-        run: |
-          cd python
-          poetry run pytest ./tests/samples -v
-
diff --git a/python/samples/concepts/README.md b/python/samples/concepts/README.md
index 72028080bd2a..7832d6717a9b 100644
--- a/python/samples/concepts/README.md
+++ b/python/samples/concepts/README.md
@@ -9,6 +9,7 @@ This section contains code snippets that demonstrate the usage of Semantic Kerne
 | Filtering | Creating and using Filters |
 | Functions | Invoking [`Method`](https://github.com/microsoft/semantic-kernel/blob/main/python/semantic_kernel/functions/kernel_function_from_method.py) or [`Prompt`](https://github.com/microsoft/semantic-kernel/blob/main/python/semantic_kernel/functions/kernel_function_from_prompt.py) functions with [`Kernel`](https://github.com/microsoft/semantic-kernel/blob/main/python/semantic_kernel/kernel.py) |
 | Grounding | An example of how to perform LLM grounding |
+| Local Models | Using the [`OpenAI connector`](https://github.com/microsoft/semantic-kernel/blob/main/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion.py) to talk to models hosted locally in Ollama and LM Studio |
 | Logging | Showing how to set up logging |
 | Memory | Using [`Memory`](https://github.com/microsoft/semantic-kernel/tree/main/dotnet/src/SemanticKernel.Abstractions/Memory) AI concepts |
 | On Your Data | Examples of using AzureOpenAI [`On Your Data`](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/use-your-data?tabs=mongo-db) |
diff --git a/python/samples/concepts/local_models/lm_studio_chat_completion.py b/python/samples/concepts/local_models/lm_studio_chat_completion.py
new file mode 100644
index 000000000000..d1c480720c89
--- /dev/null
+++ b/python/samples/concepts/local_models/lm_studio_chat_completion.py
@@ -0,0 +1,83 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+
+import asyncio
+
+from openai import AsyncOpenAI
+
+from semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion import OpenAIChatCompletion
+from semantic_kernel.contents.chat_history import ChatHistory
+from semantic_kernel.functions.kernel_arguments import KernelArguments
+from semantic_kernel.kernel import Kernel
+
+# This concept sample shows how to use the OpenAI connector to create a
+# chat experience with a local model running in LM Studio: https://lmstudio.ai/
+# Please follow the instructions here: https://lmstudio.ai/docs/local-server to set up LM Studio.
+# The default model used in this sample is phi3 due to its compact size.
+
+system_message = """
+You are a chat bot. Your name is Mosscap and
+you have one goal: figure out what people need.
+Your full name, should you need to know it, is
+Splendid Speckled Mosscap. You communicate
+effectively, but you tend to answer with long
+flowery prose.
+"""
+
+kernel = Kernel()
+
+service_id = "local-gpt"
+
+openAIClient: AsyncOpenAI = AsyncOpenAI(
+    api_key="fake-key",  # This cannot be an empty string, use a fake key
+    base_url="http://localhost:1234/v1",
+)
+kernel.add_service(OpenAIChatCompletion(service_id=service_id, ai_model_id="phi3", async_client=openAIClient))
+
+settings = kernel.get_prompt_execution_settings_from_service_id(service_id)
+settings.max_tokens = 2000
+settings.temperature = 0.7
+settings.top_p = 0.8
+
+chat_function = kernel.add_function(
+    plugin_name="ChatBot",
+    function_name="Chat",
+    prompt="{{$chat_history}}{{$user_input}}",
+    template_format="semantic-kernel",
+    prompt_execution_settings=settings,
+)
+
+chat_history = ChatHistory(system_message=system_message)
+chat_history.add_user_message("Hi there, who are you?")
+chat_history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need")
+
+
+async def chat() -> bool:
+    try:
+        user_input = input("User:> ")
+    except KeyboardInterrupt:
+        print("\n\nExiting chat...")
+        return False
+    except EOFError:
+        print("\n\nExiting chat...")
+        return False
+
+    if user_input == "exit":
+        print("\n\nExiting chat...")
+        return False
+
+    answer = await kernel.invoke(chat_function, KernelArguments(user_input=user_input, chat_history=chat_history))
+    chat_history.add_user_message(user_input)
+    chat_history.add_assistant_message(str(answer))
+    print(f"Mosscap:> {answer}")
+    return True
+
+
+async def main() -> None:
+    chatting = True
+    while chatting:
+        chatting = await chat()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
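Note on the sample above: LM Studio's local server only needs to speak the OpenAI wire format, so it can be probed with the openai client directly before involving Semantic Kernel. The following is a minimal smoke-test sketch, not part of this diff; the model name "phi3" and port 1234 mirror the sample's assumptions.

# Sketch: verify the LM Studio endpoint responds before wiring it into the kernel.
import asyncio

from openai import AsyncOpenAI


async def smoke_test() -> None:
    client = AsyncOpenAI(api_key="fake-key", base_url="http://localhost:1234/v1")
    response = await client.chat.completions.create(
        model="phi3",  # must match a model loaded in LM Studio
        messages=[{"role": "user", "content": "Reply with one word: pong"}],
        max_tokens=5,
    )
    print(response.choices[0].message.content)


if __name__ == "__main__":
    asyncio.run(smoke_test())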
diff --git a/python/samples/concepts/local_models/lm_studio_text_embedding.py b/python/samples/concepts/local_models/lm_studio_text_embedding.py
new file mode 100644
index 000000000000..807c0aff349c
--- /dev/null
+++ b/python/samples/concepts/local_models/lm_studio_text_embedding.py
@@ -0,0 +1,62 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import asyncio
+
+from openai import AsyncOpenAI
+
+from semantic_kernel.connectors.ai.open_ai.services.open_ai_text_embedding import OpenAITextEmbedding
+from semantic_kernel.core_plugins.text_memory_plugin import TextMemoryPlugin
+from semantic_kernel.kernel import Kernel
+from semantic_kernel.memory.semantic_text_memory import SemanticTextMemory
+from semantic_kernel.memory.volatile_memory_store import VolatileMemoryStore
+
+# This concept sample shows how to use the OpenAI connector to add memory
+# to applications with a local embedding model running in LM Studio: https://lmstudio.ai/
+# Please follow the instructions here: https://lmstudio.ai/docs/local-server to set up LM Studio.
+# The default model used in this sample is from nomic.ai due to its compact size.
+
+kernel = Kernel()
+
+service_id = "local-gpt"
+
+openAIClient: AsyncOpenAI = AsyncOpenAI(
+    api_key="fake_key",  # This cannot be an empty string, use a fake key
+    base_url="http://localhost:1234/v1",
+)
+kernel.add_service(
+    OpenAITextEmbedding(
+        service_id=service_id, ai_model_id="Nomic-embed-text-v1.5-Embedding-GGUF", async_client=openAIClient
+    )
+)
+
+memory = SemanticTextMemory(storage=VolatileMemoryStore(), embeddings_generator=kernel.get_service(service_id))
+kernel.add_plugin(TextMemoryPlugin(memory), "TextMemoryPlugin")
+
+
+async def populate_memory(memory: SemanticTextMemory, collection_id="generic") -> None:
+    # Add some documents to the semantic memory
+    await memory.save_information(collection=collection_id, id="info1", text="Your budget for 2024 is $100,000")
+    await memory.save_information(collection=collection_id, id="info2", text="Your savings from 2023 are $50,000")
+    await memory.save_information(collection=collection_id, id="info3", text="Your investments are $80,000")
+
+
+async def search_memory_examples(memory: SemanticTextMemory, collection_id="generic") -> None:
+    questions = [
+        "What is my budget for 2024?",
+        "What are my savings from 2023?",
+        "What are my investments?",
+    ]
+
+    for question in questions:
+        print(f"Question: {question}")
+        result = await memory.search(collection_id, question)
+        print(f"Answer: {result[0].text}\n")
+
+
+async def main() -> None:
+    await populate_memory(memory)
+    await search_memory_examples(memory)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
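The embedding sample above can be checked the same way. The sketch below, which is not part of this diff, asks the locally served nomic model for a vector and prints its dimensionality, a useful sanity check before pairing the model with a memory store.

# Sketch: probe the local embeddings endpoint directly (assumes the same
# port and model name as the sample above).
import asyncio

from openai import AsyncOpenAI


async def probe_embeddings() -> None:
    client = AsyncOpenAI(api_key="fake_key", base_url="http://localhost:1234/v1")
    result = await client.embeddings.create(
        model="Nomic-embed-text-v1.5-Embedding-GGUF",
        input=["Your budget for 2024 is $100,000"],
    )
    print(f"Embedding dimensions: {len(result.data[0].embedding)}")


if __name__ == "__main__":
    asyncio.run(probe_embeddings())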
diff --git a/python/samples/concepts/local_models/ollama_chat_completion.py b/python/samples/concepts/local_models/ollama_chat_completion.py
new file mode 100644
index 000000000000..32413d91a530
--- /dev/null
+++ b/python/samples/concepts/local_models/ollama_chat_completion.py
@@ -0,0 +1,87 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+
+import asyncio
+
+from openai import AsyncOpenAI
+
+from semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion import OpenAIChatCompletion
+from semantic_kernel.contents.chat_history import ChatHistory
+from semantic_kernel.functions.kernel_arguments import KernelArguments
+from semantic_kernel.kernel import Kernel
+
+# This concept sample shows how to use the OpenAI connector with
+# a local model running in Ollama: https://github.com/ollama/ollama
+# A docker image is also available: https://hub.docker.com/r/ollama/ollama
+# The default model used in this sample is phi3 due to its compact size.
+# At the time of creating this sample, Ollama only provides experimental
+# compatibility with the `chat/completions` endpoint:
+# https://github.com/ollama/ollama/blob/main/docs/openai.md
+# Please follow the instructions in the Ollama repository to set up Ollama.
+
+system_message = """
+You are a chat bot. Your name is Mosscap and
+you have one goal: figure out what people need.
+Your full name, should you need to know it, is
+Splendid Speckled Mosscap. You communicate
+effectively, but you tend to answer with long
+flowery prose.
+"""
+
+kernel = Kernel()
+
+service_id = "local-gpt"
+
+openAIClient: AsyncOpenAI = AsyncOpenAI(
+    api_key="fake-key",  # This cannot be an empty string, use a fake key
+    base_url="http://localhost:11434/v1",
+)
+kernel.add_service(OpenAIChatCompletion(service_id=service_id, ai_model_id="phi3", async_client=openAIClient))
+
+settings = kernel.get_prompt_execution_settings_from_service_id(service_id)
+settings.max_tokens = 2000
+settings.temperature = 0.7
+settings.top_p = 0.8
+
+chat_function = kernel.add_function(
+    plugin_name="ChatBot",
+    function_name="Chat",
+    prompt="{{$chat_history}}{{$user_input}}",
+    template_format="semantic-kernel",
+    prompt_execution_settings=settings,
+)
+
+chat_history = ChatHistory(system_message=system_message)
+chat_history.add_user_message("Hi there, who are you?")
+chat_history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need")
+
+
+async def chat() -> bool:
+    try:
+        user_input = input("User:> ")
+    except KeyboardInterrupt:
+        print("\n\nExiting chat...")
+        return False
+    except EOFError:
+        print("\n\nExiting chat...")
+        return False
+
+    if user_input == "exit":
+        print("\n\nExiting chat...")
+        return False
+
+    answer = await kernel.invoke(chat_function, KernelArguments(user_input=user_input, chat_history=chat_history))
+    chat_history.add_user_message(user_input)
+    chat_history.add_assistant_message(str(answer))
+    print(f"Mosscap:> {answer}")
+    return True
+
+
+async def main() -> None:
+    chatting = True
+    while chatting:
+        chatting = await chat()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/python/samples/concepts/plugins/openai_plugin_azure_key_vault.py b/python/samples/concepts/plugins/openai_plugin_azure_key_vault.py
index e0d92e17e2e7..221fc44d2191 100644
--- a/python/samples/concepts/plugins/openai_plugin_azure_key_vault.py
+++ b/python/samples/concepts/plugins/openai_plugin_azure_key_vault.py
@@ -209,7 +209,7 @@ async def handle_streaming(
     print("Security Agent:> ", end="")
     streamed_chunks: list[StreamingChatMessageContent] = []
     async for message in response:
-        if not execution_settings.function_call_behavior.auto_invoke_kernel_functions and isinstance(
+        if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance(
            message[0], StreamingChatMessageContent
         ):
             streamed_chunks.append(message[0])
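The one-word change in the hunk above tracks the rename of the execution settings attribute from function_call_behavior to function_choice_behavior. For orientation, here is a hedged sketch of configuring the renamed attribute; the import paths are assumptions based on the current semantic-kernel layout and should be verified against the installed version.

# Sketch: enabling auto function invocation through the renamed attribute.
# Import paths below are assumptions, not taken from this PR.
from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
from semantic_kernel.connectors.ai.open_ai import OpenAIChatPromptExecutionSettings

settings = OpenAIChatPromptExecutionSettings(service_id="chat")
settings.function_choice_behavior = FunctionChoiceBehavior.Auto()

# The streaming handler in the hunk above gates on exactly this flag:
assert settings.function_choice_behavior.auto_invoke_kernel_functions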
required.") + + if not async_client: + if not openai_settings.api_key: + raise ServiceInitializationError("The OpenAI API key is required.") + if not openai_settings.chat_model_id: + raise ServiceInitializationError("The OpenAI chat model ID is required.") + super().__init__( ai_model_id=openai_settings.chat_model_id, api_key=openai_settings.api_key.get_secret_value() if openai_settings.api_key else None, diff --git a/python/semantic_kernel/connectors/ai/open_ai/settings/open_ai_settings.py b/python/semantic_kernel/connectors/ai/open_ai/settings/open_ai_settings.py index f005536343ed..f6266cab0f73 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/settings/open_ai_settings.py +++ b/python/semantic_kernel/connectors/ai/open_ai/settings/open_ai_settings.py @@ -15,11 +15,9 @@ class OpenAISettings(KernelBaseSettings): encoding 'utf-8'. If the settings are not found in the .env file, the settings are ignored; however, validation will fail alerting that the settings are missing. - Required settings for prefix 'OPENAI_' are: + Optional settings for prefix 'OPENAI_' are: - api_key: SecretStr - OpenAI API key, see https://platform.openai.com/account/api-keys (Env var OPENAI_API_KEY) - - Optional settings for prefix 'OPENAI_' are: - org_id: str | None - This is usually optional unless your account belongs to multiple organizations. (Env var OPENAI_ORG_ID) - chat_model_id: str | None - The OpenAI chat model ID to use, for example, gpt-3.5-turbo or gpt-4. @@ -33,7 +31,7 @@ class OpenAISettings(KernelBaseSettings): env_prefix: ClassVar[str] = "OPENAI_" - api_key: SecretStr + api_key: SecretStr | None = None org_id: str | None = None chat_model_id: str | None = None text_model_id: str | None = None diff --git a/python/tests/samples/samples_utils.py b/python/tests/samples/samples_utils.py index d04b39d3656b..de2b8257e7b7 100644 --- a/python/tests/samples/samples_utils.py +++ b/python/tests/samples/samples_utils.py @@ -7,11 +7,19 @@ logger = logging.getLogger() -async def retry(func, max_retries=3): - """Retry a function a number of times before raising an exception.""" +async def retry(func, reset=None, max_retries=3): + """Retry a function a number of times before raising an exception. + + args: + func: the async function to retry (required) + reset: a function to reset the state of any variables used in the function (optional) + max_retries: the number of times to retry the function before raising an exception (optional) + """ attempt = 0 while attempt < max_retries: try: + if reset: + reset() await func() break except Exception as e: diff --git a/python/tests/samples/test_concepts.py b/python/tests/samples/test_concepts.py index fabc3934d9cd..166366319bca 100644 --- a/python/tests/samples/test_concepts.py +++ b/python/tests/samples/test_concepts.py @@ -1,5 +1,8 @@ # Copyright (c) Microsoft. All rights reserved. 
diff --git a/python/tests/samples/test_concepts.py b/python/tests/samples/test_concepts.py
index fabc3934d9cd..166366319bca 100644
--- a/python/tests/samples/test_concepts.py
+++ b/python/tests/samples/test_concepts.py
@@ -1,5 +1,8 @@
 # Copyright (c) Microsoft. All rights reserved.
 
+import copy
+
+import pytest
 from pytest import mark, param
 
 from samples.concepts.auto_function_calling.azure_python_code_interpreter_function_calling import (
@@ -23,6 +26,9 @@
 from samples.concepts.filtering.prompt_filters import main as prompt_filters
 from samples.concepts.functions.kernel_arguments import main as kernel_arguments
 from samples.concepts.grounding.grounded import main as grounded
+from samples.concepts.local_models.lm_studio_chat_completion import main as lm_studio_chat_completion
+from samples.concepts.local_models.lm_studio_text_embedding import main as lm_studio_text_embedding
+from samples.concepts.local_models.ollama_chat_completion import main as ollama_chat_completion
 from samples.concepts.memory.azure_cognitive_search_memory import main as azure_cognitive_search_memory
 from samples.concepts.memory.memory import main as memory
 from samples.concepts.planners.azure_openai_function_calling_stepwise_planner import (
@@ -89,11 +95,35 @@
     param(custom_service_selector, [], id="custom_service_selector"),
     param(function_defined_in_json_prompt, ["What is 3+3?", "exit"], id="function_defined_in_json_prompt"),
     param(function_defined_in_yaml_prompt, ["What is 3+3?", "exit"], id="function_defined_in_yaml_prompt"),
+    param(
+        ollama_chat_completion,
+        ["Why is the sky blue?", "exit"],
+        id="ollama_chat_completion",
+        marks=pytest.mark.skip(reason="Need to set up Ollama locally. Check out the module for more details."),
+    ),
+    param(
+        lm_studio_chat_completion,
+        ["Why is the sky blue?", "exit"],
+        id="lm_studio_chat_completion",
+        marks=pytest.mark.skip(reason="Need to set up LM Studio locally. Check out the module for more details."),
+    ),
+    param(
+        lm_studio_text_embedding,
+        [],
+        id="lm_studio_text_embedding",
+        marks=pytest.mark.skip(reason="Need to set up LM Studio locally. Check out the module for more details."),
+    ),
 ]
 
 
 @mark.asyncio
 @mark.parametrize("func, responses", concepts)
 async def test_concepts(func, responses, monkeypatch):
+    saved_responses = copy.deepcopy(responses)
+
+    def reset():
+        responses.clear()
+        responses.extend(saved_responses)
+
     monkeypatch.setattr("builtins.input", lambda _: responses.pop(0))
-    await retry(lambda: func())
+    await retry(lambda: func(), reset=reset)
diff --git a/python/tests/samples/test_learn_resources.py b/python/tests/samples/test_learn_resources.py
index 58e1f4c3371b..428515d30f35 100644
--- a/python/tests/samples/test_learn_resources.py
+++ b/python/tests/samples/test_learn_resources.py
@@ -1,5 +1,7 @@
 # Copyright (c) Microsoft. All rights reserved.
 
+import copy
+
 from pytest import mark
 
 from samples.learn_resources.ai_services import main as ai_services
@@ -44,8 +46,15 @@
     ],
 )
 async def test_learn_resources(func, responses, monkeypatch):
+    saved_responses = copy.deepcopy(responses)
+
+    def reset():
+        responses.clear()
+        responses.extend(saved_responses)
+
     monkeypatch.setattr("builtins.input", lambda _: responses.pop(0))
     if func.__module__ == "samples.learn_resources.your_first_prompt":
-        await retry(lambda: func(delay=10))
+        await retry(lambda: func(delay=10), reset=reset)
         return
-    await retry(lambda: func())
+
+    await retry(lambda: func(), reset=reset)
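The three local-model cases above are registered but skipped, since CI has no local server to talk to. Once a server is running, one way to exercise a sample by hand is to invoke its main() directly, as in this sketch; it assumes Ollama is serving phi3 on its default port, per the sample's own comments.

# Sketch: run a skipped local-model sample outside pytest.
import asyncio

from samples.concepts.local_models.ollama_chat_completion import main

if __name__ == "__main__":
    asyncio.run(main())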