From d02cb6f20f88e165a836431e635687699fce2624 Mon Sep 17 00:00:00 2001
From: tfwang <tfwang@alauda.io>
Date: Thu, 26 Feb 2026 15:27:56 +0800
Subject: [PATCH 1/4] add llama-stack introduction and simple usage

---
 docs/en/llama_stack/index.mdx                 |   6 +
 docs/en/llama_stack/install.mdx               |  76 ++++
 docs/en/llama_stack/overview/features.mdx     |  29 ++
 docs/en/llama_stack/overview/index.mdx        |   7 +
 docs/en/llama_stack/overview/intro.mdx        |  28 ++
 docs/en/llama_stack/quickstart.mdx            |  72 +++
 .../llama-stack/llama-stack_quickstart.ipynb  | 427 ++++++++++++++++++
 .../llama-stack_quickstart_mcp.ipynb          | 173 +++++++
 8 files changed, 818 insertions(+)
 create mode 100644 docs/en/llama_stack/index.mdx
 create mode 100644 docs/en/llama_stack/install.mdx
 create mode 100644 docs/en/llama_stack/overview/features.mdx
 create mode 100644 docs/en/llama_stack/overview/index.mdx
 create mode 100644 docs/en/llama_stack/overview/intro.mdx
 create mode 100644 docs/en/llama_stack/quickstart.mdx
 create mode 100644 docs/public/llama-stack/llama-stack_quickstart.ipynb
 create mode 100644 docs/public/llama-stack/llama-stack_quickstart_mcp.ipynb
diff --git a/docs/en/llama_stack/index.mdx b/docs/en/llama_stack/index.mdx
new file mode 100644
index 0000000..d641bf4
--- /dev/null
+++ b/docs/en/llama_stack/index.mdx
@@ -0,0 +1,6 @@
+---
+weight: 82
+---
+# Llama Stack
+
+<Overview />
diff --git a/docs/en/llama_stack/install.mdx b/docs/en/llama_stack/install.mdx
new file mode 100644
index 0000000..1ba8109
--- /dev/null
+++ b/docs/en/llama_stack/install.mdx
@@ -0,0 +1,76 @@
+---
+weight: 20
+---
+
+# Install Llama Stack
+
+This document describes how to install and deploy Llama Stack Server on Kubernetes using the Llama Stack Operator.
+
+## Upload Operator
+
+Download the Llama Stack Operator installation file (e.g., `llama-stack-operator.alpha.ALL.v0.7.0.tgz`).
+
+Use the `violet` command to publish the package to the platform repository:
+
+```bash
+violet push --platform-address=platform-access-address --platform-username=platform-admin --platform-password=platform-admin-password llama-stack-operator.alpha.ALL.v0.7.0.tgz
+```
+
+## Install Operator
+
+1. Go to the `Administrator` view in the Alauda Container Platform.
+
+2. In the left navigation, select `Marketplace` / `Operator Hub`.
+
+3. In the right panel, find `Alauda build of Llama Stack` and click `Install`.
+
+4. Keep all parameters as default and complete the installation.
+
+## Deploy Llama Stack Server
+
+After the operator is installed, deploy Llama Stack Server by creating a `LlamaStackDistribution` custom resource:
+
+> **Note:** Prepare the following in advance; otherwise the distribution may not become ready:
+> - **Secret**: Create a Secret (e.g., `deepseek-api`) in the same namespace with the LLM API token. Example: `kubectl create secret generic deepseek-api -n default --from-literal=token=<LLM_API_KEY>`.
+> - **Storage Class**: Ensure the `default` Storage Class exists in the cluster; otherwise the PVC cannot be bound and the resource will not become ready.
+
+```yaml
+apiVersion: llamastack.io/v1alpha1
+kind: LlamaStackDistribution
+metadata:
+  annotations:
+    cpaas.io/display-name: ""
+  name: demo
+  namespace: default
+spec:
+  network:
+    exposeRoute: false                             # Whether to expose the route externally
+  replicas: 1                                      # Number of server replicas
+  server:
+    containerSpec:
+      env:
+        - name: VLLM_URL
+          value: "https://api.deepseek.com/v1"     # URL of the LLM API provider
+        - name: VLLM_MAX_TOKENS
+          value: "8192"                            # Maximum output tokens
+        - name: VLLM_API_TOKEN                     # Load LLM API token from secret
+          valueFrom:
+            secretKeyRef:                          # Create this Secret in the same namespace beforehand, e.g. kubectl create secret generic deepseek-api -n default --from-literal=token=<LLM_API_KEY>
+              key: token
+              name: deepseek-api
+      name: llama-stack
+      port: 8321
+    distribution:
+      name: starter                                # Distribution name (options: starter, postgres-demo, meta-reference-gpu)
+    storage:
+      mountPath: /home/lls/.lls
+      size: 20Gi                                   # Requires the "default" Storage Class to be configured beforehand
+```
+
+After deployment, the Llama Stack Server will be available within the cluster. The access URL is displayed in `status.serviceURL`, for example:
+
+```yaml
+status:
+  phase: Ready
+  serviceURL: http://demo-service.default.svc.cluster.local:8321
+```
diff --git a/docs/en/llama_stack/overview/features.mdx b/docs/en/llama_stack/overview/features.mdx
new file mode 100644
index 0000000..6352688
--- /dev/null
+++ b/docs/en/llama_stack/overview/features.mdx
@@ -0,0 +1,29 @@
+---
+weight: 20
+---
+
+# Main Features
+
+## Server-Based Architecture
+
+- **Centralized Server**: Llama Stack Server hosts inference, agents, safety, tool runtime, vector I/O, and files
+- **Remote or Inline Providers**: Support for remote APIs (e.g., OpenAI-compatible) and inline providers (e.g., meta-reference, sqlite-vec, localfs)
+- **Kubernetes Deployment**: Deploy via Llama Stack Operator using `LlamaStackDistribution` custom resources
+
+## AI Agents with Tools
+
+- **Agent Creation**: Create agents with model, instructions, and a list of tools
+- **Client-Side Tools**: Define tools with the `@client_tool` decorator; the client executes tool calls and returns results to the server
+- **Session Management**: Create sessions and run multi-turn conversations with streaming responses
+- **Streaming**: Support for streaming agent responses for real-time display
+
+## Configuration and Extensibility
+
+- **Stack Configuration**: YAML-based configuration for APIs, providers, persistence (e.g., kv_default, sql_default), and models
+- **Environment Fallbacks**: Use `${env.VAR:~default}` in config for flexible deployment
+- **Multiple Distributions**: Starter, postgres-demo, meta-reference-gpu and other distribution options
+
+## Integration
+
+- **Python Client**: `llama-stack-client` for Python 3.12+ with full agent and model APIs
+- **REST-Friendly**: Server exposes APIs for inference, agents, and tool runtime; can be wrapped in FastAPI or other web frameworks for production use
diff --git a/docs/en/llama_stack/overview/index.mdx b/docs/en/llama_stack/overview/index.mdx
new file mode 100644
index 0000000..66d6848
--- /dev/null
+++ b/docs/en/llama_stack/overview/index.mdx
@@ -0,0 +1,7 @@
+---
+weight: 10
+---
+
+# Overview
+
+<Overview />
diff --git a/docs/en/llama_stack/overview/intro.mdx b/docs/en/llama_stack/overview/intro.mdx
new file mode 100644
index 0000000..518079b
--- /dev/null
+++ b/docs/en/llama_stack/overview/intro.mdx
@@ -0,0 +1,28 @@
+---
+weight: 10
+---
+# Introduction
+
+## Llama Stack
+
+*Llama Stack* is a framework for building and running AI agents with tools. It provides a server-based architecture that enables developers to create agents that can interact with users, access external tools, and perform complex reasoning tasks.
+
+Main components and concepts include:
+
+- **Llama Stack Server**: Central service that hosts models, agents, and tool runtime. It can be deployed on Kubernetes via the Llama Stack Operator (see [Install Llama Stack](/en/llama_stack/install)).
+- **Client SDK** (`llama-stack-client`): Python client for connecting to the server, creating agents, defining tools with the `@client_tool` decorator, and managing sessions.
+- **Agents**: Configurable AI agents that use LLM models and can call tools (e.g., weather API, custom APIs) to answer user queries.
+- **Tools**: Functions exposed to the agent (e.g., weather query). Defined with `@client_tool` and passed to the agent at creation time.
+- **Configuration**: YAML stack configuration defines providers (inference, agents, safety, vector_io, files), persistence backends, and model registration (e.g., DeepSeek via OpenAI-compatible API).
+
+Llama Stack supports multiple API providers, storage and persistence backends, and distribution options (e.g., starter, postgres-demo, meta-reference-gpu), making it suitable for quick experiments and production deployments.
+
+## Documentation
+
+Llama Stack provides official documentation and resources for in-depth usage:
+
+### Official Documentation
+- **Main Documentation**: [https://llamastack.github.io/docs](https://llamastack.github.io/docs)
+  - Usage, API providers, and core concepts
+- **Core Concepts**: [https://llamastack.github.io/docs/concepts](https://llamastack.github.io/docs/concepts)
+  - Architecture, API stability, and resource management
diff --git a/docs/en/llama_stack/quickstart.mdx b/docs/en/llama_stack/quickstart.mdx
new file mode 100644
index 0000000..9f3889c
--- /dev/null
+++ b/docs/en/llama_stack/quickstart.mdx
@@ -0,0 +1,72 @@
+---
+weight: 30
+---
+
+# Quickstart
+
+This section provides a quickstart example for creating an AI Agent with Llama Stack.
+
+## Prerequisites
+
+- Python 3.12 or higher (if not satisfied, refer to [FAQ: How to prepare Python 3.12 in Notebook](#how-to-prepare-python-312-in-notebook))
+- Llama Stack Server installed and running via Operator (see [Install Llama Stack](./install))
+- Access to a Notebook environment (e.g., Jupyter Notebook, JupyterLab)
+- Python environment with `llama-stack-client` and required dependencies installed
+- API key for the LLM provider (e.g., DeepSeek API key)
+
+## Quickstart Example
+
+A simple example of creating an AI Agent with Llama Stack is available in the following resources:
+
+- **Notebook**:[Llama Stack Quick Start Demo](/llama-stack/llama-stack_quickstart.ipynb)
+
+Download the notebook and upload it to a Notebook environment to run.
+
+The notebook demonstrates:
+
+- Dependency installation and library imports
+- Tool definition using the `@client_tool` decorator (weather query tool example)
+- Client connection to Llama Stack Server
+- Model selection and Agent creation with tools and instructions
+- Agent execution with session management and streaming responses
+- Result handling and display
+- Optional FastAPI deployment example
+
+## FAQ
+
+### How to prepare Python 3.12 in Notebook
+
+1. Download the pre-compiled Python installation package:
+
+   ```bash
+   wget -O /tmp/python312.tar.gz https://github.com/astral-sh/python-build-standalone/releases/download/20260114/cpython-3.12.12+20260114-x86_64-unknown-linux-gnu-install_only.tar.gz
+   ```
+
+2. Extract with:
+
+   ```bash
+   mkdir -p ~/python312
+   tar -xzf /tmp/python312.tar.gz -C ~/python312 --strip-components=1
+   ```
+
+3. Install and Register Kernel:
+
+   ```bash
+   export PATH="${HOME}/python312/bin:${PATH}"
+
+   python3 -m pip install ipykernel
+   python3 -m ipykernel install --user --name python312 --display-name "Python 3.12"
+   ```
+
+4. Switch kernel in the notebook page.
+
+**Note**: `python` and `pip` commands executed directly in a notebook cell still use the default Python. To use the Python 3.12 installation, call it by its full path (e.g., `~/python312/bin/python3 -m pip install <package>`).
+
+## Additional Resources
+
+For more resources on developing AI Agents with Llama Stack, see:
+
+- [Llama Stack Documentation](https://llamastack.github.io/docs) - The official Llama Stack documentation covering all usage-related topics, API providers, and core concepts.
+- [Llama Stack Core Concepts](https://llamastack.github.io/docs/concepts) - Deep dive into Llama Stack architecture, API stability, and resource management.
+- [Llama Stack GitHub Repository](https://github.com/llamastack/llama-stack) - Source code, example applications, distribution configurations, and how to add new API providers.
+- [Llama Stack Example Apps](https://github.com/llamastack/llama-stack-apps/) - Official examples demonstrating how to use Llama Stack in various scenarios.
diff --git a/docs/public/llama-stack/llama-stack_quickstart.ipynb b/docs/public/llama-stack/llama-stack_quickstart.ipynb
new file mode 100644
index 0000000..0339c3b
--- /dev/null
+++ b/docs/public/llama-stack/llama-stack_quickstart.ipynb
@@ -0,0 +1,427 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "id": "5de79491",
+      "metadata": {},
+      "source": [
+        "# Llama Stack Quick Start Demo\n",
+        "\n",
+        "This notebook demonstrates how to use Llama Stack to run an agent with **client-side tools**."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "e5d1fc8c",
+      "metadata": {},
+      "source": [
+        "## 1. Install Dependencies\n",
+        "\n",
+        "**Note:** `llama-stack-client` requires Python 3.12 or higher. If your Python version does not meet this requirement, refer to the FAQ section in the documentation: **How to prepare Python 3.12 in Notebook**."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "a8f9e5e4",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Use current kernel's Python so PATH does not point to another env\n",
+        "# If download is slow, add: -i https://pypi.tuna.tsinghua.edu.cn/simple\n",
+        "import sys\n",
+        "!{sys.executable} -m pip install \"llama-stack-client>=0.4\" \"requests\" \"fastapi\" \"uvicorn\" --target ~/packages"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "9d942699",
+      "metadata": {},
+      "source": [
+        "## 2. Import Libraries"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "cfd65276",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import sys\n",
+        "from pathlib import Path\n",
+        "\n",
+        "user_site_packages = Path.home() / \"packages\"\n",
+        "if str(user_site_packages) not in sys.path:\n",
+        "    sys.path.insert(0, str(user_site_packages))\n",
+        "\n",
+        "import os\n",
+        "import requests\n",
+        "from typing import Dict, Any\n",
+        "from urllib.parse import quote\n",
+        "from llama_stack_client import LlamaStackClient, Agent\n",
+        "from llama_stack_client.lib.agents.client_tool import client_tool\n",
+        "from llama_stack_client.lib.agents.event_logger import AgentEventLogger\n",
+        "\n",
+        "print('Libraries imported successfully')"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "baabf4fc",
+      "metadata": {},
+      "source": [
+        "## 3. Define Tools\n",
+        "\n",
+        "Use the `@client_tool` decorator to define a weather query tool."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "c57f95e5",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "@client_tool\n",
+        "def get_weather(city: str) -> Dict[str, Any]:\n",
+        "    \"\"\"Get current weather information for a specified city.\n",
+        "\n",
+        "    Uses the wttr.in free weather API to fetch weather data.\n",
+        "\n",
+        "    :param city: City name, e.g., Beijing, Shanghai, Paris\n",
+        "    :returns: Dictionary containing weather information including city, temperature and humidity\n",
+        "    \"\"\"\n",
+        "    try:\n",
+        "        # URL encode the city name to handle spaces and special characters\n",
+        "        encoded_city = quote(city)\n",
+        "        url = f'https://wttr.in/{encoded_city}?format=j1'\n",
+        "        response = requests.get(url, timeout=10)\n",
+        "        response.raise_for_status()\n",
+        "        data = response.json()\n",
+        "\n",
+        "        current = data['current_condition'][0]\n",
+        "        return {\n",
+        "            'city': city,\n",
+        "            'temperature': f\"{current['temp_C']}°C\",\n",
+        "            'humidity': f\"{current['humidity']}%\",\n",
+        "        }\n",
+        "    except Exception as e:\n",
+        "        return {'error': f'Failed to get weather information: {str(e)}'}\n",
+        "\n",
+        "print('Weather tool defined successfully')"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "05cefded",
+      "metadata": {},
+      "source": [
+        "## 4. Connect to Server and Create Agent\n",
+        "\n",
+        "Use LlamaStackClient to connect to the running server, create an Agent with the client-side weather tool, and execute tool calls."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "394ee5db",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "base_url = os.getenv('LLAMA_STACK_URL', 'http://localhost:8321')\n",
+        "print(f'Connecting to Server: {base_url}')\n",
+        "\n",
+        "client = LlamaStackClient(base_url=base_url)\n",
+        "\n",
+        "models = client.models.list()\n",
+        "llm_model = next(\n",
+        "    (m for m in models\n",
+        "        if m.custom_metadata and m.custom_metadata.get('model_type') == 'llm'),\n",
+        "    None\n",
+        ")\n",
+        "if not llm_model:\n",
+        "    raise Exception('No LLM model found')\n",
+        "model_id = llm_model.id\n",
+        "print(f'Using model: {model_id}\\n')\n",
+        "\n",
+        "agent = Agent(\n",
+        "    client,\n",
+        "    model=model_id,\n",
+        "    instructions='You are a helpful weather assistant. When users ask about weather, use the weather tool to query and answer.',\n",
+        "    tools=[get_weather],\n",
+        ")\n",
+        "print('Agent created successfully')"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "90c28b81",
+      "metadata": {},
+      "source": [
+        "## 5. Run the Agent"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "70e8d661",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Create session\n",
+        "session_id = agent.create_session('weather-agent-session')\n",
+        "print(f'✓ Session created: {session_id}\\n')\n",
+        "\n",
+        "# First query\n",
+        "print('=' * 60)\n",
+        "print('User> What is the weather like in Beijing today?')\n",
+        "print('-' * 60)\n",
+        "\n",
+        "response_stream = agent.create_turn(\n",
+        "    messages=[{'role': 'user', 'content': 'What is the weather like in Beijing today?'}],\n",
+        "    session_id=session_id,\n",
+        "    stream=True,\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "ca2f26f2",
+      "metadata": {},
+      "source": [
+        "### Display the Result"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "4728a638",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "logger = AgentEventLogger()\n",
+        "for printable in logger.log(response_stream):\n",
+        "    print(printable, end='', flush=True)\n",
+        "print('\\n')"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "728530b0",
+      "metadata": {},
+      "source": [
+        "### Try Different Queries"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "ed8cc5a0",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Second query\n",
+        "print('=' * 60)\n",
+        "print('User> What is the weather in Shanghai?')\n",
+        "print('-' * 60)\n",
+        "\n",
+        "response_stream = agent.create_turn(\n",
+        "    messages=[{'role': 'user', 'content': 'What is the weather in Shanghai?'}],\n",
+        "    session_id=session_id,\n",
+        "    stream=True,\n",
+        ")\n",
+        "\n",
+        "logger = AgentEventLogger()\n",
+        "for printable in logger.log(response_stream):\n",
+        "    print(printable, end='', flush=True)\n",
+        "print('\\n')"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "6f8d31d0",
+      "metadata": {},
+      "source": [
+        "## 6. FastAPI Service Example\n",
+        "\n",
+        "You can also run the agent as a FastAPI web service for production use. This allows you to expose the agent functionality via HTTP API endpoints."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "a5d732e4",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Import FastAPI components\n",
+        "from fastapi import FastAPI\n",
+        "from pydantic import BaseModel\n",
+        "from threading import Thread\n",
+        "import time\n",
+        "\n",
+        "# Create a simple FastAPI app\n",
+        "api_app = FastAPI(title=\"Llama Stack Agent API\")\n",
+        "\n",
+        "class ChatRequest(BaseModel):\n",
+        "    message: str\n",
+        "\n",
+        "\n",
+        "@api_app.post(\"/chat\")\n",
+        "def chat(request: ChatRequest):\n",
+        "    \"\"\"Chat endpoint that uses the Llama Stack Agent\"\"\"\n",
+        "    session_id = agent.create_session('fastapi-weather-session')\n",
+        "\n",
+        "    # Create turn and collect response\n",
+        "    response_stream = agent.create_turn(\n",
+        "        messages=[{'role': 'user', 'content': request.message}],\n",
+        "        session_id=session_id,\n",
+        "        stream=True,\n",
+        "    )\n",
+        "\n",
+        "    # Collect the full response\n",
+        "    full_response = \"\"\n",
+        "    logger = AgentEventLogger()\n",
+        "    for printable in logger.log(response_stream):\n",
+        "        full_response += printable\n",
+        "\n",
+        "    return {\"response\": full_response}\n",
+        "\n",
+        "print(\"FastAPI app created. Use the next cell to start the server.\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "475997ba",
+      "metadata": {},
+      "source": [
+        "### Start the FastAPI Server\n",
+        "\n",
+        "**Note**: In a notebook, you can start the server in a background thread. For production, run it as a separate process using `uvicorn`."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "6f5db723",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Start server in background thread (for notebook demonstration)\n",
+        "from uvicorn import Config, Server\n",
+        "\n",
+        "# Create a server instance that can be controlled\n",
+        "config = Config(api_app, host=\"127.0.0.1\", port=8000, log_level=\"info\")\n",
+        "server = Server(config)\n",
+        "\n",
+        "def run_server():\n",
+        "    server.run()\n",
+        "\n",
+        "# Use daemon=True so the thread stops automatically when the kernel restarts\n",
+        "# This is safe for notebook demonstrations\n",
+        "# For production, use process managers instead of threads\n",
+        "server_thread = Thread(target=run_server, daemon=True)\n",
+        "server_thread.start()\n",
+        "\n",
+        "# Wait a moment for the server to start\n",
+        "time.sleep(2)\n",
+        "print(\"✓ FastAPI server started at http://127.0.0.1:8000\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "715b2d47",
+      "metadata": {},
+      "source": [
+        "### Test the API\n",
+        "\n",
+        "Now you can call the API using HTTP requests:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "407b82af",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Test the API endpoint\n",
+        "response = requests.post(\n",
+        "    \"http://127.0.0.1:8000/chat\",\n",
+        "    json={\"message\": \"What's the weather in Shanghai?\"},\n",
+        "    timeout=60\n",
+        ")\n",
+        "\n",
+        "print(f\"Status Code: {response.status_code}\")\n",
+        "print(\"Response:\")\n",
+        "print(response.json().get('response'))"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "945a776f",
+      "metadata": {},
+      "source": [
+        "### Stop the FastAPI Server"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "c7795bba",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Stop the FastAPI server (section 6)\n",
+        "if 'server' in globals() and server.started:\n",
+        "    server.should_exit = True\n",
+        "    print(\"✓ FastAPI server shutdown requested.\")\n",
+        "else:\n",
+        "    print(\"FastAPI server is not running or has already stopped.\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "a3ebed1f",
+      "metadata": {},
+      "source": [
+        "## 7. More Resources\n",
+        "\n",
+        "For more resources on developing AI Agents with Llama Stack, see:\n",
+        "\n",
+        "### Official Documentation\n",
+        "- [Llama Stack Documentation](https://llamastack.github.io/docs) - The official Llama Stack documentation covering all usage-related topics, API providers, and core concepts.\n",
+        "- [Llama Stack Core Concepts](https://llamastack.github.io/docs/concepts) - Deep dive into Llama Stack architecture, API stability, and resource management.\n",
+        "\n",
+        "### Code Examples and Projects\n",
+        "- [Llama Stack GitHub Repository](https://github.com/llamastack/llama-stack) - Source code, example applications, distribution configurations, and how to add new API providers.\n",
+        "- [Llama Stack Example Apps](https://github.com/llamastack/llama-stack-apps/) - Official examples demonstrating how to use Llama Stack in various scenarios.\n",
+        "\n",
+        "### Community and Support\n",
+        "- [Llama Stack GitHub Issues](https://github.com/llamastack/llama-stack/issues) - Report bugs, ask questions, and contribute to the project.\n"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python (llama-stack-demo)",
+      "language": "python",
+      "name": "llama-stack-demo"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.12.11"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 5
+}
diff --git a/docs/public/llama-stack/llama-stack_quickstart_mcp.ipynb b/docs/public/llama-stack/llama-stack_quickstart_mcp.ipynb
new file mode 100644
index 0000000..71d2be0
--- /dev/null
+++ b/docs/public/llama-stack/llama-stack_quickstart_mcp.ipynb
@@ -0,0 +1,173 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "# Llama Stack Quick Start — MCP Option (Optional)\n",
+        "\n",
+        "This notebook contains **Option B: MCP tool** only. Use it when the Llama Stack MCP adapter is ready. The main quickstart uses client-side tools only.\n",
+        "\n",
+        "**Prerequisites:** Same as the main quickstart (Sections 1–2: install dependencies and import libraries; defining `get_weather` is not needed here). The MCP server cell below also imports the `mcp` Python package, so make sure it is installed in the kernel's environment. Run the **MCP server** below, then **connect and create the agent** with MCP tools. MCP tools are **invoked by the Llama Stack Server (llama-server)**; the MCP server URL must be reachable from where the server runs."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Option B: MCP tool\n",
+        "\n",
+        "Run an MCP server that exposes a weather query tool (same capability as the client-side `get_weather`, via MCP). This example uses **Streamable HTTP** (single `/mcp` endpoint; SSE is deprecated). The server is registered with Llama Stack in the next section. *Requires the Llama Stack Server to have `tool_runtime` with the `model-context-protocol` provider.*"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Start the MCP server in a separate process\n",
+        "import os\n",
+        "from multiprocessing import Process\n",
+        "\n",
+        "def _run_mcp_weather_server():\n",
+        "    import logging\n",
+        "    logging.basicConfig(level=logging.DEBUG, format='%(name)s %(levelname)s: %(message)s')\n",
+        "    logging.getLogger(\"mcp\").setLevel(logging.DEBUG)\n",
+        "    from urllib.parse import quote\n",
+        "    import requests\n",
+        "    from mcp.server.fastmcp import FastMCP\n",
+        "    mcp = FastMCP(\"demo-weather\", host=\"0.0.0.0\", port=8002)\n",
+        "    @mcp.tool()\n",
+        "    def get_weather_mcp(city: str) -> str:\n",
+        "        \"\"\"Get current weather information for a specified city.\n",
+        "\n",
+        "        Uses the wttr.in free weather API to fetch weather data.\n",
+        "\n",
+        "        :param city: City name, e.g., Beijing, Shanghai, Paris\n",
+        "        :returns: Formatted weather summary string (city, temperature, humidity), or an error string\n",
+        "        \"\"\"\n",
+        "        try:\n",
+        "            encoded_city = quote(city)\n",
+        "            url = f\"https://wttr.in/{encoded_city}?format=j1\"\n",
+        "            r = requests.get(url, timeout=10)\n",
+        "            r.raise_for_status()\n",
+        "            data = r.json()\n",
+        "            cur = data[\"current_condition\"][0]\n",
+        "            return f\"City: {city}, Temperature: {cur['temp_C']}°C, Humidity: {cur['humidity']}%\"\n",
+        "        except Exception as e:\n",
+        "            return f\"Error: {e}\"\n",
+        "    # streamable-http: single endpoint; use transport=\"sse\" and /sse if server only supports legacy SSE\n",
+        "    mcp.run(transport=\"streamable-http\")\n",
+        "\n",
+        "mcp_process = Process(target=_run_mcp_weather_server, daemon=True)\n",
+        "mcp_process.start()\n",
+        "import socket\n",
+        "# Prefer env so Llama Stack Server can reach this URL\n",
+        "MCP_SERVER_URL = os.getenv(\"MCP_SERVER_URL\")\n",
+        "if not MCP_SERVER_URL:\n",
+        "    _host = socket.gethostbyname(socket.gethostname())\n",
+        "    if _host.startswith(\"127.\"):\n",
+        "        _host = os.getenv(\"MCP_SERVER_HOST\", \"127.0.0.1\")\n",
+        "    MCP_SERVER_URL = f\"http://{_host}:8002/mcp\"\n",
+        "os.environ[\"MCP_SERVER_URL\"] = MCP_SERVER_URL\n",
+        "print(f\"✓ MCP server running at {MCP_SERVER_URL} (Streamable HTTP, tool: get_weather_mcp, bind 0.0.0.0:8002)\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Connect to Server and Create Agent (MCP tools)\n",
+        "\n",
+        "Register the MCP tool group and create an agent that uses it."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "from llama_stack_client import LlamaStackClient, Agent\n",
+        "from llama_stack_client.lib.agents.event_logger import AgentEventLogger\n",
+        "\n",
+        "base_url = os.getenv('LLAMA_STACK_URL', 'http://localhost:8321')\n",
+        "client = LlamaStackClient(base_url=base_url)\n",
+        "\n",
+        "models = client.models.list()\n",
+        "llm_model = next(\n",
+        "    (m for m in models\n",
+        "        if m.custom_metadata and m.custom_metadata.get('model_type') == 'llm'),\n",
+        "    None\n",
+        ")\n",
+        "if not llm_model:\n",
+        "    raise Exception('No LLM model found')\n",
+        "model_id = llm_model.id\n",
+        "\n",
+        "MCP_TOOLGROUP_ID = \"mcp::demo-weather\"\n",
+        "mcp_server_url = os.getenv(\"MCP_SERVER_URL\", \"http://127.0.0.1:8002/mcp\")\n",
+        "client.toolgroups.register(\n",
+        "    toolgroup_id=MCP_TOOLGROUP_ID,\n",
+        "    provider_id=\"model-context-protocol\",\n",
+        "    mcp_endpoint={\"uri\": mcp_server_url},\n",
+        ")\n",
+        "agent_tools = [{\"type\": \"mcp\", \"server_label\": MCP_TOOLGROUP_ID, \"server_url\": mcp_server_url}]\n",
+        "\n",
+        "agent = Agent(\n",
+        "    client,\n",
+        "    model=model_id,\n",
+        "    instructions='You are a helpful weather assistant. When users ask about weather, use the weather tool to query and answer.',\n",
+        "    tools=agent_tools,\n",
+        ")\n",
+        "print('Agent created with MCP weather tool')"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### Troubleshooting (MCP / 400 error)\n",
+        "\n",
+        "If you see **400 - messages[3]: invalid type: sequence, expected a string**: the inference backend often expects message `content` to be a string, but the server may send tool-turn content as an array. This is a message-format compatibility issue between the server and the backend, **not caused by SSE/Streamable HTTP**. You can:\n",
+        "- Use the main quickstart with **client-side tool** (Option A) instead, or\n",
+        "- Use **stdio** for MCP (configure the server's `tool_runtime` with `command`/`args` so the server spawns the MCP process; no HTTP URL needed), or\n",
+        "- Check your Llama Stack Server and inference backend docs for tool message format compatibility."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### Stop the MCP server"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "if 'mcp_process' in globals() and mcp_process.is_alive():\n",
+        "    mcp_process.terminate()\n",
+        "    mcp_process.join(timeout=2)\n",
+        "    print(\"✓ MCP server process stopped.\")\n",
+        "else:\n",
+        "    print(\"MCP server process is not running or has already stopped.\")"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python (llama-stack-demo)",
+      "language": "python",
+      "name": "llama-stack-demo"
+    },
+    "language_info": {
+      "name": "python",
+      "version": "3.12.11"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 5
+}

From b6cf36e0c74ee60a84d520d3e53c30fb7b91cd0e Mon Sep 17 00:00:00 2001
From: tfwang <tfwang@alauda.io>
Date: Thu, 26 Feb 2026 15:54:51 +0800
Subject: [PATCH 2/4] update

---
 docs/public/llama-stack/llama-stack_quickstart.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/public/llama-stack/llama-stack_quickstart.ipynb b/docs/public/llama-stack/llama-stack_quickstart.ipynb
index 0339c3b..a8f9269 100644
--- a/docs/public/llama-stack/llama-stack_quickstart.ipynb
+++ b/docs/public/llama-stack/llama-stack_quickstart.ipynb
@@ -90,7 +90,7 @@
         "    Uses the wttr.in free weather API to fetch weather data.\n",
         "\n",
         "    :param city: City name, e.g., Beijing, Shanghai, Paris\n",
-        "    :returns: Dictionary containing weather information including city, temperature and humidity\n",
+        "    :returns: Formatted weather summary string (city, temperature, humidity), or an error string\n",
         "    \"\"\"\n",
         "    try:\n",
         "        # URL encode the city name to handle spaces and special characters\n",

From c0ddb2b8c2481c63719780c2cd3e7ac0684a5efa Mon Sep 17 00:00:00 2001
From: tfwang <tfwang@alauda.io>
Date: Thu, 26 Feb 2026 16:41:51 +0800
Subject: [PATCH 3/4] update

---
 docs/public/llama-stack/llama-stack_quickstart.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/public/llama-stack/llama-stack_quickstart.ipynb b/docs/public/llama-stack/llama-stack_quickstart.ipynb
index a8f9269..0339c3b 100644
--- a/docs/public/llama-stack/llama-stack_quickstart.ipynb
+++ b/docs/public/llama-stack/llama-stack_quickstart.ipynb
@@ -90,7 +90,7 @@
         "    Uses the wttr.in free weather API to fetch weather data.\n",
         "\n",
         "    :param city: City name, e.g., Beijing, Shanghai, Paris\n",
-        "    :returns: Formatted weather summary string (city, temperature, humidity), or an error string\n",
+        "    :returns: Dictionary containing weather information including city, temperature and humidity\n",
         "    \"\"\"\n",
         "    try:\n",
         "        # URL encode the city name to handle spaces and special characters\n",

From 3f8db4519090d77e149841d717c74b120e6c2fdc Mon Sep 17 00:00:00 2001
From: tfwang <tfwang@alauda.io>
Date: Thu, 26 Feb 2026 22:16:22 +0800
Subject: [PATCH 4/4] update

---
 docs/en/llama_stack/quickstart.mdx | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/docs/en/llama_stack/quickstart.mdx b/docs/en/llama_stack/quickstart.mdx
index 9f3889c..f9ada4c 100644
--- a/docs/en/llama_stack/quickstart.mdx
+++ b/docs/en/llama_stack/quickstart.mdx
@@ -58,7 +58,13 @@ The notebook demonstrates:
    python3 -m ipykernel install --user --name python312 --display-name "Python 3.12"
    ```
 
-4. Switch kernel in the notebook page.
+4. Switch kernel in the notebook page:
+
+   - Open your Notebook environment (e.g., Jupyter Notebook or JupyterLab) in the browser, then open an existing notebook or create a new one.
+   - In the notebook interface, find the current kernel name (usually shown in the **top-right corner** of the page, e.g., "Python 3" or "python3").
+   - Click that kernel name, or use the menu **Kernel → Change Kernel**.
+   - In the kernel list, select **"Python 3.12"** (the display name registered in step 3).
+   - After switching, every cell you run in this notebook (existing or new) executes with Python 3.12.
 
 **Note**: When executing python and pip commands directly in the notebook page, the default python will still be used. You need to specify the full path to use the python312 version commands.