diff --git a/CMakeLists.txt b/CMakeLists.txt index 71f073db2..a43a5907d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ option(MLLM_BUILD_ARM_BACKEND "Enable MLLM ARM backend" OFF) option(MLLM_BUILD_OPENCL_BACKEND "Enable MLLM OpenCL backend" OFF) option(MLLM_BUILD_CUDA_BACKEND "Enable MLLM CUDA backend" OFF) option(MLLM_BUILD_QNN_BACKEND "Enable MLLM QNN backend" OFF) -option(MLLM_BUILD_SDK_C_BINDING "Enable MLLM C SDK binding" OFF) +option(MLLM_BUILD_SDK_C_BINDING "Enable MLLM C SDK binding" ON) option(MLLM_BUILD_EXPERIMENTS "Enable MLLM experiments" OFF) # Extension Enable diff --git a/docs/service/mllm_cli.rst b/docs/service/mllm_cli.rst index 40a41d726..7cf913d09 100644 --- a/docs/service/mllm_cli.rst +++ b/docs/service/mllm_cli.rst @@ -1,2 +1,317 @@ MLLM CLI ============ + +Overview +-------- + +This document describes the MLLM command-line interface (CLI) tool, which operates within a client-server architecture. The system is designed to provide network access to MLLM's core inference capabilities. The backend service is written in Go and interacts with the core C++ MLLM library through a C API. The frontend can be a Go-based command-line client or a standard GUI client like Chatbox that communicates with the service via an OpenAI-compatible API. + +This guide covers three main areas: + +1. **System Architecture and API**: An explanation of the components and the C API bridge. +2. **Build Configuration**: Instructions on how to adapt the build scripts for different environments. +3. **Compilation and Deployment**: A step-by-step guide to build and run the entire stack. + +Section 3 will guide you through the complete steps to reproduce the mllm_cli. Before you begin, we highly recommend reading and understanding Section 1 (System Architecture and API) and Section 2 (Build Configuration) first. This will help you follow the guide in Section 3 more smoothly. + +System Architecture +------------------- + +The system consists of three primary components: the C++ core with a C API wrapper, a Go backend service, and a client (Go CLI or GUI). + +1. C/C++ Core & C API Layer +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The core MLLM functionalities are implemented in C++. To allow communication with other languages like Go, a C-style API is exposed. + +**Key Data Structures (`mllm/c_api/Object.h`)** + +The C API uses shared data structures to pass information between Go and C++. + +* ``MllmCType``: An enum that defines the type of data being handled, such as integers, floats, tensors, or custom objects. +* ``MllmCAny``: A versatile union-based struct that can hold different types of values, from primitive types to pointers for complex objects. This is the primary data exchange format. + +**Key API Functions (`mllm/c_api/Runtime.h`, `mllm/c_api/Runtime.cpp`)** + +These C functions wrap the C++ service logic, making them callable from Go via `cgo`. + +* `createQwen3Session(const char* model_path)`: Loads a model from the specified path and creates a session handle. +* `insertSession(const char* session_id, MllmCAny handle)`: Registers the created session with a unique ID in the service. +* `sendRequest(const char* session_id, const char* json_request)`: Sends a user's request (in JSON format) to the specified session for processing. +* `pollResponse(const char* session_id)`: Polls for a response from the model. This is used for streaming results back to the client. +* `freeSession(MllmCAny handle)`: Releases the resources associated with a session. + +2. 
Go Service Layer (`mllm-server`) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The `mllm-server` is an HTTP server written in Go. It acts as a bridge between network clients and the MLLM C++ core. + +* **Initialization**: On startup, it initializes the MLLM context, starts the backend service, and loads the specified model into a session. +* **API Endpoint**: It exposes an OpenAI-compatible endpoint at `/v1/chat/completions`. +* **Request Handling**: When it receives a request, it retrieves the appropriate model session, forwards the request JSON to the C++ core using `sendRequest`, and then continuously polls for results using `pollResponse`. +* **Streaming Response**: Results are streamed back to the client over HTTP using Server-Sent Events (SSE). + +**Key Service Layer Files** + +The `mllm-server` functionality is implemented across several key Go files: + +* ``pkg/server/server.go`` + * **Purpose**: Defines the HTTP server instance itself. It is responsible for starting the server (`Start()`), setting the listening address (e.g., ``:8080``), registering API routes, and managing graceful shutdown (`Shutdown()`). +* ``pkg/server/handlers.go`` + * **Purpose**: Contains the core logic for the API endpoint (`chatCompletionsHandler`). It is responsible for: + 1. Parsing the incoming JSON request from the client. + 2. Retrieving or validating the model session from the `mllmService` (`GetSession`). + 3. Forwarding the request to the C++ core (`session.SendRequest`). + 4. Continuously polling (`session.PollResponse`) for responses from the C++ layer. + 5. Streaming the responses back to the client in the standard Server-Sent Events (SSE) format. +* ``pkg/mllm/service.go`` + * **Purpose**: Acts as the Go-level **session manager** (`Service`). It holds a map that links model IDs (e.g., "Qwen3-0.6B-w4a32kai") to their active MLLM sessions (`*mllm.Session`). `handlers.go` uses this to find the correct session instance. +* ``pkg/api/types.go`` + * **Purpose**: Defines the shared data structures (Go structs) used for communication between the client and server. This includes `OpenAIRequest` (request body) and `OpenAIResponseChunk` (response body), ensuring the data format adheres to the OpenAI-compatible API specification. + +3. Go Client (`mllm-client`) +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The `mllm-client` is an interactive command-line tool that allows users to chat with the model. + +* **User Interaction**: It reads user input from the console. +* **API Communication**: It formats the user input into an OpenAI-compatible JSON request and sends it to the `mllm-server`. +* **Response Handling**: It receives the SSE stream from the server, decodes the JSON chunks, and prints the assistant's response to the console in real-time. + +Alternative Client: Chatbox +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In addition to the ``mllm-client`` command-line tool, you can use a graphical user interface (GUI) client like **Chatbox**, as it supports the OpenAI-compatible API. + +**Chatbox Configuration** + +To connect Chatbox (running on your host machine) to your ``mllm-server`` (running on the Android device), you must first forward the device's port to your local machine using `adb`. + +.. code-block:: bash + + # Forward local port 8081 to the Android device's port 8080. 
+ adb forward tcp:8081 tcp:8080 + +After running this command, configure Chatbox with the following settings: + +* **Name**: ``mllmTeam`` (or any name you prefer) +* **API Mode**: ``OpenAI API Compatible`` +* **API Key**: (Can be left blank or any value; the server does not currently check it) +* **API Host**: ``http://localhost:8081`` +* **API Path**: ``/v1/chat/completions`` + +Once configured, you can click the **Check** button to ensure the connection is successful. Please note that this step must be performed while the server is running. + +Build Configuration Guide +------------------------- + +The Go build tasks (`build_android_mllm_server.yaml` and `build_android_mllm_client.yaml`) use hardcoded paths that are specific to the build server's environment. If you are setting up a new build environment, you **must** modify these paths before proceeding to compilation. + +Understanding the Build Scripts +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The core of the cross-compilation logic for Go is within the `ShellCommandTask` of the `.yaml` build files. It sets several environment variables to configure `cgo` for cross-compiling to Android ARM64. + +* ``GOOS=android``, ``GOARCH=arm64``: Tells Go to build for Android ARM64. +* ``CGO_ENABLED=1``: Enables ``cgo`` to allow Go to call C/C++ code. +* ``CC`` and ``CXX``: Specifies the C and C++ compilers from the Android NDK, used to compile any C/C++ parts within the Go program. +* ``CGO_CFLAGS``: Tells the C compiler where to find the MLLM C API header files (e.g., ``Runtime.h``). +* ``CGO_LDFLAGS``: Tells the linker where to find the compiled MLLM shared libraries (``.so`` files) that the final executable needs to link against. + +Modifying Hardcoded Paths +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The two most critical variables you will need to change are `CGO_CFLAGS` and `CGO_LDFLAGS`. + +**Example from `build_android_mllm_server.yaml`**: + +.. code-block:: yaml + + # ... + export CGO_LDFLAGS="-L/root/zty_workspace/mllm_zty/build-android-arm64-v8a/bin" + export CGO_CFLAGS="-I/root/zty_workspace/mllm_zty" + # ... + +**How to Modify**: + +1. **``CGO_CFLAGS="-I/path/to/your/project/root"``** + The `-I` flag specifies an include directory. This path should point to the root of the MLLM project directory on your build server, where the `mllm/c_api/` headers are located. In the example, this is `/root/zty_workspace/mllm_zty`. Change this to match your project's location. + +2. **``CGO_LDFLAGS="-L/path/to/your/compiled/libs"``** + The `-L` flag specifies a library directory. This path must point to the directory where the C++ build (Step 1) placed the `.so` files. In the example, this is `/root/zty_workspace/mllm_zty/build-android-arm64-v8a/bin`. If your build output directory is different, you must update this path accordingly. + +By correctly updating these two paths in both `build_android_mllm_server.yaml` and `build_android_mllm_client.yaml`, you can adapt the build process to any server environment. + +Compilation and Deployment +-------------------------- + +This section provides the complete workflow for compiling all C++ and Go components, deploying them to an Android target, and running the system. + +Prerequisites +~~~~~~~~~~~~~ + +* A build environment, such as a server or Docker container, with the Android NDK and Go compiler installed, hereinafter referred to as the 'build server'. +* An Android target device with `adb` access enabled. +* `rsync` and `scp` for file synchronization between your development machine and the build server. 
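
As a quick sanity check before you start, you can confirm the prerequisites above are in place. The commands below are only a sketch: the NDK path matches the one used by the bundled task files, and your environment may differ.

.. code-block:: bash

   # On the build server: Go toolchain and Android NDK cross-compilers
   go version
   ls /opt/ndk/android-ndk-r28b/toolchains/llvm/prebuilt/linux-x86_64/bin/ | grep aarch64-linux-android

   # On your development machine: adb can see the target device
   adb devices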
Step 1: Compile C++ Core Libraries
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

First, compile the MLLM C++ core, which produces the essential shared libraries (`.so` files).

1. **Sync Code to Build Server**:
   Synchronize your local project directory with the build server.

   .. code-block:: bash

      # Replace <port>, <user>, and <server_ip> with your server details
      rsync -avz --checksum -e 'ssh -p <port>' --exclude 'build' --exclude '.git' ./ <user>@<server_ip>:/your_workspace/your_programname/

2. **Run the Build Task**:
   On the build server, execute the build task. This task uses `tasks/build_android_debug.yaml` to configure and run CMake.

   Before executing this step, make sure the hardcoded directories in `build_android_debug.yaml` have been adjusted for your environment. The modification method is the same as for the Go build files described earlier.

   .. code-block:: bash

      # These commands are run on your build server.
      cd /your_workspace/your_programname/
      python task.py tasks/build_android_debug.yaml

3. **Retrieve Compiled Libraries**:
   After the build succeeds, copy the compiled shared libraries from the build server back to your local machine. These libraries are the C++ backend that the Go application will call.

   .. code-block:: bash

      # Run these commands on your local machine to copy the files from the build server.
      # Navigate to your local build artifacts directory
      cd /path/to/your/local_artifacts_dir/

      # Copy the libraries
      scp -P <port> <user>@<server_ip>:/your_workspace/your_programname/build-android-arm64-v8a/bin/libMllmRT.so .
      scp -P <port> <user>@<server_ip>:/your_workspace/your_programname/build-android-arm64-v8a/bin/libMllmCPUBackend.so .
      scp -P <port> <user>@<server_ip>:/your_workspace/your_programname/build-android-arm64-v8a/bin/libMllmSdkC.so .

Step 2: Compile the Go Server
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Next, cross-compile the Go server application for Android.

1. **Sync Code**: Ensure your latest Go code is on the build server. This is only necessary if you've made changes to the Go server files (e.g., in the ``mllm-cli`` directory).

2. **Run the Build Task**:
   On the build server, execute the server build task. Make sure you have correctly configured the hardcoded paths in this YAML file as described in the "Build Configuration Guide" section.

   .. code-block:: bash

      cd /your_workspace/your_programname/
      python task.py tasks/build_android_mllm_server.yaml

3. **Retrieve the Executable**:
   Copy the compiled `mllm_web_server` binary from the build server back to your local machine.

   .. code-block:: bash

      # Navigate to your local build artifacts directory
      cd /path/to/your/local_artifacts_dir/

      # Copy the executable
      scp -P <port> <user>@<server_ip>:/your_workspace/your_programname/build-android-arm64-v8a/bin/mllm_web_server .

Step 3: Compile the Go Client
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

If you are using Chatbox or a similar GUI client, you can skip this step.

Otherwise, compile the Go client application in the same way.

1. **Sync Code**: Ensure your latest Go client code is on the build server.

2. **Run the Build Task**:
   On the build server, execute the client build task. This also requires the build YAML to be correctly configured.

   .. code-block:: bash

      cd /your_workspace/your_programname/
      python task.py tasks/build_android_mllm_client.yaml

3. **Retrieve the Executable**:
   Copy the compiled `mllm_ws_client` binary from the build server to your local machine.

   .. code-block:: bash

      # Navigate to your local build artifacts directory
      cd /path/to/your/local_artifacts_dir/

      # Copy the executable
      scp -P <port> <user>@<server_ip>:/your_workspace/your_programname/build-android-arm64-v8a/bin/mllm_ws_client .

Step 4: Deploy to Target Device
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Push all compiled artifacts (libraries and executables) to your target Android device.

.. code-block:: bash

   # Connect to your device if you haven't already; replace <device_ip> with its address
   adb connect <device_ip>

   # Push the shared libraries from your local artifacts directory
   adb push libMllmRT.so /path/to/your/deployment_dir/
   adb push libMllmCPUBackend.so /path/to/your/deployment_dir/
   adb push libMllmSdkC.so /path/to/your/deployment_dir/

   # Push the server and client executables
   adb push mllm_web_server /path/to/your/deployment_dir/

   # (Optional) Push the Go client if you compiled it in Step 3
   adb push mllm_ws_client /path/to/your/deployment_dir/

Step 5: Running and Testing
~~~~~~~~~~~~~~~~~~~~~~~~~~~

This covers testing with both the Go CLI client and Chatbox.

**A. Testing with the Go CLI Client (On-Device)**

1. **Terminal 1: Start the Server**:
   Open a shell on the device, navigate to the deployment directory, set the library path, make the server executable, and run it with the required model path.

   .. code-block:: bash

      adb shell
      # Inside the adb shell
      cd /path/to/your/deployment_dir
      chmod +x mllm_web_server
      export LD_LIBRARY_PATH=.
      # Ensure you provide the correct path to your model
      ./mllm_web_server --model-path /path/to/your/model_directory/model_name

   .. warning::
      The `export LD_LIBRARY_PATH=.` command is crucial. It tells the Android dynamic linker to look for the `.so` files in the current directory. Without it, the server will fail to start.

2. **Terminal 2: Run the Go Client**:
   Open a second terminal and start the client.

   .. code-block:: bash

      adb shell "cd /path/to/your/deployment_dir && chmod +x mllm_ws_client && ./mllm_ws_client"

You should now be able to interact with the model from the client terminal. Type `/exit` to quit the client. Use `Ctrl+C` in the server terminal to stop the server.

**B. Testing with Chatbox (Host Machine)**

1. **Terminal 1: Start the Server**:
   Follow the same instructions as in **Step 5.A.1** to start the server on the Android device.

2. **Terminal 2: Set up Port Forwarding**:
   On your host machine (not in the adb shell), run the following command. This maps your local host port (e.g., 8081) to the device's port (e.g., 8080).

   .. code-block:: bash

      # This forwards your host port 8081 to the device's port 8080
      # You can change 8081 to any available port on your host.
      adb forward tcp:8081 tcp:8080

3. **Open Chatbox**:
   Open the Chatbox application on your host machine and configure it according to the "Alternative Client: Chatbox" section above. You can now chat with the model through the GUI.
\ No newline at end of file
diff --git a/mllm-cli/cmd/mllm-client/main.go b/mllm-cli/cmd/mllm-client/main.go
new file mode 100644
index 000000000..969ab48cf
--- /dev/null
+++ b/mllm-cli/cmd/mllm-client/main.go
@@ -0,0 +1,88 @@
+// Copyright (c) MLLM Team.
+// Licensed under the MIT License.
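+//
+// mllm-client is a minimal interactive chat client for mllm-server's
+// OpenAI-compatible endpoint: it keeps the conversation history in memory,
+// POSTs it to /v1/chat/completions with "stream": true, and prints the
+// Server-Sent Events stream as the assistant's reply arrives.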
+package main + +import ( + "bufio" + "bytes" + "encoding/json" + "fmt" + "io" + "log" + "mllm-cli/pkg/api" + "net/http" + "os" + "strings" +) + +func main() { + serverURL := "http://localhost:8080/v1/chat/completions" + var history []api.RequestMessage + var currentSessionID string + + fmt.Println("\n--- MLLM Refactored Interactive Client ---") + fmt.Println("Supports multi-turn sessions. Type /exit to quit.") + log.Printf("Connecting to: %s", serverURL) + + reader := bufio.NewReader(os.Stdin) + for { + fmt.Print("\n> ") + userInput, _ := reader.ReadString('\n') + cleanedInput := strings.TrimSpace(userInput) + if cleanedInput == "" { continue } + if cleanedInput == "/exit" || cleanedInput == "/quit" { return } + + history = append(history, api.RequestMessage{Role: "user", Content: cleanedInput}) + apiRequest := api.OpenAIRequest{ + Model: "Qwen3-0.6B-w4a32kai", + Messages: history, + Stream: true, + SessionID: currentSessionID, + } + requestBody, _ := json.Marshal(apiRequest) + + req, _ := http.NewRequest("POST", serverURL, bytes.NewBuffer(requestBody)) + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Accept", "text/event-stream") + resp, err := http.DefaultClient.Do(req) + if err != nil { + log.Printf("ERROR: Request failed: %v", err) + history = history[:len(history)-1] + continue + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + bodyBytes, _ := io.ReadAll(resp.Body) + log.Printf("ERROR: Server returned status %s: %s", resp.Status, string(bodyBytes)) + history = history[:len(history)-1] + continue + } + + sessionIDFromHeader := resp.Header.Get("X-Session-ID") + if sessionIDFromHeader != "" && currentSessionID != sessionIDFromHeader { + currentSessionID = sessionIDFromHeader + log.Printf("[Session Manager] New session established. ID: %s", currentSessionID) + } + + var fullResponse strings.Builder + scanner := bufio.NewScanner(resp.Body) + fmt.Print("Assistant: ") + for scanner.Scan() { + line := scanner.Text() + if strings.HasPrefix(line, "data: ") { + jsonData := strings.TrimPrefix(line, "data: ") + if jsonData == "[DONE]" { break } + var chunk api.OpenAIResponseChunk + if json.Unmarshal([]byte(jsonData), &chunk) == nil && len(chunk.Choices) > 0 { + content := chunk.Choices[0].Delta.Content + fmt.Print(content) + fullResponse.WriteString(content) + } + } + } + fmt.Println() + if err := scanner.Err(); err != nil { log.Printf("ERROR reading stream: %v", err) } + history = append(history, api.RequestMessage{Role: "assistant", Content: fullResponse.String()}) + } +} \ No newline at end of file diff --git a/mllm-cli/cmd/mllm-server/main.go b/mllm-cli/cmd/mllm-server/main.go new file mode 100644 index 000000000..fc08b5a9e --- /dev/null +++ b/mllm-cli/cmd/mllm-server/main.go @@ -0,0 +1,72 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. 
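+//
+// mllm-server bridges the MLLM C API and an OpenAI-compatible HTTP API:
+// it initializes the MLLM context, starts the backend service, loads one
+// model session named after the --model-path base directory, serves
+// /v1/chat/completions on :8080, and shuts down gracefully on SIGINT/SIGTERM.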
+package main + +import ( + "context" + "flag" + "log" + "mllm-cli/mllm" + pkgmllm "mllm-cli/pkg/mllm" + "mllm-cli/pkg/server" + "os" + "os/signal" + "path/filepath" + "syscall" + "time" +) + +func main() { + modelPath := flag.String("model-path", "", "Path to the MLLM model directory.") + flag.Parse() + + if *modelPath == "" { + log.Fatal("FATAL: --model-path argument is required.") + } + + if !mllm.InitializeContext() { + log.Fatal("FATAL: InitializeContext failed!") + } + mllm.SetLogLevel(2) + if !mllm.StartService(4) { + log.Fatal("FATAL: StartService failed!") + } + defer mllm.StopService() + defer mllm.ShutdownContext() + + mllmService := pkgmllm.NewService() + + log.Printf("Loading model and creating session from: %s", *modelPath) + session, err := mllm.NewSession(*modelPath) + if err != nil { + log.Fatalf("FATAL: Failed to create session: %v", err) + } + + sessionID := filepath.Base(*modelPath) + if !session.Insert(sessionID) { + session.Close() + log.Fatalf("FATAL: Failed to insert session with ID '%s'", sessionID) + } + mllmService.RegisterSession(sessionID, session) + log.Printf("Session created and registered successfully with ID: %s", sessionID) + + httpServer := server.NewServer(":8080", mllmService) + + go httpServer.Start() + + shutdownChan := make(chan os.Signal, 1) + signal.Notify(shutdownChan, syscall.SIGINT, syscall.SIGTERM) + <-shutdownChan + + log.Println("Received shutdown signal. Starting graceful shutdown...") + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := httpServer.Shutdown(ctx); err != nil { + log.Printf("HTTP server shutdown failed: %v", err) + } + + mllmService.Shutdown() + + log.Println("Server gracefully stopped.") +} \ No newline at end of file diff --git a/mllm-cli/go.mod b/mllm-cli/go.mod index 016841bfa..8064a07ab 100644 --- a/mllm-cli/go.mod +++ b/mllm-cli/go.mod @@ -1,8 +1,11 @@ module mllm-cli -go 1.25.0 +go 1.22.2 -require github.com/charmbracelet/bubbles v0.21.0 +require ( + github.com/charmbracelet/bubbles v0.21.0 + github.com/gorilla/websocket v1.5.3 // +) require ( github.com/atotto/clipboard v0.1.4 // indirect @@ -28,4 +31,5 @@ require ( golang.org/x/sys v0.35.0 // indirect golang.org/x/term v0.34.0 golang.org/x/text v0.3.8 // indirect + github.com/google/uuid v1.6.0 ) diff --git a/mllm-cli/mllm/c.go b/mllm-cli/mllm/c.go index 6812e36c9..27f02f09e 100644 --- a/mllm-cli/mllm/c.go +++ b/mllm-cli/mllm/c.go @@ -4,16 +4,31 @@ package mllm /* -#cgo CFLAGS: -fPIC -I${SRCDIR}/SDK/include/ #cgo CFLAGS: -std=c11 -#cgo LDFLAGS: -L${SRCDIR}/SDK/lib/ -#cgo LDFLAGS: -lMllmSdkC -#cgo LDFLAGS: -Wl,-rpath ${SRCDIR}/SDK/lib +#cgo LDFLAGS: -lMllmSdkC -lMllmRT -lMllmCPUBackend #include #include + +static void* MllmCAny_get_v_custom_ptr(MllmCAny handle) { + return handle.v_custom_ptr; +} + +static MllmCAny MllmCAny_set_v_custom_ptr_null(MllmCAny handle) { + handle.v_custom_ptr = NULL; + return handle; +} */ import "C" +import "unsafe" +import "fmt" +import "runtime" + + +type Session struct { + cHandle C.MllmCAny + sessionID string +} func isOk(any C.MllmCAny) bool { return C.isOk(any) == 1 @@ -26,3 +41,88 @@ func InitializeContext() bool { func ShutdownContext() bool { return isOk(C.shutdownContext()) } + +func StartService(workerThreads int) bool { + result := C.startService(C.size_t(workerThreads)) + return isOk(result) +} + +func StopService() bool { + result := C.stopService() + return isOk(result) +} + +func SetLogLevel(level int) { + C.setLogLevel(C.int(level)) +} + +func NewSession(modelPath string) 
(*Session, error) {
+	cModelPath := C.CString(modelPath)
+	defer C.free(unsafe.Pointer(cModelPath))
+
+	handle := C.createQwen3Session(cModelPath)
+	if !isOk(handle) {
+		return nil, fmt.Errorf("underlying C API call createQwen3Session failed")
+	}
+	s := &Session{cHandle: handle}
+	runtime.SetFinalizer(s, func(s *Session) {
+		fmt.Println("[Go Finalizer] Mllm Session automatically released.")
+		C.freeSession(s.cHandle)
+	})
+
+	return s, nil
+}
+
+func (s *Session) Close() {
+	if C.MllmCAny_get_v_custom_ptr(s.cHandle) != nil {
+		fmt.Println("[Go Close] Mllm Session manually closed.")
+		C.freeSession(s.cHandle)
+		s.cHandle = C.MllmCAny_set_v_custom_ptr_null(s.cHandle)
+		runtime.SetFinalizer(s, nil)
+	}
+}
+
+func (s *Session) Insert(sessionID string) bool {
+	cSessionID := C.CString(sessionID)
+	defer C.free(unsafe.Pointer(cSessionID))
+	result := C.insertSession(cSessionID, s.cHandle)
+	if isOk(result) {
+		s.sessionID = sessionID
+	}
+	return isOk(result)
+}
+
+func (s *Session) SendRequest(jsonRequest string) bool {
+	if s.sessionID == "" {
+		fmt.Println("[Go SendRequest] Error: sessionID is not set on this session.")
+		return false
+	}
+	cSessionID := C.CString(s.sessionID)
+	cJsonRequest := C.CString(jsonRequest)
+	defer C.free(unsafe.Pointer(cSessionID))
+	defer C.free(unsafe.Pointer(cJsonRequest))
+
+	result := C.sendRequest(cSessionID, cJsonRequest)
+	return isOk(result)
+}
+
+func (s *Session) PollResponse(requestID string) string {
+	if requestID == "" {
+		fmt.Println("[Go PollResponse] Error: requestID cannot be empty.")
+		return ""
+	}
+	cRequestID := C.CString(requestID)
+	defer C.free(unsafe.Pointer(cRequestID))
+
+	cResponse := C.pollResponse(cRequestID)
+	if cResponse == nil {
+		return ""
+	}
+	defer C.freeResponseString(cResponse)
+
+	return C.GoString(cResponse)
+}
+
+func (s *Session) SessionID() string {
+	return s.sessionID
+}
\ No newline at end of file
diff --git a/mllm-cli/pkg/api/types.go b/mllm-cli/pkg/api/types.go
new file mode 100644
index 000000000..d922773e2
--- /dev/null
+++ b/mllm-cli/pkg/api/types.go
@@ -0,0 +1,33 @@
+// Copyright (c) MLLM Team.
+// Licensed under the MIT License.
+package api
+
+type RequestMessage struct {
+	Role    string `json:"role"`
+	Content string `json:"content"`
+}
+
+type OpenAIRequest struct {
+	Model          string           `json:"model"`
+	Messages       []RequestMessage `json:"messages"`
+	Stream         bool             `json:"stream"`
+	EnableThinking bool             `json:"enable_thinking,omitempty"`
+	Thinking       bool             `json:"thinking,omitempty"` // accepts the optional "thinking": true field some clients send
+	SessionID      string           `json:"session_id,omitempty"`
+}
+
+type ResponseDelta struct {
+	Content string `json:"content"`
+}
+
+type ResponseChoice struct {
+	Delta ResponseDelta `json:"delta"`
+}
+
+type OpenAIResponseChunk struct {
+	ID      string           `json:"id"`
+	Object  string           `json:"object"`
+	Created int64            `json:"created"`
+	Model   string           `json:"model"`
+	Choices []ResponseChoice `json:"choices"`
+}
\ No newline at end of file
diff --git a/mllm-cli/pkg/mllm/service.go b/mllm-cli/pkg/mllm/service.go
new file mode 100644
index 000000000..a57e5da06
--- /dev/null
+++ b/mllm-cli/pkg/mllm/service.go
@@ -0,0 +1,52 @@
+// Copyright (c) MLLM Team.
+// Licensed under the MIT License.
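+//
+// Package mllm (pkg/mllm) is the server-side session registry: it maps a
+// model ID (the --model-path base name) to the *mllm.Session created at
+// startup, protects the map with a mutex, and closes every session on
+// Shutdown.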
+package mllm + +import ( + "fmt" + "log" + "mllm-cli/mllm" + "sync" +) + +type Service struct { + sessions map[string]*mllm.Session + mutex sync.Mutex +} + +func NewService() *Service { + return &Service{ + sessions: make(map[string]*mllm.Session), + } +} + +func (s *Service) RegisterSession(id string, session *mllm.Session) { + s.mutex.Lock() + defer s.mutex.Unlock() + s.sessions[id] = session +} + +func (s *Service) GetSession(sessionID string) (*mllm.Session, error) { + s.mutex.Lock() + defer s.mutex.Unlock() + + if session, ok := s.sessions[sessionID]; ok { + log.Printf("Found pre-registered session for model: %s", sessionID) + return session, nil + } + + return nil, fmt.Errorf("session for model '%s' not found. Is the server started with the correct --model-path?", sessionID) +} + +func (s *Service) Shutdown() { + s.mutex.Lock() + defer s.mutex.Unlock() + + log.Println("Shutting down all active sessions...") + for id, session := range s.sessions { + log.Printf("Closing session: %s", id) + session.Close() + } + s.sessions = make(map[string]*mllm.Session) + log.Println("All sessions have been shut down.") +} \ No newline at end of file diff --git a/mllm-cli/pkg/server/handlers.go b/mllm-cli/pkg/server/handlers.go new file mode 100644 index 000000000..a15946fd7 --- /dev/null +++ b/mllm-cli/pkg/server/handlers.go @@ -0,0 +1,94 @@ +package server + +import ( + "encoding/json" + "fmt" + "log" + "net/http" + + "github.com/google/uuid" +) + +func (s *Server) chatCompletionsHandler() http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "Only POST method is allowed", http.StatusMethodNotAllowed) + return + } + + var requestPayload map[string]interface{} + if err := json.NewDecoder(r.Body).Decode(&requestPayload); err != nil { + http.Error(w, "Invalid request body", http.StatusBadRequest) + return + } + + modelName, _ := requestPayload["model"].(string) + session, err := s.mllmService.GetSession(modelName) + if err != nil { + log.Printf("ERROR: Could not get session for model '%s': %v", modelName, err) + http.Error(w, fmt.Sprintf("Model '%s' is not available on this server.", modelName), http.StatusNotFound) + return + } + + requestPayload["model"] = session.SessionID() + + requestID, ok := requestPayload["id"].(string) + if !ok || requestID == "" { + newID := uuid.New().String() + log.Printf("Client did not provide a request ID. Generated a new one: %s", newID) + requestID = newID + requestPayload["id"] = newID + } + + requestBytes, err := json.Marshal(requestPayload) + if err != nil { + http.Error(w, "Failed to re-marshal request payload", http.StatusInternalServerError) + return + } + if !session.SendRequest(string(requestBytes)) { + http.Error(w, "Failed to process request by the model", http.StatusInternalServerError) + return + } + + w.Header().Set("Content-Type", "text/event-stream") + w.Header().Set("Cache-Control", "no-cache") + w.Header().Set("Connection", "keep-alive") + flusher, _ := w.(http.Flusher) + + log.Printf("Streaming response for session %s (Request ID: %s)...", session.SessionID(), requestID) + + for { + if r.Context().Err() != nil { + log.Printf("Client disconnected. 
Stopping stream for %s.", session.SessionID()) + break + } + + + rawResponse := session.PollResponse(requestID) + + if rawResponse == "" { + log.Println("Received empty response from poll, assuming stream has ended.") + break + } + + fmt.Fprintf(w, "data: %s\n\n", rawResponse) + flusher.Flush() + + var responseChunk map[string]interface{} + if json.Unmarshal([]byte(rawResponse), &responseChunk) == nil { + if choices, ok := responseChunk["choices"].([]interface{}); ok && len(choices) > 0 { + if choice, ok := choices[0].(map[string]interface{}); ok { + if reason, ok := choice["finish_reason"].(string); ok && reason == "stop" { + log.Println("End of stream detected: finish_reason is 'stop'.") + break + } + } + } + } + } + + fmt.Fprintf(w, "data: [DONE]\n\n") + flusher.Flush() + log.Printf("Finished streaming for session %s (Request ID: %s).", session.SessionID(), requestID) + } +} \ No newline at end of file diff --git a/mllm-cli/pkg/server/server.go b/mllm-cli/pkg/server/server.go new file mode 100644 index 000000000..98a171a94 --- /dev/null +++ b/mllm-cli/pkg/server/server.go @@ -0,0 +1,42 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. +package server + +import ( + "context" + "log" + "mllm-cli/pkg/mllm" + "net/http" +) + +type Server struct { + httpServer *http.Server + mllmService *mllm.Service +} + +func NewServer(addr string, mllmService *mllm.Service) *Server { + mux := http.NewServeMux() + + s := &Server{ + httpServer: &http.Server{ + Addr: addr, + Handler: mux, + }, + mllmService: mllmService, + } + + mux.HandleFunc("/v1/chat/completions", s.chatCompletionsHandler()) + return s +} + +func (s *Server) Start() { + log.Printf("OpenAI-compatible API server listening on %s", s.httpServer.Addr) + if err := s.httpServer.ListenAndServe(); err != nil && err != http.ErrServerClosed { + log.Fatalf("FATAL: Could not start HTTP server: %v", err) + } +} + +func (s *Server) Shutdown(ctx context.Context) error { + log.Println("Shutting down HTTP server...") + return s.httpServer.Shutdown(ctx) +} \ No newline at end of file diff --git a/mllm/c_api/Object.h b/mllm/c_api/Object.h index 2db92771e..42a010788 100644 --- a/mllm/c_api/Object.h +++ b/mllm/c_api/Object.h @@ -37,37 +37,41 @@ typedef enum { kBuiltinContainerList = 260, kBuiltinContainer_End = 512, + // Custom Object + kCustomObject = 513, + #ifdef __cplusplus }; #else } MllmCType; #endif -typedef struct { // NOLINT +typedef struct { // NOLINT uint32_t type_id; // 4B int64_t strong_ref_ptr; // 8B uint32_t weak_ref_ptr; // 4B } MllmCObject; -typedef struct { // NOLINT +typedef struct { // NOLINT char* data; size_t size; } MllmCByteArrayObject; -typedef struct { // NOLINT +typedef struct { // NOLINT uint32_t type_id; // 4B - union { // 8B + union { // 8B int64_t v_int64; double v_fp64; int32_t v_bool; int32_t v_return_code; MllmCObject* v_object; void* v_bare_ptr; + void* v_custom_ptr; }; } MllmCAny; #ifdef __cplusplus } -#endif //! __cplusplus +#endif // !__cplusplus -#endif //! MLLM_C_API_OBJECT_H_ +#endif // ! MLLM_C_API_OBJECT_H_ \ No newline at end of file diff --git a/mllm/c_api/Runtime.cpp b/mllm/c_api/Runtime.cpp index 7dbc31864..b116debbe 100644 --- a/mllm/c_api/Runtime.cpp +++ b/mllm/c_api/Runtime.cpp @@ -2,6 +2,17 @@ // Licensed under the MIT License. 
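+// C wrappers over mllm::service for use through cgo: a loaded model session
+// is handed to the caller as an opaque MllmCAny of type kCustomObject, and
+// streamed responses come back as heap-allocated C strings that the caller
+// must release with freeResponseString().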
#include "mllm/mllm.hpp" #include "mllm/c_api/Runtime.h" +#include "mllm/engine/service/Service.hpp" +#include "mllm/models/qwen3/modeling_qwen3_service.hpp" +#include +#include +#include +#include +#include // for strncpy + +struct MllmSessionWrapper { + std::shared_ptr session_ptr; +}; //===----------------------------------------------------------------------===// // Mllm main function @@ -22,21 +33,143 @@ MllmCAny memoryReport() { } int32_t isOk(MllmCAny ret) { - if (ret.type_id == kRetCode && ret.v_return_code == 0) { return true; } + if (ret.type_id == kRetCode && ret.v_return_code == 0) + return true; + if (ret.type_id == kCustomObject && ret.v_custom_ptr != nullptr) + return true; return false; } + //===----------------------------------------------------------------------===// // Mllm wrapper functions //===----------------------------------------------------------------------===// MllmCAny convert2String(char* ptr, size_t size) { // TODO + return MllmCAny{.type_id = kRetCode, .v_return_code = -1}; } MllmCAny convert2ByteArray(char* ptr, size_t size) { // TODO + return MllmCAny{.type_id = kRetCode, .v_return_code = -1}; } MllmCAny convert2Int(int64_t v) { return MllmCAny{.type_id = kInt, .v_int64 = v}; } MllmCAny convert2Float(double v) { return MllmCAny{.type_id = kFloat, .v_fp64 = v}; } + +//===----------------------------------------------------------------------===// +// Mllm service functions +//===----------------------------------------------------------------------===// + +MllmCAny startService(size_t worker_threads) { + mllm::service::startService(worker_threads); + return MllmCAny{.type_id = kRetCode, .v_return_code = 0}; +} + +MllmCAny stopService() { + mllm::service::stopService(); + return MllmCAny{.type_id = kRetCode, .v_return_code = 0}; +} + +void setLogLevel(int level) { + mllm::setLogLevel(static_cast(level)); +} + +MllmCAny createQwen3Session(const char* model_path) { + if (model_path == nullptr) { + printf("[C++ Service] createQwen3Session error: invalid arguments.\n"); + return MllmCAny{.type_id = kRetCode, .v_return_code = -1}; + } + try { + auto qwen3_session = std::make_shared(); + qwen3_session->fromPreTrain(model_path); + + auto* handle = new MllmSessionWrapper(); + handle->session_ptr = qwen3_session; + + return MllmCAny{.type_id = kCustomObject, .v_custom_ptr = handle}; + } catch (const std::exception& e) { + printf("[C++ Service] createQwen3Session exception: %s\n", e.what()); + return MllmCAny{.type_id = kRetCode, .v_return_code = -1}; + } +} + +MllmCAny insertSession(const char* session_id, MllmCAny handle) { + if (session_id == nullptr || handle.type_id != kCustomObject || handle.v_custom_ptr == nullptr) { + printf("[C++ Service] insertSession error: invalid arguments.\n"); + return MllmCAny{.type_id = kRetCode, .v_return_code = -1}; + } + + auto* session_wrapper = reinterpret_cast(handle.v_custom_ptr); + mllm::service::insertSession(std::string(session_id), session_wrapper->session_ptr); + return MllmCAny{.type_id = kRetCode, .v_return_code = 0}; +} + +MllmCAny freeSession(MllmCAny handle) { + if (handle.type_id != kCustomObject || handle.v_custom_ptr == nullptr) { + printf("[C++ Service] freeSession error: invalid arguments.\n"); + return MllmCAny{.type_id = kRetCode, .v_return_code = -1}; + } + + auto* session_wrapper = reinterpret_cast(handle.v_custom_ptr); + delete session_wrapper; + return MllmCAny{.type_id = kRetCode, .v_return_code = 0}; +} + + +MllmCAny sendRequest(const char* session_id, const char* json_request) { + if (session_id == 
nullptr || json_request == nullptr) { + printf("[C++ Service] sendRequest error: invalid arguments.\n"); + return MllmCAny{.type_id = kRetCode, .v_return_code = -1}; + } + int status = mllm::service::sendRequest(std::string(json_request)); + return MllmCAny{.type_id = kRetCode, .v_return_code = status}; +} + +const char* pollResponse(const char* session_id) { + if (session_id == nullptr) { + return nullptr; + } + + std::string request_id = std::string(session_id); + mllm::service::Response response = mllm::service::getResponse(request_id); + + if (response.empty()) { + return nullptr; + } + + bool finished = false; + try { + nlohmann::json j = nlohmann::json::parse(response); + + + if (j.contains("choices")) { + if (j["choices"].is_array() && !j["choices"].empty()) { + const auto& first_choice = j["choices"][0]; + if (first_choice.contains("finish_reason") && first_choice["finish_reason"] == "stop") { + finished = true; + } + } + } + + } catch (const nlohmann::json::parse_error& e) { + printf("[C++ Service] pollResponse JSON parse error: %s\n", e.what()); + return nullptr; + } + + if (finished) { + return nullptr; + } + + char* c_response = new char[response.length() + 1]; + strncpy(c_response, response.c_str(), response.length() + 1); + + return c_response; +} + +void freeResponseString(const char* response_str) { + if (response_str != nullptr) { + delete[] response_str; + } +} \ No newline at end of file diff --git a/mllm/c_api/Runtime.h b/mllm/c_api/Runtime.h index 91bb9d285..9b18b0259 100644 --- a/mllm/c_api/Runtime.h +++ b/mllm/c_api/Runtime.h @@ -31,6 +31,26 @@ MllmCAny convert2ByteArray(char* ptr, size_t size); MllmCAny convert2Int(int64_t v); MllmCAny convert2Float(double v); +//===----------------------------------------------------------------------===// +// Mllm service functions +//===----------------------------------------------------------------------===// +MllmCAny startService(size_t worker_threads); + +MllmCAny stopService(); + +void setLogLevel(int level); + +MllmCAny createQwen3Session(const char* model_path); + +MllmCAny insertSession(const char* session_id, MllmCAny handle); + +MllmCAny freeSession(MllmCAny handle); + +MllmCAny sendRequest(const char* session_id, const char* json_request); + +const char* pollResponse(const char* session_id); + +void freeResponseString(const char* response_str); #ifdef __cplusplus } diff --git a/task.py b/task.py index e3932c086..9b283b664 100644 --- a/task.py +++ b/task.py @@ -512,6 +512,18 @@ def run(self): logging.info("MLLM CLI Build Task Completed") +class ShellCommandTask(Task): + def __init__(self, config): + super().__init__(config) + + def run(self): + logging.info("Generic Shell Command Task Start (re-enabled)...") + command_str = self.config.get("command", "") + if not command_str: + logging.error("No command provided in ShellCommandTask.") + return + + throw_error_if_failed(os.system(command_str)) TASKS = { "CMakeConfigTask": CMakeConfigTask, @@ -523,6 +535,7 @@ def run(self): "BuildDocTask": BuildDocTask, "HexagonMakeTask": HexagonMakeTask, "MllmCliBuildTask": MllmCliBuildTask, + "ShellCommandTask": ShellCommandTask, } diff --git a/tasks/build_android_debug.yaml b/tasks/build_android_debug.yaml new file mode 100644 index 000000000..05bd03b01 --- /dev/null +++ b/tasks/build_android_debug.yaml @@ -0,0 +1,17 @@ +Tasks: + - CMakeConfigTask: + cmake_cfg_path: "build-android-arm64-v8a" + cmake_build_type: "Debug" + cmake_toolchain_file: "$ANDROID_NDK_PATH/build/cmake/android.toolchain.cmake" + cmake_extra_args: + - 
"-DMLLM_CROSS_COMPILE=ON" + - "-DMLLM_BUILD_ARM_BACKEND=ON" + - "-DANDROID_PLATFORM=android-28" + - "-DANDROID_ABI=arm64-v8a" + - '-DMLLM_CPU_BACKEND_COMPILE_OPTIONS="-march=armv8.2-a+fp16+fp16fml+dotprod+i8mm;-ffast-math;-Wno-nan-infinity-disabled"' + - "-DCMAKE_INSTALL_PREFIX=/root/mllm-install-android-arm64-v8a" + - "-DMLLM_KERNEL_USE_THREADS=ON" + - "-DMLLM_KERNEL_THREADS_VENDOR_OPENMP=ON" + + - CMakeBuildTask: + cmake_cfg_path: "build-android-arm64-v8a" diff --git a/tasks/build_android_go_dialog_test.yaml b/tasks/build_android_go_dialog_test.yaml new file mode 100644 index 000000000..d53c7e78b --- /dev/null +++ b/tasks/build_android_go_dialog_test.yaml @@ -0,0 +1,21 @@ +Tasks: + - ShellCommandTask: + command: | + echo "===== STEP 2: Setting up Go cross-compilation environment... =====" + export GOPROXY=https://goproxy.cn,direct + export GOOS=android + export GOARCH=arm64 + export CGO_ENABLED=1 + export ANDROID_NDK_HOME=/opt/ndk/android-ndk-r28b + export CC=$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android28-clang + export CXX=$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android28-clang++ + + + echo "===== STEP 3: Setting CGo linker and compiler flags... =====" + export CGO_LDFLAGS="-L/root/zty_workspace/mllm_zty/build-android-arm64-v8a/bin" + export CGO_CFLAGS="-I/root/zty_workspace/mllm_zty" + + + echo "===== STEP 4: Compiling Go test application... =====" + cd mllm-cli && \ + go build -o ../build-android-arm64-v8a/bin/go_dialog_test_runner -tags=mobile ./main.go \ No newline at end of file diff --git a/tasks/build_android_mllm_client.yaml b/tasks/build_android_mllm_client.yaml new file mode 100644 index 000000000..9c10e0733 --- /dev/null +++ b/tasks/build_android_mllm_client.yaml @@ -0,0 +1,22 @@ +Tasks: + - ShellCommandTask: + command: | + echo "===== STEP 2: Setting up Go cross-compilation environment... =====" + export GOPROXY=https://goproxy.cn,direct + export GOOS=android + export GOARCH=arm64 + export CGO_ENABLED=1 + export ANDROID_NDK_HOME=/opt/ndk/android-ndk-r28b + export CC=$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android28-clang + export CXX=$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android28-clang++ + + echo "===== STEP 3: Setting CGo linker and compiler flags... =====" + export CGO_LDFLAGS="-L/root/zty_workspace/mllm_zty/build-android-arm64-v8a/bin" + export CGO_CFLAGS="-I/root/zty_workspace/mllm_zty" + + echo "===== STEP 4: Compiling Go MLLM WebSocket Client... =====" + cd mllm-cli && \ + echo "Running 'go mod tidy' to ensure all dependencies are present..." && \ + go mod tidy && \ + echo "Starting client build..." && \ + go build -o ../build-android-arm64-v8a/bin/mllm_ws_client -tags=mobile ./cmd/mllm-client/main.go \ No newline at end of file diff --git a/tasks/build_android_mllm_server.yaml b/tasks/build_android_mllm_server.yaml new file mode 100644 index 000000000..f8d856c65 --- /dev/null +++ b/tasks/build_android_mllm_server.yaml @@ -0,0 +1,22 @@ +Tasks: + - ShellCommandTask: + command: | + echo "===== STEP 2: Setting up Go cross-compilation environment... 
=====" + export GOPROXY=https://goproxy.cn,direct + export GOOS=android + export GOARCH=arm64 + export CGO_ENABLED=1 + export ANDROID_NDK_HOME=/opt/ndk/android-ndk-r28b + export CC=$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android28-clang + export CXX=$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android28-clang++ + + echo "===== STEP 3: Setting CGo linker and compiler flags... =====" + export CGO_LDFLAGS="-L/root/zty_workspace/mllm_zty/build-android-arm64-v8a/bin" + export CGO_CFLAGS="-I/root/zty_workspace/mllm_zty" + + echo "===== STEP 4: Compiling Go MLLM Web Server... =====" + cd mllm-cli && \ + echo "Running 'go mod tidy' to sync dependencies..." && \ + go mod tidy && \ + echo "Starting build..." && \ + go build -o ../build-android-arm64-v8a/bin/mllm_web_server -tags=mobile ./cmd/mllm-server/main.go \ No newline at end of file