Dev #179 (Merged)

Changes from all commits (62 commits)
f3b657d
updated minor missing logic in agent_base and event_stream
korivi-CraftOS Mar 30, 2026
6011a67
Fix Node.js stuck bug
korivi-CraftOS Mar 30, 2026
b4897ba
fix schedule task import + details to schedule task desc
ahmad-ajmal Mar 30, 2026
745df23
prompt change for follow up questions
ahmad-ajmal Mar 30, 2026
cce7377
await send message
ahmad-ajmal Mar 30, 2026
15f55f2
Fix port 7926 occupied issue
zfoong Mar 31, 2026
709cae3
improvement:hide system error message when sending message with attachem…
zfoong Mar 31, 2026
0c6a32f
improvement:improve and fix command
zfoong Mar 31, 2026
09ecb97
DeepSeek, Grok added to LLMs
korivi-CraftOS Mar 31, 2026
000adfa
feature:long-task and mission handling, bug:fix minor ui bug, enable …
zfoong Apr 1, 2026
421d889
Merge pull request #164 from CraftOS-dev/feature/long-task-handling
zfoong Apr 2, 2026
24edac5
overwrite setting json
zfoong Apr 2, 2026
7a10bb5
Merge pull request #170 from CraftOS-dev/V1.2.2
korivi-CraftOS Apr 2, 2026
edfc98b
Added STT (Speech-to-Text) to the chat area
korivi-CraftOS Apr 2, 2026
1280bb7
feature:rate-limit, bug:fix caching and error spamming issue with Ant…
zfoong Apr 3, 2026
b9459d9
Merge pull request #171 from CraftOS-dev/feature/model-rate-limit
zfoong Apr 3, 2026
5c381c2
bug:fixed routing issue, include main event stream
zfoong Apr 3, 2026
fe2582d
Update settings.json
korivi-CraftOS Apr 3, 2026
a409c14
Merge pull request #172 from CraftOS-dev/feature/exp
korivi-CraftOS Apr 3, 2026
85a18d8
Added FORMAT.md
zfoong Apr 6, 2026
ce29b2b
Fix minor FORMAT.md mistake
zfoong Apr 6, 2026
584c23d
Merge pull request #173 from CraftOS-dev/feature/FORMAT.md
zfoong Apr 6, 2026
264feb1
SOUL.md update
zfoong Apr 6, 2026
7cdb43b
Merge pull request #174 from CraftOS-dev/feature/SOUL.md
zfoong Apr 6, 2026
eb326f2
bug:fix graceful shutdown with exit and q command
zfoong Apr 7, 2026
ca28342
feature:update version function
zfoong Apr 7, 2026
9724f73
feature:implemented tasks and event stream persistence
zfoong Apr 7, 2026
7413306
Merge pull request #175 from CraftOS-dev/feature/task-persistent
zfoong Apr 7, 2026
8b31167
Fix: Graceful Shutdown #160 & Agent can help user connect to external…
korivi-CraftOS Apr 7, 2026
20add45
Merge branch 'V1.2.2' of https://github.com/CraftOS-dev/CraftBot into…
korivi-CraftOS Apr 7, 2026
d81ed05
minor UI update for recording button
zfoong Apr 7, 2026
519d8d0
Merge branch 'V1.2.2' of https://github.com/craftos-dev/craftbot into…
zfoong Apr 7, 2026
8b01b2e
bug:fix Anthropic caching calculation and token usage calculation bug
zfoong Apr 7, 2026
fa4cc83
improvement:added Superpowers agent skill
zfoong Apr 7, 2026
991f654
remove byteplus API key from setting
zfoong Apr 7, 2026
99622f6
Hide GUI mode temporarily
zfoong Apr 8, 2026
927f5f9
service.py added.
korivi-CraftOS Apr 8, 2026
fc89509
WhatsApp integration fix
ahmad-ajmal Apr 8, 2026
f9e2fbf
WhatsApp prompting
ahmad-ajmal Apr 8, 2026
9b875b0
Fixed and updated the following issues in branch v1.2.2:
korivi-CraftOS Apr 8, 2026
5285bfe
Merge branch 'V1.2.2' of https://github.com/CraftOS-dev/CraftBot into…
korivi-CraftOS Apr 8, 2026
d758322
record WhatsApp conversation history
ahmad-ajmal Apr 8, 2026
7942b9d
Merge branch 'V1.2.2' of https://github.com/CraftOS-dev/CraftBot into…
ahmad-ajmal Apr 8, 2026
b0a2779
The installation is failing: fix the issues
korivi-CraftOS Apr 8, 2026
5c3a13f
Merge branch 'V1.2.2' of https://github.com/CraftOS-dev/CraftBot into…
korivi-CraftOS Apr 8, 2026
66d0efd
Fix WhatsApp session persistence, Telegram send reliability, and thir…
ahmad-ajmal Apr 8, 2026
6687b14
Issue #176: on resetting agent, conversation history isn't reset
ahmad-ajmal Apr 8, 2026
dddb7fb
Bugfix #177 and other minor fixes
korivi-CraftOS Apr 8, 2026
fc45886
Merge branch 'V1.2.2' of https://github.com/CraftOS-dev/CraftBot into…
korivi-CraftOS Apr 8, 2026
8811c55
Fix Conda issue in terminal
korivi-CraftOS Apr 8, 2026
56bb95f
Fix Node.js terminal window appearing
korivi-CraftOS Apr 8, 2026
5bd90d8
The Ollama REST API issue fix
korivi-CraftOS Apr 8, 2026
016d001
The Ollama REST API
korivi-CraftOS Apr 8, 2026
b44992b
Ollama bug fix
korivi-CraftOS Apr 8, 2026
25a9f69
Local LLM fix.
korivi-CraftOS Apr 8, 2026
30afb49
Local LLM Fix
korivi-CraftOS Apr 8, 2026
5f73af1
dashboard_metrics_filter Bug fix
korivi-CraftOS Apr 8, 2026
d173a26
Update run shell action to enable it to run as background process
zfoong Apr 9, 2026
07fa7b7
persist file extension when renaming files in workspace
ahmad-ajmal Apr 9, 2026
8956b07
replace base64 strings with file paths
ahmad-ajmal Apr 9, 2026
0cb6c05
Merge pull request #178 from CraftOS-dev/V1.2.2
ahmad-ajmal Apr 9, 2026
b03bc1c
Merge branch 'staging' into dev
ahmad-ajmal Apr 9, 2026
45 changes: 45 additions & 0 deletions README.md
@@ -186,6 +186,18 @@ python run.py --gui
| `--cli` | Run in **CLI** mode (lightweight) |
| `--gui` | Enable GUI automation mode (requires `install.py --gui` first) |

### service.py

| Command | Description |
|---------|-------------|
| `install` | Install deps, register auto-start, and start CraftBot |
| `start` | Start CraftBot in the background |
| `stop` | Stop CraftBot |
| `restart` | Stop then start |
| `status` | Show running status and auto-start state |
| `logs [-n N]` | Show last N log lines (default: 50) |
| `uninstall` | Remove auto-start registration |

**Installation Examples:**
```bash
# Simple pip installation (no conda)
@@ -247,6 +259,39 @@ python run.py --gui
conda run -n craftbot python run.py
```

### 🔧 Background Service (Recommended)

Run CraftBot as a background service so it stays running even after you close the terminal. A desktop shortcut is created automatically so you can reopen the browser anytime.

```bash
# Install dependencies, register auto-start on login, and start CraftBot
python service.py install
```

That's it. The terminal closes itself, CraftBot runs in the background, and the browser opens automatically.

```bash
# Other service commands:
python service.py start # Start CraftBot in background
python service.py status # Check if it's running
python service.py stop # Stop CraftBot
python service.py restart # Restart CraftBot
python service.py logs # See recent log output
```

| Command | Description |
|---------|-------------|
| `python service.py install` | Install dependencies, register auto-start on login, start CraftBot, open browser, and close the terminal automatically |
| `python service.py start` | Start CraftBot in the background — auto-restarts if already running (terminal closes automatically) |
| `python service.py stop` | Stop CraftBot |
| `python service.py restart` | Stop and start CraftBot |
| `python service.py status` | Check if CraftBot is running and if auto-start is enabled |
| `python service.py logs` | Show recent log output (`-n 100` for more lines) |
| `python service.py uninstall` | Stop CraftBot, remove auto-start registration, uninstall pip packages, and purge pip cache |

> [!TIP]
> After `service.py start` or `service.py install`, a **CraftBot desktop shortcut** is created automatically. If you accidentally close the browser, just double-click the shortcut to reopen it.

> [!NOTE]
> **Installation:** The installer now provides clear guidance if dependencies are missing. If Node.js is not found, you'll be prompted to install it, or you can switch to TUI mode. Installation automatically detects GPU availability and falls back to CPU-only mode if needed.

2 changes: 1 addition & 1 deletion agent_core/core/embedding_interface.py
@@ -148,7 +148,7 @@ def _get_ollama_embedding(self, text: str) -> Optional[List[float]]:
"model": self.model,
"prompt": text, # Ollama accepts "prompt" for /api/embeddings
}
url: str = f"{self.remote_url.rstrip('/')}/embeddings"
url: str = f"{self.remote_url.rstrip('/')}/api/embeddings"
response = requests.post(url, json=payload, timeout=120)
response.raise_for_status()
result = response.json()
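This one-line fix matters because Ollama mounts its REST API under the `/api` prefix: `POST /embeddings` returns a 404, while `POST /api/embeddings` is the real endpoint. A minimal sketch of the corrected call (the base URL and model name are illustrative defaults, not values from this repo):

```python
import requests

def ollama_embedding(text: str, model: str = "nomic-embed-text",
                     base_url: str = "http://localhost:11434") -> list:
    """Fetch an embedding from a local Ollama server.

    Note the /api prefix: POST /embeddings 404s, while
    POST /api/embeddings is the actual Ollama endpoint.
    """
    url = f"{base_url.rstrip('/')}/api/embeddings"
    response = requests.post(url, json={"model": model, "prompt": text}, timeout=120)
    response.raise_for_status()
    return response.json().get("embedding", [])
```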
51 changes: 50 additions & 1 deletion agent_core/core/event_stream/event.py
@@ -24,7 +24,7 @@

from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Optional, List
from typing import Any, Dict, Optional, List


SEVERITIES = ("DEBUG", "INFO", "WARN", "ERROR")
@@ -64,6 +64,32 @@ def display_text(self) -> Optional[str]:
"""
return self.display_message

def to_dict(self) -> Dict[str, Any]:
"""Serialize the event to a dictionary for persistence."""
return {
"message": self.message,
"kind": self.kind,
"severity": self.severity,
"display_message": self.display_message,
"ts": self.ts.isoformat(),
}

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "Event":
"""Deserialize an event from a dictionary."""
ts = (
datetime.fromisoformat(data["ts"])
if isinstance(data.get("ts"), str)
else datetime.now(timezone.utc)
)
return cls(
message=data["message"],
kind=data["kind"],
severity=data["severity"],
display_message=data.get("display_message"),
ts=ts,
)

@property
def iso_ts(self) -> str:
"""
@@ -92,6 +118,29 @@ class EventRecord:
repeat_count: int = 1
_cached_tokens: int | None = field(default=None, repr=False)

def to_dict(self) -> Dict[str, Any]:
"""Serialize the event record to a dictionary for persistence."""
return {
"event": self.event.to_dict(),
"ts": self.ts.isoformat(),
"repeat_count": self.repeat_count,
}

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "EventRecord":
"""Deserialize an event record from a dictionary."""
event = Event.from_dict(data["event"])
ts = (
datetime.fromisoformat(data["ts"])
if isinstance(data.get("ts"), str)
else datetime.now(timezone.utc)
)
return cls(
event=event,
ts=ts,
repeat_count=data.get("repeat_count", 1),
)

def compact_line(self) -> str:
"""
Generate a compact single-line representation of this event.
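To make the new persistence contract concrete, here is a minimal round-trip sketch. The field values are illustrative, and since the diff hides the dataclass field declarations, every field is passed explicitly rather than relying on defaults:

```python
import json
from datetime import datetime, timezone
from agent_core.core.event_stream.event import Event, EventRecord

now = datetime.now(timezone.utc)
rec = EventRecord(
    event=Event(message="task started", kind="action",  # illustrative values
                severity="INFO", display_message=None, ts=now),
    ts=now,
)

# Serialize to a JSON-safe dict, write/read it, then restore.
restored = EventRecord.from_dict(json.loads(json.dumps(rec.to_dict())))
assert restored.event.message == "task started"
assert restored.repeat_count == 1  # missing keys fall back via .get(...)
```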
67 changes: 67 additions & 0 deletions agent_core/core/impl/action/manager.py
@@ -260,6 +260,10 @@ async def execute_action(
logger.error(f"[ERROR] Failed to execute divisible action {action.name}: {e}", exc_info=True)
raise e

# Auto-save large base64 strings in action output to temp files
# This prevents LLMs from truncating binary data when it appears in context
outputs = self._extract_base64_to_files(outputs, action.name)

logger.debug(f"[OUTPUT DATA] Final outputs for action {action.name}: {outputs}")

if status != "error":
@@ -591,3 +595,66 @@ async def run_observe_step(self, action: Action, action_output: Dict) -> Dict[st
attempt += 1

return {"success": False, "message": "Observation failed or timed out."}

@staticmethod
def _extract_base64_to_files(data: dict, action_name: str) -> dict:
"""
Scan action output for large base64 data URLs and save them to temp files.
Replaces the base64 string with the file path so LLMs don't truncate it.
"""
import tempfile
import base64
import os
import re

if not isinstance(data, dict):
return data

MIN_BASE64_LENGTH = 500 # Only process strings longer than this

def process_value(key: str, value):
if not isinstance(value, str) or len(value) < MIN_BASE64_LENGTH:
return value

# Check for data URL format: data:image/png;base64,iVBOR...
match = re.match(r'^data:([\w/+.-]+);base64,(.+)$', value, re.DOTALL)
if match:
mime_type = match.group(1)
b64_data = match.group(2)
ext = {
'image/png': '.png',
'image/jpeg': '.jpg',
'image/gif': '.gif',
'image/webp': '.webp',
'application/pdf': '.pdf',
}.get(mime_type, '.bin')

try:
decoded = base64.b64decode(b64_data)
tmp = tempfile.NamedTemporaryFile(
delete=False, suffix=ext,
prefix=f"{action_name}_{key}_",
)
tmp.write(decoded)
tmp.close()
logger.info(f"[ACTION] Saved base64 {key} ({len(b64_data)} chars) to {tmp.name}")
return tmp.name
except Exception as e:
logger.warning(f"[ACTION] Failed to extract base64 from {key}: {e}")

return value

result = {}
for k, v in data.items():
if isinstance(v, dict):
result[k] = ActionManager._extract_base64_to_files(v, action_name)
elif isinstance(v, list):
result[k] = [
ActionManager._extract_base64_to_files(item, action_name) if isinstance(item, dict)
else process_value(k, item) if isinstance(item, str)
else item
for item in v
]
else:
result[k] = process_value(k, v)
return result
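A quick illustration of what this helper does to an action's output dict; the payload is filler bytes standing in for a real image, and the action name is made up:

```python
import base64

# ~600 base64 chars of filler, enough to clear MIN_BASE64_LENGTH (500).
fake_png = base64.b64encode(b"\x89PNG" + b"\x00" * 450).decode()
outputs = {
    "screenshot": f"data:image/png;base64,{fake_png}",
    "status": "ok",
}

cleaned = ActionManager._extract_base64_to_files(outputs, "take_screenshot")
# cleaned["screenshot"] is now a temp-file path such as
# /tmp/take_screenshot_screenshot_ab12cd.png; "status" passes through untouched.
```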
10 changes: 7 additions & 3 deletions agent_core/core/impl/action/router.py
@@ -16,6 +16,7 @@
from agent_core.core.protocols.context import ContextEngineProtocol
from agent_core.core.protocols.llm import LLMInterfaceProtocol
from agent_core.core.impl.llm import LLMCallType
from agent_core.core.impl.llm.errors import LLMConsecutiveFailureError
from agent_core.core.prompts import (
SELECT_ACTION_PROMPT,
SELECT_ACTION_IN_TASK_PROMPT,
@@ -538,7 +539,7 @@ async def _prompt_for_decision(
# agent_info is included for all modes to provide consistent agent context
system_prompt, _ = self.context_engine.make_prompt(
user_flags={"query": False, "expected_output": False},
system_flags={"agent_info": True, "policy": False},
system_flags={"agent_info": True},
)

raw_response = None
@@ -620,6 +621,9 @@ async def _prompt_for_decision(
f"{raw_response} | error={feedback_error}"
)
current_prompt = self._augment_prompt_with_feedback(prompt, attempt + 1, raw_response, feedback_error)
except LLMConsecutiveFailureError:
# Fatal: LLM is in a broken state - re-raise immediately, do not retry
raise
except RuntimeError as e:
# LLM provider error (empty response, API error, auth failure, etc.)
error_msg = str(e)
@@ -633,8 +637,8 @@ async def _prompt_for_decision(
raise last_error
# Otherwise, retry with more context in the prompt
current_prompt = self._augment_prompt_with_feedback(
prompt, attempt + 1,
f"[LLM ERROR] {error_msg}",
prompt, attempt + 1,
f"[LLM ERROR] {error_msg}",
"LLM provider failed - retrying"
)
except Exception as e:
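One subtlety in the router change above: Python tests `except` clauses top to bottom, so the new `LLMConsecutiveFailureError` handler only short-circuits the retry loop because it sits before the broader `RuntimeError` and `Exception` handlers. A stripped-down sketch of the pattern; the exception's real base class isn't shown in this diff, so it is assumed to subclass `RuntimeError` here to make the ordering matter:

```python
class LLMConsecutiveFailureError(RuntimeError):
    """Fatal: the LLM has failed too many consecutive calls."""
    # Assumption: subclasses RuntimeError; the repo's actual base class
    # is defined in agent_core.core.impl.llm.errors and is not shown here.

def prompt_with_retries(llm_call, max_attempts: int = 3):
    last_error = None
    for attempt in range(max_attempts):
        try:
            return llm_call()
        except LLMConsecutiveFailureError:
            raise  # broken LLM state: re-raise immediately, never retry
        except RuntimeError as e:
            last_error = e  # transient provider error: retry
    raise last_error
```

If the specific handler came after `except RuntimeError`, the fatal error would be silently absorbed into the retry loop, which is exactly the behavior this change removes.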
18 changes: 18 additions & 0 deletions agent_core/core/impl/context/engine.py
@@ -24,6 +24,7 @@
AGENT_FILE_SYSTEM_CONTEXT_PROMPT,
POLICY_PROMPT,
USER_PROFILE_PROMPT,
SOUL_PROMPT,
LANGUAGE_INSTRUCTION,
)
from agent_core.core.state import get_state, get_session_or_none
@@ -225,6 +226,21 @@ def create_system_user_profile(self) -> str:

return ""

def create_system_soul(self) -> str:
"""Create a system message block with agent soul/personality from SOUL.md."""
try:
from app.config import AGENT_FILE_SYSTEM_PATH
soul_md_path = AGENT_FILE_SYSTEM_PATH / "SOUL.md"

if soul_md_path.exists():
content = soul_md_path.read_text(encoding="utf-8").strip()
if content:
return SOUL_PROMPT.format(soul_content=content)
except Exception as e:
logger.warning(f"[CONTEXT] Failed to read SOUL.md: {e}")

return ""

def create_system_language_instruction(self) -> str:
"""Create a system message block with language instruction.

@@ -683,6 +699,7 @@ def make_prompt(
"role_info": True,
"agent_info": True,
"user_profile": True,
"soul": True,
"language_instruction": True,
"policy": True,
"environment": True,
@@ -700,6 +717,7 @@ def make_prompt(
system_sections = [
("agent_info", self.create_system_agent_info),
("user_profile", self.create_system_user_profile),
("soul", self.create_system_soul),
("language_instruction", self.create_system_language_instruction),
("policy", self.create_system_policy),
("role_info", self.create_system_role_info),
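Since every section flag in `make_prompt` defaults to `True` (see the defaults dict above), existing callers pick up the new SOUL.md block automatically. A hypothetical call site that opts out of just that section, with the key assumption flagged in the comment:

```python
# Hypothetical call site. Caveat (assumption): this presumes make_prompt
# merges caller overrides on top of the all-True defaults shown above; if
# it instead replaces the defaults wholesale, every section you still want
# must be listed explicitly, as the router's call with
# system_flags={"agent_info": True} suggests.
system_prompt, user_prompt = context_engine.make_prompt(
    system_flags={"soul": False},
)
```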
4 changes: 3 additions & 1 deletion agent_core/core/impl/event_stream/__init__.py
@@ -9,10 +9,12 @@
# Re-export data classes from existing location
from agent_core.core.event_stream.event import Event, EventRecord

# Token utilities (canonical location: agent_core.utils.token)
from agent_core.utils.token import count_tokens

# Implementation classes
from agent_core.core.impl.event_stream.event_stream import (
EventStream,
count_tokens,
get_cached_token_count,
SEVERITIES,
MAX_EVENT_INLINE_CHARS,
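The new comment points `count_tokens` at its canonical home, `agent_core.utils.token`. That module is not part of this diff, but the logic deleted from `event_stream.py` in the next file presumably moved there largely intact; a sketch of what it would contain:

```python
# agent_core/utils/token.py (sketch reconstructed from the code removed below)
import tiktoken

# Ensure tiktoken extension encodings (cl100k_base, etc.) are registered.
# Required for tiktoken >= 0.12 and PyInstaller frozen builds.
try:
    import tiktoken_ext.openai_public  # noqa: F401
except ImportError:
    pass

_tokenizer = None

def _get_tokenizer():
    """Get or create the tiktoken tokenizer (cached for performance)."""
    global _tokenizer
    if _tokenizer is None:
        try:
            _tokenizer = tiktoken.get_encoding("cl100k_base")
        except Exception:
            _tokenizer = tiktoken.get_encoding("o200k_base")  # fallback
    return _tokenizer

def count_tokens(text: str) -> int:
    """Count the number of tokens in a text string using tiktoken."""
    if not text:
        return 0
    return len(_get_tokenizer().encode(text))
```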
50 changes: 22 additions & 28 deletions agent_core/core/impl/event_stream/event_stream.py
@@ -26,38 +26,12 @@
from sklearn.feature_extraction.text import TfidfVectorizer
from agent_core.utils.logger import logger
from agent_core.decorators import profiler, OperationCategory
from agent_core.utils.token import count_tokens
import threading
import tiktoken
# Ensure tiktoken extension encodings (cl100k_base, etc.) are registered.
# Required for tiktoken >= 0.12 and PyInstaller frozen builds.
try:
import tiktoken_ext.openai_public # noqa: F401
except ImportError:
pass

SEVERITIES = ("DEBUG", "INFO", "WARN", "ERROR")
MAX_EVENT_INLINE_CHARS = 200000

# Token counting utility
_tokenizer = None

def _get_tokenizer():
"""Get or create the tiktoken tokenizer (cached for performance)."""
global _tokenizer
if _tokenizer is None:
try:
_tokenizer = tiktoken.get_encoding("cl100k_base")
except Exception:
# Fallback: use o200k_base if cl100k_base is unavailable
_tokenizer = tiktoken.get_encoding("o200k_base")
return _tokenizer

def count_tokens(text: str) -> int:
"""Count the number of tokens in a text string using tiktoken."""
if not text:
return 0
return len(_get_tokenizer().encode(text))


def get_cached_token_count(rec: "EventRecord") -> int:
"""Get token count for an EventRecord, using cached value if available.
@@ -281,6 +255,16 @@ def summarize_by_LLM(self) -> None:
)

try:
# Skip LLM call if the LLM is already in a consecutive failure state
max_failures = getattr(self.llm, "_max_consecutive_failures", 5)
current_failures = getattr(self.llm, "consecutive_failures", 0)
if current_failures >= max_failures:
logger.warning(
f"[EventStream] Skipping LLM summarization: LLM has {current_failures} "
f"consecutive failures (max={max_failures}). Falling back to prune."
)
raise RuntimeError("LLM in consecutive failure state, skip summarization")

logger.info(f"[EventStream] Running synchronous summarization ({self._total_tokens} tokens)")
llm_output = self.llm.generate_response(user_prompt=prompt)
new_summary = (llm_output or "").strip()
@@ -303,7 +287,17 @@ def summarize_by_LLM(self) -> None:
logger.info(f"[EventStream] Summarization complete. Tokens: {self._total_tokens}")

except Exception:
logger.exception("[EventStream] LLM summarization failed. Keeping all events without summarization.")
logger.exception(
"[EventStream] LLM summarization failed. "
"Pruning oldest events without a summary to prevent retry spam."
)
# Fallback: drop the oldest chunk without generating a summary so that
# _total_tokens falls below the threshold. Without this, every subsequent
# log() call would immediately re-trigger summarization and flood the logs.
removed_tokens = sum(get_cached_token_count(r) for r in chunk)
self._total_tokens -= removed_tokens
self.tail_events = self.tail_events[cutoff:]
self._session_sync_points.clear()

# ───────────────────── utilities ─────────────────────

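A closing note on the summarization fallback above: `_total_tokens` is the gate that triggers summarization, so a failure path that leaves it unchanged guarantees the very next `log()` call crosses the threshold again. Toy numbers (made up) show why the prune breaks that loop:

```python
# Before: 12k tokens in the stream, threshold 10k, failed chunk worth 4k.
total_tokens, threshold, chunk_tokens = 12_000, 10_000, 4_000

# Old behavior on LLM failure: keep all events, stay over the threshold,
# and re-trigger a doomed summarization on every subsequent log() call.
assert total_tokens > threshold

# New fallback: drop the chunk's tokens even though no summary was produced.
total_tokens -= chunk_tokens
assert total_tokens <= threshold  # 8_000: the retry loop is broken
```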