diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0c0dc72..4ca4d2f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,10 @@ lmcode uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [Unreleased]
 
+### Fixed
+- **Strict mode now truly disables tools** — previously the `strict` permission mode label said "no tools — pure chat only" but the runtime still passed the full tool list to `model.act()`, so the model could happily emit tool calls and the runtime would execute them silently. Strict now routes through `model.respond()` — the pure-chat SDK primitive that has no tool concept at all — so the model never even sees a tool schema. (`model.act(tools=[])` is not a viable alternative: the SDK rejects it with `LMStudioValueError`.) (#99)
+- **Strict mode hard system prompt (second layer of defence)** — the SDK-level fix stops the model from emitting *real* tool calls, but cannot stop it from *fabricating* tool output in plain text (e.g. Qwen 7B happily replied "Here is the file content:" followed by invented code because the base prompt said "Always call `read_file` before describing file contents"). Strict now passes a *separate* `lms.Chat` seeded with a dedicated `_STRICT_SYSTEM_PROMPT` that explicitly forbids pretending to read files, fabricating output, or describing tool calls — while still listing what the model *can* do (review pasted code, answer general questions, rubber-duck debugging). `self._chat` is untouched so Tab-switching back to ask/auto keeps the base prompt and full history. (#99)
+
 ---
 
 ## [0.7.0] - 2026-03-26
diff --git a/README.md b/README.md
index ed63f6e..a4fa840 100644
--- a/README.md
+++ b/README.md
@@ -51,7 +51,7 @@ LM Studio   →   lmcode agent   →   your codebase
 - **Tool output panels** — syntax-highlighted file previews, side-by-side diff blocks for edits, IN/OUT panels for shell commands
 - **Ghost-text autocomplete** — fish-shell style: type `/h` → dim `elp` appears, Tab accepts
 - **Persistent history** — Ctrl+R and Up-arrow recall prompts across sessions (`~/.lmcode/history`)
-- **Permission modes** — `ask` (confirm each tool), `auto` (run freely), `strict` (read-only); Tab cycles between them
+- **Permission modes** — `ask` (confirm each tool), `auto` (run freely), `strict` (no tools — pure chat, enforced at SDK level); Tab cycles between them
 - **LMCODE.md** — per-repo context file injected into the system prompt
 - **/compact** — summarises conversation history via the model, resets the chat, and injects the summary as context
 - **/tokens** — session-wide prompt (↑) and generated (↓) token totals with context arc (`◔ 38%  14.2k / 32k tok`)
diff --git a/src/lmcode/agent/core.py b/src/lmcode/agent/core.py
index b82ba0b..8e9c190 100644
--- a/src/lmcode/agent/core.py
+++ b/src/lmcode/agent/core.py
@@ -17,6 +17,7 @@
 import json
 import pathlib
 import random
+import time
 from typing import Any
 
 import lmstudio as lms
@@ -157,6 +158,85 @@ def _build_system_prompt() -> str:
     return base
 
 
+#: Hard system prompt for strict mode. The SDK-level fix (routing through
+#: ``model.respond()``) stops the model from *actually* calling tools, but
+#: cannot stop it from *fabricating* tool results in plain text — e.g.
+#: replying "Here is the file content:" followed by invented code. This
+#: prompt is the second layer of defence against that hallucination.
+_STRICT_SYSTEM_PROMPT = """\
+You are lmcode running in **STRICT MODE** — a pure-text chat with NO tools.
+
+<env>
+Working directory: {cwd}
+Platform: {platform}
+</env>
+
+## What you CANNOT do in this turn
+
+You have **no tools available**. You CANNOT:
+- read files or look at their contents
+- write, create, or modify any file
+- run shell commands
+- list directories
+- search the codebase with ripgrep or anything else
+- inspect the filesystem in any way
+
+There is no hidden tool you can invoke. There is no workaround.
+
+## Critical rules — violating these is a hallucination
+
+1. **Never pretend to have read a file.** If the user asks "what's in file
+   X" or "show me file X", you have NOT seen it. Do not invent its
+   contents. Do not reproduce code you "think" is probably there.
+
+2. **Never fabricate tool output.** Do not write phrases like "Here is the
+   content:" followed by code, or "Running that command gives:" followed
+   by output. You did not read anything. You did not run anything.
+
+3. **Never describe a tool call you are about to make.** There are no
+   tools to call. Do not say "I will read the file" — there is no read.
+
+4. **If the question genuinely needs filesystem access, say so honestly.**
+   Tell the user: "I'm in strict mode and have no filesystem access. Press
+   Tab to switch to ask or auto mode, or paste the relevant code snippet
+   directly into the chat and I'll help from there."
+
+## What you CAN do
+
+Strict mode is not "refuse everything" mode. You can still be useful:
+
+- Discuss, explain, or review code the user pastes directly into the chat.
+- Review code that appears earlier in this conversation's history.
+- Answer general programming and software-design questions from your own
+  knowledge.
+- Help the user plan, design, or reason through a problem conversationally.
+- Debug code the user shows you inline (rubber-duck style).
+- Write new code snippets from scratch when the user describes what they
+  want — just return them in a fenced code block for the user to copy.
+
+Be concise. Be honest. If you don't know, say so. If you need to see
+something that isn't in the conversation yet, ask for it.
+"""
+
+
+def _build_strict_system_prompt() -> str:
+    """Return the strict-mode system prompt with cwd/platform injected.
+
+    Also appends any ``LMCODE.md`` content so the model retains project
+    context while being locked out of tool use. Used only in strict mode;
+    ask/auto modes keep :func:`_build_system_prompt`.
+    """
+    import platform as _platform
+
+    cwd = pathlib.Path.cwd().as_posix()
+    plat = f"{_platform.system()} {_platform.release()}"
+    base = _STRICT_SYSTEM_PROMPT.format(cwd=cwd, platform=plat)
+    extra = read_lmcode_md()
+    if extra:
+        return f"{base}\n\n## Project context (LMCODE.md)\n\n{extra}"
+    return base
+
+
 # ---------------------------------------------------------------------------
 # Tool verbose wrapper
 # ---------------------------------------------------------------------------
@@ -321,6 +401,32 @@ def _ensure_chat(self) -> lms.Chat:
             self._chat = self._init_chat()
         return self._chat
 
+    def _build_strict_chat(self) -> lms.Chat:
+        """Return a fresh :class:`lms.Chat` seeded with the strict system prompt.
+
+        Replays ``self._raw_history`` into the new chat so the model still
+        sees all prior turns — only the system prompt differs.  Used only
+        by the strict branch of :meth:`_run_turn`; ``self._chat`` is left
+        untouched so switching back to ask/auto keeps the base prompt and
+        full persistent history intact.
+
+        Why a separate chat instead of swapping ``self._chat`` in place:
+
+        * Users Tab-cycle between modes mid-session; rewriting the system
+          prompt every switch would churn the persistent chat object and
+          risk SDK-side state drift.
+        * Strict mode is stateless at the prompt level — every strict
+          turn gets the same fresh hard prompt, no deriving from a
+          previous turn's state.
+        """
+        strict_chat = lms.Chat(_build_strict_system_prompt())
+        for role, msg in self._raw_history:
+            if role == "user":
+                strict_chat.add_user_message(msg)
+            else:
+                strict_chat.add_assistant_response(msg)
+        return strict_chat
+
     # ------------------------------------------------------------------
     # Slash-command handler
     # ------------------------------------------------------------------
@@ -1021,11 +1127,22 @@ def _on_prediction_completed(result: Any) -> None:
 
         tok_count: list[int] = [0]
 
-        def _on_fragment(fragment: Any, _round_index: int) -> None:
-            """Count generated tokens for the spinner label."""
+        def _on_fragment(fragment: Any, _round_index: int = 0) -> None:
+            """Count generated tokens for the spinner label.
+
+            ``_round_index`` has a default because ``model.act()`` invokes
+            this callback with two positional args (``fragment``, round)
+            while ``model.respond()`` (used in strict mode) invokes it
+            with one.  A single function serves both SDK paths.
+            """
             tok_count[0] += 1
 
-        tools = [self._wrap_tool(t) for t in self._tools]
+        # Strict mode only wraps tools when we're actually going to use
+        # them (ask/auto path).  The strict branch below skips tool
+        # plumbing entirely and calls ``model.respond()`` — the SDK
+        # refuses ``model.act(tools=[])`` so an empty-list hack is not
+        # viable enforcement (#99).
+        tools = [self._wrap_tool(t) for t in self._tools] if self._mode != "strict" else []
 
         stop_evt = asyncio.Event()
         shuffled_tips = random.sample(_TIPS, len(_TIPS)) if self._show_tips else []
@@ -1065,29 +1182,79 @@ async def _keepalive() -> None:
                 await asyncio.sleep(0.1)
 
         keepalive = asyncio.create_task(_keepalive())
+        act_result: Any = None
+        respond_result: Any = None
+        strict_start: float | None = None
         try:
-            act_result = await model.act(
-                chat,
-                tools=tools,
-                config=self._inference_config if self._inference_config else None,
-                on_message=_on_message,
-                on_prediction_completed=_on_prediction_completed,
-                on_prediction_fragment=_on_fragment,
-            )
+            if self._mode == "strict":
+                # Strict mode (#99): use ``model.respond()`` — the pure
+                # chat primitive — so the model never sees any tool
+                # schema.  ``model.act(tools=[])`` raises
+                # ``LMStudioValueError`` and is not a valid enforcement
+                # path.  ``respond()`` supports the same callback shape
+                # so the spinner/stats machinery is reused unchanged.
+                #
+                # Second layer of defence: pass a *separate* chat
+                # object seeded with the strict system prompt. The SDK
+                # fix stops the model from emitting real tool_calls,
+                # but it cannot stop the model from *fabricating* tool
+                # output in plain text — e.g. "Here is the file
+                # content:" followed by invented code. The strict
+                # prompt explicitly forbids that. ``self._chat`` keeps
+                # the base prompt so switching back to ask/auto mode
+                # retains full history without a rebuild.
+                strict_chat = self._build_strict_chat()
+                strict_start = time.monotonic()
+                respond_result = await model.respond(
+                    strict_chat,
+                    config=self._inference_config if self._inference_config else None,
+                    on_message=_on_message,
+                    on_prediction_fragment=_on_fragment,
+                )
+            else:
+                act_result = await model.act(
+                    chat,
+                    tools=tools,
+                    config=self._inference_config if self._inference_config else None,
+                    on_message=_on_message,
+                    on_prediction_completed=_on_prediction_completed,
+                    on_prediction_fragment=_on_fragment,
+                )
         finally:
             stop_evt.set()
             await keepalive
 
+        # ``respond()`` returns a single ``PredictionResult`` with ``.stats``
+        # directly, whereas ``act()`` fires ``_on_prediction_completed`` once
+        # per round.  Normalise so the downstream stats code runs identically.
+        if respond_result is not None and hasattr(respond_result, "stats"):
+            stats_capture.append(respond_result.stats)
+
         for s in stats_capture:
             self._session_prompt_tokens += getattr(s, "prompt_tokens_count", 0) or 0
             self._session_completion_tokens += getattr(s, "predicted_tokens_count", 0) or 0
         if stats_capture:
             self._last_prompt_tokens = getattr(stats_capture[-1], "prompt_tokens_count", 0) or 0
 
+        # ``respond()`` does not always invoke ``on_message`` for the final
+        # assistant turn — fall back to ``result.content`` so strict mode
+        # never shows "(no response)" when the model actually answered.
+        if not captured and respond_result is not None:
+            parts = getattr(respond_result, "content", None)
+            if isinstance(parts, list):
+                captured.append("".join(p.text for p in parts if hasattr(p, "text")))
+            elif parts is not None:
+                captured.append(str(parts))
+
         response_text = captured[-1] if captured else "(no response)"
         chat.add_assistant_response(response_text)
         self._turn_count += 1
-        elapsed = getattr(act_result, "total_time_seconds", None)
+        if act_result is not None:
+            elapsed = getattr(act_result, "total_time_seconds", None)
+        elif strict_start is not None:
+            elapsed = time.monotonic() - strict_start
+        else:
+            elapsed = None
         return response_text, _build_stats_line(stats_capture, elapsed)
 
     # ------------------------------------------------------------------
diff --git a/tests/test_agent/test_core.py b/tests/test_agent/test_core.py
index 312a92c..0e97fe1 100644
--- a/tests/test_agent/test_core.py
+++ b/tests/test_agent/test_core.py
@@ -120,6 +120,180 @@ async def test_run_turn_adds_to_chat_history() -> None:
     assert agent._chat is not None
 
 
+def _make_mock_respond_model(response_text: str) -> MagicMock:
+    """Build a mock model whose respond() returns a PredictionResult-like object.
+
+    Mirrors :func:`_make_mock_model` but for the strict-mode path that
+    uses ``model.respond()`` instead of ``model.act()``.  The side effect
+    invokes ``on_prediction_fragment`` with a **single** positional arg,
+    exactly like the real SDK does in ``json_api.py:1486`` — that shape
+    mismatch with ``act()``'s two-arg callback was the source of a
+    regression during #99 development.
+    """
+
+    async def respond_side_effect(
+        history: object,
+        on_message: object = None,
+        on_prediction_fragment: object = None,
+        **kwargs: object,
+    ) -> MagicMock:
+        if callable(on_prediction_fragment):
+            fragment = MagicMock()
+            fragment.content = "tok "
+            on_prediction_fragment(fragment)  # <-- 1 arg, not 2
+        text_part = MagicMock()
+        text_part.text = response_text
+        result = MagicMock()
+        result.content = [text_part]
+        result.stats = MagicMock(
+            prompt_tokens_count=10,
+            predicted_tokens_count=20,
+            tokens_per_second=50.0,
+        )
+        return result
+
+    mock = MagicMock()
+    mock.respond = AsyncMock(side_effect=respond_side_effect)
+    mock.act = AsyncMock()  # should never be awaited in strict mode
+    return mock
+
+
+@pytest.mark.asyncio
+async def test_run_turn_strict_mode_uses_respond_not_act() -> None:
+    """Strict mode (#99) must route through ``model.respond()`` — not ``act()``.
+
+    ``model.act()`` refuses ``tools=[]`` with ``LMStudioValueError``, so
+    strict has to take a completely different SDK path.  This test pins
+    that routing: respond is awaited once, act is never awaited.
+    """
+    agent = Agent()
+    agent._mode = "strict"
+    mock_model = _make_mock_respond_model("strict reply")
+
+    with patch("lmcode.agent.core.read_lmcode_md", return_value=None):
+        response, _stats = await agent._run_turn(mock_model, "hello in strict")
+
+    mock_model.respond.assert_awaited_once()
+    mock_model.act.assert_not_awaited()
+    assert response == "strict reply"
+    # Ensure ``tools`` was not passed to respond — respond() doesn't
+    # accept that kwarg at all.
+    call_kwargs = mock_model.respond.await_args.kwargs
+    assert "tools" not in call_kwargs
+
+
+@pytest.mark.asyncio
+async def test_run_turn_strict_mode_uses_strict_system_prompt() -> None:
+    """Strict mode must pass a chat seeded with the hard strict system prompt.
+
+    The SDK-level fix (routing through ``model.respond()``) stops the
+    model from emitting *real* tool calls, but it cannot stop it from
+    *fabricating* tool output in plain text — e.g. replying with "Here
+    is the file content:" followed by invented code.  The strict
+    system prompt is the second layer of defence that forbids that
+    behaviour explicitly.
+
+    This test pins three properties of the strict branch:
+
+    1. ``_build_strict_system_prompt`` is actually invoked.
+    2. The chat passed to ``respond()`` is a *separate* object from
+       ``agent._chat`` — so switching back to ask/auto keeps the base
+       prompt intact.
+    3. The chat passed to ``respond()`` has a system message that
+       contains the strict-mode marker.
+    """
+    agent = Agent()
+    agent._mode = "strict"
+    # run() normally appends to _raw_history before calling _run_turn; mirror
+    # that here so _build_strict_chat has the current user message to replay.
+    agent._raw_history = [("user", "what's in calculator.py?")]
+    mock_model = _make_mock_respond_model("strict reply")
+
+    with (
+        patch("lmcode.agent.core.read_lmcode_md", return_value=None),
+        patch(
+            "lmcode.agent.core._build_strict_system_prompt",
+            wraps=__import__(
+                "lmcode.agent.core", fromlist=["_build_strict_system_prompt"]
+            )._build_strict_system_prompt,
+        ) as mock_builder,
+    ):
+        await agent._run_turn(mock_model, "what's in calculator.py?")
+
+    mock_builder.assert_called_once()
+
+    # The chat passed to respond() must not be the persistent chat.
+    sdk_chat = mock_model.respond.await_args.args[0]
+    assert sdk_chat is not agent._chat, (
+        "strict mode must build a *separate* chat so self._chat retains "
+        "the base system prompt for when the user switches back to ask/auto"
+    )
+
+    # The strict chat's system message must contain the hard marker.
+    system_msg = sdk_chat._messages[0]
+    system_text = "".join(p.text for p in system_msg.content if hasattr(p, "text"))
+    assert "STRICT MODE" in system_text, (
+        f"strict chat system prompt must contain 'STRICT MODE' marker, got:\n{system_text[:200]}"
+    )
+
+
+@pytest.mark.asyncio
+async def test_run_turn_strict_mode_replays_history_into_strict_chat() -> None:
+    """Prior turns in ``_raw_history`` must be replayed into the strict chat.
+
+    Switching to strict mode mid-conversation must not lobotomise the
+    model — it still needs to see what was said before so it can answer
+    contextually (e.g. "explain the function we just discussed").
+    """
+    agent = Agent()
+    agent._mode = "strict"
+    # Seed two completed turns before the strict request.
+    agent._raw_history = [
+        ("user", "earlier question"),
+        ("assistant", "earlier answer"),
+        ("user", "follow-up in strict"),  # the current turn (already recorded by run())
+    ]
+    mock_model = _make_mock_respond_model("strict follow-up reply")
+
+    with patch("lmcode.agent.core.read_lmcode_md", return_value=None):
+        await agent._run_turn(mock_model, "follow-up in strict")
+
+    sdk_chat = mock_model.respond.await_args.args[0]
+    # Expect: system + 3 history messages = 4 total.
+    roles = [m.__class__.__name__ for m in sdk_chat._messages]
+    assert roles[0] == "SystemPrompt"
+    assert len(sdk_chat._messages) == 4, (
+        f"strict chat should replay 3 history entries after system msg, got roles={roles}"
+    )
+    # The last user message must be the current one.
+    last = sdk_chat._messages[-1]
+    last_text = "".join(p.text for p in last.content if hasattr(p, "text"))
+    assert "follow-up in strict" in last_text
+
+
+@pytest.mark.asyncio
+async def test_run_turn_non_strict_modes_use_act_with_tools() -> None:
+    """Ask and auto modes keep using ``model.act()`` with the full tool list.
+
+    Regression for #99 — the strict-mode fix must not silently reroute
+    the other permission modes through ``respond()``.
+    """
+    for mode in ("ask", "auto"):
+        agent = Agent()
+        agent._mode = mode
+        mock_model = _make_mock_model("ok")
+
+        with patch("lmcode.agent.core.read_lmcode_md", return_value=None):
+            await agent._run_turn(mock_model, "hi")
+
+        mock_model.act.assert_awaited_once()
+        call_kwargs = mock_model.act.await_args.kwargs
+        assert len(call_kwargs["tools"]) == len(agent._tools), (
+            f"mode={mode} should pass all {len(agent._tools)} tools, "
+            f"got {len(call_kwargs['tools'])}"
+        )
+
+
 # ---------------------------------------------------------------------------
 # _wrap_tool_verbose — positional-arg merging
 # ---------------------------------------------------------------------------