diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c0dc72..4ca4d2f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ lmcode uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Fixed +- **Strict mode now truly disables tools** — previously the `strict` permission mode label said "no tools — pure chat only" but the runtime still passed the full tool list to `model.act()`, so the model could happily emit tool calls and the runtime would execute them silently. Strict now routes through `model.respond()` — the pure-chat SDK primitive that has no tool concept at all — so the model never even sees a tool schema. (`model.act(tools=[])` is not a viable alternative: the SDK rejects it with `LMStudioValueError`.) (#99) +- **Strict mode hard system prompt (second layer of defence)** — the SDK-level fix stops the model from emitting *real* tool calls, but cannot stop it from *fabricating* tool output in plain text (e.g. Qwen 7B happily replied "Here is the file content:" followed by invented code because the base prompt said "Always call `read_file` before describing file contents"). Strict now passes a *separate* `lms.Chat` seeded with a dedicated `_STRICT_SYSTEM_PROMPT` that explicitly forbids pretending to read files, fabricating output, or describing tool calls — while still listing what the model *can* do (review pasted code, answer general questions, rubber-duck debugging). `self._chat` is untouched so Tab-switching back to ask/auto keeps the base prompt and full history. (#99) + --- ## [0.7.0] - 2026-03-26 diff --git a/README.md b/README.md index ed63f6e..a4fa840 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ LM Studio → lmcode agent → your codebase - **Tool output panels** — syntax-highlighted file previews, side-by-side diff blocks for edits, IN/OUT panels for shell commands - **Ghost-text autocomplete** — fish-shell style: type `/h` → dim `elp` appears, Tab accepts - **Persistent history** — Ctrl+R and Up-arrow recall prompts across sessions (`~/.lmcode/history`) -- **Permission modes** — `ask` (confirm each tool), `auto` (run freely), `strict` (read-only); Tab cycles between them +- **Permission modes** — `ask` (confirm each tool), `auto` (run freely), `strict` (no tools — pure chat, enforced at SDK level); Tab cycles between them - **LMCODE.md** — per-repo context file injected into the system prompt - **/compact** — summarises conversation history via the model, resets the chat, and injects the summary as context - **/tokens** — session-wide prompt (↑) and generated (↓) token totals with context arc (`◔ 38% 14.2k / 32k tok`) diff --git a/src/lmcode/agent/core.py b/src/lmcode/agent/core.py index b82ba0b..8e9c190 100644 --- a/src/lmcode/agent/core.py +++ b/src/lmcode/agent/core.py @@ -17,6 +17,7 @@ import json import pathlib import random +import time from typing import Any import lmstudio as lms @@ -157,6 +158,85 @@ def _build_system_prompt() -> str: return base +#: Hard system prompt for strict mode. The SDK-level fix (routing through +#: ``model.respond()``) stops the model from *actually* calling tools, but +#: cannot stop it from *fabricating* tool results in plain text — e.g. +#: replying "Here is the file content:" followed by invented code. This +#: prompt is the second layer of defence against that hallucination. +_STRICT_SYSTEM_PROMPT = """\ +You are lmcode running in **STRICT MODE** — a pure-text chat with NO tools. + + +Working directory: {cwd} +Platform: {platform} + + +## What you CANNOT do in this turn + +You have **no tools available**. You CANNOT: +- read files or look at their contents +- write, create, or modify any file +- run shell commands +- list directories +- search the codebase with ripgrep or anything else +- inspect the filesystem in any way + +There is no hidden tool you can invoke. There is no workaround. + +## Critical rules — violating these is a hallucination + +1. **Never pretend to have read a file.** If the user asks "what's in file + X" or "show me file X", you have NOT seen it. Do not invent its + contents. Do not reproduce code you "think" is probably there. + +2. **Never fabricate tool output.** Do not write phrases like "Here is the + content:" followed by code, or "Running that command gives:" followed + by output. You did not read anything. You did not run anything. + +3. **Never describe a tool call you are about to make.** There are no + tools to call. Do not say "I will read the file" — there is no read. + +4. **If the question genuinely needs filesystem access, say so honestly.** + Tell the user: "I'm in strict mode and have no filesystem access. Press + Tab to switch to ask or auto mode, or paste the relevant code snippet + directly into the chat and I'll help from there." + +## What you CAN do + +Strict mode is not "refuse everything" mode. You can still be useful: + +- Discuss, explain, or review code the user pastes directly into the chat. +- Review code that appears earlier in this conversation's history. +- Answer general programming and software-design questions from your own + knowledge. +- Help the user plan, design, or reason through a problem conversationally. +- Debug code the user shows you inline (rubber-duck style). +- Write new code snippets from scratch when the user describes what they + want — just return them in a fenced code block for the user to copy. + +Be concise. Be honest. If you don't know, say so. If you need to see +something that isn't in the conversation yet, ask for it. +""" + + +def _build_strict_system_prompt() -> str: + """Return the strict-mode system prompt with cwd/platform injected. + + Also appends any ``LMCODE.md`` content so the model retains project + context while being locked out of tool use. Used only in strict mode; + ask/auto modes keep :func:`_build_system_prompt`. + """ + import platform as _platform + + cwd = pathlib.Path.cwd().as_posix() + plat = f"{_platform.system()} {_platform.release()}" + base = _STRICT_SYSTEM_PROMPT.format(cwd=cwd, platform=plat) + extra = read_lmcode_md() + if extra: + return f"{base}\n\n## Project context (LMCODE.md)\n\n{extra}" + return base + + # --------------------------------------------------------------------------- # Tool verbose wrapper # --------------------------------------------------------------------------- @@ -321,6 +401,32 @@ def _ensure_chat(self) -> lms.Chat: self._chat = self._init_chat() return self._chat + def _build_strict_chat(self) -> lms.Chat: + """Return a fresh :class:`lms.Chat` seeded with the strict system prompt. + + Replays ``self._raw_history`` into the new chat so the model still + sees all prior turns — only the system prompt differs. Used only + by the strict branch of :meth:`_run_turn`; ``self._chat`` is left + untouched so switching back to ask/auto keeps the base prompt and + full persistent history intact. + + Why a separate chat instead of swapping ``self._chat`` in place: + + * Users Tab-cycle between modes mid-session; rewriting the system + prompt every switch would churn the persistent chat object and + risk SDK-side state drift. + * Strict mode is stateless at the prompt level — every strict + turn gets the same fresh hard prompt, no deriving from a + previous turn's state. + """ + strict_chat = lms.Chat(_build_strict_system_prompt()) + for role, msg in self._raw_history: + if role == "user": + strict_chat.add_user_message(msg) + else: + strict_chat.add_assistant_response(msg) + return strict_chat + # ------------------------------------------------------------------ # Slash-command handler # ------------------------------------------------------------------ @@ -1021,11 +1127,22 @@ def _on_prediction_completed(result: Any) -> None: tok_count: list[int] = [0] - def _on_fragment(fragment: Any, _round_index: int) -> None: - """Count generated tokens for the spinner label.""" + def _on_fragment(fragment: Any, _round_index: int = 0) -> None: + """Count generated tokens for the spinner label. + + ``_round_index`` has a default because ``model.act()`` invokes + this callback with two positional args (``fragment``, round) + while ``model.respond()`` (used in strict mode) invokes it + with one. A single function serves both SDK paths. + """ tok_count[0] += 1 - tools = [self._wrap_tool(t) for t in self._tools] + # Strict mode only wraps tools when we're actually going to use + # them (ask/auto path). The strict branch below skips tool + # plumbing entirely and calls ``model.respond()`` — the SDK + # refuses ``model.act(tools=[])`` so an empty-list hack is not + # viable enforcement (#99). + tools = [self._wrap_tool(t) for t in self._tools] if self._mode != "strict" else [] stop_evt = asyncio.Event() shuffled_tips = random.sample(_TIPS, len(_TIPS)) if self._show_tips else [] @@ -1065,29 +1182,79 @@ async def _keepalive() -> None: await asyncio.sleep(0.1) keepalive = asyncio.create_task(_keepalive()) + act_result: Any = None + respond_result: Any = None + strict_start: float | None = None try: - act_result = await model.act( - chat, - tools=tools, - config=self._inference_config if self._inference_config else None, - on_message=_on_message, - on_prediction_completed=_on_prediction_completed, - on_prediction_fragment=_on_fragment, - ) + if self._mode == "strict": + # Strict mode (#99): use ``model.respond()`` — the pure + # chat primitive — so the model never sees any tool + # schema. ``model.act(tools=[])`` raises + # ``LMStudioValueError`` and is not a valid enforcement + # path. ``respond()`` supports the same callback shape + # so the spinner/stats machinery is reused unchanged. + # + # Second layer of defence: pass a *separate* chat + # object seeded with the strict system prompt. The SDK + # fix stops the model from emitting real tool_calls, + # but it cannot stop the model from *fabricating* tool + # output in plain text — e.g. "Here is the file + # content:" followed by invented code. The strict + # prompt explicitly forbids that. ``self._chat`` keeps + # the base prompt so switching back to ask/auto mode + # retains full history without a rebuild. + strict_chat = self._build_strict_chat() + strict_start = time.monotonic() + respond_result = await model.respond( + strict_chat, + config=self._inference_config if self._inference_config else None, + on_message=_on_message, + on_prediction_fragment=_on_fragment, + ) + else: + act_result = await model.act( + chat, + tools=tools, + config=self._inference_config if self._inference_config else None, + on_message=_on_message, + on_prediction_completed=_on_prediction_completed, + on_prediction_fragment=_on_fragment, + ) finally: stop_evt.set() await keepalive + # ``respond()`` returns a single ``PredictionResult`` with ``.stats`` + # directly, whereas ``act()`` fires ``_on_prediction_completed`` once + # per round. Normalise so the downstream stats code runs identically. + if respond_result is not None and hasattr(respond_result, "stats"): + stats_capture.append(respond_result.stats) + for s in stats_capture: self._session_prompt_tokens += getattr(s, "prompt_tokens_count", 0) or 0 self._session_completion_tokens += getattr(s, "predicted_tokens_count", 0) or 0 if stats_capture: self._last_prompt_tokens = getattr(stats_capture[-1], "prompt_tokens_count", 0) or 0 + # ``respond()`` does not always invoke ``on_message`` for the final + # assistant turn — fall back to ``result.content`` so strict mode + # never shows "(no response)" when the model actually answered. + if not captured and respond_result is not None: + parts = getattr(respond_result, "content", None) + if isinstance(parts, list): + captured.append("".join(p.text for p in parts if hasattr(p, "text"))) + elif parts is not None: + captured.append(str(parts)) + response_text = captured[-1] if captured else "(no response)" chat.add_assistant_response(response_text) self._turn_count += 1 - elapsed = getattr(act_result, "total_time_seconds", None) + if act_result is not None: + elapsed = getattr(act_result, "total_time_seconds", None) + elif strict_start is not None: + elapsed = time.monotonic() - strict_start + else: + elapsed = None return response_text, _build_stats_line(stats_capture, elapsed) # ------------------------------------------------------------------ diff --git a/tests/test_agent/test_core.py b/tests/test_agent/test_core.py index 312a92c..0e97fe1 100644 --- a/tests/test_agent/test_core.py +++ b/tests/test_agent/test_core.py @@ -120,6 +120,180 @@ async def test_run_turn_adds_to_chat_history() -> None: assert agent._chat is not None +def _make_mock_respond_model(response_text: str) -> MagicMock: + """Build a mock model whose respond() returns a PredictionResult-like object. + + Mirrors :func:`_make_mock_model` but for the strict-mode path that + uses ``model.respond()`` instead of ``model.act()``. The side effect + invokes ``on_prediction_fragment`` with a **single** positional arg, + exactly like the real SDK does in ``json_api.py:1486`` — that shape + mismatch with ``act()``'s two-arg callback was the source of a + regression during #99 development. + """ + + async def respond_side_effect( + history: object, + on_message: object = None, + on_prediction_fragment: object = None, + **kwargs: object, + ) -> MagicMock: + if callable(on_prediction_fragment): + fragment = MagicMock() + fragment.content = "tok " + on_prediction_fragment(fragment) # <-- 1 arg, not 2 + text_part = MagicMock() + text_part.text = response_text + result = MagicMock() + result.content = [text_part] + result.stats = MagicMock( + prompt_tokens_count=10, + predicted_tokens_count=20, + tokens_per_second=50.0, + ) + return result + + mock = MagicMock() + mock.respond = AsyncMock(side_effect=respond_side_effect) + mock.act = AsyncMock() # should never be awaited in strict mode + return mock + + +@pytest.mark.asyncio +async def test_run_turn_strict_mode_uses_respond_not_act() -> None: + """Strict mode (#99) must route through ``model.respond()`` — not ``act()``. + + ``model.act()`` refuses ``tools=[]`` with ``LMStudioValueError``, so + strict has to take a completely different SDK path. This test pins + that routing: respond is awaited once, act is never awaited. + """ + agent = Agent() + agent._mode = "strict" + mock_model = _make_mock_respond_model("strict reply") + + with patch("lmcode.agent.core.read_lmcode_md", return_value=None): + response, _stats = await agent._run_turn(mock_model, "hello in strict") + + mock_model.respond.assert_awaited_once() + mock_model.act.assert_not_awaited() + assert response == "strict reply" + # Ensure ``tools`` was not passed to respond — respond() doesn't + # accept that kwarg at all. + call_kwargs = mock_model.respond.await_args.kwargs + assert "tools" not in call_kwargs + + +@pytest.mark.asyncio +async def test_run_turn_strict_mode_uses_strict_system_prompt() -> None: + """Strict mode must pass a chat seeded with the hard strict system prompt. + + The SDK-level fix (routing through ``model.respond()``) stops the + model from emitting *real* tool calls, but it cannot stop it from + *fabricating* tool output in plain text — e.g. replying with "Here + is the file content:" followed by invented code. The strict + system prompt is the second layer of defence that forbids that + behaviour explicitly. + + This test pins three properties of the strict branch: + + 1. ``_build_strict_system_prompt`` is actually invoked. + 2. The chat passed to ``respond()`` is a *separate* object from + ``agent._chat`` — so switching back to ask/auto keeps the base + prompt intact. + 3. The chat passed to ``respond()`` has a system message that + contains the strict-mode marker. + """ + agent = Agent() + agent._mode = "strict" + # run() normally appends to _raw_history before calling _run_turn; mirror + # that here so _build_strict_chat has the current user message to replay. + agent._raw_history = [("user", "what's in calculator.py?")] + mock_model = _make_mock_respond_model("strict reply") + + with ( + patch("lmcode.agent.core.read_lmcode_md", return_value=None), + patch( + "lmcode.agent.core._build_strict_system_prompt", + wraps=__import__( + "lmcode.agent.core", fromlist=["_build_strict_system_prompt"] + )._build_strict_system_prompt, + ) as mock_builder, + ): + await agent._run_turn(mock_model, "what's in calculator.py?") + + mock_builder.assert_called_once() + + # The chat passed to respond() must not be the persistent chat. + sdk_chat = mock_model.respond.await_args.args[0] + assert sdk_chat is not agent._chat, ( + "strict mode must build a *separate* chat so self._chat retains " + "the base system prompt for when the user switches back to ask/auto" + ) + + # The strict chat's system message must contain the hard marker. + system_msg = sdk_chat._messages[0] + system_text = "".join(p.text for p in system_msg.content if hasattr(p, "text")) + assert "STRICT MODE" in system_text, ( + f"strict chat system prompt must contain 'STRICT MODE' marker, got:\n{system_text[:200]}" + ) + + +@pytest.mark.asyncio +async def test_run_turn_strict_mode_replays_history_into_strict_chat() -> None: + """Prior turns in ``_raw_history`` must be replayed into the strict chat. + + Switching to strict mode mid-conversation must not lobotomise the + model — it still needs to see what was said before so it can answer + contextually (e.g. "explain the function we just discussed"). + """ + agent = Agent() + agent._mode = "strict" + # Seed two completed turns before the strict request. + agent._raw_history = [ + ("user", "earlier question"), + ("assistant", "earlier answer"), + ("user", "follow-up in strict"), # the current turn (already recorded by run()) + ] + mock_model = _make_mock_respond_model("strict follow-up reply") + + with patch("lmcode.agent.core.read_lmcode_md", return_value=None): + await agent._run_turn(mock_model, "follow-up in strict") + + sdk_chat = mock_model.respond.await_args.args[0] + # Expect: system + 3 history messages = 4 total. + roles = [m.__class__.__name__ for m in sdk_chat._messages] + assert roles[0] == "SystemPrompt" + assert len(sdk_chat._messages) == 4, ( + f"strict chat should replay 3 history entries after system msg, got roles={roles}" + ) + # The last user message must be the current one. + last = sdk_chat._messages[-1] + last_text = "".join(p.text for p in last.content if hasattr(p, "text")) + assert "follow-up in strict" in last_text + + +@pytest.mark.asyncio +async def test_run_turn_non_strict_modes_use_act_with_tools() -> None: + """Ask and auto modes keep using ``model.act()`` with the full tool list. + + Regression for #99 — the strict-mode fix must not silently reroute + the other permission modes through ``respond()``. + """ + for mode in ("ask", "auto"): + agent = Agent() + agent._mode = mode + mock_model = _make_mock_model("ok") + + with patch("lmcode.agent.core.read_lmcode_md", return_value=None): + await agent._run_turn(mock_model, "hi") + + mock_model.act.assert_awaited_once() + call_kwargs = mock_model.act.await_args.kwargs + assert len(call_kwargs["tools"]) == len(agent._tools), ( + f"mode={mode} should pass all {len(agent._tools)} tools, " + f"got {len(call_kwargs['tools'])}" + ) + + # --------------------------------------------------------------------------- # _wrap_tool_verbose — positional-arg merging # ---------------------------------------------------------------------------