From 6c3aceaac385251c748c06d630a3f6366f49e273 Mon Sep 17 00:00:00 2001 From: VforVitorio Date: Tue, 7 Apr 2026 12:38:07 +0200 Subject: [PATCH 1/3] feat: wire max_rounds safety boundary into model.act() (#97) - AgentSettings.max_rounds default 50->10 with docstring explaining the trade-off and the 3 ways to override (config, env var, CLI flag) - _run_turn passes max_prediction_rounds= to model.act() in the non-strict branch; strict stays on respond() which has no round concept - Catch LMStudioPredictionError specifically when the message contains "final prediction round" and set self._last_turn_limit_reached. This prevents the run() top-level LMStudioServerError handler (parent class) from misreading the crash as an LM Studio disconnect - Post-act check: ActResult.rounds >= cap also sets the flag (the well-behaved model case where the final round produced a text answer instead of a tool_call) - run() prints an inline amber warning when the flag is set, matching the ctx window warning style - CLI --max-rounds flag is now actually wired: mutates get_settings().agent.max_rounds for the session. Was previously accepted by Typer but silently ignored - 5 regression tests + _make_mock_model fixed to return an ActResult-shaped mock with concrete rounds int (MagicMock >= int raises TypeError) Co-Authored-By: Claude Sonnet 4.6 --- CHANGELOG.md | 4 + docs/features/config.md | 7 +- src/lmcode/agent/core.py | 76 ++++++++++++++-- src/lmcode/cli/app.py | 3 +- src/lmcode/cli/chat.py | 11 ++- src/lmcode/config/settings.py | 10 +- tests/test_agent/test_core.py | 166 +++++++++++++++++++++++++++++++++- 7 files changed, 260 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4ca4d2f..02b7f9a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ lmcode uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Added +- **`max_rounds` safety boundary** — `model.act()` now runs with `max_prediction_rounds` set from `agent.max_rounds` (default `10`, down from the previously unused `50`). Applies to ask and auto modes (strict routes through `model.respond()` which has no round concept). When the limit is hit the agent prints an inline warning — `agent stopped after N rounds — raise the limit with LMCODE_AGENT__MAX_ROUNDS=N, set agent.max_rounds in config, or pass --max-rounds N on the CLI` — instead of letting runaway tool loops burn the context window. Handles both cases: the well-behaved model finishing its final tool-free round at the cap, and the stubborn model that still emits a `tool_call` on the final round (SDK raises `LMStudioPredictionError` with `"final prediction round"` in the message, now caught specifically in `_run_turn` so the broader `LMStudioServerError` catch at `run()` level does not mistake it for a disconnect). (#97) +- **CLI `--max-rounds N` flag is now actually wired** — previously accepted by `lmcode chat` but silently ignored. Now mutates `get_settings().agent.max_rounds` for the session before launching the agent, so `lmcode chat --max-rounds 25` works as expected. + ### Fixed - **Strict mode now truly disables tools** — previously the `strict` permission mode label said "no tools — pure chat only" but the runtime still passed the full tool list to `model.act()`, so the model could happily emit tool calls and the runtime would execute them silently. Strict now routes through `model.respond()` — the pure-chat SDK primitive that has no tool concept at all — so the model never even sees a tool schema. (`model.act(tools=[])` is not a viable alternative: the SDK rejects it with `LMStudioValueError`.) (#99) - **Strict mode hard system prompt (second layer of defence)** — the SDK-level fix stops the model from emitting *real* tool calls, but cannot stop it from *fabricating* tool output in plain text (e.g. Qwen 7B happily replied "Here is the file content:" followed by invented code because the base prompt said "Always call `read_file` before describing file contents"). Strict now passes a *separate* `lms.Chat` seeded with a dedicated `_STRICT_SYSTEM_PROMPT` that explicitly forbids pretending to read files, fabricating output, or describing tool calls — while still listing what the model *can* do (review pasted code, answer general questions, rubber-duck debugging). `self._chat` is untouched so Tab-switching back to ask/auto keeps the base prompt and full history. (#99) diff --git a/docs/features/config.md b/docs/features/config.md index 666bff6..b4c048b 100644 --- a/docs/features/config.md +++ b/docs/features/config.md @@ -23,8 +23,9 @@ base_url = "http://localhost:1234" model = "auto" [agent] -# Maximum number of agent loop iterations per run -max_rounds = 50 +# Max tool-call rounds per turn. Applies to ask and auto modes. +# Override for a session with --max-rounds N or LMCODE_AGENT__MAX_ROUNDS=N. +max_rounds = 10 # Whether to confirm before writing files confirm_writes = false @@ -50,7 +51,7 @@ class LMStudioSettings(BaseSettings): model: str = "auto" class AgentSettings(BaseSettings): - max_rounds: int = 50 + max_rounds: int = 10 confirm_writes: bool = False class SessionSettings(BaseSettings): diff --git a/src/lmcode/agent/core.py b/src/lmcode/agent/core.py index 8e9c190..e15a407 100644 --- a/src/lmcode/agent/core.py +++ b/src/lmcode/agent/core.py @@ -75,6 +75,17 @@ # Spinner / tips constants # --------------------------------------------------------------------------- +#: Substring found in :class:`lms.LMStudioPredictionError` when the model +#: requests a tool call on the final round that was capped by +#: ``max_prediction_rounds``. The SDK raises unconditionally in that case +#: (our ``handle_invalid_tool_request`` callback can only override the +#: message), so we match on this marker to convert the crash into a +#: graceful "limit reached" warning instead of letting the run() loop's +#: broader ``LMStudioServerError`` catch mistake it for a disconnect. +#: Source of truth: ``lmstudio.async_api.AsyncLLM.act`` — the string is +#: literally ``"Model requested tool use on final prediction round."``. +_MAX_ROUNDS_ERR_MARKER: str = "final prediction round" + #: Rich spinner style used during model inference. _SPINNER: str = "circleHalves" @@ -382,6 +393,11 @@ def __init__(self, model_id: str = "auto") -> None: self._show_stats: bool = get_settings().ui.show_stats self._always_allowed_tools: set[str] = set() self._inference_config: dict[str, Any] = {} # passed as config= to model.act() + # Set by _run_turn when ``max_prediction_rounds`` was hit this turn + # (either the SDK raised LMStudioPredictionError on the final round, + # or ActResult.rounds reached the configured cap). run() reads this + # after _run_turn returns to print an inline warning to the user. + self._last_turn_limit_reached: bool = False # Set by _wrap_tool when the user rejects a tool call and types a # redirect instruction. run() detects this after the turn ends and # feeds the instruction back as a brand-new user message. @@ -1185,6 +1201,13 @@ async def _keepalive() -> None: act_result: Any = None respond_result: Any = None strict_start: float | None = None + # Reset the flag at the start of every turn. run() reads it after + # _run_turn returns to decide whether to print the limit warning. + self._last_turn_limit_reached = False + # Pull max_rounds here (not later) so tests can patch + # get_settings() once and see a single call. None disables the cap. + max_rounds = get_settings().agent.max_rounds + max_prediction_rounds: int | None = max_rounds if max_rounds and max_rounds > 0 else None try: if self._mode == "strict": # Strict mode (#99): use ``model.respond()`` — the pure @@ -1212,18 +1235,43 @@ async def _keepalive() -> None: on_prediction_fragment=_on_fragment, ) else: - act_result = await model.act( - chat, - tools=tools, - config=self._inference_config if self._inference_config else None, - on_message=_on_message, - on_prediction_completed=_on_prediction_completed, - on_prediction_fragment=_on_fragment, - ) + try: + act_result = await model.act( + chat, + tools=tools, + max_prediction_rounds=max_prediction_rounds, + config=self._inference_config if self._inference_config else None, + on_message=_on_message, + on_prediction_completed=_on_prediction_completed, + on_prediction_fragment=_on_fragment, + ) + except lms.LMStudioPredictionError as e: + # Max-rounds hit with a stubborn model: the SDK disabled + # tools on the final round, the model still emitted a + # tool_call, and the SDK raised unconditionally. Convert + # that into a graceful limit-reached signal so run() + # prints a warning instead of the run() top-level catch + # mistaking it for a server disconnect (LMStudioServerError + # is a *parent* of LMStudioPredictionError). + if _MAX_ROUNDS_ERR_MARKER in str(e): + self._last_turn_limit_reached = True + else: + raise finally: stop_evt.set() await keepalive + # Well-behaved model case: the SDK forced a tool-free final round and + # the model produced a normal text response, but rounds == the cap + # still means the task likely got truncated. Flag it so the user + # knows why the answer might feel incomplete. + if ( + act_result is not None + and max_prediction_rounds is not None + and getattr(act_result, "rounds", 0) >= max_prediction_rounds + ): + self._last_turn_limit_reached = True + # ``respond()`` returns a single ``PredictionResult`` with ``.stats`` # directly, whereas ``act()`` fires ``_on_prediction_completed`` once # per round. Normalise so the downstream stats code runs identically. @@ -1398,6 +1446,18 @@ def _cycle_mode() -> None: f"[{TEXT_MUTED}] — run /compact to summarise " f"the conversation[/]" ) + + if self._last_turn_limit_reached: + cap = get_settings().agent.max_rounds + console.print( + f"\n[{WARNING}]agent stopped after {cap} rounds[/]" + f"[{TEXT_MUTED}] — raise the limit with " + f"LMCODE_AGENT__MAX_ROUNDS=N, set " + f"agent.max_rounds in config, or pass " + f"--max-rounds N on the CLI. " + f"Type to continue the conversation.[/]" + ) + console.print(Rule(style=f"dim {ACCENT}")) # If the user rejected a tool call with redirect diff --git a/src/lmcode/cli/app.py b/src/lmcode/cli/app.py index 0358da6..39b45e0 100644 --- a/src/lmcode/cli/app.py +++ b/src/lmcode/cli/app.py @@ -42,7 +42,8 @@ def main( if ctx.invoked_subcommand is None: from lmcode.cli.chat import chat - chat(model="auto", max_rounds=50) + # max_rounds=None → use agent.max_rounds from config.toml / env var + chat(model="auto", max_rounds=None) # --------------------------------------------------------------------------- diff --git a/src/lmcode/cli/chat.py b/src/lmcode/cli/chat.py index 7b2e7f4..f616607 100644 --- a/src/lmcode/cli/chat.py +++ b/src/lmcode/cli/chat.py @@ -385,10 +385,19 @@ def _exit_no_model() -> None: @app.callback(invoke_without_command=True) def chat( model: str = typer.Option("auto", "--model", "-m", help="Model ID (default: auto-detect)."), - max_rounds: int = typer.Option(50, "--max-rounds", help="Maximum agent loop iterations."), + max_rounds: int | None = typer.Option( + None, + "--max-rounds", + help="Max tool-call rounds per turn (overrides agent.max_rounds in config).", + ), ) -> None: """Start an interactive coding agent session in the current directory.""" settings = get_settings() + if max_rounds is not None: + # CLI flag overrides config.toml / env var for this session only. + # The Agent reads get_settings().agent.max_rounds at the start of + # every turn, so mutating it here is enough — no re-init needed. + settings.agent.max_rounds = max_rounds connected, detected_model = _probe_lmstudio() if not connected: diff --git a/src/lmcode/config/settings.py b/src/lmcode/config/settings.py index 4e5eeb7..362c3d7 100644 --- a/src/lmcode/config/settings.py +++ b/src/lmcode/config/settings.py @@ -32,7 +32,15 @@ def base_url(self) -> str: class AgentSettings(BaseSettings): """Runtime behaviour settings for the agent loop.""" - max_rounds: int = 50 + #: Maximum number of tool-call rounds inside a single ``model.act()`` + #: invocation. Passed to the LM Studio SDK as ``max_prediction_rounds``. + #: 10 is the sweet spot for Qwen 7B: enough headroom for multi-file + #: edits, tight enough to stop runaway loops before they burn context. + #: Raise via ``LMCODE_AGENT__MAX_ROUNDS=N`` or ``agent.max_rounds`` in + #: ``config.toml``, or pass ``--max-rounds N`` on the CLI for a single + #: session. Applies to ask and auto modes; strict mode routes through + #: ``model.respond()`` which has no round concept. + max_rounds: int = 10 permission_mode: Literal["ask", "auto", "strict"] = "ask" timeout_seconds: int = 30 max_file_bytes: int = 100_000 # ~100 KB; see issue #2 for token-aware improvement diff --git a/tests/test_agent/test_core.py b/tests/test_agent/test_core.py index 0e97fe1..78f5a7a 100644 --- a/tests/test_agent/test_core.py +++ b/tests/test_agent/test_core.py @@ -74,8 +74,15 @@ def test_agent_ensure_chat_returns_same_instance() -> None: # --------------------------------------------------------------------------- -def _make_mock_model(response_text: str) -> MagicMock: - """Build a mock model whose act() calls on_message with a fake AssistantResponse.""" +def _make_mock_model(response_text: str, rounds: int = 1) -> MagicMock: + """Build a mock model whose act() calls on_message with a fake AssistantResponse. + + The returned ``ActResult``-shaped mock has a real ``rounds`` int + (default ``1``, well below the default ``max_rounds=10`` cap) so + the post-act limit check in :meth:`Agent._run_turn` does not crash + on a ``MagicMock >= int`` comparison. Pass ``rounds=N`` to simulate + a turn that used all N rounds for the limit-warning test. + """ async def act_side_effect( chat: object, tools: object, on_message: object = None, **kwargs: object @@ -88,7 +95,12 @@ async def act_side_effect( msg.role = "assistant" msg.tool_calls = None on_message(msg) - return MagicMock() + # Return an ActResult-shaped mock with concrete numeric fields so + # `getattr(result, "rounds", 0) >= max_prediction_rounds` works. + result = MagicMock() + result.rounds = rounds + result.total_time_seconds = 0.1 + return result mock = MagicMock() mock.act = AsyncMock(side_effect=act_side_effect) @@ -294,6 +306,154 @@ async def test_run_turn_non_strict_modes_use_act_with_tools() -> None: ) +# --------------------------------------------------------------------------- +# max_rounds safety boundary (#97) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_run_turn_passes_max_rounds_to_act() -> None: + """``max_prediction_rounds`` must flow from the agent config into ``model.act()``. + + Pins the config→SDK plumbing: if someone removes the kwarg from the + ``act()`` call, auto mode is once again unbounded and can loop + forever — the exact bug #97 was filed to fix. + """ + agent = Agent() + agent._mode = "auto" + mock_model = _make_mock_model("ok") + + # Patch get_settings so the test does not depend on ~/.config state. + mock_settings = MagicMock() + mock_settings.agent.max_rounds = 7 + mock_settings.agent.max_file_bytes = 100_000 + with ( + patch("lmcode.agent.core.read_lmcode_md", return_value=None), + patch("lmcode.agent.core.get_settings", return_value=mock_settings), + ): + await agent._run_turn(mock_model, "hi") + + call_kwargs = mock_model.act.await_args.kwargs + assert call_kwargs["max_prediction_rounds"] == 7 + + +@pytest.mark.asyncio +async def test_run_turn_max_rounds_none_when_config_zero() -> None: + """A non-positive ``max_rounds`` disables the cap (passes ``None`` to the SDK). + + Users who explicitly set ``max_rounds = 0`` in config.toml are opting + out of the safety boundary; we must not send ``0`` to the SDK because + ``model.act()`` rejects values < 1 with ``LMStudioValueError``. + """ + agent = Agent() + agent._mode = "auto" + mock_model = _make_mock_model("ok") + + mock_settings = MagicMock() + mock_settings.agent.max_rounds = 0 + mock_settings.agent.max_file_bytes = 100_000 + with ( + patch("lmcode.agent.core.read_lmcode_md", return_value=None), + patch("lmcode.agent.core.get_settings", return_value=mock_settings), + ): + await agent._run_turn(mock_model, "hi") + + call_kwargs = mock_model.act.await_args.kwargs + assert call_kwargs["max_prediction_rounds"] is None + + +@pytest.mark.asyncio +async def test_run_turn_flags_limit_reached_when_rounds_equal_cap() -> None: + """Well-behaved model case: ``ActResult.rounds == cap`` → set the flag. + + Even when the SDK closes the loop cleanly (model gave up tools on the + final round and produced a text answer), reaching the cap means the + task was likely truncated. ``self._last_turn_limit_reached`` must be + set so ``run()`` prints the warning. + """ + agent = Agent() + agent._mode = "auto" + mock_model = _make_mock_model("done", rounds=5) + + mock_settings = MagicMock() + mock_settings.agent.max_rounds = 5 + mock_settings.agent.max_file_bytes = 100_000 + with ( + patch("lmcode.agent.core.read_lmcode_md", return_value=None), + patch("lmcode.agent.core.get_settings", return_value=mock_settings), + ): + await agent._run_turn(mock_model, "hi") + + assert agent._last_turn_limit_reached is True + + +@pytest.mark.asyncio +async def test_run_turn_flags_limit_reached_on_final_round_error() -> None: + """Stubborn model case: SDK raises ``LMStudioPredictionError`` on the final round. + + When the model ignores the tools-disabled signal on the final round + and emits a tool_call anyway, the SDK raises unconditionally with a + message containing ``"final prediction round"``. ``_run_turn`` must + catch this *specific* error, set the limit flag, and NOT let it + bubble to the ``run()`` top-level handler (which catches + ``LMStudioServerError`` — the parent class — and would misinterpret + the crash as a server disconnect). + """ + import lmstudio as lms + + agent = Agent() + agent._mode = "auto" + + async def _raise_final_round( + chat: object, tools: object, on_message: object = None, **kwargs: object + ) -> None: + raise lms.LMStudioPredictionError("Model requested tool use on final prediction round.") + + mock_model = MagicMock() + mock_model.act = AsyncMock(side_effect=_raise_final_round) + + mock_settings = MagicMock() + mock_settings.agent.max_rounds = 3 + mock_settings.agent.max_file_bytes = 100_000 + with ( + patch("lmcode.agent.core.read_lmcode_md", return_value=None), + patch("lmcode.agent.core.get_settings", return_value=mock_settings), + ): + # Must NOT raise — the error is caught and converted to a flag. + await agent._run_turn(mock_model, "loop forever please") + + assert agent._last_turn_limit_reached is True + + +@pytest.mark.asyncio +async def test_run_turn_reraises_unrelated_prediction_error() -> None: + """A prediction error that is NOT the final-round case must propagate. + + The catch in ``_run_turn`` is narrow on purpose — matching only the + SDK's ``"final prediction round"`` marker. Any other + ``LMStudioPredictionError`` (malformed tool schema, invalid + response format, etc.) is a real bug the user needs to see. + """ + import lmstudio as lms + + agent = Agent() + agent._mode = "auto" + + async def _raise_other( + chat: object, tools: object, on_message: object = None, **kwargs: object + ) -> None: + raise lms.LMStudioPredictionError("something else went wrong") + + mock_model = MagicMock() + mock_model.act = AsyncMock(side_effect=_raise_other) + + with patch("lmcode.agent.core.read_lmcode_md", return_value=None): + with pytest.raises(lms.LMStudioPredictionError, match="something else"): + await agent._run_turn(mock_model, "hi") + + assert agent._last_turn_limit_reached is False + + # --------------------------------------------------------------------------- # _wrap_tool_verbose — positional-arg merging # --------------------------------------------------------------------------- From 634815484e8ed0cedd2e20217db81ebf3fa5d448 Mon Sep 17 00:00:00 2001 From: VforVitorio Date: Tue, 7 Apr 2026 12:56:05 +0200 Subject: [PATCH 2/3] =?UTF-8?q?feat:=20auto-mode=20UX=20=E2=80=94=20per-mo?= =?UTF-8?q?de=20spinner,=20round=20counter,=20first-time=20warning=20(#97)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Builds on PR 1's max_rounds safety core with the UX layer that makes the new boundary visible and teachable. - Per-mode spinner colour: the in-turn spinner tracks the active mode (orange=ask, blue=auto, red=strict) via a new mode_color() accessor in ui/status.py so the current mode is visible at a glance without reading the prompt line. - Auto-mode round counter: in auto mode the spinner label appends " · round N/M" driven by an on_round_start callback wired into model.act(). Only shown in auto mode (ask blocks on user approval between tool calls, strict has no round concept). - First-time auto warning: the first Tab-cycle into auto in a session prints an amber one-liner above the prompt via run_in_terminal, gated by self._auto_warned so it never repeats. - /status surfaces the active max_rounds so users can verify which safety boundary is in effect after config / env / --max-rounds. Tests cover: on_round_start kwarg wiring, _auto_warned initial state and one-shot behaviour, mode-cycle preservation of _always_allowed_tools (regression guard), /status max-rounds row. Stacked on feat/auto-max-rounds — merge PR #104 first. --- CHANGELOG.md | 4 ++ src/lmcode/agent/core.py | 85 +++++++++++++++++++++++--- src/lmcode/ui/status.py | 10 +++ tests/test_agent/test_core.py | 111 ++++++++++++++++++++++++++++++++++ 4 files changed, 200 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 02b7f9a..f2870e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,10 @@ lmcode uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ### Added - **`max_rounds` safety boundary** — `model.act()` now runs with `max_prediction_rounds` set from `agent.max_rounds` (default `10`, down from the previously unused `50`). Applies to ask and auto modes (strict routes through `model.respond()` which has no round concept). When the limit is hit the agent prints an inline warning — `agent stopped after N rounds — raise the limit with LMCODE_AGENT__MAX_ROUNDS=N, set agent.max_rounds in config, or pass --max-rounds N on the CLI` — instead of letting runaway tool loops burn the context window. Handles both cases: the well-behaved model finishing its final tool-free round at the cap, and the stubborn model that still emits a `tool_call` on the final round (SDK raises `LMStudioPredictionError` with `"final prediction round"` in the message, now caught specifically in `_run_turn` so the broader `LMStudioServerError` catch at `run()` level does not mistake it for a disconnect). (#97) - **CLI `--max-rounds N` flag is now actually wired** — previously accepted by `lmcode chat` but silently ignored. Now mutates `get_settings().agent.max_rounds` for the session before launching the agent, so `lmcode chat --max-rounds 25` works as expected. +- **Per-mode spinner colour** — the in-turn spinner now tracks the active permission mode (orange=ask, blue=auto, red=strict) so the current mode is visible at a glance without reading the prompt line. Applies to both the initial `thinking.` spinner in `run()` and the live-updating spinner inside `_keepalive`. Added `mode_color(mode)` public accessor to `ui/status.py` so core doesn't reach into the module-private `_MODE_COLORS` dict. (#97) +- **Auto-mode round counter in spinner** — in auto mode the spinner label appends `· round N/M` while `model.act()` runs, wired via a new `on_round_start` callback that updates a closure-local counter. The counter only shows in auto mode (ask mode redraws more rarely because it blocks on user approval between tool calls; strict mode has no rounds at all). Gives the user real-time visibility into how much of the `max_rounds` budget the current turn has consumed. (#97) +- **First-time auto-mode warning** — the first time a user Tab-cycles into auto mode in a session, a one-shot amber hint prints above the prompt: `auto mode — tools run without asking, up to N rounds per turn. Ctrl+C stops a running turn.` Printed via prompt_toolkit's `run_in_terminal` so it lands cleanly above the live prompt without tearing the ghost-text completion layer. Session-scoped via `self._auto_warned` — never re-prints even after cycling away and back. (#97) +- **`/status` surfaces `max rounds`** — the session-state table now includes a `max rounds: N` row so users can verify which safety boundary is currently in effect (config, env var, or `--max-rounds` CLI override all funnel into the same value). ### Fixed - **Strict mode now truly disables tools** — previously the `strict` permission mode label said "no tools — pure chat only" but the runtime still passed the full tool list to `model.act()`, so the model could happily emit tool calls and the runtime would execute them silently. Strict now routes through `model.respond()` — the pure-chat SDK primitive that has no tool concept at all — so the model never even sees a tool schema. (`model.act(tools=[])` is not a viable alternative: the SDK rejects it with `LMStudioValueError`.) (#99) diff --git a/src/lmcode/agent/core.py b/src/lmcode/agent/core.py index e15a407..ee834d4 100644 --- a/src/lmcode/agent/core.py +++ b/src/lmcode/agent/core.py @@ -68,6 +68,7 @@ MODES, build_prompt, build_status_line, + mode_color, next_mode, ) @@ -393,6 +394,10 @@ def __init__(self, model_id: str = "auto") -> None: self._show_stats: bool = get_settings().ui.show_stats self._always_allowed_tools: set[str] = set() self._inference_config: dict[str, Any] = {} # passed as config= to model.act() + # True once the first-time auto-mode warning has been shown. The + # warning fires the first time the user Tab-cycles into auto mode + # during a session — never again, even after cycling away and back. + self._auto_warned: bool = False # Set by _run_turn when ``max_prediction_rounds`` was hit this turn # (either the SDK raised LMStudioPredictionError on the final round, # or ActResult.rounds reached the configured cap). run() reads this @@ -697,6 +702,7 @@ def _print_status(self) -> None: status_rows: list[tuple[str, str]] = [ ("model", self._model_display or "(none)"), ("mode", self._mode), + ("max rounds", str(get_settings().agent.max_rounds)), ("temperature", temp_display), ("verbose", "on" if self._verbose else "off"), ("tips", "on" if self._show_tips else "off"), @@ -713,6 +719,26 @@ def _print_status(self) -> None: console.print(row) console.print() + def _print_auto_warning(self) -> None: + """Print the first-time auto-mode caution and set :attr:`_auto_warned`. + + Called from ``_cycle_mode`` inside :meth:`run` via ``run_in_terminal`` + the first time the user Tab-cycles into ``auto`` mode during a + session. The flag prevents re-printing on subsequent cycles. + Intended to be a one-line amber hint that matches the style used + for the context-window and max-rounds warnings. + """ + if self._auto_warned: + return + self._auto_warned = True + cap = get_settings().agent.max_rounds + console.print( + f"[{WARNING}]auto mode[/]" + f"[{TEXT_MUTED}] — tools run without asking, " + f"up to {cap} rounds per turn. " + f"Ctrl+C stops a running turn.[/]" + ) + # ------------------------------------------------------------------ # /compact # ------------------------------------------------------------------ @@ -1153,6 +1179,19 @@ def _on_fragment(fragment: Any, _round_index: int = 0) -> None: """ tok_count[0] += 1 + # Current round (1-indexed) as reported by ``on_round_start``. + # Zero means no round has started yet (pre-first-round thinking) + # or the mode is strict (``model.respond()`` has no round concept). + current_round: list[int] = [0] + + def _on_round_start(round_index: int) -> None: + """Update the spinner round counter — auto mode displays ``N/M``. + + ``round_index`` is 0-based in the SDK, so we store ``+1`` to + match the human-friendly ``round 3/10`` format. + """ + current_round[0] = round_index + 1 + # Strict mode only wraps tools when we're actually going to use # them (ask/auto path). The strict branch below skips tool # plumbing entirely and calls ``model.respond()`` — the SDK @@ -1163,11 +1202,26 @@ def _on_fragment(fragment: Any, _round_index: int = 0) -> None: stop_evt = asyncio.Event() shuffled_tips = random.sample(_TIPS, len(_TIPS)) if self._show_tips else [] + # Reset the per-turn limit-reached flag here so it's defined before + # any early return inside the ``try`` block. run() reads it after + # _run_turn returns to decide whether to print the limit warning. + self._last_turn_limit_reached = False + # Pull max_rounds here (not later) so tests can patch + # get_settings() once and see a single call. None disables the cap. + # Assigned before the keepalive task is created so the closure + # below sees a bound cell when it first runs. + max_rounds = get_settings().agent.max_rounds + max_prediction_rounds: int | None = max_rounds if max_rounds and max_rounds > 0 else None + async def _keepalive() -> None: """Update the spinner label every 100 ms; animate dots; rotate tips every ~8 s. Runs on the main event loop alongside ``model.act()``. Gets CPU time whenever the SDK yields back to the loop during async HTTP prefill. + The spinner colour tracks the current mode (orange=ask, blue=auto, + red=strict) so the active permission mode is visible at a glance + without reading the prompt line. In auto mode the label also + includes a ``round N/M`` counter driven by ``on_round_start``. """ tip_idx = 0 dot_idx = 0 @@ -1190,7 +1244,14 @@ async def _keepalive() -> None: ) else: label = f" {base}" - rows: list[Any] = [Spinner(_SPINNER, text=label, style=ACCENT)] + if ( + self._mode == "auto" + and current_round[0] > 0 + and max_prediction_rounds is not None + ): + label = f"{label} · round {current_round[0]}/{max_prediction_rounds}" + spinner_color = mode_color(self._mode) + rows: list[Any] = [Spinner(_SPINNER, text=label, style=spinner_color)] if shuffled_tips: rows.append(Text(f" {shuffled_tips[tip_idx]}", style=f"dim {ACCENT}")) live.update(RenderGroup(*rows)) @@ -1201,13 +1262,6 @@ async def _keepalive() -> None: act_result: Any = None respond_result: Any = None strict_start: float | None = None - # Reset the flag at the start of every turn. run() reads it after - # _run_turn returns to decide whether to print the limit warning. - self._last_turn_limit_reached = False - # Pull max_rounds here (not later) so tests can patch - # get_settings() once and see a single call. None disables the cap. - max_rounds = get_settings().agent.max_rounds - max_prediction_rounds: int | None = max_rounds if max_rounds and max_rounds > 0 else None try: if self._mode == "strict": # Strict mode (#99): use ``model.respond()`` — the pure @@ -1242,6 +1296,7 @@ async def _keepalive() -> None: max_prediction_rounds=max_prediction_rounds, config=self._inference_config if self._inference_config else None, on_message=_on_message, + on_round_start=_on_round_start, on_prediction_completed=_on_prediction_completed, on_prediction_fragment=_on_fragment, ) @@ -1321,8 +1376,18 @@ async def run(self) -> None: settings = get_settings() def _cycle_mode() -> None: - """Advance to the next mode in-place (prompt redraws via invalidate).""" + """Advance to the next mode in-place (prompt redraws via invalidate). + + The first time the user cycles into ``auto`` in a given session, + schedule a one-shot amber warning via prompt-toolkit's + :func:`run_in_terminal` so the hint prints cleanly above the + live prompt without tearing the ghost-text completion layer. + """ self._mode = next_mode(self._mode) + if self._mode == "auto" and not self._auto_warned: + from prompt_toolkit.application import run_in_terminal + + run_in_terminal(self._print_auto_warning) session = make_session(cycle_mode=_cycle_mode) @@ -1387,7 +1452,7 @@ def _cycle_mode() -> None: continue initial: Any = RenderGroup( - Spinner(_SPINNER, text=" thinking.", style=ACCENT), + Spinner(_SPINNER, text=" thinking.", style=mode_color(self._mode)), ) self._raw_history.append(("user", stripped)) _interrupted = False diff --git a/src/lmcode/ui/status.py b/src/lmcode/ui/status.py index a544cfa..8c1ad35 100644 --- a/src/lmcode/ui/status.py +++ b/src/lmcode/ui/status.py @@ -31,6 +31,16 @@ def next_mode(current: str) -> str: return MODES[(idx + 1) % len(MODES)] +def mode_color(mode: str) -> str: + """Return the hex colour associated with *mode*, falling back to muted grey. + + The same palette is used by the ``[mode]`` indicator in the prompt and by + the in-turn spinner inside the agent loop, so the two always agree: + orange = ask, blue = auto, red = strict. + """ + return _MODE_COLORS.get(mode, _MUTED) + + def build_status_line(model: str) -> str: """Return a Rich markup string shown once after connecting to LM Studio. diff --git a/tests/test_agent/test_core.py b/tests/test_agent/test_core.py index 78f5a7a..474ac6c 100644 --- a/tests/test_agent/test_core.py +++ b/tests/test_agent/test_core.py @@ -454,6 +454,117 @@ async def _raise_other( assert agent._last_turn_limit_reached is False +# --------------------------------------------------------------------------- +# auto mode UX — spinner colour, round counter, first-time warning (#97) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_run_turn_passes_on_round_start_to_act() -> None: + """``on_round_start`` must be wired so the spinner can display ``round N/M``. + + The callback lives entirely inside ``_run_turn``'s closure (it updates a + local ``current_round`` cell read by the keepalive task), so we assert on + two things: (1) the kwarg was passed as a callable to ``model.act()``, + and (2) calling it with a 0-based round index does not raise. + """ + agent = Agent() + agent._mode = "auto" + mock_model = _make_mock_model("ok") + + with patch("lmcode.agent.core.read_lmcode_md", return_value=None): + await agent._run_turn(mock_model, "hi") + + call_kwargs = mock_model.act.await_args.kwargs + assert "on_round_start" in call_kwargs + assert callable(call_kwargs["on_round_start"]) + # Must accept a 0-based round index without raising. + call_kwargs["on_round_start"](0) + call_kwargs["on_round_start"](2) + + +def test_agent_auto_warned_initially_false() -> None: + """Fresh Agent starts with the first-time auto-mode warning un-fired.""" + agent = Agent() + assert agent._auto_warned is False + + +def test_print_auto_warning_fires_once_per_session() -> None: + """``_print_auto_warning`` sets the flag on first call and is a no-op afterwards. + + The warning is triggered from the ``_cycle_mode`` closure in ``run()`` via + ``run_in_terminal``; we test the method directly so the test does not + depend on prompt_toolkit's terminal plumbing. Calling it twice must + print exactly once — the second call should exit immediately. + """ + from lmcode.agent import _display + + agent = Agent() + with patch.object(_display.console, "print") as mock_print: + agent._print_auto_warning() + assert agent._auto_warned is True + assert mock_print.call_count == 1 + + agent._print_auto_warning() + # Flag still True and no additional prints — second call is a no-op. + assert agent._auto_warned is True + assert mock_print.call_count == 1 + + +def test_cycle_mode_preserves_always_allowed_tools() -> None: + """Tab-cycling the mode must not clear session-scoped always-allow grants. + + Regression guard for a subtle UX pitfall: if the user grants "always allow + write_file" in ask mode and then Tab-cycles to auto → strict → ask, the + grants should survive the round trip. The set is plain Agent state with + no cycle hook touching it, but this test pins the invariant so a future + refactor that adds ``_always_allowed_tools.clear()`` to the mode handler + will be caught. + """ + from lmcode.ui.status import next_mode + + agent = Agent() + agent._mode = "ask" + agent._always_allowed_tools = {"read_file", "write_file"} + + # Simulate three Tab presses: ask → auto → strict → ask. + for _ in range(3): + agent._mode = next_mode(agent._mode) + + assert agent._mode == "ask" + assert agent._always_allowed_tools == {"read_file", "write_file"} + + +def test_print_status_includes_max_rounds_line() -> None: + """``/status`` must surface the active ``max_rounds`` so users can verify the cap. + + The line is the only place in the running session that confirms which + safety boundary is in effect (config / env var / CLI flag). We capture + the Rich console output and assert the label is present. + """ + from lmcode.agent import _display + + agent = Agent() + mock_settings = MagicMock() + mock_settings.agent.max_rounds = 13 + mock_settings.agent.max_file_bytes = 100_000 + + printed: list[str] = [] + + def _capture(obj: object = "", *args: object, **kwargs: object) -> None: + printed.append(str(obj)) + + with ( + patch("lmcode.agent.core.get_settings", return_value=mock_settings), + patch.object(_display.console, "print", side_effect=_capture), + ): + agent._print_status() + + joined = "\n".join(printed) + assert "max rounds" in joined + assert "13" in joined + + # --------------------------------------------------------------------------- # _wrap_tool_verbose — positional-arg merging # --------------------------------------------------------------------------- From 6f8778d8f3265212071ae6edfa2805dabcbdb839 Mon Sep 17 00:00:00 2001 From: VforVitorio Date: Tue, 7 Apr 2026 13:37:58 +0200 Subject: [PATCH 3/3] =?UTF-8?q?fix:=20restore=20PR=202=20content=20lost=20?= =?UTF-8?q?in=20dev=E2=86=92feat/auto-mode-ux=20merge?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When dev (containing the squash-merged #104) was merged into this branch, git's three-way merge dropped three pieces of PR 2 work because it saw them as conflicts with the squashed version of #104: - self._auto_warned init flag was removed from Agent.__init__, causing mypy `has-type` error in _print_auto_warning - on_round_start=_on_round_start kwarg was dropped from the model.act() call, breaking the round counter test - max_prediction_rounds was re-added in its original post-keepalive position, duplicating the moved-up declaration and causing the mypy `no-redef` error at line 1267 Also restores the 5 PR 2 tests and the 4 PR 2 CHANGELOG entries that the merge silently dropped. Local ruff + mypy + pytest green. --- CHANGELOG.md | 4 ++ src/lmcode/agent/core.py | 12 ++-- tests/test_agent/test_core.py | 111 ++++++++++++++++++++++++++++++++++ 3 files changed, 120 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 02b7f9a..f2870e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,10 @@ lmcode uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ### Added - **`max_rounds` safety boundary** — `model.act()` now runs with `max_prediction_rounds` set from `agent.max_rounds` (default `10`, down from the previously unused `50`). Applies to ask and auto modes (strict routes through `model.respond()` which has no round concept). When the limit is hit the agent prints an inline warning — `agent stopped after N rounds — raise the limit with LMCODE_AGENT__MAX_ROUNDS=N, set agent.max_rounds in config, or pass --max-rounds N on the CLI` — instead of letting runaway tool loops burn the context window. Handles both cases: the well-behaved model finishing its final tool-free round at the cap, and the stubborn model that still emits a `tool_call` on the final round (SDK raises `LMStudioPredictionError` with `"final prediction round"` in the message, now caught specifically in `_run_turn` so the broader `LMStudioServerError` catch at `run()` level does not mistake it for a disconnect). (#97) - **CLI `--max-rounds N` flag is now actually wired** — previously accepted by `lmcode chat` but silently ignored. Now mutates `get_settings().agent.max_rounds` for the session before launching the agent, so `lmcode chat --max-rounds 25` works as expected. +- **Per-mode spinner colour** — the in-turn spinner now tracks the active permission mode (orange=ask, blue=auto, red=strict) so the current mode is visible at a glance without reading the prompt line. Applies to both the initial `thinking.` spinner in `run()` and the live-updating spinner inside `_keepalive`. Added `mode_color(mode)` public accessor to `ui/status.py` so core doesn't reach into the module-private `_MODE_COLORS` dict. (#97) +- **Auto-mode round counter in spinner** — in auto mode the spinner label appends `· round N/M` while `model.act()` runs, wired via a new `on_round_start` callback that updates a closure-local counter. The counter only shows in auto mode (ask mode redraws more rarely because it blocks on user approval between tool calls; strict mode has no rounds at all). Gives the user real-time visibility into how much of the `max_rounds` budget the current turn has consumed. (#97) +- **First-time auto-mode warning** — the first time a user Tab-cycles into auto mode in a session, a one-shot amber hint prints above the prompt: `auto mode — tools run without asking, up to N rounds per turn. Ctrl+C stops a running turn.` Printed via prompt_toolkit's `run_in_terminal` so it lands cleanly above the live prompt without tearing the ghost-text completion layer. Session-scoped via `self._auto_warned` — never re-prints even after cycling away and back. (#97) +- **`/status` surfaces `max rounds`** — the session-state table now includes a `max rounds: N` row so users can verify which safety boundary is currently in effect (config, env var, or `--max-rounds` CLI override all funnel into the same value). ### Fixed - **Strict mode now truly disables tools** — previously the `strict` permission mode label said "no tools — pure chat only" but the runtime still passed the full tool list to `model.act()`, so the model could happily emit tool calls and the runtime would execute them silently. Strict now routes through `model.respond()` — the pure-chat SDK primitive that has no tool concept at all — so the model never even sees a tool schema. (`model.act(tools=[])` is not a viable alternative: the SDK rejects it with `LMStudioValueError`.) (#99) diff --git a/src/lmcode/agent/core.py b/src/lmcode/agent/core.py index d450b02..ee834d4 100644 --- a/src/lmcode/agent/core.py +++ b/src/lmcode/agent/core.py @@ -394,6 +394,10 @@ def __init__(self, model_id: str = "auto") -> None: self._show_stats: bool = get_settings().ui.show_stats self._always_allowed_tools: set[str] = set() self._inference_config: dict[str, Any] = {} # passed as config= to model.act() + # True once the first-time auto-mode warning has been shown. The + # warning fires the first time the user Tab-cycles into auto mode + # during a session — never again, even after cycling away and back. + self._auto_warned: bool = False # Set by _run_turn when ``max_prediction_rounds`` was hit this turn # (either the SDK raised LMStudioPredictionError on the final round, # or ActResult.rounds reached the configured cap). run() reads this @@ -1258,13 +1262,6 @@ async def _keepalive() -> None: act_result: Any = None respond_result: Any = None strict_start: float | None = None - # Reset the flag at the start of every turn. run() reads it after - # _run_turn returns to decide whether to print the limit warning. - self._last_turn_limit_reached = False - # Pull max_rounds here (not later) so tests can patch - # get_settings() once and see a single call. None disables the cap. - max_rounds = get_settings().agent.max_rounds - max_prediction_rounds: int | None = max_rounds if max_rounds and max_rounds > 0 else None try: if self._mode == "strict": # Strict mode (#99): use ``model.respond()`` — the pure @@ -1299,6 +1296,7 @@ async def _keepalive() -> None: max_prediction_rounds=max_prediction_rounds, config=self._inference_config if self._inference_config else None, on_message=_on_message, + on_round_start=_on_round_start, on_prediction_completed=_on_prediction_completed, on_prediction_fragment=_on_fragment, ) diff --git a/tests/test_agent/test_core.py b/tests/test_agent/test_core.py index 78f5a7a..474ac6c 100644 --- a/tests/test_agent/test_core.py +++ b/tests/test_agent/test_core.py @@ -454,6 +454,117 @@ async def _raise_other( assert agent._last_turn_limit_reached is False +# --------------------------------------------------------------------------- +# auto mode UX — spinner colour, round counter, first-time warning (#97) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_run_turn_passes_on_round_start_to_act() -> None: + """``on_round_start`` must be wired so the spinner can display ``round N/M``. + + The callback lives entirely inside ``_run_turn``'s closure (it updates a + local ``current_round`` cell read by the keepalive task), so we assert on + two things: (1) the kwarg was passed as a callable to ``model.act()``, + and (2) calling it with a 0-based round index does not raise. + """ + agent = Agent() + agent._mode = "auto" + mock_model = _make_mock_model("ok") + + with patch("lmcode.agent.core.read_lmcode_md", return_value=None): + await agent._run_turn(mock_model, "hi") + + call_kwargs = mock_model.act.await_args.kwargs + assert "on_round_start" in call_kwargs + assert callable(call_kwargs["on_round_start"]) + # Must accept a 0-based round index without raising. + call_kwargs["on_round_start"](0) + call_kwargs["on_round_start"](2) + + +def test_agent_auto_warned_initially_false() -> None: + """Fresh Agent starts with the first-time auto-mode warning un-fired.""" + agent = Agent() + assert agent._auto_warned is False + + +def test_print_auto_warning_fires_once_per_session() -> None: + """``_print_auto_warning`` sets the flag on first call and is a no-op afterwards. + + The warning is triggered from the ``_cycle_mode`` closure in ``run()`` via + ``run_in_terminal``; we test the method directly so the test does not + depend on prompt_toolkit's terminal plumbing. Calling it twice must + print exactly once — the second call should exit immediately. + """ + from lmcode.agent import _display + + agent = Agent() + with patch.object(_display.console, "print") as mock_print: + agent._print_auto_warning() + assert agent._auto_warned is True + assert mock_print.call_count == 1 + + agent._print_auto_warning() + # Flag still True and no additional prints — second call is a no-op. + assert agent._auto_warned is True + assert mock_print.call_count == 1 + + +def test_cycle_mode_preserves_always_allowed_tools() -> None: + """Tab-cycling the mode must not clear session-scoped always-allow grants. + + Regression guard for a subtle UX pitfall: if the user grants "always allow + write_file" in ask mode and then Tab-cycles to auto → strict → ask, the + grants should survive the round trip. The set is plain Agent state with + no cycle hook touching it, but this test pins the invariant so a future + refactor that adds ``_always_allowed_tools.clear()`` to the mode handler + will be caught. + """ + from lmcode.ui.status import next_mode + + agent = Agent() + agent._mode = "ask" + agent._always_allowed_tools = {"read_file", "write_file"} + + # Simulate three Tab presses: ask → auto → strict → ask. + for _ in range(3): + agent._mode = next_mode(agent._mode) + + assert agent._mode == "ask" + assert agent._always_allowed_tools == {"read_file", "write_file"} + + +def test_print_status_includes_max_rounds_line() -> None: + """``/status`` must surface the active ``max_rounds`` so users can verify the cap. + + The line is the only place in the running session that confirms which + safety boundary is in effect (config / env var / CLI flag). We capture + the Rich console output and assert the label is present. + """ + from lmcode.agent import _display + + agent = Agent() + mock_settings = MagicMock() + mock_settings.agent.max_rounds = 13 + mock_settings.agent.max_file_bytes = 100_000 + + printed: list[str] = [] + + def _capture(obj: object = "", *args: object, **kwargs: object) -> None: + printed.append(str(obj)) + + with ( + patch("lmcode.agent.core.get_settings", return_value=mock_settings), + patch.object(_display.console, "print", side_effect=_capture), + ): + agent._print_status() + + joined = "\n".join(printed) + assert "max rounds" in joined + assert "13" in joined + + # --------------------------------------------------------------------------- # _wrap_tool_verbose — positional-arg merging # ---------------------------------------------------------------------------