diff --git a/CHANGELOG.md b/CHANGELOG.md index 02b7f9a..f2870e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,10 @@ lmcode uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ### Added - **`max_rounds` safety boundary** — `model.act()` now runs with `max_prediction_rounds` set from `agent.max_rounds` (default `10`, down from the previously unused `50`). Applies to ask and auto modes (strict routes through `model.respond()` which has no round concept). When the limit is hit the agent prints an inline warning — `agent stopped after N rounds — raise the limit with LMCODE_AGENT__MAX_ROUNDS=N, set agent.max_rounds in config, or pass --max-rounds N on the CLI` — instead of letting runaway tool loops burn the context window. Handles both cases: the well-behaved model finishing its final tool-free round at the cap, and the stubborn model that still emits a `tool_call` on the final round (SDK raises `LMStudioPredictionError` with `"final prediction round"` in the message, now caught specifically in `_run_turn` so the broader `LMStudioServerError` catch at `run()` level does not mistake it for a disconnect). (#97) - **CLI `--max-rounds N` flag is now actually wired** — previously accepted by `lmcode chat` but silently ignored. Now mutates `get_settings().agent.max_rounds` for the session before launching the agent, so `lmcode chat --max-rounds 25` works as expected. +- **Per-mode spinner colour** — the in-turn spinner now tracks the active permission mode (orange=ask, blue=auto, red=strict) so the current mode is visible at a glance without reading the prompt line. Applies to both the initial `thinking.` spinner in `run()` and the live-updating spinner inside `_keepalive`. Added `mode_color(mode)` public accessor to `ui/status.py` so core doesn't reach into the module-private `_MODE_COLORS` dict. 
(#97) +- **Auto-mode round counter in spinner** — in auto mode the spinner label appends `· round N/M` while `model.act()` runs, wired via a new `on_round_start` callback that updates a closure-local counter. The counter only shows in auto mode (ask mode redraws more rarely because it blocks on user approval between tool calls; strict mode has no rounds at all). Gives the user real-time visibility into how much of the `max_rounds` budget the current turn has consumed. (#97) +- **First-time auto-mode warning** — the first time a user Tab-cycles into auto mode in a session, a one-shot amber hint prints above the prompt: `auto mode — tools run without asking, up to N rounds per turn. Ctrl+C stops a running turn.` Printed via prompt_toolkit's `run_in_terminal` so it lands cleanly above the live prompt without tearing the ghost-text completion layer. Session-scoped via `self._auto_warned` — never re-prints even after cycling away and back. (#97) +- **`/status` surfaces `max rounds`** — the session-state table now includes a `max rounds: N` row so users can verify which safety boundary is currently in effect (config, env var, or `--max-rounds` CLI override all funnel into the same value). ### Fixed - **Strict mode now truly disables tools** — previously the `strict` permission mode label said "no tools — pure chat only" but the runtime still passed the full tool list to `model.act()`, so the model could happily emit tool calls and the runtime would execute them silently. Strict now routes through `model.respond()` — the pure-chat SDK primitive that has no tool concept at all — so the model never even sees a tool schema. (`model.act(tools=[])` is not a viable alternative: the SDK rejects it with `LMStudioValueError`.) 
def _print_auto_warning(self) -> None:
    """Print the first-time auto-mode caution and set :attr:`_auto_warned`.

    Meant to be invoked (via ``run_in_terminal``) the first time the user
    Tab-cycles into ``auto`` mode during a session. The ``_auto_warned``
    flag is flipped *before* printing, so any later call returns
    immediately without output.

    The hint quotes the configured ``agent.max_rounds`` value. A missing
    or non-positive value means the round cap is disabled (the same rule
    ``_run_turn`` applies when it derives ``max_prediction_rounds``), so
    in that case the hint says there is no limit instead of printing a
    misleading ``up to None rounds`` / ``up to 0 rounds``.
    """
    if self._auto_warned:
        return
    self._auto_warned = True
    cap = get_settings().agent.max_rounds
    # Keep in step with _run_turn: only a positive integer is a real cap.
    if cap and cap > 0:
        limit_text = f"up to {cap} rounds per turn. "
    else:
        limit_text = "with no round limit. "
    console.print(
        f"[{WARNING}]auto mode[/]"
        f"[{TEXT_MUTED}] — tools run without asking, "
        f"{limit_text}"
        f"Ctrl+C stops a running turn.[/]"
    )
+ max_rounds = get_settings().agent.max_rounds + max_prediction_rounds: int | None = max_rounds if max_rounds and max_rounds > 0 else None + async def _keepalive() -> None: """Update the spinner label every 100 ms; animate dots; rotate tips every ~8 s. Runs on the main event loop alongside ``model.act()``. Gets CPU time whenever the SDK yields back to the loop during async HTTP prefill. + The spinner colour tracks the current mode (orange=ask, blue=auto, + red=strict) so the active permission mode is visible at a glance + without reading the prompt line. In auto mode the label also + includes a ``round N/M`` counter driven by ``on_round_start``. """ tip_idx = 0 dot_idx = 0 @@ -1190,7 +1244,14 @@ async def _keepalive() -> None: ) else: label = f" {base}" - rows: list[Any] = [Spinner(_SPINNER, text=label, style=ACCENT)] + if ( + self._mode == "auto" + and current_round[0] > 0 + and max_prediction_rounds is not None + ): + label = f"{label} · round {current_round[0]}/{max_prediction_rounds}" + spinner_color = mode_color(self._mode) + rows: list[Any] = [Spinner(_SPINNER, text=label, style=spinner_color)] if shuffled_tips: rows.append(Text(f" {shuffled_tips[tip_idx]}", style=f"dim {ACCENT}")) live.update(RenderGroup(*rows)) @@ -1201,13 +1262,6 @@ async def _keepalive() -> None: act_result: Any = None respond_result: Any = None strict_start: float | None = None - # Reset the flag at the start of every turn. run() reads it after - # _run_turn returns to decide whether to print the limit warning. - self._last_turn_limit_reached = False - # Pull max_rounds here (not later) so tests can patch - # get_settings() once and see a single call. None disables the cap. 
def mode_color(mode: str) -> str:
    """Look up the colour registered for the permission *mode*.

    Known modes resolve through the module-private ``_MODE_COLORS``
    mapping; any other value yields the muted fallback ``_MUTED``.
    Exposed publicly so callers outside this module do not have to reach
    into ``_MODE_COLORS`` themselves.
    """
    if mode in _MODE_COLORS:
        return _MODE_COLORS[mode]
    return _MUTED
def test_print_auto_warning_fires_once_per_session() -> None:
    """Calling ``_print_auto_warning`` repeatedly prints exactly one time.

    The method is exercised directly rather than through the prompt's
    mode-cycling key binding, so the test needs no terminal plumbing.
    After every call the flag must be set and the console must have been
    written to exactly once, i.e. the second call is a no-op.
    """
    from lmcode.agent import _display

    agent = Agent()
    with patch.object(_display.console, "print") as printer:
        # First pass fires the warning; second pass must change nothing.
        for _ in range(2):
            agent._print_auto_warning()
            assert agent._auto_warned is True
            assert printer.call_count == 1
def test_print_status_includes_max_rounds_line() -> None:
    """``/status`` must print a ``max rounds`` row showing the configured cap.

    ``get_settings`` is patched so ``agent.max_rounds`` is a known value
    (13), and every object handed to ``console.print`` is captured as a
    string. The value is asserted on the *same* captured row as the
    ``max rounds`` label, so stray digits elsewhere in the status output
    cannot make the test pass by accident.
    """
    from lmcode.agent import _display

    agent = Agent()
    mock_settings = MagicMock()
    mock_settings.agent.max_rounds = 13
    mock_settings.agent.max_file_bytes = 100_000

    printed: list[str] = []

    def _capture(obj: object = "", *args: object, **kwargs: object) -> None:
        # Only the first positional argument is recorded; the status rows
        # are printed one object per call.
        printed.append(str(obj))

    with (
        patch("lmcode.agent.core.get_settings", return_value=mock_settings),
        patch.object(_display.console, "print", side_effect=_capture),
    ):
        agent._print_status()

    rounds_rows = [row for row in printed if "max rounds" in row]
    assert rounds_rows, "no 'max rounds' row was printed by /status"
    assert any("13" in row for row in rounds_rows)