From 6c3aceaac385251c748c06d630a3f6366f49e273 Mon Sep 17 00:00:00 2001
From: VforVitorio <victorvegasobral@yahoo.com>
Date: Tue, 7 Apr 2026 12:38:07 +0200
Subject: [PATCH 1/3] feat: wire max_rounds safety boundary into model.act()
 (#97)

- AgentSettings.max_rounds default 50->10 with docstring explaining
  the trade-off and the 3 ways to override (config, env var, CLI flag)
- _run_turn passes max_prediction_rounds= to model.act() in the
  non-strict branch; strict stays on respond() which has no round
  concept
- Catch LMStudioPredictionError specifically when the message contains
  "final prediction round" and set self._last_turn_limit_reached. This
  prevents the run() top-level LMStudioServerError handler (parent
  class) from misreading the crash as an LM Studio disconnect
- Post-act check: ActResult.rounds >= cap also sets the flag (the
  well-behaved model case where the final round produced a text
  answer instead of a tool_call)
- run() prints an inline amber warning when the flag is set, matching
  the ctx window warning style
- CLI --max-rounds flag is now actually wired: mutates
  get_settings().agent.max_rounds for the session. Was previously
  accepted by Typer but silently ignored
- 5 regression tests + _make_mock_model fixed to return an
  ActResult-shaped mock with concrete rounds int (MagicMock >= int
  raises TypeError)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 CHANGELOG.md                  |   4 +
 docs/features/config.md       |   7 +-
 src/lmcode/agent/core.py      |  76 ++++++++++++++--
 src/lmcode/cli/app.py         |   3 +-
 src/lmcode/cli/chat.py        |  11 ++-
 src/lmcode/config/settings.py |  10 +-
 tests/test_agent/test_core.py | 166 +++++++++++++++++++++++++++++++++-
 7 files changed, 260 insertions(+), 17 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4ca4d2f..02b7f9a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,10 @@ lmcode uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [Unreleased]
 
+### Added
+- **`max_rounds` safety boundary** — `model.act()` now runs with `max_prediction_rounds` set from `agent.max_rounds` (default `10`, down from the previously unused `50`). Applies to ask and auto modes (strict routes through `model.respond()` which has no round concept). When the limit is hit the agent prints an inline warning — `agent stopped after N rounds — raise the limit with LMCODE_AGENT__MAX_ROUNDS=N, set agent.max_rounds in config, or pass --max-rounds N on the CLI` — instead of letting runaway tool loops burn the context window. Handles both cases: the well-behaved model finishing its final tool-free round at the cap, and the stubborn model that still emits a `tool_call` on the final round (SDK raises `LMStudioPredictionError` with `"final prediction round"` in the message, now caught specifically in `_run_turn` so the broader `LMStudioServerError` catch at `run()` level does not mistake it for a disconnect). (#97)
+- **CLI `--max-rounds N` flag is now actually wired** — previously accepted by `lmcode chat` but silently ignored. Now mutates `get_settings().agent.max_rounds` for the session before launching the agent, so `lmcode chat --max-rounds 25` works as expected.
+
 ### Fixed
 - **Strict mode now truly disables tools** — previously the `strict` permission mode label said "no tools — pure chat only" but the runtime still passed the full tool list to `model.act()`, so the model could happily emit tool calls and the runtime would execute them silently. Strict now routes through `model.respond()` — the pure-chat SDK primitive that has no tool concept at all — so the model never even sees a tool schema. (`model.act(tools=[])` is not a viable alternative: the SDK rejects it with `LMStudioValueError`.) (#99)
 - **Strict mode hard system prompt (second layer of defence)** — the SDK-level fix stops the model from emitting *real* tool calls, but cannot stop it from *fabricating* tool output in plain text (e.g. Qwen 7B happily replied "Here is the file content:" followed by invented code because the base prompt said "Always call `read_file` before describing file contents"). Strict now passes a *separate* `lms.Chat` seeded with a dedicated `_STRICT_SYSTEM_PROMPT` that explicitly forbids pretending to read files, fabricating output, or describing tool calls — while still listing what the model *can* do (review pasted code, answer general questions, rubber-duck debugging). `self._chat` is untouched so Tab-switching back to ask/auto keeps the base prompt and full history. (#99)
diff --git a/docs/features/config.md b/docs/features/config.md
index 666bff6..b4c048b 100644
--- a/docs/features/config.md
+++ b/docs/features/config.md
@@ -23,8 +23,9 @@ base_url = "http://localhost:1234"
 model = "auto"
 
 [agent]
-# Maximum number of agent loop iterations per run
-max_rounds = 50
+# Max tool-call rounds per turn. Applies to ask and auto modes.
+# Override for a session with --max-rounds N or LMCODE_AGENT__MAX_ROUNDS=N.
+max_rounds = 10
 
 # Whether to confirm before writing files
 confirm_writes = false
@@ -50,7 +51,7 @@ class LMStudioSettings(BaseSettings):
     model: str = "auto"
 
 class AgentSettings(BaseSettings):
-    max_rounds: int = 50
+    max_rounds: int = 10
     confirm_writes: bool = False
 
 class SessionSettings(BaseSettings):
diff --git a/src/lmcode/agent/core.py b/src/lmcode/agent/core.py
index 8e9c190..e15a407 100644
--- a/src/lmcode/agent/core.py
+++ b/src/lmcode/agent/core.py
@@ -75,6 +75,17 @@
 # Spinner / tips constants
 # ---------------------------------------------------------------------------
 
+#: Substring found in :class:`lms.LMStudioPredictionError` when the model
+#: requests a tool call on the final round that was capped by
+#: ``max_prediction_rounds``.  The SDK raises unconditionally in that case
+#: (our ``handle_invalid_tool_request`` callback can only override the
+#: message), so we match on this marker to convert the crash into a
+#: graceful "limit reached" warning instead of letting the run() loop's
+#: broader ``LMStudioServerError`` catch mistake it for a disconnect.
+#: Source of truth: ``lmstudio.async_api.AsyncLLM.act`` — the string is
+#: literally ``"Model requested tool use on final prediction round."``.
+_MAX_ROUNDS_ERR_MARKER: str = "final prediction round"
+
 #: Rich spinner style used during model inference.
 _SPINNER: str = "circleHalves"
 
@@ -382,6 +393,11 @@ def __init__(self, model_id: str = "auto") -> None:
         self._show_stats: bool = get_settings().ui.show_stats
         self._always_allowed_tools: set[str] = set()
         self._inference_config: dict[str, Any] = {}  # passed as config= to model.act()
+        # Set by _run_turn when ``max_prediction_rounds`` was hit this turn
+        # (either the SDK raised LMStudioPredictionError on the final round,
+        # or ActResult.rounds reached the configured cap). run() reads this
+        # after _run_turn returns to print an inline warning to the user.
+        self._last_turn_limit_reached: bool = False
         # Set by _wrap_tool when the user rejects a tool call and types a
         # redirect instruction.  run() detects this after the turn ends and
         # feeds the instruction back as a brand-new user message.
@@ -1185,6 +1201,13 @@ async def _keepalive() -> None:
         act_result: Any = None
         respond_result: Any = None
         strict_start: float | None = None
+        # Reset the flag at the start of every turn. run() reads it after
+        # _run_turn returns to decide whether to print the limit warning.
+        self._last_turn_limit_reached = False
+        # Pull max_rounds here (not later) so tests can patch
+        # get_settings() once and see a single call. None disables the cap.
+        max_rounds = get_settings().agent.max_rounds
+        max_prediction_rounds: int | None = max_rounds if max_rounds and max_rounds > 0 else None
         try:
             if self._mode == "strict":
                 # Strict mode (#99): use ``model.respond()`` — the pure
@@ -1212,18 +1235,43 @@ async def _keepalive() -> None:
                     on_prediction_fragment=_on_fragment,
                 )
             else:
-                act_result = await model.act(
-                    chat,
-                    tools=tools,
-                    config=self._inference_config if self._inference_config else None,
-                    on_message=_on_message,
-                    on_prediction_completed=_on_prediction_completed,
-                    on_prediction_fragment=_on_fragment,
-                )
+                try:
+                    act_result = await model.act(
+                        chat,
+                        tools=tools,
+                        max_prediction_rounds=max_prediction_rounds,
+                        config=self._inference_config if self._inference_config else None,
+                        on_message=_on_message,
+                        on_prediction_completed=_on_prediction_completed,
+                        on_prediction_fragment=_on_fragment,
+                    )
+                except lms.LMStudioPredictionError as e:
+                    # Max-rounds hit with a stubborn model: the SDK disabled
+                    # tools on the final round, the model still emitted a
+                    # tool_call, and the SDK raised unconditionally. Convert
+                    # that into a graceful limit-reached signal so run()
+                    # prints a warning instead of the run() top-level catch
+                    # mistaking it for a server disconnect (LMStudioServerError
+                    # is a *parent* of LMStudioPredictionError).
+                    if _MAX_ROUNDS_ERR_MARKER in str(e):
+                        self._last_turn_limit_reached = True
+                    else:
+                        raise
         finally:
             stop_evt.set()
             await keepalive
 
+        # Well-behaved model case: the SDK forced a tool-free final round and
+        # the model produced a normal text response, but rounds == the cap
+        # still means the task likely got truncated. Flag it so the user
+        # knows why the answer might feel incomplete.
+        if (
+            act_result is not None
+            and max_prediction_rounds is not None
+            and getattr(act_result, "rounds", 0) >= max_prediction_rounds
+        ):
+            self._last_turn_limit_reached = True
+
         # ``respond()`` returns a single ``PredictionResult`` with ``.stats``
         # directly, whereas ``act()`` fires ``_on_prediction_completed`` once
         # per round.  Normalise so the downstream stats code runs identically.
@@ -1398,6 +1446,18 @@ def _cycle_mode() -> None:
                                 f"[{TEXT_MUTED}] — run /compact to summarise "
                                 f"the conversation[/]"
                             )
+
+                    if self._last_turn_limit_reached:
+                        cap = get_settings().agent.max_rounds
+                        console.print(
+                            f"\n[{WARNING}]agent stopped after {cap} rounds[/]"
+                            f"[{TEXT_MUTED}] — raise the limit with "
+                            f"LMCODE_AGENT__MAX_ROUNDS=N, set "
+                            f"agent.max_rounds in config, or pass "
+                            f"--max-rounds N on the CLI. "
+                            f"Type to continue the conversation.[/]"
+                        )
+
                     console.print(Rule(style=f"dim {ACCENT}"))
 
                     # If the user rejected a tool call with redirect
diff --git a/src/lmcode/cli/app.py b/src/lmcode/cli/app.py
index 0358da6..39b45e0 100644
--- a/src/lmcode/cli/app.py
+++ b/src/lmcode/cli/app.py
@@ -42,7 +42,8 @@ def main(
     if ctx.invoked_subcommand is None:
         from lmcode.cli.chat import chat
 
-        chat(model="auto", max_rounds=50)
+        # max_rounds=None → use agent.max_rounds from config.toml / env var
+        chat(model="auto", max_rounds=None)
 
 
 # ---------------------------------------------------------------------------
diff --git a/src/lmcode/cli/chat.py b/src/lmcode/cli/chat.py
index 7b2e7f4..f616607 100644
--- a/src/lmcode/cli/chat.py
+++ b/src/lmcode/cli/chat.py
@@ -385,10 +385,19 @@ def _exit_no_model() -> None:
 @app.callback(invoke_without_command=True)
 def chat(
     model: str = typer.Option("auto", "--model", "-m", help="Model ID (default: auto-detect)."),
-    max_rounds: int = typer.Option(50, "--max-rounds", help="Maximum agent loop iterations."),
+    max_rounds: int | None = typer.Option(
+        None,
+        "--max-rounds",
+        help="Max tool-call rounds per turn (overrides agent.max_rounds in config).",
+    ),
 ) -> None:
     """Start an interactive coding agent session in the current directory."""
     settings = get_settings()
+    if max_rounds is not None:
+        # CLI flag overrides config.toml / env var for this session only.
+        # The Agent reads get_settings().agent.max_rounds at the start of
+        # every turn, so mutating it here is enough — no re-init needed.
+        settings.agent.max_rounds = max_rounds
     connected, detected_model = _probe_lmstudio()
 
     if not connected:
diff --git a/src/lmcode/config/settings.py b/src/lmcode/config/settings.py
index 4e5eeb7..362c3d7 100644
--- a/src/lmcode/config/settings.py
+++ b/src/lmcode/config/settings.py
@@ -32,7 +32,15 @@ def base_url(self) -> str:
 class AgentSettings(BaseSettings):
     """Runtime behaviour settings for the agent loop."""
 
-    max_rounds: int = 50
+    #: Maximum number of tool-call rounds inside a single ``model.act()``
+    #: invocation.  Passed to the LM Studio SDK as ``max_prediction_rounds``.
+    #: 10 is the sweet spot for Qwen 7B: enough headroom for multi-file
+    #: edits, tight enough to stop runaway loops before they burn context.
+    #: Raise via ``LMCODE_AGENT__MAX_ROUNDS=N`` or ``agent.max_rounds`` in
+    #: ``config.toml``, or pass ``--max-rounds N`` on the CLI for a single
+    #: session.  Applies to ask and auto modes; strict mode routes through
+    #: ``model.respond()`` which has no round concept.
+    max_rounds: int = 10
     permission_mode: Literal["ask", "auto", "strict"] = "ask"
     timeout_seconds: int = 30
     max_file_bytes: int = 100_000  # ~100 KB; see issue #2 for token-aware improvement
diff --git a/tests/test_agent/test_core.py b/tests/test_agent/test_core.py
index 0e97fe1..78f5a7a 100644
--- a/tests/test_agent/test_core.py
+++ b/tests/test_agent/test_core.py
@@ -74,8 +74,15 @@ def test_agent_ensure_chat_returns_same_instance() -> None:
 # ---------------------------------------------------------------------------
 
 
-def _make_mock_model(response_text: str) -> MagicMock:
-    """Build a mock model whose act() calls on_message with a fake AssistantResponse."""
+def _make_mock_model(response_text: str, rounds: int = 1) -> MagicMock:
+    """Build a mock model whose act() calls on_message with a fake AssistantResponse.
+
+    The returned ``ActResult``-shaped mock has a real ``rounds`` int
+    (default ``1``, well below the default ``max_rounds=10`` cap) so
+    the post-act limit check in :meth:`Agent._run_turn` does not crash
+    on a ``MagicMock >= int`` comparison. Pass ``rounds=N`` to simulate
+    a turn that used all N rounds for the limit-warning test.
+    """
 
     async def act_side_effect(
         chat: object, tools: object, on_message: object = None, **kwargs: object
@@ -88,7 +95,12 @@ async def act_side_effect(
             msg.role = "assistant"
             msg.tool_calls = None
             on_message(msg)
-        return MagicMock()
+        # Return an ActResult-shaped mock with concrete numeric fields so
+        # `getattr(result, "rounds", 0) >= max_prediction_rounds` works.
+        result = MagicMock()
+        result.rounds = rounds
+        result.total_time_seconds = 0.1
+        return result
 
     mock = MagicMock()
     mock.act = AsyncMock(side_effect=act_side_effect)
@@ -294,6 +306,154 @@ async def test_run_turn_non_strict_modes_use_act_with_tools() -> None:
         )
 
 
+# ---------------------------------------------------------------------------
+# max_rounds safety boundary (#97)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_run_turn_passes_max_rounds_to_act() -> None:
+    """``max_prediction_rounds`` must flow from the agent config into ``model.act()``.
+
+    Pins the config→SDK plumbing: if someone removes the kwarg from the
+    ``act()`` call, auto mode is once again unbounded and can loop
+    forever — the exact bug #97 was filed to fix.
+    """
+    agent = Agent()
+    agent._mode = "auto"
+    mock_model = _make_mock_model("ok")
+
+    # Patch get_settings so the test does not depend on ~/.config state.
+    mock_settings = MagicMock()
+    mock_settings.agent.max_rounds = 7
+    mock_settings.agent.max_file_bytes = 100_000
+    with (
+        patch("lmcode.agent.core.read_lmcode_md", return_value=None),
+        patch("lmcode.agent.core.get_settings", return_value=mock_settings),
+    ):
+        await agent._run_turn(mock_model, "hi")
+
+    call_kwargs = mock_model.act.await_args.kwargs
+    assert call_kwargs["max_prediction_rounds"] == 7
+
+
+@pytest.mark.asyncio
+async def test_run_turn_max_rounds_none_when_config_zero() -> None:
+    """A non-positive ``max_rounds`` disables the cap (passes ``None`` to the SDK).
+
+    Users who explicitly set ``max_rounds = 0`` in config.toml are opting
+    out of the safety boundary; we must not send ``0`` to the SDK because
+    ``model.act()`` rejects values < 1 with ``LMStudioValueError``.
+    """
+    agent = Agent()
+    agent._mode = "auto"
+    mock_model = _make_mock_model("ok")
+
+    mock_settings = MagicMock()
+    mock_settings.agent.max_rounds = 0
+    mock_settings.agent.max_file_bytes = 100_000
+    with (
+        patch("lmcode.agent.core.read_lmcode_md", return_value=None),
+        patch("lmcode.agent.core.get_settings", return_value=mock_settings),
+    ):
+        await agent._run_turn(mock_model, "hi")
+
+    call_kwargs = mock_model.act.await_args.kwargs
+    assert call_kwargs["max_prediction_rounds"] is None
+
+
+@pytest.mark.asyncio
+async def test_run_turn_flags_limit_reached_when_rounds_equal_cap() -> None:
+    """Well-behaved model case: ``ActResult.rounds == cap`` → set the flag.
+
+    Even when the SDK closes the loop cleanly (model gave up tools on the
+    final round and produced a text answer), reaching the cap means the
+    task was likely truncated. ``self._last_turn_limit_reached`` must be
+    set so ``run()`` prints the warning.
+    """
+    agent = Agent()
+    agent._mode = "auto"
+    mock_model = _make_mock_model("done", rounds=5)
+
+    mock_settings = MagicMock()
+    mock_settings.agent.max_rounds = 5
+    mock_settings.agent.max_file_bytes = 100_000
+    with (
+        patch("lmcode.agent.core.read_lmcode_md", return_value=None),
+        patch("lmcode.agent.core.get_settings", return_value=mock_settings),
+    ):
+        await agent._run_turn(mock_model, "hi")
+
+    assert agent._last_turn_limit_reached is True
+
+
+@pytest.mark.asyncio
+async def test_run_turn_flags_limit_reached_on_final_round_error() -> None:
+    """Stubborn model case: SDK raises ``LMStudioPredictionError`` on the final round.
+
+    When the model ignores the tools-disabled signal on the final round
+    and emits a tool_call anyway, the SDK raises unconditionally with a
+    message containing ``"final prediction round"``. ``_run_turn`` must
+    catch this *specific* error, set the limit flag, and NOT let it
+    bubble to the ``run()`` top-level handler (which catches
+    ``LMStudioServerError`` — the parent class — and would misinterpret
+    the crash as a server disconnect).
+    """
+    import lmstudio as lms
+
+    agent = Agent()
+    agent._mode = "auto"
+
+    async def _raise_final_round(
+        chat: object, tools: object, on_message: object = None, **kwargs: object
+    ) -> None:
+        raise lms.LMStudioPredictionError("Model requested tool use on final prediction round.")
+
+    mock_model = MagicMock()
+    mock_model.act = AsyncMock(side_effect=_raise_final_round)
+
+    mock_settings = MagicMock()
+    mock_settings.agent.max_rounds = 3
+    mock_settings.agent.max_file_bytes = 100_000
+    with (
+        patch("lmcode.agent.core.read_lmcode_md", return_value=None),
+        patch("lmcode.agent.core.get_settings", return_value=mock_settings),
+    ):
+        # Must NOT raise — the error is caught and converted to a flag.
+        await agent._run_turn(mock_model, "loop forever please")
+
+    assert agent._last_turn_limit_reached is True
+
+
+@pytest.mark.asyncio
+async def test_run_turn_reraises_unrelated_prediction_error() -> None:
+    """A prediction error that is NOT the final-round case must propagate.
+
+    The catch in ``_run_turn`` is narrow on purpose — matching only the
+    SDK's ``"final prediction round"`` marker. Any other
+    ``LMStudioPredictionError`` (malformed tool schema, invalid
+    response format, etc.) is a real bug the user needs to see.
+    """
+    import lmstudio as lms
+
+    agent = Agent()
+    agent._mode = "auto"
+
+    async def _raise_other(
+        chat: object, tools: object, on_message: object = None, **kwargs: object
+    ) -> None:
+        raise lms.LMStudioPredictionError("something else went wrong")
+
+    mock_model = MagicMock()
+    mock_model.act = AsyncMock(side_effect=_raise_other)
+
+    with patch("lmcode.agent.core.read_lmcode_md", return_value=None):
+        with pytest.raises(lms.LMStudioPredictionError, match="something else"):
+            await agent._run_turn(mock_model, "hi")
+
+    assert agent._last_turn_limit_reached is False
+
+
 # ---------------------------------------------------------------------------
 # _wrap_tool_verbose — positional-arg merging
 # ---------------------------------------------------------------------------

From 634815484e8ed0cedd2e20217db81ebf3fa5d448 Mon Sep 17 00:00:00 2001
From: VforVitorio <victorvegasobral@yahoo.com>
Date: Tue, 7 Apr 2026 12:56:05 +0200
Subject: [PATCH 2/3] =?UTF-8?q?feat:=20auto-mode=20UX=20=E2=80=94=20per-mo?=
 =?UTF-8?q?de=20spinner,=20round=20counter,=20first-time=20warning=20(#97)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Builds on PR 1's max_rounds safety core with the UX layer that makes the
new boundary visible and teachable.

- Per-mode spinner colour: the in-turn spinner tracks the active mode
  (orange=ask, blue=auto, red=strict) via a new mode_color() accessor
  in ui/status.py so the current mode is visible at a glance without
  reading the prompt line.
- Auto-mode round counter: in auto mode the spinner label appends
  " · round N/M" driven by an on_round_start callback wired into
  model.act(). Only shown in auto mode (ask blocks on user approval
  between tool calls, strict has no round concept).
- First-time auto warning: the first Tab-cycle into auto in a session
  prints an amber one-liner above the prompt via run_in_terminal,
  gated by self._auto_warned so it never repeats.
- /status surfaces the active max_rounds so users can verify which
  safety boundary is in effect after config / env / --max-rounds.

Tests cover: on_round_start kwarg wiring, _auto_warned initial state
and one-shot behaviour, mode-cycle preservation of
_always_allowed_tools (regression guard), /status max-rounds row.

Stacked on feat/auto-max-rounds — merge PR #104 first.
---
 CHANGELOG.md                  |   4 ++
 src/lmcode/agent/core.py      |  85 +++++++++++++++++++++++---
 src/lmcode/ui/status.py       |  10 +++
 tests/test_agent/test_core.py | 111 ++++++++++++++++++++++++++++++++++
 4 files changed, 200 insertions(+), 10 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 02b7f9a..f2870e0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,10 @@ lmcode uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 ### Added
 - **`max_rounds` safety boundary** — `model.act()` now runs with `max_prediction_rounds` set from `agent.max_rounds` (default `10`, down from the previously unused `50`). Applies to ask and auto modes (strict routes through `model.respond()` which has no round concept). When the limit is hit the agent prints an inline warning — `agent stopped after N rounds — raise the limit with LMCODE_AGENT__MAX_ROUNDS=N, set agent.max_rounds in config, or pass --max-rounds N on the CLI` — instead of letting runaway tool loops burn the context window. Handles both cases: the well-behaved model finishing its final tool-free round at the cap, and the stubborn model that still emits a `tool_call` on the final round (SDK raises `LMStudioPredictionError` with `"final prediction round"` in the message, now caught specifically in `_run_turn` so the broader `LMStudioServerError` catch at `run()` level does not mistake it for a disconnect). (#97)
 - **CLI `--max-rounds N` flag is now actually wired** — previously accepted by `lmcode chat` but silently ignored. Now mutates `get_settings().agent.max_rounds` for the session before launching the agent, so `lmcode chat --max-rounds 25` works as expected.
+- **Per-mode spinner colour** — the in-turn spinner now tracks the active permission mode (orange=ask, blue=auto, red=strict) so the current mode is visible at a glance without reading the prompt line. Applies to both the initial `thinking.` spinner in `run()` and the live-updating spinner inside `_keepalive`. Added `mode_color(mode)` public accessor to `ui/status.py` so core doesn't reach into the module-private `_MODE_COLORS` dict. (#97)
+- **Auto-mode round counter in spinner** — in auto mode the spinner label appends `· round N/M` while `model.act()` runs, wired via a new `on_round_start` callback that updates a closure-local counter. The counter only shows in auto mode (ask mode redraws more rarely because it blocks on user approval between tool calls; strict mode has no rounds at all). Gives the user real-time visibility into how much of the `max_rounds` budget the current turn has consumed. (#97)
+- **First-time auto-mode warning** — the first time a user Tab-cycles into auto mode in a session, a one-shot amber hint prints above the prompt: `auto mode — tools run without asking, up to N rounds per turn. Ctrl+C stops a running turn.` Printed via prompt_toolkit's `run_in_terminal` so it lands cleanly above the live prompt without tearing the ghost-text completion layer. Session-scoped via `self._auto_warned` — never re-prints even after cycling away and back. (#97)
+- **`/status` surfaces `max rounds`** — the session-state table now includes a `max rounds: N` row so users can verify which safety boundary is currently in effect (config, env var, or `--max-rounds` CLI override all funnel into the same value).
 
 ### Fixed
 - **Strict mode now truly disables tools** — previously the `strict` permission mode label said "no tools — pure chat only" but the runtime still passed the full tool list to `model.act()`, so the model could happily emit tool calls and the runtime would execute them silently. Strict now routes through `model.respond()` — the pure-chat SDK primitive that has no tool concept at all — so the model never even sees a tool schema. (`model.act(tools=[])` is not a viable alternative: the SDK rejects it with `LMStudioValueError`.) (#99)
diff --git a/src/lmcode/agent/core.py b/src/lmcode/agent/core.py
index e15a407..ee834d4 100644
--- a/src/lmcode/agent/core.py
+++ b/src/lmcode/agent/core.py
@@ -68,6 +68,7 @@
     MODES,
     build_prompt,
     build_status_line,
+    mode_color,
     next_mode,
 )
 
@@ -393,6 +394,10 @@ def __init__(self, model_id: str = "auto") -> None:
         self._show_stats: bool = get_settings().ui.show_stats
         self._always_allowed_tools: set[str] = set()
         self._inference_config: dict[str, Any] = {}  # passed as config= to model.act()
+        # True once the first-time auto-mode warning has been shown.  The
+        # warning fires the first time the user Tab-cycles into auto mode
+        # during a session — never again, even after cycling away and back.
+        self._auto_warned: bool = False
         # Set by _run_turn when ``max_prediction_rounds`` was hit this turn
         # (either the SDK raised LMStudioPredictionError on the final round,
         # or ActResult.rounds reached the configured cap). run() reads this
@@ -697,6 +702,7 @@ def _print_status(self) -> None:
         status_rows: list[tuple[str, str]] = [
             ("model", self._model_display or "(none)"),
             ("mode", self._mode),
+            ("max rounds", str(get_settings().agent.max_rounds)),
             ("temperature", temp_display),
             ("verbose", "on" if self._verbose else "off"),
             ("tips", "on" if self._show_tips else "off"),
@@ -713,6 +719,26 @@ def _print_status(self) -> None:
             console.print(row)
         console.print()
 
+    def _print_auto_warning(self) -> None:
+        """Print the first-time auto-mode caution and set :attr:`_auto_warned`.
+
+        Called from ``_cycle_mode`` inside :meth:`run` via ``run_in_terminal``
+        the first time the user Tab-cycles into ``auto`` mode during a
+        session.  The flag prevents re-printing on subsequent cycles.
+        Intended to be a one-line amber hint that matches the style used
+        for the context-window and max-rounds warnings.
+        """
+        if self._auto_warned:
+            return
+        self._auto_warned = True
+        cap = get_settings().agent.max_rounds
+        console.print(
+            f"[{WARNING}]auto mode[/]"
+            f"[{TEXT_MUTED}] — tools run without asking, "
+            f"up to {cap} rounds per turn. "
+            f"Ctrl+C stops a running turn.[/]"
+        )
+
     # ------------------------------------------------------------------
     # /compact
     # ------------------------------------------------------------------
@@ -1153,6 +1179,19 @@ def _on_fragment(fragment: Any, _round_index: int = 0) -> None:
             """
             tok_count[0] += 1
 
+        # Current round (1-indexed) as reported by ``on_round_start``.
+        # Zero means no round has started yet (pre-first-round thinking)
+        # or the mode is strict (``model.respond()`` has no round concept).
+        current_round: list[int] = [0]
+
+        def _on_round_start(round_index: int) -> None:
+            """Update the spinner round counter — auto mode displays ``N/M``.
+
+            ``round_index`` is 0-based in the SDK, so we store ``+1`` to
+            match the human-friendly ``round 3/10`` format.
+            """
+            current_round[0] = round_index + 1
+
         # Strict mode only wraps tools when we're actually going to use
         # them (ask/auto path).  The strict branch below skips tool
         # plumbing entirely and calls ``model.respond()`` — the SDK
@@ -1163,11 +1202,26 @@ def _on_fragment(fragment: Any, _round_index: int = 0) -> None:
         stop_evt = asyncio.Event()
         shuffled_tips = random.sample(_TIPS, len(_TIPS)) if self._show_tips else []
 
+        # Reset the per-turn limit-reached flag here so it's defined before
+        # any early return inside the ``try`` block.  run() reads it after
+        # _run_turn returns to decide whether to print the limit warning.
+        self._last_turn_limit_reached = False
+        # Pull max_rounds here (not later) so tests can patch
+        # get_settings() once and see a single call. None disables the cap.
+        # Assigned before the keepalive task is created so the closure
+        # below sees a bound cell when it first runs.
+        max_rounds = get_settings().agent.max_rounds
+        max_prediction_rounds: int | None = max_rounds if max_rounds and max_rounds > 0 else None
+
         async def _keepalive() -> None:
             """Update the spinner label every 100 ms; animate dots; rotate tips every ~8 s.
 
             Runs on the main event loop alongside ``model.act()``.  Gets CPU time
             whenever the SDK yields back to the loop during async HTTP prefill.
+            The spinner colour tracks the current mode (orange=ask, blue=auto,
+            red=strict) so the active permission mode is visible at a glance
+            without reading the prompt line.  In auto mode the label also
+            includes a ``round N/M`` counter driven by ``on_round_start``.
             """
             tip_idx = 0
             dot_idx = 0
@@ -1190,7 +1244,14 @@ async def _keepalive() -> None:
                         )
                     else:
                         label = f" {base}"
-                    rows: list[Any] = [Spinner(_SPINNER, text=label, style=ACCENT)]
+                    if (
+                        self._mode == "auto"
+                        and current_round[0] > 0
+                        and max_prediction_rounds is not None
+                    ):
+                        label = f"{label}  ·  round {current_round[0]}/{max_prediction_rounds}"
+                    spinner_color = mode_color(self._mode)
+                    rows: list[Any] = [Spinner(_SPINNER, text=label, style=spinner_color)]
                     if shuffled_tips:
                         rows.append(Text(f"  {shuffled_tips[tip_idx]}", style=f"dim {ACCENT}"))
                     live.update(RenderGroup(*rows))
@@ -1201,13 +1262,6 @@ async def _keepalive() -> None:
         act_result: Any = None
         respond_result: Any = None
         strict_start: float | None = None
-        # Reset the flag at the start of every turn. run() reads it after
-        # _run_turn returns to decide whether to print the limit warning.
-        self._last_turn_limit_reached = False
-        # Pull max_rounds here (not later) so tests can patch
-        # get_settings() once and see a single call. None disables the cap.
-        max_rounds = get_settings().agent.max_rounds
-        max_prediction_rounds: int | None = max_rounds if max_rounds and max_rounds > 0 else None
         try:
             if self._mode == "strict":
                 # Strict mode (#99): use ``model.respond()`` — the pure
@@ -1242,6 +1296,7 @@ async def _keepalive() -> None:
                         max_prediction_rounds=max_prediction_rounds,
                         config=self._inference_config if self._inference_config else None,
                         on_message=_on_message,
+                        on_round_start=_on_round_start,
                         on_prediction_completed=_on_prediction_completed,
                         on_prediction_fragment=_on_fragment,
                     )
@@ -1321,8 +1376,18 @@ async def run(self) -> None:
         settings = get_settings()
 
         def _cycle_mode() -> None:
-            """Advance to the next mode in-place (prompt redraws via invalidate)."""
+            """Advance to the next mode in-place (prompt redraws via invalidate).
+
+            The first time the user cycles into ``auto`` in a given session,
+            schedule a one-shot amber warning via prompt-toolkit's
+            :func:`run_in_terminal` so the hint prints cleanly above the
+            live prompt without tearing the ghost-text completion layer.
+            """
             self._mode = next_mode(self._mode)
+            if self._mode == "auto" and not self._auto_warned:
+                from prompt_toolkit.application import run_in_terminal
+
+                run_in_terminal(self._print_auto_warning)
 
         session = make_session(cycle_mode=_cycle_mode)
 
@@ -1387,7 +1452,7 @@ def _cycle_mode() -> None:
                         continue
 
                     initial: Any = RenderGroup(
-                        Spinner(_SPINNER, text=" thinking.", style=ACCENT),
+                        Spinner(_SPINNER, text=" thinking.", style=mode_color(self._mode)),
                     )
                     self._raw_history.append(("user", stripped))
                     _interrupted = False
diff --git a/src/lmcode/ui/status.py b/src/lmcode/ui/status.py
index a544cfa..8c1ad35 100644
--- a/src/lmcode/ui/status.py
+++ b/src/lmcode/ui/status.py
@@ -31,6 +31,16 @@ def next_mode(current: str) -> str:
     return MODES[(idx + 1) % len(MODES)]
 
 
+def mode_color(mode: str) -> str:
+    """Return the hex colour associated with *mode*, falling back to muted grey.
+
+    The same palette is used by the ``[mode]`` indicator in the prompt and by
+    the in-turn spinner inside the agent loop, so the two always agree:
+    orange = ask, blue = auto, red = strict.
+    """
+    return _MODE_COLORS.get(mode, _MUTED)
+
+
 def build_status_line(model: str) -> str:
     """Return a Rich markup string shown once after connecting to LM Studio.
 
diff --git a/tests/test_agent/test_core.py b/tests/test_agent/test_core.py
index 78f5a7a..474ac6c 100644
--- a/tests/test_agent/test_core.py
+++ b/tests/test_agent/test_core.py
@@ -454,6 +454,117 @@ async def _raise_other(
     assert agent._last_turn_limit_reached is False
 
 
+# ---------------------------------------------------------------------------
+# auto mode UX — spinner colour, round counter, first-time warning (#97)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_run_turn_passes_on_round_start_to_act() -> None:
+    """``on_round_start`` must be wired so the spinner can display ``round N/M``.
+
+    The callback lives entirely inside ``_run_turn``'s closure (it updates a
+    local ``current_round`` cell read by the keepalive task), so we assert on
+    two things: (1) the kwarg was passed as a callable to ``model.act()``,
+    and (2) calling it with a 0-based round index does not raise.
+    """
+    agent = Agent()
+    agent._mode = "auto"
+    mock_model = _make_mock_model("ok")
+
+    with patch("lmcode.agent.core.read_lmcode_md", return_value=None):
+        await agent._run_turn(mock_model, "hi")
+
+    call_kwargs = mock_model.act.await_args.kwargs
+    assert "on_round_start" in call_kwargs
+    assert callable(call_kwargs["on_round_start"])
+    # Must accept a 0-based round index without raising.
+    call_kwargs["on_round_start"](0)
+    call_kwargs["on_round_start"](2)
+
+
+def test_agent_auto_warned_initially_false() -> None:
+    """Fresh Agent starts with the first-time auto-mode warning un-fired."""
+    agent = Agent()
+    assert agent._auto_warned is False
+
+
+def test_print_auto_warning_fires_once_per_session() -> None:
+    """``_print_auto_warning`` sets the flag on first call and is a no-op afterwards.
+
+    The warning is triggered from the ``_cycle_mode`` closure in ``run()`` via
+    ``run_in_terminal``; we test the method directly so the test does not
+    depend on prompt_toolkit's terminal plumbing. Calling it twice must
+    print exactly once — the second call should exit immediately.
+    """
+    from lmcode.agent import _display
+
+    agent = Agent()
+    with patch.object(_display.console, "print") as mock_print:
+        agent._print_auto_warning()
+        assert agent._auto_warned is True
+        assert mock_print.call_count == 1
+
+        agent._print_auto_warning()
+        # Flag still True and no additional prints — second call is a no-op.
+        assert agent._auto_warned is True
+        assert mock_print.call_count == 1
+
+
+def test_cycle_mode_preserves_always_allowed_tools() -> None:
+    """Tab-cycling the mode must not clear session-scoped always-allow grants.
+
+    Regression guard for a subtle UX pitfall: if the user grants "always allow
+    write_file" in ask mode and then Tab-cycles to auto → strict → ask, the
+    grants should survive the round trip. The set is plain Agent state with
+    no cycle hook touching it, but this test pins the invariant so a future
+    refactor that adds ``_always_allowed_tools.clear()`` to the mode handler
+    will be caught.
+    """
+    from lmcode.ui.status import next_mode
+
+    agent = Agent()
+    agent._mode = "ask"
+    agent._always_allowed_tools = {"read_file", "write_file"}
+
+    # Simulate three Tab presses: ask → auto → strict → ask.
+    for _ in range(3):
+        agent._mode = next_mode(agent._mode)
+
+    assert agent._mode == "ask"
+    assert agent._always_allowed_tools == {"read_file", "write_file"}
+
+
+def test_print_status_includes_max_rounds_line() -> None:
+    """``/status`` must surface the active ``max_rounds`` so users can verify the cap.
+
+    The line is the only place in the running session that confirms which
+    safety boundary is in effect (config / env var / CLI flag). We capture
+    the Rich console output and assert the label is present.
+    """
+    from lmcode.agent import _display
+
+    agent = Agent()
+    mock_settings = MagicMock()
+    mock_settings.agent.max_rounds = 13
+    mock_settings.agent.max_file_bytes = 100_000
+
+    printed: list[str] = []
+
+    def _capture(obj: object = "", *args: object, **kwargs: object) -> None:
+        printed.append(str(obj))
+
+    with (
+        patch("lmcode.agent.core.get_settings", return_value=mock_settings),
+        patch.object(_display.console, "print", side_effect=_capture),
+    ):
+        agent._print_status()
+
+    joined = "\n".join(printed)
+    assert "max rounds" in joined
+    assert "13" in joined
+
+
 # ---------------------------------------------------------------------------
 # _wrap_tool_verbose — positional-arg merging
 # ---------------------------------------------------------------------------

From 6f8778d8f3265212071ae6edfa2805dabcbdb839 Mon Sep 17 00:00:00 2001
From: VforVitorio <victorvegasobral@yahoo.com>
Date: Tue, 7 Apr 2026 13:37:58 +0200
Subject: [PATCH 3/3] =?UTF-8?q?fix:=20restore=20PR=202=20content=20lost=20?=
 =?UTF-8?q?in=20dev=E2=86=92feat/auto-mode-ux=20merge?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When dev (containing the squash-merged #104) was merged into this
branch, git's three-way merge dropped three pieces of PR 2 work
because it saw them as conflicts with the squashed version of #104:

- self._auto_warned init flag was removed from Agent.__init__,
  causing mypy `has-type` error in _print_auto_warning
- on_round_start=_on_round_start kwarg was dropped from the
  model.act() call, breaking the round counter test
- max_prediction_rounds was re-added in its original post-keepalive
  position, duplicating the moved-up declaration and causing the
  mypy `no-redef` error at line 1267

Also restores the 5 PR 2 tests and the 4 PR 2 CHANGELOG entries
that the merge silently dropped. Local ruff + mypy + pytest green.
---
 CHANGELOG.md                  |   4 ++
 src/lmcode/agent/core.py      |  12 ++--
 tests/test_agent/test_core.py | 111 ++++++++++++++++++++++++++++++++++
 3 files changed, 120 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 02b7f9a..f2870e0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,10 @@ lmcode uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 ### Added
 - **`max_rounds` safety boundary** — `model.act()` now runs with `max_prediction_rounds` set from `agent.max_rounds` (default `10`, down from the previously unused `50`). Applies to ask and auto modes (strict routes through `model.respond()` which has no round concept). When the limit is hit the agent prints an inline warning — `agent stopped after N rounds — raise the limit with LMCODE_AGENT__MAX_ROUNDS=N, set agent.max_rounds in config, or pass --max-rounds N on the CLI` — instead of letting runaway tool loops burn the context window. Handles both cases: the well-behaved model finishing its final tool-free round at the cap, and the stubborn model that still emits a `tool_call` on the final round (SDK raises `LMStudioPredictionError` with `"final prediction round"` in the message, now caught specifically in `_run_turn` so the broader `LMStudioServerError` catch at `run()` level does not mistake it for a disconnect). (#97)
 - **CLI `--max-rounds N` flag is now actually wired** — previously accepted by `lmcode chat` but silently ignored. Now mutates `get_settings().agent.max_rounds` for the session before launching the agent, so `lmcode chat --max-rounds 25` works as expected.
+- **Per-mode spinner colour** — the in-turn spinner now tracks the active permission mode (orange=ask, blue=auto, red=strict) so the current mode is visible at a glance without reading the prompt line. Applies to both the initial `thinking.` spinner in `run()` and the live-updating spinner inside `_keepalive`. Added `mode_color(mode)` public accessor to `ui/status.py` so core doesn't reach into the module-private `_MODE_COLORS` dict. (#97)
+- **Auto-mode round counter in spinner** — in auto mode the spinner label appends `· round N/M` while `model.act()` runs, wired via a new `on_round_start` callback that updates a closure-local counter. The counter only shows in auto mode (ask mode redraws more rarely because it blocks on user approval between tool calls; strict mode has no rounds at all). Gives the user real-time visibility into how much of the `max_rounds` budget the current turn has consumed. (#97)
+- **First-time auto-mode warning** — the first time a user Tab-cycles into auto mode in a session, a one-shot amber hint prints above the prompt: `auto mode — tools run without asking, up to N rounds per turn. Ctrl+C stops a running turn.` Printed via prompt_toolkit's `run_in_terminal` so it lands cleanly above the live prompt without tearing the ghost-text completion layer. Session-scoped via `self._auto_warned` — never re-prints even after cycling away and back. (#97)
+- **`/status` surfaces `max rounds`** — the session-state table now includes a `max rounds: N` row so users can verify which safety boundary is currently in effect (config, env var, or `--max-rounds` CLI override all funnel into the same value).
 
 ### Fixed
 - **Strict mode now truly disables tools** — previously the `strict` permission mode label said "no tools — pure chat only" but the runtime still passed the full tool list to `model.act()`, so the model could happily emit tool calls and the runtime would execute them silently. Strict now routes through `model.respond()` — the pure-chat SDK primitive that has no tool concept at all — so the model never even sees a tool schema. (`model.act(tools=[])` is not a viable alternative: the SDK rejects it with `LMStudioValueError`.) (#99)
diff --git a/src/lmcode/agent/core.py b/src/lmcode/agent/core.py
index d450b02..ee834d4 100644
--- a/src/lmcode/agent/core.py
+++ b/src/lmcode/agent/core.py
@@ -394,6 +394,10 @@ def __init__(self, model_id: str = "auto") -> None:
         self._show_stats: bool = get_settings().ui.show_stats
         self._always_allowed_tools: set[str] = set()
         self._inference_config: dict[str, Any] = {}  # passed as config= to model.act()
+        # True once the first-time auto-mode warning has been shown.  The
+        # warning fires the first time the user Tab-cycles into auto mode
+        # during a session — never again, even after cycling away and back.
+        self._auto_warned: bool = False
         # Set by _run_turn when ``max_prediction_rounds`` was hit this turn
         # (either the SDK raised LMStudioPredictionError on the final round,
         # or ActResult.rounds reached the configured cap). run() reads this
@@ -1258,13 +1262,6 @@ async def _keepalive() -> None:
         act_result: Any = None
         respond_result: Any = None
         strict_start: float | None = None
-        # Reset the flag at the start of every turn. run() reads it after
-        # _run_turn returns to decide whether to print the limit warning.
-        self._last_turn_limit_reached = False
-        # Pull max_rounds here (not later) so tests can patch
-        # get_settings() once and see a single call. None disables the cap.
-        max_rounds = get_settings().agent.max_rounds
-        max_prediction_rounds: int | None = max_rounds if max_rounds and max_rounds > 0 else None
         try:
             if self._mode == "strict":
                 # Strict mode (#99): use ``model.respond()`` — the pure
@@ -1299,6 +1296,7 @@ async def _keepalive() -> None:
                         max_prediction_rounds=max_prediction_rounds,
                         config=self._inference_config if self._inference_config else None,
                         on_message=_on_message,
+                        on_round_start=_on_round_start,
                         on_prediction_completed=_on_prediction_completed,
                         on_prediction_fragment=_on_fragment,
                     )
diff --git a/tests/test_agent/test_core.py b/tests/test_agent/test_core.py
index 78f5a7a..474ac6c 100644
--- a/tests/test_agent/test_core.py
+++ b/tests/test_agent/test_core.py
@@ -454,6 +454,117 @@ async def _raise_other(
     assert agent._last_turn_limit_reached is False
 
 
+# ---------------------------------------------------------------------------
+# auto mode UX — spinner colour, round counter, first-time warning (#97)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_run_turn_passes_on_round_start_to_act() -> None:
+    """``on_round_start`` must be wired so the spinner can display ``round N/M``.
+
+    The callback lives entirely inside ``_run_turn``'s closure (it updates a
+    local ``current_round`` cell read by the keepalive task), so we assert on
+    two things: (1) the kwarg was passed as a callable to ``model.act()``,
+    and (2) calling it with a 0-based round index does not raise.
+    """
+    agent = Agent()
+    agent._mode = "auto"
+    mock_model = _make_mock_model("ok")
+
+    with patch("lmcode.agent.core.read_lmcode_md", return_value=None):
+        await agent._run_turn(mock_model, "hi")
+
+    call_kwargs = mock_model.act.await_args.kwargs
+    assert "on_round_start" in call_kwargs
+    assert callable(call_kwargs["on_round_start"])
+    # Must accept a 0-based round index without raising.
+    call_kwargs["on_round_start"](0)
+    call_kwargs["on_round_start"](2)
+
+
+def test_agent_auto_warned_initially_false() -> None:
+    """Fresh Agent starts with the first-time auto-mode warning un-fired."""
+    agent = Agent()
+    assert agent._auto_warned is False
+
+
+def test_print_auto_warning_fires_once_per_session() -> None:
+    """``_print_auto_warning`` sets the flag on first call and is a no-op afterwards.
+
+    The warning is triggered from the ``_cycle_mode`` closure in ``run()`` via
+    ``run_in_terminal``; we test the method directly so the test does not
+    depend on prompt_toolkit's terminal plumbing. Calling it twice must
+    print exactly once — the second call should exit immediately.
+    """
+    from lmcode.agent import _display
+
+    agent = Agent()
+    with patch.object(_display.console, "print") as mock_print:
+        agent._print_auto_warning()
+        assert agent._auto_warned is True
+        assert mock_print.call_count == 1
+
+        agent._print_auto_warning()
+        # Flag still True and no additional prints — second call is a no-op.
+        assert agent._auto_warned is True
+        assert mock_print.call_count == 1
+
+
+def test_cycle_mode_preserves_always_allowed_tools() -> None:
+    """Tab-cycling the mode must not clear session-scoped always-allow grants.
+
+    Regression guard for a subtle UX pitfall: if the user grants "always allow
+    write_file" in ask mode and then Tab-cycles to auto → strict → ask, the
+    grants should survive the round trip. The set is plain Agent state with
+    no cycle hook touching it, but this test pins the invariant so a future
+    refactor that adds ``_always_allowed_tools.clear()`` to the mode handler
+    will be caught.
+    """
+    from lmcode.ui.status import next_mode
+
+    agent = Agent()
+    agent._mode = "ask"
+    agent._always_allowed_tools = {"read_file", "write_file"}
+
+    # Simulate three Tab presses: ask → auto → strict → ask.
+    for _ in range(3):
+        agent._mode = next_mode(agent._mode)
+
+    assert agent._mode == "ask"
+    assert agent._always_allowed_tools == {"read_file", "write_file"}
+
+
+def test_print_status_includes_max_rounds_line() -> None:
+    """``/status`` must surface the active ``max_rounds`` so users can verify the cap.
+
+    The line is the only place in the running session that confirms which
+    safety boundary is in effect (config / env var / CLI flag). We capture
+    the Rich console output and assert the label is present.
+    """
+    from lmcode.agent import _display
+
+    agent = Agent()
+    mock_settings = MagicMock()
+    mock_settings.agent.max_rounds = 13
+    mock_settings.agent.max_file_bytes = 100_000
+
+    printed: list[str] = []
+
+    def _capture(obj: object = "", *args: object, **kwargs: object) -> None:
+        printed.append(str(obj))
+
+    with (
+        patch("lmcode.agent.core.get_settings", return_value=mock_settings),
+        patch.object(_display.console, "print", side_effect=_capture),
+    ):
+        agent._print_status()
+
+    joined = "\n".join(printed)
+    assert "max rounds" in joined
+    assert "13" in joined
+
+
 # ---------------------------------------------------------------------------
 # _wrap_tool_verbose — positional-arg merging
 # ---------------------------------------------------------------------------