From a9525c6230503889c99ca667bd87231afd406985 Mon Sep 17 00:00:00 2001 From: Bill Thornton Date: Sat, 16 May 2026 13:58:00 -0700 Subject: [PATCH] fix(TAP-1877): kill '(00 scope, N system)' double-zero leak in Execution stats WARN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `grep -c | ... || echo 0` is the canonical pitfall already documented in CLAUDE.md: `grep -c` on a no-match exits 1 with stdout "0", so the `|| echo 0` branch appends another "0" — `tr -d '[:space:]'` then collapses the pair into the literal "00" that leaked through to the WARN line on 9 of 50 lines in the 2026-05-15 → 2026-05-16 tapps-brain ralph.log. Two-part fix: 1. Replace the inline grep/echo/tr-d-space idiom in exec_run_live with the documented `tr -cd '0-9' || true` + `${var:-0}` pattern. The change keeps only digits regardless of whether the inner grep matched, and the default keeps arithmetic safe if the pipeline produces "0", "" or any digit run. 2. Extract the stats-line emission into a new `exec_log_execution_stats` helper at the bottom of lib/exec_helpers.sh so the regression surface is unit-testable. exec_run_live becomes one line. The new tests/unit/test_exec_post_run.bats covers the canonical regression case (errors > 0 + zero scope matches must print `(0 scope, N system)`, not `(00 scope, …)`) plus the mixed scope/system split, the negative-system clamp, the missing-output_file defensive path, and the zero-errors INFO branch. Full unit (1731) and integration (203) suites green. Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/exec_helpers.sh | 56 ++++++++++--- tests/unit/test_exec_post_run.bats | 128 +++++++++++++++++++++++++++++ 2 files changed, 172 insertions(+), 12 deletions(-) create mode 100644 tests/unit/test_exec_post_run.bats diff --git a/lib/exec_helpers.sh b/lib/exec_helpers.sh index b0cdf6ad..f80ec951 100644 --- a/lib/exec_helpers.sh +++ b/lib/exec_helpers.sh @@ -126,18 +126,8 @@ exec_run_live() { # LOGFIX-4: export tool count for fast-trip detection in main loop. LAST_TOOL_COUNT=${_tool_count:-0} # LOGFIX-5: categorize errors into expected (tool scope) vs system (real failures). - local _expected_errors=0 _system_errors=0 - if [[ ${_error_count:-0} -gt 0 ]]; then - _expected_errors=$(grep -B1 '"is_error":true' "$output_file" 2>/dev/null \ - | grep -ciE 'permission|denied|too large|exceeds.*token|exceeds.*limit|outside.*allowed|not allowed' \ - || echo 0) - _expected_errors=$(echo "$_expected_errors" | tr -d '[:space:]') - _system_errors=$(( ${_error_count:-0} - ${_expected_errors:-0} )) - [[ $_system_errors -lt 0 ]] && _system_errors=0 - log_status "WARN" "Execution stats: Tools=${_tool_count:-0} Agents=${_agent_count:-0} Errors=${_error_count:-0} (${_expected_errors} scope, ${_system_errors} system)" - else - log_status "INFO" "Execution stats: Tools=${_tool_count:-0} Agents=${_agent_count:-0} Errors=0" - fi + exec_log_execution_stats "$output_file" \ + "${_tool_count:-0}" "${_agent_count:-0}" "${_error_count:-0}" # Extract session ID from stream-json output for session continuity. # Stream-json format has session_id in the final "result" type message. @@ -1040,3 +1030,45 @@ exec_aggregate_qa_results() { printf 'FAIL: %s (%s)\n' "$joined" "$first_fail_kind" return 1 } + +# exec_log_execution_stats — Emit the post-run Tools/Agents/Errors line. +# +# Args: +# $1 output_file — path to the NDJSON stream captured during the run +# $2 tool_count — count of "type":"tool_use" events +# $3 agent_count — count of "subtype":"task_started" events +# $4 error_count — count of "is_error":true events +# +# When error_count == 0, emits an INFO line. When error_count > 0, splits the +# error budget into expected (tool-scope: permission denied, oversize input, +# token-limit exceeded, etc.) vs. system (real CLI/runtime failures) and emits +# a WARN line. +# +# TAP-1877: the original inline version paired `grep -c | ... || echo 0` with +# `tr -d '[:space:]'`, which combined into the literal "00" string when grep +# found no matches — leaking `(00 scope, N system)` to the operator-facing +# WARN line. The fix uses the documented `tr -cd '0-9' || true` pattern +# plus `${var:-0}` so future arithmetic stays safe regardless of whether +# the pipeline produced "0", "" or a digit run. +exec_log_execution_stats() { + local output_file="$1" + local tool_count="${2:-0}" + local agent_count="${3:-0}" + local error_count="${4:-0}" + + if [[ "${error_count:-0}" -eq 0 ]]; then + log_status "INFO" "Execution stats: Tools=${tool_count:-0} Agents=${agent_count:-0} Errors=0" + return 0 + fi + + local expected_errors + expected_errors=$(grep -B1 '"is_error":true' "$output_file" 2>/dev/null \ + | grep -ciE 'permission|denied|too large|exceeds.*token|exceeds.*limit|outside.*allowed|not allowed' \ + | tr -cd '0-9' || true) + expected_errors=${expected_errors:-0} + + local system_errors=$(( ${error_count:-0} - ${expected_errors:-0} )) + [[ $system_errors -lt 0 ]] && system_errors=0 + + log_status "WARN" "Execution stats: Tools=${tool_count:-0} Agents=${agent_count:-0} Errors=${error_count:-0} (${expected_errors} scope, ${system_errors} system)" +} diff --git a/tests/unit/test_exec_post_run.bats b/tests/unit/test_exec_post_run.bats new file mode 100644 index 00000000..079c16b1 --- /dev/null +++ b/tests/unit/test_exec_post_run.bats @@ -0,0 +1,128 @@ +#!/usr/bin/env bats +# TAP-1877: exec_log_execution_stats must NOT print multi-zero counters +# in the post-run Execution stats line. The original inline implementation +# in exec_run_live paired `grep -c | ... || echo 0` with `tr -d '[:space:]'`, +# which collapsed grep's "0\n0" no-match output into the literal "00" that +# leaked through to the WARN line ("(00 scope, N system)" — observed on 9 +# of 50 lines in the 2026-05-15 → 2026-05-16 tapps-brain ralph.log). + +bats_require_minimum_version 1.5.0 + +PROJECT_ROOT="${BATS_TEST_DIRNAME}/../.." + +setup() { + export TEST_DIR="$(mktemp -d "${BATS_TEST_TMPDIR:-/tmp}/exec_post_run.XXXXXX")" + # Stub log_status so we can capture the emitted line in stdout. + log_status() { + # First arg is the level (INFO|WARN|ERROR); rest is the message. + printf '%s: %s\n' "$1" "$2" + } + export -f log_status + + # shellcheck disable=SC1090 + source "$PROJECT_ROOT/lib/exec_helpers.sh" +} + +teardown() { + rm -rf "$TEST_DIR" 2>/dev/null || true +} + +# Build a synthetic Claude NDJSON stream with a given number of is_error:true +# entries, optionally one of them matching a tool-scope pattern. +# +# Usage: make_stream +make_stream() { + local out="$1" + local total="${2:-0}" + local scope_matches="${3:-0}" + + : > "$out" + local i=0 + while (( i < scope_matches )); do + # Pair the scope-pattern line with the is_error sentinel so the + # `grep -B1 '"is_error":true' | grep -ciE 'permission|...'` filter + # picks it up. grep -B1 emits the preceding line + the matching + # line, then the inner grep matches the scope pattern on either. + printf '{"type":"tool_result","content":"permission denied to write /etc/foo"}\n' >> "$out" + printf '{"type":"tool_result","is_error":true}\n' >> "$out" + i=$(( i + 1 )) + done + local non_scope=$(( total - scope_matches )) + while (( non_scope > 0 )); do + printf '{"type":"tool_result","content":"connection reset by peer"}\n' >> "$out" + printf '{"type":"tool_result","is_error":true}\n' >> "$out" + non_scope=$(( non_scope - 1 )) + done +} + +@test "TAP-1877: zero errors → INFO line, no scope/system counters" { + local stream="$TEST_DIR/stream.log" + make_stream "$stream" 0 0 + + run exec_log_execution_stats "$stream" 7 1 0 + [[ "$status" -eq 0 ]] || fail "expected zero exit, got $status: $output" + [[ "$output" == *"INFO: Execution stats: Tools=7 Agents=1 Errors=0"* ]] \ + || fail "expected INFO line with Errors=0, got: $output" + [[ "$output" != *"scope"* ]] \ + || fail "INFO line should not mention scope/system, got: $output" +} + +@test "TAP-1877: errors with zero scope matches prints '(0 scope, N system)' — NOT '(00 scope, …)'" { + # This is the canonical regression case the ticket cites: 2 is_error + # entries, neither matching the scope pattern. The buggy code would + # emit `(00 scope, 2 system)`; the fixed code emits `(0 scope, 2 system)`. + local stream="$TEST_DIR/stream.log" + make_stream "$stream" 2 0 + + run exec_log_execution_stats "$stream" 5 0 2 + [[ "$status" -eq 0 ]] || fail "expected zero exit, got $status: $output" + [[ "$output" == *"(0 scope, "* ]] \ + || fail "expected '(0 scope, ' in WARN line, got: $output" + [[ "$output" != *"(00 scope"* ]] \ + || fail "double-zero leak still present, got: $output" + [[ "$output" == *"(0 scope, 2 system)"* ]] \ + || fail "expected '(0 scope, 2 system)' suffix, got: $output" +} + +@test "TAP-1877: mixed scope + system errors splits the count correctly" { + local stream="$TEST_DIR/stream.log" + # 3 total errors — 1 scope-pattern match (permission denied), 2 system. + make_stream "$stream" 3 1 + + run exec_log_execution_stats "$stream" 4 0 3 + [[ "$status" -eq 0 ]] || fail "expected zero exit, got $status: $output" + [[ "$output" == *"Errors=3"* ]] \ + || fail "expected Errors=3, got: $output" + [[ "$output" == *"(1 scope, 2 system)"* ]] \ + || fail "expected '(1 scope, 2 system)' suffix, got: $output" +} + +@test "TAP-1877: system clamps to 0 when scope count exceeds error count" { + # Defensive: if grep -B1 matches both a context line AND its is_error + # sentinel for the same record, scope could nominally exceed total. + # The arithmetic clamp must keep system_errors >= 0. + local stream="$TEST_DIR/stream.log" + # 1 error, but the scope pattern appears on both the context line and + # the is_error line — `grep -ciE` returns 2. + { + printf '{"type":"tool_result","content":"permission denied to write"}\n' + printf '{"type":"tool_result","is_error":true,"content":"permission denied"}\n' + } > "$stream" + + run exec_log_execution_stats "$stream" 1 0 1 + [[ "$status" -eq 0 ]] || fail "expected zero exit, got $status: $output" + # The system value MUST never go negative regardless of grep arithmetic. + [[ "$output" != *"-1 system"* && "$output" != *"-2 system"* ]] \ + || fail "system_errors leaked negative value, got: $output" +} + +@test "TAP-1877: missing output_file falls back to '0 scope' without crashing" { + # Defensive: grep on a non-existent file returns 1 with empty stdout. + # `tr -cd '0-9'` empties to "", `${var:-0}` defaults to 0. + run exec_log_execution_stats "$TEST_DIR/does-not-exist.log" 0 0 2 + [[ "$status" -eq 0 ]] || fail "expected zero exit on missing file, got $status: $output" + [[ "$output" == *"(0 scope, 2 system)"* ]] \ + || fail "expected '(0 scope, 2 system)' on missing file, got: $output" + [[ "$output" != *"(00 scope"* ]] \ + || fail "double-zero leak on missing file path, got: $output" +}