From 637b2024744d815d17d85030405a70882ac1fc48 Mon Sep 17 00:00:00 2001
From: Stephen Shao <yu.shao@amd.com>
Date: Tue, 7 Apr 2026 17:26:42 -0500
Subject: [PATCH 1/3] feat(execution): configurable log error pattern scan
 (additional_context) Allow disabling or tuning post-run log grep via
 additional_context and/or model_info:

- log_error_pattern_scan / disable_log_error_scan to skip substring scan
- log_error_benign_patterns to extend exclusions (e.g. noisy DLM logs)
- log_error_patterns to override default pattern list

Extract resolution into container_runner_helpers.resolve_log_error_scan_config;
validate new keys in CLI validators; add unit tests.

Motivation: avoid false FAILURE from benign RuntimeError: lines in suites
already validated by pytest/JUnit (e.g. pyt_dl_tool_unit_test / AIMODELS-571).
---
 src/madengine/cli/validators.py               |  36 +++++++
 src/madengine/execution/container_runner.py   |  38 ++++---
 .../execution/container_runner_helpers.py     | 100 ++++++++++++++++++
 tests/unit/test_container_runner_helpers.py   |  98 +++++++++++++++++
 4 files changed, 252 insertions(+), 20 deletions(-)
 create mode 100644 tests/unit/test_container_runner_helpers.py

diff --git a/src/madengine/cli/validators.py b/src/madengine/cli/validators.py
index d2d0a288..25706f94 100644
--- a/src/madengine/cli/validators.py
+++ b/src/madengine/cli/validators.py
@@ -170,6 +170,42 @@ def validate_additional_context_structure(context: Dict[str, Any]) -> None:
     if "guest_os" in context and not isinstance(context["guest_os"], str):
         _fail_structure("guest_os", "a string")
 
+    if "log_error_pattern_scan" in context and not isinstance(
+        context["log_error_pattern_scan"], (bool, str, int, float, type(None))
+    ):
+        _fail_structure(
+            "log_error_pattern_scan",
+            "a boolean, string, number, or null",
+        )
+
+    if "disable_log_error_scan" in context and not isinstance(
+        context["disable_log_error_scan"], (bool, str, int, float, type(None))
+    ):
+        _fail_structure(
+            "disable_log_error_scan",
+            "a boolean, string, number, or null",
+        )
+
+    if "log_error_benign_patterns" in context:
+        lebp = context["log_error_benign_patterns"]
+        if not isinstance(lebp, list) or not all(
+            isinstance(x, str) for x in lebp
+        ):
+            _fail_structure(
+                "log_error_benign_patterns",
+                "an array of strings",
+            )
+
+    if "log_error_patterns" in context:
+        lep = context["log_error_patterns"]
+        if not isinstance(lep, list) or not lep or not all(
+            isinstance(x, str) for x in lep
+        ):
+            _fail_structure(
+                "log_error_patterns",
+                "a non-empty array of strings",
+            )
+
 
 def _normalize_docker_build_arg_values(context: Dict[str, Any]) -> None:
     dba = context.get("docker_build_arg")
diff --git a/src/madengine/execution/container_runner.py b/src/madengine/execution/container_runner.py
index cfa5e523..e9a87f6f 100644
--- a/src/madengine/execution/container_runner.py
+++ b/src/madengine/execution/container_runner.py
@@ -35,6 +35,7 @@
 from madengine.utils.run_details import get_build_number, get_pipeline
 from madengine.execution.container_runner_helpers import (
     make_run_log_file_path,
+    resolve_log_error_scan_config,
     resolve_run_timeout,
 )
 
@@ -1249,27 +1250,18 @@ def run_container(
                         # Set status based on performance and error patterns
                         # First check for obvious failure patterns in the logs
                         try:
-                            # Check for common failure patterns in the log file
-                            # Note: Patterns should be specific enough to avoid false positives
-                            # from profiling tools (rocprof, etc.) that use "Error:" as log level
-                            error_patterns = [
-                                "OutOfMemoryError",
-                                "HIP out of memory",
-                                "CUDA out of memory",
-                                "RuntimeError:",  # More specific with colon
-                                "AssertionError:",
-                                "ValueError:",
-                                "SystemExit",
-                                "failed (exitcode:",  # Literal text in logs
-                                "Traceback (most recent call last)",  # Python tracebacks
-                                "FAILED",
-                                "Exception:",
-                                "ImportError:",
-                                "ModuleNotFoundError:",
-                            ]
+                            scan_logs, error_patterns, extra_benign = (
+                                resolve_log_error_scan_config(
+                                    model_info, self.additional_context
+                                )
+                            )
 
                             has_errors = False
-                            if log_file_path and os.path.exists(log_file_path):
+                            if (
+                                scan_logs
+                                and log_file_path
+                                and os.path.exists(log_file_path)
+                            ):
                                 try:
                                     # Define benign patterns to exclude from error detection
                                     # These are known warnings/info messages that should not trigger failures
@@ -1289,7 +1281,8 @@ def run_container(
                                         "rocpd_op:",                          # ROCProf operation logs
                                         "rpd_tracer:",                        # ROCProf tracer logs
                                     ]
-                                    
+                                    benign_patterns.extend(extra_benign)
+
                                     # Check for error patterns in the log (exclude our own grep commands, output messages, and benign patterns).
                                     # Use subprocess (not console.sh) so the check runs silently and does not clutter console output.
                                     for pattern in error_patterns:
@@ -1326,6 +1319,11 @@ def run_container(
                                             pass  # Error checking is optional; treat as no match
                                 except Exception:
                                     pass  # Error checking is optional
+                            elif not scan_logs:
+                                self.rich_console.print(
+                                    "[dim]ℹ️  Log error pattern scan disabled "
+                                    "(log_error_pattern_scan / disable_log_error_scan).[/dim]"
+                                )
 
                             # Status logic: Must have performance AND no errors to be considered success
                             # Exception: Worker nodes in multi-node training (MAD_COLLECT_METRICS=false)
diff --git a/src/madengine/execution/container_runner_helpers.py b/src/madengine/execution/container_runner_helpers.py
index c6f7a25f..7a92251b 100644
--- a/src/madengine/execution/container_runner_helpers.py
+++ b/src/madengine/execution/container_runner_helpers.py
@@ -7,6 +7,106 @@
 
 import typing
 
+# Default substrings matched in container run logs post-hoc (see ContainerRunner).
+DEFAULT_LOG_ERROR_PATTERNS: typing.Tuple[str, ...] = (
+    "OutOfMemoryError",
+    "HIP out of memory",
+    "CUDA out of memory",
+    "RuntimeError:",
+    "AssertionError:",
+    "ValueError:",
+    "SystemExit",
+    "failed (exitcode:",
+    "Traceback (most recent call last)",
+    "FAILED",
+    "Exception:",
+    "ImportError:",
+    "ModuleNotFoundError:",
+)
+
+
+def _coerce_bool(value: typing.Any, *, default: bool) -> bool:
+    """Interpret JSON/CLI scalars as bool; fall back to *default* if None."""
+    if value is None:
+        return default
+    if isinstance(value, bool):
+        return value
+    if isinstance(value, (int, float)) and not isinstance(value, bool):
+        return value != 0
+    if isinstance(value, str):
+        s = value.strip().lower()
+        if s in ("0", "false", "no", "off", ""):
+            return False
+        if s in ("1", "true", "yes", "on"):
+            return True
+    return default
+
+
+def _pick_context_over_model(
+    model_info: typing.Dict,
+    additional_context: typing.Dict,
+    key: str,
+    default: typing.Any = None,
+) -> typing.Any:
+    """Resolve key from model_info, overridden by additional_context when present."""
+    ctx = additional_context or {}
+    mi = model_info or {}
+    if key in ctx:
+        return ctx[key]
+    if key in mi:
+        return mi[key]
+    return default
+
+
+def resolve_log_error_scan_config(
+    model_info: typing.Dict,
+    additional_context: typing.Optional[typing.Dict] = None,
+) -> typing.Tuple[bool, typing.List[str], typing.List[str]]:
+    """
+    Resolve whether to scan run logs for error substrings and which patterns to use.
+
+    Keys (in ``additional_context`` and/or ``model_info``; context wins):
+
+    - ``log_error_pattern_scan`` (default True): set False to skip grep-based failure detection.
+    - ``disable_log_error_scan`` (default False): if True, same as disabling scan (alias).
+    - ``log_error_benign_patterns``: list of extra substrings/regex fragments excluded from matches.
+    - ``log_error_patterns``: non-empty list of strings replaces the default error pattern list.
+
+    Returns:
+        (scan_enabled, error_patterns, extra_benign_patterns)
+    """
+    ctx = additional_context if additional_context is not None else {}
+    mi = model_info if model_info is not None else {}
+
+    disable = _coerce_bool(
+        _pick_context_over_model(mi, ctx, "disable_log_error_scan", False),
+        default=False,
+    )
+    scan_on = _coerce_bool(
+        _pick_context_over_model(mi, ctx, "log_error_pattern_scan", True),
+        default=True,
+    )
+    scan_enabled = (not disable) and scan_on
+
+    raw_benign_mi = mi.get("log_error_benign_patterns")
+    raw_benign_ctx = ctx.get("log_error_benign_patterns")
+    extra_benign: typing.List[str] = []
+    for part in (raw_benign_mi, raw_benign_ctx):
+        if isinstance(part, list):
+            extra_benign.extend(str(x) for x in part if x is not None)
+
+    custom_patterns = _pick_context_over_model(mi, ctx, "log_error_patterns", None)
+    if (
+        isinstance(custom_patterns, list)
+        and len(custom_patterns) > 0
+        and all(isinstance(x, str) for x in custom_patterns)
+    ):
+        error_patterns = list(custom_patterns)
+    else:
+        error_patterns = list(DEFAULT_LOG_ERROR_PATTERNS)
+
+    return scan_enabled, error_patterns, extra_benign
+
 
 def resolve_run_timeout(
     model_info: typing.Dict,
diff --git a/tests/unit/test_container_runner_helpers.py b/tests/unit/test_container_runner_helpers.py
new file mode 100644
index 00000000..a389daa8
--- /dev/null
+++ b/tests/unit/test_container_runner_helpers.py
@@ -0,0 +1,98 @@
+"""Unit tests for container_runner_helpers (including log error scan config)."""
+
+import pytest
+
+from madengine.execution.container_runner_helpers import (
+    DEFAULT_LOG_ERROR_PATTERNS,
+    resolve_log_error_scan_config,
+)
+
+
+class TestResolveLogErrorScanConfig:
+    def test_defaults_enable_scan_and_default_patterns(self):
+        enabled, patterns, extra = resolve_log_error_scan_config({}, {})
+        assert enabled is True
+        assert patterns == list(DEFAULT_LOG_ERROR_PATTERNS)
+        assert extra == []
+
+    def test_disable_log_error_scan(self):
+        enabled, _, extra = resolve_log_error_scan_config(
+            {}, {"disable_log_error_scan": True}
+        )
+        assert enabled is False
+        assert extra == []
+
+    def test_log_error_pattern_scan_false_string(self):
+        enabled, _, _ = resolve_log_error_scan_config(
+            {}, {"log_error_pattern_scan": "false"}
+        )
+        assert enabled is False
+
+    def test_disable_wins_over_scan_true(self):
+        enabled, _, _ = resolve_log_error_scan_config(
+            {},
+            {"disable_log_error_scan": True, "log_error_pattern_scan": True},
+        )
+        assert enabled is False
+
+    def test_context_overrides_model(self):
+        enabled, _, _ = resolve_log_error_scan_config(
+            {"log_error_pattern_scan": True},
+            {"log_error_pattern_scan": False},
+        )
+        assert enabled is False
+
+    def test_model_only_false_when_no_context(self):
+        enabled, _, _ = resolve_log_error_scan_config(
+            {"log_error_pattern_scan": False},
+            {},
+        )
+        assert enabled is False
+
+    def test_extra_benign_merges_model_then_context(self):
+        _, _, extra = resolve_log_error_scan_config(
+            {"log_error_benign_patterns": ["a", "b"]},
+            {"log_error_benign_patterns": ["c"]},
+        )
+        assert extra == ["a", "b", "c"]
+
+    def test_custom_log_error_patterns(self):
+        enabled, patterns, _ = resolve_log_error_scan_config(
+            {},
+            {"log_error_patterns": ["OOM", "Killed"]},
+        )
+        assert enabled is True
+        assert patterns == ["OOM", "Killed"]
+
+    def test_invalid_custom_patterns_falls_back_to_default(self):
+        enabled, patterns, _ = resolve_log_error_scan_config(
+            {},
+            {"log_error_patterns": []},
+        )
+        assert enabled is True
+        assert patterns == list(DEFAULT_LOG_ERROR_PATTERNS)
+
+    def test_invalid_benign_type_skipped(self):
+        _, _, extra = resolve_log_error_scan_config(
+            {"log_error_benign_patterns": "not-a-list"},
+            {},
+        )
+        assert extra == []
+
+    @pytest.mark.parametrize(
+        "raw,expected",
+        [
+            (False, False),
+            ("no", False),
+            ("off", False),
+            ("0", False),
+            (True, True),
+            ("true", True),
+            (1, True),
+        ],
+    )
+    def test_scan_toggle_coercion(self, raw, expected):
+        enabled, _, _ = resolve_log_error_scan_config(
+            {}, {"log_error_pattern_scan": raw}
+        )
+        assert enabled is expected

From 7caadad60f929a63f0b223d15a8c0d8c5f164cfa Mon Sep 17 00:00:00 2001
From: Stephen Shao <yu.shao@amd.com>
Date: Wed, 8 Apr 2026 09:54:18 -0500
Subject: [PATCH 2/3] feat(execution): configurable log error pattern scan;
 document and drop duplicate flag Add optional additional_context / model keys
 to control post-run log substring checks (log_error_pattern_scan,
 log_error_benign_patterns, log_error_patterns). Resolve settings in
 container_runner_helpers; wire into ContainerRunner; validate CLI types.
 Remove disable_log_error_scan in favor of log_error_pattern_scan only.

Document behavior in README, docs/configuration.md, cli-reference, usage, profiling,
and docs index. Cross-link troubleshooting for benign RuntimeError: false failures.

Tests: extend test_container_runner_helpers; unit suite passes.
---
 README.md                                     | 12 ++++++-
 docs/README.md                                |  2 +-
 docs/cli-reference.md                         | 10 ++++++
 docs/configuration.md                         | 35 +++++++++++++++++++
 docs/profiling.md                             |  2 ++
 docs/usage.md                                 |  2 ++
 src/madengine/cli/validators.py               |  8 -----
 src/madengine/execution/container_runner.py   |  2 +-
 .../execution/container_runner_helpers.py     |  8 +----
 tests/unit/test_container_runner_helpers.py   | 14 --------
 10 files changed, 63 insertions(+), 32 deletions(-)

diff --git a/README.md b/README.md
index b7bf745e..ce0cfbd4 100644
--- a/README.md
+++ b/README.md
@@ -30,6 +30,7 @@ madengine is a modern CLI tool for running Large Language Models (LLMs) and Deep
 - [Reporting and Database](#-reporting-and-database)
 - [Installation](#-installation)
 - [Tips & Best Practices](#-tips--best-practices)
+  - [Log error pattern scan](#log-error-pattern-scan)
   - [Exit codes and CI](#exit-codes-and-ci)
 - [Contributing](#-contributing)
 - [License](#-license)
@@ -46,6 +47,7 @@ madengine is a modern CLI tool for running Large Language Models (LLMs) and Deep
 - **🎯 ROCprofv3 Profiles** - 8 pre-configured profiles for compute/memory/communication bottleneck analysis
 - **🔍 Environment Validation** - TheRock ROCm detection and validation tools
 - **⚙️ Intelligent Defaults** - Minimal K8s configs with automatic preset application
+- **📋 Configurable log scan** - Optional `--additional-context` keys to disable or tune post-run log substring checks (see [Log error pattern scan](#log-error-pattern-scan))
 
 ## 🚀 Quick Start
 
@@ -122,7 +124,7 @@ For detailed command options, see the **[CLI Command Reference](docs/cli-referen
 | [Usage Guide](docs/usage.md) | Commands, workflows, and examples |
 | **[CLI Reference](docs/cli-reference.md)** | **Detailed command options and examples** |
 | [Deployment](docs/deployment.md) | Kubernetes and SLURM deployment |
-| [Configuration](docs/configuration.md) | Advanced configuration options |
+| [Configuration](docs/configuration.md) | Advanced options; [run log error pattern scan](docs/configuration.md#run-phase-log-error-pattern-scan) |
 | [Batch Build](docs/batch-build.md) | Selective builds for CI/CD |
 | [Launchers](docs/launchers.md) | Distributed training frameworks |
 | [Profiling](docs/profiling.md) | Performance analysis tools |
@@ -553,6 +555,13 @@ See [Installation Guide](docs/installation.md) for detailed instructions.
 - **Enable verbose logging** (`--verbose`) when debugging issues
 - **Use `--live-output`** for real-time monitoring of long-running operations
 
+### Log error pattern scan
+
+After a local Docker run, madengine can scan the captured **run log** for common failure substrings (for example `RuntimeError:`, `CUDA out of memory`, `Traceback`). That helps catch hard failures when exit codes are ambiguous, but some workloads log benign `RuntimeError:` text while tests still pass.
+
+- **Disable** the scan when another signal is authoritative (e.g. pytest/JUnit inside the image): set `"log_error_pattern_scan": false` in `--additional-context` or in the model entry in `models.json`. See [Configuration — Run phase: log error pattern scan](docs/configuration.md#run-phase-log-error-pattern-scan).
+- **Extend exclusions** with `log_error_benign_patterns` (list of strings), or **replace** the default pattern list with `log_error_patterns` (non-empty list of strings) for advanced cases.
+
 ### CI / Jenkins
 
 - **Exit codes:** The CLI uses fixed exit codes (`ExitCode` in `madengine.cli.constants`, e.g. `SUCCESS=0`, `RUN_FAILURE=3`, `INVALID_ARGS=4`). Pipelines should treat **non-zero** as failure; no log scraping is required for pass/fail.
@@ -597,6 +606,7 @@ madengine build --tags model --clean-docker-cache --verbose
 
 **Common Issues:**
 - **False failures with profiling**: If models show FAILURE but have performance metrics, see [Profiling Troubleshooting](docs/profiling.md#false-failure-detection-with-rocprof)
+- **False failures from `RuntimeError:` in logs**: If the workload logs expected exception text but tests pass, disable or tune the scan with `log_error_pattern_scan` / `log_error_benign_patterns` — see [Configuration](docs/configuration.md#run-phase-log-error-pattern-scan)
 - **ROCProf log errors**: Messages like `E20251230` are informational logs, not errors (fixed in v2.0+)
 - **Configuration errors**: Validate JSON with `python -m json.tool your-config.json`
 
diff --git a/docs/README.md b/docs/README.md
index 61e5d2d8..d29d121b 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -15,7 +15,7 @@ Complete documentation for madengine - AI model automation and distributed bench
 
 | Guide | Description |
 |-------|-------------|
-| [Configuration](configuration.md) | Advanced configuration options |
+| [Configuration](configuration.md) | Advanced configuration options (includes [run log error pattern scan](configuration.md#run-phase-log-error-pattern-scan)) |
 | [Batch Build](batch-build.md) | Selective builds with batch manifests |
 | [Deployment](deployment.md) | Kubernetes and SLURM deployment |
 | [Launchers](launchers.md) | Multi-node training frameworks |
diff --git a/docs/cli-reference.md b/docs/cli-reference.md
index bfd1ca51..99eee192 100644
--- a/docs/cli-reference.md
+++ b/docs/cli-reference.md
@@ -592,6 +592,16 @@ For complex configurations, use JSON files with `--additional-context-file`:
 
 To run on specific nodes, add `"nodelist": "node01,node02"` to the `slurm` section. When set, the job runs only on those nodes and node health preflight is skipped. See [examples/slurm-configs/basic/03-multi-node-basic-nodelist.json](../examples/slurm-configs/basic/03-multi-node-basic-nodelist.json).
 
+### Run phase: log error pattern scan (optional)
+
+These keys apply to **local Docker runs** when madengine post-processes the run log. Use them when substring matches cause false `FAILURE` status (for example benign `RuntimeError:` lines). Full details: [Configuration — Run phase: log error pattern scan](configuration.md#run-phase-log-error-pattern-scan).
+
+| Key | Description |
+|-----|-------------|
+| `log_error_pattern_scan` | Default `true`. Set `false` to skip grep-based log failure detection. |
+| `log_error_benign_patterns` | Array of extra strings to exclude from matching (merged with built-in benign list). |
+| `log_error_patterns` | Non-empty array replaces the default substring list (advanced). |
+
 ---
 
 ## Environment Variables
diff --git a/docs/configuration.md b/docs/configuration.md
index c1d4d13d..f6ae79a1 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -84,6 +84,41 @@ For production deployments:
 The **run** command does NOT require these values because it can detect GPU vendor at runtime.
 Defaults only apply to the **build** command where Dockerfile selection requires them.
 
+## Run phase: log error pattern scan
+
+After a successful container run, madengine may scan the **run log file** for fixed substrings (for example `RuntimeError:`, `OutOfMemoryError`, `Traceback (most recent call last)`). If a match is found, the run can be marked `FAILURE` even when performance metrics exist—intended as a safety net when logs show obvious Python or OOM errors.
+
+Some suites (for example layer unit tests) intentionally print benign `RuntimeError:` text while pytest still passes. In those cases you can **disable** the scan or **narrow** what counts as an error.
+
+Keys can be set in `--additional-context` / `--additional-context-file`, or on the **model** entry in `models.json` (same keys). **Runtime context overrides the model** when both are set.
+
+| Key | Type | Default | Description |
+|-----|------|---------|-------------|
+| `log_error_pattern_scan` | bool or string/number (coerced) | `true` | If `false`, skip substring-based log failure detection entirely (rely on exit codes and other signals). |
+| `log_error_benign_patterns` | array of strings | `[]` | Extra lines to **exclude** before matching (appended to built-in exclusions such as ROCProf/metrics noise). Model list is merged first, then context list. |
+| `log_error_patterns` | array of strings (non-empty) | (built-in list) | If set, **replaces** the default pattern list. Use only when you need a custom allowlist of failure substrings. |
+
+**Example — disable scan for a tag (pytest is authoritative):**
+
+```bash
+madengine run --tags my_unit_test_suite \
+  --additional-context '{"gpu_vendor": "AMD", "guest_os": "UBUNTU", "log_error_pattern_scan": false}'
+```
+
+**Example — extra benign substrings (prefer stable strings from real logs):**
+
+```json
+{
+  "gpu_vendor": "AMD",
+  "guest_os": "UBUNTU",
+  "log_error_benign_patterns": [
+    "expected benign fragment from workload log"
+  ]
+}
+```
+
+Disabling the scan does **not** change performance metric extraction from the log; it only affects the post-hoc grep used to set `has_errors` for status.
+
 ## Basic Configuration
 
 **gpu_vendor** (case-insensitive):
diff --git a/docs/profiling.md b/docs/profiling.md
index f4323d69..fe926844 100644
--- a/docs/profiling.md
+++ b/docs/profiling.md
@@ -841,6 +841,8 @@ ROCProf uses glog-style logging where `E` prefix means "Error level log" (not an
 
 **Fixed in:** madengine v2.0+
 
+For false failures **not** caused by ROCProf (for example workloads that print benign `RuntimeError:` text), see [Configuration — Run phase: log error pattern scan](configuration.md#run-phase-log-error-pattern-scan) (`log_error_pattern_scan`, `log_error_benign_patterns`).
+
 **Verification:**
 ```bash
 # Run with profiling - should show SUCCESS status
diff --git a/docs/usage.md b/docs/usage.md
index f29f8426..6cf4c160 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -385,6 +385,8 @@ madengine run --tags model --verbose --live-output
 madengine run --tags model --keep-alive --verbose --live-output
 ```
 
+If the run is marked `FAILURE` because the log contains benign substrings (for example `RuntimeError:`) while the workload actually passed, configure [log error pattern scan](configuration.md#run-phase-log-error-pattern-scan) (`log_error_pattern_scan`, `log_error_benign_patterns`).
+
 ### Clean Rebuild
 
 ```bash
diff --git a/src/madengine/cli/validators.py b/src/madengine/cli/validators.py
index 25706f94..d99e87f7 100644
--- a/src/madengine/cli/validators.py
+++ b/src/madengine/cli/validators.py
@@ -178,14 +178,6 @@ def validate_additional_context_structure(context: Dict[str, Any]) -> None:
             "a boolean, string, number, or null",
         )
 
-    if "disable_log_error_scan" in context and not isinstance(
-        context["disable_log_error_scan"], (bool, str, int, float, type(None))
-    ):
-        _fail_structure(
-            "disable_log_error_scan",
-            "a boolean, string, number, or null",
-        )
-
     if "log_error_benign_patterns" in context:
         lebp = context["log_error_benign_patterns"]
         if not isinstance(lebp, list) or not all(
diff --git a/src/madengine/execution/container_runner.py b/src/madengine/execution/container_runner.py
index e9a87f6f..40550248 100644
--- a/src/madengine/execution/container_runner.py
+++ b/src/madengine/execution/container_runner.py
@@ -1322,7 +1322,7 @@ def run_container(
                             elif not scan_logs:
                                 self.rich_console.print(
                                     "[dim]ℹ️  Log error pattern scan disabled "
-                                    "(log_error_pattern_scan / disable_log_error_scan).[/dim]"
+                                    "(log_error_pattern_scan).[/dim]"
                                 )
 
                             # Status logic: Must have performance AND no errors to be considered success
diff --git a/src/madengine/execution/container_runner_helpers.py b/src/madengine/execution/container_runner_helpers.py
index 7a92251b..35f4fba4 100644
--- a/src/madengine/execution/container_runner_helpers.py
+++ b/src/madengine/execution/container_runner_helpers.py
@@ -68,7 +68,6 @@ def resolve_log_error_scan_config(
     Keys (in ``additional_context`` and/or ``model_info``; context wins):
 
     - ``log_error_pattern_scan`` (default True): set False to skip grep-based failure detection.
-    - ``disable_log_error_scan`` (default False): if True, same as disabling scan (alias).
     - ``log_error_benign_patterns``: list of extra substrings/regex fragments excluded from matches.
     - ``log_error_patterns``: non-empty list of strings replaces the default error pattern list.
 
@@ -78,15 +77,10 @@ def resolve_log_error_scan_config(
     ctx = additional_context if additional_context is not None else {}
     mi = model_info if model_info is not None else {}
 
-    disable = _coerce_bool(
-        _pick_context_over_model(mi, ctx, "disable_log_error_scan", False),
-        default=False,
-    )
-    scan_on = _coerce_bool(
+    scan_enabled = _coerce_bool(
         _pick_context_over_model(mi, ctx, "log_error_pattern_scan", True),
         default=True,
     )
-    scan_enabled = (not disable) and scan_on
 
     raw_benign_mi = mi.get("log_error_benign_patterns")
     raw_benign_ctx = ctx.get("log_error_benign_patterns")
diff --git a/tests/unit/test_container_runner_helpers.py b/tests/unit/test_container_runner_helpers.py
index a389daa8..fcf58b10 100644
--- a/tests/unit/test_container_runner_helpers.py
+++ b/tests/unit/test_container_runner_helpers.py
@@ -15,26 +15,12 @@ def test_defaults_enable_scan_and_default_patterns(self):
         assert patterns == list(DEFAULT_LOG_ERROR_PATTERNS)
         assert extra == []
 
-    def test_disable_log_error_scan(self):
-        enabled, _, extra = resolve_log_error_scan_config(
-            {}, {"disable_log_error_scan": True}
-        )
-        assert enabled is False
-        assert extra == []
-
     def test_log_error_pattern_scan_false_string(self):
         enabled, _, _ = resolve_log_error_scan_config(
             {}, {"log_error_pattern_scan": "false"}
         )
         assert enabled is False
 
-    def test_disable_wins_over_scan_true(self):
-        enabled, _, _ = resolve_log_error_scan_config(
-            {},
-            {"disable_log_error_scan": True, "log_error_pattern_scan": True},
-        )
-        assert enabled is False
-
     def test_context_overrides_model(self):
         enabled, _, _ = resolve_log_error_scan_config(
             {"log_error_pattern_scan": True},

From b62e8b81df86ab4b5c1b5a913c321947cb170d5a Mon Sep 17 00:00:00 2001
From: Stephen Shao <yu.shao@amd.com>
Date: Wed, 8 Apr 2026 16:18:13 -0500
Subject: [PATCH 3/3] refactor: scan container logs in Python; validate
 log_error_* in tests Replace sh/grep-based log error detection with
 in-process scanning to avoid shell quoting and injection issues when patterns
 come from model_info or additional_context.

- Add log_text_has_error_pattern() with literal benign substrings vs
  explicit regex benign rules (ROCProf).
- Wire ContainerRunner to read the log once and use the helper.
- Document log_error_benign_patterns as literal substrings in helpers.
- Extend validate_additional_context tests for log_error_pattern_scan,
  log_error_benign_patterns, and log_error_patterns (valid + invalid).
---
 src/madengine/execution/container_runner.py   |  81 ++++++------
 .../execution/container_runner_helpers.py     |  49 +++++++-
 tests/unit/test_container_runner_helpers.py   |  48 +++++++
 tests/unit/test_validators.py                 | 117 ++++++++++++++++++
 4 files changed, 248 insertions(+), 47 deletions(-)

diff --git a/src/madengine/execution/container_runner.py b/src/madengine/execution/container_runner.py
index 40550248..0b561f09 100644
--- a/src/madengine/execution/container_runner.py
+++ b/src/madengine/execution/container_runner.py
@@ -34,6 +34,7 @@
 from madengine.utils.path_utils import scripts_base_dir_from
 from madengine.utils.run_details import get_build_number, get_pipeline
 from madengine.execution.container_runner_helpers import (
+    log_text_has_error_pattern,
     make_run_log_file_path,
     resolve_log_error_scan_config,
     resolve_run_timeout,
@@ -1263,60 +1264,48 @@ def run_container(
                                 and os.path.exists(log_file_path)
                             ):
                                 try:
-                                    # Define benign patterns to exclude from error detection
-                                    # These are known warnings/info messages that should not trigger failures
-                                    benign_patterns = [
+                                    # Benign: literal substrings (incl. user extra_benign) vs regex (ROCProf lines).
+                                    benign_substrings = [
                                         "Failed to establish connection to the metrics exporter agent",
                                         "RpcError: Running out of retries to initialize the metrics agent",
                                         "Metrics will not be exported",
                                         "FutureWarning",
-                                        # ROCProf/glog logging patterns (E/W/I prefixes are log levels, not errors)
-                                        r"^E[0-9]{8}.*generateRocpd\.cpp",  # ROCProf error-level logs
-                                        r"^W[0-9]{8}.*simple_timer\.cpp",    # ROCProf warning-level logs
-                                        r"^W[0-9]{8}.*generateRocpd\.cpp",   # ROCProf warning-level logs
-                                        r"^E[0-9]{8}.*tool\.cpp",            # ROCProf tool logs
-                                        "Opened result file:",                # ROCProf result file messages
-                                        "SQLite3 generation ::",              # ROCProf SQLite messages
-                                        r"\[rocprofv3\]",                     # ROCProf v3 messages
-                                        "rocpd_op:",                          # ROCProf operation logs
-                                        "rpd_tracer:",                        # ROCProf tracer logs
+                                        "Opened result file:",
+                                        "SQLite3 generation ::",
+                                        "rocpd_op:",
+                                        "rpd_tracer:",
                                     ]
-                                    benign_patterns.extend(extra_benign)
+                                    benign_substrings.extend(extra_benign)
+                                    benign_regexes = [
+                                        # ROCProf/glog: E/W prefixes are log levels, not app errors
+                                        r"^E[0-9]{8}.*generateRocpd\.cpp",
+                                        r"^W[0-9]{8}.*simple_timer\.cpp",
+                                        r"^W[0-9]{8}.*generateRocpd\.cpp",
+                                        r"^E[0-9]{8}.*tool\.cpp",
+                                        r"\[rocprofv3\]",
+                                    ]
+
+                                    # Scan in Python (no shell; literals vs regex benign rules are explicit).
+                                    with open(
+                                        log_file_path,
+                                        "r",
+                                        encoding="utf-8",
+                                        errors="ignore",
+                                    ) as _lf:
+                                        log_scan_text = _lf.read()
 
-                                    # Check for error patterns in the log (exclude our own grep commands, output messages, and benign patterns).
-                                    # Use subprocess (not console.sh) so the check runs silently and does not clutter console output.
                                     for pattern in error_patterns:
-                                        # Build exclusion regex: our own commands, output messages, and benign patterns.
-                                        # Use re.escape(pattern) so parentheses and other special chars are safe in grep -E.
-                                        pattern_escaped = re.escape(pattern)
-                                        exclusions = f"(grep -q.*{pattern_escaped}|Found error pattern.*{pattern_escaped}"
-                                        for benign in benign_patterns:
-                                            # Escape special regex characters in benign patterns
-                                            escaped_benign = benign.replace(".", r"\.").replace("(", r"\(").replace(")", r"\)")
-                                            exclusions += f"|{escaped_benign}"
-                                        exclusions += ")"
-                                        # Match pattern literally in the filtered log (grep -F avoids regex issues)
-                                        error_check_cmd = [
-                                            "sh",
-                                            "-c",
-                                            f"grep -v -E '{exclusions}' {log_file_path} | grep -F -q -- '{pattern}' && echo 'FOUND' || echo 'NOT_FOUND'",
-                                        ]
-                                        try:
-                                            proc = subprocess.run(
-                                                error_check_cmd,
-                                                capture_output=True,
-                                                text=True,
-                                                timeout=60,
+                                        if log_text_has_error_pattern(
+                                            log_scan_text,
+                                            pattern,
+                                            benign_substrings,
+                                            benign_regexes,
+                                        ):
+                                            has_errors = True
+                                            print(
+                                                f"Found error pattern '{pattern}' in logs"
                                             )
-                                            result = (proc.stdout or "").strip()
-                                            if result == "FOUND":
-                                                has_errors = True
-                                                print(
-                                                    f"Found error pattern '{pattern}' in logs"
-                                                )
-                                                break
-                                        except (subprocess.TimeoutExpired, OSError):
-                                            pass  # Error checking is optional; treat as no match
+                                            break
                                 except Exception:
                                     pass  # Error checking is optional
                             elif not scan_logs:
diff --git a/src/madengine/execution/container_runner_helpers.py b/src/madengine/execution/container_runner_helpers.py
index 35f4fba4..4f60b45a 100644
--- a/src/madengine/execution/container_runner_helpers.py
+++ b/src/madengine/execution/container_runner_helpers.py
@@ -5,6 +5,7 @@
 Extracted so run_container logic is easier to test and maintain.
 """
 
+import re
 import typing
 
 # Default substrings matched in container run logs post-hoc (see ContainerRunner).
@@ -68,7 +69,8 @@ def resolve_log_error_scan_config(
     Keys (in ``additional_context`` and/or ``model_info``; context wins):
 
     - ``log_error_pattern_scan`` (default True): set False to skip grep-based failure detection.
-    - ``log_error_benign_patterns``: list of extra substrings/regex fragments excluded from matches.
+    - ``log_error_benign_patterns``: list of extra **literal** substrings; a log line containing
+      any of them is excluded from error matching (not interpreted as regex).
     - ``log_error_patterns``: non-empty list of strings replaces the default error pattern list.
 
     Returns:
@@ -102,6 +104,51 @@ def resolve_log_error_scan_config(
     return scan_enabled, error_patterns, extra_benign
 
 
+def log_text_has_error_pattern(
+    log_text: str,
+    pattern: str,
+    benign_substrings: typing.Sequence[str],
+    benign_regexes: typing.Sequence[str] = (),
+) -> bool:
+    """
+    Whether *log_text* contains a literal *pattern* on some line that is not excluded.
+
+    Exclusions (same intent as the old ``grep -v -E | grep -F`` pipeline):
+
+    - Meta lines mentioning our own ``grep`` / "Found error pattern" machinery.
+    - *benign_substrings*: line skipped if any string appears as a **literal** substring.
+    - *benign_regexes*: line skipped if any compiled regex matches (for built-in ROCProf rules).
+
+    User-supplied benign entries should use *benign_substrings* only so regex metacharacters
+    in config are not interpreted unless explicitly added to *benign_regexes*.
+    """
+    pattern_escaped = re.escape(pattern)
+    try:
+        meta_excl = re.compile(
+            f"(grep -q.*{pattern_escaped}|Found error pattern.*{pattern_escaped})"
+        )
+    except re.error:
+        return False
+
+    compiled_benign: typing.List[re.Pattern[str]] = []
+    for rx in benign_regexes:
+        try:
+            compiled_benign.append(re.compile(rx))
+        except re.error:
+            continue
+
+    for line in log_text.splitlines():
+        if meta_excl.search(line):
+            continue
+        if any(s in line for s in benign_substrings):
+            continue
+        if any(br.search(line) for br in compiled_benign):
+            continue
+        if pattern in line:
+            return True
+    return False
+
+
 def resolve_run_timeout(
     model_info: typing.Dict,
     cli_timeout: int,
diff --git a/tests/unit/test_container_runner_helpers.py b/tests/unit/test_container_runner_helpers.py
index fcf58b10..a4d96539 100644
--- a/tests/unit/test_container_runner_helpers.py
+++ b/tests/unit/test_container_runner_helpers.py
@@ -4,6 +4,7 @@
 
 from madengine.execution.container_runner_helpers import (
     DEFAULT_LOG_ERROR_PATTERNS,
+    log_text_has_error_pattern,
     resolve_log_error_scan_config,
 )
 
@@ -82,3 +83,50 @@ def test_scan_toggle_coercion(self, raw, expected):
             {}, {"log_error_pattern_scan": raw}
         )
         assert enabled is expected
+
+
+class TestLogTextHasErrorPattern:
+    def test_finds_literal_pattern(self):
+        log = "line1\nRuntimeError: boom\n"
+        assert log_text_has_error_pattern(log, "RuntimeError:", [])
+
+    def test_respects_benign_substring(self):
+        log = "ignore FutureWarning in this line\n"
+        assert not log_text_has_error_pattern(
+            log,
+            "FutureWarning",
+            ["FutureWarning"],
+            (),
+        )
+
+    def test_quotes_in_pattern_no_shell(self):
+        """Patterns with quotes must match literally; must not raise or crash."""
+        log = "msg: can't happen\n"
+        assert log_text_has_error_pattern(log, "can't happen", [])
+
+    def test_excludes_grep_meta_line(self):
+        log = "some grep -q stuff RuntimeError: x\nreal RuntimeError: bad\n"
+        # First line matches exclusion grep -q.*RuntimeError
+        assert log_text_has_error_pattern(log, "RuntimeError:", [], ())
+
+    def test_regex_benign_excludes_rocprof_style_line(self):
+        log = (
+            "E12345678  generateRocpd.cpp: noise\n"
+            "clean RuntimeError: real issue\n"
+        )
+        assert log_text_has_error_pattern(
+            log,
+            "RuntimeError:",
+            [],
+            (r"^E[0-9]{8}.*generateRocpd\.cpp",),
+        )
+
+    def test_user_benign_literal_parentheses(self):
+        # User-config benign strings must be literal substrings (not broken by ad-hoc regex escaping).
+        log = "info (benign marker) ok\nRuntimeError: real\n"
+        assert log_text_has_error_pattern(
+            log,
+            "RuntimeError:",
+            ["(benign marker)"],
+            (),
+        )
diff --git a/tests/unit/test_validators.py b/tests/unit/test_validators.py
index cb74cce0..6a78c52d 100644
--- a/tests/unit/test_validators.py
+++ b/tests/unit/test_validators.py
@@ -194,3 +194,120 @@ def test_validate_additional_context_tools_list_ok(self):
         ctx = '{"gpu_vendor": "AMD", "guest_os": "UBUNTU", "tools": []}'
         result = validate_additional_context(additional_context=ctx)
         assert result["tools"] == []
+
+    def test_validate_additional_context_log_error_keys_accepted(self):
+        """Valid types for log scan keys pass structure validation."""
+        ctx = json.dumps(
+            {
+                "gpu_vendor": "AMD",
+                "guest_os": "UBUNTU",
+                "log_error_pattern_scan": False,
+                "log_error_benign_patterns": ["noise", "(ok)"],
+                "log_error_patterns": ["OOM", "Killed"],
+            }
+        )
+        result = validate_additional_context(additional_context=ctx)
+        assert result["log_error_pattern_scan"] is False
+        assert result["log_error_benign_patterns"] == ["noise", "(ok)"]
+        assert result["log_error_patterns"] == ["OOM", "Killed"]
+
+    @pytest.mark.parametrize(
+        "leps",
+        [
+            True,
+            "true",
+            0,
+            1,
+            None,
+        ],
+    )
+    def test_validate_additional_context_log_error_pattern_scan_coercible_types(
+        self, leps
+    ):
+        ctx = json.dumps(
+            {
+                "gpu_vendor": "AMD",
+                "guest_os": "UBUNTU",
+                "log_error_pattern_scan": leps,
+            }
+        )
+        result = validate_additional_context(additional_context=ctx)
+        assert result["log_error_pattern_scan"] == leps
+
+    def test_validate_additional_context_log_error_pattern_scan_rejects_bad_type(self):
+        bad = json.dumps(
+            {
+                "gpu_vendor": "AMD",
+                "guest_os": "UBUNTU",
+                "log_error_pattern_scan": [],
+            }
+        )
+        with pytest.raises(typer.Exit) as exc_info:
+            validate_additional_context(additional_context=bad)
+        assert exc_info.value.exit_code == ExitCode.INVALID_ARGS
+
+    def test_validate_additional_context_log_error_benign_patterns_rejects_non_list(
+        self,
+    ):
+        bad = json.dumps(
+            {
+                "gpu_vendor": "AMD",
+                "guest_os": "UBUNTU",
+                "log_error_benign_patterns": "not-a-list",
+            }
+        )
+        with pytest.raises(typer.Exit) as exc_info:
+            validate_additional_context(additional_context=bad)
+        assert exc_info.value.exit_code == ExitCode.INVALID_ARGS
+
+    def test_validate_additional_context_log_error_benign_patterns_rejects_non_strings(
+        self,
+    ):
+        bad = json.dumps(
+            {
+                "gpu_vendor": "AMD",
+                "guest_os": "UBUNTU",
+                "log_error_benign_patterns": ["a", 1],
+            }
+        )
+        with pytest.raises(typer.Exit) as exc_info:
+            validate_additional_context(additional_context=bad)
+        assert exc_info.value.exit_code == ExitCode.INVALID_ARGS
+
+    def test_validate_additional_context_log_error_patterns_rejects_empty_list(self):
+        bad = json.dumps(
+            {
+                "gpu_vendor": "AMD",
+                "guest_os": "UBUNTU",
+                "log_error_patterns": [],
+            }
+        )
+        with pytest.raises(typer.Exit) as exc_info:
+            validate_additional_context(additional_context=bad)
+        assert exc_info.value.exit_code == ExitCode.INVALID_ARGS
+
+    def test_validate_additional_context_log_error_patterns_rejects_non_list(self):
+        bad = json.dumps(
+            {
+                "gpu_vendor": "AMD",
+                "guest_os": "UBUNTU",
+                "log_error_patterns": {},
+            }
+        )
+        with pytest.raises(typer.Exit) as exc_info:
+            validate_additional_context(additional_context=bad)
+        assert exc_info.value.exit_code == ExitCode.INVALID_ARGS
+
+    def test_validate_additional_context_log_error_patterns_rejects_non_string_element(
+        self,
+    ):
+        bad = json.dumps(
+            {
+                "gpu_vendor": "AMD",
+                "guest_os": "UBUNTU",
+                "log_error_patterns": ["ok", 2],
+            }
+        )
+        with pytest.raises(typer.Exit) as exc_info:
+            validate_additional_context(additional_context=bad)
+        assert exc_info.value.exit_code == ExitCode.INVALID_ARGS