def process_batch_manifest(batch_manifest_file: str) -> Dict[str, List[str]]:
    """Process a batch manifest file and extract model tags based on the build_new flag.

    Args:
        batch_manifest_file: Path to the input manifest.json file.

    Returns:
        Dict with three keys:
            'build_tags':    names of models whose entry has build_new=true,
            'all_tags':      names of every model in the manifest,
            'manifest_data': the raw parsed manifest list (used later when
                             synthesizing build-manifest entries).

    Raises:
        FileNotFoundError: If the manifest file doesn't exist.
        ValueError: If the manifest format is invalid.
    """
    if not os.path.exists(batch_manifest_file):
        raise FileNotFoundError(f"Batch manifest file not found: {batch_manifest_file}")

    try:
        with open(batch_manifest_file, 'r') as f:
            manifest_data = json.load(f)
    except json.JSONDecodeError as e:
        # Chain the original decode error so the root cause stays visible.
        raise ValueError(f"Invalid JSON in batch manifest file: {e}") from e

    if not isinstance(manifest_data, list):
        raise ValueError("Batch manifest must be a list of model objects")

    build_tags = []  # Models that need to be built (build_new=true)
    all_tags = []    # All models in the manifest

    for i, model in enumerate(manifest_data):
        if not isinstance(model, dict):
            raise ValueError(f"Model entry {i} must be a dictionary")

        if "model_name" not in model:
            raise ValueError(f"Model entry {i} missing required 'model_name' field")

        model_name = model["model_name"]
        all_tags.append(model_name)
        # build_new defaults to False: absent flag means "do not rebuild".
        if model.get("build_new", False):
            build_tags.append(model_name)

    return {
        "build_tags": build_tags,
        "all_tags": all_tags,
        "manifest_data": manifest_data,
    }


def _process_batch_manifest_entries(batch_data: Dict, manifest_output: str, registry: Optional[str]) -> None:
    """Process batch manifest and add entries for all models to build_manifest.json.

    For every model with build_new=false, a synthetic entry is recorded in the
    build manifest so downstream consumers see a complete model list even
    though no image was actually built.

    Args:
        batch_data: Processed batch manifest data (output of process_batch_manifest).
        manifest_output: Path to the build manifest file.
        registry: Registry used for the build.
    """
    from madengine.tools.discover_models import DiscoverModels

    # Load the existing build manifest, or start from a minimal skeleton.
    if os.path.exists(manifest_output):
        with open(manifest_output, 'r') as f:
            build_manifest = json.load(f)
    else:
        build_manifest = {
            "built_images": {},
            "built_models": {},
            "context": {},
            "credentials_required": [],
            "registry": registry or ""
        }

    # Process each model in the batch manifest.
    for model_entry in batch_data["manifest_data"]:
        model_name = model_entry["model_name"]
        build_new = model_entry.get("build_new", False)
        model_registry_image = model_entry.get("registry_image", "")
        model_registry = model_entry.get("registry", "")

        # Models that were actually built already have real manifest entries.
        if build_new:
            continue

        # Synthetic image name keys both built_images and built_models.
        synthetic_image_name = f"ci-{model_name}_{model_name}.ubuntu.amd"

        # Resolve the registry image reference: an explicit registry_image
        # wins; otherwise derive one from whichever registry is known.
        if model_registry_image:
            registry_image = model_registry_image
        elif model_registry or registry:
            registry_image = f"{model_registry or registry}/{synthetic_image_name}"
        else:
            registry_image = ""

        try:
            # Discover the model's configuration so the synthetic entry
            # carries real metadata (dockerfile, scripts, n_gpus, ...).
            temp_args = create_args_namespace(
                tags=[model_name],
                registry=registry,
                additional_context="{}",
                additional_context_file=None,
                clean_docker_cache=False,
                manifest_output=manifest_output,
                live_output=False,
                output="perf.csv",
                ignore_deprecated_flag=False,
                data_config_file_name="data.json",
                tools_json_file_name="scripts/common/tools.json",
                generate_sys_env_details=True,
                force_mirror_local=None,
                disable_skip_gpu_arch=False,
                verbose=False,
                _separate_phases=True,
            )

            discover_models = DiscoverModels(args=temp_args)
            models = discover_models.run()

            for model_info in models:
                if model_info["name"] != model_name:
                    continue

                # Add to built_images (even though it wasn't actually built).
                build_manifest["built_images"][synthetic_image_name] = {
                    "docker_image": synthetic_image_name,
                    "dockerfile": model_info.get("dockerfile", f"docker/{model_name}"),
                    "base_docker": "rocm/pytorch",  # Default base
                    "docker_sha": "",  # No SHA since not built
                    "build_duration": 0,
                    "build_command": f"# Skipped build for {model_name} (build_new=false)",
                    "log_file": f"{model_name}_{model_name}.ubuntu.amd.build.skipped.log",
                    "registry_image": registry_image,
                }

                # Add to built_models.
                build_manifest["built_models"][synthetic_image_name] = {
                    "name": model_info["name"],
                    "dockerfile": model_info.get("dockerfile", f"docker/{model_name}"),
                    "scripts": model_info.get("scripts", f"scripts/{model_name}/run.sh"),
                    "n_gpus": model_info.get("n_gpus", "1"),
                    "owner": model_info.get("owner", ""),
                    "training_precision": model_info.get("training_precision", ""),
                    "tags": model_info.get("tags", []),
                    "args": model_info.get("args", ""),
                    "cred": model_info.get("cred", "")
                }
                break

        except Exception as e:
            # Best-effort: fall back to a minimal entry so the manifest still
            # lists the model even when discovery fails.
            console.print(f"Warning: Could not process model {model_name}: {e}")
            build_manifest["built_images"][synthetic_image_name] = {
                "docker_image": synthetic_image_name,
                "dockerfile": f"docker/{model_name}",
                "base_docker": "rocm/pytorch",
                "docker_sha": "",
                "build_duration": 0,
                "build_command": f"# Skipped build for {model_name} (build_new=false)",
                "log_file": f"{model_name}_{model_name}.ubuntu.amd.build.skipped.log",
                "registry_image": model_registry_image or ""
            }
            build_manifest["built_models"][synthetic_image_name] = {
                "name": model_name,
                "dockerfile": f"docker/{model_name}",
                "scripts": f"scripts/{model_name}/run.sh",
                "n_gpus": "1",
                "owner": "",
                "training_precision": "",
                "tags": [],
                "args": "",
                # Include "cred" for consistency with the discovery path above.
                "cred": ""
            }

    # Save the updated manifest.
    with open(manifest_output, 'w') as f:
        json.dump(build_manifest, f, indent=2)

    console.print(f"āœ… Added entries for all models from batch manifest to {manifest_output}")
+ + Batch Build Mode: + Use --batch-manifest to specify a manifest.json file containing a list of models. + For each model with build_new=true, the image will be built. For all models + (regardless of build_new), entries will be created in the build_manifest.json. + + Example batch manifest.json: + [ + { + "model_name": "dummy", + "build_new": false, + "registry_image": "rocm/mad-private:ci-dummy_dummy.ubuntu.amd", + "registry": "dockerhub" + }, + { + "model_name": "dummy2", + "build_new": true, + "registry_image": "", + "registry": "" + } + ] """ setup_logging(verbose) - console.print(Panel( - f"šŸ”Ø [bold cyan]Building Models[/bold cyan]\n" - f"Tags: [yellow]{', '.join(tags) if tags else 'All models'}[/yellow]\n" - f"Registry: [yellow]{registry or 'Local only'}[/yellow]", - title="Build Configuration", - border_style="blue" - )) + # Validate mutually exclusive options + if batch_manifest and tags: + console.print("āŒ [bold red]Error: Cannot specify both --batch-manifest and --tags options[/bold red]") + raise typer.Exit(ExitCode.INVALID_ARGS) + + # Process batch manifest if provided + batch_data = None + effective_tags = tags + if batch_manifest: + try: + batch_data = process_batch_manifest(batch_manifest) + effective_tags = batch_data["build_tags"] + console.print(Panel( + f"ļæ½ [bold cyan]Batch Build Mode[/bold cyan]\n" + f"Input manifest: [yellow]{batch_manifest}[/yellow]\n" + f"Total models: [yellow]{len(batch_data['all_tags'])}[/yellow]\n" + f"Models to build: [yellow]{len(batch_data['build_tags'])}[/yellow] ({', '.join(batch_data['build_tags']) if batch_data['build_tags'] else 'none'})\n" + f"Registry: [yellow]{registry or 'Local only'}[/yellow]", + title="Batch Build Configuration", + border_style="blue" + )) + except (FileNotFoundError, ValueError) as e: + console.print(f"āŒ [bold red]Error processing batch manifest: {e}[/bold red]") + raise typer.Exit(ExitCode.INVALID_ARGS) + else: + console.print(Panel( + f"ļæ½šŸ”Ø [bold cyan]Building Models[/bold 
cyan]\n" + f"Tags: [yellow]{', '.join(tags) if tags else 'All models'}[/yellow]\n" + f"Registry: [yellow]{registry or 'Local only'}[/yellow]", + title="Build Configuration", + border_style="blue" + )) try: # Validate additional context @@ -303,7 +521,7 @@ def build( # Create arguments object args = create_args_namespace( - tags=tags, + tags=effective_tags, registry=registry, additional_context=additional_context, additional_context_file=additional_context_file, @@ -338,6 +556,12 @@ def build( ) progress.update(task, description="Build completed!") + # Handle batch manifest post-processing + if batch_data: + with console.status("Processing batch manifest..."): + _process_batch_manifest_entries(batch_data, manifest_output, registry) + + # Display results display_results_table(build_summary, "Build Results") From 768dcf92eb06a86d584508b6ab4a28240faaa038 Mon Sep 17 00:00:00 2001 From: Stephen Shao Date: Fri, 11 Jul 2025 17:05:26 -0400 Subject: [PATCH 2/9] enhanced logging system is now active and will automatically highlight all Docker operations --- src/madengine/core/console.py | 77 +++++++- src/madengine/mad_cli.py | 8 +- .../pre_scripts/rocEnvTool/csv_parser.py | 18 +- src/madengine/tools/container_runner.py | 23 ++- src/madengine/tools/csv_to_html.py | 24 ++- src/madengine/tools/docker_builder.py | 27 +-- src/madengine/tools/run_models.py | 12 +- src/madengine/tools/update_perf_csv.py | 28 ++- src/madengine/utils/log_formatting.py | 172 ++++++++++++++++++ 9 files changed, 359 insertions(+), 30 deletions(-) create mode 100644 src/madengine/utils/log_formatting.py diff --git a/src/madengine/core/console.py b/src/madengine/core/console.py index 9340924a..e25a1eba 100644 --- a/src/madengine/core/console.py +++ b/src/madengine/core/console.py @@ -8,6 +8,7 @@ # built-in modules import subprocess import typing +import re # third-party modules import typing_extensions @@ -33,6 +34,73 @@ def __init__( self.shellVerbose = shellVerbose self.live_output = live_output + def 
_highlight_docker_operations(self, command: str) -> str: + """Highlight docker push/pull/build/run operations for better visibility. + + Args: + command (str): The command to potentially highlight. + + Returns: + str: The highlighted command if it's a docker operation. + """ + # Check if this is a docker operation + docker_push_pattern = r'^docker\s+push\s+' + docker_pull_pattern = r'^docker\s+pull\s+' + docker_build_pattern = r'^docker\s+build\s+' + docker_run_pattern = r'^docker\s+run\s+' + + if re.match(docker_push_pattern, command, re.IGNORECASE): + return f"\n{'='*80}\nšŸš€ DOCKER PUSH OPERATION: {command}\n{'='*80}" + elif re.match(docker_pull_pattern, command, re.IGNORECASE): + return f"\n{'='*80}\nšŸ“„ DOCKER PULL OPERATION: {command}\n{'='*80}" + elif re.match(docker_build_pattern, command, re.IGNORECASE): + return f"\n{'='*80}\nšŸ”Ø DOCKER BUILD OPERATION: {command}\n{'='*80}" + elif re.match(docker_run_pattern, command, re.IGNORECASE): + return f"\n{'='*80}\nšŸƒ DOCKER RUN OPERATION: {command}\n{'='*80}" + + return command + + def _show_docker_completion(self, command: str, success: bool = True) -> None: + """Show completion message for docker operations. + + Args: + command (str): The command that was executed. + success (bool): Whether the operation was successful. 
+ """ + docker_push_pattern = r'^docker\s+push\s+' + docker_pull_pattern = r'^docker\s+pull\s+' + docker_build_pattern = r'^docker\s+build\s+' + docker_run_pattern = r'^docker\s+run\s+' + + if re.match(docker_push_pattern, command, re.IGNORECASE): + if success: + print(f"āœ… DOCKER PUSH COMPLETED SUCCESSFULLY") + print(f"{'='*80}\n") + else: + print(f"āŒ DOCKER PUSH FAILED") + print(f"{'='*80}\n") + elif re.match(docker_pull_pattern, command, re.IGNORECASE): + if success: + print(f"āœ… DOCKER PULL COMPLETED SUCCESSFULLY") + print(f"{'='*80}\n") + else: + print(f"āŒ DOCKER PULL FAILED") + print(f"{'='*80}\n") + elif re.match(docker_build_pattern, command, re.IGNORECASE): + if success: + print(f"āœ… DOCKER BUILD COMPLETED SUCCESSFULLY") + print(f"{'='*80}\n") + else: + print(f"āŒ DOCKER BUILD FAILED") + print(f"{'='*80}\n") + elif re.match(docker_run_pattern, command, re.IGNORECASE): + if success: + print(f"āœ… DOCKER RUN COMPLETED SUCCESSFULLY") + print(f"{'='*80}\n") + else: + print(f"āŒ DOCKER RUN FAILED") + print(f"{'='*80}\n") + def sh( self, command: str, @@ -60,7 +128,8 @@ def sh( """ # Print the command if shellVerbose is True if self.shellVerbose and not secret: - print("> " + command, flush=True) + highlighted_command = self._highlight_docker_operations(command) + print("> " + highlighted_command, flush=True) # Run the shell command proc = subprocess.Popen( @@ -91,6 +160,12 @@ def sh( raise RuntimeError("Console script timeout") from exc # Check for failure + success = proc.returncode == 0 + + # Show docker operation completion status + if not secret: + self._show_docker_completion(command, success) + if proc.returncode != 0: if not canFail: if not secret: diff --git a/src/madengine/mad_cli.py b/src/madengine/mad_cli.py index fbd68305..b08c7a36 100644 --- a/src/madengine/mad_cli.py +++ b/src/madengine/mad_cli.py @@ -123,7 +123,7 @@ def process_batch_manifest(batch_manifest_file: str) -> Dict[str, List[str]]: """Process batch manifest file and extract 
model tags based on build_new flag. Args: - batch_manifest_file: Path to the input manifest.json file + batch_manifest_file: Path to the input batch.json file Returns: Dict containing 'build_tags' and 'all_tags' lists @@ -436,7 +436,7 @@ def get_display_names(items, limit=5): def build( tags: Annotated[List[str], typer.Option("--tags", "-t", help="Model tags to build (can specify multiple)")] = [], registry: Annotated[Optional[str], typer.Option("--registry", "-r", help="Docker registry to push images to")] = None, - batch_manifest: Annotated[Optional[str], typer.Option("--batch-manifest", help="Input manifest.json file for batch build mode")] = None, + batch_manifest: Annotated[Optional[str], typer.Option("--batch-manifest", help="Input batch.json file for batch build mode")] = None, additional_context: Annotated[str, typer.Option("--additional-context", "-c", help="Additional context as JSON string")] = "{}", additional_context_file: Annotated[Optional[str], typer.Option("--additional-context-file", "-f", help="File containing additional context JSON")] = None, clean_docker_cache: Annotated[bool, typer.Option("--clean-docker-cache", help="Rebuild images without using cache")] = False, @@ -460,11 +460,11 @@ def build( is required for build-only operations. Batch Build Mode: - Use --batch-manifest to specify a manifest.json file containing a list of models. + Use --batch-manifest to specify a batch.json file containing a list of models. For each model with build_new=true, the image will be built. For all models (regardless of build_new), entries will be created in the build_manifest.json. 
def print_csv_output(self):
    """Pretty-print the collected system-config environment info to stdout.

    Lines containing a '|' (other than the 'Tag' header row) are rendered as
    aligned key/value pairs; all other lines are printed verbatim. Prints a
    placeholder message when no system-config info was collected.
    """
    print("\n" + "="*80)
    print("šŸ“‹ SYSTEM CONFIG INFO - ENVIRONMENT VARIABLES")
    print("="*80)
    if self.sys_config_info_list:
        # Iterate the lines directly instead of indexing by range(len(...)).
        for line in self.sys_config_info_list:
            # Add some formatting for key-value pairs (split on first '|' only).
            if "|" in line and not line.startswith("Tag"):
                key, value = line.split("|", 1)
                print(f"šŸ”¹ {key:<30}: {value}")
            else:
                print(f"šŸ“Œ {line}")
    else:
        print("āŒ No system config information available")
    print("="*80 + "\n")
print(f"Tagged as: {local_name}") + print(f"šŸ·ļø Tagged as: {local_name}") + print(f"āœ… Successfully pulled and tagged image") + print(f"{'='*80}") return local_name + print(f"āœ… Successfully pulled image: {registry_image}") + print(f"{'='*80}") return registry_image except Exception as e: @@ -542,7 +548,14 @@ def run_container(self, model_info: typing.Dict, docker_image: str, print(f"Docker options: {docker_options}") # set timeout - print(f"Setting timeout to {str(timeout)} seconds.") + print(f"ā° Setting timeout to {str(timeout)} seconds.") + + print(f"\nšŸƒ Starting Docker container execution...") + print(f"šŸ·ļø Image: {docker_image}") + print(f"šŸ“¦ Container: {container_name}") + print(f"šŸ“ Log file: {log_file_path}") + print(f"šŸŽ® GPU Vendor: {gpu_vendor}") + print(f"{'='*80}") # Run the container with logging try: @@ -554,13 +567,15 @@ def run_container(self, model_info: typing.Dict, docker_image: str, # Check user whoami = model_docker.sh("whoami") - print(f"USER is {whoami}") + print(f"šŸ‘¤ Running as user: {whoami}") # Show GPU info if gpu_vendor.find("AMD") != -1: + print(f"šŸŽ® Checking AMD GPU status...") smi = model_docker.sh("/opt/rocm/bin/rocm-smi || true") print(smi) elif gpu_vendor.find("NVIDIA") != -1: + print(f"šŸŽ® Checking NVIDIA GPU status...") smi = model_docker.sh("/usr/bin/nvidia-smi || true") print(smi) diff --git a/src/madengine/tools/csv_to_html.py b/src/madengine/tools/csv_to_html.py index 5a27952a..2bbcc38d 100644 --- a/src/madengine/tools/csv_to_html.py +++ b/src/madengine/tools/csv_to_html.py @@ -30,7 +30,17 @@ def convert_csv_to_html(file_path: str): output_name += file_name + ".html" # read csv df = pd.read_csv(file_path) - print(df) + + # Use beautiful formatting for dataframe display + try: + from madengine.utils.log_formatting import print_dataframe_beautiful + print_dataframe_beautiful(df, f"Converting CSV: {file_name}") + except ImportError: + # Fallback to basic formatting if utils not available + 
print(f"\nšŸ“Š Converting CSV: {file_name}") + print("="*80) + print(df.to_string(max_rows=20, max_cols=10)) + print("="*80) # Use the .to_html() to get your table in html df_html = df.to_html(index=False) @@ -67,7 +77,17 @@ def run(self): # read csv df = pd.read_csv(file_path) - print(df) + + # Use beautiful formatting for dataframe display + try: + from madengine.utils.log_formatting import print_dataframe_beautiful + print_dataframe_beautiful(df, f"CSV Data from {file_name}") + except ImportError: + # Fallback to basic formatting if utils not available + print(f"\nšŸ“Š CSV Data from {file_name}") + print("="*80) + print(df.to_string(max_rows=20, max_cols=10)) + print("="*80) # Use the .to_html() to get your table in html df_html = df.to_html(index=False) diff --git a/src/madengine/tools/docker_builder.py b/src/madengine/tools/docker_builder.py index 23190e5b..90eed423 100644 --- a/src/madengine/tools/docker_builder.py +++ b/src/madengine/tools/docker_builder.py @@ -91,8 +91,11 @@ def build_image(self, model_info: typing.Dict, dockerfile: str, Returns: dict: Build information including image name, build duration, etc. 
""" - print(f"Building Docker image for model {model_info['name']} from {dockerfile}") - print(f"Building Docker image...") + print(f"\nšŸ”Ø Starting Docker build for model: {model_info['name']}") + print(f"šŸ“ Dockerfile: {dockerfile}") + print(f"šŸ·ļø Target image: {docker_image}") + print(f"šŸ“ Build log: {log_file_path}") + print(f"{'='*80}") # Generate image name image_docker_name = ( @@ -115,9 +118,6 @@ def build_image(self, model_info: typing.Dict, dockerfile: str, # Replace / with _ in log file path (already done above, but keeping for safety) log_file_path = log_file_path.replace("/", "_") - print(f"Processing Dockerfile: {dockerfile}") - print(f"Build log will be written to: {log_file_path}") - # Get docker context docker_context = self.get_context_path(model_info) @@ -148,13 +148,15 @@ def build_image(self, model_info: typing.Dict, dockerfile: str, # Execute build with log redirection with open(log_file_path, mode="w", buffering=1) as outlog: with redirect_stdout(PythonicTee(outlog, self.live_output)), redirect_stderr(PythonicTee(outlog, self.live_output)): - print(f"Executing: {build_command}") + print(f"šŸ”Ø Executing build command...") self.console.sh(build_command, timeout=None) build_duration = time.time() - build_start_time - print(f"Build Duration: {build_duration} seconds") - print(f"MAD_CONTAINER_IMAGE is {docker_image}") + print(f"ā±ļø Build Duration: {build_duration:.2f} seconds") + print(f"šŸ·ļø MAD_CONTAINER_IMAGE is {docker_image}") + print(f"āœ… Docker build completed successfully") + print(f"{'='*80}") # Get base docker info base_docker = "" @@ -294,15 +296,18 @@ def push_image(self, docker_image: str, registry: str = None, credentials: typin # Tag the image if different from local name if registry_image != docker_image: tag_command = f"docker tag {docker_image} {registry_image}" - print(f"Tagging image: {tag_command}") + print(f"šŸ·ļø Tagging image: {tag_command}") self.console.sh(tag_command) # Push the image push_command = 
def print_perf(self):
    """Print a framed summary of this model's performance results.

    Emits the model name, measured performance with its metric, and status;
    machine name and GPU architecture are included only when set.
    """
    rule = "=" * 60
    report = [
        "",
        rule,
        "šŸ“Š PERFORMANCE RESULTS",
        rule,
        f"šŸ·ļø Model: {self.model}",
        f"⚔ Performance: {self.performance} {self.metric}",
        f"šŸ“ˆ Status: {self.status}",
    ]
    if self.machine_name:
        report.append(f"šŸ–„ļø Machine: {self.machine_name}")
    if self.gpu_architecture:
        report.append(f"šŸŽ® GPU Architecture: {self.gpu_architecture}")
    report.append(rule + "\n")
    # One joined write produces byte-identical output to line-by-line prints.
    print("\n".join(report))
and exception_result if multiple_results: + print("šŸ”„ Processing multiple results...") perf_csv_df = handle_multiple_results( perf_csv_df, multiple_results, @@ -208,17 +213,22 @@ def update_perf_csv( model_name, ) elif single_result: + print("šŸ”„ Processing single result...") perf_csv_df = handle_single_result(perf_csv_df, single_result) elif exception_result: + print("āš ļø Processing exception result...") perf_csv_df = handle_exception_result( perf_csv_df, exception_result ) else: - print("No results to update in perf.csv") + print("ā„¹ļø No results to update in perf.csv") # write new perf.csv # Note that this file will also generate a perf_entry.csv regardless of the output file args. perf_csv_df.to_csv(perf_csv, index=False) + print(f"āœ… Successfully updated: {perf_csv}") + print("="*80 + "\n") + perf_csv_df.to_csv(perf_csv, index=False) class UpdatePerfCsv: @@ -238,12 +248,17 @@ def __init__(self, args: argparse.Namespace): def run(self): """Update the performance csv file with the latest performance data.""" - print(f"Updating performance metrics of models perf.csv to database") + print("\n" + "="*80) + print("šŸ“Š UPDATING PERFORMANCE METRICS DATABASE") + print("="*80) + print(f"šŸ“‚ Processing: {self.args.perf_csv}") + # read perf.csv perf_csv_df = df_strip_columns(pd.read_csv(self.args.perf_csv)) # handle multiple_results, single_result, and exception_result if self.args.multiple_results: + print("šŸ”„ Processing multiple results...") perf_csv_df = handle_multiple_results( perf_csv_df, self.args.multiple_results, @@ -251,17 +266,22 @@ def run(self): self.args.model_name, ) elif self.args.single_result: + print("šŸ”„ Processing single result...") perf_csv_df = handle_single_result(perf_csv_df, self.args.single_result) elif self.args.exception_result: + print("āš ļø Processing exception result...") perf_csv_df = handle_exception_result( perf_csv_df, self.args.exception_result ) else: - print("No results to update in perf.csv") + print("ā„¹ļø No 
results to update in perf.csv") # write new perf.csv # Note that this file will also generate a perf_entry.csv regardless of the output file args. perf_csv_df.to_csv(self.args.perf_csv, index=False) + + print(f"āœ… Successfully updated: {self.args.perf_csv}") + print("="*80 + "\n") self.return_status = True return self.return_status diff --git a/src/madengine/utils/log_formatting.py b/src/madengine/utils/log_formatting.py new file mode 100644 index 00000000..99803a3b --- /dev/null +++ b/src/madengine/utils/log_formatting.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python3 +""" +Utility functions for formatting and displaying data in logs. + +This module provides enhanced formatting utilities for better log readability, +including dataframe formatting and other display utilities. + +Copyright (c) Advanced Micro Devices, Inc. All rights reserved. +""" + +import pandas as pd +import typing +from rich.table import Table +from rich.console import Console as RichConsole +from rich.text import Text + + +def format_dataframe_for_log(df: pd.DataFrame, title: str = "DataFrame", max_rows: int = 20, max_cols: int = 10) -> str: + """ + Format a pandas DataFrame for beautiful log output. 
+ + Args: + df: The pandas DataFrame to format + title: Title for the dataframe display + max_rows: Maximum number of rows to display + max_cols: Maximum number of columns to display + + Returns: + str: Beautifully formatted string representation of the DataFrame + """ + if df.empty: + return f"\nšŸ“Š {title}\n{'='*60}\nāŒ DataFrame is empty\n{'='*60}\n" + + # Truncate if necessary + display_df = df.copy() + truncated_rows = False + truncated_cols = False + + if len(df) > max_rows: + display_df = display_df.head(max_rows) + truncated_rows = True + + if len(df.columns) > max_cols: + display_df = display_df.iloc[:, :max_cols] + truncated_cols = True + + # Create header + header = f"\nšŸ“Š {title}\n" + header += f"{'='*80}\n" + header += f"šŸ“ Shape: {df.shape[0]} rows Ɨ {df.shape[1]} columns\n" + + if truncated_rows or truncated_cols: + header += "āš ļø Display truncated: " + if truncated_rows: + header += f"showing first {max_rows} rows " + if truncated_cols: + header += f"showing first {max_cols} columns" + header += "\n" + + header += f"{'='*80}\n" + + # Format the DataFrame with nice styling + formatted_df = display_df.to_string( + index=True, + max_rows=max_rows, + max_cols=max_cols, + width=None, + float_format='{:.4f}'.format + ) + + # Add some visual separators + footer = f"\n{'='*80}\n" + + return header + formatted_df + footer + + +def format_dataframe_rich(df: pd.DataFrame, title: str = "DataFrame", max_rows: int = 20) -> None: + """ + Display a pandas DataFrame using Rich formatting for enhanced readability. 
+ + Args: + df: The pandas DataFrame to display + title: Title for the table + max_rows: Maximum number of rows to display + """ + console = RichConsole() + + if df.empty: + console.print(f"šŸ“Š [bold cyan]{title}[/bold cyan]: [red]DataFrame is empty[/red]") + return + + # Create Rich table + table = Table(title=f"šŸ“Š {title}", show_header=True, header_style="bold magenta") + + # Add index column + table.add_column("Index", style="dim", width=8) + + # Add data columns + for col in df.columns: + table.add_column(str(col), style="cyan") + + # Add rows (truncate if necessary) + display_rows = min(len(df), max_rows) + for i in range(display_rows): + row_data = [str(df.index[i])] + for col in df.columns: + value = df.iloc[i][col] + if pd.isna(value): + row_data.append("[dim]NaN[/dim]") + elif isinstance(value, float): + row_data.append(f"{value:.4f}") + else: + row_data.append(str(value)) + table.add_row(*row_data) + + # Show truncation info + if len(df) > max_rows: + table.add_row(*["..." for _ in range(len(df.columns) + 1)]) + console.print(f"[yellow]āš ļø Showing first {max_rows} of {len(df)} rows[/yellow]") + + console.print(table) + console.print(f"[green]✨ DataFrame shape: {df.shape[0]} rows Ɨ {df.shape[1]} columns[/green]") + + +def print_dataframe_beautiful(df: pd.DataFrame, title: str = "Data", use_rich: bool = True) -> None: + """ + Print a pandas DataFrame with beautiful formatting. 
+ + Args: + df: The pandas DataFrame to print + title: Title for the display + use_rich: Whether to use Rich formatting (if available) or fall back to simple formatting + """ + try: + if use_rich: + format_dataframe_rich(df, title) + else: + raise ImportError("Fallback to simple formatting") + except (ImportError, Exception): + # Fallback to simple but nice formatting + formatted_output = format_dataframe_for_log(df, title) + print(formatted_output) + + +def highlight_log_section(title: str, content: str, style: str = "info") -> str: + """ + Create a highlighted log section with borders and styling. + + Args: + title: Section title + content: Section content + style: Style type ('info', 'success', 'warning', 'error') + + Returns: + str: Formatted log section + """ + styles = { + 'info': {'emoji': 'ā„¹ļø', 'border': '-'}, + 'success': {'emoji': 'āœ…', 'border': '='}, + 'warning': {'emoji': 'āš ļø', 'border': '!'}, + 'error': {'emoji': 'āŒ', 'border': '#'} + } + + style_config = styles.get(style, styles['info']) + emoji = style_config['emoji'] + border_char = style_config['border'] + + border = border_char * 80 + header = f"\n{border}\n{emoji} {title.upper()}\n{border}" + footer = f"{border}\n" + + return f"{header}\n{content}\n{footer}" From a4b324ff7fcb8c2815a4c9638a468a4b283ba14d Mon Sep 17 00:00:00 2001 From: Stephen Shao Date: Fri, 11 Jul 2025 17:13:43 -0400 Subject: [PATCH 3/9] Fix the error local variable docker_image referenced before assignment --- src/madengine/tools/docker_builder.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/madengine/tools/docker_builder.py b/src/madengine/tools/docker_builder.py index 90eed423..26183433 100644 --- a/src/madengine/tools/docker_builder.py +++ b/src/madengine/tools/docker_builder.py @@ -91,13 +91,7 @@ def build_image(self, model_info: typing.Dict, dockerfile: str, Returns: dict: Build information including image name, build duration, etc. 
""" - print(f"\nšŸ”Ø Starting Docker build for model: {model_info['name']}") - print(f"šŸ“ Dockerfile: {dockerfile}") - print(f"šŸ·ļø Target image: {docker_image}") - print(f"šŸ“ Build log: {log_file_path}") - print(f"{'='*80}") - - # Generate image name + # Generate image name first image_docker_name = ( model_info["name"].replace("/", "_").lower() + "_" @@ -118,6 +112,12 @@ def build_image(self, model_info: typing.Dict, dockerfile: str, # Replace / with _ in log file path (already done above, but keeping for safety) log_file_path = log_file_path.replace("/", "_") + print(f"\nšŸ”Ø Starting Docker build for model: {model_info['name']}") + print(f"šŸ“ Dockerfile: {dockerfile}") + print(f"šŸ·ļø Target image: {docker_image}") + print(f"šŸ“ Build log: {log_file_path}") + print(f"{'='*80}") + # Get docker context docker_context = self.get_context_path(model_info) From ebfb472d6afccfa241775a447a0937f008a5c750 Mon Sep 17 00:00:00 2001 From: Stephen Shao Date: Fri, 11 Jul 2025 17:38:49 -0400 Subject: [PATCH 4/9] Updated the perf dataframe output --- src/madengine/utils/log_formatting.py | 83 +++++++++++++++++---------- 1 file changed, 54 insertions(+), 29 deletions(-) diff --git a/src/madengine/utils/log_formatting.py b/src/madengine/utils/log_formatting.py index 99803a3b..26daae7b 100644 --- a/src/madengine/utils/log_formatting.py +++ b/src/madengine/utils/log_formatting.py @@ -31,31 +31,41 @@ def format_dataframe_for_log(df: pd.DataFrame, title: str = "DataFrame", max_row if df.empty: return f"\nšŸ“Š {title}\n{'='*60}\nāŒ DataFrame is empty\n{'='*60}\n" - # Truncate if necessary - display_df = df.copy() + # Define key columns to display for performance results + key_columns = [ + "model", "n_gpus", "docker_file", "machine_name", "gpu_architecture", + "performance", "metric", "status", "dataname" + ] + + # Filter DataFrame to show only key columns that exist + available_columns = [col for col in key_columns if col in df.columns] + if available_columns: + 
display_df = df[available_columns].copy() + total_columns_note = f"(showing {len(available_columns)} of {len(df.columns)} columns)" + else: + # If no key columns found, show all columns as fallback with truncation + display_df = df.copy() + total_columns_note = f"(showing all {len(df.columns)} columns)" + if len(df.columns) > max_cols: + display_df = display_df.iloc[:, :max_cols] + total_columns_note = f"(showing first {max_cols} of {len(df.columns)} columns)" + + # Truncate rows if necessary truncated_rows = False - truncated_cols = False - - if len(df) > max_rows: + if len(display_df) > max_rows: display_df = display_df.head(max_rows) truncated_rows = True - if len(df.columns) > max_cols: - display_df = display_df.iloc[:, :max_cols] - truncated_cols = True - # Create header - header = f"\nšŸ“Š {title}\n" + header = f"\nšŸ“Š {title} {total_columns_note}\n" header += f"{'='*80}\n" - header += f"šŸ“ Shape: {df.shape[0]} rows Ɨ {df.shape[1]} columns\n" + if available_columns: + header += f"šŸ“ Shape: {df.shape[0]} rows Ɨ {len(available_columns)} key columns (total: {df.shape[1]} columns)\n" + else: + header += f"šŸ“ Shape: {df.shape[0]} rows Ɨ {df.shape[1]} columns\n" - if truncated_rows or truncated_cols: - header += "āš ļø Display truncated: " - if truncated_rows: - header += f"showing first {max_rows} rows " - if truncated_cols: - header += f"showing first {max_cols} columns" - header += "\n" + if truncated_rows: + header += f"āš ļø Display truncated: showing first {max_rows} rows\n" header += f"{'='*80}\n" @@ -63,7 +73,6 @@ def format_dataframe_for_log(df: pd.DataFrame, title: str = "DataFrame", max_row formatted_df = display_df.to_string( index=True, max_rows=max_rows, - max_cols=max_cols, width=None, float_format='{:.4f}'.format ) @@ -89,22 +98,38 @@ def format_dataframe_rich(df: pd.DataFrame, title: str = "DataFrame", max_rows: console.print(f"šŸ“Š [bold cyan]{title}[/bold cyan]: [red]DataFrame is empty[/red]") return + # Define key columns to display for 
performance results + key_columns = [ + "model", "n_gpus", "machine_name", "gpu_architecture", + "performance", "metric", "status", "dataname" + ] + + # Filter DataFrame to show only key columns that exist + available_columns = [col for col in key_columns if col in df.columns] + if available_columns: + display_df = df[available_columns] + total_columns_note = f"(showing {len(available_columns)} of {len(df.columns)} columns)" + else: + # If no key columns found, show all columns as fallback + display_df = df + total_columns_note = f"(showing all {len(df.columns)} columns)" + # Create Rich table - table = Table(title=f"šŸ“Š {title}", show_header=True, header_style="bold magenta") + table = Table(title=f"šŸ“Š {title} {total_columns_note}", show_header=True, header_style="bold magenta") # Add index column table.add_column("Index", style="dim", width=8) # Add data columns - for col in df.columns: + for col in display_df.columns: table.add_column(str(col), style="cyan") # Add rows (truncate if necessary) - display_rows = min(len(df), max_rows) + display_rows = min(len(display_df), max_rows) for i in range(display_rows): - row_data = [str(df.index[i])] - for col in df.columns: - value = df.iloc[i][col] + row_data = [str(display_df.index[i])] + for col in display_df.columns: + value = display_df.iloc[i][col] if pd.isna(value): row_data.append("[dim]NaN[/dim]") elif isinstance(value, float): @@ -114,12 +139,12 @@ def format_dataframe_rich(df: pd.DataFrame, title: str = "DataFrame", max_rows: table.add_row(*row_data) # Show truncation info - if len(df) > max_rows: - table.add_row(*["..." for _ in range(len(df.columns) + 1)]) - console.print(f"[yellow]āš ļø Showing first {max_rows} of {len(df)} rows[/yellow]") + if len(display_df) > max_rows: + table.add_row(*["..." 
for _ in range(len(display_df.columns) + 1)]) + console.print(f"[yellow]āš ļø Showing first {max_rows} of {len(display_df)} rows[/yellow]") console.print(table) - console.print(f"[green]✨ DataFrame shape: {df.shape[0]} rows Ɨ {df.shape[1]} columns[/green]") + console.print(f"[green]✨ DataFrame shape: {df.shape[0]} rows Ɨ {len(available_columns)} key columns (total: {df.shape[1]} columns)[/green]") def print_dataframe_beautiful(df: pd.DataFrame, title: str = "Data", use_rich: bool = True) -> None: From e47572eb4feb864a50c873c88cc4d899e4b5d01f Mon Sep 17 00:00:00 2001 From: Stephen Shao Date: Fri, 11 Jul 2025 18:16:29 -0400 Subject: [PATCH 5/9] The fixes are backward compatible and maintain existing functionality for truly successful runs while correctly identifying and handling various failure scenarios. --- src/madengine/tools/container_runner.py | 46 ++++++++++++++++++- .../tools/distributed_orchestrator.py | 22 ++++++--- 2 files changed, 60 insertions(+), 8 deletions(-) diff --git a/src/madengine/tools/container_runner.py b/src/madengine/tools/container_runner.py index 0f56b373..f3ab0da5 100644 --- a/src/madengine/tools/container_runner.py +++ b/src/madengine/tools/container_runner.py @@ -706,8 +706,50 @@ def run_container(self, model_info: typing.Dict, docker_image: str, except Exception as e: print(f"Warning: Could not extract performance metrics: {e}") - # Set status based on performance - run_results["status"] = 'SUCCESS' if run_results.get("performance") else 'FAILURE' + # Set status based on performance and error patterns + # First check for obvious failure patterns in the logs + try: + # Check for common failure patterns in the log file + error_patterns = [ + "OutOfMemoryError", "HIP out of memory", "CUDA out of memory", + "RuntimeError", "AssertionError", "ValueError", "SystemExit", + "failed (exitcode:", "Traceback (most recent call last):", + "Error:", "FAILED", "Exception:" + ] + + has_errors = False + if log_file_path and 
os.path.exists(log_file_path): + try: + # Check for error patterns in the log + for pattern in error_patterns: + error_check_cmd = f"grep -q '{pattern}' {log_file_path} && echo 'FOUND' || echo 'NOT_FOUND'" + result = self.console.sh(error_check_cmd, canFail=True) + if result.strip() == "FOUND": + has_errors = True + print(f"Found error pattern '{pattern}' in logs") + break + except Exception: + pass # Error checking is optional + + # Status logic: Must have performance AND no errors to be considered success + performance_value = run_results.get("performance") + has_performance = performance_value and performance_value.strip() and performance_value.strip() != "N/A" + + if has_errors: + run_results["status"] = 'FAILURE' + print(f"Status: FAILURE (error patterns detected in logs)") + elif has_performance: + run_results["status"] = 'SUCCESS' + print(f"Status: SUCCESS (performance metrics found, no errors)") + else: + run_results["status"] = 'FAILURE' + print(f"Status: FAILURE (no performance metrics)") + + except Exception as e: + print(f"Warning: Error in status determination: {e}") + # Fallback to simple performance check + run_results["status"] = 'SUCCESS' if run_results.get("performance") else 'FAILURE' + print(f"{model_info['name']} performance is {run_results.get('performance', 'N/A')} {run_results.get('metric', '')}") # Generate performance results and update perf.csv diff --git a/src/madengine/tools/distributed_orchestrator.py b/src/madengine/tools/distributed_orchestrator.py index c6246c4c..d21a9a0d 100644 --- a/src/madengine/tools/distributed_orchestrator.py +++ b/src/madengine/tools/distributed_orchestrator.py @@ -311,10 +311,15 @@ def run_phase(self, manifest_file: str = "build_manifest.json", generate_sys_env_details=getattr(self.args, 'generate_sys_env_details', True) ) - execution_summary["successful_runs"].append(run_results) - execution_summary["total_execution_time"] += run_results.get("test_duration", 0) + # Add to appropriate list based on actual 
status + if run_results.get("status") == "SUCCESS": + execution_summary["successful_runs"].append(run_results) + print(f"Successfully completed: {model_info['name']} -> {run_results['status']}") + else: + execution_summary["failed_runs"].append(run_results) + print(f"Failed to complete: {model_info['name']} -> {run_results['status']}") - print(f"Successfully completed: {model_info['name']} -> {run_results['status']}") + execution_summary["total_execution_time"] += run_results.get("test_duration", 0) except Exception as e: print(f"Failed to run {model_info['name']} with image {image_name}: {e}") @@ -404,10 +409,15 @@ def run_phase(self, manifest_file: str = "build_manifest.json", generate_sys_env_details=getattr(self.args, 'generate_sys_env_details', True) ) - execution_summary["successful_runs"].append(run_results) - execution_summary["total_execution_time"] += run_results.get("test_duration", 0) + # Add to appropriate list based on actual status + if run_results.get("status") == "SUCCESS": + execution_summary["successful_runs"].append(run_results) + print(f"Successfully completed: {model_name} -> {run_results['status']}") + else: + execution_summary["failed_runs"].append(run_results) + print(f"Failed to complete: {model_name} -> {run_results['status']}") - print(f"Successfully completed: {model_name} -> {run_results['status']}") + execution_summary["total_execution_time"] += run_results.get("test_duration", 0) except Exception as e: print(f"Failed to run {model_name} with image {image_name}: {e}") From 3a73edca0bb30e98bd85f29bf6cc908d88541dd8 Mon Sep 17 00:00:00 2001 From: Stephen Shao Date: Fri, 11 Jul 2025 18:33:28 -0400 Subject: [PATCH 6/9] Fixed the problematic log --- src/madengine/tools/container_runner.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/madengine/tools/container_runner.py b/src/madengine/tools/container_runner.py index f3ab0da5..7a41be53 100644 --- a/src/madengine/tools/container_runner.py +++ 
b/src/madengine/tools/container_runner.py @@ -720,9 +720,10 @@ def run_container(self, model_info: typing.Dict, docker_image: str, has_errors = False if log_file_path and os.path.exists(log_file_path): try: - # Check for error patterns in the log + # Check for error patterns in the log (exclude our own grep commands and output messages) for pattern in error_patterns: - error_check_cmd = f"grep -q '{pattern}' {log_file_path} && echo 'FOUND' || echo 'NOT_FOUND'" + # Use grep with -v to exclude our own commands and output to avoid false positives + error_check_cmd = f"grep -v -E '(grep -q.*{pattern}|Found error pattern.*{pattern})' {log_file_path} | grep -q '{pattern}' && echo 'FOUND' || echo 'NOT_FOUND'" result = self.console.sh(error_check_cmd, canFail=True) if result.strip() == "FOUND": has_errors = True From e1000a41e907c4ae11ce1617b1b417e14c98de19 Mon Sep 17 00:00:00 2001 From: Stephen Shao Date: Fri, 11 Jul 2025 19:07:21 -0400 Subject: [PATCH 7/9] Fixed the error pattern, removed the wrong string --- src/madengine/tools/container_runner.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/madengine/tools/container_runner.py b/src/madengine/tools/container_runner.py index 7a41be53..4057ba93 100644 --- a/src/madengine/tools/container_runner.py +++ b/src/madengine/tools/container_runner.py @@ -713,8 +713,7 @@ def run_container(self, model_info: typing.Dict, docker_image: str, error_patterns = [ "OutOfMemoryError", "HIP out of memory", "CUDA out of memory", "RuntimeError", "AssertionError", "ValueError", "SystemExit", - "failed (exitcode:", "Traceback (most recent call last):", - "Error:", "FAILED", "Exception:" + "failed (exitcode:", "Error:", "FAILED", "Exception:" ] has_errors = False From 06934d3263c110adce6739f2d2f16b3e0658b394 Mon Sep 17 00:00:00 2001 From: Stephen Shao Date: Fri, 11 Jul 2025 22:41:14 -0400 Subject: [PATCH 8/9] Fixed the error of test prof --- tests/test_distributed_integration.py | 40 +++++++++++++++++++-------- 1 file 
changed, 28 insertions(+), 12 deletions(-) diff --git a/tests/test_distributed_integration.py b/tests/test_distributed_integration.py index daae5f67..4feaaf6d 100644 --- a/tests/test_distributed_integration.py +++ b/tests/test_distributed_integration.py @@ -774,8 +774,8 @@ def test_distributed_profiling_tools_integration(self, mock_exists, mock_data, m # Mock successful container run mock_run_container.return_value = { - "model": "dummy", - "status": "success", + "model": "dummy_prof", + "status": "SUCCESS", "test_duration": 30.5, "profiling_data": { "rocprof_output": "/tmp/rocprof/output.csv" @@ -785,22 +785,38 @@ def test_distributed_profiling_tools_integration(self, mock_exists, mock_data, m # Mock manifest with profiling tools manifest_with_profiling = { "built_images": { - "ci-dummy_profiling.ubuntu.amd": { - "docker_image": "ci-dummy_profiling.ubuntu.amd", + "ci-dummy_prof_dummy.ubuntu.amd": { + "docker_image": "ci-dummy_prof_dummy.ubuntu.amd", "dockerfile": "docker/dummy.ubuntu.amd.Dockerfile", - "build_duration": 45.2 + "base_docker": "rocm/pytorch", + "docker_sha": "sha256:47efe367d76c620ee828750fb294303f3f9f5fb6c184362a4741ce5e55ed3769", + "build_duration": 0.559730052947998, + "build_command": "docker build --network=host -t ci-dummy_prof_dummy.ubuntu.amd --pull -f docker/dummy.ubuntu.amd.Dockerfile ./docker", + "log_file": "dummy_prof_dummy.ubuntu.amd.build.live.log" } }, "built_models": { - "ci-dummy_profiling.ubuntu.amd": { - "name": "dummy_profiling", + "ci-dummy_prof_dummy.ubuntu.amd": { + "name": "dummy_prof", + "dockerfile": "docker/dummy", + "scripts": "scripts/dummy/run_prof.sh", "n_gpus": "1", - "scripts": "scripts/dummy/run.sh", - "dockerfile": "docker/dummy.ubuntu.amd.Dockerfile", - "tags": ["dummy", "profiling"], - "tools": ["rocprof", "roctracer"] + "owner": "mmelesse@amd.com", + "training_precision": "", + "tags": [ + "dummies" + ], + "args": "" } - } + }, + "context": { + "docker_env_vars": {}, + "docker_mounts": {}, + "docker_build_arg": 
{}, + "gpu_vendor": "AMD", + "docker_gpus": "" + }, + "credentials_required": [] } with patch('builtins.open', mock_open(read_data=json.dumps(manifest_with_profiling))): From 59dd584cd9214c4e4b2aafb7184d5981d68d0ae5 Mon Sep 17 00:00:00 2001 From: Stephen Shao Date: Sat, 12 Jul 2025 11:39:25 -0400 Subject: [PATCH 9/9] Updated the interface of mad_cli --- src/madengine/mad_cli.py | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/src/madengine/mad_cli.py b/src/madengine/mad_cli.py index b08c7a36..7db910b4 100644 --- a/src/madengine/mad_cli.py +++ b/src/madengine/mad_cli.py @@ -458,27 +458,6 @@ def build( This command builds Docker images for the specified model tags and optionally pushes them to a registry. Additional context with gpu_vendor and guest_os is required for build-only operations. - - Batch Build Mode: - Use --batch-manifest to specify a batch.json file containing a list of models. - For each model with build_new=true, the image will be built. For all models - (regardless of build_new), entries will be created in the build_manifest.json. - - Example batch batch.json: - [ - { - "model_name": "dummy", - "build_new": false, - "registry_image": "rocm/mad-private:ci-dummy_dummy.ubuntu.amd", - "registry": "dockerhub" - }, - { - "model_name": "dummy2", - "build_new": true, - "registry_image": "", - "registry": "" - } - ] """ setup_logging(verbose)