From 51900dc071be4d6835f31ffc75eebf4b9efd7b45 Mon Sep 17 00:00:00 2001
From: Yi-Fu Wu
Date: Mon, 14 Apr 2025 01:36:28 -0700
Subject: [PATCH 1/4] Log code in wandb

Signed-off-by: Yi-Fu Wu
---
NOTE(review): adds a single call, self.run.log_code("./nemo_reinforcer"),
  right after wandb.init(). The path is relative; presumably it is resolved
  against the process's working directory, so launching from another
  directory would log a different (or no) tree -- confirm against the wandb
  Run.log_code documentation. This call is removed again by PATCH 2/4.

 nemo_reinforcer/utils/logger.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/nemo_reinforcer/utils/logger.py b/nemo_reinforcer/utils/logger.py
index bc0157d564..7bf7b8aaaa 100644
--- a/nemo_reinforcer/utils/logger.py
+++ b/nemo_reinforcer/utils/logger.py
@@ -125,6 +125,7 @@ class WandbLogger(LoggerInterface):
 
     def __init__(self, cfg: WandbConfig, log_dir: Optional[str] = None):
         self.run = wandb.init(**cfg, dir=log_dir)
+        self.run.log_code("./nemo_reinforcer")
         print(
             f"Initialized WandbLogger for project {cfg.get('project')}, run {cfg.get('name')} at {log_dir}"
         )

From 19fb18686934df398b936f78c0488719952ed95b Mon Sep 17 00:00:00 2001
From: Yi-Fu Wu
Date: Thu, 17 Apr 2025 12:10:05 -0700
Subject: [PATCH 2/4] Only log files tracked by git

Signed-off-by: Yi-Fu Wu
---
NOTE(review): replaces the log_code() call from PATCH 1/4 with a new
  _log_code() method that runs "git ls-files", adds every listed path that
  passes os.path.isfile() to a wandb.Artifact named
  "source-code-<project>" (type "code"), and logs that artifact. All
  failures are caught and printed, so logger construction never raises
  because of this step.
NOTE(review): result.stdout.strip().split("\n") returns [""] when git
  prints nothing, and [""] is truthy, so the "if not tracked_files" guard
  never fires as written. The lone "" entry is then dropped by the
  os.path.isfile() check and an artifact with no files is logged.
NOTE(review): the final message prints len(tracked_files), i.e. the number
  of listed paths, not the number actually added; entries skipped by
  os.path.isfile() or by the inner "except Exception" are still counted.
NOTE(review): subprocess.run() is called without cwd=, so git runs in the
  process's current directory. The docstring says "in the project root";
  that only holds if the process is started there -- confirm with callers.

 nemo_reinforcer/utils/logger.py | 39 ++++++++++++++++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

diff --git a/nemo_reinforcer/utils/logger.py b/nemo_reinforcer/utils/logger.py
index 7bf7b8aaaa..95581ef235 100644
--- a/nemo_reinforcer/utils/logger.py
+++ b/nemo_reinforcer/utils/logger.py
@@ -19,6 +19,7 @@
 import time
 import threading
 import requests
+import subprocess
 from abc import ABC, abstractmethod
 import logging
 from typing import List, Any, Dict, Optional, TypedDict, Union
@@ -125,11 +126,47 @@ class WandbLogger(LoggerInterface):
 
     def __init__(self, cfg: WandbConfig, log_dir: Optional[str] = None):
         self.run = wandb.init(**cfg, dir=log_dir)
-        self.run.log_code("./nemo_reinforcer")
+        self._log_code()
         print(
             f"Initialized WandbLogger for project {cfg.get('project')}, run {cfg.get('name')} at {log_dir}"
         )
 
+    def _log_code(self):
+        """Log code that is tracked by git to wandb.
+
+        This function gets a list of all files tracked by git in the project root
+        and manually uploads them to the current wandb run as an artifact.
+        """
+        try:
+            result = subprocess.run(
+                ["git", "ls-files"], capture_output=True, text=True, check=True
+            )
+
+            tracked_files = result.stdout.strip().split("\n")
+
+            if not tracked_files:
+                print("No git-tracked files found")
+                return
+
+            code_artifact = wandb.Artifact(
+                name=f"source-code-{self.run.project}", type="code"
+            )
+
+            for file_path in tracked_files:
+                if os.path.isfile(file_path):
+                    try:
+                        code_artifact.add_file(file_path, name=file_path)
+                    except Exception as e:
+                        print(f"Error adding file {file_path}: {e}")
+
+            self.run.log_artifact(code_artifact)
+            print(f"Logged {len(tracked_files)} git-tracked files to wandb")
+
+        except subprocess.CalledProcessError as e:
+            print(f"Error getting git-tracked files: {e}")
+        except Exception as e:
+            print(f"Unexpected error during git code logging: {e}")
+
     def define_metric(
         self,
         name: str,

From 3b4ba65946b165c57f89552c7ba5ad2542be660e Mon Sep 17 00:00:00 2001
From: Yi-Fu Wu
Date: Fri, 18 Apr 2025 17:18:09 -0700
Subject: [PATCH 3/4] Save diffs too

Signed-off-by: Yi-Fu Wu
---
NOTE(review): adds _log_diffs(), called from __init__ right after
  _log_code(). It writes the output of "git diff HEAD" and, when the
  current branch name is not "main", of "git diff main" to text files and
  attaches them to one wandb.Artifact named "git-diffs-<project>-<run id>".
NOTE(review): both diff commands use check=True inside a single try block.
  If "git diff main" exits non-zero (presumably the case when no local
  "main" ref exists -- confirm), control jumps to the CalledProcessError
  handler and self.run.log_artifact(diff_artifact) is skipped, so the
  uncommitted-changes diff that was already added is not logged either.
NOTE(review): log_artifact() is called unconditionally, including when
  neither diff produced any output and the artifact holds no files.
NOTE(review): the diff files are written under
  "wandb.run.dir if wandb.run else '.'" while the rest of the class uses
  self.run; the inline comment calls the file "temporary" but nothing in
  this method removes it.
NOTE(review): the pre-image blob here is 2d802ef948, whereas PATCH 2/4
  produces 95581ef235, so this patch was generated against a file state
  that is not the direct result of PATCH 2/4. Presumably intermediate
  commits are missing from the series -- confirm before applying in order.

 nemo_reinforcer/utils/logger.py | 71 +++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)

diff --git a/nemo_reinforcer/utils/logger.py b/nemo_reinforcer/utils/logger.py
index 2d802ef948..a5ce0ae7f2 100644
--- a/nemo_reinforcer/utils/logger.py
+++ b/nemo_reinforcer/utils/logger.py
@@ -127,10 +127,81 @@ class WandbLogger(LoggerInterface):
     def __init__(self, cfg: WandbConfig, log_dir: Optional[str] = None):
         self.run = wandb.init(**cfg, dir=log_dir)
         self._log_code()
+        self._log_diffs()
         print(
             f"Initialized WandbLogger for project {cfg.get('project')}, run {cfg.get('name')} at {log_dir}"
         )
 
+    def _log_diffs(self):
+        """Log git diffs to wandb.
+
+        This function captures and logs two types of diffs:
+        1. Uncommitted changes (working tree diff against HEAD)
+        2. All changes (including uncommitted) against the main branch
+
+        Each diff is saved as a text file in a wandb artifact.
+        """
+        try:
+            branch_result = subprocess.run(
+                ["git", "rev-parse", "--abbrev-ref", "HEAD"],
+                capture_output=True,
+                text=True,
+                check=True,
+            )
+            current_branch = branch_result.stdout.strip()
+
+            diff_artifact = wandb.Artifact(
+                name=f"git-diffs-{self.run.project}-{self.run.id}", type="git-diffs"
+            )
+
+            # 1. Log uncommitted changes (working tree diff)
+            uncommitted_result = subprocess.run(
+                ["git", "diff", "HEAD"], capture_output=True, text=True, check=True
+            )
+            uncommitted_diff = uncommitted_result.stdout
+
+            if uncommitted_diff:
+                diff_path = os.path.join(
+                    wandb.run.dir if wandb.run else ".", "uncommitted_changes_diff.txt"
+                )
+                with open(diff_path, "w") as f:
+                    f.write(uncommitted_diff)
+
+                # Add file to artifact
+                diff_artifact.add_file(diff_path, name="uncommitted_changes_diff.txt")
+                print("Logged uncommitted changes diff to wandb")
+            else:
+                print("No uncommitted changes found")
+
+            # 2. Log diff against main branch (if current branch is not main)
+            if current_branch != "main":
+                # Log diff between main and working tree (includes uncommitted changes)
+                working_diff_result = subprocess.run(
+                    ["git", "diff", "main"], capture_output=True, text=True, check=True
+                )
+                working_diff = working_diff_result.stdout
+
+                if working_diff:
+                    # Save diff to a temporary file
+                    diff_path = os.path.join(
+                        wandb.run.dir if wandb.run else ".", "main_diff.txt"
+                    )
+                    with open(diff_path, "w") as f:
+                        f.write(working_diff)
+
+                    # Add file to artifact
+                    diff_artifact.add_file(diff_path, name="main_diff.txt")
+                    print("Logged diff against main branch")
+                else:
+                    print("No differences found between main and working tree")
+
+            self.run.log_artifact(diff_artifact)
+
+        except subprocess.CalledProcessError as e:
+            print(f"Error during git operations: {e}")
+        except Exception as e:
+            print(f"Unexpected error during git diff logging: {e}")
+
     def _log_code(self):
         """Log code that is tracked by git to wandb.
 

From f0d481c5f184dd63e69dd16d487da6d6215484aa Mon Sep 17 00:00:00 2001
From: Parth Chadha
Date: Thu, 26 Jun 2025 21:43:33 +0000
Subject: [PATCH 4/4] Add better print message when git repo is missing

Signed-off-by: Parth Chadha
---
NOTE(review): only the message printed under "if not tracked_files:" in
  _log_code() changes. This patch targets nemo_rl/utils/logger.py at line
  229, while PATCH 1-3 target nemo_reinforcer/utils/logger.py around line
  127, so it presupposes a rename and further edits that are not part of
  this series.
NOTE(review): the new text says "No git repository found", but the branch
  is guarded by an empty file list, not by a git failure. With check=True,
  a failing "git ls-files" raises CalledProcessError and, in the handler
  added by PATCH 2/4, prints "Error getting git-tracked files" instead
  (presumably git exits non-zero outside a repository -- confirm). Because
  "".split("\n") is [""], this branch also looks unreachable, so the
  improved warning would never be shown.

 nemo_rl/utils/logger.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/nemo_rl/utils/logger.py b/nemo_rl/utils/logger.py
index 26247251ce..b99ebcc858 100644
--- a/nemo_rl/utils/logger.py
+++ b/nemo_rl/utils/logger.py
@@ -229,7 +229,9 @@ def _log_code(self):
             tracked_files = result.stdout.strip().split("\n")
 
             if not tracked_files:
-                print("No git-tracked files found")
+                print(
+                    "Warning: No git repository found. Wandb logs will not track code changes for reproducibility."
+                )
                 return
 
             code_artifact = wandb.Artifact(