OpenHands · juanmichelini · Feb 17, 2026
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -37,3 +37,11 @@ repos:
         pass_filenames: true
         always_run: false
         exclude: ^legacy/
+      - id: validate-git-reset
+        name: Validate git reset after clone/checkout
+        entry: uv
+        args: [run, python, benchmarks/scripts/validate_git_reset.py]
+        language: system
+        types_or: [python, shell]
+        pass_filenames: true
+        always_run: false
diff --git a/benchmarks/commit0/run_infer.py b/benchmarks/commit0/run_infer.py
@@ -245,6 +245,12 @@ def prepare_workspace(
             raise RuntimeError(f"Failed to clone repo: {res.stderr}")
         logger.info(f"Cloned repository: {instance.data['repo']}")
 
+        # Reset to ensure clean state at HEAD
+        reset_cmd = f"cd /workspace/{workspace_dir_name} && git reset --hard HEAD"
+        res = workspace.execute_command(reset_cmd, timeout=60)
+        if res.exit_code != 0:
+            raise RuntimeError(f"Failed to reset repo: {res.stderr}")
+
         # Create new branch
         branch_cmd = f"cd /workspace/{workspace_dir_name} && git checkout -b openhands"
         res = workspace.execute_command(branch_cmd, timeout=600)

diff --git a/benchmarks/scripts/validate_git_reset.py b/benchmarks/scripts/validate_git_reset.py
@@ -0,0 +1,178 @@
+"""Validation script to ensure git reset follows git clone/checkout operations.
+
+During benchmark evaluation, we want to test a git repository at a specific commit.
+To prevent the agent from looking at commits that are not part of the benchmark,
+this script validates that every `git clone` or `git checkout` (except
+`git checkout -b`) has a `git reset` nearby, in code or in an explanatory comment.
+
+Usage:
+    validate-git-reset [path...]
+
+If no paths are provided, defaults to scanning the current working directory.
+"""
+
+import argparse
+import re
+import sys
+from pathlib import Path
+
+
+# Patterns for finding git clone/checkout commands
+GIT_CLONE_PATTERN = re.compile(r"git\s+clone\b")
+GIT_CHECKOUT_PATTERN = re.compile(r"git\s+checkout\b")
+
+# Pattern for git reset (can be in code or in a comment)
+GIT_RESET_PATTERN = re.compile(r"git\s+reset\b")
+
+# File extensions to check
+CHECK_EXTENSIONS = {".py", ".sh"}
+
+# Files to skip (matched against the file name or as a substring of the path)
+SKIP_PATTERNS = [
+    "validate_git_reset.py",  # This script itself
+    "test_validate_git_reset.py",  # Test file for this script
+]
+
+
+def should_skip_file(file_path: Path) -> bool:
+    """Tell whether ``file_path`` is exempt from validation."""
+    # A file name equal to a pattern also contains it as a substring of the
+    # full path, so one containment test per pattern covers both checks.
+    path_text = str(file_path)
+    return any(pattern in path_text for pattern in SKIP_PATTERNS)
+
+
+def find_git_operations(
+    content: str,
+) -> list[tuple[int, str, str]]:
+    """Locate the git clone and git checkout commands in ``content``.
+
+    Each line is reported at most once, and a line mentioning ``git clone``
+    is always classified as a clone. A checkout line containing
+    ``git checkout -b`` is ignored because it only creates a new branch.
+
+    Returns a list of tuples: (line_number, line_content, operation_type),
+    with 1-indexed line numbers.
+    """
+    found: list[tuple[int, str, str]] = []
+    for line_num, text in enumerate(content.split("\n"), start=1):
+        if GIT_CLONE_PATTERN.search(text):
+            found.append((line_num, text, "git clone"))
+            continue
+        if not GIT_CHECKOUT_PATTERN.search(text):
+            continue
+        if re.search(r"git\s+checkout\s+-b\b", text) is None:
+            found.append((line_num, text, "git checkout"))
+    return found
+
+
+def has_git_reset_nearby(
+    content: str,
+    operation_line: int,
+    context_lines_after: int = 20,
+    context_lines_before: int = 5,
+) -> bool:
+    """Report whether ``git reset`` appears close to an operation.
+
+    The window spans ``context_lines_before`` lines above the 1-indexed
+    ``operation_line``, the operation line itself and ``context_lines_after``
+    lines below it, clipped to the bounds of ``content``.
+
+    Any textual match counts, so a comment mentioning git reset (for example
+    to document that the reset is intentionally skipped) also satisfies it.
+    """
+    lines = content.split("\n")
+    # Convert the 1-indexed operation line into a half-open 0-indexed range.
+    first = max(0, operation_line - 1 - context_lines_before)
+    last = min(operation_line + context_lines_after, len(lines))
+    window = range(first, last)
+    return any(GIT_RESET_PATTERN.search(lines[idx]) for idx in window)
+
+
+def validate_file(file_path: Path) -> list[tuple[int, str, str]]:
+    """Validate a single file for git reset near each git clone/checkout.
+
+    The file is decoded as UTF-8 regardless of the locale, so the result does
+    not depend on the machine running the check. Files that cannot be read or
+    decoded are skipped and reported as having no violations.
+
+    Returns a list of violations: (line_number, line_content, operation_type),
+    where line_content has surrounding whitespace stripped.
+    """
+    try:
+        content = file_path.read_text(encoding="utf-8")
+    except (OSError, UnicodeDecodeError):
+        return []
+    return [
+        (line_num, line_content.strip(), op_type)
+        for line_num, line_content, op_type in find_git_operations(content)
+        if not has_git_reset_nearby(content, line_num)
+    ]
+
+
+def find_files_to_check(paths: list[Path]) -> list[Path]:
+    """Collect the files under ``paths`` that should be validated.
+
+    Keeps regular files whose suffix is in CHECK_EXTENSIONS, searching
+    directories recursively, unless should_skip_file() matches them. Duplicate
+    paths are dropped and the result is sorted so the report order is stable.
+    """
+    found: set[Path] = set()
+    for path in paths:
+        candidates = path.rglob("*") if path.is_dir() else [path]
+        for item in candidates:
+            wanted = item.suffix in CHECK_EXTENSIONS and not should_skip_file(item)
+            if wanted and item.is_file():
+                found.add(item)
+    return sorted(found)
+
+
+def main() -> int:
+    """Command-line entry point for the git reset validator.
+
+    Checks the paths given on the command line and reports every violation
+    to stderr. Returns 1 when at least one violation is found, otherwise 0.
+    """
+    parser = argparse.ArgumentParser(
+        description="Validate that git reset follows git clone/checkout operations"
+    )
+    parser.add_argument(
+        "paths",
+        nargs="*",
+        type=Path,
+        default=[Path(".")],
+        help="Paths to check (files or directories). Defaults to current directory.",
+    )
+    args = parser.parse_args()
+    files = find_files_to_check(args.paths)
+
+    # Flatten per-file results into (file, line_number, line_content, operation).
+    all_violations: list[tuple[Path, int, str, str]] = [
+        (file, line_num, line_content, op_type)
+        for file in files
+        for line_num, line_content, op_type in validate_file(file)
+    ]
+
+    if not all_violations:
+        print(f"OK: Checked {len(files)} files, no violations found.")
+        return 0
+
+    print("ERROR: Found git clone/checkout without git reset:", file=sys.stderr)
+    print(file=sys.stderr)
+    for file, line_num, line_content, op_type in all_violations:
+        print(f"  {file}:{line_num}: {op_type}", file=sys.stderr)
+        print(f"    {line_content}", file=sys.stderr)
+        print(file=sys.stderr)
+    # Remediation hint: either add the reset or document why it is skipped.
+    advice = (
+        "To fix: Add 'git reset --hard <commit>' after the git operation,",
+        "or add a comment containing 'git reset' to indicate it's intentional.",
+        "Example: # git reset is not needed here because...",
+    )
+    for hint in advice:
+        print(hint, file=sys.stderr)
+    return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/legacy/lca_ci_build_repair/eval_infer.py b/legacy/lca_ci_build_repair/eval_infer.py
@@ -105,6 +105,11 @@ def run_eval(
     obs = runtime.run_action(action)
     assert obs.exit_code == 0
 
+    action = CmdRunAction(command='git reset --hard HEAD')
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    assert obs.exit_code == 0
+
     script_dir = os.path.dirname(
         os.path.abspath(__file__)
     )  # Get the absolute path of the script

diff --git a/legacy/lca_ci_build_repair/run_infer.py b/legacy/lca_ci_build_repair/run_infer.py
@@ -132,6 +132,11 @@ def initialize_runtime(
     obs = runtime.run_action(action)
     assert obs.exit_code == 0
 
+    action = CmdRunAction(command='git reset --hard HEAD')
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    assert obs.exit_code == 0
+
     script_dir = os.path.dirname(
         os.path.abspath(__file__)
     )  # Get the absolute path of the script

diff --git a/legacy/ml_bench/run_infer.py b/legacy/ml_bench/run_infer.py
@@ -122,6 +122,11 @@ def initialize_runtime(
     obs = runtime.run_action(action)
     assert obs.exit_code == 0
 
+    action = CmdRunAction(command=f'cd /workspace/{repo_name} && git reset --hard HEAD')
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    assert obs.exit_code == 0
+
     action = CmdRunAction(command=f'chmod -R 777 /workspace/{repo_name}')
     logger.info(action, extra={'msg_type': 'ACTION'})
     obs = runtime.run_action(action)

diff --git a/legacy/swefficiency/scripts/setup/prepare_swe_utils.sh b/legacy/swefficiency/scripts/setup/prepare_swe_utils.sh
@@ -9,6 +9,7 @@ echo "==== Prepare SWE-bench repo ===="
 OH_SWE_BENCH_REPO_PATH="https://github.com/All-Hands-AI/SWE-bench.git"
 OH_SWE_BENCH_REPO_BRANCH="eval"
 git clone -b $OH_SWE_BENCH_REPO_BRANCH $OH_SWE_BENCH_REPO_PATH $EVAL_WORKSPACE/OH-SWE-bench
+(cd "$EVAL_WORKSPACE/OH-SWE-bench" && git reset --hard HEAD)  # subshell: leave the script's cwd unchanged
 
 # 2. Prepare DATA
 echo "==== Prepare SWE-bench data ===="

diff --git a/legacy/testgeneval/scripts/setup/prepare_swe_utils.sh b/legacy/testgeneval/scripts/setup/prepare_swe_utils.sh
@@ -9,6 +9,7 @@ echo "==== Prepare SWE-bench repo ===="
 OH_SWE_BENCH_REPO_PATH="https://github.com/OpenHands/SWE-bench.git"
 OH_SWE_BENCH_REPO_BRANCH="eval"
 git clone -b $OH_SWE_BENCH_REPO_BRANCH $OH_SWE_BENCH_REPO_PATH $EVAL_WORKSPACE/OH-SWE-bench
+(cd "$EVAL_WORKSPACE/OH-SWE-bench" && git reset --hard HEAD)  # subshell: leave the script's cwd unchanged
 
 # 2. Prepare DATA
 echo "==== Prepare SWE-bench data ===="

diff --git a/legacy/utils/version_control.sh b/legacy/utils/version_control.sh
@@ -17,12 +17,15 @@ checkout_eval_branch() {
     current_branch=$(git rev-parse --abbrev-ref HEAD)
     echo "Current version is: $current_branch"
     echo "Check out OpenHands to version: $COMMIT_HASH"
+    # git reset is not needed here - this checkout targets a specific commit hash
+    # and is used for version control within the development environment
     if ! git checkout $COMMIT_HASH; then
         echo "Failed to check out to $COMMIT_HASH"
         exit 1
     fi
 
     echo "Revert changes in evaluation folder"
+    # git reset is not needed - restoring evaluation folder from current branch
     git checkout $current_branch -- evaluation
 
     # Trap the EXIT signal to checkout original branch
@@ -36,6 +39,7 @@ checkout_original_branch() {
         return 0
     fi
     echo "Checkout back to original branch $current_branch"
+    # git reset is not needed - restoring to original branch after evaluation
     git checkout $current_branch
 }
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -46,6 +46,7 @@ dependencies = [
 
 [project.scripts]
 validate-cfg = "benchmarks.scripts.validate_cfg:main"
+validate-git-reset = "benchmarks.scripts.validate_git_reset:main"
 swebench-infer = "benchmarks.swebench.run_infer:main"
 swtbench-infer = "benchmarks.swtbench.run_infer:main"
 swebench-eval = "benchmarks.swebench.eval_infer:main"