diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index c130cfd2..c5c2c4c5 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -37,3 +37,11 @@ repos:
         pass_filenames: true
         always_run: false
         exclude: ^legacy/
+      - id: validate-git-reset
+        name: Validate git reset after clone/checkout
+        entry: uv
+        args: [run, python, benchmarks/scripts/validate_git_reset.py]
+        language: system
+        types_or: [python, shell]
+        pass_filenames: true
+        always_run: false
diff --git a/benchmarks/commit0/run_infer.py b/benchmarks/commit0/run_infer.py
index df10feb2..777d87ab 100644
--- a/benchmarks/commit0/run_infer.py
+++ b/benchmarks/commit0/run_infer.py
@@ -245,6 +245,12 @@ def prepare_workspace(
             raise RuntimeError(f"Failed to clone repo: {res.stderr}")
         logger.info(f"Cloned repository: {instance.data['repo']}")
 
+        # Reset to ensure clean state at HEAD
+        reset_cmd = f"cd /workspace/{workspace_dir_name} && git reset --hard HEAD"
+        res = workspace.execute_command(reset_cmd, timeout=60)
+        if res.exit_code != 0:
+            raise RuntimeError(f"Failed to reset repo: {res.stderr}")
+
         # Create new branch
         branch_cmd = f"cd /workspace/{workspace_dir_name} && git checkout -b openhands"
         res = workspace.execute_command(branch_cmd, timeout=600)
diff --git a/benchmarks/scripts/validate_git_reset.py b/benchmarks/scripts/validate_git_reset.py
new file mode 100644
index 00000000..9bf1855d
--- /dev/null
+++ b/benchmarks/scripts/validate_git_reset.py
@@ -0,0 +1,178 @@
+"""Validation script to ensure git reset follows git clone/checkout operations.
+
+During benchmark evaluation, we want to test a git repository at a specific commit.
+To prevent the agent from looking at commits that are not part of the benchmark,
+this script validates that every `git clone` or `git checkout` is followed by a
+`git reset` command (or has a comment indicating git reset is not needed).
+
+Usage:
+    validate-git-reset [path...]
+
+If no paths are provided, defaults to scanning the repository root.
+"""
+
+import argparse
+import re
+import sys
+from pathlib import Path
+
+
+# Patterns for finding git clone/checkout commands
+GIT_CLONE_PATTERN = re.compile(r"git\s+clone\b")
+GIT_CHECKOUT_PATTERN = re.compile(r"git\s+checkout\b")
+
+# Pattern for git reset (can be in code or in a comment)
+GIT_RESET_PATTERN = re.compile(r"git\s+reset\b")
+
+# File extensions to check
+CHECK_EXTENSIONS = {".py", ".sh"}
+
+# Files to skip (relative to repository root)
+SKIP_PATTERNS = [
+    "validate_git_reset.py",  # This script itself
+    "test_validate_git_reset.py",  # Test file for this script
+]
+
+
+def should_skip_file(file_path: Path) -> bool:
+    """Check if a file should be skipped from validation."""
+    for pattern in SKIP_PATTERNS:
+        if file_path.name == pattern or pattern in str(file_path):
+            return True
+    return False
+
+
+def find_git_operations(
+    content: str,
+) -> list[tuple[int, str, str]]:
+    """Find all git clone and git checkout operations in the content.
+
+    Returns a list of tuples: (line_number, line_content, operation_type)
+    """
+    operations = []
+    lines = content.split("\n")
+
+    for i, line in enumerate(lines):
+        line_num = i + 1  # 1-indexed line numbers
+        if GIT_CLONE_PATTERN.search(line):
+            operations.append((line_num, line, "git clone"))
+        # Only flag git checkout if it's not "git checkout -b" (creating a branch)
+        # which doesn't need a reset since it's creating a new branch
+        elif GIT_CHECKOUT_PATTERN.search(line):
+            # Skip "git checkout -b" (create branch) as it doesn't need reset
+            if not re.search(r"git\s+checkout\s+-b\b", line):
+                operations.append((line_num, line, "git checkout"))
+
+    return operations
+
+
+def has_git_reset_nearby(
+    content: str,
+    operation_line: int,
+    context_lines_after: int = 20,
+    context_lines_before: int = 5,
+) -> bool:
+    """Check if there's a git reset within context lines around the operation.
+
+    Also accepts git reset in comments as valid (to allow explicit documentation
+    that reset is intentionally skipped).
+    """
+    lines = content.split("\n")
+    start_line = max(0, operation_line - 1 - context_lines_before)
+    end_line = min(operation_line + context_lines_after, len(lines))
+
+    # Check lines before and after the operation
+    for i in range(start_line, end_line):
+        if GIT_RESET_PATTERN.search(lines[i]):
+            return True
+
+    return False
+
+
+def validate_file(file_path: Path) -> list[tuple[int, str, str]]:
+    """Validate a single file for git reset after clone/checkout.
+
+    Returns a list of violations: (line_number, line_content, operation_type)
+    """
+    violations = []
+
+    try:
+        content = file_path.read_text()
+    except (OSError, UnicodeDecodeError):
+        return violations
+
+    operations = find_git_operations(content)
+
+    for line_num, line_content, op_type in operations:
+        if not has_git_reset_nearby(content, line_num):
+            violations.append((line_num, line_content.strip(), op_type))
+
+    return violations
+
+
+def find_files_to_check(paths: list[Path]) -> list[Path]:
+    """Find all files that should be checked for git operations."""
+    files = []
+
+    for path in paths:
+        if path.is_file():
+            if path.suffix in CHECK_EXTENSIONS and not should_skip_file(path):
+                files.append(path)
+        elif path.is_dir():
+            for ext in CHECK_EXTENSIONS:
+                for file in path.rglob(f"*{ext}"):
+                    if not should_skip_file(file):
+                        files.append(file)
+
+    return files
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(
+        description="Validate that git reset follows git clone/checkout operations"
+    )
+    parser.add_argument(
+        "paths",
+        nargs="*",
+        type=Path,
+        default=[Path(".")],
+        help="Paths to check (files or directories). Defaults to current directory.",
+    )
+    args = parser.parse_args()
+
+    files = find_files_to_check(args.paths)
+
+    all_violations: list[tuple[Path, int, str, str]] = []
+
+    for file in files:
+        violations = validate_file(file)
+        for line_num, line_content, op_type in violations:
+            all_violations.append((file, line_num, line_content, op_type))
+
+    if all_violations:
+        print("ERROR: Found git clone/checkout without git reset:", file=sys.stderr)
+        print(file=sys.stderr)
+        for file, line_num, line_content, op_type in all_violations:
+            print(f"  {file}:{line_num}: {op_type}", file=sys.stderr)
+            print(f"    {line_content}", file=sys.stderr)
+            print(file=sys.stderr)
+        print(
+            "To fix: Add 'git reset --hard <commit>' after the git operation,",
+            file=sys.stderr,
+        )
+        print(
+            "or add a comment containing 'git reset' to indicate it's intentional.",
+            file=sys.stderr,
+        )
+        print(
+            "Example: # git reset is not needed here because...",
+            file=sys.stderr,
+        )
+        return 1
+
+    print(f"OK: Checked {len(files)} files, no violations found.")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/legacy/lca_ci_build_repair/eval_infer.py b/legacy/lca_ci_build_repair/eval_infer.py
index 93636515..07cd3958 100644
--- a/legacy/lca_ci_build_repair/eval_infer.py
+++ b/legacy/lca_ci_build_repair/eval_infer.py
@@ -105,6 +105,11 @@ def run_eval(
     obs = runtime.run_action(action)
     assert obs.exit_code == 0
 
+    action = CmdRunAction(command='git reset --hard HEAD')
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    assert obs.exit_code == 0
+
     script_dir = os.path.dirname(
         os.path.abspath(__file__)
     )  # Get the absolute path of the script
diff --git a/legacy/lca_ci_build_repair/run_infer.py b/legacy/lca_ci_build_repair/run_infer.py
index e6938301..d96f185f 100644
--- a/legacy/lca_ci_build_repair/run_infer.py
+++ b/legacy/lca_ci_build_repair/run_infer.py
@@ -132,6 +132,11 @@ def initialize_runtime(
     obs = runtime.run_action(action)
     assert obs.exit_code == 0
 
+    action = CmdRunAction(command='git reset --hard HEAD')
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    assert obs.exit_code == 0
+
     script_dir = os.path.dirname(
         os.path.abspath(__file__)
     )  # Get the absolute path of the script
diff --git a/legacy/ml_bench/run_infer.py b/legacy/ml_bench/run_infer.py
index 48e3d678..7c0b5f12 100644
--- a/legacy/ml_bench/run_infer.py
+++ b/legacy/ml_bench/run_infer.py
@@ -122,6 +122,11 @@ def initialize_runtime(
     obs = runtime.run_action(action)
     assert obs.exit_code == 0
 
+    action = CmdRunAction(command=f'cd /workspace/{repo_name} && git reset --hard HEAD')
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    assert obs.exit_code == 0
+
     action = CmdRunAction(command=f'chmod -R 777 /workspace/{repo_name}')
     logger.info(action, extra={'msg_type': 'ACTION'})
     obs = runtime.run_action(action)
diff --git a/legacy/swefficiency/scripts/setup/prepare_swe_utils.sh b/legacy/swefficiency/scripts/setup/prepare_swe_utils.sh
index c5726a40..f99bcb16 100755
--- a/legacy/swefficiency/scripts/setup/prepare_swe_utils.sh
+++ b/legacy/swefficiency/scripts/setup/prepare_swe_utils.sh
@@ -9,6 +9,7 @@ echo "==== Prepare SWE-bench repo ===="
 OH_SWE_BENCH_REPO_PATH="https://github.com/All-Hands-AI/SWE-bench.git"
 OH_SWE_BENCH_REPO_BRANCH="eval"
 git clone -b $OH_SWE_BENCH_REPO_BRANCH $OH_SWE_BENCH_REPO_PATH $EVAL_WORKSPACE/OH-SWE-bench
+cd $EVAL_WORKSPACE/OH-SWE-bench && git reset --hard HEAD
 
 # 2. Prepare DATA
 echo "==== Prepare SWE-bench data ===="
diff --git a/legacy/testgeneval/scripts/setup/prepare_swe_utils.sh b/legacy/testgeneval/scripts/setup/prepare_swe_utils.sh
index 528224fe..ed14f776 100755
--- a/legacy/testgeneval/scripts/setup/prepare_swe_utils.sh
+++ b/legacy/testgeneval/scripts/setup/prepare_swe_utils.sh
@@ -9,6 +9,7 @@ echo "==== Prepare SWE-bench repo ===="
 OH_SWE_BENCH_REPO_PATH="https://github.com/OpenHands/SWE-bench.git"
 OH_SWE_BENCH_REPO_BRANCH="eval"
 git clone -b $OH_SWE_BENCH_REPO_BRANCH $OH_SWE_BENCH_REPO_PATH $EVAL_WORKSPACE/OH-SWE-bench
+cd $EVAL_WORKSPACE/OH-SWE-bench && git reset --hard HEAD
 
 # 2. Prepare DATA
 echo "==== Prepare SWE-bench data ===="
diff --git a/legacy/utils/version_control.sh b/legacy/utils/version_control.sh
index 5fc58e43..83f9e64c 100644
--- a/legacy/utils/version_control.sh
+++ b/legacy/utils/version_control.sh
@@ -17,12 +17,15 @@ checkout_eval_branch() {
     current_branch=$(git rev-parse --abbrev-ref HEAD)
     echo "Current version is: $current_branch"
     echo "Check out OpenHands to version: $COMMIT_HASH"
+    # git reset is not needed here - this checkout targets a specific commit hash
+    # and is used for version control within the development environment
     if ! git checkout $COMMIT_HASH; then
         echo "Failed to check out to $COMMIT_HASH"
         exit 1
     fi
 
     echo "Revert changes in evaluation folder"
+    # git reset is not needed - restoring evaluation folder from current branch
     git checkout $current_branch -- evaluation
 
     # Trap the EXIT signal to checkout original branch
@@ -36,6 +39,7 @@ checkout_original_branch() {
         return 0
     fi
     echo "Checkout back to original branch $current_branch"
+    # git reset is not needed - restoring to original branch after evaluation
     git checkout $current_branch
 }
 
diff --git a/pyproject.toml b/pyproject.toml
index fc5ed7e0..777ddc27 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,6 +46,7 @@ dependencies = [
 
 [project.scripts]
 validate-cfg = "benchmarks.scripts.validate_cfg:main"
+validate-git-reset = "benchmarks.scripts.validate_git_reset:main"
 swebench-infer = "benchmarks.swebench.run_infer:main"
 swtbench-infer = "benchmarks.swtbench.run_infer:main"
 swebench-eval = "benchmarks.swebench.eval_infer:main"
diff --git a/tests/test_validate_git_reset.py b/tests/test_validate_git_reset.py
new file mode 100644
index 00000000..6dfc8fee
--- /dev/null
+++ b/tests/test_validate_git_reset.py
@@ -0,0 +1,150 @@
+"""Tests for the validate_git_reset script."""
+
+import tempfile
+from pathlib import Path
+
+from benchmarks.scripts.validate_git_reset import (
+    find_git_operations,
+    has_git_reset_nearby,
+    validate_file,
+)
+
+
+class TestFindGitOperations:
+    def test_finds_git_clone(self):
+        content = """
+some code
+git clone https://example.com/repo.git
+more code
+"""
+        operations = find_git_operations(content)
+        assert len(operations) == 1
+        assert operations[0][0] == 3  # line number
+        assert "git clone" in operations[0][1]  # line content
+        assert operations[0][2] == "git clone"  # operation type
+
+    def test_finds_git_checkout(self):
+        content = """
+some code
+git checkout main
+more code
+"""
+        operations = find_git_operations(content)
+        assert len(operations) == 1
+        assert operations[0][0] == 3
+        assert "git checkout" in operations[0][1]
+        assert operations[0][2] == "git checkout"
+
+    def test_ignores_git_checkout_b(self):
+        """git checkout -b creates a new branch, so doesn't need reset."""
+        content = """
+some code
+git checkout -b new-branch
+more code
+"""
+        operations = find_git_operations(content)
+        assert len(operations) == 0
+
+    def test_finds_multiple_operations(self):
+        content = """
+git clone https://repo1.git
+some code
+git clone https://repo2.git
+git checkout feature
+"""
+        operations = find_git_operations(content)
+        assert len(operations) == 3
+
+    def test_finds_operations_in_strings(self):
+        content = """
+action = CmdRunAction(command=f'git clone {repo_url}')
+"""
+        operations = find_git_operations(content)
+        assert len(operations) == 1
+
+
+class TestHasGitResetNearby:
+    def test_finds_reset_after_operation(self):
+        content = """git clone https://example.com/repo.git
+git reset --hard HEAD
+"""
+        assert has_git_reset_nearby(content, 1) is True
+
+    def test_finds_reset_in_comment_after(self):
+        content = """git clone https://example.com/repo.git
+# git reset is not needed here
+"""
+        assert has_git_reset_nearby(content, 1) is True
+
+    def test_finds_reset_in_comment_before(self):
+        content = """# git reset is not needed here because this is a utility
+git checkout main
+"""
+        assert has_git_reset_nearby(content, 2) is True
+
+    def test_no_reset_nearby(self):
+        content = """git clone https://example.com/repo.git
+some other code
+more code
+"""
+        assert has_git_reset_nearby(content, 1, context_lines_after=2) is False
+
+    def test_reset_beyond_context_window(self):
+        content = """git clone https://example.com/repo.git
+line 1
+line 2
+line 3
+line 4
+line 5
+git reset --hard HEAD
+"""
+        # Default context is 20 lines, so this should be found
+        assert has_git_reset_nearby(content, 1) is True
+        # With small context window, should not be found
+        assert has_git_reset_nearby(content, 1, context_lines_after=3) is False
+
+
+class TestValidateFile:
+    def test_valid_file_with_reset(self):
+        with tempfile.NamedTemporaryFile(suffix=".py", mode="w", delete=False) as f:
+            f.write("""
+clone_cmd = 'git clone https://example.com/repo.git'
+os.system(clone_cmd)
+os.system('git reset --hard HEAD')
+""")
+            f.flush()
+            violations = validate_file(Path(f.name))
+            assert len(violations) == 0
+
+    def test_valid_file_with_comment(self):
+        with tempfile.NamedTemporaryFile(suffix=".py", mode="w", delete=False) as f:
+            f.write("""
+# git reset is not needed here - this is a dev environment utility
+cmd = 'git checkout main'
+os.system(cmd)
+""")
+            f.flush()
+            violations = validate_file(Path(f.name))
+            assert len(violations) == 0
+
+    def test_invalid_file_missing_reset(self):
+        with tempfile.NamedTemporaryFile(suffix=".py", mode="w", delete=False) as f:
+            f.write("""
+clone_cmd = 'git clone https://example.com/repo.git'
+os.system(clone_cmd)
+# no reset here
+""")
+            f.flush()
+            violations = validate_file(Path(f.name))
+            assert len(violations) == 1
+            assert violations[0][2] == "git clone"
+
+    def test_shell_script_with_reset(self):
+        with tempfile.NamedTemporaryFile(suffix=".sh", mode="w", delete=False) as f:
+            f.write("""#!/bin/bash
+git clone https://example.com/repo.git /workspace/repo
+cd /workspace/repo && git reset --hard HEAD
+""")
+            f.flush()
+            violations = validate_file(Path(f.name))
+            assert len(violations) == 0