diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c130cfd2..c5c2c4c5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -37,3 +37,11 @@ repos: pass_filenames: true always_run: false exclude: ^legacy/ + - id: validate-git-reset + name: Validate git reset after clone/checkout + entry: uv + args: [run, python, benchmarks/scripts/validate_git_reset.py] + language: system + types_or: [python, shell] + pass_filenames: true + always_run: false diff --git a/benchmarks/commit0/run_infer.py b/benchmarks/commit0/run_infer.py index df10feb2..777d87ab 100644 --- a/benchmarks/commit0/run_infer.py +++ b/benchmarks/commit0/run_infer.py @@ -245,6 +245,12 @@ def prepare_workspace( raise RuntimeError(f"Failed to clone repo: {res.stderr}") logger.info(f"Cloned repository: {instance.data['repo']}") + # Reset to ensure clean state at HEAD + reset_cmd = f"cd /workspace/{workspace_dir_name} && git reset --hard HEAD" + res = workspace.execute_command(reset_cmd, timeout=60) + if res.exit_code != 0: + raise RuntimeError(f"Failed to reset repo: {res.stderr}") + # Create new branch branch_cmd = f"cd /workspace/{workspace_dir_name} && git checkout -b openhands" res = workspace.execute_command(branch_cmd, timeout=600) diff --git a/benchmarks/scripts/validate_git_reset.py b/benchmarks/scripts/validate_git_reset.py new file mode 100644 index 00000000..9bf1855d --- /dev/null +++ b/benchmarks/scripts/validate_git_reset.py @@ -0,0 +1,178 @@ +"""Validation script to ensure git reset follows git clone/checkout operations. + +During benchmark evaluation, we want to test a git repository at a specific commit. +To prevent the agent from looking at commits that are not part of the benchmark, +this script validates that every `git clone` or `git checkout` is followed by a +`git reset` command (or has a comment indicating git reset is not needed). + +Usage: + validate-git-reset [path...] + +If no paths are provided, defaults to scanning the repository root. +""" + +import argparse +import re +import sys +from pathlib import Path + + +# Patterns for finding git clone/checkout commands +GIT_CLONE_PATTERN = re.compile(r"git\s+clone\b") +GIT_CHECKOUT_PATTERN = re.compile(r"git\s+checkout\b") + +# Pattern for git reset (can be in code or in a comment) +GIT_RESET_PATTERN = re.compile(r"git\s+reset\b") + +# File extensions to check +CHECK_EXTENSIONS = {".py", ".sh"} + +# Files to skip (relative to repository root) +SKIP_PATTERNS = [ + "validate_git_reset.py", # This script itself + "test_validate_git_reset.py", # Test file for this script +] + + +def should_skip_file(file_path: Path) -> bool: + """Check if a file should be skipped from validation.""" + for pattern in SKIP_PATTERNS: + if file_path.name == pattern or pattern in str(file_path): + return True + return False + + +def find_git_operations( + content: str, +) -> list[tuple[int, str, str]]: + """Find all git clone and git checkout operations in the content. + + Returns a list of tuples: (line_number, line_content, operation_type) + """ + operations = [] + lines = content.split("\n") + + for i, line in enumerate(lines): + line_num = i + 1 # 1-indexed line numbers + if GIT_CLONE_PATTERN.search(line): + operations.append((line_num, line, "git clone")) + # Only flag git checkout if it's not "git checkout -b" (creating a branch) + # which doesn't need a reset since it's creating a new branch + elif GIT_CHECKOUT_PATTERN.search(line): + # Skip "git checkout -b" (create branch) as it doesn't need reset + if not re.search(r"git\s+checkout\s+-b\b", line): + operations.append((line_num, line, "git checkout")) + + return operations + + +def has_git_reset_nearby( + content: str, + operation_line: int, + context_lines_after: int = 20, + context_lines_before: int = 5, +) -> bool: + """Check if there's a git reset within context lines around the operation. + + Also accepts git reset in comments as valid (to allow explicit documentation + that reset is intentionally skipped). + """ + lines = content.split("\n") + start_line = max(0, operation_line - 1 - context_lines_before) + end_line = min(operation_line + context_lines_after, len(lines)) + + # Check lines before and after the operation + for i in range(start_line, end_line): + if GIT_RESET_PATTERN.search(lines[i]): + return True + + return False + + +def validate_file(file_path: Path) -> list[tuple[int, str, str]]: + """Validate a single file for git reset after clone/checkout. + + Returns a list of violations: (line_number, line_content, operation_type) + """ + violations = [] + + try: + content = file_path.read_text() + except (OSError, UnicodeDecodeError): + return violations + + operations = find_git_operations(content) + + for line_num, line_content, op_type in operations: + if not has_git_reset_nearby(content, line_num): + violations.append((line_num, line_content.strip(), op_type)) + + return violations + + +def find_files_to_check(paths: list[Path]) -> list[Path]: + """Find all files that should be checked for git operations.""" + files = [] + + for path in paths: + if path.is_file(): + if path.suffix in CHECK_EXTENSIONS and not should_skip_file(path): + files.append(path) + elif path.is_dir(): + for ext in CHECK_EXTENSIONS: + for file in path.rglob(f"*{ext}"): + if not should_skip_file(file): + files.append(file) + + return files + + +def main() -> int: + parser = argparse.ArgumentParser( + description="Validate that git reset follows git clone/checkout operations" + ) + parser.add_argument( + "paths", + nargs="*", + type=Path, + default=[Path(".")], + help="Paths to check (files or directories). Defaults to current directory.", + ) + args = parser.parse_args() + + files = find_files_to_check(args.paths) + + all_violations: list[tuple[Path, int, str, str]] = [] + + for file in files: + violations = validate_file(file) + for line_num, line_content, op_type in violations: + all_violations.append((file, line_num, line_content, op_type)) + + if all_violations: + print("ERROR: Found git clone/checkout without git reset:", file=sys.stderr) + print(file=sys.stderr) + for file, line_num, line_content, op_type in all_violations: + print(f" {file}:{line_num}: {op_type}", file=sys.stderr) + print(f" {line_content}", file=sys.stderr) + print(file=sys.stderr) + print( + "To fix: Add 'git reset --hard ' after the git operation,", + file=sys.stderr, + ) + print( + "or add a comment containing 'git reset' to indicate it's intentional.", + file=sys.stderr, + ) + print( + "Example: # git reset is not needed here because...", + file=sys.stderr, + ) + return 1 + + print(f"OK: Checked {len(files)} files, no violations found.") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/legacy/lca_ci_build_repair/eval_infer.py b/legacy/lca_ci_build_repair/eval_infer.py index 93636515..07cd3958 100644 --- a/legacy/lca_ci_build_repair/eval_infer.py +++ b/legacy/lca_ci_build_repair/eval_infer.py @@ -105,6 +105,11 @@ def run_eval( obs = runtime.run_action(action) assert obs.exit_code == 0 + action = CmdRunAction(command='git reset --hard HEAD') + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = runtime.run_action(action) + assert obs.exit_code == 0 + script_dir = os.path.dirname( os.path.abspath(__file__) ) # Get the absolute path of the script diff --git a/legacy/lca_ci_build_repair/run_infer.py b/legacy/lca_ci_build_repair/run_infer.py index e6938301..d96f185f 100644 --- a/legacy/lca_ci_build_repair/run_infer.py +++ b/legacy/lca_ci_build_repair/run_infer.py @@ -132,6 +132,11 @@ def initialize_runtime( obs = runtime.run_action(action) assert obs.exit_code == 0 + action = CmdRunAction(command='git reset --hard HEAD') + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = runtime.run_action(action) + assert obs.exit_code == 0 + script_dir = os.path.dirname( os.path.abspath(__file__) ) # Get the absolute path of the script diff --git a/legacy/ml_bench/run_infer.py b/legacy/ml_bench/run_infer.py index 48e3d678..7c0b5f12 100644 --- a/legacy/ml_bench/run_infer.py +++ b/legacy/ml_bench/run_infer.py @@ -122,6 +122,11 @@ def initialize_runtime( obs = runtime.run_action(action) assert obs.exit_code == 0 + action = CmdRunAction(command=f'cd /workspace/{repo_name} && git reset --hard HEAD') + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = runtime.run_action(action) + assert obs.exit_code == 0 + action = CmdRunAction(command=f'chmod -R 777 /workspace/{repo_name}') logger.info(action, extra={'msg_type': 'ACTION'}) obs = runtime.run_action(action) diff --git a/legacy/swefficiency/scripts/setup/prepare_swe_utils.sh b/legacy/swefficiency/scripts/setup/prepare_swe_utils.sh index c5726a40..f99bcb16 100755 --- a/legacy/swefficiency/scripts/setup/prepare_swe_utils.sh +++ b/legacy/swefficiency/scripts/setup/prepare_swe_utils.sh @@ -9,6 +9,7 @@ echo "==== Prepare SWE-bench repo ====" OH_SWE_BENCH_REPO_PATH="https://github.com/All-Hands-AI/SWE-bench.git" OH_SWE_BENCH_REPO_BRANCH="eval" git clone -b $OH_SWE_BENCH_REPO_BRANCH $OH_SWE_BENCH_REPO_PATH $EVAL_WORKSPACE/OH-SWE-bench +cd $EVAL_WORKSPACE/OH-SWE-bench && git reset --hard HEAD # 2. Prepare DATA echo "==== Prepare SWE-bench data ====" diff --git a/legacy/testgeneval/scripts/setup/prepare_swe_utils.sh b/legacy/testgeneval/scripts/setup/prepare_swe_utils.sh index 528224fe..ed14f776 100755 --- a/legacy/testgeneval/scripts/setup/prepare_swe_utils.sh +++ b/legacy/testgeneval/scripts/setup/prepare_swe_utils.sh @@ -9,6 +9,7 @@ echo "==== Prepare SWE-bench repo ====" OH_SWE_BENCH_REPO_PATH="https://github.com/OpenHands/SWE-bench.git" OH_SWE_BENCH_REPO_BRANCH="eval" git clone -b $OH_SWE_BENCH_REPO_BRANCH $OH_SWE_BENCH_REPO_PATH $EVAL_WORKSPACE/OH-SWE-bench +cd $EVAL_WORKSPACE/OH-SWE-bench && git reset --hard HEAD # 2. Prepare DATA echo "==== Prepare SWE-bench data ====" diff --git a/legacy/utils/version_control.sh b/legacy/utils/version_control.sh index 5fc58e43..83f9e64c 100644 --- a/legacy/utils/version_control.sh +++ b/legacy/utils/version_control.sh @@ -17,12 +17,15 @@ checkout_eval_branch() { current_branch=$(git rev-parse --abbrev-ref HEAD) echo "Current version is: $current_branch" echo "Check out OpenHands to version: $COMMIT_HASH" + # git reset is not needed here - this checkout targets a specific commit hash + # and is used for version control within the development environment if ! git checkout $COMMIT_HASH; then echo "Failed to check out to $COMMIT_HASH" exit 1 fi echo "Revert changes in evaluation folder" + # git reset is not needed - restoring evaluation folder from current branch git checkout $current_branch -- evaluation # Trap the EXIT signal to checkout original branch @@ -36,6 +39,7 @@ checkout_original_branch() { return 0 fi echo "Checkout back to original branch $current_branch" + # git reset is not needed - restoring to original branch after evaluation git checkout $current_branch } diff --git a/pyproject.toml b/pyproject.toml index fc5ed7e0..777ddc27 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,6 +46,7 @@ dependencies = [ [project.scripts] validate-cfg = "benchmarks.scripts.validate_cfg:main" +validate-git-reset = "benchmarks.scripts.validate_git_reset:main" swebench-infer = "benchmarks.swebench.run_infer:main" swtbench-infer = "benchmarks.swtbench.run_infer:main" swebench-eval = "benchmarks.swebench.eval_infer:main" diff --git a/tests/test_validate_git_reset.py b/tests/test_validate_git_reset.py new file mode 100644 index 00000000..6dfc8fee --- /dev/null +++ b/tests/test_validate_git_reset.py @@ -0,0 +1,150 @@ +"""Tests for the validate_git_reset script.""" + +import tempfile +from pathlib import Path + +from benchmarks.scripts.validate_git_reset import ( + find_git_operations, + has_git_reset_nearby, + validate_file, +) + + +class TestFindGitOperations: + def test_finds_git_clone(self): + content = """ +some code +git clone https://example.com/repo.git +more code +""" + operations = find_git_operations(content) + assert len(operations) == 1 + assert operations[0][0] == 3 # line number + assert "git clone" in operations[0][1] # line content + assert operations[0][2] == "git clone" # operation type + + def test_finds_git_checkout(self): + content = """ +some code +git checkout main +more code +""" + operations = find_git_operations(content) + assert len(operations) == 1 + assert operations[0][0] == 3 + assert "git checkout" in operations[0][1] + assert operations[0][2] == "git checkout" + + def test_ignores_git_checkout_b(self): + """git checkout -b creates a new branch, so doesn't need reset.""" + content = """ +some code +git checkout -b new-branch +more code +""" + operations = find_git_operations(content) + assert len(operations) == 0 + + def test_finds_multiple_operations(self): + content = """ +git clone https://repo1.git +some code +git clone https://repo2.git +git checkout feature +""" + operations = find_git_operations(content) + assert len(operations) == 3 + + def test_finds_operations_in_strings(self): + content = """ +action = CmdRunAction(command=f'git clone {repo_url}') +""" + operations = find_git_operations(content) + assert len(operations) == 1 + + +class TestHasGitResetNearby: + def test_finds_reset_after_operation(self): + content = """git clone https://example.com/repo.git +git reset --hard HEAD +""" + assert has_git_reset_nearby(content, 1) is True + + def test_finds_reset_in_comment_after(self): + content = """git clone https://example.com/repo.git +# git reset is not needed here +""" + assert has_git_reset_nearby(content, 1) is True + + def test_finds_reset_in_comment_before(self): + content = """# git reset is not needed here because this is a utility +git checkout main +""" + assert has_git_reset_nearby(content, 2) is True + + def test_no_reset_nearby(self): + content = """git clone https://example.com/repo.git +some other code +more code +""" + assert has_git_reset_nearby(content, 1, context_lines_after=2) is False + + def test_reset_beyond_context_window(self): + content = """git clone https://example.com/repo.git +line 1 +line 2 +line 3 +line 4 +line 5 +git reset --hard HEAD +""" + # Default context is 20 lines, so this should be found + assert has_git_reset_nearby(content, 1) is True + # With small context window, should not be found + assert has_git_reset_nearby(content, 1, context_lines_after=3) is False + + +class TestValidateFile: + def test_valid_file_with_reset(self): + with tempfile.NamedTemporaryFile(suffix=".py", mode="w", delete=False) as f: + f.write(""" +clone_cmd = 'git clone https://example.com/repo.git' +os.system(clone_cmd) +os.system('git reset --hard HEAD') +""") + f.flush() + violations = validate_file(Path(f.name)) + assert len(violations) == 0 + + def test_valid_file_with_comment(self): + with tempfile.NamedTemporaryFile(suffix=".py", mode="w", delete=False) as f: + f.write(""" +# git reset is not needed here - this is a dev environment utility +cmd = 'git checkout main' +os.system(cmd) +""") + f.flush() + violations = validate_file(Path(f.name)) + assert len(violations) == 0 + + def test_invalid_file_missing_reset(self): + with tempfile.NamedTemporaryFile(suffix=".py", mode="w", delete=False) as f: + f.write(""" +clone_cmd = 'git clone https://example.com/repo.git' +os.system(clone_cmd) +# no reset here +""") + f.flush() + violations = validate_file(Path(f.name)) + assert len(violations) == 1 + assert violations[0][2] == "git clone" + + def test_shell_script_with_reset(self): + with tempfile.NamedTemporaryFile(suffix=".sh", mode="w", delete=False) as f: + f.write("""#!/bin/bash +git clone https://example.com/repo.git /workspace/repo +cd /workspace/repo && git reset --hard HEAD +""") + f.flush() + violations = validate_file(Path(f.name)) + assert len(violations) == 0