Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,11 @@ repos:
pass_filenames: true
always_run: false
exclude: ^legacy/
- id: validate-git-reset
name: Validate git reset after clone/checkout
entry: uv
args: [run, python, benchmarks/scripts/validate_git_reset.py]
language: system
types_or: [python, shell]
pass_filenames: true
always_run: false
6 changes: 6 additions & 0 deletions benchmarks/commit0/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,12 @@ def prepare_workspace(
raise RuntimeError(f"Failed to clone repo: {res.stderr}")
logger.info(f"Cloned repository: {instance.data['repo']}")

# Reset to ensure clean state at HEAD
reset_cmd = f"cd /workspace/{workspace_dir_name} && git reset --hard HEAD"
res = workspace.execute_command(reset_cmd, timeout=60)
if res.exit_code != 0:
raise RuntimeError(f"Failed to reset repo: {res.stderr}")

# Create new branch
branch_cmd = f"cd /workspace/{workspace_dir_name} && git checkout -b openhands"
res = workspace.execute_command(branch_cmd, timeout=600)
Expand Down
178 changes: 178 additions & 0 deletions benchmarks/scripts/validate_git_reset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
"""Validation script to ensure git reset follows git clone/checkout operations.

During benchmark evaluation, we want to test a git repository at a specific commit.
To prevent the agent from looking at commits that are not part of the benchmark,
this script validates that every `git clone` or `git checkout` has a `git reset`
command within a few lines of it (or a nearby comment mentioning `git reset` to
indicate that a reset is not needed).

Usage:
validate-git-reset [path...]

If no paths are provided, defaults to scanning the current directory.
"""

import argparse
import re
import sys
from pathlib import Path


# Regexes that recognise the git commands which fetch or switch revisions.
GIT_CLONE_PATTERN = re.compile(r"git\s+clone\b")
GIT_CHECKOUT_PATTERN = re.compile(r"git\s+checkout\b")

# Regex for the accompanying `git reset`. It is searched against whole lines,
# so a mention inside a comment matches just like a real command.
GIT_RESET_PATTERN = re.compile(r"git\s+reset\b")

# Only files carrying one of these suffixes are scanned.
CHECK_EXTENSIONS = {".py", ".sh"}

# File names that are exempt from scanning.
SKIP_PATTERNS = [
    # This script itself
    "validate_git_reset.py",
    # Test file for this script
    "test_validate_git_reset.py",
]


def should_skip_file(file_path: Path) -> bool:
    """Return True when *file_path* is exempt from validation.

    Every entry of ``SKIP_PATTERNS`` is matched against the trailing path
    components of *file_path* with ``PurePath.match``. An entry such as
    ``"validate_git_reset.py"`` therefore exempts a file with exactly that
    name in any directory, and an entry containing ``/`` must line up with
    whole directory components.

    A plain substring test is deliberately not used: it would also exempt
    unrelated files whose path merely contains a pattern (for example
    ``my_validate_git_reset.py``), letting them bypass the check.
    """
    return any(file_path.match(pattern) for pattern in SKIP_PATTERNS)


def find_git_operations(
    content: str,
) -> list[tuple[int, str, str]]:
    """Locate the lines of *content* that run ``git clone`` or ``git checkout``.

    Returns a list of ``(line_number, line_content, operation_type)`` tuples,
    where ``line_number`` is 1-indexed, ``line_content`` is the unmodified
    line and ``operation_type`` is ``"git clone"`` or ``"git checkout"``.
    A line is reported at most once; ``git clone`` takes precedence.
    """
    found: list[tuple[int, str, str]] = []

    for line_num, text in enumerate(content.split("\n"), start=1):
        if GIT_CLONE_PATTERN.search(text):
            found.append((line_num, text, "git clone"))
            continue

        if not GIT_CHECKOUT_PATTERN.search(text):
            continue

        # "git checkout -b" only creates a new branch, so it needs no reset
        # and is not reported.
        if re.search(r"git\s+checkout\s+-b\b", text):
            continue

        found.append((line_num, text, "git checkout"))

    return found


def has_git_reset_nearby(
    content: str,
    operation_line: int,
    context_lines_after: int = 20,
    context_lines_before: int = 5,
) -> bool:
    """Report whether ``git reset`` appears close to a git operation.

    The window covers the operation's own line (``operation_line``,
    1-indexed), up to ``context_lines_before`` lines above it and up to
    ``context_lines_after`` lines below it, clipped to the bounds of
    *content*.

    The match is purely textual, so a comment mentioning ``git reset`` also
    counts. This lets authors document that a reset is intentionally omitted.
    """
    all_lines = content.split("\n")

    # Convert the 1-indexed line number into a half-open range of 0-based
    # indices: [first, stop).
    op_index = operation_line - 1
    first = max(0, op_index - context_lines_before)
    stop = min(operation_line + context_lines_after, len(all_lines))

    return any(
        GIT_RESET_PATTERN.search(all_lines[idx]) is not None
        for idx in range(first, stop)
    )


def validate_file(file_path: Path) -> list[tuple[int, str, str]]:
    """Validate a single file for git reset near every clone/checkout.

    Returns a list of violations as ``(line_number, line_content,
    operation_type)`` tuples, where ``line_content`` has surrounding
    whitespace stripped. The list is empty when the file is clean or cannot
    be read.

    The file is always decoded as UTF-8 so the result does not depend on the
    platform's locale encoding. Undecodable bytes are replaced rather than
    raising, so a stray non-UTF-8 byte cannot make the whole file silently
    escape validation.
    """
    try:
        content = file_path.read_text(encoding="utf-8", errors="replace")
    except OSError as exc:
        # Stay best-effort (an unreadable file is not a violation), but make
        # the skip visible instead of silently reporting the file as clean.
        print(f"WARNING: could not read {file_path}: {exc}", file=sys.stderr)
        return []

    return [
        (line_num, line_content.strip(), op_type)
        for line_num, line_content, op_type in find_git_operations(content)
        if not has_git_reset_nearby(content, line_num)
    ]


def find_files_to_check(paths: list[Path]) -> list[Path]:
    """Find all files that should be checked for git operations.

    Each entry of *paths* may be a file or a directory:

    * a file is included when its suffix is in ``CHECK_EXTENSIONS``;
    * a directory is searched recursively for regular files whose names end
      in one of ``CHECK_EXTENSIONS``;
    * anything else (for example a path that does not exist) contributes
      nothing.

    Files rejected by ``should_skip_file`` are left out. Every file is
    returned at most once even when the given paths overlap, and the files
    found under a directory are returned in sorted order so that reports are
    deterministic.
    """
    files: list[Path] = []
    seen: set[Path] = set()

    for path in paths:
        if path.is_file():
            candidates = [path] if path.suffix in CHECK_EXTENSIONS else []
        elif path.is_dir():
            matched = {
                match for ext in CHECK_EXTENSIONS for match in path.rglob(f"*{ext}")
            }
            # rglob also yields directories whose names end in the suffix;
            # only regular files can be read and validated.
            candidates = sorted(match for match in matched if match.is_file())
        else:
            candidates = []

        for candidate in candidates:
            if should_skip_file(candidate):
                continue
            # De-duplicate on the resolved location so the same file reached
            # through two different arguments is only reported once.
            key = candidate.resolve()
            if key in seen:
                continue
            seen.add(key)
            files.append(candidate)

    return files


def main() -> int:
    """Command-line entry point.

    Parses the optional ``paths`` arguments (defaulting to the current
    directory), validates every selected file, and prints a report.

    Returns 0 when no violation is found and 1 otherwise, so the value can be
    used directly as the process exit status.
    """
    parser = argparse.ArgumentParser(
        description="Validate that git reset follows git clone/checkout operations"
    )
    parser.add_argument(
        "paths",
        nargs="*",
        type=Path,
        default=[Path(".")],
        help="Paths to check (files or directories). Defaults to current directory.",
    )
    files = find_files_to_check(parser.parse_args().paths)

    # Flatten the per-file results, tagging each violation with its file.
    all_violations: list[tuple[Path, int, str, str]] = [
        (file, line_num, line_content, op_type)
        for file in files
        for line_num, line_content, op_type in validate_file(file)
    ]

    if not all_violations:
        print(f"OK: Checked {len(files)} files, no violations found.")
        return 0

    def report(*parts: str) -> None:
        # All diagnostics go to stderr; stdout is reserved for the OK message.
        print(*parts, file=sys.stderr)

    report("ERROR: Found git clone/checkout without git reset:")
    report()
    for file, line_num, line_content, op_type in all_violations:
        report(f" {file}:{line_num}: {op_type}")
        report(f" {line_content}")
        report()
    report("To fix: Add 'git reset --hard <commit>' after the git operation,")
    report("or add a comment containing 'git reset' to indicate it's intentional.")
    report("Example: # git reset is not needed here because...")
    return 1


if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
5 changes: 5 additions & 0 deletions legacy/lca_ci_build_repair/eval_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,11 @@ def run_eval(
obs = runtime.run_action(action)
assert obs.exit_code == 0

action = CmdRunAction(command='git reset --hard HEAD')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
assert obs.exit_code == 0

script_dir = os.path.dirname(
os.path.abspath(__file__)
) # Get the absolute path of the script
Expand Down
5 changes: 5 additions & 0 deletions legacy/lca_ci_build_repair/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,11 @@ def initialize_runtime(
obs = runtime.run_action(action)
assert obs.exit_code == 0

action = CmdRunAction(command='git reset --hard HEAD')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
assert obs.exit_code == 0

script_dir = os.path.dirname(
os.path.abspath(__file__)
) # Get the absolute path of the script
Expand Down
5 changes: 5 additions & 0 deletions legacy/ml_bench/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,11 @@ def initialize_runtime(
obs = runtime.run_action(action)
assert obs.exit_code == 0

action = CmdRunAction(command=f'cd /workspace/{repo_name} && git reset --hard HEAD')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
assert obs.exit_code == 0

action = CmdRunAction(command=f'chmod -R 777 /workspace/{repo_name}')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
Expand Down
1 change: 1 addition & 0 deletions legacy/swefficiency/scripts/setup/prepare_swe_utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ echo "==== Prepare SWE-bench repo ===="
OH_SWE_BENCH_REPO_PATH="https://github.com/All-Hands-AI/SWE-bench.git"
OH_SWE_BENCH_REPO_BRANCH="eval"
git clone -b $OH_SWE_BENCH_REPO_BRANCH $OH_SWE_BENCH_REPO_PATH $EVAL_WORKSPACE/OH-SWE-bench
# Reset the fresh clone in place. `git -C` is used instead of `cd` so that the
# script's working directory is unchanged for the steps that follow.
git -C "$EVAL_WORKSPACE/OH-SWE-bench" reset --hard HEAD

# 2. Prepare DATA
echo "==== Prepare SWE-bench data ===="
Expand Down
1 change: 1 addition & 0 deletions legacy/testgeneval/scripts/setup/prepare_swe_utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ echo "==== Prepare SWE-bench repo ===="
OH_SWE_BENCH_REPO_PATH="https://github.com/OpenHands/SWE-bench.git"
OH_SWE_BENCH_REPO_BRANCH="eval"
git clone -b $OH_SWE_BENCH_REPO_BRANCH $OH_SWE_BENCH_REPO_PATH $EVAL_WORKSPACE/OH-SWE-bench
# Reset the fresh clone in place. `git -C` is used instead of `cd` so that the
# script's working directory is unchanged for the steps that follow.
git -C "$EVAL_WORKSPACE/OH-SWE-bench" reset --hard HEAD

# 2. Prepare DATA
echo "==== Prepare SWE-bench data ===="
Expand Down
4 changes: 4 additions & 0 deletions legacy/utils/version_control.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,15 @@ checkout_eval_branch() {
current_branch=$(git rev-parse --abbrev-ref HEAD)
echo "Current version is: $current_branch"
echo "Check out OpenHands to version: $COMMIT_HASH"
# git reset is not needed here - this checkout targets a specific commit hash
# and is used for version control within the development environment
if ! git checkout $COMMIT_HASH; then
echo "Failed to check out to $COMMIT_HASH"
exit 1
fi

echo "Revert changes in evaluation folder"
# git reset is not needed - restoring evaluation folder from current branch
git checkout $current_branch -- evaluation

# Trap the EXIT signal to checkout original branch
Expand All @@ -36,6 +39,7 @@ checkout_original_branch() {
return 0
fi
echo "Checkout back to original branch $current_branch"
# git reset is not needed - restoring to original branch after evaluation
git checkout $current_branch
}

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ dependencies = [

[project.scripts]
validate-cfg = "benchmarks.scripts.validate_cfg:main"
validate-git-reset = "benchmarks.scripts.validate_git_reset:main"
swebench-infer = "benchmarks.swebench.run_infer:main"
swtbench-infer = "benchmarks.swtbench.run_infer:main"
swebench-eval = "benchmarks.swebench.eval_infer:main"
Expand Down
Loading