Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,17 @@ repos:
name: LFS data
always_run: true
pass_filenames: false
entry: bin/lfs_check
entry: bin/hooks/lfs_check
language: script

# Fails the commit when a file is larger than the limit configured under
# [tool.largefiles] in pyproject.toml, unless the file is tracked by Git LFS
# or matches one of the configured ignore patterns.
- id: largefiles-check
name: Large files check
always_run: true
pass_filenames: false
entry: python bin/hooks/largefiles_check
language: python
# tomli is imported by the hook script to parse pyproject.toml.
additional_dependencies: ['tomli']

- id: doclinks
name: Doclinks
always_run: true
Expand Down
62 changes: 62 additions & 0 deletions bin/hooks/largefiles_check
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/usr/bin/env python3
"""Pre-commit hook to detect large files that should be in LFS."""

import argparse
import fnmatch
import os
import shutil
import subprocess
import sys

import tomli

def parse_args(argv=None):
    """Parse command-line options.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        The parsed namespace; ``all`` is True when every tracked file should
        be checked instead of only the staged ones.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--all", action="store_true", help="Check all files in repo, not just staged"
    )
    return parser.parse_args(argv)


def require_git_lfs():
    """Exit with status 1 and install instructions if git-lfs is not on PATH."""
    if shutil.which("git-lfs"):
        return
    print("git-lfs is not installed.")
    print("\nInstall with:")
    print(" Arch: pacman -S git-lfs")
    print(" Ubuntu: apt install git-lfs")
    print(" macOS: brew install git-lfs")
    print("\nThen run: git lfs install")
    sys.exit(1)


def load_config(path="pyproject.toml"):
    """Return the ``[tool.largefiles]`` table from *path* (empty dict if absent)."""
    with open(path, "rb") as f:
        return tomli.load(f).get("tool", {}).get("largefiles", {})


def split_nul(output):
    """Split NUL-terminated git output (bytes) into a list of path strings.

    ``-z`` output is used because, without it, git C-quotes paths containing
    non-ASCII or special characters, and the quoted form no longer names a
    real file on disk.
    """
    return [os.fsdecode(chunk) for chunk in output.split(b"\0") if chunk]


def list_lfs_files():
    """Return the set of paths reported by ``git lfs ls-files -n``."""
    result = subprocess.run(
        ["git", "lfs", "ls-files", "-n"], capture_output=True, text=True, check=True
    )
    return set(result.stdout.splitlines())


def list_candidate_files(check_all):
    """Return the paths to inspect.

    Args:
        check_all: When true, list every tracked file (``git ls-files``);
            otherwise list only the staged paths (``git diff --cached``).
    """
    if check_all:
        files_cmd = ["git", "ls-files", "-z"]
    else:
        files_cmd = ["git", "diff", "--cached", "--name-only", "-z"]
    result = subprocess.run(files_cmd, capture_output=True, check=True)
    return split_nul(result.stdout)


def find_violations(files, lfs_files, ignore_patterns, max_bytes):
    """Return ``(path, size_in_bytes)`` for every file larger than *max_bytes*.

    Paths that are in *lfs_files*, match any fnmatch pattern in
    *ignore_patterns*, or are not regular files on disk (e.g. staged
    deletions) are skipped.
    """
    violations = []
    for file in files:
        if file in lfs_files:
            continue
        if any(fnmatch.fnmatch(file, p) for p in ignore_patterns):
            continue
        if not os.path.isfile(file):
            continue
        size = os.path.getsize(file)  # stat once and reuse for the report
        if size > max_bytes:
            violations.append((file, size))
    return violations


def main():
    """Run the check; exit with status 1 when oversized files are found."""
    args = parse_args()

    # Check git-lfs is installed
    require_git_lfs()

    # Load config
    config = load_config()
    max_size_kb = config.get("max_size_kb", 50)
    max_bytes = max_size_kb * 1024
    ignore_patterns = config.get("ignore", [])

    # Files already stored in LFS are exempt from the size limit
    lfs_files = list_lfs_files()

    violations = find_violations(
        list_candidate_files(args.all), lfs_files, ignore_patterns, max_bytes
    )

    if violations:
        print(f"Large files detected (limit: {max_size_kb}KB):")
        # Largest offenders first
        for f, size in sorted(violations, key=lambda x: -x[1]):
            print(f" {size // 1024}KB {f}")
        print("\nEither add to LFS or to [tool.largefiles].ignore in pyproject.toml")
        sys.exit(1)


if __name__ == "__main__":
    main()
File renamed without changes.
2 changes: 1 addition & 1 deletion docs/data.md
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ The [`lfs_push`](/bin/lfs_push) script:
2. Uploads to Git LFS
3. Stages the compressed file

A pre-commit hook ([`bin/lfs_check`](/bin/lfs_check#L26)) blocks commits if you have uncompressed directories in `data/` without a corresponding `.tar.gz` in `data/.lfs/`.
A pre-commit hook ([`bin/hooks/lfs_check`](/bin/hooks/lfs_check#L26)) blocks commits if you have uncompressed directories in `data/` without a corresponding `.tar.gz` in `data/.lfs/`.

## Location Resolution

Expand Down
9 changes: 9 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,15 @@ addopts = "-v -p no:warnings -ra --color=yes -m 'not vis and not benchmark and n
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "function"

# Settings read by the large-files pre-commit hook (bin/hooks/largefiles_check).
[tool.largefiles]
# Size limit in KB (the hook multiplies by 1024 to get bytes). Files above it
# that are not tracked by Git LFS make the hook fail. The hook defaults to 50
# when this key is missing.
max_size_kb = 50
# Paths exempt from the check, matched with Python's fnmatch against the path
# as reported by git. `*` also matches `/`, so "*/package-lock.json" covers a
# package-lock.json at any nesting depth, but not one at the repository root.
ignore = [
"uv.lock",
"*/package-lock.json",
"dimos/dashboard/dimos.rbl",
"dimos/web/dimos_interface/themes.json",
]

[tool.uv]
# Build dependencies for packages that don't declare them properly
extra-build-dependencies = { detectron2 = ["torch"], contact-graspnet-pytorch = ["numpy"] }
Expand Down
Loading