diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 549514f0e4..cc44a82bba 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -75,9 +75,17 @@ repos:
         name: LFS data
         always_run: true
         pass_filenames: false
-        entry: bin/lfs_check
+        entry: bin/hooks/lfs_check
         language: script
 
+      - id: largefiles-check
+        name: Large files check
+        always_run: true
+        pass_filenames: false
+        entry: python bin/hooks/largefiles_check
+        language: python
+        additional_dependencies: ['tomli']
+
       - id: doclinks
         name: Doclinks
         always_run: true
diff --git a/bin/hooks/largefiles_check b/bin/hooks/largefiles_check
new file mode 100755
index 0000000000..190183ecc6
--- /dev/null
+++ b/bin/hooks/largefiles_check
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+"""Pre-commit hook to detect large files that should be in LFS.
+
+Settings come from the [tool.largefiles] table of pyproject.toml in the
+current working directory:
+
+    max_size_kb  size limit, multiplied by 1024 to get bytes (default 50)
+    ignore       fnmatch patterns for paths that are exempt (default none)
+
+Only staged files are checked unless --all is given. Exits with status 1 when
+git-lfs is not installed or when any checked file exceeds the limit.
+"""
+
+import argparse
+import fnmatch
+import os
+import shutil
+import subprocess
+import sys
+
+try:
+    import tomllib  # standard library on Python 3.11+
+except ModuleNotFoundError:
+    import tomli as tomllib  # backport with the same load() API
+
+
+def git_paths(*git_args):
+    """Return the paths printed by running git with git_args plus -z.
+
+    With -z git terminates each path with NUL and prints it verbatim. Without
+    it, paths containing non-ASCII or special characters come back C-quoted,
+    match nothing on disk, and would silently skip the size check.
+    """
+    result = subprocess.run(["git", *git_args, "-z"], capture_output=True, check=True)
+    return [os.fsdecode(raw) for raw in result.stdout.split(b"\0") if raw]
+
+
+def main():
+    """Run the check; return 0 when clean, 1 on a violation or missing git-lfs."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--all", action="store_true", help="Check all files in repo, not just staged")
+    args = parser.parse_args()
+
+    # Check git-lfs is installed
+    if not shutil.which("git-lfs"):
+        print("git-lfs is not installed.")
+        print("\nInstall with:")
+        print(" Arch: pacman -S git-lfs")
+        print(" Ubuntu: apt install git-lfs")
+        print(" macOS: brew install git-lfs")
+        print("\nThen run: git lfs install")
+        return 1
+
+    # Load config
+    with open("pyproject.toml", "rb") as f:
+        config = tomllib.load(f).get("tool", {}).get("largefiles", {})
+
+    max_size_kb = config.get("max_size_kb", 50)
+    max_bytes = max_size_kb * 1024
+    ignore_patterns = config.get("ignore", [])
+
+    # Get LFS files to exclude
+    result = subprocess.run(
+        ["git", "lfs", "ls-files", "-n"], capture_output=True, text=True, check=True
+    )
+    lfs_files = set(result.stdout.splitlines())
+
+    # Get files to check
+    if args.all:
+        candidates = git_paths("ls-files")
+    else:
+        candidates = git_paths("diff", "--cached", "--name-only")
+
+    violations = []
+    for path in candidates:
+        if path in lfs_files:
+            continue
+        if any(fnmatch.fnmatch(path, p) for p in ignore_patterns):
+            continue
+        # Listed paths need not exist as regular files (e.g. staged deletions).
+        if not os.path.isfile(path):
+            continue
+        size = os.path.getsize(path)  # stat once; reused for the report below
+        if size > max_bytes:
+            violations.append((path, size))
+
+    if not violations:
+        return 0
+
+    print(f"Large files detected (limit: {max_size_kb}KB):")
+    for path, size in sorted(violations, key=lambda x: -x[1]):
+        print(f" {size // 1024}KB {path}")
+    print("\nEither add to LFS or to [tool.largefiles].ignore in pyproject.toml")
+    return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/bin/lfs_check b/bin/hooks/lfs_check
similarity index 100%
rename from bin/lfs_check
rename to bin/hooks/lfs_check
diff --git a/docs/data.md b/docs/data.md
index a30a0e3328..34313098f9 100644
--- a/docs/data.md
+++ b/docs/data.md
@@ -194,7 +194,7 @@ The [`lfs_push`](/bin/lfs_push) script:
 2. Uploads to Git LFS
 3. Stages the compressed file
 
-A pre-commit hook ([`bin/lfs_check`](/bin/lfs_check#L26)) blocks commits if you have uncompressed directories in `data/` without a corresponding `.tar.gz` in `data/.lfs/`.
+A pre-commit hook ([`bin/hooks/lfs_check`](/bin/hooks/lfs_check#L26)) blocks commits if you have uncompressed directories in `data/` without a corresponding `.tar.gz` in `data/.lfs/`.
 
 ## Location Resolution
 
diff --git a/pyproject.toml b/pyproject.toml
index bd7bc13da9..700fbee22a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -353,6 +353,15 @@ addopts = "-v -p no:warnings -ra --color=yes -m 'not vis and not benchmark and n
 asyncio_mode = "auto"
 asyncio_default_fixture_loop_scope = "function"
 
+[tool.largefiles]
+max_size_kb = 50
+ignore = [
+    "uv.lock",
+    "*/package-lock.json",
+    "dimos/dashboard/dimos.rbl",
+    "dimos/web/dimos_interface/themes.json",
+]
+
 [tool.uv]
 # Build dependencies for packages that don't declare them properly
 extra-build-dependencies = { detectron2 = ["torch"], contact-graspnet-pytorch = ["numpy"] }