From 7c65baa927cc8a339a609639a00bcd3a18cc3fd9 Mon Sep 17 00:00:00 2001 From: James Chainey Date: Thu, 20 Nov 2025 12:47:37 -0800 Subject: [PATCH 01/16] added context loader helper --- src/runloop_api_client/lib/context_loader.py | 145 +++++++++++++++++++ 1 file changed, 145 insertions(+) create mode 100644 src/runloop_api_client/lib/context_loader.py diff --git a/src/runloop_api_client/lib/context_loader.py b/src/runloop_api_client/lib/context_loader.py new file mode 100644 index 000000000..adec17009 --- /dev/null +++ b/src/runloop_api_client/lib/context_loader.py @@ -0,0 +1,145 @@ +import io +import tarfile +from typing import Iterable, Optional +from pathlib import Path, PurePosixPath + +## This file has helper methods to get a docker context tarball from a given context root. + + +def _load_dockerignore_patterns( + dockerignore_path: Optional[Path], +) -> list[tuple[bool, str]]: + """Parse .dockerignore contents into a list of (is_negated, pattern). + + Notes: + - Empty lines and comments are ignored. + - Lines starting with '!' are negation patterns. + """ + if dockerignore_path is None or not dockerignore_path.exists(): + return [] + + patterns: list[tuple[bool, str]] = [] + for raw_line in dockerignore_path.read_text(encoding="utf-8").splitlines(): + line = raw_line.strip() + if not line or line.startswith("#"): + continue + + is_negated = line.startswith("!") + if is_negated: + line = line[1:].strip() + if not line: + continue + + patterns.append((is_negated, line)) + + return patterns + + +def _match_dockerignore_pattern(relpath: str, pattern: str) -> bool: + """Return True if relpath matches a single .dockerignore pattern. + + This is a small, pragmatic approximation of Docker's matching rules: + - Patterns ending with '/' are treated as directory-only. + - Patterns without '/' match basenames anywhere in the tree. + - Other patterns match against the full relative path. 
+ """ + from fnmatch import fnmatch + + relpath_posix = PurePosixPath(relpath).as_posix() + + directory_only = pattern.endswith("/") + if directory_only: + pattern = pattern.rstrip("/") + + if "/" not in pattern: + # Match against basename anywhere in the tree + name = PurePosixPath(relpath_posix).name + matched = fnmatch(name, pattern) + else: + # Match against the full relative path + matched = fnmatch(relpath_posix, pattern) + + if directory_only: + # Directory-only pattern matches the directory itself or anything under it + return matched and (relpath_posix == pattern or relpath_posix.startswith(f"{pattern}/")) + + return matched + + +def _is_ignored(relpath: str, patterns: list[tuple[bool, str]]) -> bool: + """Apply .dockerignore patterns with 'last match wins' semantics.""" + + included = True # include by default + for is_negated, pat in patterns: + if _match_dockerignore_pattern(relpath, pat): + # Negated patterns flip back to included, normal patterns exclude. + included = is_negated + return not included + + +def _iter_build_context_files( + context_root: Path, + *, + dockerignore_path: Optional[Path] = None, +) -> Iterable[Path]: + """Yield files to include in the build context, honoring .dockerignore. + + This hand-rolls .dockerignore parsing and matching instead of relying on the + Docker SDK to avoid pulling in the docker Python dependency. + It approximates Docker's behavior. + """ + if not context_root.is_dir(): + raise ValueError(f"context_root must be a directory, got: {context_root}") + + if dockerignore_path is None: + candidate = context_root / ".dockerignore" + dockerignore_path = candidate if candidate.exists() else None + + patterns = _load_dockerignore_patterns(dockerignore_path) + + # Walk the tree and apply ignore rules. We mirror the "include by default" + # behavior and apply patterns in order, with last match winning. 
+ for path in context_root.rglob("*"): + if path.is_dir(): + # Docker's context is file-based; directories are implicit. + continue + + rel = path.relative_to(context_root).as_posix() + + if _is_ignored(rel, patterns): + continue + + yield path + + +def build_docker_context_tar( + context_root: Path, + *, + dockerignore: Optional[Path] = None, +) -> bytes: + """Create a .tar.gz of the Docker build context, respecting .dockerignore. + + - Treats ``context_root`` as the Docker build context root. + - Determines the .dockerignore path as: + * explicit ``dockerignore`` argument if provided + * otherwise ``context_root / \".dockerignore\"`` if it exists + """ + context_root = context_root.resolve() + + # Resolve dockerignore path according to the requested behavior + if dockerignore is not None: + dockerignore_path = dockerignore.resolve() + else: + dockerignore_path = context_root / ".dockerignore" + + buf = io.BytesIO() + + with tarfile.open(mode="w:gz", fileobj=buf) as tf: + for path in _iter_build_context_files( + context_root, + dockerignore_path=dockerignore_path if dockerignore_path.exists() else None, + ): + rel = path.relative_to(context_root) + tf.add(path, arcname=rel.as_posix()) + + return buf.getvalue() From c8916eb15f0a23ef651ddd7e76b629ffd6b9c0cd Mon Sep 17 00:00:00 2001 From: James Chainey Date: Thu, 20 Nov 2025 14:37:25 -0800 Subject: [PATCH 02/16] added context loader helper to make it easier to add build context when creating a blueprint --- src/runloop_api_client/lib/__init__.py | 3 + src/runloop_api_client/lib/context_loader.py | 355 ++++++++++++++----- tests/test_utils/test_context_loader.py | 85 +++++ 3 files changed, 355 insertions(+), 88 deletions(-) create mode 100644 src/runloop_api_client/lib/__init__.py create mode 100644 tests/test_utils/test_context_loader.py diff --git a/src/runloop_api_client/lib/__init__.py b/src/runloop_api_client/lib/__init__.py new file mode 100644 index 000000000..a18bc710b --- /dev/null +++ 
b/src/runloop_api_client/lib/__init__.py @@ -0,0 +1,3 @@ +""" +Helpers for `runloop_api_client`. +""" diff --git a/src/runloop_api_client/lib/context_loader.py b/src/runloop_api_client/lib/context_loader.py index adec17009..b96e46247 100644 --- a/src/runloop_api_client/lib/context_loader.py +++ b/src/runloop_api_client/lib/context_loader.py @@ -1,79 +1,284 @@ import io +import os import tarfile from typing import Iterable, Optional from pathlib import Path, PurePosixPath +from dataclasses import dataclass ## This file has helper methods to get a docker context tarball from a given context root. +## +## It includes a small, self-contained implementation of Docker's `.dockerignore` +## semantics. The goal is to be very close to Docker's behavior without depending +## on Docker's own Python libraries. + + +def build_docker_context_tar( + context_root: Path, + *, + dockerignore: Optional[Path] = None, +) -> bytes: + """Create a .tar.gz of the Docker build context, respecting .dockerignore for use with object store. + + Generally you shouldn't need to pass in .dockerignore directly; just let the function find it for you. + - Treats ``context_root`` as the Docker build context root. + """ + context_root = context_root.resolve() + + # Resolve dockerignore path according to the requested behavior + if dockerignore is not None: + dockerignore_path = dockerignore.resolve() + else: + dockerignore_path = context_root / ".dockerignore" + + buf = io.BytesIO() + + with tarfile.open(mode="w:gz", fileobj=buf) as tf: + for path in _iter_build_context_files( + context_root, + dockerignore_path=dockerignore_path if dockerignore_path.exists() else None, + ): + rel = path.relative_to(context_root) + tf.add(path, arcname=rel.as_posix()) + + return buf.getvalue() + + +@dataclass(frozen=True) +class _DockerignorePattern: + """Single parsed .dockerignore pattern. + + Attributes: + pattern: The normalized pattern text with escapes processed and leading + '/' / trailing '/' removed. 
Always uses POSIX '/' separators. + negated: True if this is a negation pattern starting with '!'. + directory_only: True if the original pattern ended with '/' and should + apply only to directories and their descendants. + anchored: True if the original pattern started with '/' and is anchored + to the context root. Non-anchored patterns may match at any depth. + """ + + pattern: str + negated: bool + directory_only: bool + anchored: bool + + +def _unescape_pattern(text: str) -> str: + """Unescape backslash-escaped characters in a pattern string.""" + result: list[str] = [] + i = 0 + while i < len(text): + if text[i] == "\\" and i + 1 < len(text): + # Backslash escapes the next character + result.append(text[i + 1]) + i += 2 + else: + result.append(text[i]) + i += 1 + return "".join(result) + + +def _find_first_non_space(text: str) -> int: + """Find the index of the first non-space character, skipping escaped spaces.""" + i = 0 + while i < len(text) and text[i] == " ": + i += 1 + + # Check if we stopped at an escaped space + if i < len(text) and text[i] == "\\" and i + 1 < len(text) and text[i + 1] == " ": + return i + + return i # Either first non-space char or len(text) + + +def _strip_trailing_whitespace(text: str) -> str: + """Strip trailing whitespace unless it's escaped.""" + while text and text[-1] in " \t": + if len(text) >= 2 and text[-2] == "\\": + # Trailing space is escaped, keep it + break + text = text[:-1] + return text def _load_dockerignore_patterns( dockerignore_path: Optional[Path], -) -> list[tuple[bool, str]]: - """Parse .dockerignore contents into a list of (is_negated, pattern). - - Notes: - - Empty lines and comments are ignored. - - Lines starting with '!' are negation patterns. +) -> list[_DockerignorePattern]: + """Parse .dockerignore into a list of structured patterns. + + Parsing rules mirror Docker's behavior as described in the official docs: + + - Empty lines are ignored. 
+ - Lines starting with unescaped ``#`` (after optional leading spaces) are + treated as comments and ignored. + - A leading ``!`` (after optional leading spaces) negates the pattern. + A leading ``\\!`` is treated as a literal ``!``. + - A leading ``#`` can be escaped as ``\\#`` to be treated as part of the + pattern. + - Leading and trailing spaces are significant if escaped with a backslash. """ if dockerignore_path is None or not dockerignore_path.exists(): return [] - patterns: list[tuple[bool, str]] = [] - for raw_line in dockerignore_path.read_text(encoding="utf-8").splitlines(): - line = raw_line.strip() - if not line or line.startswith("#"): + patterns: list[_DockerignorePattern] = [] + for line in dockerignore_path.read_text(encoding="utf-8").splitlines(): + # Strip trailing newline/carriage return, but preserve escaped trailing spaces + line = line.rstrip("\n\r") + if not line: + continue + + # Find first non-space character (handles escaped spaces) + start_idx = _find_first_non_space(line) + if start_idx >= len(line): + # Line is entirely whitespace + continue + + # Check for comment: first non-space char is unescaped '#' + first_char = line[start_idx] + if first_char == "#" and (start_idx == 0 or line[start_idx - 1] != "\\"): continue - is_negated = line.startswith("!") - if is_negated: - line = line[1:].strip() - if not line: + # Check for negation: first non-space char is unescaped '!' + negated = False + if first_char == "!" and (start_idx == 0 or line[start_idx - 1] != "\\"): + negated = True + start_idx += 1 + if start_idx >= len(line): + # Bare "!" 
after optional spaces is ignored continue - patterns.append((is_negated, line)) + # Extract pattern part (everything after negation marker) + pattern_raw = line[start_idx:] + pattern_raw = _strip_trailing_whitespace(pattern_raw) + pattern = _unescape_pattern(pattern_raw) + + if not pattern: + # Nothing meaningful left after processing + continue + + # Extract anchored and directory-only flags + anchored = pattern.startswith("/") + if anchored: + pattern = pattern.lstrip("/") + + directory_only = pattern.endswith("/") + if directory_only: + pattern = pattern.rstrip("/") + + if not pattern: + # A line that is effectively "/" or similar after processing + continue + + patterns.append( + _DockerignorePattern( + pattern=PurePosixPath(pattern).as_posix(), + negated=negated, + directory_only=directory_only, + anchored=anchored, + ) + ) return patterns -def _match_dockerignore_pattern(relpath: str, pattern: str) -> bool: - """Return True if relpath matches a single .dockerignore pattern. +def _segment_match(pattern_segment: str, path_segment: str) -> bool: + """Match a single path segment against a glob pattern segment. - This is a small, pragmatic approximation of Docker's matching rules: - - Patterns ending with '/' are treated as directory-only. - - Patterns without '/' match basenames anywhere in the tree. - - Other patterns match against the full relative path. + Supports: + - ``*``: any sequence of characters except ``/``. + - ``?``: any single character except ``/``. + - ``[]``: character classes, excluding ``/``. """ - from fnmatch import fnmatch + import re + + escaped = "" + i = 0 + while i < len(pattern_segment): + ch = pattern_segment[i] + if ch == "*": + escaped += "[^/]*" + elif ch == "?": + escaped += "[^/]" + elif ch == "[": + # Copy character class as-is until closing ']'. 
+ j = i + 1 + while j < len(pattern_segment) and pattern_segment[j] != "]": + j += 1 + if j < len(pattern_segment): + escaped += pattern_segment[i : j + 1] + i = j + else: + # Unterminated '['; treat it literally. + escaped += re.escape(ch) + else: + escaped += re.escape(ch) + i += 1 + + regex = re.compile(rf"^{escaped}$") + return regex.match(path_segment) is not None + + +def _match_parts_recursive(pattern_parts: list[str], path_parts: list[str]) -> bool: + """Recursive helper implementing ``**`` segment semantics.""" + + if not pattern_parts: + return not path_parts + + if pattern_parts[0] == "**": + # '**' matches zero or more segments. + for i in range(len(path_parts) + 1): + if _match_parts_recursive(pattern_parts[1:], path_parts[i:]): + return True + return False + + if not path_parts: + return False + + if not _segment_match(pattern_parts[0], path_parts[0]): + return False + + return _match_parts_recursive(pattern_parts[1:], path_parts[1:]) + + +def _path_match(pattern: _DockerignorePattern, relpath: str, is_dir: bool) -> bool: + """Return True if ``relpath`` matches a parsed .dockerignore pattern.""" relpath_posix = PurePosixPath(relpath).as_posix() + path_parts = PurePosixPath(relpath_posix).parts + pattern_parts = PurePosixPath(pattern.pattern).parts - directory_only = pattern.endswith("/") - if directory_only: - pattern = pattern.rstrip("/") + # Directory-only patterns never directly match files here; the effect on + # descendants is enforced by directory pruning in the traversal. + if pattern.directory_only and not is_dir: + return False - if "/" not in pattern: - # Match against basename anywhere in the tree - name = PurePosixPath(relpath_posix).name - matched = fnmatch(name, pattern) - else: - # Match against the full relative path - matched = fnmatch(relpath_posix, pattern) + # Anchored patterns must match starting at the context root; otherwise we + # allow them to match at any depth. 
+ if pattern.anchored: + return _match_parts_recursive(list(pattern_parts), list(path_parts)) + + for start in range(len(path_parts)): + if _match_parts_recursive(list(pattern_parts), list(path_parts[start:])): + return True + return False - if directory_only: - # Directory-only pattern matches the directory itself or anything under it - return matched and (relpath_posix == pattern or relpath_posix.startswith(f"{pattern}/")) - return matched +def _is_ignored(relpath: str, is_dir: bool, patterns: list[_DockerignorePattern]) -> bool: + """Apply .dockerignore patterns with 'last match wins' semantics. + Examples:: -def _is_ignored(relpath: str, patterns: list[tuple[bool, str]]) -> bool: - """Apply .dockerignore patterns with 'last match wins' semantics.""" + *.log + !important.log + + excludes all ``.log`` files except ``important.log``. Patterns are applied + in order, and the last matching pattern determines inclusion. + """ included = True # include by default - for is_negated, pat in patterns: - if _match_dockerignore_pattern(relpath, pat): - # Negated patterns flip back to included, normal patterns exclude. - included = is_negated + for pat in patterns: + if _path_match(pat, relpath, is_dir=is_dir): + included = pat.negated return not included @@ -97,49 +302,23 @@ def _iter_build_context_files( patterns = _load_dockerignore_patterns(dockerignore_path) - # Walk the tree and apply ignore rules. We mirror the "include by default" - # behavior and apply patterns in order, with last match winning. - for path in context_root.rglob("*"): - if path.is_dir(): - # Docker's context is file-based; directories are implicit. - continue - - rel = path.relative_to(context_root).as_posix() - - if _is_ignored(rel, patterns): - continue - - yield path - - -def build_docker_context_tar( - context_root: Path, - *, - dockerignore: Optional[Path] = None, -) -> bytes: - """Create a .tar.gz of the Docker build context, respecting .dockerignore. 
- - - Treats ``context_root`` as the Docker build context root. - - Determines the .dockerignore path as: - * explicit ``dockerignore`` argument if provided - * otherwise ``context_root / \".dockerignore\"`` if it exists - """ - context_root = context_root.resolve() - - # Resolve dockerignore path according to the requested behavior - if dockerignore is not None: - dockerignore_path = dockerignore.resolve() - else: - dockerignore_path = context_root / ".dockerignore" - - buf = io.BytesIO() - - with tarfile.open(mode="w:gz", fileobj=buf) as tf: - for path in _iter_build_context_files( - context_root, - dockerignore_path=dockerignore_path if dockerignore_path.exists() else None, - ): - rel = path.relative_to(context_root) - tf.add(path, arcname=rel.as_posix()) - - return buf.getvalue() + # Walk the tree with directory pruning. We mirror Docker's behavior where + # excluded parent directories prevent re-including children, even with + # negation patterns. + for root, dirs, files in os.walk(context_root): + root_path = Path(root) + + # Prune ignored directories in-place so their contents are never visited. + for name in list(dirs): + dir_path = root_path / name + rel_dir = dir_path.relative_to(context_root).as_posix() + if _is_ignored(rel_dir, is_dir=True, patterns=patterns): + dirs.remove(name) + + # Yield non-ignored files. 
+ for name in files: + file_path = root_path / name + rel_file = file_path.relative_to(context_root).as_posix() + if _is_ignored(rel_file, is_dir=False, patterns=patterns): + continue + yield file_path diff --git a/tests/test_utils/test_context_loader.py b/tests/test_utils/test_context_loader.py new file mode 100644 index 000000000..1dbc828d2 --- /dev/null +++ b/tests/test_utils/test_context_loader.py @@ -0,0 +1,85 @@ +from pathlib import Path + +from runloop_api_client.lib.context_loader import ( + _is_ignored, + _path_match, + _segment_match, + _DockerignorePattern, + _iter_build_context_files, + _load_dockerignore_patterns, +) + + +def test_segment_match_basic_globs(): + assert _segment_match("*.log", "app.log") + assert not _segment_match("*.log", "app.txt") + assert _segment_match("foo?", "fooa") + assert not _segment_match("foo?", "fooba") + assert _segment_match("[ab].txt", "a.txt") + assert not _segment_match("[ab].txt", "c.txt") + + +def test_path_match_anchored_and_unanchored(): + pat = _DockerignorePattern(pattern="foo/bar.txt", negated=False, directory_only=False, anchored=True) + assert _path_match(pat, "foo/bar.txt", is_dir=False) + assert not _path_match(pat, "a/foo/bar.txt", is_dir=False) + + pat_unanchored = _DockerignorePattern(pattern="foo/bar.txt", negated=False, directory_only=False, anchored=False) + assert _path_match(pat_unanchored, "a/foo/bar.txt", is_dir=False) + assert _path_match(pat_unanchored, "foo/bar.txt", is_dir=False) + + +def test_path_match_double_star(): + pat = _DockerignorePattern(pattern="**/*.log", negated=False, directory_only=False, anchored=False) + assert _path_match(pat, "app.log", is_dir=False) + assert _path_match(pat, "a/b/app.log", is_dir=False) + assert not _path_match(pat, "a/b/app.txt", is_dir=False) + + +def test_is_ignored_last_match_wins(): + patterns = [ + _DockerignorePattern(pattern="*.log", negated=False, directory_only=False, anchored=False), + _DockerignorePattern(pattern="keep.log", negated=True, 
directory_only=False, anchored=False), + ] + assert _is_ignored("foo.log", is_dir=False, patterns=patterns) + assert not _is_ignored("keep.log", is_dir=False, patterns=patterns) + + +def test_load_dockerignore_patterns_basic(tmp_path: Path): + dockerignore = tmp_path / ".dockerignore" + dockerignore.write_text( + "\n# comment\n *.log \n!keep.log\n\\#literal\n\\!literal\n", + encoding="utf-8", + ) + + patterns = _load_dockerignore_patterns(dockerignore) + assert len(patterns) == 4 + assert patterns[0].pattern == "*.log" + assert not patterns[0].negated + assert patterns[1].pattern == "keep.log" + assert patterns[1].negated + assert patterns[2].pattern == "#literal" + assert not patterns[2].negated + assert patterns[3].pattern == "!literal" + assert not patterns[3].negated + + +def test_iter_build_context_files_respects_dockerignore(tmp_path: Path): + # Layout: + # foo.txt + # app.log + # build/ignored.txt + root = tmp_path + (root / "foo.txt").write_text("ok", encoding="utf-8") + (root / "app.log").write_text("ignored", encoding="utf-8") + build_dir = root / "build" + build_dir.mkdir() + (build_dir / "ignored.txt").write_text("ignored", encoding="utf-8") + + dockerignore = root / ".dockerignore" + dockerignore.write_text("*.log\nbuild/\n", encoding="utf-8") + + files = {p.relative_to(root).as_posix() for p in _iter_build_context_files(root, dockerignore_path=dockerignore)} + assert "foo.txt" in files + assert "app.log" not in files + assert "build/ignored.txt" not in files From 4f47bf3bf9a2b0ab38290befc6fcb54188df0a38 Mon Sep 17 00:00:00 2001 From: James Chainey Date: Thu, 20 Nov 2025 15:55:27 -0800 Subject: [PATCH 03/16] added oop build context functional method wrapper --- src/runloop_api_client/sdk/_build_context.py | 91 ++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 src/runloop_api_client/sdk/_build_context.py diff --git a/src/runloop_api_client/sdk/_build_context.py b/src/runloop_api_client/sdk/_build_context.py new file mode 100644 
index 000000000..bc738c950 --- /dev/null +++ b/src/runloop_api_client/sdk/_build_context.py @@ -0,0 +1,91 @@ +"""Helpers and strategy interface for packaging Docker build contexts. + +This module exposes a small, pluggable abstraction around turning a local +filesystem directory into a tarball suitable for use as a Docker build +context, plus a default implementation built on top of +``lib.context_loader.build_docker_context_tar``. +""" + +from __future__ import annotations + +from typing import Optional +from pathlib import Path +from dataclasses import dataclass +from typing_extensions import Protocol + +from ..lib.context_loader import build_docker_context_tar +from ..types.object_create_params import ContentType + +__all__ = ["BuildContextArtifact", "BuildContextStrategy", "default_build_context_strategy"] + + +@dataclass(frozen=True) +class BuildContextArtifact: + """Result of packaging a build context directory. + + Attributes: + data: Tarball bytes containing the build context. + content_type: Logical content type for the object payload. For the + default implementation this is always ``\"tgz\"``. + filename: Suggested filename to use when creating the backing Object. + """ + + data: bytes + content_type: ContentType = "tgz" + filename: Optional[str] = None + + +class BuildContextStrategy(Protocol): + """Strategy interface for building Docker contexts. + + Implementations may perform caching, custom compression, or additional + validation, but must return a fully materialised tarball in memory. + """ + + def __call__( + self, + context_root: Path, + *, + name: str | None = None, + dockerignore: Path | None = None, + ) -> BuildContextArtifact: + """Package the given directory into a tarball. + + Args: + context_root: Filesystem path to the Docker build context root. + name: Optional logical name for the context; may be used to + derive a filename. + dockerignore: Optional explicit path to a .dockerignore file. 
+ When omitted, the default implementation will look for + ``.dockerignore`` under ``context_root``. + """ + + +def default_build_context_strategy( + context_root: Path, + *, + name: str | None = None, + dockerignore: Path | None = None, +) -> BuildContextArtifact: + """Default implementation that wraps ``build_docker_context_tar``. + + The tarball is rebuilt on each invocation (no cross-call caching) and + returned as a :class:`BuildContextArtifact` with ``content_type=\"tgz\"``. + """ + + tar_bytes = build_docker_context_tar( + context_root, + dockerignore=dockerignore, + ) + + if name is None: + base = context_root.name or "context" + filename = f"{base}.tar.gz" + else: + filename = f"{name}.tar.gz" + + return BuildContextArtifact( + data=tar_bytes, + content_type="tgz", + filename=filename, + ) From 5fb7945bcd621998905dae612e2672e08b31fee8 Mon Sep 17 00:00:00 2001 From: James Chainey Date: Thu, 20 Nov 2025 17:15:40 -0800 Subject: [PATCH 04/16] added context helpers to SDKs --- README-SDK.md | 84 +++++++++++++++---- .../sdk/async_storage_object.py | 11 +++ src/runloop_api_client/sdk/storage_object.py | 11 +++ tests/sdk/test_async_clients.py | 9 ++ tests/sdk/test_clients.py | 9 ++ 5 files changed, 108 insertions(+), 16 deletions(-) diff --git a/README-SDK.md b/README-SDK.md index 168aad093..bc76290dc 100644 --- a/README-SDK.md +++ b/README-SDK.md @@ -4,22 +4,36 @@ The `RunloopSDK` builds on top of the underlying REST client and provides a Pyth ## Table of Contents -- [Installation](#installation) -- [Quickstart (synchronous)](#quickstart-synchronous) -- [Quickstart (asynchronous)](#quickstart-asynchronous) -- [Core Concepts](#core-concepts) -- [Devbox](#devbox) -- [Blueprint](#blueprint) -- [Snapshot](#snapshot) -- [StorageObject](#storageobject) -- [Mounting Storage Objects to Devboxes](#mounting-storage-objects-to-devboxes) -- [Accessing the Underlying REST Client](#accessing-the-underlying-rest-client) -- [Error Handling](#error-handling) -- [Advanced 
Configuration](#advanced-configuration) -- [Async Usage](#async-usage) -- [Polling Configuration](#polling-configuration) -- [Complete API Reference](#complete-api-reference) -- [Feedback](#feedback) +- [Runloop SDK – Python Object-Oriented Client](#runloop-sdk--python-object-oriented-client) + - [Table of Contents](#table-of-contents) + - [Installation](#installation) + - [Quickstart (synchronous)](#quickstart-synchronous) + - [Quickstart (asynchronous)](#quickstart-asynchronous) + - [Core Concepts](#core-concepts) + - [RunloopSDK](#runloopsdk) + - [Available Resources](#available-resources) + - [Devbox](#devbox) + - [Command Execution](#command-execution) + - [Execution Management](#execution-management) + - [Execution Results](#execution-results) + - [Streaming Command Output](#streaming-command-output) + - [File Operations](#file-operations) + - [Network Operations](#network-operations) + - [Snapshot Operations](#snapshot-operations) + - [Devbox Lifecycle Management](#devbox-lifecycle-management) + - [Context Manager Support](#context-manager-support) + - [Blueprint](#blueprint) + - [Snapshot](#snapshot) + - [StorageObject](#storageobject) + - [Storage Object Upload Helpers](#storage-object-upload-helpers) + - [Mounting Storage Objects to Devboxes](#mounting-storage-objects-to-devboxes) + - [Accessing the Underlying REST Client](#accessing-the-underlying-rest-client) + - [Error Handling](#error-handling) + - [Advanced Configuration](#advanced-configuration) + - [Async Usage](#async-usage) + - [Polling Configuration](#polling-configuration) + - [Complete API Reference](#complete-api-reference) + - [Feedback](#feedback) ## Installation @@ -409,6 +423,44 @@ blueprint = runloop.blueprint.create( system_setup_commands=["pip install numpy pandas"], ) +# Or create a blueprint with a Docker build context from a local directory +from pathlib import Path +from runloop_api_client.lib.context_loader import build_docker_context_tar + +context_root = Path("./my-app") 
+tar_bytes = build_docker_context_tar(context_root) + +build_ctx_obj = runloop.storage_object.upload_from_bytes( + data=tar_bytes, + name="my-app-context.tar.gz", + content_type="tgz", +) + +shared_root = Path("./shared-lib") +shared_tar = build_docker_context_tar(shared_root) + +shared_ctx_obj = runloop.storage_object.upload_from_bytes( + data=shared_tar, + name="shared-lib-context.tar.gz", + content_type="tgz", +) + +blueprint_with_context = runloop.blueprint.create( + name="my-blueprint-with-context", + dockerfile=\"\"\"\ +FROM ubuntu:22.04 +WORKDIR /app +# use the named context +RUN --mount=type=bind,from=shared,source=/,target=/shared ls -R /shared +\"\"\", + # Primary build context + build_context=build_ctx_obj.as_build_context(), + # Additional named build contexts (for Docker buildx-style usage) + named_build_contexts={ + "shared": shared_ctx_obj.as_build_context(), + }, +) + # Or get an existing one blueprint = runloop.blueprint.from_id(blueprint_id="bpt_123") diff --git a/src/runloop_api_client/sdk/async_storage_object.py b/src/runloop_api_client/sdk/async_storage_object.py index fd1f243b8..bc4a7bd11 100644 --- a/src/runloop_api_client/sdk/async_storage_object.py +++ b/src/runloop_api_client/sdk/async_storage_object.py @@ -158,6 +158,17 @@ async def upload_content(self, content: str | bytes) -> None: response = await self._client._client.put(url, content=content) response.raise_for_status() + def as_build_context(self) -> dict[str, str]: + """Return this object in the shape expected for a Blueprint build context. + + The returned dict can be passed directly to ``build_context`` or + ``named_build_contexts`` when creating a blueprint. + """ + return { + "object_id": self._id, + "type": "object", + } + def _ensure_upload_url(self) -> str: """Return the upload URL, ensuring it exists. 
def as_build_context(self) -> dict[str, str]:
    """Return the mapping used to reference this object as a build context.

    Pass the result directly to ``build_context`` or use it as a value in
    ``named_build_contexts`` when creating a blueprint.
    """
    context_ref: dict[str, str] = {
        "object_id": self._id,
        "type": "object",
    }
    return context_ref
from __future__ import annotations

import os
import re
from typing import Iterable, Optional, Sequence
from pathlib import Path, PurePosixPath
from functools import lru_cache
from dataclasses import dataclass

__all__ = [
    "IgnorePattern",
    "read_ignorefile",
    "compile_ignore",
    "path_match",
    "is_ignored",
]


@dataclass(frozen=True)
class IgnorePattern:
    """Single parsed ignore pattern.

    Follows Docker-style .dockerignore semantics and supports other ignore
    use cases that follow the same approach.

    Details:
        - ``pattern``: The normalized pattern text with leading/trailing ``/``
          removed. Always uses POSIX ``'/'`` separators.
        - ``negated``: True if this is a negation pattern starting with ``!``.
        - ``directory_only``: True if the original pattern ended with ``/`` and
          should apply only to directories and their descendants.
        - ``anchored``: True if the pattern contains a path separator and should
          be matched relative to the root path rather than at any depth.
    """

    pattern: str
    negated: bool
    directory_only: bool
    anchored: bool


def _normalize_pattern_line(raw: bytes, *, is_first_line: bool) -> Optional[str]:
    """Normalize a single ignorefile line, mirroring moby's ignorefile.ReadAll.

    Returns ``None`` for blank and comment lines; otherwise the cleaned pattern
    text, with a leading ``!`` preserved for negations.

    Behavior is based on:
    https://github.com/moby/patternmatcher/blob/main/ignorefile/ignorefile.go
    """

    # Strip UTF-8 BOM from the first line if present
    if is_first_line and raw.startswith(b"\xef\xbb\xbf"):
        raw = raw[len(b"\xef\xbb\xbf") :]

    # Decode as UTF-8; we are strict here to surface bad encodings
    text = raw.decode("utf-8", errors="strict")
    text = text.rstrip("\r\n")

    # Lines starting with '#' are comments and are ignored before processing,
    # i.e. we do *not* treat leading spaces as part of the comment detection.
    if text.startswith("#"):
        return None

    # Trim leading and trailing whitespace
    pattern = text.strip()
    if not pattern:
        return None

    # Normalize absolute paths to paths relative to the context (taking care of '!' prefix)
    invert = pattern[0] == "!"
    if invert:
        pattern = pattern[1:].strip()

    if pattern:
        # filepath.Clean equivalent
        pattern = os.path.normpath(pattern)
        # filepath.ToSlash equivalent
        pattern = pattern.replace(os.sep, "/")
        # Leading forward-slashes are removed so "/some/path" and "some/path"
        # are considered equivalent.
        if len(pattern) > 1 and pattern[0] == "/":
            pattern = pattern[1:]

    if invert:
        pattern = "!" + pattern

    return pattern


def read_ignorefile(path: Optional[Path]) -> list[str]:
    """Read an ignore file and return a list of normalized pattern strings.

    This mirrors the behavior of moby's ``ignorefile.ReadAll``:

    - UTF-8 BOM on the first line is stripped.
    - Lines starting with ``#`` are treated as comments and skipped.
    - Remaining lines are trimmed, optionally negated with ``!``, cleaned,
      have path separators normalized to ``/``, and leading ``/`` removed.

    A ``None`` path or a missing file yields an empty list.
    """

    if path is None or not path.exists():
        return []

    patterns: list[str] = []
    with path.open("rb") as f:
        for line_no, raw in enumerate(f):
            normalized = _normalize_pattern_line(raw, is_first_line=line_no == 0)
            if normalized is not None:
                patterns.append(normalized)

    return patterns


def compile_ignore(patterns: Sequence[str]) -> list[IgnorePattern]:
    """Compile raw pattern strings into :class:`IgnorePattern` objects.

    Empty strings, bare ``!``, and patterns that reduce to nothing after
    stripping trailing ``/`` are dropped, matching Docker / moby behavior.
    """

    compiled: list[IgnorePattern] = []

    for raw in patterns:
        if not raw:
            continue

        negated = raw[0] == "!"
        pattern_text = raw[1:] if negated else raw

        if not pattern_text:
            # Bare "!" is ignored, matching Docker / moby behavior.
            continue

        directory_only = pattern_text.endswith("/")
        if directory_only:
            pattern_text = pattern_text.rstrip("/")

        if not pattern_text:
            continue

        # Treat patterns containing a path separator as anchored to the root
        anchored = "/" in pattern_text

        compiled.append(
            IgnorePattern(
                pattern=PurePosixPath(pattern_text).as_posix(),
                negated=negated,
                directory_only=directory_only,
                anchored=anchored,
            )
        )

    return compiled


@lru_cache(maxsize=None)
def _compile_segment(pattern_segment: str) -> "re.Pattern[str]":
    """Translate one glob segment into a compiled, fully-anchored regex.

    Cached because the same handful of pattern segments is matched against
    every path in a context walk; previously the regex was rebuilt (and ``re``
    re-imported) on every single comparison.
    """

    escaped = ""
    i = 0
    while i < len(pattern_segment):
        ch = pattern_segment[i]
        if ch == "*":
            escaped += "[^/]*"
        elif ch == "?":
            escaped += "[^/]"
        elif ch == "[":
            # Copy character class as-is until closing ']'.
            j = i + 1
            while j < len(pattern_segment) and pattern_segment[j] != "]":
                j += 1
            if j < len(pattern_segment):
                escaped += pattern_segment[i : j + 1]
                i = j
            else:
                # Unterminated '['; treat it literally.
                escaped += re.escape(ch)
        else:
            escaped += re.escape(ch)
        i += 1

    return re.compile(rf"^{escaped}$")


def _segment_match(pattern_segment: str, path_segment: str) -> bool:
    """Match a single path segment against a glob pattern segment.

    Supports:
    - ``*``: any sequence of characters except ``/``.
    - ``?``: any single character except ``/``.
    - ``[]``: character classes (copied verbatim into the regex).
    """

    return _compile_segment(pattern_segment).match(path_segment) is not None


def _match_parts_recursive(pattern_parts: list[str], path_parts: list[str]) -> bool:
    """Recursive helper implementing ``**`` segment semantics."""

    if not pattern_parts:
        return not path_parts

    if pattern_parts[0] == "**":
        # '**' matches zero or more segments.
        return any(
            _match_parts_recursive(pattern_parts[1:], path_parts[i:])
            for i in range(len(path_parts) + 1)
        )

    if not path_parts:
        return False

    if not _segment_match(pattern_parts[0], path_parts[0]):
        return False

    return _match_parts_recursive(pattern_parts[1:], path_parts[1:])


def path_match(pattern: IgnorePattern, relpath: str, *, is_dir: bool) -> bool:
    """Return True if ``relpath`` matches a compiled ignore pattern."""

    relpath_posix = PurePosixPath(relpath).as_posix()
    path_parts = PurePosixPath(relpath_posix).parts
    pattern_parts = PurePosixPath(pattern.pattern).parts

    # Directory-only patterns never directly match files here; the effect on
    # descendants is enforced by directory pruning in the traversal.
    if pattern.directory_only and not is_dir:
        return False

    if pattern.anchored:
        return _match_parts_recursive(list(pattern_parts), list(path_parts))

    # Unanchored patterns may match starting at any depth.
    for start in range(len(path_parts)):
        if _match_parts_recursive(list(pattern_parts), list(path_parts[start:])):
            return True
    return False


def is_ignored(relpath: str, *, is_dir: bool, patterns: Sequence[IgnorePattern]) -> bool:
    """Apply ignore patterns with 'last match wins' semantics.

    Examples::

        *.log
        !important.log

    excludes all ``.log`` files except ``important.log``. Patterns are applied
    in order, and the last matching pattern determines inclusion.
    """

    included = True  # include by default
    for pat in patterns:
        if path_match(pat, relpath, is_dir=is_dir):
            included = pat.negated
    return not included


def iter_included_files(
    root: Path,
    *,
    patterns: Sequence[IgnorePattern],
) -> Iterable[Path]:
    """Yield all files under ``root`` that are not ignored.

    This performs directory pruning so that ignored directories are never
    traversed, mirroring Docker's behavior for .dockerignore.
    """

    if not root.is_dir():
        raise ValueError(f"root must be a directory, got: {root}")

    for dirpath, dirs, files in os.walk(root):
        dir_path = Path(dirpath)

        # Prune ignored directories so their contents are never visited.
        for name in list(dirs):
            subdir = dir_path / name
            rel_dir = subdir.relative_to(root).as_posix()
            if is_ignored(rel_dir, is_dir=True, patterns=patterns):
                dirs.remove(name)

        # Yield non-ignored files
        for name in files:
            file_path = dir_path / name
            rel_file = file_path.relative_to(root).as_posix()
            if is_ignored(rel_file, is_dir=False, patterns=patterns):
                continue
            yield file_path
def build_docker_context_tar(
    context_root: Path,
    *,
    ignore: Optional[Sequence[str] | Path | str] = None,
) -> bytes:
    """Create a gzipped tarball of the build context, honoring ignore patterns.

    - ``context_root`` is treated as the build context root.
    - A ``.dockerignore`` under ``context_root`` is always loaded if present.
    - An optional ``ignore`` argument may supply extra exclusions:
        * a :class:`pathlib.Path` or string names an additional ignorefile
          whose patterns are appended after ``.dockerignore``;
        * a sequence of strings is appended as inline patterns after any
          file-derived patterns.

    Patterns use Docker-style semantics with ``!`` negation and ``**`` support.
    """
    root = context_root.resolve()

    def collect_patterns() -> list[str]:
        # .dockerignore patterns from the context root always come first.
        collected = list(read_ignorefile(root / ".dockerignore"))
        if ignore is None:
            return collected
        if isinstance(ignore, (str, Path)):
            extra_file = Path(ignore)
            if not extra_file.exists():
                raise FileNotFoundError(f"Ignore file does not exist: {extra_file}")
            collected.extend(read_ignorefile(extra_file))
        else:
            # Treat as a sequence of raw inline patterns.
            collected.extend(list(ignore))
        return collected

    compiled: list[IgnorePattern] = compile_ignore(collect_patterns())

    archive = io.BytesIO()
    with tarfile.open(mode="w:gz", fileobj=archive) as tar:
        for file_path in _iter_build_context_files(root, patterns=compiled):
            tar.add(file_path, arcname=file_path.relative_to(root).as_posix())

    return archive.getvalue()
def _iter_build_context_files(
    context_root: Path,
    *,
    patterns: Sequence[IgnorePattern],
) -> Iterable[Path]:
    """Yield the files that belong in the build context.

    Delegates entirely to ``iter_included_files``, which applies the ignore
    patterns with directory pruning; this wrapper keeps a stable module-local
    name for callers in this file.
    """

    yield from iter_included_files(context_root, patterns=patterns)
If a sequence of strings is provided, they are + interpreted as inline ignore patterns appended after patterns + loaded from ``.dockerignore`` under ``context_root``. """ @@ -65,7 +67,7 @@ def default_build_context_strategy( context_root: Path, *, name: str | None = None, - dockerignore: Path | None = None, + ignore: str | Path | tuple[str, ...] | list[str] | None = None, ) -> BuildContextArtifact: """Default implementation that wraps ``build_docker_context_tar``. @@ -73,10 +75,7 @@ def default_build_context_strategy( returned as a :class:`BuildContextArtifact` with ``content_type=\"tgz\"``. """ - tar_bytes = build_docker_context_tar( - context_root, - dockerignore=dockerignore, - ) + tar_bytes = build_docker_context_tar(context_root, ignore=ignore) if name is None: base = context_root.name or "context" diff --git a/src/runloop_api_client/sdk/async_.py b/src/runloop_api_client/sdk/async_.py index b32310be6..bddf3e47e 100644 --- a/src/runloop_api_client/sdk/async_.py +++ b/src/runloop_api_client/sdk/async_.py @@ -2,9 +2,7 @@ from __future__ import annotations -import io import asyncio -import tarfile from typing import Dict, Mapping, Optional from pathlib import Path from datetime import timedelta @@ -376,6 +374,7 @@ async def upload_from_dir( name: Optional[str] = None, metadata: Optional[Dict[str, str]] = None, ttl: Optional[timedelta] = None, + ignore: str | Path | Sequence[str] | None = None, **options: Unpack[LongRequestOptions], ) -> AsyncStorageObject: """Create and upload an object from a local directory. @@ -390,21 +389,27 @@ async def upload_from_dir( :type metadata: Optional[Dict[str, str]] :param ttl: Optional Time-To-Live, after which the object is automatically deleted :type ttl: Optional[timedelta] - :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options + :param ignore: Optional ignore configuration. If a string or :class:`Path` + is provided it is treated as the path to an additional ignorefile. 
+ If a sequence of strings is provided, they are interpreted as inline + ignore patterns appended after patterns loaded from + ``.dockerignore`` under ``dir_path``. + :type ignore: Optional[str | Path | Sequence[str]] + :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` + for available options :return: Wrapper for the uploaded object :rtype: AsyncStorageObject :raises OSError: If the local file cannot be read """ path = Path(dir_path) + if not path.is_dir(): + raise ValueError(f"dir_path must be a directory, got: {path}") + name = name or f"{path.name}.tar.gz" ttl_ms = int(ttl.total_seconds()) * 1000 if ttl else None def synchronous_io() -> bytes: - with io.BytesIO() as tar_buffer: - with tarfile.open(fileobj=tar_buffer, mode="w:gz") as tar: - tar.add(path, arcname=".", recursive=True) - tar_buffer.seek(0) - return tar_buffer.read() + return build_docker_context_tar(path, ignore=ignore) tar_bytes = await asyncio.to_thread(synchronous_io) diff --git a/src/runloop_api_client/sdk/sync.py b/src/runloop_api_client/sdk/sync.py index 7368a0f7a..a8187701b 100644 --- a/src/runloop_api_client/sdk/sync.py +++ b/src/runloop_api_client/sdk/sync.py @@ -2,8 +2,6 @@ from __future__ import annotations -import io -import tarfile from typing import Dict, Mapping, Optional from pathlib import Path from datetime import timedelta @@ -375,6 +373,7 @@ def upload_from_dir( name: Optional[str] = None, metadata: Optional[Dict[str, str]] = None, ttl: Optional[timedelta] = None, + ignore: str | Path | Sequence[str] | None = None, **options: Unpack[LongRequestOptions], ) -> StorageObject: """Create and upload an object from a local directory. 
@@ -389,22 +388,29 @@ def upload_from_dir( :type metadata: Optional[Dict[str, str]] :param ttl: Optional Time-To-Live, after which the object is automatically deleted :type ttl: Optional[timedelta] - :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options + :param ignore: Optional ignore configuration. If a string or :class:`pathlib.Path` + is provided it is treated as the path to an additional ignorefile. + If a sequence of strings is provided, they are interpreted as inline + ignore patterns appended after patterns loaded from + ``.dockerignore`` under ``dir_path``. + :type ignore: Optional[str | Path | Sequence[str]] + :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` + for available options :return: Wrapper for the uploaded object :rtype: StorageObject :raises OSError: If the local file cannot be read """ path = Path(dir_path) + if not path.is_dir(): + raise ValueError(f"dir_path must be a directory, got: {path}") + name = name or f"{path.name}.tar.gz" ttl_ms = int(ttl.total_seconds()) * 1000 if ttl else None - tar_buffer = io.BytesIO() - with tarfile.open(fileobj=tar_buffer, mode="w:gz") as tar: - tar.add(path, arcname=".", recursive=True) - tar_buffer.seek(0) + tar_bytes = build_docker_context_tar(path, ignore=ignore) obj = self.create(name=name, content_type="tgz", metadata=metadata, ttl_ms=ttl_ms, **options) - obj.upload_content(tar_buffer) + obj.upload_content(tar_bytes) obj.complete() return obj diff --git a/tests/sdk/test_async_clients.py b/tests/sdk/test_async_clients.py index 15243c987..482fbe3fd 100644 --- a/tests/sdk/test_async_clients.py +++ b/tests/sdk/test_async_clients.py @@ -402,6 +402,77 @@ async def test_upload_from_dir( mock_async_client.objects.complete.assert_awaited_once() + @pytest.mark.asyncio + async def test_upload_from_dir_respects_dockerignore( + self, mock_async_client: AsyncMock, object_view: MockObjectView, tmp_path: Path + ) -> None: + 
"""upload_from_dir should respect .dockerignore patterns by default.""" + mock_async_client.objects.create = AsyncMock(return_value=object_view) + mock_async_client.objects.complete = AsyncMock(return_value=object_view) + + test_dir = tmp_path / "ctx" + test_dir.mkdir() + (test_dir / "keep.txt").write_text("keep", encoding="utf-8") + (test_dir / "ignore.log").write_text("ignore", encoding="utf-8") + build_dir = test_dir / "build" + build_dir.mkdir() + (build_dir / "ignored.txt").write_text("ignored", encoding="utf-8") + + dockerignore = test_dir / ".dockerignore" + dockerignore.write_text("*.log\nbuild/\n", encoding="utf-8") + + http_client = AsyncMock() + mock_response = create_mock_httpx_response() + http_client.put = AsyncMock(return_value=mock_response) + mock_async_client._client = http_client + + client = AsyncStorageObjectOps(mock_async_client) + obj = await client.upload_from_dir(test_dir) + + assert isinstance(obj, AsyncStorageObject) + uploaded_content = http_client.put.call_args[1]["content"] + + with tarfile.open(fileobj=io.BytesIO(uploaded_content), mode="r:gz") as tar: + names = {m.name for m in tar.getmembers()} + + assert "keep.txt" in names + assert "ignore.log" not in names + assert not any(name.startswith("build/") for name in names) + + @pytest.mark.asyncio + async def test_upload_from_dir_with_inline_ignore_patterns( + self, mock_async_client: AsyncMock, object_view: MockObjectView, tmp_path: Path + ) -> None: + """upload_from_dir should respect inline ignore patterns.""" + mock_async_client.objects.create = AsyncMock(return_value=object_view) + mock_async_client.objects.complete = AsyncMock(return_value=object_view) + + test_dir = tmp_path / "ctx" + test_dir.mkdir() + (test_dir / "keep.txt").write_text("keep", encoding="utf-8") + (test_dir / "ignore.log").write_text("ignore", encoding="utf-8") + build_dir = test_dir / "build" + build_dir.mkdir() + (build_dir / "ignored.txt").write_text("ignored", encoding="utf-8") + + http_client = AsyncMock() 
+ mock_response = create_mock_httpx_response() + http_client.put = AsyncMock(return_value=mock_response) + mock_async_client._client = http_client + + client = AsyncStorageObjectOps(mock_async_client) + obj = await client.upload_from_dir(test_dir, ignore=["*.log", "build/"]) + + assert isinstance(obj, AsyncStorageObject) + uploaded_content = http_client.put.call_args[1]["content"] + + with tarfile.open(fileobj=io.BytesIO(uploaded_content), mode="r:gz") as tar: + names = {m.name for m in tar.getmembers()} + + assert "keep.txt" in names + assert "ignore.log" not in names + assert not any(name.startswith("build/") for name in names) + @pytest.mark.asyncio async def test_upload_from_dir_default_name( self, mock_async_client: AsyncMock, object_view: MockObjectView, tmp_path: Path diff --git a/tests/sdk/test_clients.py b/tests/sdk/test_clients.py index 22081501d..e6349c24b 100644 --- a/tests/sdk/test_clients.py +++ b/tests/sdk/test_clients.py @@ -2,6 +2,8 @@ from __future__ import annotations +import io +import tarfile from types import SimpleNamespace from pathlib import Path from unittest.mock import Mock @@ -366,9 +368,9 @@ def test_upload_from_dir(self, mock_client: Mock, object_view: MockObjectView, t http_client.put.assert_called_once() call_args = http_client.put.call_args assert call_args[0][0] == object_view.upload_url - # Verify it's a BytesIO object uploaded_content = call_args[1]["content"] - assert hasattr(uploaded_content, "read") + # Verify it is bytes representing a gzipped tar archive + assert isinstance(uploaded_content, (bytes, bytearray)) mock_client.objects.complete.assert_called_once() def test_upload_from_dir_default_name(self, mock_client: Mock, object_view: MockObjectView, tmp_path: Path) -> None: @@ -480,6 +482,113 @@ def test_upload_from_dir_with_string_path( http_client.put.assert_called_once() mock_client.objects.complete.assert_called_once() + def test_upload_from_dir_respects_dockerignore( + self, mock_client: Mock, object_view: 
MockObjectView, tmp_path: Path + ) -> None: + """upload_from_dir should respect .dockerignore patterns by default.""" + mock_client.objects.create.return_value = object_view + + test_dir = tmp_path / "ctx" + test_dir.mkdir() + (test_dir / "keep.txt").write_text("keep", encoding="utf-8") + (test_dir / "ignore.log").write_text("ignore", encoding="utf-8") + build_dir = test_dir / "build" + build_dir.mkdir() + (build_dir / "ignored.txt").write_text("ignored", encoding="utf-8") + + dockerignore = test_dir / ".dockerignore" + dockerignore.write_text("*.log\nbuild/\n", encoding="utf-8") + + http_client = Mock() + mock_response = create_mock_httpx_response() + http_client.put.return_value = mock_response + mock_client._client = http_client + + client = StorageObjectOps(mock_client) + obj = client.upload_from_dir(test_dir) + + assert isinstance(obj, StorageObject) + http_client.put.assert_called_once() + uploaded_content = http_client.put.call_args[1]["content"] + + with tarfile.open(fileobj=io.BytesIO(uploaded_content), mode="r:gz") as tar: + names = {m.name for m in tar.getmembers()} + + assert "keep.txt" in names + assert "ignore.log" not in names + assert not any(name.startswith("build/") for name in names) + + def test_upload_from_dir_with_extra_ignore_file( + self, mock_client: Mock, object_view: MockObjectView, tmp_path: Path + ) -> None: + """upload_from_dir should merge .dockerignore and an extra ignore file.""" + mock_client.objects.create.return_value = object_view + + test_dir = tmp_path / "ctx" + test_dir.mkdir() + (test_dir / "keep.txt").write_text("keep", encoding="utf-8") + (test_dir / "ignore.log").write_text("ignore", encoding="utf-8") + build_dir = test_dir / "build" + build_dir.mkdir() + (build_dir / "ignored.txt").write_text("ignored", encoding="utf-8") + + # Only ignore logs in .dockerignore + dockerignore = test_dir / ".dockerignore" + dockerignore.write_text("*.log\n", encoding="utf-8") + + extra_ignore = tmp_path / "extra.ignore" + 
extra_ignore.write_text("build/\n", encoding="utf-8") + + http_client = Mock() + mock_response = create_mock_httpx_response() + http_client.put.return_value = mock_response + mock_client._client = http_client + + client = StorageObjectOps(mock_client) + obj = client.upload_from_dir(test_dir, ignore=extra_ignore) + + assert isinstance(obj, StorageObject) + uploaded_content = http_client.put.call_args[1]["content"] + + with tarfile.open(fileobj=io.BytesIO(uploaded_content), mode="r:gz") as tar: + names = {m.name for m in tar.getmembers()} + + assert "keep.txt" in names + assert "ignore.log" not in names + assert not any(name.startswith("build/") for name in names) + + def test_upload_from_dir_with_inline_ignore_patterns( + self, mock_client: Mock, object_view: MockObjectView, tmp_path: Path + ) -> None: + """upload_from_dir should respect inline ignore patterns.""" + mock_client.objects.create.return_value = object_view + + test_dir = tmp_path / "ctx" + test_dir.mkdir() + (test_dir / "keep.txt").write_text("keep", encoding="utf-8") + (test_dir / "ignore.log").write_text("ignore", encoding="utf-8") + build_dir = test_dir / "build" + build_dir.mkdir() + (build_dir / "ignored.txt").write_text("ignored", encoding="utf-8") + + http_client = Mock() + mock_response = create_mock_httpx_response() + http_client.put.return_value = mock_response + mock_client._client = http_client + + client = StorageObjectOps(mock_client) + obj = client.upload_from_dir(test_dir, ignore=["*.log", "build/"]) + + assert isinstance(obj, StorageObject) + uploaded_content = http_client.put.call_args[1]["content"] + + with tarfile.open(fileobj=io.BytesIO(uploaded_content), mode="r:gz") as tar: + names = {m.name for m in tar.getmembers()} + + assert "keep.txt" in names + assert "ignore.log" not in names + assert not any(name.startswith("build/") for name in names) + class TestRunloopSDK: """Tests for RunloopSDK class.""" diff --git a/tests/test_utils/test_context_loader.py 
b/tests/test_utils/test_context_loader.py index 1dbc828d2..3f1d0ed07 100644 --- a/tests/test_utils/test_context_loader.py +++ b/tests/test_utils/test_context_loader.py @@ -1,67 +1,56 @@ from pathlib import Path -from runloop_api_client.lib.context_loader import ( - _is_ignored, - _path_match, - _segment_match, - _DockerignorePattern, - _iter_build_context_files, - _load_dockerignore_patterns, +from runloop_api_client.lib._ignore import ( + IgnorePattern, + is_ignored, + path_match, + compile_ignore, + read_ignorefile, ) +from runloop_api_client.lib.context_loader import _iter_build_context_files def test_segment_match_basic_globs(): - assert _segment_match("*.log", "app.log") - assert not _segment_match("*.log", "app.txt") - assert _segment_match("foo?", "fooa") - assert not _segment_match("foo?", "fooba") - assert _segment_match("[ab].txt", "a.txt") - assert not _segment_match("[ab].txt", "c.txt") + patterns = compile_ignore(["*.log", "foo?", "[ab].txt"]) + pat_glob, pat_q, pat_class = patterns + + assert path_match(pat_glob, "app.log", is_dir=False) + assert not path_match(pat_glob, "app.txt", is_dir=False) + assert path_match(pat_q, "fooa", is_dir=False) + assert not path_match(pat_q, "fooba", is_dir=False) + assert path_match(pat_class, "a.txt", is_dir=False) + assert not path_match(pat_class, "c.txt", is_dir=False) def test_path_match_anchored_and_unanchored(): - pat = _DockerignorePattern(pattern="foo/bar.txt", negated=False, directory_only=False, anchored=True) - assert _path_match(pat, "foo/bar.txt", is_dir=False) - assert not _path_match(pat, "a/foo/bar.txt", is_dir=False) + pat = IgnorePattern(pattern="foo/bar.txt", negated=False, directory_only=False, anchored=True) + assert path_match(pat, "foo/bar.txt", is_dir=False) + assert not path_match(pat, "a/foo/bar.txt", is_dir=False) - pat_unanchored = _DockerignorePattern(pattern="foo/bar.txt", negated=False, directory_only=False, anchored=False) - assert _path_match(pat_unanchored, "a/foo/bar.txt", 
is_dir=False) - assert _path_match(pat_unanchored, "foo/bar.txt", is_dir=False) + pat_unanchored = IgnorePattern(pattern="foo/bar.txt", negated=False, directory_only=False, anchored=False) + assert path_match(pat_unanchored, "a/foo/bar.txt", is_dir=False) + assert path_match(pat_unanchored, "foo/bar.txt", is_dir=False) def test_path_match_double_star(): - pat = _DockerignorePattern(pattern="**/*.log", negated=False, directory_only=False, anchored=False) - assert _path_match(pat, "app.log", is_dir=False) - assert _path_match(pat, "a/b/app.log", is_dir=False) - assert not _path_match(pat, "a/b/app.txt", is_dir=False) + pat = IgnorePattern(pattern="**/*.log", negated=False, directory_only=False, anchored=False) + assert path_match(pat, "app.log", is_dir=False) + assert path_match(pat, "a/b/app.log", is_dir=False) + assert not path_match(pat, "a/b/app.txt", is_dir=False) def test_is_ignored_last_match_wins(): - patterns = [ - _DockerignorePattern(pattern="*.log", negated=False, directory_only=False, anchored=False), - _DockerignorePattern(pattern="keep.log", negated=True, directory_only=False, anchored=False), - ] - assert _is_ignored("foo.log", is_dir=False, patterns=patterns) - assert not _is_ignored("keep.log", is_dir=False, patterns=patterns) + patterns = compile_ignore(["*.log", "!keep.log"]) + assert is_ignored("foo.log", is_dir=False, patterns=patterns) + assert not is_ignored("keep.log", is_dir=False, patterns=patterns) -def test_load_dockerignore_patterns_basic(tmp_path: Path): +def test_read_ignorefile_basic(tmp_path: Path): dockerignore = tmp_path / ".dockerignore" - dockerignore.write_text( - "\n# comment\n *.log \n!keep.log\n\\#literal\n\\!literal\n", - encoding="utf-8", - ) - - patterns = _load_dockerignore_patterns(dockerignore) - assert len(patterns) == 4 - assert patterns[0].pattern == "*.log" - assert not patterns[0].negated - assert patterns[1].pattern == "keep.log" - assert patterns[1].negated - assert patterns[2].pattern == "#literal" - assert not 
patterns[2].negated - assert patterns[3].pattern == "!literal" - assert not patterns[3].negated + dockerignore.write_bytes(b"\xef\xbb\xbf# comment line\n*.log \n!keep.log\nbuild/\n") + + patterns = read_ignorefile(dockerignore) + assert patterns == ["*.log", "!keep.log", "build"] def test_iter_build_context_files_respects_dockerignore(tmp_path: Path): @@ -79,7 +68,83 @@ def test_iter_build_context_files_respects_dockerignore(tmp_path: Path): dockerignore = root / ".dockerignore" dockerignore.write_text("*.log\nbuild/\n", encoding="utf-8") - files = {p.relative_to(root).as_posix() for p in _iter_build_context_files(root, dockerignore_path=dockerignore)} + compiled = compile_ignore(read_ignorefile(dockerignore)) + files = {p.relative_to(root).as_posix() for p in _iter_build_context_files(root, patterns=compiled)} assert "foo.txt" in files assert "app.log" not in files assert "build/ignored.txt" not in files + + +def test_is_ignored_folder_exclusion_cannot_be_reincluded() -> None: + """Folder exclusion followed by file inclusion should still exclude. + + Mirrors Docker behavior exercised in moby's TestPatternMatchesFolderExclusions + style tests: a directory excluded by a pattern like ``docs`` cannot have + children re-included by a later ``!docs/README.md`` pattern. + """ + + patterns = compile_ignore(["docs", "!docs/README.md"]) + # The file under docs remains ignored overall. + assert is_ignored("docs/README.md", is_dir=False, patterns=patterns) + + +def test_compile_ignore_directory_only_and_files() -> None: + patterns = compile_ignore(["build/", "*.log"]) + + build_pat, log_pat = patterns + assert build_pat.directory_only + assert not log_pat.directory_only + + # Directory-only pattern does not directly match files at that path. + assert not path_match(build_pat, "build", is_dir=False) + assert path_match(build_pat, "build", is_dir=True) + + # But files under the directory are ignored via ancestor directory match. 
+ assert is_ignored("build/output.bin", is_dir=False, patterns=patterns) + # Log files are ignored everywhere. + assert is_ignored("app.log", is_dir=False, patterns=patterns) + assert is_ignored("subdir/app.log", is_dir=False, patterns=patterns) + + +def test_double_star_matching_variants() -> None: + patterns = compile_ignore(["**", "dir/**", "**/file", "**/*.txt"]) + + any_pat, dir_pat, file_pat, txt_pat = patterns + + # '**' matches everything. + assert path_match(any_pat, "file", is_dir=False) + assert path_match(any_pat, "dir/file", is_dir=False) + + # 'dir/**' matches anything under dir. + assert path_match(dir_pat, "dir/file", is_dir=False) + assert path_match(dir_pat, "dir/sub/file", is_dir=False) + assert not path_match(dir_pat, "other/file", is_dir=False) + + # '**/file' matches at any depth. + assert path_match(file_pat, "file", is_dir=False) + assert path_match(file_pat, "dir/file", is_dir=False) + assert path_match(file_pat, "a/b/file", is_dir=False) + + # '**/*.txt' matches text files at any depth. + assert path_match(txt_pat, "file.txt", is_dir=False) + assert path_match(txt_pat, "dir/file.txt", is_dir=False) + assert not path_match(txt_pat, "dir/file.log", is_dir=False) + + +def test_iter_build_context_files_respects_directory_pruning(tmp_path: Path) -> None: + """Directories excluded by patterns are not traversed, even with negation.""" + + root = tmp_path + docs = root / "docs" + docs.mkdir() + (docs / "README.md").write_text("keep?", encoding="utf-8") + + ignorefile = root / ".dockerignore" + # Attempt to re-include a file under an ignored directory. + ignorefile.write_text("docs/\n!docs/README.md\n", encoding="utf-8") + + compiled = compile_ignore(read_ignorefile(ignorefile)) + files = {p.relative_to(root).as_posix() for p in _iter_build_context_files(root, patterns=compiled)} + + # README.md should not appear because the parent directory is pruned. 
+ assert "docs/README.md" not in files From 81dba83752e79900a094c8b728a2829307b28642 Mon Sep 17 00:00:00 2001 From: James Chainey Date: Mon, 24 Nov 2025 17:13:11 -0800 Subject: [PATCH 08/16] fixed some tests and broken imports --- src/runloop_api_client/sdk/async_.py | 1 + src/runloop_api_client/sdk/sync.py | 1 + tests/test_utils/test_context_loader.py | 23 ++++++++++++----------- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/runloop_api_client/sdk/async_.py b/src/runloop_api_client/sdk/async_.py index bddf3e47e..9a2415ced 100644 --- a/src/runloop_api_client/sdk/async_.py +++ b/src/runloop_api_client/sdk/async_.py @@ -27,6 +27,7 @@ from .async_devbox import AsyncDevbox from .async_snapshot import AsyncSnapshot from .async_blueprint import AsyncBlueprint +from ..lib.context_loader import build_docker_context_tar from .async_storage_object import AsyncStorageObject from ..types.object_create_params import ContentType diff --git a/src/runloop_api_client/sdk/sync.py b/src/runloop_api_client/sdk/sync.py index a8187701b..3bf0b01f6 100644 --- a/src/runloop_api_client/sdk/sync.py +++ b/src/runloop_api_client/sdk/sync.py @@ -27,6 +27,7 @@ from .snapshot import Snapshot from .blueprint import Blueprint from .storage_object import StorageObject +from ..lib.context_loader import build_docker_context_tar from ..types.object_create_params import ContentType diff --git a/tests/test_utils/test_context_loader.py b/tests/test_utils/test_context_loader.py index 3f1d0ed07..8a833324e 100644 --- a/tests/test_utils/test_context_loader.py +++ b/tests/test_utils/test_context_loader.py @@ -75,17 +75,16 @@ def test_iter_build_context_files_respects_dockerignore(tmp_path: Path): assert "build/ignored.txt" not in files -def test_is_ignored_folder_exclusion_cannot_be_reincluded() -> None: - """Folder exclusion followed by file inclusion should still exclude. 
- - Mirrors Docker behavior exercised in moby's TestPatternMatchesFolderExclusions - style tests: a directory excluded by a pattern like ``docs`` cannot have - children re-included by a later ``!docs/README.md`` pattern. - """ +def test_is_ignored_directory_pattern_affects_directory_entry_only() -> None: + """Directory patterns apply directly to directory entries, not to children.""" patterns = compile_ignore(["docs", "!docs/README.md"]) - # The file under docs remains ignored overall. - assert is_ignored("docs/README.md", is_dir=False, patterns=patterns) + + # The directory itself is ignored. + assert is_ignored("docs", is_dir=True, patterns=patterns) + # The child file is not ignored by the pattern set alone; directory pruning + # in ``iter_included_files`` is responsible for excluding its contents. + assert not is_ignored("docs/README.md", is_dir=False, patterns=patterns) def test_compile_ignore_directory_only_and_files() -> None: @@ -99,8 +98,10 @@ def test_compile_ignore_directory_only_and_files() -> None: assert not path_match(build_pat, "build", is_dir=False) assert path_match(build_pat, "build", is_dir=True) - # But files under the directory are ignored via ancestor directory match. - assert is_ignored("build/output.bin", is_dir=False, patterns=patterns) + # Files under the directory are not ignored purely by the directory-only + # pattern; directory pruning in ``iter_included_files`` is responsible + # for skipping their traversal. + assert not is_ignored("build/output.bin", is_dir=False, patterns=patterns) # Log files are ignored everywhere. 
assert is_ignored("app.log", is_dir=False, patterns=patterns) assert is_ignored("subdir/app.log", is_dir=False, patterns=patterns) From e0a172524f90d365621401e4d19d8ee86baf80a0 Mon Sep 17 00:00:00 2001 From: James Chainey Date: Tue, 25 Nov 2025 14:46:46 -0800 Subject: [PATCH 09/16] improved typing and made the ignore matching less docker-specific --- src/runloop_api_client/lib/_ignore.py | 61 +++++++++++++++++++ src/runloop_api_client/lib/context_loader.py | 38 +++--------- src/runloop_api_client/sdk/_build_context.py | 5 +- src/runloop_api_client/sdk/async_.py | 14 ++--- .../sdk/async_storage_object.py | 5 +- src/runloop_api_client/sdk/storage_object.py | 5 +- src/runloop_api_client/sdk/sync.py | 14 ++--- tests/sdk/test_async_clients.py | 4 +- tests/sdk/test_clients.py | 7 ++- 9 files changed, 99 insertions(+), 54 deletions(-) diff --git a/src/runloop_api_client/lib/_ignore.py b/src/runloop_api_client/lib/_ignore.py index 2c45f0ead..a13afd3d6 100644 --- a/src/runloop_api_client/lib/_ignore.py +++ b/src/runloop_api_client/lib/_ignore.py @@ -1,12 +1,15 @@ from __future__ import annotations import os +from abc import ABC, abstractmethod from typing import Iterable, Optional, Sequence from pathlib import Path, PurePosixPath from dataclasses import dataclass __all__ = [ "IgnorePattern", + "IgnoreMatcher", + "DockerIgnoreMatcher", "read_ignorefile", "compile_ignore", "path_match", @@ -281,3 +284,61 @@ def iter_included_files( if is_ignored(rel_file, is_dir=False, patterns=patterns): continue yield file_path + + +class IgnoreMatcher(ABC): + """Abstract interface for ignore matchers like .dockerignore and .gitignore. + + There is considerable variation for each ignore file format, so this interface + provides a minimal contract for supporting each format. Implementations are + responsible for interpreting any underlying ignore configuration (files, inline + patterns, etc.) and returning all files that should be included under a given + root directory. 
+ """ + + @abstractmethod + def iter_paths(self, root: Path) -> Iterable[Path]: + """Yield filesystem paths to include under ``root``.""" + + +@dataclass(frozen=True) +class DockerIgnoreMatcher(IgnoreMatcher): + """Ignore matcher that mirrors Docker's .dockerignore semantics. + + This matcher: + - Closely follows Docker's .dockerignore semantics. + - Always loads patterns from ``.dockerignore`` in the provided context + root, if present. + - Optionally loads additional patterns from an extra ignorefile. + - Optionally appends inline pattern strings. + + Note: Patterns follow Docker-style semantics (``!`` negation, ``**`` support). + """ + + extra_ignorefile: str | Path | None = None + patterns: Sequence[str] | None = None + + def iter_paths(self, root: Path) -> Iterable[Path]: + """Yield non-ignored files under ``root`` honoring Docker-style patterns.""" + + root = root.resolve() + + all_patterns: list[str] = [] + + # 1) Always consider .dockerignore under the context root, if present. + default_ignorefile = root / ".dockerignore" + all_patterns.extend(read_ignorefile(default_ignorefile)) + + # 2) Optional additional ignorefile. + if self.extra_ignorefile is not None: + ignore_path = Path(self.extra_ignorefile) + if not ignore_path.exists(): + raise FileNotFoundError(f"Ignore file does not exist: {ignore_path}") + all_patterns.extend(read_ignorefile(ignore_path)) + + # 3) Optional inline patterns appended last. 
+ if self.patterns: + all_patterns.extend(self.patterns) + + compiled: list[IgnorePattern] = compile_ignore(all_patterns) + return iter_included_files(root, patterns=compiled) diff --git a/src/runloop_api_client/lib/context_loader.py b/src/runloop_api_client/lib/context_loader.py index 4b6be4451..9181e25e6 100644 --- a/src/runloop_api_client/lib/context_loader.py +++ b/src/runloop_api_client/lib/context_loader.py @@ -5,54 +5,30 @@ from typing import Iterable, Optional, Sequence from pathlib import Path -from ._ignore import IgnorePattern, compile_ignore, read_ignorefile, iter_included_files +from ._ignore import IgnoreMatcher, IgnorePattern, DockerIgnoreMatcher, iter_included_files def build_docker_context_tar( context_root: Path, *, - ignore: Optional[Sequence[str] | Path | str] = None, + ignore: Optional[IgnoreMatcher] = None, ) -> bytes: - """Create a .tar.gz of the build context, honoring ignore patterns. + """Create a .tar.gz of the build context, honoring Docker-style ignore patterns. - Treats ``context_root`` as the build context root. - Always loads ``.dockerignore`` under ``context_root`` if present. - - An optional ``ignore`` argument may be provided: - - * If a :class:`pathlib.Path` or string is given, it is treated as an - additional ignorefile path whose patterns are appended after - ``.dockerignore``. - * If a sequence of strings is given, they are treated as inline patterns - appended after any file-derived patterns. - - Patterns use Docker-style semantics with ``!`` negation and ``**`` support. + - An optional :class:`IgnoreMatcher` may be provided to customise how ignore + patterns are resolved; when omitted, :class:`DockerIgnoreMatcher` is used. """ context_root = context_root.resolve() - all_patterns: list[str] = [] - - # 1) Always consider .dockerignore under the context root, if present. 
- default_ignorefile = context_root / ".dockerignore" - all_patterns.extend(read_ignorefile(default_ignorefile)) - - # 2) Optional additional ignore source - if ignore is not None: - if isinstance(ignore, (str, Path)): - ignore_path = Path(ignore) - if not ignore_path.exists(): - raise FileNotFoundError(f"Ignore file does not exist: {ignore_path}") - all_patterns.extend(read_ignorefile(ignore_path)) - else: - # Treat as a sequence of raw patterns - all_patterns.extend(list(ignore)) - - compiled: list[IgnorePattern] = compile_ignore(all_patterns) + matcher: IgnoreMatcher = ignore or DockerIgnoreMatcher() buf = io.BytesIO() with tarfile.open(mode="w:gz", fileobj=buf) as tf: - for path in _iter_build_context_files(context_root, patterns=compiled): + for path in matcher.iter_paths(context_root): rel = path.relative_to(context_root) tf.add(path, arcname=rel.as_posix()) diff --git a/src/runloop_api_client/sdk/_build_context.py b/src/runloop_api_client/sdk/_build_context.py index 895594e22..eb47b0ebf 100644 --- a/src/runloop_api_client/sdk/_build_context.py +++ b/src/runloop_api_client/sdk/_build_context.py @@ -13,6 +13,7 @@ from dataclasses import dataclass from typing_extensions import Protocol +from ..lib._ignore import IgnoreMatcher from ..lib.context_loader import build_docker_context_tar from ..types.object_create_params import ContentType @@ -47,7 +48,7 @@ def __call__( # pragma: no cover - interface only context_root: Path, *, name: str | None = None, - ignore: str | Path | tuple[str, ...] | list[str] | None = None, + ignore: IgnoreMatcher | None = None, ) -> BuildContextArtifact: """Package the given directory into a tarball. @@ -67,7 +68,7 @@ def default_build_context_strategy( context_root: Path, *, name: str | None = None, - ignore: str | Path | tuple[str, ...] | list[str] | None = None, + ignore: IgnoreMatcher | None = None, ) -> BuildContextArtifact: """Default implementation that wraps ``build_docker_context_tar``. 
diff --git a/src/runloop_api_client/sdk/async_.py b/src/runloop_api_client/sdk/async_.py index 9a2415ced..73a0d2378 100644 --- a/src/runloop_api_client/sdk/async_.py +++ b/src/runloop_api_client/sdk/async_.py @@ -24,6 +24,7 @@ from .._types import Timeout, NotGiven, not_given from .._client import DEFAULT_MAX_RETRIES, AsyncRunloop from ._helpers import detect_content_type +from ..lib._ignore import IgnoreMatcher from .async_devbox import AsyncDevbox from .async_snapshot import AsyncSnapshot from .async_blueprint import AsyncBlueprint @@ -375,7 +376,7 @@ async def upload_from_dir( name: Optional[str] = None, metadata: Optional[Dict[str, str]] = None, ttl: Optional[timedelta] = None, - ignore: str | Path | Sequence[str] | None = None, + ignore: IgnoreMatcher | None = None, **options: Unpack[LongRequestOptions], ) -> AsyncStorageObject: """Create and upload an object from a local directory. @@ -390,12 +391,11 @@ async def upload_from_dir( :type metadata: Optional[Dict[str, str]] :param ttl: Optional Time-To-Live, after which the object is automatically deleted :type ttl: Optional[timedelta] - :param ignore: Optional ignore configuration. If a string or :class:`Path` - is provided it is treated as the path to an additional ignorefile. - If a sequence of strings is provided, they are interpreted as inline - ignore patterns appended after patterns loaded from - ``.dockerignore`` under ``dir_path``. - :type ignore: Optional[str | Path | Sequence[str]] + :param ignore: Optional ignore matcher. When provided it controls which + files under ``dir_path`` are included in the archived build + context. When omitted, a default Docker-style matcher that honors + ``.dockerignore`` under ``dir_path`` is used. 
+ :type ignore: Optional[IgnoreMatcher] :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options :return: Wrapper for the uploaded object diff --git a/src/runloop_api_client/sdk/async_storage_object.py b/src/runloop_api_client/sdk/async_storage_object.py index b5a9aefc1..e0c85d3e9 100644 --- a/src/runloop_api_client/sdk/async_storage_object.py +++ b/src/runloop_api_client/sdk/async_storage_object.py @@ -8,6 +8,7 @@ from ._types import BaseRequestOptions, LongRequestOptions, SDKObjectDownloadParams from .._client import AsyncRunloop from ..types.object_view import ObjectView +from ..types.blueprint_create_params import BuildContext from ..types.object_download_url_view import ObjectDownloadURLView @@ -159,10 +160,10 @@ async def upload_content(self, content: str | bytes | Iterable[bytes]) -> None: response = await self._client._client.put(url, content=content) response.raise_for_status() - def as_build_context(self) -> dict[str, str]: + def as_build_context(self) -> BuildContext: """Return this object in the shape expected for a Blueprint build context. - The returned dict can be passed directly to ``build_context`` or + The returned mapping can be passed directly to ``build_context`` or ``named_build_contexts`` when creating a blueprint. 
""" return { diff --git a/src/runloop_api_client/sdk/storage_object.py b/src/runloop_api_client/sdk/storage_object.py index 369274aa2..f6f4e6648 100644 --- a/src/runloop_api_client/sdk/storage_object.py +++ b/src/runloop_api_client/sdk/storage_object.py @@ -8,6 +8,7 @@ from ._types import BaseRequestOptions, LongRequestOptions, SDKObjectDownloadParams from .._client import Runloop from ..types.object_view import ObjectView +from ..types.blueprint_create_params import BuildContext from ..types.object_download_url_view import ObjectDownloadURLView @@ -159,10 +160,10 @@ def upload_content(self, content: str | bytes | Iterable[bytes]) -> None: response = self._client._client.put(url, content=content) response.raise_for_status() - def as_build_context(self) -> dict[str, str]: + def as_build_context(self) -> BuildContext: """Return this object in the shape expected for a Blueprint build context. - The returned dict can be passed directly to ``build_context`` or + The returned mapping can be passed directly to ``build_context`` or ``named_build_contexts`` when creating a blueprint. """ return { diff --git a/src/runloop_api_client/sdk/sync.py b/src/runloop_api_client/sdk/sync.py index 3bf0b01f6..d114214bd 100644 --- a/src/runloop_api_client/sdk/sync.py +++ b/src/runloop_api_client/sdk/sync.py @@ -26,6 +26,7 @@ from ._helpers import detect_content_type from .snapshot import Snapshot from .blueprint import Blueprint +from ..lib._ignore import IgnoreMatcher from .storage_object import StorageObject from ..lib.context_loader import build_docker_context_tar from ..types.object_create_params import ContentType @@ -374,7 +375,7 @@ def upload_from_dir( name: Optional[str] = None, metadata: Optional[Dict[str, str]] = None, ttl: Optional[timedelta] = None, - ignore: str | Path | Sequence[str] | None = None, + ignore: IgnoreMatcher | None = None, **options: Unpack[LongRequestOptions], ) -> StorageObject: """Create and upload an object from a local directory. 
@@ -389,12 +390,11 @@ def upload_from_dir( :type metadata: Optional[Dict[str, str]] :param ttl: Optional Time-To-Live, after which the object is automatically deleted :type ttl: Optional[timedelta] - :param ignore: Optional ignore configuration. If a string or :class:`pathlib.Path` - is provided it is treated as the path to an additional ignorefile. - If a sequence of strings is provided, they are interpreted as inline - ignore patterns appended after patterns loaded from - ``.dockerignore`` under ``dir_path``. - :type ignore: Optional[str | Path | Sequence[str]] + :param ignore: Optional ignore matcher. When provided it controls which + files under ``dir_path`` are included in the archived build + context. When omitted, a default Docker-style matcher that honors + ``.dockerignore`` under ``dir_path`` is used. + :type ignore: Optional[IgnoreMatcher] :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options :return: Wrapper for the uploaded object diff --git a/tests/sdk/test_async_clients.py b/tests/sdk/test_async_clients.py index 482fbe3fd..8c58e9b90 100644 --- a/tests/sdk/test_async_clients.py +++ b/tests/sdk/test_async_clients.py @@ -25,6 +25,7 @@ AsyncBlueprintOps, AsyncStorageObjectOps, ) +from runloop_api_client.lib._ignore import DockerIgnoreMatcher from runloop_api_client.lib.polling import PollingConfig @@ -461,7 +462,8 @@ async def test_upload_from_dir_with_inline_ignore_patterns( mock_async_client._client = http_client client = AsyncStorageObjectOps(mock_async_client) - obj = await client.upload_from_dir(test_dir, ignore=["*.log", "build/"]) + matcher = DockerIgnoreMatcher(patterns=["*.log", "build/"]) + obj = await client.upload_from_dir(test_dir, ignore=matcher) assert isinstance(obj, AsyncStorageObject) uploaded_content = http_client.put.call_args[1]["content"] diff --git a/tests/sdk/test_clients.py b/tests/sdk/test_clients.py index e6349c24b..df83273cb 100644 --- a/tests/sdk/test_clients.py +++ 
b/tests/sdk/test_clients.py @@ -25,6 +25,7 @@ BlueprintOps, StorageObjectOps, ) +from runloop_api_client.lib._ignore import DockerIgnoreMatcher from runloop_api_client.lib.polling import PollingConfig @@ -545,7 +546,8 @@ def test_upload_from_dir_with_extra_ignore_file( mock_client._client = http_client client = StorageObjectOps(mock_client) - obj = client.upload_from_dir(test_dir, ignore=extra_ignore) + matcher = DockerIgnoreMatcher(extra_ignorefile=extra_ignore) + obj = client.upload_from_dir(test_dir, ignore=matcher) assert isinstance(obj, StorageObject) uploaded_content = http_client.put.call_args[1]["content"] @@ -577,7 +579,8 @@ def test_upload_from_dir_with_inline_ignore_patterns( mock_client._client = http_client client = StorageObjectOps(mock_client) - obj = client.upload_from_dir(test_dir, ignore=["*.log", "build/"]) + matcher = DockerIgnoreMatcher(patterns=["*.log", "build/"]) + obj = client.upload_from_dir(test_dir, ignore=matcher) assert isinstance(obj, StorageObject) uploaded_content = http_client.put.call_args[1]["content"] From 9ef3450f104a9a9e5bbcf18a91c7fd5ba11d1d2d Mon Sep 17 00:00:00 2001 From: James Chainey Date: Tue, 25 Nov 2025 16:17:37 -0800 Subject: [PATCH 10/16] added tar filter for upload_from_dir --- src/runloop_api_client/lib/context_loader.py | 26 +++++- src/runloop_api_client/sdk/async_.py | 16 ++-- src/runloop_api_client/sdk/sync.py | 16 ++-- tests/sdk/test_async_clients.py | 48 ++--------- tests/sdk/test_clients.py | 87 ++------------------ 5 files changed, 55 insertions(+), 138 deletions(-) diff --git a/src/runloop_api_client/lib/context_loader.py b/src/runloop_api_client/lib/context_loader.py index 9181e25e6..1ef82711a 100644 --- a/src/runloop_api_client/lib/context_loader.py +++ b/src/runloop_api_client/lib/context_loader.py @@ -2,11 +2,13 @@ import io import tarfile -from typing import Iterable, Optional, Sequence +from typing import Callable, Iterable, Optional, Sequence from pathlib import Path from ._ignore import 
IgnoreMatcher, IgnorePattern, DockerIgnoreMatcher, iter_included_files +TarFilter = Callable[[tarfile.TarInfo], tarfile.TarInfo | None] + def build_docker_context_tar( context_root: Path, @@ -35,6 +37,28 @@ def build_docker_context_tar( return buf.getvalue() +def build_directory_tar( + root: Path, + *, + tar_filter: TarFilter | None = None, +) -> bytes: + """Create a .tar.gz archive containing all files under ``root``. + + No ignore semantics are applied by default; callers may pass a tar filter + compatible with :meth:`tarfile.TarFile.add` to modify or exclude members. + """ + + root = root.resolve() + buf = io.BytesIO() + with tarfile.open(mode="w:gz", fileobj=buf) as tf: + for file_path in root.rglob("*"): + if not file_path.is_file(): + continue + rel = file_path.relative_to(root) + tf.add(file_path, arcname=rel.as_posix(), filter=tar_filter) + return buf.getvalue() + + def _iter_build_context_files( context_root: Path, *, diff --git a/src/runloop_api_client/sdk/async_.py b/src/runloop_api_client/sdk/async_.py index 73a0d2378..ead1d13af 100644 --- a/src/runloop_api_client/sdk/async_.py +++ b/src/runloop_api_client/sdk/async_.py @@ -24,11 +24,10 @@ from .._types import Timeout, NotGiven, not_given from .._client import DEFAULT_MAX_RETRIES, AsyncRunloop from ._helpers import detect_content_type -from ..lib._ignore import IgnoreMatcher from .async_devbox import AsyncDevbox from .async_snapshot import AsyncSnapshot from .async_blueprint import AsyncBlueprint -from ..lib.context_loader import build_docker_context_tar +from ..lib.context_loader import TarFilter, build_directory_tar from .async_storage_object import AsyncStorageObject from ..types.object_create_params import ContentType @@ -376,7 +375,7 @@ async def upload_from_dir( name: Optional[str] = None, metadata: Optional[Dict[str, str]] = None, ttl: Optional[timedelta] = None, - ignore: IgnoreMatcher | None = None, + ignore: TarFilter | None = None, **options: Unpack[LongRequestOptions], ) -> 
AsyncStorageObject: """Create and upload an object from a local directory. @@ -391,11 +390,10 @@ async def upload_from_dir( :type metadata: Optional[Dict[str, str]] :param ttl: Optional Time-To-Live, after which the object is automatically deleted :type ttl: Optional[timedelta] - :param ignore: Optional ignore matcher. When provided it controls which - files under ``dir_path`` are included in the archived build - context. When omitted, a default Docker-style matcher that honors - ``.dockerignore`` under ``dir_path`` is used. - :type ignore: Optional[IgnoreMatcher] + :param ignore: Optional tar filter function compatible with + :meth:`tarfile.TarFile.add`. If provided, it will be called for each + member to allow modification or exclusion (by returning ``None``). + :type ignore: Optional[TarFilter] :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options :return: Wrapper for the uploaded object @@ -410,7 +408,7 @@ async def upload_from_dir( ttl_ms = int(ttl.total_seconds()) * 1000 if ttl else None def synchronous_io() -> bytes: - return build_docker_context_tar(path, ignore=ignore) + return build_directory_tar(path, tar_filter=ignore) tar_bytes = await asyncio.to_thread(synchronous_io) diff --git a/src/runloop_api_client/sdk/sync.py b/src/runloop_api_client/sdk/sync.py index d114214bd..5d6cf1526 100644 --- a/src/runloop_api_client/sdk/sync.py +++ b/src/runloop_api_client/sdk/sync.py @@ -26,9 +26,8 @@ from ._helpers import detect_content_type from .snapshot import Snapshot from .blueprint import Blueprint -from ..lib._ignore import IgnoreMatcher from .storage_object import StorageObject -from ..lib.context_loader import build_docker_context_tar +from ..lib.context_loader import TarFilter, build_directory_tar from ..types.object_create_params import ContentType @@ -375,7 +374,7 @@ def upload_from_dir( name: Optional[str] = None, metadata: Optional[Dict[str, str]] = None, ttl: Optional[timedelta] = None, - ignore: 
IgnoreMatcher | None = None, + ignore: TarFilter | None = None, **options: Unpack[LongRequestOptions], ) -> StorageObject: """Create and upload an object from a local directory. @@ -390,11 +389,10 @@ def upload_from_dir( :type metadata: Optional[Dict[str, str]] :param ttl: Optional Time-To-Live, after which the object is automatically deleted :type ttl: Optional[timedelta] - :param ignore: Optional ignore matcher. When provided it controls which - files under ``dir_path`` are included in the archived build - context. When omitted, a default Docker-style matcher that honors - ``.dockerignore`` under ``dir_path`` is used. - :type ignore: Optional[IgnoreMatcher] + :param ignore: Optional tar filter function compatible with + :meth:`tarfile.TarFile.add`. If provided, it will be called for each + member to allow modification or exclusion (by returning ``None``). + :type ignore: Optional[TarFilter] :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options :return: Wrapper for the uploaded object @@ -408,7 +406,7 @@ def upload_from_dir( name = name or f"{path.name}.tar.gz" ttl_ms = int(ttl.total_seconds()) * 1000 if ttl else None - tar_bytes = build_docker_context_tar(path, ignore=ignore) + tar_bytes = build_directory_tar(path, tar_filter=ignore) obj = self.create(name=name, content_type="tgz", metadata=metadata, ttl_ms=ttl_ms, **options) obj.upload_content(tar_bytes) diff --git a/tests/sdk/test_async_clients.py b/tests/sdk/test_async_clients.py index 8c58e9b90..45688ce2c 100644 --- a/tests/sdk/test_async_clients.py +++ b/tests/sdk/test_async_clients.py @@ -25,7 +25,6 @@ AsyncBlueprintOps, AsyncStorageObjectOps, ) -from runloop_api_client.lib._ignore import DockerIgnoreMatcher from runloop_api_client.lib.polling import PollingConfig @@ -403,43 +402,6 @@ async def test_upload_from_dir( mock_async_client.objects.complete.assert_awaited_once() - @pytest.mark.asyncio - async def test_upload_from_dir_respects_dockerignore( - self, 
mock_async_client: AsyncMock, object_view: MockObjectView, tmp_path: Path - ) -> None: - """upload_from_dir should respect .dockerignore patterns by default.""" - mock_async_client.objects.create = AsyncMock(return_value=object_view) - mock_async_client.objects.complete = AsyncMock(return_value=object_view) - - test_dir = tmp_path / "ctx" - test_dir.mkdir() - (test_dir / "keep.txt").write_text("keep", encoding="utf-8") - (test_dir / "ignore.log").write_text("ignore", encoding="utf-8") - build_dir = test_dir / "build" - build_dir.mkdir() - (build_dir / "ignored.txt").write_text("ignored", encoding="utf-8") - - dockerignore = test_dir / ".dockerignore" - dockerignore.write_text("*.log\nbuild/\n", encoding="utf-8") - - http_client = AsyncMock() - mock_response = create_mock_httpx_response() - http_client.put = AsyncMock(return_value=mock_response) - mock_async_client._client = http_client - - client = AsyncStorageObjectOps(mock_async_client) - obj = await client.upload_from_dir(test_dir) - - assert isinstance(obj, AsyncStorageObject) - uploaded_content = http_client.put.call_args[1]["content"] - - with tarfile.open(fileobj=io.BytesIO(uploaded_content), mode="r:gz") as tar: - names = {m.name for m in tar.getmembers()} - - assert "keep.txt" in names - assert "ignore.log" not in names - assert not any(name.startswith("build/") for name in names) - @pytest.mark.asyncio async def test_upload_from_dir_with_inline_ignore_patterns( self, mock_async_client: AsyncMock, object_view: MockObjectView, tmp_path: Path @@ -462,8 +424,14 @@ async def test_upload_from_dir_with_inline_ignore_patterns( mock_async_client._client = http_client client = AsyncStorageObjectOps(mock_async_client) - matcher = DockerIgnoreMatcher(patterns=["*.log", "build/"]) - obj = await client.upload_from_dir(test_dir, ignore=matcher) + + # Tar filter: drop logs and anything under build/ + def ignore_logs_and_build(ti: tarfile.TarInfo) -> tarfile.TarInfo | None: + if ti.name.endswith(".log") or 
ti.name.startswith("build/"): + return None + return ti + + obj = await client.upload_from_dir(test_dir, ignore=ignore_logs_and_build) assert isinstance(obj, AsyncStorageObject) uploaded_content = http_client.put.call_args[1]["content"] diff --git a/tests/sdk/test_clients.py b/tests/sdk/test_clients.py index df83273cb..8c398b7e4 100644 --- a/tests/sdk/test_clients.py +++ b/tests/sdk/test_clients.py @@ -25,7 +25,6 @@ BlueprintOps, StorageObjectOps, ) -from runloop_api_client.lib._ignore import DockerIgnoreMatcher from runloop_api_client.lib.polling import PollingConfig @@ -483,10 +482,10 @@ def test_upload_from_dir_with_string_path( http_client.put.assert_called_once() mock_client.objects.complete.assert_called_once() - def test_upload_from_dir_respects_dockerignore( + def test_upload_from_dir_respects_filter( self, mock_client: Mock, object_view: MockObjectView, tmp_path: Path ) -> None: - """upload_from_dir should respect .dockerignore patterns by default.""" + """upload_from_dir should respect a tar filter when provided.""" mock_client.objects.create.return_value = object_view test_dir = tmp_path / "ctx" @@ -497,90 +496,20 @@ def test_upload_from_dir_respects_dockerignore( build_dir.mkdir() (build_dir / "ignored.txt").write_text("ignored", encoding="utf-8") - dockerignore = test_dir / ".dockerignore" - dockerignore.write_text("*.log\nbuild/\n", encoding="utf-8") - http_client = Mock() mock_response = create_mock_httpx_response() http_client.put.return_value = mock_response mock_client._client = http_client client = StorageObjectOps(mock_client) - obj = client.upload_from_dir(test_dir) - - assert isinstance(obj, StorageObject) - http_client.put.assert_called_once() - uploaded_content = http_client.put.call_args[1]["content"] - - with tarfile.open(fileobj=io.BytesIO(uploaded_content), mode="r:gz") as tar: - names = {m.name for m in tar.getmembers()} - - assert "keep.txt" in names - assert "ignore.log" not in names - assert not any(name.startswith("build/") for name 
in names) - def test_upload_from_dir_with_extra_ignore_file( - self, mock_client: Mock, object_view: MockObjectView, tmp_path: Path - ) -> None: - """upload_from_dir should merge .dockerignore and an extra ignore file.""" - mock_client.objects.create.return_value = object_view - - test_dir = tmp_path / "ctx" - test_dir.mkdir() - (test_dir / "keep.txt").write_text("keep", encoding="utf-8") - (test_dir / "ignore.log").write_text("ignore", encoding="utf-8") - build_dir = test_dir / "build" - build_dir.mkdir() - (build_dir / "ignored.txt").write_text("ignored", encoding="utf-8") - - # Only ignore logs in .dockerignore - dockerignore = test_dir / ".dockerignore" - dockerignore.write_text("*.log\n", encoding="utf-8") - - extra_ignore = tmp_path / "extra.ignore" - extra_ignore.write_text("build/\n", encoding="utf-8") - - http_client = Mock() - mock_response = create_mock_httpx_response() - http_client.put.return_value = mock_response - mock_client._client = http_client + # Tar filter: drop logs and anything under build/ + def ignore_logs_and_build(ti: tarfile.TarInfo) -> tarfile.TarInfo | None: + if ti.name.endswith(".log") or ti.name.startswith("build/"): + return None + return ti - client = StorageObjectOps(mock_client) - matcher = DockerIgnoreMatcher(extra_ignorefile=extra_ignore) - obj = client.upload_from_dir(test_dir, ignore=matcher) - - assert isinstance(obj, StorageObject) - uploaded_content = http_client.put.call_args[1]["content"] - - with tarfile.open(fileobj=io.BytesIO(uploaded_content), mode="r:gz") as tar: - names = {m.name for m in tar.getmembers()} - - assert "keep.txt" in names - assert "ignore.log" not in names - assert not any(name.startswith("build/") for name in names) - - def test_upload_from_dir_with_inline_ignore_patterns( - self, mock_client: Mock, object_view: MockObjectView, tmp_path: Path - ) -> None: - """upload_from_dir should respect inline ignore patterns.""" - mock_client.objects.create.return_value = object_view - - test_dir = tmp_path / 
"ctx" - test_dir.mkdir() - (test_dir / "keep.txt").write_text("keep", encoding="utf-8") - (test_dir / "ignore.log").write_text("ignore", encoding="utf-8") - build_dir = test_dir / "build" - build_dir.mkdir() - (build_dir / "ignored.txt").write_text("ignored", encoding="utf-8") - - http_client = Mock() - mock_response = create_mock_httpx_response() - http_client.put.return_value = mock_response - mock_client._client = http_client - - client = StorageObjectOps(mock_client) - matcher = DockerIgnoreMatcher(patterns=["*.log", "build/"]) - obj = client.upload_from_dir(test_dir, ignore=matcher) + obj = client.upload_from_dir(test_dir, ignore=ignore_logs_and_build) assert isinstance(obj, StorageObject) uploaded_content = http_client.put.call_args[1]["content"] From d5621ab1a23efc8cf4794fe04a642383d32816f3 Mon Sep 17 00:00:00 2001 From: James Chainey Date: Tue, 25 Nov 2025 16:23:45 -0800 Subject: [PATCH 11/16] fixed bad type --- src/runloop_api_client/lib/context_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runloop_api_client/lib/context_loader.py b/src/runloop_api_client/lib/context_loader.py index 1ef82711a..67de3e226 100644 --- a/src/runloop_api_client/lib/context_loader.py +++ b/src/runloop_api_client/lib/context_loader.py @@ -7,7 +7,7 @@ from ._ignore import IgnoreMatcher, IgnorePattern, DockerIgnoreMatcher, iter_included_files -TarFilter = Callable[[tarfile.TarInfo], tarfile.TarInfo | None] +TarFilter = Callable[[tarfile.TarInfo], Optional[tarfile.TarInfo]] def build_docker_context_tar( From bbc1f7880333680070ce756d9519edd5a851b0d8 Mon Sep 17 00:00:00 2001 From: James Chainey Date: Mon, 1 Dec 2025 15:18:06 -0800 Subject: [PATCH 12/16] removed some dead code and standardized the ignore interface & made it a bit more ergonomic --- src/runloop_api_client/lib/_ignore.py | 101 ++++++++++++++++++- src/runloop_api_client/lib/context_loader.py | 45 +++++---- src/runloop_api_client/sdk/_build_context.py | 91 ----------------- 
src/runloop_api_client/sdk/async_.py | 34 +++++-- src/runloop_api_client/sdk/sync.py | 36 +++++-- tests/sdk/test_async_clients.py | 9 +- tests/sdk/test_clients.py | 12 +-- tests/test_utils/test_context_loader.py | 68 +++++++++++-- 8 files changed, 250 insertions(+), 146 deletions(-) delete mode 100644 src/runloop_api_client/sdk/_build_context.py diff --git a/src/runloop_api_client/lib/_ignore.py b/src/runloop_api_client/lib/_ignore.py index a13afd3d6..f74751a4d 100644 --- a/src/runloop_api_client/lib/_ignore.py +++ b/src/runloop_api_client/lib/_ignore.py @@ -1,8 +1,9 @@ from __future__ import annotations import os +import tarfile from abc import ABC, abstractmethod -from typing import Iterable, Optional, Sequence +from typing import Callable, Iterable, Optional, Sequence from pathlib import Path, PurePosixPath from dataclasses import dataclass @@ -10,10 +11,13 @@ "IgnorePattern", "IgnoreMatcher", "DockerIgnoreMatcher", + "FilePatternMatcher", + "TarFilterMatcher", "read_ignorefile", "compile_ignore", "path_match", "is_ignored", + "iter_included_files", ] @@ -286,6 +290,74 @@ def iter_included_files( yield file_path +TarFilter = Callable[[tarfile.TarInfo], Optional[tarfile.TarInfo]] + + +def _compute_included_dirs_from_files(included_files: set[str]) -> set[str]: + """Return all directory ancestors (plus ``'.'``) for a set of file paths.""" + + included_dirs: set[str] = {"."} + for rel in included_files: + parent = PurePosixPath(rel).parent + while True: + as_posix = parent.as_posix() or "." + included_dirs.add(as_posix) + if as_posix == ".": + break + parent = parent.parent + return included_dirs + + +class TarFilterMatcher: + """Adapt an :class:`IgnoreMatcher` to a :class:`TarFilter`-compatible callable. + + This helper precomputes the set of included files under ``root`` using the + provided :class:`IgnoreMatcher` and converts that into a simple tar filter: + + - Only files returned by ``matcher.iter_paths(root)`` are included. 
+ - Directory entries are included only when they are ancestors of at least + one included file (plus the root ``'.'`` entry). + + Member names passed to ``__call__`` are expected to be relative to + ``root`` and to use POSIX ``'/'`` separators, matching the behaviour of + ``build_directory_tar`` in :mod:`runloop_api_client.lib.context_loader`. + """ + + def __init__(self, root: Path, matcher: IgnoreMatcher) -> None: + self._root = root.resolve() + + # Compute the set of included files as relative POSIX paths. + # Note: the majority of the work being performed here is simply to deal with the path to the root. + included_files: set[str] = set() + for path in matcher.iter_paths(self._root): + rel = path.resolve().relative_to(self._root) + rel_posix = PurePosixPath(rel).as_posix() + included_files.add(rel_posix) + + included_dirs = _compute_included_dirs_from_files(included_files) + + self._included_files = included_files + self._included_dirs = included_dirs + + def __call__(self, ti: tarfile.TarInfo) -> Optional[tarfile.TarInfo]: + name = ti.name + + # The root of the archive is always kept. + if name == ".": + return ti + + if ti.isdir(): + if name in self._included_dirs: + return ti + return None + + # Non-directory entries (files, symlinks, etc.) are kept only if their + # relative path is in the included file set. + if name in self._included_files: + return ti + return None + + class IgnoreMatcher(ABC): """Abstract interface for ignore matchers like .dockerignore and .gitignore. @@ -342,3 +414,30 @@ def iter_paths(self, root: Path) -> Iterable[Path]: compiled: list[IgnorePattern] = compile_ignore(all_patterns) return iter_included_files(root, patterns=compiled) + + +@dataclass(frozen=True) +class FilePatternMatcher(IgnoreMatcher): + """Ignore matcher that applies only inline patterns, without .dockerignore. 
+ + Patterns follow the same semantics as :func:`compile_ignore` / Docker-style + ignore files and are treated as *ignore* rules (``!`` negation for + re-inclusion, ``**`` support, etc.). + + The constructor accepts either a single pattern string or a sequence of + pattern strings; a single string is automatically wrapped into a list. + """ + + patterns: Sequence[str] | str + + def __post_init__(self) -> None: + # Normalise a single pattern string into a list for downstream helpers. + if isinstance(self.patterns, str): + object.__setattr__(self, "patterns", [self.patterns]) + + def iter_paths(self, root: Path) -> Iterable[Path]: + """Yield non-ignored files under ``root`` based only on ``patterns``.""" + + root = root.resolve() + compiled: list[IgnorePattern] = compile_ignore(self.patterns) # type: ignore[arg-type] + return iter_included_files(root, patterns=compiled) diff --git a/src/runloop_api_client/lib/context_loader.py b/src/runloop_api_client/lib/context_loader.py index 67de3e226..bfa433a89 100644 --- a/src/runloop_api_client/lib/context_loader.py +++ b/src/runloop_api_client/lib/context_loader.py @@ -2,10 +2,10 @@ import io import tarfile -from typing import Callable, Iterable, Optional, Sequence +from typing import Callable, Optional, Sequence from pathlib import Path -from ._ignore import IgnoreMatcher, IgnorePattern, DockerIgnoreMatcher, iter_included_files +from ._ignore import IgnoreMatcher, DockerIgnoreMatcher TarFilter = Callable[[tarfile.TarInfo], Optional[tarfile.TarInfo]] @@ -13,7 +13,7 @@ def build_docker_context_tar( context_root: Path, *, - ignore: Optional[IgnoreMatcher] = None, + ignore: IgnoreMatcher | Sequence[str] | None = None, ) -> bytes: """Create a .tar.gz of the build context, honoring Docker-style ignore patterns. 
@@ -25,7 +25,15 @@ def build_docker_context_tar( context_root = context_root.resolve() - matcher: IgnoreMatcher = ignore or DockerIgnoreMatcher() + if ignore is None: + matcher: IgnoreMatcher = DockerIgnoreMatcher() + elif isinstance(ignore, IgnoreMatcher): + matcher = ignore + else: + # Treat sequences of pattern strings as additional inline patterns + # appended after ``.dockerignore`` (if present), mirroring + # :class:`DockerIgnoreMatcher` semantics. + matcher = DockerIgnoreMatcher(patterns=list(ignore)) buf = io.BytesIO() @@ -50,20 +58,21 @@ def build_directory_tar( root = root.resolve() buf = io.BytesIO() - with tarfile.open(mode="w:gz", fileobj=buf) as tf: - for file_path in root.rglob("*"): - if not file_path.is_file(): - continue - rel = file_path.relative_to(root) - tf.add(file_path, arcname=rel.as_posix(), filter=tar_filter) - return buf.getvalue() + def _wrapped_filter(ti: tarfile.TarInfo) -> Optional[tarfile.TarInfo]: + # Normalise member names so callers see paths relative to ``root`` + # without a leading ``./``, preserving existing TarFilter semantics and + # archive layout. This applies to both files and directories. + if ti.name.startswith("./"): + ti.name = ti.name[2:] -def _iter_build_context_files( - context_root: Path, - *, - patterns: Sequence[IgnorePattern], -) -> Iterable[Path]: - """Yield files to include in the build context, honoring ignore patterns.""" + if tar_filter is not None: + return tar_filter(ti) + return ti - return iter_included_files(context_root, patterns=patterns) + with tarfile.open(mode="w:gz", fileobj=buf) as tf: + # Add the root directory recursively in one call, delegating member + # handling to the wrapped filter above. 
+ tf.add(root, arcname=".", filter=_wrapped_filter) + + return buf.getvalue() diff --git a/src/runloop_api_client/sdk/_build_context.py b/src/runloop_api_client/sdk/_build_context.py deleted file mode 100644 index eb47b0ebf..000000000 --- a/src/runloop_api_client/sdk/_build_context.py +++ /dev/null @@ -1,91 +0,0 @@ -"""Helpers and strategy interface for packaging Docker build contexts. - -This module exposes a small, pluggable abstraction around turning a local -filesystem directory into a tarball suitable for use as a Docker build -context, plus a default implementation built on top of -``lib.context_loader.build_docker_context_tar``. -""" - -from __future__ import annotations - -from typing import Optional -from pathlib import Path -from dataclasses import dataclass -from typing_extensions import Protocol - -from ..lib._ignore import IgnoreMatcher -from ..lib.context_loader import build_docker_context_tar -from ..types.object_create_params import ContentType - -__all__ = ["BuildContextArtifact", "BuildContextStrategy", "default_build_context_strategy"] - - -@dataclass(frozen=True) -class BuildContextArtifact: - """Result of packaging a build context directory. - - Attributes: - data: Tarball bytes containing the build context. - content_type: Logical content type for the object payload. For the - default implementation this is always ``\"tgz\"``. - filename: Suggested filename to use when creating the backing Object. - """ - - data: bytes - content_type: ContentType = "tgz" - filename: Optional[str] = None - - -class BuildContextStrategy(Protocol): - """Strategy interface for building Docker contexts. - - Implementations may perform caching, custom compression, or additional - validation, but must return a fully materialised tarball in memory. 
- """ - - def __call__( # pragma: no cover - interface only - self, - context_root: Path, - *, - name: str | None = None, - ignore: IgnoreMatcher | None = None, - ) -> BuildContextArtifact: - """Package the given directory into a tarball. - - Args: - context_root: Filesystem path to the Docker build context root. - name: Optional logical name for the context; may be used to - derive a filename. - ignore: Optional ignore configuration. If a string or :class:`pathlib.Path` - is provided it is treated as the path to an additional - ignorefile. If a sequence of strings is provided, they are - interpreted as inline ignore patterns appended after patterns - loaded from ``.dockerignore`` under ``context_root``. - """ - - -def default_build_context_strategy( - context_root: Path, - *, - name: str | None = None, - ignore: IgnoreMatcher | None = None, -) -> BuildContextArtifact: - """Default implementation that wraps ``build_docker_context_tar``. - - The tarball is rebuilt on each invocation (no cross-call caching) and - returned as a :class:`BuildContextArtifact` with ``content_type=\"tgz\"``. 
- """ - - tar_bytes = build_docker_context_tar(context_root, ignore=ignore) - - if name is None: - base = context_root.name or "context" - filename = f"{base}.tar.gz" - else: - filename = f"{name}.tar.gz" - - return BuildContextArtifact( - data=tar_bytes, - content_type="tgz", - filename=filename, - ) diff --git a/src/runloop_api_client/sdk/async_.py b/src/runloop_api_client/sdk/async_.py index ead1d13af..5565d24e6 100644 --- a/src/runloop_api_client/sdk/async_.py +++ b/src/runloop_api_client/sdk/async_.py @@ -3,7 +3,7 @@ from __future__ import annotations import asyncio -from typing import Dict, Mapping, Optional +from typing import Dict, Mapping, Optional, Sequence from pathlib import Path from datetime import timedelta from typing_extensions import Unpack @@ -24,10 +24,11 @@ from .._types import Timeout, NotGiven, not_given from .._client import DEFAULT_MAX_RETRIES, AsyncRunloop from ._helpers import detect_content_type +from ..lib._ignore import IgnoreMatcher, TarFilterMatcher, FilePatternMatcher from .async_devbox import AsyncDevbox from .async_snapshot import AsyncSnapshot from .async_blueprint import AsyncBlueprint -from ..lib.context_loader import TarFilter, build_directory_tar +from ..lib.context_loader import build_directory_tar from .async_storage_object import AsyncStorageObject from ..types.object_create_params import ContentType @@ -375,7 +376,7 @@ async def upload_from_dir( name: Optional[str] = None, metadata: Optional[Dict[str, str]] = None, ttl: Optional[timedelta] = None, - ignore: TarFilter | None = None, + ignore: IgnoreMatcher | Sequence[str] | str | None = None, **options: Unpack[LongRequestOptions], ) -> AsyncStorageObject: """Create and upload an object from a local directory. 
@@ -390,10 +391,17 @@ async def upload_from_dir( :type metadata: Optional[Dict[str, str]] :param ttl: Optional Time-To-Live, after which the object is automatically deleted :type ttl: Optional[timedelta] - :param ignore: Optional tar filter function compatible with - :meth:`tarfile.TarFile.add`. If provided, it will be called for each - member to allow modification or exclusion (by returning ``None``). - :type ignore: Optional[TarFilter] + :param ignore: Optional ignore configuration controlling which files from + ``dir_path`` are included in the uploaded tarball. This may be: + + - An :class:`~runloop_api_client.lib._ignore.IgnoreMatcher` + implementation such as :class:`~runloop_api_client.lib._ignore.DockerIgnoreMatcher` + or :class:`~runloop_api_client.lib._ignore.FilePatternMatcher`. + - A single pattern string. + - A sequence of pattern strings. + + Patterns follow Docker-style semantics (``!`` negation, ``**`` support). + :type ignore: Optional[IgnoreMatcher | Sequence[str] | str] :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options :return: Wrapper for the uploaded object @@ -408,7 +416,17 @@ async def upload_from_dir( ttl_ms = int(ttl.total_seconds()) * 1000 if ttl else None def synchronous_io() -> bytes: - return build_directory_tar(path, tar_filter=ignore) + matcher: IgnoreMatcher | None + if ignore is None: + matcher = None + elif isinstance(ignore, IgnoreMatcher): + matcher = ignore + else: + matcher = FilePatternMatcher(ignore) # type: ignore[arg-type] + + if matcher is None: + return build_directory_tar(path) + return build_directory_tar(path, tar_filter=TarFilterMatcher(path, matcher)) tar_bytes = await asyncio.to_thread(synchronous_io) diff --git a/src/runloop_api_client/sdk/sync.py b/src/runloop_api_client/sdk/sync.py index 5d6cf1526..54f4c6b36 100644 --- a/src/runloop_api_client/sdk/sync.py +++ b/src/runloop_api_client/sdk/sync.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing 
import Dict, Mapping, Optional +from typing import Dict, Mapping, Optional, Sequence from pathlib import Path from datetime import timedelta from typing_extensions import Unpack @@ -26,8 +26,9 @@ from ._helpers import detect_content_type from .snapshot import Snapshot from .blueprint import Blueprint +from ..lib._ignore import IgnoreMatcher, TarFilterMatcher, FilePatternMatcher from .storage_object import StorageObject -from ..lib.context_loader import TarFilter, build_directory_tar +from ..lib.context_loader import build_directory_tar from ..types.object_create_params import ContentType @@ -374,7 +375,7 @@ def upload_from_dir( name: Optional[str] = None, metadata: Optional[Dict[str, str]] = None, ttl: Optional[timedelta] = None, - ignore: TarFilter | None = None, + ignore: IgnoreMatcher | Sequence[str] | str | None = None, **options: Unpack[LongRequestOptions], ) -> StorageObject: """Create and upload an object from a local directory. @@ -389,10 +390,17 @@ def upload_from_dir( :type metadata: Optional[Dict[str, str]] :param ttl: Optional Time-To-Live, after which the object is automatically deleted :type ttl: Optional[timedelta] - :param ignore: Optional tar filter function compatible with - :meth:`tarfile.TarFile.add`. If provided, it will be called for each - member to allow modification or exclusion (by returning ``None``). - :type ignore: Optional[TarFilter] + :param ignore: Optional ignore configuration controlling which files from + ``dir_path`` are included in the uploaded tarball. This may be: + + - An :class:`~runloop_api_client.lib._ignore.IgnoreMatcher` + implementation such as :class:`~runloop_api_client.lib._ignore.DockerIgnoreMatcher` + or :class:`~runloop_api_client.lib._ignore.FilePatternMatcher`. + - A single pattern string. + - A sequence of pattern strings. + + Patterns follow Docker-style semantics (``!`` negation, ``**`` support). 
+ :type ignore: Optional[IgnoreMatcher | Sequence[str] | str] :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options :return: Wrapper for the uploaded object @@ -406,7 +414,19 @@ def upload_from_dir( name = name or f"{path.name}.tar.gz" ttl_ms = int(ttl.total_seconds()) * 1000 if ttl else None - tar_bytes = build_directory_tar(path, tar_filter=ignore) + # Pick the right matcher + matcher: IgnoreMatcher | None + if ignore is None: + matcher = None + elif isinstance(ignore, IgnoreMatcher): + matcher = ignore + else: + matcher = FilePatternMatcher(ignore) # type: ignore[arg-type] + + if matcher is None: + tar_bytes = build_directory_tar(path) + else: + tar_bytes = build_directory_tar(path, tar_filter=TarFilterMatcher(path, matcher)) obj = self.create(name=name, content_type="tgz", metadata=metadata, ttl_ms=ttl_ms, **options) obj.upload_content(tar_bytes) diff --git a/tests/sdk/test_async_clients.py b/tests/sdk/test_async_clients.py index 45688ce2c..dc8ce4e23 100644 --- a/tests/sdk/test_async_clients.py +++ b/tests/sdk/test_async_clients.py @@ -425,13 +425,8 @@ async def test_upload_from_dir_with_inline_ignore_patterns( client = AsyncStorageObjectOps(mock_async_client) - # Tar filter: drop logs and anything under build/ - def ignore_logs_and_build(ti: tarfile.TarInfo) -> tarfile.TarInfo | None: - if ti.name.endswith(".log") or ti.name.startswith("build/"): - return None - return ti - - obj = await client.upload_from_dir(test_dir, ignore=ignore_logs_and_build) + # Inline patterns: drop logs and anything under build/ + obj = await client.upload_from_dir(test_dir, ignore=["*.log", "build/"]) assert isinstance(obj, AsyncStorageObject) uploaded_content = http_client.put.call_args[1]["content"] diff --git a/tests/sdk/test_clients.py b/tests/sdk/test_clients.py index 8c398b7e4..e975a495b 100644 --- a/tests/sdk/test_clients.py +++ b/tests/sdk/test_clients.py @@ -25,6 +25,7 @@ BlueprintOps, StorageObjectOps, ) +from 
runloop_api_client.lib._ignore import FilePatternMatcher from runloop_api_client.lib.polling import PollingConfig @@ -485,7 +486,7 @@ def test_upload_from_dir_with_string_path( def test_upload_from_dir_respects_filter( self, mock_client: Mock, object_view: MockObjectView, tmp_path: Path ) -> None: - """upload_from_dir should respect a tar filter when provided.""" + """upload_from_dir should respect ignore patterns when provided.""" mock_client.objects.create.return_value = object_view test_dir = tmp_path / "ctx" @@ -503,13 +504,10 @@ def test_upload_from_dir_respects_filter( client = StorageObjectOps(mock_client) - # Tar filter: drop logs and anything under build/ - def ignore_logs_and_build(ti: tarfile.TarInfo) -> tarfile.TarInfo | None: - if ti.name.endswith(".log") or ti.name.startswith("build/"): - return None - return ti + # Ignore patterns: drop logs and anything under build/ + matcher = FilePatternMatcher(["*.log", "build/"]) - obj = client.upload_from_dir(test_dir, ignore=ignore_logs_and_build) + obj = client.upload_from_dir(test_dir, ignore=matcher) assert isinstance(obj, StorageObject) uploaded_content = http_client.put.call_args[1]["content"] diff --git a/tests/test_utils/test_context_loader.py b/tests/test_utils/test_context_loader.py index 8a833324e..430aaa98c 100644 --- a/tests/test_utils/test_context_loader.py +++ b/tests/test_utils/test_context_loader.py @@ -1,13 +1,21 @@ +import io +import tarfile from pathlib import Path from runloop_api_client.lib._ignore import ( IgnorePattern, + TarFilterMatcher, + FilePatternMatcher, is_ignored, path_match, compile_ignore, read_ignorefile, + iter_included_files, +) +from runloop_api_client.lib.context_loader import ( + build_directory_tar, + build_docker_context_tar, ) -from runloop_api_client.lib.context_loader import _iter_build_context_files def test_segment_match_basic_globs(): @@ -69,7 +77,7 @@ def test_iter_build_context_files_respects_dockerignore(tmp_path: Path): 
dockerignore.write_text("*.log\nbuild/\n", encoding="utf-8") compiled = compile_ignore(read_ignorefile(dockerignore)) - files = {p.relative_to(root).as_posix() for p in _iter_build_context_files(root, patterns=compiled)} + files = {p.relative_to(root).as_posix() for p in iter_included_files(root, patterns=compiled)} assert "foo.txt" in files assert "app.log" not in files assert "build/ignored.txt" not in files @@ -144,8 +152,56 @@ def test_iter_build_context_files_respects_directory_pruning(tmp_path: Path) -> # Attempt to re-include a file under an ignored directory. ignorefile.write_text("docs/\n!docs/README.md\n", encoding="utf-8") - compiled = compile_ignore(read_ignorefile(ignorefile)) - files = {p.relative_to(root).as_posix() for p in _iter_build_context_files(root, patterns=compiled)} - # README.md should not appear because the parent directory is pruned. - assert "docs/README.md" not in files +def test_build_docker_context_tar_supports_pattern_list(tmp_path: Path) -> None: + """build_docker_context_tar should accept a sequence of ignore patterns.""" + + root = tmp_path + (root / "keep.txt").write_text("keep", encoding="utf-8") + (root / "env.venv").write_text("ignored", encoding="utf-8") + + tar_bytes = build_docker_context_tar(root, ignore=["*.venv"]) + + with tarfile.open(fileobj=io.BytesIO(tar_bytes), mode="r:gz") as tf: + names = {m.name for m in tf.getmembers()} + + assert "keep.txt" in names + assert "env.venv" not in names + + +def test_build_docker_context_tar_supports_file_pattern_matcher(tmp_path: Path) -> None: + """build_docker_context_tar should accept a FilePatternMatcher instance.""" + + root = tmp_path + (root / "keep.bin").write_text("keep", encoding="utf-8") + (root / "ignore.txt").write_text("ignored", encoding="utf-8") + + matcher = FilePatternMatcher("**/*.txt") + tar_bytes = build_docker_context_tar(root, ignore=matcher) + + with tarfile.open(fileobj=io.BytesIO(tar_bytes), mode="r:gz") as tf: + names = {m.name for m in tf.getmembers()} 
+ + assert "keep.bin" in names + assert "ignore.txt" not in names + + +def test_tar_filter_matcher_respects_patterns(tmp_path: Path) -> None: + """TarFilterMatcher should apply FilePatternMatcher patterns at tar level.""" + + root = tmp_path + (root / "keep.txt").write_text("keep", encoding="utf-8") + (root / "ignore.log").write_text("ignore", encoding="utf-8") + build_dir = root / "build" + build_dir.mkdir() + (build_dir / "ignored.txt").write_text("ignored", encoding="utf-8") + + matcher = FilePatternMatcher(["*.log", "build/"]) + tar_bytes = build_directory_tar(root, tar_filter=TarFilterMatcher(root, matcher)) + + with tarfile.open(fileobj=io.BytesIO(tar_bytes), mode="r:gz") as tf: + names = {m.name for m in tf.getmembers()} + + assert "keep.txt" in names + assert "ignore.log" not in names + assert not any(name.startswith("build/") for name in names) From 7070b9582afa41c175cd4b7b44b04ff2c85785f7 Mon Sep 17 00:00:00 2001 From: James Chainey Date: Mon, 1 Dec 2025 15:44:55 -0800 Subject: [PATCH 13/16] rolled back change that made tar its own filter type -- big misunderstanding --- src/runloop_api_client/lib/_ignore.py | 72 +------------------------ src/runloop_api_client/sdk/async_.py | 34 +++--------- src/runloop_api_client/sdk/sync.py | 36 +++---------- tests/sdk/test_async_clients.py | 9 +++- tests/sdk/test_clients.py | 12 +++-- tests/test_utils/test_context_loader.py | 27 +--------- 6 files changed, 32 insertions(+), 158 deletions(-) diff --git a/src/runloop_api_client/lib/_ignore.py b/src/runloop_api_client/lib/_ignore.py index f74751a4d..382b55cd4 100644 --- a/src/runloop_api_client/lib/_ignore.py +++ b/src/runloop_api_client/lib/_ignore.py @@ -1,9 +1,8 @@ from __future__ import annotations import os -import tarfile from abc import ABC, abstractmethod -from typing import Callable, Iterable, Optional, Sequence +from typing import Iterable, Optional, Sequence from pathlib import Path, PurePosixPath from dataclasses import dataclass @@ -12,7 +11,6 @@ 
"IgnoreMatcher", "DockerIgnoreMatcher", "FilePatternMatcher", - "TarFilterMatcher", "read_ignorefile", "compile_ignore", "path_match", @@ -290,74 +288,6 @@ def iter_included_files( yield file_path -TarFilter = Callable[[tarfile.TarInfo], Optional[tarfile.TarInfo]] - - -def _compute_included_dirs_from_files(included_files: set[str]) -> set[str]: - """Return all directory ancestors (plus ``'.'``) for a set of file paths.""" - - included_dirs: set[str] = {"."} - for rel in included_files: - parent = PurePosixPath(rel).parent - while True: - as_posix = parent.as_posix() or "." - included_dirs.add(as_posix) - if as_posix == ".": - break - parent = parent.parent - return included_dirs - - -class TarFilterMatcher: - """Adapt an :class:`IgnoreMatcher` to a :class:`TarFilter`-compatible callable. - - This helper precomputes the set of included files under ``root`` using the - provided :class:`IgnoreMatcher` and converts that into a simple tar filter: - - - Only files returned by ``matcher.iter_paths(root)`` are included. - - Directory entries are included only when they are ancestors of at least - one included file (plus the root ``'.'`` entry). - - Member names passed to ``__call__`` are expected to be relative to - ``root`` and to use POSIX ``'/'`` separators, matching the behaviour of - ``build_directory_tar`` in :mod:`runloop_api_client.lib.context_loader`. - """ - - def __init__(self, root: Path, matcher: IgnoreMatcher) -> None: - self._root = root.resolve() - - # Compute the set of included files as relative POSIX paths. - # Note: the majority of the work being performed here is simply to deal with the path to the root. 
- included_files: set[str] = set() - for path in matcher.iter_paths(self._root): - rel = path.resolve().relative_to(self._root) - rel_posix = PurePosixPath(rel).as_posix() - included_files.add(rel_posix) - - included_dirs = _compute_included_dirs_from_files(included_files) - - self._included_files = included_files - self._included_dirs = included_dirs - - def __call__(self, ti: tarfile.TarInfo) -> Optional[tarfile.TarInfo]: - name = ti.name - - # The root of the archive is always kept. - if name == ".": - return ti - - if ti.isdir(): - if name in self._included_dirs: - return ti - return None - - # Non-directory entries (files, symlinks, etc.) are kept only if their - # relative path is in the included file set. - if name in self._included_files: - return ti - return None - - class IgnoreMatcher(ABC): """Abstract interface for ignore matchers like .dockerignore and .gitignore. diff --git a/src/runloop_api_client/sdk/async_.py b/src/runloop_api_client/sdk/async_.py index 5565d24e6..ead1d13af 100644 --- a/src/runloop_api_client/sdk/async_.py +++ b/src/runloop_api_client/sdk/async_.py @@ -3,7 +3,7 @@ from __future__ import annotations import asyncio -from typing import Dict, Mapping, Optional, Sequence +from typing import Dict, Mapping, Optional from pathlib import Path from datetime import timedelta from typing_extensions import Unpack @@ -24,11 +24,10 @@ from .._types import Timeout, NotGiven, not_given from .._client import DEFAULT_MAX_RETRIES, AsyncRunloop from ._helpers import detect_content_type -from ..lib._ignore import IgnoreMatcher, TarFilterMatcher, FilePatternMatcher from .async_devbox import AsyncDevbox from .async_snapshot import AsyncSnapshot from .async_blueprint import AsyncBlueprint -from ..lib.context_loader import build_directory_tar +from ..lib.context_loader import TarFilter, build_directory_tar from .async_storage_object import AsyncStorageObject from ..types.object_create_params import ContentType @@ -376,7 +375,7 @@ async def upload_from_dir( 
name: Optional[str] = None, metadata: Optional[Dict[str, str]] = None, ttl: Optional[timedelta] = None, - ignore: IgnoreMatcher | Sequence[str] | str | None = None, + ignore: TarFilter | None = None, **options: Unpack[LongRequestOptions], ) -> AsyncStorageObject: """Create and upload an object from a local directory. @@ -391,17 +390,10 @@ async def upload_from_dir( :type metadata: Optional[Dict[str, str]] :param ttl: Optional Time-To-Live, after which the object is automatically deleted :type ttl: Optional[timedelta] - :param ignore: Optional ignore configuration controlling which files from - ``dir_path`` are included in the uploaded tarball. This may be: - - - An :class:`~runloop_api_client.lib._ignore.IgnoreMatcher` - implementation such as :class:`~runloop_api_client.lib._ignore.DockerIgnoreMatcher` - or :class:`~runloop_api_client.lib._ignore.FilePatternMatcher`. - - A single pattern string. - - A sequence of pattern strings. - - Patterns follow Docker-style semantics (``!`` negation, ``**`` support). - :type ignore: Optional[IgnoreMatcher | Sequence[str] | str] + :param ignore: Optional tar filter function compatible with + :meth:`tarfile.TarFile.add`. If provided, it will be called for each + member to allow modification or exclusion (by returning ``None``). 
+ :type ignore: Optional[TarFilter] :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options :return: Wrapper for the uploaded object @@ -416,17 +408,7 @@ async def upload_from_dir( ttl_ms = int(ttl.total_seconds()) * 1000 if ttl else None def synchronous_io() -> bytes: - matcher: IgnoreMatcher | None - if ignore is None: - matcher = None - elif isinstance(ignore, IgnoreMatcher): - matcher = ignore - else: - matcher = FilePatternMatcher(ignore) # type: ignore[arg-type] - - if matcher is None: - return build_directory_tar(path) - return build_directory_tar(path, tar_filter=TarFilterMatcher(path, matcher)) + return build_directory_tar(path, tar_filter=ignore) tar_bytes = await asyncio.to_thread(synchronous_io) diff --git a/src/runloop_api_client/sdk/sync.py b/src/runloop_api_client/sdk/sync.py index 54f4c6b36..5d6cf1526 100644 --- a/src/runloop_api_client/sdk/sync.py +++ b/src/runloop_api_client/sdk/sync.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import Dict, Mapping, Optional, Sequence +from typing import Dict, Mapping, Optional from pathlib import Path from datetime import timedelta from typing_extensions import Unpack @@ -26,9 +26,8 @@ from ._helpers import detect_content_type from .snapshot import Snapshot from .blueprint import Blueprint -from ..lib._ignore import IgnoreMatcher, TarFilterMatcher, FilePatternMatcher from .storage_object import StorageObject -from ..lib.context_loader import build_directory_tar +from ..lib.context_loader import TarFilter, build_directory_tar from ..types.object_create_params import ContentType @@ -375,7 +374,7 @@ def upload_from_dir( name: Optional[str] = None, metadata: Optional[Dict[str, str]] = None, ttl: Optional[timedelta] = None, - ignore: IgnoreMatcher | Sequence[str] | str | None = None, + ignore: TarFilter | None = None, **options: Unpack[LongRequestOptions], ) -> StorageObject: """Create and upload an object from a local directory. 
@@ -390,17 +389,10 @@ def upload_from_dir( :type metadata: Optional[Dict[str, str]] :param ttl: Optional Time-To-Live, after which the object is automatically deleted :type ttl: Optional[timedelta] - :param ignore: Optional ignore configuration controlling which files from - ``dir_path`` are included in the uploaded tarball. This may be: - - - An :class:`~runloop_api_client.lib._ignore.IgnoreMatcher` - implementation such as :class:`~runloop_api_client.lib._ignore.DockerIgnoreMatcher` - or :class:`~runloop_api_client.lib._ignore.FilePatternMatcher`. - - A single pattern string. - - A sequence of pattern strings. - - Patterns follow Docker-style semantics (``!`` negation, ``**`` support). - :type ignore: Optional[IgnoreMatcher | Sequence[str] | str] + :param ignore: Optional tar filter function compatible with + :meth:`tarfile.TarFile.add`. If provided, it will be called for each + member to allow modification or exclusion (by returning ``None``). + :type ignore: Optional[TarFilter] :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options :return: Wrapper for the uploaded object @@ -414,19 +406,7 @@ def upload_from_dir( name = name or f"{path.name}.tar.gz" ttl_ms = int(ttl.total_seconds()) * 1000 if ttl else None - # Pick the right matcher - matcher: IgnoreMatcher | None - if ignore is None: - matcher = None - elif isinstance(ignore, IgnoreMatcher): - matcher = ignore - else: - matcher = FilePatternMatcher(ignore) # type: ignore[arg-type] - - if matcher is None: - tar_bytes = build_directory_tar(path) - else: - tar_bytes = build_directory_tar(path, tar_filter=TarFilterMatcher(path, matcher)) + tar_bytes = build_directory_tar(path, tar_filter=ignore) obj = self.create(name=name, content_type="tgz", metadata=metadata, ttl_ms=ttl_ms, **options) obj.upload_content(tar_bytes) diff --git a/tests/sdk/test_async_clients.py b/tests/sdk/test_async_clients.py index dc8ce4e23..45688ce2c 100644 --- a/tests/sdk/test_async_clients.py 
+++ b/tests/sdk/test_async_clients.py @@ -425,8 +425,13 @@ async def test_upload_from_dir_with_inline_ignore_patterns( client = AsyncStorageObjectOps(mock_async_client) - # Inline patterns: drop logs and anything under build/ - obj = await client.upload_from_dir(test_dir, ignore=["*.log", "build/"]) + # Tar filter: drop logs and anything under build/ + def ignore_logs_and_build(ti: tarfile.TarInfo) -> tarfile.TarInfo | None: + if ti.name.endswith(".log") or ti.name.startswith("build/"): + return None + return ti + + obj = await client.upload_from_dir(test_dir, ignore=ignore_logs_and_build) assert isinstance(obj, AsyncStorageObject) uploaded_content = http_client.put.call_args[1]["content"] diff --git a/tests/sdk/test_clients.py b/tests/sdk/test_clients.py index e975a495b..8c398b7e4 100644 --- a/tests/sdk/test_clients.py +++ b/tests/sdk/test_clients.py @@ -25,7 +25,6 @@ BlueprintOps, StorageObjectOps, ) -from runloop_api_client.lib._ignore import FilePatternMatcher from runloop_api_client.lib.polling import PollingConfig @@ -486,7 +485,7 @@ def test_upload_from_dir_with_string_path( def test_upload_from_dir_respects_filter( self, mock_client: Mock, object_view: MockObjectView, tmp_path: Path ) -> None: - """upload_from_dir should respect ignore patterns when provided.""" + """upload_from_dir should respect a tar filter when provided.""" mock_client.objects.create.return_value = object_view test_dir = tmp_path / "ctx" @@ -504,10 +503,13 @@ def test_upload_from_dir_respects_filter( client = StorageObjectOps(mock_client) - # Ignore patterns: drop logs and anything under build/ - matcher = FilePatternMatcher(["*.log", "build/"]) + # Tar filter: drop logs and anything under build/ + def ignore_logs_and_build(ti: tarfile.TarInfo) -> tarfile.TarInfo | None: + if ti.name.endswith(".log") or ti.name.startswith("build/"): + return None + return ti - obj = client.upload_from_dir(test_dir, ignore=matcher) + obj = client.upload_from_dir(test_dir, ignore=ignore_logs_and_build) 
assert isinstance(obj, StorageObject) uploaded_content = http_client.put.call_args[1]["content"] diff --git a/tests/test_utils/test_context_loader.py b/tests/test_utils/test_context_loader.py index 430aaa98c..d8ee1eae0 100644 --- a/tests/test_utils/test_context_loader.py +++ b/tests/test_utils/test_context_loader.py @@ -4,7 +4,6 @@ from runloop_api_client.lib._ignore import ( IgnorePattern, - TarFilterMatcher, FilePatternMatcher, is_ignored, path_match, @@ -12,10 +11,7 @@ read_ignorefile, iter_included_files, ) -from runloop_api_client.lib.context_loader import ( - build_directory_tar, - build_docker_context_tar, -) +from runloop_api_client.lib.context_loader import build_docker_context_tar def test_segment_match_basic_globs(): @@ -184,24 +180,3 @@ def test_build_docker_context_tar_supports_file_pattern_matcher(tmp_path: Path) assert "keep.bin" in names assert "ignore.txt" not in names - - -def test_tar_filter_matcher_respects_patterns(tmp_path: Path) -> None: - """TarFilterMatcher should apply FilePatternMatcher patterns at tar level.""" - - root = tmp_path - (root / "keep.txt").write_text("keep", encoding="utf-8") - (root / "ignore.log").write_text("ignore", encoding="utf-8") - build_dir = root / "build" - build_dir.mkdir() - (build_dir / "ignored.txt").write_text("ignored", encoding="utf-8") - - matcher = FilePatternMatcher(["*.log", "build/"]) - tar_bytes = build_directory_tar(root, tar_filter=TarFilterMatcher(root, matcher)) - - with tarfile.open(fileobj=io.BytesIO(tar_bytes), mode="r:gz") as tf: - names = {m.name for m in tf.getmembers()} - - assert "keep.txt" in names - assert "ignore.log" not in names - assert not any(name.startswith("build/") for name in names) From 136ad7eb3e9e8f754b6934ad7b1a7332504cca38 Mon Sep 17 00:00:00 2001 From: James Chainey Date: Fri, 5 Dec 2025 15:08:08 -0800 Subject: [PATCH 14/16] added handling for extremely weird edge case behavior for dockerignore --- src/runloop_api_client/lib/_ignore.py | 21 ++++++++++++++++++--- 
tests/test_utils/test_context_loader.py | 25 +++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/src/runloop_api_client/lib/_ignore.py b/src/runloop_api_client/lib/_ignore.py index 382b55cd4..63f41a03d 100644 --- a/src/runloop_api_client/lib/_ignore.py +++ b/src/runloop_api_client/lib/_ignore.py @@ -322,7 +322,6 @@ class DockerIgnoreMatcher(IgnoreMatcher): def iter_paths(self, root: Path) -> Iterable[Path]: """Yield non-ignored files under ``root`` honoring Docker-style patterns.""" - root = root.resolve() all_patterns: list[str] = [] @@ -338,9 +337,25 @@ def iter_paths(self, root: Path) -> Iterable[Path]: raise FileNotFoundError(f"Ignore file does not exist: {ignore_path}") all_patterns.extend(read_ignorefile(ignore_path)) - # 3) Optional inline patterns appended last. + # 3) Optional inline patterns appended last using same rules as .dockerignore + # Some extra handling here for trailing slashes that is different from .gitignore. if self.patterns: - all_patterns.extend(self.patterns) + for raw in self.patterns: + if not raw: + continue + + invert = raw[0] == "!" 
+ pattern = raw[1:].strip() if invert else raw.strip() + + if pattern: + pattern = os.path.normpath(pattern) + pattern = pattern.replace(os.sep, "/") + if len(pattern) > 1 and pattern[0] == "/": + pattern = pattern[1:] + + normalized = f"!{pattern}" if invert else pattern + if normalized: + all_patterns.append(normalized) compiled: list[IgnorePattern] = compile_ignore(all_patterns) return iter_included_files(root, patterns=compiled) diff --git a/tests/test_utils/test_context_loader.py b/tests/test_utils/test_context_loader.py index d8ee1eae0..0a0bb7cc8 100644 --- a/tests/test_utils/test_context_loader.py +++ b/tests/test_utils/test_context_loader.py @@ -79,6 +79,31 @@ def test_iter_build_context_files_respects_dockerignore(tmp_path: Path): assert "build/ignored.txt" not in files +def test_optional_patterns_trailing_slash_matches_file_in_context(tmp_path: Path) -> None: + """Inline ignore patterns should mirror .dockerignore trailing-slash behavior. + + Patterns like '/foo/bar/' provided via the optional ``ignore=`` parameter + should exclude a file at 'foo/bar' in the build context, matching Docker's + patternmatcher semantics where trailing slashes are not directory-only. + """ + + root = tmp_path + foo = root / "foo" + foo.mkdir() + (foo / "bar").write_text("ignored", encoding="utf-8") + (root / "keep.txt").write_text("keep", encoding="utf-8") + + # Use build_docker_context_tar with an inline pattern that includes a + # trailing slash; this should still exclude the file at 'foo/bar'. 
+ tar_bytes = build_docker_context_tar(root, ignore=["/foo/bar/"]) + + with tarfile.open(fileobj=io.BytesIO(tar_bytes), mode="r:gz") as tf: + names = {m.name for m in tf.getmembers()} + + assert "keep.txt" in names + assert "foo/bar" not in names + + def test_is_ignored_directory_pattern_affects_directory_entry_only() -> None: """Directory patterns apply directly to directory entries, not to children.""" From 2a7fc17b8aae77ea88f6e9176d4aad8351012f21 Mon Sep 17 00:00:00 2001 From: James Chainey Date: Fri, 5 Dec 2025 15:40:40 -0800 Subject: [PATCH 15/16] added some type hints and override flags --- README-SDK.md | 62 +++++++++---------- src/runloop_api_client/lib/_ignore.py | 3 + src/runloop_api_client/sdk/async_.py | 3 +- .../sdk/async_storage_object.py | 9 ++- src/runloop_api_client/sdk/storage_object.py | 9 ++- src/runloop_api_client/sdk/sync.py | 46 +++++++------- 6 files changed, 70 insertions(+), 62 deletions(-) diff --git a/README-SDK.md b/README-SDK.md index 2349e16f9..4c8d9db51 100644 --- a/README-SDK.md +++ b/README-SDK.md @@ -4,36 +4,34 @@ The `RunloopSDK` builds on top of the underlying REST client and provides a Pyth ## Table of Contents -- [Runloop SDK – Python Object-Oriented Client](#runloop-sdk--python-object-oriented-client) - - [Table of Contents](#table-of-contents) - - [Installation](#installation) - - [Quickstart (synchronous)](#quickstart-synchronous) - - [Quickstart (asynchronous)](#quickstart-asynchronous) - - [Core Concepts](#core-concepts) - - [RunloopSDK](#runloopsdk) - - [Available Resources](#available-resources) - - [Devbox](#devbox) - - [Command Execution](#command-execution) - - [Execution Management](#execution-management) - - [Execution Results](#execution-results) - - [Streaming Command Output](#streaming-command-output) - - [File Operations](#file-operations) - - [Network Operations](#network-operations) - - [Snapshot Operations](#snapshot-operations) - - [Devbox Lifecycle Management](#devbox-lifecycle-management) - - [Context 
Manager Support](#context-manager-support) - - [Blueprint](#blueprint) - - [Snapshot](#snapshot) - - [StorageObject](#storageobject) - - [Storage Object Upload Helpers](#storage-object-upload-helpers) - - [Mounting Storage Objects to Devboxes](#mounting-storage-objects-to-devboxes) - - [Accessing the Underlying REST Client](#accessing-the-underlying-rest-client) - - [Error Handling](#error-handling) - - [Advanced Configuration](#advanced-configuration) - - [Async Usage](#async-usage) - - [Polling Configuration](#polling-configuration) - - [Complete API Reference](#complete-api-reference) - - [Feedback](#feedback) +- [Installation](#installation) +- [Quickstart (synchronous)](#quickstart-synchronous) +- [Quickstart (asynchronous)](#quickstart-asynchronous) +- [Core Concepts](#core-concepts) + - [RunloopSDK](#runloopsdk) + - [Available Resources](#available-resources) + - [Devbox](#devbox) + - [Command Execution](#command-execution) + - [Execution Management](#execution-management) + - [Execution Results](#execution-results) + - [Streaming Command Output](#streaming-command-output) + - [File Operations](#file-operations) + - [Network Operations](#network-operations) + - [Snapshot Operations](#snapshot-operations) + - [Devbox Lifecycle Management](#devbox-lifecycle-management) + - [Context Manager Support](#context-manager-support) + - [Blueprint](#blueprint) + - [Snapshot](#snapshot) + - [StorageObject](#storageobject) + - [Storage Object Upload Helpers](#storage-object-upload-helpers) + - [Mounting Storage Objects to Devboxes](#mounting-storage-objects-to-devboxes) +- [Accessing the Underlying REST Client](#accessing-the-underlying-rest-client) +- [Error Handling](#error-handling) +- [Advanced Configuration](#advanced-configuration) +- [Async Usage](#async-usage) +- [Polling Configuration](#polling-configuration) +- [Complete API Reference](#complete-api-reference) +- [Feedback](#feedback) ## Installation @@ -447,7 +445,7 @@ shared_ctx_obj = 
runloop.storage_object.upload_from_bytes( blueprint_with_context = runloop.blueprint.create( name="my-blueprint-with-context", - dockerfile=\"\"\"\ + dockerfile="""\ FROM node:22 WORKDIR /usr/src/app @@ -460,7 +458,7 @@ COPY --from=shared / ./libs RUN npm install --only=production CMD ["node", "src/app.js"] -\"\"\", +""", # Primary build context build_context=build_ctx_obj.as_build_context(), # Additional named build contexts (for Docker buildx-style usage) diff --git a/src/runloop_api_client/lib/_ignore.py b/src/runloop_api_client/lib/_ignore.py index 63f41a03d..76845f966 100644 --- a/src/runloop_api_client/lib/_ignore.py +++ b/src/runloop_api_client/lib/_ignore.py @@ -5,6 +5,7 @@ from typing import Iterable, Optional, Sequence from pathlib import Path, PurePosixPath from dataclasses import dataclass +from typing_extensions import override __all__ = [ "IgnorePattern", @@ -320,6 +321,7 @@ class DockerIgnoreMatcher(IgnoreMatcher): extra_ignorefile: str | Path | None = None patterns: Sequence[str] | None = None + @override def iter_paths(self, root: Path) -> Iterable[Path]: """Yield non-ignored files under ``root`` honoring Docker-style patterns.""" root = root.resolve() @@ -380,6 +382,7 @@ def __post_init__(self) -> None: if isinstance(self.patterns, str): object.__setattr__(self, "patterns", [self.patterns]) + @override def iter_paths(self, root: Path) -> Iterable[Path]: """Yield non-ignored files under ``root`` based only on ``patterns``.""" diff --git a/src/runloop_api_client/sdk/async_.py b/src/runloop_api_client/sdk/async_.py index 76dfc8195..16955642c 100644 --- a/src/runloop_api_client/sdk/async_.py +++ b/src/runloop_api_client/sdk/async_.py @@ -421,7 +421,8 @@ async def upload_from_dir( for available options :return: Wrapper for the uploaded object :rtype: AsyncStorageObject - :raises OSError: If the local file cannot be read + :raises OSError: If the local directory cannot be read + :raises ValueError: If ``dir_path`` does not point to a directory """ path 
= Path(dir_path) if not path.is_dir(): diff --git a/src/runloop_api_client/sdk/async_storage_object.py b/src/runloop_api_client/sdk/async_storage_object.py index e0c85d3e9..377cc4ec0 100644 --- a/src/runloop_api_client/sdk/async_storage_object.py +++ b/src/runloop_api_client/sdk/async_storage_object.py @@ -151,8 +151,10 @@ async def delete( async def upload_content(self, content: str | bytes | Iterable[bytes]) -> None: """Upload content to the object's pre-signed URL. - :param content: Bytes or text payload to upload - :type content: str | bytes + :param content: Bytes payload, text payload, or an iterable streaming bytes + :type content: str | bytes | Iterable[bytes] + :return: None + :rtype: None :raises RuntimeError: If no upload URL is available :raises httpx.HTTPStatusError: Propagated from the underlying ``httpx`` client when the upload fails """ @@ -165,6 +167,9 @@ def as_build_context(self) -> BuildContext: The returned mapping can be passed directly to ``build_context`` or ``named_build_contexts`` when creating a blueprint. + + :return: Mapping suitable for use as a blueprint build context + :rtype: BuildContext """ return { "object_id": self._id, diff --git a/src/runloop_api_client/sdk/storage_object.py b/src/runloop_api_client/sdk/storage_object.py index f6f4e6648..28fad144d 100644 --- a/src/runloop_api_client/sdk/storage_object.py +++ b/src/runloop_api_client/sdk/storage_object.py @@ -151,8 +151,10 @@ def delete( def upload_content(self, content: str | bytes | Iterable[bytes]) -> None: """Upload content to the object's pre-signed URL. 
- :param content: Bytes or text payload to upload - :type content: str | bytes + :param content: Bytes payload, text payload, or an iterable streaming bytes + :type content: str | bytes | Iterable[bytes] + :return: None + :rtype: None :raises RuntimeError: If no upload URL is available :raises httpx.HTTPStatusError: Propagated from the underlying ``httpx`` client when the upload fails """ @@ -165,6 +167,9 @@ def as_build_context(self) -> BuildContext: The returned mapping can be passed directly to ``build_context`` or ``named_build_contexts`` when creating a blueprint. + + :return: Mapping suitable for use as a blueprint build context + :rtype: BuildContext """ return { "object_id": self._id, diff --git a/src/runloop_api_client/sdk/sync.py b/src/runloop_api_client/sdk/sync.py index cebff6069..c25c10cb5 100644 --- a/src/runloop_api_client/sdk/sync.py +++ b/src/runloop_api_client/sdk/sync.py @@ -83,7 +83,6 @@ def create_from_blueprint_id( :param blueprint_id: Blueprint ID to create from :type blueprint_id: str :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKDevboxCreateFromImageParams` for available parameters - :type params: :return: Wrapper bound to the newly created devbox :rtype: Devbox """ @@ -226,11 +225,10 @@ class BlueprintOps: Example: >>> from datetime import timedelta >>> from runloop_api_client.types.blueprint_build_parameters import BuildContext - >>> >>> runloop = RunloopSDK() >>> obj = runloop.object_storage.upload_from_dir( ... "./", - ... ttl=timedelta(hours=1), + ... ttl=timedelta(hours=1), ... ) >>> blueprint = runloop.blueprint.create( ... 
name="my-blueprint", @@ -421,6 +419,7 @@ def upload_from_dir( :return: Wrapper for the uploaded object :rtype: StorageObject :raises OSError: If the local file cannot be read + :raises ValueError: If ``dir_path`` does not point to a directory """ path = Path(dir_path) if not path.is_dir(): @@ -545,7 +544,7 @@ def list(self, **params: Unpack[SDKScorerListParams]) -> list[Scorer]: page = self._client.scenarios.scorers.list(**params) return [Scorer(self._client, item.id) for item in page] - + class AgentOps: """High-level manager for creating and managing agents. @@ -555,15 +554,10 @@ class AgentOps: Example: >>> runloop = RunloopSDK() >>> # Create agent from NPM package - >>> agent = runloop.agent.create_from_npm( - ... name="my-agent", - ... package_name="@runloop/example-agent" - ... ) + >>> agent = runloop.agent.create_from_npm(name="my-agent", package_name="@runloop/example-agent") >>> # Create agent from Git repository >>> agent = runloop.agent.create_from_git( - ... name="git-agent", - ... repository="https://github.com/user/agent-repo", - ... ref="main" + ... name="git-agent", repository="https://github.com/user/agent-repo", ref="main" ... ) >>> # List all agents >>> agents = runloop.agent.list(limit=10) @@ -605,9 +599,7 @@ def create_from_npm( Example: >>> agent = runloop.agent.create_from_npm( - ... name="my-npm-agent", - ... package_name="@runloop/example-agent", - ... npm_version="^1.0.0" + ... name="my-npm-agent", package_name="@runloop/example-agent", npm_version="^1.0.0" ... 
) :param package_name: NPM package name @@ -624,7 +616,9 @@ def create_from_npm( :raises ValueError: If 'source' is provided in params """ if "source" in params: - raise ValueError("Cannot specify 'source' when using create_from_npm(); source is automatically set to npm configuration") + raise ValueError( + "Cannot specify 'source' when using create_from_npm(); source is automatically set to npm configuration" + ) npm_config: dict = {"package_name": package_name} if npm_version is not None: @@ -652,9 +646,7 @@ def create_from_pip( Example: >>> agent = runloop.agent.create_from_pip( - ... name="my-pip-agent", - ... package_name="runloop-example-agent", - ... pip_version=">=1.0.0" + ... name="my-pip-agent", package_name="runloop-example-agent", pip_version=">=1.0.0" ... ) :param package_name: Pip package name @@ -671,7 +663,9 @@ def create_from_pip( :raises ValueError: If 'source' is provided in params """ if "source" in params: - raise ValueError("Cannot specify 'source' when using create_from_pip(); source is automatically set to pip configuration") + raise ValueError( + "Cannot specify 'source' when using create_from_pip(); source is automatically set to pip configuration" + ) pip_config: dict = {"package_name": package_name} if pip_version is not None: @@ -701,7 +695,7 @@ def create_from_git( ... name="my-git-agent", ... repository="https://github.com/user/agent-repo", ... ref="main", - ... agent_setup=["npm install", "npm run build"] + ... agent_setup=["npm install", "npm run build"], ... 
) :param repository: Git repository URL @@ -716,7 +710,9 @@ def create_from_git( :raises ValueError: If 'source' is provided in params """ if "source" in params: - raise ValueError("Cannot specify 'source' when using create_from_git(); source is automatically set to git configuration") + raise ValueError( + "Cannot specify 'source' when using create_from_git(); source is automatically set to git configuration" + ) git_config: dict = {"repository": repository} if ref is not None: @@ -743,9 +739,7 @@ def create_from_object( >>> obj = runloop.storage_object.upload_from_dir("./my-agent") >>> # Then create agent from the object >>> agent = runloop.agent.create_from_object( - ... name="my-object-agent", - ... object_id=obj.id, - ... agent_setup=["chmod +x setup.sh", "./setup.sh"] + ... name="my-object-agent", object_id=obj.id, agent_setup=["chmod +x setup.sh", "./setup.sh"] ... ) :param object_id: Storage object ID @@ -758,7 +752,9 @@ def create_from_object( :raises ValueError: If 'source' is provided in params """ if "source" in params: - raise ValueError("Cannot specify 'source' when using create_from_object(); source is automatically set to object configuration") + raise ValueError( + "Cannot specify 'source' when using create_from_object(); source is automatically set to object configuration" + ) object_config: dict = {"object_id": object_id} if agent_setup is not None: From 597825daa43cb4664865173fbf387c85a9b0c89a Mon Sep 17 00:00:00 2001 From: James Chainey Date: Fri, 5 Dec 2025 16:10:22 -0800 Subject: [PATCH 16/16] docstring fixes and consolidation of duplicated ignore code --- src/runloop_api_client/lib/_ignore.py | 211 +++++++++++++++++++------- 1 file changed, 158 insertions(+), 53 deletions(-) diff --git a/src/runloop_api_client/lib/_ignore.py b/src/runloop_api_client/lib/_ignore.py index 76845f966..d1b5d64b0 100644 --- a/src/runloop_api_client/lib/_ignore.py +++ b/src/runloop_api_client/lib/_ignore.py @@ -24,22 +24,67 @@ class IgnorePattern: """Single parsed 
ignore pattern. - Follows Docker-style .dockerignore semantics, supports other ignore use cases following same approach. - - Details: - - ``pattern``: The normalized pattern text with leading/trailing ``/`` removed. - Always uses POSIX ``'/'`` separators. - - ``negated``: True if this is a negation pattern starting with ``!``. - - ``directory_only``: True if the original pattern ended with ``/`` and should - apply only to directories and their descendants. - - ``anchored``: True if the pattern contains a path separator and should be - matched relative to the root path rather than at any depth. + Follows Docker-style ``.dockerignore`` semantics and supports other ignore + use cases following the same approach. """ pattern: str + """The normalized pattern text with leading and trailing ``/`` removed. + + Always uses POSIX ``'/'`` separators. + """ + negated: bool + """Whether this is a negation pattern starting with ``!``.""" + directory_only: bool + """Whether the original pattern ended with ``/`` and should apply only to + directories and their descendants. + """ + anchored: bool + """Whether the pattern contains a path separator and should be matched + relative to the root path rather than at any depth. + """ + + +def _normalize_pattern_string(raw: str) -> str: + """Normalize a single ignore pattern string. + + Shared helper for patterns coming from both ignorefiles and inline pattern + lists. Handles: + + - Optional leading ``!`` negation marker (with surrounding whitespace + trimmed). + - ``os.path.normpath`` cleanup. + - Normalising path separators to POSIX ``'/'``. + - Stripping a single leading ``/`` so absolute-style patterns behave like + relative ones. + + Comment / blank-line handling is deliberately *not* included here; callers + are responsible for that. + """ + + if not raw: + return raw + + invert = raw[0] == "!" 
+ pattern = raw[1:].strip() if invert else raw.strip() + + if pattern: + # filepath.Clean equivalent + pattern = os.path.normpath(pattern) + # filepath.ToSlash equivalent + pattern = pattern.replace(os.sep, "/") + # Leading forward-slashes are removed so "/some/path" and "some/path" + # are considered equivalent. + if len(pattern) > 1 and pattern[0] == "/": + pattern = pattern[1:] + + if invert: + pattern = "!" + pattern + + return pattern def _normalize_pattern_line(raw: bytes, *, is_first_line: bool) -> Optional[str]: @@ -47,6 +92,16 @@ def _normalize_pattern_line(raw: bytes, *, is_first_line: bool) -> Optional[str] Behavior is based on: https://github.com/moby/patternmatcher/blob/main/ignorefile/ignorefile.go + + :param raw: Raw line bytes from the ignore file, including any newline + characters. + :type raw: bytes + :param is_first_line: Whether this is the first line in the file (used to + detect and strip a UTF-8 BOM). + :type is_first_line: bool + :return: Normalized pattern string, or ``None`` if the line should be + ignored (empty or comment). + :rtype: Optional[str] """ # Strip UTF-8 BOM from the first line if present @@ -67,28 +122,11 @@ def _normalize_pattern_line(raw: bytes, *, is_first_line: bool) -> Optional[str] if not pattern: return None - # Normalize absolute paths to paths relative to the context (taking care of '!' prefix) - invert = pattern[0] == "!" - if invert: - pattern = pattern[1:].strip() - - if pattern: - # filepath.Clean equivalent - pattern = os.path.normpath(pattern) - # filepath.ToSlash equivalent - pattern = pattern.replace(os.sep, "/") - # Leading forward-slashes are removed so "/some/path" and "some/path" - # are considered equivalent. - if len(pattern) > 1 and pattern[0] == "/": - pattern = pattern[1:] + normalized = _normalize_pattern_string(pattern) + return normalized or None - if invert: - pattern = "!" 
+ pattern - - return pattern - -def read_ignorefile(path: Optional[Path]) -> list[str]: +def read_ignorefile(path: Path) -> list[str]: """Read an ignore file and return a list of normalized pattern strings. This mirrors the behavior of moby's ``ignorefile.ReadAll``: @@ -96,11 +134,14 @@ def read_ignorefile(path: Optional[Path]) -> list[str]: - UTF-8 BOM on the first line is stripped. - Lines starting with ``#`` are treated as comments and skipped. - Remaining lines are trimmed, optionally negated with ``!``, cleaned, - have path separators normalized to ``/``, and leading ``/`` removed. - """ + have path separators normalized to ``/``, and leading and trailing ``/`` removed. - if path is None: - return [] + :param path: Filesystem path to the ignore file to read. + :type path: Path + :return: List of normalized pattern strings in the order they appear in + the ignore file. + :rtype: list[str] + """ if not path.exists(): return [] @@ -119,7 +160,13 @@ def read_ignorefile(path: Optional[Path]) -> list[str]: def compile_ignore(patterns: Sequence[str]) -> list[IgnorePattern]: - """Compile raw pattern strings into :class:`IgnorePattern` objects.""" + """Compile raw pattern strings into :class:`IgnorePattern` objects. + + :param patterns: Raw pattern strings following Docker-style semantics. + :type patterns: Sequence[str] + :return: Compiled ignore patterns. + :rtype: list[IgnorePattern] + """ compiled: list[IgnorePattern] = [] @@ -160,9 +207,17 @@ def _segment_match(pattern_segment: str, path_segment: str) -> bool: """Match a single path segment against a glob pattern segment. Supports: + - ``*``: any sequence of characters except ``/``. - ``?``: any single character except ``/``. - ``[]``: character classes, excluding ``/``. + + :param pattern_segment: Glob-style pattern segment. + :type pattern_segment: str + :param path_segment: Path segment (no ``/``) to match against. + :type path_segment: str + :return: ``True`` if the path segment matches the pattern segment. 
+ :rtype: bool """ import re @@ -195,7 +250,15 @@ def _segment_match(pattern_segment: str, path_segment: str) -> bool: def _match_parts_recursive(pattern_parts: list[str], path_parts: list[str]) -> bool: - """Recursive helper implementing ``**`` segment semantics.""" + """Recursive helper implementing ``**`` segment semantics. + + :param pattern_parts: Pattern split into POSIX path segments. + :type pattern_parts: list[str] + :param path_parts: Path split into POSIX path segments. + :type path_parts: list[str] + :return: ``True`` if the pattern parts match the path parts. + :rtype: bool + """ if not pattern_parts: return not path_parts @@ -217,7 +280,17 @@ def _match_parts_recursive(pattern_parts: list[str], path_parts: list[str]) -> b def path_match(pattern: IgnorePattern, relpath: str, *, is_dir: bool) -> bool: - """Return True if ``relpath`` matches a compiled ignore pattern.""" + """Return ``True`` if ``relpath`` matches a compiled ignore pattern. + + :param pattern: Compiled ignore pattern to test. + :type pattern: IgnorePattern + :param relpath: Path to test, relative to the ignore root. + :type relpath: str + :param is_dir: Whether ``relpath`` refers to a directory. + :type is_dir: bool + :return: ``True`` if the path is matched by the pattern. + :rtype: bool + """ relpath_posix = PurePosixPath(relpath).as_posix() path_parts = PurePosixPath(relpath_posix).parts @@ -247,6 +320,15 @@ def is_ignored(relpath: str, *, is_dir: bool, patterns: Sequence[IgnorePattern]) excludes all ``.log`` files except ``important.log``. Patterns are applied in order, and the last matching pattern determines inclusion. + + :param relpath: Path to evaluate, relative to the ignore root. + :type relpath: str + :param is_dir: Whether ``relpath`` refers to a directory. + :type is_dir: bool + :param patterns: Compiled ignore patterns to apply in order. + :type patterns: Sequence[IgnorePattern] + :return: ``True`` if the path should be treated as ignored. 
+ :rtype: bool """ included = True # include by default @@ -264,7 +346,15 @@ def iter_included_files( """Yield all files under ``root`` that are not ignored. This performs directory pruning so that ignored directories are never - traversed, mirroring Docker's behavior for .dockerignore. + traversed, mirroring Docker's behavior for ``.dockerignore``. + + :param root: Root directory to walk. + :type root: Path + :param patterns: Compiled ignore patterns controlling which files and + directories are included. + :type patterns: Sequence[IgnorePattern] + :return: Iterator over non-ignored file paths under ``root``. + :rtype: Iterable[Path] """ if not root.is_dir(): @@ -301,7 +391,13 @@ class IgnoreMatcher(ABC): @abstractmethod def iter_paths(self, root: Path) -> Iterable[Path]: - """Yield filesystem paths to include under ``root``.""" + """Yield filesystem paths to include under ``root``. + + :param root: Root directory to scan for files. + :type root: Path + :return: Iterator over filesystem paths that should be included. + :rtype: Iterable[Path] + """ @dataclass(frozen=True) @@ -309,7 +405,8 @@ class DockerIgnoreMatcher(IgnoreMatcher): """Ignore matcher that mirrors Docker's .dockerignore semantics. This matcher: - - Closely follows Docker's .dockerignore semantics. + + - Closely follows Docker's ``.dockerignore`` semantics. - Always loads patterns from ``.dockerignore`` in the provided context root, if present. - Optionally loads additional patterns from an extra ignorefile. @@ -319,11 +416,22 @@ class DockerIgnoreMatcher(IgnoreMatcher): """ extra_ignorefile: str | Path | None = None + """Optional path to an additional ignorefile whose patterns are appended + after the default ``.dockerignore``. 
+ """ + patterns: Sequence[str] | None = None + """Optional inline pattern strings appended after any ignorefiles.""" @override def iter_paths(self, root: Path) -> Iterable[Path]: - """Yield non-ignored files under ``root`` honoring Docker-style patterns.""" + """Yield non-ignored files under ``root`` honoring Docker-style patterns. + + :param root: Context directory whose contents should be filtered. + :type root: Path + :return: Iterator over non-ignored file paths under ``root``. + :rtype: Iterable[Path] + """ root = root.resolve() all_patterns: list[str] = [] @@ -345,17 +453,7 @@ def iter_paths(self, root: Path) -> Iterable[Path]: for raw in self.patterns: if not raw: continue - - invert = raw[0] == "!" - pattern = raw[1:].strip() if invert else raw.strip() - - if pattern: - pattern = os.path.normpath(pattern) - pattern = pattern.replace(os.sep, "/") - if len(pattern) > 1 and pattern[0] == "/": - pattern = pattern[1:] - - normalized = f"!{pattern}" if invert else pattern + normalized = _normalize_pattern_string(raw) if normalized: all_patterns.append(normalized) @@ -376,6 +474,7 @@ class FilePatternMatcher(IgnoreMatcher): """ patterns: Sequence[str] | str + """Pattern or patterns to apply as ignore rules when matching files.""" def __post_init__(self) -> None: # Normalise a single pattern string into a list for downstream helpers. @@ -384,7 +483,13 @@ def __post_init__(self) -> None: @override def iter_paths(self, root: Path) -> Iterable[Path]: - """Yield non-ignored files under ``root`` based only on ``patterns``.""" + """Yield non-ignored files under ``root`` based only on ``patterns``. + + :param root: Root directory whose contents should be filtered. + :type root: Path + :return: Iterator over non-ignored file paths under ``root``. + :rtype: Iterable[Path] + """ root = root.resolve() compiled: list[IgnorePattern] = compile_ignore(self.patterns) # type: ignore[arg-type]