From c69276a01a74862054273d8564618085adfa795f Mon Sep 17 00:00:00 2001 From: l17728 Date: Thu, 14 May 2026 18:18:46 +0800 Subject: [PATCH 01/21] =?UTF-8?q?docs(spec):=20Phase=202=20W3b=20=E2=80=94?= =?UTF-8?q?=20HF=20reverse-proxy=20design=20(SEC-02=20/=20INVARIANT=202)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Controller-side GET /api/v1/hf-proxy/subtask/{id} streams HF files with the tenant token injected server-side; executors stop calling HF directly and lose ExecutorSettings.hf_token/hf_endpoint. Zero schema changes. Co-Authored-By: Claude Opus 4.7 (1M context) --- ...-14-phase-2-w3b-hf-reverse-proxy-design.md | 493 ++++++++++++++++++ 1 file changed, 493 insertions(+) create mode 100644 docs/superpowers/specs/2026-05-14-phase-2-w3b-hf-reverse-proxy-design.md diff --git a/docs/superpowers/specs/2026-05-14-phase-2-w3b-hf-reverse-proxy-design.md b/docs/superpowers/specs/2026-05-14-phase-2-w3b-hf-reverse-proxy-design.md new file mode 100644 index 0000000..5a4b083 --- /dev/null +++ b/docs/superpowers/specs/2026-05-14-phase-2-w3b-hf-reverse-proxy-design.md @@ -0,0 +1,493 @@ +# Phase 2 Week 3b — HF Reverse Proxy Design + +> **Status:** Draft (brainstormed 2026-05-14). +> **Companion plan:** `docs/superpowers/plans/2026-05-14-phase-2-w3b-hf-reverse-proxy.md` (to be written by writing-plans skill after spec approval). +> **Roadmap source:** `docs/v2.0/08-mvp-roadmap.md` §2.6 — Phase 2 Week 3 Day 4 (HF reverse-proxy). +> **Companion split (W3a / W3c):** W3a (mTLS + JWT + HMAC) merged — PR #12. W3c (active/standby + chaos drill) is a separate spec/plan. +> **Security source:** `docs/v2.0/04-security-and-tenancy.md` §3.1 (HF Token reverse proxy, SEC-02) + INVARIANT 2. + +--- + +## 1. Goal & Non-Goals + +### 1.1 Goal + +Close SEC-02 / INVARIANT 2 for the executor download path: the tenant-level HF token must never reach an executor. W3b adds a controller-side reverse proxy — `GET /api/v1/hf-proxy/subtask/{subtask_id}` — that: + +1. Authenticates the executor via the W3a mTLS → JWT dependency chain. +2. Verifies the executor actually owns the subtask: the `X-Assignment-Token` header matches `subtask.assignment_token` (W1 fence), `subtask.executor_id` matches the authenticated executor (confused-deputy guard), and `subtask.executor_epoch` matches the authenticated executor's epoch (W1 fence). +3. Reconstructs the HF URL from the subtask row (`subtask → DownloadTask.repo_id + .revision`, `subtask.filename`) — the executor never supplies a path, eliminating arbitrary-path SSRF surface. +4. Injects the controller's HF token (`Settings.hf_token`), follows HF's `302 → CDN` redirect server-side, and streams HF's response straight back as a `StreamingResponse`. + +The executor's two downloaders (`HfS3StreamDownloader`, `DirectOffsetDownloader`) stop calling `huggingface.co` directly — they fetch through a new `ControllerClient.stream_hf` context manager. `ExecutorSettings.hf_token` and `ExecutorSettings.hf_endpoint` are deleted. + +After W3b, the only place a tenant HF token exists is the controller process (`Settings.hf_token` env). Executors hold only their mTLS cert + JWT; they reach HF exclusively through the authenticated proxy. + +### 1.2 Non-goals (deferred — explicit list) + +| Item | Where | +|---|---| +| `tenant_secrets` table / per-tenant HF token | **Phase 3** multi-tenant — Phase 2 single-tenant uses the controller `Settings.hf_token` env | +| Envelope encryption (AES-GCM DEK + KMS KEK) of the HF token | **Phase 3** | +| Global 429/5xx throttle coordination (`04 §3.1`'s "Controller 全局协调") + `source_throttle_state` | **Phase 3** — same split as W2b2. W3b passes 429/503 straight through; the executor's W2b2 `paused_external` handling catches them | +| `X-Repo-Commit` / commit-pin verification (`03 §10`) | **Phase 3** — revisions are already 40-char SHAs, so HF force-push conflicts can't happen | +| Executor-local OOB HF credential pool (`14 §3` — the documented INVARIANT-2 exception for bypassing per-user rate limits) | **v2.1** | +| Proxy-side caching / range coalescing / connection pooling | not needed for internal beta — per-request httpx client is correct + simple | +| HTTP/2 to HF (`httpx.AsyncClient(http2=True)` from `04 §3.1`) | optional polish — W3b uses HTTP/1.1 streaming; HTTP/2 is a Phase-3 perf tweak | +| `hf_metadata` (the controller's `/tasks` repo-file listing) | **unchanged** — it already runs controller-side; `Settings.hf_token` never leaves the controller process. W3b does not touch it | +| Active/standby controller + chaos drill | **W3c** | + +--- + +## 2. Tech Stack Additions + +**None.** `httpx` (already pinned) provides `AsyncClient(follow_redirects=True)` + streaming; `fastapi.responses.StreamingResponse` is stdlib-FastAPI. No new runtime deps, no new dev deps, no new CI jobs, **zero alembic migrations**. + +--- + +## 3. Components + +### 3.1 New: `src/dlw/api/hf_proxy.py` + +A new FastAPI router. One endpoint: + +```python +GET /api/v1/hf-proxy/subtask/{subtask_id} +``` + +```python +"""HF reverse-proxy — controller-side, injects the tenant HF token (SEC-02). + +The executor never holds the HF token (INVARIANT 2). It calls this proxy +keyed by subtask_id; the controller verifies ownership (assignment_token + +epoch fence + confused-deputy guard), reconstructs the HF URL from the +subtask row, injects Settings.hf_token, follows HF's 302→CDN redirect +server-side, and streams the bytes back. +""" +from __future__ import annotations + +import uuid + +import httpx +from fastapi import APIRouter, Depends, Header, HTTPException, Request +from fastapi.responses import StreamingResponse +from sqlalchemy.ext.asyncio import AsyncSession + +from dlw.api.tasks import _session +from dlw.auth.executor_jwt_dep import require_executor_jwt +from dlw.config import get_settings +from dlw.db.models.executor import Executor +from dlw.db.models.task import DownloadTask, FileSubTask + +router = APIRouter(prefix="/api/v1/hf-proxy", tags=["executors"]) + +_HF_HEADER_ALLOWLIST = frozenset({ + "content-length", "content-range", "content-type", + "accept-ranges", "etag", +}) + + +@router.get("/subtask/{subtask_id}") +async def hf_proxy_subtask( + subtask_id: uuid.UUID, + request: Request, + x_assignment_token: str = Header(..., alias="X-Assignment-Token"), + auth_ex: Executor = Depends(require_executor_jwt), + session: AsyncSession = Depends(_session), +) -> StreamingResponse: + sub = await session.get(FileSubTask, subtask_id) + if sub is None: + raise HTTPException(status_code=404, detail="subtask not found") + if sub.executor_id != auth_ex.id: + raise HTTPException( + status_code=403, + detail={"code": "NOT_YOUR_SUBTASK", + "subtask_executor": sub.executor_id, + "authenticated": auth_ex.id}, + ) + if str(sub.assignment_token) != x_assignment_token: + raise HTTPException(status_code=409, detail={"code": "STALE_ASSIGNMENT"}) + if sub.executor_epoch != auth_ex.epoch: + raise HTTPException( + status_code=409, + detail={"code": "EPOCH_MISMATCH", + "expected": sub.executor_epoch, "got": auth_ex.epoch}, + ) + + task = await session.get(DownloadTask, sub.task_id) + if task is None: # FK guarantees this won't happen + raise HTTPException(status_code=500, detail="parent task missing") + + settings = get_settings() + hf_url = (f"{settings.hf_endpoint.rstrip('/')}/{task.repo_id}" + f"/resolve/{task.revision}/{sub.filename}") + + hf_headers: dict[str, str] = {} + if settings.hf_token: + hf_headers["Authorization"] = f"Bearer {settings.hf_token}" + range_header = request.headers.get("Range") + if range_header: + hf_headers["Range"] = range_header + + hf_client = httpx.AsyncClient( + follow_redirects=True, + timeout=settings.hf_proxy_timeout_seconds, + ) + hf_req = hf_client.build_request("GET", hf_url, headers=hf_headers) + hf_resp = await hf_client.send(hf_req, stream=True) + + fwd = { + k: v for k, v in hf_resp.headers.items() + if k.lower() in _HF_HEADER_ALLOWLIST + } + + async def _body(): + try: + async for chunk in hf_resp.aiter_bytes(64 * 1024): + yield chunk + finally: + await hf_resp.aclose() + await hf_client.aclose() + + return StreamingResponse( + _body(), status_code=hf_resp.status_code, headers=fwd, + ) +``` + +Implementation notes: + +- `hf_client.send(..., stream=True)` is awaited eagerly so a connection error or 4xx surfaces *before* the `StreamingResponse` is constructed. The body generator runs only while the response is being sent. +- **Header allowlist** — only `content-length / content-range / content-type / accept-ranges / etag` reach the executor. HF's `Authorization` echo, `Set-Cookie`, `X-Repo-Commit` (Phase-3 commit-pin concern), and internal headers are dropped. +- **Status passthrough** — HF's 200/206/403/404/429/503 is forwarded verbatim. The executor's W2b1 tenacity (`_TRANSIENT_RETRY`) retries 5xx; W2b2's `paused_external` catches 429/503; 403/404 → the executor's generic failure path. +- **Cleanup in `finally`** — the generator's `finally` closes the HF response + client when the executor finishes or disconnects (FastAPI cancels the generator on client disconnect → `GeneratorExit` → `finally` runs). No leaked connections. +- The `hf_client` is created per-request — the proxy is stateless. Phase 3 may pool a module-level client. + +### 3.2 Verification chain (security crux) + +The endpoint runs four checks fail-fast before touching HF: + +| # | Check | Failure | Why it matters | +|---|---|---|---| +| 1 | subtask exists | 404 | basic | +| 2 | `sub.executor_id == auth_ex.id` | 403 `NOT_YOUR_SUBTASK` | confused-deputy guard — `require_executor_jwt` only proves "a registered executor", not "owns this subtask". Without (2), executor-A could pull executor-B's file. | +| 3 | `str(sub.assignment_token) == X-Assignment-Token` | 409 `STALE_ASSIGNMENT` | a reclaimed executor still holds the old token; the row's token was cleared/reassigned. Also covers `assignment_token = NULL` (pending subtask): `str(None) == "None"` won't match a real token. | +| 4 | `sub.executor_epoch == auth_ex.epoch` | 409 `EPOCH_MISMATCH` | a re-registered executor (new epoch) can't proxy-fetch under work claimed by its old incarnation. | + +### 3.3 Modified: `src/dlw/main.py` + +`create_app()` includes the new router (one line, alongside the existing routers): + +```python + from dlw.api.hf_proxy import router as hf_proxy_router + app.include_router(hf_proxy_router) +``` + +### 3.4 Modified: `src/dlw/config.py` + +`Settings` gains one field: + +```python + # Phase 2 W3b — HF reverse-proxy + hf_proxy_timeout_seconds: int = Field(default=300, ge=10, le=3600) +``` + +`Settings.hf_token` + `Settings.hf_endpoint` already exist and are unchanged — the proxy reads them. + +### 3.5 New: `ControllerClient.stream_hf` in `src/dlw/executor/client.py` + +```python + @asynccontextmanager + async def stream_hf( + self, + *, + subtask_id: uuid.UUID, + assignment_token: uuid.UUID, + range_header: str | None = None, + ): + """W3b: stream a file from HF via the controller proxy. Yields the + httpx streaming Response — callers consume resp.aiter_bytes() and + check resp.status_code, exactly as they did with a direct HF GET.""" + headers = { + **self._auth_headers(), + "X-Assignment-Token": str(assignment_token), + } + if range_header: + headers["Range"] = range_header + async with self._make_client() as client: + async with client.stream( + "GET", + f"/api/v1/hf-proxy/subtask/{subtask_id}", + headers=headers, + ) as resp: + yield resp +``` + +`from contextlib import asynccontextmanager` + `import uuid` are added to `client.py`. `_make_client()` (W3a) already builds the mTLS + JWT httpx client; `stream_hf` reuses it. When a `_transport` is injected (tests), `_make_client` short-circuits to the MockTransport — `stream_hf` works under both real-mTLS and test-transport modes. + +### 3.6 Modified: `src/dlw/executor/types.py` (`Assignment`) + +`Assignment` gains one field: + +```python +@dataclass(frozen=True) +class Assignment: + subtask_id: uuid.UUID + task_id: uuid.UUID + assignment_token: uuid.UUID # NEW (W3b) — needed for the proxy call + repo_id: str + revision: str + filename: str + file_size: int | None + expected_sha256: str | None + storage_config: StorageConfig +``` + +`repo_id` / `revision` / `filename` are retained — the executor still uses them for S3 key composition (`_compose_key` / `compose_key`). They are no longer used to build an HF URL on the executor (the proxy reconstructs that controller-side). + +### 3.7 Modified: `src/dlw/executor/downloader.py` (`HfS3StreamDownloader`) + +- `HfS3StreamDownloader.__init__` gains a `client: ControllerClient` parameter, stored as `self._controller`. +- `_download_once` replaces the HF-URL construction + `_make_http_client()` GET with a `stream_hf` call: + +```python + async with self._controller.stream_hf( + subtask_id=assignment.subtask_id, + assignment_token=assignment.assignment_token, + ) as resp: + resp.raise_for_status() + # ... existing: create_multipart_upload, aiter_bytes loop, + # upload_part, complete_multipart_upload — UNCHANGED ... +``` + +The S3 side (`make_s3_client`, `upload_part`, the multipart logic, the 0-byte-file `put_object` path) is **entirely unchanged**. Only the byte *source* moves from "direct HF GET" to "controller proxy stream". The downloader no longer calls `make_http_client()` for the HF leg (it may still need it removed entirely from the file if nothing else uses it — verify during implementation). + +### 3.8 Modified: `src/dlw/executor/chunk_downloader.py` (`DirectOffsetDownloader`) + +- `DirectOffsetDownloader.__init__` gains `client: ControllerClient`, stored as `self._controller`. +- **`_download_one_chunk`** replaces the direct HF `Range` GET with: + +```python + @_TRANSIENT_RETRY + async def _download_one_chunk(self, a, plan, dest_dir): + range_header = f"bytes={plan.offset}-{plan.offset + plan.length - 1}" + async with self._controller.stream_hf( + subtask_id=a.subtask_id, + assignment_token=a.assignment_token, + range_header=range_header, + ) as resp: + resp.raise_for_status() + target = dest_dir / f"{plan.index}.bin" + try: + with self._open_writer(target) as f: + async for buf in resp.aiter_bytes(_HTTP_CHUNK_BYTES): + # ... existing ENOSPC catch — UNCHANGED ... +``` + + The `_pass1_parallel` `make_http_client()` context wrapper is removed — each `_download_one_chunk` opens its own `stream_hf`. + +- **`_resolve_size`** — the W2b1 version does a `HEAD` against the HF URL. The proxy is `GET`-only (a download proxy). `_resolve_size` instead does a 1-byte probe via `stream_hf(range_header="bytes=0-0")` and reads `Content-Range` (`bytes 0-0/`) for the total size. If `Content-Range` is absent (HF returned a full 200 instead of 206), fall back to the response's `Content-Length`. The probe body is discarded either way. This keeps the proxy `GET`-only — no `HEAD` route. + +- The S3-side `_pass2_upload` (multipart upload, sequential SHA256) is **unchanged**. + +### 3.9 Modified: `src/dlw/executor/runner.py` + `src/dlw/executor/cli.py` + +- `runner._execute_subtask` builds the `Assignment` — it already receives `assignment_token` from the `/poll` response; add it to the `Assignment(...)` constructor call. +- `cli.py` builds the `ControllerClient` first, then passes it into both downloader constructors: `HfS3StreamDownloader(settings=settings, client=client)` + `DirectOffsetDownloader(settings=settings, client=client)`. The same `client` instance the `ExecutorRunner` uses. + +### 3.10 Modified: `src/dlw/executor/config.py` + +Delete `hf_token` and `hf_endpoint` from `ExecutorSettings`. After W3b the executor has no HF knowledge — it only knows the controller URL. The `s3_*` settings stay (the executor still uploads to S3 directly; that is W3b-unrelated). + +> `pydantic-settings` is configured with `extra="ignore"` — a deployment env that still sets `DLW_EXECUTOR_HF_TOKEN` won't error, the value is just unused. The operator runbook (§5) notes it should be removed. + +### 3.11 Modified: `tools/lint_invariants.py` + +New helper `check_no_hf_token_in_executor` — line-scans every `.py` under `src/dlw/executor/`; fails if the identifier `hf_token` or `hf_endpoint` appears (outside comments). This locks INVARIANT 2 for the executor package: a future change re-introducing direct HF access is caught by CI. + +```python +def check_no_hf_token_in_executor() -> list[str]: + """W3b: INVARIANT 2 — the tenant HF token must never reach an executor. + Forbid `hf_token` / `hf_endpoint` identifiers anywhere in src/dlw/executor/.""" + errors: list[str] = [] + exec_dir = ROOT / "src" / "dlw" / "executor" + if not exec_dir.exists(): + return [] + for py in exec_dir.rglob("*.py"): + text = py.read_text(encoding="utf-8") + for lineno, line in enumerate(text.splitlines(), start=1): + stripped = line.strip() + if stripped.startswith("#"): + continue + if "hf_token" in line or "hf_endpoint" in line: + errors.append( + f"{py.relative_to(ROOT)}:{lineno}: " + f"'hf_token'/'hf_endpoint' forbidden in executor package " + f"(INVARIANT 2 — HF access goes through the controller proxy)" + ) + return errors +``` + +Wired into `main()` next to the W3a `check_no_bearer_on_executor_routes`. + +--- + +## 4. Schema Changes + +**None.** No new table, no new column, no alembic migration. The proxy reads `Settings.hf_token` (controller env) + existing `FileSubTask` / `DownloadTask` columns (`repo_id`, `revision`, `filename`, `assignment_token`, `executor_id`, `executor_epoch` — all present since W1). `tenant_secrets` is Phase 3. + +W3b is the first Phase-2 sub-week since W2b1 with no alembic migration. + +--- + +## 5. Wire Format Changes + +### 5.1 New endpoint `GET /api/v1/hf-proxy/subtask/{subtask_id}` + +| Aspect | Value | +|---|---| +| Auth | mTLS (client cert) + `Authorization: Bearer ` — the W3a chain | +| Request headers | `X-Assignment-Token` (required); `Range` (optional, forwarded to HF) | +| Response 200 / 206 | streamed file bytes; `Content-Length` / `Content-Range` / `Content-Type` / `Accept-Ranges` / `ETag` forwarded | +| 401 | missing/invalid mTLS or JWT (from `require_executor_jwt`) | +| 403 | `NOT_YOUR_SUBTASK` — authenticated executor ≠ subtask's executor | +| 404 | subtask not found | +| 409 | `STALE_ASSIGNMENT` (token mismatch) or `EPOCH_MISMATCH` | +| 429 / 503 | forwarded from HF — the executor maps these to `paused_external` (W2b2) | +| 5xx | forwarded from HF — the executor's tenacity retries | + +### 5.2 Executor → controller link (existing endpoints) + +No change to `/register`, `/renew`, `/heartbeat`, `/poll`, `/report`. `stream_hf` adds the `X-Assignment-Token` header on top of the W3a auth headers — no new endpoint shape. + +### 5.3 Config surface + +- **Controller** `Settings`: gains `hf_proxy_timeout_seconds` (env `DLW_HF_PROXY_TIMEOUT_SECONDS`, default 300). +- **Executor** `ExecutorSettings`: `hf_token` (`DLW_EXECUTOR_HF_TOKEN`) and `hf_endpoint` (`DLW_EXECUTOR_HF_ENDPOINT`) are **removed**. + +### 5.4 OpenAPI + +`api/openapi.yaml` gains the `/hf-proxy/subtask/{subtaskId}` GET operation (tag `executors`, mTLS+JWT security, `X-Assignment-Token` + `Range` parameters, `200`/`206` streamed binary response, `403`/`404`/`409` error responses). No new schema components — the response is a binary stream. + +--- + +## 6. Error Handling Matrix + +| Situation | Behaviour | +|---|---| +| missing mTLS / JWT | 401 from `require_executor_jwt` (W3a) — before the handler runs | +| subtask not found | 404 `subtask not found` | +| authenticated executor ≠ subtask's executor | 403 `NOT_YOUR_SUBTASK` | +| `X-Assignment-Token` ≠ `subtask.assignment_token` (incl. NULL token on a pending subtask) | 409 `STALE_ASSIGNMENT` | +| executor epoch ≠ subtask's epoch | 409 `EPOCH_MISMATCH` | +| HF returns 200/206 | streamed through with allowlisted headers | +| HF returns 403/404 (private repo / missing file) | forwarded — the executor's generic failure path reports `failed` | +| HF returns 429/503 | forwarded — the executor's W2b2 `paused_external` handling catches it | +| HF returns 5xx | forwarded — the executor's W2b1 `_TRANSIENT_RETRY` retries (3 attempts) | +| HF connection error before any bytes | `hf_client.send(stream=True)` raises → FastAPI maps to 500 (or the executor's tenacity retries the proxy call) | +| HF drops mid-stream after 200 | the `_body()` generator's `aiter_bytes` raises → `finally` cleans up → the executor sees a truncated body. `DirectOffsetDownloader` asserts `len(body) == plan.length` for non-final chunks → chunk fails → tenacity retries. `HfS3StreamDownloader` → fewer bytes → the W4 sha256 verify gate flips the subtask to `failed`. Both fail-closed. | +| executor disconnects mid-stream | FastAPI cancels the `_body()` generator → `GeneratorExit` → `finally` closes the HF response + client. No leak. | +| `_resolve_size` probe: HF returns 200 (no `Content-Range`) instead of 206 | fall back to the response's `Content-Length`; the probe body is discarded | +| `Settings.hf_token` is empty (dev) | the proxy sends no `Authorization` header — HF serves public repos fine, private repos 403 (forwarded) | + +--- + +## 7. Testing Strategy + +### 7.1 Unit + integration (~13 new cases) + +| # | File | Case | What it asserts | +|---|---|---|---| +| 1 | `tests/api/test_hf_proxy.py` | `test_proxy_streams_file_with_token_injected` | register executor + claim subtask; mock HF (asserts it received `Authorization: Bearer `); proxy returns 200 + correct body | +| 2 | same | `test_proxy_forwards_range_header` | executor sends `Range: bytes=0-1023`; mock HF asserts it received the Range, returns 206 + `Content-Range`; proxy forwards 206 + header | +| 3 | same | `test_proxy_rejects_unauthenticated` | no mTLS/JWT → 401 | +| 4 | same | `test_proxy_rejects_not_your_subtask` | executor-A authenticated, requests executor-B's subtask → 403 `NOT_YOUR_SUBTASK` | +| 5 | same | `test_proxy_rejects_stale_assignment_token` | `X-Assignment-Token` ≠ subtask row → 409 `STALE_ASSIGNMENT` | +| 6 | same | `test_proxy_rejects_epoch_mismatch` | executor epoch ≠ `subtask.executor_epoch` → 409 `EPOCH_MISMATCH` | +| 7 | same | `test_proxy_404_on_missing_subtask` | random subtask_id → 404 | +| 8 | same | `test_proxy_forwards_hf_429` | mock HF returns 429 → proxy forwards 429 | +| 9 | same | `test_proxy_reconstructs_url_from_subtask` | executor sends no path; assert the HF URL the proxy hit == `{endpoint}/{task.repo_id}/resolve/{task.revision}/{sub.filename}` | +| 10 | `tests/executor/test_client.py` | `test_stream_hf_attaches_auth_and_token_headers` | `stream_hf` request carries `Authorization` + `X-Executor-Epoch` + `X-Assignment-Token`; `Range` forwarded when given | +| 11 | `tests/executor/test_chunk_downloader.py` | `test_chunk_downloader_uses_controller_proxy` | `DirectOffsetDownloader` fetches via `client.stream_hf` (fake `ControllerClient` injected; asserts it was called, no direct HF) | +| 12 | `tests/executor/test_downloader.py` | `test_stream_downloader_uses_controller_proxy` | `HfS3StreamDownloader` likewise | +| 13 | `tests/tools/test_lint_no_hf_token.py` (or folded into the existing lint self-tests) | `test_lint_flags_hf_token_in_executor` | a fixture file containing `hf_token` makes `check_no_hf_token_in_executor` report an error | + +### 7.2 Existing test migration + +W3b changes the `Assignment` dataclass (+`assignment_token`) and both downloader constructors (+`client`): + +| Test set | Change | +|---|---| +| `tests/executor/test_downloader.py` (W4) | `HfS3StreamDownloader(settings=)` → `(settings=, client=)`; `Assignment(...)` gains `assignment_token=`. The HF GET now goes through `client.stream_hf` — the test's HF MockTransport attaches to a **fake `ControllerClient`'s `stream_hf`**, not the downloader's `_make_http_client`. ~5-8 setup edits. | +| `tests/executor/test_chunk_downloader.py` (W2b1) | Same: `DirectOffsetDownloader(settings=, client=)`; the happy-path `_mock_transport_for_synthetic` moves from "attached to `_io.make_http_client`" to "attached to the fake `ControllerClient.stream_hf`". Pass-2 (S3 multipart) unchanged. | +| `tests/executor/test_runner*.py` (W3a) | `Assignment` gains a field — `runner._execute_subtask` adds `assignment_token=` to its `Assignment(...)` call. Runner tests already use `MagicMock` downloaders (which accept any kwargs), so the constructor-signature change doesn't break them. | +| `tests/e2e/test_executor_e2e.py` (W3a-migrated) | This e2e runs the real runner + real downloaders. The HF MockTransport now attaches to the **controller-side** proxy's httpx client (the `hf_proxy` router's HF leg), not the downloader. Moderate rewire — the executor↔controller link is the ASGI transport; the controller↔HF link is the MockTransport. | +| `tests/services/test_hf_metadata.py` (if present) | **unchanged** — `hf_metadata` is controller-side; W3b doesn't touch it. | + +**New test infrastructure:** a `tests/conftest.py` helper `make_fake_controller_client(hf_handler)` — returns an object whose `stream_hf` is an `@asynccontextmanager` that internally uses `httpx.MockTransport(hf_handler)` and yields a streaming response. Shared by the two downloader test files. + +### 7.3 Proxy endpoint test approach + +`tests/api/test_hf_proxy.py` follows the W3a API-test pattern: +- `client` fixture: `create_app()` + `app.state.{ca,jwt_keypair,nonce_store,enrollment_token}` injected from `ephemeral_ca` + `DLW_TLS_TRUSTED_PROXY=1`. +- `Settings.hf_token` injected via `monkeypatch.setenv("DLW_HF_TOKEN", "test-hf-token-xyz")`. +- The HF upstream: the proxy's controller-side `httpx.AsyncClient` is monkeypatched (`monkeypatch.setattr("dlw.api.hf_proxy.httpx.AsyncClient", ...)`) to return a `MockTransport`-backed client whose handler asserts `Authorization: Bearer test-hf-token-xyz` (+ optional Range) and returns a synthetic body. +- Executor side: W3a's `register_test_executor` registers + a task is created → `/poll` claims a subtask → the test has `assignment_token`; then it hits the proxy with `executor_request_headers(reg)` + `X-Assignment-Token`. + +### 7.4 Not tested + +- A real HF endpoint — all `MockTransport`. +- A real multi-hop `302 → CDN` chain — the test asserts `follow_redirects=True` is set + the final body is correct; it does not exercise deep redirect chains. +- Large-file streaming back-pressure / memory — the design is O(64 KiB)-buffered; no perf baseline (P-004 is after W3). +- executor-disconnect-mid-stream cleanup — noted as Phase-3 hardening (hard to simulate cleanly with `ASGITransport`). +- Global 429 throttle coordination — Phase 3. + +### 7.5 CI 12 checks expectations + +| Check | W3b impact | +|---|---| +| pytest | +13 new; ~10-15 migrated W4/W2b1/e2e setups | +| Invariant + cross-ref lint | **+`check_no_hf_token_in_executor`** helper | +| OpenAPI lint | new `/hf-proxy/subtask/{subtaskId}` GET operation | +| Markdown lint | spec/plan cross-ref 04 §3.1 + INVARIANTS §2 | +| Other 8 | no change | + +--- + +## 8. Acceptance Criteria + +- [ ] `GET /api/v1/hf-proxy/subtask/{subtask_id}` endpoint: mTLS+JWT auth + the 4-check verification chain + URL reconstruction + streaming with `Settings.hf_token` injected + header allowlist. +- [ ] `ControllerClient.stream_hf` — `@asynccontextmanager` yielding the httpx streaming response, attaching auth + `X-Assignment-Token` (+ optional `Range`). +- [ ] `HfS3StreamDownloader` + `DirectOffsetDownloader` rewired to `client.stream_hf`; S3-side logic unchanged; `_resolve_size` uses a 1-byte range probe. +- [ ] `Assignment` dataclass gains `assignment_token`; the runner threads it in. +- [ ] `ExecutorSettings.hf_token` + `ExecutorSettings.hf_endpoint` deleted. +- [ ] `check_no_hf_token_in_executor` lint reports 0 on the production tree. +- [ ] ~13 new pytest cases pass; the migrated W4/W2b1/e2e setups pass; full suite green. +- [ ] OpenAPI: `/hf-proxy/subtask/{subtaskId}` GET documented; spectral clean. +- [ ] `docs/operator/` notes the removed `DLW_EXECUTOR_HF_TOKEN` / `DLW_EXECUTOR_HF_ENDPOINT` env + the bandwidth-through-controller tradeoff. +- [ ] No new runtime deps; no new CI jobs; **zero alembic migrations**. + +--- + +## 9. Implementation Phasing (preview for plan) + +The plan will be written by the writing-plans skill after spec approval. Expected milestone shape (3 milestones, ~7 tasks): + +- **M1 — Proxy endpoint.** `src/dlw/api/hf_proxy.py` + `main.py` router include + `Settings.hf_proxy_timeout_seconds` + `tests/api/test_hf_proxy.py` (9 cases). Controller-side only; executors still call HF directly at this point (the proxy exists but is unused). +- **M2 — Executor rewiring.** `ControllerClient.stream_hf` + `Assignment.assignment_token` + both downloaders rewired + runner/cli wiring + `ExecutorSettings.hf_token`/`hf_endpoint` deleted + the `make_fake_controller_client` conftest helper + migrated downloader tests + 3 new executor tests. +- **M3 — e2e + lint + OpenAPI + PR.** `test_executor_e2e.py` rewire (HF MockTransport → controller-side) + `check_no_hf_token_in_executor` lint + the lint self-test + OpenAPI + operator runbook + PR. + +Branch: `feat/phase-2-w3b-hf-reverse-proxy`. Branched off `main` at `1611d61` (PR #12 merge). + +--- + +## 10. References + +- Spec source: brainstormed 2026-05-14 (this document). +- Roadmap: `docs/v2.0/08-mvp-roadmap.md` §2.6 Phase 2 W3 Day 4. +- Security: `docs/v2.0/04-security-and-tenancy.md` §3.1 (HF Token reverse proxy, SEC-02). +- Invariants: `docs/v2.0/INVARIANTS.md` §A — INVARIANT 2 (tenant HF token never leaves the controller). +- Predecessor specs: + - W3a: `docs/superpowers/specs/2026-05-14-phase-2-w3a-mtls-jwt-hmac-design.md` (mTLS + JWT — the auth chain `stream_hf` reuses) + - W2b1: `docs/superpowers/specs/2026-05-13-phase-2-w2b1-chunk-level-downloader-design.md` (`DirectOffsetDownloader` — the chunk downloader being rewired) + - W2b2: `docs/superpowers/specs/2026-05-14-phase-2-w2b2-cancel-and-paused-external-design.md` (`paused_external` — catches the forwarded 429/503) +- W3a PR (merged): https://github.com/l17728/modelpull/pull/12 (squash `1611d61`). From 8b5001c01412ddf2716edde9ff09ada3d530c080 Mon Sep 17 00:00:00 2001 From: l17728 Date: Thu, 14 May 2026 18:38:18 +0800 Subject: [PATCH 02/21] =?UTF-8?q?docs(plan):=20Phase=202=20W3b=20=E2=80=94?= =?UTF-8?q?=20HF=20reverse-proxy=20implementation=20plan=20(9=20tasks)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 3 milestones: controller proxy endpoint, executor rewiring, integration (lint + e2e + OpenAPI + docs + PR). TDD bite-sized steps, complete code. Co-Authored-By: Claude Opus 4.7 (1M context) --- ...2026-05-14-phase-2-w3b-hf-reverse-proxy.md | 1989 +++++++++++++++++ 1 file changed, 1989 insertions(+) create mode 100644 docs/superpowers/plans/2026-05-14-phase-2-w3b-hf-reverse-proxy.md diff --git a/docs/superpowers/plans/2026-05-14-phase-2-w3b-hf-reverse-proxy.md b/docs/superpowers/plans/2026-05-14-phase-2-w3b-hf-reverse-proxy.md new file mode 100644 index 0000000..738c235 --- /dev/null +++ b/docs/superpowers/plans/2026-05-14-phase-2-w3b-hf-reverse-proxy.md @@ -0,0 +1,1989 @@ +# W3b HF Reverse Proxy Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add a controller-side HF reverse proxy so the tenant HF token never reaches an executor (SEC-02 / INVARIANT 2). + +**Architecture:** A new `GET /api/v1/hf-proxy/subtask/{subtask_id}` endpoint authenticates the executor (W3a mTLS+JWT), verifies subtask ownership (confused-deputy guard + assignment_token fence + epoch fence), reconstructs the HF URL from the subtask row, injects `Settings.hf_token`, follows HF's 302→CDN redirect, and streams the bytes back. Executor downloaders fetch through a new `ControllerClient.stream_hf` context manager instead of calling HF directly; `ExecutorSettings.hf_token`/`hf_endpoint` are deleted. + +**Tech Stack:** FastAPI, `httpx` (streaming + `follow_redirects`), `fastapi.responses.StreamingResponse`, SQLAlchemy 2.x async, pytest + `httpx.MockTransport` + `httpx.ASGITransport`. No new runtime/dev deps, no new CI jobs, **zero alembic migrations**. + +**Spec:** `docs/superpowers/specs/2026-05-14-phase-2-w3b-hf-reverse-proxy-design.md` + +**Branch:** `feat/phase-2-w3b-hf-reverse-proxy` (already created off `main`). + +--- + +## File Structure + +**New files:** +- `src/dlw/api/hf_proxy.py` — the reverse-proxy router (one endpoint). Owns: auth + ownership verification, HF URL reconstruction, token injection, streaming passthrough. +- `tests/api/test_hf_proxy.py` — proxy endpoint tests (9 cases). +- `tests/tools/__init__.py` + `tests/tools/test_lint_no_hf_token.py` — self-test for the new invariant lint. +- `tests/test_config_w3b.py` — `Settings.hf_proxy_timeout_seconds` config test. + +**Modified files:** +- `src/dlw/config.py` — `Settings` gains `hf_proxy_timeout_seconds`. +- `src/dlw/main.py` — `create_app()` includes the hf_proxy router. +- `src/dlw/executor/client.py` — `ControllerClient` gains `stream_hf` (`@asynccontextmanager`). +- `src/dlw/executor/types.py` — `Assignment` gains `assignment_token`. +- `src/dlw/executor/downloader.py` — `HfS3StreamDownloader` rewired to `client.stream_hf`; `_make_http_client` deleted. +- `src/dlw/executor/chunk_downloader.py` — `DirectOffsetDownloader` rewired to `client.stream_hf`; `_resolve_size` becomes a range probe. +- `src/dlw/executor/runner.py` — `_execute_subtask` threads `assignment_token` into `Assignment`. +- `src/dlw/executor/cli.py` — builds `ControllerClient` first, passes it into both downloaders. +- `src/dlw/executor/config.py` — `ExecutorSettings.hf_token` + `hf_endpoint` deleted. +- `src/dlw/executor/_io.py` — `make_http_client` deleted (now unused). +- `tools/lint_invariants.py` — gains `check_no_hf_token_in_executor`. +- `tests/conftest.py` — gains `make_fake_controller_client` test double. +- `tests/executor/test_downloader.py` — migrated to the fake controller client. +- `tests/executor/test_chunk_downloader.py` — migrated to the fake controller client; `_resolve_size` tests added. +- `tests/e2e/test_executor_e2e.py` — HF MockTransport moves to the controller side. +- `api/openapi.yaml` — documents the new endpoint. +- `docs/operator/executor-runbook.md` — notes removed env vars + bandwidth tradeoff. + +**Out of scope (do NOT touch):** `tests/e2e/test_hf_s3_smoke_local.py` — `@pytest.mark.manual` (never runs on CI) and already stale from W3a (uses removed `ControllerClient(bearer_token=...)` kwarg + wrong `ExecutorRunner(downloader=...)` kwarg). Leave it; fixing it is not W3b's concern. + +--- + +## Milestone 1 — Controller-side proxy endpoint + +### Task 1: Add `hf_proxy_timeout_seconds` to controller Settings + +**Files:** +- Modify: `src/dlw/config.py:33-37` +- Test: `tests/test_config_w3b.py` (create) + +- [ ] **Step 1: Write the failing test** + +Create `tests/test_config_w3b.py`: + +```python +"""W3b: Settings.hf_proxy_timeout_seconds config field.""" +from __future__ import annotations + +import pytest +from pydantic import ValidationError + +from dlw.config import Settings + + +def test_settings_has_hf_proxy_timeout_default() -> None: + s = Settings() + assert s.hf_proxy_timeout_seconds == 300 + + +def test_settings_hf_proxy_timeout_rejects_below_min() -> None: + with pytest.raises(ValidationError): + Settings(hf_proxy_timeout_seconds=5) + + +def test_settings_hf_proxy_timeout_rejects_above_max() -> None: + with pytest.raises(ValidationError): + Settings(hf_proxy_timeout_seconds=4000) +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `uv run pytest tests/test_config_w3b.py -v` +Expected: FAIL — `AttributeError: 'Settings' object has no attribute 'hf_proxy_timeout_seconds'`. + +- [ ] **Step 3: Add the field** + +In `src/dlw/config.py`, after the W3a block (the line `tls_trusted_proxy: bool = Field(default=False)`) and before the `@property` `db_url`, add: + +```python + # Phase 2 W3b — HF reverse-proxy + hf_proxy_timeout_seconds: int = Field(default=300, ge=10, le=3600) +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `uv run pytest tests/test_config_w3b.py -v` +Expected: PASS (3 passed). + +- [ ] **Step 5: Commit** + +```bash +git add src/dlw/config.py tests/test_config_w3b.py +git commit -m "feat(config): add hf_proxy_timeout_seconds Setting (W3b)" +``` + +--- + +### Task 2: HF reverse-proxy endpoint — happy path (streaming, token injection, URL reconstruction) + +This task builds the endpoint with auth + subtask lookup + URL reconstruction + streaming. The ownership/fence verification chain is added in Task 3 (so Task 3's tests have a real red phase). + +**Files:** +- Create: `src/dlw/api/hf_proxy.py` +- Modify: `src/dlw/main.py:122-135` +- Test: `tests/api/test_hf_proxy.py` (create) + +- [ ] **Step 1: Write the failing test file** + +Create `tests/api/test_hf_proxy.py`: + +```python +"""W3b: GET /api/v1/hf-proxy/subtask/{id} — controller-side HF reverse proxy.""" +from __future__ import annotations + +import uuid + +import httpx +import pytest +from httpx import ASGITransport, AsyncClient +from sqlalchemy import update +from sqlalchemy.ext.asyncio import async_sessionmaker + +from dlw.config import get_settings +from dlw.db.base import Base +from dlw.db.models.task import FileSubTask +from tests.conftest import executor_request_headers, register_test_executor + +_TOKEN = "hf-proxy-ui-token" +_ENROLL = "hf-proxy-enrollment-token" +_HF_TOKEN = "test-hf-token-xyz" + + +@pytest.fixture(scope="module", autouse=True) +async def _bootstrap(engine): + """Tables + minimal seed (tenant + project + user + storage).""" + from dlw.db.models.storage import StorageBackend + from dlw.db.models.tenant import Project, Tenant, User + async with engine.begin() as conn: + await conn.run_sync(Base.metadata.create_all) + factory = async_sessionmaker(engine, expire_on_commit=False) + async with factory() as s: + s.add(Tenant(id=1, slug="d", display_name="D")) + await s.flush() + s.add(Project(id=1, tenant_id=1, name="d")) + s.add(User(id=1, tenant_id=1, oidc_subject="d", + email="d@l", role="tenant_admin")) + s.add(StorageBackend(id=1, tenant_id=1, name="d", + backend_type="s3", config_encrypted=b"")) + await s.commit() + yield + async with engine.begin() as conn: + await conn.run_sync(Base.metadata.drop_all) + + +@pytest.fixture(autouse=True) +def _set_env(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setenv("DLW_BEARER_TOKEN", _TOKEN) + monkeypatch.setenv("DLW_TLS_TRUSTED_PROXY", "1") + monkeypatch.setenv("DLW_HF_TOKEN", _HF_TOKEN) + monkeypatch.setenv("DLW_HF_ENDPOINT", "https://huggingface.co") + get_settings.cache_clear() + yield + get_settings.cache_clear() + + +@pytest.fixture +def proxy_app(ephemeral_ca): + from dlw.auth.hmac_nonce import NonceStore + from dlw.main import create_app + app = create_app() + app.state.ca = ephemeral_ca["ca"] + app.state.jwt_keypair = ephemeral_ca["jwt_keypair"] + app.state.nonce_store = NonceStore(maxsize=1000, ttl_seconds=300) + app.state.enrollment_token = _ENROLL + return app + + +def _install_hf_mock(monkeypatch, handler): + """Point the proxy's HF client factory at an httpx.MockTransport(handler).""" + import dlw.api.hf_proxy as hf_proxy_mod + + def _fake_make_hf_client(timeout_seconds): + return httpx.AsyncClient( + transport=httpx.MockTransport(handler), follow_redirects=True, + ) + monkeypatch.setattr(hf_proxy_mod, "_make_hf_client", _fake_make_hf_client) + + +async def _create_task_and_claim(c, reg): + """POST a task (2 files via the _patch_hf_global autouse fixture), poll once + as `reg`, return (subtask_id, assignment_token, filename).""" + r = await c.post("/api/v1/tasks", json={ + "repo_id": "deepseek-ai/DeepSeek-V3", + "revision": "a" * 40, + "storage_id": 1, + }, headers={"Authorization": f"Bearer {_TOKEN}"}) + assert r.status_code == 201, r.text + r = await c.post(f"/api/v1/executors/{reg['executor_id']}/poll", + headers=executor_request_headers(reg)) + assert r.status_code == 200, r.text + assert r.json()["assigned"] is True + sub = r.json()["subtask"] + return sub["id"], r.json()["assignment_token"], sub["filename"] + + +@pytest.mark.slow +async def test_proxy_streams_file_with_token_injected( + proxy_app, monkeypatch, +) -> None: + seen: dict = {} + + def hf_handler(request: httpx.Request) -> httpx.Response: + seen["auth"] = request.headers.get("authorization") + return httpx.Response(200, content=b"PROXIED-BYTES") + _install_hf_mock(monkeypatch, hf_handler) + + async with AsyncClient(transport=ASGITransport(app=proxy_app), + base_url="http://test") as c: + reg = await register_test_executor( + c, enrollment_token=_ENROLL, + executor_id="hp-worker-1", host_id="hp-host", + ) + sub_id, token, _ = await _create_task_and_claim(c, reg) + r = await c.get(f"/api/v1/hf-proxy/subtask/{sub_id}", headers={ + **executor_request_headers(reg), + "X-Assignment-Token": token, + }) + + assert r.status_code == 200, r.text + assert r.content == b"PROXIED-BYTES" + assert seen["auth"] == f"Bearer {_HF_TOKEN}" + + +@pytest.mark.slow +async def test_proxy_forwards_range_header(proxy_app, monkeypatch) -> None: + seen: dict = {} + + def hf_handler(request: httpx.Request) -> httpx.Response: + seen["range"] = request.headers.get("range") + return httpx.Response(206, content=b"RANGE", + headers={"Content-Range": "bytes 0-4/100"}) + _install_hf_mock(monkeypatch, hf_handler) + + async with AsyncClient(transport=ASGITransport(app=proxy_app), + base_url="http://test") as c: + reg = await register_test_executor( + c, enrollment_token=_ENROLL, + executor_id="hp-worker-2", host_id="hp-host", + ) + sub_id, token, _ = await _create_task_and_claim(c, reg) + r = await c.get(f"/api/v1/hf-proxy/subtask/{sub_id}", headers={ + **executor_request_headers(reg), + "X-Assignment-Token": token, + "Range": "bytes=0-4", + }) + + assert r.status_code == 206 + assert seen["range"] == "bytes=0-4" + assert r.headers["content-range"] == "bytes 0-4/100" + + +@pytest.mark.slow +async def test_proxy_reconstructs_url_from_subtask(proxy_app, monkeypatch) -> None: + seen: dict = {} + + def hf_handler(request: httpx.Request) -> httpx.Response: + seen["url"] = str(request.url) + return httpx.Response(200, content=b"OK") + _install_hf_mock(monkeypatch, hf_handler) + + async with AsyncClient(transport=ASGITransport(app=proxy_app), + base_url="http://test") as c: + reg = await register_test_executor( + c, enrollment_token=_ENROLL, + executor_id="hp-worker-3", host_id="hp-host", + ) + sub_id, token, filename = await _create_task_and_claim(c, reg) + r = await c.get(f"/api/v1/hf-proxy/subtask/{sub_id}", headers={ + **executor_request_headers(reg), + "X-Assignment-Token": token, + }) + + assert r.status_code == 200 + expected = (f"https://huggingface.co/deepseek-ai/DeepSeek-V3" + f"/resolve/{'a' * 40}/{filename}") + assert seen["url"] == expected + + +@pytest.mark.slow +async def test_proxy_forwards_hf_429(proxy_app, monkeypatch) -> None: + def hf_handler(request: httpx.Request) -> httpx.Response: + return httpx.Response(429, content=b"rate limited") + _install_hf_mock(monkeypatch, hf_handler) + + async with AsyncClient(transport=ASGITransport(app=proxy_app), + base_url="http://test") as c: + reg = await register_test_executor( + c, enrollment_token=_ENROLL, + executor_id="hp-worker-4", host_id="hp-host", + ) + sub_id, token, _ = await _create_task_and_claim(c, reg) + r = await c.get(f"/api/v1/hf-proxy/subtask/{sub_id}", headers={ + **executor_request_headers(reg), + "X-Assignment-Token": token, + }) + + assert r.status_code == 429 + + +@pytest.mark.slow +async def test_proxy_rejects_unauthenticated(proxy_app) -> None: + async with AsyncClient(transport=ASGITransport(app=proxy_app), + base_url="http://test") as c: + r = await c.get(f"/api/v1/hf-proxy/subtask/{uuid.uuid4()}", headers={ + "X-Assignment-Token": str(uuid.uuid4()), + }) + assert r.status_code == 401 + + +@pytest.mark.slow +async def test_proxy_404_on_missing_subtask(proxy_app) -> None: + async with AsyncClient(transport=ASGITransport(app=proxy_app), + base_url="http://test") as c: + reg = await register_test_executor( + c, enrollment_token=_ENROLL, + executor_id="hp-worker-5", host_id="hp-host", + ) + r = await c.get(f"/api/v1/hf-proxy/subtask/{uuid.uuid4()}", headers={ + **executor_request_headers(reg), + "X-Assignment-Token": str(uuid.uuid4()), + }) + assert r.status_code == 404 +``` + +- [ ] **Step 2: Run the test file to verify it fails** + +Run: `uv run pytest tests/api/test_hf_proxy.py -v` +Expected: FAIL — all cases error/404 because the route does not exist yet (`GET /api/v1/hf-proxy/...` → 404 from FastAPI's default handler, or import error if `dlw.api.hf_proxy` is referenced). + +- [ ] **Step 3: Create the proxy router** + +Create `src/dlw/api/hf_proxy.py`: + +```python +"""HF reverse-proxy — controller-side, injects the tenant HF token (SEC-02). + +The executor never holds the HF token (INVARIANT 2). It calls this proxy keyed +by subtask_id; the controller verifies ownership (assignment_token + epoch fence ++ confused-deputy guard — added in Task 3), reconstructs the HF URL from the +subtask row, injects Settings.hf_token, follows HF's 302->CDN redirect +server-side, and streams the bytes back. +""" +from __future__ import annotations + +import uuid + +import httpx +from fastapi import APIRouter, Depends, Header, HTTPException, Request +from fastapi.responses import StreamingResponse +from sqlalchemy.ext.asyncio import AsyncSession + +from dlw.api.tasks import _session +from dlw.auth.executor_jwt_dep import require_executor_jwt +from dlw.config import get_settings +from dlw.db.models.executor import Executor +from dlw.db.models.task import DownloadTask, FileSubTask + +router = APIRouter(prefix="/api/v1/hf-proxy", tags=["executors"]) + +_HF_HEADER_ALLOWLIST = frozenset({ + "content-length", "content-range", "content-type", + "accept-ranges", "etag", +}) + + +def _make_hf_client(timeout_seconds: int) -> httpx.AsyncClient: + """Test seam — monkeypatched in tests to inject an httpx.MockTransport.""" + return httpx.AsyncClient(follow_redirects=True, timeout=timeout_seconds) + + +@router.get("/subtask/{subtask_id}") +async def hf_proxy_subtask( + subtask_id: uuid.UUID, + request: Request, + x_assignment_token: str = Header(..., alias="X-Assignment-Token"), + auth_ex: Executor = Depends(require_executor_jwt), + session: AsyncSession = Depends(_session), +) -> StreamingResponse: + sub = await session.get(FileSubTask, subtask_id) + if sub is None: + raise HTTPException(status_code=404, detail="subtask not found") + + task = await session.get(DownloadTask, sub.task_id) + if task is None: # FK guarantees this won't happen + raise HTTPException(status_code=500, detail="parent task missing") + + settings = get_settings() + hf_url = (f"{settings.hf_endpoint.rstrip('/')}/{task.repo_id}" + f"/resolve/{task.revision}/{sub.filename}") + + hf_headers: dict[str, str] = {} + if settings.hf_token: + hf_headers["Authorization"] = f"Bearer {settings.hf_token}" + range_header = request.headers.get("Range") + if range_header: + hf_headers["Range"] = range_header + + hf_client = _make_hf_client(settings.hf_proxy_timeout_seconds) + hf_req = hf_client.build_request("GET", hf_url, headers=hf_headers) + hf_resp = await hf_client.send(hf_req, stream=True) + + fwd = { + k: v for k, v in hf_resp.headers.items() + if k.lower() in _HF_HEADER_ALLOWLIST + } + + async def _body(): + try: + async for chunk in hf_resp.aiter_bytes(64 * 1024): + yield chunk + finally: + await hf_resp.aclose() + await hf_client.aclose() + + return StreamingResponse( + _body(), status_code=hf_resp.status_code, headers=fwd, + ) +``` + +- [ ] **Step 4: Wire the router into the app** + +In `src/dlw/main.py`, inside `create_app()`, after the `subtasks_router` include (line ~134) and before `return app`, add: + +```python + from dlw.api.hf_proxy import router as hf_proxy_router + app.include_router(hf_proxy_router) +``` + +- [ ] **Step 5: Run the test file to verify it passes** + +Run: `uv run pytest tests/api/test_hf_proxy.py -v` +Expected: PASS (6 passed) — the 6 Task-2 cases. + +- [ ] **Step 6: Commit** + +```bash +git add src/dlw/api/hf_proxy.py src/dlw/main.py tests/api/test_hf_proxy.py +git commit -m "feat(api): HF reverse-proxy endpoint — streaming + token injection (W3b)" +``` + +--- + +### Task 3: HF reverse-proxy — ownership + fence verification chain + +Adds the three checks between the 404 check and the `task = ...` lookup: confused-deputy guard, assignment_token fence, epoch fence. + +**Files:** +- Modify: `src/dlw/api/hf_proxy.py` +- Test: `tests/api/test_hf_proxy.py` (append 3 cases) + +- [ ] **Step 1: Write the failing tests** + +Append to `tests/api/test_hf_proxy.py`: + +```python +@pytest.mark.slow +async def test_proxy_rejects_not_your_subtask(proxy_app) -> None: + """Authenticated executor B cannot proxy-fetch executor A's subtask.""" + async with AsyncClient(transport=ASGITransport(app=proxy_app), + base_url="http://test") as c: + reg_a = await register_test_executor( + c, enrollment_token=_ENROLL, + executor_id="hp-owner-a", host_id="hp-host", + ) + reg_b = await register_test_executor( + c, enrollment_token=_ENROLL, + executor_id="hp-owner-b", host_id="hp-host", + ) + sub_id, token, _ = await _create_task_and_claim(c, reg_a) + r = await c.get(f"/api/v1/hf-proxy/subtask/{sub_id}", headers={ + **executor_request_headers(reg_b), + "X-Assignment-Token": token, + }) + assert r.status_code == 403 + assert r.json()["detail"]["code"] == "NOT_YOUR_SUBTASK" + + +@pytest.mark.slow +async def test_proxy_rejects_stale_assignment_token(proxy_app) -> None: + async with AsyncClient(transport=ASGITransport(app=proxy_app), + base_url="http://test") as c: + reg = await register_test_executor( + c, enrollment_token=_ENROLL, + executor_id="hp-worker-stale", host_id="hp-host", + ) + sub_id, _token, _ = await _create_task_and_claim(c, reg) + r = await c.get(f"/api/v1/hf-proxy/subtask/{sub_id}", headers={ + **executor_request_headers(reg), + "X-Assignment-Token": str(uuid.uuid4()), # wrong token + }) + assert r.status_code == 409 + assert r.json()["detail"]["code"] == "STALE_ASSIGNMENT" + + +@pytest.mark.slow +async def test_proxy_rejects_epoch_mismatch(proxy_app, db_session) -> None: + async with AsyncClient(transport=ASGITransport(app=proxy_app), + base_url="http://test") as c: + reg = await register_test_executor( + c, enrollment_token=_ENROLL, + executor_id="hp-worker-epoch", host_id="hp-host", + ) + sub_id, token, _ = await _create_task_and_claim(c, reg) + # Corrupt the subtask's stamped epoch so it no longer matches the + # authenticated executor's current epoch. + await db_session.execute( + update(FileSubTask) + .where(FileSubTask.id == uuid.UUID(sub_id)) + .values(executor_epoch=99_999) + ) + await db_session.commit() + r = await c.get(f"/api/v1/hf-proxy/subtask/{sub_id}", headers={ + **executor_request_headers(reg), + "X-Assignment-Token": token, + }) + assert r.status_code == 409 + assert r.json()["detail"]["code"] == "EPOCH_MISMATCH" +``` + +- [ ] **Step 2: Run the new tests to verify they fail** + +Run: `uv run pytest tests/api/test_hf_proxy.py -v -k "not_your_subtask or stale_assignment or epoch_mismatch"` +Expected: FAIL — the handler has no ownership/fence checks yet, so it proceeds to the HF call. `not_your_subtask` and `stale_assignment` reach the (un-mocked) real `_make_hf_client` and error or return a non-403/409 status; `epoch_mismatch` likewise. All three assertions fail. + +- [ ] **Step 3: Add the verification chain** + +In `src/dlw/api/hf_proxy.py`, replace this block: + +```python + sub = await session.get(FileSubTask, subtask_id) + if sub is None: + raise HTTPException(status_code=404, detail="subtask not found") + + task = await session.get(DownloadTask, sub.task_id) +``` + +with: + +```python + sub = await session.get(FileSubTask, subtask_id) + if sub is None: + raise HTTPException(status_code=404, detail="subtask not found") + if sub.executor_id != auth_ex.id: + raise HTTPException( + status_code=403, + detail={"code": "NOT_YOUR_SUBTASK", + "subtask_executor": sub.executor_id, + "authenticated": auth_ex.id}, + ) + if str(sub.assignment_token) != x_assignment_token: + raise HTTPException( + status_code=409, detail={"code": "STALE_ASSIGNMENT"}, + ) + if sub.executor_epoch != auth_ex.epoch: + raise HTTPException( + status_code=409, + detail={"code": "EPOCH_MISMATCH", + "expected": sub.executor_epoch, "got": auth_ex.epoch}, + ) + + task = await session.get(DownloadTask, sub.task_id) +``` + +- [ ] **Step 4: Run the full proxy test file to verify all pass** + +Run: `uv run pytest tests/api/test_hf_proxy.py -v` +Expected: PASS (9 passed). + +- [ ] **Step 5: Commit** + +```bash +git add src/dlw/api/hf_proxy.py tests/api/test_hf_proxy.py +git commit -m "feat(api): HF proxy ownership + assignment_token + epoch fence (W3b)" +``` + +--- + +## Milestone 2 — Executor rewiring + +### Task 4: `Assignment.assignment_token` + `ControllerClient.stream_hf` + +**Files:** +- Modify: `src/dlw/executor/types.py:14-24` +- Modify: `src/dlw/executor/client.py` +- Test: `tests/executor/test_client.py` (append 1 case) + +- [ ] **Step 1: Write the failing test** + +Append to `tests/executor/test_client.py`: + +```python +@pytest.mark.slow +async def test_stream_hf_attaches_auth_and_token_headers(tmp_path) -> None: + """stream_hf carries Authorization + X-Executor-Epoch + X-Assignment-Token, + forwards Range when given, and hits /api/v1/hf-proxy/subtask/{id}.""" + seen: dict = {} + body = b"hf-proxied-bytes" + + def handler(request: httpx.Request) -> httpx.Response: + seen["headers"] = {k.lower(): v for k, v in request.headers.items()} + seen["path"] = request.url.path + return httpx.Response(200, content=body) + + state = make_fake_auth_state( + tmp_path, executor_id="ex-stream", epoch=4, jwt="jwt-stream", + hmac_seed=_HMAC_SEED, + ) + sub_id = uuid.uuid4() + tok = uuid.uuid4() + async with ControllerClient( + base_url="http://test", auth_state=state, + _transport=httpx.MockTransport(handler), + ) as c: + async with c.stream_hf( + subtask_id=sub_id, assignment_token=tok, + range_header="bytes=0-1023", + ) as resp: + got = await resp.aread() + + assert got == body + assert seen["path"] == f"/api/v1/hf-proxy/subtask/{sub_id}" + assert seen["headers"]["authorization"] == "Bearer jwt-stream" + assert seen["headers"]["x-executor-epoch"] == "4" + assert seen["headers"]["x-assignment-token"] == str(tok) + assert seen["headers"]["range"] == "bytes=0-1023" +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `uv run pytest tests/executor/test_client.py::test_stream_hf_attaches_auth_and_token_headers -v` +Expected: FAIL — `AttributeError: 'ControllerClient' object has no attribute 'stream_hf'`. + +- [ ] **Step 3: Add `assignment_token` to `Assignment`** + +In `src/dlw/executor/types.py`, change the `Assignment` dataclass body to insert `assignment_token` right after `task_id`: + +```python +@dataclass(frozen=True) +class Assignment: + """Slim payload passed from runner to downloader.""" + subtask_id: uuid.UUID + task_id: uuid.UUID + assignment_token: uuid.UUID + repo_id: str + revision: str + filename: str + file_size: int | None + expected_sha256: str | None + storage_config: StorageConfig +``` + +- [ ] **Step 4: Add `stream_hf` to `ControllerClient`** + +In `src/dlw/executor/client.py`, add `asynccontextmanager` to the imports — change: + +```python +import secrets +import time +import uuid +from typing import Any, Self +``` + +to: + +```python +import secrets +import time +import uuid +from contextlib import asynccontextmanager +from typing import Any, Self +``` + +Then add the `stream_hf` method at the end of the `ControllerClient` class (after `report`): + +```python + @asynccontextmanager + async def stream_hf( + self, + *, + subtask_id: uuid.UUID, + assignment_token: uuid.UUID, + range_header: str | None = None, + ): + """W3b: stream a file from HF via the controller reverse-proxy. Yields + the httpx streaming Response — callers consume resp.aiter_bytes() and + check resp.status_code, exactly as they did with a direct HF GET.""" + headers = { + **self._auth_headers(), + "X-Assignment-Token": str(assignment_token), + } + if range_header: + headers["Range"] = range_header + async with self._make_client() as client: + async with client.stream( + "GET", + f"/api/v1/hf-proxy/subtask/{subtask_id}", + headers=headers, + ) as resp: + yield resp +``` + +- [ ] **Step 5: Run test to verify it passes** + +Run: `uv run pytest tests/executor/test_client.py -v` +Expected: PASS (all `test_client.py` cases, including the new one). + +- [ ] **Step 6: Commit** + +```bash +git add src/dlw/executor/types.py src/dlw/executor/client.py tests/executor/test_client.py +git commit -m "feat(executor): Assignment.assignment_token + ControllerClient.stream_hf (W3b)" +``` + +--- + +### Task 5: Rewire `HfS3StreamDownloader` to the controller proxy + +**Files:** +- Modify: `tests/conftest.py` (add `make_fake_controller_client`) +- Modify: `tests/executor/test_downloader.py` +- Modify: `src/dlw/executor/downloader.py` + +- [ ] **Step 1: Add the `make_fake_controller_client` test double to conftest** + +Append to `tests/conftest.py`: + +```python +def make_fake_controller_client(hf_handler): + """W3b test double for ControllerClient — its stream_hf() routes through an + httpx.MockTransport(hf_handler) instead of a real controller proxy. Lets + downloader tests simulate HF responses without a running controller. + hf_handler is a Callable[[httpx.Request], httpx.Response].""" + import httpx as _httpx + from contextlib import asynccontextmanager as _acm + + class _FakeControllerClient: + @_acm + async def stream_hf(self, *, subtask_id, assignment_token, + range_header=None): + transport = _httpx.MockTransport(hf_handler) + async with _httpx.AsyncClient( + transport=transport, base_url="http://fake-controller", + ) as client: + headers = {} + if range_header: + headers["Range"] = range_header + async with client.stream( + "GET", f"/api/v1/hf-proxy/subtask/{subtask_id}", + headers=headers, + ) as resp: + yield resp + + return _FakeControllerClient() +``` + +- [ ] **Step 2: Migrate `tests/executor/test_downloader.py`** + +Replace the imports block (lines 1-22) — change the `from dlw.executor.downloader import (...)` line and add the conftest import: + +Replace: + +```python +from dlw.executor.config import ExecutorSettings +from dlw.executor.downloader import ( + Assignment, + DownloadResult, + HfS3StreamDownloader, + StorageConfig, +) +``` + +with: + +```python +from dlw.executor.config import ExecutorSettings +from dlw.executor.downloader import ( + Assignment, + DownloadResult, + HfS3StreamDownloader, + StorageConfig, +) +from tests.conftest import make_fake_controller_client +``` + +Replace the `_assignment` helper (lines 32-41) — add `assignment_token`: + +```python +def _assignment(*, repo_id="o/r", revision="a" * 40, filename="config.json", + key_prefix="phase1/", bucket="b") -> Assignment: + import uuid as _uuid + return Assignment( + subtask_id=_uuid.uuid4(), + task_id=_uuid.uuid4(), + assignment_token=_uuid.uuid4(), + repo_id=repo_id, revision=revision, filename=filename, + file_size=4096, expected_sha256=None, + storage_config=StorageConfig(bucket=bucket, key_prefix=key_prefix), + ) +``` + +Add a shared no-op client helper right after `_assignment` (the three `test_compose_key_*` tests never stream, but the constructor now requires `client`): + +```python +def _noop_client(): + return make_fake_controller_client( + lambda request: httpx.Response(200, content=b"") + ) +``` + +Update the three `test_compose_key_*` tests — replace each `HfS3StreamDownloader(settings=_settings())` with `HfS3StreamDownloader(settings=_settings(), client=_noop_client())`. There are three occurrences (lines 45, 52, 59). + +Replace the `_make_hf_transport` helper (lines 81-85) with a handler-returning version: + +```python +def _hf_handler(body_bytes: bytes): + """Returns an httpx handler that streams body_bytes for any request.""" + def handler(request: httpx.Request) -> httpx.Response: + return httpx.Response(200, content=body_bytes) + return handler +``` + +Now migrate the seven streaming tests. The pattern is: each currently does + +```python + d = HfS3StreamDownloader(settings=settings) + monkeypatch.setattr(d, "_make_http_client", + lambda: httpx.AsyncClient(transport=, ...)) +``` + +Replace each with a single line: + +```python + d = HfS3StreamDownloader(settings=settings, + client=make_fake_controller_client()) +``` + +Apply per test: + +`test_downloader_streams_hf_to_s3_full_pipeline` — replace lines 100-106 with: + +```python + d = HfS3StreamDownloader(settings=settings, + client=make_fake_controller_client(_hf_handler(body))) +``` + +`test_downloader_small_file_single_part` — replace lines 133-137 with: + +```python + d = HfS3StreamDownloader(settings=settings, + client=make_fake_controller_client(_hf_handler(body))) +``` + +`test_downloader_exact_5mb_yields_one_part` — replace lines 159-163 with: + +```python + d = HfS3StreamDownloader(settings=settings, + client=make_fake_controller_client(_hf_handler(body))) +``` + +`test_downloader_404_fails_fast_no_multipart` — this test defines its own `handler` (returns 404) and builds a `transport`. Replace lines 179-188 (the `def handler` ... through the `monkeypatch.setattr`) with: + +```python + def handler(request: httpx.Request) -> httpx.Response: + return httpx.Response(404, content=b"not found") + + settings = ExecutorSettings(id="host-w4-worker-x", bearer_token="t") + d = HfS3StreamDownloader(settings=settings, + client=make_fake_controller_client(handler)) +``` + +`test_downloader_aborts_multipart_on_mid_stream_drop` — keep the `streaming_body` async generator and `handler` (lines 215-219); replace lines 221-228 (`transport = ...` through `monkeypatch.setattr`) with: + +```python + settings = ExecutorSettings(id="host-w4-worker-mid", bearer_token="t") + d = HfS3StreamDownloader(settings=settings, + client=make_fake_controller_client(handler)) +``` + +`test_downloader_handles_zero_byte_file` — keep `def handler` returning empty 200 (lines 254-255); replace lines 256-263 (`transport = ...` through `monkeypatch.setattr`) with: + +```python + settings = ExecutorSettings(id="host-w4-worker-zero", bearer_token="t") + d = HfS3StreamDownloader(settings=settings, + client=make_fake_controller_client(handler)) +``` + +`test_downloader_retries_transient_5xx` — keep the `call_count` closure + `handler` (lines 284-291); replace lines 293-298 (`settings = ...` through `monkeypatch.setattr`) with: + +```python + settings = ExecutorSettings(id="host-w4-worker-r", bearer_token="t") + d = HfS3StreamDownloader(settings=settings, + client=make_fake_controller_client(handler)) +``` + +After these edits, `monkeypatch` is no longer used by `test_downloader_404_fails_fast_no_multipart`, `test_downloader_aborts_multipart_on_mid_stream_drop`, `test_downloader_handles_zero_byte_file`, and `test_downloader_retries_transient_5xx` for the HF seam — but each still uses `monkeypatch` via the `s3_bucket` fixture (which calls `monkeypatch.setenv`), so the `monkeypatch` parameter stays in every signature. Leave the signatures unchanged. + +- [ ] **Step 3: Run `test_downloader.py` to verify it fails** + +Run: `uv run pytest tests/executor/test_downloader.py -v` +Expected: FAIL — `TypeError: HfS3StreamDownloader.__init__() got an unexpected keyword argument 'client'`. + +- [ ] **Step 4: Rewire `HfS3StreamDownloader`** + +In `src/dlw/executor/downloader.py`: + +Replace the imports block (lines 12-32): + +```python +from __future__ import annotations + +import asyncio +import hashlib +import logging +from typing import Any + +import httpx + +from dlw.executor._io import ( + _HTTP_CHUNK_BYTES, + _TRANSIENT_RETRY, + compose_key as _compose_key_io, + make_http_client, + make_s3_client, + upload_part as _upload_part_io, +) +from dlw.executor.config import ExecutorSettings +from dlw.executor.types import Assignment, DownloadResult # re-exported for callers +from dlw.schemas.storage import StorageConfig +``` + +with: + +```python +from __future__ import annotations + +import asyncio +import hashlib +import logging +from typing import Any + +from dlw.executor._io import ( + _HTTP_CHUNK_BYTES, + _TRANSIENT_RETRY, + compose_key as _compose_key_io, + make_s3_client, + upload_part as _upload_part_io, +) +from dlw.executor.client import ControllerClient +from dlw.executor.config import ExecutorSettings +from dlw.executor.types import Assignment, DownloadResult # re-exported for callers +from dlw.schemas.storage import StorageConfig +``` + +(`import httpx` and `make_http_client` are removed — neither is used after the rewire.) + +Replace the `__init__` + `_make_http_client` block (lines 41-52): + +```python + def __init__(self, *, settings: ExecutorSettings) -> None: + self._s = settings + + def _compose_key(self, a: Assignment) -> str: + return _compose_key_io(a) + + def _make_s3_client(self, cfg: StorageConfig) -> Any: + return make_s3_client(self._s, cfg) + + def _make_http_client(self) -> httpx.AsyncClient: + """Test seam — overridden in unit tests via monkeypatch.""" + return make_http_client(self._s) +``` + +with: + +```python + def __init__(self, *, settings: ExecutorSettings, + client: ControllerClient) -> None: + self._s = settings + self._controller = client + + def _compose_key(self, a: Assignment) -> str: + return _compose_key_io(a) + + def _make_s3_client(self, cfg: StorageConfig) -> Any: + return make_s3_client(self._s, cfg) +``` + +Replace the HF-fetch portion of `_download_once` (lines 61-83) — change: + +```python + async def _download_once(self, *, assignment: Assignment) -> DownloadResult: + url = (f"{self._s.hf_endpoint.rstrip('/')}/{assignment.repo_id}" + f"/resolve/{assignment.revision}/{assignment.filename}") + s3 = self._make_s3_client(assignment.storage_config) + bucket = assignment.storage_config.bucket + key = self._compose_key(assignment) + part_size = self._s.multipart_part_size_bytes + + headers: dict[str, str] = {} + if self._s.hf_token: + headers["Authorization"] = f"Bearer {self._s.hf_token}" + + upload_id: str | None = None + sha = hashlib.sha256() + bytes_total = 0 + parts: list[dict[str, Any]] = [] + buf = bytearray() + part_no = 1 + + try: + async with self._make_http_client() as hc: + async with hc.stream("GET", url, headers=headers) as resp: + resp.raise_for_status() +``` + +to: + +```python + async def _download_once(self, *, assignment: Assignment) -> DownloadResult: + s3 = self._make_s3_client(assignment.storage_config) + bucket = assignment.storage_config.bucket + key = self._compose_key(assignment) + part_size = self._s.multipart_part_size_bytes + + upload_id: str | None = None + sha = hashlib.sha256() + bytes_total = 0 + parts: list[dict[str, Any]] = [] + buf = bytearray() + part_no = 1 + + try: + async with self._controller.stream_hf( + subtask_id=assignment.subtask_id, + assignment_token=assignment.assignment_token, + ) as resp: + resp.raise_for_status() +``` + +The rest of the method body (the `upload_id = await asyncio.to_thread(...)` line onward) stays exactly as-is. Note this removes one level of `async with` nesting (the old `async with self._make_http_client() as hc:` wrapper is gone) — re-indent the body that was under `async with hc.stream(...)` so it now sits directly under `async with self._controller.stream_hf(...)`. The indentation depth is unchanged because one `async with` replaced two. + +- [ ] **Step 5: Run `test_downloader.py` to verify it passes** + +Run: `uv run pytest tests/executor/test_downloader.py -v` +Expected: PASS (all 10 cases). + +- [ ] **Step 6: Commit** + +```bash +git add tests/conftest.py tests/executor/test_downloader.py src/dlw/executor/downloader.py +git commit -m "feat(executor): HfS3StreamDownloader fetches via controller proxy (W3b)" +``` + +--- + +### Task 6: Rewire `DirectOffsetDownloader` + `_resolve_size` range probe + +**Files:** +- Modify: `tests/executor/test_chunk_downloader.py` +- Modify: `src/dlw/executor/chunk_downloader.py` + +- [ ] **Step 1: Migrate + extend `tests/executor/test_chunk_downloader.py`** + +Replace the second imports block (lines 43-56) — add the conftest import: + +```python +import asyncio +import errno +import hashlib +import uuid +from typing import Any + +import boto3 +import httpx +from moto import mock_aws + +from dlw.executor.chunk_downloader import DirectOffsetDownloader +from dlw.executor.config import ExecutorSettings +from dlw.executor.types import Assignment +from dlw.schemas.storage import StorageConfig +from tests.conftest import make_fake_controller_client +``` + +Replace the `_mock_transport_for_synthetic` helper (lines 65-78) with a handler-returning version: + +```python +def _synthetic_hf_handler(): + """httpx handler: HTTP 206 Partial Content reading the Range header.""" + def handler(request: httpx.Request) -> httpx.Response: + rng = request.headers.get("Range", "") + assert rng.startswith("bytes="), f"unexpected Range header: {rng!r}" + a, b = rng.removeprefix("bytes=").split("-") + start, end = int(a), int(b) + body = _SYNTHETIC[start:end + 1] + return httpx.Response( + status_code=206, + content=body, + headers={"Content-Length": str(len(body))}, + ) + return handler +``` + +Replace the `chunk_settings` fixture (lines 81-92) — drop the now-deleted `hf_endpoint` kwarg: + +```python +@pytest.fixture +def chunk_settings(tmp_path) -> ExecutorSettings: + return ExecutorSettings( + id="ex-chunk-test", + bearer_token="t", + chunk_size_bytes=_CHUNK_SIZE, + chunk_concurrency=2, + parts_dir_path=str(tmp_path / "parts"), + s3_region="us-east-1", + s3_endpoint_url=None, + ) +``` + +In `test_pass1_pass2_happy_path_with_moto` — replace lines 96-104 (the `from dlw.executor import _io as _io_mod` ... through the `monkeypatch.setattr`) with nothing (delete them), and replace the `Assignment(...)` block (lines 112-121) + the `DirectOffsetDownloader(settings=chunk_settings)` line (127) so the function reads: + +```python +async def test_pass1_pass2_happy_path_with_moto(chunk_settings) -> None: + """Full pipeline: 4 chunks via the fake controller proxy -> moto multipart + -> sha256 matches.""" + storage_config = StorageConfig( + bucket="test-bucket", region="us-east-1", endpoint_url=None, + access_key_id="dummy", secret_access_key="dummy", + key_prefix="phase1", + ) + + sub_id = uuid.uuid4() + a = Assignment( + subtask_id=sub_id, + task_id=uuid.uuid4(), + assignment_token=uuid.uuid4(), + repo_id="owner/repo", + revision="b" * 40, + filename="model.bin", + file_size=_FILE_SIZE, + expected_sha256=None, + storage_config=storage_config, + ) + + with mock_aws(): + s3 = boto3.client("s3", region_name="us-east-1") + s3.create_bucket(Bucket="test-bucket") + + d = DirectOffsetDownloader( + settings=chunk_settings, + client=make_fake_controller_client(_synthetic_hf_handler()), + ) + result = await d.download(assignment=a) + + assert result.bytes_written == _FILE_SIZE + assert result.actual_sha256 == _EXPECTED_SHA + assert result.s3_key.endswith("model.bin") + + from dlw.executor.parts_dir import parts_dir_for + assert not parts_dir_for(chunk_settings.parts_dir_path, sub_id).exists() +``` + +(Note: the `monkeypatch` parameter is dropped from this test's signature — it no longer monkeypatches anything.) + +In `test_pass1_enospc_raises_disk_full_and_leaks_parts` — keep the `_NoSpaceWriter` + the `monkeypatch.setattr(cd_mod.DirectOffsetDownloader, "_open_writer", ...)` (that monkeypatch stays — it injects the ENOSPC writer). Replace the HF-transport monkeypatch (lines 144-149, `transport = _mock_transport_for_synthetic()` through the `monkeypatch.setattr(_io_mod, "make_http_client", ...)`) with nothing (delete them); also delete the now-unused `from dlw.executor import _io as _io_mod` import on line 143. Then replace the `Assignment(...)` block (lines 169-178) and the `DirectOffsetDownloader(settings=chunk_settings)` line (180) so the function reads: + +```python +async def test_pass1_enospc_raises_disk_full_and_leaks_parts( + chunk_settings, monkeypatch, +) -> None: + """Inject ENOSPC into pass-1 chunk write -> DiskFullError + parts NOT cleaned.""" + from dlw.executor import chunk_downloader as cd_mod + + class _NoSpaceWriter: + def write(self, data): + raise OSError(errno.ENOSPC, "No space left on device") + def __enter__(self): return self + def __exit__(self, *_): return False + + monkeypatch.setattr( + cd_mod.DirectOffsetDownloader, + "_open_writer", + staticmethod(lambda path: _NoSpaceWriter()), + ) + + storage_config = StorageConfig( + bucket="test-bucket", region="us-east-1", endpoint_url=None, + access_key_id="dummy", secret_access_key="dummy", + key_prefix="phase1", + ) + sub_id = uuid.uuid4() + a = Assignment( + subtask_id=sub_id, + task_id=uuid.uuid4(), + assignment_token=uuid.uuid4(), + repo_id="owner/repo", + revision="b" * 40, + filename="model.bin", + file_size=_FILE_SIZE, + expected_sha256=None, + storage_config=storage_config, + ) + + d = DirectOffsetDownloader( + settings=chunk_settings, + client=make_fake_controller_client(_synthetic_hf_handler()), + ) + from dlw.executor.chunk_downloader import DiskFullError + with pytest.raises(DiskFullError): + await d.download(assignment=a) + + from dlw.executor.parts_dir import parts_dir_for + assert parts_dir_for(chunk_settings.parts_dir_path, sub_id).exists() +``` + +Append two new `_resolve_size` tests at the end of the file: + +```python +async def test_resolve_size_via_range_probe(chunk_settings) -> None: + """When file_size is None, _resolve_size does a bytes=0-0 probe and reads + Content-Range to recover the total size.""" + def handler(request: httpx.Request) -> httpx.Response: + assert request.headers.get("Range") == "bytes=0-0" + return httpx.Response( + 206, content=b"\x00", + headers={"Content-Range": "bytes 0-0/123456", "Content-Length": "1"}, + ) + + d = DirectOffsetDownloader( + settings=chunk_settings, + client=make_fake_controller_client(handler), + ) + a = Assignment( + subtask_id=uuid.uuid4(), task_id=uuid.uuid4(), + assignment_token=uuid.uuid4(), + repo_id="o/r", revision="b" * 40, filename="big.bin", + file_size=None, expected_sha256=None, + storage_config=StorageConfig( + bucket="b", region="us-east-1", endpoint_url=None, + key_prefix="p", + ), + ) + resolved = await d._resolve_size(a) + assert resolved.file_size == 123456 + + +async def test_resolve_size_falls_back_to_content_length(chunk_settings) -> None: + """If HF answers a 200 (no Content-Range), _resolve_size uses Content-Length.""" + def handler(request: httpx.Request) -> httpx.Response: + return httpx.Response( + 200, content=b"\x00" * 10, + headers={"Content-Length": "777"}, + ) + + d = DirectOffsetDownloader( + settings=chunk_settings, + client=make_fake_controller_client(handler), + ) + a = Assignment( + subtask_id=uuid.uuid4(), task_id=uuid.uuid4(), + assignment_token=uuid.uuid4(), + repo_id="o/r", revision="b" * 40, filename="big.bin", + file_size=None, expected_sha256=None, + storage_config=StorageConfig( + bucket="b", region="us-east-1", endpoint_url=None, + key_prefix="p", + ), + ) + resolved = await d._resolve_size(a) + assert resolved.file_size == 777 +``` + +- [ ] **Step 2: Run `test_chunk_downloader.py` to verify it fails** + +Run: `uv run pytest tests/executor/test_chunk_downloader.py -v` +Expected: FAIL — `TypeError: DirectOffsetDownloader.__init__() got an unexpected keyword argument 'client'`. + +- [ ] **Step 3: Rewire `DirectOffsetDownloader`** + +In `src/dlw/executor/chunk_downloader.py`: + +Replace the imports block (lines 6-30): + +```python +from __future__ import annotations + +import asyncio +import dataclasses +import errno +import hashlib +import logging +import math +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +import httpx + +from dlw.executor import _io as _io_mod +from dlw.executor._io import ( + _HTTP_CHUNK_BYTES, + _TRANSIENT_RETRY, + compose_key, + make_s3_client, + upload_part, +) +from dlw.executor.config import ExecutorSettings +from dlw.executor.parts_dir import cleanup_parts_dir, parts_dir_for +from dlw.executor.types import Assignment, DownloadResult +``` + +with: + +```python +from __future__ import annotations + +import asyncio +import dataclasses +import errno +import hashlib +import logging +import math +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +from dlw.executor._io import ( + _HTTP_CHUNK_BYTES, + _TRANSIENT_RETRY, + compose_key, + make_s3_client, + upload_part, +) +from dlw.executor.client import ControllerClient +from dlw.executor.config import ExecutorSettings +from dlw.executor.parts_dir import cleanup_parts_dir, parts_dir_for +from dlw.executor.types import Assignment, DownloadResult +``` + +(`import httpx` and `from dlw.executor import _io as _io_mod` are removed — neither is used after the rewire.) + +Replace `__init__` (lines 60-63): + +```python + def __init__(self, *, settings: ExecutorSettings) -> None: + self._s = settings + assert settings.chunk_size_bytes >= 5 * 1024 * 1024, \ + f"chunk_size_bytes ({settings.chunk_size_bytes}) < 5 MiB" +``` + +with: + +```python + def __init__(self, *, settings: ExecutorSettings, + client: ControllerClient) -> None: + self._s = settings + self._controller = client + assert settings.chunk_size_bytes >= 5 * 1024 * 1024, \ + f"chunk_size_bytes ({settings.chunk_size_bytes}) < 5 MiB" +``` + +Replace `_resolve_size` (lines 70-82): + +```python + async def _resolve_size(self, a: Assignment) -> Assignment: + url = (f"{self._s.hf_endpoint.rstrip('/')}/{a.repo_id}" + f"/resolve/{a.revision}/{a.filename}") + headers: dict[str, str] = {} + if self._s.hf_token: + headers["Authorization"] = f"Bearer {self._s.hf_token}" + async with _io_mod.make_http_client(self._s) as hc: + resp = await hc.head(url, headers=headers) + resp.raise_for_status() + cl = resp.headers.get("Content-Length") + if cl is None: + raise RuntimeError("file_size unresolvable: HEAD returned no Content-Length") + return dataclasses.replace(a, file_size=int(cl)) +``` + +with: + +```python + async def _resolve_size(self, a: Assignment) -> Assignment: + """W3b: the proxy is GET-only, so probe size with a bytes=0-0 range + request and read Content-Range (`bytes 0-0/`). Fall back to + Content-Length if HF answered a full 200 instead of a 206.""" + async with self._controller.stream_hf( + subtask_id=a.subtask_id, + assignment_token=a.assignment_token, + range_header="bytes=0-0", + ) as resp: + resp.raise_for_status() + content_range = resp.headers.get("Content-Range") + content_length = resp.headers.get("Content-Length") + if content_range and "/" in content_range: + total = content_range.rsplit("/", 1)[1].strip() + if total.isdigit(): + return dataclasses.replace(a, file_size=int(total)) + if content_length is None: + raise RuntimeError( + "file_size unresolvable: no Content-Range or Content-Length" + ) + return dataclasses.replace(a, file_size=int(content_length)) +``` + +Replace `_pass1_parallel` (lines 101-109): + +```python + async def _pass1_parallel( + self, a: Assignment, plans: list[ChunkPlan], dest_dir: Path, + ) -> None: + sem = asyncio.Semaphore(self._s.chunk_concurrency) + async with _io_mod.make_http_client(self._s) as hc: + async def one(plan: ChunkPlan) -> None: + async with sem: + await self._download_one_chunk(hc, a, plan, dest_dir) + await asyncio.gather(*(one(p) for p in plans)) +``` + +with: + +```python + async def _pass1_parallel( + self, a: Assignment, plans: list[ChunkPlan], dest_dir: Path, + ) -> None: + sem = asyncio.Semaphore(self._s.chunk_concurrency) + + async def one(plan: ChunkPlan) -> None: + async with sem: + await self._download_one_chunk(a, plan, dest_dir) + + await asyncio.gather(*(one(p) for p in plans)) +``` + +Replace `_download_one_chunk` (lines 111-136): + +```python + @_TRANSIENT_RETRY + async def _download_one_chunk( + self, hc: httpx.AsyncClient, a: Assignment, + plan: ChunkPlan, dest_dir: Path, + ) -> None: + url = (f"{self._s.hf_endpoint.rstrip('/')}/{a.repo_id}" + f"/resolve/{a.revision}/{a.filename}") + headers = {"Range": f"bytes={plan.offset}-{plan.offset + plan.length - 1}"} + if self._s.hf_token: + headers["Authorization"] = f"Bearer {self._s.hf_token}" + async with hc.stream("GET", url, headers=headers) as resp: + resp.raise_for_status() + target = dest_dir / f"{plan.index}.bin" + try: + with self._open_writer(target) as f: + async for buf in resp.aiter_bytes(_HTTP_CHUNK_BYTES): + try: + f.write(buf) + except OSError as ex: + if ex.errno == errno.ENOSPC: + raise DiskFullError( + f"ENOSPC writing chunk {plan.index}" + ) from ex + raise + except DiskFullError: + raise +``` + +with: + +```python + @_TRANSIENT_RETRY + async def _download_one_chunk( + self, a: Assignment, plan: ChunkPlan, dest_dir: Path, + ) -> None: + range_header = f"bytes={plan.offset}-{plan.offset + plan.length - 1}" + async with self._controller.stream_hf( + subtask_id=a.subtask_id, + assignment_token=a.assignment_token, + range_header=range_header, + ) as resp: + resp.raise_for_status() + target = dest_dir / f"{plan.index}.bin" + try: + with self._open_writer(target) as f: + async for buf in resp.aiter_bytes(_HTTP_CHUNK_BYTES): + try: + f.write(buf) + except OSError as ex: + if ex.errno == errno.ENOSPC: + raise DiskFullError( + f"ENOSPC writing chunk {plan.index}" + ) from ex + raise + except DiskFullError: + raise +``` + +- [ ] **Step 4: Run `test_chunk_downloader.py` to verify it passes** + +Run: `uv run pytest tests/executor/test_chunk_downloader.py -v` +Expected: PASS (all cases — the 4 `plan_chunks`/`DiskFullError` unit tests, the 2 migrated pipeline tests, and the 2 new `_resolve_size` tests). + +- [ ] **Step 5: Commit** + +```bash +git add tests/executor/test_chunk_downloader.py src/dlw/executor/chunk_downloader.py +git commit -m "feat(executor): DirectOffsetDownloader fetches via controller proxy (W3b)" +``` + +--- + +### Task 7: Thread `assignment_token` through the runner, wire the CLI, delete executor HF config + +**Files:** +- Modify: `src/dlw/executor/runner.py:203-212` +- Modify: `src/dlw/executor/cli.py:38-47` +- Modify: `src/dlw/executor/config.py:35-37` +- Modify: `src/dlw/executor/_io.py:54-59` + +- [ ] **Step 1: Thread `assignment_token` into the `Assignment` the runner builds** + +In `src/dlw/executor/runner.py`, in `_execute_subtask`, change the `Assignment(...)` construction (lines 203-212) — add `assignment_token`: + +```python + assignment = Assignment( + subtask_id=sub_id, + task_id=uuid.UUID(subtask["task_id"]), + assignment_token=assignment_token, + repo_id=repo_id, + revision=revision, + filename=subtask["filename"], + file_size=subtask.get("file_size"), + expected_sha256=subtask.get("expected_sha256"), + storage_config=StorageConfig(**storage_config), + ) +``` + +(`assignment_token` is already a parameter of `_execute_subtask` — no signature change needed.) + +- [ ] **Step 2: Wire the CLI to pass the client into both downloaders** + +In `src/dlw/executor/cli.py`, change `_async_main` (lines 38-47): + +```python + settings = ExecutorSettings() + # W3a: the client starts without an AuthState; ExecutorRunner.run() does + # load_or_register and then calls client.update_auth() before any request. + client = ControllerClient(base_url=settings.controller_url) + stream = HfS3StreamDownloader(settings=settings) + chunk = DirectOffsetDownloader(settings=settings) + runner = ExecutorRunner( + settings=settings, client=client, + stream_downloader=stream, chunk_downloader=chunk, + ) +``` + +to: + +```python + settings = ExecutorSettings() + # W3a: the client starts without an AuthState; ExecutorRunner.run() does + # load_or_register and then calls client.update_auth() before any request. + # W3b: both downloaders fetch HF bytes through the same client's reverse + # proxy (stream_hf) — the executor never holds the HF token. + client = ControllerClient(base_url=settings.controller_url) + stream = HfS3StreamDownloader(settings=settings, client=client) + chunk = DirectOffsetDownloader(settings=settings, client=client) + runner = ExecutorRunner( + settings=settings, client=client, + stream_downloader=stream, chunk_downloader=chunk, + ) +``` + +- [ ] **Step 3: Delete the HF fields from `ExecutorSettings`** + +In `src/dlw/executor/config.py`, delete the W4 HF Hub block (lines 35-37): + +```python + # Phase 1 W4 — HF Hub + hf_endpoint: str = Field(default="https://huggingface.co") + hf_token: str | None = Field(default=None) +``` + +(Leave the `# Phase 1 W4 — S3 / S3-compatible` block and everything else intact.) + +- [ ] **Step 4: Delete the now-unused `make_http_client` from `_io.py`** + +In `src/dlw/executor/_io.py`, delete the `make_http_client` function (lines 54-59): + +```python +def make_http_client(settings: ExecutorSettings) -> httpx.AsyncClient: + """Test seam — overridable via monkeypatch.""" + return httpx.AsyncClient( + timeout=settings.download_timeout_seconds, + follow_redirects=True, + ) +``` + +Leave `import httpx` in `_io.py` — it is still used by `_is_transient_http`. + +- [ ] **Step 5: Verify no remaining `Assignment(` / downloader construction sites were missed** + +Run: `git grep -n "Assignment(" -- src/ ; git grep -n "HfS3StreamDownloader(\|DirectOffsetDownloader(\|make_http_client\|_make_http_client" -- src/` +Expected: the only `Assignment(` in `src/` is `src/dlw/executor/runner.py` (now updated); no `make_http_client` / `_make_http_client` references remain anywhere in `src/`; the only downloader constructions are in `src/dlw/executor/cli.py` (now updated). + +- [ ] **Step 6: Run the executor + e2e-affected suites to verify nothing regressed** + +Run: `uv run pytest tests/executor/ -v` +Expected: PASS — `test_runner.py` is unaffected (it builds `Assignment` only via the real runner, which now supplies `assignment_token`; its downloaders are `MagicMock`s). `test_client.py`, `test_downloader.py`, `test_chunk_downloader.py` all pass. + +- [ ] **Step 7: Commit** + +```bash +git add src/dlw/executor/runner.py src/dlw/executor/cli.py src/dlw/executor/config.py src/dlw/executor/_io.py +git commit -m "feat(executor): wire proxy client end-to-end, delete ExecutorSettings HF fields (W3b)" +``` + +--- + +## Milestone 3 — Integration: lint, e2e, OpenAPI, docs, PR + +### Task 8: `check_no_hf_token_in_executor` invariant lint + +**Files:** +- Modify: `tools/lint_invariants.py` +- Create: `tests/tools/__init__.py`, `tests/tools/test_lint_no_hf_token.py` + +- [ ] **Step 1: Write the failing self-test** + +Create `tests/tools/__init__.py` (empty file). + +Create `tests/tools/test_lint_no_hf_token.py`: + +```python +"""Self-test for tools/lint_invariants.py::check_no_hf_token_in_executor (W3b).""" +from __future__ import annotations + +import importlib.util +from pathlib import Path + +_LINT_PATH = Path(__file__).resolve().parents[2] / "tools" / "lint_invariants.py" + + +def _load_lint(): + spec = importlib.util.spec_from_file_location("lint_invariants", _LINT_PATH) + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + return mod + + +def test_lint_flags_hf_token_in_executor(tmp_path, monkeypatch) -> None: + lint = _load_lint() + exec_dir = tmp_path / "src" / "dlw" / "executor" + exec_dir.mkdir(parents=True) + (exec_dir / "bad.py").write_text( + "hf_token = 'leaked-secret'\n", encoding="utf-8", + ) + monkeypatch.setattr(lint, "ROOT", tmp_path) + errors = lint.check_no_hf_token_in_executor() + assert any("bad.py" in e for e in errors), errors + + +def test_lint_passes_clean_executor(tmp_path, monkeypatch) -> None: + lint = _load_lint() + exec_dir = tmp_path / "src" / "dlw" / "executor" + exec_dir.mkdir(parents=True) + (exec_dir / "ok.py").write_text( + "# hf_token mentioned in a comment is fine\nx = 1\n", encoding="utf-8", + ) + monkeypatch.setattr(lint, "ROOT", tmp_path) + assert lint.check_no_hf_token_in_executor() == [] +``` + +- [ ] **Step 2: Run the self-test to verify it fails** + +Run: `uv run pytest tests/tools/test_lint_no_hf_token.py -v` +Expected: FAIL — `AttributeError: module 'lint_invariants' has no attribute 'check_no_hf_token_in_executor'`. + +- [ ] **Step 3: Add the lint check** + +In `tools/lint_invariants.py`, add this function right after `check_no_bearer_on_executor_routes` (after line 225, before `def main()`): + +```python +def check_no_hf_token_in_executor() -> list[str]: + """W3b §3.11: INVARIANT 2 — the tenant HF token must never reach an + executor. Forbid the `hf_token` / `hf_endpoint` identifiers anywhere in + src/dlw/executor/ (comment lines are exempt). After W3b, HF access goes + exclusively through the controller's reverse proxy.""" + errors: list[str] = [] + exec_dir = ROOT / "src" / "dlw" / "executor" + if not exec_dir.exists(): + return [] + for py in sorted(exec_dir.rglob("*.py")): + try: + text = py.read_text(encoding="utf-8") + except (OSError, UnicodeDecodeError) as e: + errors.append(f"Cannot read {py.relative_to(ROOT)}: {e}") + continue + for lineno, line in enumerate(text.splitlines(), start=1): + if line.strip().startswith("#"): + continue + if "hf_token" in line or "hf_endpoint" in line: + errors.append( + f"{py.relative_to(ROOT)}:{lineno}: " + f"'hf_token'/'hf_endpoint' forbidden in the executor package " + f"(INVARIANT 2 — HF access goes through the controller proxy)" + ) + return errors +``` + +Then in `main()`, add it to the `failures.extend(...)` block — change: + +```python + failures.extend(check_executor_status_domain()) + failures.extend(check_subtask_status_domain()) + failures.extend(check_task_status_domain()) + failures.extend(check_d10_host_affinity_test_owner()) + failures.extend(check_no_bearer_on_executor_routes()) +``` + +to: + +```python + failures.extend(check_executor_status_domain()) + failures.extend(check_subtask_status_domain()) + failures.extend(check_task_status_domain()) + failures.extend(check_d10_host_affinity_test_owner()) + failures.extend(check_no_bearer_on_executor_routes()) + failures.extend(check_no_hf_token_in_executor()) +``` + +- [ ] **Step 4: Run the self-test to verify it passes** + +Run: `uv run pytest tests/tools/test_lint_no_hf_token.py -v` +Expected: PASS (2 passed). + +- [ ] **Step 5: Run the real lint against the production tree** + +Run: `uv run python tools/lint_invariants.py` +Expected: exit code 0, prints `OK: ...` — the new check finds zero `hf_token`/`hf_endpoint` references in `src/dlw/executor/` because Tasks 5/6/7 removed them all. If it reports a violation, fix the offending line in the executor package before continuing. + +- [ ] **Step 6: Commit** + +```bash +git add tools/lint_invariants.py tests/tools/__init__.py tests/tools/test_lint_no_hf_token.py +git commit -m "feat(lint): check_no_hf_token_in_executor — lock INVARIANT 2 (W3b)" +``` + +--- + +### Task 9: e2e rewire, OpenAPI, operator runbook, PR + +**Files:** +- Modify: `tests/e2e/test_executor_e2e.py:181-187` +- Modify: `api/openapi.yaml` +- Modify: `docs/operator/executor-runbook.md` + +- [ ] **Step 1: Rewire the e2e — HF MockTransport moves to the controller side** + +In `tests/e2e/test_executor_e2e.py`, the executor downloader currently injects the HF transport directly into the downloader's `_make_http_client`. After W3b, the executor fetches through the controller proxy, so the HF MockTransport must be installed on the **controller's** HF client factory instead. + +Replace this block (lines 181-187): + +```python + downloader = HfS3StreamDownloader(settings=settings) + # Inject the HF transport into the downloader's http client factory + downloader._make_http_client = lambda: httpx.AsyncClient( + transport=hf_transport, + timeout=settings.download_timeout_seconds, + follow_redirects=True, + ) +``` + +with: + +```python + downloader = HfS3StreamDownloader( + settings=settings, client=executor_client, + ) + # W3b: the executor fetches HF bytes through the controller's + # reverse proxy. Install the HF MockTransport on the controller's + # HF client factory (not the downloader). + import dlw.api.hf_proxy as _hf_proxy_mod + monkeypatch.setattr( + _hf_proxy_mod, "_make_hf_client", + lambda timeout_seconds: httpx.AsyncClient( + transport=hf_transport, follow_redirects=True, + ), + ) +``` + +(`executor_client` is the `ControllerClient` already constructed earlier in the test with `_transport=asgi_transport` + the `X-Client-Cert-PEM` bypass header — `stream_hf` rides on it straight to the ASGI controller. `monkeypatch` is already a parameter of `test_e2e_hf_to_s3_full_pipeline`.) + +- [ ] **Step 2: Run the e2e to verify it passes** + +Run: `uv run pytest tests/e2e/test_executor_e2e.py -v` +Expected: PASS — the runner downloads both files (`config.json`, `model.safetensors`) HF→controller-proxy→executor→S3, the task ends `succeeded`, both objects land in moto S3. + +- [ ] **Step 3: Document the endpoint in OpenAPI** + +In `api/openapi.yaml`, add the `/hf-proxy/subtask/{subtaskId}` path under `paths:` (match the file's existing indentation and `tags`/security style — model it on the existing executor routes like `/executors/{executorId}/poll`): + +```yaml + /api/v1/hf-proxy/subtask/{subtaskId}: + get: + tags: [executors] + summary: HF reverse proxy — stream a subtask's file from HF + description: >- + Controller-side reverse proxy. Authenticates the executor (mTLS + JWT), + verifies subtask ownership (assignment_token + epoch fence), reconstructs + the HF URL from the subtask row, injects the tenant HF token, and streams + the bytes back. The executor never holds the HF token (INVARIANT 2). + operationId: hfProxySubtask + security: + - executorMtls: [] + executorJwt: [] + parameters: + - name: subtaskId + in: path + required: true + schema: + type: string + format: uuid + - name: X-Assignment-Token + in: header + required: true + schema: + type: string + format: uuid + - name: Range + in: header + required: false + schema: + type: string + responses: + '200': + description: Full file stream. + content: + application/octet-stream: + schema: + type: string + format: binary + '206': + description: Partial content (Range request). + content: + application/octet-stream: + schema: + type: string + format: binary + '401': + description: Missing or invalid mTLS / executor JWT. + '403': + description: NOT_YOUR_SUBTASK — authenticated executor does not own this subtask. + '404': + description: Subtask not found. + '409': + description: STALE_ASSIGNMENT or EPOCH_MISMATCH (fence-token mismatch). + '429': + description: Forwarded from HF — rate limited. + '503': + description: Forwarded from HF — upstream unavailable. +``` + +If `api/openapi.yaml` does not already define `executorMtls` / `executorJwt` security schemes, reuse whatever scheme names the existing W3a executor routes (`/executors/{executorId}/poll`, `/executors/{executorId}/heartbeat`) use — copy their `security:` block verbatim rather than inventing new scheme names. + +- [ ] **Step 4: Verify the OpenAPI spec lints clean** + +Run: `npx --yes @stoplight/spectral-cli lint api/openapi.yaml` +Expected: no errors (warnings about descriptions/examples are acceptable if the rest of the file already has them; there must be zero new **errors**). If spectral is configured via a project ruleset, the CI command in `.github/workflows/` is authoritative — run that exact command instead. + +- [ ] **Step 5: Update the operator runbook** + +In `docs/operator/executor-runbook.md`, add a short subsection noting the W3b change. Place it near the existing executor-configuration / environment-variable section: + +```markdown +## W3b — HF access via the controller reverse proxy + +As of Phase 2 W3b, executors no longer talk to huggingface.co directly. All +HF file downloads flow through the controller's reverse proxy +(`GET /api/v1/hf-proxy/subtask/{id}`), which injects the tenant HF token +server-side (INVARIANT 2 — the token never leaves the controller). + +**Removed executor environment variables** (delete them from `.env.executor` +and any deployment manifests — they are now ignored): + +- `DLW_EXECUTOR_HF_TOKEN` +- `DLW_EXECUTOR_HF_ENDPOINT` + +**Controller environment variables:** + +- `DLW_HF_TOKEN` — the tenant HF token (already used by the controller for + repo-metadata enumeration; W3b also uses it for the download proxy). +- `DLW_HF_ENDPOINT` — defaults to `https://huggingface.co`. +- `DLW_HF_PROXY_TIMEOUT_SECONDS` — per-request timeout for the proxy's HF + fetch (default 300, range 10–3600). + +**Operational tradeoff:** download bandwidth now flows through the controller +rather than executor→HF directly. For the internal beta this is acceptable; +global rate-limit coordination and an executor-local credential pool are +Phase 3 items. +``` + +- [ ] **Step 6: Run the full test suite + all lints** + +Run: `uv run pytest -q` +Expected: PASS — entire suite green. + +Run: `uv run python tools/lint_invariants.py` +Expected: exit 0, `OK: ...`. + +- [ ] **Step 7: Commit** + +```bash +git add tests/e2e/test_executor_e2e.py api/openapi.yaml docs/operator/executor-runbook.md +git commit -m "test(e2e)+docs: e2e through controller proxy, OpenAPI + operator runbook (W3b)" +``` + +- [ ] **Step 8: Push and open the PR** + +```bash +git push -u origin feat/phase-2-w3b-hf-reverse-proxy +gh pr create --title "Phase 2 W3b — HF reverse proxy (SEC-02 / INVARIANT 2)" --body "$(cat <<'EOF' +## Summary +- New controller-side `GET /api/v1/hf-proxy/subtask/{id}` reverse proxy: mTLS+JWT auth, ownership + assignment_token + epoch fence verification, HF URL reconstruction, tenant-token injection, streaming passthrough with a header allowlist. +- Executor downloaders (`HfS3StreamDownloader`, `DirectOffsetDownloader`) now fetch HF bytes through `ControllerClient.stream_hf` instead of calling huggingface.co directly; `ExecutorSettings.hf_token`/`hf_endpoint` deleted. +- New `check_no_hf_token_in_executor` invariant lint locks INVARIANT 2 for the executor package. Zero schema changes / no alembic migration. + +## Test plan +- [ ] `uv run pytest tests/api/test_hf_proxy.py -v` — 9 proxy cases (streaming, token injection, URL reconstruction, Range, 429, 401, 404, 403 NOT_YOUR_SUBTASK, 409 STALE_ASSIGNMENT / EPOCH_MISMATCH) +- [ ] `uv run pytest tests/executor/ -v` — client `stream_hf`, both downloaders rewired, `_resolve_size` range probe, runner unaffected +- [ ] `uv run pytest tests/e2e/test_executor_e2e.py -v` — full HF→controller-proxy→executor→S3 path +- [ ] `uv run python tools/lint_invariants.py` — `check_no_hf_token_in_executor` passes on the production tree +- [ ] `uv run pytest -q` — full suite green + +🤖 Generated with [Claude Code](https://claude.com/claude-code) +EOF +)" +``` + +--- + +## Self-Review + +**1. Spec coverage** — every spec section maps to a task: +- Spec §1 Goal (close SEC-02 for the download path) → Tasks 2, 3, 5, 6, 7 collectively. +- Spec §3.1 `hf_proxy.py` router + `_make_hf_client` seam → Task 2 (+ verification chain Task 3). +- Spec §3.2 verification chain (4 checks) → Task 2 (404) + Task 3 (403/409/409). +- Spec §3.3 `main.py` router include → Task 2 Step 4. +- Spec §3.4 `Settings.hf_proxy_timeout_seconds` → Task 1. +- Spec §3.5 `ControllerClient.stream_hf` → Task 4. +- Spec §3.6 `Assignment.assignment_token` → Task 4. +- Spec §3.7 `HfS3StreamDownloader` rewire → Task 5. +- Spec §3.8 `DirectOffsetDownloader` rewire + `_resolve_size` probe → Task 6. +- Spec §3.9 runner + cli wiring → Task 7 Steps 1-2. +- Spec §3.10 delete `ExecutorSettings` HF fields → Task 7 Step 3. +- Spec §3.11 `check_no_hf_token_in_executor` lint → Task 8. +- Spec §4 zero schema changes → no alembic task exists (correct — nothing to add). +- Spec §5 wire format / config surface → Tasks 1, 2, 4, 7 + OpenAPI Task 9 Step 3. +- Spec §6 error matrix → covered by Task 2/3 tests + the downloaders' existing fail-closed behaviour (unchanged). +- Spec §7 testing strategy → Tasks 2, 3, 4, 6, 8 (new cases) + Tasks 5, 6, 9 (migrations). `make_fake_controller_client` conftest helper → Task 5 Step 1. +- Spec §8 acceptance criteria → all line items map to a task above. +- Spec §9 phasing (M1/M2/M3) → the three milestones here. +- `_io.py` `make_http_client` removal (spec §3.7 "verify during implementation") → Task 7 Step 4. + +**2. Placeholder scan** — no "TBD"/"TODO"/"handle edge cases"/"similar to Task N". Every code step shows complete code or a precise before/after block. The one judgement call left to the implementer (Task 9 Step 3: matching existing OpenAPI security-scheme names) is explicit and bounded — it says copy the W3a executor routes' block verbatim. + +**3. Type consistency** — `Assignment` field is `assignment_token: uuid.UUID` everywhere (types.py, runner, all test constructions, both downloaders read `assignment.assignment_token`). `ControllerClient.stream_hf(*, subtask_id, assignment_token, range_header=None)` — the signature in Task 4 matches every call site: `HfS3StreamDownloader._download_once` (Task 5, no `range_header`), `DirectOffsetDownloader._download_one_chunk` (Task 6, `range_header=`), `DirectOffsetDownloader._resolve_size` (Task 6, `range_header="bytes=0-0"`), `make_fake_controller_client._FakeControllerClient.stream_hf` (Task 5, same kwargs). The proxy's `_make_hf_client(timeout_seconds: int)` seam — monkeypatched consistently in `tests/api/test_hf_proxy.py` (Task 2), `tests/e2e/test_executor_e2e.py` (Task 9) with the matching one-arg signature. Both downloader `__init__` signatures are `(*, settings, client)` — matching the `cli.py` wiring (Task 7) and all test constructions (Tasks 5, 6). From 0d9b5719fc66c0940113fa7847498885eac0b533 Mon Sep 17 00:00:00 2001 From: l17728 Date: Thu, 14 May 2026 20:06:40 +0800 Subject: [PATCH 03/21] feat(config): add hf_proxy_timeout_seconds Setting (W3b) --- src/dlw/config.py | 3 +++ tests/test_config_w3b.py | 22 ++++++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 tests/test_config_w3b.py diff --git a/src/dlw/config.py b/src/dlw/config.py index 350a58b..bff44a3 100644 --- a/src/dlw/config.py +++ b/src/dlw/config.py @@ -36,6 +36,9 @@ class Settings(BaseSettings): controller_hostname: str = Field(default="dlw-controller") tls_trusted_proxy: bool = Field(default=False) + # Phase 2 W3b — HF reverse-proxy + hf_proxy_timeout_seconds: int = Field(default=300, ge=10, le=3600) + @property def db_url(self) -> str: auth = f"{self.db_user}:{self.db_password}" if self.db_password else self.db_user diff --git a/tests/test_config_w3b.py b/tests/test_config_w3b.py new file mode 100644 index 0000000..6805e08 --- /dev/null +++ b/tests/test_config_w3b.py @@ -0,0 +1,22 @@ +"""W3b: Settings.hf_proxy_timeout_seconds config field.""" +from __future__ import annotations + +import pytest +from pydantic import ValidationError + +from dlw.config import Settings + + +def test_settings_has_hf_proxy_timeout_default() -> None: + s = Settings() + assert s.hf_proxy_timeout_seconds == 300 + + +def test_settings_hf_proxy_timeout_rejects_below_min() -> None: + with pytest.raises(ValidationError): + Settings(hf_proxy_timeout_seconds=5) + + +def test_settings_hf_proxy_timeout_rejects_above_max() -> None: + with pytest.raises(ValidationError): + Settings(hf_proxy_timeout_seconds=4000) From 020d13c40d77216914f7dfe2da1ca7a67bb1786b Mon Sep 17 00:00:00 2001 From: l17728 Date: Thu, 14 May 2026 20:11:21 +0800 Subject: [PATCH 04/21] test(config): rename test_config_w3b->test_config, add env-override test (W3b) Co-Authored-By: Claude Sonnet 4.6 --- tests/{test_config_w3b.py => test_config.py} | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) rename tests/{test_config_w3b.py => test_config.py} (53%) diff --git a/tests/test_config_w3b.py b/tests/test_config.py similarity index 53% rename from tests/test_config_w3b.py rename to tests/test_config.py index 6805e08..1d0be0c 100644 --- a/tests/test_config_w3b.py +++ b/tests/test_config.py @@ -1,4 +1,4 @@ -"""W3b: Settings.hf_proxy_timeout_seconds config field.""" +"""Tests for the controller Settings class (dlw.config).""" from __future__ import annotations import pytest @@ -7,7 +7,8 @@ from dlw.config import Settings -def test_settings_has_hf_proxy_timeout_default() -> None: +def test_settings_has_hf_proxy_timeout_default(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("DLW_HF_PROXY_TIMEOUT_SECONDS", raising=False) s = Settings() assert s.hf_proxy_timeout_seconds == 300 @@ -20,3 +21,9 @@ def test_settings_hf_proxy_timeout_rejects_below_min() -> None: def test_settings_hf_proxy_timeout_rejects_above_max() -> None: with pytest.raises(ValidationError): Settings(hf_proxy_timeout_seconds=4000) + + +def test_settings_hf_proxy_timeout_reads_env(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("DLW_HF_PROXY_TIMEOUT_SECONDS", "600") + s = Settings() + assert s.hf_proxy_timeout_seconds == 600 From f8b7bcc6830cdc1333c7658709b1e54cf3fa8039 Mon Sep 17 00:00:00 2001 From: l17728 Date: Thu, 14 May 2026 20:18:52 +0800 Subject: [PATCH 05/21] =?UTF-8?q?feat(api):=20HF=20reverse-proxy=20endpoin?= =?UTF-8?q?t=20=E2=80=94=20streaming=20+=20token=20injection=20(W3b)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/dlw/api/hf_proxy.py | 83 +++++++++++++ src/dlw/main.py | 2 + tests/api/test_hf_proxy.py | 230 +++++++++++++++++++++++++++++++++++++ 3 files changed, 315 insertions(+) create mode 100644 src/dlw/api/hf_proxy.py create mode 100644 tests/api/test_hf_proxy.py diff --git a/src/dlw/api/hf_proxy.py b/src/dlw/api/hf_proxy.py new file mode 100644 index 0000000..120e9d2 --- /dev/null +++ b/src/dlw/api/hf_proxy.py @@ -0,0 +1,83 @@ +"""HF reverse-proxy — controller-side, injects the tenant HF token (SEC-02). + +The executor never holds the HF token (INVARIANT 2). It calls this proxy keyed +by subtask_id; the controller verifies ownership (assignment_token + epoch fence ++ confused-deputy guard — added in Task 3), reconstructs the HF URL from the +subtask row, injects Settings.hf_token, follows HF's 302->CDN redirect +server-side, and streams the bytes back. +""" +from __future__ import annotations + +import uuid + +import httpx +from fastapi import APIRouter, Depends, Header, HTTPException, Request +from fastapi.responses import StreamingResponse +from sqlalchemy.ext.asyncio import AsyncSession + +from dlw.api.tasks import _session +from dlw.auth.executor_jwt_dep import require_executor_jwt +from dlw.config import get_settings +from dlw.db.models.executor import Executor +from dlw.db.models.task import DownloadTask, FileSubTask + +router = APIRouter(prefix="/api/v1/hf-proxy", tags=["executors"]) + +_HF_HEADER_ALLOWLIST = frozenset({ + "content-length", "content-range", "content-type", + "accept-ranges", "etag", +}) + + +def _make_hf_client(timeout_seconds: int) -> httpx.AsyncClient: + """Test seam — monkeypatched in tests to inject an httpx.MockTransport.""" + return httpx.AsyncClient(follow_redirects=True, timeout=timeout_seconds) + + +@router.get("/subtask/{subtask_id}") +async def hf_proxy_subtask( + subtask_id: uuid.UUID, + request: Request, + x_assignment_token: str = Header(..., alias="X-Assignment-Token"), + auth_ex: Executor = Depends(require_executor_jwt), + session: AsyncSession = Depends(_session), +) -> StreamingResponse: + sub = await session.get(FileSubTask, subtask_id) + if sub is None: + raise HTTPException(status_code=404, detail="subtask not found") + + task = await session.get(DownloadTask, sub.task_id) + if task is None: # FK guarantees this won't happen + raise HTTPException(status_code=500, detail="parent task missing") + + settings = get_settings() + hf_url = (f"{settings.hf_endpoint.rstrip('/')}/{task.repo_id}" + f"/resolve/{task.revision}/{sub.filename}") + + hf_headers: dict[str, str] = {} + if settings.hf_token: + hf_headers["Authorization"] = f"Bearer {settings.hf_token}" + range_header = request.headers.get("Range") + if range_header: + hf_headers["Range"] = range_header + + hf_client = _make_hf_client(settings.hf_proxy_timeout_seconds) + hf_req = hf_client.build_request("GET", hf_url, headers=hf_headers) + hf_resp = await hf_client.send(hf_req, stream=True) + + fwd = { + k: v for k, v in hf_resp.headers.items() + if k.lower() in _HF_HEADER_ALLOWLIST + } + + async def _body(): + try: + async for chunk in hf_resp.aiter_bytes(64 * 1024): + yield chunk + finally: + await hf_resp.aclose() + await hf_client.aclose() + + return StreamingResponse( + _body(), status_code=hf_resp.status_code, headers=fwd, + ) diff --git a/src/dlw/main.py b/src/dlw/main.py index 2140901..e8a44dd 100644 --- a/src/dlw/main.py +++ b/src/dlw/main.py @@ -132,6 +132,8 @@ def create_app() -> FastAPI: app.include_router(executors_router) from dlw.api.subtasks import router as subtasks_router app.include_router(subtasks_router) + from dlw.api.hf_proxy import router as hf_proxy_router + app.include_router(hf_proxy_router) return app diff --git a/tests/api/test_hf_proxy.py b/tests/api/test_hf_proxy.py new file mode 100644 index 0000000..3ca7ae2 --- /dev/null +++ b/tests/api/test_hf_proxy.py @@ -0,0 +1,230 @@ +"""W3b: GET /api/v1/hf-proxy/subtask/{id} — controller-side HF reverse proxy.""" +from __future__ import annotations + +import uuid + +import httpx +import pytest +from httpx import ASGITransport, AsyncClient +from sqlalchemy import update +from sqlalchemy.ext.asyncio import async_sessionmaker + +from dlw.config import get_settings +from dlw.db.base import Base +from dlw.db.models.task import FileSubTask +from tests.conftest import ( + executor_request_headers, register_test_executor, signed_heartbeat_headers, +) + +_TOKEN = "hf-proxy-ui-token" +_ENROLL = "hf-proxy-enrollment-token" +_HF_TOKEN = "test-hf-token-xyz" + + +@pytest.fixture(scope="module", autouse=True) +async def _bootstrap(engine): + """Tables + minimal seed (tenant + project + user + storage).""" + from dlw.db.models.storage import StorageBackend + from dlw.db.models.tenant import Project, Tenant, User + async with engine.begin() as conn: + await conn.run_sync(Base.metadata.create_all) + factory = async_sessionmaker(engine, expire_on_commit=False) + async with factory() as s: + s.add(Tenant(id=1, slug="d", display_name="D")) + await s.flush() + s.add(Project(id=1, tenant_id=1, name="d")) + s.add(User(id=1, tenant_id=1, oidc_subject="d", + email="d@l", role="tenant_admin")) + s.add(StorageBackend(id=1, tenant_id=1, name="d", + backend_type="s3", config_encrypted=b"")) + await s.commit() + yield + async with engine.begin() as conn: + await conn.run_sync(Base.metadata.drop_all) + + +@pytest.fixture(autouse=True) +def _set_env(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setenv("DLW_BEARER_TOKEN", _TOKEN) + monkeypatch.setenv("DLW_TLS_TRUSTED_PROXY", "1") + monkeypatch.setenv("DLW_HF_TOKEN", _HF_TOKEN) + monkeypatch.setenv("DLW_HF_ENDPOINT", "https://huggingface.co") + get_settings.cache_clear() + yield + get_settings.cache_clear() + + +@pytest.fixture +def proxy_app(ephemeral_ca): + from dlw.auth.hmac_nonce import NonceStore + from dlw.main import create_app + app = create_app() + app.state.ca = ephemeral_ca["ca"] + app.state.jwt_keypair = ephemeral_ca["jwt_keypair"] + app.state.nonce_store = NonceStore(maxsize=1000, ttl_seconds=300) + app.state.enrollment_token = _ENROLL + return app + + +def _install_hf_mock(monkeypatch, handler): + """Point the proxy's HF client factory at an httpx.MockTransport(handler).""" + import dlw.api.hf_proxy as hf_proxy_mod + + def _fake_make_hf_client(timeout_seconds): + return httpx.AsyncClient( + transport=httpx.MockTransport(handler), follow_redirects=True, + ) + monkeypatch.setattr(hf_proxy_mod, "_make_hf_client", _fake_make_hf_client) + + +async def _create_task_and_claim(c, reg): + """POST a task (2 files via the _patch_hf_global autouse fixture), heartbeat + once to flip the executor joining->healthy (the scheduler only assigns work + to healthy/degraded executors), poll once as `reg`, and return + (subtask_id, assignment_token, filename).""" + r = await c.post("/api/v1/tasks", json={ + "repo_id": "deepseek-ai/DeepSeek-V3", + "revision": "a" * 40, + "storage_id": 1, + }, headers={"Authorization": f"Bearer {_TOKEN}"}) + assert r.status_code == 201, r.text + hb_body = b'{"health_score": 100, "parts_dir_bytes": 0, "disk_free_gb": 100}' + r = await c.post(f"/api/v1/executors/{reg['executor_id']}/heartbeat", + content=hb_body, + headers=signed_heartbeat_headers(reg, hb_body)) + assert r.status_code == 200, r.text + r = await c.post(f"/api/v1/executors/{reg['executor_id']}/poll", + headers=executor_request_headers(reg)) + assert r.status_code == 200, r.text + assert r.json()["assigned"] is True + sub = r.json()["subtask"] + return sub["id"], r.json()["assignment_token"], sub["filename"] + + +@pytest.mark.slow +async def test_proxy_streams_file_with_token_injected( + proxy_app, monkeypatch, +) -> None: + seen: dict = {} + + def hf_handler(request: httpx.Request) -> httpx.Response: + seen["auth"] = request.headers.get("authorization") + return httpx.Response(200, content=b"PROXIED-BYTES") + _install_hf_mock(monkeypatch, hf_handler) + + async with AsyncClient(transport=ASGITransport(app=proxy_app), + base_url="http://test") as c: + reg = await register_test_executor( + c, enrollment_token=_ENROLL, + executor_id="hp-worker-1", host_id="hp-host", + ) + sub_id, token, _ = await _create_task_and_claim(c, reg) + r = await c.get(f"/api/v1/hf-proxy/subtask/{sub_id}", headers={ + **executor_request_headers(reg), + "X-Assignment-Token": token, + }) + + assert r.status_code == 200, r.text + assert r.content == b"PROXIED-BYTES" + assert seen["auth"] == f"Bearer {_HF_TOKEN}" + + +@pytest.mark.slow +async def test_proxy_forwards_range_header(proxy_app, monkeypatch) -> None: + seen: dict = {} + + def hf_handler(request: httpx.Request) -> httpx.Response: + seen["range"] = request.headers.get("range") + return httpx.Response(206, content=b"RANGE", + headers={"Content-Range": "bytes 0-4/100"}) + _install_hf_mock(monkeypatch, hf_handler) + + async with AsyncClient(transport=ASGITransport(app=proxy_app), + base_url="http://test") as c: + reg = await register_test_executor( + c, enrollment_token=_ENROLL, + executor_id="hp-worker-2", host_id="hp-host", + ) + sub_id, token, _ = await _create_task_and_claim(c, reg) + r = await c.get(f"/api/v1/hf-proxy/subtask/{sub_id}", headers={ + **executor_request_headers(reg), + "X-Assignment-Token": token, + "Range": "bytes=0-4", + }) + + assert r.status_code == 206 + assert seen["range"] == "bytes=0-4" + assert r.headers["content-range"] == "bytes 0-4/100" + + +@pytest.mark.slow +async def test_proxy_reconstructs_url_from_subtask(proxy_app, monkeypatch) -> None: + seen: dict = {} + + def hf_handler(request: httpx.Request) -> httpx.Response: + seen["url"] = str(request.url) + return httpx.Response(200, content=b"OK") + _install_hf_mock(monkeypatch, hf_handler) + + async with AsyncClient(transport=ASGITransport(app=proxy_app), + base_url="http://test") as c: + reg = await register_test_executor( + c, enrollment_token=_ENROLL, + executor_id="hp-worker-3", host_id="hp-host", + ) + sub_id, token, filename = await _create_task_and_claim(c, reg) + r = await c.get(f"/api/v1/hf-proxy/subtask/{sub_id}", headers={ + **executor_request_headers(reg), + "X-Assignment-Token": token, + }) + + assert r.status_code == 200 + expected = (f"https://huggingface.co/deepseek-ai/DeepSeek-V3" + f"/resolve/{'a' * 40}/{filename}") + assert seen["url"] == expected + + +@pytest.mark.slow +async def test_proxy_forwards_hf_429(proxy_app, monkeypatch) -> None: + def hf_handler(request: httpx.Request) -> httpx.Response: + return httpx.Response(429, content=b"rate limited") + _install_hf_mock(monkeypatch, hf_handler) + + async with AsyncClient(transport=ASGITransport(app=proxy_app), + base_url="http://test") as c: + reg = await register_test_executor( + c, enrollment_token=_ENROLL, + executor_id="hp-worker-4", host_id="hp-host", + ) + sub_id, token, _ = await _create_task_and_claim(c, reg) + r = await c.get(f"/api/v1/hf-proxy/subtask/{sub_id}", headers={ + **executor_request_headers(reg), + "X-Assignment-Token": token, + }) + + assert r.status_code == 429 + + +@pytest.mark.slow +async def test_proxy_rejects_unauthenticated(proxy_app) -> None: + async with AsyncClient(transport=ASGITransport(app=proxy_app), + base_url="http://test") as c: + r = await c.get(f"/api/v1/hf-proxy/subtask/{uuid.uuid4()}", headers={ + "X-Assignment-Token": str(uuid.uuid4()), + }) + assert r.status_code == 401 + + +@pytest.mark.slow +async def test_proxy_404_on_missing_subtask(proxy_app) -> None: + async with AsyncClient(transport=ASGITransport(app=proxy_app), + base_url="http://test") as c: + reg = await register_test_executor( + c, enrollment_token=_ENROLL, + executor_id="hp-worker-5", host_id="hp-host", + ) + r = await c.get(f"/api/v1/hf-proxy/subtask/{uuid.uuid4()}", headers={ + **executor_request_headers(reg), + "X-Assignment-Token": str(uuid.uuid4()), + }) + assert r.status_code == 404 From 5e84028168e7f3287ebddd2ca6618ac57d144303 Mon Sep 17 00:00:00 2001 From: l17728 Date: Thu, 14 May 2026 20:24:35 +0800 Subject: [PATCH 06/21] fix(api): close HF proxy client on send() failure + test cleanup fixture (W3b) --- src/dlw/api/hf_proxy.py | 11 ++++++++++- tests/api/test_hf_proxy.py | 10 ++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/dlw/api/hf_proxy.py b/src/dlw/api/hf_proxy.py index 120e9d2..b52905b 100644 --- a/src/dlw/api/hf_proxy.py +++ b/src/dlw/api/hf_proxy.py @@ -63,7 +63,16 @@ async def hf_proxy_subtask( hf_client = _make_hf_client(settings.hf_proxy_timeout_seconds) hf_req = hf_client.build_request("GET", hf_url, headers=hf_headers) - hf_resp = await hf_client.send(hf_req, stream=True) + try: + hf_resp = await hf_client.send(hf_req, stream=True) + except (httpx.TimeoutException, httpx.NetworkError) as e: + await hf_client.aclose() + raise HTTPException( + status_code=503, detail=f"HF upstream unreachable: {e}", + ) from e + except BaseException: + await hf_client.aclose() + raise fwd = { k: v for k, v in hf_resp.headers.items() diff --git a/tests/api/test_hf_proxy.py b/tests/api/test_hf_proxy.py index 3ca7ae2..363dde6 100644 --- a/tests/api/test_hf_proxy.py +++ b/tests/api/test_hf_proxy.py @@ -43,6 +43,16 @@ async def _bootstrap(engine): await conn.run_sync(Base.metadata.drop_all) +@pytest.fixture(autouse=True) +async def _cleanup_tasks(engine): + """Truncate task rows after each test so a later test's poll cannot claim a + prior test's leftover subtask (matches tests/api/test_subtasks.py).""" + yield + from sqlalchemy import text + async with engine.begin() as conn: + await conn.execute(text("TRUNCATE file_subtasks, download_tasks CASCADE")) + + @pytest.fixture(autouse=True) def _set_env(monkeypatch: pytest.MonkeyPatch): monkeypatch.setenv("DLW_BEARER_TOKEN", _TOKEN) From d956086eb30c39d6c3a78c4ab0a44b45b45b729c Mon Sep 17 00:00:00 2001 From: l17728 Date: Thu, 14 May 2026 20:26:52 +0800 Subject: [PATCH 07/21] feat(api): HF proxy ownership + assignment_token + epoch fence (W3b) Co-Authored-By: Claude Sonnet 4.6 --- src/dlw/api/hf_proxy.py | 17 ++++++++++ tests/api/test_hf_proxy.py | 64 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) diff --git a/src/dlw/api/hf_proxy.py b/src/dlw/api/hf_proxy.py index b52905b..cd7a144 100644 --- a/src/dlw/api/hf_proxy.py +++ b/src/dlw/api/hf_proxy.py @@ -45,6 +45,23 @@ async def hf_proxy_subtask( sub = await session.get(FileSubTask, subtask_id) if sub is None: raise HTTPException(status_code=404, detail="subtask not found") + if sub.executor_id != auth_ex.id: + raise HTTPException( + status_code=403, + detail={"code": "NOT_YOUR_SUBTASK", + "subtask_executor": sub.executor_id, + "authenticated": auth_ex.id}, + ) + if str(sub.assignment_token) != x_assignment_token: + raise HTTPException( + status_code=409, detail={"code": "STALE_ASSIGNMENT"}, + ) + if sub.executor_epoch != auth_ex.epoch: + raise HTTPException( + status_code=409, + detail={"code": "EPOCH_MISMATCH", + "expected": sub.executor_epoch, "got": auth_ex.epoch}, + ) task = await session.get(DownloadTask, sub.task_id) if task is None: # FK guarantees this won't happen diff --git a/tests/api/test_hf_proxy.py b/tests/api/test_hf_proxy.py index 363dde6..80f1935 100644 --- a/tests/api/test_hf_proxy.py +++ b/tests/api/test_hf_proxy.py @@ -238,3 +238,67 @@ async def test_proxy_404_on_missing_subtask(proxy_app) -> None: "X-Assignment-Token": str(uuid.uuid4()), }) assert r.status_code == 404 + + +@pytest.mark.slow +async def test_proxy_rejects_not_your_subtask(proxy_app) -> None: + """Authenticated executor B cannot proxy-fetch executor A's subtask.""" + async with AsyncClient(transport=ASGITransport(app=proxy_app), + base_url="http://test") as c: + reg_a = await register_test_executor( + c, enrollment_token=_ENROLL, + executor_id="hp-owner-a", host_id="hp-host", + ) + reg_b = await register_test_executor( + c, enrollment_token=_ENROLL, + executor_id="hp-owner-b", host_id="hp-host", + ) + sub_id, token, _ = await _create_task_and_claim(c, reg_a) + r = await c.get(f"/api/v1/hf-proxy/subtask/{sub_id}", headers={ + **executor_request_headers(reg_b), + "X-Assignment-Token": token, + }) + assert r.status_code == 403 + assert r.json()["detail"]["code"] == "NOT_YOUR_SUBTASK" + + +@pytest.mark.slow +async def test_proxy_rejects_stale_assignment_token(proxy_app) -> None: + async with AsyncClient(transport=ASGITransport(app=proxy_app), + base_url="http://test") as c: + reg = await register_test_executor( + c, enrollment_token=_ENROLL, + executor_id="hp-worker-stale", host_id="hp-host", + ) + sub_id, _token, _ = await _create_task_and_claim(c, reg) + r = await c.get(f"/api/v1/hf-proxy/subtask/{sub_id}", headers={ + **executor_request_headers(reg), + "X-Assignment-Token": str(uuid.uuid4()), # wrong token + }) + assert r.status_code == 409 + assert r.json()["detail"]["code"] == "STALE_ASSIGNMENT" + + +@pytest.mark.slow +async def test_proxy_rejects_epoch_mismatch(proxy_app, db_session) -> None: + async with AsyncClient(transport=ASGITransport(app=proxy_app), + base_url="http://test") as c: + reg = await register_test_executor( + c, enrollment_token=_ENROLL, + executor_id="hp-worker-epoch", host_id="hp-host", + ) + sub_id, token, _ = await _create_task_and_claim(c, reg) + # Corrupt the subtask's stamped epoch so it no longer matches the + # authenticated executor's current epoch. + await db_session.execute( + update(FileSubTask) + .where(FileSubTask.id == uuid.UUID(sub_id)) + .values(executor_epoch=99_999) + ) + await db_session.commit() + r = await c.get(f"/api/v1/hf-proxy/subtask/{sub_id}", headers={ + **executor_request_headers(reg), + "X-Assignment-Token": token, + }) + assert r.status_code == 409 + assert r.json()["detail"]["code"] == "EPOCH_MISMATCH" From e31b8073cef46cb9be21e390528b2e3d36601dc0 Mon Sep 17 00:00:00 2001 From: l17728 Date: Thu, 14 May 2026 20:32:54 +0800 Subject: [PATCH 08/21] refactor(api): explicit None guard on assignment_token + test comment (W3b) Co-Authored-By: Claude Sonnet 4.6 --- src/dlw/api/hf_proxy.py | 2 +- tests/api/test_hf_proxy.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/dlw/api/hf_proxy.py b/src/dlw/api/hf_proxy.py index cd7a144..62fc57b 100644 --- a/src/dlw/api/hf_proxy.py +++ b/src/dlw/api/hf_proxy.py @@ -52,7 +52,7 @@ async def hf_proxy_subtask( "subtask_executor": sub.executor_id, "authenticated": auth_ex.id}, ) - if str(sub.assignment_token) != x_assignment_token: + if sub.assignment_token is None or str(sub.assignment_token) != x_assignment_token: raise HTTPException( status_code=409, detail={"code": "STALE_ASSIGNMENT"}, ) diff --git a/tests/api/test_hf_proxy.py b/tests/api/test_hf_proxy.py index 80f1935..10f16f9 100644 --- a/tests/api/test_hf_proxy.py +++ b/tests/api/test_hf_proxy.py @@ -289,7 +289,9 @@ async def test_proxy_rejects_epoch_mismatch(proxy_app, db_session) -> None: ) sub_id, token, _ = await _create_task_and_claim(c, reg) # Corrupt the subtask's stamped epoch so it no longer matches the - # authenticated executor's current epoch. + # authenticated executor's current epoch. commit() (not flush()) is + # required: the proxy reads via its own _session dependency on a + # separate connection, so the update must be committed to be visible. await db_session.execute( update(FileSubTask) .where(FileSubTask.id == uuid.UUID(sub_id)) From dc378a539ecc88df2727adbbcc2050b526d5b923 Mon Sep 17 00:00:00 2001 From: l17728 Date: Thu, 14 May 2026 20:34:49 +0800 Subject: [PATCH 09/21] feat(executor): Assignment.assignment_token + ControllerClient.stream_hf (W3b) --- src/dlw/executor/client.py | 26 +++++++++++++++++++++++++ src/dlw/executor/types.py | 1 + tests/executor/test_client.py | 36 +++++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+) diff --git a/src/dlw/executor/client.py b/src/dlw/executor/client.py index 9f10a07..280289f 100644 --- a/src/dlw/executor/client.py +++ b/src/dlw/executor/client.py @@ -15,6 +15,7 @@ import secrets import time import uuid +from contextlib import asynccontextmanager from typing import Any, Self import httpx @@ -206,3 +207,28 @@ async def report( return await self._post_json( f"/api/v1/subtasks/{subtask_id}/report", body, self._auth_headers(), ) + + @asynccontextmanager + async def stream_hf( + self, + *, + subtask_id: uuid.UUID, + assignment_token: uuid.UUID, + range_header: str | None = None, + ): + """W3b: stream a file from HF via the controller reverse-proxy. Yields + the httpx streaming Response — callers consume resp.aiter_bytes() and + check resp.status_code, exactly as they did with a direct HF GET.""" + headers = { + **self._auth_headers(), + "X-Assignment-Token": str(assignment_token), + } + if range_header: + headers["Range"] = range_header + async with self._make_client() as client: + async with client.stream( + "GET", + f"/api/v1/hf-proxy/subtask/{subtask_id}", + headers=headers, + ) as resp: + yield resp diff --git a/src/dlw/executor/types.py b/src/dlw/executor/types.py index 2b6e540..a6264c8 100644 --- a/src/dlw/executor/types.py +++ b/src/dlw/executor/types.py @@ -16,6 +16,7 @@ class Assignment: """Slim payload passed from runner to downloader.""" subtask_id: uuid.UUID task_id: uuid.UUID + assignment_token: uuid.UUID repo_id: str revision: str filename: str diff --git a/tests/executor/test_client.py b/tests/executor/test_client.py index a80cd52..9ee583d 100644 --- a/tests/executor/test_client.py +++ b/tests/executor/test_client.py @@ -173,3 +173,39 @@ async def test_current_epoch_reads_auth_state(tmp_path) -> None: new = make_fake_auth_state(tmp_path, epoch=43) c.update_auth(new) assert c.current_epoch() == 43 + + +@pytest.mark.slow +async def test_stream_hf_attaches_auth_and_token_headers(tmp_path) -> None: + """stream_hf carries Authorization + X-Executor-Epoch + X-Assignment-Token, + forwards Range when given, and hits /api/v1/hf-proxy/subtask/{id}.""" + seen: dict = {} + body = b"hf-proxied-bytes" + + def handler(request: httpx.Request) -> httpx.Response: + seen["headers"] = {k.lower(): v for k, v in request.headers.items()} + seen["path"] = request.url.path + return httpx.Response(200, content=body) + + state = make_fake_auth_state( + tmp_path, executor_id="ex-stream", epoch=4, jwt="jwt-stream", + hmac_seed=_HMAC_SEED, + ) + sub_id = uuid.uuid4() + tok = uuid.uuid4() + async with ControllerClient( + base_url="http://test", auth_state=state, + _transport=httpx.MockTransport(handler), + ) as c: + async with c.stream_hf( + subtask_id=sub_id, assignment_token=tok, + range_header="bytes=0-1023", + ) as resp: + got = await resp.aread() + + assert got == body + assert seen["path"] == f"/api/v1/hf-proxy/subtask/{sub_id}" + assert seen["headers"]["authorization"] == "Bearer jwt-stream" + assert seen["headers"]["x-executor-epoch"] == "4" + assert seen["headers"]["x-assignment-token"] == str(tok) + assert seen["headers"]["range"] == "bytes=0-1023" From 6c5bc0c23a19aefba82c25f0d2b1150e5b8a3267 Mon Sep 17 00:00:00 2001 From: l17728 Date: Thu, 14 May 2026 20:40:45 +0800 Subject: [PATCH 10/21] refactor(executor): annotate stream_hf return type + test streaming path (W3b) --- src/dlw/executor/client.py | 8 ++++++-- tests/executor/test_client.py | 4 ++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/dlw/executor/client.py b/src/dlw/executor/client.py index 280289f..6875e04 100644 --- a/src/dlw/executor/client.py +++ b/src/dlw/executor/client.py @@ -15,6 +15,7 @@ import secrets import time import uuid +from collections.abc import AsyncIterator from contextlib import asynccontextmanager from typing import Any, Self @@ -215,10 +216,13 @@ async def stream_hf( subtask_id: uuid.UUID, assignment_token: uuid.UUID, range_header: str | None = None, - ): + ) -> AsyncIterator[httpx.Response]: """W3b: stream a file from HF via the controller reverse-proxy. Yields the httpx streaming Response — callers consume resp.aiter_bytes() and - check resp.status_code, exactly as they did with a direct HF GET.""" + check resp.status_code, exactly as they did with a direct HF GET. + + Unlike heartbeat/poll/report, this does NOT call raise_for_status() — + callers MUST inspect resp.status_code themselves (the downloaders do).""" headers = { **self._auth_headers(), "X-Assignment-Token": str(assignment_token), diff --git a/tests/executor/test_client.py b/tests/executor/test_client.py index 9ee583d..68c93b0 100644 --- a/tests/executor/test_client.py +++ b/tests/executor/test_client.py @@ -201,9 +201,9 @@ def handler(request: httpx.Request) -> httpx.Response: subtask_id=sub_id, assignment_token=tok, range_header="bytes=0-1023", ) as resp: - got = await resp.aread() + chunks = [chunk async for chunk in resp.aiter_bytes(8)] - assert got == body + assert b"".join(chunks) == body assert seen["path"] == f"/api/v1/hf-proxy/subtask/{sub_id}" assert seen["headers"]["authorization"] == "Bearer jwt-stream" assert seen["headers"]["x-executor-epoch"] == "4" From 651175a41813101bec0473ac9245561b11924f25 Mon Sep 17 00:00:00 2001 From: l17728 Date: Thu, 14 May 2026 20:45:59 +0800 Subject: [PATCH 11/21] feat(executor): HfS3StreamDownloader fetches via controller proxy (W3b) Co-Authored-By: Claude Sonnet 4.6 --- src/dlw/executor/downloader.py | 72 +++++++++++++----------------- tests/conftest.py | 28 ++++++++++++ tests/executor/test_downloader.py | 74 ++++++++++++------------------- 3 files changed, 88 insertions(+), 86 deletions(-) diff --git a/src/dlw/executor/downloader.py b/src/dlw/executor/downloader.py index 3dc8897..7ef913b 100644 --- a/src/dlw/executor/downloader.py +++ b/src/dlw/executor/downloader.py @@ -16,16 +16,14 @@ import logging from typing import Any -import httpx - from dlw.executor._io import ( _HTTP_CHUNK_BYTES, _TRANSIENT_RETRY, compose_key as _compose_key_io, - make_http_client, make_s3_client, upload_part as _upload_part_io, ) +from dlw.executor.client import ControllerClient from dlw.executor.config import ExecutorSettings from dlw.executor.types import Assignment, DownloadResult # re-exported for callers from dlw.schemas.storage import StorageConfig @@ -38,8 +36,10 @@ class HfS3StreamDownloader: """HF GET stream → S3 multipart upload, sha256 tee'd on the same bytes.""" - def __init__(self, *, settings: ExecutorSettings) -> None: + def __init__(self, *, settings: ExecutorSettings, + client: ControllerClient) -> None: self._s = settings + self._controller = client def _compose_key(self, a: Assignment) -> str: return _compose_key_io(a) @@ -47,10 +47,6 @@ def _compose_key(self, a: Assignment) -> str: def _make_s3_client(self, cfg: StorageConfig) -> Any: return make_s3_client(self._s, cfg) - def _make_http_client(self) -> httpx.AsyncClient: - """Test seam — overridden in unit tests via monkeypatch.""" - return make_http_client(self._s) - async def download(self, *, assignment: Assignment) -> DownloadResult: """Public entry — retries transient errors (5xx, network, timeout) × 3.""" @_TRANSIENT_RETRY @@ -59,17 +55,11 @@ async def _retry_wrapper() -> DownloadResult: return await _retry_wrapper() async def _download_once(self, *, assignment: Assignment) -> DownloadResult: - url = (f"{self._s.hf_endpoint.rstrip('/')}/{assignment.repo_id}" - f"/resolve/{assignment.revision}/{assignment.filename}") s3 = self._make_s3_client(assignment.storage_config) bucket = assignment.storage_config.bucket key = self._compose_key(assignment) part_size = self._s.multipart_part_size_bytes - headers: dict[str, str] = {} - if self._s.hf_token: - headers["Authorization"] = f"Bearer {self._s.hf_token}" - upload_id: str | None = None sha = hashlib.sha256() bytes_total = 0 @@ -78,37 +68,39 @@ async def _download_once(self, *, assignment: Assignment) -> DownloadResult: part_no = 1 try: - async with self._make_http_client() as hc: - async with hc.stream("GET", url, headers=headers) as resp: - resp.raise_for_status() - - upload_id = await asyncio.to_thread( - lambda: s3.create_multipart_upload( - Bucket=bucket, Key=key - )["UploadId"] - ) + async with self._controller.stream_hf( + subtask_id=assignment.subtask_id, + assignment_token=assignment.assignment_token, + ) as resp: + resp.raise_for_status() + + upload_id = await asyncio.to_thread( + lambda: s3.create_multipart_upload( + Bucket=bucket, Key=key + )["UploadId"] + ) - async for chunk in resp.aiter_bytes(chunk_size=_HTTP_CHUNK_BYTES): - sha.update(chunk) - bytes_total += len(chunk) - buf.extend(chunk) - while len(buf) >= part_size: - body = bytes(buf[:part_size]) - del buf[:part_size] - etag = await asyncio.to_thread( - self._upload_part, - s3, bucket, key, upload_id, part_no, body, - ) - parts.append({"PartNumber": part_no, "ETag": etag}) - part_no += 1 - - # last (possibly < part_size; allowed for last only) - if buf: + async for chunk in resp.aiter_bytes(chunk_size=_HTTP_CHUNK_BYTES): + sha.update(chunk) + bytes_total += len(chunk) + buf.extend(chunk) + while len(buf) >= part_size: + body = bytes(buf[:part_size]) + del buf[:part_size] etag = await asyncio.to_thread( self._upload_part, - s3, bucket, key, upload_id, part_no, bytes(buf), + s3, bucket, key, upload_id, part_no, body, ) parts.append({"PartNumber": part_no, "ETag": etag}) + part_no += 1 + + # last (possibly < part_size; allowed for last only) + if buf: + etag = await asyncio.to_thread( + self._upload_part, + s3, bucket, key, upload_id, part_no, bytes(buf), + ) + parts.append({"PartNumber": part_no, "ETag": etag}) # W5-D: 0-byte file → empty parts list would error S3 MalformedXML. # Abort the (unused) multipart and use put_object instead. diff --git a/tests/conftest.py b/tests/conftest.py index 552352a..e25002a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -273,3 +273,31 @@ def make_fake_auth_state( jwt=jwt, jwt_exp=far, cert_exp=far, hmac_seed=hmac_seed, cert_dir=_Path(str(cert_dir)), ) + + +def make_fake_controller_client(hf_handler): + """W3b test double for ControllerClient — its stream_hf() routes through an + httpx.MockTransport(hf_handler) instead of a real controller proxy. Lets + downloader tests simulate HF responses without a running controller. + hf_handler is a Callable[[httpx.Request], httpx.Response].""" + import httpx as _httpx + from contextlib import asynccontextmanager as _acm + + class _FakeControllerClient: + @_acm + async def stream_hf(self, *, subtask_id, assignment_token, + range_header=None): + transport = _httpx.MockTransport(hf_handler) + async with _httpx.AsyncClient( + transport=transport, base_url="http://fake-controller", + ) as client: + headers = {} + if range_header: + headers["Range"] = range_header + async with client.stream( + "GET", f"/api/v1/hf-proxy/subtask/{subtask_id}", + headers=headers, + ) as resp: + yield resp + + return _FakeControllerClient() diff --git a/tests/executor/test_downloader.py b/tests/executor/test_downloader.py index a3fdf6d..3e23278 100644 --- a/tests/executor/test_downloader.py +++ b/tests/executor/test_downloader.py @@ -20,6 +20,7 @@ HfS3StreamDownloader, StorageConfig, ) +from tests.conftest import make_fake_controller_client def _settings() -> ExecutorSettings: @@ -35,28 +36,35 @@ def _assignment(*, repo_id="o/r", revision="a" * 40, filename="config.json", return Assignment( subtask_id=_uuid.uuid4(), task_id=_uuid.uuid4(), + assignment_token=_uuid.uuid4(), repo_id=repo_id, revision=revision, filename=filename, file_size=4096, expected_sha256=None, storage_config=StorageConfig(bucket=bucket, key_prefix=key_prefix), ) +def _noop_client(): + return make_fake_controller_client( + lambda request: httpx.Response(200, content=b"") + ) + + def test_compose_key_includes_prefix_repo_revision_filename() -> None: - d = HfS3StreamDownloader(settings=_settings()) + d = HfS3StreamDownloader(settings=_settings(), client=_noop_client()) a = _assignment(filename="model.safetensors", key_prefix="phase1/") key = d._compose_key(a) assert key == "phase1/o/r/" + ("a" * 40) + "/model.safetensors" def test_compose_key_handles_empty_prefix() -> None: - d = HfS3StreamDownloader(settings=_settings()) + d = HfS3StreamDownloader(settings=_settings(), client=_noop_client()) a = _assignment(key_prefix="") key = d._compose_key(a) assert key.startswith("o/r/") def test_compose_key_strips_prefix_trailing_slash() -> None: - d = HfS3StreamDownloader(settings=_settings()) + d = HfS3StreamDownloader(settings=_settings(), client=_noop_client()) a = _assignment(key_prefix="phase1////") key = d._compose_key(a) # No double slashes, single separator @@ -78,11 +86,11 @@ def s3_bucket(monkeypatch: pytest.MonkeyPatch): yield bucket -def _make_hf_transport(body_bytes: bytes) -> httpx.MockTransport: - """Returns an httpx transport that streams body_bytes on the resolve URL.""" +def _hf_handler(body_bytes: bytes): + """Returns an httpx handler that streams body_bytes for any request.""" def handler(request: httpx.Request) -> httpx.Response: return httpx.Response(200, content=body_bytes) - return httpx.MockTransport(handler) + return handler @pytest.mark.slow @@ -97,13 +105,8 @@ async def test_downloader_streams_hf_to_s3_full_pipeline( id="host-w4-worker-1", bearer_token="t", s3_endpoint_url=None, # moto via env ) - d = HfS3StreamDownloader(settings=settings) - - # Inject httpx MockTransport via test-only seam - monkeypatch.setattr(d, "_make_http_client", - lambda: httpx.AsyncClient(transport=_make_hf_transport(body), - timeout=settings.download_timeout_seconds, - follow_redirects=True)) + d = HfS3StreamDownloader(settings=settings, + client=make_fake_controller_client(_hf_handler(body))) a = _assignment( filename="model.safetensors", key_prefix="phase1/", @@ -130,11 +133,8 @@ async def test_downloader_small_file_single_part( expected_sha = hashlib.sha256(body).hexdigest() settings = ExecutorSettings(id="host-w4-worker-2", bearer_token="t") - d = HfS3StreamDownloader(settings=settings) - monkeypatch.setattr(d, "_make_http_client", - lambda: httpx.AsyncClient(transport=_make_hf_transport(body), - timeout=settings.download_timeout_seconds, - follow_redirects=True)) + d = HfS3StreamDownloader(settings=settings, + client=make_fake_controller_client(_hf_handler(body))) a = _assignment(filename="config.json", bucket=s3_bucket) result = await d.download(assignment=a) @@ -156,11 +156,8 @@ async def test_downloader_exact_5mb_yields_one_part( expected_sha = hashlib.sha256(body).hexdigest() settings = ExecutorSettings(id="host-w4-worker-3", bearer_token="t") - d = HfS3StreamDownloader(settings=settings) - monkeypatch.setattr(d, "_make_http_client", - lambda: httpx.AsyncClient(transport=_make_hf_transport(body), - timeout=settings.download_timeout_seconds, - follow_redirects=True)) + d = HfS3StreamDownloader(settings=settings, + client=make_fake_controller_client(_hf_handler(body))) a = _assignment(filename="exact5.bin", bucket=s3_bucket) result = await d.download(assignment=a) @@ -178,14 +175,10 @@ async def test_downloader_404_fails_fast_no_multipart( """ def handler(request: httpx.Request) -> httpx.Response: return httpx.Response(404, content=b"not found") - transport = httpx.MockTransport(handler) settings = ExecutorSettings(id="host-w4-worker-x", bearer_token="t") - d = HfS3StreamDownloader(settings=settings) - monkeypatch.setattr(d, "_make_http_client", - lambda: httpx.AsyncClient(transport=transport, - timeout=settings.download_timeout_seconds, - follow_redirects=True)) + d = HfS3StreamDownloader(settings=settings, + client=make_fake_controller_client(handler)) a = _assignment(filename="x.bin", bucket=s3_bucket) with pytest.raises(httpx.HTTPStatusError): @@ -219,13 +212,9 @@ async def streaming_body(): def handler(request: httpx.Request) -> httpx.Response: return httpx.Response(200, content=streaming_body()) - transport = httpx.MockTransport(handler) settings = ExecutorSettings(id="host-w4-worker-mid", bearer_token="t") - d = HfS3StreamDownloader(settings=settings) - monkeypatch.setattr(d, "_make_http_client", - lambda: httpx.AsyncClient(transport=transport, - timeout=settings.download_timeout_seconds, - follow_redirects=True)) + d = HfS3StreamDownloader(settings=settings, + client=make_fake_controller_client(handler)) a = _assignment(filename="dropped.bin", bucket=s3_bucket) # ProtocolError is transient → tenacity retries × 3 → all fail → reraise. @@ -253,14 +242,10 @@ async def test_downloader_handles_zero_byte_file( """ def handler(request: httpx.Request) -> httpx.Response: return httpx.Response(200, content=b"") - transport = httpx.MockTransport(handler) settings = ExecutorSettings(id="host-w4-worker-zero", bearer_token="t") - d = HfS3StreamDownloader(settings=settings) - monkeypatch.setattr(d, "_make_http_client", - lambda: httpx.AsyncClient(transport=transport, - timeout=settings.download_timeout_seconds, - follow_redirects=True)) + d = HfS3StreamDownloader(settings=settings, + client=make_fake_controller_client(handler)) a = _assignment(filename="empty.bin", bucket=s3_bucket) result = await d.download(assignment=a) @@ -291,11 +276,8 @@ def handler(request: httpx.Request) -> httpx.Response: return httpx.Response(200, content=body) settings = ExecutorSettings(id="host-w4-worker-r", bearer_token="t") - d = HfS3StreamDownloader(settings=settings) - monkeypatch.setattr(d, "_make_http_client", - lambda: httpx.AsyncClient(transport=httpx.MockTransport(handler), - timeout=settings.download_timeout_seconds, - follow_redirects=True)) + d = HfS3StreamDownloader(settings=settings, + client=make_fake_controller_client(handler)) a = _assignment(filename="retry.bin", bucket=s3_bucket) result = await d.download(assignment=a) From 021ce86c455ce8678d9fad501d3c3fa8083db212 Mon Sep 17 00:00:00 2001 From: l17728 Date: Thu, 14 May 2026 20:53:07 +0800 Subject: [PATCH 12/21] test: make_fake_controller_client forwards X-Assignment-Token + builds transport once (W3b) --- tests/conftest.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index e25002a..08bee50 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -284,16 +284,18 @@ def make_fake_controller_client(hf_handler): from contextlib import asynccontextmanager as _acm class _FakeControllerClient: + def __init__(self): + self._transport = _httpx.MockTransport(hf_handler) + @_acm async def stream_hf(self, *, subtask_id, assignment_token, range_header=None): - transport = _httpx.MockTransport(hf_handler) + headers = {"X-Assignment-Token": str(assignment_token)} + if range_header: + headers["Range"] = range_header async with _httpx.AsyncClient( - transport=transport, base_url="http://fake-controller", + transport=self._transport, base_url="http://fake-controller", ) as client: - headers = {} - if range_header: - headers["Range"] = range_header async with client.stream( "GET", f"/api/v1/hf-proxy/subtask/{subtask_id}", headers=headers, From 58d9640c8b1c983ded1c6ddb2d3f115a28506617 Mon Sep 17 00:00:00 2001 From: l17728 Date: Thu, 14 May 2026 20:58:28 +0800 Subject: [PATCH 13/21] feat(executor): DirectOffsetDownloader fetches via controller proxy (W3b) Co-Authored-By: Claude Sonnet 4.6 --- src/dlw/executor/chunk_downloader.py | 64 +++++++++-------- tests/executor/test_chunk_downloader.py | 96 ++++++++++++++++++------- 2 files changed, 108 insertions(+), 52 deletions(-) diff --git a/src/dlw/executor/chunk_downloader.py b/src/dlw/executor/chunk_downloader.py index cdf6d4b..abbf73b 100644 --- a/src/dlw/executor/chunk_downloader.py +++ b/src/dlw/executor/chunk_downloader.py @@ -15,9 +15,6 @@ from pathlib import Path from typing import Any -import httpx - -from dlw.executor import _io as _io_mod from dlw.executor._io import ( _HTTP_CHUNK_BYTES, _TRANSIENT_RETRY, @@ -25,6 +22,7 @@ make_s3_client, upload_part, ) +from dlw.executor.client import ControllerClient from dlw.executor.config import ExecutorSettings from dlw.executor.parts_dir import cleanup_parts_dir, parts_dir_for from dlw.executor.types import Assignment, DownloadResult @@ -57,8 +55,10 @@ def plan_chunks(file_size: int, chunk_size: int) -> list[ChunkPlan]: class DirectOffsetDownloader: - def __init__(self, *, settings: ExecutorSettings) -> None: + def __init__(self, *, settings: ExecutorSettings, + client: ControllerClient) -> None: self._s = settings + self._controller = client assert settings.chunk_size_bytes >= 5 * 1024 * 1024, \ f"chunk_size_bytes ({settings.chunk_size_bytes}) < 5 MiB" @@ -68,18 +68,26 @@ def _open_writer(path: Path): return path.open("wb") async def _resolve_size(self, a: Assignment) -> Assignment: - url = (f"{self._s.hf_endpoint.rstrip('/')}/{a.repo_id}" - f"/resolve/{a.revision}/{a.filename}") - headers: dict[str, str] = {} - if self._s.hf_token: - headers["Authorization"] = f"Bearer {self._s.hf_token}" - async with _io_mod.make_http_client(self._s) as hc: - resp = await hc.head(url, headers=headers) + """W3b: the proxy is GET-only, so probe size with a bytes=0-0 range + request and read Content-Range (`bytes 0-0/`). Fall back to + Content-Length if HF answered a full 200 instead of a 206.""" + async with self._controller.stream_hf( + subtask_id=a.subtask_id, + assignment_token=a.assignment_token, + range_header="bytes=0-0", + ) as resp: resp.raise_for_status() - cl = resp.headers.get("Content-Length") - if cl is None: - raise RuntimeError("file_size unresolvable: HEAD returned no Content-Length") - return dataclasses.replace(a, file_size=int(cl)) + content_range = resp.headers.get("Content-Range") + content_length = resp.headers.get("Content-Length") + if content_range and "/" in content_range: + total = content_range.rsplit("/", 1)[1].strip() + if total.isdigit(): + return dataclasses.replace(a, file_size=int(total)) + if content_length is None: + raise RuntimeError( + "file_size unresolvable: no Content-Range or Content-Length" + ) + return dataclasses.replace(a, file_size=int(content_length)) async def download(self, *, assignment: Assignment) -> DownloadResult: if assignment.file_size is None: @@ -102,23 +110,23 @@ async def _pass1_parallel( self, a: Assignment, plans: list[ChunkPlan], dest_dir: Path, ) -> None: sem = asyncio.Semaphore(self._s.chunk_concurrency) - async with _io_mod.make_http_client(self._s) as hc: - async def one(plan: ChunkPlan) -> None: - async with sem: - await self._download_one_chunk(hc, a, plan, dest_dir) - await asyncio.gather(*(one(p) for p in plans)) + + async def one(plan: ChunkPlan) -> None: + async with sem: + await self._download_one_chunk(a, plan, dest_dir) + + await asyncio.gather(*(one(p) for p in plans)) @_TRANSIENT_RETRY async def _download_one_chunk( - self, hc: httpx.AsyncClient, a: Assignment, - plan: ChunkPlan, dest_dir: Path, + self, a: Assignment, plan: ChunkPlan, dest_dir: Path, ) -> None: - url = (f"{self._s.hf_endpoint.rstrip('/')}/{a.repo_id}" - f"/resolve/{a.revision}/{a.filename}") - headers = {"Range": f"bytes={plan.offset}-{plan.offset + plan.length - 1}"} - if self._s.hf_token: - headers["Authorization"] = f"Bearer {self._s.hf_token}" - async with hc.stream("GET", url, headers=headers) as resp: + range_header = f"bytes={plan.offset}-{plan.offset + plan.length - 1}" + async with self._controller.stream_hf( + subtask_id=a.subtask_id, + assignment_token=a.assignment_token, + range_header=range_header, + ) as resp: resp.raise_for_status() target = dest_dir / f"{plan.index}.bin" try: diff --git a/tests/executor/test_chunk_downloader.py b/tests/executor/test_chunk_downloader.py index f2d8710..3ec5651 100644 --- a/tests/executor/test_chunk_downloader.py +++ b/tests/executor/test_chunk_downloader.py @@ -54,6 +54,7 @@ def test_disk_full_error_is_exception_subclass() -> None: from dlw.executor.config import ExecutorSettings from dlw.executor.types import Assignment from dlw.schemas.storage import StorageConfig +from tests.conftest import make_fake_controller_client _FILE_SIZE = 200 * 1024 * 1024 # 200 MiB → 4 chunks at 64 MiB @@ -62,8 +63,8 @@ def test_disk_full_error_is_exception_subclass() -> None: _EXPECTED_SHA = hashlib.sha256(_SYNTHETIC).hexdigest() -def _mock_transport_for_synthetic() -> httpx.MockTransport: - """Respond to GET with HTTP 206 Partial Content reading Range header.""" +def _synthetic_hf_handler(): + """httpx handler: HTTP 206 Partial Content reading the Range header.""" def handler(request: httpx.Request) -> httpx.Response: rng = request.headers.get("Range", "") assert rng.startswith("bytes="), f"unexpected Range header: {rng!r}" @@ -75,7 +76,7 @@ def handler(request: httpx.Request) -> httpx.Response: content=body, headers={"Content-Length": str(len(body))}, ) - return httpx.MockTransport(handler) + return handler @pytest.fixture @@ -83,7 +84,6 @@ def chunk_settings(tmp_path) -> ExecutorSettings: return ExecutorSettings( id="ex-chunk-test", bearer_token="t", - hf_endpoint="http://hf.fake", chunk_size_bytes=_CHUNK_SIZE, chunk_concurrency=2, parts_dir_path=str(tmp_path / "parts"), @@ -92,16 +92,9 @@ def chunk_settings(tmp_path) -> ExecutorSettings: ) -async def test_pass1_pass2_happy_path_with_moto(chunk_settings, monkeypatch) -> None: - """Full pipeline: 4 chunks via MockTransport → moto multipart → sha256 matches.""" - from dlw.executor import _io as _io_mod - - transport = _mock_transport_for_synthetic() - monkeypatch.setattr( - _io_mod, "make_http_client", - lambda settings: httpx.AsyncClient(transport=transport), - ) - +async def test_pass1_pass2_happy_path_with_moto(chunk_settings) -> None: + """Full pipeline: 4 chunks via the fake controller proxy -> moto multipart + -> sha256 matches.""" storage_config = StorageConfig( bucket="test-bucket", region="us-east-1", endpoint_url=None, access_key_id="dummy", secret_access_key="dummy", @@ -112,6 +105,7 @@ async def test_pass1_pass2_happy_path_with_moto(chunk_settings, monkeypatch) -> a = Assignment( subtask_id=sub_id, task_id=uuid.uuid4(), + assignment_token=uuid.uuid4(), repo_id="owner/repo", revision="b" * 40, filename="model.bin", @@ -124,7 +118,10 @@ async def test_pass1_pass2_happy_path_with_moto(chunk_settings, monkeypatch) -> s3 = boto3.client("s3", region_name="us-east-1") s3.create_bucket(Bucket="test-bucket") - d = DirectOffsetDownloader(settings=chunk_settings) + d = DirectOffsetDownloader( + settings=chunk_settings, + client=make_fake_controller_client(_synthetic_hf_handler()), + ) result = await d.download(assignment=a) assert result.bytes_written == _FILE_SIZE @@ -138,16 +135,9 @@ async def test_pass1_pass2_happy_path_with_moto(chunk_settings, monkeypatch) -> async def test_pass1_enospc_raises_disk_full_and_leaks_parts( chunk_settings, monkeypatch, ) -> None: - """Inject ENOSPC into pass-1 chunk write → DiskFullError + parts NOT cleaned.""" - from dlw.executor import _io as _io_mod + """Inject ENOSPC into pass-1 chunk write -> DiskFullError + parts NOT cleaned.""" from dlw.executor import chunk_downloader as cd_mod - transport = _mock_transport_for_synthetic() - monkeypatch.setattr( - _io_mod, "make_http_client", - lambda settings: httpx.AsyncClient(transport=transport), - ) - class _NoSpaceWriter: def write(self, data): raise OSError(errno.ENOSPC, "No space left on device") @@ -169,6 +159,7 @@ def __exit__(self, *_): return False a = Assignment( subtask_id=sub_id, task_id=uuid.uuid4(), + assignment_token=uuid.uuid4(), repo_id="owner/repo", revision="b" * 40, filename="model.bin", @@ -177,10 +168,67 @@ def __exit__(self, *_): return False storage_config=storage_config, ) - d = DirectOffsetDownloader(settings=chunk_settings) + d = DirectOffsetDownloader( + settings=chunk_settings, + client=make_fake_controller_client(_synthetic_hf_handler()), + ) from dlw.executor.chunk_downloader import DiskFullError with pytest.raises(DiskFullError): await d.download(assignment=a) from dlw.executor.parts_dir import parts_dir_for assert parts_dir_for(chunk_settings.parts_dir_path, sub_id).exists() + + +async def test_resolve_size_via_range_probe(chunk_settings) -> None: + """When file_size is None, _resolve_size does a bytes=0-0 probe and reads + Content-Range to recover the total size.""" + def handler(request: httpx.Request) -> httpx.Response: + assert request.headers.get("Range") == "bytes=0-0" + return httpx.Response( + 206, content=b"\x00", + headers={"Content-Range": "bytes 0-0/123456", "Content-Length": "1"}, + ) + + d = DirectOffsetDownloader( + settings=chunk_settings, + client=make_fake_controller_client(handler), + ) + a = Assignment( + subtask_id=uuid.uuid4(), task_id=uuid.uuid4(), + assignment_token=uuid.uuid4(), + repo_id="o/r", revision="b" * 40, filename="big.bin", + file_size=None, expected_sha256=None, + storage_config=StorageConfig( + bucket="b", region="us-east-1", endpoint_url=None, + key_prefix="p", + ), + ) + resolved = await d._resolve_size(a) + assert resolved.file_size == 123456 + + +async def test_resolve_size_falls_back_to_content_length(chunk_settings) -> None: + """If HF answers a 200 (no Content-Range), _resolve_size uses Content-Length.""" + def handler(request: httpx.Request) -> httpx.Response: + return httpx.Response( + 200, content=b"\x00" * 10, + headers={"Content-Length": "777"}, + ) + + d = DirectOffsetDownloader( + settings=chunk_settings, + client=make_fake_controller_client(handler), + ) + a = Assignment( + subtask_id=uuid.uuid4(), task_id=uuid.uuid4(), + assignment_token=uuid.uuid4(), + repo_id="o/r", revision="b" * 40, filename="big.bin", + file_size=None, expected_sha256=None, + storage_config=StorageConfig( + bucket="b", region="us-east-1", endpoint_url=None, + key_prefix="p", + ), + ) + resolved = await d._resolve_size(a) + assert resolved.file_size == 777 From cdab6fb7f9fe7c3b8688363a1bbe0843f05b9a53 Mon Sep 17 00:00:00 2001 From: l17728 Date: Thu, 14 May 2026 21:11:11 +0800 Subject: [PATCH 14/21] test(executor): cover _resolve_size RuntimeError branch + clarify fallback (W3b) Co-Authored-By: Claude Sonnet 4.6 --- src/dlw/executor/chunk_downloader.py | 3 +++ tests/executor/test_chunk_downloader.py | 24 ++++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/src/dlw/executor/chunk_downloader.py b/src/dlw/executor/chunk_downloader.py index abbf73b..ab00608 100644 --- a/src/dlw/executor/chunk_downloader.py +++ b/src/dlw/executor/chunk_downloader.py @@ -83,6 +83,9 @@ async def _resolve_size(self, a: Assignment) -> Assignment: total = content_range.rsplit("/", 1)[1].strip() if total.isdigit(): return dataclasses.replace(a, file_size=int(total)) + # Content-Length fallback: only correct when HF answered a full 200 + # (then it is the total size). A well-behaved 206 always carries + # Content-Range, handled above. if content_length is None: raise RuntimeError( "file_size unresolvable: no Content-Range or Content-Length" diff --git a/tests/executor/test_chunk_downloader.py b/tests/executor/test_chunk_downloader.py index 3ec5651..cab193b 100644 --- a/tests/executor/test_chunk_downloader.py +++ b/tests/executor/test_chunk_downloader.py @@ -232,3 +232,27 @@ def handler(request: httpx.Request) -> httpx.Response: ) resolved = await d._resolve_size(a) assert resolved.file_size == 777 + + +async def test_resolve_size_raises_when_no_headers(chunk_settings) -> None: + """If neither Content-Range nor Content-Length is present, raise RuntimeError.""" + def handler(request: httpx.Request) -> httpx.Response: + # Use stream= to prevent httpx from auto-injecting Content-Length. + return httpx.Response(200, stream=httpx.ByteStream(b"\x00")) # no size headers + + d = DirectOffsetDownloader( + settings=chunk_settings, + client=make_fake_controller_client(handler), + ) + a = Assignment( + subtask_id=uuid.uuid4(), task_id=uuid.uuid4(), + assignment_token=uuid.uuid4(), + repo_id="o/r", revision="b" * 40, filename="big.bin", + file_size=None, expected_sha256=None, + storage_config=StorageConfig( + bucket="b", region="us-east-1", endpoint_url=None, + key_prefix="p", + ), + ) + with pytest.raises(RuntimeError, match="unresolvable"): + await d._resolve_size(a) From cad2db1683599b836c9f4693ab0b128b20aa86aa Mon Sep 17 00:00:00 2001 From: l17728 Date: Thu, 14 May 2026 21:15:30 +0800 Subject: [PATCH 15/21] feat(executor): wire proxy client end-to-end, delete ExecutorSettings HF fields (W3b) Co-Authored-By: Claude Sonnet 4.6 --- src/dlw/executor/_io.py | 8 -------- src/dlw/executor/cli.py | 6 ++++-- src/dlw/executor/config.py | 4 ---- src/dlw/executor/runner.py | 1 + tests/executor/test_config.py | 8 +------- 5 files changed, 6 insertions(+), 21 deletions(-) diff --git a/src/dlw/executor/_io.py b/src/dlw/executor/_io.py index 891ec68..b2a918c 100644 --- a/src/dlw/executor/_io.py +++ b/src/dlw/executor/_io.py @@ -51,14 +51,6 @@ def make_s3_client(settings: ExecutorSettings, cfg: StorageConfig) -> Any: ) -def make_http_client(settings: ExecutorSettings) -> httpx.AsyncClient: - """Test seam — overridable via monkeypatch.""" - return httpx.AsyncClient( - timeout=settings.download_timeout_seconds, - follow_redirects=True, - ) - - def compose_key(a: Assignment) -> str: prefix = a.storage_config.key_prefix.strip("/") parts = [p for p in (prefix, a.repo_id, a.revision, a.filename) if p] diff --git a/src/dlw/executor/cli.py b/src/dlw/executor/cli.py index c464a1f..15b74b5 100644 --- a/src/dlw/executor/cli.py +++ b/src/dlw/executor/cli.py @@ -38,9 +38,11 @@ async def _async_main(args: argparse.Namespace) -> int: settings = ExecutorSettings() # W3a: the client starts without an AuthState; ExecutorRunner.run() does # load_or_register and then calls client.update_auth() before any request. + # W3b: both downloaders fetch HF bytes through the same client's reverse + # proxy (stream_hf) — the executor never holds the HF token. client = ControllerClient(base_url=settings.controller_url) - stream = HfS3StreamDownloader(settings=settings) - chunk = DirectOffsetDownloader(settings=settings) + stream = HfS3StreamDownloader(settings=settings, client=client) + chunk = DirectOffsetDownloader(settings=settings, client=client) runner = ExecutorRunner( settings=settings, client=client, stream_downloader=stream, chunk_downloader=chunk, diff --git a/src/dlw/executor/config.py b/src/dlw/executor/config.py index 6ae13bf..defe3f5 100644 --- a/src/dlw/executor/config.py +++ b/src/dlw/executor/config.py @@ -32,10 +32,6 @@ class ExecutorSettings(BaseSettings): nic_speed_gbps: int = Field(default=1, ge=1, le=400) region: str = Field(default="local") - # Phase 1 W4 — HF Hub - hf_endpoint: str = Field(default="https://huggingface.co") - hf_token: str | None = Field(default=None) - # Phase 1 W4 — S3 / S3-compatible s3_region: str = Field(default="us-east-1") s3_endpoint_url: str | None = Field(default=None) diff --git a/src/dlw/executor/runner.py b/src/dlw/executor/runner.py index 7a1b0f9..c9c29fd 100644 --- a/src/dlw/executor/runner.py +++ b/src/dlw/executor/runner.py @@ -203,6 +203,7 @@ async def _execute_subtask( assignment = Assignment( subtask_id=sub_id, task_id=uuid.UUID(subtask["task_id"]), + assignment_token=assignment_token, repo_id=repo_id, revision=revision, filename=subtask["filename"], diff --git a/tests/executor/test_config.py b/tests/executor/test_config.py index 3b5b7a7..59d55f0 100644 --- a/tests/executor/test_config.py +++ b/tests/executor/test_config.py @@ -39,12 +39,10 @@ def test_host_id_defaults_to_id_prefix(monkeypatch: pytest.MonkeyPatch) -> None: @pytest.mark.slow def test_w4_defaults(monkeypatch: pytest.MonkeyPatch) -> None: - """Phase 1 W4 fields have safe defaults (public HF + AWS S3).""" + """Phase 1 W4 S3 fields have safe defaults (AWS S3). HF fields removed in W3b.""" monkeypatch.setenv("DLW_EXECUTOR_ID", "host-w4-worker-1") monkeypatch.setenv("DLW_EXECUTOR_BEARER_TOKEN", "secret") s = ExecutorSettings() - assert s.hf_endpoint == "https://huggingface.co" - assert s.hf_token is None assert s.s3_region == "us-east-1" assert s.s3_endpoint_url is None assert s.s3_path_style is True @@ -56,12 +54,8 @@ def test_w4_defaults(monkeypatch: pytest.MonkeyPatch) -> None: def test_w4_env_overrides(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setenv("DLW_EXECUTOR_ID", "host-w4-worker-2") monkeypatch.setenv("DLW_EXECUTOR_BEARER_TOKEN", "secret") - monkeypatch.setenv("DLW_EXECUTOR_HF_ENDPOINT", "https://hf-mirror.com") - monkeypatch.setenv("DLW_EXECUTOR_HF_TOKEN", "hf_xxx") monkeypatch.setenv("DLW_EXECUTOR_S3_ENDPOINT_URL", "http://minio:9000") monkeypatch.setenv("DLW_EXECUTOR_S3_REGION", "cn-east-1") s = ExecutorSettings() - assert s.hf_endpoint == "https://hf-mirror.com" - assert s.hf_token == "hf_xxx" assert s.s3_endpoint_url == "http://minio:9000" assert s.s3_region == "cn-east-1" From a2fbf1fa7b74d7eee270099043b4f18ec31749fc Mon Sep 17 00:00:00 2001 From: l17728 Date: Thu, 14 May 2026 21:25:05 +0800 Subject: [PATCH 16/21] test(executor): assert runner threads assignment_token into Assignment (W3b) --- tests/executor/test_runner.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/executor/test_runner.py b/tests/executor/test_runner.py index 36031e0..bc46eb2 100644 --- a/tests/executor/test_runner.py +++ b/tests/executor/test_runner.py @@ -236,6 +236,7 @@ async def report(self, **kw): captured["report_kw"] = kw a = captured["assignment"] assert a.repo_id == "o/runner-test" assert a.revision == "z" * 40 + assert isinstance(a.assignment_token, uuid.UUID) assert a.storage_config.bucket == "b" assert a.storage_config.key_prefix == "p/" assert captured["report_kw"]["status"] == "succeeded" From 2ec622f2edc82e4196b939c9d148e531456d0062 Mon Sep 17 00:00:00 2001 From: l17728 Date: Thu, 14 May 2026 21:27:37 +0800 Subject: [PATCH 17/21] =?UTF-8?q?feat(lint):=20check=5Fno=5Fhf=5Ftoken=5Fi?= =?UTF-8?q?n=5Fexecutor=20=E2=80=94=20lock=20INVARIANT=202=20(W3b)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Sonnet 4.6 --- tests/tools/__init__.py | 0 tests/tools/test_lint_no_hf_token.py | 37 ++++++++++++++++++++++++++++ tools/lint_invariants.py | 28 +++++++++++++++++++++ 3 files changed, 65 insertions(+) create mode 100644 tests/tools/__init__.py create mode 100644 tests/tools/test_lint_no_hf_token.py diff --git a/tests/tools/__init__.py b/tests/tools/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/tools/test_lint_no_hf_token.py b/tests/tools/test_lint_no_hf_token.py new file mode 100644 index 0000000..41e3bbe --- /dev/null +++ b/tests/tools/test_lint_no_hf_token.py @@ -0,0 +1,37 @@ +"""Self-test for tools/lint_invariants.py::check_no_hf_token_in_executor (W3b).""" +from __future__ import annotations + +import importlib.util +from pathlib import Path + +_LINT_PATH = Path(__file__).resolve().parents[2] / "tools" / "lint_invariants.py" + + +def _load_lint(): + spec = importlib.util.spec_from_file_location("lint_invariants", _LINT_PATH) + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + return mod + + +def test_lint_flags_hf_token_in_executor(tmp_path, monkeypatch) -> None: + lint = _load_lint() + exec_dir = tmp_path / "src" / "dlw" / "executor" + exec_dir.mkdir(parents=True) + (exec_dir / "bad.py").write_text( + "hf_token = 'leaked-secret'\n", encoding="utf-8", + ) + monkeypatch.setattr(lint, "ROOT", tmp_path) + errors = lint.check_no_hf_token_in_executor() + assert any("bad.py" in e for e in errors), errors + + +def test_lint_passes_clean_executor(tmp_path, monkeypatch) -> None: + lint = _load_lint() + exec_dir = tmp_path / "src" / "dlw" / "executor" + exec_dir.mkdir(parents=True) + (exec_dir / "ok.py").write_text( + "# hf_token mentioned in a comment is fine\nx = 1\n", encoding="utf-8", + ) + monkeypatch.setattr(lint, "ROOT", tmp_path) + assert lint.check_no_hf_token_in_executor() == [] diff --git a/tools/lint_invariants.py b/tools/lint_invariants.py index b000931..b69f9f7 100644 --- a/tools/lint_invariants.py +++ b/tools/lint_invariants.py @@ -225,6 +225,33 @@ def check_no_bearer_on_executor_routes() -> list[str]: return errors +def check_no_hf_token_in_executor() -> list[str]: + """W3b §3.11: INVARIANT 2 — the tenant HF token must never reach an + executor. Forbid the `hf_token` / `hf_endpoint` identifiers anywhere in + src/dlw/executor/ (comment lines are exempt). After W3b, HF access goes + exclusively through the controller's reverse proxy.""" + errors: list[str] = [] + exec_dir = ROOT / "src" / "dlw" / "executor" + if not exec_dir.exists(): + return [] + for py in sorted(exec_dir.rglob("*.py")): + try: + text = py.read_text(encoding="utf-8") + except (OSError, UnicodeDecodeError) as e: + errors.append(f"Cannot read {py.relative_to(ROOT)}: {e}") + continue + for lineno, line in enumerate(text.splitlines(), start=1): + if line.strip().startswith("#"): + continue + if "hf_token" in line or "hf_endpoint" in line: + errors.append( + f"{py.relative_to(ROOT)}:{lineno}: " + f"'hf_token'/'hf_endpoint' forbidden in the executor package " + f"(INVARIANT 2 — HF access goes through the controller proxy)" + ) + return errors + + def main() -> int: failures: list[str] = [] @@ -363,6 +390,7 @@ def main() -> int: failures.extend(check_task_status_domain()) failures.extend(check_d10_host_affinity_test_owner()) failures.extend(check_no_bearer_on_executor_routes()) + failures.extend(check_no_hf_token_in_executor()) # --- Report --- if failures: From 327e37f26f7a1165e8ddf565e04bb31736de3f16 Mon Sep 17 00:00:00 2001 From: l17728 Date: Thu, 14 May 2026 21:33:05 +0800 Subject: [PATCH 18/21] test(lint): document full-text scan behavior + cache _load_lint (W3b) Co-Authored-By: Claude Sonnet 4.6 --- tests/tools/test_lint_no_hf_token.py | 17 +++++++++++++++++ tools/lint_invariants.py | 10 +++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/tests/tools/test_lint_no_hf_token.py b/tests/tools/test_lint_no_hf_token.py index 41e3bbe..c2721c2 100644 --- a/tests/tools/test_lint_no_hf_token.py +++ b/tests/tools/test_lint_no_hf_token.py @@ -1,12 +1,14 @@ """Self-test for tools/lint_invariants.py::check_no_hf_token_in_executor (W3b).""" from __future__ import annotations +import functools import importlib.util from pathlib import Path _LINT_PATH = Path(__file__).resolve().parents[2] / "tools" / "lint_invariants.py" +@functools.lru_cache(maxsize=None) def _load_lint(): spec = importlib.util.spec_from_file_location("lint_invariants", _LINT_PATH) mod = importlib.util.module_from_spec(spec) @@ -35,3 +37,18 @@ def test_lint_passes_clean_executor(tmp_path, monkeypatch) -> None: ) monkeypatch.setattr(lint, "ROOT", tmp_path) assert lint.check_no_hf_token_in_executor() == [] + + +def test_lint_flags_hf_token_in_string_literal(tmp_path, monkeypatch) -> None: + """The scan is full-text by design — a string literal mentioning the + forbidden identifier in executor code is flagged too (only whole + comment lines are exempt).""" + lint = _load_lint() + exec_dir = tmp_path / "src" / "dlw" / "executor" + exec_dir.mkdir(parents=True) + (exec_dir / "guard.py").write_text( + 'raise ValueError("hf_token must not reach executor")\n', + encoding="utf-8", + ) + monkeypatch.setattr(lint, "ROOT", tmp_path) + assert lint.check_no_hf_token_in_executor() != [] diff --git a/tools/lint_invariants.py b/tools/lint_invariants.py index b69f9f7..9df0e34 100644 --- a/tools/lint_invariants.py +++ b/tools/lint_invariants.py @@ -228,13 +228,17 @@ def check_no_bearer_on_executor_routes() -> list[str]: def check_no_hf_token_in_executor() -> list[str]: """W3b §3.11: INVARIANT 2 — the tenant HF token must never reach an executor. Forbid the `hf_token` / `hf_endpoint` identifiers anywhere in - src/dlw/executor/ (comment lines are exempt). After W3b, HF access goes - exclusively through the controller's reverse proxy.""" + src/dlw/executor/. After W3b, HF access goes exclusively through the + controller's reverse proxy. + + Only whole comment lines (first non-whitespace char is `#`) are exempt — + string literals and trailing comments containing the identifiers are + flagged too (this is a full-text scan, by design).""" errors: list[str] = [] exec_dir = ROOT / "src" / "dlw" / "executor" if not exec_dir.exists(): return [] - for py in sorted(exec_dir.rglob("*.py")): + for py in sorted(exec_dir.rglob("*.py")): # sorted for deterministic CI output try: text = py.read_text(encoding="utf-8") except (OSError, UnicodeDecodeError) as e: From 7586471072a7107cab3de53cfce8edf7cae8a022 Mon Sep 17 00:00:00 2001 From: l17728 Date: Thu, 14 May 2026 21:39:01 +0800 Subject: [PATCH 19/21] test(e2e)+docs: e2e through controller proxy, OpenAPI + operator runbook (W3b) Co-Authored-By: Claude Sonnet 4.6 --- api/openapi.yaml | 56 +++++++++++++++++++++++++++++++ docs/operator/executor-runbook.md | 26 ++++++++++++++ tests/e2e/test_executor_e2e.py | 18 ++++++---- 3 files changed, 94 insertions(+), 6 deletions(-) diff --git a/api/openapi.yaml b/api/openapi.yaml index eb0fe55..9914d66 100644 --- a/api/openapi.yaml +++ b/api/openapi.yaml @@ -730,6 +730,62 @@ paths: got: type: integer + /hf-proxy/subtask/{subtaskId}: + get: + tags: [executors] + summary: HF reverse proxy — stream a subtask's file from HF + description: >- + Controller-side reverse proxy. Authenticates the executor (mTLS + JWT), + verifies subtask ownership (assignment_token + epoch fence), reconstructs + the HF URL from the subtask row, injects the tenant HF token, and streams + the bytes back. The executor never holds the HF token (INVARIANT 2). + operationId: hfProxySubtask + parameters: + - name: subtaskId + in: path + required: true + schema: + type: string + format: uuid + - name: X-Assignment-Token + in: header + required: true + schema: + type: string + format: uuid + - name: Range + in: header + required: false + schema: + type: string + responses: + '200': + description: Full file stream. + content: + application/octet-stream: + schema: + type: string + format: binary + '206': + description: Partial content (Range request). + content: + application/octet-stream: + schema: + type: string + format: binary + '401': + description: Missing or invalid mTLS / executor JWT. + '403': + description: NOT_YOUR_SUBTASK — authenticated executor does not own this subtask. + '404': + description: Subtask not found. + '409': + description: STALE_ASSIGNMENT or EPOCH_MISMATCH (fence-token mismatch). + '429': + description: Forwarded from HF — rate limited. + '503': + description: Forwarded from HF — upstream unavailable. + /executors/{executorId}/subtasks/{subtaskId}/complete: parameters: - in: path diff --git a/docs/operator/executor-runbook.md b/docs/operator/executor-runbook.md index 9110db8..ca25d13 100644 --- a/docs/operator/executor-runbook.md +++ b/docs/operator/executor-runbook.md @@ -86,3 +86,29 @@ anyone can forge the header. Default is `0` (direct uvicorn TLS only). Heartbeats carry an HMAC timestamp validated within ±5 min. Run `chrony` / `systemd-timesyncd` on all executor + controller hosts. + +## W3b — HF access via the controller reverse proxy + +As of Phase 2 W3b, executors no longer talk to huggingface.co directly. All +HF file downloads flow through the controller's reverse proxy +(`GET /api/v1/hf-proxy/subtask/{id}`), which injects the tenant HF token +server-side (INVARIANT 2 — the token never leaves the controller). + +**Removed executor environment variables** (delete them from `.env.executor` +and any deployment manifests — they are now ignored): + +- `DLW_EXECUTOR_HF_TOKEN` +- `DLW_EXECUTOR_HF_ENDPOINT` + +**Controller environment variables:** + +- `DLW_HF_TOKEN` — the tenant HF token (already used by the controller for + repo-metadata enumeration; W3b also uses it for the download proxy). +- `DLW_HF_ENDPOINT` — defaults to `https://huggingface.co`. +- `DLW_HF_PROXY_TIMEOUT_SECONDS` — per-request timeout for the proxy's HF + fetch (default 300, range 10–3600). + +**Operational tradeoff:** download bandwidth now flows through the controller +rather than executor→HF directly. For the internal beta this is acceptable; +global rate-limit coordination and an executor-local credential pool are +Phase 3 items. diff --git a/tests/e2e/test_executor_e2e.py b/tests/e2e/test_executor_e2e.py index ef94e92..6d5ae2c 100644 --- a/tests/e2e/test_executor_e2e.py +++ b/tests/e2e/test_executor_e2e.py @@ -178,12 +178,18 @@ def hf_handler(request: httpx.Request) -> httpx.Response: heartbeat_interval_seconds=1, poll_interval_seconds=1, ) - downloader = HfS3StreamDownloader(settings=settings) - # Inject the HF transport into the downloader's http client factory - downloader._make_http_client = lambda: httpx.AsyncClient( - transport=hf_transport, - timeout=settings.download_timeout_seconds, - follow_redirects=True, + downloader = HfS3StreamDownloader( + settings=settings, client=executor_client, + ) + # W3b: the executor fetches HF bytes through the controller's + # reverse proxy. Install the HF MockTransport on the controller's + # HF client factory (not the downloader). + import dlw.api.hf_proxy as _hf_proxy_mod + monkeypatch.setattr( + _hf_proxy_mod, "_make_hf_client", + lambda timeout_seconds: httpx.AsyncClient( + transport=hf_transport, follow_redirects=True, + ), ) runner = ExecutorRunner( From 8a7bed003bdfe380a2c03ef15b9f315f3a4edac0 Mon Sep 17 00:00:00 2001 From: l17728 Date: Thu, 14 May 2026 21:49:56 +0800 Subject: [PATCH 20/21] docs(test): note W3b seam migration in e2e module docstring (W3b) --- tests/e2e/test_executor_e2e.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/e2e/test_executor_e2e.py b/tests/e2e/test_executor_e2e.py index 6d5ae2c..9dfe16f 100644 --- a/tests/e2e/test_executor_e2e.py +++ b/tests/e2e/test_executor_e2e.py @@ -1,8 +1,10 @@ """E2E: real controller + real ExecutorRunner — full HF→S3 happy path. -W4 rewrite: replaces MockDownloader with HfS3StreamDownloader; HF served by -httpx MockTransport (returns deterministic bytes per filename); S3 served by -moto[s3] in-process. No Docker required. +W4 rewrite: replaces MockDownloader with HfS3StreamDownloader. +W3b update: the HF MockTransport is installed on the controller's +`dlw.api.hf_proxy._make_hf_client` seam — the executor fetches through the +controller reverse-proxy, not directly from HF. S3 served by moto[s3] +in-process. No Docker required. """ from __future__ import annotations From c5ca86c92ce358eb0b20e854634b47a95b55998b Mon Sep 17 00:00:00 2001 From: l17728 Date: Thu, 14 May 2026 21:56:38 +0800 Subject: [PATCH 21/21] docs(plan): sync W3b plan with review-driven fixes during execution Reflects the corrections applied across the 9 implementation tasks: test heartbeat-before-poll, proxy client cleanup + 503 mapping, stream_hf return annotation, fake client X-Assignment-Token, the _resolve_size RuntimeError test, and the lint full-text-scan note. Co-Authored-By: Claude Opus 4.7 (1M context) --- ...2026-05-14-phase-2-w3b-hf-reverse-proxy.md | 141 +++++++++++++++--- 1 file changed, 121 insertions(+), 20 deletions(-) diff --git a/docs/superpowers/plans/2026-05-14-phase-2-w3b-hf-reverse-proxy.md b/docs/superpowers/plans/2026-05-14-phase-2-w3b-hf-reverse-proxy.md index 738c235..5c56e5a 100644 --- a/docs/superpowers/plans/2026-05-14-phase-2-w3b-hf-reverse-proxy.md +++ b/docs/superpowers/plans/2026-05-14-phase-2-w3b-hf-reverse-proxy.md @@ -138,7 +138,9 @@ from sqlalchemy.ext.asyncio import async_sessionmaker from dlw.config import get_settings from dlw.db.base import Base from dlw.db.models.task import FileSubTask -from tests.conftest import executor_request_headers, register_test_executor +from tests.conftest import ( + executor_request_headers, register_test_executor, signed_heartbeat_headers, +) _TOKEN = "hf-proxy-ui-token" _ENROLL = "hf-proxy-enrollment-token" @@ -167,6 +169,16 @@ async def _bootstrap(engine): await conn.run_sync(Base.metadata.drop_all) +@pytest.fixture(autouse=True) +async def _cleanup_tasks(engine): + """Truncate task rows after each test so a later test's poll cannot claim a + prior test's leftover subtask (matches tests/api/test_subtasks.py).""" + yield + from sqlalchemy import text + async with engine.begin() as conn: + await conn.execute(text("TRUNCATE file_subtasks, download_tasks CASCADE")) + + @pytest.fixture(autouse=True) def _set_env(monkeypatch: pytest.MonkeyPatch): monkeypatch.setenv("DLW_BEARER_TOKEN", _TOKEN) @@ -202,14 +214,21 @@ def _install_hf_mock(monkeypatch, handler): async def _create_task_and_claim(c, reg): - """POST a task (2 files via the _patch_hf_global autouse fixture), poll once - as `reg`, return (subtask_id, assignment_token, filename).""" + """POST a task (2 files via the _patch_hf_global autouse fixture), heartbeat + once to flip the executor joining->healthy (the scheduler only assigns work + to healthy/degraded executors), poll once as `reg`, and return + (subtask_id, assignment_token, filename).""" r = await c.post("/api/v1/tasks", json={ "repo_id": "deepseek-ai/DeepSeek-V3", "revision": "a" * 40, "storage_id": 1, }, headers={"Authorization": f"Bearer {_TOKEN}"}) assert r.status_code == 201, r.text + hb_body = b'{"health_score": 100, "parts_dir_bytes": 0, "disk_free_gb": 100}' + r = await c.post(f"/api/v1/executors/{reg['executor_id']}/heartbeat", + content=hb_body, + headers=signed_heartbeat_headers(reg, hb_body)) + assert r.status_code == 200, r.text r = await c.post(f"/api/v1/executors/{reg['executor_id']}/poll", headers=executor_request_headers(reg)) assert r.status_code == 200, r.text @@ -422,7 +441,16 @@ async def hf_proxy_subtask( hf_client = _make_hf_client(settings.hf_proxy_timeout_seconds) hf_req = hf_client.build_request("GET", hf_url, headers=hf_headers) - hf_resp = await hf_client.send(hf_req, stream=True) + try: + hf_resp = await hf_client.send(hf_req, stream=True) + except (httpx.TimeoutException, httpx.NetworkError) as e: + await hf_client.aclose() + raise HTTPException( + status_code=503, detail=f"HF upstream unreachable: {e}", + ) from e + except BaseException: + await hf_client.aclose() + raise fwd = { k: v for k, v in hf_resp.headers.items() @@ -442,6 +470,8 @@ async def hf_proxy_subtask( ) ``` +> The `try/except` around `hf_client.send()` guarantees the per-request client is closed even when the HF connection fails before the `StreamingResponse` body generator is constructed (otherwise the `AsyncClient` + its transport leak). Transport errors map to a `503` so executors can distinguish "HF unreachable, retry" from a controller bug; any other exception closes the client and re-raises unchanged. + - [ ] **Step 4: Wire the router into the app** In `src/dlw/main.py`, inside `create_app()`, after the `subtasks_router` include (line ~134) and before `return app`, add: @@ -527,7 +557,9 @@ async def test_proxy_rejects_epoch_mismatch(proxy_app, db_session) -> None: ) sub_id, token, _ = await _create_task_and_claim(c, reg) # Corrupt the subtask's stamped epoch so it no longer matches the - # authenticated executor's current epoch. + # authenticated executor's current epoch. commit() (not flush()) is + # required: the proxy reads via its own _session dependency on a + # separate connection, so the update must be committed to be visible. await db_session.execute( update(FileSubTask) .where(FileSubTask.id == uuid.UUID(sub_id)) @@ -572,7 +604,7 @@ with: "subtask_executor": sub.executor_id, "authenticated": auth_ex.id}, ) - if str(sub.assignment_token) != x_assignment_token: + if sub.assignment_token is None or str(sub.assignment_token) != x_assignment_token: raise HTTPException( status_code=409, detail={"code": "STALE_ASSIGNMENT"}, ) @@ -640,9 +672,9 @@ async def test_stream_hf_attaches_auth_and_token_headers(tmp_path) -> None: subtask_id=sub_id, assignment_token=tok, range_header="bytes=0-1023", ) as resp: - got = await resp.aread() + chunks = [chunk async for chunk in resp.aiter_bytes(8)] - assert got == body + assert b"".join(chunks) == body assert seen["path"] == f"/api/v1/hf-proxy/subtask/{sub_id}" assert seen["headers"]["authorization"] == "Bearer jwt-stream" assert seen["headers"]["x-executor-epoch"] == "4" @@ -676,7 +708,7 @@ class Assignment: - [ ] **Step 4: Add `stream_hf` to `ControllerClient`** -In `src/dlw/executor/client.py`, add `asynccontextmanager` to the imports — change: +In `src/dlw/executor/client.py`, add `asynccontextmanager` + `AsyncIterator` to the imports — change: ```python import secrets @@ -691,11 +723,12 @@ to: import secrets import time import uuid +from collections.abc import AsyncIterator from contextlib import asynccontextmanager from typing import Any, Self ``` -Then add the `stream_hf` method at the end of the `ControllerClient` class (after `report`): +Then add the `stream_hf` method at the end of the `ControllerClient` class (after `report`). The `-> AsyncIterator[httpx.Response]` return annotation is required — `pyproject.toml` sets `[tool.mypy] strict = true`, so an unannotated method fails `no-untyped-def`: ```python @asynccontextmanager @@ -705,10 +738,13 @@ Then add the `stream_hf` method at the end of the `ControllerClient` class (afte subtask_id: uuid.UUID, assignment_token: uuid.UUID, range_header: str | None = None, - ): + ) -> AsyncIterator[httpx.Response]: """W3b: stream a file from HF via the controller reverse-proxy. Yields the httpx streaming Response — callers consume resp.aiter_bytes() and - check resp.status_code, exactly as they did with a direct HF GET.""" + check resp.status_code, exactly as they did with a direct HF GET. + + Unlike heartbeat/poll/report, this does NOT call raise_for_status() — + callers MUST inspect resp.status_code themselves (the downloaders do).""" headers = { **self._auth_headers(), "X-Assignment-Token": str(assignment_token), @@ -759,16 +795,18 @@ def make_fake_controller_client(hf_handler): from contextlib import asynccontextmanager as _acm class _FakeControllerClient: + def __init__(self): + self._transport = _httpx.MockTransport(hf_handler) + @_acm async def stream_hf(self, *, subtask_id, assignment_token, range_header=None): - transport = _httpx.MockTransport(hf_handler) + headers = {"X-Assignment-Token": str(assignment_token)} + if range_header: + headers["Range"] = range_header async with _httpx.AsyncClient( - transport=transport, base_url="http://fake-controller", + transport=self._transport, base_url="http://fake-controller", ) as client: - headers = {} - if range_header: - headers["Range"] = range_header async with client.stream( "GET", f"/api/v1/hf-proxy/subtask/{subtask_id}", headers=headers, @@ -1292,6 +1330,30 @@ async def test_resolve_size_falls_back_to_content_length(chunk_settings) -> None ) resolved = await d._resolve_size(a) assert resolved.file_size == 777 + + +async def test_resolve_size_raises_when_no_headers(chunk_settings) -> None: + """If neither Content-Range nor Content-Length is present, raise RuntimeError.""" + def handler(request: httpx.Request) -> httpx.Response: + # Use stream= to prevent httpx from auto-injecting Content-Length. + return httpx.Response(200, stream=httpx.ByteStream(b"\x00")) # no size headers + + d = DirectOffsetDownloader( + settings=chunk_settings, + client=make_fake_controller_client(handler), + ) + a = Assignment( + subtask_id=uuid.uuid4(), task_id=uuid.uuid4(), + assignment_token=uuid.uuid4(), + repo_id="o/r", revision="b" * 40, filename="big.bin", + file_size=None, expected_sha256=None, + storage_config=StorageConfig( + bucket="b", region="us-east-1", endpoint_url=None, + key_prefix="p", + ), + ) + with pytest.raises(RuntimeError, match="unresolvable"): + await d._resolve_size(a) ``` - [ ] **Step 2: Run `test_chunk_downloader.py` to verify it fails** @@ -1420,6 +1482,9 @@ with: total = content_range.rsplit("/", 1)[1].strip() if total.isdigit(): return dataclasses.replace(a, file_size=int(total)) + # Content-Length fallback: only correct when HF answered a full 200 + # (then it is the total size). A well-behaved 206 always carries + # Content-Range, handled above. if content_length is None: raise RuntimeError( "file_size unresolvable: no Content-Range or Content-Length" @@ -1538,6 +1603,8 @@ git commit -m "feat(executor): DirectOffsetDownloader fetches via controller pro - Modify: `src/dlw/executor/cli.py:38-47` - Modify: `src/dlw/executor/config.py:35-37` - Modify: `src/dlw/executor/_io.py:54-59` +- Modify: `tests/executor/test_config.py` — two pre-existing tests (`test_w4_defaults`, `test_w4_env_overrides`) assert on the now-deleted `hf_endpoint`/`hf_token` fields; drop those HF-specific `setenv`/`assert` lines (keep the S3 field coverage). +- Modify: `tests/executor/test_runner.py` — in `test_runner_passes_assignment_with_repo_and_storage`, add `assert isinstance(a.assignment_token, uuid.UUID)` to directly verify the runner threads `assignment_token` into the `Assignment` it builds. - [ ] **Step 1: Thread `assignment_token` into the `Assignment` the runner builds** @@ -1657,12 +1724,14 @@ Create `tests/tools/test_lint_no_hf_token.py`: """Self-test for tools/lint_invariants.py::check_no_hf_token_in_executor (W3b).""" from __future__ import annotations +import functools import importlib.util from pathlib import Path _LINT_PATH = Path(__file__).resolve().parents[2] / "tools" / "lint_invariants.py" +@functools.lru_cache(maxsize=None) def _load_lint(): spec = importlib.util.spec_from_file_location("lint_invariants", _LINT_PATH) mod = importlib.util.module_from_spec(spec) @@ -1691,6 +1760,21 @@ def test_lint_passes_clean_executor(tmp_path, monkeypatch) -> None: ) monkeypatch.setattr(lint, "ROOT", tmp_path) assert lint.check_no_hf_token_in_executor() == [] + + +def test_lint_flags_hf_token_in_string_literal(tmp_path, monkeypatch) -> None: + """The scan is full-text by design — a string literal mentioning the + forbidden identifier in executor code is flagged too (only whole + comment lines are exempt).""" + lint = _load_lint() + exec_dir = tmp_path / "src" / "dlw" / "executor" + exec_dir.mkdir(parents=True) + (exec_dir / "guard.py").write_text( + 'raise ValueError("hf_token must not reach executor")\n', + encoding="utf-8", + ) + monkeypatch.setattr(lint, "ROOT", tmp_path) + assert lint.check_no_hf_token_in_executor() != [] ``` - [ ] **Step 2: Run the self-test to verify it fails** @@ -1706,13 +1790,17 @@ In `tools/lint_invariants.py`, add this function right after `check_no_bearer_on def check_no_hf_token_in_executor() -> list[str]: """W3b §3.11: INVARIANT 2 — the tenant HF token must never reach an executor. Forbid the `hf_token` / `hf_endpoint` identifiers anywhere in - src/dlw/executor/ (comment lines are exempt). After W3b, HF access goes - exclusively through the controller's reverse proxy.""" + src/dlw/executor/. After W3b, HF access goes exclusively through the + controller's reverse proxy. + + Only whole comment lines (first non-whitespace char is `#`) are exempt — + string literals and trailing comments containing the identifiers are + flagged too (this is a full-text scan, by design).""" errors: list[str] = [] exec_dir = ROOT / "src" / "dlw" / "executor" if not exec_dir.exists(): return [] - for py in sorted(exec_dir.rglob("*.py")): + for py in sorted(exec_dir.rglob("*.py")): # sorted for deterministic CI output try: text = py.read_text(encoding="utf-8") except (OSError, UnicodeDecodeError) as e: @@ -1813,6 +1901,19 @@ with: (`executor_client` is the `ControllerClient` already constructed earlier in the test with `_transport=asgi_transport` + the `X-Client-Cert-PEM` bypass header — `stream_hf` rides on it straight to the ASGI controller. `monkeypatch` is already a parameter of `test_e2e_hf_to_s3_full_pipeline`.) +Also update the module docstring at the top of `tests/e2e/test_executor_e2e.py` so it reflects the W3b seam migration (the HF MockTransport now sits on the controller's `dlw.api.hf_proxy._make_hf_client` seam, not the executor downloader): + +```python +"""E2E: real controller + real ExecutorRunner — full HF→S3 happy path. + +W4 rewrite: replaces MockDownloader with HfS3StreamDownloader. +W3b update: the HF MockTransport is installed on the controller's +`dlw.api.hf_proxy._make_hf_client` seam — the executor fetches through the +controller reverse-proxy, not directly from HF. S3 served by moto[s3] +in-process. No Docker required. +""" +``` + - [ ] **Step 2: Run the e2e to verify it passes** Run: `uv run pytest tests/e2e/test_executor_e2e.py -v`