From 10e41fec359c257569bc9faed29abc8a92a8559d Mon Sep 17 00:00:00 2001 From: Dominikus Nold Date: Thu, 9 Apr 2026 22:09:35 +0200 Subject: [PATCH 1/4] feat: add github hierarchy cache sync --- AGENTS.md | 9 + openspec/CHANGE_ORDER.md | 2 + .../.openspec.yaml | 2 + .../CHANGE_VALIDATION.md | 12 + .../TDD_EVIDENCE.md | 36 ++ .../design.md | 72 +++ .../proposal.md | 31 ++ .../specs/agile-feature-hierarchy/spec.md | 22 + .../specs/github-hierarchy-cache/spec.md | 29 + .../tasks.md | 20 + openspec/config.yaml | 6 + scripts/sync_github_hierarchy_cache.py | 502 ++++++++++++++++++ .../test_sync_github_hierarchy_cache.py | 215 ++++++++ 13 files changed, 958 insertions(+) create mode 100644 openspec/changes/governance-02-github-hierarchy-cache/.openspec.yaml create mode 100644 openspec/changes/governance-02-github-hierarchy-cache/CHANGE_VALIDATION.md create mode 100644 openspec/changes/governance-02-github-hierarchy-cache/TDD_EVIDENCE.md create mode 100644 openspec/changes/governance-02-github-hierarchy-cache/design.md create mode 100644 openspec/changes/governance-02-github-hierarchy-cache/proposal.md create mode 100644 openspec/changes/governance-02-github-hierarchy-cache/specs/agile-feature-hierarchy/spec.md create mode 100644 openspec/changes/governance-02-github-hierarchy-cache/specs/github-hierarchy-cache/spec.md create mode 100644 openspec/changes/governance-02-github-hierarchy-cache/tasks.md create mode 100644 scripts/sync_github_hierarchy_cache.py create mode 100644 tests/unit/scripts/test_sync_github_hierarchy_cache.py diff --git a/AGENTS.md b/AGENTS.md index 24c320fb..b1dd559f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -293,6 +293,15 @@ Agent enforcement: - **Change modified/renamed**: update the folder name and any affected dependency references - **Blocker resolved**: update the "Blocked by" column (append ✅ to resolved blockers) +#### GitHub hierarchy cache + +`.specfact/backlog/github_hierarchy_cache.md` is the local lookup source for current Epic and Feature 
hierarchy metadata in this repository. It is ephemeral local state and MUST NOT be committed. + +- Before creating a new change issue, syncing an existing change, or resolving parent or blocker metadata, consult the cache first. +- If the cache is missing or stale, rerun `python scripts/sync_github_hierarchy_cache.py`. +- Recreate the cache as part of OpenSpec and GitHub issue work rather than treating it as a versioned repo artifact. +- Use manual GitHub lookup only when the cache cannot answer the question after refresh. + Use the `specfact-openspec-workflows` skill as the default execution path for OpenSpec lifecycle work. - When a Markdown plan exists and the intent is to create a change from that plan, use `.cursor/commands/wf-create-change-from-plan.md` (`/wf-change-from-plan`) to generate the proposal/tasks/spec deltas. diff --git a/openspec/CHANGE_ORDER.md b/openspec/CHANGE_ORDER.md index c1ed8b7d..c164f229 100644 --- a/openspec/CHANGE_ORDER.md +++ b/openspec/CHANGE_ORDER.md @@ -181,6 +181,7 @@ Cross-repo dependency: `docs-07-core-handoff-conversion` depends on `specfact-cl | Module | Order | Change folder | GitHub # | Blocked by | |--------|-------|---------------|----------|------------| | governance | 01 | cross-repo-issue-realignment | [#484](https://github.com/nold-ai/specfact-cli/issues/484) | agile-01 ✅; module-migration-11 [#408](https://github.com/nold-ai/specfact-cli/issues/408); backlog-module-ownership-cleanup | +| governance | 02 | governance-02-github-hierarchy-cache | [#491](https://github.com/nold-ai/specfact-cli/issues/491) | governance-01 [#484](https://github.com/nold-ai/specfact-cli/issues/484); Parent Feature: [#486](https://github.com/nold-ai/specfact-cli/issues/486) | ### Cross-cutting foundations (no hard dependencies — implement early) @@ -256,6 +257,7 @@ Cross-repo dependency: `docs-07-core-handoff-conversion` depends on `specfact-cl | profile | 01 | profile-01-config-layering | 
[#237](https://github.com/nold-ai/specfact-cli/issues/237) | #193 (existing init/module-state baseline) | | profile | 02 | profile-02-central-config-sources | [#249](https://github.com/nold-ai/specfact-cli/issues/249) | #237 (profile-01) | | profile | 03 | profile-03-domain-overlays | [#250](https://github.com/nold-ai/specfact-cli/issues/250) | #237 (profile-01), #249 (profile-02), #213 | +| profile | 04 | profile-04-safe-project-artifact-writes | [#490](https://github.com/nold-ai/specfact-cli/issues/490) | Parent Feature: [#365](https://github.com/nold-ai/specfact-cli/issues/365); related bug [#487](https://github.com/nold-ai/specfact-cli/issues/487) | ### Requirements layer (architecture integration plan, 2026-02-15) diff --git a/openspec/changes/governance-02-github-hierarchy-cache/.openspec.yaml b/openspec/changes/governance-02-github-hierarchy-cache/.openspec.yaml new file mode 100644 index 00000000..98d7681c --- /dev/null +++ b/openspec/changes/governance-02-github-hierarchy-cache/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-04-09 diff --git a/openspec/changes/governance-02-github-hierarchy-cache/CHANGE_VALIDATION.md b/openspec/changes/governance-02-github-hierarchy-cache/CHANGE_VALIDATION.md new file mode 100644 index 00000000..6c5c097f --- /dev/null +++ b/openspec/changes/governance-02-github-hierarchy-cache/CHANGE_VALIDATION.md @@ -0,0 +1,12 @@ +# CHANGE VALIDATION + +- Change: `governance-02-github-hierarchy-cache` +- Date: 2026-04-09 +- Command: `openspec validate governance-02-github-hierarchy-cache --strict` +- Result: PASS + +## Notes + +- The new capability `github-hierarchy-cache` validates as a net-new spec delta. +- The modified capability `agile-feature-hierarchy` remains aligned with the existing spec folder name. +- The change is apply-ready from an OpenSpec artifact perspective. 
diff --git a/openspec/changes/governance-02-github-hierarchy-cache/TDD_EVIDENCE.md b/openspec/changes/governance-02-github-hierarchy-cache/TDD_EVIDENCE.md new file mode 100644 index 00000000..487be9b6 --- /dev/null +++ b/openspec/changes/governance-02-github-hierarchy-cache/TDD_EVIDENCE.md @@ -0,0 +1,36 @@ +# TDD Evidence + +## Failing-before implementation + +- Timestamp: `2026-04-09T21:03:37+02:00` +- Command: `python3 -m pytest tests/unit/scripts/test_sync_github_hierarchy_cache.py -q` +- Result: FAIL +- Summary: All three tests failed with `FileNotFoundError` because `scripts/sync_github_hierarchy_cache.py` did not exist yet. + +## Failing-before path relocation refinement + +- Timestamp: `2026-04-09T21:17:04+02:00` +- Command: `python3 -m pytest tests/unit/scripts/test_sync_github_hierarchy_cache.py -q` +- Result: FAIL +- Summary: The new default-path test failed because the script still targeted `openspec/GITHUB_HIERARCHY_CACHE.md` instead of ignored `.specfact/backlog/` storage. + +## Passing-after implementation + +- Timestamp: `2026-04-09T21:17:35+02:00` +- Command: `python3 -m pytest tests/unit/scripts/test_sync_github_hierarchy_cache.py -q` +- Result: PASS +- Summary: All five script tests passed after moving the cache into ignored `.specfact/backlog/` storage and keeping the no-op fingerprint path intact. 
+ +## Additional verification + +- `python3 -m py_compile scripts/sync_github_hierarchy_cache.py` → PASS +- `python3 scripts/sync_github_hierarchy_cache.py --force` → generated `.specfact/backlog/github_hierarchy_cache.md` +- Second `python3 scripts/sync_github_hierarchy_cache.py` run → `GitHub hierarchy cache unchanged (46 issues).` + +## Final scoped quality gates + +- Timestamp: `2026-04-09T22:04:18+02:00` +- Ruff: `PATH=/home/dom/git/nold-ai/specfact-cli/.venv/bin:$PATH PYTHONPATH=/home/dom/git/nold-ai/specfact-cli-worktrees/feature/governance-02-github-hierarchy-cache/src /home/dom/git/nold-ai/specfact-cli/.venv/bin/ruff check scripts/sync_github_hierarchy_cache.py tests/unit/scripts/test_sync_github_hierarchy_cache.py` → PASS +- basedpyright: `PATH=/home/dom/git/nold-ai/specfact-cli/.venv/bin:$PATH PYTHONPATH=/home/dom/git/nold-ai/specfact-cli-worktrees/feature/governance-02-github-hierarchy-cache/src /home/dom/git/nold-ai/specfact-cli/.venv/bin/basedpyright scripts/sync_github_hierarchy_cache.py tests/unit/scripts/test_sync_github_hierarchy_cache.py` → PASS (`0 errors, 0 warnings`) +- pytest: `PATH=/home/dom/git/nold-ai/specfact-cli/.venv/bin:$PATH PYTHONPATH=/home/dom/git/nold-ai/specfact-cli-worktrees/feature/governance-02-github-hierarchy-cache/src /home/dom/git/nold-ai/specfact-cli/.venv/bin/pytest tests/unit/scripts/test_sync_github_hierarchy_cache.py -q` → PASS (`5 passed`) +- SpecFact code review: `PATH=/home/dom/git/nold-ai/specfact-cli/.venv/bin:$PATH PYTHONPATH=/home/dom/git/nold-ai/specfact-cli-worktrees/feature/governance-02-github-hierarchy-cache/src /home/dom/git/nold-ai/specfact-cli/.venv/bin/specfact code review run --scope changed --include-tests --json --out .specfact/code-review.json` → PASS (`Review completed with no findings.`) diff --git a/openspec/changes/governance-02-github-hierarchy-cache/design.md b/openspec/changes/governance-02-github-hierarchy-cache/design.md new file mode 100644 index 00000000..c0653ca4 --- /dev/null +++ 
b/openspec/changes/governance-02-github-hierarchy-cache/design.md @@ -0,0 +1,72 @@ +## Context + +`specfact-cli` already maintains GitHub planning hierarchy through issue labels, parent-child links, and `openspec/CHANGE_ORDER.md`, but contributors still discover that structure by hitting the GitHub API manually. The new requirement is to make hierarchy lookup deterministic, cheap, and local: a generated markdown file under ignored `.specfact/backlog/` becomes the first source for parent Feature and Epic resolution, and the sync command is rerun only when the hierarchy changed. + +This is a cross-cutting governance change because it affects GitHub automation, OpenSpec operating rules, and agent instructions. The same pattern is needed in `specfact-cli-modules`, but each repo should own its own script, state file, and cache output so the result remains self-contained. + +## Goals / Non-Goals + +**Goals:** +- Generate a deterministic markdown cache of Epic and Feature issues for this repository. +- Include enough metadata for issue-parenting work without another GitHub lookup: issue number, title, short summary, labels, parent/child relationships, and issue URLs. +- Make the sync fast on no-op runs by using a small fingerprint/state check before regenerating markdown. +- Update repo guidance so contributors use the cache first and only rerun sync when needed. + +**Non-Goals:** +- Replacing GitHub as the authoritative source of issue hierarchy. +- Caching every issue type or full issue bodies. +- Synchronizing User Story issues into the cache in this first version. +- Introducing a new external dependency beyond the existing `gh` CLI. + +## Decisions + +### Use `gh api graphql` as the sole upstream source +The script will query GitHub through `gh api graphql` so it can access issue type, labels, relationships, and brief body content in one supported path. This avoids scraping markdown or depending on REST endpoints that do not expose hierarchy fields consistently. 
+ +Alternative considered: +- `gh issue list/view` JSON loops: simpler, but requires many calls and awkward relationship reconstruction. + +### Split the sync into a lightweight fingerprint pass and a full render pass +The script will first fetch only the Epic and Feature issue identity set plus timestamps/relationship fingerprints, hash that data, and compare it with a local state file. If the fingerprint matches, the script exits successfully without rewriting markdown. If it differs, the script performs a fuller metadata query and regenerates the cache. + +Alternative considered: +- Always regenerate markdown: deterministic but wastes GitHub calls and makes local workflows slower. + +### Store human-readable cache plus machine-readable state under ignored `.specfact/backlog` +The canonical human-facing output will be `.specfact/backlog/github_hierarchy_cache.md`. A companion state file, `.specfact/backlog/github_hierarchy_cache_state.json`, will hold the last fingerprint and generator metadata. Both files stay local and ignored by Git so the cache can be recreated freely without creating repository drift. + +Alternative considered: +- State embedded in markdown comments: workable, but couples machine state to user-facing output and complicates deterministic rendering. + +### Render by deterministic section and sort order +The markdown will use fixed sections for Epics and Features, with issues sorted stably by type, then issue number. Relationship lists and labels will also be sorted deterministically so reruns only change the file when source metadata actually changes. + +Alternative considered: +- Preserve GitHub API order: easier, but can drift between runs and create noisy diffs. 
+ +### Keep instruction updates in repo-local governance files +The change will update `openspec/config.yaml` and `AGENTS.md` in this repo so the workflow explicitly says: consult the cache first, regenerate it when fresh planning metadata is needed, and avoid ad hoc GitHub lookups unless the cache is stale or missing. + +Alternative considered: +- Document the behavior only in the script help text: insufficient because agents and OpenSpec flows read governance files first. + +## Risks / Trade-offs + +- [GitHub schema drift] → Keep GraphQL fields minimal and cover parsing/rendering with tests that pin expected shapes. +- [Cache becomes stale if users forget to rerun sync] → Update `AGENTS.md` and `openspec/config.yaml` to make rerun conditions explicit and keep the script fast enough to run routinely. +- [Relationship data differs between repos or issue states] → Normalize missing parents/children to explicit empty values and show unresolved relationships clearly in markdown. +- [No-op fingerprint misses relevant content changes] → Include type, number, title, updated timestamp, labels, and parent identity in the fingerprint rather than only issue count. + +## Migration Plan + +1. Add the sync script, state handling, markdown renderer, and tests. +2. Generate the initial cache file under ignored `.specfact/backlog/`. +3. Update `openspec/config.yaml` and `AGENTS.md` to use the cache-first workflow. +4. Run validation and repository tests, then sync the paired change issue metadata. + +Rollback is straightforward: remove the script, state file, cache file, and governance references if the workflow proves noisy or unreliable. + +## Open Questions + +- Whether a later follow-up should also cache User Story issues once the Feature/Epic workflow is stable. +- Whether the fingerprint pass should use a dedicated smaller GraphQL query or reuse one richer query and short-circuit before rendering if unchanged. 
diff --git a/openspec/changes/governance-02-github-hierarchy-cache/proposal.md b/openspec/changes/governance-02-github-hierarchy-cache/proposal.md new file mode 100644 index 00000000..edd72b3c --- /dev/null +++ b/openspec/changes/governance-02-github-hierarchy-cache/proposal.md @@ -0,0 +1,31 @@ +## Why + +OpenSpec and agent workflows still have to query GitHub ad hoc to rediscover Epics, Features, and their parent links before creating or syncing change issues. That is slow, expensive, and error-prone, especially now that planning hierarchy matters in both `specfact-cli` and `specfact-cli-modules`. + +## What Changes + +- Add a deterministic repo-local hierarchy cache generator for GitHub Epic and Feature issues. +- Persist a repo-local markdown inventory under ignored `.specfact/backlog/` with issue number, title, brief summary, labels, and hierarchy relationships. +- Add a lightweight fingerprint/state check so the sync exits quickly when Epic and Feature metadata has not changed. +- Update governance instructions in `openspec/config.yaml` and `AGENTS.md` to consult the cached hierarchy inventory first and rerun the sync script when fresh data is needed. +- Cover the script with tests so cache output and no-change behavior remain stable. + +## Capabilities + +### New Capabilities +- `github-hierarchy-cache`: Deterministic synchronization of GitHub Epic and Feature hierarchy metadata into a repo-local markdown cache for low-cost parent and planning lookups. + +### Modified Capabilities +- `agile-feature-hierarchy`: Local governance workflows must be able to resolve current Epic and Feature planning metadata from the repo-local cache before performing manual GitHub lookups. + +## Impact + +- Affected code: new script and tests under `scripts/` and `tests/`, plus OpenSpec governance guidance in `openspec/config.yaml` and `AGENTS.md`. 
+- Affected workflow: OpenSpec change creation, GitHub issue creation/sync, and `CHANGE_ORDER.md` maintenance become cache-first instead of lookup-first. +- Cross-repo impact: a sibling change in `specfact-cli-modules` must implement the same pattern so both repos expose equivalent planning metadata locally. + +## Source Tracking + +- GitHub Issue: [#491](https://github.com/nold-ai/specfact-cli/issues/491) +- Parent Feature: [#486](https://github.com/nold-ai/specfact-cli/issues/486) +- Related Modules Change: `specfact-cli-modules/governance-03-github-hierarchy-cache` diff --git a/openspec/changes/governance-02-github-hierarchy-cache/specs/agile-feature-hierarchy/spec.md b/openspec/changes/governance-02-github-hierarchy-cache/specs/agile-feature-hierarchy/spec.md new file mode 100644 index 00000000..757c8a37 --- /dev/null +++ b/openspec/changes/governance-02-github-hierarchy-cache/specs/agile-feature-hierarchy/spec.md @@ -0,0 +1,22 @@ +## MODIFIED Requirements + +### Requirement: GitHub Agile Feature Hierarchy +The project governance workflow SHALL maintain a three-level GitHub planning hierarchy of Epic -> Feature -> User Story for the public SpecFact CLI backlog, and SHALL expose the current Epic and Feature metadata through a repo-local hierarchy cache before manual GitHub lookups are used. 
+ +#### Scenario: Feature issues group user stories under the correct epic +- **GIVEN** the public backlog contains Epic issues and change-proposal issues +- **WHEN** the hierarchy setup work is completed +- **THEN** each planned Feature issue is linked to its parent Epic +- **AND** each grouped User Story issue is assigned to the correct Feature + +#### Scenario: CHANGE_ORDER stays aligned with the GitHub hierarchy +- **GIVEN** new Epic or Feature-level hierarchy items are introduced in GitHub +- **WHEN** the change is updated +- **THEN** `openspec/CHANGE_ORDER.md` reflects the current Epic and Feature sequencing metadata +- **AND** stale issue state such as archived-but-open items is reconciled during validation + +#### Scenario: Local cache is consulted before manual hierarchy lookup +- **GIVEN** a contributor needs a parent Feature or Epic while creating or syncing a change issue +- **WHEN** the local hierarchy cache is present and current +- **THEN** the contributor can resolve the parent relationship from the cache without an additional GitHub lookup +- **AND** the sync script is rerun only when the cache is stale or missing diff --git a/openspec/changes/governance-02-github-hierarchy-cache/specs/github-hierarchy-cache/spec.md b/openspec/changes/governance-02-github-hierarchy-cache/specs/github-hierarchy-cache/spec.md new file mode 100644 index 00000000..01728b27 --- /dev/null +++ b/openspec/changes/governance-02-github-hierarchy-cache/specs/github-hierarchy-cache/spec.md @@ -0,0 +1,29 @@ +## ADDED Requirements + +### Requirement: Repository hierarchy cache sync +The repository SHALL provide a deterministic sync mechanism that retrieves GitHub Epic and Feature issues for the current repository and writes a local hierarchy cache under ignored `.specfact/backlog/`. 
+ +#### Scenario: Generate hierarchy cache from GitHub metadata +- **WHEN** the user runs the hierarchy cache sync script for the repository +- **THEN** the script retrieves GitHub issues whose Type is `Epic` or `Feature` +- **AND** writes a markdown cache under ignored `.specfact/backlog/` with each issue's number, title, URL, short summary, labels, and hierarchy relationships +- **AND** the output ordering is deterministic across repeated runs with unchanged source data + +#### Scenario: Represent hierarchy relationships in cache output +- **WHEN** a synced Epic or Feature has parent or child hierarchy links +- **THEN** the markdown cache includes those relationships in normalized form +- **AND** missing relationships are rendered as explicit empty or none values rather than omitted ambiguously + +#### Scenario: Fast exit on unchanged hierarchy state +- **WHEN** the script detects that the current Epic and Feature hierarchy fingerprint matches the last synced fingerprint +- **THEN** it exits successfully without regenerating the markdown cache +- **AND** it reports that no hierarchy update was required + +### Requirement: Repository governance must use cache-first hierarchy lookup +Repository governance instructions SHALL direct contributors and agents to consult the local hierarchy cache before performing manual GitHub lookups for Epic or Feature parenting. 
+ +#### Scenario: Cache-first governance guidance +- **WHEN** a contributor reads `AGENTS.md` or `openspec/config.yaml` for GitHub issue setup guidance +- **THEN** the instructions tell them to consult the local hierarchy cache first +- **AND** the instructions define when the sync script must be rerun to refresh stale hierarchy metadata +- **AND** the instructions state that the cache is local ephemeral state and must not be committed diff --git a/openspec/changes/governance-02-github-hierarchy-cache/tasks.md b/openspec/changes/governance-02-github-hierarchy-cache/tasks.md new file mode 100644 index 00000000..ee49e6d4 --- /dev/null +++ b/openspec/changes/governance-02-github-hierarchy-cache/tasks.md @@ -0,0 +1,20 @@ +## 1. Change setup and governance sync + +- [x] 1.1 Create and sync the GitHub issue for `governance-02-github-hierarchy-cache`, link it to the correct parent Feature, and update `openspec/CHANGE_ORDER.md` plus proposal source tracking. +- [x] 1.2 Validate the change artifacts and capture the validation report in `openspec/changes/governance-02-github-hierarchy-cache/CHANGE_VALIDATION.md`. + +## 2. Spec-first test setup + +- [x] 2.1 Add or update tests for hierarchy fingerprinting, deterministic markdown rendering, and fast no-change exit behavior. +- [x] 2.2 Run the targeted test command, confirm it fails before implementation, and record the failing run in `openspec/changes/governance-02-github-hierarchy-cache/TDD_EVIDENCE.md`. + +## 3. Implementation + +- [x] 3.1 Implement the repository-local GitHub hierarchy cache sync script and state file handling under `scripts/`. +- [x] 3.2 Generate the initial `.specfact/backlog/github_hierarchy_cache.md` output and ensure reruns remain deterministic without committing it. +- [x] 3.3 Update `openspec/config.yaml` and `AGENTS.md` so GitHub issue setup and parent lookup use the cache-first workflow. + +## 4. 
Verification + +- [x] 4.1 Re-run the targeted tests and record the passing run in `openspec/changes/governance-02-github-hierarchy-cache/TDD_EVIDENCE.md`. +- [x] 4.2 Run the required repo quality gates for the touched scope, including code review JSON refresh if stale. diff --git a/openspec/config.yaml b/openspec/config.yaml index 1bcde599..c89e95fd 100644 --- a/openspec/config.yaml +++ b/openspec/config.yaml @@ -88,6 +88,9 @@ rules: - **Repository**: /, - **Last Synced Status**: ). - After creation, update proposal.md Source Tracking section with issue number, URL, repository, and status. + - Resolve Parent Feature or Epic from `.specfact/backlog/github_hierarchy_cache.md` first; this + cache is ephemeral local state and MUST NOT be committed. If it is missing or stale, rerun + `python scripts/sync_github_hierarchy_cache.py` before manual GitHub lookup. - Source tracking: Only track public repos (specfact-cli, platform-frontend). Skip for internal repos (specfact-cli-internal) specs: @@ -157,6 +160,9 @@ rules: - Place this task after quality gates and documentation, before PR creation. - Include git workflow tasks: branch creation (first task), PR creation (last task) - For public-facing changes in public repos (specfact-cli, platform-frontend): + - Before GitHub issue creation or parent linking, consult `.specfact/backlog/github_hierarchy_cache.md`; + rerun `python scripts/sync_github_hierarchy_cache.py` when the cache is missing or stale. + Treat this cache as ephemeral local state, not a committed OpenSpec artifact. 
- Include GitHub issue creation task with format: - title `[Change] ` - labels `enhancement` and `change-proposal` diff --git a/scripts/sync_github_hierarchy_cache.py b/scripts/sync_github_hierarchy_cache.py new file mode 100644 index 00000000..fe589769 --- /dev/null +++ b/scripts/sync_github_hierarchy_cache.py @@ -0,0 +1,502 @@ +#!/usr/bin/env python3 +"""Sync GitHub Epic/Feature hierarchy into a local OpenSpec cache.""" + +from __future__ import annotations + +import argparse +import hashlib +import json +import subprocess +import sys +from collections.abc import Mapping +from dataclasses import dataclass +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +from beartype import beartype +from icontract import ensure, require + + +DEFAULT_REPO_OWNER = "nold-ai" +DEFAULT_REPO_NAME = Path(__file__).resolve().parents[1].name +DEFAULT_OUTPUT_PATH = Path(".specfact") / "backlog" / "github_hierarchy_cache.md" +DEFAULT_STATE_PATH = Path(".specfact") / "backlog" / "github_hierarchy_cache_state.json" +SUPPORTED_ISSUE_TYPES = frozenset({"Epic", "Feature"}) +_SUMMARY_SKIP_LINES = {"why", "scope", "summary", "changes", "capabilities", "impact"} + +_FINGERPRINT_QUERY = """ +query($owner: String!, $name: String!, $after: String) { + repository(owner: $owner, name: $name) { + issues(first: 100, after: $after, states: [OPEN, CLOSED], orderBy: {field: CREATED_AT, direction: ASC}) { + pageInfo { hasNextPage endCursor } + nodes { + number + title + url + updatedAt + issueType { name } + labels(first: 100) { nodes { name } } + parent { number title url } + subIssues(first: 100) { nodes { number title url } } + } + } + } +} +""" + +_DETAIL_QUERY = """ +query($owner: String!, $name: String!, $after: String) { + repository(owner: $owner, name: $name) { + issues(first: 100, after: $after, states: [OPEN, CLOSED], orderBy: {field: CREATED_AT, direction: ASC}) { + pageInfo { hasNextPage endCursor } + nodes { + number + title + url + updatedAt + bodyText + 
issueType { name } + labels(first: 100) { nodes { name } } + parent { number title url } + subIssues(first: 100) { nodes { number title url } } + } + } + } +} +""" + + +@dataclass(frozen=True) +class IssueLink: + """Compact link to a related issue.""" + + number: int + title: str + url: str + + +@dataclass(frozen=True) +class HierarchyIssue: + """Normalized hierarchy issue used for cache rendering.""" + + number: int + title: str + url: str + issue_type: str + labels: list[str] + summary: str + updated_at: str + parent: IssueLink | None + children: list[IssueLink] + + +@dataclass(frozen=True) +class SyncResult: + """Outcome of a cache sync attempt.""" + + changed: bool + issue_count: int + fingerprint: str + output_path: Path + + +@beartype +def _extract_summary(body_text: str) -> str: + """Return a compact summary line for markdown output.""" + normalized = body_text.replace("\\n", "\n") + for line in normalized.splitlines(): + cleaned = line.strip() + if not cleaned: + continue + if cleaned.startswith("#"): + cleaned = cleaned.lstrip("#").strip() + if cleaned.lower().rstrip(":") in _SUMMARY_SKIP_LINES: + continue + if cleaned: + return cleaned[:200] + return "No summary provided." 
+ + +@beartype +def _parse_issue_link(node: Mapping[str, Any] | None) -> IssueLink | None: + """Convert a GraphQL link node to IssueLink.""" + if not node: + return None + return IssueLink( + number=int(node["number"]), + title=str(node["title"]), + url=str(node["url"]), + ) + + +@beartype +def _mapping_value(node: Mapping[str, Any], key: str) -> Mapping[str, Any] | None: + """Return a nested mapping value when present.""" + value = node.get(key) + return value if isinstance(value, Mapping) else None + + +@beartype +def _mapping_nodes(container: Mapping[str, Any] | None) -> list[Mapping[str, Any]]: + """Return a filtered list of mapping nodes from a GraphQL connection.""" + if container is None: + return [] + + raw_nodes = container.get("nodes") + if not isinstance(raw_nodes, list): + return [] + + return [item for item in raw_nodes if isinstance(item, Mapping)] + + +@beartype +def _label_names(label_nodes: list[Mapping[str, Any]]) -> list[str]: + """Extract sorted label names from GraphQL label nodes.""" + names: list[str] = [] + for item in label_nodes: + name = item.get("name") + if name: + names.append(str(name)) + return sorted(names, key=str.lower) + + +@beartype +def _child_links(subissue_nodes: list[Mapping[str, Any]]) -> list[IssueLink]: + """Extract sorted child issue links from GraphQL subissue nodes.""" + children = [ + IssueLink(number=int(item["number"]), title=str(item["title"]), url=str(item["url"])) + for item in subissue_nodes + if item.get("number") is not None + ] + children.sort(key=lambda item: item.number) + return children + + +@beartype +def _parse_issue_node(node: Mapping[str, Any], *, include_body: bool) -> HierarchyIssue | None: + """Convert a GraphQL issue node to HierarchyIssue when supported.""" + issue_type_node = _mapping_value(node, "issueType") + issue_type_name = str(issue_type_node["name"]) if issue_type_node and issue_type_node.get("name") else None + if issue_type_name not in SUPPORTED_ISSUE_TYPES: + return None + + summary = 
_extract_summary(str(node.get("bodyText", ""))) if include_body else "" + return HierarchyIssue( + number=int(node["number"]), + title=str(node["title"]), + url=str(node["url"]), + issue_type=str(issue_type_name), + labels=_label_names(_mapping_nodes(_mapping_value(node, "labels"))), + summary=summary, + updated_at=str(node["updatedAt"]), + parent=_parse_issue_link(_mapping_value(node, "parent")), + children=_child_links(_mapping_nodes(_mapping_value(node, "subIssues"))), + ) + + +@beartype +def _run_graphql_query(query: str, *, repo_owner: str, repo_name: str, after: str | None) -> Mapping[str, Any]: + """Run a GitHub GraphQL query through `gh`.""" + command = [ + "gh", + "api", + "graphql", + "-f", + f"query={query}", + "-F", + f"owner={repo_owner}", + "-F", + f"name={repo_name}", + ] + if after is not None: + command.extend(["-F", f"after={after}"]) + + completed = subprocess.run(command, check=False, capture_output=True, text=True) + if completed.returncode != 0: + raise RuntimeError(completed.stderr.strip() or completed.stdout.strip() or "GitHub GraphQL query failed") + + payload = json.loads(completed.stdout) + if "errors" in payload: + raise RuntimeError(json.dumps(payload["errors"], indent=2)) + return payload + + +@beartype +def _is_not_blank(value: str) -> bool: + """Return whether a required CLI string value is non-blank.""" + return bool(value.strip()) + + +@beartype +def _has_non_blank_value( + repo_owner: str | None = None, + repo_name: str | None = None, + repo_full_name: str | None = None, + generated_at: str | None = None, + fingerprint: str | None = None, +) -> bool: + """Return whether the provided predicate value is non-blank.""" + for candidate in (repo_owner, repo_name, repo_full_name, generated_at, fingerprint): + if candidate is not None: + return _is_not_blank(candidate) + return False + + +@beartype +def _all_supported_issue_types(result: list[HierarchyIssue]) -> bool: + """Return whether every issue has a supported issue type.""" + return 
all(issue.issue_type in SUPPORTED_ISSUE_TYPES for issue in result)


@beartype
@require(_has_non_blank_value, "repo_owner must not be blank")
@require(_has_non_blank_value, "repo_name must not be blank")
@ensure(_all_supported_issue_types, "Only Epic and Feature issues should be returned")
def fetch_hierarchy_issues(*, repo_owner: str, repo_name: str, fingerprint_only: bool) -> list[HierarchyIssue]:
    """Fetch Epic and Feature issues from GitHub for the given repository.

    When ``fingerprint_only`` is True the lighter query is used and issue
    bodies are not parsed into summaries (``include_body=False`` below).
    """
    query = _FINGERPRINT_QUERY if fingerprint_only else _DETAIL_QUERY
    issues: list[HierarchyIssue] = []
    after: str | None = None

    # Cursor-based pagination: keep requesting pages until pageInfo reports
    # hasNextPage is falsy.
    while True:
        payload = _run_graphql_query(query, repo_owner=repo_owner, repo_name=repo_name, after=after)
        repository = payload.get("data", {}).get("repository", {})
        issue_connection = repository.get("issues", {})
        nodes = issue_connection.get("nodes", [])
        for node in nodes:
            # GraphQL connections may contain null entries; skip anything
            # that is not a mapping before parsing.
            if not isinstance(node, Mapping):
                continue
            parsed = _parse_issue_node(node, include_body=not fingerprint_only)
            if parsed is not None:
                issues.append(parsed)
        page_info = issue_connection.get("pageInfo", {})
        if not page_info.get("hasNextPage"):
            break
        after = page_info.get("endCursor")

    return issues


@beartype
@ensure(lambda result: len(result) == 64, "Fingerprint must be a SHA-256 hex digest")
def compute_hierarchy_fingerprint(issues: list[HierarchyIssue]) -> str:
    """Compute a deterministic fingerprint for hierarchy state.

    Input order does not matter: rows are sorted by (issue_type, number) and
    serialized with sorted keys and fixed separators. Only structural fields
    are hashed (no url or summary), so the cheap fingerprint-only query
    yields the same digest as a detailed fetch of the same hierarchy.
    """
    canonical_rows: list[dict[str, Any]] = []
    for issue in sorted(issues, key=lambda item: (item.issue_type, item.number)):
        canonical_rows.append(
            {
                "number": issue.number,
                "title": issue.title,
                "issue_type": issue.issue_type,
                "updated_at": issue.updated_at,
                # Labels and children are sorted so their API ordering cannot
                # change the digest.
                "labels": sorted(issue.labels, key=str.lower),
                "parent_number": issue.parent.number if issue.parent else None,
                "child_numbers": [child.number for child in sorted(issue.children, key=lambda item: item.number)],
            }
        )

    canonical_json = json.dumps(canonical_rows, sort_keys=True, separators=(",", ":"))
    return hashlib.sha256(canonical_json.encode("utf-8")).hexdigest()


@beartype
def _group_issues_by_type(issues: list[HierarchyIssue]) -> dict[str, list[HierarchyIssue]]:
    """Return issues grouped by supported type in deterministic order."""
    return {
        issue_type: sorted((item for item in issues if item.issue_type == issue_type), key=lambda item: item.number)
        for issue_type in SUPPORTED_ISSUE_TYPES
    }


@beartype
def _render_issue_block(issue: HierarchyIssue) -> list[str]:
    """Render one issue block for the hierarchy cache.

    Returns the markdown lines for a single issue: heading plus URL, parent,
    children, labels, and summary bullet lines, followed by a blank line.
    """
    parent_text = "none"
    if issue.parent is not None:
        parent_text = f"#{issue.parent.number} {issue.parent.title}"

    child_text = "none"
    if issue.children:
        child_text = ", ".join(f"#{child.number} {child.title}" for child in issue.children)

    label_text = ", ".join(sorted(issue.labels, key=str.lower)) if issue.labels else "none"
    return [
        f"### #{issue.number} {issue.title}",
        f"- URL: {issue.url}",
        f"- Parent: {parent_text}",
        f"- Children: {child_text}",
        f"- Labels: {label_text}",
        f"- Summary: {issue.summary or 'No summary provided.'}",
        "",
    ]


@beartype
def _render_issue_section(*, title: str, issues: list[HierarchyIssue]) -> list[str]:
    """Render one section of grouped issues."""
    lines = [f"## {title}", ""]
    if not issues:
        # Empty sections still render, with an explicit "_None_" marker.
        lines.extend(["_None_", ""])
        return lines

    for issue in issues:
        lines.extend(_render_issue_block(issue))
    return lines


@beartype
@require(_has_non_blank_value, "repo_full_name must not be blank")
@require(_has_non_blank_value, "generated_at must not be blank")
@require(_has_non_blank_value, "fingerprint must not be blank")
def render_cache_markdown(
    *,
    repo_full_name: str,
    issues: list[HierarchyIssue],
    generated_at: str,
    fingerprint: str,
) -> str:
    """Render deterministic markdown for the hierarchy cache.

    Args:
        repo_full_name: "owner/name" string for the header.
        issues: Hierarchy issues to group and render.
        generated_at: Timestamp string recorded verbatim in the header.
        fingerprint: Hierarchy fingerprint recorded verbatim in the header.

    Returns:
        The full cache document, ending with exactly one trailing newline.
    """
    grouped = _group_issues_by_type(issues)

    lines = [
        "# GitHub Hierarchy Cache",
        "",
        f"- Repository: `{repo_full_name}`",
        f"- Generated At: `{generated_at}`",
        f"- Fingerprint: `{fingerprint}`",
        f"- Included Issue Types: `{', '.join(sorted(SUPPORTED_ISSUE_TYPES))}`",
        "",
        (
            "Use this file as the first lookup source for parent Epic or Feature relationships "
            "during OpenSpec and GitHub issue setup."
        ),
        "",
    ]

    for section_name, issue_type in (("Epics", "Epic"), ("Features", "Feature")):
        lines.extend(_render_issue_section(title=section_name, issues=grouped[issue_type]))

    return "\n".join(lines).rstrip() + "\n"


@beartype
def _load_state(state_path: Path) -> Mapping[str, Any]:
    """Load state JSON if it exists; otherwise return empty mapping.

    Corrupt or non-object state files are treated as missing, which simply
    forces a full cache rewrite on the next sync.
    """
    if not state_path.exists():
        return {}
    try:
        loaded = json.loads(state_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError:
        return {}
    return loaded if isinstance(loaded, Mapping) else {}


@beartype
def _write_state(
    *, state_path: Path, repo_full_name: str, fingerprint: str, issue_count: int, generated_at: str
) -> None:
    """Persist machine-readable sync state."""
    state_path.parent.mkdir(parents=True, exist_ok=True)
    payload = {
        "repo": repo_full_name,
        "fingerprint": fingerprint,
        "issue_count": issue_count,
        "generated_at": generated_at,
    }
    state_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8")


@beartype
@require(_has_non_blank_value, "repo_owner must not be blank")
@require(_has_non_blank_value, "repo_name must not be blank")
def sync_cache(
    *,
    repo_owner: str,
    repo_name: str,
    output_path: Path,
    state_path: Path,
    force: bool = False,
) -> SyncResult:
    """Sync the local hierarchy cache from GitHub.

    Two-phase fetch: a cheap fingerprint-only query decides whether anything
    changed; only on a mismatch (or --force) is the detailed query run and
    the markdown cache rewritten.
    """
    fingerprint_issues = fetch_hierarchy_issues(
        repo_owner=repo_owner,
        repo_name=repo_name,
        fingerprint_only=True,
    )
    fingerprint = compute_hierarchy_fingerprint(fingerprint_issues)
    state = _load_state(state_path)

    # Skip the rewrite only when the recorded fingerprint matches AND the
    # cache file itself still exists (it may have been deleted manually).
    if not force and state.get("fingerprint") == fingerprint and output_path.exists():
        return SyncResult(
            changed=False,
            issue_count=len(fingerprint_issues),
            fingerprint=fingerprint,
            output_path=output_path,
        )

    detailed_issues = fetch_hierarchy_issues(
        repo_owner=repo_owner,
        repo_name=repo_name,
        fingerprint_only=False,
    )
    # Second-resolution UTC timestamp normalized to the "Z" suffix form.
    generated_at = datetime.now(tz=UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z")
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(
        render_cache_markdown(
            repo_full_name=f"{repo_owner}/{repo_name}",
            issues=detailed_issues,
            generated_at=generated_at,
            fingerprint=fingerprint,
        ),
        encoding="utf-8",
    )
    _write_state(
        state_path=state_path,
        repo_full_name=f"{repo_owner}/{repo_name}",
        fingerprint=fingerprint,
        issue_count=len(detailed_issues),
        generated_at=generated_at,
    )
    return SyncResult(
        changed=True,
        issue_count=len(detailed_issues),
        fingerprint=fingerprint,
        output_path=output_path,
    )


@beartype
def _build_parser() -> argparse.ArgumentParser:
    """Create CLI argument parser."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--repo-owner", default=DEFAULT_REPO_OWNER, help="GitHub repo owner")
    parser.add_argument("--repo-name", default=DEFAULT_REPO_NAME, help="GitHub repo name")
    parser.add_argument("--output", default=str(DEFAULT_OUTPUT_PATH), help="Markdown cache output path")
    parser.add_argument("--state-file", default=str(DEFAULT_STATE_PATH), help="Fingerprint state file path")
    parser.add_argument("--force", action="store_true", help="Rewrite cache even when fingerprint is unchanged")
    return parser


@beartype
@ensure(lambda result: result >= 0, "exit code must be non-negative")
def main(argv: list[str] | None = None) -> int:
    """Run the hierarchy cache sync.

    Returns 0 on success; errors from gh/GraphQL propagate as exceptions.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)
    result = sync_cache(
        repo_owner=args.repo_owner,
        repo_name=args.repo_name,
        output_path=Path(args.output),
        state_path=Path(args.state_file),
        force=bool(args.force),
    )
    if result.changed:
        sys.stdout.write(f"Updated GitHub hierarchy cache with {result.issue_count} issues at {result.output_path}\n")
    else:
        sys.stdout.write(f"GitHub hierarchy cache unchanged ({result.issue_count} issues).\n")
    return 0


if __name__ == "__main__":
    sys.exit(main())
diff --git a/tests/unit/scripts/test_sync_github_hierarchy_cache.py b/tests/unit/scripts/test_sync_github_hierarchy_cache.py
new file mode 100644
index 00000000..9c940856
--- /dev/null
+++ b/tests/unit/scripts/test_sync_github_hierarchy_cache.py
@@ -0,0 +1,215 @@
"""Tests for scripts/sync_github_hierarchy_cache.py."""

from __future__ import annotations

import importlib.util
import sys
from functools import lru_cache
from pathlib import Path
from typing import Any, TypedDict


class IssueOptions(TypedDict, total=False):
    """Optional test issue fields.

    All keys are optional (total=False); _make_issue supplies defaults for
    anything omitted.
    """

    labels: list[str]
    summary: str
    parent: tuple[int, str]
    children: list[tuple[int, str]]
    updated_at: str


@lru_cache(maxsize=1)
def _load_script_module() -> Any:
    """Load scripts/sync_github_hierarchy_cache.py as a Python module.

    The script lives outside any package, so it is loaded by file path.
    lru_cache keeps the exec cost to once per test session.
    """
    script_path = Path(__file__).resolve().parents[3] / "scripts" / "sync_github_hierarchy_cache.py"
    spec = importlib.util.spec_from_file_location("sync_github_hierarchy_cache", script_path)
    if spec is None or spec.loader is None:
        raise AssertionError(f"Unable to load script module at {script_path}")
    # Drop any stale entry under the same name so exec_module starts from a
    # clean sys.modules slot before registering the fresh module object.
    sys.modules.pop(spec.name, None)
    module = importlib.util.module_from_spec(spec)
    sys.modules[spec.name] = module
    spec.loader.exec_module(module)
    return module


def _make_issue(
    module: Any,
    *,
    number: int,
    title: str,
    issue_type: str,
    options: IssueOptions | None = None,
) -> Any:
    """Create a HierarchyIssue instance for tests.

    `parent` and `children` are given as (number, title) tuples and expanded
    into IssueLink objects with synthetic example.test URLs.
    """
    issue_options = options or {}
    children = issue_options.get("children", [])
    child_links = [
        module.IssueLink(number=child_number, title=child_title, url=f"https://example.test/issues/{child_number}")
        for child_number, child_title in children
    ]

    parent_link = None
    parent = issue_options.get("parent")
    if parent is not None:
        parent_number, parent_title = parent
        parent_link = module.IssueLink(
            number=parent_number,
            title=parent_title,
            url=f"https://example.test/issues/{parent_number}",
        )

    return module.HierarchyIssue(
        number=number,
        title=title,
        url=f"https://example.test/issues/{number}",
        issue_type=issue_type,
        labels=issue_options.get("labels", []),
        summary=issue_options.get("summary", ""),
        updated_at=issue_options.get("updated_at", "2026-04-09T08:00:00Z"),
        parent=parent_link,
        children=child_links,
    )


def test_compute_hierarchy_fingerprint_is_order_independent() -> None:
    """Fingerprinting should stay stable regardless of input ordering."""
    module = _load_script_module()

    epic = _make_issue(
        module,
        number=485,
        title="[Epic] Governance",
        issue_type="Epic",
        options={
            "labels": ["openspec", "Epic"],
            "summary": "Governance epic.",
            "children": [(486, "[Feature] Alignment")],
        },
    )
    feature = _make_issue(
        module,
        number=486,
        title="[Feature] Alignment",
        issue_type="Feature",
        options={
            "labels": ["Feature", "openspec"],
            "summary": "Alignment feature.",
            "parent": (485, "[Epic] Governance"),
        },
    )

    # Same pair, opposite input order: digests must match.
    first = module.compute_hierarchy_fingerprint([epic, feature])
    second = module.compute_hierarchy_fingerprint([feature, epic])

    assert first == second


def test_extract_summary_skips_heading_only_lines() -> None:
    """Summary extraction should skip markdown section headers."""
    module = _load_script_module()
    extract_summary = module._extract_summary  # pylint: disable=protected-access

    summary = extract_summary("## Why\n\nThis cache avoids repeated GitHub lookups.")

    assert summary == "This cache avoids repeated GitHub lookups."


def test_default_paths_use_ephemeral_specfact_backlog_cache() -> None:
    """Default cache files should live in ignored .specfact/backlog storage."""
    module = _load_script_module()

    assert str(module.DEFAULT_OUTPUT_PATH) == ".specfact/backlog/github_hierarchy_cache.md"
    assert str(module.DEFAULT_STATE_PATH) == ".specfact/backlog/github_hierarchy_cache_state.json"


def test_render_cache_markdown_groups_epics_and_features() -> None:
    """Rendered markdown should be deterministic and grouped by issue type."""
    module = _load_script_module()

    # Feature listed before its parent Epic on purpose: rendering must still
    # emit Epics first and sort blocks by issue number.
    issues = [
        _make_issue(
            module,
            number=486,
            title="[Feature] Alignment",
            issue_type="Feature",
            options={
                "labels": ["openspec", "Feature"],
                "summary": "Alignment feature.",
                "parent": (485, "[Epic] Governance"),
            },
        ),
        _make_issue(
            module,
            number=485,
            title="[Epic] Governance",
            issue_type="Epic",
            options={
                "labels": ["Epic", "openspec"],
                "summary": "Governance epic.",
                "children": [(486, "[Feature] Alignment")],
            },
        ),
    ]

    rendered = module.render_cache_markdown(
        repo_full_name="nold-ai/specfact-cli",
        issues=issues,
        generated_at="2026-04-09T08:30:00Z",
        fingerprint="abc123",
    )

    assert "# GitHub Hierarchy Cache" in rendered
    assert "## Epics" in rendered
    assert "## Features" in rendered
    assert rendered.index("### #485") < rendered.index("### #486")
    assert "- Parent: none" in rendered
    assert "- Parent: #485 [Epic] Governance" in rendered
    assert "- Labels: Epic, openspec" in rendered
    assert "- Labels: Feature, openspec" in rendered


def test_sync_cache_skips_write_when_fingerprint_is_unchanged(monkeypatch: Any, tmp_path: Path) -> None:
    """sync_cache should not rewrite output when the fingerprint matches state."""
    module = _load_script_module()

    output_path = tmp_path / "GITHUB_HIERARCHY_CACHE.md"
    state_path = tmp_path / ".github_hierarchy_cache_state.json"
    # Pre-seed a cache file and a state file whose fingerprint will match.
    output_path.write_text("unchanged cache\n", encoding="utf-8")
    state_path.write_text('{"fingerprint":"same"}', encoding="utf-8")

    issues = [
        _make_issue(
            module,
            number=485,
            title="[Epic] Governance",
            issue_type="Epic",
            options={
                "labels": ["Epic"],
                "summary": "Governance epic.",
            },
        )
    ]

    def _fake_fetch(*, repo_owner: str, repo_name: str, fingerprint_only: bool) -> list[Any]:
        # Only the cheap fingerprint query should run on the unchanged path.
        assert repo_owner == "nold-ai"
        assert repo_name == "specfact-cli"
        assert fingerprint_only is True
        return issues

    def _same_fingerprint(_: list[Any]) -> str:
        return "same"

    # Stub network access and fingerprinting so the test is hermetic.
    monkeypatch.setattr(module, "fetch_hierarchy_issues", _fake_fetch)
    monkeypatch.setattr(module, "compute_hierarchy_fingerprint", _same_fingerprint)

    result = module.sync_cache(
        repo_owner="nold-ai",
        repo_name="specfact-cli",
        output_path=output_path,
        state_path=state_path,
    )

    assert result.changed is False
    assert result.issue_count == 1
    assert output_path.read_text(encoding="utf-8") == "unchanged cache\n"
From 08922ac98356696f94700731051512d728089796 Mon Sep 17 00:00:00 2001
From: Dominikus Nold
Date: Thu, 9 Apr 2026 22:46:50 +0200
Subject: [PATCH 2/4] Backport improvements from modules scripts

---
 .../.openspec.yaml                            |   2 +
 .../CHANGE_VALIDATION.md                      |  12 +
 .../design.md                                 | 117 ++++++++
 .../proposal.md                               |  42 ++++
 .../init-ide-prompt-source-selection/spec.md  |  23 ++
 .../specs/module-owned-ide-prompts/spec.md    |  14 ++
 .../project-artifact-write-safety/spec.md     |  40 ++++
 .../tasks.md                                  |  28 +++
 scripts/sync_github_hierarchy_cache.py        | 213 ++++++++++++------
 .../test_sync_github_hierarchy_cache.py       | 155 ++++++++++++-
 10 files changed, 571 insertions(+), 75 deletions(-)
 create mode 100644 openspec/changes/profile-04-safe-project-artifact-writes/.openspec.yaml
 create mode 100644 openspec/changes/profile-04-safe-project-artifact-writes/CHANGE_VALIDATION.md
 create mode 100644 openspec/changes/profile-04-safe-project-artifact-writes/design.md
 create mode 100644 openspec/changes/profile-04-safe-project-artifact-writes/proposal.md
 create
mode 100644 openspec/changes/profile-04-safe-project-artifact-writes/specs/init-ide-prompt-source-selection/spec.md create mode 100644 openspec/changes/profile-04-safe-project-artifact-writes/specs/module-owned-ide-prompts/spec.md create mode 100644 openspec/changes/profile-04-safe-project-artifact-writes/specs/project-artifact-write-safety/spec.md create mode 100644 openspec/changes/profile-04-safe-project-artifact-writes/tasks.md diff --git a/openspec/changes/profile-04-safe-project-artifact-writes/.openspec.yaml b/openspec/changes/profile-04-safe-project-artifact-writes/.openspec.yaml new file mode 100644 index 00000000..98d7681c --- /dev/null +++ b/openspec/changes/profile-04-safe-project-artifact-writes/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-04-09 diff --git a/openspec/changes/profile-04-safe-project-artifact-writes/CHANGE_VALIDATION.md b/openspec/changes/profile-04-safe-project-artifact-writes/CHANGE_VALIDATION.md new file mode 100644 index 00000000..5d09135c --- /dev/null +++ b/openspec/changes/profile-04-safe-project-artifact-writes/CHANGE_VALIDATION.md @@ -0,0 +1,12 @@ +# CHANGE VALIDATION + +- **Change**: `profile-04-safe-project-artifact-writes` +- **Date**: 2026-04-09 +- **Method**: `openspec validate profile-04-safe-project-artifact-writes --strict` +- **Result**: PASS + +## Notes + +- Proposal, design, specs, and tasks are present and parse successfully. +- The change is intentionally scoped as the core policy/contract side of a paired cross-repo safety effort. +- GitHub tracking is synced to issue [#490](https://github.com/nold-ai/specfact-cli/issues/490) under parent feature [#365](https://github.com/nold-ai/specfact-cli/issues/365), with bug linkage to [#487](https://github.com/nold-ai/specfact-cli/issues/487). 
diff --git a/openspec/changes/profile-04-safe-project-artifact-writes/design.md b/openspec/changes/profile-04-safe-project-artifact-writes/design.md new file mode 100644 index 00000000..779e87a4 --- /dev/null +++ b/openspec/changes/profile-04-safe-project-artifact-writes/design.md @@ -0,0 +1,117 @@ +## Context + +Core init/setup flows currently decide write behavior locally inside command helpers such as `ide_setup.py` and `modules/init/src/commands.py`. That makes file mutation semantics inconsistent: some paths skip existing files, some overwrite, and some merge only part of the payload without an explicit ownership model. Issue `#487` exposed the most visible failure mode: `.vscode/settings.json` is a user-owned config file that SpecFact touches for one narrow purpose, but the current workflow can still wipe unrelated settings if the write path degrades from merge to replacement. + +This is a cross-cutting change because the same trust boundary exists for any local artifact under a user repository. The design therefore needs a reusable policy, not a one-off patch in `create_vscode_settings()`. + +## Goals / Non-Goals + +**Goals:** +- Define a single core contract for user-project artifact writes. +- Separate artifact ownership from mutation mechanics so commands can declare what they own. +- Preserve unrelated user configuration by default for mergeable structured files. +- Make destructive replacement explicit, recoverable, and auditable. +- Add CI/static enforcement so future init/setup work cannot reintroduce raw overwrite behavior. + +**Non-Goals:** +- Rebuild every existing local-write path in one change across both repos. +- Introduce interactive patch review for every init command in this first slice. +- Support arbitrary semantic merges for all file formats; unsupported formats can fail-safe or use create-only/explicit-replace behavior. + +## Decisions + +### 1. 
Introduce a core `safe_project_write` layer with declared write modes + +Core will add a shared helper that accepts: +- target path +- artifact owner id +- write mode (`create_only`, `merge_structured`, `append_managed_block`, `explicit_replace`) +- managed keys or managed block selectors +- backup/recovery policy + +Rationale: +- Command code should describe intent, not implement bespoke overwrite logic. +- A central helper is the only realistic way to enforce policy in CI. + +Alternatives considered: +- Fix only `.vscode/settings.json` merge logic. Rejected because the same failure pattern would persist elsewhere. +- Rely on `--force` flags alone. Rejected because the unsafe default remains. + +### 2. Treat project artifacts as partially owned unless SpecFact is authoritative for the full file + +The helper will require ownership classification: +- full-file ownership: SpecFact may replace with backup/explicit confirmation semantics +- partial ownership: SpecFact may modify only declared keys/sections/blocks +- unowned: command must fail unless it is create-only + +For `.vscode/settings.json`, SpecFact owns only its prompt recommendation entries, not the document. + +Rationale: +- Ownership is the boundary between safe reconciliation and unacceptable overwrite. +- This generalizes to YAML/JSON/TOML configs and managed markdown blocks. + +Alternatives considered: +- Infer ownership heuristically from file path. Rejected because path-based assumptions are fragile and opaque. + +### 3. Structured-file reconciliation will preserve unrelated user data and only rewrite managed sections + +For JSON settings files, the merge logic will: +- parse existing content +- preserve all non-managed keys +- remove/refresh only prior SpecFact-managed entries +- write back normalized JSON + +If parsing fails, the default behavior will be fail-safe with guidance, not empty-file replacement. Explicit replace may still exist behind force-style intent plus backup. 
+ +Rationale: +- The bug exists because full-document replacement was allowed for a partial-ownership file. + +Alternatives considered: +- Best-effort fallback to `{}` on parse error. Rejected because that recreates silent data loss. + +### 4. Backups and recovery metadata are mandatory for lossy operations + +Any `explicit_replace` or fallback-recovery path will create a timestamped backup under a SpecFact-managed recovery location and emit actionable output naming: +- original path +- backup path +- reason replacement was required + +Rationale: +- Even explicit destructive actions need a reversible path. + +### 5. Add a CI gate for unsafe user-project writes + +The repo will add a gate with two signals: +- static scan/rule: block direct writes to likely user-project artifacts from init/setup flows unless routed through the safe-write helper +- regression tests: fixture repositories with existing user config verifying no unrelated keys are lost + +Rationale: +- Policy without enforcement will drift. +- Fixture tests catch behavior regressions the static rule cannot prove. + +Alternatives considered: +- Tests only. Rejected because new raw-write code paths could land without touching existing fixtures. +- Static rule only. Rejected because safe helper misuse still needs behavioral coverage. + +## Risks / Trade-offs + +- `[Risk]` Initial scope may not cover every write path in one pass. → Mitigation: enforce the policy first for core init/setup and pair it with a modules-runtime adoption change. +- `[Risk]` Structured merge rules may become format-specific and verbose. → Mitigation: support only a narrow set of sanctioned merge strategies and fail-safe otherwise. +- `[Risk]` Static detection may produce false positives on safe writes outside init/setup. → Mitigation: scope the first gate to user-project artifact paths and allow helper-based exemptions only. +- `[Risk]` Backup files can clutter repos if stored locally. 
→ Mitigation: store recovery artifacts in a dedicated SpecFact-managed location outside normal source files and document cleanup. + +## Migration Plan + +1. Add the core safe-write abstraction and ownership model. +2. Move `init ide` settings mutation onto the helper and cover the `#487` regression with fixtures. +3. Route other core init/setup artifact writes through the helper where applicable. +4. Add the CI/static gate and regression fixtures. +5. Land the paired modules-runtime adoption change so bundle commands use the same contract. + +Rollback strategy: +- If helper rollout causes unexpected breakage, commands can temporarily fail-safe (skip with warning) rather than performing legacy overwrite behavior. + +## Open Questions + +- Which existing user-project artifact paths should be in the first “protected path” CI rule set beyond `.vscode/settings.json`? +- Should explicit destructive replacement remain non-interactive in CI only via `--force`, or require an additional machine-readable confirmation flag? diff --git a/openspec/changes/profile-04-safe-project-artifact-writes/proposal.md b/openspec/changes/profile-04-safe-project-artifact-writes/proposal.md new file mode 100644 index 00000000..1537d233 --- /dev/null +++ b/openspec/changes/profile-04-safe-project-artifact-writes/proposal.md @@ -0,0 +1,42 @@ +# Change: Safe Project Artifact Writes For Init And IDE Setup + +## Why + +`specfact init` and `specfact init ide` currently mutate user-owned project artifacts such as `.vscode/settings.json` without a first-class safety contract. Issue [#487](https://github.com/nold-ai/specfact-cli/issues/487) showed that a single setup run can destroy unrelated local configuration, forcing manual git restore and hand repair; that failure mode is unacceptable for any tool that writes into customer repositories. 
+ +## What Changes + +- **NEW**: Introduce a core safe-write policy for project artifacts that classifies targets as create-only, mergeable, append-only, or replace-only-with-explicit-approval. +- **NEW**: Add a structured write planning flow for init/setup commands that records whether an operation will create, merge, skip, back up, or fail before touching an existing user file. +- **NEW**: Require backup and recovery metadata for destructive or lossy local mutations initiated by core setup flows. +- **NEW**: Add conflict handling rules for structured files such as `.vscode/settings.json` so SpecFact-managed keys are merged into existing documents instead of replacing the whole artifact. +- **EXTEND**: `specfact init ide` to preserve non-SpecFact settings, strip only prior SpecFact-managed prompt recommendations when needed, and fail safely on malformed settings files unless the user explicitly chooses a replacement path. +- **EXTEND**: `specfact init` and related bootstrap helpers to route project-file writes through the same safe-write contract instead of ad hoc `write_text` or overwrite behavior. +- **EXTEND**: Documentation for init/setup commands with explicit guarantees about preservation, backup behavior, and how users can preview or force replacements when necessary. + +## Capabilities + +### New Capabilities +- `project-artifact-write-safety`: Policy, planning, and recovery rules for any core command that creates or mutates user-project artifacts. + +### Modified Capabilities +- `init-ide-prompt-source-selection`: `specfact init ide` must reconcile prompt recommendations with existing IDE settings without deleting unrelated user configuration. +- `module-owned-ide-prompts`: Core setup flows that materialize bundle-owned IDE assets must use the safe-write policy when touching user-project files. 
+ +## Impact + +- Affected code: `src/specfact_cli/utils/ide_setup.py`, `src/specfact_cli/modules/init/src/commands.py`, and any shared core helpers introduced for safe project-file mutations. +- Affected docs: `README.md`, `docs/getting-started/installation.md`, `docs/getting-started/quickstart.md`, and core CLI/init reference pages. +- Integration points: installed bundle prompt exports from `specfact-cli-modules`; paired runtime adoption change required in `nold-ai/specfact-cli-modules` so bundle commands follow the same guarantees. +- Dependencies: linked bug [#487](https://github.com/nold-ai/specfact-cli/issues/487); sync under parent feature [#365](https://github.com/nold-ai/specfact-cli/issues/365) Configuration Profiles. + +## Source Tracking + +- **GitHub Issue**: #490 +- **Issue URL**: https://github.com/nold-ai/specfact-cli/issues/490 +- **Repository**: nold-ai/specfact-cli +- **Last Synced Status**: open +- **Parent Feature**: #365 +- **Parent Feature URL**: https://github.com/nold-ai/specfact-cli/issues/365 +- **Related Bug**: #487 +- **Related Bug URL**: https://github.com/nold-ai/specfact-cli/issues/487 diff --git a/openspec/changes/profile-04-safe-project-artifact-writes/specs/init-ide-prompt-source-selection/spec.md b/openspec/changes/profile-04-safe-project-artifact-writes/specs/init-ide-prompt-source-selection/spec.md new file mode 100644 index 00000000..349bf4c2 --- /dev/null +++ b/openspec/changes/profile-04-safe-project-artifact-writes/specs/init-ide-prompt-source-selection/spec.md @@ -0,0 +1,23 @@ +## ADDED Requirements + +### Requirement: Init IDE SHALL preserve unrelated VS Code settings +`specfact init ide` SHALL reconcile prompt recommendations into `.vscode/settings.json` without deleting unrelated user-managed settings. 
+
+#### Scenario: Existing non-SpecFact settings survive prompt export
+- **WHEN** a repository already contains `.vscode/settings.json` with Python, test, or formatter settings
+- **AND** the user runs `specfact init ide`
+- **THEN** the command SHALL preserve those unrelated settings
+- **AND** SHALL update only SpecFact-managed prompt recommendation entries
+
+#### Scenario: Selective prompt export removes only prior SpecFact-managed recommendations
+- **WHEN** the user runs `specfact init ide --prompts <subset>`
+- **THEN** prior SpecFact-managed prompt recommendations outside the selected subset MAY be removed
+- **AND** unrelated `.github/prompts/` recommendations and non-SpecFact settings SHALL remain unchanged
+
+### Requirement: Init IDE SHALL fail safe on malformed settings documents
+`specfact init ide` SHALL NOT replace malformed or unparsable VS Code settings with an empty or generated document by default.
+
+#### Scenario: Malformed settings file blocks destructive rewrite
+- **WHEN** `.vscode/settings.json` exists but cannot be parsed as JSON
+- **THEN** `specfact init ide` SHALL stop with an actionable error
+- **AND** SHALL leave the existing file unchanged unless explicit replacement is requested through the safe-write policy
diff --git a/openspec/changes/profile-04-safe-project-artifact-writes/specs/module-owned-ide-prompts/spec.md b/openspec/changes/profile-04-safe-project-artifact-writes/specs/module-owned-ide-prompts/spec.md
new file mode 100644
index 00000000..09500a0f
--- /dev/null
+++ b/openspec/changes/profile-04-safe-project-artifact-writes/specs/module-owned-ide-prompts/spec.md
@@ -0,0 +1,14 @@
+## ADDED Requirements
+
+### Requirement: Core materialization of module-owned IDE assets SHALL use safe project writes
+When core setup flows materialize module-owned IDE assets into a user repository, they SHALL route all local file mutations through the core safe-write policy.
+ +#### Scenario: Module-owned prompt export uses safe-write helper for settings mutation +- **WHEN** `specfact init ide` exports bundle-owned prompt files and updates a related IDE config artifact +- **THEN** the config mutation SHALL use the safe-write helper with declared ownership metadata +- **AND** the command SHALL preserve unrelated user-managed content in the target artifact + +#### Scenario: Module-owned template copy does not silently replace existing user customization +- **WHEN** a core setup flow copies a module-owned template asset into a target path that already exists in the user repository +- **THEN** the flow SHALL skip, merge, or require explicit replacement according to the declared safe-write mode +- **AND** SHALL NOT silently overwrite the existing file diff --git a/openspec/changes/profile-04-safe-project-artifact-writes/specs/project-artifact-write-safety/spec.md b/openspec/changes/profile-04-safe-project-artifact-writes/specs/project-artifact-write-safety/spec.md new file mode 100644 index 00000000..c975cf48 --- /dev/null +++ b/openspec/changes/profile-04-safe-project-artifact-writes/specs/project-artifact-write-safety/spec.md @@ -0,0 +1,40 @@ +## ADDED Requirements + +### Requirement: Core commands SHALL classify project artifact writes by ownership and mutation mode +The system SHALL require core init/setup flows to declare whether a target artifact is create-only, mergeable, append-managed, or explicit-replace before writing into a user repository. 
+ +#### Scenario: Partial-ownership artifact cannot use implicit full replacement +- **WHEN** a core command targets a user-project artifact that SpecFact owns only partially +- **THEN** the command SHALL use a partial-ownership write mode such as structured merge or managed-block append +- **AND** SHALL NOT replace the full file implicitly + +#### Scenario: Unowned existing artifact fails safe +- **WHEN** a core command would modify an existing artifact with no declared SpecFact-owned section or full-file ownership +- **THEN** the command SHALL stop with an actionable conflict message +- **AND** SHALL NOT mutate the artifact unless an explicit replacement mode is requested + +### Requirement: Lossy project artifact mutations SHALL create recovery material +The system SHALL create backup and recovery metadata for any lossy local artifact mutation initiated by a core command. + +#### Scenario: Explicit replacement emits backup path +- **WHEN** a core command performs an explicit replace of an existing project artifact +- **THEN** a backup copy SHALL be created in a SpecFact-managed recovery location before replacement +- **AND** the command output SHALL identify the backup path and original target + +#### Scenario: Failed structured merge leaves original file untouched +- **WHEN** structured reconciliation cannot be completed safely +- **THEN** the original project artifact SHALL remain unchanged +- **AND** the command SHALL report why reconciliation failed and how to proceed safely + +### Requirement: CI SHALL detect unsafe core writes to user-project artifacts +The repository SHALL enforce a CI or quality gate that flags unsafe write paths for user-project artifacts touched by core init/setup flows. 
+ +#### Scenario: Raw overwrite path is rejected in CI +- **WHEN** a core init/setup code path writes a protected user-project artifact without using the sanctioned safe-write helper +- **THEN** the quality gate SHALL fail +- **AND** the failure output SHALL identify the offending path or call site + +#### Scenario: Regression fixture preserves unrelated user configuration +- **WHEN** CI runs regression fixtures for existing user-owned project configs +- **THEN** init/setup commands SHALL preserve unrelated user-managed content +- **AND** only declared SpecFact-managed sections or keys may change diff --git a/openspec/changes/profile-04-safe-project-artifact-writes/tasks.md b/openspec/changes/profile-04-safe-project-artifact-writes/tasks.md new file mode 100644 index 00000000..ad3a54e3 --- /dev/null +++ b/openspec/changes/profile-04-safe-project-artifact-writes/tasks.md @@ -0,0 +1,28 @@ +## 1. Branch, coordination, and issue sync + +- [ ] 1.1 Create `bugfix/profile-04-safe-project-artifact-writes` in a dedicated worktree from `origin/dev` and bootstrap Hatch in that worktree. +- [ ] 1.2 Sync the change proposal to GitHub under parent feature `#365`, link bug `#487`, and update `proposal.md` Source Tracking with issue metadata. +- [ ] 1.3 Confirm the paired modules-side change `project-runtime-01-safe-artifact-write-policy` is available and note the dependency in both PR descriptions/change evidence. + +## 2. Specs, regression fixtures, and failing evidence + +- [ ] 2.1 Add or update regression fixtures for existing user-owned project artifacts such as `.vscode/settings.json` with unrelated custom settings. +- [ ] 2.2 Write tests from the new scenarios covering partial ownership, malformed settings fail-safe behavior, backup creation, and preservation of unrelated settings. +- [ ] 2.3 Write tests for the CI/static unsafe-write gate so direct writes to protected project artifacts are rejected unless routed through the sanctioned helper. 
+- [ ] 2.4 Run the targeted tests before implementation, capture the failing results, and record commands/timestamps in `openspec/changes/profile-04-safe-project-artifact-writes/TDD_EVIDENCE.md`. + +## 3. Core safe-write implementation + +- [ ] 3.1 Implement the core safe-write helper and ownership model with `@beartype` and `@icontract` on public APIs. +- [ ] 3.2 Route `src/specfact_cli/utils/ide_setup.py` settings mutation through the helper so `.vscode/settings.json` preserves unrelated user-managed settings and strips only SpecFact-managed entries when needed. +- [ ] 3.3 Route applicable init/setup artifact copy flows through the helper or explicit safe modes, including fail-safe handling for malformed structured files and backup creation for explicit replacement. +- [ ] 3.4 Implement the CI/static guard for protected user-project artifacts in init/setup code paths and integrate it into the relevant local/CI quality workflow. + +## 4. Verification, docs, and cross-repo handoff + +- [ ] 4.1 Re-run the targeted tests and any broader init/setup regression coverage, capture passing results, and update `TDD_EVIDENCE.md`. +- [ ] 4.2 Research and update affected docs (`README.md`, installation/quickstart/init references) to document preservation guarantees, backup behavior, and explicit replacement semantics. +- [ ] 4.3 Run quality gates: `hatch run format`, `hatch run type-check`, `hatch run lint`, `hatch run yaml-lint`, `hatch run contract-test`, and `hatch run smart-test`. +- [ ] 4.4 Run `hatch run ./scripts/verify-modules-signature.py --require-signature`; if any bundled module manifests changed, bump versions, re-sign as required, and re-run verification. +- [ ] 4.5 Ensure `.specfact/code-review.json` is fresh, remediate all findings, and record the final review command/timestamp in `TDD_EVIDENCE.md` or PR notes. 
+- [ ] 4.6 Apply the appropriate version/changelog update for a bugfix release if implementation changes user-facing behavior, then open a PR to `dev` referencing the paired modules change. diff --git a/scripts/sync_github_hierarchy_cache.py b/scripts/sync_github_hierarchy_cache.py index fe589769..75b81f14 100644 --- a/scripts/sync_github_hierarchy_cache.py +++ b/scripts/sync_github_hierarchy_cache.py @@ -19,52 +19,81 @@ DEFAULT_REPO_OWNER = "nold-ai" -DEFAULT_REPO_NAME = Path(__file__).resolve().parents[1].name +_SCRIPT_DIR = Path(__file__).resolve().parent + + +@beartype +def parse_repo_name_from_remote_url(url: str) -> str | None: + """Return the repository name segment from a Git remote URL, if parseable.""" + stripped = url.strip() + if not stripped: + return None + if stripped.startswith("git@"): + _, _, rest = stripped.partition(":") + path = rest + elif "://" in stripped: + host_and_path = stripped.split("://", 1)[1] + if "/" not in host_and_path: + return None + path = host_and_path.split("/", 1)[1] + else: + path = stripped + path = path.rstrip("/") + if path.endswith(".git"): + path = path[:-4] + segments = [segment for segment in path.split("/") if segment] + if not segments: + return None + return segments[-1] + + +@beartype +def _default_repo_name_from_git(script_dir: Path) -> str | None: + """Resolve the GitHub repository name from ``origin`` (works in worktrees).""" + completed = subprocess.run( + ["git", "-C", str(script_dir), "config", "--get", "remote.origin.url"], + check=False, + capture_output=True, + text=True, + ) + if completed.returncode != 0: + return None + return parse_repo_name_from_remote_url(completed.stdout) + + +_DEFAULT_REPO_NAME_FALLBACK = Path(__file__).resolve().parents[1].name +DEFAULT_REPO_NAME = _default_repo_name_from_git(_SCRIPT_DIR) or _DEFAULT_REPO_NAME_FALLBACK DEFAULT_OUTPUT_PATH = Path(".specfact") / "backlog" / "github_hierarchy_cache.md" DEFAULT_STATE_PATH = Path(".specfact") / "backlog" / 
"github_hierarchy_cache_state.json" SUPPORTED_ISSUE_TYPES = frozenset({"Epic", "Feature"}) +SUPPORTED_ISSUE_TYPES_ORDER: tuple[str, ...] = ("Epic", "Feature") _SUMMARY_SKIP_LINES = {"why", "scope", "summary", "changes", "capabilities", "impact"} +_GH_GRAPHQL_TIMEOUT_SEC = 120 -_FINGERPRINT_QUERY = """ -query($owner: String!, $name: String!, $after: String) { - repository(owner: $owner, name: $name) { - issues(first: 100, after: $after, states: [OPEN, CLOSED], orderBy: {field: CREATED_AT, direction: ASC}) { - pageInfo { hasNextPage endCursor } - nodes { - number - title - url - updatedAt - issueType { name } - labels(first: 100) { nodes { name } } - parent { number title url } - subIssues(first: 100) { nodes { number title url } } - } - } - } -} -""" - -_DETAIL_QUERY = """ -query($owner: String!, $name: String!, $after: String) { - repository(owner: $owner, name: $name) { - issues(first: 100, after: $after, states: [OPEN, CLOSED], orderBy: {field: CREATED_AT, direction: ASC}) { - pageInfo { hasNextPage endCursor } - nodes { + +@beartype +def _build_hierarchy_issues_query(*, include_body: bool) -> str: + """Return the shared GitHub GraphQL query, optionally including issue body text.""" + body_field = " bodyText\n" if include_body else "" + return f""" +query($owner: String!, $name: String!, $after: String) {{ + repository(owner: $owner, name: $name) {{ + issues(first: 100, after: $after, states: [OPEN, CLOSED], orderBy: {{field: CREATED_AT, direction: ASC}}) {{ + pageInfo {{ hasNextPage endCursor }} + nodes {{ number title url updatedAt - bodyText - issueType { name } - labels(first: 100) { nodes { name } } - parent { number title url } - subIssues(first: 100) { nodes { number title url } } - } - } - } -} -""" +{body_field} issueType {{ name }} + labels(first: 100) {{ nodes {{ name }} }} + parent {{ number title url }} + subIssues(first: 100) {{ nodes {{ number title url }} }} + }} + }} + }} +}} +""".strip() @dataclass(frozen=True) @@ -212,7 +241,22 @@ def 
_run_graphql_query(query: str, *, repo_owner: str, repo_name: str, after: st if after is not None: command.extend(["-F", f"after={after}"]) - completed = subprocess.run(command, check=False, capture_output=True, text=True) + try: + completed = subprocess.run( + command, + check=False, + capture_output=True, + text=True, + timeout=_GH_GRAPHQL_TIMEOUT_SEC, + ) + except subprocess.TimeoutExpired as exc: + detail = f"GitHub GraphQL subprocess timed out after {_GH_GRAPHQL_TIMEOUT_SEC}s" + out = (exc.stdout or "").strip() + err = (exc.stderr or "").strip() + if out or err: + detail = f"{detail}; stdout={out!r}; stderr={err!r}" + raise RuntimeError(detail) from exc + if completed.returncode != 0: raise RuntimeError(completed.stderr.strip() or completed.stdout.strip() or "GitHub GraphQL query failed") @@ -228,21 +272,6 @@ def _is_not_blank(value: str) -> bool: return bool(value.strip()) -@beartype -def _has_non_blank_value( - repo_owner: str | None = None, - repo_name: str | None = None, - repo_full_name: str | None = None, - generated_at: str | None = None, - fingerprint: str | None = None, -) -> bool: - """Return whether the provided predicate value is non-blank.""" - for candidate in (repo_owner, repo_name, repo_full_name, generated_at, fingerprint): - if candidate is not None: - return _is_not_blank(candidate) - return False - - @beartype def _all_supported_issue_types(result: list[HierarchyIssue]) -> bool: """Return whether every issue has a supported issue type.""" @@ -250,12 +279,22 @@ def _all_supported_issue_types(result: list[HierarchyIssue]) -> bool: @beartype -@require(_has_non_blank_value, "repo_owner must not be blank") -@require(_has_non_blank_value, "repo_name must not be blank") +def _require_repo_owner_for_fetch(*, repo_owner: str, repo_name: str, fingerprint_only: bool) -> bool: + return _is_not_blank(repo_owner) + + +@beartype +def _require_repo_name_for_fetch(*, repo_owner: str, repo_name: str, fingerprint_only: bool) -> bool: + return 
_is_not_blank(repo_name) + + +@beartype +@require(_require_repo_owner_for_fetch, "repo_owner must not be blank") +@require(_require_repo_name_for_fetch, "repo_name must not be blank") @ensure(_all_supported_issue_types, "Only Epic and Feature issues should be returned") def fetch_hierarchy_issues(*, repo_owner: str, repo_name: str, fingerprint_only: bool) -> list[HierarchyIssue]: """Fetch Epic and Feature issues from GitHub for the given repository.""" - query = _FINGERPRINT_QUERY if fingerprint_only else _DETAIL_QUERY + query = _build_hierarchy_issues_query(include_body=not fingerprint_only) issues: list[HierarchyIssue] = [] after: str | None = None @@ -305,7 +344,7 @@ def _group_issues_by_type(issues: list[HierarchyIssue]) -> dict[str, list[Hierar """Return issues grouped by supported type in deterministic order.""" return { issue_type: sorted((item for item in issues if item.issue_type == issue_type), key=lambda item: item.number) - for issue_type in SUPPORTED_ISSUE_TYPES + for issue_type in SUPPORTED_ISSUE_TYPES_ORDER } @@ -346,9 +385,30 @@ def _render_issue_section(*, title: str, issues: list[HierarchyIssue]) -> list[s @beartype -@require(_has_non_blank_value, "repo_full_name must not be blank") -@require(_has_non_blank_value, "generated_at must not be blank") -@require(_has_non_blank_value, "fingerprint must not be blank") +def _require_repo_full_name_for_render( + *, repo_full_name: str, issues: list[HierarchyIssue], generated_at: str, fingerprint: str +) -> bool: + return _is_not_blank(repo_full_name) + + +@beartype +def _require_generated_at_for_render( + *, repo_full_name: str, issues: list[HierarchyIssue], generated_at: str, fingerprint: str +) -> bool: + return _is_not_blank(generated_at) + + +@beartype +def _require_fingerprint_for_render( + *, repo_full_name: str, issues: list[HierarchyIssue], generated_at: str, fingerprint: str +) -> bool: + return _is_not_blank(fingerprint) + + +@beartype +@require(_require_repo_full_name_for_render, "repo_full_name 
must not be blank") +@require(_require_generated_at_for_render, "generated_at must not be blank") +@require(_require_fingerprint_for_render, "fingerprint must not be blank") def render_cache_markdown( *, repo_full_name: str, @@ -408,8 +468,22 @@ def _write_state( @beartype -@require(_has_non_blank_value, "repo_owner must not be blank") -@require(_has_non_blank_value, "repo_name must not be blank") +def _require_repo_owner_for_sync( + *, repo_owner: str, repo_name: str, output_path: Path, state_path: Path, force: bool = False +) -> bool: + return _is_not_blank(repo_owner) + + +@beartype +def _require_repo_name_for_sync( + *, repo_owner: str, repo_name: str, output_path: Path, state_path: Path, force: bool = False +) -> bool: + return _is_not_blank(repo_name) + + +@beartype +@require(_require_repo_owner_for_sync, "repo_owner must not be blank") +@require(_require_repo_name_for_sync, "repo_name must not be blank") def sync_cache( *, repo_owner: str, @@ -419,27 +493,22 @@ def sync_cache( force: bool = False, ) -> SyncResult: """Sync the local hierarchy cache from GitHub.""" - fingerprint_issues = fetch_hierarchy_issues( + state = _load_state(state_path) + detailed_issues = fetch_hierarchy_issues( repo_owner=repo_owner, repo_name=repo_name, - fingerprint_only=True, + fingerprint_only=False, ) - fingerprint = compute_hierarchy_fingerprint(fingerprint_issues) - state = _load_state(state_path) + fingerprint = compute_hierarchy_fingerprint(detailed_issues) if not force and state.get("fingerprint") == fingerprint and output_path.exists(): return SyncResult( changed=False, - issue_count=len(fingerprint_issues), + issue_count=len(detailed_issues), fingerprint=fingerprint, output_path=output_path, ) - detailed_issues = fetch_hierarchy_issues( - repo_owner=repo_owner, - repo_name=repo_name, - fingerprint_only=False, - ) generated_at = datetime.now(tz=UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z") output_path.parent.mkdir(parents=True, exist_ok=True) 
output_path.write_text( diff --git a/tests/unit/scripts/test_sync_github_hierarchy_cache.py b/tests/unit/scripts/test_sync_github_hierarchy_cache.py index 9c940856..b1f91933 100644 --- a/tests/unit/scripts/test_sync_github_hierarchy_cache.py +++ b/tests/unit/scripts/test_sync_github_hierarchy_cache.py @@ -3,11 +3,14 @@ from __future__ import annotations import importlib.util +import subprocess import sys from functools import lru_cache from pathlib import Path from typing import Any, TypedDict +import pytest + class IssueOptions(TypedDict, total=False): """Optional test issue fields.""" @@ -21,7 +24,7 @@ class IssueOptions(TypedDict, total=False): @lru_cache(maxsize=1) def _load_script_module() -> Any: - """Load scripts/sync_github_hierarchy_cache.py as a Python module.""" + """Load scripts/sync_github_hierarchy_cache.py as a Python module (cached for stable types).""" script_path = Path(__file__).resolve().parents[3] / "scripts" / "sync_github_hierarchy_cache.py" spec = importlib.util.spec_from_file_location("sync_github_hierarchy_cache", script_path) if spec is None or spec.loader is None: @@ -115,6 +118,44 @@ def test_extract_summary_skips_heading_only_lines() -> None: assert summary == "This cache avoids repeated GitHub lookups." 
+@pytest.mark.parametrize( + ("url", "expected"), + [ + ("https://github.com/nold-ai/specfact-cli.git", "specfact-cli"), + ("git@github.com:nold-ai/specfact-cli.git", "specfact-cli"), + ("https://github.com/org/my-repo/", "my-repo"), + ], +) +def test_parse_repo_name_from_remote_url(url: str, expected: str) -> None: + """Remote URL tail parsing should yield the GitHub repository name.""" + module = _load_script_module() + assert module.parse_repo_name_from_remote_url(url) == expected + + +def test_parse_repo_name_from_remote_url_empty_returns_none() -> None: + """Blank remote URLs should not produce a repository name.""" + module = _load_script_module() + assert module.parse_repo_name_from_remote_url("") is None + assert module.parse_repo_name_from_remote_url(" ") is None + + +def test_default_repo_name_matches_git_origin_url() -> None: + """When ``remote.origin.url`` exists, DEFAULT_REPO_NAME must match its repository segment (worktrees).""" + module = _load_script_module() + scripts_dir = Path(__file__).resolve().parents[3] / "scripts" + completed = subprocess.run( + ["git", "-C", str(scripts_dir), "config", "--get", "remote.origin.url"], + check=False, + capture_output=True, + text=True, + ) + if completed.returncode != 0 or not completed.stdout.strip(): + pytest.skip("No git origin in this environment") + expected = module.parse_repo_name_from_remote_url(completed.stdout) + assert expected is not None + assert expected == module.DEFAULT_REPO_NAME + + def test_default_paths_use_ephemeral_specfact_backlog_cache() -> None: """Default cache files should live in ignored .specfact/backlog storage.""" module = _load_script_module() @@ -169,7 +210,7 @@ def test_render_cache_markdown_groups_epics_and_features() -> None: assert "- Labels: Feature, openspec" in rendered -def test_sync_cache_skips_write_when_fingerprint_is_unchanged(monkeypatch: Any, tmp_path: Path) -> None: +def test_sync_cache_skips_write_when_fingerprint_is_unchanged(monkeypatch: pytest.MonkeyPatch, 
tmp_path: Path) -> None: """sync_cache should not rewrite output when the fingerprint matches state.""" module = _load_script_module() @@ -194,7 +235,7 @@ def test_sync_cache_skips_write_when_fingerprint_is_unchanged(monkeypatch: Any, def _fake_fetch(*, repo_owner: str, repo_name: str, fingerprint_only: bool) -> list[Any]: assert repo_owner == "nold-ai" assert repo_name == "specfact-cli" - assert fingerprint_only is True + assert fingerprint_only is False return issues def _same_fingerprint(_: list[Any]) -> str: @@ -213,3 +254,111 @@ def _same_fingerprint(_: list[Any]) -> str: assert result.changed is False assert result.issue_count == 1 assert output_path.read_text(encoding="utf-8") == "unchanged cache\n" + + +def test_sync_cache_force_rewrites_when_fingerprint_unchanged(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + """sync_cache with force=True must rewrite output even when fingerprint matches state.""" + module = _load_script_module() + + output_path = tmp_path / "GITHUB_HIERARCHY_CACHE.md" + state_path = tmp_path / ".github_hierarchy_cache_state.json" + output_path.write_text("stale cache\n", encoding="utf-8") + state_path.write_text('{"fingerprint":"same"}', encoding="utf-8") + + issues = [ + _make_issue( + module, + number=485, + title="[Epic] Governance", + issue_type="Epic", + options={ + "labels": ["Epic"], + "summary": "Governance epic.", + }, + ) + ] + + def _fake_fetch(*, repo_owner: str, repo_name: str, fingerprint_only: bool) -> list[Any]: + assert repo_owner == "nold-ai" + assert repo_name == "specfact-cli" + assert fingerprint_only is False + return issues + + monkeypatch.setattr(module, "fetch_hierarchy_issues", _fake_fetch) + monkeypatch.setattr(module, "compute_hierarchy_fingerprint", lambda _: "same") + + result = module.sync_cache( + repo_owner="nold-ai", + repo_name="specfact-cli", + output_path=output_path, + state_path=state_path, + force=True, + ) + + assert result.changed is True + assert "# GitHub Hierarchy Cache" in 
output_path.read_text(encoding="utf-8") + + +def test_sync_cache_propagates_graphql_failure(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + """RuntimeError from GitHub GraphQL should surface to callers.""" + module = _load_script_module() + + def _boom(_query: str, *, repo_owner: str, repo_name: str, **_kwargs: Any) -> Any: + assert repo_owner == "nold-ai" + assert repo_name == "specfact-cli" + raise RuntimeError("graphql failed") + + monkeypatch.setattr(module, "_run_graphql_query", _boom) + + with pytest.raises(RuntimeError, match="graphql failed"): + module.sync_cache( + repo_owner="nold-ai", + repo_name="specfact-cli", + output_path=tmp_path / "out.md", + state_path=tmp_path / "state.json", + ) + + +def test_sync_cache_malformed_state_regenerates_cache(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + """Invalid state JSON is treated as missing state and triggers a full sync.""" + module = _load_script_module() + + output_path = tmp_path / "GITHUB_HIERARCHY_CACHE.md" + state_path = tmp_path / ".github_hierarchy_cache_state.json" + state_path.write_text("{not-json", encoding="utf-8") + + issues = [ + _make_issue( + module, + number=485, + title="[Epic] Governance", + issue_type="Epic", + options={ + "labels": ["Epic"], + "summary": "Governance epic.", + }, + ) + ] + + fetch_calls = 0 + + def _fake_fetch(*, repo_owner: str, repo_name: str, fingerprint_only: bool) -> list[Any]: + nonlocal fetch_calls + fetch_calls += 1 + assert repo_owner == "nold-ai" + assert repo_name == "specfact-cli" + assert fingerprint_only is False + return issues + + monkeypatch.setattr(module, "fetch_hierarchy_issues", _fake_fetch) + + result = module.sync_cache( + repo_owner="nold-ai", + repo_name="specfact-cli", + output_path=output_path, + state_path=state_path, + ) + + assert fetch_calls == 1 + assert result.changed is True + assert "# GitHub Hierarchy Cache" in output_path.read_text(encoding="utf-8") From 54b53a9a7dbcf03b5cde2837e20b43111aaa90e6 Mon Sep 17 
00:00:00 2001 From: Dominikus Nold Date: Thu, 9 Apr 2026 23:11:24 +0200 Subject: [PATCH 3/4] Fix review findings --- .../TDD_EVIDENCE.md | 4 +- .../design.md | 14 ++++++- .../proposal.md | 13 ++++-- .../specs/github-hierarchy-cache/spec.md | 20 +++++---- openspec/config.yaml | 17 +++++--- scripts/sync_github_hierarchy_cache.py | 41 +++++++++++++++---- .../test_sync_github_hierarchy_cache.py | 32 +++++++++++++++ 7 files changed, 113 insertions(+), 28 deletions(-) diff --git a/openspec/changes/governance-02-github-hierarchy-cache/TDD_EVIDENCE.md b/openspec/changes/governance-02-github-hierarchy-cache/TDD_EVIDENCE.md index 487be9b6..6eb812c8 100644 --- a/openspec/changes/governance-02-github-hierarchy-cache/TDD_EVIDENCE.md +++ b/openspec/changes/governance-02-github-hierarchy-cache/TDD_EVIDENCE.md @@ -32,5 +32,5 @@ - Timestamp: `2026-04-09T22:04:18+02:00` - Ruff: `PATH=/home/dom/git/nold-ai/specfact-cli/.venv/bin:$PATH PYTHONPATH=/home/dom/git/nold-ai/specfact-cli-worktrees/feature/governance-02-github-hierarchy-cache/src /home/dom/git/nold-ai/specfact-cli/.venv/bin/ruff check scripts/sync_github_hierarchy_cache.py tests/unit/scripts/test_sync_github_hierarchy_cache.py` → PASS - basedpyright: `PATH=/home/dom/git/nold-ai/specfact-cli/.venv/bin:$PATH PYTHONPATH=/home/dom/git/nold-ai/specfact-cli-worktrees/feature/governance-02-github-hierarchy-cache/src /home/dom/git/nold-ai/specfact-cli/.venv/bin/basedpyright scripts/sync_github_hierarchy_cache.py tests/unit/scripts/test_sync_github_hierarchy_cache.py` → PASS (`0 errors, 0 warnings`) -- pytest: `PATH=/home/dom/git/nold-ai/specfact-cli/.venv/bin:$PATH PYTHONPATH=/home/dom/git/nold-ai/specfact-cli-worktrees/feature/governance-02-github-hierarchy-cache/src /home/dom/git/nold-ai/specfact-cli/.venv/bin/pytest tests/unit/scripts/test_sync_github_hierarchy_cache.py -q` → PASS (`5 passed`) -- SpecFact code review: `PATH=/home/dom/git/nold-ai/specfact-cli/.venv/bin:$PATH 
PYTHONPATH=/home/dom/git/nold-ai/specfact-cli-worktrees/feature/governance-02-github-hierarchy-cache/src /home/dom/git/nold-ai/specfact-cli/.venv/bin/specfact code review run --scope changed --include-tests --json --out .specfact/code-review.json` → PASS (`Review completed with no findings.`) +- pytest: `hatch test -- tests/unit/scripts/test_sync_github_hierarchy_cache.py -q` → PASS (`15 passed`, after parity with modules-side script/tests) +- SpecFact code review: `hatch run specfact code review run --json --out .specfact/code-review.json scripts/sync_github_hierarchy_cache.py tests/unit/scripts/test_sync_github_hierarchy_cache.py` → PASS (`overall_verdict` PASS, `ci_exit_code` 0; low-severity DRY hints on icontract preconditions documented in `proposal.md`) diff --git a/openspec/changes/governance-02-github-hierarchy-cache/design.md b/openspec/changes/governance-02-github-hierarchy-cache/design.md index c0653ca4..b490b9e5 100644 --- a/openspec/changes/governance-02-github-hierarchy-cache/design.md +++ b/openspec/changes/governance-02-github-hierarchy-cache/design.md @@ -1,4 +1,4 @@ -## Context +# Context `specfact-cli` already maintains GitHub planning hierarchy through issue labels, parent-child links, and `openspec/CHANGE_ORDER.md`, but contributors still discover that structure by hitting the GitHub API manually. The new requirement is to make hierarchy lookup deterministic, cheap, and local: a generated markdown file under ignored `.specfact/backlog/` becomes the first source for parent Feature and Epic resolution, and the sync command is rerun only when the hierarchy changed. @@ -7,12 +7,14 @@ This is a cross-cutting governance change because it affects GitHub automation, ## Goals / Non-Goals **Goals:** + - Generate a deterministic markdown cache of Epic and Feature issues for this repository. 
- Include enough metadata for issue-parenting work without another GitHub lookup: issue number, title, short summary, labels, parent/child relationships, and issue URLs. - Make the sync fast on no-op runs by using a small fingerprint/state check before regenerating markdown. - Update repo guidance so contributors use the cache first and only rerun sync when needed. **Non-Goals:** + - Replacing GitHub as the authoritative source of issue hierarchy. - Caching every issue type or full issue bodies. - Synchronizing User Story issues into the cache in this first version. @@ -21,33 +23,43 @@ This is a cross-cutting governance change because it affects GitHub automation, ## Decisions ### Use `gh api graphql` as the sole upstream source + The script will query GitHub through `gh api graphql` so it can access issue type, labels, relationships, and brief body content in one supported path. This avoids scraping markdown or depending on REST endpoints that do not expose hierarchy fields consistently. Alternative considered: + - `gh issue list/view` JSON loops: simpler, but requires many calls and awkward relationship reconstruction. ### Split the sync into a lightweight fingerprint pass and a full render pass + The script will first fetch only the Epic and Feature issue identity set plus timestamps/relationship fingerprints, hash that data, and compare it with a local state file. If the fingerprint matches, the script exits successfully without rewriting markdown. If it differs, the script performs a fuller metadata query and regenerates the cache. Alternative considered: + - Always regenerate markdown: deterministic but wastes GitHub calls and makes local workflows slower. ### Store human-readable cache plus machine-readable state under ignored `.specfact/backlog` + The canonical human-facing output will be `.specfact/backlog/github_hierarchy_cache.md`. 
A companion state file, `.specfact/backlog/github_hierarchy_cache_state.json`, will hold the last fingerprint and generator metadata. Both files stay local and ignored by Git so the cache can be recreated freely without creating repository drift. Alternative considered: + - State embedded in markdown comments: workable, but couples machine state to user-facing output and complicates deterministic rendering. ### Render by deterministic section and sort order + The markdown will use fixed sections for Epics and Features, with issues sorted stably by type, then issue number. Relationship lists and labels will also be sorted deterministically so reruns only change the file when source metadata actually changes. Alternative considered: + - Preserve GitHub API order: easier, but can drift between runs and create noisy diffs. ### Keep instruction updates in repo-local governance files + The change will update `openspec/config.yaml` and `AGENTS.md` in this repo so the workflow explicitly says: consult the cache first, regenerate it when fresh planning metadata is needed, and avoid ad hoc GitHub lookups unless the cache is stale or missing. Alternative considered: + - Document the behavior only in the script help text: insufficient because agents and OpenSpec flows read governance files first. ## Risks / Trade-offs diff --git a/openspec/changes/governance-02-github-hierarchy-cache/proposal.md b/openspec/changes/governance-02-github-hierarchy-cache/proposal.md index edd72b3c..6738003a 100644 --- a/openspec/changes/governance-02-github-hierarchy-cache/proposal.md +++ b/openspec/changes/governance-02-github-hierarchy-cache/proposal.md @@ -1,3 +1,5 @@ +# Governance: GitHub hierarchy cache (specfact-cli) + ## Why OpenSpec and agent workflows still have to query GitHub ad hoc to rediscover Epics, Features, and their parent links before creating or syncing change issues. 
That is slow, expensive, and error-prone, especially now that planning hierarchy matters in both `specfact-cli` and `specfact-cli-modules`. @@ -5,17 +7,18 @@ OpenSpec and agent workflows still have to query GitHub ad hoc to rediscover Epi ## What Changes - Add a deterministic repo-local hierarchy cache generator for GitHub Epic and Feature issues. -- Persist a central markdown inventory under `openspec/` with issue number, title, brief summary, labels, and hierarchy relationships. -- Add a lightweight fingerprint/state check so the sync exits quickly when Epic and Feature metadata has not changed. -- Update governance instructions in `openspec/config.yaml` and `AGENTS.md` to consult the cached hierarchy inventory first and rerun the sync script when fresh data is needed. +- Persist a repo-local markdown hierarchy cache at `.specfact/backlog/github_hierarchy_cache.md` (ignored; not committed) with issue number, title, brief summary, labels, and hierarchy relationships, plus a companion fingerprint/state file `.specfact/backlog/github_hierarchy_cache_state.json` so the sync can exit quickly when Epic and Feature metadata has not changed. +- Update governance instructions in `openspec/config.yaml` and `AGENTS.md` to consult the cached hierarchy first and rerun `python scripts/sync_github_hierarchy_cache.py` when fresh data is needed. - Cover the script with tests so cache output and no-change behavior remain stable. ## Capabilities ### New Capabilities + - `github-hierarchy-cache`: Deterministic synchronization of GitHub Epic and Feature hierarchy metadata into a repo-local OpenSpec markdown cache for low-cost parent and planning lookups. ### Modified Capabilities + - `agile-feature-hierarchy`: Local governance workflows must be able to resolve current Epic and Feature planning metadata from the repo-local cache before performing manual GitHub lookups. 
## Impact @@ -29,3 +32,7 @@ OpenSpec and agent workflows still have to query GitHub ad hoc to rediscover Epi - GitHub Issue: [#491](https://github.com/nold-ai/specfact-cli/issues/491) - Parent Feature: [#486](https://github.com/nold-ai/specfact-cli/issues/486) - Related Modules Change: `specfact-cli-modules/governance-03-github-hierarchy-cache` + +## Code review note (SpecFact dogfood) + +Icontract `@require` preconditions on `fetch_hierarchy_issues`, `render_cache_markdown`, and `sync_cache` intentionally use small, similarly shaped predicates. The code-review module may emit low-severity DRY / duplicate-shape hints for those helpers; that is accepted here because collapsing them would break icontract’s per-parameter argument binding (e.g. `**kwargs` predicates are not supported the same way). diff --git a/openspec/changes/governance-02-github-hierarchy-cache/specs/github-hierarchy-cache/spec.md b/openspec/changes/governance-02-github-hierarchy-cache/specs/github-hierarchy-cache/spec.md index 01728b27..f81a5656 100644 --- a/openspec/changes/governance-02-github-hierarchy-cache/specs/github-hierarchy-cache/spec.md +++ b/openspec/changes/governance-02-github-hierarchy-cache/specs/github-hierarchy-cache/spec.md @@ -1,28 +1,34 @@ -## ADDED Requirements +# ADDED Requirements + +## Requirement: Repository hierarchy cache sync -### Requirement: Repository hierarchy cache sync The repository SHALL provide a deterministic sync mechanism that retrieves GitHub Epic and Feature issues for the current repository and writes a local hierarchy cache under ignored `.specfact/backlog/`. 
-#### Scenario: Generate hierarchy cache from GitHub metadata +### Scenario: Generate hierarchy cache from GitHub metadata + - **WHEN** the user runs the hierarchy cache sync script for the repository - **THEN** the script retrieves GitHub issues whose Type is `Epic` or `Feature` - **AND** writes a markdown cache under ignored `.specfact/backlog/` with each issue's number, title, URL, short summary, labels, and hierarchy relationships - **AND** the output ordering is deterministic across repeated runs with unchanged source data -#### Scenario: Represent hierarchy relationships in cache output +### Scenario: Represent hierarchy relationships in cache output + - **WHEN** a synced Epic or Feature has parent or child hierarchy links - **THEN** the markdown cache includes those relationships in normalized form - **AND** missing relationships are rendered as explicit empty or none values rather than omitted ambiguously -#### Scenario: Fast exit on unchanged hierarchy state +### Scenario: Fast exit on unchanged hierarchy state + - **WHEN** the script detects that the current Epic and Feature hierarchy fingerprint matches the last synced fingerprint - **THEN** it exits successfully without regenerating the markdown cache - **AND** it reports that no hierarchy update was required -### Requirement: Repository governance must use cache-first hierarchy lookup +## Requirement: Repository governance must use cache-first hierarchy lookup + Repository governance instructions SHALL direct contributors and agents to consult the local hierarchy cache before performing manual GitHub lookups for Epic or Feature parenting. 
-#### Scenario: Cache-first governance guidance +### Scenario: Cache-first governance guidance + - **WHEN** a contributor reads `AGENTS.md` or `openspec/config.yaml` for GitHub issue setup guidance - **THEN** the instructions tell them to consult the local hierarchy cache first - **AND** the instructions define when the sync script must be rerun to refresh stale hierarchy metadata diff --git a/openspec/config.yaml b/openspec/config.yaml index c89e95fd..91ba81c9 100644 --- a/openspec/config.yaml +++ b/openspec/config.yaml @@ -88,9 +88,12 @@ rules: - **Repository**: /, - **Last Synced Status**: ). - After creation, update proposal.md Source Tracking section with issue number, URL, repository, and status. - - Resolve Parent Feature or Epic from `.specfact/backlog/github_hierarchy_cache.md` first; this - cache is ephemeral local state and MUST NOT be committed. If it is missing or stale, rerun - `python scripts/sync_github_hierarchy_cache.py` before manual GitHub lookup. + - >- + Resolve Parent Feature or Epic from `.specfact/backlog/github_hierarchy_cache.md` first (regenerate + via `python scripts/sync_github_hierarchy_cache.py` when missing or stale). The cache is ephemeral + local state and MUST NOT be committed. **Pending:** until backlog commands read this cache + automatically, treat this as contributor/agent workflow (docs + local script), not enforced CLI + behavior for every `specfact backlog` path. - Source tracking: Only track public repos (specfact-cli, platform-frontend). Skip for internal repos (specfact-cli-internal) specs: @@ -160,9 +163,11 @@ rules: - Place this task after quality gates and documentation, before PR creation. 
- Include git workflow tasks: branch creation (first task), PR creation (last task) - For public-facing changes in public repos (specfact-cli, platform-frontend): - - Before GitHub issue creation or parent linking, consult `.specfact/backlog/github_hierarchy_cache.md`; - rerun `python scripts/sync_github_hierarchy_cache.py` when the cache is missing or stale. - Treat this cache as ephemeral local state, not a committed OpenSpec artifact. + - >- + Before GitHub issue creation or parent linking, consult `.specfact/backlog/github_hierarchy_cache.md`; + rerun `python scripts/sync_github_hierarchy_cache.py` when the cache is missing or stale. Treat this + cache as ephemeral local state, not a committed OpenSpec artifact. **Pending:** wire cache-first + lookup into backlog add/sync codepaths when the governance hierarchy-cache work lands end-to-end. - Include GitHub issue creation task with format: - title `[Change] ` - labels `enhancement` and `change-proposal` diff --git a/scripts/sync_github_hierarchy_cache.py b/scripts/sync_github_hierarchy_cache.py index 75b81f14..5e4031d5 100644 --- a/scripts/sync_github_hierarchy_cache.py +++ b/scripts/sync_github_hierarchy_cache.py @@ -23,6 +23,10 @@ @beartype +@ensure( + lambda result: result is None or bool(str(result).strip()), + "parsed repository name must be non-blank when present", +) def parse_repo_name_from_remote_url(url: str) -> str | None: """Return the repository name segment from a Git remote URL, if parseable.""" stripped = url.strip() @@ -50,12 +54,15 @@ def parse_repo_name_from_remote_url(url: str) -> str | None: @beartype def _default_repo_name_from_git(script_dir: Path) -> str | None: """Resolve the GitHub repository name from ``origin`` (works in worktrees).""" - completed = subprocess.run( - ["git", "-C", str(script_dir), "config", "--get", "remote.origin.url"], - check=False, - capture_output=True, - text=True, - ) + try: + completed = subprocess.run( + ["git", "-C", str(script_dir), "config", "--get", 
"remote.origin.url"], + check=False, + capture_output=True, + text=True, + ) + except (FileNotFoundError, OSError): + return None if completed.returncode != 0: return None return parse_repo_name_from_remote_url(completed.stdout) @@ -88,7 +95,7 @@ def _build_hierarchy_issues_query(*, include_body: bool) -> str: {body_field} issueType {{ name }} labels(first: 100) {{ nodes {{ name }} }} parent {{ number title url }} - subIssues(first: 100) {{ nodes {{ number title url }} }} + subIssues(first: 100) {{ nodes {{ number title url issueType {{ name }} }} }} }} }} }} @@ -190,13 +197,22 @@ def _label_names(label_nodes: list[Mapping[str, Any]]) -> list[str]: return sorted(names, key=str.lower) +@beartype +def _subissue_type_name(item: Mapping[str, Any]) -> str | None: + """Return sub-issue type name when present.""" + issue_type_node = _mapping_value(item, "issueType") + if issue_type_node and issue_type_node.get("name"): + return str(issue_type_node["name"]) + return None + + @beartype def _child_links(subissue_nodes: list[Mapping[str, Any]]) -> list[IssueLink]: - """Extract sorted child issue links from GraphQL subissue nodes.""" + """Extract sorted child issue links from GraphQL subissue nodes (Epic/Feature only).""" children = [ IssueLink(number=int(item["number"]), title=str(item["title"]), url=str(item["url"])) for item in subissue_nodes - if item.get("number") is not None + if item.get("number") is not None and _subissue_type_name(item) in SUPPORTED_ISSUE_TYPES ] children.sort(key=lambda item: item.number) return children @@ -280,11 +296,13 @@ def _all_supported_issue_types(result: list[HierarchyIssue]) -> bool: @beartype def _require_repo_owner_for_fetch(*, repo_owner: str, repo_name: str, fingerprint_only: bool) -> bool: + _ = (repo_name, fingerprint_only) return _is_not_blank(repo_owner) @beartype def _require_repo_name_for_fetch(*, repo_owner: str, repo_name: str, fingerprint_only: bool) -> bool: + _ = (repo_owner, fingerprint_only) return _is_not_blank(repo_name) 
@@ -388,6 +406,7 @@ def _render_issue_section(*, title: str, issues: list[HierarchyIssue]) -> list[s def _require_repo_full_name_for_render( *, repo_full_name: str, issues: list[HierarchyIssue], generated_at: str, fingerprint: str ) -> bool: + _ = (issues, generated_at, fingerprint) return _is_not_blank(repo_full_name) @@ -395,6 +414,7 @@ def _require_repo_full_name_for_render( def _require_generated_at_for_render( *, repo_full_name: str, issues: list[HierarchyIssue], generated_at: str, fingerprint: str ) -> bool: + _ = (repo_full_name, issues, fingerprint) return _is_not_blank(generated_at) @@ -402,6 +422,7 @@ def _require_generated_at_for_render( def _require_fingerprint_for_render( *, repo_full_name: str, issues: list[HierarchyIssue], generated_at: str, fingerprint: str ) -> bool: + _ = (repo_full_name, issues, generated_at) return _is_not_blank(fingerprint) @@ -471,6 +492,7 @@ def _write_state( def _require_repo_owner_for_sync( *, repo_owner: str, repo_name: str, output_path: Path, state_path: Path, force: bool = False ) -> bool: + _ = (repo_name, output_path, state_path, force) return _is_not_blank(repo_owner) @@ -478,6 +500,7 @@ def _require_repo_owner_for_sync( def _require_repo_name_for_sync( *, repo_owner: str, repo_name: str, output_path: Path, state_path: Path, force: bool = False ) -> bool: + _ = (repo_owner, output_path, state_path, force) return _is_not_blank(repo_name) diff --git a/tests/unit/scripts/test_sync_github_hierarchy_cache.py b/tests/unit/scripts/test_sync_github_hierarchy_cache.py index b1f91933..251fc294 100644 --- a/tests/unit/scripts/test_sync_github_hierarchy_cache.py +++ b/tests/unit/scripts/test_sync_github_hierarchy_cache.py @@ -164,6 +164,20 @@ def test_default_paths_use_ephemeral_specfact_backlog_cache() -> None: assert str(module.DEFAULT_STATE_PATH) == ".specfact/backlog/github_hierarchy_cache_state.json" +def test_child_links_include_only_epic_and_feature_subissues() -> None: + """Sub-issue GraphQL nodes should contribute 
children only when type is Epic or Feature.""" + module = _load_script_module() + child_links = module._child_links( # pylint: disable=protected-access + [ + {"number": 1, "title": "Task", "url": "https://example.test/1", "issueType": {"name": "Task"}}, + {"number": 2, "title": "Feat", "url": "https://example.test/2", "issueType": {"name": "Feature"}}, + {"number": 3, "title": "Ep", "url": "https://example.test/3", "issueType": {"name": "Epic"}}, + {"number": 4, "title": "Untyped", "url": "https://example.test/4"}, + ] + ) + assert [link.number for link in child_links] == [2, 3] + + def test_render_cache_markdown_groups_epics_and_features() -> None: """Rendered markdown should be deterministic and grouped by issue type.""" module = _load_script_module() @@ -362,3 +376,21 @@ def _fake_fetch(*, repo_owner: str, repo_name: str, fingerprint_only: bool) -> l assert fetch_calls == 1 assert result.changed is True assert "# GitHub Hierarchy Cache" in output_path.read_text(encoding="utf-8") + + +def test_default_repo_name_falls_back_when_git_unavailable(monkeypatch: pytest.MonkeyPatch) -> None: + """If ``git`` is missing, DEFAULT_REPO_NAME must use the checkout directory fallback.""" + _load_script_module.cache_clear() + sys.modules.pop("sync_github_hierarchy_cache", None) + + def _no_git(*_args: Any, **_kwargs: Any) -> Any: + raise FileNotFoundError("git not found") + + monkeypatch.setattr(subprocess, "run", _no_git) + module = _load_script_module() + script_path = Path(__file__).resolve().parents[3] / "scripts" / "sync_github_hierarchy_cache.py" + expected_fallback = script_path.resolve().parents[1].name + assert expected_fallback == module.DEFAULT_REPO_NAME + + _load_script_module.cache_clear() + sys.modules.pop("sync_github_hierarchy_cache", None) From 9f41a7c5913da507add71a2db984e433f3b82fcd Mon Sep 17 00:00:00 2001 From: Dominikus Nold Date: Thu, 9 Apr 2026 23:13:04 +0200 Subject: [PATCH 4/4] Make github sync script executable --- 
scripts/sync_github_hierarchy_cache.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 scripts/sync_github_hierarchy_cache.py diff --git a/scripts/sync_github_hierarchy_cache.py b/scripts/sync_github_hierarchy_cache.py old mode 100644 new mode 100755