diff --git a/docs/astro.config.mjs b/docs/astro.config.mjs index 3203eee276f..04bf947949a 100644 --- a/docs/astro.config.mjs +++ b/docs/astro.config.mjs @@ -133,6 +133,9 @@ export default defineConfig({ '/patterns/taskops/': '/gh-aw/patterns/task-ops/', '/patterns/trialops/': '/gh-aw/patterns/trial-ops/', '/patterns/workqueueops/': '/gh-aw/patterns/workqueue-ops/', + '/patterns/shadowops/': '/gh-aw/organization-practices/safe-rollout/', + '/patterns/shadow-ops/': '/gh-aw/organization-practices/safe-rollout/', + '/organization-practices/shadow-evaluation/': '/gh-aw/organization-practices/safe-rollout/', }, integrations: [ sitemap(), @@ -279,12 +282,21 @@ export default defineConfig({ { label: 'Audit Reports', link: '/guides/audit-with-agents/' }, ], }, + { + label: 'Organization Practices', + items: [ + { label: 'Overview', link: '/organization-practices/' }, + { label: 'Safe Rollout', link: '/organization-practices/safe-rollout/' }, + { label: 'Sharing Workflows', link: '/organization-practices/sharing-workflows/' }, + ], + }, { label: 'Design Patterns', items: [ { label: 'BatchOps', link: '/patterns/batch-ops/' }, { label: 'CentralRepoOps', link: '/patterns/central-repo-ops/' }, { label: 'ChatOps', link: '/patterns/chat-ops/' }, + { label: 'CorrectionOps', link: '/patterns/correction-ops/' }, { label: 'DailyOps', link: '/patterns/daily-ops/' }, { label: 'DataOps', link: '/patterns/data-ops/' }, { label: 'DispatchOps', link: '/patterns/dispatch-ops/' }, diff --git a/docs/src/content/docs/guides/packaging-imports.mdx b/docs/src/content/docs/guides/packaging-imports.mdx index d63f1a66186..f653f996070 100644 --- a/docs/src/content/docs/guides/packaging-imports.mdx +++ b/docs/src/content/docs/guides/packaging-imports.mdx @@ -7,6 +7,10 @@ sidebar: import { Tabs, TabItem } from '@astrojs/starlight/components'; +:::caution[Evolving guidance] +Enterprise workflow sharing capabilities are actively expanding. 
For organization-scale patterns such as central repositories, import governance, and access controls, see [Sharing Workflows](/gh-aw/organization-practices/sharing-workflows/). Details may change as the platform matures. +::: + ## Adding Workflows You can add any existing workflow you have access to from external repositories. diff --git a/docs/src/content/docs/organization-practices/index.mdx b/docs/src/content/docs/organization-practices/index.mdx new file mode 100644 index 00000000000..0eb91f42e2d --- /dev/null +++ b/docs/src/content/docs/organization-practices/index.mdx @@ -0,0 +1,31 @@ +--- +title: Organization Practices +description: Guidance for adopting, sharing, and governing agentic workflows across teams and repositories. +--- + +Organization Practices collects guidance that matters at team and enterprise scale but does not need to be presented as a standalone design pattern. + +Patterns describe durable workflow shapes such as [CorrectionOps](/gh-aw/patterns/correction-ops/) or [MultiRepoOps](/gh-aw/patterns/multi-repo-ops/). Organization practices cover how those patterns are rolled out, shared, and governed across repositories and teams. + +This section is the right place for topics such as: + +- safe rollout strategies before production writes are enabled +- workflow sharing across repositories and organizations +- centralized ownership models for workflow infrastructure +- platform conventions for versioning, review, and promotion + +## Included Topics + +### Safe Rollout + +[Safe Rollout](/gh-aw/organization-practices/safe-rollout/) describes how to move from report-only or staged behavior to production writes with evidence and control. One technique inside that progression is shadow evaluation, where the workflow writes to a safe non-production target before promotion. + +### Sharing Workflows + +[Sharing Workflows](/gh-aw/organization-practices/sharing-workflows/) describes how workflows can be reused across repositories and organizations. 
It covers imports, reusable components, central workflow repositories, and when to use templates or starter repositories. + +## Relationship To Other Sections + +- Use [Design Patterns](/gh-aw/patterns/) to learn reusable workflow shapes. +- Use [Guides](/gh-aw/guides/) for task-oriented instructions such as [Reusing Workflows](/gh-aw/guides/packaging-imports/). +- Use [Reference](/gh-aw/reference/) for exact configuration syntax and field behavior. \ No newline at end of file diff --git a/docs/src/content/docs/organization-practices/safe-rollout.md b/docs/src/content/docs/organization-practices/safe-rollout.md new file mode 100644 index 00000000000..d2c5476ea75 --- /dev/null +++ b/docs/src/content/docs/organization-practices/safe-rollout.md @@ -0,0 +1,78 @@ +--- +title: Safe Rollout +description: Move from report-only or staged behavior to direct production writes with evidence and control. +sidebar: + badge: { text: 'Rollout', variant: 'caution' } +--- + +Safe rollout is the practice of increasing workflow autonomy in steps instead of enabling direct production writes immediately. + +The main question is not whether a workflow is useful, but whether it is trusted enough to act on the live system. In practice, teams usually move through a ladder: report-only first, then staged behavior, then a more realistic safe-write technique if needed, and finally direct production writes. + +This is especially useful for [CorrectionOps](/gh-aw/patterns/correction-ops/), where the goal is to improve the workflow over time using persisted predictions and later human truth. + +## Rollout Ladder + +The usual progression is: + +1. Start in report-only mode. +2. Enable `staged` behavior when proposed writes need to be previewed. +3. Use shadow evaluation when preview mode is not enough and the real write path needs to be exercised safely. +4. Promote the same workflow to direct production writes. + +`staged` and shadow evaluation are not interchangeable. 
Staged mode is sufficient when the question is what the workflow would do. Shadow evaluation is needed when the question is whether the real write path behaves correctly on a safe non-production target. + +## When Staged Is Enough + +Use staged mode when the main risk is decision quality rather than operational behavior. + +It is usually enough when maintainers only need to review proposed actions, compare alternatives, or inspect whether the workflow's judgment is reasonable before any write is allowed. + +## When Shadow Evaluation Is Needed + +Use shadow evaluation when staged mode is too weak because the real write path itself needs validation. + +This is a good fit when: + +- the workflow must update real target objects to prove the behavior is correct +- concurrency, deduplication, or serialization needs to be tested on a live-like surface +- maintainers need to inspect the actual produced state, not only proposed intent +- cross-repository writes, permissions, or dispatch boundaries need to be exercised safely + +Shadow evaluation is one technique inside safe rollout, not a separate top-level pattern. + +## Design Rules + +### Production truth stays authoritative + +Do not let the evaluation surface become the new source of truth. Production events and later trusted human actions should remain authoritative. + +### Prediction snapshots should be explicit + +If later comparison matters, persist what the workflow predicted at decision time. Do not reconstruct predictions from logs. + +### Correction evidence needs provenance + +Not every later edit should count as trustworthy truth. Record provenance such as actor type, manual versus automated source, trust status, and origin repository role. + +### Evaluation surfaces should remain disposable + +Keep the shadow target thin. It should support measurement and rollout, not become a second long-lived control plane. 
+ +## Example Shape + +The common repository split is: + +- production repository: emits live events and contains authoritative later human truth +- ops repository: persists predictions, collects corrections, publishes reports, and updates instructions +- shadow repository: temporary non-production write target during rollout + +That shape is often useful, but it is still rollout guidance rather than a primary pattern. The stronger reusable pattern remains [CorrectionOps](/gh-aw/patterns/correction-ops/). + +## Related Documentation + +- [CorrectionOps](/gh-aw/patterns/correction-ops/) +- [SideRepoOps](/gh-aw/patterns/side-repo-ops/) +- [MultiRepoOps](/gh-aw/patterns/multi-repo-ops/) +- [Staged Mode](/gh-aw/reference/staged-mode/) +- [Safe Outputs Reference](/gh-aw/reference/safe-outputs/) diff --git a/docs/src/content/docs/organization-practices/sharing-workflows.md b/docs/src/content/docs/organization-practices/sharing-workflows.md new file mode 100644 index 00000000000..42f47f74b40 --- /dev/null +++ b/docs/src/content/docs/organization-practices/sharing-workflows.md @@ -0,0 +1,128 @@ +--- +title: Sharing Workflows +description: Share, reuse, and govern workflows across repositories and organizations. +sidebar: + badge: { text: 'Platform', variant: 'tip' } +--- + +:::caution[Evolving guidance] +Enterprise workflow sharing capabilities are actively expanding. Details in this guide may change as the platform matures. +::: + +Sharing workflows across repositories is an organization practice, not a single design pattern. GitHub Agentic Workflows supports multiple layers of sharing, from installing a complete workflow into a repository to parameterized imports and cross-repository execution. + +The recommended enterprise pattern is one central `agentic-workflows` repository that publishes versioned workflow templates and shared components. Consuming repositories install full workflows with `gh aw add` and pull in shared modules through `imports:`. 
+ +## Sharing Layers + +### Layer 1: Copy or install whole workflows + +A repository can pull in a complete workflow from another repository using `gh aw add`: + +```bash +gh aw add acme-org/agentic-workflows/ci-doctor@v1.2.0 +``` + +`gh aw add-wizard` provides interactive guidance for the same operation. When a workflow is installed, a `source:` field is added to its frontmatter so the origin is tracked. Updates can then be applied later with `gh aw update`. + +Version references support semantic tags (`@v1.2.0`), branches (`@main`), and commit SHAs for strict pinning. + +### Layer 2: Reusable workflow components + +Shared pieces such as common MCP server configuration, security setup steps, or reusable prompt fragments can be imported by any workflow: + +```yaml +imports: + - acme-org/shared-workflows/shared/security-setup.md@v2.1.0 + - acme-org/shared-workflows/shared/mcp/tavily.md@v1.0.0 +``` + +Imports compose into the consuming workflow at compile time. Frontmatter fields such as `tools:`, `network:`, and `mcp-servers:` are merged so imported configuration is additive. + +### Layer 3: Parameterized templates + +Shared workflows can accept inputs so the same template is usable across teams with different requirements: + +```yaml +imports: + - uses: acme-org/shared-workflows/shared/reviewer.md@v1 + with: + languages: ["go", "typescript"] + severity: "high" +``` + +The `uses` / `with` syntax makes it possible to share workflows that have team-specific settings while keeping a single maintained source. + +### Layer 4: Versioning and update flow + +Enterprise sharing depends on a predictable versioning model: + +- **Semantic versions** (`@v1.2.0`) for stable workflows that consuming teams can pin. +- **Branch refs** (`@main`, `@develop`) for pre-release versions during active development. +- **SHA pins** for strict reproducibility when drift must be ruled out. 
+ +Use `gh aw update` to pull upstream changes into installed workflows: + +```bash +gh aw update # update all tracked workflows +gh aw update ci-doctor # update a specific workflow +``` + +Updates apply a three-way merge that preserves local edits while incorporating upstream changes. + +### Layer 5: Private and internal sharing controls + +Not every workflow should be available for installation everywhere. GitHub Agentic Workflows supports access-based controls: + +- **`private: true`** in workflow frontmatter blocks `gh aw add` from installing that workflow into other repositories. +- Repository and organization visibility settings control who can read the workflow sources at all. +- `gh aw add` performs access checks before installation and surfaces warnings for workflows from untrusted sources. +- Org-internal workflow catalogs can be created using organization repositories with appropriate visibility settings. + +```yaml +--- +private: true +--- +``` + +### Layer 6: Import caching and lock behavior + +Remote imports are resolved at compile time and cached in `.github/aw/imports/` by commit SHA. This means: + +- Compiled `.lock.yml` files are fully reproducible: the exact import content is pinned at compile time. +- Offline compilation works once imports have been downloaded. +- The SHA cache is shared across refs that resolve to the same commit, reducing redundant network calls. + +The `.lock.yml` file and the `.github/aw/imports/` directory should both be committed to the repository so workflow runs are reproducible across environments. + +### Layer 7: Cross-repository execution model + +Separate from sharing workflow definitions, workflows can operate across repositories at runtime: + +- Read other repositories using GitHub tool access configured with appropriate permissions. +- Check out code from other repositories using cross-repository checkout. 
+- Create safe outputs (issues, pull requests, comments) in target repositories using `target-repo` and `allowed-repos`. +- Explicit authentication (PAT or GitHub App token) and allowlists control which repositories a workflow may write to. + +This execution model is covered in detail in [Cross-Repository Workflows](/gh-aw/reference/cross-repository/) and [MultiRepoOps](/gh-aw/patterns/multi-repo-ops/). + +## Governance Questions + +When workflows are shared across an organization, the important questions are usually operational rather than technical: + +- Who owns the source workflow and approves changes. +- How updates are reviewed and promoted from the central repository to consuming repositories. +- Which repositories may consume or dispatch to shared workflows. +- How secrets, permissions, and safe outputs are standardized across teams. +- When teams may fork a workflow rather than stay on the shared source. + +Those decisions affect reliability more than the file format does. + +## Related Documentation + +- [Reusing Workflows](/gh-aw/guides/packaging-imports/) +- [Imports Reference](/gh-aw/reference/imports/) +- [Frontmatter Reference](/gh-aw/reference/frontmatter/) (source, private, resources fields) +- [Cross-Repository Workflows](/gh-aw/reference/cross-repository/) +- [SideRepoOps](/gh-aw/patterns/side-repo-ops/) +- [MultiRepoOps](/gh-aw/patterns/multi-repo-ops/) diff --git a/docs/src/content/docs/patterns/correction-ops.md b/docs/src/content/docs/patterns/correction-ops.md new file mode 100644 index 00000000000..637fa8aa511 --- /dev/null +++ b/docs/src/content/docs/patterns/correction-ops.md @@ -0,0 +1,298 @@ +--- +title: CorrectionOps +description: Improve agentic workflows from trusted human corrections without retraining the underlying model +sidebar: + badge: Pattern +--- + +CorrectionOps is a workflow pattern that compares predictions with later human corrections. 
+ +Instead of retraining the model, CorrectionOps improves the workflow around the model. It stores predictions at decision time, compares them with later trusted human truth, and uses that evidence to update instructions, routing, thresholds, and rollout decisions. + +The basic loop is simple: + +1. Save what the workflow predicted +2. Collect what humans later decided +3. Use the difference to improve the workflow + +Discussion labeling is a good example: a workflow applies labels, humans later correct those labels, and the system uses that correction evidence to improve future runs. + +## When to Use CorrectionOps + +Use CorrectionOps when you want to turn a human decision process into an agentic workflow iteratively rather than all at once. + +It is a good fit when humans still make or correct the real decision, but you want the workflow to improve over time by updating instructions, routing, thresholds, or rollout state. + +Typical fits include labeling and classification, routing and prioritization, moderation and approvals, and summaries or recommendations that humans later correct. + +It is especially useful when the rollout path is gradual: + +- start in report-only mode +- move to a shadow or other safe write target +- use later corrections to improve the workflow +- promote to direct writes only when the evidence is strong enough + +## How It Works + +A clean CorrectionOps setup has two long-lived surfaces and one optional temporary one. Production stays authoritative. Ops is the long-lived home for prediction, correction intake, reporting, and instruction updates. Shadow, when used, is just a safe write target during evaluation. + +That means the workflows usually stay in ops. During evaluation they write to shadow. After promotion they can write directly to production without moving to a different repository. CorrectionOps is therefore broader than shadow evaluation. Shadow evaluation is one rollout shape inside CorrectionOps, not the whole pattern. 
+ +Most implementations reduce to three workflow classes: a thin relay that forwards stable facts into ops, a prediction workflow that persists snapshots and writes safely, and a compare/report/decide workflow that checks later human truth and updates the system when the evidence is strong enough. + +The important rule is to keep relays, snapshot resolution, diffing, and grouping deterministic. Use the agent for semantic judgment, not for reconstructing event history or inferring provenance after the fact. + +```aw wrap +--- +on: + schedule: daily + workflow_dispatch: + repository_dispatch: + types: [truth-feedback] +permissions: + contents: read + issues: read +safe-outputs: + create-issue: + create-pull-request: +--- + +# CorrectionOps Worker + +Read persisted predictions and later trusted truth, compare them deterministically, then either publish a health report or open a draft PR updating instructions. +``` + +CorrectionOps solves a different problem than model training. Reinforcement Learning from Human Feedback (RLHF) updates model weights from human feedback. CorrectionOps updates the workflow system around the model. In practice that usually means changing instruction files, routing rules, deterministic checks, thresholds, or rollout decisions rather than trying to retrain the engine. + +In a healthy CorrectionOps loop, production truth stays authoritative, predictions are saved explicitly, corrections include provenance, and diffs are built deterministically before the agent is asked to reason about them. + +CorrectionOps does not require a shadow surface, but many teams start with one. The normal progression is report-only first, then shadow evaluation when a safe write target is needed, then direct production writes once the evidence is strong enough. + +## Implementing It With GitHub Actions + +GitHub Actions is a strong fit because the pattern is mostly orchestration, artifact passing, and controlled writes across repositories. 
In practice, production events create the initial signal, a thin relay forwards that signal into ops, and the ops repo runs prediction and comparison work on schedules, manual dispatch, or forwarded events. + +For most teams, the clearest starting point is three workflows: one thin relay in the source repo, one prediction workflow in ops, and one compare/report/decide workflow in ops. Split further only when the boundary is real, such as a different trigger, a different permission boundary, or a separate serialized write path. + +In `gh-aw`, keep orchestration in frontmatter and step sections, use a small trusted set of GitHub Actions for plumbing, and keep policy-critical normalization, diffing, and grouping in repo-local scripts. `actions/github-script`, checkout, and artifact upload/download are usually enough. + +```yaml title="prod-repo/.github/workflows/relay-correction-signals.yml" +name: Relay Correction Signals + +on: + discussion: + types: [created, labeled, unlabeled] + +jobs: + relay: + runs-on: ubuntu-latest + steps: + - name: Forward stable facts to ops + uses: actions/github-script@v8 + with: + github-token: ${{ secrets.OPS_DISPATCH_TOKEN }} + script: | + await github.rest.repos.createDispatchEvent({ + owner: 'org', + repo: 'ops-repo', + event_type: context.payload.action === 'created' ? 'item-created' : 'truth-feedback', + client_payload: { + data: { + source_repository: `${context.repo.owner}/${context.repo.repo}`, + item_number: context.payload.discussion.number, + label: context.payload.label?.name || null, + actor: context.actor, + actor_type: context.actor.endsWith('[bot]') ? 'bot' : 'human', + }, + }, + }); +``` + +Most CorrectionOps systems still need both scheduled and manual entry points. A scheduled run catches drift and stale backlog. `workflow_dispatch` makes it possible to backfill one item, rerun one parent correction issue, or test a new instruction revision safely. 
Artifact handoff is often simpler than re-fetching everything in every step, and checkout should usually stay in ops rather than in production relays. + +## Portable Starter Architecture + +CorrectionOps is implementable for almost any repository that has three ingredients: + +1. a production object to observe, such as issues, pull requests, discussions, labels, approvals, or comments +2. a later human action that counts as trustworthy truth +3. an operational surface, usually an ops repo, where instructions and reports can live + +The minimal reusable architecture is: + +- one production relay workflow +- one ops prediction workflow +- one ops compare, report, and decide workflow +- one stable snapshot schema + +Many teams add a separate correction-collector workflow because the truth-ingest boundary is naturally deterministic and often triggered by `repository_dispatch`. That is a useful operational split, but it is not the simplest shape to teach first. + +The repository-specific work is usually limited to how to fetch and normalize the production object, which human actions count as trusted truth, what grouped correction patterns are meaningful, and which instruction or policy files are allowed to change. That is what keeps the pattern portable across different business domains. + +## Reproducible Starter Setup + +This page intentionally uses generic repository and workflow names so the pattern can be reproduced without depending on any partner repository. 
+ +The simplest teachable setup uses two repositories and an optional third: + +- `prod-repo`: the authoritative system where the original object and later human truth live +- `ops-repo`: the long-lived control plane for prediction, correction review, reporting, and instruction updates +- `shadow-repo`: an optional safe write target used only during rollout + +The workflow layout is: + +| Repository | Workflow | Role | +| --- | --- | --- | +| `prod-repo` | `relay-correction-signals.yml` | Thin deterministic relay | +| `ops-repo` | `predict-items.md` | Predict and persist snapshots | +| `ops-repo` | `review-corrections.md` | Compare, report, and decide | +| `ops-repo` | `collect-corrections.yml` | Optional deterministic truth intake | +| `shadow-repo` | `mirror-items.yml` | Optional safe-write support | + +If the source event stream already contains everything needed for later comparison, skip `collect-corrections.yml`. If direct writes are too risky during rollout, add `mirror-items.yml` and point safe outputs at `shadow-repo` until the evidence is strong enough. + +### 1. Thin Relay In The Source Repo + +The relay only forwards stable facts and provenance into ops. It should not compute diffs, infer human intent, or decide whether the workflow was correct. + +```yaml title="prod-repo/.github/workflows/relay-correction-signals.yml" +name: Relay Correction Signals + +on: + issues: + types: [opened, labeled, unlabeled] + +jobs: + relay: + runs-on: ubuntu-latest + steps: + - name: Forward stable facts to ops + uses: actions/github-script@v8 + with: + github-token: ${{ secrets.OPS_DISPATCH_TOKEN }} + script: | + await github.rest.repos.createDispatchEvent({ + owner: 'org', + repo: 'ops-repo', + event_type: context.payload.action === 'opened' ? 
'item-created' : 'truth-feedback', + client_payload: { + data: { + source_repository: `${context.repo.owner}/${context.repo.repo}`, + source_type: 'issue', + item_number: context.payload.issue.number, + item_title: context.payload.issue.title, + item_url: context.payload.issue.html_url, + event_type: context.payload.action, + label: context.payload.label?.name || null, + actor: context.actor, + actor_type: context.actor.endsWith('[bot]') ? 'bot' : 'human', + occurred_at: new Date().toISOString(), + }, + }, + }); +``` + +### 2. Prediction Workflow In Ops + +The prediction workflow consumes normalized inputs, applies the current instructions, writes through safe outputs, and persists a durable snapshot that can be compared later. + +```aw wrap title="ops-repo/.github/workflows/predict-items.md" +--- +name: Predict Items + +on: + schedule: daily + workflow_dispatch: + repository_dispatch: + types: [item-created] + +tools: + github: + toolsets: [issues, repos] + +safe-outputs: + create-issue: + update-issue: + target-repo: ${{ inputs.target-repo || 'shadow-repo' }} +--- + +# Predict Items + +Read prepared items from `/tmp/gh-aw/agent/item-scan`, apply the current instructions, write the proposed changes through safe outputs, and append a prediction snapshot containing the source identifier, predicted action, instruction version, and timestamp. +``` + +### 3. Compare, Report, And Decide In Ops + +The review workflow reads persisted predictions and later human truth, builds deterministic diffs first, and only then asks the agent to summarize patterns or propose instruction updates. 
+ +```aw wrap title="ops-repo/.github/workflows/review-corrections.md" +--- +name: Review Corrections + +on: + schedule: weekly + workflow_dispatch: + inputs: + mode: + description: report or adaptation + required: false + default: report + type: choice + options: [report, adaptation] + +safe-outputs: + create-issue: + create-pull-request: +--- + +# Review Corrections + +Read `correction-diffs.json` from `/tmp/gh-aw/agent/correction-review`. In `report` mode, publish a health summary. In `adaptation` mode, open a draft PR updating the instruction file only when the grouped evidence is strong enough. +``` + +### 4. Optional Deterministic Collector + +Add a separate collector only when the later-truth boundary deserves its own trigger, permissions, or serialized write path. + +```yaml title="ops-repo/.github/workflows/collect-corrections.yml" +name: Collect Corrections + +on: + repository_dispatch: + types: [truth-feedback] + +jobs: + collect: + runs-on: ubuntu-latest + steps: + - name: Resolve authoritative truth and store correction evidence + run: ./scripts/store-correction-evidence.sh +``` + +### 5. Stable Contracts To Define First + +Before adding rollout logic or adaptation prompts, define four small deterministic contracts: + +1. relay payload: the minimal source identity, object identity, event type, actor facts, and timestamps forwarded into ops +2. prediction snapshot: the durable record of what the workflow predicted and under which instruction version +3. correction review input: the deterministic diff artifact used by reporting and adaptation +4. write target contract: which repository receives evaluation writes before direct production writes are enabled + +Discussion labeling, routing, moderation, prioritization, approvals, and summaries can all reuse this shape. The production object changes, but the CorrectionOps setup does not. 
+ +## Relationship To Other Patterns + +CorrectionOps overlaps with several adjacent ideas, but it solves a narrower problem. + +- Shadow deployment evaluates a candidate safely on live traffic. CorrectionOps adds the correction-driven adaptation loop. +- Human-in-the-loop review adds oversight at decision time. CorrectionOps adds a durable memory of corrections and uses it to change the workflow later. +- LLMOps and AgentOps provide broader tracing, evaluation, and governance capabilities. CorrectionOps is a specific design pattern for using trusted corrections to improve production-adjacent workflows. +- RLHF updates model weights from human preference data. CorrectionOps updates the operational system around the model instead. + +## Related Documentation + +- [Safe Rollout](/gh-aw/organization-practices/safe-rollout/) for the optional safe-write rollout guidance inside CorrectionOps +- [SideRepoOps](/gh-aw/patterns/side-repo-ops/) for separating workflow infrastructure from the production repository +- [MultiRepoOps](/gh-aw/patterns/multi-repo-ops/) for coordinating workflows across repository boundaries +- [Safe Outputs Reference](/gh-aw/reference/safe-outputs/) for controlling write targets and protections +- [GitHub Tools](/gh-aw/reference/github-tools/) for cross-repository reads and operations diff --git a/docs/src/content/docs/reference/glossary.md b/docs/src/content/docs/reference/glossary.md index dd410c7d269..d2ecaf3f263 100644 --- a/docs/src/content/docs/reference/glossary.md +++ b/docs/src/content/docs/reference/glossary.md @@ -620,6 +620,10 @@ A [MultiRepoOps](#multirepoops) deployment variant where a single private reposi Interactive automation triggered by slash commands (`/review`, `/deploy`) in issues and pull requests, enabling human-in-the-loop automation where developers invoke AI assistance on demand. See [ChatOps](/gh-aw/patterns/chat-ops/). 
+### CorrectionOps + +Pattern for improving production-adjacent workflows from trusted human corrections without retraining the underlying model. CorrectionOps persists predictions, compares them with later human truth, and uses deterministic correction evidence to update instructions, routing, thresholds, and rollout policy. See [CorrectionOps](/gh-aw/patterns/correction-ops/). + ### DailyOps Scheduled workflows for incremental daily improvements, automating progress toward large goals through small, manageable changes on weekday schedules. See [DailyOps](/gh-aw/patterns/daily-ops/). @@ -672,6 +676,22 @@ Testing and validation pattern executing workflows in isolated trial repositorie Pattern for incrementally processing a backlog of work items using a durable queue backend — issue checklists, sub-issues, [cache-memory](#cache-memory), or GitHub Discussions. Each run picks up where the last left off, making it resilient to interruptions and rate limits. Items should be idempotent and independently processable. See [WorkQueueOps](/gh-aw/patterns/workqueue-ops/). +## Organization Practices + +Topics that matter at team and enterprise scale but are not themselves standalone design patterns. + +### Organization Practices (section) + +Documentation section covering rollout, sharing, ownership, and governance concerns for workflows across teams and repositories. See [Organization Practices](/gh-aw/organization-practices/). + +### Safe Rollout + +Rollout practice for moving from report-only or staged behavior to direct production writes with evidence and control. Safe rollout can include techniques such as shadow evaluation, where a workflow runs on live production signals but writes to a non-production target while trust is being built. See [Safe Rollout](/gh-aw/organization-practices/safe-rollout/). + +### Shadow Evaluation + +One safe-rollout technique where a workflow runs on live production signals without writing directly to the production system. 
Shadow evaluation usually keeps production as the source of truth, uses an ops repository to measure prediction-versus-truth deltas, and may introduce a temporary shadow write target during trust-building. See [Safe Rollout](/gh-aw/organization-practices/safe-rollout/#when-shadow-evaluation-is-needed). + ## Related Resources For detailed documentation on specific topics, see: