diff --git a/.github/workflows/check-upstream-drift.yml b/.github/workflows/check-upstream-drift.yml new file mode 100644 index 0000000..6a5c13e --- /dev/null +++ b/.github/workflows/check-upstream-drift.yml @@ -0,0 +1,53 @@ +name: Check upstream drift + +on: + schedule: + - cron: '0 6 * * 1' # Mondays 06:00 UTC + workflow_dispatch: + +permissions: + contents: read + issues: write + +jobs: + check: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Run drift check + id: drift + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set +e + OUTPUT=$(./scripts/check-upstream-drift.sh) + STATUS=$? + echo "$OUTPUT" + { + echo 'report<<DRIFT_EOF' + echo "$OUTPUT" + echo 'DRIFT_EOF' + } >> "$GITHUB_OUTPUT" + echo "status=$STATUS" >> "$GITHUB_OUTPUT" + exit 0 + + - name: Open or update drift issue + if: steps.drift.outputs.status == '1' + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + DRIFT_REPORT: ${{ steps.drift.outputs.report }} + run: | + set -euo pipefail + TITLE="Upstream glide-mq skills drift detected" + # Compose body via env var (not direct ${{ }} interpolation in the + # script body) to prevent any character in the upstream-sourced + # report (commit messages, version strings) from breaking out of + # the heredoc and executing arbitrary shell. + BODY=$(printf 'Automated drift detection found that vendored skills are behind `avifenesh/glide-mq` HEAD.\n\n```\n%s\n```\n\nRun `./scripts/sync-upstream.sh` to update.\n' "$DRIFT_REPORT") + existing=$(gh issue list --state open --search "$TITLE in:title" --json number -q '.[0].number' || echo "") + if [ -n "$existing" ]; then + gh issue comment "$existing" --body "$BODY" + else + gh issue create --title "$TITLE" --body "$BODY" --label "upstream-sync" + fi diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..1befad3 --- /dev/null +++ b/LICENSE @@ -0,0 +1,191 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to the Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by the Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding any notices that do not + pertain to any part of the Derivative Works, in at least one + of the 
following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + Copyright 2024-2026 Avi Fenesh + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/scripts/check-upstream-drift.sh b/scripts/check-upstream-drift.sh new file mode 100644 index 0000000..f38c71b --- /dev/null +++ b/scripts/check-upstream-drift.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Check if vendored skills have drifted from upstream. +# Reads recorded SHA from skills/UPSTREAM.md and compares to upstream main. +# Exit code 0 = up to date, 1 = drift detected. + +set -euo pipefail + +UPSTREAM_OWNER="avifenesh" +UPSTREAM_REPO="glide-mq" +TRACKER="skills/UPSTREAM.md" + +if ! command -v gh >/dev/null 2>&1; then + echo "[ERROR] gh CLI required" >&2 + exit 2 +fi + +if [ ! 
-f "$TRACKER" ]; then + echo "[ERROR] $TRACKER not found" >&2 + exit 2 +fi + +RECORDED_SHA=$(grep -oE '`[a-f0-9]{40}`' "$TRACKER" | head -1 | tr -d '`' || true) +RECORDED_VERSION=$(grep -oE '`v[0-9]+\.[0-9]+\.[0-9]+`' "$TRACKER" | head -1 | tr -d '`' || true) + +# Tolerate lookup failures here and validate below, so a gh/network +# failure exits 2 (error) instead of a misleading non-zero status. +UPSTREAM_SHA=$(gh api "repos/$UPSTREAM_OWNER/$UPSTREAM_REPO/commits/main" -q .sha 2>/dev/null || true) +UPSTREAM_DATE=$(gh api "repos/$UPSTREAM_OWNER/$UPSTREAM_REPO/commits/main" -q .commit.author.date 2>/dev/null || true) +UPSTREAM_VERSION=$(gh api "repos/$UPSTREAM_OWNER/$UPSTREAM_REPO/contents/package.json?ref=$UPSTREAM_SHA" -q .content 2>/dev/null \ + | base64 -d | grep -oE '"version"[[:space:]]*:[[:space:]]*"[^"]+"' | head -1 | cut -d'"' -f4 || true) + +if [ -z "$RECORDED_SHA" ] || [ -z "$UPSTREAM_SHA" ]; then + echo "[ERROR] Could not determine recorded or upstream SHA" >&2 + exit 2 +fi + +if [ "$RECORDED_SHA" = "$UPSTREAM_SHA" ]; then + echo "[OK] Vendored skills are at upstream HEAD (v$RECORDED_VERSION, $RECORDED_SHA)" + exit 0 +fi + +# Count commits behind +BEHIND=$(gh api "repos/$UPSTREAM_OWNER/$UPSTREAM_REPO/compare/$RECORDED_SHA...$UPSTREAM_SHA" -q .ahead_by 2>/dev/null || echo "?") + +echo "[DRIFT] Vendored skills are behind upstream" +echo " Recorded: $RECORDED_VERSION ($RECORDED_SHA)" +echo " Upstream: v$UPSTREAM_VERSION ($UPSTREAM_SHA)" +echo " Behind by: $BEHIND commits (as of $UPSTREAM_DATE)" +echo "" +echo "Run: ./scripts/sync-upstream.sh" +exit 1 diff --git a/scripts/sync-upstream.sh b/scripts/sync-upstream.sh new file mode 100644 index 0000000..7f2f23f --- /dev/null +++ b/scripts/sync-upstream.sh @@ -0,0 +1,86 @@ +#!/usr/bin/env bash +# Sync vendored skills from upstream avifenesh/glide-mq. +# Usage: scripts/sync-upstream.sh [ref] (default: main) + +set -euo pipefail + +UPSTREAM_OWNER="avifenesh" +UPSTREAM_REPO="glide-mq" +SKILLS=("glide-mq" "glide-mq-migrate-bullmq" "glide-mq-migrate-bee") +REF="${1:-main}" + +ROOT="$(cd "$(dirname "$0")/.." && pwd)" +cd "$ROOT" + +if ! command -v gh >/dev/null 2>&1; then + echo "[ERROR] gh CLI required" >&2 + exit 1 +fi + +echo "[INFO] Resolving ref '$REF' on $UPSTREAM_OWNER/$UPSTREAM_REPO..." 
+# || true keeps set -e from aborting before the empty-SHA check below +SHA=$(gh api "repos/$UPSTREAM_OWNER/$UPSTREAM_REPO/commits/$REF" -q .sha 2>/dev/null || true) +DATE=$(gh api "repos/$UPSTREAM_OWNER/$UPSTREAM_REPO/commits/$REF" -q .commit.author.date 2>/dev/null || true) +VERSION=$(gh api "repos/$UPSTREAM_OWNER/$UPSTREAM_REPO/contents/package.json?ref=$SHA" -q .content 2>/dev/null \ + | base64 -d | grep -oE '"version"[[:space:]]*:[[:space:]]*"[^"]+"' | head -1 | cut -d'"' -f4 || true) + +if [ -z "$SHA" ]; then + echo "[ERROR] Could not resolve ref '$REF'" >&2 + exit 1 +fi + +echo "[INFO] Syncing to $SHA ($DATE, v$VERSION)" + +# Sync each skill SKILL.md and references/* +for s in "${SKILLS[@]}"; do + echo "[INFO] $s/SKILL.md" + gh api "repos/$UPSTREAM_OWNER/$UPSTREAM_REPO/contents/skills/$s/SKILL.md?ref=$SHA" -q .content 2>/dev/null \ + | base64 -d > "skills/$s/SKILL.md" + + # Newline-separated list, safe under spaces in filenames + ref_files=$(gh api "repos/$UPSTREAM_OWNER/$UPSTREAM_REPO/contents/skills/$s/references?ref=$SHA" -q '.[] | select(.type=="file") | .name' 2>/dev/null || true) + if [ -n "$ref_files" ]; then + mkdir -p "skills/$s/references" + # Remove stale references not in upstream + for existing in "skills/$s/references"/*; do + [ -f "$existing" ] || continue + base=$(basename "$existing") + if ! grep -qxF "$base" <<< "$ref_files"; then + echo " [REMOVE] references/$base (no longer upstream)" + rm -f "$existing" + fi + done + while IFS= read -r f; do + [ -z "$f" ] && continue + gh api "repos/$UPSTREAM_OWNER/$UPSTREAM_REPO/contents/skills/$s/references/$f?ref=$SHA" -q .content 2>/dev/null \ + | base64 -d > "skills/$s/references/$f" + echo " [OK] references/$f" + done <<< "$ref_files" + fi +done + +# Sync LICENSE +gh api "repos/$UPSTREAM_OWNER/$UPSTREAM_REPO/contents/LICENSE?ref=$SHA" -q .content 2>/dev/null \ + | base64 -d > LICENSE + +# Update UPSTREAM.md tracker table +TODAY=$(date -u +%Y-%m-%d) +TRACKER="skills/UPSTREAM.md" +if [ -f "$TRACKER" ]; then + python - < # sync to a specific commit +``` + +The script: +1. 
Fetches `skills/glide-mq*` and the LICENSE from the requested ref via the GitHub API +2. Overwrites local copies (no merge - upstream is source of truth) +3. Updates the **Last sync** table in this file with the new SHA, date, and version +4. Prints a diff summary + +After running, review the diff, commit, and open a PR titled `sync: glide-mq skills to <sha>`. + +## Drift detection + +CI runs `scripts/check-upstream-drift.sh` weekly (and on demand). It compares the recorded SHA in this file against `avifenesh/glide-mq` HEAD and opens (or updates) an issue whenever the two differ. + +## Why vendor instead of fetch at install time + +- **Offline install**: agentsys plugins should work without network calls during install +- **Marketplace consistency**: the plugin's content is reviewable in this repo before users install it +- **Determinism**: a given marketplace version pins to a specific upstream SHA + +## When NOT to edit these files locally + +Do **not** hand-edit the SKILL.md or any file in `references/`. Open a PR upstream at https://github.com/avifenesh/glide-mq instead, then re-sync here. Local edits will be lost on the next sync. + +The only files in this directory that are owned by `agent-sh/glidemq` and safe to edit: +- `skills/UPSTREAM.md` (this file) +- New skill directories that don't exist upstream (none today) diff --git a/skills/glide-mq-migrate-bee/SKILL.md b/skills/glide-mq-migrate-bee/SKILL.md index de54dde..380c241 100644 --- a/skills/glide-mq-migrate-bee/SKILL.md +++ b/skills/glide-mq-migrate-bee/SKILL.md @@ -1,129 +1,346 @@ --- name: glide-mq-migrate-bee -description: "Migrates Bee-Queue applications to glide-mq. Use when user wants to convert, migrate, replace, or switch from Bee-Queue to glide-mq, or asks about Bee-Queue vs glide-mq differences." -version: 1.0.0 -argument-hint: "[migration scope or question]" +description: >- + Migrates Node.js applications from Bee-Queue to glide-mq. 
Covers the chained + builder-to-options API conversion, Queue/Worker separation, and event mapping. + Use when converting bee-queue projects to glide-mq, replacing bee-queue with + glide-mq, or planning a bee-queue migration. Triggers on + "bee-queue to glide-mq", "replace bee-queue with glide-mq", + "migrate from bee-queue", "beequeue migration glide-mq". +license: Apache-2.0 +metadata: + author: glide-mq + version: "0.14.0" + tags: bee-queue, migration, glide-mq, valkey, redis, job-queue + sources: docs/USAGE.md --- -# glide-mq-migrate-bee +# Migrate from Bee-Queue to glide-mq -Provides guidance for migrating Bee-Queue applications to glide-mq - chained builder to options object conversion, API mapping, and architectural changes. +## When to Apply -> This is a thin wrapper. For the complete migration guide, see `node_modules/glide-mq/skills/` or https://avifenesh.github.io/glide-mq.dev/migration/from-bee-queue +Use this skill when: +- Replacing bee-queue with glide-mq in an existing project +- Converting Bee-Queue's chained job API to glide-mq's options API +- Updating connection configuration from ioredis to valkey-glide +- Upgrading from bee-queue due to Node.js compatibility or maintenance issues -## When to Use - -Invoke this skill when: -- User wants to migrate from Bee-Queue to glide-mq -- User asks about differences between Bee-Queue and glide-mq -- User needs help converting Bee-Queue chained job builders -- User is evaluating Bee-Queue alternatives or has compatibility issues +Step-by-step guide for converting Bee-Queue projects to glide-mq. Bee-Queue uses a chained job builder pattern - this migration requires rewriting job creation and separating producer/consumer concerns. ## Why Migrate -Bee-Queue is unmaintained (last release 2021) with Node.js compatibility issues. 
It lacks: -- Cluster support and TLS -- Delayed jobs and priority queues -- TypeScript types -- Workflow orchestration -- Active maintenance and security patches +- **Unmaintained** - last release 2021, accumulating Node.js compatibility issues +- **No cluster support** - cannot scale beyond a single Redis instance +- **No TLS** - requires manual ioredis workarounds for encrypted connections +- **No native TypeScript** - community `@types/bee-queue` only, often outdated +- **No priority queues** - workaround is multiple queues +- **No workflows** - no parent-child jobs, no DAGs, no repeatable/cron jobs +- **No rate limiting, batch processing, or broadcast** +- glide-mq provides all Bee-Queue features plus 35%+ higher throughput -glide-mq provides all of these with 35%+ higher throughput. +## Breaking Changes Summary -## Install +| Feature | Bee-Queue | glide-mq | +|---------|-----------|----------| +| Queue + Worker | Single `Queue` class | Separate `Queue` (producer) and `Worker` (consumer) | +| Job creation | `queue.createJob(data).save()` (chained) | `queue.add(name, data, opts)` (single call) | +| Job name | Not used - no name parameter | **Required** first argument to `queue.add()` | +| Job options | Chained: `.timeout(ms).retries(n)` | Options object: `{ attempts, backoff, delay }` | +| Retries | `.retries(n)` | `{ attempts: n }` (different name!) 
| +| Processing | `queue.process(concurrency, handler)` | `new Worker(name, handler, { concurrency })` | +| Connection | `{ host, port }` or redis URL | `{ addresses: [{ host, port }] }` | +| Progress | `job.reportProgress(anyJSON)` | `job.updateProgress(number \| object)` (number 0-100 or object) | +| Per-job events | `job.on('succeeded', ...)` | `QueueEvents` class (centralized) | +| Stall detection | Manual `checkStalledJobs()` | Automatic on Worker | +| Batch save | `queue.saveAll(jobs)` | `queue.addBulk(jobs)` | +| Producer-only | `{ isWorker: false }` | `Producer` class or just `Queue` | -```bash -npm uninstall bee-queue @types/bee-queue -npm install glide-mq -``` +## Queue Settings Mapping + +| Bee-Queue Setting | Default | glide-mq Equivalent | Notes | +|-------------------|---------|---------------------|-------| +| `redis` | `{}` | `connection: { addresses: [...] }` | Array of `{ host, port }` objects | +| `isWorker` | `true` | Use `Producer` or `Queue` class | Separate classes replace flag | +| `getEvents` | `true` | Use `QueueEvents` class | Separate class for event subscription | +| `sendEvents` | `true` | `events: true` on Worker | Controls lifecycle event emission | +| `storeJobs` | `true` | Always true | glide-mq always stores jobs | +| `ensureScripts` | `true` | Automatic | Server Functions loaded automatically | +| `activateDelayedJobs` | `false` | Automatic | Server-side delayed job activation | +| `removeOnSuccess` | `false` | `{ removeOnComplete: true }` | Per-job option on `queue.add()` | +| `removeOnFailure` | `false` | `{ removeOnFail: true }` | Per-job option on `queue.add()` | +| `stallInterval` | `5000` | `lockDuration` on Worker | Lock-based stall detection | +| `nearTermWindow` | `20min` | N/A | Valkey-native delayed processing | +| `delayedDebounce` | `1000` | N/A | Server-side scheduling | +| `prefix` | `'bq'` | `prefix` on Queue | Default: `'glide'` | +| `quitCommandClient` | `true` | Automatic | Handled by graceful shutdown | +| 
`redisScanCount` | `100` | N/A | Different key strategy | + +## Queue Method Mapping + +| Bee-Queue Method | glide-mq Equivalent | Notes | +|------------------|---------------------|-------| +| `queue.createJob(data)` | `queue.add(name, data, opts)` | Name is required; returns Job not builder | +| `queue.process(n, handler)` | `new Worker(name, handler, { concurrency: n })` | Separate class | +| `queue.checkStalledJobs(interval)` | Automatic on Worker | No manual call needed | +| `queue.checkHealth()` | `queue.getJobCounts()` | Returns `{ waiting, active, completed, failed, delayed }` | +| `queue.close()` | `gracefulShutdown([...])` | Or individual `.close()` calls | +| `queue.ready()` | `worker.waitUntilReady()` | On Worker, not Queue | +| `queue.isRunning()` | `worker.isRunning()` | On Worker | +| `queue.getJob(id)` | `queue.getJob(id)` | Same API | +| `queue.getJobs(type, page)` | `queue.getJobs(type, start, end)` | Range-based pagination | +| `queue.removeJob(id)` | `(await queue.getJob(id)).remove()` | Via Job instance | +| `queue.saveAll(jobs)` | `queue.addBulk(jobs)` | Different input format | +| `queue.destroy()` | `queue.obliterate()` | Removes all queue data | + +## Event Mapping -## Connection Conversion +| Bee-Queue Event | Source | glide-mq Equivalent | Source | +|-----------------|--------|---------------------|--------| +| `queue.on('ready')` | Queue | `worker.waitUntilReady()` | Worker | +| `queue.on('error', err)` | Queue | `worker.on('error', err)` | Worker | +| `queue.on('succeeded', job, result)` | Queue (local) | `worker.on('completed', job)` | Worker | +| `queue.on('retrying', job, err)` | Queue (local) | `worker.on('failed', job, err)` | Worker (with retries remaining) | +| `queue.on('failed', job, err)` | Queue (local) | `worker.on('failed', job, err)` | Worker | +| `queue.on('stalled', jobId)` | Queue | `worker.on('stalled', jobId)` | Worker | +| `queue.on('job succeeded', id, result)` | Queue (PubSub) | `events.on('completed', { jobId })` 
| QueueEvents | +| `queue.on('job failed', id, err)` | Queue (PubSub) | `events.on('failed', { jobId })` | QueueEvents | +| `queue.on('job retrying', id, err)` | Queue (PubSub) | No direct equivalent | Use `events.on('failed')` + retry check | +| `queue.on('job progress', id, data)` | Queue (PubSub) | `events.on('progress', { jobId, data })` | QueueEvents | +| `job.on('succeeded', result)` | Job | `events.on('completed', { jobId })` | QueueEvents (filter by jobId) | +| `job.on('failed', err)` | Job | `events.on('failed', { jobId })` | QueueEvents (filter by jobId) | +| `job.on('progress', data)` | Job | `events.on('progress', { jobId })` | QueueEvents (filter by jobId) | + +Per-job events (`job.on(...)`) do not exist in glide-mq. Use `QueueEvents` and filter by `jobId`, or use `queue.addAndWait()` for request-reply patterns. + +## Step-by-Step Conversion + +### 1. Connection -**Bee-Queue:** ```typescript +// BEFORE (Bee-Queue) +const Queue = require('bee-queue'); const queue = new Queue('tasks', { redis: { host: 'localhost', port: 6379 } }); -``` -**glide-mq:** -```typescript +// AFTER (glide-mq) import { Queue, Worker } from 'glide-mq'; const connection = { addresses: [{ host: 'localhost', port: 6379 }] }; const queue = new Queue('tasks', { connection }); ``` -## Chained Builder to Options Object +### 2. Job Creation (Biggest Change) -The biggest migration change. Bee-Queue uses chained methods; glide-mq uses an options object. +Bee-Queue uses chained builder with no job name. glide-mq uses a single call with a required name. 
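As noted in the event mapping above, Bee-Queue's per-job `job.on(...)` listeners become a `QueueEvents` subscription filtered by `jobId`. A minimal sketch of that filtering pattern, simulated here with Node's `EventEmitter` standing in for a real glide-mq `QueueEvents` instance (the `{ jobId }` payload shape is an assumption taken from the mapping table; verify it against the glide-mq docs):

```typescript
import { EventEmitter } from 'node:events';

// forJob: deliver only events whose payload.jobId matches the given id.
// Pass a real glide-mq QueueEvents instance in place of the emitter.
function forJob<T extends { jobId: string }>(
  events: EventEmitter,
  jobId: string,
  event: string,
  handler: (payload: T) => void,
): void {
  events.on(event, (payload: T) => {
    if (payload.jobId === jobId) handler(payload);
  });
}

// Simulated event stream for illustration:
const events = new EventEmitter();
const seen: string[] = [];
forJob(events, 'job-1', 'completed', (p) => seen.push(p.jobId));
events.emit('completed', { jobId: 'job-2' }); // filtered out
events.emit('completed', { jobId: 'job-1' }); // delivered
console.log(seen); // [ 'job-1' ]
```

For one-shot request-reply cases, `queue.addAndWait()` (mentioned above) avoids the subscription entirely.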
-**Bee-Queue (chained builder):** ```typescript -const job = queue.createJob(data) - .timeout(30000) +// BEFORE (Bee-Queue) - chained builder, no name +const job = await queue.createJob({ email: 'user@example.com' }) .retries(3) .backoff('exponential', 1000) .delayUntil(Date.now() + 60000) .setId('unique-123') .save(); + +// AFTER (glide-mq) - options object, name required +await queue.add('send-email', + { email: 'user@example.com' }, + { + attempts: 3, // NOT "retries" - different name! + backoff: { type: 'exponential', delay: 1000 }, + delay: 60000, + jobId: 'unique-123', + } +); ``` -**glide-mq (options object):** +### 3. Worker + ```typescript -await queue.add('task-name', data, { - timeout: 30000, // .timeout(ms) -> timeout (job option) - attempts: 3, // .retries(n) -> attempts - backoff: { type: 'exponential', delay: 1000 }, // .backoff() -> backoff object - delay: 60000, // .delayUntil() -> delay (relative ms) - jobId: 'unique-123', // .setId() -> jobId +// BEFORE (Bee-Queue) +queue.process(10, async (job) => { + return { processed: true }; }); +queue.on('succeeded', (job, result) => console.log('Done:', result)); + +// AFTER (glide-mq) - separate Worker class +const worker = new Worker('tasks', async (job) => { + return { processed: true }; +}, { connection, concurrency: 10 }); +worker.on('completed', (job) => console.log('Done:', job.returnValue)); ``` -**CRITICAL**: Bee-Queue `.retries(n)` maps to glide-mq `attempts` - different name! +### 4. Batch Save + +```typescript +// BEFORE (Bee-Queue) +const jobs = items.map(item => queue.createJob(item)); +await queue.saveAll(jobs); + +// AFTER (glide-mq) - each entry needs a name +await queue.addBulk(items.map(item => ({ + name: 'process', + data: item +}))); +``` + +### 5. 
Producer-Only + +```typescript +// BEFORE (Bee-Queue) - disable worker mode +const queue = new Queue('tasks', { + isWorker: false, getEvents: false, sendEvents: false, + redis: { host: 'localhost', port: 6379 } +}); + +// AFTER (glide-mq) - Producer class +import { Producer } from 'glide-mq'; +const producer = new Producer('tasks', { connection }); +await producer.add('job-name', data); +await producer.close(); +``` -## Worker Processing +### 6. Progress Reporting ```typescript -// Bee-Queue: queue.process(10, handler); queue.on('succeeded', ...) -// glide-mq: new Worker(name, handler, { connection, concurrency: 10 }) +// BEFORE (Bee-Queue) - arbitrary JSON +queue.process(async (job) => { + job.reportProgress({ percent: 50, message: 'halfway' }); + return result; +}); + +// AFTER (glide-mq) - number (0-100) or object const worker = new Worker('tasks', async (job) => { - return { processed: true }; -}, { connection, concurrency: 10 }); -worker.on('completed', (job) => console.log('Done:', job.returnValue)); + await job.updateProgress(50); + await job.updateProgress({ page: 3, total: 10 }); // objects also supported + await job.log('halfway done'); // structured info goes to job.log() + return result; +}, { connection }); ``` -## Key Differences - -| Feature | Bee-Queue | glide-mq | Notes | -|---------|-----------|----------|-------| -| Job creation | `createJob(data).save()` | `queue.add(name, data, opts)` | Pattern changed | -| Options style | Chained methods | Options object | Architectural | -| Retries | `.retries(n)` | `attempts: n` | **Name changed!** | -| Timeout | `.timeout(ms)` | `timeout` on job options | Per-job option | -| Worker setup | `queue.process(n, fn)` | `new Worker(name, fn, { concurrency: n })` | Separate class | -| Progress | `reportProgress(json)` | `updateProgress(0-100 or object)` | Number or object | -| Stall detection | Manual `stallInterval` | Auto via Worker `lockDuration` | Simplified | -| `succeeded` event | `queue.on('succeeded')` | 
`worker.on('completed')` | Renamed | -| Producer-only | `{ isWorker: false }` | `new Producer('queue', { connection })` | Dedicated class | -| Batch save | `queue.saveAll(jobs)` | `queue.addBulk(jobs)` | Renamed | -| Connection | `{ redis: { host, port } }` | `{ addresses: [{ host, port }] }` | Must convert | -| Delayed jobs | Not supported | `delay` option (ms) | New | -| Priority | Not supported | `priority` option (0 = highest) | New | +### 7. Stall Detection + +```typescript +// BEFORE (Bee-Queue) - manual setup required +const queue = new Queue('tasks', { stallInterval: 5000 }); +queue.checkStalledJobs(5000); // must call manually! + +// AFTER (glide-mq) - automatic on Worker +const worker = new Worker('tasks', processor, { + connection, + lockDuration: 30000, + stalledInterval: 30000, + maxStalledCount: 2 +}); +// Stall detection runs automatically - no manual call +``` + +### 8. Health Check + +```typescript +// BEFORE (Bee-Queue) +const health = await queue.checkHealth(); +// { waiting, active, succeeded, failed, delayed, newestJob } + +// AFTER (glide-mq) +const counts = await queue.getJobCounts(); +// { waiting, active, completed, failed, delayed } +``` + +### 9. 
Web UI (Arena to Dashboard) + +```typescript +// BEFORE (Bee-Queue) - Arena +const Arena = require('bull-arena'); +app.use('/', Arena({ Bee: require('bee-queue'), queues: [{ name: 'tasks' }] })); + +// AFTER (glide-mq) - Dashboard +import { createDashboard } from '@glidemq/dashboard'; +app.use('/dashboard', createDashboard([queue])); +``` + +## What You Gain + +Features Bee-Queue does not have that are available after migration: + +| Feature | glide-mq API | +|---------|-------------| +| Priority queues | `{ priority: 0 }` (lower = higher, 0 is highest) | +| FlowProducer | Parent-child job trees and DAG workflows | +| Broadcast | Fan-out with subscriber groups | +| Batch processing | Process multiple jobs per worker call | +| Deduplication | Simple, throttle, and debounce modes | +| Schedulers | Cron patterns and interval repeatable jobs | +| Rate limiting | `limiter: { max: 100, duration: 60000 }` on Worker | +| LIFO mode | Process newest jobs first with `{ lifo: true }` | +| Dead letter queue | `deadLetterQueue: { name: 'dlq' }` on Queue | +| Serverless pool | Connection caching for Lambda/Edge | +| HTTP proxy | Cross-language queue access via REST | +| OpenTelemetry | Automatic span emission | +| Testing utilities | `TestQueue`/`TestWorker` without Valkey | +| Cluster support | Hash-tagged keys, AZ-affinity routing | +| TLS / IAM auth | `useTLS: true`, IAM credentials for ElastiCache | +| Native TypeScript | Full generic type support throughout | +| **AI usage tracking** | `job.reportUsage({ model, tokens, costs, ... 
})` | +| **Token streaming** | `job.stream()` / `queue.readStream()` for real-time LLM output | +| **Suspend/resume** | `job.suspend()` / `queue.signal()` for human-in-the-loop | +| **Flow budget** | `flow.add(tree, { budget: { maxTotalTokens } })` | +| **Fallback chains** | `opts.fallbacks: [{ model, provider }]` | +| **Dual-axis rate limiting** | `tokenLimiter` for RPM + TPM compliance | +| **Vector search** | `queue.createJobIndex()` / `queue.vectorSearch()` | ## Migration Checklist -- [ ] Replace `bee-queue` with `glide-mq` in package.json -- [ ] Convert `{ redis: { host, port } }` to `{ addresses: [{ host, port }] }` -- [ ] Split queue instances into Queue (producer) and Worker (consumer) -- [ ] Convert `.createJob().save()` chains to `queue.add(name, data, opts)` -- [ ] Rename `.retries(n)` to `attempts: n` in all job options -- [ ] Rename `'succeeded'` events to `'completed'` -- [ ] Replace `queue.process()` with `new Worker()` constructor +``` +- [ ] Install glide-mq, uninstall bee-queue and @types/bee-queue +- [ ] Create connection config (addresses array format) +- [ ] Convert queue.createJob().save() to queue.add(name, data, opts) +- [ ] Add job names to every queue.add() call (Bee-Queue had none) +- [ ] Convert .retries(n) to { attempts: n } (different name!) 
+- [ ] Convert .backoff(strategy, delay) to { backoff: { type, delay } } +- [ ] Convert .delayUntil(date) to { delay: ms } +- [ ] Convert .setId(id) to { jobId: id } +- [ ] Convert queue.process() to new Worker() +- [ ] Convert queue.saveAll() to queue.addBulk() +- [ ] Separate producer queues (isWorker:false to Producer class) +- [ ] Convert job.reportProgress(json) to job.updateProgress(number | object) +- [ ] Remove manual checkStalledJobs() calls (automatic on Worker) +- [ ] Convert checkHealth() to getJobCounts() +- [ ] Update event listeners (queue.on to worker.on or QueueEvents) +- [ ] Convert per-job events (job.on) to QueueEvents +- [ ] Keep the project's existing module system (CommonJS or ESM) - [ ] Run full test suite +- [ ] Confirm queue counts: await queue.getJobCounts() +- [ ] Confirm no jobs stuck in active state +- [ ] Smoke-test QueueEvents or SSE listeners if the app exposes them +- [ ] Confirm workers, queues, and connections close cleanly +``` + +## Troubleshooting + +| Error | Cause | Fix | +|-------|-------|-----| +| `queue.createJob is not a function` | API changed | Use `queue.add(name, data, opts)` | +| `queue.process is not a function` | Separated producer/consumer | Use `new Worker(name, handler, opts)` | +| `Cannot use require()` | Module system mismatch | Keep the project's existing module system; glide-mq supports CommonJS and ESM | +| `job.reportProgress is not a function` | API renamed | Use `job.updateProgress(number)` | +| `Cannot find module 'bee-queue'` | Leftover import | `grep -r "bee-queue" src/` to find remaining | +| `Missing job name` | Bee-Queue had no name | Add a name as first arg to `queue.add()` | +| `retries option not recognized` | Different name | Use `attempts` not `retries` | +| No stall detection | Bee-Queue needed manual start | glide-mq runs it automatically on Worker | +| Progress type changed | Bee-Queue accepted any JSON | Use `job.updateProgress(number \| object)` - numbers (0-100) or objects supported | 
+| Per-job events not working | No per-job events in glide-mq | Use `QueueEvents` class and filter by `jobId` | + +## Quick Start Commands + +```bash +npm uninstall bee-queue @types/bee-queue +npm install glide-mq +``` -## Deep Dive +## References -For the complete migration guide with batch operation details and edge cases: -- Full migration guide: `node_modules/glide-mq/skills/` -- Online guide: https://avifenesh.github.io/glide-mq.dev/migration/from-bee-queue -- Repository: https://github.com/avifenesh/glide-mq +| Document | Content | +|----------|---------| +| [references/api-mapping.md](references/api-mapping.md) | Complete method-by-method API mapping | +| [references/new-features.md](references/new-features.md) | Features available after migration | diff --git a/skills/glide-mq-migrate-bee/references/api-mapping.md b/skills/glide-mq-migrate-bee/references/api-mapping.md new file mode 100644 index 0000000..9881599 --- /dev/null +++ b/skills/glide-mq-migrate-bee/references/api-mapping.md @@ -0,0 +1,471 @@ +# Bee-Queue to glide-mq - Complete API Mapping + +Method-by-method reference for converting every Bee-Queue API call to its glide-mq equivalent. 
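For scripted bulk conversions, the option renames documented in this mapping can be folded into one pure helper. This is a sketch: `LegacyOptions` is an assumed shape for options collected from the old chained-builder calls (`.setId`, `.retries`, `.backoff`, `.delayUntil`, `.timeout`), not a real Bee-Queue type.

```typescript
// Assumed shape for options gathered from Bee-Queue chained-builder calls.
interface LegacyOptions {
  id?: string;
  retries?: number;
  backoff?: { strategy: 'immediate' | 'fixed' | 'exponential'; delay?: number };
  delayUntil?: number; // absolute epoch milliseconds
  timeout?: number;
}

interface GlideJobOptions {
  jobId?: string;
  attempts?: number;
  backoff?: { type: string; delay: number };
  delay?: number; // relative milliseconds
  timeout?: number;
}

function toGlideOptions(legacy: LegacyOptions, now: number = Date.now()): GlideJobOptions {
  const opts: GlideJobOptions = {};
  if (legacy.id !== undefined) opts.jobId = legacy.id;
  if (legacy.retries !== undefined) opts.attempts = legacy.retries;
  if (legacy.backoff !== undefined) {
    // Bee-Queue's 'immediate' maps to a fixed backoff with zero delay.
    opts.backoff = legacy.backoff.strategy === 'immediate'
      ? { type: 'fixed', delay: 0 }
      : { type: legacy.backoff.strategy, delay: legacy.backoff.delay ?? 0 };
  }
  if (legacy.delayUntil !== undefined) {
    // delayUntil is an absolute timestamp; delay is relative to now.
    opts.delay = Math.max(0, legacy.delayUntil - now);
  }
  if (legacy.timeout !== undefined) opts.timeout = legacy.timeout;
  return opts;
}
```

The result object can be passed straight through as the third argument to `queue.add(name, data, opts)`.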
+ +## Constructor + +```typescript +// BEFORE +const Queue = require('bee-queue'); +const queue = new Queue('tasks', { + redis: { host: 'localhost', port: 6379 }, + prefix: 'bq', + isWorker: true, + getEvents: true, + sendEvents: true, + storeJobs: true, + removeOnSuccess: false, + removeOnFailure: false, + stallInterval: 5000, + activateDelayedJobs: true, +}); + +// AFTER - split into Queue + Worker +import { Queue, Worker, QueueEvents } from 'glide-mq'; +const connection = { addresses: [{ host: 'localhost', port: 6379 }] }; + +const queue = new Queue('tasks', { + connection, + prefix: 'glide', +}); + +const worker = new Worker('tasks', processor, { + connection, + lockDuration: 30000, + stalledInterval: 30000, +}); + +const events = new QueueEvents('tasks', { connection }); +``` + +## Job Creation Methods + +### createJob + save -> add + +```typescript +// BEFORE - chained builder (no job name) +const job = await queue.createJob({ x: 1 }).save(); +console.log(job.id); + +// AFTER - single call (name required) +const job = await queue.add('compute', { x: 1 }); +console.log(job.id); +``` + +### setId -> jobId option + +```typescript +// BEFORE +queue.createJob(data).setId('unique-key').save(); + +// AFTER +await queue.add('task', data, { jobId: 'unique-key' }); +``` + +### retries -> attempts + +**Name change: `retries` becomes `attempts`.** + +```typescript +// BEFORE +queue.createJob(data).retries(3).save(); + +// AFTER +await queue.add('task', data, { attempts: 3 }); +``` + +### backoff -> backoff option + +```typescript +// BEFORE - immediate (default) +queue.createJob(data).retries(3).backoff('immediate').save(); + +// AFTER +await queue.add('task', data, { + attempts: 3, + backoff: { type: 'fixed', delay: 0 }, +}); + +// BEFORE - fixed +queue.createJob(data).retries(3).backoff('fixed', 1000).save(); + +// AFTER +await queue.add('task', data, { + attempts: 3, + backoff: { type: 'fixed', delay: 1000 }, +}); + +// BEFORE - exponential 
+queue.createJob(data).retries(3).backoff('exponential', 1000).save(); + +// AFTER +await queue.add('task', data, { + attempts: 3, + backoff: { type: 'exponential', delay: 1000 }, +}); +``` + +### delayUntil -> delay + +```typescript +// BEFORE - absolute timestamp +queue.createJob(data).delayUntil(Date.now() + 60000).save(); + +// AFTER - relative milliseconds +await queue.add('task', data, { delay: 60000 }); +``` + +### timeout -> timeout job option + +```typescript +// BEFORE - per-job timeout +queue.createJob(data).timeout(30000).save(); + +// AFTER - per-job timeout option +await queue.add('task', data, { timeout: 30000 }); +``` + +### Full chained builder conversion + +```typescript +// BEFORE - all options chained +const job = await queue.createJob({ email: 'user@example.com' }) + .setId('email-123') + .retries(3) + .backoff('exponential', 1000) + .delayUntil(Date.now() + 60000) + .timeout(30000) + .save(); + +// AFTER - single options object +const job = await queue.add('send-email', + { email: 'user@example.com' }, + { + jobId: 'email-123', + attempts: 3, + backoff: { type: 'exponential', delay: 1000 }, + delay: 60000, + timeout: 30000, + } +); +``` + +## Processing Methods + +### process -> Worker + +```typescript +// BEFORE - promise-based +queue.process(async (job) => { + return { result: job.data.x * 2 }; +}); + +// AFTER +const worker = new Worker('tasks', async (job) => { + return { result: job.data.x * 2 }; +}, { connection }); + +// BEFORE - with concurrency +queue.process(10, async (job) => { + return await processJob(job); +}); + +// AFTER +const worker = new Worker('tasks', async (job) => { + return await processJob(job); +}, { connection, concurrency: 10 }); + +// BEFORE - callback-based (deprecated pattern) +queue.process(function(job, done) { + done(null, { result: job.data.x * 2 }); +}); + +// AFTER - always promise-based +const worker = new Worker('tasks', async (job) => { + return { result: job.data.x * 2 }; +}, { connection }); +``` + 
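Callback-style handlers can also be bridged mechanically during an incremental migration, so they run unchanged inside a promise-based Worker. A sketch; `promisifyHandler` is a local helper, not part of glide-mq:

```typescript
type DoneCallback<R> = (err: Error | null, result?: R) => void;

// Wraps a legacy Bee-Queue (job, done) handler into the async
// (job) => Promise<result> form expected by the Worker constructor.
function promisifyHandler<J, R>(
  handler: (job: J, done: DoneCallback<R>) => void,
): (job: J) => Promise<R> {
  return (job) =>
    new Promise<R>((resolve, reject) => {
      handler(job, (err, result) => {
        if (err) reject(err);
        else resolve(result as R);
      });
    });
}

// Usage: new Worker('tasks', promisifyHandler(legacyHandler), { connection });
```

This keeps the old handler bodies untouched while the surrounding queue code is migrated; rewrite them as native async functions afterwards.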
+### reportProgress -> updateProgress + +```typescript +// BEFORE - any JSON value +queue.process(async (job) => { + job.reportProgress({ page: 3, total: 10 }); + job.reportProgress(50); + job.reportProgress('halfway'); + return result; +}); + +// AFTER - number (0-100) or object, use job.log() for text messages +const worker = new Worker('tasks', async (job) => { + await job.updateProgress(30); + await job.updateProgress({ page: 3, total: 10 }); // objects also supported + await job.log('Processing page 3 of 10'); + await job.updateProgress(50); + return result; +}, { connection }); +``` + +## Bulk Operations + +### saveAll -> addBulk + +```typescript +// BEFORE +const jobs = [ + queue.createJob({ x: 1 }), + queue.createJob({ x: 2 }), + queue.createJob({ x: 3 }), +]; +const errors = await queue.saveAll(jobs); +// errors is Map + +// AFTER +const results = await queue.addBulk([ + { name: 'compute', data: { x: 1 } }, + { name: 'compute', data: { x: 2 } }, + { name: 'compute', data: { x: 3 } }, +]); +``` + +## Query Methods + +### getJob + +```typescript +// BEFORE +const job = await queue.getJob('42'); + +// AFTER - same API +const job = await queue.getJob('42'); +``` + +### getJobs + +```typescript +// BEFORE - type + page object +const waiting = await queue.getJobs('waiting', { start: 0, end: 25 }); +const failed = await queue.getJobs('failed', { size: 100 }); + +// AFTER - type + start + end +const waiting = await queue.getJobs('waiting', 0, 25); +const failed = await queue.getJobs('failed', 0, 100); +``` + +### removeJob + +```typescript +// BEFORE - by ID on queue +await queue.removeJob('42'); + +// AFTER - via Job instance +const job = await queue.getJob('42'); +await job.remove(); +``` + +### checkHealth -> getJobCounts + +```typescript +// BEFORE +const health = await queue.checkHealth(); +// { waiting: 5, active: 2, succeeded: 100, failed: 3, delayed: 1, newestJob: '108' } + +// AFTER +const counts = await queue.getJobCounts(); +// { waiting: 5, active: 2, 
completed: 100, failed: 3, delayed: 1 } +// Note: "succeeded" renamed to "completed", no "newestJob" +``` + +## Lifecycle Methods + +### close + +```typescript +// BEFORE +await queue.close(30000); + +// AFTER - close individual components +await worker.close(); +await queue.close(); +await events.close(); + +// OR - graceful shutdown (registers SIGTERM/SIGINT, blocks until signal) +import { gracefulShutdown } from 'glide-mq'; +const handle = gracefulShutdown([worker, queue, events]); +// For programmatic shutdown: await handle.shutdown(); +``` + +### destroy -> obliterate + +```typescript +// BEFORE +await queue.destroy(); + +// AFTER +await queue.obliterate(); +``` + +### ready + +```typescript +// BEFORE +await queue.ready(); + +// AFTER +await worker.waitUntilReady(); +``` + +### isRunning + +```typescript +// BEFORE +queue.isRunning(); + +// AFTER +worker.isRunning(); +``` + +## Stall Detection + +```typescript +// BEFORE - manual setup, repeated call required +queue.checkStalledJobs(5000, (err, numStalled) => { + console.log('Stalled:', numStalled); +}); + +// AFTER - automatic, configured on Worker +const worker = new Worker('tasks', processor, { + connection, + lockDuration: 30000, // how long a job can run before considered stalled + stalledInterval: 30000, // how often to check for stalled jobs + maxStalledCount: 2, // re-queue up to 2 times before failing +}); + +worker.on('stalled', (jobId) => { + console.log('Stalled:', jobId); +}); +``` + +## Event Migration + +### Local events (Queue -> Worker) + +```typescript +// BEFORE +queue.on('succeeded', (job, result) => {}); +queue.on('failed', (job, err) => {}); +queue.on('retrying', (job, err) => {}); +queue.on('stalled', (jobId) => {}); +queue.on('error', (err) => {}); + +// AFTER +worker.on('completed', (job, result) => {}); +worker.on('failed', (job, err) => {}); +// No separate 'retrying' event - failed fires for all failures +worker.on('stalled', (jobId) => {}); +worker.on('error', (err) => {}); +``` + 
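Because `failed` now fires on every attempt, code that relied on Bee-Queue's separate `retrying` event can recover the distinction with a small predicate based on the `attemptsMade >= attempts` check. A sketch; the commented usage assumes the failed job exposes `attemptsMade` and `opts.attempts`:

```typescript
// True when a failed job has exhausted its configured attempts and
// will not run again (Bee-Queue's terminal 'failed' semantics).
// attempts defaults to 1: a job with no retries is final on first failure.
function isFinalFailure(attemptsMade: number, attempts = 1): boolean {
  return attemptsMade >= attempts;
}

// Sketch of usage in the worker's 'failed' handler:
// worker.on('failed', (job, err) => {
//   if (isFinalFailure(job.attemptsMade, job.opts.attempts ?? 1)) {
//     // terminal failure - old 'failed' semantics
//   } else {
//     // will retry - old 'retrying' semantics
//   }
// });
```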
+### PubSub events (Queue -> QueueEvents) + +```typescript +// BEFORE +queue.on('job succeeded', (jobId, result) => {}); +queue.on('job failed', (jobId, err) => {}); +queue.on('job progress', (jobId, data) => {}); + +// AFTER +const events = new QueueEvents('tasks', { connection }); +events.on('completed', ({ jobId, returnvalue }) => {}); +events.on('failed', ({ jobId, failedReason }) => {}); +events.on('progress', ({ jobId, data }) => {}); +``` + +### Per-job events (Job -> QueueEvents) + +```typescript +// BEFORE +const job = await queue.createJob(data).save(); +job.on('succeeded', (result) => console.log('Done:', result)); +job.on('failed', (err) => console.error('Failed:', err)); +job.on('progress', (p) => console.log('Progress:', p)); + +// AFTER - filter by jobId in QueueEvents +const job = await queue.add('task', data); +const events = new QueueEvents('tasks', { connection }); +events.on('completed', ({ jobId, returnvalue }) => { + if (jobId === job.id) console.log('Done:', returnvalue); +}); + +// OR - use addAndWait for request-reply +const result = await queue.addAndWait('task', data, { waitTimeout: 30000 }); +``` + +## Custom Backoff Strategies + +```typescript +// BEFORE +queue.backoffStrategies.set('linear', (job) => { + return job.options.backoff.delay * (job.options.retries + 1); +}); +queue.createJob(data).retries(5).backoff('linear', 1000).save(); + +// AFTER +const worker = new Worker('tasks', processor, { + connection, + backoffStrategies: { + linear: (attemptsMade) => attemptsMade * 1000, + }, +}); +await queue.add('task', data, { + attempts: 5, + backoff: { type: 'linear', delay: 1000 }, +}); +``` + +## Connection Formats + +```typescript +// BEFORE - object +new Queue('tasks', { redis: { host: 'redis.example.com', port: 6380 } }); + +// BEFORE - URL string +new Queue('tasks', { redis: 'redis://user:pass@host:6379/0' }); + +// BEFORE - existing ioredis client +const Redis = require('ioredis'); +new Queue('tasks', { redis: new Redis() }); + +// 
AFTER - always addresses array +const connection = { addresses: [{ host: 'redis.example.com', port: 6380 }] }; + +// AFTER - with TLS +const connection = { addresses: [{ host: 'redis.example.com', port: 6380 }], useTLS: true }; + +// AFTER - cluster mode +const connection = { + addresses: [ + { host: 'node1', port: 7000 }, + { host: 'node2', port: 7001 }, + ], + clusterMode: true, +}; +``` + +## Graceful Shutdown + +```typescript +// BEFORE +async function shutdown() { + await queue.close(30000); + process.exit(0); +} +process.on('SIGTERM', shutdown); +process.on('SIGINT', shutdown); + +// AFTER - gracefulShutdown registers SIGTERM/SIGINT automatically +import { gracefulShutdown } from 'glide-mq'; +const handle = gracefulShutdown([worker, queue, events]); +// Blocks until signal fires. For programmatic: await handle.shutdown() +``` diff --git a/skills/glide-mq-migrate-bee/references/new-features.md b/skills/glide-mq-migrate-bee/references/new-features.md new file mode 100644 index 0000000..800235f --- /dev/null +++ b/skills/glide-mq-migrate-bee/references/new-features.md @@ -0,0 +1,450 @@ +# New Features Available After Migration + +Everything Bee-Queue cannot do that glide-mq provides out of the box. + +## Priority Queues + +Bee-Queue has no priority support. glide-mq uses numeric priority where lower = higher priority (0 is the highest, default). + +```typescript +// High priority (processed first) +await queue.add('urgent-alert', data, { priority: 0 }); + +// Normal priority +await queue.add('report', data, { priority: 5 }); + +// Low priority (processed last) +await queue.add('cleanup', data, { priority: 20 }); +``` + +Processing order: priority > LIFO > FIFO. + +## Job Workflows (FlowProducer) + +Parent-child job trees and DAG workflows. The parent waits for all children to complete. 
+ +```typescript +import { FlowProducer } from 'glide-mq'; + +const flow = new FlowProducer({ connection }); +await flow.add({ + name: 'assemble-report', + queueName: 'reports', + data: { reportId: 42 }, + children: [ + { name: 'fetch-users', queueName: 'data', data: { source: 'users' } }, + { name: 'fetch-orders', queueName: 'data', data: { source: 'orders' } }, + { name: 'fetch-metrics', queueName: 'data', data: { source: 'metrics' } }, + ], +}); +``` + +## Broadcast (Fan-Out) + +Bee-Queue is point-to-point only. glide-mq supports fan-out where every subscriber receives every message. + +```typescript +import { Broadcast, BroadcastWorker } from 'glide-mq'; + +const broadcast = new Broadcast('events', { connection, maxMessages: 1000 }); + +// Every subscriber gets the message +const inventory = new BroadcastWorker('events', async (job) => { + await updateInventory(job.data); +}, { connection, subscription: 'inventory-service' }); + +const email = new BroadcastWorker('events', async (job) => { + await sendNotification(job.data); +}, { connection, subscription: 'email-service' }); + +await broadcast.publish('orders', { event: 'order.placed', orderId: 42 }); +``` + +## Batch Processing + +Process multiple jobs in a single handler call for I/O-bound operations. + +```typescript +import { Worker, BatchError } from 'glide-mq'; + +const worker = new Worker('bulk-insert', async (jobs) => { + // jobs is Job[] when batch is enabled + const results = await db.insertMany(jobs.map(j => j.data)); + return results; // must return R[] with length === jobs.length +}, { + connection, + batch: { size: 50, timeout: 1000 }, +}); +``` + +## Deduplication + +Prevent duplicate job processing with three modes. 
+ +```typescript +// Simple - reject if job with same deduplication ID exists +await queue.add('task', data, { + deduplication: { id: 'unique-key' }, +}); + +// Throttle - reject duplicates within a time window +await queue.add('task', data, { + deduplication: { id: 'user-123', ttl: 60000 }, +}); +``` + +## Schedulers (Cron and Interval) + +Bee-Queue has no repeatable jobs. glide-mq supports cron patterns and fixed intervals. + +```typescript +// Cron - run every day at midnight +await queue.upsertJobScheduler( + 'daily-report', + { pattern: '0 0 * * *' }, + { name: 'daily-report', data: {} }, +); + +// Interval - run every 5 minutes +await queue.upsertJobScheduler( + 'health-check', + { every: 300000 }, + { name: 'health-check', data: {} }, +); +``` + +## Rate Limiting + +Global and per-group rate limits on workers. + +```typescript +const worker = new Worker('api-calls', processor, { + connection, + limiter: { + max: 100, // max 100 jobs + duration: 60000, // per minute + }, +}); +``` + +## Dead Letter Queue + +Route permanently-failed jobs to a separate queue for inspection. + +```typescript +const worker = new Worker('tasks', processor, { + connection, + deadLetterQueue: { name: 'failed-jobs' }, +}); +``` + +## LIFO Mode + +Process newest jobs first instead of FIFO. + +```typescript +await queue.add('urgent-report', data, { lifo: true }); +``` + +## Job TTL + +Automatically fail jobs that are not processed within a time window. + +```typescript +await queue.add('time-sensitive', data, { ttl: 300000 }); // 5 min expiry +``` + +## Per-Key Ordering + +Process jobs sequentially per ordering key while maintaining parallelism across keys. 
+ +```typescript +await queue.add('process-order', data, { ordering: { key: 'customer-123' } }); +await queue.add('process-order', data, { ordering: { key: 'customer-456' } }); +// Jobs for customer-123 run sequentially; customer-456 runs in parallel +``` + +## Request-Reply + +Wait for a worker result in the producer without polling. + +```typescript +const result = await queue.addAndWait('inference', { prompt: 'Hello' }, { + waitTimeout: 30000, +}); +console.log(result); // processor return value +``` + +## Step Jobs (Pause and Resume) + +Pause a job and resume it later without completing. + +```typescript +const worker = new Worker('drip-campaign', async (job) => { + if (job.data.step === 'send') { + await sendEmail(job.data); + return job.moveToDelayed(Date.now() + 86400000, 'check'); + } + if (job.data.step === 'check') { + return await checkOpened(job.data) ? 'done' : job.moveToDelayed(Date.now() + 3600000, 'followup'); + } + await sendFollowUp(job.data); + return 'done'; +}, { connection }); +``` + +## UnrecoverableError + +Skip all retries and fail permanently. + +```typescript +import { UnrecoverableError } from 'glide-mq'; + +const worker = new Worker('tasks', async (job) => { + if (!job.data.requiredField) { + throw new UnrecoverableError('missing required field'); + } + return processJob(job); +}, { connection }); +``` + +## Serverless Producer + +Lightweight producer with no EventEmitter overhead for Lambda/Edge. + +```typescript +import { Producer } from 'glide-mq'; + +export async function handler(event) { + const producer = new Producer('queue', { connection }); + await producer.add('process', event.body); + await producer.close(); + return { statusCode: 200 }; +} +``` + +## Testing Without Valkey + +In-memory queue and worker for unit tests. 
+ +```typescript +import { TestQueue, TestWorker } from 'glide-mq/testing'; + +const queue = new TestQueue('tasks'); +await queue.add('test-job', { key: 'value' }); +const worker = new TestWorker(queue, async (job) => { + return { processed: true }; +}); +await worker.run(); +``` + +## Cluster Support + +Native Valkey/Redis Cluster with hash-tagged keys. + +```typescript +const connection = { + addresses: [ + { host: 'node1', port: 7000 }, + { host: 'node2', port: 7001 }, + ], + clusterMode: true, + readFrom: 'AZAffinity', + clientAz: 'us-east-1a', +}; +``` + +## TLS and IAM Authentication + +```typescript +// TLS +const connection = { + addresses: [{ host: 'redis.example.com', port: 6380 }], + useTLS: true, +}; + +// AWS IAM +const connection = { + addresses: [{ host: 'cluster.cache.amazonaws.com', port: 6379 }], + clusterMode: true, + credentials: { + type: 'iam', + serviceType: 'elasticache', + region: 'us-east-1', + userId: 'my-iam-user', + clusterName: 'my-cluster', + }, +}; +``` + +## QueueEvents (Real-Time Stream) + +Centralized job lifecycle events via Valkey Streams - replaces Bee-Queue's PubSub model. + +```typescript +import { QueueEvents } from 'glide-mq'; + +const events = new QueueEvents('tasks', { connection }); +events.on('added', ({ jobId }) => console.log('added', jobId)); +events.on('completed', ({ jobId, returnvalue }) => console.log('done', jobId)); +events.on('failed', ({ jobId, failedReason }) => console.log('failed', jobId)); +events.on('progress', ({ jobId, data }) => console.log('progress', jobId, data)); +events.on('stalled', ({ jobId }) => console.log('stalled', jobId)); +``` + +## Time-Series Metrics + +Per-minute throughput and latency data with zero extra round trips. 
+ +```typescript +const metrics = await queue.getMetrics('completed'); +// { count, data: [{ timestamp, count, avgDuration }], meta: { resolution: 'minute' } } +``` + +## Queue Management + +```typescript +// Pause/resume all workers +await queue.pause(); +await queue.resume(); + +// Drain waiting jobs +await queue.drain(); + +// Clean old completed/failed jobs +await queue.clean(3600000, 1000, 'completed'); // older than 1 hour + +// Obliterate all queue data +await queue.obliterate({ force: true }); +``` + +## Dashboard + +Web UI for monitoring and managing queues. + +```typescript +import { createDashboard } from '@glidemq/dashboard'; +import express from 'express'; + +const app = express(); +app.use('/dashboard', createDashboard([queue])); +``` + +## Framework Integrations + +Native integrations for Hono, Fastify, NestJS, and Hapi. + +## OpenTelemetry + +Automatic span emission for distributed tracing. + +## Pluggable Serializers + +Custom serialization for job data (e.g., MessagePack, Protocol Buffers). + +```typescript +const queue = new Queue('tasks', { connection, serializer: customSerializer }); +const worker = new Worker('tasks', processor, { connection, serializer: customSerializer }); +``` + +## AI-Native Primitives + +glide-mq is purpose-built for LLM/AI orchestration. None of these exist in Bee-Queue. + +### Usage Metadata + +Track model, tokens, cost, and latency per job. + +```typescript +await job.reportUsage({ + model: 'gpt-5.4', + provider: 'openai', + tokens: { input: 500, output: 200 }, + costs: { total: 0.003 }, + costUnit: 'usd', + latencyMs: 800, +}); +``` + +### Token Streaming + +Stream LLM output tokens in real-time via per-job Valkey Streams. 
+ +```typescript +// Worker: emit chunks +await job.stream({ token: 'Hello' }); + +// Consumer: read chunks (supports long-polling) +const entries = await queue.readStream(jobId, { block: 5000 }); +``` + +### Suspend / Resume (Human-in-the-Loop) + +Pause a job for external approval, resume with signals. + +```typescript +await job.suspend({ reason: 'Needs review', timeout: 86_400_000 }); +// Externally: +await queue.signal(jobId, 'approve', { reviewer: 'alice' }); +``` + +### Flow Budget + +Cap total tokens/cost across all jobs in a workflow flow. + +```typescript +await flow.add(flowTree, { + budget: { maxTotalTokens: 50_000, maxTotalCost: 0.50, costUnit: 'usd' }, +}); +``` + +### Fallback Chains + +Ordered model/provider alternatives on retryable failure. + +```typescript +await queue.add('inference', data, { + attempts: 4, + fallbacks: [ + { model: 'gpt-5.4', provider: 'openai' }, + { model: 'claude-sonnet-4-20250514', provider: 'anthropic' }, + { model: 'llama-3-70b', provider: 'groq' }, + ], +}); +``` + +### Dual-Axis Rate Limiting (RPM + TPM) + +Rate-limit by both requests and tokens per minute for LLM API compliance. + +```typescript +const worker = new Worker('inference', processor, { + connection, + limiter: { max: 60, duration: 60_000 }, + tokenLimiter: { maxTokens: 100_000, duration: 60_000 }, +}); +``` + +### Flow Usage Aggregation + +Aggregate AI usage across all jobs in a flow. + +```typescript +const usage = await queue.getFlowUsage(parentJobId); +// { tokens, totalTokens, costs, totalCost, costUnit, jobCount, models } +``` + +### Vector Search + +KNN similarity search over job hashes via Valkey Search. 
+ +```typescript +await queue.createJobIndex({ + vectorField: { name: 'embedding', dimensions: 1536 }, +}); +const job = await queue.add('document', { text: 'Hello world' }); +if (job) { + await job.storeVector('embedding', queryEmbedding); +} +const results = await queue.vectorSearch(queryEmbedding, { k: 10 }); +``` diff --git a/skills/glide-mq-migrate-bullmq/SKILL.md b/skills/glide-mq-migrate-bullmq/SKILL.md index 4480fd7..487cee8 100644 --- a/skills/glide-mq-migrate-bullmq/SKILL.md +++ b/skills/glide-mq-migrate-bullmq/SKILL.md @@ -1,23 +1,38 @@ --- name: glide-mq-migrate-bullmq -description: "Migrates BullMQ applications to glide-mq. Use when user wants to convert, migrate, replace, or switch from BullMQ to glide-mq, or asks about BullMQ vs glide-mq differences." -version: 1.0.0 -argument-hint: "[migration scope or question]" +description: >- + Migrates Node.js applications from BullMQ to glide-mq. Covers connection + config conversion, API mapping, breaking changes, and new features available + after migration. Use when converting BullMQ queues and workers to glide-mq, + replacing bullmq with glide-mq, or comparing BullMQ vs glide-mq APIs. + Triggers on "bullmq to glide-mq", "replace bullmq with glide-mq", + "migrate from bullmq", "switch from bullmq to glide-mq", + "convert bullmq to glide-mq", "bullmq migration glide-mq". +license: Apache-2.0 +metadata: + author: glide-mq + version: "0.14.0" + tags: glide-mq, bullmq, migration, queue, valkey, redis + sources: docs/MIGRATION.md --- -# glide-mq-migrate-bullmq +# Migrate from BullMQ to glide-mq -Provides guidance for migrating BullMQ applications to glide-mq - connection conversion, API mapping, and breaking changes. +The glide-mq API is intentionally similar to BullMQ. Most changes are connection format and imports. -> This is a thin wrapper. 
For the complete migration guide with advanced patterns, see `node_modules/glide-mq/skills/` or https://avifenesh.github.io/glide-mq.dev/migration/from-bullmq +## When to Apply -## When to Use +Use this skill when: +- Replacing BullMQ with glide-mq in an existing project +- Converting BullMQ Queue/Worker/FlowProducer code +- Updating connection configuration from ioredis to valkey-glide format +- Comparing API differences between BullMQ and glide-mq -Invoke this skill when: -- User wants to migrate from BullMQ to glide-mq -- User asks about differences between BullMQ and glide-mq -- User needs help converting BullMQ connection or job configs -- User is evaluating BullMQ alternatives +## Prerequisites + +- Node.js 20+ +- Valkey 7.0+ or Redis 7.0+ (both supported) +- TypeScript 5+ recommended ## Install @@ -26,99 +41,357 @@ npm remove bullmq npm install glide-mq ``` -Update all imports from `'bullmq'` to `'glide-mq'`. +```ts +// Before +import { Queue, Worker, Job, QueueEvents, FlowProducer } from 'bullmq'; + +// After +import { Queue, Worker, Job, QueueEvents, FlowProducer } from 'glide-mq'; +``` + +--- -## Connection Conversion +## Breaking changes -The most critical change. BullMQ uses flat ioredis format; glide-mq uses an addresses array. 
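The connection change is the most mechanical one and can be automated. A sketch of a converter for flat ioredis-style configs — only `host`, `port`, `password`, and `tls` are handled here; anything else in the ioredis config needs manual review:

```ts
interface IoredisLikeOptions {
  host?: string;
  port?: number;
  password?: string;
  tls?: object; // presence of a tls object enables TLS in ioredis
}

interface GlideConnection {
  addresses: { host: string; port: number }[];
  useTLS?: boolean;
  credentials?: { password: string };
}

// Converts a flat BullMQ/ioredis connection config to the
// glide-mq addresses-array format. Defaults mirror ioredis
// (localhost:6379) when host/port are omitted.
function toGlideConnection(opts: IoredisLikeOptions): GlideConnection {
  const conn: GlideConnection = {
    addresses: [{ host: opts.host ?? 'localhost', port: opts.port ?? 6379 }],
  };
  if (opts.tls !== undefined) conn.useTLS = true;
  if (opts.password !== undefined) conn.credentials = { password: opts.password };
  return conn;
}
```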
+| Feature | BullMQ | glide-mq | +|---------|--------|----------| +| **Connection config** | `{ host, port }` | `{ addresses: [{ host, port }] }` | +| **TLS** | `tls: {}` | `useTLS: true` | +| **Password** | `password: 'secret'` | `credentials: { password: 'secret' }` | +| **Cluster mode** | Implicit / `natMap` | `clusterMode: true` | +| **`defaultJobOptions`** | On `QueueOptions` | Removed - wrap `queue.add()` with defaults | +| **`queue.getJobs()`** | Accepts array of types | Single type per call | +| **`queue.getJobCounts()`** | Variadic type list | Always returns all states | +| **`settings.backoffStrategy`** | Single function | `backoffStrategies` named map on WorkerOptions | +| **`worker.on('active')`** | Emits `(job, prev)` | Emits `(job, jobId)` | +| **`job.waitUntilFinished()`** | `(queueEvents, ttl)` | `(pollIntervalMs, timeoutMs)` - no QueueEvents needed | +| **Sandboxed processor** | `useWorkerThreads: true` | `sandbox: { useWorkerThreads: true }` | +| **`QueueScheduler`** | Required in v1, optional in v2+ | Does not exist - promotion runs inside Worker | +| **`opts.repeat`** | On `queue.add()` | Removed - use `queue.upsertJobScheduler()` | +| **FlowJob `data`** | Optional | Required | +| **`retries-exhausted` event** | Separate QueueEvents event | Check `attemptsMade >= opts.attempts` in `'failed'` | +| **BullMQ Pro `group.id`** | `group: { id }` (Pro license) | `ordering: { key }` (open source) | +| **Group concurrency** | `group.limit.max` (Pro) | `ordering: { key, concurrency: N }` | +| **Group rate limit** | `group.limit` (Pro) | `ordering: { key, rateLimit: { max, duration } }` | -**BullMQ:** -```typescript +--- + +## Step-by-step conversion + +### 1. 
Connection config (the biggest change) + +```ts +// BEFORE (BullMQ) const connection = { host: 'localhost', port: 6379 }; ``` -**glide-mq:** -```typescript +```ts +// AFTER (glide-mq) const connection = { addresses: [{ host: 'localhost', port: 6379 }] }; ``` -**With TLS:** -```typescript -const connection = { - addresses: [{ host: 'my-cluster.cache.amazonaws.com', port: 6379 }], - useTLS: true, - credentials: { password: 'secret' }, - clusterMode: true, -}; +For TLS + password + cluster, see [references/connection-mapping.md](references/connection-mapping.md). + +### 2. Queue.add - identical API + +```ts +// BEFORE +const queue = new Queue('tasks', { connection }); +await queue.add('send-email', { to: 'user@example.com' }); +``` + +```ts +// AFTER - only the connection changes +const queue = new Queue('tasks', { connection }); +await queue.add('send-email', { to: 'user@example.com' }); +``` + +### 3. Worker - identical API, different connection + +```ts +// BEFORE +const worker = new Worker('tasks', async (job) => { + await sendEmail(job.data.to); +}, { connection: { host: 'localhost', port: 6379 }, concurrency: 10 }); +``` + +```ts +// AFTER +const worker = new Worker('tasks', async (job) => { + await sendEmail(job.data.to); +}, { connection: { addresses: [{ host: 'localhost', port: 6379 }] }, concurrency: 10 }); +``` + +### 4. FlowProducer - identical API + +```ts +// Both - same usage, only connection format differs +const flow = new FlowProducer({ connection }); +await flow.add({ + name: 'parent', + queueName: 'tasks', + data: { step: 'final' }, // NOTE: data is required in glide-mq + children: [ + { name: 'child-1', queueName: 'tasks', data: { step: '1' } }, + { name: 'child-2', queueName: 'tasks', data: { step: '2' } }, + ], +}); +``` + +### 5. 
QueueEvents - identical API + +```ts +// Both - same, only connection format differs +const qe = new QueueEvents('tasks', { connection }); +qe.on('completed', ({ jobId }) => console.log(jobId, 'done')); +qe.on('failed', ({ jobId, failedReason }) => console.error(jobId, failedReason)); ``` -## Quick Comparison +Note: some BullMQ events are not yet emitted. See [Current gaps](#current-gaps). + +### 6. Graceful shutdown -```typescript -// BullMQ // glide-mq -import { Queue, Worker } from 'bullmq'; import { Queue, Worker } from 'glide-mq'; -// connection: { host, port } // connection: { addresses: [{ host, port }] } +```ts +// BullMQ +await worker.close(); +await queue.close(); ``` -The processor function signature is identical. Most code is a drop-in replacement after fixing the connection and imports. +```ts +// glide-mq - identical +await worker.close(); +await queue.close(); +``` -## Key Differences +### 7. UnrecoverableError - identical -| Feature | BullMQ | glide-mq | Notes | -|---------|--------|----------|-------| -| Connection | `{ host, port }` | `{ addresses: [{ host, port }] }` | Must convert | -| Job scheduling | `opts.repeat` | `queue.upsertJobScheduler()` | API changed | -| Default job opts | `defaultJobOptions` in Queue | Removed - wrap `add()` | Breaking | -| Backoff strategy | `settings.backoffStrategy` | `backoffStrategies` map | Breaking | -| `waitUntilFinished` | `job.waitUntilFinished(qe, ttl)` | `job.waitUntilFinished(pollMs, timeoutMs)` | Signature changed | -| Per-key ordering | BullMQ Pro only | `opts.ordering.key` | Free in glide-mq | -| Group concurrency | `group: { id, limit }` | `ordering: { key, concurrency }` | Renamed | -| Runtime group rate limit | Not available | `job.rateLimitGroup(ms)` / `queue.rateLimitGroup(key, ms)` | New in glide-mq | -| Dead letter queue | Not native | Built-in `deadLetterQueue` option | New | -| Compression | Not available | `compression: 'gzip'` | New | -| Worker `'active'` event | Emits `(job, prev)` | Emits 
`(job, jobId)` | Breaking | -| `getJobs()` | Multiple types array | Single type per call | Breaking | -| Priority | Lower = higher (0 highest) | Same | Compatible | +```ts +// Both +import { UnrecoverableError } from 'glide-mq'; // was 'bullmq' -## Breaking Changes +throw new UnrecoverableError('permanent failure'); +``` + +### 8. Scheduling (repeatable jobs) -**`defaultJobOptions` removed** - wrap `add()` instead: -```typescript -const DEFAULTS = { attempts: 3, backoff: { type: 'exponential', delay: 1000 } }; -const add = (name, data, opts) => queue.add(name, data, { ...DEFAULTS, ...opts }); +```ts +// BEFORE - opts.repeat (deprecated in BullMQ v5) +await queue.add('report', data, { + repeat: { pattern: '0 9 * * *', tz: 'America/New_York' }, +}); ``` -**Scheduling** - `opts.repeat` replaced by scheduler API: -```typescript -await queue.upsertJobScheduler('report', +```ts +// AFTER - upsertJobScheduler +await queue.upsertJobScheduler( + 'report', { pattern: '0 9 * * *', tz: 'America/New_York' }, - { name: 'report', data }, + { name: 'report', data: { v: 1 } }, ); ``` -**Backoff** - single function replaced by named map: -```typescript -new Worker('q', processor, { +### 9. Custom backoff strategies + +```ts +// BEFORE +const worker = new Worker('q', processor, { connection, - backoffStrategies: { jitter: (attempts, err) => 1000 + Math.random() * 1000 }, + settings: { + backoffStrategy: (attemptsMade, type, delay, err) => { + if (type === 'jitter') return delay + Math.random() * delay; + return delay * attemptsMade; + }, + }, +}); +``` + +```ts +// AFTER +const worker = new Worker('q', processor, { + connection, + backoffStrategies: { + jitter: (attemptsMade, err) => 1000 + Math.random() * 1000, + linear: (attemptsMade, err) => 1000 * attemptsMade, + }, +}); +``` + +### 10. 
defaultJobOptions removal + +```ts +// BEFORE +const queue = new Queue('tasks', { + connection, + defaultJobOptions: { attempts: 3, backoff: { type: 'exponential', delay: 1000 } }, +}); +``` + +```ts +// AFTER - wrap add() with your defaults +const DEFAULTS = { attempts: 3, backoff: { type: 'exponential', delay: 1000 } } as const; +const add = (name: string, data: unknown, opts?: JobOptions) => + queue.add(name, data, { ...DEFAULTS, ...opts }); +``` + +### 11. getJobs with multiple types + +```ts +// BEFORE +const jobs = await queue.getJobs(['waiting', 'active'], 0, 99); +``` + +```ts +// AFTER +const [waiting, active] = await Promise.all([ + queue.getJobs('waiting', 0, 99), + queue.getJobs('active', 0, 99), +]); +const jobs = [...waiting, ...active]; +``` + +### 12. job.waitUntilFinished + +```ts +// BEFORE +const qe = new QueueEvents('tasks', { connection }); +const result = await job.waitUntilFinished(qe, 30000); +``` + +```ts +// AFTER - no QueueEvents needed +const result = await job.waitUntilFinished(500, 30000); +// args: pollIntervalMs (default 500), timeoutMs (default 30000) +``` + +### 13. 
BullMQ Pro groups to ordering keys + +```ts +// BEFORE (BullMQ Pro) +await queue.add('job', data, { + group: { id: 'tenant-123', limit: { max: 2, duration: 0 } }, +}); +``` + +```ts +// AFTER (glide-mq, open source) +await queue.add('job', data, { + ordering: { key: 'tenant-123', concurrency: 2 }, }); ``` -## Migration Checklist +--- + +## What's new in glide-mq (not in BullMQ) + +| Feature | API | Description | +|---------|-----|-------------| +| Per-key ordering | `ordering: { key }` | Sequential execution per key across all workers | +| Group concurrency | `ordering: { key, concurrency: N }` | Max N parallel jobs per key | +| Group rate limit | `ordering: { key, rateLimit: { max, duration } }` | Per-key rate limiting | +| Token bucket | `ordering: { key, tokenBucket }` + `opts.cost` | Weighted rate limiting per key | +| Global rate limit | `queue.setGlobalRateLimit({ max, duration })` | Queue-wide cap across all workers | +| Dead letter queue | `deadLetterQueue: { name, maxRetries }` | Native DLQ on QueueOptions | +| Job revocation | `queue.revoke(jobId)` + `job.abortSignal` | Cancel in-flight jobs cooperatively | +| Transparent compression | `compression: 'gzip'` on QueueOptions | 98% reduction on 15 KB payloads | +| AZ-affinity routing | `readFrom: 'AZAffinity'` | Pin reads to local AZ replicas | +| IAM auth | `credentials: { type: 'iam', ... 
}` | ElastiCache / MemoryDB native auth | +| In-memory test mode | `TestQueue`, `TestWorker` from `glide-mq/testing` | No Valkey needed for tests | +| Broadcast | `BroadcastWorker` | Pub/sub fan-out to all workers | +| Batch processing | `batch: { size, timeout }` on WorkerOptions | Multiple jobs per processor call | +| DAG workflows | `FlowProducer.addDAG()`, `dag()` helper | Jobs with multiple parents | +| Workflow helpers | `chain()`, `group()`, `chord()` | Higher-level orchestration | +| Step jobs | `job.moveToDelayed(ts, nextStep?)` | Multi-step state machines | +| addAndWait | `queue.addAndWait(name, data, { waitTimeout })` | Request-reply pattern | +| Pluggable serializers | `{ serialize, deserialize }` on options | MessagePack, Protobuf, etc. | +| Job TTL | `opts.ttl` | Auto-expire jobs after N ms | +| repeatAfterComplete | `upsertJobScheduler('name', { repeatAfterComplete: 5000 })` | No-overlap scheduling (ms delay after completion) | +| LIFO mode | `lifo: true` | Last-in-first-out processing | +| Job search | `queue.searchJobs(opts)` | Full-text search over job data | +| excludeData | `queue.getJobs(type, start, end, { excludeData: true })` | Lightweight listings | +| `globalConcurrency` | On WorkerOptions | Set queue-wide cap at worker startup | +| **AI usage tracking** | `job.reportUsage({ model, tokens, costs, ... 
})` | Per-job LLM usage metadata | +| **Token streaming** | `job.stream({ token })` / `queue.readStream(jobId)` | Real-time LLM output via per-job streams | +| **Suspend/resume** | `job.suspend()` / `queue.signal(jobId, name, data)` | Human-in-the-loop approval | +| **Flow budget** | `flow.add(tree, { budget: { maxTotalTokens } })` | Cap tokens/cost across a flow | +| **Fallback chains** | `opts.fallbacks: [{ model, provider }]` | Ordered model/provider failover | +| **Dual-axis rate limiting** | `tokenLimiter: { maxTokens, duration }` | RPM + TPM for LLM API compliance | +| **Flow usage aggregation** | `queue.getFlowUsage(parentJobId)` | Aggregate tokens/cost across a flow | +| **Vector search** | `queue.createJobIndex()` / `queue.vectorSearch()` | KNN similarity search over job hashes | + +See [references/new-features.md](references/new-features.md) for detailed documentation. +--- + +## Current gaps + +| Missing feature | Workaround | +|-----------------|------------| +| QueueEvents `'waiting'`, `'active'`, `'delayed'`, `'drained'`, `'deduplicated'` events | Use worker-level events or poll `getJobCounts()` | +| `failParentOnFailure` in FlowJob | Implement manually in the worker's `failed` handler | + +--- + +## Performance comparison + +AWS ElastiCache Valkey 8.2 (r7g.large), TLS enabled, same-region EC2 client. + +| Concurrency | glide-mq | BullMQ | Delta | +|:-----------:|----------:|--------:|:-----:| +| c=1 | 2,479 j/s | 2,535 j/s | -2% | +| c=5 | 10,754 j/s | 9,866 j/s | +9% | +| c=10 | **18,218 j/s** | 13,541 j/s | **+35%** | +| c=15 | **19,583 j/s** | 14,162 j/s | **+38%** | +| c=20 | 19,408 j/s | 16,085 j/s | +21% | +| c=50 | 19,768 j/s | 19,159 j/s | +3% | + +Most production deployments run c=5 to c=20, where glide-mq's 1-RTT architecture pays off the most. 
+ +--- + +## Migration checklist + +``` - [ ] Replace `bullmq` with `glide-mq` in package.json -- [ ] Update all imports from `'bullmq'` to `'glide-mq'` -- [ ] Convert connection configs to `{ addresses: [{ host, port }] }` -- [ ] Replace `opts.repeat` with `upsertJobScheduler()` -- [ ] Remove `QueueScheduler` instantiation (not needed) -- [ ] Remove `defaultJobOptions` - use wrapper pattern -- [ ] Replace `settings.backoffStrategy` with `backoffStrategies` map -- [ ] Update `waitUntilFinished()` call signatures -- [ ] Run full test suite - -## Deep Dive - -For the complete migration guide with advanced patterns, multi-tenant examples, and edge cases: -- Full migration guide: `node_modules/glide-mq/skills/` -- Online guide: https://avifenesh.github.io/glide-mq.dev/migration/from-bullmq -- Repository: https://github.com/avifenesh/glide-mq +- [ ] Update all imports from 'bullmq' to 'glide-mq' +- [ ] Convert connection configs: { host, port } -> { addresses: [{ host, port }] } +- [ ] Convert TLS: tls: {} -> useTLS: true +- [ ] Convert password: password -> credentials: { password } +- [ ] Replace opts.repeat with queue.upsertJobScheduler() +- [ ] Replace settings.backoffStrategy with backoffStrategies map +- [ ] Remove QueueScheduler instantiation (not needed) +- [ ] Remove defaultJobOptions from QueueOptions; apply per job or via wrapper +- [ ] Replace queue.getJobs([...types]) with per-type calls +- [ ] Update worker.on('active') handlers: (job, jobId) not (job, prev) +- [ ] Replace job.waitUntilFinished(queueEvents, ttl) with (pollMs, timeoutMs) +- [ ] Check QueueEvents listeners for removed events (waiting, active, delayed, drained) +- [ ] Replace group.id (BullMQ Pro) with ordering.key +- [ ] Run test suite: npm test +- [ ] Confirm queue counts: await queue.getJobCounts() +- [ ] Confirm no jobs stuck in active state +- [ ] Smoke-test QueueEvents or SSE listeners if the app exposes them +- [ ] Confirm workers, queues, and connections close cleanly +``` + +--- + 
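Many of the configuration-level items on this checklist can be flagged mechanically before running the test suite. A hypothetical pre-flight scan is sketched below; the function, its messages, and the exact checks are illustrative and not part of glide-mq:

```ts
// Hypothetical pre-flight scan for leftover BullMQ-isms in an options object.
// Not part of glide-mq; the checks mirror the conversion items above.
function findBullmqLeftovers(opts: Record<string, unknown>): string[] {
  const problems: string[] = [];
  const conn = opts.connection as Record<string, unknown> | undefined;
  if (conn && 'host' in conn && !('addresses' in conn)) {
    problems.push('connection uses { host, port }: wrap it as addresses: [{ host, port }]');
  }
  if (conn && 'tls' in conn) problems.push('tls: {} is ioredis-style: use useTLS: true');
  if (conn && 'password' in conn) problems.push('top-level password: use credentials: { password }');
  if ('defaultJobOptions' in opts) problems.push('defaultJobOptions was removed: wrap queue.add() instead');
  if ('settings' in opts) problems.push('settings.backoffStrategy: move to the backoffStrategies map');
  return problems;
}
```

Running a helper like this over each `QueueOptions` / `WorkerOptions` object in a large codebase catches the mechanical conversions; behavioral changes (event signatures, `getJobs` types) still need the checklist and tests.

---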
+## Troubleshooting + +| Error | Cause | Fix | +|-------|-------|-----| +| `TypeError: connection.host is not defined` | Using BullMQ `{ host, port }` format | Change to `{ addresses: [{ host, port }] }` | +| `Cannot read properties of undefined (reading 'backoffStrategy')` | Using `settings.backoffStrategy` | Move to `backoffStrategies` map on WorkerOptions | +| `defaultJobOptions is not a valid option` | glide-mq removed `defaultJobOptions` | Wrap `queue.add()` with a helper that spreads defaults | +| `getJobs expects a string, got array` | Passing array of types to `getJobs()` | Call `getJobs()` once per type, combine results | +| `QueueScheduler is not exported` | glide-mq has no QueueScheduler | Remove it - promotion runs inside the Worker | +| `opts.repeat is not supported` | glide-mq uses upsertJobScheduler | Replace `opts.repeat` with `queue.upsertJobScheduler()` | +| `waitUntilFinished expects number` | API changed from `(qe, ttl)` to `(pollMs, ttl)` | Pass `(500, 30000)` instead of `(queueEvents, 30000)` | +| Job stuck in `active` forever | Worker crashed without completing | Stall detection auto-recovers stream jobs. 
For LIFO/priority, reset: `DEL glide:{queueName}:list-active` | +| `retries-exhausted` listener never fires | Event renamed | Listen to `'failed'` and check `attemptsMade >= opts.attempts` | +| `FlowProducer.add` throws on missing data | `data` is required in glide-mq FlowJob | Always pass `data` field (use `{}` if empty) | +| Duplicate custom jobId returns null | Expected behavior | `queue.add()` returns `null` for duplicate IDs (silent skip) | + +## Full Documentation + +- [Migration Guide](https://www.glidemq.dev/migration/from-bullmq) +- [New Features Reference](references/new-features.md) +- [Connection Mapping Reference](references/connection-mapping.md) diff --git a/skills/glide-mq-migrate-bullmq/references/connection-mapping.md b/skills/glide-mq-migrate-bullmq/references/connection-mapping.md new file mode 100644 index 0000000..36a8926 --- /dev/null +++ b/skills/glide-mq-migrate-bullmq/references/connection-mapping.md @@ -0,0 +1,206 @@ +# Connection config mapping: BullMQ to glide-mq + +BullMQ uses ioredis's flat connection format. glide-mq uses valkey-glide's structured format with an `addresses` array. This is the most common source of migration errors. + +--- + +## Basic (standalone) + +```ts +// BullMQ +const connection = { host: 'localhost', port: 6379 }; +``` + +```ts +// glide-mq +const connection = { addresses: [{ host: 'localhost', port: 6379 }] }; +``` + +--- + +## TLS + +```ts +// BullMQ +const connection = { + host: 'my-server.example.com', + port: 6380, + tls: {}, +}; +``` + +```ts +// glide-mq +const connection = { + addresses: [{ host: 'my-server.example.com', port: 6380 }], + useTLS: true, +}; +``` + +Note: BullMQ uses an empty `tls: {}` object (or with TLS options). glide-mq uses a boolean `useTLS: true`. 
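When connection settings arrive at runtime (environment variables, a secrets manager), the flat-to-structured translation can live in a single helper. The sketch below is illustrative only; the helper and its type names are not part of either library, and it covers just the host/port, TLS, and credential mappings described in this file:

```ts
// Illustrative converter from an ioredis-style config to glide-mq's format.
// Not a library API; adapt field handling to your own config source.
interface IoredisStyle {
  host: string;
  port: number;
  username?: string;
  password?: string;
  tls?: object; // ioredis takes an options object; its presence means "TLS on"
}

interface GlideStyle {
  addresses: { host: string; port: number }[];
  useTLS?: boolean;
  credentials?: { username?: string; password?: string };
  clusterMode?: boolean;
}

function toGlideConnection(cfg: IoredisStyle, clusterMode = false): GlideStyle {
  const out: GlideStyle = { addresses: [{ host: cfg.host, port: cfg.port }] };
  if (cfg.tls !== undefined) out.useTLS = true; // boolean flag replaces the TLS options object
  if (cfg.username !== undefined || cfg.password !== undefined) {
    out.credentials = {};
    if (cfg.username !== undefined) out.credentials.username = cfg.username;
    if (cfg.password !== undefined) out.credentials.password = cfg.password;
  }
  if (clusterMode) out.clusterMode = true; // valkey-glide needs this to be explicit
  return out;
}
```

Note that cluster mode is passed explicitly here, since nothing in the ioredis-style object reliably indicates it.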
+ +--- + +## Password authentication + +```ts +// BullMQ +const connection = { + host: 'my-server.example.com', + port: 6379, + password: 'secret', +}; +``` + +```ts +// glide-mq +const connection = { + addresses: [{ host: 'my-server.example.com', port: 6379 }], + credentials: { password: 'secret' }, +}; +``` + +--- + +## Username + password (ACL auth) + +```ts +// BullMQ +const connection = { + host: 'my-server.example.com', + port: 6379, + username: 'myuser', + password: 'secret', +}; +``` + +```ts +// glide-mq +const connection = { + addresses: [{ host: 'my-server.example.com', port: 6379 }], + credentials: { username: 'myuser', password: 'secret' }, +}; +``` + +--- + +## TLS + password + cluster + +```ts +// BullMQ +const connection = { + host: 'my-cluster.cache.amazonaws.com', + port: 6379, + tls: {}, + password: 'secret', +}; +// BullMQ auto-detects cluster mode in some configurations, or you use natMap +``` + +```ts +// glide-mq +const connection = { + addresses: [{ host: 'my-cluster.cache.amazonaws.com', port: 6379 }], + useTLS: true, + credentials: { password: 'secret' }, + clusterMode: true, +}; +``` + +Key difference: glide-mq requires explicit `clusterMode: true` for Redis Cluster / ElastiCache cluster / MemoryDB. + +--- + +## IAM authentication (AWS ElastiCache / MemoryDB) + +BullMQ has no equivalent. This is glide-mq only. + +```ts +// glide-mq only +const connection = { + addresses: [{ host: 'my-cluster.cache.amazonaws.com', port: 6379 }], + useTLS: true, + clusterMode: true, + credentials: { + type: 'iam', + serviceType: 'elasticache', // or 'memorydb' + region: 'us-east-1', + userId: 'my-iam-user', + clusterName: 'my-cluster', + }, +}; +``` + +No credential rotation needed - the client handles IAM token refresh automatically. + +--- + +## AZ-affinity routing (cluster only) + +BullMQ has no equivalent. Reduces cross-AZ network cost and latency. 
+ +```ts +// glide-mq only +const connection = { + addresses: [{ host: 'cluster.cache.amazonaws.com', port: 6379 }], + clusterMode: true, + useTLS: true, + readFrom: 'AZAffinity', + clientAz: 'us-east-1a', +}; +``` + +--- + +## Multiple seed nodes (cluster) + +```ts +// BullMQ - typically one host, or uses natMap for discovery +const connection = { host: 'node-1.example.com', port: 6379 }; +``` + +```ts +// glide-mq - pass multiple seed addresses for cluster discovery +const connection = { + addresses: [ + { host: 'node-1.example.com', port: 6379 }, + { host: 'node-2.example.com', port: 6379 }, + { host: 'node-3.example.com', port: 6379 }, + ], + clusterMode: true, +}; +``` + +--- + +## Option mapping table + +| BullMQ (ioredis) | glide-mq (valkey-glide) | Notes | +|-------------------|-------------------------|-------| +| `host` | `addresses: [{ host }]` | Wrapped in array of address objects | +| `port` | `addresses: [{ port }]` | Part of address object | +| `password` | `credentials: { password }` | Nested under credentials | +| `username` | `credentials: { username }` | Nested under credentials | +| `tls: {}` | `useTLS: true` | Boolean instead of object | +| `db` | Not supported - Valkey GLIDE uses db 0 | Database selection not available | +| `natMap` | Multiple entries in `addresses` | Cluster topology handled automatically | +| `maxRetriesPerRequest` | Handled internally | valkey-glide manages reconnection | +| `enableReadyCheck` | Not needed | valkey-glide handles readiness internally | +| `lazyConnect` | Not applicable | Connection is managed by the client | +| - | `clusterMode: true` | Must be explicit for cluster deployments | +| - | `readFrom: 'AZAffinity'` | glide-mq only | +| - | `clientAz` | glide-mq only | +| - | `credentials: { type: 'iam' }` | glide-mq only | +| - | `requestTimeout` | Command timeout in ms (default: 500). glide-mq only | + +--- + +## Common mistakes + +1. **Forgetting the array wrapper**: `{ addresses: { host, port } }` will fail. 
It must be `{ addresses: [{ host, port }] }` - note the square brackets. + +2. **Using `tls: {}` instead of `useTLS: true`**: valkey-glide does not accept a TLS options object. Pass the boolean flag. + +3. **Omitting `clusterMode: true`**: Unlike ioredis which can auto-detect cluster mode, valkey-glide requires you to explicitly opt in. + +4. **Using `password` at top level**: Must be `credentials: { password }`, not `password` directly. diff --git a/skills/glide-mq-migrate-bullmq/references/new-features.md b/skills/glide-mq-migrate-bullmq/references/new-features.md new file mode 100644 index 0000000..2c96a97 --- /dev/null +++ b/skills/glide-mq-migrate-bullmq/references/new-features.md @@ -0,0 +1,584 @@ +# glide-mq features not available in BullMQ + +These features have no BullMQ equivalent. They are available after migrating to glide-mq. + +--- + +## Per-key ordering + +Guarantees sequential execution per key across all workers, regardless of worker concurrency. Jobs with the same `ordering.key` run one at a time in enqueue order. Jobs with different keys run in parallel. + +```ts +await queue.add('sync', data, { + ordering: { key: 'tenant-123' }, +}); +``` + +Replaces BullMQ Pro's `group.id` feature (which requires a Pro license). + +### Group concurrency + +Allow N parallel jobs per key instead of strict serialization: + +```ts +await queue.add('sync', data, { + ordering: { key: 'tenant-123', concurrency: 3 }, +}); +``` + +Jobs exceeding the limit are automatically parked in a per-group wait list and released when a slot opens. + +### Per-group rate limiting + +Cap throughput per ordering key: + +```ts +await queue.add('sync', data, { + ordering: { + key: 'tenant-123', + concurrency: 3, + rateLimit: { max: 10, duration: 60_000 }, + }, +}); +``` + +Rate-limited jobs are promoted by the scheduler loop (latency up to `promotionInterval`, default 5 s). 
+ +### Cost-based token bucket + +Assign a cost to each job and deduct from a refilling bucket per key: + +```ts +await queue.add('heavy-job', data, { + ordering: { + key: 'tenant-123', + tokenBucket: { capacity: 100, refillRate: 10 }, + }, + cost: 25, // this job consumes 25 tokens +}); +``` + +--- + +## Global rate limiting + +Queue-wide rate limit stored in Valkey, dynamically picked up by all workers: + +```ts +await queue.setGlobalRateLimit({ max: 500, duration: 60_000 }); + +const limit = await queue.getGlobalRateLimit(); // { max, duration } or null +await queue.removeGlobalRateLimit(); +``` + +When both global rate limit and `WorkerOptions.limiter` are set, the stricter limit wins. + +--- + +## Dead letter queue + +First-class DLQ support configured at the queue level: + +```ts +const queue = new Queue('tasks', { + connection, + deadLetterQueue: { + name: 'tasks-dlq', + maxRetries: 3, + }, +}); + +// Retrieve DLQ jobs: +const dlqQueue = new Queue('tasks-dlq', { connection }); +const dlqJobs = await dlqQueue.getDeadLetterJobs(); +``` + +BullMQ has no native DLQ - failed jobs stay in the failed state. + +--- + +## Job revocation + +Cancel an in-flight job from outside the worker: + +```ts +await queue.revoke(jobId); +``` + +The processor must cooperate via `job.abortSignal`: + +```ts +const worker = new Worker('q', async (job) => { + for (const chunk of data) { + if (job.abortSignal?.aborted) return; + await processChunk(chunk); + } +}, { connection }); +``` + +--- + +## Transparent compression + +Gzip compression of all job payloads, transparent to application code: + +```ts +const queue = new Queue('tasks', { + connection, + compression: 'gzip', +}); +// No changes needed in worker or job code +``` + +98% payload reduction on 15 KB JSON payloads (15 KB -> 331 bytes). 
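The ratio is easy to sanity-check locally with Node's built-in zlib. This standalone sketch is unrelated to glide-mq's internal implementation, and the exact numbers depend entirely on payload shape:

```ts
// Standalone gzip sanity check using node:zlib - not glide-mq code.
import { gzipSync } from 'node:zlib';

// Repetitive JSON, roughly the kind of payload that compresses well in practice.
const payload = JSON.stringify({
  rows: Array.from({ length: 200 }, (_, i) => ({
    id: i,
    status: 'pending',
    note: 'retry scheduled after transient upstream failure',
  })),
});

const compressed = gzipSync(Buffer.from(payload));
console.log(`raw: ${payload.length} B, gzip: ${compressed.length} B`);
```

Highly repetitive JSON shrinks dramatically; payloads that are already compressed (images, encrypted blobs) will not benefit.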
+ +--- + +## AZ-affinity routing + +Pin worker reads to replicas in your availability zone to reduce cross-AZ network cost: + +```ts +const connection = { + addresses: [{ host: 'cluster.cache.amazonaws.com', port: 6379 }], + clusterMode: true, + readFrom: 'AZAffinity', + clientAz: 'us-east-1a', +}; +``` + +--- + +## IAM authentication + +Native AWS ElastiCache and MemoryDB IAM auth with automatic token refresh: + +```ts +const connection = { + addresses: [{ host: 'my-cluster.cache.amazonaws.com', port: 6379 }], + useTLS: true, + clusterMode: true, + credentials: { + type: 'iam', + serviceType: 'elasticache', + region: 'us-east-1', + userId: 'my-iam-user', + clusterName: 'my-cluster', + }, +}; +``` + +--- + +## In-memory test mode + +Test queue logic without a running Valkey/Redis instance: + +```ts +import { TestQueue, TestWorker } from 'glide-mq/testing'; + +const queue = new TestQueue<{ email: string }, { sent: boolean }>('tasks'); +const worker = new TestWorker(queue, async (job) => { + return { sent: true }; +}); + +await queue.add('send-email', { email: 'user@example.com' }); +await new Promise(r => setTimeout(r, 10)); + +const jobs = await queue.getJobs('completed'); +``` + +BullMQ has no equivalent. Typically requires `ioredis-mock` or a real Redis instance. + +--- + +## Broadcast / BroadcastWorker + +Pub/sub fan-out where every connected `BroadcastWorker` receives every message. 
Supports per-subscriber retries for reliable delivery: + +```ts +import { Broadcast, BroadcastWorker } from 'glide-mq'; + +const broadcast = new Broadcast('notifications', { connection }); +const bw = new BroadcastWorker('notifications', async (message) => { + console.log('Received:', message); +}, { connection, subscription: 'my-group' }); + +await broadcast.publish('alerts', { type: 'alert', text: 'Server restarting' }); +``` + +--- + +## Batch processing + +Process multiple jobs in a single processor invocation: + +```ts +const worker = new Worker('q', async (jobs) => { + // jobs is an array when batch mode is enabled + const results = await bulkProcess(jobs.map(j => j.data)); + return results; // per-job results array +}, { + connection, + batch: { size: 50, timeout: 1000 }, +}); +``` + +--- + +## DAG workflows + +Arbitrary directed acyclic graphs where a job can depend on multiple parents (BullMQ only supports trees - one parent per job): + +```ts +import { FlowProducer, dag } from 'glide-mq'; + +// Option 1: dag() helper - standalone, creates its own FlowProducer +const jobs = await dag([ + { name: 'fetch-a', queueName: 'tasks', data: { source: 'a' } }, + { name: 'fetch-b', queueName: 'tasks', data: { source: 'b' } }, + { name: 'aggregate', queueName: 'tasks', data: {}, deps: ['fetch-a', 'fetch-b'] }, +], connection); + +// Option 2: FlowProducer.addDAG() - when you manage the FlowProducer +const flow = new FlowProducer({ connection }); +const jobs2 = await flow.addDAG({ + nodes: [ + { name: 'fetch-a', queueName: 'tasks', data: { source: 'a' } }, + { name: 'fetch-b', queueName: 'tasks', data: { source: 'b' } }, + { name: 'aggregate', queueName: 'tasks', data: {}, deps: ['fetch-a', 'fetch-b'] }, + ], +}); +await flow.close(); +``` + +--- + +## Workflow helpers + +Higher-level orchestration built on FlowProducer: + +```ts +import { chain, group, chord } from 'glide-mq'; + +const connection = { addresses: [{ host: 'localhost', port: 6379 }] }; + +// chain: 
sequential pipeline +await chain('tasks', [ + { name: 'step-1', data: {} }, + { name: 'step-2', data: {} }, + { name: 'step-3', data: {} }, +], connection); + +// group: parallel fan-out, synthetic parent waits for all +await group('tasks', [ + { name: 'shard-1', data: {} }, + { name: 'shard-2', data: {} }, +], connection); + +// chord: group then callback +await chord('tasks', [ + { name: 'task-1', data: {} }, + { name: 'task-2', data: {} }, +], { name: 'aggregate', data: {} }, connection); +``` + +--- + +## Step jobs + +Multi-step state machines using `job.moveToDelayed()` with an optional step token: + +```ts +const worker = new Worker('q', async (job) => { + const step = job.data.__step ?? 'init'; + + switch (step) { + case 'init': + await doInit(job.data); + await job.moveToDelayed(Date.now(), 'process'); + return; + case 'process': + await doProcess(job.data); + await job.moveToDelayed(Date.now(), 'finalize'); + return; + case 'finalize': + return doFinalize(job.data); + } +}, { connection }); +``` + +BullMQ's `moveToDelayed` has no step parameter. 
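The switch statement above grows with each step, so the dispatch can be factored into a step table. The sketch below shows the dispatch logic only, with the queue wiring omitted; the helper is illustrative, not a glide-mq API:

```ts
// Illustrative step-table dispatcher for the pattern above. A handler returns
// the name of the next step, or nothing when the job is finished.
type StepHandler<T> = (data: T) => Promise<string | void> | string | void;

function makeStepDispatcher<T extends { __step?: string }>(
  steps: Record<string, StepHandler<T>>,
  initial = 'init',
) {
  return async (data: T): Promise<{ next?: string; done: boolean }> => {
    const current = data.__step ?? initial;
    const handler = steps[current];
    if (!handler) throw new Error(`unknown step: ${current}`);
    const next = await handler(data);
    if (typeof next === 'string') return { next, done: false };
    return { done: true };
  };
}
```

Inside a worker processor, a returned `next` would be passed to `job.moveToDelayed(Date.now(), result.next)`; when `done` is true, the processor simply returns.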
+ +--- + +## addAndWait (request-reply) + +Synchronous RPC pattern - enqueue a job and wait for its result: + +```ts +const result = await queue.addAndWait('compute', { input: 42 }, { + waitTimeout: 30_000, +}); +console.log(result); // the job's return value +``` + +--- + +## Pluggable serializers + +Use MessagePack, Protobuf, or any custom format instead of JSON: + +```ts +import msgpack from 'msgpack-lite'; + +const queue = new Queue('tasks', { + connection, + serializer: { + serialize: (data) => msgpack.encode(data), + deserialize: (buffer) => msgpack.decode(buffer), + }, +}); +``` + +--- + +## Job TTL + +Auto-expire jobs after a given duration: + +```ts +await queue.add('ephemeral', data, { + ttl: 60_000, // job fails if not completed within 60 seconds +}); +``` + +--- + +## repeatAfterComplete + +Scheduler mode that enqueues the next job only after the previous one completes, guaranteeing no overlap: + +```ts +await queue.upsertJobScheduler( + 'sequential-poll', + { repeatAfterComplete: 5000 }, + { name: 'poll', data: {} }, +); +``` + +--- + +## LIFO mode + +Last-in-first-out processing - newest jobs are processed first: + +```ts +await queue.add('urgent', data, { lifo: true }); +``` + +Priority and delayed jobs take precedence over LIFO. Cannot be combined with ordering keys. + +Note: LIFO + `globalConcurrency` has a crash limitation. If a worker is killed hard (SIGKILL, OOM) while processing a LIFO job, the `list-active` counter is not decremented. Reset with: `DEL glide:{queueName}:list-active`. 
+ +--- + +## Job search + +Search over job data fields: + +```ts +const results = await queue.searchJobs({ + // search options +}); +``` + +--- + +## excludeData + +Lightweight job listings without payload data: + +```ts +const jobs = await queue.getJobs('waiting', 0, 99, { excludeData: true }); +// jobs[0].data is undefined - useful for dashboard listings of large-payload queues +``` + +--- + +## globalConcurrency on WorkerOptions + +Set queue-wide concurrency cap at worker startup (shorthand for `queue.setGlobalConcurrency()`): + +```ts +const worker = new Worker('q', processor, { + connection, + concurrency: 10, + globalConcurrency: 50, // queue-wide cap across all workers +}); +``` + +--- + +## Deduplication modes + +Beyond BullMQ's simple deduplication, glide-mq adds explicit modes: + +```ts +await queue.add('job', data, { + deduplication: { + id: 'my-dedup-key', + ttl: 60_000, + mode: 'simple', // drop if exists (default) + // mode: 'throttle' - drop duplicates within window + // mode: 'debounce' - reset window on each add + }, +}); +``` + +--- + +## Backoff jitter + +Spread retries under load with a jitter field: + +```ts +await queue.add('job', data, { + attempts: 5, + backoff: { type: 'exponential', delay: 1000, jitter: 0.25 }, // +/- 25% random jitter +}); +``` + +--- + +## AI-Native Primitives + +The following features are purpose-built for LLM/AI orchestration pipelines. None of them exist in BullMQ. + +### Usage Metadata (job.reportUsage) + +Track model, tokens, cost, and latency per job. Persisted to the job hash and emitted as a `'usage'` event. 
+ +```ts +const worker = new Worker('inference', async (job) => { + const result = await callLLM(job.data); + await job.reportUsage({ + model: 'gpt-5.4', + provider: 'openai', + tokens: { input: result.promptTokens, output: result.completionTokens }, + costs: { total: 0.003 }, + costUnit: 'usd', + latencyMs: 800, + }); + return result.content; +}, { connection }); +``` + +### Token Streaming (job.stream / queue.readStream) + +Stream LLM output tokens in real-time via per-job Valkey Streams. + +```ts +// Worker side +const worker = new Worker('chat', async (job) => { + for await (const chunk of llmStream) { + await job.stream({ token: chunk.text }); + } + return { done: true }; +}, { connection }); + +// Consumer side +const entries = await queue.readStream(jobId, { block: 5000 }); +``` + +### Suspend / Resume (Human-in-the-Loop) + +Pause a job to wait for external approval, then resume with signals. + +```ts +// Suspend in processor +await job.suspend({ reason: 'Needs review', timeout: 86_400_000 }); + +// Resume externally +await queue.signal(jobId, 'approve', { reviewer: 'alice' }); + +// On resume, job.signals contains all received signals +``` + +### Budget Middleware (Flow-Level Caps) + +Cap total tokens and/or cost across all jobs in a flow. + +```ts +await flow.add(flowTree, { + budget: { maxTotalTokens: 50_000, maxTotalCost: 0.50, costUnit: 'usd', onExceeded: 'fail' }, +}); + +const budget = await queue.getFlowBudget(parentJobId); +``` + +### Fallback Chains + +Ordered model/provider alternatives tried on retryable failure. + +```ts +await queue.add('inference', { prompt: '...' 
}, { + attempts: 4, + fallbacks: [ + { model: 'gpt-5.4', provider: 'openai' }, + { model: 'claude-sonnet-4-20250514', provider: 'anthropic' }, + { model: 'llama-3-70b', provider: 'groq' }, + ], +}); + +// Worker reads job.currentFallback for the active model/provider +``` + +### Dual-Axis Rate Limiting (RPM + TPM) + +Rate-limit by both requests and tokens per minute for LLM API compliance. + +```ts +const worker = new Worker('inference', processor, { + connection, + limiter: { max: 60, duration: 60_000 }, // RPM + tokenLimiter: { maxTokens: 100_000, duration: 60_000 }, // TPM +}); + +// Report tokens in processor +await job.reportTokens(totalTokens); +``` + +### Flow Usage Aggregation + +Aggregate AI usage across all jobs in a flow. + +```ts +const usage = await queue.getFlowUsage(parentJobId); +// { tokens, totalTokens, costs, totalCost, costUnit, jobCount, models } +``` + +### Vector Search (Valkey Search) + +Create search indexes and run KNN vector similarity queries over job hashes. + +```ts +await queue.createJobIndex({ + vectorField: { name: 'embedding', dimensions: 1536 }, +}); + +const job = await queue.add('document', { text: 'Hello world' }); +if (job) { + await job.storeVector('embedding', queryEmbedding); +} + +const results = await queue.vectorSearch(queryEmbedding, { + k: 10, + filter: '@state:{completed}', +}); +// results: { job, score }[] + +await queue.dropJobIndex(); +``` + +Requires `valkey-search` module on the server (standalone mode). diff --git a/skills/glide-mq/SKILL.md b/skills/glide-mq/SKILL.md index 78535e5..56bd094 100644 --- a/skills/glide-mq/SKILL.md +++ b/skills/glide-mq/SKILL.md @@ -1,141 +1,220 @@ --- name: glide-mq -description: "Creates glide-mq message queue implementations. Use for new queue setup, producer/consumer patterns, job scheduling, workflows, batch processing, or any greenfield glide-mq development." 
-version: 1.0.0 -argument-hint: "[task description]" +description: >- + Creates message queues, workers, job workflows, and fan-out broadcasts using + glide-mq on Valkey/Redis Streams. Provides API reference, code patterns, and + configuration for queues, workers, delayed/priority jobs, schedulers, batch + processing, DAG workflows, request-reply, serverless producers, and AI-native + primitives (usage tracking, token streaming, suspend/resume, budget caps, + fallback chains, dual-axis rate limiting, rolling usage summaries, vector search, HTTP proxy/SSE). Triggers on + "glide-mq", "glidemq", "job queue valkey", "background tasks valkey", + "message queue redis streams", "glide-mq LLM queue", + "glide-mq AI orchestration queue", "glide-mq token rate limiting", + "glide-mq model fallback", "glide-mq human-in-the-loop queue", + "glide-mq vector search", "glide-mq AI pipeline". +license: Apache-2.0 +metadata: + author: glide-mq + version: "0.14.0" + tags: glide-mq, message-queue, valkey, redis, job-queue, worker, streams, ai-native, llm, vector-search + sources: docs/USAGE.md, docs/ADVANCED.md, docs/WORKFLOWS.md, docs/BROADCAST.md, docs/SERVERLESS.md, docs/TESTING.md, docs/OBSERVABILITY.md --- # glide-mq -Provides guidance for greenfield glide-mq message queue development - queues, workers, producers, job scheduling, and workflows. +High-performance AI-native message queue for Node.js on Valkey/Redis Streams with a Rust NAPI core. -> This is a thin wrapper. 
For full API reference, advanced patterns, and deep documentation, see `node_modules/glide-mq/skills/` or https://avifenesh.github.io/glide-mq.dev/ +## Quick Start -## When to Use +```typescript +import { Queue, Worker } from 'glide-mq'; -Invoke this skill when: -- User is building a new message queue system with glide-mq -- User needs queue, worker, producer, or job scheduling setup -- User asks about glide-mq API, patterns, or configuration -- User wants workflow orchestration (flows, DAGs, chains) +const connection = { addresses: [{ host: 'localhost', port: 6379 }] }; -## Install +const queue = new Queue('tasks', { connection }); +await queue.add('send-email', { to: 'user@example.com', subject: 'Hello' }); + +const worker = new Worker('tasks', async (job) => { + console.log(`Processing ${job.name}:`, job.data); + return { sent: true }; +}, { connection, concurrency: 10 }); -```bash -npm install glide-mq +worker.on('completed', (job) => console.log(`Done: ${job.id}`)); +worker.on('failed', (job, err) => console.error(`Failed: ${job.id}`, err.message)); ``` -Requires Node.js 20+ and Valkey 7.0+ (or Redis 7.0+). 
+## When to Apply + +Use this skill when: +- Creating or configuring queues, workers, or producers +- Adding jobs (single, bulk, delayed, priority) +- Setting up retries, backoff, or dead-letter queues +- Building job workflows (parent-child, DAGs, chains) +- Implementing fan-out broadcast patterns +- Configuring cron/interval schedulers +- Setting up connection options (TLS, IAM, AZ-affinity) +- Working with batch processing or rate limiting +- Tracking AI/LLM usage (tokens, cost, model) per job or flow +- Streaming LLM output tokens in real-time +- Implementing human-in-the-loop approval with suspend/resume +- Setting budget caps (tokens, cost) on workflow flows +- Configuring fallback chains for model/provider failover +- Dual-axis rate limiting (RPM + TPM) for LLM API compliance +- Aggregating rolling usage/cost summaries across queues +- Searching jobs by vector similarity (KNN) with Valkey Search +- Exposing queues or broadcasts over the HTTP proxy, including SSE endpoints +- Integrating with frameworks (Hono, Fastify, NestJS, Hapi) +- Deploying in serverless environments (Lambda, Vercel Edge) + +## Core API by Priority + +| Priority | Category | Impact | Reference | +|----------|----------|--------|-----------| +| 1 | Queue & Job Operations | CRITICAL | [references/queue.md](references/queue.md) | +| 2 | Worker & Processing | CRITICAL | [references/worker.md](references/worker.md) | +| 3 | Connection & Config | HIGH | [references/connection.md](references/connection.md) | +| 4 | Workflows & FlowProducer | HIGH | [references/workflows.md](references/workflows.md) | +| 5 | Broadcast (Fan-Out) | MEDIUM | [references/broadcast.md](references/broadcast.md) | +| 6 | Schedulers (Cron/Interval) | MEDIUM | [references/schedulers.md](references/schedulers.md) | +| 7 | Observability & Events | MEDIUM | [references/observability.md](references/observability.md) | +| 8 | AI-Native Primitives | HIGH | [references/ai-native.md](references/ai-native.md) | +| 9 | Vector 
Search | MEDIUM | [references/search.md](references/search.md) | +| 10 | Serverless & Testing | LOW | [references/serverless.md](references/serverless.md) | + +## Key Patterns + +### Delayed & Priority Jobs -## Connection +```typescript +// Delayed: run after 5 minutes +await queue.add('reminder', data, { delay: 300_000 }); -All glide-mq classes use the addresses array format: +// Priority: lower number = higher priority (default: 0) +await queue.add('urgent', data, { priority: 0 }); +await queue.add('low-priority', data, { priority: 10 }); -```typescript -const connection = { addresses: [{ host: 'localhost', port: 6379 }] }; +// Retries with exponential backoff +await queue.add('webhook', data, { + attempts: 5, + backoff: { type: 'exponential', delay: 1000 } +}); ``` -With TLS and authentication: +### Bulk Ingestion (10,000 jobs in ~350ms) ```typescript -const connection = { - addresses: [{ host: 'my-cluster.cache.amazonaws.com', port: 6379 }], - useTLS: true, - credentials: { password: 'secret' }, - clusterMode: true, -}; +const jobs = items.map(item => ({ + name: 'process', + data: item, + opts: { jobId: `item-${item.id}` } +})); +await queue.addBulk(jobs); ``` -## Quick Start +### Batch Worker (Process Multiple Jobs at Once) ```typescript -import { Queue, Worker } from 'glide-mq'; +const worker = new Worker('analytics', async (jobs) => { + // jobs is Job[] when batch is enabled + await db.insertMany('events', jobs.map(j => j.data)); +}, { + connection, + batch: { size: 50, timeout: 5000 } +}); +``` -const connection = { addresses: [{ host: 'localhost', port: 6379 }] }; +### Request-Reply (addAndWait) -// Producer -const queue = new Queue('tasks', { connection }); -await queue.add('send-email', { to: 'user@example.com' }, { - attempts: 3, - backoff: { type: 'exponential', delay: 1000 }, - priority: 1, +```typescript +const result = await queue.addAndWait('compute', { input: 42 }, { + waitTimeout: 30_000 }); +console.log(result); // processor return value +``` 
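
`waitTimeout` bounds the wait on the queue side. If you also want a purely client-side guard (for example, against a hung connection), a generic promise-timeout wrapper is enough — the sketch below is a plain Promise utility, not a glide-mq API:

```typescript
// Generic client-side timeout guard (plain Promise utility, not part of glide-mq).
function withTimeout<T>(p: Promise<T>, ms: number): Promise<T> {
  return new Promise<T>((resolve, reject) => {
    const timer = setTimeout(() => reject(new Error(`timed out after ${ms}ms`)), ms);
    p.then(
      (value) => { clearTimeout(timer); resolve(value); },
      (err) => { clearTimeout(timer); reject(err); },
    );
  });
}

// Usage with the documented addAndWait call:
//   const result = await withTimeout(
//     queue.addAndWait('compute', { input: 42 }, { waitTimeout: 30_000 }),
//     35_000, // a bit above waitTimeout, so the server-side limit fires first
//   );
```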
-// Consumer -const worker = new Worker('tasks', async (job) => { - console.log(`Processing ${job.name}:`, job.data); - return { sent: true }; -}, { connection, concurrency: 10 }); +### Serverless Producer (No EventEmitter Overhead) -worker.on('completed', (job) => console.log(`Job ${job.id} done`)); -worker.on('failed', (job, err) => console.error(`Job ${job.id} failed:`, err.message)); +```typescript +import { Producer } from 'glide-mq'; +const producer = new Producer('queue', { connection }); +await producer.add('job-name', data); +await producer.close(); ``` -## Core API - -| Class | Purpose | Key Methods | -|-------|---------|-------------| -| `Queue` | Enqueue and manage jobs | `add()`, `addBulk()`, `addAndWait()`, `pause()`, `resume()`, `drain()` | -| `Worker` | Process jobs | Constructor takes `(name, processor, opts)`. Events: `completed`, `failed`, `active` | -| `Producer` | Lightweight enqueue (serverless) | `add()` - no EventEmitter overhead | -| `FlowProducer` | Parent-child job trees | `add()` for DAG workflows | -| `QueueEvents` | Monitor queue events | `on('completed')`, `on('failed')`, `on('delayed')` | -| `Broadcast` | Durable pub/sub | Fan-out with subject filtering | - -## Job Options - -| Option | Type | Description | -|--------|------|-------------| -| `attempts` | number | Retry count on failure | -| `backoff` | object | `{ type: 'exponential' \| 'fixed', delay: ms }` | -| `delay` | number | Delay before processing (ms) | -| `priority` | number | Lower number = higher priority (0 is highest) | -| `ttl` | number | Auto-expire after time-to-live (ms) | -| `jobId` | string | Custom deduplication ID | -| `ordering.key` | string | Per-key ordering group | -| `ordering.concurrency` | number | Max parallel jobs per group (default 1) | -| `ordering.rateLimit` | object | `{ max, duration }` - static sliding window per group | -| `ordering.tokenBucket` | object | `{ capacity, refillRate }` - cost-based rate limiting per group | - -**Runtime group rate 
limiting** (new in v0.12): -- `job.rateLimitGroup(duration, opts?)` - pause group from inside processor (e.g., on 429) -- `throw new GroupRateLimitError(duration, opts?)` - throw-style sugar -- `queue.rateLimitGroup(key, duration, opts?)` - pause group from outside (webhook, health check) -- Options: `currentJob` ('requeue'|'fail'), `requeuePosition` ('front'|'back'), `extend` ('max'|'replace') - -**Note:** Compression (`compression: 'gzip'`) is a Queue-level option passed to the Queue constructor, not a per-job option. - -## Worker Options - -| Option | Type | Default | Description | -|--------|------|---------|-------------| -| `concurrency` | number | 1 | Parallel job limit | -| `lockDuration` | number | 30000 | Lock timeout (ms) | -| `stalledInterval` | number | 30000 | Recovery check frequency (ms) | - -## Scheduling and Testing +### Graceful Shutdown ```typescript -// Cron scheduling -await queue.upsertJobScheduler('daily-report', - { pattern: '0 9 * * *', tz: 'America/New_York' }, - { name: 'daily-report', data: { v: 1 } }, -); +import { gracefulShutdown } from 'glide-mq'; -// In-memory testing (no Valkey/Redis required) +// Registers SIGTERM/SIGINT handlers and returns a handle. +// await blocks until a signal fires - use as last line of your program. 
+const handle = gracefulShutdown([worker1, worker2, queue, events]); + +// For programmatic shutdown (e.g., in tests): +await handle.shutdown(); + +// To remove signal handlers without closing: +handle.dispose(); +``` + +### Testing Without Valkey + +```typescript import { TestQueue, TestWorker } from 'glide-mq/testing'; const queue = new TestQueue('tasks'); -const worker = new TestWorker(queue, async (job) => ({ sent: true })); +await queue.add('test-job', { key: 'value' }); +const worker = new TestWorker(queue, processor); +await worker.run(); ``` +## Problem-to-Reference Mapping + +| Problem | Start With | +|---------|------------| +| Need to create a queue and add jobs | [references/queue.md](references/queue.md) | +| Need to process jobs with workers | [references/worker.md](references/worker.md) | +| Jobs failing, need retries/backoff | [references/queue.md](references/queue.md) - Retry section | +| Need parent-child job dependencies | [references/workflows.md](references/workflows.md) | +| Need fan-out to multiple consumers | [references/broadcast.md](references/broadcast.md) | +| Need cron or repeating jobs | [references/schedulers.md](references/schedulers.md) | +| Connection errors or TLS/IAM setup | [references/connection.md](references/connection.md) | +| Stalled jobs or lock issues | [references/worker.md](references/worker.md) - Stalled Jobs | +| Need real-time job events | [references/observability.md](references/observability.md) | +| Integrating with Fastify/NestJS/Hono | [Framework Integrations](https://www.glidemq.dev/integrations/) | +| Deploying to Lambda/Vercel Edge | [references/serverless.md](references/serverless.md) | +| Need deduplication or idempotent jobs | [references/queue.md](references/queue.md) - Dedup | +| Need rate limiting | [references/queue.md](references/queue.md) - Rate Limit | +| Running tests without Valkey | [references/serverless.md](references/serverless.md) - Testing | +| Need to track LLM tokens/cost per job | 
[references/ai-native.md](references/ai-native.md) - Usage Metadata | +| Need to stream LLM output tokens | [references/ai-native.md](references/ai-native.md) - Token Streaming | +| Need human approval before proceeding | [references/ai-native.md](references/ai-native.md) - Suspend/Resume | +| Need to cap token/cost budget on a flow | [references/ai-native.md](references/ai-native.md) - Budget | +| Need model fallback on failure | [references/ai-native.md](references/ai-native.md) - Fallback Chains | +| Need RPM + TPM rate limiting for LLM APIs | [references/ai-native.md](references/ai-native.md) - Dual-Axis Rate Limiting | +| Need rolling usage/cost summary across queues | [references/ai-native.md](references/ai-native.md) - Usage Metadata | +| Need vector similarity search over jobs | [references/search.md](references/search.md) | +| Need to aggregate usage across a flow | [references/ai-native.md](references/ai-native.md) - Flow Usage | +| Need to create or inspect flows over HTTP | [references/serverless.md](references/serverless.md) - HTTP Proxy | +| Need cross-language HTTP or SSE access | [references/serverless.md](references/serverless.md) - HTTP Proxy | + ## Critical Notes -- Connection uses `{ addresses: [{ host, port }] }` - NOT `{ host, port }` directly -- Priority: lower number = higher priority (0 is highest) -- Keys are hash-tagged (`glide:{queueName}:*`) for native cluster support -- Single FCALL per operation - no Lua EVAL overhead +- **Node.js 20+** and **Valkey 7.0+** (or Redis 7.0+) required +- **At-least-once delivery** - make processors idempotent +- **Priority**: lower number = higher priority (0 is default, highest) +- **Cluster-native** - hash-tagged keys (`glide:{queueName}:*`) work out of the box +- All queue logic runs as a single Valkey Server Function (FCALL) - 1 round-trip per job +- Connection format uses `addresses: [{ host, port }]` array, NOT `{ host, port }` object +- **Never use `customCommand`** - use typed API methods with 
dummy keys for cluster routing
+
+## Done When
+
+- `npm test` or the project-equivalent test command passes
+- `await queue.getJobCounts()` matches the expected queue state
+- no jobs are left unexpectedly stuck in `active`
+- any QueueEvents or SSE behavior touched by the change has been smoke-tested
+- temporary queues, workers, and listeners are closed cleanly
 
-## Deep Dive
+## Full Documentation
 
-For complete API reference, workflows, observability, and serverless guides:
-- `node_modules/glide-mq/skills/` | https://avifenesh.github.io/glide-mq.dev/
+https://www.glidemq.dev/
diff --git a/skills/glide-mq/references/ai-native.md b/skills/glide-mq/references/ai-native.md
new file mode 100644
index 0000000..e4bee36
--- /dev/null
+++ b/skills/glide-mq/references/ai-native.md
@@ -0,0 +1,385 @@
+# AI-Native Primitives Reference
+
+glide-mq provides 7 AI-native primitives designed for LLM orchestration pipelines.
+
+## 1. Usage Metadata (job.reportUsage)
+
+Track model, tokens, cost, and latency per job.
+
+```typescript
+const worker = new Worker('inference', async (job) => {
+  const response = await openai.chat.completions.create({ ... });
+
+  await job.reportUsage({
+    model: 'gpt-5.4',
+    provider: 'openai',
+    tokens: {
+      input: response.usage.prompt_tokens,
+      output: response.usage.completion_tokens,
+    },
+    // totalTokens auto-computed as sum of all token categories if omitted
+    costs: { total: 0.0032 },
+    costUnit: 'usd',
+    latencyMs: 1200,
+    cached: false,
+  });
+
+  return response.choices[0].message.content;
+}, { connection });
+```
+
+### JobUsage Interface
+
+```typescript
+interface JobUsage {
+  model?: string;    // e.g. 'gpt-5.4', 'claude-sonnet-4-20250514'
+  provider?: string; // e.g. 'openai', 'anthropic'
+  tokens?: Record<string, number>; // e.g. { input: 500, output: 200, reasoning: 100 }
+  totalTokens?: number; // auto-computed as sum of tokens values if omitted
+  costs?: Record<string, number>; // e.g. { total: 0.003 } or { input: 0.001, output: 0.002 }
+  totalCost?: number; // auto-computed as sum of costs values if omitted
+  costUnit?: string;  // e.g. 'usd', 'credits', 'ils' (informational)
+  latencyMs?: number; // inference latency (not queue wait)
+  cached?: boolean;   // cache hit flag
+}
+```
+
+- Calling `reportUsage()` multiple times overwrites previous values on that job.
+- Token counts must not be negative (throws).
+- Emits a `'usage'` event on the events stream with the full usage object.
+- Stored in the job hash as `usage:model`, `usage:tokens` (JSON), `usage:costs` (JSON), `usage:totalTokens`, `usage:totalCost`, `usage:costUnit`.
+- Also updates rolling per-minute usage buckets used by `queue.getUsageSummary()`.
+
+### Rolling Usage Summary (queue.getUsageSummary / Queue.getUsageSummary)
+
+```typescript
+const summary = await queue.getUsageSummary({
+  queues: ['inference', 'embeddings'],
+  windowMs: 3_600_000, // last hour
+});
+
+// {
+//   totalTokens,
+//   totalCost,
+//   jobCount,
+//   models: Record<string, number>,
+//   perQueue: Record<string, { totalTokens: number; totalCost: number; jobCount: number }>
+// }
+```
+
+Use `Queue.getUsageSummary()` when you want the same rollup without an existing queue instance. The HTTP proxy exposes the same aggregation at `GET /usage/summary`.
+
+## 2. Token Streaming (job.stream / job.streamChunk / queue.readStream)
+
+Emit and consume LLM output tokens in real-time via per-job Valkey Streams.
+
+### Producer Side (Worker)
+
+```typescript
+const worker = new Worker('chat', async (job) => {
+  const stream = await openai.chat.completions.create({ stream: true, ... });
+
+  for await (const chunk of stream) {
+    const token = chunk.choices[0]?.delta?.content;
+    if (token) {
+      await job.stream({ token, index: String(chunk.choices[0].index) });
+    }
+  }
+
+  return { done: true };
+}, { connection });
+```
+
+`job.stream(chunk)` appends a flat `Record<string, string>` to a per-job Valkey Stream via XADD. Returns the stream entry ID.
+
+### Convenience: job.streamChunk(type, content?)
+
+Typed shorthand for streaming LLM chunks with a `type` field and optional `content`:
+
+```typescript
+await job.streamChunk('reasoning', 'Let me think about this...');
+await job.streamChunk('content', 'The answer is 42.');
+await job.streamChunk('done');
+```
+
+Equivalent to `job.stream({ type, content })` - useful for structured streaming with thinking models.
+
+### Consumer Side (Queue)
+
+```typescript
+const entries = await queue.readStream(jobId);
+// entries: { id: string; fields: Record<string, string> }[]
+
+// Resume from last known position
+const more = await queue.readStream(jobId, { lastId: entries.at(-1)?.id });
+
+// Long-polling (blocks until new entries arrive)
+const live = await queue.readStream(jobId, { lastId, block: 5000 });
+```
+
+### ReadStreamOptions
+
+```typescript
+interface ReadStreamOptions {
+  lastId?: string; // resume from this stream ID (exclusive)
+  count?: number;  // max entries to return (default: 100)
+  block?: number;  // XREAD BLOCK ms for long-polling (0 = non-blocking)
+}
+```
+
+## 3. Suspend / Resume (Human-in-the-Loop)
+
+Pause a job to wait for external approval, then resume with signals.
+
+### Suspending (Worker Side)
+
+```typescript
+const worker = new Worker('content-review', async (job) => {
+  // Check if this is a resume after suspension
+  if (job.signals.length > 0) {
+    const approval = job.signals.find(s => s.name === 'approve');
+    if (approval) {
+      return { published: true, approver: approval.data.approvedBy };
+    }
+    return { rejected: true };
+  }
+
+  // First run - generate content and suspend for review
+  const content = await generateContent(job.data);
+  await job.updateData({ ...job.data, generatedContent: content });
+
+  await job.suspend({
+    reason: 'Awaiting human review',
+    timeout: 86_400_000, // 24h timeout (0 = infinite, default)
+  });
+}, { connection });
+```
+
+`job.suspend()` throws `SuspendError` internally - no code after it executes. The job moves to `'suspended'` state.
+ +If `timeout` is set, glide-mq stores the deadline on the suspended sorted set and any live `Queue` or `Worker` runtime can fail expired suspended jobs with `'Suspend timeout exceeded'`. This no longer depends on the original worker staying online, but it does require at least one glide-mq process to remain connected to the queue. + +### Resuming (Queue Side) + +```typescript +// Send a signal to resume the job +const resumed = await queue.signal(jobId, 'approve', { approvedBy: 'alice' }); +// true if job was suspended and is now resumed, false otherwise + +// Inspect suspension state +const info = await queue.getSuspendInfo(jobId); +// null if not suspended, otherwise: +// { +// reason?: string, +// suspendedAt: number (epoch ms), +// timeout?: number (ms), +// signals: SignalEntry[] +// } +``` + +### SignalEntry + +```typescript +interface SignalEntry { + name: string; // signal name (e.g. 'approve', 'reject') + data: any; // arbitrary payload + receivedAt: number; // epoch ms +} +``` + +### SuspendOptions + +```typescript +interface SuspendOptions { + reason?: string; // human-readable reason + timeout?: number; // ms, 0 = infinite (default) +} +``` + +## 4. Budget Middleware (Flow-Level Caps) + +Cap total token usage and/or cost across all jobs in a flow. Supports per-category limits and weighted totals for thinking model budgets. 
+
+### Setting Budget on a Flow
+
+```typescript
+import { FlowProducer } from 'glide-mq';
+
+const flow = new FlowProducer({ connection });
+await flow.add(
+  {
+    name: 'research-report',
+    queueName: 'ai',
+    data: { topic: 'quantum computing' },
+    children: [
+      { name: 'search', queueName: 'ai', data: { query: 'latest papers' } },
+      { name: 'summarize', queueName: 'ai', data: {} },
+      { name: 'critique', queueName: 'ai', data: {} },
+    ],
+  },
+  {
+    budget: {
+      maxTotalTokens: 50_000,
+      maxTotalCost: 0.50,
+      costUnit: 'usd',
+      tokenWeights: { reasoning: 4, cachedInput: 0.25 },
+      onExceeded: 'fail', // 'fail' (default) or 'pause'
+    },
+  },
+);
+```
+
+### BudgetOptions
+
+```typescript
+interface BudgetOptions {
+  maxTotalTokens?: number; // hard cap on weighted total tokens
+  maxTokens?: Record<string, number>; // per-category token caps (e.g. { input: 50000, reasoning: 5000 })
+  tokenWeights?: Record<string, number>; // weight multipliers for maxTotalTokens (unlisted = 1)
+  maxTotalCost?: number; // hard cap on total cost
+  maxCosts?: Record<string, number>; // per-category cost caps
+  costUnit?: string; // e.g. 'usd', 'credits', 'ils' (informational)
+  onExceeded?: 'pause' | 'fail'; // default: 'fail'
+}
+```
+
+### Reading Budget State
+
+```typescript
+const budget = await queue.getFlowBudget(parentJobId);
+// null if no budget was set, otherwise:
+// {
+//   maxTotalTokens?: number,
+//   maxTokens?: Record<string, number>,
+//   tokenWeights?: Record<string, number>,
+//   maxTotalCost?: number,
+//   maxCosts?: Record<string, number>,
+//   costUnit?: string,
+//   usedTokens: number,
+//   usedCost: number,
+//   exceeded: boolean,
+//   onExceeded: 'pause' | 'fail'
+// }
+```
+
+Budget is enforced per flow by writing a `budgetKey` to every job hash in the tree.
+
+## 5. Fallback Chains
+
+Ordered list of model/provider alternatives tried on retryable failure.
+ +### Setting Fallbacks + +```typescript +await queue.add('inference', { prompt: 'Explain quantum entanglement' }, { + attempts: 4, // 1 original + 3 fallbacks + fallbacks: [ + { model: 'gpt-5.4', provider: 'openai' }, + { model: 'claude-sonnet-4-20250514', provider: 'anthropic' }, + { model: 'llama-3-70b', provider: 'groq', metadata: { temperature: 0.7 } }, + ], +}); +``` + +### Reading Fallback State (Worker Side) + +```typescript +const worker = new Worker('inference', async (job) => { + const fallback = job.currentFallback; + // undefined on first attempt (original request) + // { model: 'gpt-5.4', provider: 'openai' } on first fallback + // { model: 'claude-sonnet-4-20250514', provider: 'anthropic' } on second, etc. + + const model = fallback?.model ?? job.data.defaultModel; + const provider = fallback?.provider ?? job.data.defaultProvider; + + return await callLLM(provider, model, job.data.prompt); +}, { connection }); +``` + +- `job.fallbackIndex` is 0 for the original request, 1+ for fallback entries. +- `job.currentFallback` returns `fallbacks[fallbackIndex - 1]` or `undefined` when index is 0. +- Each fallback entry has `model` (required), `provider` (optional), and `metadata` (optional). + +## 6. Dual-Axis Rate Limiting (RPM + TPM) + +Rate-limit workers by both requests-per-minute (RPM) and tokens-per-minute (TPM). 
+
+### Configuration
+
+```typescript
+const worker = new Worker('inference', processor, {
+  connection,
+  limiter: { max: 60, duration: 60_000 }, // RPM: 60 req/min
+  tokenLimiter: {
+    maxTokens: 100_000,
+    duration: 60_000,
+    scope: 'both', // 'queue' | 'worker' | 'both' (default)
+  },
+});
+```
+
+### TokenLimiter Options
+
+```typescript
+interface TokenLimiter {
+  maxTokens: number; // max tokens per window
+  duration: number;  // window duration in ms
+  scope?: 'queue' | 'worker' | 'both';
+  // 'queue': Valkey counter shared across all workers
+  // 'worker': in-memory counter per worker instance
+  // 'both': local check first, then Valkey (optimal, default)
+}
+```
+
+### Reporting Tokens
+
+```typescript
+const worker = new Worker('inference', async (job) => {
+  const result = await callLLM(job.data);
+
+  // Option 1: report tokens directly for TPM tracking
+  await job.reportTokens(result.totalTokens);
+
+  // Option 2: reportUsage auto-extracts totalTokens for TPM
+  await job.reportUsage({
+    model: 'gpt-5.4',
+    tokens: { input: result.promptTokens, output: result.completionTokens },
+  });
+
+  return result;
+}, { connection, tokenLimiter: { maxTokens: 100_000, duration: 60_000 } });
+```
+
+Worker pauses fetching when either RPM or TPM limit is exceeded.
+
+## 7. Flow Usage Aggregation (getFlowUsage)
+
+Aggregate AI usage metadata across all jobs in a flow tree.
+
+```typescript
+const usage = await queue.getFlowUsage(parentJobId);
+// {
+//   tokens: Record<string, number>, // aggregated per-category tokens (e.g. { input: 2500, output: 1200 })
+//   totalTokens: number,            // sum of all token categories
+//   costs: Record<string, number>,  // aggregated per-category costs
+//   totalCost: number,              // sum of all cost categories
+//   costUnit?: string,              // unit from the first job that reported one
+//   jobCount: number,
+//   models: Record<string, number>  // model name -> call count
+// }
+```
+
+Walks the parent and all children via the deps set. Useful for cost reporting, billing, and observability dashboards.
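
Because `getFlowUsage()` and `getFlowBudget()` (section 4) both return plain aggregates, turning them into a dashboard-friendly utilization figure is simple arithmetic. A minimal sketch — the interfaces below copy only the fields needed from the documented return shapes, and the actual queue calls are shown as comments:

```typescript
// Only the fields needed from the documented getFlowUsage / getFlowBudget shapes.
interface FlowUsageTotals { totalTokens: number; totalCost: number; jobCount: number; }
interface FlowBudgetCaps { maxTotalTokens?: number; maxTotalCost?: number; exceeded: boolean; }

// Percent of each budget axis consumed (capped at 100); null when no cap is set.
function budgetUtilization(usage: FlowUsageTotals, budget: FlowBudgetCaps) {
  const pct = (used: number, max?: number) =>
    max && max > 0 ? Math.min(100, (used / max) * 100) : null;
  return {
    tokensPct: pct(usage.totalTokens, budget.maxTotalTokens),
    costPct: pct(usage.totalCost, budget.maxTotalCost),
    exceeded: budget.exceeded,
  };
}

// In an app the inputs would come from:
//   const usage  = await queue.getFlowUsage(parentJobId);
//   const budget = await queue.getFlowBudget(parentJobId);
const view = budgetUtilization(
  { totalTokens: 24_000, totalCost: 0.2, jobCount: 12 },
  { maxTotalTokens: 50_000, maxTotalCost: 0.5, exceeded: false },
);
// view.tokensPct is ~48, view.costPct is ~40
```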
+ +## Gotchas + +- `job.suspend()` and `job.moveToWaitingChildren()` both throw internally - no code after them executes. +- `job.reportUsage()` and `job.reportTokens()` reject negative values. +- `reportUsage()` overwrites previous usage data on the same job. +- `getUsageSummary()` reads rolling buckets, not job hashes, so it is cheap for queue-wide summaries but not a replacement for per-job detail. +- `reportTokens()` overwrites the previous value - it does not accumulate. +- Budget enforcement happens at the flow level, not per-job. Individual jobs report usage; the budget key tracks aggregates. +- Fallback chains require `attempts >= fallbacks.length + 1` (original + N fallbacks). +- `queue.signal()` returns false if the job is not in suspended state. +- `readStream()` with `block > 0` uses XREAD BLOCK (a blocking Valkey call) - do not use on a shared client that serves other queries. diff --git a/skills/glide-mq/references/broadcast.md b/skills/glide-mq/references/broadcast.md new file mode 100644 index 0000000..6d88fcc --- /dev/null +++ b/skills/glide-mq/references/broadcast.md @@ -0,0 +1,139 @@ +# Broadcast Reference + +## Overview + +`Broadcast` is pub/sub fan-out. Unlike `Queue` (point-to-point), every message is delivered to **all** subscribers. + +## Broadcast Constructor + +```typescript +import { Broadcast, BroadcastWorker } from 'glide-mq'; + +const broadcast = new Broadcast('events', { + connection: ConnectionOptions, + maxMessages?: number, // retain at most N messages in the stream +}); +``` + +## Publishing + +```typescript +// publish(subject, data, opts?) 
- subject is the first arg +await broadcast.publish('orders', { event: 'order.placed', orderId: 42 }); + +// With dotted subjects (for subject filtering) +await broadcast.publish('orders.created', { orderId: 42 }); +await broadcast.publish('inventory.low', { sku: 'ABC', qty: 0 }); + +await broadcast.close(); +``` + +## BroadcastWorker Constructor + +```typescript +const worker = new BroadcastWorker( + 'events', // broadcast name + async (job) => { // processor + console.log(job.name, job.data); + }, + { + connection: ConnectionOptions, + subscription: string, // REQUIRED - unique subscriber name (consumer group) + startFrom?: string, // '$' (default, new only) | '0-0' (replay all history) + subjects?: string[], // NATS-style subject filter patterns + concurrency?: number, // same as Worker + limiter?: { max, duration }, // same as Worker + // All other Worker options supported (backoff, etc.) + }, +); + +await worker.close(); +``` + +## Subject Filtering (NATS-style) + +Patterns use `.` as token separator: + +| Token | Meaning | +|-------|---------| +| `*` | Matches exactly one token | +| `>` | Matches one or more tokens (must be last token) | +| literal | Matches exactly | + +### Pattern Examples + +| Pattern | Matches | Does NOT match | +|---------|---------|----------------| +| `orders.created` | `orders.created` | `orders.updated`, `orders.created.us` | +| `orders.*` | `orders.created`, `orders.updated` | `orders.created.us` | +| `orders.>` | `orders.created`, `orders.created.us`, `orders.a.b.c` | `inventory.created` | +| `*.created` | `orders.created`, `inventory.created` | `orders.updated` | + +### Usage + +```typescript +// Single pattern +const worker = new BroadcastWorker('events', processor, { + connection, + subscription: 'order-handler', + subjects: ['orders.*'], +}); + +// Multiple patterns +const worker = new BroadcastWorker('events', processor, { + connection, + subscription: 'mixed-handler', + subjects: ['orders.*', 'inventory.low', 'shipping.>'], 
+}); +``` + +### How Filtering Works + +1. `subjects` compiled to matcher at construction via `compileSubjectMatcher`. +2. Non-matching messages are auto-acknowledged (`XACK`) and skipped. +3. Empty/unset `subjects` = all messages processed. + +### Utility Functions + +```typescript +import { matchSubject, compileSubjectMatcher } from 'glide-mq'; + +matchSubject('orders.*', 'orders.created'); // true +matchSubject('orders.*', 'orders.a.b'); // false + +const matcher = compileSubjectMatcher(['orders.*', 'shipping.>']); +matcher('orders.created'); // true +matcher('shipping.us.west'); // true +matcher('inventory.low'); // false +``` + +## Queue vs Broadcast + +| | Queue | Broadcast | +|---|---|---| +| Delivery | Point-to-point (one consumer) | Fan-out (all subscribers) | +| Use case | Task processing | Event distribution | +| API | `queue.add(name, data, opts)` | `broadcast.publish(subject, data, opts?)` | +| Consumer | `Worker` | `BroadcastWorker` | +| Retry | Per job | Per subscriber, per message | +| Trimming | Auto (completion/removal) | `maxMessages` option | + +## HTTP Proxy + +Cross-language producers and consumers can use the proxy instead of `Broadcast` / `BroadcastWorker` directly: + +| Method | Path | Description | +|--------|------|-------------| +| POST | `/broadcast/:name` | Publish `{ subject, data?, opts? }` | +| GET | `/broadcast/:name/events` | SSE fan-out stream. Requires `subscription`; optional `subjects=a.*,b.>` | + +SSE payloads arrive as `event: message` with JSON `{ id, subject, data, timestamp }`. + +## Gotchas + +- `subscription` is required on BroadcastWorker - it becomes the consumer group name. +- Proxy SSE `subscription` follows the same rule and becomes the consumer-group name. +- Subject filtering requires publishing with a `name` using dotted convention. +- `>` wildcard must be the **last** token in the pattern. +- `startFrom: '0-0'` replays all retained history (backfill). 
+- Per-subscriber retries - each subscriber independently retries failed messages. diff --git a/skills/glide-mq/references/connection.md b/skills/glide-mq/references/connection.md new file mode 100644 index 0000000..9828dd7 --- /dev/null +++ b/skills/glide-mq/references/connection.md @@ -0,0 +1,192 @@ +# Connection Reference + +## ConnectionOptions Interface + +```typescript +interface ConnectionOptions { + addresses: { host: string; port: number }[]; // ARRAY of address objects + useTLS?: boolean; + credentials?: PasswordCredentials | IamCredentials; + clusterMode?: boolean; + readFrom?: ReadFrom; + clientAz?: string; + inflightRequestsLimit?: number; // default: 1000 + requestTimeout?: number; // command timeout in ms, default: 500 +} +``` + +## Basic Connection + +```typescript +const connection = { addresses: [{ host: 'localhost', port: 6379 }] }; +const queue = new Queue('tasks', { connection }); +``` + +## TLS + +```typescript +const connection = { + addresses: [{ host: 'my-server.com', port: 6379 }], + useTLS: true, +}; +``` + +## Authentication + +### Password-based + +```typescript +interface PasswordCredentials { + username?: string; + password: string; +} + +const connection = { + addresses: [{ host: 'server.com', port: 6379 }], + useTLS: true, + credentials: { password: 'secret' }, +}; +``` + +### IAM (AWS ElastiCache / MemoryDB) + +```typescript +interface IamCredentials { + type: 'iam'; + serviceType: 'elasticache' | 'memorydb'; + region: string; // e.g. 
'us-east-1' + userId: string; // IAM user ID (maps to username in AUTH) + clusterName: string; + refreshIntervalSeconds?: number; // default: 300 (5 min) +} + +const connection = { + addresses: [{ host: 'my-cluster.cache.amazonaws.com', port: 6379 }], + clusterMode: true, + credentials: { + type: 'iam', + serviceType: 'elasticache', + region: 'us-east-1', + userId: 'my-iam-user', + clusterName: 'my-cluster', + }, +}; +``` + +## Cluster Mode + +```typescript +const connection = { + addresses: [ + { host: 'node1', port: 7000 }, + { host: 'node2', port: 7001 }, + ], + clusterMode: true, +}; +``` + +Keys are hash-tagged automatically (`glide:{queueName}:*`) for cluster compatibility. + +## Read Strategies + +```typescript +const connection = { + addresses: [{ host: 'cluster.cache.amazonaws.com', port: 6379 }], + clusterMode: true, + readFrom: 'AZAffinity', + clientAz: 'us-east-1a', +}; +``` + +| `readFrom` value | Behavior | +|------------------|----------| +| `'primary'` | Always read from primary (default) | +| `'preferReplica'` | Round-robin across replicas, fallback to primary | +| `'AZAffinity'` | Route reads to replicas in same AZ | +| `'AZAffinityReplicasAndPrimary'` | Route reads to any node in same AZ | + +AZ-based strategies require `clientAz` to be set. + +## Shared Client Pattern + +By default each component creates its own GLIDE client. You can inject a shared client to reduce connections. 
+ +```typescript +import { GlideClient } from '@glidemq/speedkey'; + +const client = await GlideClient.createClient({ addresses: [{ host: 'localhost' }] }); + +const queue = new Queue('jobs', { client }); // borrows client +const flow = new FlowProducer({ client }); // borrows client +const worker = new Worker('jobs', handler, { + connection, // REQUIRED - blocking client auto-created + commandClient: client, // shared client for non-blocking ops +}); +const events = new QueueEvents('jobs', { connection }); // always own connection +// Total: 2 TCP connections (shared + worker's blocking client) +``` + +### What can share + +Queue, FlowProducer, Worker's command client - all non-blocking operations. +GLIDE multiplexes up to 1000 in-flight requests over one TCP connection. + +### What cannot share + +- Worker's blocking client (`XREADGROUP BLOCK`) - always auto-created +- QueueEvents (`XREAD BLOCK`) - always own connection. Throws if you pass `client`. + +### Close order + +```typescript +// Close components first, then shared client +await queue.close(); // detaches (does not close shared client) +await worker.close(); // closes only auto-created blocking client +await flow.close(); +client.close(); // now safe +``` + +### inflightRequestsLimit + +Default 1000. At Worker concurrency=50, peak inflight is ~55 commands. + +```typescript +const connection = { + addresses: [{ host: 'localhost' }], + inflightRequestsLimit: 2000, +}; +``` + +### requestTimeout + +Command timeout in milliseconds. Default: 500. Commands exceeding this throw a `TimeoutError`. Increase for operations that may take longer (e.g. `FT.CREATE` with many existing keys, `FUNCTION LOAD` with large libraries). 
+ +```typescript +const connection = { + addresses: [{ host: 'localhost', port: 6379 }], + requestTimeout: 2000, // 2 seconds +}; +``` + +## Valkey Modules (Search / JSON / Bloom) + +Vector search (`queue.createJobIndex()`, `queue.vectorSearch()`) requires the `valkey-search` module loaded on the server. The easiest way to get all modules is to use `valkey-bundle`, which bundles search, JSON, bloom, and other modules: + +```bash +# Docker (standalone with all modules) +docker run -p 6379:6379 valkey/valkey-bundle:latest + +# Or load the search module explicitly +valkey-server --loadmodule /path/to/valkeysearch.so +``` + +Vector search is supported in standalone mode only (not cluster mode) due to Valkey Search module limitations. + +## Gotchas + +- `addresses` is an **array** of `{ host, port }` objects, not a single host/port. +- Worker always requires `connection` even when `commandClient` is provided. +- `commandClient` and `client` are aliases on Worker - use one, not both. +- Don't close shared client while components are alive. +- QueueEvents cannot accept an injected `client` - throws. +- Don't mutate shared client state externally (e.g., `SELECT`). diff --git a/skills/glide-mq/references/observability.md b/skills/glide-mq/references/observability.md new file mode 100644 index 0000000..780ac75 --- /dev/null +++ b/skills/glide-mq/references/observability.md @@ -0,0 +1,232 @@ +# Observability Reference + +## QueueEvents + +Stream-based lifecycle events via `XREAD BLOCK`. Real-time without polling. + +```typescript +import { QueueEvents } from 'glide-mq'; + +const events = new QueueEvents('tasks', { connection }); + +events.on('added', ({ jobId }) => { ... }); +events.on('progress', ({ jobId, data }) => { ... }); +events.on('completed', ({ jobId, returnvalue }) => { ... }); +events.on('failed', ({ jobId, failedReason }) => { ... }); +events.on('stalled', ({ jobId }) => { ... }); +events.on('paused', () => { ... }); +events.on('resumed', () => { ... 
}); +events.on('usage', ({ jobId, data }) => { ... }); // AI usage reported + +await events.close(); +``` + +### Disabling Server-Side Events + +Save 1 redis.call() per job on high-throughput workloads: + +```typescript +const queue = new Queue('tasks', { connection, events: false }); +const worker = new Worker('tasks', handler, { connection, events: false }); +``` + +TS-side `EventEmitter` events (`worker.on('completed', ...)`) are unaffected. + +### QueueEvents Cannot Share Clients + +`QueueEvents` uses `XREAD BLOCK` - always creates its own connection. Throws if you pass `client`. + +## Job Logs + +```typescript +// Inside processor +await job.log('Starting step 1'); +await job.log('Step 1 done'); + +// Fetching externally +const { logs, count } = await queue.getJobLogs(jobId); +// logs: string[], count: number + +// Paginated +const { logs } = await queue.getJobLogs(jobId, 0, 49); // first 50 +const { logs } = await queue.getJobLogs(jobId, 50, 99); // next 50 +``` + +## Job Progress + +```typescript +// Inside processor +await job.updateProgress(50); // number (0-100) +await job.updateProgress({ step: 3 }); // or object + +// Listen via QueueEvents +events.on('progress', ({ jobId, data }) => { ... }); + +// Or via Worker events +worker.on('active', (job) => { ... }); +``` + +## Job Counts + +```typescript +const counts = await queue.getJobCounts(); +// { waiting: 12, active: 3, delayed: 5, completed: 842, failed: 7 } + +const waitingCount = await queue.count(); // stream length only +``` + +## Time-Series Metrics + +```typescript +const metrics = await queue.getMetrics('completed'); +// { +// count: 15234, +// data: [ +// { timestamp: 1709654400000, count: 142, avgDuration: 234 }, +// { timestamp: 1709654460000, count: 156, avgDuration: 218 }, +// ], +// meta: { resolution: 'minute' } +// } + +// Slice (e.g., last 10 data points) +const recent = await queue.getMetrics('completed', { start: -10 }); +``` + +- Recorded server-side with zero extra RTTs. 
+- Minute-resolution buckets retained for 24 hours, trimmed automatically. +- Type: `'completed'` or `'failed'`. + +### Disabling Metrics + +```typescript +const worker = new Worker('tasks', handler, { + connection, + metrics: false, // skip HINCRBY per job +}); +``` + +## Waiting for a Job + +```typescript +// Poll job hash until finished +const state = await job.waitUntilFinished(pollIntervalMs, timeoutMs); +// Returns 'completed' | 'failed' + +// Request-reply (no polling) +const result = await queue.addAndWait('inference', data, { waitTimeout: 30_000 }); +``` + +## AI Usage Telemetry + +### Per-Job Usage + +```typescript +// Report usage inside a processor +await job.reportUsage({ + model: 'gpt-5.4', + provider: 'openai', + tokens: { input: 500, output: 200 }, + costs: { total: 0.003 }, + costUnit: 'usd', + latencyMs: 800, + cached: false, +}); + +// Emits a 'usage' event on the events stream +events.on('usage', ({ jobId, data }) => { + const usage = JSON.parse(data); + console.log(`Job ${jobId}: ${usage.model} - ${usage.totalTokens} tokens`); +}); + +// Read usage from a completed job +const job = await queue.getJob(jobId); +console.log(job.usage); +// { model, provider, tokens, totalTokens, costs, totalCost, costUnit, latencyMs, cached } +``` + +### Flow-Level Aggregation + +```typescript +const usage = await queue.getFlowUsage(parentJobId); +// { +// tokens: { input: 2500, output: 1200 }, +// totalTokens: 3700, +// costs: { total: 0.015 }, +// totalCost: 0.015, +// costUnit: 'usd', +// jobCount: 4, +// models: { 'gpt-5.4': 3, 'claude-sonnet-4-20250514': 1 } +// } +``` + +Walks the parent job and all children via the deps set. Includes usage from the parent itself. 
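The aggregate shape above lends itself to simple derived metrics for dashboards. A minimal sketch, assuming the return shape shown in the example (the `FlowUsage` interface and `summarizeFlowUsage` helper below are illustrative, not exported glide-mq types):

```typescript
// Hypothetical shape mirroring the getFlowUsage() return value above.
interface FlowUsage {
  tokens: Record<string, number>;
  totalTokens: number;
  totalCost: number;
  jobCount: number;
  models: Record<string, number>;
}

// Derive per-job and per-1K-token figures from a flow aggregate.
function summarizeFlowUsage(u: FlowUsage) {
  return {
    avgTokensPerJob: u.jobCount ? u.totalTokens / u.jobCount : 0,
    costPer1kTokens: u.totalTokens ? (u.totalCost / u.totalTokens) * 1000 : 0,
    dominantModel:
      Object.entries(u.models).sort((a, b) => b[1] - a[1])[0]?.[0] ?? null,
  };
}
```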
+ +### Rolling Usage Summary + +```typescript +const summary = await queue.getUsageSummary({ + queues: ['tasks', 'embeddings'], + windowMs: 3_600_000, +}); + +// { totalTokens, totalCost, jobCount, models, perQueue } +``` + +This reads rolling per-minute buckets instead of scanning job hashes, so it is the right primitive for dashboards and queue-wide cost telemetry. + +### Budget Monitoring + +```typescript +const budget = await queue.getFlowBudget(flowId); +if (budget && budget.exceeded) { + console.warn(`Flow ${flowId} exceeded budget: ${budget.usedTokens} tokens, $${budget.usedCost}`); +} +``` + +## Proxy SSE Surfaces + +For cross-language observability, the HTTP proxy exposes: + +| Path | Description | +|------|-------------| +| `/queues/:name/events` | Queue-wide lifecycle events via SSE with `Last-Event-ID` resume | +| `/queues/:name/jobs/:id/stream` | Per-job streaming output via SSE | +| `/broadcast/:name/events` | Broadcast SSE with `subscription` and optional `subjects` filters | + +These routes require the proxy to be created with `connection`, because they allocate blocking readers internally. + +## OpenTelemetry + +Auto-emits spans when `@opentelemetry/api` is installed. No code changes needed. + +```bash +npm install @opentelemetry/api +``` + +Initialize tracer provider before creating Queue/Worker (standard OTel setup). 
+ +### Custom Tracer + +```typescript +import { setTracer, isTracingEnabled } from 'glide-mq'; +import { trace } from '@opentelemetry/api'; + +setTracer(trace.getTracer('my-service', '1.0.0')); +console.log('Tracing:', isTracingEnabled()); +``` + +### Instrumented Operations + +| Operation | Span Name | Key Attributes | +|-----------|-----------|----------------| +| `queue.add()` | `glide-mq.queue.add` | `glide-mq.queue`, `glide-mq.job.name`, `glide-mq.job.id`, `.delay`, `.priority` | +| `flowProducer.add()` | `glide-mq.flow.add` | `glide-mq.queue`, `glide-mq.flow.name`, `.childCount` | +| `flowProducer.addDAG()` | `glide-mq.flow.addDAG` | `glide-mq.flow.nodeCount` | + +## Gotchas + +- `QueueEvents` always creates its own connection - cannot use shared `client`. +- Disabling `events` only affects the Valkey events stream, not TS-side EventEmitter. +- `getMetrics()` type is `'completed'` or `'failed'` only. +- OTel spans are automatic if `@opentelemetry/api` is installed - no explicit setup in glide-mq. +- `job.waitUntilFinished()` does NOT require QueueEvents (unlike BullMQ) - polls job hash directly. 
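The last gotcha above is worth internalizing: `waitUntilFinished` is a polling loop over the job hash, not an event subscription. A sketch of that loop under stated assumptions - `getState` stands in for the real job-hash read and is a hypothetical injection point, not part of the glide-mq API:

```typescript
// Generic poll loop in the spirit of job.waitUntilFinished(): call a
// state getter until the job reaches a terminal state or the timeout
// elapses.
async function pollUntilFinished(
  getState: () => Promise<string>, // hypothetical stand-in for the job-hash read
  pollIntervalMs: number,
  timeoutMs: number,
): Promise<'completed' | 'failed'> {
  const deadline = Date.now() + timeoutMs;
  for (;;) {
    const state = await getState();
    if (state === 'completed' || state === 'failed') return state;
    if (Date.now() >= deadline) throw new Error('waitUntilFinished timed out');
    await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
  }
}
```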
diff --git a/skills/glide-mq/references/queue.md b/skills/glide-mq/references/queue.md new file mode 100644 index 0000000..ff280e7 --- /dev/null +++ b/skills/glide-mq/references/queue.md @@ -0,0 +1,241 @@ +# Queue Reference + +## Constructor + +```typescript +import { Queue } from 'glide-mq'; + +const queue = new Queue('tasks', { + connection: ConnectionOptions, // required unless `client` provided + client?: Client, // pre-existing GLIDE client (not owned) + prefix?: string, // key prefix (default: 'glide') + compression?: 'none' | 'gzip', // default: 'none' + serializer?: Serializer, // default: JSON_SERIALIZER + events?: boolean, // emit 'added' events (default: true) + deadLetterQueue?: { name: string; maxRetries?: number }, +}); +``` + +## Adding Jobs + +```typescript +// Single job - returns Job | null (null if dedup/collision) +const job = await queue.add(name: string, data: any, opts?: JobOptions); + +// Bulk add - 12.7x faster via GLIDE Batch API +const jobs = await queue.addBulk([ + { name: 'job1', data: { a: 1 }, opts?: JobOptions }, +]); + +// Request-reply - blocks until worker returns result +const result = await queue.addAndWait(name, data, { + waitTimeout: 30_000, // producer-side wait budget (separate from job timeout) + // Does NOT support removeOnComplete or removeOnFail + // Rejects if dedup returns null +}); +``` + +## JobOptions + +| Option | Type | Default | Notes | +|--------|------|---------|-------| +| `delay` | `number` (ms) | 0 | Run after delay | +| `priority` | `number` | 0 | **LOWER = HIGHER** (0 is highest, max 2048) | +| `attempts` | `number` | 1 | Total attempts (initial + retries) | +| `backoff` | `{ type, delay, jitter? }` | - | `'fixed'`, `'exponential'`, or custom name | +| `timeout` | `number` (ms) | - | Fail if processor exceeds this | +| `ttl` | `number` (ms) | - | Fail as `'expired'` if not processed in time. Clock starts at creation. | +| `jobId` | `string` | auto-increment | Custom ID. Max 256 chars. 
No `{}:` or control chars. Returns `null` on collision. | +| `lifo` | `boolean` | false | Last-in-first-out. Cannot combine with `ordering.key`. | +| `removeOnComplete` | `boolean \| { age, count }` | false | Auto-remove on success | +| `removeOnFail` | `boolean \| number \| { age, count }` | false | Auto-remove on failure. Number = max count to keep. | +| `deduplication` | `{ id, mode, ttl? }` | - | Modes: `'simple'`, `'throttle'`, `'debounce'`. Returns `null` when skipped. | +| `ordering` | `{ key, concurrency?, rateLimit?, tokenBucket? }` | - | Per-key sequential/grouped processing | +| `cost` | `number` | 1 | Token cost for token bucket rate limiting | +| `lockDuration` | `number` (ms) | - | Override worker-level lockDuration for this job. Controls heartbeat frequency and stall threshold. | +| `fallbacks` | `Array<{ model, provider?, metadata? }>` | - | Ordered fallback chain for model/provider failover | + +> **Note:** Compression is not a per-job option. Set `compression: 'gzip'` at Queue level in the Queue constructor. + +### Processing Order + +**priority > LIFO > FIFO**. Priority jobs first, then LIFO list, then FIFO stream. 
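The priority > LIFO > FIFO rule can be sketched as a selection over three buckets. This is an illustration of the documented ordering only - the real selection runs server-side inside Valkey functions:

```typescript
// Illustrative next-job selection following the documented order:
// priority bucket first (lower number = higher priority), then the
// LIFO list (newest first), then the FIFO stream (oldest first).
interface PendingJob {
  id: string;
  priority?: number;
}

function pickNext(
  priorityJobs: PendingJob[], // jobs enqueued with a priority
  lifoList: PendingJob[],     // jobs enqueued with lifo: true, in push order
  fifoStream: PendingJob[],   // plain jobs, in arrival order
): PendingJob | undefined {
  if (priorityJobs.length) {
    // Lower number wins; 0 is the highest priority.
    return [...priorityJobs].sort(
      (a, b) => (a.priority ?? 0) - (b.priority ?? 0),
    )[0];
  }
  if (lifoList.length) return lifoList[lifoList.length - 1]; // newest first
  return fifoStream[0]; // oldest first
}
```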
+ +## Queue Management + +```typescript +await queue.pause(); // workers stop picking up new jobs +await queue.resume(); +const paused = await queue.isPaused(); + +// Drain - remove waiting jobs +await queue.drain(); // waiting only +await queue.drain(true); // also delayed/scheduled + +// Obliterate - remove ALL queue data +await queue.obliterate(); // fails if active jobs exist +await queue.obliterate({ force: true }); + +// Clean old jobs by age +const ids = await queue.clean(grace: number, limit: number, type: 'completed' | 'failed'); + +await queue.close(); +``` + +## Inspecting Jobs + +```typescript +const job = await queue.getJob('42'); +const job = await queue.getJob('42', { excludeData: true }); // metadata only + +const jobs = await queue.getJobs(state, start?, end?); +// state: 'waiting' | 'active' | 'delayed' | 'completed' | 'failed' +const lite = await queue.getJobs('waiting', 0, 99, { excludeData: true }); + +const counts = await queue.getJobCounts(); +// { waiting, active, delayed, completed, failed } + +const results = await queue.searchJobs({ state?, name?, data?, limit? }); +// data: shallow key-value match. 
limit default: 100 + +const waitingCount = await queue.count(); // stream length +``` + +## Rate Limiting + +```typescript +// Per-worker rate limit (in WorkerOptions) +limiter: { max: 100, duration: 60_000 } // 100 jobs/min + +// Global rate limit (across all workers) +await queue.setGlobalRateLimit({ max: 500, duration: 60_000 }); +const limit = await queue.getGlobalRateLimit(); +await queue.removeGlobalRateLimit(); + +// Global concurrency +await queue.setGlobalConcurrency(20); +await queue.setGlobalConcurrency(0); // remove limit +``` + +## Dead Letter Queue + +```typescript +// Configure on Worker +const worker = new Worker('tasks', processor, { + connection, + deadLetterQueue: { name: 'tasks-dlq' }, +}); + +// Inspect DLQ +const dlqJobs = await queue.getDeadLetterJobs(0, 49); +``` + +## Token Streaming + +```typescript +// Read entries from a job's streaming channel +const entries = await queue.readStream(jobId); +// entries: { id: string; fields: Record }[] + +// Resume from last position +const more = await queue.readStream(jobId, { lastId: entries.at(-1)?.id }); + +// Long-polling (blocks until new entries or timeout) +const live = await queue.readStream(jobId, { + lastId: '0-0', + count: 50, // max entries (default: 100) + block: 5000, // XREAD BLOCK ms +}); +``` + +## Flow Usage Aggregation + +```typescript +const usage = await queue.getFlowUsage(parentJobId); +// { +// tokens: Record, // aggregated per-category (e.g. 
{ input, output }) +// totalTokens: number, +// costs: Record, // aggregated per-category costs +// totalCost: number, +// costUnit?: string, +// jobCount: number, +// models: Record // model -> call count +// } +``` + +### Rolling Usage Summary + +```typescript +const summary = await queue.getUsageSummary({ + queues: ['tasks', 'embeddings'], + windowMs: 3_600_000, +}); + +// Static form: +const sameSummary = await Queue.getUsageSummary({ connection, queues: ['tasks'] }); +``` + +## Flow Budget + +```typescript +const budget = await queue.getFlowBudget(flowId); +// null if no budget set, otherwise: +// { +// maxTotalTokens?: number, +// maxTokens?: Record, +// tokenWeights?: Record, +// maxTotalCost?: number, +// maxCosts?: Record, +// costUnit?: string, +// usedTokens: number, +// usedCost: number, +// exceeded: boolean, +// onExceeded: 'pause' | 'fail' +// } +``` + +## Suspend / Resume + +```typescript +// Send a signal to resume a suspended job +const resumed = await queue.signal(jobId, 'approve', { approvedBy: 'alice' }); +// true if job was resumed, false if not suspended + +// Inspect suspension state +const info = await queue.getSuspendInfo(jobId); +// null if not suspended, otherwise: +// { reason?, suspendedAt, timeout?, signals: SignalEntry[] } +``` + +## Vector Search + +```typescript +// Create a search index over job hashes +await queue.createJobIndex({ + vectorField: { name: 'embedding', dimensions: 1536 }, + fields: [{ type: 'TAG', name: 'category' }], +}); + +// Search by vector similarity +const results = await queue.vectorSearch(embedding, { + k: 10, + filter: '@state:{completed}', +}); +// results: { job: Job, score: number }[] + +// Drop the index (does not delete jobs) +await queue.dropJobIndex(); +``` + +See [references/ai-native.md](ai-native.md) and [references/search.md](search.md) for full details. + +## Gotchas + +- Priority: **0 is highest priority**. Lower number = higher priority. Max 2048. 
+- `addAndWait()` rejects if dedup returns null. Does not support `removeOnComplete`/`removeOnFail`. +- `queue.add()` returns `null` on custom jobId collision or deduplication skip. +- `FlowProducer.add()` throws on duplicate jobId (flows cannot be partial). +- `getUsageSummary()` is for queue-wide rollups. Use `getJob()` / `job.usage` for per-job detail. +- Payload size limit: job data must be <= 1 MB after serialization, before compression. +- Same serializer must be used on Queue, Worker, and FlowProducer. Mismatch causes silent corruption. +- `lifo` and `ordering.key` are mutually exclusive - throws at enqueue time. diff --git a/skills/glide-mq/references/schedulers.md b/skills/glide-mq/references/schedulers.md new file mode 100644 index 0000000..e162308 --- /dev/null +++ b/skills/glide-mq/references/schedulers.md @@ -0,0 +1,116 @@ +# Schedulers Reference + +## Overview + +`upsertJobScheduler` defines repeatable jobs via cron or fixed interval. Schedulers survive restarts - next run time is stored in Valkey. + +## API + +All scheduler operations are on the `Queue` instance: + +```typescript +const queue = new Queue('tasks', { connection }); +``` + +### Cron Schedule + +```typescript +await queue.upsertJobScheduler( + 'daily-report', // scheduler ID (unique per queue) + { pattern: '0 8 * * *' }, // cron expression + { name: 'generate-report', data: { type: 'daily' } }, // job template +); +``` + +### Fixed Interval + +```typescript +await queue.upsertJobScheduler( + 'cleanup', + { every: 5 * 60 * 1_000 }, // interval in ms + { name: 'cleanup-old', data: {} }, +); +``` + +### Repeat After Complete + +Schedules next job only after current completes (no overlap). + +```typescript +await queue.upsertJobScheduler( + 'sensor-poll', + { repeatAfterComplete: 5000 }, // 5s after previous completes + { name: 'poll', data: { sensor: 'temp-1' } }, +); +``` + +Mutually exclusive with `pattern` and `every`. 
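The difference between `every` and `repeatAfterComplete` comes down to which timestamp anchors the next run. A sketch of the two policies as documented (an illustration, not the internal `Scheduler` code):

```typescript
// For `every`, the cadence is anchored to the previous run's scheduled
// time, so a slow job can overlap the next one. For
// `repeatAfterComplete`, the next run is anchored to when the previous
// job finished, so runs never overlap.
function nextRunAt(
  policy: { every: number } | { repeatAfterComplete: number },
  prevScheduledAt: number, // ms epoch of the previous scheduled run
  prevCompletedAt: number, // ms epoch when the previous job completed
): number {
  return 'every' in policy
    ? prevScheduledAt + policy.every
    : prevCompletedAt + policy.repeatAfterComplete;
}
```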
+ +## Schedule Options + +| Option | Type | Description | +|--------|------|-------------| +| `pattern` | `string` | Cron expression | +| `every` | `number` (ms) | Fixed interval | +| `repeatAfterComplete` | `number` (ms) | Interval after previous job completes | +| `startDate` | `Date \| number` | Defer first run until this time | +| `endDate` | `Date \| number` | Auto-remove scheduler when next run exceeds this | +| `limit` | `number` | Auto-remove after creating this many jobs | +| `tz` | `string` | IANA timezone for cron patterns (e.g., `'America/New_York'`) | + +Only one of `pattern`, `every`, `repeatAfterComplete` per scheduler. + +## Bounded Schedulers + +```typescript +// Campaign window with max runs +await queue.upsertJobScheduler( + 'black-friday', + { + pattern: '0 */2 * * *', + startDate: new Date('2026-11-28T00:00:00Z'), + endDate: new Date('2026-12-01T00:00:00Z'), + limit: 36, + }, + { name: 'promote-deal', data: { campaign: 'bf' } }, +); + +// Interval with delayed start and hard stop +await queue.upsertJobScheduler( + 'warmup-cache', + { + every: 30_000, + startDate: Date.now() + 60_000, + endDate: new Date('2026-12-31'), + limit: 100, + }, + { name: 'warmup', data: { region: 'us-east' } }, +); +``` + +## Management + +```typescript +// List all schedulers +const schedulers = await queue.getRepeatableJobs(); +// Returns stored bounds + iterationCount + +// Get single scheduler details +const info = await queue.getJobScheduler('daily-report'); + +// Remove a scheduler (does not cancel in-flight jobs) +await queue.removeJobScheduler('cleanup'); + +// Upsert updates existing scheduler atomically +await queue.upsertJobScheduler('cleanup', { every: 10_000 }, { name: 'cleanup', data: {} }); +``` + +## Gotchas + +- `pattern`, `every`, `repeatAfterComplete` are mutually exclusive. +- `repeatAfterComplete` prevents overlap - next job only after current finishes or terminally fails. +- Scheduler ID is unique per queue. `upsert` replaces if exists. 
+- `removeJobScheduler` does not cancel jobs already in flight. +- Bounded options (`startDate`, `endDate`, `limit`) work with all three modes. +- Internal `Scheduler` class fires a promotion loop that converts due entries into real jobs. +- `getRepeatableJobs()` / `getJobScheduler()` expose `iterationCount` for inspection. diff --git a/skills/glide-mq/references/search.md b/skills/glide-mq/references/search.md new file mode 100644 index 0000000..f2b4dca --- /dev/null +++ b/skills/glide-mq/references/search.md @@ -0,0 +1,204 @@ +# Vector Search Reference + +Create Valkey Search indexes over job hashes for vector similarity search (KNN). + +Requires the `valkey-search` module loaded on the Valkey server (standalone mode only). + +## Creating an Index + +```typescript +import { Queue } from 'glide-mq'; +import type { Field } from 'glide-mq'; + +const queue = new Queue('embeddings', { connection }); + +// Minimal index (base fields only, no vector search) +await queue.createJobIndex(); + +// Index with vector field for KNN search +await queue.createJobIndex({ + name: 'embeddings-idx', // default: '{queueName}-idx' + vectorField: { + name: 'embedding', // field name in the job hash + dimensions: 1536, // vector dimensions (e.g. OpenAI ada-002) + algorithm: 'HNSW', // 'HNSW' (default) or 'FLAT' + distanceMetric: 'COSINE', // 'COSINE' (default), 'L2', or 'IP' + }, + fields: [ // additional schema fields + { type: 'TAG', name: 'category' } as Field, + { type: 'TEXT', name: 'summary' } as Field, + { type: 'NUMERIC', name: 'score' } as Field, + ], +}); +``` + +### Auto-Included Base Fields + +Every index automatically includes: + +| Field | Type | Description | +|-------|------|-------------| +| `name` | TAG | Job name | +| `state` | TAG | Job state (waiting, active, completed, etc.) 
| +| `timestamp` | NUMERIC | Job creation timestamp | +| `priority` | NUMERIC | Job priority | + +### JobIndexOptions + +```typescript +interface JobIndexOptions { + name?: string; // index name, default: '{queueName}-idx' + fields?: Field[]; // additional schema fields + vectorField?: { + name: string; // field name where vector is stored + dimensions: number; // vector dimensions + algorithm?: 'HNSW' | 'FLAT'; // default: 'HNSW' + distanceMetric?: 'COSINE' | 'L2' | 'IP'; // default: 'COSINE' + }; + createOptions?: IndexCreateOptions; // pass-through to FT.CREATE +} +``` + +### IndexCreateOptions + +```typescript +interface IndexCreateOptions { + score?: number; // default document score + language?: string; // default stemming language + skipInitialScan?: boolean; // skip indexing existing docs + minStemSize?: number; + withOffsets?: boolean; + noOffsets?: boolean; + noStopWords?: boolean; + stopWords?: string[]; + punctuation?: string; +} +``` + +## Vector Search (KNN) + +```typescript +// Generate an embedding for the query +const queryEmbedding = await openai.embeddings.create({ + model: 'text-embedding-ada-002', + input: 'machine learning optimization', +}); + +const results = await queue.vectorSearch( + queryEmbedding.data[0].embedding, // number[] or Float32Array + { + k: 10, // nearest neighbours (default: 10) + filter: '@state:{completed}', // pre-filter expression + indexName: 'embeddings-idx', // default: '{queueName}-idx' + scoreField: '__score', // score field name (default: '__score') + }, +); + +for (const { job, score } of results) { + console.log(`Job ${job.id}: ${job.name} (score: ${score})`); + console.log(' Data:', job.data); +} +``` + +### VectorSearchOptions + +```typescript +interface VectorSearchOptions { + indexName?: string; // default: '{queueName}-idx' + k?: number; // nearest neighbours (default: 10) + filter?: string; // pre-filter expression (default: '*') + scoreField?: string; // score field name (default: '__score') + 
searchOptions?: SearchQueryOptions; +} +``` + +### VectorSearchResult + +```typescript +interface VectorSearchResult { + job: Job; // fully hydrated Job object + score: number; // distance/similarity score +} +``` + +Score interpretation depends on distance metric: +- **COSINE**: 0 = identical, 2 = opposite (lower is more similar) +- **L2**: 0 = identical (lower is more similar) +- **IP** (inner product): higher is more similar + +### SearchQueryOptions + +```typescript +interface SearchQueryOptions { + nocontent?: boolean; // return only IDs + dialect?: number; // query dialect version + verbatim?: boolean; // disable stemming + inorder?: boolean; // proximity terms must be in order + slop?: number; // proximity matching slop + sortby?: { field: string; order?: 'ASC' | 'DESC' }; + scorer?: string; // scoring function name +} +``` + +## Storing Vectors in Jobs + +Create the job first, then store the embedding with `job.storeVector(...)`. This writes the raw FLOAT32 buffer to the job hash in the format Valkey Search expects. + +```typescript +// When adding jobs with embeddings +const embedding = await getEmbedding(text); +const job = await queue.add('document', { + text, + summary: 'A document about...', + category: 'research', +}); +if (job) { + await job.storeVector('embedding', embedding); +} +``` + +Testing mode provides parity via `TestJob.storeVector(...)`, `TestQueue.createJobIndex(...)`, and `TestQueue.vectorSearch(...)`. + +## Dropping an Index + +```typescript +// Drop by default name +await queue.dropJobIndex(); + +// Drop by custom name +await queue.dropJobIndex('embeddings-idx'); +``` + +Dropping an index does not delete the job hashes - only the search index is removed. 
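Since `storeVector()` is described above as writing the raw FLOAT32 buffer, the byte layout a vector field expects can be sketched in plain TypeScript - 4 bytes per dimension, little-endian on typical platforms. This is an illustration of the encoding, not the library's internal encoder:

```typescript
// Encode a number[] embedding as the FLOAT32 byte buffer that vector
// fields expect: 4 bytes per dimension.
function encodeVector(embedding: number[]): Buffer {
  return Buffer.from(new Float32Array(embedding).buffer);
}

// Decode back to numbers (lossy for values not representable in f32).
function decodeVector(buf: Buffer): number[] {
  return Array.from(
    new Float32Array(buf.buffer, buf.byteOffset, buf.byteLength / 4),
  );
}
```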
+ +## Pre-Filter Expressions + +Use Valkey Search query syntax for pre-filtering before KNN: + +```typescript +// Filter by state +await queue.vectorSearch(embedding, { filter: '@state:{completed}' }); + +// Filter by job name +await queue.vectorSearch(embedding, { filter: '@name:{summarize}' }); + +// Filter by priority range +await queue.vectorSearch(embedding, { filter: '@priority:[0 5]' }); + +// Combine filters +await queue.vectorSearch(embedding, { + filter: '@state:{completed} @name:{embed|summarize}', +}); + +// No filter (search all indexed jobs) +await queue.vectorSearch(embedding, { filter: '*' }); +``` + +## Gotchas + +- Requires `valkey-search` module loaded on the Valkey server (standalone mode only, not cluster). +- When no `vectorField` is specified in `createJobIndex()`, a minimal 2-dimensional placeholder vector field (`_vec`) is added because valkey-search requires at least one vector field. +- The index prefix is automatically scoped to this queue's job hashes. +- `dropJobIndex()` only removes the index, not the underlying job data. +- Vector search returns fully hydrated `Job` objects - each result triggers an HMGET to fetch the full job hash. +- The `Field` type is re-exported from `@glidemq/speedkey`. diff --git a/skills/glide-mq/references/serverless.md b/skills/glide-mq/references/serverless.md new file mode 100644 index 0000000..e820654 --- /dev/null +++ b/skills/glide-mq/references/serverless.md @@ -0,0 +1,223 @@ +# Serverless & Testing Reference + +## Producer (Lightweight Queue.add) + +No EventEmitter, no Job instances, no state tracking. Same FCALL functions as Queue. 
+ +```typescript +import { Producer } from 'glide-mq'; + +const producer = new Producer('emails', { + connection: ConnectionOptions, // required unless `client` provided + client?: Client, // pre-existing GLIDE client (not owned) + prefix?: string, // default: 'glide' + compression?: 'none' | 'gzip', // default: 'none' + serializer?: Serializer, // default: JSON + events?: boolean, // emit 'added' events (default: true, set false to save 1 call) +}); + +// Returns string ID (not Job object) or null for dedup/collision +const id = await producer.add('send-welcome', { to: 'user@example.com' }); +const id = await producer.add('urgent', data, { delay: 3600000, priority: 1 }); + +// Bulk - returns (string | null)[] +const ids = await producer.addBulk([ + { name: 'email', data: { to: 'a@test.com' } }, + { name: 'sms', data: { phone: '+123' } }, +]); + +await producer.close(); // if external client was provided, it is NOT closed +``` + +All `JobOptions` work: delay, priority, deduplication, jobId, ordering, ttl, lifo, cost. + +## ServerlessPool + +Reuses connections across warm Lambda/Edge invocations. 
+ +```typescript +import { serverlessPool, ServerlessPool } from 'glide-mq'; + +// Module-level singleton +const producer = serverlessPool.getProducer('notifications', { + connection: { addresses: [{ host: process.env.VALKEY_HOST!, port: 6379 }] }, +}); +await producer.add('push', { userId: 42 }); + +// Or create your own pool +const pool = new ServerlessPool(); +const p = pool.getProducer('queue', { connection }); +await pool.closeAll(); +``` + +### AWS Lambda Example + +```typescript +import { serverlessPool } from 'glide-mq'; + +const CONNECTION = { + addresses: [{ host: process.env.VALKEY_HOST!, port: 6379 }], +}; + +export async function handler(event: any) { + const producer = serverlessPool.getProducer('notifications', { + connection: CONNECTION, + }); + const id = await producer.add('push-notification', { + userId: event.userId, + message: event.message, + }); + return { statusCode: 200, body: JSON.stringify({ jobId: id }) }; +} + +process.on('SIGTERM', async () => { await serverlessPool.closeAll(); }); +``` + +### Connection Behavior + +- **Cold start**: creates new GLIDE connection + loads function library +- **Warm invocation**: returns cached producer (zero overhead) +- **Container freeze/thaw**: GLIDE auto-reconnects on next command + +## HTTP Proxy + +Express-based HTTP proxy for enqueueing, request-reply, queue telemetry, and SSE consumption from any language/environment. 
+ +```typescript +import { createProxyServer } from 'glide-mq/proxy'; + +const proxy = createProxyServer({ + connection: ConnectionOptions, // required unless client provided + client?: Client, // pre-existing GLIDE client + prefix?: string, // default: 'glide' + queues?: string[], // allowlist (403 for unlisted queues) + compression?: 'none' | 'gzip', + onError?: (err, queueName) => void, +}); + +proxy.app.listen(3000); +await proxy.close(); // shuts down all cached Queue instances +``` + +### Proxy Endpoints + +| Method | Path | Description | +|--------|------|-------------| +| POST | `/queues/:name/jobs` | Add single job `{ name, data?, opts? }` | +| POST | `/queues/:name/jobs/bulk` | Add bulk `{ jobs: [...] }` (max 1000) | +| GET | `/queues/:name/jobs?state=waiting` | List jobs by state (`waiting`, `active`, `delayed`, `completed`, `failed`) | +| POST | `/queues/:name/jobs/wait` | Add and wait for worker result `{ result }` | +| GET | `/queues/:name/jobs/:id` | Get job details | +| POST | `/queues/:name/jobs/:id/priority` | Change priority | +| POST | `/queues/:name/jobs/:id/delay` | Change delay | +| POST | `/queues/:name/jobs/:id/promote` | Promote delayed job immediately | +| GET | `/queues/:name/jobs/:id/stream` | SSE stream of `job.stream()` output with `Last-Event-ID` / `?lastId=` resume | +| POST | `/queues/:name/jobs/:id/signal` | Resume a suspended job with `{ name, data? 
}` | +| GET | `/queues/:name/events` | Queue-wide lifecycle SSE stream | +| GET | `/queues/:name/counts` | Get job counts | +| GET | `/queues/:name/metrics?type=completed` | Get minute-bucket metrics | +| GET | `/queues/:name/workers` | List live workers | +| POST | `/queues/:name/pause` | Pause queue | +| POST | `/queues/:name/resume` | Resume queue | +| POST | `/queues/:name/drain` | Drain waiting jobs (`?delayed=true` to include delayed) | +| POST | `/queues/:name/retry` | Retry failed jobs | +| DELETE | `/queues/:name/clean?state=completed&age=60` | Remove old completed/failed jobs | +| GET | `/queues/:name/schedulers` | List schedulers | +| GET | `/queues/:name/schedulers/:id` | Fetch one scheduler by name | +| PUT | `/queues/:name/schedulers/:id` | Upsert scheduler `{ schedule, template? }` | +| DELETE | `/queues/:name/schedulers/:id` | Remove scheduler | +| POST | `/flows` | Create a tree flow or DAG over HTTP. Body: `{ flow, budget? }` or `{ dag }` | +| GET | `/flows/:id` | Inspect flow snapshot (nodes, roots, counts, usage, budget) | +| GET | `/flows/:id/tree` | Inspect the nested tree view for a flow or DAG | +| DELETE | `/flows/:id` | Revoke or flag remaining jobs in a flow and remove the HTTP flow record | +| GET | `/queues/:name/flows/:parentId/usage` | Aggregate flow usage | +| GET | `/queues/:name/flows/:flowId/budget` | Read flow budget state | +| GET | `/usage/summary` | Rolling usage summary (`windowMs`, `start`, `end`, `queues=a,b`) | +| POST | `/broadcast/:name` | Publish broadcast `{ subject, data?, opts? }` | +| GET | `/broadcast/:name/events` | Broadcast SSE stream. Requires `subscription`, optional `subjects=a.*,b.>` | +| GET | `/health` | `{ status, uptime, queues }` | + +### Proxy Notes + +- Add your own auth/rate limiting middleware before exposing the proxy publicly. +- Queue-wide SSE and broadcast SSE require `connection`, not just `client`, because they allocate blocking readers. +- `queues` is an allowlist. 
Unlisted queue names return `403`. +- `POST /flows` supports FlowProducer-style trees and DAG payloads. HTTP budgets are currently supported for tree flows only. + +## Testing (In-Memory) + +No Valkey needed. Import from `glide-mq/testing`. + +```typescript +import { TestQueue, TestWorker } from 'glide-mq/testing'; + +const queue = new TestQueue('tasks'); // no connection config needed +const worker = new TestWorker(queue, async (job) => { + return { processed: job.data }; +}); + +worker.on('completed', (job, result) => { ... }); +worker.on('failed', (job, err) => { ... }); + +await queue.add('send-email', { to: 'user@example.com' }); +const counts = await queue.getJobCounts(); +// { waiting: 0, active: 0, delayed: 0, completed: 1, failed: 0 } + +await worker.close(); +await queue.close(); +``` + +### TestQueue API + +| Method | Notes | +|--------|-------| +| `add(name, data, opts?)` | Triggers processing immediately | +| `addBulk(jobs)` | Bulk add | +| `getJob(id)` | By ID | +| `getJobs(state, start?, end?)` | By state | +| `getJobCounts()` | `{ waiting, active, delayed, completed, failed }` | +| `searchJobs({ state?, name?, data? })` | Filter by state/name/data (shallow match) | +| `drain(delayed?)` | Remove waiting (+ delayed if true) | +| `pause()` / `resume()` | Pause/resume | +| `isPaused()` | Synchronous (note: real Queue is async) | + +### TestJob API + +| Method | Notes | +|--------|-------| +| `changePriority(n)` | Re-prioritize | +| `changeDelay(n)` | Change delay | +| `promote()` | Delayed -> waiting immediately | + +### TestWorker Events + +Same as Worker: `active`, `completed`, `failed`, `drained`. + +### Batch Testing + +```typescript +const worker = new TestWorker(queue, async (jobs) => { + return jobs.map(j => ({ doubled: j.data.n * 2 })); +}, { batch: { size: 5, timeout: 100 } }); +``` + +### Key Testing Behaviors + +- Processing is synchronous-ish - check state right after `await queue.add()`. 
+- Delayed jobs become waiting immediately (delay not honored in test mode). +- `moveToDelayed` not supported in test mode. +- Custom jobId returns `null` on duplicate (mirrors production). +- All three dedup modes (`simple`, `throttle`, `debounce`) work. +- Retries work normally with `attempts` and `backoff`. +- Swap without changing processors - same interface as Queue/Worker. + +## Gotchas + +- Producer returns `string` IDs, not `Job` objects. +- Producer `close()` does NOT close an externally provided `client`. +- `serverlessPool` is a module-level singleton - shared across handler invocations. +- HTTP proxy requires `express` as a peer dependency. +- Proxy `queues` option is an allowlist - unlisted names get 403, and the same allowlist applies to `/usage/summary?queues=...` and `/broadcast/:name`. +- Queue-wide/broadcast SSE proxy routes require `connection`, not only `client`. +- TestQueue `isPaused()` is synchronous (real Queue returns Promise). +- Test mode does not honor `delay` or `moveToDelayed`. 
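For completeness, the proxy endpoints above can be driven from any runtime with plain HTTP. A minimal sketch using Node 18+ `fetch` - the base URL, the queue name, and the assumption that the add-job response echoes the new job id are illustrative, not documented contract:

```typescript
// Hypothetical client for a proxy created with createProxyServer()
// and listening on localhost:3000; 'emails' must be on the `queues` allowlist.
const base = 'http://localhost:3000';

// Pure helper so the URL shape is easy to verify
const jobsUrl = (queue: string) =>
  `${base}/queues/${encodeURIComponent(queue)}/jobs`;

async function enqueue(queue: string, name: string, data: unknown) {
  // POST /queues/:name/jobs with body { name, data?, opts? }
  const res = await fetch(jobsUrl(queue), {
    method: 'POST',
    headers: { 'content-type': 'application/json' },
    body: JSON.stringify({ name, data }),
  });
  if (res.status === 403) throw new Error(`queue '${queue}' is not allowlisted`);
  return res.json(); // assumed to include the new job id
}

// usage (requires a running proxy):
// await enqueue('emails', 'send-welcome', { to: 'user@example.com' });
```

Remember the proxy ships with no auth; put your own middleware in front before calling it from untrusted clients.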
diff --git a/skills/glide-mq/references/worker.md b/skills/glide-mq/references/worker.md
new file mode 100644
index 0000000..9f6b511
--- /dev/null
+++ b/skills/glide-mq/references/worker.md
@@ -0,0 +1,264 @@
# Worker Reference

## Constructor

```typescript
import { Worker } from 'glide-mq';

const worker = new Worker(
  'tasks', // queue name
  async (job) => { // processor function
    // job.data, job.name, job.id, job.opts
    await job.log('step done');
    await job.updateProgress(50); // 0-100 or object
    await job.updateData({ ...job.data, enriched: true });
    return { ok: true }; // becomes job.returnvalue
  },
  {
    connection: ConnectionOptions, // required (even if commandClient provided)
    commandClient?: Client, // shared client for non-blocking ops (alias: client)
    concurrency?: number, // parallel jobs (default: 1)
    blockTimeout?: number, // XREADGROUP BLOCK ms (default: 5000)
    stalledInterval?: number, // stall check interval ms (default: 30000)
    lockDuration?: number, // stall detection window per job ms (default: 30000)
    maxStalledCount?: number, // max stall recoveries before fail
    limiter?: { max, duration }, // rate limit per worker
    deadLetterQueue?: { name: string }, // inherited from QueueOptions - usually set on Queue
    events?: boolean, // emit completed/failed events (default: true)
    metrics?: boolean, // record metrics (default: true)
    prefix?: string,
    serializer?: Serializer,
    tokenLimiter?: {
      maxTokens: number, // max tokens per window
      duration: number, // window duration in ms
      scope?: 'queue' | 'worker' | 'both', // default: 'both'
    },
    backoffStrategies?: Record<string, (attemptsMade: number) => number>, // name -> retry delay ms
  },
);
```

## Batch Processing

```typescript
import { Worker, BatchError } from 'glide-mq';

const worker = new Worker(
  'bulk-insert',
  async (jobs) => { // receives Job[] in batch mode
    const results = await db.insertMany(jobs.map(j => j.data));
    return results; // must return R[] with length === jobs.length
  },
  {
connection, + batch: { + size: 50, // max jobs per batch (1-1000) + timeout: 1000, // ms to wait for full batch (optional) + }, + }, +); + +// Partial failures - report per-job outcomes +async (jobs) => { + const results = await Promise.allSettled(jobs.map(processOne)); + const mapped = results.map(r => r.status === 'fulfilled' ? r.value : r.reason); + if (mapped.some(r => r instanceof Error)) { + throw new BatchError(mapped); // each job individually completed/failed + } + return mapped; +}; +``` + +## Worker Events + +| Event | Arguments | Description | +|-------|-----------|-------------| +| `active` | `(job, jobId)` | Job started processing | +| `completed` | `(job, result)` | Job finished successfully | +| `failed` | `(job, err)` | Job threw or timed out | +| `error` | `(err)` | Internal worker error (connection issues) | +| `stalled` | `(jobId)` | Job exceeded lockDuration, re-queued | +| `drained` | `()` | Queue transitioned from non-empty to empty | +| `closing` | `()` | Worker beginning to close | +| `closed` | `()` | Worker fully closed | + +```typescript +worker.on('completed', (job, result) => { ... }); +worker.on('failed', (job, err) => { ... }); +worker.on('error', (err) => { ... }); +worker.on('stalled', (jobId) => { ... }); +``` + +## Stall Detection + +- Worker extends job lock every `lockRenewTime` (default: lockDuration/2). +- If lock expires (job exceeds `lockDuration` without renewal), job is stalled. +- Stalled jobs are re-queued up to `maxStalledCount` times, then failed. +- Check interval controlled by `stalledInterval`. + +## LIFO Mode + +Workers check sources in order: **priority > LIFO > FIFO**. +Add jobs with `{ lifo: true }` to process newest first. +LIFO uses a dedicated Valkey LIST separate from the FIFO stream. 
+ +## Job Revocation (AbortSignal) + +```typescript +// Queue-side: revoke a job +const result = await queue.revoke(job.id); +// 'revoked' - was waiting/delayed, now failed +// 'flagged' - active, worker will abort cooperatively +// 'not_found' - job does not exist + +// Worker-side: check for revocation +const worker = new Worker('tasks', async (job) => { + for (const chunk of dataset) { + if (job.abortSignal?.aborted) throw new Error('Revoked'); + await processChunk(chunk); + } +}, { connection }); +``` + +`job.abortSignal` is a standard `AbortSignal` - pass to `fetch`, `axios`, etc. + +## Pause / Resume / Close + +```typescript +await worker.pause(); // stop accepting new jobs (active finish) +await worker.pause(true); // force-stop immediately +await worker.resume(); + +await worker.close(); // graceful: waits for active jobs +await worker.close(true); // force-close immediately +``` + +## AI Usage & Token Tracking + +```typescript +const worker = new Worker('inference', async (job) => { + const result = await callLLM(job.data.prompt); + + // Report AI usage metadata (persisted to job hash, emits 'usage' event) + await job.reportUsage({ + model: 'gpt-5.4', + provider: 'openai', + tokens: { input: result.promptTokens, output: result.completionTokens }, + costs: { total: 0.003 }, + costUnit: 'usd', + latencyMs: 800, + }); + + // Or report just tokens for TPM rate limiting + await job.reportTokens(result.totalTokens); + + return result.content; +}, { + connection, + limiter: { max: 60, duration: 60_000 }, // RPM limit + tokenLimiter: { maxTokens: 100_000, duration: 60_000 }, // TPM limit +}); +``` + +Worker pauses fetching when either RPM limiter or TPM tokenLimiter is exceeded. + +## Token Streaming + +```typescript +const worker = new Worker('chat', async (job) => { + const stream = await openai.chat.completions.create({ stream: true, ... 
}); + for await (const chunk of stream) { + const token = chunk.choices[0]?.delta?.content; + if (token) { + await job.stream({ token }); // XADD to per-job stream + } + } + return { done: true }; +}, { connection }); +``` + +Consumers read via `queue.readStream(jobId, opts)`. + +## Suspend / Resume (Human-in-the-Loop) + +```typescript +const worker = new Worker('review', async (job) => { + // On resume, signals are populated + if (job.signals.length > 0) { + const approval = job.signals.find(s => s.name === 'approve'); + if (approval) return { approved: true }; + return { rejected: true }; + } + + // First run - suspend for human review + await job.suspend({ reason: 'Needs approval', timeout: 86_400_000 }); + // throws SuspendError - no code after this executes +}, { connection }); +``` + +Resume externally via `queue.signal(jobId, 'approve', { ... })`. + +## Fallback Chains + +```typescript +const worker = new Worker('inference', async (job) => { + const fallback = job.currentFallback; + // undefined on first attempt, then fallbacks[0], fallbacks[1], etc. + const model = fallback?.model ?? 'gpt-5.4-nano'; + return await callLLM(model, job.data.prompt); +}, { connection }); +``` + +Set via `queue.add('inference', data, { fallbacks: [...], attempts: 4 })`. + +## Skipping Retries + +```typescript +import { UnrecoverableError } from 'glide-mq'; + +// Option 1: UnrecoverableError - skips all remaining retries +throw new UnrecoverableError('bad input'); + +// Option 2: job.discard() + throw - same effect +job.discard(); +throw new Error('discarded'); +``` + +## Step Jobs (moveToDelayed) + +```typescript +const worker = new Worker('drip', async (job) => { + switch (job.data.step) { + case 'send': + await sendEmail(job.data); + return job.moveToDelayed(Date.now() + 86400_000, 'check'); + case 'check': + return 'done'; + } +}, { connection }); +``` + +`moveToDelayed(timestampMs, nextStep?)` - pauses job until timestamp, optionally updates `job.data.step`. 
+ +## Graceful Shutdown + +```typescript +import { gracefulShutdown } from 'glide-mq'; +// Returns a handle that auto-registers SIGTERM/SIGINT handlers. +// await blocks until a signal fires. For manual shutdown: handle.shutdown() +const handle = gracefulShutdown([queue, worker, events]); +await handle.shutdown(); // programmatic trigger +``` + +## Gotchas + +- Worker **always requires `connection`** even with `commandClient` - blocking client is auto-created. +- `commandClient` and `client` are aliases - provide one, not both. +- Don't close shared client while worker is alive. Close worker first. +- Batch processor must return array with length === jobs.length. +- `moveToDelayed()` must be called from active processor. Throws `DelayedError` internally. +- `job.suspend()` throws `SuspendError` internally - no code after it executes. +- `job.reportUsage()` and `job.reportTokens()` reject negative values. +- `reportTokens()` overwrites previous value (does not accumulate). +- `tokenLimiter` scope `'both'` checks local counter first, then Valkey (optimal for most setups). +- Fallback chains require `attempts >= fallbacks.length + 1`. diff --git a/skills/glide-mq/references/workflows.md b/skills/glide-mq/references/workflows.md new file mode 100644 index 0000000..a73879e --- /dev/null +++ b/skills/glide-mq/references/workflows.md @@ -0,0 +1,260 @@ +# Workflows Reference + +## FlowProducer + +Atomically enqueues a tree of parent-child jobs. Parent only runs after **all** children complete. 
+ +```typescript +import { FlowProducer } from 'glide-mq'; + +const flow = new FlowProducer({ connection }); +// Also accepts: { client } for shared client + +const { job: parent } = await flow.add({ + name: 'aggregate', + queueName: 'reports', + data: { month: '2025-01' }, + children: [ + { name: 'fetch-sales', queueName: 'data', data: { region: 'eu' } }, + { name: 'fetch-returns', queueName: 'data', data: {} }, + { + name: 'fetch-inventory', queueName: 'data', data: {}, + children: [ // nested children supported + { name: 'load-a', queueName: 'data', data: {} }, + ], + }, + ], +}); + +await flow.close(); +``` + +### FlowJob Structure + +```typescript +interface FlowJob { + name: string; + queueName: string; + data: any; + opts?: JobOptions; + children?: FlowJob[]; +} +``` + +### Bulk Flows + +```typescript +const nodes = await flow.addBulk([ + { name: 'report-jan', queueName: 'reports', data: {}, children: [...] }, + { name: 'report-feb', queueName: 'reports', data: {}, children: [...] }, +]); +``` + +### Reading Child Results + +```typescript +const worker = new Worker('reports', async (job) => { + const childValues = await job.getChildrenValues(); + // Keys are opaque internal IDs - use Object.values() + const results = Object.values(childValues); + return { total: results.reduce((s, v) => s + v.count, 0) }; +}, { connection }); +``` + +## DAG Workflows (Multiple Parents) + +`addDAG()` supports arbitrary DAG topologies where a job can depend on multiple parents. 
+ +```typescript +import { FlowProducer, dag } from 'glide-mq'; + +// Helper function (simpler API) +const jobs = await dag([ + { name: 'A', queueName: 'tasks', data: { step: 1 } }, + { name: 'B', queueName: 'tasks', data: { step: 2 }, deps: ['A'] }, + { name: 'C', queueName: 'tasks', data: { step: 3 }, deps: ['A'] }, + { name: 'D', queueName: 'tasks', data: { step: 4 }, deps: ['B', 'C'] }, // fan-in +], connection); + +// Or via FlowProducer directly +const flow = new FlowProducer({ connection }); +const jobs = await flow.addDAG({ + nodes: [ + { name: 'A', queueName: 'tasks', data: {}, deps: [] }, + { name: 'B', queueName: 'tasks', data: {}, deps: ['A'] }, + { name: 'C', queueName: 'tasks', data: {}, deps: ['A'] }, + { name: 'D', queueName: 'tasks', data: {}, deps: ['B', 'C'] }, + ], +}); +// Returns Map keyed by node name +``` + +### DAGNode + +- `name` - unique within the DAG (used in `deps`) +- `queueName` - target queue +- `data` - payload +- `opts?` - JobOptions +- `deps?` - array of node names that must complete first + +### Reading Multiple Parent Results + +```typescript +const worker = new Worker('tasks', async (job) => { + if (job.name === 'D') { + const parents = await job.getParents(); + // Returns { queue, id }[] - not Job instances + // Fetch full jobs if needed: + const parentJobs = await Promise.all( + parents.map(p => new Queue(p.queue, { connection }).getJob(p.id)) + ); + const results = parentJobs.map(p => p.returnvalue); + return { merged: results }; + } +}, { connection }); +``` + +## Convenience Helpers + +### chain() - Sequential Pipeline + +Array is in **reverse execution order** (last element runs first). 
+ +```typescript +import { chain } from 'glide-mq'; + +// Execution: download -> parse -> transform -> upload +await chain('pipeline', [ + { name: 'upload', data: {} }, // runs LAST (root) + { name: 'transform', data: {} }, + { name: 'parse', data: {} }, + { name: 'download', data: {} }, // runs FIRST (leaf) +], connection); +``` + +### group() - Parallel Execution + +```typescript +import { group } from 'glide-mq'; + +await group('tasks', [ + { name: 'resize-sm', data: { size: 'sm' } }, + { name: 'resize-md', data: { size: 'md' } }, + { name: 'resize-lg', data: { size: 'lg' } }, +], connection); +// Creates synthetic __group__ parent that waits for all children +``` + +### chord() - Parallel + Callback + +```typescript +import { chord } from 'glide-mq'; + +await chord( + 'tasks', + // Group (parallel) + [ + { name: 'score-a', data: { model: 'a' } }, + { name: 'score-b', data: { model: 'b' } }, + ], + // Callback (after group completes) + { name: 'select-best', data: {} }, + connection, +); +``` + +## Dynamic Children (moveToWaitingChildren) + +Spawn children at runtime, then pause parent until they complete. + +```typescript +import { Queue, Worker, WaitingChildrenError } from 'glide-mq'; + +const worker = new Worker('orchestrator', async (job) => { + // Detect re-entry + const existing = await job.getChildrenValues(); + if (Object.keys(existing).length > 0) { + return { merged: Object.values(existing) }; // aggregate results + } + + // Spawn children dynamically + const childQueue = new Queue('subtasks', { connection }); + for (const url of job.data.urls) { + await childQueue.add('fetch', { url }, { + parent: { id: job.id!, queue: job.queueQualifiedName }, + }); + } + await childQueue.close(); + + // Pause until all children complete - throws WaitingChildrenError + await job.moveToWaitingChildren(); +}, { connection }); +``` + +## Budget on Flows + +Cap total token usage and/or cost across all jobs in a flow tree. Supports per-category limits and weighted totals. 
+ +```typescript +const flow = new FlowProducer({ connection }); + +await flow.add( + { + name: 'research', + queueName: 'ai', + data: { topic: 'quantum computing' }, + children: [ + { name: 'search', queueName: 'ai', data: {} }, + { name: 'summarize', queueName: 'ai', data: {} }, + ], + }, + { + budget: { + maxTotalTokens: 50_000, + maxTotalCost: 0.50, + costUnit: 'usd', + tokenWeights: { reasoning: 4, cachedInput: 0.25 }, + onExceeded: 'fail', // 'fail' (default) or 'pause' + }, + }, +); + +// Check budget state +const budget = await queue.getFlowBudget(parentJobId); +// { maxTotalTokens, maxTokens, tokenWeights, maxTotalCost, maxCosts, costUnit, +// usedTokens, usedCost, exceeded, onExceeded } +``` + +Budget is propagated to every job in the flow via a `budgetKey` field. + +## Suspend / Resume as Workflow Primitive + +Suspend a job in a flow to await human approval, then resume and continue the pipeline. + +```typescript +const worker = new Worker('ai', async (job) => { + if (job.name === 'review') { + if (job.signals.length > 0) { + return { approved: job.signals.some(s => s.name === 'approve') }; + } + await job.suspend({ reason: 'Human review required', timeout: 86_400_000 }); + } + // other job types... +}, { connection }); + +// Resume externally +await queue.signal(jobId, 'approve', { reviewer: 'alice' }); +``` + +When a suspended job resumes, it re-enters the stream and the processor is invoked again with `job.signals` populated. The parent flow continues once all children (including the resumed one) complete. + +## Gotchas + +- `chain()` array is **reverse execution order** - last element is leaf (runs first). +- `moveToWaitingChildren()` always throws `WaitingChildrenError`. No code after it executes. +- Processor re-runs **from the top** when children complete. Use `getChildrenValues()` to detect re-entry. +- Children must reference parent via `opts.parent: { id, queue }`. +- Cycles in DAGs are detected and rejected with `CycleError`. 
+- If a parent in a DAG fails, dependent jobs remain blocked indefinitely. +- `FlowProducer.add()` throws on duplicate jobId (cannot be partially created). +- Cross-queue dependencies are supported - each DAG node can have its own `queueName`.