From 44d9fa832c3c7bfc18f7b7b067ab5eec7efc7fa2 Mon Sep 17 00:00:00 2001 From: Jimmy Lai Date: Wed, 11 Feb 2026 13:44:26 -0800 Subject: [PATCH 1/3] docs: agent skills, pr-status script, and AGENTS.md updates --- .agents/skills/README.md | 115 ++++++++ .agents/skills/authoring-skills/SKILL.md | 114 ++++++++ .agents/skills/dce-edge/SKILL.md | 66 +++++ .agents/skills/flags/SKILL.md | 43 +++ .agents/skills/pr-status-triage/SKILL.md | 33 +++ .../skills/pr-status-triage/local-repro.md | 30 +++ .agents/skills/pr-status-triage/workflow.md | 27 ++ .agents/skills/react-vendoring/SKILL.md | 70 +++++ .agents/skills/runtime-debug/SKILL.md | 54 ++++ .claude/commands/pr-status.md | 9 + AGENTS.md | 169 ++++++++++-- scripts/pr-status.js | 255 +++++++++++++++++- 12 files changed, 963 insertions(+), 22 deletions(-) create mode 100644 .agents/skills/README.md create mode 100644 .agents/skills/authoring-skills/SKILL.md create mode 100644 .agents/skills/dce-edge/SKILL.md create mode 100644 .agents/skills/flags/SKILL.md create mode 100644 .agents/skills/pr-status-triage/SKILL.md create mode 100644 .agents/skills/pr-status-triage/local-repro.md create mode 100644 .agents/skills/pr-status-triage/workflow.md create mode 100644 .agents/skills/react-vendoring/SKILL.md create mode 100644 .agents/skills/runtime-debug/SKILL.md diff --git a/.agents/skills/README.md b/.agents/skills/README.md new file mode 100644 index 000000000000..a96f846e4e3c --- /dev/null +++ b/.agents/skills/README.md @@ -0,0 +1,115 @@ +# Skills Authoring Guide + +Skills are on-demand context files that Claude loads when relevant. They extend `AGENTS.md` with deep-dive workflows, code templates, and verification steps. 
+ +## When to Create a Skill + +Create a skill when content is: +- **Too detailed for AGENTS.md** (code templates, multi-step workflows, diagnostic procedures) +- **Only relevant for specific tasks** (not every session needs it) +- **Self-contained enough to load independently** + +Do NOT create a skill for: +- One-liner rules or guardrails (keep those in AGENTS.md) +- Content every agent session needs (that's what AGENTS.md is for) +- Simple facts without actionable steps + +## File Structure + +``` +.agents/skills/ +├── my-skill/ +│ ├── SKILL.md # Required: frontmatter + content +│ ├── workflow.md # Optional: supplementary files +│ └── examples.md # Optional: referenced from SKILL.md +└── README.md # This file +``` + +## SKILL.md Format + +```yaml +--- +name: my-skill +description: > + What this skill covers and when to use it. Include key file names, + concepts, and trigger phrases so Claude can match user intent to this + skill. This is the primary field Claude uses for auto-activation. +--- +``` + +### Supported Frontmatter Fields + +| Field | Required | Description | +|-------|----------|-------------| +| `name` | Yes | Skill name, used for `$name` references and `/name` slash commands | +| `description` | Yes | What the skill does and when to use it. **This is how Claude decides to auto-load the skill.** Include file names, concepts, and keywords. 
| +| `argument-hint` | No | Hint for expected arguments in autocomplete | +| `user-invocable` | No | Set to `false` to hide from `/` slash command menu | +| `disable-model-invocation` | No | Set to `true` to prevent Claude from auto-triggering this skill | +| `allowed-tools` | No | Tools Claude can use without permission when this skill is active | +| `model` | No | Model override for this skill | +| `context` | No | Set to `fork` for isolated subagent execution | +| `agent` | No | Subagent type to use with `context: fork` | +| `hooks` | No | Hooks scoped to this skill's lifecycle | + +Only use fields from this table. Unknown fields are ignored by Claude Code. + +### Writing Good Descriptions + +The `description` is the single most important field. Claude uses it to decide when to auto-load the skill. Include: + +- **What the skill covers** (the topic) +- **When to use it** (the trigger scenario) +- **Key file names** mentioned in the skill (e.g. `config-shared.ts`, `entry-base.ts`) +- **Key concepts/keywords** a user or agent might mention (e.g. "DCE", "feature flag", "vendored React") + +```yaml +# Bad: too vague, won't match well +description: Helps with flags. + +# Good: specific, includes file names and keywords +description: > + How to add or modify Next.js experimental feature flags end-to-end. + Use when editing config-shared.ts, config-schema.ts, define-env-plugin.ts, + next-server.ts, export/worker.ts, or module.compiled.js. +``` + +## Content Guidelines + +### Relationship to AGENTS.md + +AGENTS.md holds **always-loaded guardrails** (one-liner rules every session needs). Skills hold **deep-dive content** loaded on demand. 
+ +- AGENTS.md should have a one-liner version of any critical rule +- Skills expand on those rules with verification steps, code examples, and context +- AGENTS.md points to skills via `$skill-name` references +- Skills should not duplicate AGENTS.md content; they should go deeper + +### Structure a Skill for Action + +Skills should tell the agent what to **do**, not just what to **know**: + +- Lead with a clear "Use this skill when..." statement +- Include step-by-step procedures where applicable +- Add code templates ready to adapt +- End with verification commands +- Cross-reference related skills with a "Related Skills" section + +### Naming + +- Use short, descriptive names scoped to the topic: `flags`, `dce-edge`, `react-vendoring` +- No repo-name prefix (skills are already scoped to this repo by being in `.agents/skills/`) +- Use hyphens for multi-word names + +### Supplementary Files + +For complex skills, split into a hub SKILL.md + detail files: + +``` +pr-status-triage/ +├── SKILL.md # Overview + quick commands +├── workflow.md # Detailed prioritization and patterns +└── local-repro.md # CI env matching guide +``` + +Reference detail files from SKILL.md with relative links. Keep SKILL.md scannable as an entry point. diff --git a/.agents/skills/authoring-skills/SKILL.md b/.agents/skills/authoring-skills/SKILL.md new file mode 100644 index 000000000000..8e420838f330 --- /dev/null +++ b/.agents/skills/authoring-skills/SKILL.md @@ -0,0 +1,114 @@ +--- +name: authoring-skills +description: > + How to create and maintain agent skills in .agents/skills/. Use when + creating a new SKILL.md, writing skill descriptions, choosing frontmatter + fields, or deciding what content belongs in a skill vs AGENTS.md. + Covers the supported spec fields, description writing, naming conventions, + and the relationship between always-loaded AGENTS.md and on-demand skills. 
+user-invocable: false +--- + +# Authoring Skills + +Use this skill when creating or modifying agent skills in `.agents/skills/`. + +## When to Create a Skill + +Create a skill when content is: + +- Too detailed for AGENTS.md (code templates, multi-step workflows, diagnostic procedures) +- Only relevant for specific tasks (not needed every session) +- Self-contained enough to load independently + +Keep in AGENTS.md instead when: + +- It's a one-liner rule or guardrail every session needs +- It's a general-purpose gotcha any agent could hit + +## File Structure + +``` +.agents/skills/ +└── my-skill/ + ├── SKILL.md # Required: frontmatter + content + ├── workflow.md # Optional: supplementary detail + └── examples.md # Optional: referenced from SKILL.md +``` + +## Supported Frontmatter Fields + +```yaml +--- +name: my-skill # Required. Used for $name references and /name commands. +description: > # Required. How Claude decides to auto-load the skill. + What this covers and when to use it. Include file names and keywords. +argument-hint: '' # Optional. Hint for expected arguments. +user-invocable: false # Optional. Set false to hide from / menu. +disable-model-invocation: true # Optional. Set true to prevent auto-triggering. +allowed-tools: [Bash, Read] # Optional. Tools allowed without permission. +model: opus # Optional. Model override. +context: fork # Optional. Isolated subagent execution. +agent: Explore # Optional. Subagent type (with context: fork). +--- +``` + +Only use fields from this list. Unknown fields are silently ignored. + +## Writing Descriptions + +The `description` is the primary matching surface for auto-activation. Include: + +1. **What the skill covers** (topic) +2. **When to use it** (trigger scenario) +3. **Key file names** the skill references (e.g. `config-shared.ts`) +4. **Keywords** a user or agent might mention (e.g. "feature flag", "DCE") + +```yaml +# Too vague - won't auto-trigger reliably +description: Helps with flags. 
+ +# Good - specific files and concepts for matching +description: > + How to add or modify Next.js experimental feature flags end-to-end. + Use when editing config-shared.ts, config-schema.ts, define-env-plugin.ts. +``` + +## Content Conventions + +### Structure for Action + +Skills should tell the agent what to **do**, not just what to **know**: + +- Lead with "Use this skill when..." +- Include step-by-step procedures +- Add code templates ready to adapt +- End with verification commands +- Cross-reference related skills in a "Related Skills" section + +### Relationship to AGENTS.md + +| AGENTS.md (always loaded) | Skills (on demand) | +| --------------------------------------- | ---------------------------------------------------------------------- | +| One-liner guardrails | Step-by-step workflows | +| "Keep require() behind if/else for DCE" | Full DCE pattern with code examples, verification commands, edge cases | +| Points to skills via `$name` | Expands on AGENTS.md rules | + +When adding a skill, also add a one-liner summary to the relevant AGENTS.md section with a `$skill-name` reference. + +### Naming + +- Short, descriptive, topic-scoped: `flags`, `dce-edge`, `react-vendoring` +- No repo prefix (already scoped by `.agents/skills/`) +- Hyphens for multi-word names + +### Supplementary Files + +For complex skills, use a hub + detail pattern: + +``` +pr-status-triage/ +├── SKILL.md # Overview, quick commands, links to details +├── workflow.md # Prioritization and patterns +└── local-repro.md # CI env matching +``` diff --git a/.agents/skills/dce-edge/SKILL.md b/.agents/skills/dce-edge/SKILL.md new file mode 100644 index 000000000000..4030715f92d9 --- /dev/null +++ b/.agents/skills/dce-edge/SKILL.md @@ -0,0 +1,66 @@ +--- +name: dce-edge +description: > + DCE-safe require() patterns and edge runtime constraints. 
Use when writing + conditional require() calls, guarding Node-only imports (node:stream etc.), + or editing define-env-plugin.ts / app-render / stream-utils for edge builds. + Covers if/else branching for webpack DCE, TypeScript definite assignment, + the NEXT_RUNTIME vs real feature flag distinction, and forcing flags false + for edge in define-env.ts. +--- + +# DCE + Edge + +Use this skill when changing conditional `require()` paths, Node-only imports, or edge/runtime branching. + +## DCE-Safe `require()` Pattern + +Webpack only DCEs a `require()` when it sits inside the dead branch of an `if/else` whose condition DefinePlugin can evaluate at compile time. + +```ts +// CORRECT - webpack can eliminate the dead branch +if (process.env.__NEXT_USE_NODE_STREAMS) { + require('node:stream') +} else { + // web path +} +``` + +What does NOT work: + +- **Early-return/throw guards**: webpack doesn't do control-flow analysis for throws/returns, so the `require()` is still traced. +- **Bare `if` without `else`**: works for inline `node:*` specifiers but NOT for `require('./some-module')` that pulls a new file into the module graph. + +Always test edge changes with `pnpm test-start-webpack` on `test/e2e/app-dir/app/standalone.test.ts` (has edge routes), not with `NEXT_SKIP_ISOLATE=1` which skips the full webpack compilation. + +## TypeScript + DCE Interaction + +Use `if/else` (not two independent `if` blocks) when assigning a variable conditionally on `process.env.X`. TypeScript cannot prove exhaustiveness across `if (flag) { x = a }; if (!flag) { x = b }` and will error with "variable used before being assigned". The `if/else` pattern satisfies both TypeScript (definite assignment) and webpack DCE. + +## Compile-Time Switcher Pattern + +Platform-specific code (node vs web) can use a single `.ts` switcher module that conditionally `require()`s either `.node.ts` or `.web.ts` into a typed variable, then re-exports the shared runtime API as named exports. 
Keep the branch as `if/else` so DefinePlugin can dead-code-eliminate the unused `require()`. Keep shared types canonical in `.node.ts`, with `.web.ts` importing them via `import type` and the switcher re-exporting types as needed. Examples: `stream-ops.ts` and `debug-channel-server.ts`. + +## `NEXT_RUNTIME` Is Not a Feature Flag + +In user-project webpack server compilers, `process.env.NEXT_RUNTIME` is inlined to `'nodejs'`. Guarding Node-only `require('node:*')` paths with `NEXT_RUNTIME === 'nodejs'` does **not** prune anything. For feature-gated codepaths, guard on the real feature define (e.g. `process.env.__NEXT_USE_NODE_STREAMS`). + +## Edge Runtime Constraints + +Edge routes do NOT use pre-compiled runtime bundles. They are compiled by the user's webpack/Turbopack, so `define-env.ts` controls DCE. Feature flags that gate `node:*` imports must be forced to `false` for edge builds in `define-env.ts` (`isEdgeServer ? false : flagValue`), otherwise webpack will try to resolve `node:stream` etc. and fail. + +## `app-page.ts` Template Gotchas + +- `app-page.ts` is a build template compiled by the user's bundler. Any `require()` in this file is traced by webpack/turbopack at `next build` time. You cannot require internal modules with relative paths because they won't be resolvable from the user's project. Instead, export new helpers from `entry-base.ts` and access them via `entryBase.*` in the template. +- Template helpers should stay out of `RenderResult`. If `app-page.ts` needs a Node-stream-only utility, prefer a small dedicated helper module in `server/stream-utils/` (with DCE-safe `if/else` + `require()`). 
+ +## Verification + +- Validate edge bundling regressions with `pnpm test-start-webpack test/e2e/app-dir/app/standalone.test.ts` +- For module-resolution/build-graph fixes, verify without `NEXT_SKIP_ISOLATE=1` + +## Related Skills + +- `$flags` - flag wiring (config/schema/define-env/runtime env) +- `$react-vendoring` - entry-base boundaries and vendored React +- `$runtime-debug` - reproduction and verification workflow diff --git a/.agents/skills/flags/SKILL.md b/.agents/skills/flags/SKILL.md new file mode 100644 index 000000000000..d8ec97a57f61 --- /dev/null +++ b/.agents/skills/flags/SKILL.md @@ -0,0 +1,43 @@ +--- +name: flags +description: > + How to add or modify Next.js experimental feature flags end-to-end. + Use when editing config-shared.ts, config-schema.ts, define-env-plugin.ts, + next-server.ts, export/worker.ts, or module.compiled.js. Covers type + declaration, zod schema, build-time injection, runtime env plumbing, + and the decision between runtime env-var branching vs separate bundle variants. +--- + +# Feature Flags + +Use this skill when adding or changing framework feature flags in Next.js internals. + +## Required Wiring + +All flags need: `config-shared.ts` (type) → `config-schema.ts` (zod). If the flag is consumed in user-bundled code (client components, edge routes, `app-page.ts` template), also add it to `define-env.ts` for build-time injection. Runtime-only flags consumed exclusively in pre-compiled bundles can skip `define-env.ts`. + +## Where the Flag Is Consumed + +**Client/bundled code only** (e.g. `__NEXT_PPR` in client components): `define-env.ts` is sufficient. Webpack/Turbopack replaces `process.env.X` at the user's build time. + +**Pre-compiled runtime bundles** (e.g. code in `app-render.tsx`): The flag must also be set as a real `process.env` var at runtime, because `app-render.tsx` runs from pre-compiled bundles where `define-env.ts` doesn't reach. 
Two approaches: + +- **Runtime env var**: Set in `next-server.ts` + `export/worker.ts`. Both code paths stay in one bundle. Simple but increases bundle size. +- **Separate bundle variant**: Add DefinePlugin entry in `next-runtime.webpack-config.js` (scoped to `bundleType === 'app'`), new taskfile tasks, update `module.compiled.js` selector, and still set env var in `next-server.ts` + `export/worker.ts` for bundle selection. Eliminates dead code but adds build complexity. + +For runtime flags, also add the field to the `NextConfigRuntime` Pick type in `config-shared.ts`. + +## Runtime-Bundle Model + +- Runtime bundles are built by `next-runtime.webpack-config.js` (rspack) via `taskfile.js` bundle tasks. +- Bundle selection occurs at runtime in `src/server/route-modules/app-page/module.compiled.js` based on `process.env` vars. +- Variants: `{turbo/webpack} × {experimental/stable/nodestreams/experimental-nodestreams} × {dev/prod}` = up to 16 bundles per route type. +- `define-env.ts` affects user bundling, not pre-compiled runtime internals. +- `process.env.X` checks in `app-render.tsx` are either replaced by DefinePlugin at runtime-bundle-build time, or read as actual env vars at server startup. They are NOT affected by the user's defines from `define-env.ts`. +- **Gotcha**: DefinePlugin entries in `next-runtime.webpack-config.js` must be scoped to the correct `bundleType` (e.g. `app` only, not `server`) to avoid replacing assignment targets in `next-server.ts`. 
+ +## Related Skills + +- `$dce-edge` - DCE-safe require patterns and edge constraints +- `$react-vendoring` - entry-base boundaries and vendored React +- `$runtime-debug` - reproduction and verification workflow diff --git a/.agents/skills/pr-status-triage/SKILL.md b/.agents/skills/pr-status-triage/SKILL.md new file mode 100644 index 000000000000..f6afda365010 --- /dev/null +++ b/.agents/skills/pr-status-triage/SKILL.md @@ -0,0 +1,33 @@ +--- +name: pr-status-triage +description: > + Triage CI failures and PR review comments using scripts/pr-status.js. + Use when investigating failing CI jobs, flaky tests, or PR review feedback. + Covers blocker-first prioritization (build > lint > types > tests), + CI env var matching for local reproduction, and the Known Flaky Tests + distinction. +--- + +# PR Status Triage + +Use this skill when the user asks about PR status, CI failures, or review comments in the Next.js monorepo. + +## Workflow + +1. Run `node scripts/pr-status.js` (or `node scripts/pr-status.js <number>`). +2. Read generated files in `scripts/pr-status/`. +3. Prioritize blocking jobs first: build, lint, types, then test jobs. +4. Treat failures as real until disproven; check the "Known Flaky Tests" section before calling anything flaky. +5. Reproduce locally with the same mode and env vars as CI. + +## Quick Commands + +```bash +node scripts/pr-status.js +node scripts/pr-status.js <number> +``` + +## Detailed References + +- [workflow.md](./workflow.md) - prioritization and common failure patterns +- [local-repro.md](./local-repro.md) - mode/env matching and isolation guidance diff --git a/.agents/skills/pr-status-triage/local-repro.md b/.agents/skills/pr-status-triage/local-repro.md new file mode 100644 index 000000000000..e6f16aeb2ef2 --- /dev/null +++ b/.agents/skills/pr-status-triage/local-repro.md @@ -0,0 +1,30 @@ +# Local Reproduction Guide + +## Match CI Job Mode + +- Dev-mode failures: use `pnpm test-dev-turbo` or `pnpm test-dev-webpack` based on job mode. 
+- Start-mode failures: use `pnpm test-start-turbo` or `pnpm test-start-webpack`. + +## Match CI Environment Variables + +Read the job environment variables from `pr-status` output and mirror them locally. + +Key variables to watch: + +- `IS_WEBPACK_TEST=1` forces webpack mode. +- `NEXT_SKIP_ISOLATE=1` skips package isolation and can hide module-resolution issues. + +## Isolation Rule + +When validating module-resolution, entrypoint-export, or internal require-path fixes, rerun without `NEXT_SKIP_ISOLATE=1`. + +## One-Run Log Analysis + +Capture once, analyze multiple times: + +```bash +HEADLESS=true pnpm test-dev-turbo test/path/to/test.ts > /tmp/test-output.log 2>&1 +grep "●" /tmp/test-output.log +grep -A5 "Error:" /tmp/test-output.log +tail -5 /tmp/test-output.log +``` diff --git a/.agents/skills/pr-status-triage/workflow.md b/.agents/skills/pr-status-triage/workflow.md new file mode 100644 index 000000000000..5307ab74f42e --- /dev/null +++ b/.agents/skills/pr-status-triage/workflow.md @@ -0,0 +1,27 @@ +# CI Triage Workflow + +## Prioritization Order + +1. Build failures +2. Lint failures +3. Type failures +4. Test failures +5. Review comments (after CI blockers) + +## Failure Handling Rules + +- Investigate each failing job as if it is caused by the current changes. +- Do not assume flakiness by default. +- If the job output has a "Known Flaky Tests" section, use it as historical context, not as automatic dismissal. 
+ +## Common Patterns + +- `rust check / build`: + - Run `cargo fmt -- --check` + - Fix with `cargo fmt` +- `lint / build`: + - Run `pnpm prettier --write <file>` + - Run the repo lint command if needed +- test failures: + - Run the exact failing test file locally + - Match dev vs start mode to the CI job diff --git a/.agents/skills/react-vendoring/SKILL.md b/.agents/skills/react-vendoring/SKILL.md new file mode 100644 index 000000000000..6b97b4640d3b --- /dev/null +++ b/.agents/skills/react-vendoring/SKILL.md @@ -0,0 +1,70 @@ +--- +name: react-vendoring +description: > + React vendoring and react-server layer boundaries. Use when editing + entry-base.ts, $$compiled.internal.d.ts, compiled/react* packages, + or taskfile.js copy_vendor_react. Covers the entry-base.ts boundary + (all react-server-dom-webpack/* imports must go through it), vendored + React channels, type declarations, Turbopack remap to + react-server-dom-turbopack, ComponentMod access patterns, and ESLint + suppression for guarded requires. +--- + +# React Vendoring + +Use this skill for changes touching vendored React, `react-server-dom-webpack/*`, or react-server layer boundaries. + +## App Router Vendoring + +React is NOT resolved from `node_modules` for App Router. It's vendored into `packages/next/src/compiled/` during `pnpm build` (task: `copy_vendor_react()` in `taskfile.js`). Pages Router resolves React from `node_modules` normally. + +- **Two channels**: stable (`compiled/react/`) and experimental (`compiled/react-experimental/`). The runtime bundle webpack config aliases to the correct channel via `makeAppAliases({ experimental })`. + +## `entry-base.ts` Boundary + +Only `entry-base.ts` is compiled in rspack's `(react-server)` layer. ALL imports from `react-server-dom-webpack/*` (Flight server/static APIs) must go through `entry-base.ts`. 
Other files like `stream-ops.node.ts` or `app-render.tsx` must access Flight APIs via the `ComponentMod` parameter (which is the `entry-base.ts` module exposed through the `app-page.ts` build template). + +Direct imports from `react-server-dom-webpack/server.node` or `react-server-dom-webpack/static` in files outside `entry-base.ts` will fail at runtime with "The react-server condition must be enabled". Dev mode may mask this error, but production workers fail immediately. + +## Type Declarations + +`packages/next/types/$$compiled.internal.d.ts` contains `declare module` blocks for vendored React packages. When adding new APIs (e.g. `renderToPipeableStream`, `prerenderToNodeStream`), you must add type declarations here. The bare specifier types (e.g. `declare module 'react-server-dom-webpack/server'`) are what source code in `src/` imports against. + +## Adding Node.js-Only React APIs + +These exist in `.node` builds but not in the type definitions. Steps: + +1. Add type declarations to `$$compiled.internal.d.ts`. +2. Export the API from `entry-base.ts` behind a `process.env` guard. +3. Access it via `ComponentMod` in other files. + +```typescript +// In entry-base.ts (react-server layer) only: +/* eslint-disable import/no-extraneous-dependencies */ +export let renderToPipeableStream: ... | undefined +if (process.env.__NEXT_USE_NODE_STREAMS) { + renderToPipeableStream = ( + require('react-server-dom-webpack/server.node') as typeof import('react-server-dom-webpack/server.node') + ).renderToPipeableStream +} else { + renderToPipeableStream = undefined +} +/* eslint-enable import/no-extraneous-dependencies */ + +// In other files, access via ComponentMod: +ComponentMod.renderToPipeableStream!(payload, clientModules, opts) +``` + +## ESLint Practical Rule + +For guarded runtime `require()` blocks that need `import/no-extraneous-dependencies` suppression, prefer scoped block disable/enable. 
If using `eslint-disable-next-line`, the comment must be on the line immediately before the `require()` call, NOT before the `const` declaration. When the `const` and `require()` are on different lines, this is error-prone. + +## Turbopack Remap + +`react-server-dom-webpack/*` is silently remapped to `react-server-dom-turbopack/*` by Turbopack's import map. Code says "webpack" everywhere, but Turbopack gets its own bindings at runtime. This affects debugging: stack traces and error messages will reference the turbopack variant. + +## Related Skills + +- `$flags` - flag wiring (config/schema/define-env/runtime env) +- `$dce-edge` - DCE-safe require patterns and edge constraints +- `$runtime-debug` - reproduction and verification workflow diff --git a/.agents/skills/runtime-debug/SKILL.md b/.agents/skills/runtime-debug/SKILL.md new file mode 100644 index 000000000000..b1a61f539421 --- /dev/null +++ b/.agents/skills/runtime-debug/SKILL.md @@ -0,0 +1,54 @@ +--- +name: runtime-debug +description: > + Debug and verification workflow for runtime-bundle and module-resolution + regressions. Use when diagnosing unexpected module inclusions, bundle + size regressions, or CI failures related to NEXT_SKIP_ISOLATE, nft.json + traces, or runtime bundle selection (module.compiled.js). Covers CI env + mirroring, full stack traces via __NEXT_SHOW_IGNORE_LISTED, route trace + inspection, and webpack stats diffing. +--- + +# Runtime Debug + +Use this skill when reproducing runtime-bundle, module-resolution, or user-bundle inclusion regressions. + +## Local Repro Discipline + +- Mirror CI env vars when reproducing CI failures. +- Key variables: `IS_WEBPACK_TEST=1` forces webpack (turbopack is default), `NEXT_SKIP_ISOLATE=1` skips packing next.js. +- For module-resolution validation, always rerun without `NEXT_SKIP_ISOLATE=1`. + +## Stack Trace Visibility + +Set `__NEXT_SHOW_IGNORE_LISTED=true` to disable the ignore-list filtering in dev server error output. 
By default, Next.js collapses internal frames to `at ignore-listed frames`, which hides useful context when debugging framework internals. Defined in `packages/next/src/server/patch-error-inspect.ts`. + +## User-Bundle Regression Guardrail + +When user `next build` starts bundling internal Node-only helpers unexpectedly: + +1. Inspect route trace artifacts (`.next/server/.../page.js.nft.json`). +2. Inspect traced server chunks for forbidden internals (e.g. `next/dist/server/stream-utils/node-stream-helpers.js`, `node:stream/promises`). +3. Add a `test-start-webpack` assertion that reads the route trace and traced server chunks, and fails on forbidden internals. This validates user-project bundling (not publish-time runtime bundling). + +## Bundle Tracing / Inclusion Proof + +To prove what user bundling includes, emit webpack stats from the app's `next.config.js`: + +```js +// next.config.js +module.exports = { + webpack(config) { + config.profile = true + return config + }, +} +``` + +Then use `stats.toJson({ modules: true, chunks: true, reasons: true })` and diff `webpack-stats-server.json` between modes. This gives concrete inclusion reasons (e.g. which module required `node:stream/promises`) and is more reliable than analyzer HTML alone. + +## Related Skills + +- `$flags` - flag wiring (config/schema/define-env/runtime env) +- `$dce-edge` - DCE-safe require patterns and edge constraints +- `$react-vendoring` - entry-base boundaries and vendored React diff --git a/.claude/commands/pr-status.md b/.claude/commands/pr-status.md index 1c2018d01431..583574cbcd49 100644 --- a/.claude/commands/pr-status.md +++ b/.claude/commands/pr-status.md @@ -95,6 +95,15 @@ Analyze PR status including CI failures and review comments. - **MEDIUM priority**: Identify root cause pattern, address open suggestions - **LOW priority**: Mark as likely flaky/transient, note resolved/nitpick comments +8. 
When proposing local repro commands, **always include the exact env vars from the CI job** (shown in the "Job Environment Variables" section of index.md). Key variables that change behavior: + - `IS_WEBPACK_TEST=1` forces webpack (turbopack is default locally) + - `NEXT_SKIP_ISOLATE=1` skips packing next.js into a separate project (hides module resolution failures) + - Feature flags like `__NEXT_USE_NODE_STREAMS=true`, `__NEXT_CACHE_COMPONENTS=true` change build-time DefinePlugin replacements + - Example: a failure in "test node streams prod" needs `IS_WEBPACK_TEST=1 __NEXT_USE_NODE_STREAMS=true __NEXT_CACHE_COMPONENTS=true __NEXT_EXPERIMENTAL_DEBUG_CHANNEL=true NEXT_TEST_MODE=start` + - Never use `NEXT_SKIP_ISOLATE=1` when verifying module resolution or build-time compilation fixes + +9. The script automatically checks the last 3 main branch CI runs for known flaky tests. Check the **"Known Flaky Tests"** section in index.md and the `flaky-tests.json` file. Tests listed there also fail on main and are likely pre-existing flakes, not caused by the PR. Mark them as **FLAKY (pre-existing)** in your summary table. Use `--skip-flaky-check` to skip this step if it's too slow. + - Do not try to fix these failures or address review comments without user confirmation. - If failures would require complex analysis and there are multiple problems, only do some basic analysis and point out that further investigation is needed and could be performed when requested. diff --git a/AGENTS.md b/AGENTS.md index 7a6f5e5a13bd..3f072645a3a2 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,5 +1,7 @@ # Next.js Development Guide +> **Note:** `CLAUDE.md` is a symlink to `AGENTS.md`. They are the same file. 
+ ## Codebase structure ### Monorepo Overview @@ -65,35 +67,52 @@ pnpm --filter=next exec taskr ## Fast Local Development -For iterative development, use watch mode + fast test execution: +For iterative development, default to watch mode + skip-isolate for the inner loop (not full builds), with exceptions noted below. + +**Default agent rule:** If you are changing Next.js source or integration tests, start `pnpm --filter=next dev` in a separate terminal session before making edits (unless it is already running). If you skip this, explicitly state why (for example: docs-only, read-only investigation, or CI-only analysis). **1. Start watch build in background:** ```bash -# Runs taskr in watch mode - auto-rebuilds on file changes -# Use Bash(run_in_background=true) to keep working while it runs +# Auto-rebuilds on file changes (~1-2s per change vs ~60s full build) +# Keep this running while you iterate on code pnpm --filter=next dev ``` **2. Run tests fast (no isolation, no packing):** ```bash -# NEXT_SKIP_ISOLATE=1 - skip packing Next.js for each test (much faster) +# NEXT_SKIP_ISOLATE=1 - skip packing Next.js for each test (~100s faster) # testonly - runs with --runInBand (no worker isolation overhead) NEXT_SKIP_ISOLATE=1 NEXT_TEST_MODE=dev pnpm testonly test/path/to/test.ts ``` -**3. When done, kill the background watch process.** +**3. When done, kill the background watch process (if you started it).** -Only use full `pnpm --filter=next build` for one-off builds (after branch switch, before CI push). +**For type errors only:** Use `pnpm --filter=next types` (~10s) instead of `pnpm --filter=next build` (~60s). -**Always rebuild after switching branches:** +After the workspace is bootstrapped, prefer `pnpm --filter=next build` when edits are limited to core Next.js files. Use full `pnpm build` for branch switches/bootstrap, before CI push, or when changes span multiple packages. 
+ +**Always run a full bootstrap build after switching branches:** ```bash git checkout -pnpm build # Required before running tests (Turborepo dedupes if unchanged) +pnpm build # Sets up outputs for dependent packages (Turborepo dedupes if unchanged) +``` + +**When NOT to use NEXT_SKIP_ISOLATE:** Drop it when testing module resolution changes (new require() paths, new exports from entry-base.ts, edge route imports). Without isolation, the test uses local dist/ directly, hiding resolution failures that occur when Next.js is packed as a real npm package. + +## Bundler Selection + +Turbopack is the default bundler for both `next dev` and `next build`. To force webpack: + +```bash +next build --webpack # Production build with webpack +next dev --webpack # Dev server with webpack ``` +There is no `--no-turbopack` flag. + ## Testing ```bash @@ -114,10 +133,16 @@ pnpm test-dev-turbo test/development/ - `pnpm test-start-turbo` - Production build+start with Turbopack - `pnpm test-start-webpack` - Production build+start with Webpack +**Run tests headless** (no browser window): Set `HEADLESS=true` when running e2e tests unless you need visual browser debugging: + +```bash +HEADLESS=true pnpm test-dev-turbo test/path/to/test.ts +``` + **Other test commands:** - `pnpm test-unit` - Run unit tests only (fast, no browser) -- `pnpm testonly ` - Run tests without rebuilding (faster iteration) +- `pnpm testonly ` - Run tests without rebuilding (faster iteration when build artifacts are already up to date) - `pnpm new-test` - Generate a new test file from template (interactive) **Generate tests non-interactively (for AI agents):** @@ -134,6 +159,20 @@ Generating tests using `pnpm new-test` is mandatory. pnpm new-test --args true my-feature e2e ``` +**Analyzing test output efficiently:** + +Never re-run the same test suite with different grep filters. 
Capture output once to a file, then read from it: + +```bash +# Run once, save everything +HEADLESS=true pnpm test-dev-turbo test/path/to/test.ts > /tmp/test-output.log 2>&1 + +# Then analyze without re-running +grep "●" /tmp/test-output.log # Failed test names +grep -A5 "Error:" /tmp/test-output.log # Error details +tail -5 /tmp/test-output.log # Summary +``` + ## Writing Tests **Test writing expectations:** @@ -201,20 +240,29 @@ node scripts/pr-status.js # Auto-detects PR from current branch node scripts/pr-status.js # Analyze specific PR by number ``` -This fetches CI workflow runs, failed jobs, logs, and PR review comments, generating markdown files in `scripts/pr-status/`. +This generates analysis files in `scripts/pr-status/`. + +General triage rules (always apply; `$pr-status-triage` skill expands on these): + +- Prioritize blocking failures first: build, lint, types, then tests. +- Assume failures are real until disproven; use "Known Flaky Tests" as context, not auto-dismissal. +- Reproduce with the same CI mode/env vars (especially `IS_WEBPACK_TEST=1` when present). +- For module-resolution/build-graph fixes, verify without `NEXT_SKIP_ISOLATE=1`. + +For full triage workflow (failure prioritization, mode selection, CI env reproduction, and common failure patterns), use the `$pr-status-triage` skill: + +- Skill file: `.agents/skills/pr-status-triage/SKILL.md` **Use `/pr-status` for automated analysis** - analyzes failing jobs and review comments in parallel, groups failures by test file. 
**CI Analysis Tips:** -- Prioritize blocking jobs first: build, lint, types, then test jobs - Prioritize CI failures over review comments - -**Common failure patterns:** - -- `rust check / build` → Run `cargo fmt -- --check` locally, fix with `cargo fmt` -- `lint / build` → Run `pnpm prettier --write ` for prettier errors -- Test failures → Run the specific test locally with `pnpm test-dev-turbo ` +- Prioritize blocking jobs first: build, lint, types, then test jobs +- Common fast checks: + - `rust check / build` → Run `cargo fmt -- --check`, then `cargo fmt` + - `lint / build` → Run `pnpm prettier --write ` for prettier errors + - test failures → Run the specific failing test path locally **Run tests in the right mode:** @@ -253,6 +301,50 @@ Both `next dev` and `next build --debug-prerender` produce bundles with `NODE_EN - `process.env.NODE_ENV !== 'production'` — code that should exist in dev bundles but be eliminated from prod bundles. This is a build-time check. - `process.env.__NEXT_DEV_SERVER` — code that should only run with the dev server (`next dev`), not during `next build --debug-prerender` or `next start`. +## Secrets and Env Safety + +Always treat environment variable values as sensitive unless they are known test-mode flags. + +- Never print or paste secret values (tokens, API keys, cookies) in chat responses, commits, or shared logs. +- Mirror CI env **names and modes** exactly, but do not inline literal secret values in commands. +- If a required secret is missing locally, stop and ask the user rather than inventing placeholder credentials. +- Never commit local secret files; if documenting env setup, use placeholder-only examples. +- When sharing command output, summarize and redact sensitive-looking values. + +## Specialized Skills + +Use skills for conditional, deep workflows. Keep baseline iteration/build/test policy in this file. 
+ +- `$pr-status-triage` - CI failure and PR review triage with `scripts/pr-status.js` +- `$flags` - feature-flag wiring across config/schema/define-env/runtime env +- `$dce-edge` - DCE-safe `require()` patterns and edge/runtime constraints +- `$react-vendoring` - `entry-base.ts` boundaries and vendored React type/runtime rules +- `$runtime-debug` - runtime-bundle/module-resolution regression reproduction and verification +- `$authoring-skills` - how to create and maintain skills in `.agents/skills/` + +## Context-Efficient Workflows + +**Reading large files** (>500 lines, e.g. `app-render.tsx`): + +- Grep first to find relevant line numbers, then read targeted ranges with `offset`/`limit` +- Never re-read the same section of a file without code changes in between +- For generated files (`dist/`, `node_modules/`, `.next/`): search only, don't read + +**Build & test output:** + +- Capture to file once, then analyze: `pnpm build 2>&1 | tee /tmp/build.log` +- Don't re-run the same test command without code changes; re-analyze saved output instead + +**Batch edits before building:** + +- Group related edits across files, then run one build, not build-per-edit +- Use `pnpm --filter=next types` (~10s) to check type errors without full rebuild + +**External API calls (gh, curl):** + +- Save response to variable or file: `JOBS=$(gh api ...) && echo "$JOBS" | jq '...'` +- Don't re-fetch the same API data to analyze from different angles + ## Commit and PR Style - Do NOT add "Generated with Claude Code" or co-author footers to commits or PRs @@ -267,20 +359,57 @@ Both `next dev` and `next build --debug-prerender` produce bundles with `NODE_EN - **Choose the right verification method for each change.** This may include running unit tests, integration tests, type checking, linting, building the project, or inspecting runtime behavior depending on what was changed. 
- **When unclear how to verify a change, ask the user.** If there is no obvious test or verification method for a particular change, ask the user how they would like it verified before moving on. +**Pre-validate before committing** to avoid slow lint-staged failures (~2 min each): + +```bash +# Run exactly what the pre-commit hook runs on your changed files: +pnpm prettier --with-node-modules --ignore-path .prettierignore --write +npx eslint --config eslint.config.mjs --fix +``` + ## Rebuilding Before Running Tests When running Next.js integration tests, you must rebuild if source files have changed: -- **Edited Next.js code?** → `pnpm build` +- **First run after branch switch/bootstrap (or if unsure)?** → `pnpm build` +- **Edited only core Next.js files (`packages/next/**`) after bootstrap?** → `pnpm --filter=next build` - **Edited Turbopack (Rust)?** → `pnpm swc-build-native` - **Edited both?** → `pnpm turbo build build-native` ## Development Anti-Patterns +For runtime internals, use focused skills: + +- Feature-flag plumbing and runtime bundle wiring: `$flags` (`.agents/skills/flags/SKILL.md`) +- DCE and edge/runtime constraints: `$dce-edge` (`.agents/skills/dce-edge/SKILL.md`) +- React vendoring and `entry-base.ts` boundaries: `$react-vendoring` (`.agents/skills/react-vendoring/SKILL.md`) +- Debugging and verification workflow: `$runtime-debug` (`.agents/skills/runtime-debug/SKILL.md`) + +Keep these high-frequency guardrails in mind: + +- Reproduce module resolution and bundling issues without `NEXT_SKIP_ISOLATE=1` +- Validate edge bundling regressions with `pnpm test-start-webpack test/e2e/app-dir/app/standalone.test.ts` +- Use `__NEXT_SHOW_IGNORE_LISTED=true` when you need full internal stack traces + +Core runtime/bundling rules (always apply; skills above expand on these with verification steps and examples): + +- New flags: add type in `config-shared.ts`, schema in `config-schema.ts`, and `define-env.ts` when used in user-bundled code. 
+- If a flag is consumed in pre-compiled runtime internals, also wire runtime env values (`next-server.ts`/`export/worker.ts` as needed). +- `define-env.ts` affects user bundling; it does not control pre-compiled runtime bundle internals. +- Keep `require()` behind compile-time `if/else` branches for DCE (avoid early-return/throw patterns). +- In edge builds, force feature flags that gate Node-only imports to `false` in `define-env.ts`. +- `react-server-dom-webpack/*` imports must stay in `entry-base.ts`; consume via component module exports elsewhere. + ### Test Gotchas +- **Cache components enables PPR by default**: When `__NEXT_CACHE_COMPONENTS=true`, most app-dir pages use PPR implicitly. Dedicated `ppr-full/` and `ppr/` test suites are mostly `describe.skip` (migrating to cache components). To test PPR codepaths, run normal app-dir e2e tests with `__NEXT_CACHE_COMPONENTS=true` rather than looking for explicit PPR test suites. +- **Quick smoke testing with toy apps**: For fast feedback, generate a minimal test fixture with `pnpm new-test --args true e2e`, then run the dev server directly with `node packages/next/dist/bin/next dev --port ` and `curl --max-time 10`. This avoids the overhead of the full test harness and gives immediate feedback on hangs/crashes. - Mode-specific tests need `skipStart: true` + manual `next.start()` in `beforeAll` after mode check - Don't rely on exact log messages - filter by content patterns, find sequences not positions +- **Snapshot tests vary by env flags**: Tests with inline snapshots can produce different output depending on env flags. When updating snapshots, always run the test with the exact env flags the CI job uses (check `.github/workflows/build_and_test.yml` `afterBuild:` sections). Turbopack resolves `react-dom/server.edge` (no Node APIs like `renderToPipeableStream`), while webpack resolves the `.node` build (has them). 
+- **`app-page.ts` is a build template compiled by the user's bundler**: Any `require()` in this file is traced by webpack/turbopack at `next build` time. You cannot require internal modules with relative paths because they won't be resolvable from the user's project. Instead, export new helpers from `entry-base.ts` and access them via `entryBase.*` in the template. +- **Reproducing CI failures locally**: Always match the exact CI env vars (check `pr-status` output for "Job Environment Variables"). Key differences: `IS_WEBPACK_TEST=1` forces webpack (turbopack is default), `NEXT_SKIP_ISOLATE=1` skips packing next.js (hides module resolution failures). Always run without `NEXT_SKIP_ISOLATE` when verifying module resolution fixes. +- **Showing full stack traces**: Set `__NEXT_SHOW_IGNORE_LISTED=true` to disable the ignore-list filtering in dev server error output. By default, Next.js collapses internal frames to `at ignore-listed frames`, which hides useful context when debugging framework internals. Defined in `packages/next/src/server/patch-error-inspect.ts`. ### Rust/Cargo @@ -293,6 +422,10 @@ When running Next.js integration tests, you must rebuild if source files have ch - Source map paths vary (webpack: `./src/`, tsc: `src/`) - try multiple formats - `process.cwd()` in stack trace formatting produces different paths in tests vs production +### Stale Native Binary + +If Turbopack produces unexpected errors after switching branches or pulling, check if `packages/next-swc/native/*.node` is stale. Delete it and run `pnpm install` to get the npm-published binary instead of a locally-built one. 
+ ### Documentation Code Blocks - When adding `highlight={...}` attributes to code blocks, carefully count the actual line numbers within the code block diff --git a/scripts/pr-status.js b/scripts/pr-status.js index c84c13a5a410..a776a1c90ce3 100644 --- a/scripts/pr-status.js +++ b/scripts/pr-status.js @@ -83,6 +83,60 @@ function isBot(username) { return username.endsWith('-bot') || username.endsWith('[bot]') } +/** + * Parses the build_and_test.yml workflow to extract env vars from afterBuild + * sections. Returns a map of job display name prefix → env var list. + */ +function getJobEnvVarsFromWorkflow() { + const workflowPath = path.join( + __dirname, + '..', + '.github', + 'workflows', + 'build_and_test.yml' + ) + try { + const content = require('fs').readFileSync(workflowPath, 'utf8') + const envMap = {} + // Match job blocks: " job-id:\n name: display name\n" ... "afterBuild: |" + const jobRegex = + /^ {2}([\w-]+):\s*\n\s+name:\s*(.+)\n[\s\S]*?afterBuild:\s*\|\n([\s\S]*?)(?=\n\s+stepName:)/gm + let match + while ((match = jobRegex.exec(content)) !== null) { + const displayName = match[2].trim() + const afterBuild = match[3] + const exports = [] + for (const line of afterBuild.split('\n')) { + const exportMatch = line.match( + /^\s*export\s+([\w]+)=["']?([^"'\s]+)["']?/ + ) + if (exportMatch) { + exports.push(`${exportMatch[1]}=${exportMatch[2]}`) + } + } + if (exports.length > 0) { + envMap[displayName] = exports + } + } + return envMap + } catch { + return {} + } +} + +/** + * Given a job name like "test node streams prod (4/7) / build" and the env map, + * returns the relevant env vars or null. 
+ */ +function getEnvVarsForJob(jobName, envMap) { + for (const [prefix, vars] of Object.entries(envMap)) { + if (jobName.startsWith(prefix)) { + return vars + } + } + return null +} + // ============================================================================ // Data Fetching Functions // ============================================================================ @@ -413,7 +467,9 @@ function generateIndexMd( runMetadata, categorizedJobs, jobTestCounts, - reviewData + reviewData, + jobEnvMap, + flakyTests ) { const { failed, inProgress, queued, succeeded, cancelled, skipped } = categorizedJobs @@ -493,6 +549,41 @@ function generateIndexMd( ) } lines.push('') + + // Show env vars for failed jobs if they differ from defaults + if (jobEnvMap && Object.keys(jobEnvMap).length > 0) { + const jobEnvGroups = new Map() + for (const job of failed) { + const envVars = getEnvVarsForJob(job.name, jobEnvMap) + if (envVars) { + const key = envVars.join(', ') + if (!jobEnvGroups.has(key)) { + jobEnvGroups.set(key, []) + } + jobEnvGroups.get(key).push(job.name) + } + } + if (jobEnvGroups.size > 0) { + lines.push('### Job Environment Variables', '') + for (const [envStr, jobNames] of jobEnvGroups) { + const prefix = jobNames[0].replace(/ \(.*/, '') + lines.push(`**${prefix}**: \`${envStr}\``, '') + } + } + } + + // Known flaky tests section + if (flakyTests && flakyTests.size > 0) { + lines.push('### Known Flaky Tests (failing on 2+ branches)', '') + lines.push( + 'These tests also failed in recent CI runs across multiple different branches and are likely pre-existing flakes, not caused by this PR:', + '' + ) + for (const testPath of [...flakyTests].sort()) { + lines.push(`- \`${testPath}\``) + } + lines.push('') + } } // In-progress jobs section (only when CI is running) @@ -838,6 +929,146 @@ function generateThreadMd(thread, index) { return lines.join('\n') } +// ============================================================================ +// Flaky Test Detection +// 
============================================================================ + +/** + * Fetches recent failed CI runs across all branches and identifies tests that + * fail on multiple different branches (indicating flakiness, not branch-specific bugs). + * Excludes the current PR's branch to avoid self-matching. + * Returns a Set of test file paths that are likely flaky. + */ +async function getFlakyTests(currentBranch, runsToCheck = 5) { + console.log( + `Checking last ${runsToCheck} failed CI runs across all branches for known flaky tests...` + ) + + // Get recent failed build-and-test runs across ALL branches + const jqQuery = `.workflow_runs[] | select(.conclusion == "failure") | {id, head_branch}` + let output + try { + output = exec( + `gh api "repos/vercel/next.js/actions/workflows/57419851/runs?status=completed&per_page=30" --jq '${jqQuery}'` + ) + } catch { + console.log(' Could not fetch CI runs, skipping flaky check') + return new Set() + } + + if (!output.trim()) { + console.log(' No failed runs found') + return new Set() + } + + // Filter out the current branch and take up to runsToCheck + const allRuns = output + .split('\n') + .filter((line) => line.trim()) + .map((line) => JSON.parse(line)) + .filter((run) => run.head_branch !== currentBranch) + .slice(0, runsToCheck) + + if (allRuns.length === 0) { + console.log(' No failed runs from other branches found') + return new Set() + } + + const branchCount = new Set(allRuns.map((r) => r.head_branch)).size + console.log( + ` Checking ${allRuns.length} runs from ${branchCount} different branches...` + ) + + // Fetch failed jobs for all runs in parallel + const runJobResults = await Promise.all( + allRuns.map(async (run) => { + try { + const jobsJq = '.jobs[] | select(.conclusion == "failure") | {id, name}' + const jobsOutput = exec( + `gh api "repos/vercel/next.js/actions/runs/${run.id}/jobs?per_page=100" --jq '${jobsJq}'` + ) + if (!jobsOutput.trim()) return { run, jobs: [] } + const jobs = jobsOutput + 
.split('\n') + .filter((line) => line.trim()) + .map((line) => JSON.parse(line)) + // Skip runs with 20+ failed jobs (likely systemic, not flaky) + if (jobs.length > 20) return { run, jobs: [] } + return { run, jobs } + } catch { + return { run, jobs: [] } + } + }) + ) + + // Collect all (job, branch) pairs, then fetch logs in parallel (batch of 5) + const jobBranchPairs = [] + for (const { run, jobs } of runJobResults) { + for (const job of jobs) { + jobBranchPairs.push({ job, branch: run.head_branch }) + } + } + + console.log(` Fetching logs for ${jobBranchPairs.length} failed jobs...`) + + // Map: testPath → Set of branches where it failed + const testFailBranches = new Map() + + // Process in batches of 5 to avoid overwhelming the API + const BATCH_SIZE = 5 + for (let i = 0; i < jobBranchPairs.length; i += BATCH_SIZE) { + const batch = jobBranchPairs.slice(i, i + BATCH_SIZE) + const results = await Promise.all( + batch.map(async ({ job, branch }) => { + try { + const logs = exec( + `gh api "repos/vercel/next.js/actions/jobs/${job.id}/logs"` + ) + return { logs, branch } + } catch { + return { logs: null, branch } + } + }) + ) + + for (const { logs, branch } of results) { + if (!logs) continue + const testResults = extractTestOutputJson(logs) + for (const result of testResults) { + if (result.testResults) { + for (const tr of result.testResults) { + const hasFailed = tr.assertionResults?.some( + (a) => a.status === 'failed' + ) + if (hasFailed) { + const shortPath = tr.name?.replace(/.*\/(test\/)/, '$1') + if (shortPath) { + if (!testFailBranches.has(shortPath)) { + testFailBranches.set(shortPath, new Set()) + } + testFailBranches.get(shortPath).add(branch) + } + } + } + } + } + } + } + + // A test is flaky if it fails on 2+ different branches + const flakyTestFiles = new Set() + for (const [testPath, branches] of testFailBranches) { + if (branches.size >= 2) { + flakyTestFiles.add(testPath) + } + } + + console.log( + ` Found ${flakyTestFiles.size} flaky tests 
(failing on 2+ different branches)` + ) + return flakyTestFiles +} + // ============================================================================ // Main Function // ============================================================================ @@ -972,7 +1203,8 @@ async function main() { runMetadata, emptyCategorizedJobs, {}, - reviewData + reviewData, + {} ) ) process.exit(0) @@ -1082,19 +1314,34 @@ async function main() { } } - // Step 8: Generate index.md + // Step 8: Check for known flaky tests across branches (skip with --skip-flaky-check) + let flakyTests = new Set() + if (!process.argv.includes('--skip-flaky-check')) { + flakyTests = await getFlakyTests(branchInfo.branchName, 5) + if (flakyTests.size > 0) { + await fs.writeFile( + path.join(OUTPUT_DIR, 'flaky-tests.json'), + JSON.stringify([...flakyTests].sort(), null, 2) + ) + } + } + + // Step 9: Generate index.md console.log('Generating index.md...') // Update categorizedJobs.failed with full processed metadata const finalCategorizedJobs = { ...categorizedJobs, failed: processedFailedJobs, } + const jobEnvMap = getJobEnvVarsFromWorkflow() const indexMd = generateIndexMd( branchInfo, runMetadata, finalCategorizedJobs, jobTestCounts, - reviewData + reviewData, + jobEnvMap, + flakyTests ) await fs.writeFile(path.join(OUTPUT_DIR, 'index.md'), indexMd) From 2a1a5cd1c8eda9a77e10535794926b12080d162c Mon Sep 17 00:00:00 2001 From: Tim Neutkens Date: Mon, 16 Feb 2026 15:57:07 +0100 Subject: [PATCH 2/3] Fix comment --- AGENTS.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 3f072645a3a2..87864950b131 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -133,12 +133,6 @@ pnpm test-dev-turbo test/development/ - `pnpm test-start-turbo` - Production build+start with Turbopack - `pnpm test-start-webpack` - Production build+start with Webpack -**Run tests headless** (no browser window): Set `HEADLESS=true` when running e2e tests unless you need visual browser debugging: - -```bash 
-HEADLESS=true pnpm test-dev-turbo test/path/to/test.ts -``` - **Other test commands:** - `pnpm test-unit` - Run unit tests only (fast, no browser) From dac7602af2a6e3f3a50a0f4849abeaed7de2f91e Mon Sep 17 00:00:00 2001 From: Jimmy Lai Date: Wed, 11 Feb 2026 13:44:46 -0800 Subject: [PATCH 3/3] bench: render-pipeline benchmarks and stress routes --- bench/BENCHMARKING.md | 192 +++ .../app/streaming/_shared/client-boundary.js | 20 + .../app/streaming/_shared/stress-page.js | 105 ++ bench/basic-app/app/streaming/bulk/page.js | 21 + .../app/streaming/chunkstorm/page.js | 17 + bench/basic-app/app/streaming/heavy/page.js | 17 + bench/basic-app/app/streaming/light/page.js | 17 + bench/basic-app/app/streaming/medium/page.js | 17 + bench/basic-app/app/streaming/wide/page.js | 17 + bench/basic-app/benchmark.sh | 142 +++ bench/basic-app/next.config.js | 6 +- .../next-minimal-server/bin/minimal-server.js | 37 +- bench/render-pipeline/.gitignore | 1 + bench/render-pipeline/README.md | 105 ++ bench/render-pipeline/analyze-profiles.ts | 399 ++++++ bench/render-pipeline/benchmark.ts | 1127 +++++++++++++++++ eslint.cli.config.mjs | 8 +- package.json | 2 + 18 files changed, 2235 insertions(+), 15 deletions(-) create mode 100644 bench/BENCHMARKING.md create mode 100644 bench/basic-app/app/streaming/_shared/client-boundary.js create mode 100644 bench/basic-app/app/streaming/_shared/stress-page.js create mode 100644 bench/basic-app/app/streaming/bulk/page.js create mode 100644 bench/basic-app/app/streaming/chunkstorm/page.js create mode 100644 bench/basic-app/app/streaming/heavy/page.js create mode 100644 bench/basic-app/app/streaming/light/page.js create mode 100644 bench/basic-app/app/streaming/medium/page.js create mode 100644 bench/basic-app/app/streaming/wide/page.js create mode 100755 bench/basic-app/benchmark.sh create mode 100644 bench/render-pipeline/.gitignore create mode 100644 bench/render-pipeline/README.md create mode 100644 bench/render-pipeline/analyze-profiles.ts create 
mode 100644 bench/render-pipeline/benchmark.ts diff --git a/bench/BENCHMARKING.md b/bench/BENCHMARKING.md new file mode 100644 index 000000000000..da154a355c35 --- /dev/null +++ b/bench/BENCHMARKING.md @@ -0,0 +1,192 @@ +# Benchmarking Playbook (Render Pipeline / Node Streams) + +This is the practical workflow for benchmarking and profiling render pipeline changes in this repo. + +Primary tools: + +- `pnpm bench:render-pipeline` +- `pnpm bench:render-pipeline:analyze` + +## 1. Build-first baseline + +Always rebuild `next` before benchmark runs when framework source changed. + +```bash +pnpm --filter=next build +``` + +## 2. End-to-end benchmark (full app render path) + +This measures the full request path (`renderToHTMLOrFlight`) through `bench/next-minimal-server`. +In `scenario=full` and `scenario=all`, `--capture-cpu` defaults to `true`. + +Node streams only: + +```bash +pnpm bench:render-pipeline \ + --scenario=full \ + --stream-mode=node \ + --build-full=true \ + --json-out=bench/render-pipeline/artifacts//results.json \ + --artifact-dir=bench/render-pipeline/artifacts/ +``` + +Web vs Node comparison: + +```bash +pnpm bench:render-pipeline \ + --scenario=full \ + --stream-mode=both \ + --build-full=true \ + --json-out=bench/render-pipeline/artifacts//results.json \ + --artifact-dir=bench/render-pipeline/artifacts/ +``` + +## 3. Route-focused stress runs + +Use this when targeting streaming-heavy behavior only. 
+ +```bash +pnpm bench:render-pipeline \ + --scenario=full \ + --stream-mode=node \ + --build-full=true \ + --routes=/streaming/heavy,/streaming/chunkstorm,/streaming/wide \ + --warmup-requests=10 \ + --serial-requests=40 \ + --load-requests=400 \ + --load-concurrency=40 \ + --json-out=bench/render-pipeline/artifacts//results.json \ + --artifact-dir=bench/render-pipeline/artifacts/ +``` + +Default stress routes currently include: + +- `/` +- `/streaming/light` +- `/streaming/medium` +- `/streaming/heavy` +- `/streaming/chunkstorm` +- `/streaming/wide` +- `/streaming/bulk` + +## 4. Isolate helper-level costs (micro scenario) + +Use this to quickly test helper-level changes before full runs. + +```bash +pnpm bench:render-pipeline \ + --scenario=micro \ + --iterations=300 \ + --warmup=30 +``` + +Micro benchmark output includes cases for: + +- `teeNodeReadable` +- `createBufferedTransformNode` +- `createInlinedDataNodeStream` +- `continueStaticPrerender` / `continueDynamicPrerender` / `continueDynamicHTMLResume` + +Flight payload mode toggles: + +```bash +# Binary-heavy flight chunks +pnpm bench:render-pipeline --scenario=micro --binary-flight=true + +# UTF-8-heavy flight chunks +pnpm bench:render-pipeline --scenario=micro --binary-flight=false +``` + +Stress payload shape: + +```bash +pnpm bench:render-pipeline \ + --scenario=micro \ + --iterations=300 \ + --warmup=30 \ + --flight-chunks=128 \ + --flight-chunk-bytes=8192 \ + --html-chunks=128 \ + --html-chunk-bytes=32768 +``` + +## 5. 
Capture CPU profiles and traces + +```bash +pnpm bench:render-pipeline \ + --scenario=full \ + --stream-mode=node \ + --build-full=true \ + --capture-trace=true \ + --capture-next-trace=true \ + --json-out=bench/render-pipeline/artifacts//results.json \ + --artifact-dir=bench/render-pipeline/artifacts/ +``` + +Artifacts are written under: + +- `bench/render-pipeline/artifacts//node/node.cpuprofile` +- `bench/render-pipeline/artifacts//node/node-trace-*.json` +- `bench/render-pipeline/artifacts//node/next-runtime-trace.log` +- `bench/render-pipeline/artifacts//results.json` + +## 6. Analyze hotspots + +```bash +pnpm bench:render-pipeline:analyze \ + --artifact-dir=bench/render-pipeline/artifacts/ \ + --top=20 +``` + +Filter only the Node-stream-relevant hotspots: + +```bash +pnpm bench:render-pipeline:analyze --artifact-dir=bench/render-pipeline/artifacts/ --top=20 > /tmp/analyze.txt +rg "use-flight-response|encodeFlightDataChunkNode|node-stream-tee|flushPending|node-stream-helpers|htmlEscapeJsonString" /tmp/analyze.txt +``` + +## 7. Compare two runs quickly + +```bash +node - investigation-10-boundary-data investigation-17-profile-current <<'NODE' +const fs = require('fs') +const [baseRun, candRun] = process.argv.slice(2) +const load = (name) => + JSON.parse( + fs.readFileSync(`bench/render-pipeline/artifacts/${name}/results.json`, 'utf8') + ).fullResults[0].routeResults + +const base = load(baseRun) +const cand = load(candRun) +for (const b of base) { + const c = cand.find((x) => x.route === b.route && x.phase === b.phase) + if (!c) continue + const throughputDelta = + ((c.throughputRps - b.throughputRps) / b.throughputRps) * 100 + const p95Delta = ((b.latency.p95 - c.latency.p95) / b.latency.p95) * 100 + console.log( + `${b.route} ${b.phase} throughput ${throughputDelta >= 0 ? '+' : ''}${throughputDelta.toFixed(2)}% p95 ${p95Delta >= 0 ? '+' : ''}${p95Delta.toFixed(2)}%` + ) +} +NODE +``` + +## 8. 
Noise control rules + +Use these rules to keep measurements trustworthy: + +- Build first (`pnpm --filter=next build`) after framework source changes. +- Compare runs with identical route sets and request knobs. +- Repeat suspicious runs at least once (especially if one route regresses while others improve). +- Use dedicated artifact directories per run. +- Prefer relative deltas across multiple runs over one-off absolute numbers. + +## 9. Suggested iteration loop + +1. Change one thing. +2. Build. +3. Run `scenario=micro` for quick signal. +4. Run focused full stress (`heavy/chunkstorm/wide`) with CPU profile. +5. Analyze hotspots and compare deltas. +6. Keep only changes that hold up across repeat runs. diff --git a/bench/basic-app/app/streaming/_shared/client-boundary.js b/bench/basic-app/app/streaming/_shared/client-boundary.js new file mode 100644 index 000000000000..841c5f43ac7f --- /dev/null +++ b/bench/basic-app/app/streaming/_shared/client-boundary.js @@ -0,0 +1,20 @@ +'use client' + +import React from 'react' + +export function StreamingClientBoundary({ + chunkId, + payload, + fragments, + checksum, +}) { + return ( +
+

client-{chunkId}

+

checksum:{checksum}

+

payload-bytes:{payload.length}

+

fragment-count:{fragments.length}

+

{fragments[0] ?? ''}

+
+ ) +} diff --git a/bench/basic-app/app/streaming/_shared/stress-page.js b/bench/basic-app/app/streaming/_shared/stress-page.js new file mode 100644 index 000000000000..4c590abd618e --- /dev/null +++ b/bench/basic-app/app/streaming/_shared/stress-page.js @@ -0,0 +1,105 @@ +import React, { Suspense } from 'react' + +import { StreamingClientBoundary } from './client-boundary' + +function sleep(ms) { + if (ms <= 0) return Promise.resolve() + return new Promise((resolve) => setTimeout(resolve, ms)) +} + +function createPayload(title, payloadBytes) { + const prefix = `${title}:` + if (prefix.length >= payloadBytes) return prefix + return `${prefix}${'x'.repeat(payloadBytes - prefix.length)}` +} + +function createClientPayload({ title, id, payloadBytes, fragmentCount }) { + const payload = createPayload(`${title}-client-${id}`, payloadBytes) + const safeFragmentCount = Math.max(1, fragmentCount) + const fragmentSize = Math.max( + 16, + Math.floor(payload.length / safeFragmentCount) + ) + const fragments = Array.from({ length: safeFragmentCount }, (_, index) => { + const start = Math.min(index * fragmentSize, payload.length) + const end = Math.min(start + fragmentSize, payload.length) + return payload.slice(start, end) + }) + + return { + chunkId: id, + payload, + fragments, + checksum: payload.length + id * 31 + safeFragmentCount, + } +} + +async function StreamedChunk({ + title, + id, + delayMs, + payload, + clientPayloadBytes, + clientPayloadFragments, +}) { + await sleep(delayMs) + + const clientPayload = createClientPayload({ + title, + id, + payloadBytes: clientPayloadBytes, + fragmentCount: clientPayloadFragments, + }) + + return ( +
+

chunk-{id}

+

{payload}

+ +
+ ) +} + +export function StreamingStressPage({ + title, + boundaryCount, + payloadBytes, + clientPayloadBytes = Math.max(128, Math.floor(payloadBytes / 2)), + clientPayloadFragments = 4, + maxDelayMs, +}) { + const payload = createPayload(title, payloadBytes) + const boundaries = Array.from({ length: boundaryCount }, (_, index) => index) + + return ( +
+

{title}

+

+ boundaries={boundaryCount} payloadBytes={payloadBytes}{' '} + clientPayloadBytes= + {clientPayloadBytes} clientPayloadFragments={clientPayloadFragments}{' '} + maxDelayMs={maxDelayMs} +

+ + {boundaries.map((id) => { + const delayMs = maxDelayMs === 0 ? 0 : id % (maxDelayMs + 1) + + return ( + loading-{id}} + > + + + ) + })} +
+ ) +} diff --git a/bench/basic-app/app/streaming/bulk/page.js b/bench/basic-app/app/streaming/bulk/page.js new file mode 100644 index 000000000000..ac797a8c66d0 --- /dev/null +++ b/bench/basic-app/app/streaming/bulk/page.js @@ -0,0 +1,21 @@ +import React from 'react' + +export const dynamic = 'force-dynamic' + +const ROWS = 2500 +const PAYLOAD = 'x'.repeat(384) +const DATA = Array.from( + { length: ROWS }, + (_, index) => `row-${index}-${PAYLOAD}` +) + +export default function Page() { + return ( +
+

stream-bulk

+ {DATA.map((line, index) => ( +

{line}

+ ))} +
+ ) +} diff --git a/bench/basic-app/app/streaming/chunkstorm/page.js b/bench/basic-app/app/streaming/chunkstorm/page.js new file mode 100644 index 000000000000..82ac50556539 --- /dev/null +++ b/bench/basic-app/app/streaming/chunkstorm/page.js @@ -0,0 +1,17 @@ +import React from 'react' +import { StreamingStressPage } from '../_shared/stress-page' + +export const dynamic = 'force-dynamic' + +export default function Page() { + return ( + + ) +} diff --git a/bench/basic-app/app/streaming/heavy/page.js b/bench/basic-app/app/streaming/heavy/page.js new file mode 100644 index 000000000000..08b112e8b51b --- /dev/null +++ b/bench/basic-app/app/streaming/heavy/page.js @@ -0,0 +1,17 @@ +import React from 'react' +import { StreamingStressPage } from '../_shared/stress-page' + +export const dynamic = 'force-dynamic' + +export default function Page() { + return ( + + ) +} diff --git a/bench/basic-app/app/streaming/light/page.js b/bench/basic-app/app/streaming/light/page.js new file mode 100644 index 000000000000..b7d96a0c4566 --- /dev/null +++ b/bench/basic-app/app/streaming/light/page.js @@ -0,0 +1,17 @@ +import React from 'react' +import { StreamingStressPage } from '../_shared/stress-page' + +export const dynamic = 'force-dynamic' + +export default function Page() { + return ( + + ) +} diff --git a/bench/basic-app/app/streaming/medium/page.js b/bench/basic-app/app/streaming/medium/page.js new file mode 100644 index 000000000000..a3e5cf1ae426 --- /dev/null +++ b/bench/basic-app/app/streaming/medium/page.js @@ -0,0 +1,17 @@ +import React from 'react' +import { StreamingStressPage } from '../_shared/stress-page' + +export const dynamic = 'force-dynamic' + +export default function Page() { + return ( + + ) +} diff --git a/bench/basic-app/app/streaming/wide/page.js b/bench/basic-app/app/streaming/wide/page.js new file mode 100644 index 000000000000..e66caae9de8b --- /dev/null +++ b/bench/basic-app/app/streaming/wide/page.js @@ -0,0 +1,17 @@ +import React from 'react' +import { 
StreamingStressPage } from '../_shared/stress-page' + +export const dynamic = 'force-dynamic' + +export default function Page() { + return ( + + ) +} diff --git a/bench/basic-app/benchmark.sh b/bench/basic-app/benchmark.sh new file mode 100755 index 000000000000..1faecb5f482b --- /dev/null +++ b/bench/basic-app/benchmark.sh @@ -0,0 +1,142 @@ +#!/bin/bash +# Benchmark script for comparing web streams vs node streams performance. +# Uses the minimal server (bench/next-minimal-server) for lowest overhead. +# Warms up with 50 requests, then runs two phases: +# Phase 1: 10s at concurrency=1 (single-client latency) +# Phase 2: 10s at concurrency=100 (throughput under load) +# Reports throughput and latency percentiles for each phase. +# +# Usage: +# ./benchmark.sh [duration] [warmup_requests] +# +# Defaults: 10s duration per phase, 50 warmup requests + +set -euo pipefail + +DURATION=${1:-10} +WARMUP_REQS=${2:-50} +PORT=3199 +NEXT_BIN="../../packages/next/dist/bin/next" +MINIMAL_SERVER="../next-minimal-server/bin/minimal-server.js" + +if ! command -v npx &>/dev/null; then + echo "npx is required (for autocannon)" + exit 1 +fi + +cleanup() { + lsof -ti :"$PORT" 2>/dev/null | xargs kill -9 2>/dev/null || true +} +trap cleanup EXIT + +start_server() { + cleanup + sleep 0.5 + PORT=$PORT node "$MINIMAL_SERVER" &>/dev/null & + SERVER_PID=$! + + # Wait for server to be ready + local retries=0 + while ! curl -sf "http://localhost:$PORT" >/dev/null 2>&1; do + retries=$((retries + 1)) + if [ "$retries" -gt 30 ]; then + echo "ERROR: Server failed to start after 15s" + exit 1 + fi + sleep 0.5 + done +} + +stop_server() { + kill "$SERVER_PID" 2>/dev/null || true + wait "$SERVER_PID" 2>/dev/null || true + cleanup + sleep 1 +} + +warmup() { + echo " Warming up ($WARMUP_REQS requests)..." 
+ for i in $(seq 1 "$WARMUP_REQS"); do + curl -sf "http://localhost:$PORT" >/dev/null 2>&1 || true + done + sleep 0.5 +} + +run_phase() { + local label="$1" + local connections="$2" + + echo "" + echo " --- $label (${DURATION}s, c=$connections) ---" + + local result + result=$(npx autocannon -d "$DURATION" -c "$connections" -j "http://localhost:$PORT" 2>/dev/null) + + node -e " + const d = JSON.parse(require('fs').readFileSync('/dev/stdin','utf8')); + const r = d.requests; + const l = d.latency; + console.log(' Throughput:'); + console.log(' avg: ' + r.average + ' req/s'); + console.log(' mean: ' + r.mean + ' req/s'); + console.log(' total: ' + r.total + ' requests in ${DURATION}s'); + console.log(' Latency:'); + console.log(' avg: ' + l.average.toFixed(2) + ' ms'); + console.log(' p50: ' + l.p50.toFixed(2) + ' ms'); + console.log(' p90: ' + l.p90.toFixed(2) + ' ms'); + console.log(' p99: ' + l.p99.toFixed(2) + ' ms'); + console.log(' max: ' + l.max.toFixed(2) + ' ms'); + " <<< "$result" +} + +run_benchmark() { + local mode="$1" + + echo "" + echo "============================================" + echo " $mode" + echo "============================================" + + start_server + warmup + run_phase "Single client" 1 + run_phase "Under load" 100 + stop_server +} + +echo "Benchmark: web streams vs node streams" +echo "=======================================" +echo "Duration: ${DURATION}s per phase | Warmup: ${WARMUP_REQS} reqs" +echo "Server: minimal-server (minimalMode: true)" + +# --- Web Streams (default) --- +cat > next.config.js <<'CONF' +module.exports = {} +CONF + +echo "" +echo "Building (web streams)..." +node "$NEXT_BIN" build &>/dev/null +run_benchmark "Web Streams (default)" + +# --- Node Streams --- +cat > next.config.js <<'CONF' +module.exports = { + experimental: { + useNodeStreams: true, + }, +} +CONF + +echo "" +echo "Building (node streams)..." 
+node "$NEXT_BIN" build &>/dev/null +run_benchmark "Node Streams (useNodeStreams: true)" + +# Restore config +cat > next.config.js <<'CONF' +module.exports = {} +CONF + +echo "" +echo "Done." diff --git a/bench/basic-app/next.config.js b/bench/basic-app/next.config.js index 0957c472383f..4ba52ba2c8df 100644 --- a/bench/basic-app/next.config.js +++ b/bench/basic-app/next.config.js @@ -1,5 +1 @@ -module.exports = { - experimental: { - serverMinification: true, - }, -} +module.exports = {} diff --git a/bench/next-minimal-server/bin/minimal-server.js b/bench/next-minimal-server/bin/minimal-server.js index 332e53d0b727..ecd6a0e68eab 100755 --- a/bench/next-minimal-server/bin/minimal-server.js +++ b/bench/next-minimal-server/bin/minimal-server.js @@ -26,13 +26,32 @@ const nextServer = new NextServer({ const requestHandler = nextServer.getRequestHandler() -require('http') - .createServer((req, res) => { - console.time('next-request') - return requestHandler(req, res).finally(() => { - console.timeEnd('next-request') - }) - }) - .listen(3000, () => { - console.timeEnd('next-cold-start') +const port = parseInt(process.env.PORT, 10) || 3000 + +const server = require('http').createServer((req, res) => { + return requestHandler(req, res) +}) + +server.listen(port, () => { + console.timeEnd('next-cold-start') + console.log('Listening on port ' + port) +}) + +let shuttingDown = false +function shutdown() { + if (shuttingDown) return + shuttingDown = true + + // Allow Node to exit cleanly so --cpu-prof/--heap-prof outputs are flushed. + server.close(() => { + process.exit(0) }) + + // Fallback in case active keep-alive connections prevent close callback. 
+ setTimeout(() => { + process.exit(1) + }, 5000).unref() +} + +process.on('SIGTERM', shutdown) +process.on('SIGINT', shutdown) diff --git a/bench/render-pipeline/.gitignore b/bench/render-pipeline/.gitignore new file mode 100644 index 000000000000..d4f588edfef5 --- /dev/null +++ b/bench/render-pipeline/.gitignore @@ -0,0 +1 @@ +artifacts/ diff --git a/bench/render-pipeline/README.md b/bench/render-pipeline/README.md new file mode 100644 index 000000000000..da861658a620 --- /dev/null +++ b/bench/render-pipeline/README.md @@ -0,0 +1,105 @@ +# Render Pipeline Benchmark + +This benchmark targets the full App Router render path (`renderToHTMLOrFlight`) via real HTTP requests through `bench/next-minimal-server`. + +It supports: +- `web` vs `node` streams mode comparison +- route-based stress suites for streaming SSR +- CPU/heap profiling for the server process +- Node trace events and Next internal trace artifact capture + +## Quick start + +Run end-to-end benchmark (default stress routes): + +```bash +pnpm bench:render-pipeline --scenario=full --stream-mode=both +``` + +For `scenario=full` and `scenario=all`, CPU profiles are captured by default. +Disable with `--capture-cpu=false` if you want lower-overhead runs. + +Skip rebuild for faster iteration (after you already built once): + +```bash +pnpm bench:render-pipeline --scenario=full --stream-mode=node --build-full=false +``` + +When `--stream-mode=both`, the runner forces `--build-full=true` so web/node +comparisons do not accidentally reuse stale build output. 
+ +Output JSON report: + +```bash +pnpm bench:render-pipeline --scenario=full --stream-mode=both --json-out=/tmp/render-pipeline.json +``` + +## Profiling and traces + +Capture CPU profiles + Node trace events + Next trace logs: + +```bash +pnpm bench:render-pipeline \ + --scenario=full \ + --stream-mode=both \ + --capture-trace=true \ + --capture-next-trace=true +``` + +Artifacts are written to: + +```text +bench/render-pipeline/artifacts/<timestamp>/ +``` + +Per mode (`web` and `node`) this includes: +- `<mode>.cpuprofile` (if `--capture-cpu=true`) +- `<mode>.heapprofile` (if `--capture-heap=true`) +- `<mode>-trace-*.json` (if `--capture-trace=true`) +- `next-trace-build.log` and `next-runtime-trace.log` (if `--capture-next-trace=true`) + +Open `.cpuprofile` files in Chrome DevTools Performance panel. + +Analyze results and CPU hotspots from artifacts: + +```bash +pnpm bench:render-pipeline:analyze --artifact-dir=bench/render-pipeline/artifacts/<timestamp> +``` + +Omit `--artifact-dir` to analyze the latest run automatically. + +## Stress routes + +Default routes: +- `/` +- `/streaming/light` +- `/streaming/medium` +- `/streaming/heavy` +- `/streaming/chunkstorm` +- `/streaming/wide` +- `/streaming/bulk` + +The `streaming/*` pages now include a client boundary per Suspense chunk, so benchmark runs also stress Server-to-Client payload serialization in Flight data. 
+ +Override with: + +```bash +pnpm bench:render-pipeline --scenario=full --routes=/,/streaming/heavy +``` + +## Common tuning flags + +- `--warmup-requests=30` +- `--serial-requests=120` +- `--load-requests=1200` +- `--load-concurrency=80` +- `--timeout-ms=30000` +- `--port=3199` + +## Optional micro benchmarks + +The runner also supports helper-only micro benchmarks: + +```bash +pnpm bench:render-pipeline --scenario=micro +``` diff --git a/bench/render-pipeline/analyze-profiles.ts b/bench/render-pipeline/analyze-profiles.ts new file mode 100644 index 000000000000..05cf255d2cfe --- /dev/null +++ b/bench/render-pipeline/analyze-profiles.ts @@ -0,0 +1,399 @@ +// This script must be run with tsx + +import { constants } from 'node:fs' +import { access, readdir, readFile, stat } from 'node:fs/promises' +import { SourceMap } from 'node:module' +import { resolve } from 'node:path' +import { fileURLToPath } from 'node:url' + +const REPO_ROOT = fileURLToPath(new URL('../..', import.meta.url)) +const DEFAULT_ARTIFACTS_ROOT = resolve( + REPO_ROOT, + 'bench/render-pipeline/artifacts' +) + +type FullRoutePhaseResult = { + mode: 'web' | 'node' + route: string + phase: 'single-client' | 'under-load' + requests: number + concurrency: number + throughputRps: number + latency: { + min: number + median: number + mean: number + p95: number + max: number + } +} + +type BenchmarkJson = { + fullResults?: Array<{ + mode: 'web' | 'node' + routeResults: FullRoutePhaseResult[] + }> +} + +type ProfileAnalysis = { + totalUs: number + runtimeUs: number + runtimeFile: string | null + topModules: Array<{ name: string; us: number }> + topRuntimeSources: Array<{ name: string; us: number }> + topRuntimeSymbols: Array<{ name: string; us: number }> +} + +function usage() { + console.log(`Usage: pnpm bench:render-pipeline:analyze [options] + +Options: + --artifact-dir= Artifact run directory, or parent artifacts directory. 
+ Default: latest run under bench/render-pipeline/artifacts + --top= Number of top hotspots to show per section (default: 15) +`) +} + +function parseArgs() { + const rawArgs = process.argv.slice(2) + if (rawArgs.includes('--help')) { + usage() + process.exit(0) + } + + const args = new Map() + for (const rawArg of rawArgs) { + if (!rawArg.startsWith('--')) continue + const [rawKey, rawValue] = rawArg.slice(2).split('=') + args.set(rawKey, rawValue ?? 'true') + } + + const topRaw = args.get('top') + const top = topRaw ? Number(topRaw) : 15 + if (!Number.isFinite(top) || top < 1) { + throw new Error(`Invalid --top value: ${topRaw}`) + } + + return { + artifactDirArg: args.get('artifact-dir'), + top: Math.floor(top), + } +} + +async function exists(path: string): Promise { + try { + await access(path, constants.F_OK) + return true + } catch { + return false + } +} + +async function resolveArtifactRunDir(artifactDirArg?: string): Promise { + const requested = resolve(REPO_ROOT, artifactDirArg ?? DEFAULT_ARTIFACTS_ROOT) + const requestedResults = resolve(requested, 'results.json') + if (await exists(requestedResults)) { + return requested + } + + const entries = await readdir(requested, { withFileTypes: true }) + const dirs = entries.filter((entry) => entry.isDirectory()) + const runs: Array<{ dir: string; mtimeMs: number }> = [] + + for (const dirent of dirs) { + const dir = resolve(requested, dirent.name) + const resultsPath = resolve(dir, 'results.json') + if (!(await exists(resultsPath))) continue + const stats = await stat(resultsPath) + runs.push({ dir, mtimeMs: stats.mtimeMs }) + } + + if (runs.length === 0) { + throw new Error( + `No artifact run found in ${requested}. 
Expected a results.json file.` + ) + } + + runs.sort((a, b) => b.mtimeMs - a.mtimeMs) + return runs[0].dir +} + +function toPercent(part: number, total: number): string { + if (total <= 0) return '0.00%' + return `${((part / total) * 100).toFixed(2)}%` +} + +function toMs(us: number): string { + return `${(us / 1000).toFixed(1)}ms` +} + +function sortTop( + entries: Iterable<[string, number]>, + limit: number +): Array<{ name: string; us: number }> { + return [...entries] + .sort((a, b) => b[1] - a[1]) + .slice(0, limit) + .map(([name, us]) => ({ name, us })) +} + +function mapModuleFromUrl(url: string): string { + if (!url || url === '(no-url)') return '(no-url)' + if (url.startsWith('node:')) return url + const appPageMatch = url.match(/app-page-turbo[\w-]*\.runtime\.prod\.js/) + if (appPageMatch) return appPageMatch[0] + if (url.includes('/.next/server/chunks/')) return '.next/server/chunks/*' + if (url.includes('/next/dist/')) return 'next/dist/*' + if (url.includes('/node_modules/')) return 'node_modules/*' + return url +} + +function detectRuntimeFile( + urlsByUs: Array<{ url: string; us: number }> +): string | null { + for (const entry of urlsByUs) { + const match = entry.url.match(/app-page-turbo[\w-]*\.runtime\.prod\.js/) + if (match) return match[0] + } + return null +} + +async function analyzeProfile( + profilePath: string, + top: number +): Promise { + if (!(await exists(profilePath))) return null + + const rawProfile = await readFile(profilePath, 'utf8') + const profile = JSON.parse(rawProfile) as { + nodes: Array<{ + id: number + callFrame: { + functionName: string + url: string + lineNumber: number + columnNumber: number + } + }> + samples: number[] + timeDeltas: number[] + } + + const idToNode = new Map(profile.nodes.map((node) => [node.id, node])) + const urlTotals = new Map() + const moduleTotals = new Map() + let totalUs = 0 + + for (let i = 0; i < profile.samples.length; i++) { + const sampleId = profile.samples[i] + const deltaUs = 
profile.timeDeltas[i] ?? 0 + totalUs += deltaUs + + const node = idToNode.get(sampleId) + if (!node) continue + const url = node.callFrame.url || '(no-url)' + urlTotals.set(url, (urlTotals.get(url) ?? 0) + deltaUs) + + const moduleName = mapModuleFromUrl(url) + moduleTotals.set(moduleName, (moduleTotals.get(moduleName) ?? 0) + deltaUs) + } + + const topUrls = sortTop(urlTotals.entries(), 30).map((entry) => ({ + url: entry.name, + us: entry.us, + })) + const runtimeFile = detectRuntimeFile(topUrls) + + let runtimeUs = 0 + const runtimeSources = new Map() + const runtimeSymbols = new Map() + let sourceMap: SourceMap | null = null + + if (runtimeFile) { + const mapPath = resolve( + REPO_ROOT, + `packages/next/dist/compiled/next-server/${runtimeFile}.map` + ) + if (await exists(mapPath)) { + sourceMap = new SourceMap(JSON.parse(await readFile(mapPath, 'utf8'))) + } + } + + if (runtimeFile) { + for (let i = 0; i < profile.samples.length; i++) { + const sampleId = profile.samples[i] + const deltaUs = profile.timeDeltas[i] ?? 0 + const node = idToNode.get(sampleId) + if (!node) continue + + const { callFrame } = node + if (!callFrame.url.includes(runtimeFile)) continue + runtimeUs += deltaUs + + const generatedLine = callFrame.lineNumber ?? 0 + const generatedColumn = callFrame.columnNumber ?? 0 + + let sourceName = callFrame.url + let symbolName = callFrame.functionName || '(anonymous)' + let sourceLine = generatedLine + let sourceColumn = generatedColumn + + if (sourceMap) { + const entry = sourceMap.findEntry(generatedLine, generatedColumn) as { + originalSource?: string + originalLine?: number + originalColumn?: number + name?: string + } + if (entry.originalSource) sourceName = entry.originalSource + if (entry.name) symbolName = entry.name + if (entry.originalLine !== undefined) sourceLine = entry.originalLine + if (entry.originalColumn !== undefined) + sourceColumn = entry.originalColumn + } + + runtimeSources.set( + sourceName, + (runtimeSources.get(sourceName) ?? 
0) + deltaUs + ) + const symbolKey = `${symbolName} @ ${sourceName}:${sourceLine}:${sourceColumn}` + runtimeSymbols.set( + symbolKey, + (runtimeSymbols.get(symbolKey) ?? 0) + deltaUs + ) + } + } + + return { + totalUs, + runtimeUs, + runtimeFile, + topModules: sortTop(moduleTotals.entries(), top), + topRuntimeSources: sortTop(runtimeSources.entries(), top), + topRuntimeSymbols: sortTop(runtimeSymbols.entries(), top), + } +} + +function printProfileAnalysis( + mode: 'web' | 'node', + analysis: ProfileAnalysis, + top: number +) { + console.log(`\n[${mode}]`) + console.log(` sampled: ${toMs(analysis.totalUs)}`) + if (analysis.runtimeFile) { + console.log( + ` runtime: ${analysis.runtimeFile} (${toMs(analysis.runtimeUs)}, ${toPercent(analysis.runtimeUs, analysis.totalUs)})` + ) + } else { + console.log(' runtime: not detected') + } + + console.log(` top ${top} modules:`) + for (const entry of analysis.topModules) { + console.log( + ` ${toPercent(entry.us, analysis.totalUs).padStart(7)} ${toMs(entry.us).padStart(9)} ${entry.name}` + ) + } + + if (analysis.topRuntimeSources.length > 0) { + console.log(` top ${top} runtime sources:`) + for (const entry of analysis.topRuntimeSources) { + console.log( + ` ${toPercent(entry.us, analysis.runtimeUs).padStart(7)} ${toMs(entry.us).padStart(9)} ${entry.name}` + ) + } + } + + if (analysis.topRuntimeSymbols.length > 0) { + console.log(` top ${top} runtime symbols:`) + for (const entry of analysis.topRuntimeSymbols) { + console.log( + ` ${toPercent(entry.us, analysis.runtimeUs).padStart(7)} ${toMs(entry.us).padStart(9)} ${entry.name}` + ) + } + } +} + +function printComparison(results: BenchmarkJson) { + const fullResults = results.fullResults + if (!fullResults || fullResults.length < 2) return + + const web = fullResults.find((entry) => entry.mode === 'web') + const node = fullResults.find((entry) => entry.mode === 'node') + if (!web || !node) return + + const webByKey = new Map( + web.routeResults.map((item) => 
[`${item.route}|${item.phase}`, item]) + ) + + console.log('\n[comparison node vs web]') + console.log( + ' route'.padEnd(20) + + 'phase'.padEnd(16) + + 'RPS delta'.padEnd(14) + + 'P95 delta' + ) + + for (const nodeEntry of node.routeResults) { + const key = `${nodeEntry.route}|${nodeEntry.phase}` + const webEntry = webByKey.get(key) + if (!webEntry) continue + const rpsDelta = + ((nodeEntry.throughputRps - webEntry.throughputRps) / + webEntry.throughputRps) * + 100 + const p95Delta = + ((webEntry.latency.p95 - nodeEntry.latency.p95) / webEntry.latency.p95) * + 100 + + const line = + ` ${nodeEntry.route}`.padEnd(20) + + `${nodeEntry.phase}`.padEnd(16) + + `${rpsDelta >= 0 ? '+' : ''}${rpsDelta.toFixed(2)}%`.padEnd(14) + + `${p95Delta >= 0 ? '+' : ''}${p95Delta.toFixed(2)}%` + console.log(line) + } +} + +async function main() { + const { artifactDirArg, top } = parseArgs() + const runDir = await resolveArtifactRunDir(artifactDirArg) + + console.log(`Analyzing render pipeline artifacts:`) + console.log(` ${runDir}`) + + const resultsPath = resolve(runDir, 'results.json') + const resultsRaw = await readFile(resultsPath, 'utf8') + const resultsJson = JSON.parse(resultsRaw) as BenchmarkJson + printComparison(resultsJson) + + const webProfile = resolve(runDir, 'web/web.cpuprofile') + const nodeProfile = resolve(runDir, 'node/node.cpuprofile') + + const [webAnalysis, nodeAnalysis] = await Promise.all([ + analyzeProfile(webProfile, top), + analyzeProfile(nodeProfile, top), + ]) + + if (!webAnalysis && !nodeAnalysis) { + console.log('\nNo CPU profiles found in this artifact run.') + console.log( + 'This analyzer reads only /.cpuprofile artifacts (not trace-event JSON or next-runtime-trace.log).' + ) + console.log( + 'Run benchmark with --capture-cpu=true, e.g. 
pnpm bench:render-pipeline --scenario=full --stream-mode=node --capture-cpu=true' + ) + return + } + + if (webAnalysis) printProfileAnalysis('web', webAnalysis, top) + if (nodeAnalysis) printProfileAnalysis('node', nodeAnalysis, top) + + console.log('\nDone.') +} + +main().catch((error) => { + console.error(error) + process.exit(1) +}) diff --git a/bench/render-pipeline/benchmark.ts b/bench/render-pipeline/benchmark.ts new file mode 100644 index 000000000000..79169d7ccdd7 --- /dev/null +++ b/bench/render-pipeline/benchmark.ts @@ -0,0 +1,1127 @@ +// This script must be run with tsx + +import { spawn } from 'node:child_process' +import { once } from 'node:events' +import { access, copyFile, mkdir, readFile, writeFile } from 'node:fs/promises' +import { resolve } from 'node:path' +import { performance } from 'node:perf_hooks' +import { Readable } from 'node:stream' +import { setTimeout as sleep } from 'node:timers/promises' +import { fileURLToPath } from 'node:url' +import { teeNodeReadable } from '../../packages/next/src/server/app-render/node-stream-tee' +import { + createInlinedDataNodeStream, + createInlinedDataReadableStream, +} from '../../packages/next/src/server/app-render/use-flight-response' +import { + chainNodeTransforms, + continueDynamicHTMLResumeNode, + continueDynamicPrerenderNode, + continueStaticPrerenderNode, + createBufferedTransformNode, +} from '../../packages/next/src/server/stream-utils/node-stream-helpers' +import { + continueDynamicHTMLResume, + continueDynamicPrerender, + continueStaticPrerender, +} from '../../packages/next/src/server/stream-utils/node-web-streams-helper' + +const REPO_ROOT = fileURLToPath(new URL('../..', import.meta.url)) +const NEXT_BIN = resolve(REPO_ROOT, 'packages/next/dist/bin/next') +const MINIMAL_SERVER = resolve( + REPO_ROOT, + 'bench/next-minimal-server/bin/minimal-server.js' +) + +type Scenario = 'full' | 'micro' | 'all' +type StreamMode = 'web' | 'node' | 'both' + +type CliOptions = { + scenario: Scenario + 
jsonOut?: string + + appDir: string + routes: string[] + streamMode: StreamMode + buildFull: boolean + warmupRequests: number + serialRequests: number + loadRequests: number + loadConcurrency: number + timeoutMs: number + port: number + + captureCpu: boolean + captureHeap: boolean + captureTrace: boolean + captureNextTrace: boolean + traceCategories: string + artifactDir: string + + iterations: number + warmup: number + htmlChunks: number + htmlChunkBytes: number + flightChunks: number + flightChunkBytes: number + binaryFlight: boolean +} + +type BenchStats = { + min: number + median: number + mean: number + p95: number + max: number +} + +type BenchResult = { + name: string + group: 'unit' | 'integration' + stats: BenchStats +} + +type BenchCase = { + name: string + group: 'unit' | 'integration' + run: () => Promise +} + +type FullRoutePhaseResult = { + mode: 'web' | 'node' + route: string + phase: 'single-client' | 'under-load' + requests: number + concurrency: number + throughputRps: number + latency: BenchStats +} + +type FullRunResult = { + mode: 'web' | 'node' + routeResults: FullRoutePhaseResult[] +} + +function parseBoolean(value: string): boolean { + return value === '1' || value === 'true' || value === 'yes' +} + +function parseNumberArg( + args: Map, + key: string, + fallback: number +): number { + const value = args.get(key) + if (value === undefined) return fallback + const parsed = Number(value) + if (!Number.isFinite(parsed)) { + throw new Error(`Invalid numeric value for --${key}: ${value}`) + } + return parsed +} + +function parseRoutes(rawRoutes: string | undefined): string[] { + if (!rawRoutes) { + return [ + '/', + '/streaming/light', + '/streaming/medium', + '/streaming/heavy', + '/streaming/chunkstorm', + '/streaming/wide', + '/streaming/bulk', + ] + } + + const routes = rawRoutes + .split(',') + .map((route) => route.trim()) + .filter(Boolean) + + if (routes.length === 0) { + throw new Error('--routes cannot be empty') + } + + for (const 
route of routes) { + if (!route.startsWith('/')) { + throw new Error(`Each route must start with '/': ${route}`) + } + } + + return routes +} + +function usage() { + console.log(`Usage: pnpm bench:render-pipeline [options] + +Defaults to FULL end-to-end app-render benchmark. + +Options: + --scenario=full|micro|all (default: full) + --json-out= + +Full benchmark options: + --app-dir= (default: bench/basic-app) + --routes=/,/streaming/light,... (default: built-in stress suite) + --stream-mode=web|node|both (default: both) + --build-full=true|false (default: true) + When stream-mode=both, build-full is forced to true. + --warmup-requests= (default: 30) + --serial-requests= (default: 120) + --load-requests= (default: 1200) + --load-concurrency= (default: 80) + --port= (default: 3199) + --timeout-ms= (default: 30000) + +Profiling and trace options: + --capture-cpu=true|false (default: true for scenario=full|all, false for scenario=micro) + --capture-heap=true|false (default: false) + --capture-trace=true|false (default: false) + --capture-next-trace=true|false (default: true) + --trace-categories= (default: node,node.async_hooks,v8) + --artifact-dir= (default: bench/render-pipeline/artifacts/) + +Micro benchmark options: + --iterations= (default: 10) + --warmup= (default: 2) + --html-chunks= (default: 64) + --html-chunk-bytes= (default: 16384) + --flight-chunks= (default: 64) + --flight-chunk-bytes= (default: 4096) + --binary-flight=true|false (default: true) +`) +} + +function parseCli(): CliOptions { + const rawArgs = process.argv.slice(2) + if (rawArgs.includes('--help')) { + usage() + process.exit(0) + } + + const args = new Map() + for (const rawArg of rawArgs) { + if (!rawArg.startsWith('--')) continue + const [rawKey, rawValue] = rawArg.slice(2).split('=') + args.set(rawKey, rawValue ?? 'true') + } + + const scenarioRaw = args.get('scenario') ?? 
'full' + if ( + scenarioRaw !== 'full' && + scenarioRaw !== 'micro' && + scenarioRaw !== 'all' + ) { + throw new Error( + `Invalid --scenario value: ${scenarioRaw}. Use full|micro|all` + ) + } + + const streamModeRaw = args.get('stream-mode') ?? 'both' + if ( + streamModeRaw !== 'web' && + streamModeRaw !== 'node' && + streamModeRaw !== 'both' + ) { + throw new Error( + `Invalid --stream-mode value: ${streamModeRaw}. Use web|node|both` + ) + } + + const timestamp = new Date().toISOString().replace(/[:.]/g, '-') + const artifactDir = resolve( + REPO_ROOT, + args.get('artifact-dir') ?? `bench/render-pipeline/artifacts/${timestamp}` + ) + + const htmlChunkBytes = parseNumberArg(args, 'html-chunk-bytes', 16 * 1024) + const flightChunkBytes = parseNumberArg(args, 'flight-chunk-bytes', 4 * 1024) + const iterations = parseNumberArg(args, 'iterations', 10) + const warmup = parseNumberArg(args, 'warmup', 2) + + if (htmlChunkBytes < 64) throw new Error('--html-chunk-bytes must be >= 64') + if (flightChunkBytes < 64) + throw new Error('--flight-chunk-bytes must be >= 64') + if (iterations < 1) throw new Error('--iterations must be >= 1') + if (warmup < 0) throw new Error('--warmup must be >= 0') + + const routes = parseRoutes(args.get('routes')) + const buildFull = parseBoolean(args.get('build-full') ?? 'true') + const defaultCaptureCpu = + scenarioRaw === 'full' || scenarioRaw === 'all' ? 'true' : 'false' + const shouldForceBuildFull = + (scenarioRaw === 'full' || scenarioRaw === 'all') && + streamModeRaw === 'both' && + !buildFull + + if (shouldForceBuildFull) { + console.warn( + '[bench/render-pipeline] forcing --build-full=true for stream-mode=both to avoid comparing stale build output.' + ) + } + + return { + scenario: scenarioRaw, + jsonOut: args.get('json-out'), + + appDir: resolve(REPO_ROOT, args.get('app-dir') ?? 
'bench/basic-app'), + routes, + streamMode: streamModeRaw, + buildFull: buildFull || shouldForceBuildFull, + warmupRequests: parseNumberArg(args, 'warmup-requests', 30), + serialRequests: parseNumberArg(args, 'serial-requests', 120), + loadRequests: parseNumberArg(args, 'load-requests', 1200), + loadConcurrency: parseNumberArg(args, 'load-concurrency', 80), + timeoutMs: parseNumberArg(args, 'timeout-ms', 30_000), + port: parseNumberArg(args, 'port', 3199), + + captureCpu: parseBoolean(args.get('capture-cpu') ?? defaultCaptureCpu), + captureHeap: parseBoolean(args.get('capture-heap') ?? 'false'), + captureTrace: parseBoolean(args.get('capture-trace') ?? 'false'), + captureNextTrace: parseBoolean(args.get('capture-next-trace') ?? 'true'), + traceCategories: args.get('trace-categories') ?? 'node,node.async_hooks,v8', + artifactDir, + + iterations, + warmup, + htmlChunks: parseNumberArg(args, 'html-chunks', 64), + htmlChunkBytes, + flightChunks: parseNumberArg(args, 'flight-chunks', 64), + flightChunkBytes, + binaryFlight: parseBoolean(args.get('binary-flight') ?? 
'true'), + } +} + +function fixedSizeChunkWithPrefix(prefix: Buffer, size: number, fill: number) { + if (prefix.byteLength >= size) { + return prefix.subarray(0, size) + } + return Buffer.concat([prefix, Buffer.alloc(size - prefix.byteLength, fill)]) +} + +function fixedSizeChunkWithSuffix(suffix: Buffer, size: number, fill: number) { + if (suffix.byteLength >= size) { + return suffix.subarray(suffix.byteLength - size) + } + return Buffer.concat([Buffer.alloc(size - suffix.byteLength, fill), suffix]) +} + +function makeHtmlChunks(chunkCount: number, chunkBytes: number): Buffer[] { + const chunks: Buffer[] = [] + const prefix = Buffer.from('') + const suffix = Buffer.from('') + + if (chunkCount < 2) { + throw new Error('--html-chunks must be >= 2') + } + + chunks.push(fixedSizeChunkWithPrefix(prefix, chunkBytes, 97)) + + for (let i = 1; i < chunkCount - 1; i++) { + chunks.push(Buffer.alloc(chunkBytes, 97 + (i % 26))) + } + + chunks.push(fixedSizeChunkWithSuffix(suffix, chunkBytes, 122)) + return chunks +} + +function makeFlightChunks( + chunkCount: number, + chunkBytes: number, + binary: boolean +): Buffer[] { + const chunks: Buffer[] = [] + for (let i = 0; i < chunkCount; i++) { + const chunk = Buffer.alloc(chunkBytes) + if (binary) { + for (let j = 0; j < chunkBytes; j++) { + chunk[j] = (i * 17 + j * 31) % 256 + } + } else { + chunk.fill(97 + (i % 26)) + } + chunks.push(chunk) + } + return chunks +} + +function createWebStream( + chunks: readonly Uint8Array[] +): ReadableStream { + let index = 0 + return new ReadableStream({ + pull(controller) { + if (index >= chunks.length) { + controller.close() + return + } + controller.enqueue(chunks[index++]) + }, + }) +} + +async function consumeNodeReadable(stream: Readable): Promise { + let totalBytes = 0 + for await (const chunk of stream) { + if (typeof chunk === 'string') { + totalBytes += Buffer.byteLength(chunk) + } else { + totalBytes += (chunk as Uint8Array).byteLength + } + } + return totalBytes +} + +async 
function consumeWebReadable( + stream: ReadableStream +): Promise { + let totalBytes = 0 + const reader = stream.getReader() + while (true) { + const { done, value } = await reader.read() + if (done) break + totalBytes += value.byteLength + } + return totalBytes +} + +function computeStats(samples: number[]): BenchStats { + const sorted = [...samples].sort((a, b) => a - b) + const min = sorted[0] + const max = sorted[sorted.length - 1] + const median = sorted[Math.floor(sorted.length / 2)] + const mean = samples.reduce((sum, value) => sum + value, 0) / samples.length + const p95 = sorted[Math.max(0, Math.ceil(sorted.length * 0.95) - 1)] + return { min, median, mean, p95, max } +} + +async function runBenchCase( + bench: BenchCase, + iterations: number, + warmup: number +): Promise { + for (let i = 0; i < warmup; i++) { + await bench.run() + } + + const samples: number[] = [] + for (let i = 0; i < iterations; i++) { + const start = performance.now() + await bench.run() + samples.push(performance.now() - start) + } + + return { + name: bench.name, + group: bench.group, + stats: computeStats(samples), + } +} + +function printMicroResults(results: BenchResult[]) { + const groups: Array<'unit' | 'integration'> = ['unit', 'integration'] + for (const group of groups) { + const groupResults = results.filter((result) => result.group === group) + if (groupResults.length === 0) continue + console.log(`\n${group.toUpperCase()} BENCHMARKS`) + console.log( + 'name'.padEnd(42), + 'median'.padStart(10), + 'p95'.padStart(10), + 'mean'.padStart(10), + 'min'.padStart(10), + 'max'.padStart(10) + ) + for (const result of groupResults) { + const { stats } = result + console.log( + result.name.padEnd(42), + `${stats.median.toFixed(2)}ms`.padStart(10), + `${stats.p95.toFixed(2)}ms`.padStart(10), + `${stats.mean.toFixed(2)}ms`.padStart(10), + `${stats.min.toFixed(2)}ms`.padStart(10), + `${stats.max.toFixed(2)}ms`.padStart(10) + ) + } + } +} + +function buildMicroBenchCases( + htmlChunks: 
Buffer[], + flightChunks: Buffer[], + secondaryFlightChunks: Buffer[], + secondaryFlightLabel: string +): BenchCase[] { + const webHtmlChunks = htmlChunks.map( + (chunk) => new Uint8Array(chunk.buffer, chunk.byteOffset, chunk.byteLength) + ) + const webFlightChunks = flightChunks.map( + (chunk) => new Uint8Array(chunk.buffer, chunk.byteOffset, chunk.byteLength) + ) + const webSecondaryFlightChunks = secondaryFlightChunks.map( + (chunk) => new Uint8Array(chunk.buffer, chunk.byteOffset, chunk.byteLength) + ) + + return [ + { + name: 'teeNodeReadable (drain both branches)', + group: 'unit', + run: async () => { + const source = Readable.from(htmlChunks) + const [left, right] = teeNodeReadable(source) + const [leftBytes, rightBytes] = await Promise.all([ + consumeNodeReadable(left as Readable), + consumeNodeReadable(right as Readable), + ]) + return leftBytes + rightBytes + }, + }, + { + name: 'createBufferedTransformNode only', + group: 'unit', + run: async () => { + const source = Readable.from(htmlChunks) + const transformed = chainNodeTransforms(source, [ + createBufferedTransformNode(), + ]) + return consumeNodeReadable(transformed) + }, + }, + { + name: 'createInlinedDataNodeStream only', + group: 'unit', + run: async () => { + const source = Readable.from(flightChunks) + const transformed = chainNodeTransforms(source, [ + createInlinedDataNodeStream(undefined, null), + ]) + return consumeNodeReadable(transformed) + }, + }, + { + name: `createInlinedDataNodeStream only (${secondaryFlightLabel})`, + group: 'unit', + run: async () => { + const source = Readable.from(secondaryFlightChunks) + const transformed = chainNodeTransforms(source, [ + createInlinedDataNodeStream(undefined, null), + ]) + return consumeNodeReadable(transformed) + }, + }, + { + name: 'Node continueStaticPrerender', + group: 'integration', + run: async () => { + const renderStream = Readable.from(htmlChunks) + const inlinedDataStream = chainNodeTransforms( + Readable.from(flightChunks), + 
[createInlinedDataNodeStream(undefined, null)] + ) + const stream = await continueStaticPrerenderNode(renderStream, { + inlinedDataStream, + getServerInsertedHTML: async () => '', + getServerInsertedMetadata: async () => '', + deploymentId: undefined, + }) + return consumeNodeReadable(stream) + }, + }, + { + name: 'Node continueDynamicPrerender', + group: 'integration', + run: async () => { + const renderStream = Readable.from(htmlChunks) + const stream = await continueDynamicPrerenderNode(renderStream, { + getServerInsertedHTML: async () => '', + getServerInsertedMetadata: async () => '', + deploymentId: undefined, + }) + return consumeNodeReadable(stream) + }, + }, + { + name: 'Node continueDynamicHTMLResume', + group: 'integration', + run: async () => { + const renderStream = Readable.from(htmlChunks) + const inlinedDataStream = chainNodeTransforms( + Readable.from(flightChunks), + [createInlinedDataNodeStream(undefined, null)] + ) + const stream = await continueDynamicHTMLResumeNode(renderStream, { + inlinedDataStream, + delayDataUntilFirstHtmlChunk: false, + getServerInsertedHTML: async () => '', + getServerInsertedMetadata: async () => '', + deploymentId: undefined, + }) + return consumeNodeReadable(stream) + }, + }, + { + name: `Node continueDynamicHTMLResume (${secondaryFlightLabel})`, + group: 'integration', + run: async () => { + const renderStream = Readable.from(htmlChunks) + const inlinedDataStream = chainNodeTransforms( + Readable.from(secondaryFlightChunks), + [createInlinedDataNodeStream(undefined, null)] + ) + const stream = await continueDynamicHTMLResumeNode(renderStream, { + inlinedDataStream, + delayDataUntilFirstHtmlChunk: false, + getServerInsertedHTML: async () => '', + getServerInsertedMetadata: async () => '', + deploymentId: undefined, + }) + return consumeNodeReadable(stream) + }, + }, + { + name: 'Web continueStaticPrerender', + group: 'integration', + run: async () => { + const renderStream = createWebStream(webHtmlChunks) + const 
inlinedDataStream = createInlinedDataReadableStream( + createWebStream(webFlightChunks), + undefined, + null + ) + const stream = await continueStaticPrerender(renderStream, { + inlinedDataStream, + getServerInsertedHTML: async () => '', + getServerInsertedMetadata: async () => '', + deploymentId: undefined, + }) + return consumeWebReadable(stream) + }, + }, + { + name: 'Web continueDynamicPrerender', + group: 'integration', + run: async () => { + const renderStream = createWebStream(webHtmlChunks) + const stream = await continueDynamicPrerender(renderStream, { + getServerInsertedHTML: async () => '', + getServerInsertedMetadata: async () => '', + deploymentId: undefined, + }) + return consumeWebReadable(stream) + }, + }, + { + name: 'Web continueDynamicHTMLResume', + group: 'integration', + run: async () => { + const renderStream = createWebStream(webHtmlChunks) + const inlinedDataStream = createInlinedDataReadableStream( + createWebStream(webFlightChunks), + undefined, + null + ) + const stream = await continueDynamicHTMLResume(renderStream, { + inlinedDataStream, + delayDataUntilFirstHtmlChunk: false, + getServerInsertedHTML: async () => '', + getServerInsertedMetadata: async () => '', + deploymentId: undefined, + }) + return consumeWebReadable(stream) + }, + }, + { + name: `Web continueDynamicHTMLResume (${secondaryFlightLabel})`, + group: 'integration', + run: async () => { + const renderStream = createWebStream(webHtmlChunks) + const inlinedDataStream = createInlinedDataReadableStream( + createWebStream(webSecondaryFlightChunks), + undefined, + null + ) + const stream = await continueDynamicHTMLResume(renderStream, { + inlinedDataStream, + delayDataUntilFirstHtmlChunk: false, + getServerInsertedHTML: async () => '', + getServerInsertedMetadata: async () => '', + deploymentId: undefined, + }) + return consumeWebReadable(stream) + }, + }, + ] +} + +async function runMicroBenchmarks(options: CliOptions): Promise { + const prevRuntime = process.env.NEXT_RUNTIME + 
const prevUseNodeStreams = process.env.__NEXT_USE_NODE_STREAMS + process.env.NEXT_RUNTIME = 'nodejs' + process.env.__NEXT_USE_NODE_STREAMS = 'true' + + try { + const htmlChunks = makeHtmlChunks( + options.htmlChunks, + options.htmlChunkBytes + ) + const flightChunks = makeFlightChunks( + options.flightChunks, + options.flightChunkBytes, + options.binaryFlight + ) + const secondaryFlightChunks = makeFlightChunks( + options.flightChunks, + options.flightChunkBytes, + !options.binaryFlight + ) + const secondaryFlightLabel = options.binaryFlight + ? 'utf8 flight' + : 'binary flight' + + const cases = buildMicroBenchCases( + htmlChunks, + flightChunks, + secondaryFlightChunks, + secondaryFlightLabel + ) + const results: BenchResult[] = [] + for (const benchCase of cases) { + const result = await runBenchCase( + benchCase, + options.iterations, + options.warmup + ) + results.push(result) + } + return results + } finally { + if (prevRuntime === undefined) { + delete process.env.NEXT_RUNTIME + } else { + process.env.NEXT_RUNTIME = prevRuntime + } + + if (prevUseNodeStreams === undefined) { + delete process.env.__NEXT_USE_NODE_STREAMS + } else { + process.env.__NEXT_USE_NODE_STREAMS = prevUseNodeStreams + } + } +} + +async function runCommand( + command: string, + args: string[], + cwd: string, + env: NodeJS.ProcessEnv = process.env +): Promise { + const child = spawn(command, args, { + cwd, + env, + stdio: 'inherit', + }) + const [code] = (await once(child, 'exit')) as [number | null] + if (code !== 0) { + throw new Error( + `Command failed: ${command} ${args.join(' ')} (exit ${code})` + ) + } +} + +async function ensureNextBuilt() { + try { + await access(NEXT_BIN) + } catch { + throw new Error( + `Missing ${NEXT_BIN}. 
Build Next.js first (pnpm --filter=next build).` + ) + } +} + +function configForMode(mode: 'web' | 'node'): string { + if (mode === 'web') { + return 'module.exports = {}\n' + } + return `module.exports = { + experimental: { + useNodeStreams: true, + }, +}\n` +} + +async function waitForServerReady( + url: string, + timeoutMs: number +): Promise { + const start = performance.now() + while (performance.now() - start < timeoutMs) { + try { + const response = await fetch(url, { cache: 'no-store' }) + await response.arrayBuffer() + if (response.ok) return + } catch { + // server not ready yet + } + await sleep(200) + } + throw new Error(`Server did not become ready within ${timeoutMs}ms`) +} + +async function requestLatencyMs( + url: string, + timeoutMs: number +): Promise { + const controller = new AbortController() + const timeout = setTimeout(() => controller.abort(), timeoutMs) + + try { + const start = performance.now() + const response = await fetch(url, { + cache: 'no-store', + signal: controller.signal, + }) + await response.arrayBuffer() + if (!response.ok) { + throw new Error(`Request failed (${response.status}) for ${url}`) + } + return performance.now() - start + } finally { + clearTimeout(timeout) + } +} + +async function runSerialRequests( + url: string, + count: number, + timeoutMs: number +): Promise { + const latencies: number[] = [] + for (let i = 0; i < count; i++) { + latencies.push(await requestLatencyMs(url, timeoutMs)) + } + return latencies +} + +async function runConcurrentRequests( + url: string, + totalRequests: number, + concurrency: number, + timeoutMs: number +): Promise { + const latencies = new Array(totalRequests) + let index = 0 + + const workers = Array.from({ length: Math.max(1, concurrency) }, async () => { + while (true) { + const current = index + index++ + if (current >= totalRequests) return + latencies[current] = await requestLatencyMs(url, timeoutMs) + } + }) + + await Promise.all(workers) + return latencies +} + +async 
function copyIfExists(fromPath: string, toPath: string) { + try { + await access(fromPath) + await copyFile(fromPath, toPath) + } catch { + // Ignore missing optional traces. + } +} + +function printFullResults(results: FullRunResult[]) { + console.log('\nFULL APP-RENDER BENCHMARKS (end-to-end request path)') + + for (const result of results) { + console.log(`\nMode: ${result.mode}`) + + for (const route of new Set( + result.routeResults.map((entry) => entry.route) + )) { + console.log(` Route: ${route}`) + const routeEntries = result.routeResults.filter( + (entry) => entry.route === route + ) + for (const entry of routeEntries) { + console.log( + ` ${entry.phase} requests=${entry.requests} concurrency=${entry.concurrency}` + ) + console.log( + ` throughput=${entry.throughputRps.toFixed(2)} req/s median=${entry.latency.median.toFixed(2)}ms p95=${entry.latency.p95.toFixed(2)}ms` + ) + } + } + } + + if (results.length === 2) { + const web = results.find((result) => result.mode === 'web') + const node = results.find((result) => result.mode === 'node') + if (web && node) { + console.log('\nComparison (node vs web)') + + const joinKeys = new Set( + web.routeResults.map((entry) => `${entry.route}|${entry.phase}`) + ) + + for (const key of joinKeys) { + const [route, phase] = key.split('|') as [ + string, + 'single-client' | 'under-load', + ] + const webEntry = web.routeResults.find( + (entry) => entry.route === route && entry.phase === phase + ) + const nodeEntry = node.routeResults.find( + (entry) => entry.route === route && entry.phase === phase + ) + + if (!webEntry || !nodeEntry) continue + + const throughputDelta = + ((nodeEntry.throughputRps - webEntry.throughputRps) / + webEntry.throughputRps) * + 100 + const p95Delta = + ((webEntry.latency.p95 - nodeEntry.latency.p95) / + webEntry.latency.p95) * + 100 + + console.log(` ${route} (${phase})`) + console.log( + ` throughput delta: ${throughputDelta >= 0 ? 
'+' : ''}${throughputDelta.toFixed(2)}%` + ) + console.log( + ` p95 latency delta: ${p95Delta >= 0 ? '+' : ''}${p95Delta.toFixed(2)}% (positive is better)` + ) + } + } + } +} + +async function runFullModeBenchmark( + options: CliOptions, + mode: 'web' | 'node' +): Promise { + const nextConfigPath = resolve(options.appDir, 'next.config.js') + const originalConfig = await readFile(nextConfigPath, 'utf8') + + let server: ReturnType | null = null + const routeResults: FullRoutePhaseResult[] = [] + const modeArtifactDir = resolve(options.artifactDir, mode) + + await mkdir(modeArtifactDir, { recursive: true }) + + try { + await writeFile(nextConfigPath, configForMode(mode)) + + if (options.buildFull) { + console.log(`\n[full/${mode}] building app fixture...`) + await runCommand('node', [NEXT_BIN, 'build'], options.appDir, { + ...process.env, + NEXT_TELEMETRY_DISABLED: '1', + }) + if (options.captureNextTrace) { + await copyIfExists( + resolve(options.appDir, '.next/trace-build'), + resolve(modeArtifactDir, 'next-trace-build.log') + ) + } + } + + console.log(`[full/${mode}] starting minimal server...`) + + const serverArgs: string[] = [] + if (options.captureCpu) { + serverArgs.push( + '--cpu-prof', + `--cpu-prof-dir=${modeArtifactDir}`, + `--cpu-prof-name=${mode}.cpuprofile` + ) + } + if (options.captureHeap) { + serverArgs.push( + '--heap-prof', + `--heap-prof-dir=${modeArtifactDir}`, + `--heap-prof-name=${mode}.heapprofile` + ) + } + if (options.captureTrace) { + serverArgs.push( + '--trace-events-enabled', + `--trace-event-categories=${options.traceCategories}`, + `--trace-event-file-pattern=${resolve(modeArtifactDir, `${mode}-trace-\${pid}.json`)}` + ) + } + serverArgs.push(MINIMAL_SERVER) + + server = spawn('node', serverArgs, { + cwd: options.appDir, + env: { + ...process.env, + NODE_ENV: 'production', + NEXT_TELEMETRY_DISABLED: '1', + PORT: String(options.port), + }, + stdio: 'ignore', + }) + + await waitForServerReady( + 
`http://127.0.0.1:${options.port}${options.routes[0]}`, + options.timeoutMs + ) + + for (const route of options.routes) { + const url = `http://127.0.0.1:${options.port}${route}` + + console.log( + `[full/${mode}] route ${route}: warmup ${options.warmupRequests}` + ) + await runSerialRequests(url, options.warmupRequests, options.timeoutMs) + + console.log(`[full/${mode}] route ${route}: single-client phase`) + const serialStart = performance.now() + const serialLatencies = await runSerialRequests( + url, + options.serialRequests, + options.timeoutMs + ) + const serialDurationMs = performance.now() - serialStart + routeResults.push({ + mode, + route, + phase: 'single-client', + requests: options.serialRequests, + concurrency: 1, + throughputRps: options.serialRequests / (serialDurationMs / 1000), + latency: computeStats(serialLatencies), + }) + + console.log(`[full/${mode}] route ${route}: under-load phase`) + const loadStart = performance.now() + const loadLatencies = await runConcurrentRequests( + url, + options.loadRequests, + options.loadConcurrency, + options.timeoutMs + ) + const loadDurationMs = performance.now() - loadStart + routeResults.push({ + mode, + route, + phase: 'under-load', + requests: options.loadRequests, + concurrency: options.loadConcurrency, + throughputRps: options.loadRequests / (loadDurationMs / 1000), + latency: computeStats(loadLatencies), + }) + } + + return { mode, routeResults } + } finally { + if (server) { + const tryKill = async (signal: NodeJS.Signals, timeoutMs: number) => { + server!.kill(signal) + const didExit = await Promise.race([ + once(server!, 'exit') + .then(() => true) + .catch(() => true), + sleep(timeoutMs).then(() => false), + ]) + return didExit + } + + if (!(await tryKill('SIGINT', 3000))) { + if (!(await tryKill('SIGTERM', 3000))) { + server.kill('SIGKILL') + await once(server, 'exit').catch(() => undefined) + } + } + } + + if (options.captureNextTrace) { + await copyIfExists( + resolve(options.appDir, 
'.next/trace'), + resolve(modeArtifactDir, 'next-runtime-trace.log') + ) + } + + await writeFile(nextConfigPath, originalConfig) + } +} + +async function runFullBenchmarks( + options: CliOptions +): Promise { + await ensureNextBuilt() + await mkdir(options.artifactDir, { recursive: true }) + + const modes: Array<'web' | 'node'> = + options.streamMode === 'both' ? ['web', 'node'] : [options.streamMode] + + const results: FullRunResult[] = [] + for (const mode of modes) { + results.push(await runFullModeBenchmark(options, mode)) + } + return results +} + +async function main() { + const options = parseCli() + + console.log('Render pipeline benchmark') + console.log(`scenario=${options.scenario}`) + + let microResults: BenchResult[] | undefined + let fullResults: FullRunResult[] | undefined + + if (options.scenario === 'micro' || options.scenario === 'all') { + console.log( + `\nRunning micro benchmarks: iterations=${options.iterations} warmup=${options.warmup}` + ) + console.log( + `html=${options.htmlChunks}x${options.htmlChunkBytes} flight=${options.flightChunks}x${options.flightChunkBytes} binaryFlight=${options.binaryFlight}` + ) + + microResults = await runMicroBenchmarks(options) + printMicroResults(microResults) + } + + if (options.scenario === 'full' || options.scenario === 'all') { + console.log( + `\nRunning full benchmark: appDir=${options.appDir} streamMode=${options.streamMode}` + ) + console.log(`routes=${options.routes.join(', ')}`) + console.log(`artifacts=${options.artifactDir}`) + + fullResults = await runFullBenchmarks(options) + printFullResults(fullResults) + } + + if (options.jsonOut) { + const outputPath = resolve(process.cwd(), options.jsonOut) + await writeFile( + outputPath, + JSON.stringify( + { + options, + microResults, + fullResults, + generatedAt: new Date().toISOString(), + node: process.version, + }, + null, + 2 + ) + ) + console.log(`\nWrote JSON report: ${outputPath}`) + } +} + +main().catch((error) => { + console.error(error) + 
process.exit(1) +}) diff --git a/eslint.cli.config.mjs b/eslint.cli.config.mjs index 6f7319621cd5..85729ea54796 100644 --- a/eslint.cli.config.mjs +++ b/eslint.cli.config.mjs @@ -10,7 +10,13 @@ export default defineConfig([ // This override adds type-checked rules. // Linting with type-checked rules is very slow and needs a lot of memory, // so we exclude non-essential files. - ignores: ['examples/**/*', 'test/**/*', '**/*.d.ts', 'turbopack/**/*'], + ignores: [ + 'bench/**/*', + 'examples/**/*', + 'test/**/*', + '**/*.d.ts', + 'turbopack/**/*', + ], languageOptions: { parserOptions: { project: true, diff --git a/package.json b/package.json index 3d8f0c921f1f..cb708e6d3887 100644 --- a/package.json +++ b/package.json @@ -12,6 +12,8 @@ "build": "turbo run build --remote-cache-timeout 60 --summarize true", "lerna": "lerna", "dev": "turbo run dev --parallel --filter=\"!@next/bundle-analyzer-ui\"", + "bench:render-pipeline": "tsx bench/render-pipeline/benchmark.ts", + "bench:render-pipeline:analyze": "tsx bench/render-pipeline/analyze-profiles.ts", "pack-next": "tsx scripts/pack-next.ts", "test-types": "tsc", "test-unit": "jest test/unit/ packages/next/ packages/font",