diff --git a/packages/format/src/types/program/context.test.ts b/packages/format/src/types/program/context.test.ts index 4470a322d..a36e1e4c7 100644 --- a/packages/format/src/types/program/context.test.ts +++ b/packages/format/src/types/program/context.test.ts @@ -46,4 +46,8 @@ testSchemaGuards("ethdebug/format/program/context", [ schema: "schema:ethdebug/format/program/context/function/revert", guard: Context.isRevert, }, + { + schema: "schema:ethdebug/format/program/context/transform", + guard: Context.isTransform, + }, ] as const); diff --git a/packages/format/src/types/program/context.ts b/packages/format/src/types/program/context.ts index 104f27196..d9c96c23f 100644 --- a/packages/format/src/types/program/context.ts +++ b/packages/format/src/types/program/context.ts @@ -11,7 +11,8 @@ export type Context = | Context.Frame | Context.Invoke | Context.Return - | Context.Revert; + | Context.Revert + | Context.Transform; export const isContext = (value: unknown): value is Context => [ @@ -24,6 +25,7 @@ export const isContext = (value: unknown): value is Context => Context.isInvoke, Context.isReturn, Context.isRevert, + Context.isTransform, ].some((guard) => guard(value)); export namespace Context { @@ -274,4 +276,30 @@ export namespace Context { (!("reason" in value) || Function.isPointerRef(value.reason)) && (!("panic" in value) || typeof value.panic === "number"); } + + export interface Transform { + transform: Transform.Identifier[]; + } + + export const isTransform = (value: unknown): value is Transform => + typeof value === "object" && + !!value && + "transform" in value && + Array.isArray(value.transform) && + value.transform.length > 0 && + value.transform.every( + (item) => typeof item === "string" && item.length > 0, + ); + + export namespace Transform { + // Recognized v1 identifiers. Any other non-empty string is also + // accepted by isTransform (the identifier set is extensible); the + // `string & {}` member preserves autocomplete for known values. 
+ export type Identifier = + | "inline" + | "tailcall" + | "fold" + | "coalesce" + | (string & {}); + } } diff --git a/packages/web/spec/program/context/gather.mdx b/packages/web/spec/program/context/gather.mdx index eb9cc3613..0501ed70d 100644 --- a/packages/web/spec/program/context/gather.mdx +++ b/packages/web/spec/program/context/gather.mdx @@ -6,6 +6,34 @@ import SchemaViewer from "@site/src/components/SchemaViewer"; # Gather multiple contexts +A `gather` context asserts that every one of its child contexts +holds at the marked instruction. It is the tool for composing +multiple context facts that cannot coexist as sibling keys on a +single object. + + +## When to use `gather` + +The context schema is compositional: a single context object +may carry any number of discriminator keys together — `code`, +`variables`, `invoke`, `return`, `transform`, and so on all +compose as siblings on the same object. Prefer the flat form +when it works. + +Reach for `gather` only when two or more facts would collide on +the same key. The canonical cases are: + +- **Multiple `frame`s** — an instruction that maps + simultaneously to an IR step and a source step needs one + entry per frame, each with its own `code` range. +- **Multiple `variables` blocks** — when separate pipeline + passes each contribute variable information (e.g., one + names the variable, the other supplies its pointer), each + set lives in its own context. + +If every child context uses a different discriminator key, a +`gather` can be collapsed into a single flat object with the +same meaning — and that flat form is the preferred style. 
diff --git a/packages/web/spec/program/context/transform.mdx b/packages/web/spec/program/context/transform.mdx new file mode 100644 index 000000000..427e42eb6 --- /dev/null +++ b/packages/web/spec/program/context/transform.mdx @@ -0,0 +1,119 @@ +--- +sidebar_position: 8 +--- + +import SchemaViewer from "@site/src/components/SchemaViewer"; + +# Transform contexts + +A transform context annotates an instruction with the compiler +transformations that produced it. The value is a list of short +identifiers; the list may repeat the same identifier when the +transformation has been applied multiple times—for example, +doubly-inlined code carries `transform: ["inline", "inline"]`. + + + +## Role: additional annotation + +A transform context does not replace semantic contexts. When the +compiler inlines a function, the caller's debug info should still +carry invoke/return contexts naming the inlined callee at the +call boundary—so the debugger's logical call stack reflects the +source-level structure. The transform context is _additional_ +information telling the debugger **how** the call was realized. + +Consumers are free to ignore transform contexts entirely; the +invoke/return contexts alone always give a sound source-level +view. Consumers that understand transform contexts can offer +optimization-aware presentations: + +- Render inlined code as a collapsible block tied to the + original callee's source location. +- Show which call sites were tail-call-optimized vs. realized as + full call/return sequences. +- Explain apparent anomalies in the trace (e.g., a JUMP that + carries an invoke context is a TCO back-edge). + +## v1 identifiers + +Four identifiers are recognized in v1: + +- **`"inline"`** — the marked instruction is part of an inlined + function body. Surrounding invoke/return contexts name the + inlined callee; this marker tells the debugger the physical + code does not correspond to a separate activation record. 
+- **`"tailcall"`** — the marked instruction is a + tail-call-optimized back-edge JUMP or continuation, where the + call was realized without pushing/popping a full activation. + A JUMP carrying a `tailcall` transform typically sits on a + context that also carries both a `return` (from the previous + iteration) and an `invoke` (of the new iteration). +- **`"fold"`** — the marked instruction carries the result of + a compile-time constant fold. Typically a PUSH of the folded + value replacing a compute sequence (e.g., `ADD` over two + known constants) that appeared in source. The instruction's + surrounding `code` context, if present, points to the + original expression. +- **`"coalesce"`** — the marked instruction is part of a + read-write merging sequence the compiler introduced to + combine adjacent source-level reads or writes. Common + examples include SHL/OR sequences that pack narrower fields + into a single storage slot, or wider loads split into + narrower field extractions. The user did not write these + instructions directly; the `coalesce` marker lets a debugger + present the sequence as one source-level operation rather + than stepping through each byte-shuffling opcode. + +The identifier set is extensible. Compilers may emit additional +identifiers for optimizations not yet standardized; debuggers +should preserve unfamiliar identifiers as opaque labels rather +than rejecting them. + +## Repetition and composition + +Identifiers may repeat. A function inlined into another inlined +function produces `transform: ["inline", "inline"]`. A coalesce +sequence nested inside another coalesced region produces +`transform: ["coalesce", "coalesce"]`. + +Different transformations compose: +`transform: ["inline", "tailcall"]` marks an instruction inside +an inlined body that was itself a TCO back-edge in the callee; +`transform: ["inline", "fold"]` marks a constant-folded PUSH +sitting inside an inlined body. 
+ +Order in the array is not semantically significant—only the +multiset of identifiers matters. + +## Composing with other contexts + +A context object can carry several discriminator keys at once — +`code`, `variables`, `invoke`, `return`, `transform`, and so on +all live in the same object. A TCO back-edge JUMP, for example, +typically combines three facts as sibling keys on a single +context: + +```yaml +return: + identifier: "fact" + declaration: { ... } +invoke: + jump: true + identifier: "fact" + target: { pointer: { location: code, offset: ... } } +transform: ["tailcall"] +``` + +The `return` and `invoke` state the source-level facts +(iteration N returned, iteration N+1 was invoked); the +`transform` explains how the compiler realized that pair as a +single JUMP. + +Reach for [`gather`](/spec/program/context/gather) only when +two contexts would collide on the same key — e.g., two +independent `variables` blocks or two `frame`s from different +pipeline stages. When keys don't collide, the flat form is +preferred. diff --git a/schemas/program/context.schema.yaml b/schemas/program/context.schema.yaml index a57fce654..1a82e76df 100644 --- a/schemas/program/context.schema.yaml +++ b/schemas/program/context.schema.yaml @@ -89,6 +89,14 @@ allOf: description: | Indicates association with a function revert. $ref: "schema:ethdebug/format/program/context/function/revert" + - if: + required: ["transform"] + then: + description: | + Compiler transformations applied to produce this instruction + (e.g., inlining, tail-call optimization). Additional + annotation — does not replace semantic contexts. 
+ $ref: "schema:ethdebug/format/program/context/transform" unevaluatedProperties: false diff --git a/schemas/program/context/transform.schema.yaml b/schemas/program/context/transform.schema.yaml new file mode 100644 index 000000000..8951a00ca --- /dev/null +++ b/schemas/program/context/transform.schema.yaml @@ -0,0 +1,79 @@ +$schema: "https://json-schema.org/draft/2020-12/schema" +$id: "schema:ethdebug/format/program/context/transform" + +title: ethdebug/format/program/context/transform +description: | + Annotates an instruction with compiler transformations that + produced it. The value is a list of short identifiers naming + each transformation; the list may repeat an identifier when + the same transformation has been applied more than once (e.g., + `["inline", "inline"]` for doubly-inlined code). + + A transform context is *additional* annotation — it does not + replace semantic contexts. When the compiler inlines a + function, the invoke/return contexts for the logical call + should still be emitted at the call boundary so the debugger's + source-level call stack remains coherent. The transform + context tells debuggers **how** the call was realized. + + Combine a transform with other discriminator keys (`invoke`, + `return`, `code`, etc.) by placing them side-by-side on the + same context object — `gather` is only needed when two + contexts would collide on the same key. + + Consumers that ignore transform contexts still get a sound + source-level view from the invoke/return contexts alone. + Consumers that understand transform contexts can offer + optimization-aware presentations — e.g., rendering inlined + code as a collapsible block, or reconciling tail-call-optimized + back-edges with the logical call stack. + + The identifier set is extensible. v1 defines: + + - `"inline"` — the marked instruction is part of an inlined + function body. Surrounding invoke/return contexts name the + inlined callee. 
+ - `"tailcall"` — the marked instruction is a + tail-call-optimized back-edge JUMP or continuation, where + the call was realized as a direct jump (or reuse of the + caller's frame) rather than a standard call/return sequence. + - `"fold"` — the marked instruction carries the result of a + compile-time constant fold. Typically a PUSH of the folded + value, replacing a compute sequence that appeared in source. + - `"coalesce"` — the marked instruction is part of a + read-write merging sequence (e.g., SHL/OR sequences packing + narrower fields into a wider word) that the user did not + explicitly write; the compiler introduced it to combine + adjacent source-level reads or writes. + + Debuggers unfamiliar with a given identifier should preserve + it as an opaque label. + + Order in the array is not semantically significant — only the + multiset of identifiers matters. + +type: object +properties: + transform: + title: Applied transformations + description: | + List of transformation identifiers. Identifiers may + repeat; order is not semantically significant. + type: array + items: + type: string + minLength: 1 + minItems: 1 + +required: + - transform + +examples: + - transform: ["inline"] + - transform: ["tailcall"] + - transform: ["fold"] + - transform: ["coalesce"] + - transform: ["inline", "inline"] + - transform: ["inline", "tailcall"] + - transform: ["inline", "fold"] + - transform: ["coalesce", "coalesce"]