Detect and fill missing values. isna(), notna(), isnull(), notnull() for scalars/Series/DataFrame. ffillSeries(), bfillSeries(), dataFrameFfill(), dataFrameBfill() with optional limit and axis support.
+ isna / notna — detect missing values in scalars,
+ Series, and DataFrames.
+ ffill / bfill — propagate the last (or next) valid
+ value to fill gaps.
+ Mirrors pd.isna(), Series.ffill(), and
+ DataFrame.bfill() from pandas.
+
+
+
+
+
1 · isna / notna on scalars
+
+ Returns true / false for individual values.
+ null, undefined, and NaN are all
+ considered "missing".
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
2 · isna on a Series
+
+ When passed a Series, isna returns a boolean Series of the
+ same length — true where values are missing.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
3 · isna on a DataFrame
+
+ Returns a DataFrame of booleans with the same shape — one column per
+ original column, true where missing.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
4 · Forward-fill (ffillSeries)
+
+ Propagates the last valid value forward to fill gaps. Leading
+ nulls that have no preceding value remain null.
+ Use the optional limit to cap consecutive fills.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
5 · Backward-fill (bfillSeries)
+
+ Propagates the next valid value backward to fill gaps. Trailing
+ nulls that have no following value remain null.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
6 · DataFrame forward-fill & backward-fill
+
+ dataFrameFfill and dataFrameBfill apply fill
+ column-wise by default (axis=0). Pass axis: 1 to fill
+ row-wise across columns.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
API Reference
+
// Module-level missing-value detection
+isna(value: Scalar): boolean
+isna(value: Series): Series<boolean>
+isna(value: DataFrame): DataFrame
+
+notna(value: Scalar): boolean
+notna(value: Series): Series<boolean>
+notna(value: DataFrame): DataFrame
+
+// Aliases
+isnull(...) // same as isna
+notnull(...) // same as notna
+
+// Series forward / backward fill
+ffillSeries<T extends Scalar>(series: Series<T>, options?: { limit?: number | null }): Series<T>
+bfillSeries<T extends Scalar>(series: Series<T>, options?: { limit?: number | null }): Series<T>
+
+// DataFrame forward / backward fill
+dataFrameFfill(df, options?: {
+ limit?: number | null, // max consecutive fills (default: no limit)
+ axis?: 0 | 1 | "index" | "columns", // default 0 (column-wise)
+}): DataFrame
+
+dataFrameBfill(df, options?: {
+ limit?: number | null,
+ axis?: 0 | 1 | "index" | "columns",
+}): DataFrame
+
+
+
+
+
+
diff --git a/src/index.ts b/src/index.ts
index 1dd0aa57..ec702a7e 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -107,3 +107,14 @@ export {
export type { ClipOptions, RoundOptions, DataFrameElemOptions } from "./stats/index.ts";
export { valueCounts, dataFrameValueCounts } from "./stats/index.ts";
export type { ValueCountsOptions, DataFrameValueCountsOptions } from "./stats/index.ts";
+export {
+ isna,
+ notna,
+ isnull,
+ notnull,
+ ffillSeries,
+ bfillSeries,
+ dataFrameFfill,
+ dataFrameBfill,
+} from "./stats/index.ts";
+export type { FillDirectionOptions, DataFrameFillOptions } from "./stats/index.ts";
diff --git a/src/stats/index.ts b/src/stats/index.ts
index b1de48eb..84202fde 100644
--- a/src/stats/index.ts
+++ b/src/stats/index.ts
@@ -39,3 +39,14 @@ export {
nsmallestDataFrame,
} from "./nlargest.ts";
export type { NKeep, NTopOptions, NTopDataFrameOptions } from "./nlargest.ts";
+export {
+ isna,
+ notna,
+ isnull,
+ notnull,
+ ffillSeries,
+ bfillSeries,
+ dataFrameFfill,
+ dataFrameBfill,
+} from "./na_ops.ts";
+export type { FillDirectionOptions, DataFrameFillOptions } from "./na_ops.ts";
diff --git a/src/stats/na_ops.ts b/src/stats/na_ops.ts
new file mode 100644
index 00000000..c776bb1f
--- /dev/null
+++ b/src/stats/na_ops.ts
@@ -0,0 +1,336 @@
+/**
+ * na_ops — missing-value utilities for Series and DataFrame.
+ *
+ * Mirrors the following pandas module-level functions and methods:
+ * - `pd.isna(obj)` / `pd.isnull(obj)` — detect missing values
+ * - `pd.notna(obj)` / `pd.notnull(obj)` — detect non-missing values
+ * - `Series.ffill()` / `DataFrame.ffill()` — forward-fill missing values
+ * - `Series.bfill()` / `DataFrame.bfill()` — backward-fill missing values
+ *
+ * All functions are **pure** (return new objects; inputs are unchanged).
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
/** Options for {@link ffillSeries} and {@link bfillSeries}. */
export interface FillDirectionOptions {
  /**
   * Maximum number of consecutive NaN/null values to fill.
   * `null` means no limit (default).
   *
   * Note: a limit <= 0 is not rejected; the fill helpers compare
   * `streak < limit`, so such a limit simply fills nothing.
   */
  readonly limit?: number | null;
}

/** Options for {@link dataFrameFfill} and {@link dataFrameBfill}. */
export interface DataFrameFillOptions extends FillDirectionOptions {
  /**
   * - `0` or `"index"` (default): fill missing values down each **column**.
   * - `1` or `"columns"`: fill missing values across each **row**.
   */
  readonly axis?: 0 | 1 | "index" | "columns";
}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** True when `v` should be treated as missing. */
+function isMissing(v: Scalar): boolean {
+ return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+}
+
+/** Forward-fill an array of scalars in-place (returns a new array). */
+function ffillArray(vals: readonly Scalar[], limit: number | null): Scalar[] {
+ const out: Scalar[] = Array.from(vals);
+ let lastValid: Scalar = null;
+ let streak = 0;
+ for (let i = 0; i < out.length; i++) {
+ if (isMissing(out[i])) {
+ if (!isMissing(lastValid) && (limit === null || streak < limit)) {
+ out[i] = lastValid;
+ streak++;
+ }
+ } else {
+ lastValid = out[i] as Scalar;
+ streak = 0;
+ }
+ }
+ return out;
+}
+
+/** Backward-fill an array of scalars (returns a new array). */
+function bfillArray(vals: readonly Scalar[], limit: number | null): Scalar[] {
+ const out: Scalar[] = Array.from(vals);
+ let nextValid: Scalar = null;
+ let streak = 0;
+ for (let i = out.length - 1; i >= 0; i--) {
+ if (isMissing(out[i])) {
+ if (!isMissing(nextValid) && (limit === null || streak < limit)) {
+ out[i] = nextValid;
+ streak++;
+ }
+ } else {
+ nextValid = out[i] as Scalar;
+ streak = 0;
+ }
+ }
+ return out;
+}
+
+// ─── isna / notna ─────────────────────────────────────────────────────────────
+
+/**
+ * Detect missing values in a scalar, Series, or DataFrame.
+ *
+ * - For a **scalar**: returns `true` if the value is `null`, `undefined`, or `NaN`.
+ * - For a **Series**: returns a `Series` of the same index.
+ * - For a **DataFrame**: returns a `DataFrame` of boolean columns.
+ *
+ * Mirrors `pandas.isna()` / `pandas.isnull()`.
+ *
+ * @example
+ * ```ts
+ * import { isna } from "tsb";
+ * isna(null); // true
+ * isna(42); // false
+ * isna(NaN); // true
+ *
+ * const s = new Series({ data: [1, null, NaN, 4] });
+ * isna(s); // Series([false, true, true, false])
+ * ```
+ */
+export function isna(value: Scalar): boolean;
+export function isna(value: Series): Series;
+export function isna(value: DataFrame): DataFrame;
+export function isna(
+ value: Scalar | Series | DataFrame,
+): boolean | Series | DataFrame {
+ if (value instanceof DataFrame) {
+ return value.isna();
+ }
+ if (value instanceof Series) {
+ return value.isna();
+ }
+ return isMissing(value as Scalar);
+}
+
+/**
+ * Detect non-missing values in a scalar, Series, or DataFrame.
+ *
+ * Mirrors `pandas.notna()` / `pandas.notnull()`.
+ *
+ * @example
+ * ```ts
+ * import { notna } from "tsb";
+ * notna(null); // false
+ * notna(42); // true
+ * ```
+ */
+export function notna(value: Scalar): boolean;
+export function notna(value: Series): Series;
+export function notna(value: DataFrame): DataFrame;
+export function notna(
+ value: Scalar | Series | DataFrame,
+): boolean | Series | DataFrame {
+ if (value instanceof DataFrame) {
+ return value.notna();
+ }
+ if (value instanceof Series) {
+ return value.notna();
+ }
+ return !isMissing(value as Scalar);
+}
+
+/** Alias for {@link isna}. Mirrors `pandas.isnull()`. */
+export const isnull = isna;
+
+/** Alias for {@link notna}. Mirrors `pandas.notnull()`. */
+export const notnull = notna;
+
+// ─── ffill ────────────────────────────────────────────────────────────────────
+
+/**
+ * Forward-fill missing values in a Series.
+ *
+ * Each `null`/`NaN` value is replaced with the last non-missing value
+ * that precedes it (if any). Values before the first non-missing value
+ * remain missing.
+ *
+ * Mirrors `pandas.Series.ffill()`.
+ *
+ * @param series - Input Series (unchanged).
+ * @param options - Optional `{ limit }` — max consecutive fills.
+ * @returns New Series with forward-filled values.
+ *
+ * @example
+ * ```ts
+ * import { ffillSeries } from "tsb";
+ * const s = new Series({ data: [1, null, null, 4] });
+ * ffillSeries(s); // Series([1, 1, 1, 4])
+ * ```
+ */
+export function ffillSeries(
+ series: Series,
+ options?: FillDirectionOptions,
+): Series {
+ const limit = options?.limit ?? null;
+ const filled = ffillArray(series.values as readonly Scalar[], limit) as T[];
+ return new Series({
+ data: filled,
+ index: series.index,
+ dtype: series.dtype,
+ name: series.name ?? undefined,
+ });
+}
+
+/**
+ * Backward-fill missing values in a Series.
+ *
+ * Each `null`/`NaN` value is replaced with the next non-missing value
+ * that follows it (if any). Values after the last non-missing value
+ * remain missing.
+ *
+ * Mirrors `pandas.Series.bfill()`.
+ *
+ * @example
+ * ```ts
+ * import { bfillSeries } from "tsb";
+ * const s = new Series({ data: [1, null, null, 4] });
+ * bfillSeries(s); // Series([1, 4, 4, 4])
+ * ```
+ */
+export function bfillSeries(
+ series: Series,
+ options?: FillDirectionOptions,
+): Series {
+ const limit = options?.limit ?? null;
+ const filled = bfillArray(series.values as readonly Scalar[], limit) as T[];
+ return new Series({
+ data: filled,
+ index: series.index,
+ dtype: series.dtype,
+ name: series.name ?? undefined,
+ });
+}
+
+// ─── DataFrame ffill / bfill ──────────────────────────────────────────────────
+
+/**
+ * Forward-fill missing values in a DataFrame.
+ *
+ * By default operates **column-wise** (axis=0): each column is independently
+ * forward-filled. With `axis=1` each row is forward-filled across columns.
+ *
+ * Mirrors `pandas.DataFrame.ffill()`.
+ *
+ * @example
+ * ```ts
+ * import { dataFrameFfill } from "tsb";
+ * const df = new DataFrame({ data: { a: [1, null, 3], b: [null, 2, null] } });
+ * dataFrameFfill(df);
+ * // a: [1, 1, 3]
+ * // b: [null, 2, 2]
+ * ```
+ */
+export function dataFrameFfill(df: DataFrame, options?: DataFrameFillOptions): DataFrame {
+ const limit = options?.limit ?? null;
+ const axis = options?.axis ?? 0;
+ const byRow = axis === 1 || axis === "columns";
+
+ if (!byRow) {
+ // column-wise: fill each column independently
+ const colMap = new Map>();
+ for (const name of df.columns.values) {
+ const col = df.col(name);
+ const filled = ffillArray(col.values, limit) as Scalar[];
+ colMap.set(name, new Series({ data: filled, index: col.index, dtype: col.dtype }));
+ }
+ return new DataFrame(colMap, df.index);
+ }
+
+ // row-wise: fill across columns for each row
+ const nRows = df.shape[0];
+ const cols = df.columns.values;
+ const columns = cols.map((name) => df.col(name));
+ const rowsFilled: Scalar[][] = columns.map((c) => Array.from(c.values));
+ for (let r = 0; r < nRows; r++) {
+ const rowVals: Scalar[] = columns.map((_, ci) => rowsFilled[ci]?.[r] ?? null);
+ const filled = ffillArray(rowVals, limit);
+ for (let ci = 0; ci < cols.length; ci++) {
+ const rowsFilledCI = rowsFilled[ci];
+ if (rowsFilledCI !== undefined) {
+ rowsFilledCI[r] = filled[ci] ?? null;
+ }
+ }
+ }
+ const colMap = new Map>();
+ for (let ci = 0; ci < cols.length; ci++) {
+ const name = cols[ci] as string;
+ const col = columns[ci] as Series;
+ colMap.set(
+ name,
+ new Series({
+ data: rowsFilled[ci] ?? [],
+ index: col.index,
+ dtype: col.dtype,
+ }),
+ );
+ }
+ return new DataFrame(colMap, df.index);
+}
+
+/**
+ * Backward-fill missing values in a DataFrame.
+ *
+ * By default operates **column-wise** (axis=0). With `axis=1` fills across rows.
+ *
+ * Mirrors `pandas.DataFrame.bfill()`.
+ */
+export function dataFrameBfill(df: DataFrame, options?: DataFrameFillOptions): DataFrame {
+ const limit = options?.limit ?? null;
+ const axis = options?.axis ?? 0;
+ const byRow = axis === 1 || axis === "columns";
+
+ if (!byRow) {
+ const colMap = new Map>();
+ for (const name of df.columns.values) {
+ const col = df.col(name);
+ const filled = bfillArray(col.values, limit) as Scalar[];
+ colMap.set(name, new Series({ data: filled, index: col.index, dtype: col.dtype }));
+ }
+ return new DataFrame(colMap, df.index);
+ }
+
+ const nRows = df.shape[0];
+ const cols = df.columns.values;
+ const columns = cols.map((name) => df.col(name));
+ const rowsFilled: Scalar[][] = columns.map((c) => Array.from(c.values));
+ for (let r = 0; r < nRows; r++) {
+ const rowVals: Scalar[] = columns.map((_, ci) => rowsFilled[ci]?.[r] ?? null);
+ const filled = bfillArray(rowVals, limit);
+ for (let ci = 0; ci < cols.length; ci++) {
+ const rowsFilledCI = rowsFilled[ci];
+ if (rowsFilledCI !== undefined) {
+ rowsFilledCI[r] = filled[ci] ?? null;
+ }
+ }
+ }
+ const colMap = new Map>();
+ for (let ci = 0; ci < cols.length; ci++) {
+ const name = cols[ci] as string;
+ const col = columns[ci] as Series;
+ colMap.set(
+ name,
+ new Series({
+ data: rowsFilled[ci] ?? [],
+ index: col.index,
+ dtype: col.dtype,
+ }),
+ );
+ }
+ return new DataFrame(colMap, df.index);
+}
diff --git a/tests/stats/na_ops.test.ts b/tests/stats/na_ops.test.ts
new file mode 100644
index 00000000..340406ac
--- /dev/null
+++ b/tests/stats/na_ops.test.ts
@@ -0,0 +1,280 @@
/**
 * Tests for na_ops — missing-value utilities (isna, notna, ffill, bfill).
 *
 * Covers scalar / Series / DataFrame detection, forward/backward fill with
 * `limit`, both DataFrame axes, and three fast-check property tests.
 */

import { describe, expect, it } from "bun:test";
import fc from "fast-check";
import {
  DataFrame,
  Series,
  bfillSeries,
  dataFrameBfill,
  dataFrameFfill,
  ffillSeries,
  isna,
  isnull,
  notna,
  notnull,
} from "../../src/index.ts";

// ─── isna / notna ─────────────────────────────────────────────────────────────

describe("isna (scalar)", () => {
  it("returns true for null", () => expect(isna(null)).toBe(true));
  it("returns true for undefined", () => expect(isna(undefined)).toBe(true));
  it("returns true for NaN", () => expect(isna(Number.NaN)).toBe(true));
  // Falsy-but-present values must NOT be reported as missing.
  it("returns false for 0", () => expect(isna(0)).toBe(false));
  it("returns false for empty string", () => expect(isna("")).toBe(false));
  it("returns false for false", () => expect(isna(false)).toBe(false));
  it("returns false for a number", () => expect(isna(42)).toBe(false));
});

describe("notna (scalar)", () => {
  it("returns false for null", () => expect(notna(null)).toBe(false));
  it("returns false for NaN", () => expect(notna(Number.NaN)).toBe(false));
  it("returns true for 42", () => expect(notna(42)).toBe(true));
  it("returns true for a string", () => expect(notna("hello")).toBe(true));
});

describe("isnull / notnull aliases", () => {
  it("isnull equals isna for scalar", () => {
    expect(isnull(null)).toBe(isna(null));
    expect(isnull(42)).toBe(isna(42));
  });
  it("notnull equals notna for scalar", () => {
    expect(notnull(null)).toBe(notna(null));
    expect(notnull(42)).toBe(notna(42));
  });
});

describe("isna (Series)", () => {
  it("returns boolean Series of correct length", () => {
    const s = new Series({ data: [1, null, Number.NaN, 4] });
    const result = isna(s);
    expect(result).toBeInstanceOf(Series);
    expect([...result.values]).toEqual([false, true, true, false]);
  });

  it("all present", () => {
    const s = new Series({ data: [1, 2, 3] });
    expect([...isna(s).values]).toEqual([false, false, false]);
  });

  it("all missing", () => {
    const s = new Series({ data: [null, null, Number.NaN] });
    expect([...isna(s).values]).toEqual([true, true, true]);
  });
});

describe("notna (Series)", () => {
  it("is the inverse of isna", () => {
    const s = new Series({ data: [1, null, Number.NaN, 4] });
    const na = isna(s).values;
    const nna = notna(s).values;
    for (let i = 0; i < na.length; i++) {
      expect(nna[i]).toBe(!na[i]);
    }
  });
});

describe("isna (DataFrame)", () => {
  it("returns DataFrame of booleans", () => {
    const df = DataFrame.fromColumns({ a: [1, null], b: [Number.NaN, 2] });
    const result = isna(df);
    expect(result).toBeInstanceOf(DataFrame);
    expect([...result.col("a").values]).toEqual([false, true]);
    expect([...result.col("b").values]).toEqual([true, false]);
  });
});

describe("notna (DataFrame)", () => {
  it("returns inverse of isna DataFrame", () => {
    const df = DataFrame.fromColumns({ a: [1, null], b: [Number.NaN, 2] });
    expect([...notna(df).col("a").values]).toEqual([true, false]);
    expect([...notna(df).col("b").values]).toEqual([false, true]);
  });
});

// ─── ffillSeries ──────────────────────────────────────────────────────────────

describe("ffillSeries", () => {
  it("fills nulls with preceding value", () => {
    const s = new Series({ data: [1, null, null, 4] });
    expect([...ffillSeries(s).values]).toEqual([1, 1, 1, 4]);
  });

  it("leaves leading nulls untouched", () => {
    const s = new Series({ data: [null, null, 3, null] });
    expect([...ffillSeries(s).values]).toEqual([null, null, 3, 3]);
  });

  it("NaN is treated as missing", () => {
    const s = new Series({ data: [2, Number.NaN, 5] });
    const result = ffillSeries(s).values;
    expect(result[0]).toBe(2);
    expect(result[1]).toBe(2);
    expect(result[2]).toBe(5);
  });

  it("respects limit option", () => {
    const s = new Series({ data: [1, null, null, null, 5] });
    expect([...ffillSeries(s, { limit: 1 }).values]).toEqual([1, 1, null, null, 5]);
  });

  // Functions must be pure: the input Series is never mutated.
  it("preserves original Series", () => {
    const s = new Series({ data: [1, null, 3] });
    ffillSeries(s);
    expect([...s.values]).toEqual([1, null, 3]);
  });

  it("empty Series returns empty", () => {
    const s = new Series({ data: [] });
    expect([...ffillSeries(s).values]).toEqual([]);
  });

  it("preserves name and index", () => {
    const s = new Series({ data: [1, null], name: "x" });
    const filled = ffillSeries(s);
    expect(filled.name).toBe("x");
    expect(filled.index.size).toBe(2);
  });
});

// ─── bfillSeries ──────────────────────────────────────────────────────────────

describe("bfillSeries", () => {
  it("fills nulls with following value", () => {
    const s = new Series({ data: [1, null, null, 4] });
    expect([...bfillSeries(s).values]).toEqual([1, 4, 4, 4]);
  });

  it("leaves trailing nulls untouched", () => {
    const s = new Series({ data: [null, 3, null, null] });
    expect([...bfillSeries(s).values]).toEqual([3, 3, null, null]);
  });

  it("respects limit option", () => {
    const s = new Series({ data: [1, null, null, null, 5] });
    expect([...bfillSeries(s, { limit: 2 }).values]).toEqual([1, null, 5, 5, 5]);
  });

  it("empty Series returns empty", () => {
    const s = new Series({ data: [] });
    expect([...bfillSeries(s).values]).toEqual([]);
  });
});

// ─── dataFrameFfill ───────────────────────────────────────────────────────────

describe("dataFrameFfill (column-wise)", () => {
  it("fills each column independently", () => {
    const df = DataFrame.fromColumns({ a: [1, null, 3], b: [null, 2, null] });
    const result = dataFrameFfill(df);
    expect([...result.col("a").values]).toEqual([1, 1, 3]);
    expect([...result.col("b").values]).toEqual([null, 2, 2]);
  });

  it("preserves index", () => {
    const df = DataFrame.fromColumns({ x: [1, null] });
    expect(dataFrameFfill(df).index.size).toBe(2);
  });
});

describe("dataFrameFfill (row-wise)", () => {
  // With axis=1 values propagate left-to-right within each row.
  it("fills across columns per row", () => {
    const df = DataFrame.fromColumns({ a: [1, null], b: [null, null], c: [3, 4] });
    const result = dataFrameFfill(df, { axis: 1 });
    expect([...result.col("a").values]).toEqual([1, null]);
    expect([...result.col("b").values]).toEqual([1, null]);
    expect([...result.col("c").values]).toEqual([3, 4]);
  });
});

// ─── dataFrameBfill ───────────────────────────────────────────────────────────

describe("dataFrameBfill (column-wise)", () => {
  it("fills each column backward", () => {
    const df = DataFrame.fromColumns({ a: [null, null, 3], b: [1, null, null] });
    const result = dataFrameBfill(df);
    expect([...result.col("a").values]).toEqual([3, 3, 3]);
    expect([...result.col("b").values]).toEqual([1, null, null]);
  });
});

describe("dataFrameBfill (row-wise)", () => {
  it("fills backward across columns per row", () => {
    const df = DataFrame.fromColumns({ a: [null, 1], b: [null, null], c: [3, null] });
    const result = dataFrameBfill(df, { axis: 1 });
    expect([...result.col("a").values]).toEqual([3, 1]);
    expect([...result.col("b").values]).toEqual([3, null]);
    expect([...result.col("c").values]).toEqual([3, null]);
  });
});

// ─── property-based tests ─────────────────────────────────────────────────────

describe("property: ffill followed by bfill fills all if any non-null", () => {
  it("all values filled when at least one is present", () => {
    fc.assert(
      fc.property(
        fc.array(fc.option(fc.integer({ min: 0, max: 100 }), { nil: null }), {
          minLength: 1,
          maxLength: 20,
        }),
        (raw) => {
          const hasNonNull = raw.some((v) => v !== null);
          if (!hasNonNull) {
            // vacuously true: an all-null input has nothing to propagate
            return true;
          }
          const s = new Series({ data: raw });
          const result = bfillSeries(ffillSeries(s));
          return result.values.every((v) => v !== null);
        },
      ),
    );
  });
});

describe("property: ffill never introduces new non-null values beyond last valid", () => {
  it("ffilled series has no nulls after first valid value", () => {
    fc.assert(
      fc.property(
        fc.array(fc.option(fc.integer({ min: -50, max: 50 }), { nil: null }), {
          minLength: 0,
          maxLength: 30,
        }),
        (raw) => {
          const s = new Series({ data: raw });
          const filled = ffillSeries(s).values;
          let sawValid = false;
          for (const v of filled) {
            if (v !== null) {
              sawValid = true;
            }
            if (sawValid && v === null) {
              return false;
            }
          }
          return true;
        },
      ),
    );
  });
});

describe("property: isna is inverse of notna for scalars", () => {
  it("isna(v) === !notna(v)", () => {
    fc.assert(
      fc.property(
        fc.oneof(
          fc.integer(),
          fc.float({ noNaN: false }),
          fc.constant(null),
          fc.string(),
          fc.boolean(),
        ),
        (v) => isna(v as Parameters[0]) === !notna(v as Parameters[0]),
      ),
    );
  });
});
From d6df47348e0ea49480e28bf091182af90b08720b Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Mon, 13 Apr 2026 08:01:00 +0000
Subject: [PATCH 2/2] Merge main into PR branch, resolving conflicts in
src/index.ts and src/stats/index.ts
- Keep na_ops additions (ffillSeries/bfillSeries/dataFrameFfill/dataFrameBfill)
- Keep main's additions (cut/qcut, where_mask, notna_isna, string_ops, etc.)
- Resolve duplicate isna/notna by using notna_isna.ts (main) for those exports
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
.autoloop/programs/perf-comparison/program.md | 74 +++
.github/workflows/autoloop.lock.yml | 36 +-
.github/workflows/autoloop.md | 69 +-
.github/workflows/evergreen.lock.yml | 36 +-
.github/workflows/evergreen.md | 22 +-
.github/workflows/pages.yml | 7 +
.github/workflows/sync-branches.lock.yml | 20 +-
.github/workflows/sync-branches.md | 18 +-
benchmarks/pandas/bench_concat.py | 28 +
benchmarks/pandas/bench_dataframe_apply.py | 27 +
benchmarks/pandas/bench_dataframe_creation.py | 27 +
benchmarks/pandas/bench_dataframe_dropna.py | 27 +
benchmarks/pandas/bench_dataframe_filter.py | 26 +
benchmarks/pandas/bench_dataframe_rename.py | 27 +
benchmarks/pandas/bench_dataframe_sort.py | 28 +
benchmarks/pandas/bench_describe.py | 27 +
benchmarks/pandas/bench_ewm_mean.py | 26 +
benchmarks/pandas/bench_groupby_mean.py | 27 +
benchmarks/pandas/bench_merge.py | 29 +
benchmarks/pandas/bench_pivot_table.py | 28 +
benchmarks/pandas/bench_read_csv.py | 30 +
benchmarks/pandas/bench_rolling_mean.py | 26 +
benchmarks/pandas/bench_series_arithmetic.py | 26 +
benchmarks/pandas/bench_series_creation.py | 47 ++
benchmarks/pandas/bench_series_cumsum.py | 26 +
benchmarks/pandas/bench_series_fillna.py | 26 +
benchmarks/pandas/bench_series_shift.py | 26 +
benchmarks/pandas/bench_series_sort.py | 27 +
benchmarks/pandas/bench_series_string_ops.py | 27 +
.../pandas/bench_series_value_counts.py | 25 +
benchmarks/results.json | 247 +++++++
benchmarks/run_benchmarks.sh | 129 ++++
benchmarks/tsb/bench_concat.ts | 32 +
benchmarks/tsb/bench_dataframe_apply.ts | 32 +
benchmarks/tsb/bench_dataframe_creation.ts | 33 +
benchmarks/tsb/bench_dataframe_dropna.ts | 31 +
benchmarks/tsb/bench_dataframe_filter.ts | 30 +
benchmarks/tsb/bench_dataframe_rename.ts | 31 +
benchmarks/tsb/bench_dataframe_sort.ts | 31 +
benchmarks/tsb/bench_describe.ts | 31 +
benchmarks/tsb/bench_ewm_mean.ts | 30 +
benchmarks/tsb/bench_groupby_mean.ts | 31 +
benchmarks/tsb/bench_merge.ts | 33 +
benchmarks/tsb/bench_pivot_table.ts | 32 +
benchmarks/tsb/bench_read_csv.ts | 39 ++
benchmarks/tsb/bench_rolling_mean.ts | 30 +
benchmarks/tsb/bench_series_arithmetic.ts | 30 +
benchmarks/tsb/bench_series_creation.ts | 49 ++
benchmarks/tsb/bench_series_cumsum.ts | 30 +
benchmarks/tsb/bench_series_fillna.ts | 31 +
benchmarks/tsb/bench_series_shift.ts | 30 +
benchmarks/tsb/bench_series_sort.ts | 30 +
benchmarks/tsb/bench_series_string_ops.ts | 32 +
benchmarks/tsb/bench_series_value_counts.ts | 30 +
docs/playground.md | 3 -
playground/api_types.html | 222 +++++++
playground/attrs.html | 183 +++++
playground/benchmarks.html | 360 ++++++++++
playground/categorical_ops.html | 338 ++++++++++
playground/cut_qcut.html | 163 +++++
playground/format_ops.html | 262 ++++++++
playground/index.html | 80 +++
playground/insert_pop.html | 172 +++++
playground/notna_isna.html | 242 +++++++
playground/numeric_extended.html | 353 ++++++++++
playground/pipe_apply.html | 276 ++++++++
playground/playground-runtime.js | 179 ++++-
playground/rolling_apply.html | 225 +++++++
playground/string_ops.html | 282 ++++++++
playground/string_ops_extended.html | 413 ++++++++++++
playground/to_from_dict.html | 122 ++++
playground/where_mask.html | 220 ++++++
playground/wide_to_long.html | 113 ++++
playground/window_extended.html | 304 +++++++++
src/core/api_types.ts | 629 ++++++++++++++++++
src/core/attrs.ts | 291 ++++++++
src/core/index.ts | 68 ++
src/core/insert_pop.ts | 214 ++++++
src/core/pipe_apply.ts | 303 +++++++++
src/core/to_from_dict.ts | 283 ++++++++
src/index.ts | 173 ++++-
src/reshape/index.ts | 2 +
src/reshape/wide_to_long.ts | 217 ++++++
src/stats/categorical_ops.ts | 483 ++++++++++++++
src/stats/cut_qcut.ts | 383 +++++++++++
src/stats/format_ops.ts | 442 ++++++++++++
src/stats/index.ts | 111 +++-
src/stats/notna_isna.ts | 369 ++++++++++
src/stats/numeric_extended.ts | 586 ++++++++++++++++
src/stats/string_ops.ts | 468 +++++++++++++
src/stats/string_ops_extended.ts | 429 ++++++++++++
src/stats/where_mask.ts | 289 ++++++++
src/stats/window_extended.ts | 321 +++++++++
src/window/index.ts | 7 +
src/window/rolling_apply.ts | 323 +++++++++
tests/core/api_types.test.ts | 621 +++++++++++++++++
tests/core/attrs.test.ts | 542 +++++++++++++++
tests/core/insert_pop.test.ts | 286 ++++++++
tests/core/pipe_apply.test.ts | 449 +++++++++++++
tests/core/to_from_dict.test.ts | 278 ++++++++
tests/reshape/wide_to_long.test.ts | 211 ++++++
tests/stats/categorical_ops.test.ts | 476 +++++++++++++
tests/stats/cut_qcut.test.ts | 277 ++++++++
tests/stats/format_ops.test.ts | 568 ++++++++++++++++
tests/stats/notna_isna.test.ts | 536 +++++++++++++++
tests/stats/numeric_extended.test.ts | 509 ++++++++++++++
tests/stats/rank.test.ts | 2 +-
tests/stats/string_ops.test.ts | 459 +++++++++++++
tests/stats/string_ops_extended.test.ts | 437 ++++++++++++
tests/stats/where_mask.test.ts | 338 ++++++++++
tests/stats/window_extended.test.ts | 365 ++++++++++
tests/window/rolling_apply.test.ts | 354 ++++++++++
112 files changed, 19493 insertions(+), 97 deletions(-)
create mode 100644 .autoloop/programs/perf-comparison/program.md
create mode 100644 benchmarks/pandas/bench_concat.py
create mode 100644 benchmarks/pandas/bench_dataframe_apply.py
create mode 100644 benchmarks/pandas/bench_dataframe_creation.py
create mode 100644 benchmarks/pandas/bench_dataframe_dropna.py
create mode 100644 benchmarks/pandas/bench_dataframe_filter.py
create mode 100644 benchmarks/pandas/bench_dataframe_rename.py
create mode 100644 benchmarks/pandas/bench_dataframe_sort.py
create mode 100644 benchmarks/pandas/bench_describe.py
create mode 100644 benchmarks/pandas/bench_ewm_mean.py
create mode 100644 benchmarks/pandas/bench_groupby_mean.py
create mode 100644 benchmarks/pandas/bench_merge.py
create mode 100644 benchmarks/pandas/bench_pivot_table.py
create mode 100644 benchmarks/pandas/bench_read_csv.py
create mode 100644 benchmarks/pandas/bench_rolling_mean.py
create mode 100644 benchmarks/pandas/bench_series_arithmetic.py
create mode 100644 benchmarks/pandas/bench_series_creation.py
create mode 100644 benchmarks/pandas/bench_series_cumsum.py
create mode 100644 benchmarks/pandas/bench_series_fillna.py
create mode 100644 benchmarks/pandas/bench_series_shift.py
create mode 100644 benchmarks/pandas/bench_series_sort.py
create mode 100644 benchmarks/pandas/bench_series_string_ops.py
create mode 100644 benchmarks/pandas/bench_series_value_counts.py
create mode 100644 benchmarks/results.json
create mode 100644 benchmarks/run_benchmarks.sh
create mode 100644 benchmarks/tsb/bench_concat.ts
create mode 100644 benchmarks/tsb/bench_dataframe_apply.ts
create mode 100644 benchmarks/tsb/bench_dataframe_creation.ts
create mode 100644 benchmarks/tsb/bench_dataframe_dropna.ts
create mode 100644 benchmarks/tsb/bench_dataframe_filter.ts
create mode 100644 benchmarks/tsb/bench_dataframe_rename.ts
create mode 100644 benchmarks/tsb/bench_dataframe_sort.ts
create mode 100644 benchmarks/tsb/bench_describe.ts
create mode 100644 benchmarks/tsb/bench_ewm_mean.ts
create mode 100644 benchmarks/tsb/bench_groupby_mean.ts
create mode 100644 benchmarks/tsb/bench_merge.ts
create mode 100644 benchmarks/tsb/bench_pivot_table.ts
create mode 100644 benchmarks/tsb/bench_read_csv.ts
create mode 100644 benchmarks/tsb/bench_rolling_mean.ts
create mode 100644 benchmarks/tsb/bench_series_arithmetic.ts
create mode 100644 benchmarks/tsb/bench_series_creation.ts
create mode 100644 benchmarks/tsb/bench_series_cumsum.ts
create mode 100644 benchmarks/tsb/bench_series_fillna.ts
create mode 100644 benchmarks/tsb/bench_series_shift.ts
create mode 100644 benchmarks/tsb/bench_series_sort.ts
create mode 100644 benchmarks/tsb/bench_series_string_ops.ts
create mode 100644 benchmarks/tsb/bench_series_value_counts.ts
create mode 100644 playground/api_types.html
create mode 100644 playground/attrs.html
create mode 100644 playground/benchmarks.html
create mode 100644 playground/categorical_ops.html
create mode 100644 playground/cut_qcut.html
create mode 100644 playground/format_ops.html
create mode 100644 playground/insert_pop.html
create mode 100644 playground/notna_isna.html
create mode 100644 playground/numeric_extended.html
create mode 100644 playground/pipe_apply.html
create mode 100644 playground/rolling_apply.html
create mode 100644 playground/string_ops.html
create mode 100644 playground/string_ops_extended.html
create mode 100644 playground/to_from_dict.html
create mode 100644 playground/where_mask.html
create mode 100644 playground/wide_to_long.html
create mode 100644 playground/window_extended.html
create mode 100644 src/core/api_types.ts
create mode 100644 src/core/attrs.ts
create mode 100644 src/core/insert_pop.ts
create mode 100644 src/core/pipe_apply.ts
create mode 100644 src/core/to_from_dict.ts
create mode 100644 src/reshape/wide_to_long.ts
create mode 100644 src/stats/categorical_ops.ts
create mode 100644 src/stats/cut_qcut.ts
create mode 100644 src/stats/format_ops.ts
create mode 100644 src/stats/notna_isna.ts
create mode 100644 src/stats/numeric_extended.ts
create mode 100644 src/stats/string_ops.ts
create mode 100644 src/stats/string_ops_extended.ts
create mode 100644 src/stats/where_mask.ts
create mode 100644 src/stats/window_extended.ts
create mode 100644 src/window/rolling_apply.ts
create mode 100644 tests/core/api_types.test.ts
create mode 100644 tests/core/attrs.test.ts
create mode 100644 tests/core/insert_pop.test.ts
create mode 100644 tests/core/pipe_apply.test.ts
create mode 100644 tests/core/to_from_dict.test.ts
create mode 100644 tests/reshape/wide_to_long.test.ts
create mode 100644 tests/stats/categorical_ops.test.ts
create mode 100644 tests/stats/cut_qcut.test.ts
create mode 100644 tests/stats/format_ops.test.ts
create mode 100644 tests/stats/notna_isna.test.ts
create mode 100644 tests/stats/numeric_extended.test.ts
create mode 100644 tests/stats/string_ops.test.ts
create mode 100644 tests/stats/string_ops_extended.test.ts
create mode 100644 tests/stats/where_mask.test.ts
create mode 100644 tests/stats/window_extended.test.ts
create mode 100644 tests/window/rolling_apply.test.ts
diff --git a/.autoloop/programs/perf-comparison/program.md b/.autoloop/programs/perf-comparison/program.md
new file mode 100644
index 00000000..c1aec206
--- /dev/null
+++ b/.autoloop/programs/perf-comparison/program.md
@@ -0,0 +1,74 @@
+---
+schedule: every 6h
+---
+
+# Performance Comparison: tsb (TypeScript) vs pandas (Python)
+
+## Goal
+
+Systematically benchmark every tsb function against its pandas equivalent, one function per iteration. Each iteration picks a function that has not yet been benchmarked, writes a matching performance test for both tsb (TypeScript/Bun) and pandas (Python), runs both, and records the timing results. The benchmark results are displayed on the playground pages doc site.
+
+This is an open-ended program — it runs continuously, always adding the next benchmark comparison.
+
+### How each iteration works
+
+1. **Read existing benchmarks** — check `benchmarks/tsb/` and `benchmarks/pandas/` to see which functions are already benchmarked.
+2. **Pick ONE function** from `src/` that has no benchmark yet. Prioritize core operations (Series, DataFrame, GroupBy, etc.).
+3. **Write a TypeScript benchmark** in `benchmarks/tsb/bench_{function}.ts` that:
+ - Creates a realistic dataset (e.g. 100,000 rows)
+ - Runs the operation in a tight loop (warm-up + measured iterations)
+ - Outputs JSON: `{"function": "...", "mean_ms": ..., "iterations": ..., "total_ms": ...}`
+4. **Write a matching Python benchmark** in `benchmarks/pandas/bench_{function}.py` that:
+ - Creates the same dataset as the TypeScript version
+ - Runs the same operation with the same loop structure
+ - Outputs the same JSON format
+5. **Run both benchmarks** via `benchmarks/run_benchmarks.sh` and capture results.
+6. **Update `benchmarks/results.json`** with the new timing data.
+7. **Update `playground/benchmarks.html`** to display the new function's comparison metrics.
+
+### Key constraints
+
+- **Matching datasets** — both benchmarks must use identical data (same size, same values where possible).
+- **Fair comparison** — same number of warm-up and measured iterations for both.
+- **JSON output** — every benchmark script must output a single JSON line to stdout.
+- **No modifications to `src/`** — benchmark code is separate from library code.
+- **Python environment** — install pandas via pip if not present.
+
+## Target
+
+Only modify these files:
+- `benchmarks/**` — benchmark scripts and results
+- `playground/benchmarks.html` — performance comparison playground page
+- `playground/index.html` — add/update link to benchmarks page
+
+Do NOT modify:
+- `src/**` — library source code
+- `tests/**` — test files
+- `README.md` — read-only
+- `.autoloop/programs/**` — program definitions (except this file's code/ dir)
+- `.github/workflows/autoloop*` — autoloop workflow files
+
+## Evaluation
+
+```bash
+# Set up Python environment if needed
+if ! command -v python3 &>/dev/null; then
+ echo "Python3 not found, skipping"
+fi
+pip3 install pandas --quiet 2>/dev/null || true
+
+# Count the number of benchmark pairs (functions with both TS and Python benchmarks)
+ts_benchmarks=$(ls benchmarks/tsb/bench_*.ts 2>/dev/null | wc -l | tr -d ' ')
+py_benchmarks=$(ls benchmarks/pandas/bench_*.py 2>/dev/null | wc -l | tr -d ' ')
+
+# The metric is the minimum of the two (both must exist for a complete benchmark)
+if [ "$ts_benchmarks" -lt "$py_benchmarks" ]; then
+ count=$ts_benchmarks
+else
+ count=$py_benchmarks
+fi
+
+echo "{\"benchmarked_functions\": ${count:-0}}"
+```
+
+The metric is `benchmarked_functions`. **Higher is better.**
diff --git a/.github/workflows/autoloop.lock.yml b/.github/workflows/autoloop.lock.yml
index ce21ec84..489ea218 100644
--- a/.github/workflows/autoloop.lock.yml
+++ b/.github/workflows/autoloop.lock.yml
@@ -37,7 +37,7 @@
# Imports:
# - shared/reporting.md
#
-# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"ae0854a9693094d32638babc16d353dc5de46c218ae3d893a9306b0b2a916042","compiler_version":"v0.65.6","strict":true,"agent_id":"copilot"}
+# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"4a373c351f80c4a3192abb04ad384f012a37e1fa4edfab3d08dc852deac2cf4f","compiler_version":"v0.65.6","strict":true,"agent_id":"copilot"}
name: "Autoloop"
"on":
@@ -222,21 +222,21 @@ jobs:
run: |
bash ${RUNNER_TEMP}/gh-aw/actions/create_prompt_first.sh
{
- cat << 'GH_AW_PROMPT_6cb617c1e46803c0_EOF'
+ cat << 'GH_AW_PROMPT_8719c7b9dd3572a2_EOF'
- GH_AW_PROMPT_6cb617c1e46803c0_EOF
+ GH_AW_PROMPT_8719c7b9dd3572a2_EOF
cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/markdown.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/repo_memory_prompt.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_prompt.md"
- cat << 'GH_AW_PROMPT_6cb617c1e46803c0_EOF'
+ cat << 'GH_AW_PROMPT_8719c7b9dd3572a2_EOF'
Tools: add_comment(max:7), create_issue(max:2), update_issue(max:3), create_pull_request, add_labels(max:2), remove_labels(max:2), push_to_pull_request_branch, missing_tool, missing_data, noop
- GH_AW_PROMPT_6cb617c1e46803c0_EOF
+ GH_AW_PROMPT_8719c7b9dd3572a2_EOF
cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_create_pull_request.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_push_to_pr_branch.md"
- cat << 'GH_AW_PROMPT_6cb617c1e46803c0_EOF'
+ cat << 'GH_AW_PROMPT_8719c7b9dd3572a2_EOF'
The following GitHub context information is available for this workflow:
@@ -269,7 +269,7 @@ jobs:
- **Note**: If a branch you need is not in the list above and is not listed as an additional fetched ref, it has NOT been checked out. For private repositories you cannot fetch it without proper authentication. If the branch is required and not available, exit with an error and ask the user to add it to the `fetch:` option of the `checkout:` configuration (e.g., `fetch: ["refs/pulls/open/*"]` for all open PR refs, or `fetch: ["main", "feature/my-branch"]` for specific branches).
- GH_AW_PROMPT_6cb617c1e46803c0_EOF
+ GH_AW_PROMPT_8719c7b9dd3572a2_EOF
cat "${RUNNER_TEMP}/gh-aw/prompts/github_mcp_tools_with_safeoutputs_prompt.md"
if [ "$GITHUB_EVENT_NAME" = "issue_comment" ] && [ -n "$GH_AW_IS_PR_COMMENT" ] || [ "$GITHUB_EVENT_NAME" = "pull_request_review_comment" ] || [ "$GITHUB_EVENT_NAME" = "pull_request_review" ]; then
cat "${RUNNER_TEMP}/gh-aw/prompts/pr_context_prompt.md"
@@ -277,11 +277,11 @@ jobs:
if [ "$GITHUB_EVENT_NAME" = "issue_comment" ] && [ -n "$GH_AW_IS_PR_COMMENT" ] || [ "$GITHUB_EVENT_NAME" = "pull_request_review_comment" ] || [ "$GITHUB_EVENT_NAME" = "pull_request_review" ]; then
cat "${RUNNER_TEMP}/gh-aw/prompts/pr_context_push_to_pr_branch_guidance.md"
fi
- cat << 'GH_AW_PROMPT_6cb617c1e46803c0_EOF'
+ cat << 'GH_AW_PROMPT_8719c7b9dd3572a2_EOF'
{{#runtime-import .github/workflows/shared/reporting.md}}
{{#runtime-import .github/workflows/autoloop.md}}
- GH_AW_PROMPT_6cb617c1e46803c0_EOF
+ GH_AW_PROMPT_8719c7b9dd3572a2_EOF
} > "$GH_AW_PROMPT"
- name: Interpolate variables and render templates
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
@@ -439,7 +439,7 @@ jobs:
GITHUB_REPOSITORY: ${{ github.repository }}
GITHUB_TOKEN: ${{ github.token }}
name: Check which programs are due
- run: "python3 - << 'PYEOF'\nimport os, json, re, glob, sys\nimport urllib.request, urllib.error\nfrom datetime import datetime, timezone, timedelta\n\nprograms_dir = \".autoloop/programs\"\nautoloop_dir = \".autoloop/programs\"\ntemplate_file = os.path.join(autoloop_dir, \"example.md\")\n\n# Read program state from repo-memory (persistent git-backed storage)\ngithub_token = os.environ.get(\"GITHUB_TOKEN\", \"\")\nrepo = os.environ.get(\"GITHUB_REPOSITORY\", \"\")\nforced_program = os.environ.get(\"AUTOLOOP_PROGRAM\", \"\").strip()\n\n# Repo-memory files are cloned to /tmp/gh-aw/repo-memory/{id}/ where {id}\n# is derived from the branch-name configured in the tools section (memory/autoloop → autoloop)\nrepo_memory_dir = \"/tmp/gh-aw/repo-memory/autoloop\"\n\ndef parse_machine_state(content):\n \"\"\"Parse the ⚙️ Machine State table from a state file. Returns a dict.\"\"\"\n state = {}\n m = re.search(r'## ⚙️ Machine State.*?\\n(.*?)(?=\\n## |\\Z)', content, re.DOTALL)\n if not m:\n return state\n section = m.group(0)\n for row in re.finditer(r'\\|\\s*(.+?)\\s*\\|\\s*(.+?)\\s*\\|', section):\n raw_key = row.group(1).strip()\n raw_val = row.group(2).strip()\n if raw_key.lower() in (\"field\", \"---\", \":---\", \":---:\", \"---:\"):\n continue\n key = raw_key.lower().replace(\" \", \"_\")\n val = None if raw_val in (\"—\", \"-\", \"\") else raw_val\n state[key] = val\n # Coerce types\n for int_field in (\"iteration_count\", \"consecutive_errors\"):\n if int_field in state:\n try:\n state[int_field] = int(state[int_field])\n except (ValueError, TypeError):\n state[int_field] = 0\n if \"paused\" in state:\n state[\"paused\"] = str(state.get(\"paused\", \"\")).lower() == \"true\"\n if \"completed\" in state:\n state[\"completed\"] = str(state.get(\"completed\", \"\")).lower() == \"true\"\n # recent_statuses: stored as comma-separated words (e.g. 
\"accepted, rejected, error\")\n rs_raw = state.get(\"recent_statuses\") or \"\"\n if rs_raw:\n state[\"recent_statuses\"] = [s.strip().lower() for s in rs_raw.split(\",\") if s.strip()]\n else:\n state[\"recent_statuses\"] = []\n return state\n\ndef read_program_state(program_name):\n \"\"\"Read scheduling state from the repo-memory state file.\"\"\"\n state_file = os.path.join(repo_memory_dir, f\"{program_name}.md\")\n if not os.path.isfile(state_file):\n print(f\" {program_name}: no state file found (first run)\")\n return {}\n with open(state_file, encoding=\"utf-8\") as f:\n content = f.read()\n return parse_machine_state(content)\n\n# Bootstrap: create autoloop programs directory and template if missing\nif not os.path.isdir(autoloop_dir):\n os.makedirs(autoloop_dir, exist_ok=True)\n bt = chr(96) # backtick — avoid literal backticks that break gh-aw compiler\n template = \"\\n\".join([\n \"\",\n \"\",\n \"\",\n \"\",\n \"# Autoloop Program\",\n \"\",\n \"\",\n \"\",\n \"## Goal\",\n \"\",\n \"\",\n \"\",\n \"REPLACE THIS with your optimization goal.\",\n \"\",\n \"## Target\",\n \"\",\n \"\",\n \"\",\n \"Only modify these files:\",\n f\"- {bt}REPLACE_WITH_FILE{bt} -- (describe what this file does)\",\n \"\",\n \"Do NOT modify:\",\n \"- (list files that must not be touched)\",\n \"\",\n \"## Evaluation\",\n \"\",\n \"\",\n \"\",\n f\"{bt}{bt}{bt}bash\",\n \"REPLACE_WITH_YOUR_EVALUATION_COMMAND\",\n f\"{bt}{bt}{bt}\",\n \"\",\n f\"The metric is {bt}REPLACE_WITH_METRIC_NAME{bt}. **Lower/Higher is better.** (pick one)\",\n \"\",\n ])\n with open(template_file, \"w\") as f:\n f.write(template)\n # Leave the template unstaged — the agent will create a draft PR with it\n print(f\"BOOTSTRAPPED: created {template_file} locally (agent will create a draft PR)\")\n\n# Find all program files from all locations:\n# 1. Directory-based programs: .autoloop/programs//program.md (preferred)\n# 2. Bare markdown programs: .autoloop/programs/.md (simple)\n# 3. 
Issue-based programs: GitHub issues with the 'autoloop-program' label\nprogram_files = []\nissue_programs = {} # name -> {issue_number, file}\n\n# Scan .autoloop/programs/ for directory-based programs\nif os.path.isdir(programs_dir):\n for entry in sorted(os.listdir(programs_dir)):\n prog_dir = os.path.join(programs_dir, entry)\n if os.path.isdir(prog_dir):\n # Look for program.md inside the directory\n prog_file = os.path.join(prog_dir, \"program.md\")\n if os.path.isfile(prog_file):\n program_files.append(prog_file)\n\n# Scan .autoloop/programs/ for bare markdown programs\nbare_programs = sorted(glob.glob(os.path.join(autoloop_dir, \"*.md\")))\nfor pf in bare_programs:\n program_files.append(pf)\n\n# Scan GitHub issues with the 'autoloop-program' label\nissue_programs_dir = \"/tmp/gh-aw/issue-programs\"\nos.makedirs(issue_programs_dir, exist_ok=True)\ntry:\n api_url = f\"https://api.github.com/repos/{repo}/issues?labels=autoloop-program&state=open&per_page=100\"\n req = urllib.request.Request(api_url, headers={\n \"Authorization\": f\"token {github_token}\",\n \"Accept\": \"application/vnd.github.v3+json\",\n })\n with urllib.request.urlopen(req, timeout=30) as resp:\n issues = json.loads(resp.read().decode())\n for issue in issues:\n if issue.get(\"pull_request\"):\n continue # skip PRs\n body = issue.get(\"body\") or \"\"\n title = issue.get(\"title\") or \"\"\n number = issue[\"number\"]\n # Derive program name from issue title: slugify to lowercase with hyphens\n slug = re.sub(r'[^a-z0-9]+', '-', title.lower()).strip('-')\n slug = re.sub(r'-+', '-', slug) # collapse consecutive hyphens\n if not slug:\n slug = f\"issue-{number}\"\n # Avoid slug collisions: if another issue already claimed this slug, append issue number\n if slug in issue_programs:\n print(f\" Warning: slug '{slug}' (issue #{number}) collides with issue #{issue_programs[slug]['issue_number']}, appending issue number\")\n slug = f\"{slug}-{number}\"\n # Write issue body to a temp file so the 
scheduling loop can process it\n issue_file = os.path.join(issue_programs_dir, f\"{slug}.md\")\n with open(issue_file, \"w\") as f:\n f.write(body)\n program_files.append(issue_file)\n issue_programs[slug] = {\"issue_number\": number, \"file\": issue_file, \"title\": title}\n print(f\" Found issue-based program: '{slug}' (issue #{number})\")\nexcept Exception as e:\n print(f\" Warning: could not fetch issue-based programs: {e}\")\n\nif not program_files:\n # Fallback to single-file locations\n for path in [\".autoloop/program.md\", \"program.md\"]:\n if os.path.isfile(path):\n program_files = [path]\n break\n\nif not program_files:\n print(\"NO_PROGRAMS_FOUND\")\n os.makedirs(\"/tmp/gh-aw\", exist_ok=True)\n with open(\"/tmp/gh-aw/autoloop.json\", \"w\") as f:\n json.dump({\"due\": [], \"skipped\": [], \"unconfigured\": [], \"no_programs\": True}, f)\n sys.exit(0)\n\nos.makedirs(\"/tmp/gh-aw\", exist_ok=True)\nnow = datetime.now(timezone.utc)\ndue = []\nskipped = []\nunconfigured = []\nall_programs = {} # name -> file path (populated during scanning)\n\n# Schedule string to timedelta\ndef parse_schedule(s):\n s = s.strip().lower()\n m = re.match(r\"every\\s+(\\d+)\\s*h\", s)\n if m:\n return timedelta(hours=int(m.group(1)))\n m = re.match(r\"every\\s+(\\d+)\\s*m\", s)\n if m:\n return timedelta(minutes=int(m.group(1)))\n if s == \"daily\":\n return timedelta(hours=24)\n if s == \"weekly\":\n return timedelta(days=7)\n return None # No per-program schedule — always due\n\ndef get_program_name(pf):\n \"\"\"Extract program name from file path.\n Directory-based: .autoloop/programs//program.md -> \n Bare markdown: .autoloop/programs/.md -> \n Issue-based: /tmp/gh-aw/issue-programs/.md -> \n \"\"\"\n if pf.endswith(\"/program.md\"):\n # Directory-based program: name is the parent directory\n return os.path.basename(os.path.dirname(pf))\n else:\n # Bare markdown or issue-based program: name is the filename without .md\n return 
os.path.splitext(os.path.basename(pf))[0]\n\nfor pf in program_files:\n name = get_program_name(pf)\n all_programs[name] = pf\n with open(pf) as f:\n content = f.read()\n\n # Check sentinel (skip for issue-based programs which use AUTOLOOP:ISSUE-PROGRAM)\n if \"\" in content:\n unconfigured.append(name)\n continue\n\n # Check for TODO/REPLACE placeholders\n if re.search(r'\\bTODO\\b|\\bREPLACE', content):\n unconfigured.append(name)\n continue\n\n # Parse optional YAML frontmatter for schedule and target-metric\n # Strip leading HTML comments before checking (issue-based programs may have them)\n content_stripped = re.sub(r'^(\\s*\\s*\\n)*', '', content, flags=re.DOTALL)\n schedule_delta = None\n target_metric = None\n fm_match = re.match(r\"^---\\s*\\n(.*?)\\n---\\s*\\n\", content_stripped, re.DOTALL)\n if fm_match:\n for line in fm_match.group(1).split(\"\\n\"):\n if line.strip().startswith(\"schedule:\"):\n schedule_str = line.split(\":\", 1)[1].strip()\n schedule_delta = parse_schedule(schedule_str)\n if line.strip().startswith(\"target-metric:\"):\n try:\n target_metric = float(line.split(\":\", 1)[1].strip())\n except (ValueError, TypeError):\n print(f\" Warning: {name} has invalid target-metric value: {line.split(':', 1)[1].strip()}\")\n\n # Read state from repo-memory\n state = read_program_state(name)\n if state:\n print(f\" {name}: last_run={state.get('last_run')}, iteration_count={state.get('iteration_count')}\")\n else:\n print(f\" {name}: no state found (first run)\")\n\n last_run = None\n lr = state.get(\"last_run\")\n if lr:\n try:\n last_run = datetime.fromisoformat(lr.replace(\"Z\", \"+00:00\"))\n except ValueError:\n pass\n\n # Check if completed (target metric was reached)\n if str(state.get(\"completed\", \"\")).lower() == \"true\":\n skipped.append({\"name\": name, \"reason\": f\"completed: target metric reached\"})\n continue\n\n # Check if paused (e.g., plateau or recurring errors)\n if state.get(\"paused\"):\n skipped.append({\"name\": name, 
\"reason\": f\"paused: {state.get('pause_reason', 'unknown')}\"})\n continue\n\n # Auto-pause on plateau: 5+ consecutive rejections\n recent = state.get(\"recent_statuses\", [])[-5:]\n if len(recent) >= 5 and all(s == \"rejected\" for s in recent):\n skipped.append({\"name\": name, \"reason\": \"plateau: 5 consecutive rejections\"})\n continue\n\n # Check if due based on per-program schedule\n if schedule_delta and last_run:\n if now - last_run < schedule_delta:\n skipped.append({\"name\": name, \"reason\": \"not due yet\",\n \"next_due\": (last_run + schedule_delta).isoformat()})\n continue\n\n due.append({\"name\": name, \"last_run\": lr, \"file\": pf, \"target_metric\": target_metric})\n\n# Pick the program to run\nselected = None\nselected_file = None\nselected_issue = None\nselected_target_metric = None\ndeferred = []\n\nif forced_program:\n # Manual dispatch requested a specific program — bypass scheduling\n # (paused, not-due, and plateau programs can still be forced)\n if forced_program not in all_programs:\n print(f\"ERROR: requested program '{forced_program}' not found.\")\n print(f\" Available programs: {list(all_programs.keys())}\")\n sys.exit(1)\n if forced_program in unconfigured:\n print(f\"ERROR: requested program '{forced_program}' is unconfigured (has placeholders).\")\n sys.exit(1)\n selected = forced_program\n selected_file = all_programs[forced_program]\n deferred = [p[\"name\"] for p in due if p[\"name\"] != forced_program]\n if selected in issue_programs:\n selected_issue = issue_programs[selected][\"issue_number\"]\n # Find target_metric: check the due list first, then parse from the program file\n for p in due:\n if p[\"name\"] == forced_program:\n selected_target_metric = p.get(\"target_metric\")\n break\n if selected_target_metric is None:\n # Program may have been skipped (completed/paused/plateau) — parse directly\n try:\n with open(selected_file) as _f:\n _content = _f.read()\n _content_stripped = re.sub(r'^(\\s*\\s*\\n)*', '', 
_content, flags=re.DOTALL)\n _fm = re.match(r\"^---\\s*\\n(.*?)\\n---\\s*\\n\", _content_stripped, re.DOTALL)\n if _fm:\n for _line in _fm.group(1).split(\"\\n\"):\n if _line.strip().startswith(\"target-metric:\"):\n selected_target_metric = float(_line.split(\":\", 1)[1].strip())\n break\n except (OSError, ValueError, TypeError):\n pass\n print(f\"FORCED: running program '{forced_program}' (manual dispatch)\")\nelif due:\n # Normal scheduling: pick the single most-overdue program\n due.sort(key=lambda p: p[\"last_run\"] or \"\") # None/empty sorts first (never run)\n selected = due[0][\"name\"]\n selected_file = due[0][\"file\"]\n selected_target_metric = due[0].get(\"target_metric\")\n deferred = [p[\"name\"] for p in due[1:]]\n # Check if the selected program is issue-based\n if selected in issue_programs:\n selected_issue = issue_programs[selected][\"issue_number\"]\n\nresult = {\n \"selected\": selected,\n \"selected_file\": selected_file,\n \"selected_issue\": selected_issue,\n \"selected_target_metric\": selected_target_metric,\n \"issue_programs\": {name: info[\"issue_number\"] for name, info in issue_programs.items()},\n \"deferred\": deferred,\n \"skipped\": skipped,\n \"unconfigured\": unconfigured,\n \"no_programs\": False,\n}\n\nos.makedirs(\"/tmp/gh-aw\", exist_ok=True)\nwith open(\"/tmp/gh-aw/autoloop.json\", \"w\") as f:\n json.dump(result, f, indent=2)\n\nprint(\"=== Autoloop Program Check ===\")\nprint(f\"Selected program: {selected or '(none)'} ({selected_file or 'n/a'})\")\nprint(f\"Deferred (next run): {deferred or '(none)'}\")\nprint(f\"Programs skipped: {[s['name'] for s in skipped] or '(none)'}\")\nprint(f\"Programs unconfigured: {unconfigured or '(none)'}\")\n\nif not selected and not unconfigured:\n print(\"\\nNo programs due this run. Exiting early.\")\n sys.exit(1) # Non-zero exit skips the agent step\nPYEOF\n"
+ run: "python3 - << 'PYEOF'\nimport os, json, re, glob, sys\nimport urllib.request, urllib.error\nfrom datetime import datetime, timezone, timedelta\n\nprograms_dir = \".autoloop/programs\"\nautoloop_dir = \".autoloop/programs\"\ntemplate_file = os.path.join(autoloop_dir, \"example.md\")\n\n# Read program state from repo-memory (persistent git-backed storage)\ngithub_token = os.environ.get(\"GITHUB_TOKEN\", \"\")\nrepo = os.environ.get(\"GITHUB_REPOSITORY\", \"\")\nforced_program = os.environ.get(\"AUTOLOOP_PROGRAM\", \"\").strip()\n\n# Repo-memory files are cloned to /tmp/gh-aw/repo-memory/{id}/ where {id}\n# is derived from the branch-name configured in the tools section (memory/autoloop → autoloop)\nrepo_memory_dir = \"/tmp/gh-aw/repo-memory/autoloop\"\n\ndef parse_machine_state(content):\n \"\"\"Parse the ⚙️ Machine State table from a state file. Returns a dict.\"\"\"\n state = {}\n m = re.search(r'## ⚙️ Machine State.*?\\n(.*?)(?=\\n## |\\Z)', content, re.DOTALL)\n if not m:\n return state\n section = m.group(0)\n for row in re.finditer(r'\\|\\s*(.+?)\\s*\\|\\s*(.+?)\\s*\\|', section):\n raw_key = row.group(1).strip()\n raw_val = row.group(2).strip()\n if raw_key.lower() in (\"field\", \"---\", \":---\", \":---:\", \"---:\"):\n continue\n key = raw_key.lower().replace(\" \", \"_\")\n val = None if raw_val in (\"—\", \"-\", \"\") else raw_val\n state[key] = val\n # Coerce types\n for int_field in (\"iteration_count\", \"consecutive_errors\"):\n if int_field in state:\n try:\n state[int_field] = int(state[int_field])\n except (ValueError, TypeError):\n state[int_field] = 0\n if \"paused\" in state:\n state[\"paused\"] = str(state.get(\"paused\", \"\")).lower() == \"true\"\n if \"completed\" in state:\n state[\"completed\"] = str(state.get(\"completed\", \"\")).lower() == \"true\"\n # recent_statuses: stored as comma-separated words (e.g. 
\"accepted, rejected, error\")\n rs_raw = state.get(\"recent_statuses\") or \"\"\n if rs_raw:\n state[\"recent_statuses\"] = [s.strip().lower() for s in rs_raw.split(\",\") if s.strip()]\n else:\n state[\"recent_statuses\"] = []\n return state\n\ndef read_program_state(program_name):\n \"\"\"Read scheduling state from the repo-memory state file.\"\"\"\n state_file = os.path.join(repo_memory_dir, f\"{program_name}.md\")\n if not os.path.isfile(state_file):\n print(f\" {program_name}: no state file found (first run)\")\n return {}\n with open(state_file, encoding=\"utf-8\") as f:\n content = f.read()\n return parse_machine_state(content)\n\n# Bootstrap: create autoloop programs directory and template if missing\nif not os.path.isdir(autoloop_dir):\n os.makedirs(autoloop_dir, exist_ok=True)\n bt = chr(96) # backtick — avoid literal backticks that break gh-aw compiler\n template = \"\\n\".join([\n \"\",\n \"\",\n \"\",\n \"\",\n \"# Autoloop Program\",\n \"\",\n \"\",\n \"\",\n \"## Goal\",\n \"\",\n \"\",\n \"\",\n \"REPLACE THIS with your optimization goal.\",\n \"\",\n \"## Target\",\n \"\",\n \"\",\n \"\",\n \"Only modify these files:\",\n f\"- {bt}REPLACE_WITH_FILE{bt} -- (describe what this file does)\",\n \"\",\n \"Do NOT modify:\",\n \"- (list files that must not be touched)\",\n \"\",\n \"## Evaluation\",\n \"\",\n \"\",\n \"\",\n f\"{bt}{bt}{bt}bash\",\n \"REPLACE_WITH_YOUR_EVALUATION_COMMAND\",\n f\"{bt}{bt}{bt}\",\n \"\",\n f\"The metric is {bt}REPLACE_WITH_METRIC_NAME{bt}. **Lower/Higher is better.** (pick one)\",\n \"\",\n ])\n with open(template_file, \"w\") as f:\n f.write(template)\n # Leave the template unstaged — the agent will create a draft PR with it\n print(f\"BOOTSTRAPPED: created {template_file} locally (agent will create a draft PR)\")\n\n# Find all program files from all locations:\n# 1. Directory-based programs: .autoloop/programs//program.md (preferred)\n# 2. Bare markdown programs: .autoloop/programs/.md (simple)\n# 3. 
Issue-based programs: GitHub issues with the 'autoloop-program' label\nprogram_files = []\nissue_programs = {} # name -> {issue_number, file}\n\n# Scan .autoloop/programs/ for directory-based programs\nif os.path.isdir(programs_dir):\n for entry in sorted(os.listdir(programs_dir)):\n prog_dir = os.path.join(programs_dir, entry)\n if os.path.isdir(prog_dir):\n # Look for program.md inside the directory\n prog_file = os.path.join(prog_dir, \"program.md\")\n if os.path.isfile(prog_file):\n program_files.append(prog_file)\n\n# Scan .autoloop/programs/ for bare markdown programs\nbare_programs = sorted(glob.glob(os.path.join(autoloop_dir, \"*.md\")))\nfor pf in bare_programs:\n program_files.append(pf)\n\n# Scan GitHub issues with the 'autoloop-program' label\nissue_programs_dir = \"/tmp/gh-aw/issue-programs\"\nos.makedirs(issue_programs_dir, exist_ok=True)\ntry:\n api_url = f\"https://api.github.com/repos/{repo}/issues?labels=autoloop-program&state=open&per_page=100\"\n req = urllib.request.Request(api_url, headers={\n \"Authorization\": f\"token {github_token}\",\n \"Accept\": \"application/vnd.github.v3+json\",\n })\n with urllib.request.urlopen(req, timeout=30) as resp:\n issues = json.loads(resp.read().decode())\n for issue in issues:\n if issue.get(\"pull_request\"):\n continue # skip PRs\n body = issue.get(\"body\") or \"\"\n title = issue.get(\"title\") or \"\"\n number = issue[\"number\"]\n # Derive program name from issue title: slugify to lowercase with hyphens\n slug = re.sub(r'[^a-z0-9]+', '-', title.lower()).strip('-')\n slug = re.sub(r'-+', '-', slug) # collapse consecutive hyphens\n if not slug:\n slug = f\"issue-{number}\"\n # Avoid slug collisions: if another issue already claimed this slug, append issue number\n if slug in issue_programs:\n print(f\" Warning: slug '{slug}' (issue #{number}) collides with issue #{issue_programs[slug]['issue_number']}, appending issue number\")\n slug = f\"{slug}-{number}\"\n # Write issue body to a temp file so the 
scheduling loop can process it\n issue_file = os.path.join(issue_programs_dir, f\"{slug}.md\")\n with open(issue_file, \"w\") as f:\n f.write(body)\n program_files.append(issue_file)\n issue_programs[slug] = {\"issue_number\": number, \"file\": issue_file, \"title\": title}\n print(f\" Found issue-based program: '{slug}' (issue #{number})\")\nexcept Exception as e:\n print(f\" Warning: could not fetch issue-based programs: {e}\")\n\nif not program_files:\n # Fallback to single-file locations\n for path in [\".autoloop/program.md\", \"program.md\"]:\n if os.path.isfile(path):\n program_files = [path]\n break\n\nif not program_files:\n print(\"NO_PROGRAMS_FOUND\")\n os.makedirs(\"/tmp/gh-aw\", exist_ok=True)\n with open(\"/tmp/gh-aw/autoloop.json\", \"w\") as f:\n json.dump({\"due\": [], \"skipped\": [], \"unconfigured\": [], \"no_programs\": True}, f)\n sys.exit(0)\n\nos.makedirs(\"/tmp/gh-aw\", exist_ok=True)\nnow = datetime.now(timezone.utc)\ndue = []\nskipped = []\nunconfigured = []\nall_programs = {} # name -> file path (populated during scanning)\n\n# Schedule string to timedelta\ndef parse_schedule(s):\n s = s.strip().lower()\n m = re.match(r\"every\\s+(\\d+)\\s*h\", s)\n if m:\n return timedelta(hours=int(m.group(1)))\n m = re.match(r\"every\\s+(\\d+)\\s*m\", s)\n if m:\n return timedelta(minutes=int(m.group(1)))\n if s == \"daily\":\n return timedelta(hours=24)\n if s == \"weekly\":\n return timedelta(days=7)\n return None # No per-program schedule — always due\n\ndef get_program_name(pf):\n \"\"\"Extract program name from file path.\n Directory-based: .autoloop/programs//program.md -> \n Bare markdown: .autoloop/programs/.md -> \n Issue-based: /tmp/gh-aw/issue-programs/.md -> \n \"\"\"\n if pf.endswith(\"/program.md\"):\n # Directory-based program: name is the parent directory\n return os.path.basename(os.path.dirname(pf))\n else:\n # Bare markdown or issue-based program: name is the filename without .md\n return 
os.path.splitext(os.path.basename(pf))[0]\n\nfor pf in program_files:\n name = get_program_name(pf)\n all_programs[name] = pf\n with open(pf) as f:\n content = f.read()\n\n # Check sentinel (skip for issue-based programs which use AUTOLOOP:ISSUE-PROGRAM)\n if \"\" in content:\n unconfigured.append(name)\n continue\n\n # Check for TODO/REPLACE placeholders\n if re.search(r'\\bTODO\\b|\\bREPLACE', content):\n unconfigured.append(name)\n continue\n\n # Parse optional YAML frontmatter for schedule and target-metric\n # Strip leading HTML comments before checking (issue-based programs may have them)\n content_stripped = re.sub(r'^(\\s*\\s*\\n)*', '', content, flags=re.DOTALL)\n schedule_delta = None\n target_metric = None\n fm_match = re.match(r\"^---\\s*\\n(.*?)\\n---\\s*\\n\", content_stripped, re.DOTALL)\n if fm_match:\n for line in fm_match.group(1).split(\"\\n\"):\n if line.strip().startswith(\"schedule:\"):\n schedule_str = line.split(\":\", 1)[1].strip()\n schedule_delta = parse_schedule(schedule_str)\n if line.strip().startswith(\"target-metric:\"):\n try:\n target_metric = float(line.split(\":\", 1)[1].strip())\n except (ValueError, TypeError):\n print(f\" Warning: {name} has invalid target-metric value: {line.split(':', 1)[1].strip()}\")\n\n # Read state from repo-memory\n state = read_program_state(name)\n if state:\n print(f\" {name}: last_run={state.get('last_run')}, iteration_count={state.get('iteration_count')}\")\n else:\n print(f\" {name}: no state found (first run)\")\n\n last_run = None\n lr = state.get(\"last_run\")\n if lr:\n try:\n last_run = datetime.fromisoformat(lr.replace(\"Z\", \"+00:00\"))\n except ValueError:\n pass\n\n # Check if completed (target metric was reached)\n if str(state.get(\"completed\", \"\")).lower() == \"true\":\n skipped.append({\"name\": name, \"reason\": f\"completed: target metric reached\"})\n continue\n\n # Check if paused (e.g., plateau or recurring errors)\n if state.get(\"paused\"):\n skipped.append({\"name\": name, 
\"reason\": f\"paused: {state.get('pause_reason', 'unknown')}\"})\n continue\n\n # Auto-pause on plateau: 5+ consecutive rejections\n recent = state.get(\"recent_statuses\", [])[-5:]\n if len(recent) >= 5 and all(s == \"rejected\" for s in recent):\n skipped.append({\"name\": name, \"reason\": \"plateau: 5 consecutive rejections\"})\n continue\n\n # Check if due based on per-program schedule\n if schedule_delta and last_run:\n if now - last_run < schedule_delta:\n skipped.append({\"name\": name, \"reason\": \"not due yet\",\n \"next_due\": (last_run + schedule_delta).isoformat()})\n continue\n\n due.append({\"name\": name, \"last_run\": lr, \"file\": pf, \"target_metric\": target_metric})\n\n# Pick the program to run\nselected = None\nselected_file = None\nselected_issue = None\nselected_target_metric = None\ndeferred = []\n\nif forced_program:\n # Manual dispatch requested a specific program — bypass scheduling\n # (paused, not-due, and plateau programs can still be forced)\n if forced_program not in all_programs:\n print(f\"ERROR: requested program '{forced_program}' not found.\")\n print(f\" Available programs: {list(all_programs.keys())}\")\n sys.exit(1)\n if forced_program in unconfigured:\n print(f\"ERROR: requested program '{forced_program}' is unconfigured (has placeholders).\")\n sys.exit(1)\n selected = forced_program\n selected_file = all_programs[forced_program]\n deferred = [p[\"name\"] for p in due if p[\"name\"] != forced_program]\n if selected in issue_programs:\n selected_issue = issue_programs[selected][\"issue_number\"]\n # Find target_metric: check the due list first, then parse from the program file\n for p in due:\n if p[\"name\"] == forced_program:\n selected_target_metric = p.get(\"target_metric\")\n break\n if selected_target_metric is None:\n # Program may have been skipped (completed/paused/plateau) — parse directly\n try:\n with open(selected_file) as _f:\n _content = _f.read()\n _content_stripped = re.sub(r'^(\\s*\\s*\\n)*', '', 
_content, flags=re.DOTALL)\n _fm = re.match(r\"^---\\s*\\n(.*?)\\n---\\s*\\n\", _content_stripped, re.DOTALL)\n if _fm:\n for _line in _fm.group(1).split(\"\\n\"):\n if _line.strip().startswith(\"target-metric:\"):\n selected_target_metric = float(_line.split(\":\", 1)[1].strip())\n break\n except (OSError, ValueError, TypeError):\n pass\n print(f\"FORCED: running program '{forced_program}' (manual dispatch)\")\nelif due:\n # Normal scheduling: pick the single most-overdue program\n due.sort(key=lambda p: p[\"last_run\"] or \"\") # None/empty sorts first (never run)\n selected = due[0][\"name\"]\n selected_file = due[0][\"file\"]\n selected_target_metric = due[0].get(\"target_metric\")\n deferred = [p[\"name\"] for p in due[1:]]\n # Check if the selected program is issue-based\n if selected in issue_programs:\n selected_issue = issue_programs[selected][\"issue_number\"]\n\n# Look up existing PR for the selected program's canonical branch\nexisting_pr = None\nhead_branch = None\nif selected:\n head_branch = f\"autoloop/{selected}\"\n owner = repo.split(\"/\")[0] if \"/\" in repo else \"\"\n if owner:\n try:\n pr_api_url = (\n f\"https://api.github.com/repos/{repo}/pulls\"\n f\"?state=open&head={owner}:{head_branch}&per_page=5\"\n )\n pr_req = urllib.request.Request(pr_api_url, headers={\n \"Authorization\": f\"token {github_token}\",\n \"Accept\": \"application/vnd.github.v3+json\",\n })\n with urllib.request.urlopen(pr_req, timeout=30) as pr_resp:\n open_prs = json.loads(pr_resp.read().decode())\n if open_prs:\n existing_pr = open_prs[0][\"number\"]\n print(f\" Found existing PR #{existing_pr} for branch {head_branch}\")\n else:\n print(f\" No existing PR found for branch {head_branch}\")\n except Exception as e:\n print(f\" Warning: could not check for existing PRs: {e}\")\n else:\n print(f\" Warning: could not parse owner from GITHUB_REPOSITORY='{repo}'\")\n\n # Also check the state file for a recorded PR number as fallback\n if existing_pr is None:\n state = 
read_program_state(selected)\n pr_field = state.get(\"pr\") or \"\"\n pr_match = re.match(r'^#?(\\d+)$', pr_field.strip())\n if pr_match:\n existing_pr = int(pr_match.group(1))\n print(f\" Found PR #{existing_pr} from state file for {selected}\")\n\nresult = {\n \"selected\": selected,\n \"selected_file\": selected_file,\n \"selected_issue\": selected_issue,\n \"selected_target_metric\": selected_target_metric,\n \"existing_pr\": existing_pr,\n \"head_branch\": head_branch,\n \"issue_programs\": {name: info[\"issue_number\"] for name, info in issue_programs.items()},\n \"deferred\": deferred,\n \"skipped\": skipped,\n \"unconfigured\": unconfigured,\n \"no_programs\": False,\n}\n\nos.makedirs(\"/tmp/gh-aw\", exist_ok=True)\nwith open(\"/tmp/gh-aw/autoloop.json\", \"w\") as f:\n json.dump(result, f, indent=2)\n\nprint(\"=== Autoloop Program Check ===\")\nprint(f\"Selected program: {selected or '(none)'} ({selected_file or 'n/a'})\")\nif existing_pr:\n print(f\"Existing PR: #{existing_pr} (branch: {head_branch})\")\nelse:\n print(f\"Existing PR: (none — will create on first accepted iteration)\")\nprint(f\"Deferred (next run): {deferred or '(none)'}\")\nprint(f\"Programs skipped: {[s['name'] for s in skipped] or '(none)'}\")\nprint(f\"Programs unconfigured: {unconfigured or '(none)'}\")\n\nif not selected and not unconfigured:\n print(\"\\nNo programs due this run. Exiting early.\")\n sys.exit(1) # Non-zero exit skips the agent step\nPYEOF\n"
# Repo memory git-based storage configuration from frontmatter processed below
- name: Clone repo-memory branch (default)
@@ -498,12 +498,12 @@ jobs:
mkdir -p ${RUNNER_TEMP}/gh-aw/safeoutputs
mkdir -p /tmp/gh-aw/safeoutputs
mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs
- cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/config.json << 'GH_AW_SAFE_OUTPUTS_CONFIG_05418b5b293ba2f8_EOF'
+ cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/config.json << 'GH_AW_SAFE_OUTPUTS_CONFIG_02af14f48bc5ba75_EOF'
{"add_comment":{"hide_older_comments":false,"max":7,"target":"*"},"add_labels":{"max":2,"target":"*"},"create_issue":{"labels":["automation","autoloop"],"max":2,"title_prefix":"[Autoloop] "},"create_pull_request":{"draft":true,"labels":["automation","autoloop"],"max":1,"max_patch_size":1024,"protected_files":["package.json","bun.lockb","bunfig.toml","deno.json","deno.jsonc","deno.lock","global.json","NuGet.Config","Directory.Packages.props","mix.exs","mix.lock","go.mod","go.sum","stack.yaml","stack.yaml.lock","pom.xml","build.gradle","build.gradle.kts","settings.gradle","settings.gradle.kts","gradle.properties","package-lock.json","yarn.lock","pnpm-lock.yaml","npm-shrinkwrap.json","requirements.txt","Pipfile","Pipfile.lock","pyproject.toml","setup.py","setup.cfg","Gemfile","Gemfile.lock","uv.lock","CODEOWNERS"],"protected_files_policy":"fallback-to-issue","protected_path_prefixes":[".github/",".agents/"],"title_prefix":"[Autoloop] "},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"true"},"push_repo_memory":{"memories":[{"dir":"/tmp/gh-aw/repo-memory/default","id":"default","max_file_count":100,"max_file_size":30720,"max_patch_size":10240}]},"push_to_pull_request_branch":{"if_no_changes":"warn","max":1,"max_patch_size":1024,"protected_files":["package.json","bun.lockb","bunfig.toml","deno.json","deno.jsonc","deno.lock","global.json","NuGet.Config","Directory.Packages.props","mix.exs","mix.lock","go.mod","go.sum","stack.yaml","stack.yaml.lock","pom.xml","build.gradle","build.gradle.kts","settings.gradle","settings.gradle.kts","gradle.properties","package-lock.json","yarn.lock","pnpm-lock.yaml","npm-shrinkwrap.json","requirements.txt","Pipfile","Pipfile.lock","pyproject.toml","setup.py","setup.cfg","Gemfile","Gemfile.lock","uv.lock","CODEOWNERS"],"protected_path_prefixes":[".github/",".agents/"],"target":"*","title_prefix":"[Autoloop] 
"},"remove_labels":{"max":2,"target":"*"},"update_issue":{"allow_body":true,"max":3,"target":"*","title_prefix":"[Autoloop] "}}
- GH_AW_SAFE_OUTPUTS_CONFIG_05418b5b293ba2f8_EOF
+ GH_AW_SAFE_OUTPUTS_CONFIG_02af14f48bc5ba75_EOF
- name: Write Safe Outputs Tools
run: |
- cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/tools_meta.json << 'GH_AW_SAFE_OUTPUTS_TOOLS_META_e9f0c97ff8e4b848_EOF'
+ cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/tools_meta.json << 'GH_AW_SAFE_OUTPUTS_TOOLS_META_e09cbb8e5f09e2a3_EOF'
{
"description_suffixes": {
"add_comment": " CONSTRAINTS: Maximum 7 comment(s) can be added. Target: *.",
@@ -517,8 +517,8 @@ jobs:
"repo_params": {},
"dynamic_tools": []
}
- GH_AW_SAFE_OUTPUTS_TOOLS_META_e9f0c97ff8e4b848_EOF
- cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/validation.json << 'GH_AW_SAFE_OUTPUTS_VALIDATION_c2504b2536e4b3d6_EOF'
+ GH_AW_SAFE_OUTPUTS_TOOLS_META_e09cbb8e5f09e2a3_EOF
+ cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/validation.json << 'GH_AW_SAFE_OUTPUTS_VALIDATION_ecaf271fbfb920d8_EOF'
{
"add_comment": {
"defaultMax": 1,
@@ -777,7 +777,7 @@ jobs:
"customValidation": "requiresOneOf:status,title,body"
}
}
- GH_AW_SAFE_OUTPUTS_VALIDATION_c2504b2536e4b3d6_EOF
+ GH_AW_SAFE_OUTPUTS_VALIDATION_ecaf271fbfb920d8_EOF
node ${RUNNER_TEMP}/gh-aw/actions/generate_safe_outputs_tools.cjs
- name: Generate Safe Outputs MCP Server Config
id: safe-outputs-config
@@ -847,7 +847,7 @@ jobs:
export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.2.11'
mkdir -p /home/runner/.copilot
- cat << GH_AW_MCP_CONFIG_be8a945ee3e28a8b_EOF | bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh
+ cat << GH_AW_MCP_CONFIG_757354268663f6b1_EOF | bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh
{
"mcpServers": {
"github": {
@@ -888,7 +888,7 @@ jobs:
"payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}"
}
}
- GH_AW_MCP_CONFIG_be8a945ee3e28a8b_EOF
+ GH_AW_MCP_CONFIG_757354268663f6b1_EOF
- name: Download activation artifact
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
with:
diff --git a/.github/workflows/autoloop.md b/.github/workflows/autoloop.md
index f3e796cc..ddc456a7 100644
--- a/.github/workflows/autoloop.md
+++ b/.github/workflows/autoloop.md
@@ -431,11 +431,50 @@ steps:
if selected in issue_programs:
selected_issue = issue_programs[selected]["issue_number"]
+ # Look up existing PR for the selected program's canonical branch
+ existing_pr = None
+ head_branch = None
+ if selected:
+ head_branch = f"autoloop/{selected}"
+ owner = repo.split("/")[0] if "/" in repo else ""
+ if owner:
+ try:
+ pr_api_url = (
+ f"https://api.github.com/repos/{repo}/pulls"
+ f"?state=open&head={owner}:{head_branch}&per_page=5"
+ )
+ pr_req = urllib.request.Request(pr_api_url, headers={
+ "Authorization": f"token {github_token}",
+ "Accept": "application/vnd.github.v3+json",
+ })
+ with urllib.request.urlopen(pr_req, timeout=30) as pr_resp:
+ open_prs = json.loads(pr_resp.read().decode())
+ if open_prs:
+ existing_pr = open_prs[0]["number"]
+ print(f" Found existing PR #{existing_pr} for branch {head_branch}")
+ else:
+ print(f" No existing PR found for branch {head_branch}")
+ except Exception as e:
+ print(f" Warning: could not check for existing PRs: {e}")
+ else:
+ print(f" Warning: could not parse owner from GITHUB_REPOSITORY='{repo}'")
+
+ # Also check the state file for a recorded PR number as fallback
+ if existing_pr is None:
+ state = read_program_state(selected)
+ pr_field = state.get("pr") or ""
+ pr_match = re.match(r'^#?(\d+)$', pr_field.strip())
+ if pr_match:
+ existing_pr = int(pr_match.group(1))
+ print(f" Found PR #{existing_pr} from state file for {selected}")
+
result = {
"selected": selected,
"selected_file": selected_file,
"selected_issue": selected_issue,
"selected_target_metric": selected_target_metric,
+ "existing_pr": existing_pr,
+ "head_branch": head_branch,
"issue_programs": {name: info["issue_number"] for name, info in issue_programs.items()},
"deferred": deferred,
"skipped": skipped,
@@ -449,6 +488,10 @@ steps:
print("=== Autoloop Program Check ===")
print(f"Selected program: {selected or '(none)'} ({selected_file or 'n/a'})")
+ if existing_pr:
+ print(f"Existing PR: #{existing_pr} (branch: {head_branch})")
+ else:
+ print(f"Existing PR: (none — will create on first accepted iteration)")
print(f"Deferred (next run): {deferred or '(none)'}")
print(f"Programs skipped: {[s['name'] for s in skipped] or '(none)'}")
print(f"Programs unconfigured: {unconfigured or '(none)'}")
@@ -538,6 +581,8 @@ The pre-step has already determined which program to run. Read `/tmp/gh-aw/autol
- **`selected_file`**: The full path to the program's markdown file (either `.autoloop/programs/<name>/program.md`, `.autoloop/programs/<name>.md`, or `/tmp/gh-aw/issue-programs/<name>.md` for issue-based programs).
- **`selected_issue`**: The GitHub issue number if the selected program came from an issue, or `null` if it came from a file.
- **`selected_target_metric`**: The `target-metric` value from the program's frontmatter (a number), or `null` if the program is open-ended. Used to check the [halting condition](#halting-condition) after each accepted iteration.
+- **`existing_pr`**: The PR number (e.g., `42`) of an already-open PR for this program's branch, or `null` if no open PR exists. **If this is not null, you MUST use `push-to-pull-request-branch` to push to this PR — do NOT call `create-pull-request`.**
+- **`head_branch`**: The canonical branch name for this program (e.g., `autoloop/coverage`). Always use this exact branch name — never append suffixes.
- **`issue_programs`**: A mapping of program name → issue number for all discovered issue-based programs.
- **`deferred`**: Other programs that were due but will be handled in future runs.
- **`unconfigured`**: Programs that still have the sentinel or placeholder content.
@@ -550,6 +595,7 @@ If `selected` is not null:
3. Read the current state of all target files.
4. Read the state file `{selected}.md` from the repo-memory folder for all state: the ⚙️ Machine State table (scheduling fields) plus the research sections (priorities, lessons, foreclosed avenues, iteration history).
5. If `selected_issue` is not null, this is an issue-based program — also read the issue comments for any human steering input.
+6. **Check `existing_pr`**: if it is not null, a PR already exists — use `push-to-pull-request-branch` to push commits to it. Only call `create-pull-request` when `existing_pr` is null.
## Multiple Programs
@@ -694,7 +740,7 @@ Each run executes **one iteration for the single selected program**:
If the state file does not yet exist, create it in the repo-memory folder using the template defined in the [Repo Memory](#repo-memory) section.
-3. Note the `PR` field from the Machine State table. If it contains a PR number (e.g., `#42`), that is the **existing draft PR** for this program — you must update it, not create a new one.
+3. Note the `existing_pr` field from `/tmp/gh-aw/autoloop.json`. If it is not null, that is the **existing draft PR** for this program — you must push to it using `push-to-pull-request-branch`, not create a new one. Also check the `PR` field from the Machine State table as a fallback.
### Step 2: Analyze and Propose
@@ -743,15 +789,15 @@ Each run executes **one iteration for the single selected program**:
- Commit message body (after a blank line): `Run: {run_url}` referencing the GitHub Actions run URL.
2. Push the commit to the long-running branch `autoloop/{program-name}`.
3. **Find the existing PR or create one** — follow these steps in order:
- a. Check the `PR` field in the state file's **⚙️ Machine State** table. If it contains a PR number (e.g., `#42`), that is the existing draft PR.
- b. If the state file has no PR number, search GitHub for open PRs with head branch `autoloop/{program-name}`. Use the GitHub API: `GET /repos/{owner}/{repo}/pulls?state=open&head={owner}:autoloop/{program-name}`.
- c. **If an existing PR is found** (from either step a or b): use `push-to-pull-request-branch` to push additional commits to the existing PR. Update the PR body with the latest metric and a summary of the most recent accepted iteration. Add a comment to the PR summarizing the iteration: what changed, old metric, new metric, improvement delta, and a link to the actions run. **Do NOT call `create-pull-request`.**
- d. **If NO PR exists** for `autoloop/{program-name}`: create one using `create-pull-request`:
+ a. **First, check `existing_pr` from `/tmp/gh-aw/autoloop.json`.** The pre-step has already looked up the open PR for this program. If `existing_pr` is not null, that is the existing draft PR — skip to step (c).
+ b. If `existing_pr` is null, also check the `PR` field in the state file's **⚙️ Machine State** table as a fallback. If it contains a PR number (e.g., `#42`), verify it is still open via the GitHub API.
+ c. **If an existing PR is found** (from step a or b): use `push-to-pull-request-branch` to push additional commits to the existing PR. Update the PR body with the latest metric and a summary of the most recent accepted iteration. Add a comment to the PR summarizing the iteration: what changed, old metric, new metric, improvement delta, and a link to the actions run. **Do NOT call `create-pull-request`.**
+ d. **If NO PR exists** for `autoloop/{program-name}` (both `existing_pr` is null AND the state file has no PR): create one using `create-pull-request`:
- Branch: `autoloop/{program-name}` (the branch you already created in Step 3 — do NOT let the framework auto-generate a branch name)
- Title: `[Autoloop: {program-name}]`
- Body includes: a summary of the program goal, link to the steering issue, the current best metric, and AI disclosure: `🤖 *This PR is maintained by Autoloop. Each accepted iteration adds a commit to this branch.*`
- > ⚠️ **Never create a new PR if one already exists for `autoloop/{program-name}`.** Each program must have exactly one draft PR at any time. If you are unsure whether a PR exists, check the GitHub API before calling `create-pull-request`.
+ > ⚠️ **Never create a new PR if one already exists for `autoloop/{program-name}`.** Each program must have exactly one draft PR at any time. The pre-step provides `existing_pr` in autoloop.json — always check it first. Only call `create-pull-request` when `existing_pr` is null AND the state file has no PR number.
4. Ensure the steering issue exists (see [Steering Issue](#steering-issue) below). Add a comment to the steering issue linking to the commit and actions run.
5. Add an entry to the experiment log issue.
6. Update the state file `{program-name}.md` in the repo-memory folder:
@@ -790,6 +836,13 @@ Maintain a single open issue **per program** titled `[Autoloop: {program-name}]
```markdown
🤖 *Autoloop — an iterative optimization agent for this repository.*
+| | |
+|---|---|
+| **Branch** | [`autoloop/{program-name}`](https://github.com/{owner}/{repo}/tree/autoloop/{program-name}) |
+| **Pull Request** | #{pr_number} |
+| **Steering Issue** | #{steering_issue_number} |
+| **State File** | [`{program-name}.md`](https://github.com/{owner}/{repo}/blob/memory/autoloop/{program-name}.md) |
+
## Program
**Goal**: {one-line summary from program.md}
@@ -817,6 +870,7 @@ Maintain a single open issue **per program** titled `[Autoloop: {program-name}]
- Iterations in **reverse chronological order** (newest first).
- Each iteration heading links to its GitHub Actions run.
- Use `${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}` for the current run URL.
+- The **links table at the top** must always show the current branch, PR, steering issue, and state file. Update the PR number when a new PR is created. When creating a continuation issue for a new month, copy the links table from the previous issue.
- Close the previous month's issue and create a new one at month boundaries.
- Maximum 50 iterations per issue; create a continuation issue if exceeded.
@@ -1148,9 +1202,10 @@ After each iteration, prepend an entry to the **📊 Iteration History** section
> ❌ **Do NOT create a new branch with a suffix for each iteration.**
> Correct: `autoloop/coverage`
> Wrong: `autoloop/coverage-abc123`, `autoloop/coverage-iter42`, `autoloop/coverage-deadbeef1234`
+> Use the `head_branch` field from `autoloop.json` — it is always the canonical name.
> ❌ **Do NOT create a new PR if one already exists for `autoloop/{program-name}`.**
-> Always check the state file's `PR` field and the GitHub API before calling `create-pull-request`. If a PR exists, use `push-to-pull-request-branch` instead.
+> The pre-step provides `existing_pr` in `autoloop.json`. If it is not null, **always** use `push-to-pull-request-branch` — never call `create-pull-request`. Only create a PR when `existing_pr` is null AND the state file has no PR number.
> ❌ **Do NOT let the gh-aw framework auto-generate a branch name when creating a PR.**
> Always specify the branch explicitly as `autoloop/{program-name}` when calling `create-pull-request`.
diff --git a/.github/workflows/evergreen.lock.yml b/.github/workflows/evergreen.lock.yml
index 703bf2c5..fbdca9b7 100644
--- a/.github/workflows/evergreen.lock.yml
+++ b/.github/workflows/evergreen.lock.yml
@@ -28,7 +28,7 @@
# Imports:
# - shared/reporting.md
#
-# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"fdc470cbbb93445cdc31d03533b3983d30603af2cbc207a06bc506a93d186f95","compiler_version":"v0.65.6","strict":true,"agent_id":"copilot"}
+# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"6c52512ee1dd9f0c424a7b5af5207b2d89e239e673df6f5ad79911a4820b75ab","compiler_version":"v0.65.6","strict":true,"agent_id":"copilot"}
name: "Evergreen — PR Health Keeper"
"on":
@@ -141,20 +141,20 @@ jobs:
run: |
bash ${RUNNER_TEMP}/gh-aw/actions/create_prompt_first.sh
{
- cat << 'GH_AW_PROMPT_c90257464e463e6f_EOF'
+ cat << 'GH_AW_PROMPT_1c58cbcd2bf82635_EOF'
- GH_AW_PROMPT_c90257464e463e6f_EOF
+ GH_AW_PROMPT_1c58cbcd2bf82635_EOF
cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/markdown.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/repo_memory_prompt.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_prompt.md"
- cat << 'GH_AW_PROMPT_c90257464e463e6f_EOF'
+ cat << 'GH_AW_PROMPT_1c58cbcd2bf82635_EOF'
Tools: add_comment(max:3), push_to_pull_request_branch(max:3), missing_tool, missing_data, noop
- GH_AW_PROMPT_c90257464e463e6f_EOF
+ GH_AW_PROMPT_1c58cbcd2bf82635_EOF
cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_push_to_pr_branch.md"
- cat << 'GH_AW_PROMPT_c90257464e463e6f_EOF'
+ cat << 'GH_AW_PROMPT_1c58cbcd2bf82635_EOF'
The following GitHub context information is available for this workflow:
@@ -187,13 +187,13 @@ jobs:
- **Note**: If a branch you need is not in the list above and is not listed as an additional fetched ref, it has NOT been checked out. For private repositories you cannot fetch it without proper authentication. If the branch is required and not available, exit with an error and ask the user to add it to the `fetch:` option of the `checkout:` configuration (e.g., `fetch: ["refs/pulls/open/*"]` for all open PR refs, or `fetch: ["main", "feature/my-branch"]` for specific branches).
- GH_AW_PROMPT_c90257464e463e6f_EOF
+ GH_AW_PROMPT_1c58cbcd2bf82635_EOF
cat "${RUNNER_TEMP}/gh-aw/prompts/github_mcp_tools_with_safeoutputs_prompt.md"
- cat << 'GH_AW_PROMPT_c90257464e463e6f_EOF'
+ cat << 'GH_AW_PROMPT_1c58cbcd2bf82635_EOF'
{{#runtime-import .github/workflows/shared/reporting.md}}
{{#runtime-import .github/workflows/evergreen.md}}
- GH_AW_PROMPT_c90257464e463e6f_EOF
+ GH_AW_PROMPT_1c58cbcd2bf82635_EOF
} > "$GH_AW_PROMPT"
- name: Interpolate variables and render templates
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
@@ -339,7 +339,7 @@ jobs:
GITHUB_REPOSITORY: ${{ github.repository }}
GITHUB_TOKEN: ${{ github.token }}
name: Find a PR that needs attention
- run: "python3 - << 'PYEOF'\nimport os, json, re, sys\nimport urllib.request, urllib.error\n\ntoken = os.environ.get(\"GITHUB_TOKEN\", \"\")\nrepo = os.environ.get(\"GITHUB_REPOSITORY\", \"\")\nforced_pr = os.environ.get(\"FORCED_PR\", \"\").strip()\n\nrepo_memory_dir = \"/tmp/gh-aw/repo-memory/evergreen\"\noutput_file = \"/tmp/gh-aw/evergreen.json\"\nos.makedirs(\"/tmp/gh-aw\", exist_ok=True)\n\nMAX_ATTEMPTS = 5\n\ndef api_get(url):\n \"\"\"Make an authenticated GET request to the GitHub API.\"\"\"\n req = urllib.request.Request(url, headers={\n \"Authorization\": f\"token {token}\",\n \"Accept\": \"application/vnd.github.v3+json\",\n })\n with urllib.request.urlopen(req, timeout=30) as resp:\n return json.loads(resp.read().decode())\n\ndef get_all_open_prs():\n \"\"\"Fetch all open PRs, paginated.\"\"\"\n prs = []\n page = 1\n while True:\n url = f\"https://api.github.com/repos/{repo}/pulls?state=open&per_page=100&page={page}&sort=number&direction=asc\"\n batch = api_get(url)\n if not batch:\n break\n prs.extend(batch)\n if len(batch) < 100:\n break\n page += 1\n return prs\n\ndef get_check_status(pr):\n \"\"\"Get combined CI check status for a PR's head commit.\"\"\"\n head_sha = pr[\"head\"][\"sha\"]\n url = f\"https://api.github.com/repos/{repo}/commits/{head_sha}/status\"\n try:\n status = api_get(url)\n return status.get(\"state\", \"unknown\")\n except Exception as e:\n print(f\" Warning: could not fetch status for PR #{pr['number']}: {e}\")\n return \"unknown\"\n\ndef get_check_runs(pr):\n \"\"\"Get check runs for a PR's head commit.\"\"\"\n head_sha = pr[\"head\"][\"sha\"]\n url = f\"https://api.github.com/repos/{repo}/commits/{head_sha}/check-runs\"\n try:\n data = api_get(url)\n return data.get(\"check_runs\", [])\n except Exception as e:\n print(f\" Warning: could not fetch check runs for PR #{pr['number']}: {e}\")\n return []\n\ndef read_attempt_state(pr_number):\n \"\"\"Read attempt tracking state from repo-memory.\"\"\"\n state_file = 
os.path.join(repo_memory_dir, f\"pr-{pr_number}.md\")\n if not os.path.isfile(state_file):\n return {\"attempts\": 0, \"head_sha\": None}\n with open(state_file, encoding=\"utf-8\") as f:\n content = f.read()\n state = {\"attempts\": 0, \"head_sha\": None}\n m = re.search(r'\\|\\s*head_sha\\s*\\|\\s*(\\S+)\\s*\\|', content)\n if m:\n state[\"head_sha\"] = m.group(1)\n m = re.search(r'\\|\\s*attempts\\s*\\|\\s*(\\d+)\\s*\\|', content)\n if m:\n state[\"attempts\"] = int(m.group(1))\n return state\n\ndef pr_needs_attention(pr):\n \"\"\"Check if a PR has merge conflicts or failing CI. Returns a list of issues.\"\"\"\n issues = []\n\n # Check mergeable state\n # Need to fetch full PR details for mergeable info\n pr_url = f\"https://api.github.com/repos/{repo}/pulls/{pr['number']}\"\n try:\n full_pr = api_get(pr_url)\n mergeable = full_pr.get(\"mergeable\")\n mergeable_state = full_pr.get(\"mergeable_state\", \"unknown\")\n if mergeable is False:\n issues.append(\"merge_conflict\")\n elif mergeable_state == \"dirty\":\n issues.append(\"merge_conflict\")\n except Exception as e:\n print(f\" Warning: could not fetch mergeable state for PR #{pr['number']}: {e}\")\n\n # Check CI status via check runs\n check_runs = get_check_runs(pr)\n failed_checks = []\n for cr in check_runs:\n conclusion = cr.get(\"conclusion\")\n status = cr.get(\"status\")\n name = cr.get(\"name\", \"unknown\")\n if conclusion in (\"failure\", \"timed_out\", \"action_required\"):\n failed_checks.append(name)\n elif status == \"completed\" and conclusion not in (\"success\", \"neutral\", \"skipped\"):\n if conclusion is not None:\n failed_checks.append(name)\n if failed_checks:\n issues.append(f\"failing_checks: {', '.join(failed_checks)}\")\n\n # Also check commit status API (some checks use the older status API)\n combined_status = get_check_status(pr)\n if combined_status == \"failure\":\n if not failed_checks:\n issues.append(\"failing_status\")\n\n return issues\n\n# --- Main logic 
---\n\nprint(\"=== Evergreen PR Health Check ===\")\nprint(f\"Repository: {repo}\")\n\nprs = get_all_open_prs()\nprint(f\"Found {len(prs)} open PR(s)\")\n\nif not prs:\n print(\"No open PRs. Exiting.\")\n with open(output_file, \"w\") as f:\n json.dump({\"selected\": None, \"reason\": \"no_open_prs\"}, f)\n sys.exit(1)\n\n# Evaluate each PR deterministically (sorted by PR number ascending)\ncandidates = []\nskipped = []\n\n# If a specific PR is forced, only check that one\nif forced_pr:\n prs = [pr for pr in prs if str(pr[\"number\"]) == forced_pr]\n if not prs:\n print(f\"ERROR: PR #{forced_pr} not found among open PRs.\")\n sys.exit(1)\n print(f\"FORCED: checking only PR #{forced_pr}\")\n\nfor pr in sorted(prs, key=lambda p: p[\"number\"]):\n pr_num = pr[\"number\"]\n head_sha = pr[\"head\"][\"sha\"]\n print(f\"\\nChecking PR #{pr_num}: {pr['title'][:60]}...\")\n print(f\" Head SHA: {head_sha[:12]}\")\n\n issues = pr_needs_attention(pr)\n if not issues:\n print(f\" Status: healthy (no issues)\")\n continue\n\n print(f\" Issues: {issues}\")\n\n # Check attempt tracking\n attempt_state = read_attempt_state(pr_num)\n if attempt_state[\"head_sha\"] == head_sha:\n attempts = attempt_state[\"attempts\"]\n print(f\" Attempts on this SHA: {attempts}/{MAX_ATTEMPTS}\")\n if attempts >= MAX_ATTEMPTS:\n skipped.append({\n \"pr\": pr_num,\n \"reason\": f\"max attempts ({MAX_ATTEMPTS}) reached on SHA {head_sha[:12]}\",\n })\n print(f\" SKIPPED: max attempts reached\")\n continue\n else:\n attempts = 0\n print(f\" New SHA detected — resetting attempt counter\")\n\n candidates.append({\n \"pr_number\": pr_num,\n \"title\": pr[\"title\"],\n \"head_sha\": head_sha,\n \"base_branch\": pr[\"base\"][\"ref\"],\n \"head_branch\": pr[\"head\"][\"ref\"],\n \"issues\": issues,\n \"attempts\": attempts,\n })\n\n# Select the first candidate (lowest PR number — deterministic)\nselected = candidates[0] if candidates else None\n\nresult = {\n \"selected\": selected,\n \"skipped\": skipped,\n 
\"total_open_prs\": len(prs),\n \"candidates_found\": len(candidates),\n}\n\nwith open(output_file, \"w\") as f:\n json.dump(result, f, indent=2)\n\nif selected:\n print(f\"\\n>>> Selected PR #{selected['pr_number']}: {selected['title']}\")\n print(f\" Issues: {selected['issues']}\")\n print(f\" Attempt: {selected['attempts'] + 1}/{MAX_ATTEMPTS}\")\nelse:\n print(\"\\nNo PRs need attention. Exiting.\")\n sys.exit(1)\nPYEOF\n"
+ run: "python3 - << 'PYEOF'\nimport os, json, re, subprocess, sys\nimport urllib.request, urllib.error\n\ntoken = os.environ.get(\"GITHUB_TOKEN\", \"\")\nrepo = os.environ.get(\"GITHUB_REPOSITORY\", \"\")\nforced_pr = os.environ.get(\"FORCED_PR\", \"\").strip()\n\nrepo_memory_dir = \"/tmp/gh-aw/repo-memory/evergreen\"\noutput_file = \"/tmp/gh-aw/evergreen.json\"\nos.makedirs(\"/tmp/gh-aw\", exist_ok=True)\n\nMAX_ATTEMPTS = 5\n\ndef api_get(url):\n \"\"\"Make an authenticated GET request to the GitHub API.\"\"\"\n req = urllib.request.Request(url, headers={\n \"Authorization\": f\"token {token}\",\n \"Accept\": \"application/vnd.github.v3+json\",\n })\n with urllib.request.urlopen(req, timeout=30) as resp:\n return json.loads(resp.read().decode())\n\ndef get_all_open_prs():\n \"\"\"Fetch all open PRs, paginated.\"\"\"\n prs = []\n page = 1\n while True:\n url = f\"https://api.github.com/repos/{repo}/pulls?state=open&per_page=100&page={page}&sort=number&direction=asc\"\n batch = api_get(url)\n if not batch:\n break\n prs.extend(batch)\n if len(batch) < 100:\n break\n page += 1\n return prs\n\ndef get_check_status(pr):\n \"\"\"Get combined CI check status for a PR's head commit.\"\"\"\n head_sha = pr[\"head\"][\"sha\"]\n url = f\"https://api.github.com/repos/{repo}/commits/{head_sha}/status\"\n try:\n status = api_get(url)\n return status.get(\"state\", \"unknown\")\n except Exception as e:\n print(f\" Warning: could not fetch status for PR #{pr['number']}: {e}\")\n return \"unknown\"\n\ndef get_check_runs(pr):\n \"\"\"Get check runs for a PR's head commit.\"\"\"\n head_sha = pr[\"head\"][\"sha\"]\n url = f\"https://api.github.com/repos/{repo}/commits/{head_sha}/check-runs\"\n try:\n data = api_get(url)\n return data.get(\"check_runs\", [])\n except Exception as e:\n print(f\" Warning: could not fetch check runs for PR #{pr['number']}: {e}\")\n return []\n\ndef read_attempt_state(pr_number):\n \"\"\"Read attempt tracking state from repo-memory.\"\"\"\n state_file = 
os.path.join(repo_memory_dir, f\"pr-{pr_number}.md\")\n if not os.path.isfile(state_file):\n return {\"attempts\": 0, \"head_sha\": None}\n with open(state_file, encoding=\"utf-8\") as f:\n content = f.read()\n state = {\"attempts\": 0, \"head_sha\": None}\n m = re.search(r'\\|\\s*head_sha\\s*\\|\\s*(\\S+)\\s*\\|', content)\n if m:\n state[\"head_sha\"] = m.group(1)\n m = re.search(r'\\|\\s*attempts\\s*\\|\\s*(\\d+)\\s*\\|', content)\n if m:\n state[\"attempts\"] = int(m.group(1))\n return state\n\ndef pr_needs_attention(pr):\n \"\"\"Check if a PR has merge conflicts or failing CI. Returns a list of issues.\"\"\"\n issues = []\n\n # Check mergeable state\n # Need to fetch full PR details for mergeable info\n pr_url = f\"https://api.github.com/repos/{repo}/pulls/{pr['number']}\"\n try:\n full_pr = api_get(pr_url)\n mergeable = full_pr.get(\"mergeable\")\n mergeable_state = full_pr.get(\"mergeable_state\", \"unknown\")\n if mergeable is False:\n issues.append(\"merge_conflict\")\n elif mergeable_state == \"dirty\":\n issues.append(\"merge_conflict\")\n except Exception as e:\n print(f\" Warning: could not fetch mergeable state for PR #{pr['number']}: {e}\")\n\n # Check CI status via check runs\n check_runs = get_check_runs(pr)\n failed_checks = []\n for cr in check_runs:\n conclusion = cr.get(\"conclusion\")\n status = cr.get(\"status\")\n name = cr.get(\"name\", \"unknown\")\n if conclusion in (\"failure\", \"timed_out\", \"action_required\"):\n failed_checks.append(name)\n elif status == \"completed\" and conclusion not in (\"success\", \"neutral\", \"skipped\"):\n if conclusion is not None:\n failed_checks.append(name)\n if failed_checks:\n issues.append(f\"failing_checks: {', '.join(failed_checks)}\")\n\n # Also check commit status API (some checks use the older status API)\n combined_status = get_check_status(pr)\n if combined_status == \"failure\":\n if not failed_checks:\n issues.append(\"failing_status\")\n\n return issues\n\n# --- Main logic 
---\n\nprint(\"=== Evergreen PR Health Check ===\")\nprint(f\"Repository: {repo}\")\n\nprs = get_all_open_prs()\nprint(f\"Found {len(prs)} open PR(s)\")\n\nif not prs:\n print(\"No open PRs. Nothing to do.\")\n with open(output_file, \"w\") as f:\n json.dump({\"selected\": None, \"reason\": \"no_open_prs\"}, f)\n sys.exit(0)\n\n# Evaluate each PR deterministically (sorted by PR number ascending)\ncandidates = []\nskipped = []\n\n# If a specific PR is forced, only check that one\nif forced_pr:\n prs = [pr for pr in prs if str(pr[\"number\"]) == forced_pr]\n if not prs:\n print(f\"ERROR: PR #{forced_pr} not found among open PRs.\")\n sys.exit(1)\n print(f\"FORCED: checking only PR #{forced_pr}\")\n\nfor pr in sorted(prs, key=lambda p: p[\"number\"]):\n pr_num = pr[\"number\"]\n head_sha = pr[\"head\"][\"sha\"]\n print(f\"\\nChecking PR #{pr_num}: {pr['title'][:60]}...\")\n print(f\" Head SHA: {head_sha[:12]}\")\n\n issues = pr_needs_attention(pr)\n if not issues:\n print(f\" Status: healthy (no issues)\")\n continue\n\n print(f\" Issues: {issues}\")\n\n # Check attempt tracking\n attempt_state = read_attempt_state(pr_num)\n if attempt_state[\"head_sha\"] == head_sha:\n attempts = attempt_state[\"attempts\"]\n print(f\" Attempts on this SHA: {attempts}/{MAX_ATTEMPTS}\")\n if attempts >= MAX_ATTEMPTS:\n skipped.append({\n \"pr\": pr_num,\n \"reason\": f\"max attempts ({MAX_ATTEMPTS}) reached on SHA {head_sha[:12]}\",\n })\n print(f\" SKIPPED: max attempts reached\")\n continue\n else:\n attempts = 0\n print(f\" New SHA detected — resetting attempt counter\")\n\n candidates.append({\n \"pr_number\": pr_num,\n \"title\": pr[\"title\"],\n \"head_sha\": head_sha,\n \"base_branch\": pr[\"base\"][\"ref\"],\n \"head_branch\": pr[\"head\"][\"ref\"],\n \"issues\": issues,\n \"attempts\": attempts,\n })\n\n# Select the first candidate (lowest PR number — deterministic)\nselected = candidates[0] if candidates else None\n\nresult = {\n \"selected\": selected,\n \"skipped\": 
skipped,\n \"total_open_prs\": len(prs),\n \"candidates_found\": len(candidates),\n}\n\nwith open(output_file, \"w\") as f:\n json.dump(result, f, indent=2)\n\nif selected:\n branch = selected[\"head_branch\"]\n print(f\"Checking out PR branch before agent run: {branch}\")\n subprocess.check_call([\"git\", \"checkout\", \"-B\", branch, f\"origin/{branch}\"])\n subprocess.check_call([\"git\", \"branch\", \"--set-upstream-to\", f\"origin/{branch}\", branch])\n print(f\"\\n>>> Selected PR #{selected['pr_number']}: {selected['title']}\")\n print(f\" Issues: {selected['issues']}\")\n print(f\" Attempt: {selected['attempts'] + 1}/{MAX_ATTEMPTS}\")\nelse:\n print(\"\\nNo PRs need attention. Nothing to do.\")\n sys.exit(0)\nPYEOF\n"
# Repo memory git-based storage configuration from frontmatter processed below
- name: Clone repo-memory branch (default)
@@ -398,12 +398,12 @@ jobs:
mkdir -p ${RUNNER_TEMP}/gh-aw/safeoutputs
mkdir -p /tmp/gh-aw/safeoutputs
mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs
- cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/config.json << 'GH_AW_SAFE_OUTPUTS_CONFIG_ec5103758147a5b8_EOF'
+ cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/config.json << 'GH_AW_SAFE_OUTPUTS_CONFIG_5d352d3a7dc8ac3d_EOF'
{"add_comment":{"max":3,"target":"*"},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"true"},"push_repo_memory":{"memories":[{"dir":"/tmp/gh-aw/repo-memory/default","id":"default","max_file_count":100,"max_file_size":10240,"max_patch_size":10240}]},"push_to_pull_request_branch":{"if_no_changes":"warn","max":3,"max_patch_size":1024,"protected_files":["package.json","bun.lockb","bunfig.toml","deno.json","deno.jsonc","deno.lock","global.json","NuGet.Config","Directory.Packages.props","mix.exs","mix.lock","go.mod","go.sum","stack.yaml","stack.yaml.lock","pom.xml","build.gradle","build.gradle.kts","settings.gradle","settings.gradle.kts","gradle.properties","package-lock.json","yarn.lock","pnpm-lock.yaml","npm-shrinkwrap.json","requirements.txt","Pipfile","Pipfile.lock","pyproject.toml","setup.py","setup.cfg","Gemfile","Gemfile.lock","uv.lock","CODEOWNERS"],"protected_files_policy":"allowed","protected_path_prefixes":[".github/",".agents/"],"target":"*"}}
- GH_AW_SAFE_OUTPUTS_CONFIG_ec5103758147a5b8_EOF
+ GH_AW_SAFE_OUTPUTS_CONFIG_5d352d3a7dc8ac3d_EOF
- name: Write Safe Outputs Tools
run: |
- cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/tools_meta.json << 'GH_AW_SAFE_OUTPUTS_TOOLS_META_0b11521b2b188ecd_EOF'
+ cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/tools_meta.json << 'GH_AW_SAFE_OUTPUTS_TOOLS_META_e94b9b0d12aa4571_EOF'
{
"description_suffixes": {
"add_comment": " CONSTRAINTS: Maximum 3 comment(s) can be added. Target: *.",
@@ -412,8 +412,8 @@ jobs:
"repo_params": {},
"dynamic_tools": []
}
- GH_AW_SAFE_OUTPUTS_TOOLS_META_0b11521b2b188ecd_EOF
- cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/validation.json << 'GH_AW_SAFE_OUTPUTS_VALIDATION_4f2d89a889ce19de_EOF'
+ GH_AW_SAFE_OUTPUTS_TOOLS_META_e94b9b0d12aa4571_EOF
+ cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/validation.json << 'GH_AW_SAFE_OUTPUTS_VALIDATION_7cc97c0128fe54d3_EOF'
{
"add_comment": {
"defaultMax": 1,
@@ -511,7 +511,7 @@ jobs:
}
}
}
- GH_AW_SAFE_OUTPUTS_VALIDATION_4f2d89a889ce19de_EOF
+ GH_AW_SAFE_OUTPUTS_VALIDATION_7cc97c0128fe54d3_EOF
node ${RUNNER_TEMP}/gh-aw/actions/generate_safe_outputs_tools.cjs
- name: Generate Safe Outputs MCP Server Config
id: safe-outputs-config
@@ -581,7 +581,7 @@ jobs:
export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.2.11'
mkdir -p /home/runner/.copilot
- cat << GH_AW_MCP_CONFIG_e92a5aad7336713f_EOF | bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh
+ cat << GH_AW_MCP_CONFIG_df1a40d4ce900f98_EOF | bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh
{
"mcpServers": {
"github": {
@@ -622,7 +622,7 @@ jobs:
"payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}"
}
}
- GH_AW_MCP_CONFIG_e92a5aad7336713f_EOF
+ GH_AW_MCP_CONFIG_df1a40d4ce900f98_EOF
- name: Download activation artifact
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
with:
diff --git a/.github/workflows/evergreen.md b/.github/workflows/evergreen.md
index 481eae1c..147b912b 100644
--- a/.github/workflows/evergreen.md
+++ b/.github/workflows/evergreen.md
@@ -54,7 +54,7 @@ steps:
FORCED_PR: ${{ github.event.inputs.pr_number }}
run: |
python3 - << 'PYEOF'
- import os, json, re, sys
+ import os, json, re, subprocess, sys
import urllib.request, urllib.error
token = os.environ.get("GITHUB_TOKEN", "")
@@ -179,10 +179,10 @@ steps:
print(f"Found {len(prs)} open PR(s)")
if not prs:
- print("No open PRs. Exiting.")
+ print("No open PRs. Nothing to do.")
with open(output_file, "w") as f:
json.dump({"selected": None, "reason": "no_open_prs"}, f)
- sys.exit(1)
+ sys.exit(0)
# Evaluate each PR deterministically (sorted by PR number ascending)
candidates = []
@@ -249,12 +249,16 @@ steps:
json.dump(result, f, indent=2)
if selected:
+ branch = selected["head_branch"]
+ print(f"Checking out PR branch before agent run: {branch}")
+ subprocess.check_call(["git", "checkout", "-B", branch, f"origin/{branch}"])
+ subprocess.check_call(["git", "branch", "--set-upstream-to", f"origin/{branch}", branch])
print(f"\n>>> Selected PR #{selected['pr_number']}: {selected['title']}")
print(f" Issues: {selected['issues']}")
print(f" Attempt: {selected['attempts'] + 1}/{MAX_ATTEMPTS}")
else:
- print("\nNo PRs need attention. Exiting.")
- sys.exit(1)
+ print("\nNo PRs need attention. Nothing to do.")
+ sys.exit(0)
PYEOF
features:
@@ -279,11 +283,9 @@ A pre-flight step has already identified a PR that needs attention. Read the sel
- `selected.base_branch` — the target branch (usually `main`)
- `selected.attempts` — how many times we've already tried on this SHA
-2. **Check out the PR branch** as a local tracking branch so the push tool can find it:
- ```bash
- git checkout -b origin/
- ```
- where `` is `selected.head_branch` from the selection file. **Do not** use a detached HEAD checkout — the `push-to-pull-request-branch` tool requires a named local branch.
+ > If `selected` is `null`, no PRs need attention right now. Call the **noop** tool with a message like "All PRs are healthy — nothing to fix." and stop.
+
+2. The pre-flight step already checks out `selected.head_branch` as a named local tracking branch before you start. Keep working on that branch (do not switch back to `main` or use detached HEAD).
3. **Fix the issues**:
diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
index a7ede9cd..127a90d6 100644
--- a/.github/workflows/pages.yml
+++ b/.github/workflows/pages.yml
@@ -36,6 +36,13 @@ jobs:
- name: Bundle TypeScript compiler for offline playground
run: cp node_modules/typescript/lib/typescript.js ./playground/dist/typescript.js
+ - name: Copy benchmark results to playground
+ run: |
+ mkdir -p ./playground/benchmarks
+ if [ -f benchmarks/results.json ]; then
+ cp benchmarks/results.json ./playground/benchmarks/results.json
+ fi
+
- name: Setup Python
uses: actions/setup-python@v5
with:
diff --git a/.github/workflows/sync-branches.lock.yml b/.github/workflows/sync-branches.lock.yml
index 0183de8c..78f6887e 100644
--- a/.github/workflows/sync-branches.lock.yml
+++ b/.github/workflows/sync-branches.lock.yml
@@ -24,7 +24,7 @@
# Runs whenever the default branch changes and merges it into all active
# autoloop/* branches so that program iterations always build on the latest code.
#
-# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"719939a8013705db572524495d231d61b5652aa8fa86506426ccbe84aade70e1","compiler_version":"v0.65.6","strict":true,"agent_id":"copilot"}
+# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"42baaebd1818fa54f67dfaadbc42d425fcd44388126d27496222c26a7fcdd745","compiler_version":"v0.65.6","strict":true,"agent_id":"copilot"}
name: "Sync Branches"
"on":
@@ -133,13 +133,13 @@ jobs:
run: |
bash ${RUNNER_TEMP}/gh-aw/actions/create_prompt_first.sh
{
- cat << 'GH_AW_PROMPT_c6c6ca09724d546c_EOF'
+ cat << 'GH_AW_PROMPT_6ce21e657f0d715b_EOF'
- GH_AW_PROMPT_c6c6ca09724d546c_EOF
+ GH_AW_PROMPT_6ce21e657f0d715b_EOF
cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/markdown.md"
- cat << 'GH_AW_PROMPT_c6c6ca09724d546c_EOF'
+ cat << 'GH_AW_PROMPT_6ce21e657f0d715b_EOF'
The following GitHub context information is available for this workflow:
{{#if __GH_AW_GITHUB_ACTOR__ }}
@@ -168,12 +168,12 @@ jobs:
{{/if}}
- GH_AW_PROMPT_c6c6ca09724d546c_EOF
+ GH_AW_PROMPT_6ce21e657f0d715b_EOF
cat "${RUNNER_TEMP}/gh-aw/prompts/github_mcp_tools_prompt.md"
- cat << 'GH_AW_PROMPT_c6c6ca09724d546c_EOF'
+ cat << 'GH_AW_PROMPT_6ce21e657f0d715b_EOF'
{{#runtime-import .github/workflows/sync-branches.md}}
- GH_AW_PROMPT_c6c6ca09724d546c_EOF
+ GH_AW_PROMPT_6ce21e657f0d715b_EOF
} > "$GH_AW_PROMPT"
- name: Interpolate variables and render templates
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
@@ -285,7 +285,7 @@ jobs:
DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
GITHUB_REPOSITORY: ${{ github.repository }}
name: Merge default branch into all autoloop program branches
- run: "python3 - << 'PYEOF'\nimport os, subprocess, sys\n\ntoken = os.environ.get(\"GITHUB_TOKEN\", \"\")\nrepo = os.environ.get(\"GITHUB_REPOSITORY\", \"\")\ndefault_branch = os.environ.get(\"DEFAULT_BRANCH\", \"main\")\n\n# List all remote branches matching the autoloop/* pattern\nresult = subprocess.run(\n [\"git\", \"branch\", \"-r\", \"--list\", \"origin/autoloop/*\"],\n capture_output=True, text=True\n)\nif result.returncode != 0:\n print(f\"Failed to list remote branches: {result.stderr}\")\n sys.exit(0)\n\nbranches = [b.strip().replace(\"origin/\", \"\") for b in result.stdout.strip().split(\"\\n\") if b.strip()]\n\nif not branches:\n print(\"No autoloop/* branches found. Nothing to sync.\")\n sys.exit(0)\n\nprint(f\"Found {len(branches)} autoloop branch(es) to sync: {branches}\")\n\nfailed = []\nfor branch in branches:\n print(f\"\\n--- Syncing {branch} with {default_branch} ---\")\n\n # Fetch both branches\n subprocess.run([\"git\", \"fetch\", \"origin\", branch], capture_output=True)\n subprocess.run([\"git\", \"fetch\", \"origin\", default_branch], capture_output=True)\n\n # Check out the program branch\n checkout = subprocess.run(\n [\"git\", \"checkout\", branch],\n capture_output=True, text=True\n )\n if checkout.returncode != 0:\n # Try creating a local tracking branch\n checkout = subprocess.run(\n [\"git\", \"checkout\", \"-b\", branch, f\"origin/{branch}\"],\n capture_output=True, text=True\n )\n if checkout.returncode != 0:\n print(f\" Failed to checkout {branch}: {checkout.stderr}\")\n failed.append(branch)\n continue\n\n # Merge the default branch into the program branch\n merge = subprocess.run(\n [\"git\", \"merge\", f\"origin/{default_branch}\", \"--no-edit\",\n \"-m\", f\"Merge {default_branch} into {branch}\"],\n capture_output=True, text=True\n )\n if merge.returncode != 0:\n print(f\" Merge conflict or failure for {branch}: {merge.stderr}\")\n # Abort the merge to leave a clean state\n subprocess.run([\"git\", \"merge\", \"--abort\"], 
capture_output=True)\n failed.append(branch)\n continue\n\n # Push the updated branch\n push = subprocess.run(\n [\"git\", \"push\", \"origin\", branch],\n capture_output=True, text=True\n )\n if push.returncode != 0:\n print(f\" Failed to push {branch}: {push.stderr}\")\n failed.append(branch)\n continue\n\n print(f\" Successfully synced {branch}\")\n\n# Return to default branch\nsubprocess.run([\"git\", \"checkout\", default_branch], capture_output=True)\n\nif failed:\n print(f\"\\n⚠️ Failed to sync {len(failed)} branch(es): {failed}\")\n print(\"These branches may need manual conflict resolution.\")\n # Don't fail the workflow — log the issue but continue\nelse:\n print(f\"\\n✅ All {len(branches)} branch(es) synced successfully.\")\nPYEOF\n"
+ run: "python3 - << 'PYEOF'\nimport os, re, subprocess, sys\n\ntoken = os.environ.get(\"GITHUB_TOKEN\", \"\")\nrepo = os.environ.get(\"GITHUB_REPOSITORY\", \"\")\ndefault_branch = os.environ.get(\"DEFAULT_BRANCH\", \"main\")\n\n# List all remote branches matching the autoloop/* pattern\nresult = subprocess.run(\n [\"git\", \"branch\", \"-r\", \"--list\", \"origin/autoloop/*\"],\n capture_output=True, text=True\n)\nif result.returncode != 0:\n print(f\"Failed to list remote branches: {result.stderr}\")\n sys.exit(0)\n\nall_branches = [b.strip().replace(\"origin/\", \"\") for b in result.stdout.strip().split(\"\\n\") if b.strip()]\n\n# Filter to canonical branches only: autoloop/{name} without hash suffixes.\n# Stale branches created by the framework (e.g. autoloop/name-a1b2c3d4e5f6g7h8)\n# are skipped — they are not the long-running program branches.\n_hash_suffix = re.compile(r'-[0-9a-f]{16}$')\nbranches = [b for b in all_branches if not _hash_suffix.search(b)]\nskipped_branches = [b for b in all_branches if _hash_suffix.search(b)]\n\nif skipped_branches:\n print(f\"Skipping {len(skipped_branches)} stale branch(es) with hash suffixes: {skipped_branches}\")\n\nif not branches:\n print(\"No canonical autoloop/* branches found. 
Nothing to sync.\")\n sys.exit(0)\n\nprint(f\"Found {len(branches)} canonical autoloop branch(es) to sync: {branches}\")\n\nfailed = []\nfor branch in branches:\n print(f\"\\n--- Syncing {branch} with {default_branch} ---\")\n\n # Fetch both branches\n subprocess.run([\"git\", \"fetch\", \"origin\", branch], capture_output=True)\n subprocess.run([\"git\", \"fetch\", \"origin\", default_branch], capture_output=True)\n\n # Check out the program branch\n checkout = subprocess.run(\n [\"git\", \"checkout\", branch],\n capture_output=True, text=True\n )\n if checkout.returncode != 0:\n # Try creating a local tracking branch\n checkout = subprocess.run(\n [\"git\", \"checkout\", \"-b\", branch, f\"origin/{branch}\"],\n capture_output=True, text=True\n )\n if checkout.returncode != 0:\n print(f\" Failed to checkout {branch}: {checkout.stderr}\")\n failed.append(branch)\n continue\n\n # Merge the default branch into the program branch\n merge = subprocess.run(\n [\"git\", \"merge\", f\"origin/{default_branch}\", \"--no-edit\",\n \"-m\", f\"Merge {default_branch} into {branch}\"],\n capture_output=True, text=True\n )\n if merge.returncode != 0:\n print(f\" Merge conflict or failure for {branch}: {merge.stderr}\")\n # Abort the merge to leave a clean state\n subprocess.run([\"git\", \"merge\", \"--abort\"], capture_output=True)\n failed.append(branch)\n continue\n\n # Push the updated branch\n push = subprocess.run(\n [\"git\", \"push\", \"origin\", branch],\n capture_output=True, text=True\n )\n if push.returncode != 0:\n print(f\" Failed to push {branch}: {push.stderr}\")\n failed.append(branch)\n continue\n\n print(f\" Successfully synced {branch}\")\n\n# Return to default branch\nsubprocess.run([\"git\", \"checkout\", default_branch], capture_output=True)\n\nif failed:\n print(f\"\\n⚠️ Failed to sync {len(failed)} branch(es): {failed}\")\n print(\"These branches may need manual conflict resolution.\")\n # Don't fail the workflow — log the issue but continue\nelse:\n 
print(f\"\\n✅ All {len(branches)} branch(es) synced successfully.\")\nPYEOF\n"
- name: Configure Git credentials
env:
@@ -354,7 +354,7 @@ jobs:
export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.2.11'
mkdir -p /home/runner/.copilot
- cat << GH_AW_MCP_CONFIG_f2267ff9994f362a_EOF | bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh
+ cat << GH_AW_MCP_CONFIG_6e54b48a11cd24bb_EOF | bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh
{
"mcpServers": {
"github": {
@@ -381,7 +381,7 @@ jobs:
"payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}"
}
}
- GH_AW_MCP_CONFIG_f2267ff9994f362a_EOF
+ GH_AW_MCP_CONFIG_6e54b48a11cd24bb_EOF
- name: Download activation artifact
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
with:
diff --git a/.github/workflows/sync-branches.md b/.github/workflows/sync-branches.md
index d6775100..772e2438 100644
--- a/.github/workflows/sync-branches.md
+++ b/.github/workflows/sync-branches.md
@@ -25,7 +25,7 @@ steps:
DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
run: |
python3 - << 'PYEOF'
- import os, subprocess, sys
+ import os, re, subprocess, sys
token = os.environ.get("GITHUB_TOKEN", "")
repo = os.environ.get("GITHUB_REPOSITORY", "")
@@ -40,13 +40,23 @@ steps:
print(f"Failed to list remote branches: {result.stderr}")
sys.exit(0)
- branches = [b.strip().replace("origin/", "") for b in result.stdout.strip().split("\n") if b.strip()]
+ all_branches = [b.strip().replace("origin/", "") for b in result.stdout.strip().split("\n") if b.strip()]
+
+ # Filter to canonical branches only: autoloop/{name} without hash suffixes.
+ # Stale branches created by the framework (e.g. autoloop/name-a1b2c3d4e5f6g7h8)
+ # are skipped — they are not the long-running program branches.
+ _hash_suffix = re.compile(r'-[0-9a-f]{16}$')
+ branches = [b for b in all_branches if not _hash_suffix.search(b)]
+ skipped_branches = [b for b in all_branches if _hash_suffix.search(b)]
+
+ if skipped_branches:
+ print(f"Skipping {len(skipped_branches)} stale branch(es) with hash suffixes: {skipped_branches}")
if not branches:
- print("No autoloop/* branches found. Nothing to sync.")
+ print("No canonical autoloop/* branches found. Nothing to sync.")
sys.exit(0)
- print(f"Found {len(branches)} autoloop branch(es) to sync: {branches}")
+ print(f"Found {len(branches)} canonical autoloop branch(es) to sync: {branches}")
failed = []
for branch in branches:
diff --git a/benchmarks/pandas/bench_concat.py b/benchmarks/pandas/bench_concat.py
new file mode 100644
index 00000000..3533109e
--- /dev/null
+++ b/benchmarks/pandas/bench_concat.py
@@ -0,0 +1,28 @@
+"""Benchmark: concat — concatenate two 50k-row DataFrames"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 50_000
+WARMUP = 5
+ITERATIONS = 20
+
+vals1 = np.arange(ROWS, dtype=np.float64)
+vals2 = np.arange(ROWS, dtype=np.float64) * 2.0
+df1 = pd.DataFrame({"value": vals1})
+df2 = pd.DataFrame({"value": vals2})
+
+for _ in range(WARMUP):
+ pd.concat([df1, df2], ignore_index=True)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ pd.concat([df1, df2], ignore_index=True)
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "concat",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_dataframe_apply.py b/benchmarks/pandas/bench_dataframe_apply.py
new file mode 100644
index 00000000..6788d422
--- /dev/null
+++ b/benchmarks/pandas/bench_dataframe_apply.py
@@ -0,0 +1,27 @@
+"""Benchmark: dataframe_apply — apply a function across rows of a 10k-row DataFrame"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 10_000
+WARMUP = 3
+ITERATIONS = 10
+
+a = np.arange(ROWS, dtype=np.float64)
+b = np.arange(ROWS, dtype=np.float64) * 2.0
+df = pd.DataFrame({"a": a, "b": b})
+
+for _ in range(WARMUP):
+ df.apply(lambda row: row["a"] + row["b"], axis=1)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ df.apply(lambda row: row["a"] + row["b"], axis=1)
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "dataframe_apply",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_dataframe_creation.py b/benchmarks/pandas/bench_dataframe_creation.py
new file mode 100644
index 00000000..706c8b13
--- /dev/null
+++ b/benchmarks/pandas/bench_dataframe_creation.py
@@ -0,0 +1,27 @@
+"""Benchmark: DataFrame creation from arrays (pandas equivalent)"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+nums1 = np.arange(ROWS, dtype=np.float64) * 1.1
+nums2 = np.arange(ROWS, dtype=np.float64) * 2.2
+strs = [f"label_{i % 100}" for i in range(ROWS)]
+
+for _ in range(WARMUP):
+ pd.DataFrame({"a": nums1, "b": nums2, "c": strs})
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ pd.DataFrame({"a": nums1, "b": nums2, "c": strs})
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "dataframe_creation",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_dataframe_dropna.py b/benchmarks/pandas/bench_dataframe_dropna.py
new file mode 100644
index 00000000..08a11895
--- /dev/null
+++ b/benchmarks/pandas/bench_dataframe_dropna.py
@@ -0,0 +1,27 @@
+"""Benchmark: dataframe_dropna — drop rows with NaN values from 100k-row DataFrame"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 5
+ITERATIONS = 20
+
+a = np.where(np.arange(ROWS) % 10 == 0, np.nan, np.arange(ROWS) * 1.1)
+b = np.where(np.arange(ROWS) % 7 == 0, np.nan, np.arange(ROWS) * 2.2)
+df = pd.DataFrame({"a": a, "b": b})
+
+for _ in range(WARMUP):
+ df.dropna()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ df.dropna()
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "dataframe_dropna",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_dataframe_filter.py b/benchmarks/pandas/bench_dataframe_filter.py
new file mode 100644
index 00000000..112384f8
--- /dev/null
+++ b/benchmarks/pandas/bench_dataframe_filter.py
@@ -0,0 +1,26 @@
+"""Benchmark: DataFrame filter (boolean mask on 100k-row DataFrame)"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 5
+ITERATIONS = 20
+
+vals = np.arange(ROWS, dtype=np.float64) * 0.1
+df = pd.DataFrame({"value": vals})
+
+for _ in range(WARMUP):
+ df[df["value"] > 5000]
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ df[df["value"] > 5000]
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "dataframe_filter",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_dataframe_rename.py b/benchmarks/pandas/bench_dataframe_rename.py
new file mode 100644
index 00000000..65e44626
--- /dev/null
+++ b/benchmarks/pandas/bench_dataframe_rename.py
@@ -0,0 +1,27 @@
+"""Benchmark: dataframe_rename — rename columns in a 100k-row DataFrame"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 5
+ITERATIONS = 20
+
+a = np.arange(ROWS, dtype=np.float64) * 1.1
+b = np.arange(ROWS, dtype=np.float64) * 2.2
+df = pd.DataFrame({"old_a": a, "old_b": b})
+
+for _ in range(WARMUP):
+ df.rename(columns={"old_a": "new_a", "old_b": "new_b"})
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ df.rename(columns={"old_a": "new_a", "old_b": "new_b"})
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "dataframe_rename",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_dataframe_sort.py b/benchmarks/pandas/bench_dataframe_sort.py
new file mode 100644
index 00000000..6ef3c84d
--- /dev/null
+++ b/benchmarks/pandas/bench_dataframe_sort.py
@@ -0,0 +1,28 @@
+"""Benchmark: dataframe_sort — sort a 100k-row DataFrame by two columns"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+rng = np.random.default_rng(42)
+a = [f"group_{i % 100}" for i in range(ROWS)]
+b = rng.random(ROWS) * 1000
+df = pd.DataFrame({"a": a, "b": b})
+
+for _ in range(WARMUP):
+ df.sort_values(["a", "b"])
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ df.sort_values(["a", "b"])
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "dataframe_sort",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_describe.py b/benchmarks/pandas/bench_describe.py
new file mode 100644
index 00000000..b9e84dcc
--- /dev/null
+++ b/benchmarks/pandas/bench_describe.py
@@ -0,0 +1,27 @@
+"""Benchmark: describe — summary statistics on a 100k-row DataFrame"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+a = np.arange(ROWS, dtype=np.float64) * 1.1
+b = np.sqrt(np.arange(1, ROWS + 1, dtype=np.float64))
+df = pd.DataFrame({"a": a, "b": b})
+
+for _ in range(WARMUP):
+ df.describe()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ df.describe()
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "describe",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_ewm_mean.py b/benchmarks/pandas/bench_ewm_mean.py
new file mode 100644
index 00000000..4e6cbadd
--- /dev/null
+++ b/benchmarks/pandas/bench_ewm_mean.py
@@ -0,0 +1,26 @@
+"""Benchmark: ewm_mean — exponentially weighted mean on 100k-element Series"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+data = np.sin(np.arange(ROWS) * 0.05)
+s = pd.Series(data)
+
+for _ in range(WARMUP):
+ s.ewm(span=20).mean()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.ewm(span=20).mean()
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "ewm_mean",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_groupby_mean.py b/benchmarks/pandas/bench_groupby_mean.py
new file mode 100644
index 00000000..050959af
--- /dev/null
+++ b/benchmarks/pandas/bench_groupby_mean.py
@@ -0,0 +1,27 @@
+"""Benchmark: GroupBy mean on 100k-row DataFrame"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+keys = [f"group_{i % 100}" for i in range(ROWS)]
+vals = np.arange(ROWS, dtype=np.float64) * 0.1
+df = pd.DataFrame({"key": keys, "value": vals})
+
+for _ in range(WARMUP):
+ df.groupby("key")["value"].mean()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ df.groupby("key")["value"].mean()
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "groupby_mean",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_merge.py b/benchmarks/pandas/bench_merge.py
new file mode 100644
index 00000000..9775f4a2
--- /dev/null
+++ b/benchmarks/pandas/bench_merge.py
@@ -0,0 +1,29 @@
+"""Benchmark: merge — inner join two 50k-row DataFrames on a key column"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 50_000
+WARMUP = 3
+ITERATIONS = 10
+
+keys = np.arange(ROWS) % 1000
+vals1 = np.arange(ROWS, dtype=np.float64)
+vals2 = np.arange(ROWS, dtype=np.float64) * 2.0
+df1 = pd.DataFrame({"key": keys, "val1": vals1})
+df2 = pd.DataFrame({"key": keys, "val2": vals2})
+
+for _ in range(WARMUP):
+ pd.merge(df1, df2, on="key", how="inner")
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ pd.merge(df1, df2, on="key", how="inner")
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "merge",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_pivot_table.py b/benchmarks/pandas/bench_pivot_table.py
new file mode 100644
index 00000000..f65f9321
--- /dev/null
+++ b/benchmarks/pandas/bench_pivot_table.py
@@ -0,0 +1,28 @@
+"""Benchmark: pivot_table — pivot aggregation on 100k-row DataFrame"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+rows = [f"row_{i % 100}" for i in range(ROWS)]
+cols = [f"col_{i % 50}" for i in range(ROWS)]
+vals = np.arange(ROWS, dtype=np.float64) * 0.1
+df = pd.DataFrame({"row": rows, "col": cols, "value": vals})
+
+for _ in range(WARMUP):
+ df.pivot_table(values="value", index="row", columns="col", aggfunc="mean")
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ df.pivot_table(values="value", index="row", columns="col", aggfunc="mean")
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "pivot_table",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_read_csv.py b/benchmarks/pandas/bench_read_csv.py
new file mode 100644
index 00000000..d6aa816a
--- /dev/null
+++ b/benchmarks/pandas/bench_read_csv.py
@@ -0,0 +1,31 @@
+"""Benchmark: read_csv — parse a 100k-row CSV file"""
+import json, time, os, tempfile
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 2
+ITERATIONS = 5
+
+# Build CSV file (create the scratch directory first so open() cannot fail)
+tmp_path = "/tmp/gh-aw/agent/bench_read_csv.csv"
+os.makedirs(os.path.dirname(tmp_path), exist_ok=True)
+with open(tmp_path, "w") as f:
+    f.write("id,value,label\n")
+    for i in range(ROWS):
+        f.write(f"{i},{i * 1.1:.4f},cat_{i % 50}\n")
+
+for _ in range(WARMUP):
+    pd.read_csv(tmp_path)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    pd.read_csv(tmp_path)
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+    "function": "read_csv",
+    "mean_ms": total / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_rolling_mean.py b/benchmarks/pandas/bench_rolling_mean.py
new file mode 100644
index 00000000..5258fca4
--- /dev/null
+++ b/benchmarks/pandas/bench_rolling_mean.py
@@ -0,0 +1,26 @@
+"""Benchmark: rolling mean with window=100 on 100k-element Series"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+data = np.sin(np.arange(ROWS) * 0.01)
+s = pd.Series(data)
+
+for _ in range(WARMUP):
+ s.rolling(100).mean()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.rolling(100).mean()
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "rolling_mean",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_series_arithmetic.py b/benchmarks/pandas/bench_series_arithmetic.py
new file mode 100644
index 00000000..4f0325b0
--- /dev/null
+++ b/benchmarks/pandas/bench_series_arithmetic.py
@@ -0,0 +1,26 @@
+"""Benchmark: Series arithmetic (add + multiply on 100k-element Series)"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 5
+ITERATIONS = 20
+
+data = np.arange(ROWS, dtype=np.float64) * 0.5
+s = pd.Series(data)
+
+for _ in range(WARMUP):
+ (s + 2.0) * 0.5
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ (s + 2.0) * 0.5
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "series_arithmetic",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_series_creation.py b/benchmarks/pandas/bench_series_creation.py
new file mode 100644
index 00000000..c27fcf87
--- /dev/null
+++ b/benchmarks/pandas/bench_series_creation.py
@@ -0,0 +1,47 @@
+"""
+Benchmark: Series creation
+
+Creates a Series from a large numeric array and measures the time.
+Outputs JSON: {"function": "series_creation", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+"""
+
+import json
+import time
+
+import pandas as pd
+
+SIZE = 100_000
+WARMUP = 5
+ITERATIONS = 50
+
+
+def generate_data(n: int) -> "list[float]":
+ """Generate a deterministic numeric array of the given size."""
+ return [i * 1.1 + 0.5 for i in range(n)]
+
+
+data = generate_data(SIZE)
+
+# Warm-up
+for _ in range(WARMUP):
+ pd.Series(list(data))
+
+# Measured runs
+times: "list[float]" = []
+for _ in range(ITERATIONS):
+ start = time.perf_counter()
+ pd.Series(list(data))
+ end = time.perf_counter()
+ times.append((end - start) * 1000) # convert to ms
+
+total_ms = sum(times)
+mean_ms = total_ms / ITERATIONS
+
+result = {
+ "function": "series_creation",
+ "mean_ms": round(mean_ms, 3),
+ "iterations": ITERATIONS,
+ "total_ms": round(total_ms, 3),
+}
+
+print(json.dumps(result))
diff --git a/benchmarks/pandas/bench_series_cumsum.py b/benchmarks/pandas/bench_series_cumsum.py
new file mode 100644
index 00000000..556e3ebd
--- /dev/null
+++ b/benchmarks/pandas/bench_series_cumsum.py
@@ -0,0 +1,26 @@
+"""Benchmark: series_cumsum — cumulative sum on 100k-element Series"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 5
+ITERATIONS = 20
+
+data = np.arange(ROWS, dtype=np.float64) * 0.001
+s = pd.Series(data)
+
+for _ in range(WARMUP):
+ s.cumsum()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.cumsum()
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "series_cumsum",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_series_fillna.py b/benchmarks/pandas/bench_series_fillna.py
new file mode 100644
index 00000000..6b62f6ad
--- /dev/null
+++ b/benchmarks/pandas/bench_series_fillna.py
@@ -0,0 +1,26 @@
+"""Benchmark: series_fillna — fill NaN values in a 100k-element Series"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 5
+ITERATIONS = 20
+
+data = np.where(np.arange(ROWS) % 5 == 0, np.nan, np.arange(ROWS) * 1.1)
+s = pd.Series(data)
+
+for _ in range(WARMUP):
+ s.fillna(0.0)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.fillna(0.0)
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "series_fillna",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_series_shift.py b/benchmarks/pandas/bench_series_shift.py
new file mode 100644
index 00000000..0b294485
--- /dev/null
+++ b/benchmarks/pandas/bench_series_shift.py
@@ -0,0 +1,26 @@
+"""Benchmark: series_shift — shift values by 1 position in a 100k-element Series"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 5
+ITERATIONS = 20
+
+data = np.arange(ROWS, dtype=np.float64)
+s = pd.Series(data)
+
+for _ in range(WARMUP):
+ s.shift(1)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.shift(1)
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "series_shift",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_series_sort.py b/benchmarks/pandas/bench_series_sort.py
new file mode 100644
index 00000000..c31de4aa
--- /dev/null
+++ b/benchmarks/pandas/bench_series_sort.py
@@ -0,0 +1,27 @@
+"""Benchmark: Series sort (sort_values on 100k-element numeric Series)"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+rng = np.random.default_rng(42)
+data = rng.random(ROWS) * 1000
+s = pd.Series(data)
+
+for _ in range(WARMUP):
+ s.sort_values()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.sort_values()
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "series_sort",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_series_string_ops.py b/benchmarks/pandas/bench_series_string_ops.py
new file mode 100644
index 00000000..8744ddcc
--- /dev/null
+++ b/benchmarks/pandas/bench_series_string_ops.py
@@ -0,0 +1,27 @@
+"""Benchmark: series_string_ops — str.upper and str.contains on 100k strings"""
+import json, time
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+data = [f"hello_world_{i % 200}" for i in range(ROWS)]
+s = pd.Series(data)
+
+for _ in range(WARMUP):
+ s.str.upper()
+ s.str.contains("world")
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.str.upper()
+ s.str.contains("world")
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "series_string_ops",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_series_value_counts.py b/benchmarks/pandas/bench_series_value_counts.py
new file mode 100644
index 00000000..c156a1eb
--- /dev/null
+++ b/benchmarks/pandas/bench_series_value_counts.py
@@ -0,0 +1,25 @@
+"""Benchmark: value_counts on a 100k-element Series with 100 distinct values"""
+import json, time
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+data = [f"cat_{i % 100}" for i in range(ROWS)]
+s = pd.Series(data)
+
+for _ in range(WARMUP):
+ s.value_counts()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.value_counts()
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "series_value_counts",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/results.json b/benchmarks/results.json
new file mode 100644
index 00000000..c883f334
--- /dev/null
+++ b/benchmarks/results.json
@@ -0,0 +1,247 @@
+{
+ "benchmarks": [
+ {
+ "function": "concat",
+ "tsb": null,
+ "pandas": {
+ "function": "concat",
+ "mean_ms": 0.11375509999993483,
+ "iterations": 20,
+ "total_ms": 2.2751019999986966
+ },
+ "ratio": null
+ },
+ {
+ "function": "dataframe_apply",
+ "tsb": null,
+ "pandas": {
+ "function": "dataframe_apply",
+ "mean_ms": 47.161531699998704,
+ "iterations": 10,
+ "total_ms": 471.61531699998704
+ },
+ "ratio": null
+ },
+ {
+ "function": "dataframe_creation",
+ "tsb": null,
+ "pandas": {
+ "function": "dataframe_creation",
+ "mean_ms": 5.148059900000135,
+ "iterations": 10,
+ "total_ms": 51.48059900000135
+ },
+ "ratio": null
+ },
+ {
+ "function": "dataframe_dropna",
+ "tsb": null,
+ "pandas": {
+ "function": "dataframe_dropna",
+ "mean_ms": 2.42739894999886,
+ "iterations": 20,
+ "total_ms": 48.547978999977204
+ },
+ "ratio": null
+ },
+ {
+ "function": "dataframe_filter",
+ "tsb": null,
+ "pandas": {
+ "function": "dataframe_filter",
+ "mean_ms": 0.4964389500003108,
+ "iterations": 20,
+ "total_ms": 9.928779000006216
+ },
+ "ratio": null
+ },
+ {
+ "function": "dataframe_rename",
+ "tsb": null,
+ "pandas": {
+ "function": "dataframe_rename",
+ "mean_ms": 0.17103454999869427,
+ "iterations": 20,
+ "total_ms": 3.4206909999738855
+ },
+ "ratio": null
+ },
+ {
+ "function": "dataframe_sort",
+ "tsb": null,
+ "pandas": {
+ "function": "dataframe_sort",
+ "mean_ms": 33.301584399998774,
+ "iterations": 10,
+ "total_ms": 333.01584399998774
+ },
+ "ratio": null
+ },
+ {
+ "function": "describe",
+ "tsb": null,
+ "pandas": {
+ "function": "describe",
+ "mean_ms": 5.521558600003118,
+ "iterations": 10,
+ "total_ms": 55.21558600003118
+ },
+ "ratio": null
+ },
+ {
+ "function": "ewm_mean",
+ "tsb": null,
+ "pandas": {
+ "function": "ewm_mean",
+ "mean_ms": 1.7652839999982461,
+ "iterations": 10,
+ "total_ms": 17.65283999998246
+ },
+ "ratio": null
+ },
+ {
+ "function": "groupby_mean",
+ "tsb": null,
+ "pandas": {
+ "function": "groupby_mean",
+ "mean_ms": 8.079756900002621,
+ "iterations": 10,
+ "total_ms": 80.79756900002621
+ },
+ "ratio": null
+ },
+ {
+ "function": "merge",
+ "tsb": null,
+ "pandas": {
+ "function": "merge",
+ "mean_ms": 60.42320619999941,
+ "iterations": 10,
+ "total_ms": 604.2320619999941
+ },
+ "ratio": null
+ },
+ {
+ "function": "pivot_table",
+ "tsb": null,
+ "pandas": {
+ "function": "pivot_table",
+ "mean_ms": 22.500251999997545,
+ "iterations": 10,
+ "total_ms": 225.00251999997545
+ },
+ "ratio": null
+ },
+ {
+ "function": "read_csv",
+ "tsb": null,
+ "pandas": {
+ "function": "read_csv",
+ "mean_ms": 29.951929399999244,
+ "iterations": 5,
+ "total_ms": 149.75964699999622
+ },
+ "ratio": null
+ },
+ {
+ "function": "rolling_mean",
+ "tsb": null,
+ "pandas": {
+ "function": "rolling_mean",
+ "mean_ms": 1.71982609999759,
+ "iterations": 10,
+ "total_ms": 17.1982609999759
+ },
+ "ratio": null
+ },
+ {
+ "function": "series_arithmetic",
+ "tsb": null,
+ "pandas": {
+ "function": "series_arithmetic",
+ "mean_ms": 0.764571400000591,
+ "iterations": 20,
+ "total_ms": 15.29142800001182
+ },
+ "ratio": null
+ },
+ {
+ "function": "series_creation",
+ "tsb": null,
+ "pandas": {
+ "function": "series_creation",
+ "mean_ms": 7.607,
+ "iterations": 50,
+ "total_ms": 380.349
+ },
+ "ratio": null
+ },
+ {
+ "function": "series_cumsum",
+ "tsb": null,
+ "pandas": {
+ "function": "series_cumsum",
+ "mean_ms": 1.1250383499998406,
+ "iterations": 20,
+ "total_ms": 22.500766999996813
+ },
+ "ratio": null
+ },
+ {
+ "function": "series_fillna",
+ "tsb": null,
+ "pandas": {
+ "function": "series_fillna",
+ "mean_ms": 0.18527670000025864,
+ "iterations": 20,
+ "total_ms": 3.705534000005173
+ },
+ "ratio": null
+ },
+ {
+ "function": "series_shift",
+ "tsb": null,
+ "pandas": {
+ "function": "series_shift",
+ "mean_ms": 0.07249699999931636,
+ "iterations": 20,
+ "total_ms": 1.4499399999863272
+ },
+ "ratio": null
+ },
+ {
+ "function": "series_sort",
+ "tsb": null,
+ "pandas": {
+ "function": "series_sort",
+ "mean_ms": 5.127767300001551,
+ "iterations": 10,
+ "total_ms": 51.27767300001551
+ },
+ "ratio": null
+ },
+ {
+ "function": "series_string_ops",
+ "tsb": null,
+ "pandas": {
+ "function": "series_string_ops",
+ "mean_ms": 34.08206670000027,
+ "iterations": 10,
+ "total_ms": 340.8206670000027
+ },
+ "ratio": null
+ },
+ {
+ "function": "series_value_counts",
+ "tsb": null,
+ "pandas": {
+ "function": "series_value_counts",
+ "mean_ms": 9.212644899997713,
+ "iterations": 10,
+ "total_ms": 92.12644899997713
+ },
+ "ratio": null
+ }
+ ],
+ "timestamp": "2026-04-12T15:46:00Z"
+}
\ No newline at end of file
diff --git a/benchmarks/run_benchmarks.sh b/benchmarks/run_benchmarks.sh
new file mode 100644
index 00000000..0f800de0
--- /dev/null
+++ b/benchmarks/run_benchmarks.sh
@@ -0,0 +1,129 @@
+#!/usr/bin/env bash
+#
+# Run all tsb (TypeScript) and pandas (Python) benchmarks and collect results.
+#
+# Usage: ./benchmarks/run_benchmarks.sh
+#
+# Outputs: benchmarks/results.json with all benchmark results
+#
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+
+# Ensure Python and pandas are available
+if ! command -v python3 &>/dev/null; then
+ echo "ERROR: python3 is required but not found" >&2
+ exit 1
+fi
+
+python3 -c "import pandas" 2>/dev/null || {
+ echo "Installing pandas..."
+ pip3 install pandas --quiet
+}
+
+# Ensure Bun is available
+if ! command -v bun &>/dev/null; then
+ echo "ERROR: bun is required but not found" >&2
+ exit 1
+fi
+
+# Collect results
+results='{"benchmarks": [], "timestamp": "'$(date -u +"%Y-%m-%dT%H:%M:%SZ")'"}'
+
+echo "=== Running Performance Benchmarks ==="
+echo ""
+
+# Find all TypeScript benchmark files
+for ts_bench in "$SCRIPT_DIR"/tsb/bench_*.ts; do
+ [ -f "$ts_bench" ] || continue
+ bench_name=$(basename "$ts_bench" .ts | sed 's/^bench_//')
+
+ # Check for matching Python benchmark
+ py_bench="$SCRIPT_DIR/pandas/bench_${bench_name}.py"
+ if [ ! -f "$py_bench" ]; then
+ echo "SKIP: $bench_name (no matching Python benchmark)"
+ continue
+ fi
+
+ echo "--- Benchmarking: $bench_name ---"
+
+ # Run TypeScript benchmark
+ echo " Running tsb (TypeScript)..."
+ ts_result=$(cd "$REPO_ROOT" && bun run "$ts_bench" 2>/dev/null) || {
+ echo " ERROR: TypeScript benchmark failed"
+ continue
+ }
+ echo " tsb result: $ts_result"
+
+ # Run Python benchmark
+ echo " Running pandas (Python)..."
+ py_result=$(cd "$REPO_ROOT" && python3 "$py_bench" 2>/dev/null) || {
+ echo " ERROR: Python benchmark failed"
+ continue
+ }
+ echo " pandas result: $py_result"
+
+ # Extract mean_ms from both
+ ts_mean=$(echo "$ts_result" | python3 -c "import sys, json; d=json.load(sys.stdin); print(d['mean_ms'])" 2>/dev/null) || {
+ echo " ERROR: could not parse tsb benchmark result"
+ continue
+ }
+ py_mean=$(echo "$py_result" | python3 -c "import sys, json; d=json.load(sys.stdin); print(d['mean_ms'])" 2>/dev/null) || {
+ echo " ERROR: could not parse pandas benchmark result"
+ continue
+ }
+
+ # Calculate ratio (tsb / pandas) — < 1.0 means tsb is faster
+ ratio=$(python3 -c "
+ts, py = $ts_mean, $py_mean
+if py <= 0:
+ print('null')
+else:
+ print(round(ts / py, 3))
+")
+ if [ "$ratio" = "null" ]; then
+ echo " ERROR: pandas mean_ms is zero, cannot compute ratio"
+ continue
+ fi
+
+ echo " Ratio (tsb/pandas): ${ratio}x"
+ echo ""
+
+ # Add to results JSON
+ results=$(echo "$results" | python3 -c "
+import sys, json
+data = json.load(sys.stdin)
+data['benchmarks'].append({
+ 'function': '$bench_name',
+ 'tsb': $ts_result,
+ 'pandas': $py_result,
+ 'ratio': $ratio
+})
+print(json.dumps(data, indent=2))
+")
+done
+
+# Write results
+echo "$results" > "$SCRIPT_DIR/results.json"
+echo "=== Results written to benchmarks/results.json ==="
+echo ""
+
+# Summary
+echo "=== Summary ==="
+echo "$results" | python3 -c "
+import sys, json
+data = json.load(sys.stdin)
+benchmarks = data.get('benchmarks', [])
+if not benchmarks:
+ print('No benchmarks found.')
+else:
+ print(f'Functions benchmarked: {len(benchmarks)}')
+ for b in benchmarks:
+ fn = b['function']
+ ts = b['tsb']['mean_ms']
+ py = b['pandas']['mean_ms']
+ ratio = b['ratio']
+ faster = 'tsb' if ratio < 1 else 'pandas'
+ print(f' {fn}: tsb={ts}ms, pandas={py}ms, ratio={ratio}x ({faster} is faster)')
+"
diff --git a/benchmarks/tsb/bench_concat.ts b/benchmarks/tsb/bench_concat.ts
new file mode 100644
index 00000000..7a72f777
--- /dev/null
+++ b/benchmarks/tsb/bench_concat.ts
@@ -0,0 +1,32 @@
+/**
+ * Benchmark: concat — concatenate two 50k-row DataFrames
+ */
+import { DataFrame, concat } from "../../src/index.js";
+
+const ROWS = 50_000;
+const WARMUP = 5;
+const ITERATIONS = 20;
+
+const vals1 = Float64Array.from({ length: ROWS }, (_, i) => i * 1.0);
+const vals2 = Float64Array.from({ length: ROWS }, (_, i) => i * 2.0);
+const df1 = new DataFrame({ value: vals1 });
+const df2 = new DataFrame({ value: vals2 });
+
+for (let i = 0; i < WARMUP; i++) {
+ concat([df1, df2]);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ concat([df1, df2]);
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "concat",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_dataframe_apply.ts b/benchmarks/tsb/bench_dataframe_apply.ts
new file mode 100644
index 00000000..32a99a68
--- /dev/null
+++ b/benchmarks/tsb/bench_dataframe_apply.ts
@@ -0,0 +1,32 @@
+/**
+ * Benchmark: dataframe_apply — apply a function across rows of a 10k-row DataFrame
+ * (reduced size due to JS per-row overhead)
+ */
+import { DataFrame } from "../../src/index.js";
+
+const ROWS = 10_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+const a = Float64Array.from({ length: ROWS }, (_, i) => i * 1.0);
+const b = Float64Array.from({ length: ROWS }, (_, i) => i * 2.0);
+const df = new DataFrame({ a, b });
+
+for (let i = 0; i < WARMUP; i++) {
+ df.apply((row) => (row["a"] as number) + (row["b"] as number), { axis: 1 });
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ df.apply((row) => (row["a"] as number) + (row["b"] as number), { axis: 1 });
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "dataframe_apply",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_dataframe_creation.ts b/benchmarks/tsb/bench_dataframe_creation.ts
new file mode 100644
index 00000000..2eb8fd56
--- /dev/null
+++ b/benchmarks/tsb/bench_dataframe_creation.ts
@@ -0,0 +1,33 @@
+/**
+ * Benchmark: DataFrame creation from arrays
+ * Creates a 3-column (2 numeric + 1 string) 100k-row DataFrame
+ */
+import { DataFrame } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+const nums1 = Float64Array.from({ length: ROWS }, (_, i) => i * 1.1);
+const nums2 = Float64Array.from({ length: ROWS }, (_, i) => i * 2.2);
+const strs = Array.from({ length: ROWS }, (_, i) => `label_${i % 100}`);
+
+// Warm up
+for (let i = 0; i < WARMUP; i++) {
+ new DataFrame({ a: nums1, b: nums2, c: strs });
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ new DataFrame({ a: nums1, b: nums2, c: strs });
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "dataframe_creation",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_dataframe_dropna.ts b/benchmarks/tsb/bench_dataframe_dropna.ts
new file mode 100644
index 00000000..e4fef46b
--- /dev/null
+++ b/benchmarks/tsb/bench_dataframe_dropna.ts
@@ -0,0 +1,31 @@
+/**
+ * Benchmark: dataframe_dropna — drop rows with NaN values from 100k-row DataFrame
+ */
+import { DataFrame } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 5;
+const ITERATIONS = 20;
+
+const a = Float64Array.from({ length: ROWS }, (_, i) => (i % 10 === 0 ? NaN : i * 1.1));
+const b = Float64Array.from({ length: ROWS }, (_, i) => (i % 7 === 0 ? NaN : i * 2.2));
+const df = new DataFrame({ a, b });
+
+for (let i = 0; i < WARMUP; i++) {
+ df.dropna();
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ df.dropna();
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "dataframe_dropna",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_dataframe_filter.ts b/benchmarks/tsb/bench_dataframe_filter.ts
new file mode 100644
index 00000000..57d78bd7
--- /dev/null
+++ b/benchmarks/tsb/bench_dataframe_filter.ts
@@ -0,0 +1,30 @@
+/**
+ * Benchmark: DataFrame filter (boolean mask on 100k-row DataFrame)
+ */
+import { DataFrame } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 5;
+const ITERATIONS = 20;
+
+const vals = Float64Array.from({ length: ROWS }, (_, i) => i * 0.1);
+const df = new DataFrame({ value: vals });
+
+for (let i = 0; i < WARMUP; i++) {
+ df.filter((row) => (row["value"] as number) > 5000);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ df.filter((row) => (row["value"] as number) > 5000);
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "dataframe_filter",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_dataframe_rename.ts b/benchmarks/tsb/bench_dataframe_rename.ts
new file mode 100644
index 00000000..807b63c9
--- /dev/null
+++ b/benchmarks/tsb/bench_dataframe_rename.ts
@@ -0,0 +1,31 @@
+/**
+ * Benchmark: dataframe_rename — rename columns in a 100k-row DataFrame
+ */
+import { DataFrame } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 5;
+const ITERATIONS = 20;
+
+const a = Float64Array.from({ length: ROWS }, (_, i) => i * 1.1);
+const b = Float64Array.from({ length: ROWS }, (_, i) => i * 2.2);
+const df = new DataFrame({ old_a: a, old_b: b });
+
+for (let i = 0; i < WARMUP; i++) {
+ df.rename({ old_a: "new_a", old_b: "new_b" });
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ df.rename({ old_a: "new_a", old_b: "new_b" });
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "dataframe_rename",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_dataframe_sort.ts b/benchmarks/tsb/bench_dataframe_sort.ts
new file mode 100644
index 00000000..707e4ecf
--- /dev/null
+++ b/benchmarks/tsb/bench_dataframe_sort.ts
@@ -0,0 +1,39 @@
+/**
+ * Benchmark: dataframe_sort — sort a 100k-row DataFrame by two columns
+ */
+import { DataFrame } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+// Deterministic LCG (seed 42) so the generated data — and therefore the
+// timings — are reproducible, matching the seeded RNGs in the pandas benchmarks.
+let lcgState = 42 >>> 0;
+const nextRand = (): number => {
+  lcgState = (lcgState * 1664525 + 1013904223) >>> 0;
+  return lcgState / 4294967296;
+};
+
+const a = Array.from({ length: ROWS }, (_, i) => `group_${i % 100}`);
+const b = Float64Array.from({ length: ROWS }, () => nextRand() * 1000);
+const df = new DataFrame({ a, b });
+
+for (let i = 0; i < WARMUP; i++) {
+  df.sort_values(["a", "b"]);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+  df.sort_values(["a", "b"]);
+}
+const total = performance.now() - start;
+
+console.log(
+  JSON.stringify({
+    function: "dataframe_sort",
+    mean_ms: total / ITERATIONS,
+    iterations: ITERATIONS,
+    total_ms: total,
+  }),
+);
diff --git a/benchmarks/tsb/bench_describe.ts b/benchmarks/tsb/bench_describe.ts
new file mode 100644
index 00000000..368156a3
--- /dev/null
+++ b/benchmarks/tsb/bench_describe.ts
@@ -0,0 +1,31 @@
+/**
+ * Benchmark: describe — summary statistics on a 100k-row DataFrame
+ */
+import { DataFrame } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+const a = Float64Array.from({ length: ROWS }, (_, i) => i * 1.1);
+const b = Float64Array.from({ length: ROWS }, (_, i) => Math.sqrt(i + 1));
+const df = new DataFrame({ a, b });
+
+for (let i = 0; i < WARMUP; i++) {
+ df.describe();
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ df.describe();
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "describe",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_ewm_mean.ts b/benchmarks/tsb/bench_ewm_mean.ts
new file mode 100644
index 00000000..8e6597f7
--- /dev/null
+++ b/benchmarks/tsb/bench_ewm_mean.ts
@@ -0,0 +1,30 @@
+/**
+ * Benchmark: ewm_mean — exponentially weighted mean on 100k-element Series
+ */
+import { Series } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+const data = Float64Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.05));
+const s = new Series(data);
+
+for (let i = 0; i < WARMUP; i++) {
+ s.ewm({ span: 20 }).mean();
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ s.ewm({ span: 20 }).mean();
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "ewm_mean",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_groupby_mean.ts b/benchmarks/tsb/bench_groupby_mean.ts
new file mode 100644
index 00000000..efecfddb
--- /dev/null
+++ b/benchmarks/tsb/bench_groupby_mean.ts
@@ -0,0 +1,31 @@
+/**
+ * Benchmark: GroupBy mean on 100k-row DataFrame
+ */
+import { DataFrame } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+const keys = Array.from({ length: ROWS }, (_, i) => `group_${i % 100}`);
+const vals = Float64Array.from({ length: ROWS }, (_, i) => i * 0.1);
+const df = new DataFrame({ key: keys, value: vals });
+
+for (let i = 0; i < WARMUP; i++) {
+ df.groupby("key").mean();
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ df.groupby("key").mean();
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "groupby_mean",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_merge.ts b/benchmarks/tsb/bench_merge.ts
new file mode 100644
index 00000000..da68b52b
--- /dev/null
+++ b/benchmarks/tsb/bench_merge.ts
@@ -0,0 +1,33 @@
+/**
+ * Benchmark: merge — inner join two 50k-row DataFrames on a key column
+ */
+import { DataFrame, merge } from "../../src/index.js";
+
+const ROWS = 50_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+const keys = Array.from({ length: ROWS }, (_, i) => i % 1000);
+const vals1 = Float64Array.from({ length: ROWS }, (_, i) => i * 1.0);
+const vals2 = Float64Array.from({ length: ROWS }, (_, i) => i * 2.0);
+const df1 = new DataFrame({ key: keys, val1: vals1 });
+const df2 = new DataFrame({ key: keys, val2: vals2 });
+
+for (let i = 0; i < WARMUP; i++) {
+ merge(df1, df2, { on: "key", how: "inner" });
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ merge(df1, df2, { on: "key", how: "inner" });
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "merge",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_pivot_table.ts b/benchmarks/tsb/bench_pivot_table.ts
new file mode 100644
index 00000000..78b94702
--- /dev/null
+++ b/benchmarks/tsb/bench_pivot_table.ts
@@ -0,0 +1,32 @@
+/**
+ * Benchmark: pivot_table — pivot aggregation on 100k-row DataFrame
+ */
+import { DataFrame } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+const rows = Array.from({ length: ROWS }, (_, i) => `row_${i % 100}`);
+const cols = Array.from({ length: ROWS }, (_, i) => `col_${i % 50}`);
+const vals = Float64Array.from({ length: ROWS }, (_, i) => i * 0.1);
+const df = new DataFrame({ row: rows, col: cols, value: vals });
+
+for (let i = 0; i < WARMUP; i++) {
+ df.pivot_table({ values: "value", index: "row", columns: "col", aggfunc: "mean" });
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ df.pivot_table({ values: "value", index: "row", columns: "col", aggfunc: "mean" });
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "pivot_table",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_read_csv.ts b/benchmarks/tsb/bench_read_csv.ts
new file mode 100644
index 00000000..0d9462bf
--- /dev/null
+++ b/benchmarks/tsb/bench_read_csv.ts
@@ -0,0 +1,40 @@
+/**
+ * Benchmark: read_csv — parse a 100k-row CSV string
+ */
+import { read_csv } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 2;
+const ITERATIONS = 5;
+
+// Build CSV string
+const lines = ["id,value,label"];
+for (let i = 0; i < ROWS; i++) {
+  lines.push(`${i},${(i * 1.1).toFixed(4)},cat_${i % 50}`);
+}
+const csvContent = lines.join("\n");
+
+// Write to a temp file (create the directory first so the write cannot fail)
+import { mkdirSync, writeFileSync } from "node:fs";
+const tmpPath = "/tmp/gh-aw/agent/bench_read_csv.csv";
+mkdirSync("/tmp/gh-aw/agent", { recursive: true });
+writeFileSync(tmpPath, csvContent, "utf8");
+
+for (let i = 0; i < WARMUP; i++) {
+  read_csv(tmpPath);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+  read_csv(tmpPath);
+}
+const total = performance.now() - start;
+
+console.log(
+  JSON.stringify({
+    function: "read_csv",
+    mean_ms: total / ITERATIONS,
+    iterations: ITERATIONS,
+    total_ms: total,
+  }),
+);
diff --git a/benchmarks/tsb/bench_rolling_mean.ts b/benchmarks/tsb/bench_rolling_mean.ts
new file mode 100644
index 00000000..646d3100
--- /dev/null
+++ b/benchmarks/tsb/bench_rolling_mean.ts
@@ -0,0 +1,30 @@
+/**
+ * Benchmark: rolling mean with window=100 on 100k-element Series
+ */
+import { Series } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+const data = Float64Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.01));
+const s = new Series(data);
+
+for (let i = 0; i < WARMUP; i++) {
+ s.rolling(100).mean();
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ s.rolling(100).mean();
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "rolling_mean",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_series_arithmetic.ts b/benchmarks/tsb/bench_series_arithmetic.ts
new file mode 100644
index 00000000..552be2ca
--- /dev/null
+++ b/benchmarks/tsb/bench_series_arithmetic.ts
@@ -0,0 +1,30 @@
+/**
+ * Benchmark: Series arithmetic (add + multiply on 100k-element Series)
+ */
+import { Series } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 5;
+const ITERATIONS = 20;
+
+const data = Float64Array.from({ length: ROWS }, (_, i) => i * 0.5);
+const s = new Series(data);
+
+for (let i = 0; i < WARMUP; i++) {
+ s.add(2.0).mul(0.5);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ s.add(2.0).mul(0.5);
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "series_arithmetic",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_series_creation.ts b/benchmarks/tsb/bench_series_creation.ts
new file mode 100644
index 00000000..c7b4e145
--- /dev/null
+++ b/benchmarks/tsb/bench_series_creation.ts
@@ -0,0 +1,49 @@
+/**
+ * Benchmark: Series creation
+ *
+ * Creates a Series from a large numeric array and measures the time.
+ * Outputs JSON: {"function": "series_creation", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+
+import { Series } from "../../src/index.ts";
+
+const SIZE = 100_000;
+const WARMUP = 5;
+const ITERATIONS = 50;
+
+/** Generate a deterministic numeric array of the given size. */
+function generateData(n: number): readonly number[] {
+ const arr: number[] = [];
+ for (let i = 0; i < n; i++) {
+ arr.push(i * 1.1 + 0.5);
+ }
+ return arr;
+}
+
+const data = generateData(SIZE);
+
+// Warm-up
+for (let i = 0; i < WARMUP; i++) {
+ new Series({ data: [...data] });
+}
+
+// Measured runs
+const times: number[] = [];
+for (let i = 0; i < ITERATIONS; i++) {
+ const start = performance.now();
+ new Series({ data: [...data] });
+ const end = performance.now();
+ times.push(end - start);
+}
+
+const totalMs = times.reduce((a, b) => a + b, 0);
+const meanMs = totalMs / ITERATIONS;
+
+const result = {
+ function: "series_creation",
+ mean_ms: Math.round(meanMs * 1000) / 1000,
+ iterations: ITERATIONS,
+ total_ms: Math.round(totalMs * 1000) / 1000,
+};
+
+console.log(JSON.stringify(result));
diff --git a/benchmarks/tsb/bench_series_cumsum.ts b/benchmarks/tsb/bench_series_cumsum.ts
new file mode 100644
index 00000000..3eeba5b0
--- /dev/null
+++ b/benchmarks/tsb/bench_series_cumsum.ts
@@ -0,0 +1,30 @@
+/**
+ * Benchmark: series_cumsum — cumulative sum on 100k-element Series
+ */
+import { Series } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 5;
+const ITERATIONS = 20;
+
+const data = Float64Array.from({ length: ROWS }, (_, i) => i * 0.001);
+const s = new Series(data);
+
+for (let i = 0; i < WARMUP; i++) {
+ s.cumsum();
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ s.cumsum();
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "series_cumsum",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_series_fillna.ts b/benchmarks/tsb/bench_series_fillna.ts
new file mode 100644
index 00000000..3e658b01
--- /dev/null
+++ b/benchmarks/tsb/bench_series_fillna.ts
@@ -0,0 +1,31 @@
+/**
+ * Benchmark: series_fillna — fill NaN/null values in a 100k-element Series
+ */
+import { Series } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 5;
+const ITERATIONS = 20;
+
+// Create series with every 5th value as NaN
+const data = Float64Array.from({ length: ROWS }, (_, i) => (i % 5 === 0 ? NaN : i * 1.1));
+const s = new Series(data);
+
+for (let i = 0; i < WARMUP; i++) {
+ s.fillna(0.0);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ s.fillna(0.0);
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "series_fillna",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_series_shift.ts b/benchmarks/tsb/bench_series_shift.ts
new file mode 100644
index 00000000..46e79d19
--- /dev/null
+++ b/benchmarks/tsb/bench_series_shift.ts
@@ -0,0 +1,30 @@
+/**
+ * Benchmark: series_shift — shift values by 1 position in a 100k-element Series
+ */
+import { Series } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 5;
+const ITERATIONS = 20;
+
+const data = Float64Array.from({ length: ROWS }, (_, i) => i * 1.0);
+const s = new Series(data);
+
+for (let i = 0; i < WARMUP; i++) {
+ s.shift(1);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ s.shift(1);
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "series_shift",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_series_sort.ts b/benchmarks/tsb/bench_series_sort.ts
new file mode 100644
index 00000000..c6aedb93
--- /dev/null
+++ b/benchmarks/tsb/bench_series_sort.ts
@@ -0,0 +1,30 @@
+/**
+ * Benchmark: Series sort (argsort on 100k-element numeric Series)
+ */
+import { Series } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+const data = Float64Array.from({ length: ROWS }, () => Math.random() * 1000);
+const s = new Series(data);
+
+for (let i = 0; i < WARMUP; i++) {
+ s.sort_values();
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ s.sort_values();
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "series_sort",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_series_string_ops.ts b/benchmarks/tsb/bench_series_string_ops.ts
new file mode 100644
index 00000000..c44cdefe
--- /dev/null
+++ b/benchmarks/tsb/bench_series_string_ops.ts
@@ -0,0 +1,32 @@
+/**
+ * Benchmark: series_string_ops — str.upper and str.contains on 100k strings
+ */
+import { Series } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+const data = Array.from({ length: ROWS }, (_, i) => `hello_world_${i % 200}`);
+const s = new Series(data);
+
+for (let i = 0; i < WARMUP; i++) {
+ s.str.upper();
+ s.str.contains("world");
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ s.str.upper();
+ s.str.contains("world");
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "series_string_ops",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_series_value_counts.ts b/benchmarks/tsb/bench_series_value_counts.ts
new file mode 100644
index 00000000..b5352f54
--- /dev/null
+++ b/benchmarks/tsb/bench_series_value_counts.ts
@@ -0,0 +1,30 @@
+/**
+ * Benchmark: value_counts on a 100k-element Series with 100 distinct values
+ */
+import { Series } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+const data = Array.from({ length: ROWS }, (_, i) => `cat_${i % 100}`);
+const s = new Series(data);
+
+for (let i = 0; i < WARMUP; i++) {
+ s.value_counts();
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ s.value_counts();
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "series_value_counts",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/docs/playground.md b/docs/playground.md
index b2e64183..7f08e62b 100644
--- a/docs/playground.md
+++ b/docs/playground.md
@@ -120,9 +120,6 @@ The CI pipeline (`pages.yml`) runs this automatically during deployment.
## Non-Goals (Current Scope)
-- **Syntax highlighting** in the editor: the current implementation uses a
- plain `
Insert and remove DataFrame columns at precise positions. insertColumn(df, loc, col, values) inserts at integer position, popColumn(df, col) returns { series, df }. Also includes reorderColumns and moveColumn. Mirrors pandas.DataFrame.insert() and .pop().
Bin continuous numeric data into discrete intervals. cut() uses fixed-width or explicit bin edges; qcut() uses quantile-based bins of equal population. Both return codes, labels, and bin edges. Mirrors pandas.cut and pandas.qcut.
Standalone custom rolling-window functions: rollingApply (custom fn per window), rollingAgg (multiple named aggregations → DataFrame), dataFrameRollingApply, dataFrameRollingAgg. Supports minPeriods, center, and raw mode. Mirrors pandas.Rolling.apply() and Rolling.agg().
Attach arbitrary key→value metadata to any Series or DataFrame via a WeakMap registry. Provides getAttrs, setAttrs, updateAttrs, copyAttrs, withAttrs, mergeAttrs, clearAttrs, getAttr, setAttr, deleteAttr, attrsCount, attrsKeys. Mirrors pandas.DataFrame.attrs / pandas.Series.attrs.
+ Because tsb DataFrames are immutable, both functions return a new DataFrame
+ rather than mutating the original. popColumn returns both the extracted
+ Series and the resulting DataFrame.
+
// Duplicate column name (default: not allowed)
+insertColumn(df, 1, "a", [1, 2, 3]);
+// → RangeError: Column "a" already exists. Use allowDuplicates=true to permit...
+
+// Out-of-range loc
+insertColumn(df, 99, "x", [1, 2, 3]);
+// → RangeError: loc=99 is out of range [0, 2].
+
+// Wrong number of values
+insertColumn(df, 0, "x", [1]); // df has 3 rows
+// → RangeError: values length 1 does not match DataFrame row count 3.
+
+// Column not found
+popColumn(df, "missing");
+// → RangeError: Column "missing" not found in DataFrame.
+
+
+
+ Immutability: Like all tsb DataFrame operations, these functions never
+ mutate the original DataFrame. Always assign the return value to a new variable.
+
+ Standalone equivalents of the pandas
+ DataFrame.pipe()
+ /
+ Series.pipe()
+ chaining pattern plus various
+ apply()
+ /
+ applymap()
+ operations — usable without method-call syntax.
+
+
+
+ Why standalone? pandas chains operations via methods:
+ df.pipe(fn1).pipe(fn2). tsb provides a module-level
+ pipe(value, fn1, fn2, …) that works on any value,
+ not just DataFrames. All functions are pure — inputs are never mutated.
+
+
+
API Summary
+
+
+
Function
Pandas equivalent
Description
+
+
+
+
pipe(value, fn1, fn2, …)
+
df.pipe(fn).pipe(fn2)
+
Variadic type-safe pipeline — passes value through fns left-to-right
+ Apply any aggregation function to each rolling window. The function
+ receives the valid (non-null, non-NaN) numeric values
+ in the window and must return a single number.
+
+
import { rollingApply } from "tsb";
+
+const prices = new Series({ data: [10, 12, 11, 15, 14, 16], name: "price" });
+
+// Custom: range (max - min) over each 3-day window
+const range = (w) => Math.max(...w) - Math.min(...w);
+
+rollingApply(prices, 3, range).toArray();
+// [null, null, 2, 4, 4, 2]
+// ↑↑ insufficient data (need 3 observations)
+
+
+
Options
+
+
+
Option
Default
Description
+
+
+
minPeriods
window
Minimum valid observations to compute (null otherwise)
+
center
false
Centre the window (symmetric) instead of trailing
+
raw
false
Pass full window including nulls (filtered to valid nums before fn call)
+
+
+
+
+
// minPeriods=1 → start computing from the very first position
+rollingApply(prices, 3, range, { minPeriods: 1 }).toArray();
+// [0, 2, 2, 4, 4, 2]
+
+// center=true → symmetric window around each point
+rollingApply(prices, 3, range, { center: true }).toArray();
+// [null, 2, 4, 4, 2, null]
+
+
2. rollingAgg — Multiple Aggregations at Once
+
+ Apply several named aggregation functions in a single pass over a Series,
+ returning a DataFrame where each column holds one
+ aggregation result.
+
+
import { rollingAgg } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4, 5, 6, 7, 8] });
+
+const result = rollingAgg(s, 3, {
+ mean: (w) => w.reduce((a, b) => a + b, 0) / w.length,
+ max: (w) => Math.max(...w),
+ min: (w) => Math.min(...w),
+ range: (w) => Math.max(...w) - Math.min(...w),
+});
+
+// result is a DataFrame with columns: "mean", "max", "min", "range"
+// result.col("mean").toArray() → [null, null, 2, 3, 4, 5, 6, 7]
+// result.col("range").toArray() → [null, null, 2, 2, 2, 2, 2, 2]
+ string_ops
+ Standalone string operations for Series and arrays
+
+
+
+ string_ops provides module-level string functions that complement the
+ Series.str accessor. All functions accept a Series, a
+ string[], or a scalar string.
+
+
+
+
+
strNormalize — Unicode normalisation
+
Normalise every element to NFC, NFD, NFKC, or NFKD. Useful when mixing text
+ from different sources (e.g. macOS NFD vs Windows NFC).
+
+
+
+
é
+café
+file
+
+
+
+
+
+
+
+
+
+
+
+
+
strGetDummies — one-hot encode by delimiter
+
Split each string by a delimiter and produce a binary indicator DataFrame —
+ one column per unique token. Equivalent to pandas.Series.str.get_dummies().
+
+
+
+
a|b
+b|c
+a
+a|b|c
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
strExtractAll — extract all regex matches
+
Find every non-overlapping regex match in each element. Returns a JSON-encoded
+ array of match arrays per element — parse with JSON.parse.
+
+
+
+
abc 123 def 456
+foo bar
+hello 99 world 42
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
strRemovePrefix / strRemoveSuffix
+
Strip a leading or trailing string from elements only when it is present.
+
+
+
+
pre_alpha
+pre_beta
+gamma
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
strTranslate — character-level substitution
+
Replace or delete individual characters using a lookup table.
+ Format: one mapping per line as from=to or from=
+ to delete.
+ string_ops_extended adds advanced string utilities that complement
+ string_ops and the Series.str accessor. All functions accept
+ a Series, an array, or a scalar string.
+
+
+
+
+
strSplitExpand — split and expand to DataFrame columns
+
+ Split each element by a delimiter and expand the parts into a DataFrame
+ with one column per position. Mirrors pandas.Series.str.split(expand=True).
+ Shorter rows are padded with null.
+
+
+
+
+
2024-01-15
+2025-12-31
+1999-07-04
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
strExtractGroups — extract regex capture groups
+
+ Extract regex capture groups from each element into a DataFrame.
+ Named groups ((?<name>...)) become column names; unnamed groups
+ become 0, 1, … Non-matching rows produce null.
+
+
+
+
+
John 42
+Alice 30
+Bob invalid
+
+
+
+
+
Use (?<name>...) for named capture groups.
+
+
+
+
+
+
+
+
+
strPartition / strRPartition — split into (before, sep, after)
+
+ strPartition splits at the first occurrence of the separator;
+ strRPartition splits at the last. When the separator is not
+ found, strPartition returns [s, "", ""] and
+ strRPartition returns ["", "", s].
+
+
+
+
+
hello.world.foo
+example.com
+no-separator-here
+
+
+
+
+
+
+
+
+
+
+
+
+
strMultiReplace — apply multiple replacements in sequence
+
+ Apply an ordered list of {pat, repl} pairs to each element.
+ Each replacement is applied to the result of the previous one.
+ Patterns can be string literals (replaced globally) or RegExp objects.
+
+ strIndent adds a prefix to every non-empty line (mirrors
+ textwrap.indent).
+ strDedent removes the common leading whitespace from all lines
+ (mirrors textwrap.dedent).
+
+ Missing values (null / undefined) are preserved as null
+ in all orientations. When using fromDictOriented with "index"
+ orientation, any column that is absent from a given row object is filled with null.
+
+ seriesWhere / seriesMask and their DataFrame equivalents
+ allow element-wise conditional replacement — the TypeScript equivalents of
+ pandas.Series.where
+ and
+ pandas.Series.mask.
+
+
+
+ Quick rule:
+ where(cond) — keep where cond is true, replace elsewhere.
+ mask(cond) — keep where cond is false, replace elsewhere.
+ They are exact inverses of each other.
+
+
+
1. seriesWhere — Boolean Array Condition
+
+ Pass a boolean[] to keep values at true positions, replace
+ the rest with null (or a custom other value).
+
+ When you pass a Series<boolean> as the condition, values are aligned
+ by label, not position. Labels absent from the condition series are treated
+ as false.
+
+
import { Series, seriesWhere } from "tsb";
+
+const prices = new Series({ data: [10, 20, 30], index: ["a", "b", "c"] });
+const valid = new Series<boolean>({ data: [false, true], index: ["a", "b"] });
+
+// Only "b" is in the condition with value=true; "a"=false, "c" missing→false
+const result = seriesWhere(prices, valid, { other: -1 });
+// Series { a: -1, b: 20, c: -1 }
+ Reshape a wide-format DataFrame to long format by collapsing stub-prefixed column
+ groups into rows — mirrors
+
+ pandas.wide_to_long().
+
+
+
Concept
+
+ Given a wide DataFrame where repeated measurements are spread across columns with a
+ common stub prefix and a numeric (or other) suffix — e.g. score_2021,
+ score_2022 — wideToLong pivots those column groups into rows.
+ One row per original row per unique suffix is produced.
+
+
+
Example — numeric suffixes
+
import { DataFrame } from "tsb";
+import { wideToLong } from "tsb";
+
+const df = DataFrame.fromColumns({
+ id: ["x", "y"],
+ A1: [1, 2],
+ A2: [3, 4],
+ B1: [5, 6],
+ B2: [7, 8],
+});
+
+const long = wideToLong(df, ["A", "B"], "id", "num");
+
+// long.columns.values → ["id", "num", "A", "B"]
+// long.shape → [4, 4]
+//
+// id num A B
+// x 1 1 5
+// y 1 2 6
+// x 2 3 7
+// y 2 4 8
+
Column(s) to keep as id variables (repeated per suffix)
+
j
string
Name of the new column holding the suffix values
+
options.sep
string
Separator between stub and suffix (default: "")
+
options.suffix
string
Regex string matching the suffix (default: "\\d+")
+
+
+
+
Output layout
+
+ Output columns are always ordered: id cols, j, stub cols
+ (in the same order the stubs were passed). Suffixes are sorted numerically when they are all
+ integers, otherwise lexicographically. Wide columns that are absent from the DataFrame are
+ filled with null.
+
+
+
diff --git a/playground/window_extended.html b/playground/window_extended.html
new file mode 100644
index 00000000..4232fa5d
--- /dev/null
+++ b/playground/window_extended.html
@@ -0,0 +1,304 @@
+
+
+
+
+
+ tsb — Rolling Extended Stats: sem, skew, kurt, quantile
+
+
+
+
tsb — Rolling Extended Statistics
+
+ Higher-order rolling window statistics extending the core
+
+ pandas.Series.rolling()
+
+ API:
+ sem, skew, kurt, and
+ quantile.
+
+
+
1. rollingSem — Standard Error of the Mean
+
+ The standard error of the mean measures how much the sample mean
+ would vary across repeated samples. For a window of n values:
+
+
sem = std(ddof=1) / √n
+
Requires at least 2 valid observations per window.
+
+
import { rollingSem, Series } from "tsb";
+
+const s = new Series({ data: [2, 4, 4, 4, 5, 5, 7, 9], name: "x" });
+const sem3 = rollingSem(s, 3);
+// [null, null, 0.667, 0, 0.333, 0.333, 0.667, 1.155]
+
+
+
+
Live demo — sem with window=3
+
Comma-separated numbers (nulls accepted):
+
+
+
+
+
+
+
+
2. rollingSkew — Fisher-Pearson Skewness
+
+ Skewness measures asymmetry of the distribution in each window.
+ Positive = right tail heavier; negative = left tail heavier.
+ Uses the unbiased Fisher-Pearson formula (same as pandas):
+
+
skew = [n/((n-1)(n-2))] × Σ[(xᵢ−x̄)/s]³
+
Requires ≥ 3 valid observations.
+
+
import { rollingSkew, Series } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4, 5] });
+rollingSkew(s, 3);
+// [null, null, 0, 0, 0] ← symmetric windows → zero skew
+
+
+
+
Live demo — skewness with window=4
+
+
+
+
+
+
+
3. rollingKurt — Excess Kurtosis
+
+ Kurtosis measures how heavy the tails are relative to a normal distribution.
+ The excess kurtosis subtracts 3, so a normal distribution gives 0.
+ Uses the Fisher (1930) unbiased formula:
+
+
kurt = [n(n+1)/((n-1)(n-2)(n-3))] × Σ[(xᵢ−x̄)/s]⁴ − 3(n-1)²/((n-2)(n-3))
+
Requires ≥ 4 valid observations.
+
+
import { rollingKurt, Series } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4] });
+rollingKurt(s, 4);
+// [null, null, null, -1.2] ← uniform distribution has kurt = -1.2
+
+
+
+
Live demo — excess kurtosis with window=5
+
+
+
+
+
+
+
4. rollingQuantile — Rolling Quantile
+
+ Computes any quantile within each sliding window using configurable
+ interpolation. When q = 0.5 this is identical to
+ rolling.median().
+
Linear interpolation — same as NumPy / pandas default
+
lower
Take the lower of the two surrounding values
+
higher
Take the higher of the two surrounding values
+
midpoint
Arithmetic mean of the two surrounding values
+
nearest
Whichever surrounding value is closest
+
+
+
+
+
Live demo — rolling quantile
+
+
+
+
+
+
+
+
+
Common Options
+
+
Option
Type
Default
Description
+
+
minPeriods
number
= window
Minimum valid obs required per window
+
center
boolean
false
Centre the window around each position
+
+
+
+
+ Note: Functions are pure — they return new Series objects
+ without modifying the input. Missing values (null, NaN)
+ are excluded from each window calculation.
+
+
+
+
+
diff --git a/src/core/api_types.ts b/src/core/api_types.ts
new file mode 100644
index 00000000..860d2050
--- /dev/null
+++ b/src/core/api_types.ts
@@ -0,0 +1,629 @@
+/**
+ * api_types — runtime type-checking predicates, mirroring `pandas.api.types`.
+ *
+ * Two groups of functions are provided:
+ *
+ * **Value-level predicates** — operate on arbitrary JavaScript values, equivalent
+ * to `pandas.api.types.is_scalar`, `is_list_like`, `is_number`, etc.
+ *
+ * **Dtype-level predicates** — accept a `Dtype` instance or a `DtypeName` string
+ * and answer questions about the dtype's kind, equivalent to
+ * `pandas.api.types.is_numeric_dtype`, `is_float_dtype`, etc.
+ *
+ * @example
+ * ```ts
+ * import { isScalar, isNumericDtype, Dtype } from "tsb";
+ * isScalar(42); // true
+ * isScalar([1, 2, 3]); // false
+ * isListLike([1, 2, 3]); // true
+ * isNumericDtype(Dtype.float64); // true
+ * isStringDtype("string"); // true
+ * ```
+ *
+ * @module
+ */
+
+import { Dtype } from "./dtype.ts";
+import type { DtypeName } from "../types.ts";
+
+// ─── internal helper ──────────────────────────────────────────────────────────
+
+/** Resolve a Dtype | DtypeName to a Dtype instance. */
+function resolveDtype(dtype: Dtype | DtypeName): Dtype {
+ if (dtype instanceof Dtype) {
+ return dtype;
+ }
+ return Dtype.from(dtype);
+}
+
+// ═════════════════════════════════════════════════════════════════════════════
+// VALUE-LEVEL PREDICATES
+// ═════════════════════════════════════════════════════════════════════════════
+
+/**
+ * Return `true` if `val` is a scalar (not a collection).
+ *
+ * Scalars: `string`, `number`, `bigint`, `boolean`, `symbol`, `null`,
+ * `undefined`, and `Date` objects. Arrays, plain objects, `Map`, `Set`,
+ * iterables, and class instances other than `Date` are **not** scalars.
+ *
+ * Mirrors `pandas.api.types.is_scalar`.
+ *
+ * @example
+ * ```ts
+ * isScalar(42); // true
+ * isScalar("hello"); // true
+ * isScalar(null); // true
+ * isScalar([1, 2]); // false
+ * isScalar({ a: 1 }); // false
+ * ```
+ */
+export function isScalar(val: unknown): boolean {
+ if (val === null || val === undefined) {
+ return true;
+ }
+ const t = typeof val;
+ if (t === "string" || t === "number" || t === "bigint" || t === "boolean" || t === "symbol") {
+ return true;
+ }
+ if (val instanceof Date) {
+ return true;
+ }
+ return false;
+}
+
+/**
+ * Return `true` if `val` is "list-like" — i.e. iterable (but not a string)
+ * or has a non-negative integer `length` property.
+ *
+ * Mirrors `pandas.api.types.is_list_like`.
+ *
+ * @example
+ * ```ts
+ * isListLike([1, 2, 3]); // true
+ * isListLike(new Set([1])); // true
+ * isListLike("abc"); // false (strings excluded)
+ * isListLike(42); // false
+ * isListLike({ a: 1 }); // false
+ * ```
+ */
+export function isListLike(val: unknown): boolean {
+ if (val === null || val === undefined) {
+ return false;
+ }
+ if (typeof val === "string") {
+ return false;
+ }
+ // Has Symbol.iterator and is not a plain number/boolean/bigint/symbol
+ if (typeof val === "number" || typeof val === "boolean" || typeof val === "bigint" || typeof val === "symbol") {
+ return false;
+ }
+ if (typeof val === "object" || typeof val === "function") {
+ if (Symbol.iterator in (val as object)) {
+ return true;
+ }
+ const len = (val as Record)["length"];
+ if (typeof len === "number" && len >= 0 && Number.isInteger(len)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/**
+ * Return `true` if `val` is array-like — i.e. has a non-negative integer
+ * `length` property.
+ *
+ * Mirrors `pandas.api.types.is_array_like`.
+ *
+ * @example
+ * ```ts
+ * isArrayLike([1, 2]); // true
+ * isArrayLike("abc"); // true (strings have .length)
+ * isArrayLike(42); // false
+ * isArrayLike({}); // false
+ * ```
+ */
+export function isArrayLike(val: unknown): boolean {
+ if (val === null || val === undefined) {
+ return false;
+ }
+ if (typeof val === "string") {
+ return true;
+ }
+ if (typeof val !== "object" && typeof val !== "function") {
+ return false;
+ }
+ const len = (val as Record)["length"];
+ return typeof len === "number" && len >= 0 && Number.isInteger(len);
+}
+
+/**
+ * Return `true` if `val` is dict-like — a plain object (not an array, not a
+ * `Date`, not a class instance).
+ *
+ * Mirrors `pandas.api.types.is_dict_like`.
+ *
+ * @example
+ * ```ts
+ * isDictLike({ a: 1 }); // true
+ * isDictLike(new Map()); // true (has .get / .set)
+ * isDictLike([1, 2]); // false
+ * isDictLike("abc"); // false
+ * ```
+ */
+export function isDictLike(val: unknown): boolean {
+ if (val === null || val === undefined) {
+ return false;
+ }
+ if (typeof val !== "object") {
+ return false;
+ }
+ if (Array.isArray(val)) {
+ return false;
+ }
+ // Treat Map as dict-like (supports key lookup)
+ if (val instanceof Map) {
+ return true;
+ }
+ // Date is not dict-like
+ if (val instanceof Date) {
+ return false;
+ }
+ // Plain objects and other objects with properties
+ return true;
+}
+
+/**
+ * Return `true` if `val` is an iterator — i.e. has a callable `next` method.
+ *
+ * Mirrors `pandas.api.types.is_iterator`.
+ *
+ * @example
+ * ```ts
+ * isIterator([1, 2][Symbol.iterator]()); // true
+ * isIterator([1, 2]); // false
+ * ```
+ */
+export function isIterator(val: unknown): boolean {
+ if (val === null || val === undefined) {
+ return false;
+ }
+ if (typeof val !== "object" && typeof val !== "function") {
+ return false;
+ }
+ return typeof (val as Record)["next"] === "function";
+}
+
+/**
+ * Return `true` if `val` is a `number` (including `NaN` and `±Infinity`).
+ *
+ * Mirrors `pandas.api.types.is_number`.
+ *
+ * @example
+ * ```ts
+ * isNumber(3.14); // true
+ * isNumber(NaN); // true
+ * isNumber("3"); // false
+ * ```
+ */
+export function isNumber(val: unknown): val is number {
+ return typeof val === "number";
+}
+
+/**
+ * Return `true` if `val` is a `boolean`.
+ *
+ * Mirrors `pandas.api.types.is_bool`.
+ *
+ * @example
+ * ```ts
+ * isBool(true); // true
+ * isBool(1); // false
+ * ```
+ */
+export function isBool(val: unknown): val is boolean {
+ return typeof val === "boolean";
+}
+
+/**
+ * Return `true` if `val` is a `string`.
+ *
+ * Named `isStringValue` to distinguish from the dtype-level `isStringDtype`.
+ * Mirrors `pandas.api.types.is_string` (not to be confused with dtype checks).
+ *
+ * @example
+ * ```ts
+ * isStringValue("hello"); // true
+ * isStringValue(42); // false
+ * ```
+ */
+export function isStringValue(val: unknown): val is string {
+ return typeof val === "string";
+}
+
+/**
+ * Return `true` if `val` is a finite floating-point number (has a fractional
+ * component or is finite non-integer). `NaN`, `±Infinity` are **not** floats
+ * in the pandas sense.
+ *
+ * Mirrors `pandas.api.types.is_float`.
+ *
+ * @example
+ * ```ts
+ * isFloat(3.14); // true
+ * isFloat(3.0); // false (integer value)
+ * isFloat(NaN); // false
+ * isFloat(Infinity); // false
+ * ```
+ */
+export function isFloat(val: unknown): boolean {
+ if (typeof val !== "number") {
+ return false;
+ }
+ if (!Number.isFinite(val)) {
+ return false;
+ }
+ return val !== Math.trunc(val);
+}
+
+/**
+ * Return `true` if `val` is a finite integer-valued number.
+ *
+ * Mirrors `pandas.api.types.is_integer`.
+ *
+ * @example
+ * ```ts
+ * isInteger(3); // true
+ * isInteger(3.0); // true (integer value stored as float)
+ * isInteger(3.14); // false
+ * isInteger(NaN); // false
+ * ```
+ */
+export function isInteger(val: unknown): boolean {
+ return typeof val === "number" && Number.isInteger(val);
+}
+
+/**
+ * Return `true` if `val` is a `bigint`.
+ *
+ * @example
+ * ```ts
+ * isBigInt(42n); // true
+ * isBigInt(42); // false
+ * ```
+ */
+export function isBigInt(val: unknown): val is bigint {
+ return typeof val === "bigint";
+}
+
+/**
+ * Return `true` if `val` is a `RegExp`.
+ *
+ * Mirrors `pandas.api.types.is_re`.
+ *
+ * @example
+ * ```ts
+ * isRegExp(/abc/); // true
+ * isRegExp(new RegExp("x")); // true
+ * isRegExp("abc"); // false
+ * ```
+ */
+export function isRegExp(val: unknown): val is RegExp {
+ return val instanceof RegExp;
+}
+
+/**
+ * Return `true` if `val` can be compiled into a `RegExp` — i.e. it is either
+ * a `string` or already a `RegExp`.
+ *
+ * Mirrors `pandas.api.types.is_re_compilable`.
+ *
+ * @example
+ * ```ts
+ * isReCompilable("abc"); // true
+ * isReCompilable(/abc/); // true
+ * isReCompilable(42); // false
+ * ```
+ */
+export function isReCompilable(val: unknown): boolean {
+ return typeof val === "string" || val instanceof RegExp;
+}
+
+/**
+ * Return `true` if `val` is a "missing" value in the pandas sense: `null`,
+ * `undefined`, or `NaN`.
+ *
+ * @example
+ * ```ts
+ * isMissing(null); // true
+ * isMissing(undefined); // true
+ * isMissing(NaN); // true
+ * isMissing(0); // false
+ * isMissing(""); // false
+ * ```
+ */
+export function isMissing(val: unknown): boolean {
+ if (val === null || val === undefined) {
+ return true;
+ }
+ if (typeof val === "number" && Number.isNaN(val)) {
+ return true;
+ }
+ return false;
+}
+
+/**
+ * Return `true` if `val` is "hashable" — usable as an object-key in
+ * JavaScript. In practice this means it is a primitive (`string`, `number`,
+ * `bigint`, `boolean`, `symbol`, `null`, `undefined`).
+ *
+ * Mirrors the spirit of `pandas.api.types.is_hashable`.
+ *
+ * @example
+ * ```ts
+ * isHashable("key"); // true
+ * isHashable(42); // true
+ * isHashable({}); // false
+ * isHashable([]); // false
+ * ```
+ */
+export function isHashable(val: unknown): boolean {
+ if (val === null || val === undefined) {
+ return true;
+ }
+ const t = typeof val;
+ return t === "string" || t === "number" || t === "bigint" || t === "boolean" || t === "symbol";
+}
+
+/**
+ * Return `true` if `val` is a `Date` instance.
+ *
+ * @example
+ * ```ts
+ * isDate(new Date()); // true
+ * isDate("2024-01-01"); // false
+ * ```
+ */
+export function isDate(val: unknown): val is Date {
+ return val instanceof Date;
+}
+
+// ═════════════════════════════════════════════════════════════════════════════
+// DTYPE-LEVEL PREDICATES
+// ═════════════════════════════════════════════════════════════════════════════
+
+/**
+ * Return `true` if the dtype is numeric (integer, unsigned integer, or float).
+ *
+ * Mirrors `pandas.api.types.is_numeric_dtype`.
+ *
+ * @example
+ * ```ts
+ * isNumericDtype(Dtype.float64); // true
+ * isNumericDtype("int32"); // true
+ * isNumericDtype("string"); // false
+ * ```
+ */
+export function isNumericDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isNumeric;
+}
+
+/**
+ * Return `true` if the dtype is any integer kind (signed or unsigned).
+ *
+ * Mirrors `pandas.api.types.is_integer_dtype`.
+ *
+ * @example
+ * ```ts
+ * isIntegerDtype("int64"); // true
+ * isIntegerDtype("uint8"); // true
+ * isIntegerDtype("float32"); // false
+ * ```
+ */
+export function isIntegerDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isInteger;
+}
+
+/**
+ * Return `true` if the dtype is a signed integer (`int8`–`int64`).
+ *
+ * Mirrors `pandas.api.types.is_signed_integer_dtype`.
+ *
+ * @example
+ * ```ts
+ * isSignedIntegerDtype("int32"); // true
+ * isSignedIntegerDtype("uint32"); // false
+ * ```
+ */
+export function isSignedIntegerDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isSignedInteger;
+}
+
+/**
+ * Return `true` if the dtype is an unsigned integer (`uint8`–`uint64`).
+ *
+ * Mirrors `pandas.api.types.is_unsigned_integer_dtype`.
+ *
+ * @example
+ * ```ts
+ * isUnsignedIntegerDtype("uint64"); // true
+ * isUnsignedIntegerDtype("int64"); // false
+ * ```
+ */
+export function isUnsignedIntegerDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isUnsignedInteger;
+}
+
+/**
+ * Return `true` if the dtype is a floating-point type (`float32` or `float64`).
+ *
+ * Mirrors `pandas.api.types.is_float_dtype`.
+ *
+ * @example
+ * ```ts
+ * isFloatDtype("float64"); // true
+ * isFloatDtype("float32"); // true
+ * isFloatDtype("int32"); // false
+ * ```
+ */
+export function isFloatDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isFloat;
+}
+
+/**
+ * Return `true` if the dtype is boolean.
+ *
+ * Mirrors `pandas.api.types.is_bool_dtype`.
+ *
+ * @example
+ * ```ts
+ * isBoolDtype("bool"); // true
+ * isBoolDtype("int8"); // false
+ * ```
+ */
+export function isBoolDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isBool;
+}
+
+/**
+ * Return `true` if the dtype is the `string` dtype.
+ *
+ * Mirrors `pandas.api.types.is_string_dtype`.
+ *
+ * @example
+ * ```ts
+ * isStringDtype("string"); // true
+ * isStringDtype("object"); // false
+ * ```
+ */
+export function isStringDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isString;
+}
+
+/**
+ * Return `true` if the dtype is a datetime type.
+ *
+ * Mirrors `pandas.api.types.is_datetime64_dtype`.
+ *
+ * @example
+ * ```ts
+ * isDatetimeDtype("datetime"); // true
+ * isDatetimeDtype("string"); // false
+ * ```
+ */
+export function isDatetimeDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isDatetime;
+}
+
+/**
+ * Return `true` if the dtype is a timedelta type.
+ *
+ * Mirrors `pandas.api.types.is_timedelta64_dtype`.
+ *
+ * @example
+ * ```ts
+ * isTimedeltaDtype("timedelta"); // true
+ * isTimedeltaDtype("datetime"); // false
+ * ```
+ */
+export function isTimedeltaDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isTimedelta;
+}
+
+/**
+ * Return `true` if the dtype is the categorical dtype.
+ *
+ * Mirrors `pandas.api.types.is_categorical_dtype`.
+ *
+ * @example
+ * ```ts
+ * isCategoricalDtype("category"); // true
+ * isCategoricalDtype("string"); // false
+ * ```
+ */
+export function isCategoricalDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isCategory;
+}
+
+/**
+ * Return `true` if the dtype is the object dtype.
+ *
+ * Mirrors `pandas.api.types.is_object_dtype`.
+ *
+ * @example
+ * ```ts
+ * isObjectDtype("object"); // true
+ * isObjectDtype("string"); // false
+ * ```
+ */
+export function isObjectDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isObject;
+}
+
+/**
+ * Return `true` if the dtype represents complex numbers.
+ *
+ * JavaScript has no native complex number type, so this always returns `false`
+ * (no complex dtype exists in the `tsb` dtype system). Provided for API
+ * parity with `pandas.api.types.is_complex_dtype`.
+ *
+ * @example
+ * ```ts
+ * isComplexDtype("float64"); // false (no complex dtype)
+ * ```
+ */
+export function isComplexDtype(_dtype: Dtype | DtypeName): boolean {
+ return false;
+}
+
+/**
+ * Return `true` if the dtype is an "extension array" dtype — i.e. any dtype
+ * beyond the numeric primitives: `string`, `object`, `datetime`, `timedelta`,
+ * `category`.
+ *
+ * Mirrors `pandas.api.types.is_extension_array_dtype`.
+ *
+ * @example
+ * ```ts
+ * isExtensionArrayDtype("category"); // true
+ * isExtensionArrayDtype("datetime"); // true
+ * isExtensionArrayDtype("int64"); // false
+ * ```
+ */
+export function isExtensionArrayDtype(dtype: Dtype | DtypeName): boolean {
+ const d = resolveDtype(dtype);
+ return d.isString || d.isObject || d.isDatetime || d.isTimedelta || d.isCategory;
+}
+
+/**
+ * Return `true` if the dtype can hold period (date period) data.
+ * In the current `tsb` dtype system this maps to the `datetime` kind.
+ *
+ * Mirrors `pandas.api.types.is_period_dtype`.
+ *
+ * @example
+ * ```ts
+ * isPeriodDtype("datetime"); // true
+ * isPeriodDtype("float64"); // false
+ * ```
+ */
+export function isPeriodDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isDatetime;
+}
+
+/**
+ * Return `true` if the dtype is suitable for interval data — float or integer.
+ *
+ * Mirrors `pandas.api.types.is_interval_dtype`.
+ *
+ * @example
+ * ```ts
+ * isIntervalDtype("float64"); // true
+ * isIntervalDtype("int32"); // true
+ * isIntervalDtype("string"); // false
+ * ```
+ */
+export function isIntervalDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isNumeric;
+}
diff --git a/src/core/attrs.ts b/src/core/attrs.ts
new file mode 100644
index 00000000..81c6be1c
--- /dev/null
+++ b/src/core/attrs.ts
@@ -0,0 +1,291 @@
+/**
+ * attrs — user-defined metadata dictionary for Series and DataFrame.
+ *
+ * Mirrors `pandas.DataFrame.attrs` / `pandas.Series.attrs`: an arbitrary
+ * key→value dictionary that travels with a data object and lets callers
+ * annotate it with provenance, units, descriptions, or any other metadata.
+ *
+ * Because the tsb Series and DataFrame classes are immutable by design, this
+ * module maintains a **WeakMap registry** that maps each object to its attrs
+ * record. The registry entries are garbage-collected automatically when the
+ * object itself is collected — there is no memory leak.
+ *
+ * ### Public surface
+ *
+ * ```ts
+ * import { getAttrs, setAttrs, updateAttrs, copyAttrs, withAttrs, clearAttrs,
+ * hasAttrs } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ x: [1, 2, 3] });
+ *
+ * // Annotate
+ * setAttrs(df, { source: "sensor_A", unit: "metres" });
+ * getAttrs(df); // { source: "sensor_A", unit: "metres" }
+ *
+ * // Merge additional keys
+ * updateAttrs(df, { version: 2 });
+ * getAttrs(df); // { source: "sensor_A", unit: "metres", version: 2 }
+ *
+ * // Fluent helper — sets attrs and returns the same object
+ * const annotated = withAttrs(df, { source: "sensor_B" });
+ * annotated === df; // true — same reference
+ *
+ * // Propagate to a derived object
+ * const df2 = DataFrame.fromColumns({ y: [4, 5, 6] });
+ * copyAttrs(df, df2);
+ * getAttrs(df2); // { source: "sensor_A", unit: "metres", version: 2 }
+ * ```
+ *
+ * @module
+ */
+
+// ─── types ────────────────────────────────────────────────────────────────────
+
+/**
+ * The attrs dictionary type. Keys are strings; values may be any JSON-safe
+ * primitive or nested structure. Mirrors the `dict` type of `pandas.attrs`.
+ */
+export type Attrs = Record;
+
+// ─── registry ─────────────────────────────────────────────────────────────────
+
+/** Internal WeakMap from any object to its attrs record. */
+const registry = new WeakMap