Detect and fill missing values. isna(), notna(), isnull(), notnull() for scalars/Series/DataFrame. ffillSeries(), bfillSeries(), dataFrameFfill(), dataFrameBfill() with optional limit and axis support.
+ isna / notna — detect missing values in scalars,
+ Series, and DataFrames.
+ ffill / bfill — propagate the last (or next) valid
+ value to fill gaps.
+ Mirrors pd.isna(), Series.ffill(), and
+ DataFrame.bfill() from pandas.
+
+
+
+
+
1 · isna / notna on scalars
+
+ Returns true / false for individual values.
+ null, undefined, and NaN are all
+ considered "missing".
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
2 · isna on a Series
+
+ When passed a Series, isna returns a boolean Series of the
+ same length — true where values are missing.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
3 · isna on a DataFrame
+
+ Returns a DataFrame of booleans with the same shape — one column per
+ original column, true where missing.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
4 · Forward-fill (ffillSeries)
+
+ Propagates the last valid value forward to fill gaps. Leading
+ nulls that have no preceding value remain null.
+ Use the optional limit to cap consecutive fills.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
5 · Backward-fill (bfillSeries)
+
+ Propagates the next valid value backward to fill gaps. Trailing
+ nulls that have no following value remain null.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
6 · DataFrame forward-fill & backward-fill
+
+ dataFrameFfill and dataFrameBfill apply fill
+ column-wise by default (axis=0). Pass axis: 1 to fill
+ row-wise across columns.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
API Reference
+
// Module-level missing-value detection
+isna(value: Scalar): boolean
+isna(value: Series): Series<boolean>
+isna(value: DataFrame): DataFrame
+
+notna(value: Scalar): boolean
+notna(value: Series): Series<boolean>
+notna(value: DataFrame): DataFrame
+
+// Aliases
+isnull(...) // same as isna
+notnull(...) // same as notna
+
+// Series forward / backward fill
+ffillSeries<T extends Scalar>(series: Series<T>, options?: { limit?: number | null }): Series<T>
+bfillSeries<T extends Scalar>(series: Series<T>, options?: { limit?: number | null }): Series<T>
+
+// DataFrame forward / backward fill
+dataFrameFfill(df, options?: {
+ limit?: number | null, // max consecutive fills (default: no limit)
+ axis?: 0 | 1 | "index" | "columns", // default 0 (column-wise)
+}): DataFrame
+
+dataFrameBfill(df, options?: {
+ limit?: number | null,
+ axis?: 0 | 1 | "index" | "columns",
+}): DataFrame
+
+
+
+
+
+
diff --git a/src/index.ts b/src/index.ts
index 1dd0aa57..ec702a7e 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -107,3 +107,14 @@ export {
export type { ClipOptions, RoundOptions, DataFrameElemOptions } from "./stats/index.ts";
export { valueCounts, dataFrameValueCounts } from "./stats/index.ts";
export type { ValueCountsOptions, DataFrameValueCountsOptions } from "./stats/index.ts";
+export {
+ isna,
+ notna,
+ isnull,
+ notnull,
+ ffillSeries,
+ bfillSeries,
+ dataFrameFfill,
+ dataFrameBfill,
+} from "./stats/index.ts";
+export type { FillDirectionOptions, DataFrameFillOptions } from "./stats/index.ts";
diff --git a/src/stats/index.ts b/src/stats/index.ts
index b1de48eb..84202fde 100644
--- a/src/stats/index.ts
+++ b/src/stats/index.ts
@@ -39,3 +39,14 @@ export {
nsmallestDataFrame,
} from "./nlargest.ts";
export type { NKeep, NTopOptions, NTopDataFrameOptions } from "./nlargest.ts";
+export {
+ isna,
+ notna,
+ isnull,
+ notnull,
+ ffillSeries,
+ bfillSeries,
+ dataFrameFfill,
+ dataFrameBfill,
+} from "./na_ops.ts";
+export type { FillDirectionOptions, DataFrameFillOptions } from "./na_ops.ts";
diff --git a/src/stats/na_ops.ts b/src/stats/na_ops.ts
new file mode 100644
index 00000000..c776bb1f
--- /dev/null
+++ b/src/stats/na_ops.ts
@@ -0,0 +1,336 @@
+/**
+ * na_ops — missing-value utilities for Series and DataFrame.
+ *
+ * Mirrors the following pandas module-level functions and methods:
+ * - `pd.isna(obj)` / `pd.isnull(obj)` — detect missing values
+ * - `pd.notna(obj)` / `pd.notnull(obj)` — detect non-missing values
+ * - `Series.ffill()` / `DataFrame.ffill()` — forward-fill missing values
+ * - `Series.bfill()` / `DataFrame.bfill()` — backward-fill missing values
+ *
+ * All functions are **pure** (return new objects; inputs are unchanged).
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
/** Options for {@link ffillSeries} and {@link bfillSeries}. */
export interface FillDirectionOptions {
  /**
   * Maximum number of consecutive NaN/null values to fill.
   * `null` means no limit (default).
   *
   * Note: a limit <= 0 is not rejected; the fill helpers compare
   * `streak < limit`, so such a limit simply fills nothing.
   */
  readonly limit?: number | null;
}

/** Options for {@link dataFrameFfill} and {@link dataFrameBfill}. */
export interface DataFrameFillOptions extends FillDirectionOptions {
  /**
   * - `0` or `"index"` (default): fill missing values down each **column**.
   * - `1` or `"columns"`: fill missing values across each **row**.
   */
  readonly axis?: 0 | 1 | "index" | "columns";
}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** True when `v` should be treated as missing. */
+function isMissing(v: Scalar): boolean {
+ return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+}
+
+/** Forward-fill an array of scalars in-place (returns a new array). */
+function ffillArray(vals: readonly Scalar[], limit: number | null): Scalar[] {
+ const out: Scalar[] = Array.from(vals);
+ let lastValid: Scalar = null;
+ let streak = 0;
+ for (let i = 0; i < out.length; i++) {
+ if (isMissing(out[i])) {
+ if (!isMissing(lastValid) && (limit === null || streak < limit)) {
+ out[i] = lastValid;
+ streak++;
+ }
+ } else {
+ lastValid = out[i] as Scalar;
+ streak = 0;
+ }
+ }
+ return out;
+}
+
+/** Backward-fill an array of scalars (returns a new array). */
+function bfillArray(vals: readonly Scalar[], limit: number | null): Scalar[] {
+ const out: Scalar[] = Array.from(vals);
+ let nextValid: Scalar = null;
+ let streak = 0;
+ for (let i = out.length - 1; i >= 0; i--) {
+ if (isMissing(out[i])) {
+ if (!isMissing(nextValid) && (limit === null || streak < limit)) {
+ out[i] = nextValid;
+ streak++;
+ }
+ } else {
+ nextValid = out[i] as Scalar;
+ streak = 0;
+ }
+ }
+ return out;
+}
+
+// ─── isna / notna ─────────────────────────────────────────────────────────────
+
+/**
+ * Detect missing values in a scalar, Series, or DataFrame.
+ *
+ * - For a **scalar**: returns `true` if the value is `null`, `undefined`, or `NaN`.
+ * - For a **Series**: returns a `Series` of the same index.
+ * - For a **DataFrame**: returns a `DataFrame` of boolean columns.
+ *
+ * Mirrors `pandas.isna()` / `pandas.isnull()`.
+ *
+ * @example
+ * ```ts
+ * import { isna } from "tsb";
+ * isna(null); // true
+ * isna(42); // false
+ * isna(NaN); // true
+ *
+ * const s = new Series({ data: [1, null, NaN, 4] });
+ * isna(s); // Series([false, true, true, false])
+ * ```
+ */
+export function isna(value: Scalar): boolean;
+export function isna(value: Series): Series;
+export function isna(value: DataFrame): DataFrame;
+export function isna(
+ value: Scalar | Series | DataFrame,
+): boolean | Series | DataFrame {
+ if (value instanceof DataFrame) {
+ return value.isna();
+ }
+ if (value instanceof Series) {
+ return value.isna();
+ }
+ return isMissing(value as Scalar);
+}
+
+/**
+ * Detect non-missing values in a scalar, Series, or DataFrame.
+ *
+ * Mirrors `pandas.notna()` / `pandas.notnull()`.
+ *
+ * @example
+ * ```ts
+ * import { notna } from "tsb";
+ * notna(null); // false
+ * notna(42); // true
+ * ```
+ */
+export function notna(value: Scalar): boolean;
+export function notna(value: Series): Series;
+export function notna(value: DataFrame): DataFrame;
+export function notna(
+ value: Scalar | Series | DataFrame,
+): boolean | Series | DataFrame {
+ if (value instanceof DataFrame) {
+ return value.notna();
+ }
+ if (value instanceof Series) {
+ return value.notna();
+ }
+ return !isMissing(value as Scalar);
+}
+
+/** Alias for {@link isna}. Mirrors `pandas.isnull()`. */
+export const isnull = isna;
+
+/** Alias for {@link notna}. Mirrors `pandas.notnull()`. */
+export const notnull = notna;
+
+// ─── ffill ────────────────────────────────────────────────────────────────────
+
+/**
+ * Forward-fill missing values in a Series.
+ *
+ * Each `null`/`NaN` value is replaced with the last non-missing value
+ * that precedes it (if any). Values before the first non-missing value
+ * remain missing.
+ *
+ * Mirrors `pandas.Series.ffill()`.
+ *
+ * @param series - Input Series (unchanged).
+ * @param options - Optional `{ limit }` — max consecutive fills.
+ * @returns New Series with forward-filled values.
+ *
+ * @example
+ * ```ts
+ * import { ffillSeries } from "tsb";
+ * const s = new Series({ data: [1, null, null, 4] });
+ * ffillSeries(s); // Series([1, 1, 1, 4])
+ * ```
+ */
+export function ffillSeries(
+ series: Series,
+ options?: FillDirectionOptions,
+): Series {
+ const limit = options?.limit ?? null;
+ const filled = ffillArray(series.values as readonly Scalar[], limit) as T[];
+ return new Series({
+ data: filled,
+ index: series.index,
+ dtype: series.dtype,
+ name: series.name ?? undefined,
+ });
+}
+
+/**
+ * Backward-fill missing values in a Series.
+ *
+ * Each `null`/`NaN` value is replaced with the next non-missing value
+ * that follows it (if any). Values after the last non-missing value
+ * remain missing.
+ *
+ * Mirrors `pandas.Series.bfill()`.
+ *
+ * @example
+ * ```ts
+ * import { bfillSeries } from "tsb";
+ * const s = new Series({ data: [1, null, null, 4] });
+ * bfillSeries(s); // Series([1, 4, 4, 4])
+ * ```
+ */
+export function bfillSeries(
+ series: Series,
+ options?: FillDirectionOptions,
+): Series {
+ const limit = options?.limit ?? null;
+ const filled = bfillArray(series.values as readonly Scalar[], limit) as T[];
+ return new Series({
+ data: filled,
+ index: series.index,
+ dtype: series.dtype,
+ name: series.name ?? undefined,
+ });
+}
+
+// ─── DataFrame ffill / bfill ──────────────────────────────────────────────────
+
+/**
+ * Forward-fill missing values in a DataFrame.
+ *
+ * By default operates **column-wise** (axis=0): each column is independently
+ * forward-filled. With `axis=1` each row is forward-filled across columns.
+ *
+ * Mirrors `pandas.DataFrame.ffill()`.
+ *
+ * @example
+ * ```ts
+ * import { dataFrameFfill } from "tsb";
+ * const df = new DataFrame({ data: { a: [1, null, 3], b: [null, 2, null] } });
+ * dataFrameFfill(df);
+ * // a: [1, 1, 3]
+ * // b: [null, 2, 2]
+ * ```
+ */
+export function dataFrameFfill(df: DataFrame, options?: DataFrameFillOptions): DataFrame {
+ const limit = options?.limit ?? null;
+ const axis = options?.axis ?? 0;
+ const byRow = axis === 1 || axis === "columns";
+
+ if (!byRow) {
+ // column-wise: fill each column independently
+ const colMap = new Map>();
+ for (const name of df.columns.values) {
+ const col = df.col(name);
+ const filled = ffillArray(col.values, limit) as Scalar[];
+ colMap.set(name, new Series({ data: filled, index: col.index, dtype: col.dtype }));
+ }
+ return new DataFrame(colMap, df.index);
+ }
+
+ // row-wise: fill across columns for each row
+ const nRows = df.shape[0];
+ const cols = df.columns.values;
+ const columns = cols.map((name) => df.col(name));
+ const rowsFilled: Scalar[][] = columns.map((c) => Array.from(c.values));
+ for (let r = 0; r < nRows; r++) {
+ const rowVals: Scalar[] = columns.map((_, ci) => rowsFilled[ci]?.[r] ?? null);
+ const filled = ffillArray(rowVals, limit);
+ for (let ci = 0; ci < cols.length; ci++) {
+ const rowsFilledCI = rowsFilled[ci];
+ if (rowsFilledCI !== undefined) {
+ rowsFilledCI[r] = filled[ci] ?? null;
+ }
+ }
+ }
+ const colMap = new Map>();
+ for (let ci = 0; ci < cols.length; ci++) {
+ const name = cols[ci] as string;
+ const col = columns[ci] as Series;
+ colMap.set(
+ name,
+ new Series({
+ data: rowsFilled[ci] ?? [],
+ index: col.index,
+ dtype: col.dtype,
+ }),
+ );
+ }
+ return new DataFrame(colMap, df.index);
+}
+
+/**
+ * Backward-fill missing values in a DataFrame.
+ *
+ * By default operates **column-wise** (axis=0). With `axis=1` fills across rows.
+ *
+ * Mirrors `pandas.DataFrame.bfill()`.
+ */
+export function dataFrameBfill(df: DataFrame, options?: DataFrameFillOptions): DataFrame {
+ const limit = options?.limit ?? null;
+ const axis = options?.axis ?? 0;
+ const byRow = axis === 1 || axis === "columns";
+
+ if (!byRow) {
+ const colMap = new Map>();
+ for (const name of df.columns.values) {
+ const col = df.col(name);
+ const filled = bfillArray(col.values, limit) as Scalar[];
+ colMap.set(name, new Series({ data: filled, index: col.index, dtype: col.dtype }));
+ }
+ return new DataFrame(colMap, df.index);
+ }
+
+ const nRows = df.shape[0];
+ const cols = df.columns.values;
+ const columns = cols.map((name) => df.col(name));
+ const rowsFilled: Scalar[][] = columns.map((c) => Array.from(c.values));
+ for (let r = 0; r < nRows; r++) {
+ const rowVals: Scalar[] = columns.map((_, ci) => rowsFilled[ci]?.[r] ?? null);
+ const filled = bfillArray(rowVals, limit);
+ for (let ci = 0; ci < cols.length; ci++) {
+ const rowsFilledCI = rowsFilled[ci];
+ if (rowsFilledCI !== undefined) {
+ rowsFilledCI[r] = filled[ci] ?? null;
+ }
+ }
+ }
+ const colMap = new Map>();
+ for (let ci = 0; ci < cols.length; ci++) {
+ const name = cols[ci] as string;
+ const col = columns[ci] as Series;
+ colMap.set(
+ name,
+ new Series({
+ data: rowsFilled[ci] ?? [],
+ index: col.index,
+ dtype: col.dtype,
+ }),
+ );
+ }
+ return new DataFrame(colMap, df.index);
+}
diff --git a/tests/stats/na_ops.test.ts b/tests/stats/na_ops.test.ts
new file mode 100644
index 00000000..340406ac
--- /dev/null
+++ b/tests/stats/na_ops.test.ts
@@ -0,0 +1,280 @@
/**
 * Tests for na_ops — missing-value utilities (isna, notna, ffill, bfill).
 *
 * Covers scalar / Series / DataFrame detection, forward/backward fill with
 * `limit`, both DataFrame axes, and three fast-check property tests.
 */

import { describe, expect, it } from "bun:test";
import fc from "fast-check";
import {
  DataFrame,
  Series,
  bfillSeries,
  dataFrameBfill,
  dataFrameFfill,
  ffillSeries,
  isna,
  isnull,
  notna,
  notnull,
} from "../../src/index.ts";

// ─── isna / notna ─────────────────────────────────────────────────────────────

describe("isna (scalar)", () => {
  it("returns true for null", () => expect(isna(null)).toBe(true));
  it("returns true for undefined", () => expect(isna(undefined)).toBe(true));
  it("returns true for NaN", () => expect(isna(Number.NaN)).toBe(true));
  // Falsy-but-present values must NOT be reported as missing.
  it("returns false for 0", () => expect(isna(0)).toBe(false));
  it("returns false for empty string", () => expect(isna("")).toBe(false));
  it("returns false for false", () => expect(isna(false)).toBe(false));
  it("returns false for a number", () => expect(isna(42)).toBe(false));
});

describe("notna (scalar)", () => {
  it("returns false for null", () => expect(notna(null)).toBe(false));
  it("returns false for NaN", () => expect(notna(Number.NaN)).toBe(false));
  it("returns true for 42", () => expect(notna(42)).toBe(true));
  it("returns true for a string", () => expect(notna("hello")).toBe(true));
});

describe("isnull / notnull aliases", () => {
  it("isnull equals isna for scalar", () => {
    expect(isnull(null)).toBe(isna(null));
    expect(isnull(42)).toBe(isna(42));
  });
  it("notnull equals notna for scalar", () => {
    expect(notnull(null)).toBe(notna(null));
    expect(notnull(42)).toBe(notna(42));
  });
});

describe("isna (Series)", () => {
  it("returns boolean Series of correct length", () => {
    const s = new Series({ data: [1, null, Number.NaN, 4] });
    const result = isna(s);
    expect(result).toBeInstanceOf(Series);
    expect([...result.values]).toEqual([false, true, true, false]);
  });

  it("all present", () => {
    const s = new Series({ data: [1, 2, 3] });
    expect([...isna(s).values]).toEqual([false, false, false]);
  });

  it("all missing", () => {
    const s = new Series({ data: [null, null, Number.NaN] });
    expect([...isna(s).values]).toEqual([true, true, true]);
  });
});

describe("notna (Series)", () => {
  it("is the inverse of isna", () => {
    const s = new Series({ data: [1, null, Number.NaN, 4] });
    const na = isna(s).values;
    const nna = notna(s).values;
    for (let i = 0; i < na.length; i++) {
      expect(nna[i]).toBe(!na[i]);
    }
  });
});

describe("isna (DataFrame)", () => {
  it("returns DataFrame of booleans", () => {
    const df = DataFrame.fromColumns({ a: [1, null], b: [Number.NaN, 2] });
    const result = isna(df);
    expect(result).toBeInstanceOf(DataFrame);
    expect([...result.col("a").values]).toEqual([false, true]);
    expect([...result.col("b").values]).toEqual([true, false]);
  });
});

describe("notna (DataFrame)", () => {
  it("returns inverse of isna DataFrame", () => {
    const df = DataFrame.fromColumns({ a: [1, null], b: [Number.NaN, 2] });
    expect([...notna(df).col("a").values]).toEqual([true, false]);
    expect([...notna(df).col("b").values]).toEqual([false, true]);
  });
});

// ─── ffillSeries ──────────────────────────────────────────────────────────────

describe("ffillSeries", () => {
  it("fills nulls with preceding value", () => {
    const s = new Series({ data: [1, null, null, 4] });
    expect([...ffillSeries(s).values]).toEqual([1, 1, 1, 4]);
  });

  it("leaves leading nulls untouched", () => {
    const s = new Series({ data: [null, null, 3, null] });
    expect([...ffillSeries(s).values]).toEqual([null, null, 3, 3]);
  });

  it("NaN is treated as missing", () => {
    const s = new Series({ data: [2, Number.NaN, 5] });
    const result = ffillSeries(s).values;
    expect(result[0]).toBe(2);
    expect(result[1]).toBe(2);
    expect(result[2]).toBe(5);
  });

  it("respects limit option", () => {
    const s = new Series({ data: [1, null, null, null, 5] });
    expect([...ffillSeries(s, { limit: 1 }).values]).toEqual([1, 1, null, null, 5]);
  });

  // Functions must be pure: the input Series is never mutated.
  it("preserves original Series", () => {
    const s = new Series({ data: [1, null, 3] });
    ffillSeries(s);
    expect([...s.values]).toEqual([1, null, 3]);
  });

  it("empty Series returns empty", () => {
    const s = new Series({ data: [] });
    expect([...ffillSeries(s).values]).toEqual([]);
  });

  it("preserves name and index", () => {
    const s = new Series({ data: [1, null], name: "x" });
    const filled = ffillSeries(s);
    expect(filled.name).toBe("x");
    expect(filled.index.size).toBe(2);
  });
});

// ─── bfillSeries ──────────────────────────────────────────────────────────────

describe("bfillSeries", () => {
  it("fills nulls with following value", () => {
    const s = new Series({ data: [1, null, null, 4] });
    expect([...bfillSeries(s).values]).toEqual([1, 4, 4, 4]);
  });

  it("leaves trailing nulls untouched", () => {
    const s = new Series({ data: [null, 3, null, null] });
    expect([...bfillSeries(s).values]).toEqual([3, 3, null, null]);
  });

  it("respects limit option", () => {
    const s = new Series({ data: [1, null, null, null, 5] });
    expect([...bfillSeries(s, { limit: 2 }).values]).toEqual([1, null, 5, 5, 5]);
  });

  it("empty Series returns empty", () => {
    const s = new Series({ data: [] });
    expect([...bfillSeries(s).values]).toEqual([]);
  });
});

// ─── dataFrameFfill ───────────────────────────────────────────────────────────

describe("dataFrameFfill (column-wise)", () => {
  it("fills each column independently", () => {
    const df = DataFrame.fromColumns({ a: [1, null, 3], b: [null, 2, null] });
    const result = dataFrameFfill(df);
    expect([...result.col("a").values]).toEqual([1, 1, 3]);
    expect([...result.col("b").values]).toEqual([null, 2, 2]);
  });

  it("preserves index", () => {
    const df = DataFrame.fromColumns({ x: [1, null] });
    expect(dataFrameFfill(df).index.size).toBe(2);
  });
});

describe("dataFrameFfill (row-wise)", () => {
  // With axis=1 values propagate left-to-right within each row.
  it("fills across columns per row", () => {
    const df = DataFrame.fromColumns({ a: [1, null], b: [null, null], c: [3, 4] });
    const result = dataFrameFfill(df, { axis: 1 });
    expect([...result.col("a").values]).toEqual([1, null]);
    expect([...result.col("b").values]).toEqual([1, null]);
    expect([...result.col("c").values]).toEqual([3, 4]);
  });
});

// ─── dataFrameBfill ───────────────────────────────────────────────────────────

describe("dataFrameBfill (column-wise)", () => {
  it("fills each column backward", () => {
    const df = DataFrame.fromColumns({ a: [null, null, 3], b: [1, null, null] });
    const result = dataFrameBfill(df);
    expect([...result.col("a").values]).toEqual([3, 3, 3]);
    expect([...result.col("b").values]).toEqual([1, null, null]);
  });
});

describe("dataFrameBfill (row-wise)", () => {
  it("fills backward across columns per row", () => {
    const df = DataFrame.fromColumns({ a: [null, 1], b: [null, null], c: [3, null] });
    const result = dataFrameBfill(df, { axis: 1 });
    expect([...result.col("a").values]).toEqual([3, 1]);
    expect([...result.col("b").values]).toEqual([3, null]);
    expect([...result.col("c").values]).toEqual([3, null]);
  });
});

// ─── property-based tests ─────────────────────────────────────────────────────

describe("property: ffill followed by bfill fills all if any non-null", () => {
  it("all values filled when at least one is present", () => {
    fc.assert(
      fc.property(
        fc.array(fc.option(fc.integer({ min: 0, max: 100 }), { nil: null }), {
          minLength: 1,
          maxLength: 20,
        }),
        (raw) => {
          const hasNonNull = raw.some((v) => v !== null);
          if (!hasNonNull) {
            // vacuously true: an all-null input has nothing to propagate
            return true;
          }
          const s = new Series({ data: raw });
          const result = bfillSeries(ffillSeries(s));
          return result.values.every((v) => v !== null);
        },
      ),
    );
  });
});

describe("property: ffill never introduces new non-null values beyond last valid", () => {
  it("ffilled series has no nulls after first valid value", () => {
    fc.assert(
      fc.property(
        fc.array(fc.option(fc.integer({ min: -50, max: 50 }), { nil: null }), {
          minLength: 0,
          maxLength: 30,
        }),
        (raw) => {
          const s = new Series({ data: raw });
          const filled = ffillSeries(s).values;
          let sawValid = false;
          for (const v of filled) {
            if (v !== null) {
              sawValid = true;
            }
            if (sawValid && v === null) {
              return false;
            }
          }
          return true;
        },
      ),
    );
  });
});

describe("property: isna is inverse of notna for scalars", () => {
  it("isna(v) === !notna(v)", () => {
    fc.assert(
      fc.property(
        fc.oneof(
          fc.integer(),
          fc.float({ noNaN: false }),
          fc.constant(null),
          fc.string(),
          fc.boolean(),
        ),
        (v) => isna(v as Parameters[0]) === !notna(v as Parameters[0]),
      ),
    );
  });
});
From d6df47348e0ea49480e28bf091182af90b08720b Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Mon, 13 Apr 2026 08:01:00 +0000
Subject: [PATCH 2/2] Merge main into PR branch, resolving conflicts in
src/index.ts and src/stats/index.ts
- Keep na_ops additions (ffillSeries/bfillSeries/dataFrameFfill/dataFrameBfill)
- Keep main's additions (cut/qcut, where_mask, notna_isna, string_ops, etc.)
- Resolve duplicate isna/notna by using notna_isna.ts (main) for those exports
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
.autoloop/programs/perf-comparison/program.md | 74 +++
.github/workflows/autoloop.lock.yml | 36 +-
.github/workflows/autoloop.md | 69 +-
.github/workflows/evergreen.lock.yml | 36 +-
.github/workflows/evergreen.md | 22 +-
.github/workflows/pages.yml | 7 +
.github/workflows/sync-branches.lock.yml | 20 +-
.github/workflows/sync-branches.md | 18 +-
benchmarks/pandas/bench_concat.py | 28 +
benchmarks/pandas/bench_dataframe_apply.py | 27 +
benchmarks/pandas/bench_dataframe_creation.py | 27 +
benchmarks/pandas/bench_dataframe_dropna.py | 27 +
benchmarks/pandas/bench_dataframe_filter.py | 26 +
benchmarks/pandas/bench_dataframe_rename.py | 27 +
benchmarks/pandas/bench_dataframe_sort.py | 28 +
benchmarks/pandas/bench_describe.py | 27 +
benchmarks/pandas/bench_ewm_mean.py | 26 +
benchmarks/pandas/bench_groupby_mean.py | 27 +
benchmarks/pandas/bench_merge.py | 29 +
benchmarks/pandas/bench_pivot_table.py | 28 +
benchmarks/pandas/bench_read_csv.py | 30 +
benchmarks/pandas/bench_rolling_mean.py | 26 +
benchmarks/pandas/bench_series_arithmetic.py | 26 +
benchmarks/pandas/bench_series_creation.py | 47 ++
benchmarks/pandas/bench_series_cumsum.py | 26 +
benchmarks/pandas/bench_series_fillna.py | 26 +
benchmarks/pandas/bench_series_shift.py | 26 +
benchmarks/pandas/bench_series_sort.py | 27 +
benchmarks/pandas/bench_series_string_ops.py | 27 +
.../pandas/bench_series_value_counts.py | 25 +
benchmarks/results.json | 247 +++++++
benchmarks/run_benchmarks.sh | 129 ++++
benchmarks/tsb/bench_concat.ts | 32 +
benchmarks/tsb/bench_dataframe_apply.ts | 32 +
benchmarks/tsb/bench_dataframe_creation.ts | 33 +
benchmarks/tsb/bench_dataframe_dropna.ts | 31 +
benchmarks/tsb/bench_dataframe_filter.ts | 30 +
benchmarks/tsb/bench_dataframe_rename.ts | 31 +
benchmarks/tsb/bench_dataframe_sort.ts | 31 +
benchmarks/tsb/bench_describe.ts | 31 +
benchmarks/tsb/bench_ewm_mean.ts | 30 +
benchmarks/tsb/bench_groupby_mean.ts | 31 +
benchmarks/tsb/bench_merge.ts | 33 +
benchmarks/tsb/bench_pivot_table.ts | 32 +
benchmarks/tsb/bench_read_csv.ts | 39 ++
benchmarks/tsb/bench_rolling_mean.ts | 30 +
benchmarks/tsb/bench_series_arithmetic.ts | 30 +
benchmarks/tsb/bench_series_creation.ts | 49 ++
benchmarks/tsb/bench_series_cumsum.ts | 30 +
benchmarks/tsb/bench_series_fillna.ts | 31 +
benchmarks/tsb/bench_series_shift.ts | 30 +
benchmarks/tsb/bench_series_sort.ts | 30 +
benchmarks/tsb/bench_series_string_ops.ts | 32 +
benchmarks/tsb/bench_series_value_counts.ts | 30 +
docs/playground.md | 3 -
playground/api_types.html | 222 +++++++
playground/attrs.html | 183 +++++
playground/benchmarks.html | 360 ++++++++++
playground/categorical_ops.html | 338 ++++++++++
playground/cut_qcut.html | 163 +++++
playground/format_ops.html | 262 ++++++++
playground/index.html | 80 +++
playground/insert_pop.html | 172 +++++
playground/notna_isna.html | 242 +++++++
playground/numeric_extended.html | 353 ++++++++++
playground/pipe_apply.html | 276 ++++++++
playground/playground-runtime.js | 179 ++++-
playground/rolling_apply.html | 225 +++++++
playground/string_ops.html | 282 ++++++++
playground/string_ops_extended.html | 413 ++++++++++++
playground/to_from_dict.html | 122 ++++
playground/where_mask.html | 220 ++++++
playground/wide_to_long.html | 113 ++++
playground/window_extended.html | 304 +++++++++
src/core/api_types.ts | 629 ++++++++++++++++++
src/core/attrs.ts | 291 ++++++++
src/core/index.ts | 68 ++
src/core/insert_pop.ts | 214 ++++++
src/core/pipe_apply.ts | 303 +++++++++
src/core/to_from_dict.ts | 283 ++++++++
src/index.ts | 173 ++++-
src/reshape/index.ts | 2 +
src/reshape/wide_to_long.ts | 217 ++++++
src/stats/categorical_ops.ts | 483 ++++++++++++++
src/stats/cut_qcut.ts | 383 +++++++++++
src/stats/format_ops.ts | 442 ++++++++++++
src/stats/index.ts | 111 +++-
src/stats/notna_isna.ts | 369 ++++++++++
src/stats/numeric_extended.ts | 586 ++++++++++++++++
src/stats/string_ops.ts | 468 +++++++++++++
src/stats/string_ops_extended.ts | 429 ++++++++++++
src/stats/where_mask.ts | 289 ++++++++
src/stats/window_extended.ts | 321 +++++++++
src/window/index.ts | 7 +
src/window/rolling_apply.ts | 323 +++++++++
tests/core/api_types.test.ts | 621 +++++++++++++++++
tests/core/attrs.test.ts | 542 +++++++++++++++
tests/core/insert_pop.test.ts | 286 ++++++++
tests/core/pipe_apply.test.ts | 449 +++++++++++++
tests/core/to_from_dict.test.ts | 278 ++++++++
tests/reshape/wide_to_long.test.ts | 211 ++++++
tests/stats/categorical_ops.test.ts | 476 +++++++++++++
tests/stats/cut_qcut.test.ts | 277 ++++++++
tests/stats/format_ops.test.ts | 568 ++++++++++++++++
tests/stats/notna_isna.test.ts | 536 +++++++++++++++
tests/stats/numeric_extended.test.ts | 509 ++++++++++++++
tests/stats/rank.test.ts | 2 +-
tests/stats/string_ops.test.ts | 459 +++++++++++++
tests/stats/string_ops_extended.test.ts | 437 ++++++++++++
tests/stats/where_mask.test.ts | 338 ++++++++++
tests/stats/window_extended.test.ts | 365 ++++++++++
tests/window/rolling_apply.test.ts | 354 ++++++++++
112 files changed, 19493 insertions(+), 97 deletions(-)
create mode 100644 .autoloop/programs/perf-comparison/program.md
create mode 100644 benchmarks/pandas/bench_concat.py
create mode 100644 benchmarks/pandas/bench_dataframe_apply.py
create mode 100644 benchmarks/pandas/bench_dataframe_creation.py
create mode 100644 benchmarks/pandas/bench_dataframe_dropna.py
create mode 100644 benchmarks/pandas/bench_dataframe_filter.py
create mode 100644 benchmarks/pandas/bench_dataframe_rename.py
create mode 100644 benchmarks/pandas/bench_dataframe_sort.py
create mode 100644 benchmarks/pandas/bench_describe.py
create mode 100644 benchmarks/pandas/bench_ewm_mean.py
create mode 100644 benchmarks/pandas/bench_groupby_mean.py
create mode 100644 benchmarks/pandas/bench_merge.py
create mode 100644 benchmarks/pandas/bench_pivot_table.py
create mode 100644 benchmarks/pandas/bench_read_csv.py
create mode 100644 benchmarks/pandas/bench_rolling_mean.py
create mode 100644 benchmarks/pandas/bench_series_arithmetic.py
create mode 100644 benchmarks/pandas/bench_series_creation.py
create mode 100644 benchmarks/pandas/bench_series_cumsum.py
create mode 100644 benchmarks/pandas/bench_series_fillna.py
create mode 100644 benchmarks/pandas/bench_series_shift.py
create mode 100644 benchmarks/pandas/bench_series_sort.py
create mode 100644 benchmarks/pandas/bench_series_string_ops.py
create mode 100644 benchmarks/pandas/bench_series_value_counts.py
create mode 100644 benchmarks/results.json
create mode 100644 benchmarks/run_benchmarks.sh
create mode 100644 benchmarks/tsb/bench_concat.ts
create mode 100644 benchmarks/tsb/bench_dataframe_apply.ts
create mode 100644 benchmarks/tsb/bench_dataframe_creation.ts
create mode 100644 benchmarks/tsb/bench_dataframe_dropna.ts
create mode 100644 benchmarks/tsb/bench_dataframe_filter.ts
create mode 100644 benchmarks/tsb/bench_dataframe_rename.ts
create mode 100644 benchmarks/tsb/bench_dataframe_sort.ts
create mode 100644 benchmarks/tsb/bench_describe.ts
create mode 100644 benchmarks/tsb/bench_ewm_mean.ts
create mode 100644 benchmarks/tsb/bench_groupby_mean.ts
create mode 100644 benchmarks/tsb/bench_merge.ts
create mode 100644 benchmarks/tsb/bench_pivot_table.ts
create mode 100644 benchmarks/tsb/bench_read_csv.ts
create mode 100644 benchmarks/tsb/bench_rolling_mean.ts
create mode 100644 benchmarks/tsb/bench_series_arithmetic.ts
create mode 100644 benchmarks/tsb/bench_series_creation.ts
create mode 100644 benchmarks/tsb/bench_series_cumsum.ts
create mode 100644 benchmarks/tsb/bench_series_fillna.ts
create mode 100644 benchmarks/tsb/bench_series_shift.ts
create mode 100644 benchmarks/tsb/bench_series_sort.ts
create mode 100644 benchmarks/tsb/bench_series_string_ops.ts
create mode 100644 benchmarks/tsb/bench_series_value_counts.ts
create mode 100644 playground/api_types.html
create mode 100644 playground/attrs.html
create mode 100644 playground/benchmarks.html
create mode 100644 playground/categorical_ops.html
create mode 100644 playground/cut_qcut.html
create mode 100644 playground/format_ops.html
create mode 100644 playground/insert_pop.html
create mode 100644 playground/notna_isna.html
create mode 100644 playground/numeric_extended.html
create mode 100644 playground/pipe_apply.html
create mode 100644 playground/rolling_apply.html
create mode 100644 playground/string_ops.html
create mode 100644 playground/string_ops_extended.html
create mode 100644 playground/to_from_dict.html
create mode 100644 playground/where_mask.html
create mode 100644 playground/wide_to_long.html
create mode 100644 playground/window_extended.html
create mode 100644 src/core/api_types.ts
create mode 100644 src/core/attrs.ts
create mode 100644 src/core/insert_pop.ts
create mode 100644 src/core/pipe_apply.ts
create mode 100644 src/core/to_from_dict.ts
create mode 100644 src/reshape/wide_to_long.ts
create mode 100644 src/stats/categorical_ops.ts
create mode 100644 src/stats/cut_qcut.ts
create mode 100644 src/stats/format_ops.ts
create mode 100644 src/stats/notna_isna.ts
create mode 100644 src/stats/numeric_extended.ts
create mode 100644 src/stats/string_ops.ts
create mode 100644 src/stats/string_ops_extended.ts
create mode 100644 src/stats/where_mask.ts
create mode 100644 src/stats/window_extended.ts
create mode 100644 src/window/rolling_apply.ts
create mode 100644 tests/core/api_types.test.ts
create mode 100644 tests/core/attrs.test.ts
create mode 100644 tests/core/insert_pop.test.ts
create mode 100644 tests/core/pipe_apply.test.ts
create mode 100644 tests/core/to_from_dict.test.ts
create mode 100644 tests/reshape/wide_to_long.test.ts
create mode 100644 tests/stats/categorical_ops.test.ts
create mode 100644 tests/stats/cut_qcut.test.ts
create mode 100644 tests/stats/format_ops.test.ts
create mode 100644 tests/stats/notna_isna.test.ts
create mode 100644 tests/stats/numeric_extended.test.ts
create mode 100644 tests/stats/string_ops.test.ts
create mode 100644 tests/stats/string_ops_extended.test.ts
create mode 100644 tests/stats/where_mask.test.ts
create mode 100644 tests/stats/window_extended.test.ts
create mode 100644 tests/window/rolling_apply.test.ts
diff --git a/.autoloop/programs/perf-comparison/program.md b/.autoloop/programs/perf-comparison/program.md
new file mode 100644
index 00000000..c1aec206
--- /dev/null
+++ b/.autoloop/programs/perf-comparison/program.md
@@ -0,0 +1,74 @@
+---
+schedule: every 6h
+---
+
+# Performance Comparison: tsb (TypeScript) vs pandas (Python)
+
+## Goal
+
+Systematically benchmark every tsb function against its pandas equivalent, one function per iteration. Each iteration picks a function that has not yet been benchmarked, writes a matching performance test for both tsb (TypeScript/Bun) and pandas (Python), runs both, and records the timing results. The benchmark results are displayed on the playground pages doc site.
+
+This is an open-ended program — it runs continuously, always adding the next benchmark comparison.
+
+### How each iteration works
+
+1. **Read existing benchmarks** — check `benchmarks/tsb/` and `benchmarks/pandas/` to see which functions are already benchmarked.
+2. **Pick ONE function** from `src/` that has no benchmark yet. Prioritize core operations (Series, DataFrame, GroupBy, etc.).
+3. **Write a TypeScript benchmark** in `benchmarks/tsb/bench_{function}.ts` that:
+ - Creates a realistic dataset (e.g. 100,000 rows)
+ - Runs the operation in a tight loop (warm-up + measured iterations)
+ - Outputs JSON: `{"function": "...", "mean_ms": ..., "iterations": ..., "total_ms": ...}`
+4. **Write a matching Python benchmark** in `benchmarks/pandas/bench_{function}.py` that:
+ - Creates the same dataset as the TypeScript version
+ - Runs the same operation with the same loop structure
+ - Outputs the same JSON format
+5. **Run both benchmarks** via `benchmarks/run_benchmarks.sh` and capture results.
+6. **Update `benchmarks/results.json`** with the new timing data.
+7. **Update `playground/benchmarks.html`** to display the new function's comparison metrics.
+
+### Key constraints
+
+- **Matching datasets** — both benchmarks must use identical data (same size, same values where possible).
+- **Fair comparison** — same number of warm-up and measured iterations for both.
+- **JSON output** — every benchmark script must output a single JSON line to stdout.
+- **No modifications to `src/`** — benchmark code is separate from library code.
+- **Python environment** — install pandas via pip if not present.
+
+## Target
+
+Only modify these files:
+- `benchmarks/**` — benchmark scripts and results
+- `playground/benchmarks.html` — performance comparison playground page
+- `playground/index.html` — add/update link to benchmarks page
+
+Do NOT modify:
+- `src/**` — library source code
+- `tests/**` — test files
+- `README.md` — read-only
+- `.autoloop/programs/**` — program definitions (except this file's code/ dir)
+- `.github/workflows/autoloop*` — autoloop workflow files
+
+## Evaluation
+
+```bash
+# Set up Python environment if needed
+if ! command -v python3 &>/dev/null; then
+ echo "Python3 not found, skipping"
+fi
+pip3 install pandas --quiet 2>/dev/null || true
+
+# Count the number of benchmark pairs (functions with both TS and Python benchmarks)
+ts_benchmarks=$(ls benchmarks/tsb/bench_*.ts 2>/dev/null | wc -l | tr -d ' ')
+py_benchmarks=$(ls benchmarks/pandas/bench_*.py 2>/dev/null | wc -l | tr -d ' ')
+
+# The metric is the minimum of the two (both must exist for a complete benchmark)
+if [ "$ts_benchmarks" -lt "$py_benchmarks" ]; then
+ count=$ts_benchmarks
+else
+ count=$py_benchmarks
+fi
+
+echo "{\"benchmarked_functions\": ${count:-0}}"
+```
+
+The metric is `benchmarked_functions`. **Higher is better.**
diff --git a/.github/workflows/autoloop.lock.yml b/.github/workflows/autoloop.lock.yml
index ce21ec84..489ea218 100644
--- a/.github/workflows/autoloop.lock.yml
+++ b/.github/workflows/autoloop.lock.yml
@@ -37,7 +37,7 @@
# Imports:
# - shared/reporting.md
#
-# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"ae0854a9693094d32638babc16d353dc5de46c218ae3d893a9306b0b2a916042","compiler_version":"v0.65.6","strict":true,"agent_id":"copilot"}
+# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"4a373c351f80c4a3192abb04ad384f012a37e1fa4edfab3d08dc852deac2cf4f","compiler_version":"v0.65.6","strict":true,"agent_id":"copilot"}
name: "Autoloop"
"on":
@@ -222,21 +222,21 @@ jobs:
run: |
bash ${RUNNER_TEMP}/gh-aw/actions/create_prompt_first.sh
{
- cat << 'GH_AW_PROMPT_6cb617c1e46803c0_EOF'
+ cat << 'GH_AW_PROMPT_8719c7b9dd3572a2_EOF'
- GH_AW_PROMPT_6cb617c1e46803c0_EOF
+ GH_AW_PROMPT_8719c7b9dd3572a2_EOF
cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/markdown.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/repo_memory_prompt.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_prompt.md"
- cat << 'GH_AW_PROMPT_6cb617c1e46803c0_EOF'
+ cat << 'GH_AW_PROMPT_8719c7b9dd3572a2_EOF'
Tools: add_comment(max:7), create_issue(max:2), update_issue(max:3), create_pull_request, add_labels(max:2), remove_labels(max:2), push_to_pull_request_branch, missing_tool, missing_data, noop
- GH_AW_PROMPT_6cb617c1e46803c0_EOF
+ GH_AW_PROMPT_8719c7b9dd3572a2_EOF
cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_create_pull_request.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_push_to_pr_branch.md"
- cat << 'GH_AW_PROMPT_6cb617c1e46803c0_EOF'
+ cat << 'GH_AW_PROMPT_8719c7b9dd3572a2_EOF'
The following GitHub context information is available for this workflow:
@@ -269,7 +269,7 @@ jobs:
- **Note**: If a branch you need is not in the list above and is not listed as an additional fetched ref, it has NOT been checked out. For private repositories you cannot fetch it without proper authentication. If the branch is required and not available, exit with an error and ask the user to add it to the `fetch:` option of the `checkout:` configuration (e.g., `fetch: ["refs/pulls/open/*"]` for all open PR refs, or `fetch: ["main", "feature/my-branch"]` for specific branches).
- GH_AW_PROMPT_6cb617c1e46803c0_EOF
+ GH_AW_PROMPT_8719c7b9dd3572a2_EOF
cat "${RUNNER_TEMP}/gh-aw/prompts/github_mcp_tools_with_safeoutputs_prompt.md"
if [ "$GITHUB_EVENT_NAME" = "issue_comment" ] && [ -n "$GH_AW_IS_PR_COMMENT" ] || [ "$GITHUB_EVENT_NAME" = "pull_request_review_comment" ] || [ "$GITHUB_EVENT_NAME" = "pull_request_review" ]; then
cat "${RUNNER_TEMP}/gh-aw/prompts/pr_context_prompt.md"
@@ -277,11 +277,11 @@ jobs:
if [ "$GITHUB_EVENT_NAME" = "issue_comment" ] && [ -n "$GH_AW_IS_PR_COMMENT" ] || [ "$GITHUB_EVENT_NAME" = "pull_request_review_comment" ] || [ "$GITHUB_EVENT_NAME" = "pull_request_review" ]; then
cat "${RUNNER_TEMP}/gh-aw/prompts/pr_context_push_to_pr_branch_guidance.md"
fi
- cat << 'GH_AW_PROMPT_6cb617c1e46803c0_EOF'
+ cat << 'GH_AW_PROMPT_8719c7b9dd3572a2_EOF'
{{#runtime-import .github/workflows/shared/reporting.md}}
{{#runtime-import .github/workflows/autoloop.md}}
- GH_AW_PROMPT_6cb617c1e46803c0_EOF
+ GH_AW_PROMPT_8719c7b9dd3572a2_EOF
} > "$GH_AW_PROMPT"
- name: Interpolate variables and render templates
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
@@ -439,7 +439,7 @@ jobs:
GITHUB_REPOSITORY: ${{ github.repository }}
GITHUB_TOKEN: ${{ github.token }}
name: Check which programs are due
- run: "python3 - << 'PYEOF'\nimport os, json, re, glob, sys\nimport urllib.request, urllib.error\nfrom datetime import datetime, timezone, timedelta\n\nprograms_dir = \".autoloop/programs\"\nautoloop_dir = \".autoloop/programs\"\ntemplate_file = os.path.join(autoloop_dir, \"example.md\")\n\n# Read program state from repo-memory (persistent git-backed storage)\ngithub_token = os.environ.get(\"GITHUB_TOKEN\", \"\")\nrepo = os.environ.get(\"GITHUB_REPOSITORY\", \"\")\nforced_program = os.environ.get(\"AUTOLOOP_PROGRAM\", \"\").strip()\n\n# Repo-memory files are cloned to /tmp/gh-aw/repo-memory/{id}/ where {id}\n# is derived from the branch-name configured in the tools section (memory/autoloop → autoloop)\nrepo_memory_dir = \"/tmp/gh-aw/repo-memory/autoloop\"\n\ndef parse_machine_state(content):\n \"\"\"Parse the ⚙️ Machine State table from a state file. Returns a dict.\"\"\"\n state = {}\n m = re.search(r'## ⚙️ Machine State.*?\\n(.*?)(?=\\n## |\\Z)', content, re.DOTALL)\n if not m:\n return state\n section = m.group(0)\n for row in re.finditer(r'\\|\\s*(.+?)\\s*\\|\\s*(.+?)\\s*\\|', section):\n raw_key = row.group(1).strip()\n raw_val = row.group(2).strip()\n if raw_key.lower() in (\"field\", \"---\", \":---\", \":---:\", \"---:\"):\n continue\n key = raw_key.lower().replace(\" \", \"_\")\n val = None if raw_val in (\"—\", \"-\", \"\") else raw_val\n state[key] = val\n # Coerce types\n for int_field in (\"iteration_count\", \"consecutive_errors\"):\n if int_field in state:\n try:\n state[int_field] = int(state[int_field])\n except (ValueError, TypeError):\n state[int_field] = 0\n if \"paused\" in state:\n state[\"paused\"] = str(state.get(\"paused\", \"\")).lower() == \"true\"\n if \"completed\" in state:\n state[\"completed\"] = str(state.get(\"completed\", \"\")).lower() == \"true\"\n # recent_statuses: stored as comma-separated words (e.g. 
\"accepted, rejected, error\")\n rs_raw = state.get(\"recent_statuses\") or \"\"\n if rs_raw:\n state[\"recent_statuses\"] = [s.strip().lower() for s in rs_raw.split(\",\") if s.strip()]\n else:\n state[\"recent_statuses\"] = []\n return state\n\ndef read_program_state(program_name):\n \"\"\"Read scheduling state from the repo-memory state file.\"\"\"\n state_file = os.path.join(repo_memory_dir, f\"{program_name}.md\")\n if not os.path.isfile(state_file):\n print(f\" {program_name}: no state file found (first run)\")\n return {}\n with open(state_file, encoding=\"utf-8\") as f:\n content = f.read()\n return parse_machine_state(content)\n\n# Bootstrap: create autoloop programs directory and template if missing\nif not os.path.isdir(autoloop_dir):\n os.makedirs(autoloop_dir, exist_ok=True)\n bt = chr(96) # backtick — avoid literal backticks that break gh-aw compiler\n template = \"\\n\".join([\n \"\",\n \"\",\n \"\",\n \"\",\n \"# Autoloop Program\",\n \"\",\n \"\",\n \"\",\n \"## Goal\",\n \"\",\n \"\",\n \"\",\n \"REPLACE THIS with your optimization goal.\",\n \"\",\n \"## Target\",\n \"\",\n \"\",\n \"\",\n \"Only modify these files:\",\n f\"- {bt}REPLACE_WITH_FILE{bt} -- (describe what this file does)\",\n \"\",\n \"Do NOT modify:\",\n \"- (list files that must not be touched)\",\n \"\",\n \"## Evaluation\",\n \"\",\n \"\",\n \"\",\n f\"{bt}{bt}{bt}bash\",\n \"REPLACE_WITH_YOUR_EVALUATION_COMMAND\",\n f\"{bt}{bt}{bt}\",\n \"\",\n f\"The metric is {bt}REPLACE_WITH_METRIC_NAME{bt}. **Lower/Higher is better.** (pick one)\",\n \"\",\n ])\n with open(template_file, \"w\") as f:\n f.write(template)\n # Leave the template unstaged — the agent will create a draft PR with it\n print(f\"BOOTSTRAPPED: created {template_file} locally (agent will create a draft PR)\")\n\n# Find all program files from all locations:\n# 1. Directory-based programs: .autoloop/programs//program.md (preferred)\n# 2. Bare markdown programs: .autoloop/programs/.md (simple)\n# 3. 
Issue-based programs: GitHub issues with the 'autoloop-program' label\nprogram_files = []\nissue_programs = {} # name -> {issue_number, file}\n\n# Scan .autoloop/programs/ for directory-based programs\nif os.path.isdir(programs_dir):\n for entry in sorted(os.listdir(programs_dir)):\n prog_dir = os.path.join(programs_dir, entry)\n if os.path.isdir(prog_dir):\n # Look for program.md inside the directory\n prog_file = os.path.join(prog_dir, \"program.md\")\n if os.path.isfile(prog_file):\n program_files.append(prog_file)\n\n# Scan .autoloop/programs/ for bare markdown programs\nbare_programs = sorted(glob.glob(os.path.join(autoloop_dir, \"*.md\")))\nfor pf in bare_programs:\n program_files.append(pf)\n\n# Scan GitHub issues with the 'autoloop-program' label\nissue_programs_dir = \"/tmp/gh-aw/issue-programs\"\nos.makedirs(issue_programs_dir, exist_ok=True)\ntry:\n api_url = f\"https://api.github.com/repos/{repo}/issues?labels=autoloop-program&state=open&per_page=100\"\n req = urllib.request.Request(api_url, headers={\n \"Authorization\": f\"token {github_token}\",\n \"Accept\": \"application/vnd.github.v3+json\",\n })\n with urllib.request.urlopen(req, timeout=30) as resp:\n issues = json.loads(resp.read().decode())\n for issue in issues:\n if issue.get(\"pull_request\"):\n continue # skip PRs\n body = issue.get(\"body\") or \"\"\n title = issue.get(\"title\") or \"\"\n number = issue[\"number\"]\n # Derive program name from issue title: slugify to lowercase with hyphens\n slug = re.sub(r'[^a-z0-9]+', '-', title.lower()).strip('-')\n slug = re.sub(r'-+', '-', slug) # collapse consecutive hyphens\n if not slug:\n slug = f\"issue-{number}\"\n # Avoid slug collisions: if another issue already claimed this slug, append issue number\n if slug in issue_programs:\n print(f\" Warning: slug '{slug}' (issue #{number}) collides with issue #{issue_programs[slug]['issue_number']}, appending issue number\")\n slug = f\"{slug}-{number}\"\n # Write issue body to a temp file so the 
scheduling loop can process it\n issue_file = os.path.join(issue_programs_dir, f\"{slug}.md\")\n with open(issue_file, \"w\") as f:\n f.write(body)\n program_files.append(issue_file)\n issue_programs[slug] = {\"issue_number\": number, \"file\": issue_file, \"title\": title}\n print(f\" Found issue-based program: '{slug}' (issue #{number})\")\nexcept Exception as e:\n print(f\" Warning: could not fetch issue-based programs: {e}\")\n\nif not program_files:\n # Fallback to single-file locations\n for path in [\".autoloop/program.md\", \"program.md\"]:\n if os.path.isfile(path):\n program_files = [path]\n break\n\nif not program_files:\n print(\"NO_PROGRAMS_FOUND\")\n os.makedirs(\"/tmp/gh-aw\", exist_ok=True)\n with open(\"/tmp/gh-aw/autoloop.json\", \"w\") as f:\n json.dump({\"due\": [], \"skipped\": [], \"unconfigured\": [], \"no_programs\": True}, f)\n sys.exit(0)\n\nos.makedirs(\"/tmp/gh-aw\", exist_ok=True)\nnow = datetime.now(timezone.utc)\ndue = []\nskipped = []\nunconfigured = []\nall_programs = {} # name -> file path (populated during scanning)\n\n# Schedule string to timedelta\ndef parse_schedule(s):\n s = s.strip().lower()\n m = re.match(r\"every\\s+(\\d+)\\s*h\", s)\n if m:\n return timedelta(hours=int(m.group(1)))\n m = re.match(r\"every\\s+(\\d+)\\s*m\", s)\n if m:\n return timedelta(minutes=int(m.group(1)))\n if s == \"daily\":\n return timedelta(hours=24)\n if s == \"weekly\":\n return timedelta(days=7)\n return None # No per-program schedule — always due\n\ndef get_program_name(pf):\n \"\"\"Extract program name from file path.\n Directory-based: .autoloop/programs//program.md -> \n Bare markdown: .autoloop/programs/.md -> \n Issue-based: /tmp/gh-aw/issue-programs/.md -> \n \"\"\"\n if pf.endswith(\"/program.md\"):\n # Directory-based program: name is the parent directory\n return os.path.basename(os.path.dirname(pf))\n else:\n # Bare markdown or issue-based program: name is the filename without .md\n return 
os.path.splitext(os.path.basename(pf))[0]\n\nfor pf in program_files:\n name = get_program_name(pf)\n all_programs[name] = pf\n with open(pf) as f:\n content = f.read()\n\n # Check sentinel (skip for issue-based programs which use AUTOLOOP:ISSUE-PROGRAM)\n if \"\" in content:\n unconfigured.append(name)\n continue\n\n # Check for TODO/REPLACE placeholders\n if re.search(r'\\bTODO\\b|\\bREPLACE', content):\n unconfigured.append(name)\n continue\n\n # Parse optional YAML frontmatter for schedule and target-metric\n # Strip leading HTML comments before checking (issue-based programs may have them)\n content_stripped = re.sub(r'^(\\s*\\s*\\n)*', '', content, flags=re.DOTALL)\n schedule_delta = None\n target_metric = None\n fm_match = re.match(r\"^---\\s*\\n(.*?)\\n---\\s*\\n\", content_stripped, re.DOTALL)\n if fm_match:\n for line in fm_match.group(1).split(\"\\n\"):\n if line.strip().startswith(\"schedule:\"):\n schedule_str = line.split(\":\", 1)[1].strip()\n schedule_delta = parse_schedule(schedule_str)\n if line.strip().startswith(\"target-metric:\"):\n try:\n target_metric = float(line.split(\":\", 1)[1].strip())\n except (ValueError, TypeError):\n print(f\" Warning: {name} has invalid target-metric value: {line.split(':', 1)[1].strip()}\")\n\n # Read state from repo-memory\n state = read_program_state(name)\n if state:\n print(f\" {name}: last_run={state.get('last_run')}, iteration_count={state.get('iteration_count')}\")\n else:\n print(f\" {name}: no state found (first run)\")\n\n last_run = None\n lr = state.get(\"last_run\")\n if lr:\n try:\n last_run = datetime.fromisoformat(lr.replace(\"Z\", \"+00:00\"))\n except ValueError:\n pass\n\n # Check if completed (target metric was reached)\n if str(state.get(\"completed\", \"\")).lower() == \"true\":\n skipped.append({\"name\": name, \"reason\": f\"completed: target metric reached\"})\n continue\n\n # Check if paused (e.g., plateau or recurring errors)\n if state.get(\"paused\"):\n skipped.append({\"name\": name, 
\"reason\": f\"paused: {state.get('pause_reason', 'unknown')}\"})\n continue\n\n # Auto-pause on plateau: 5+ consecutive rejections\n recent = state.get(\"recent_statuses\", [])[-5:]\n if len(recent) >= 5 and all(s == \"rejected\" for s in recent):\n skipped.append({\"name\": name, \"reason\": \"plateau: 5 consecutive rejections\"})\n continue\n\n # Check if due based on per-program schedule\n if schedule_delta and last_run:\n if now - last_run < schedule_delta:\n skipped.append({\"name\": name, \"reason\": \"not due yet\",\n \"next_due\": (last_run + schedule_delta).isoformat()})\n continue\n\n due.append({\"name\": name, \"last_run\": lr, \"file\": pf, \"target_metric\": target_metric})\n\n# Pick the program to run\nselected = None\nselected_file = None\nselected_issue = None\nselected_target_metric = None\ndeferred = []\n\nif forced_program:\n # Manual dispatch requested a specific program — bypass scheduling\n # (paused, not-due, and plateau programs can still be forced)\n if forced_program not in all_programs:\n print(f\"ERROR: requested program '{forced_program}' not found.\")\n print(f\" Available programs: {list(all_programs.keys())}\")\n sys.exit(1)\n if forced_program in unconfigured:\n print(f\"ERROR: requested program '{forced_program}' is unconfigured (has placeholders).\")\n sys.exit(1)\n selected = forced_program\n selected_file = all_programs[forced_program]\n deferred = [p[\"name\"] for p in due if p[\"name\"] != forced_program]\n if selected in issue_programs:\n selected_issue = issue_programs[selected][\"issue_number\"]\n # Find target_metric: check the due list first, then parse from the program file\n for p in due:\n if p[\"name\"] == forced_program:\n selected_target_metric = p.get(\"target_metric\")\n break\n if selected_target_metric is None:\n # Program may have been skipped (completed/paused/plateau) — parse directly\n try:\n with open(selected_file) as _f:\n _content = _f.read()\n _content_stripped = re.sub(r'^(\\s*\\s*\\n)*', '', 
_content, flags=re.DOTALL)\n _fm = re.match(r\"^---\\s*\\n(.*?)\\n---\\s*\\n\", _content_stripped, re.DOTALL)\n if _fm:\n for _line in _fm.group(1).split(\"\\n\"):\n if _line.strip().startswith(\"target-metric:\"):\n selected_target_metric = float(_line.split(\":\", 1)[1].strip())\n break\n except (OSError, ValueError, TypeError):\n pass\n print(f\"FORCED: running program '{forced_program}' (manual dispatch)\")\nelif due:\n # Normal scheduling: pick the single most-overdue program\n due.sort(key=lambda p: p[\"last_run\"] or \"\") # None/empty sorts first (never run)\n selected = due[0][\"name\"]\n selected_file = due[0][\"file\"]\n selected_target_metric = due[0].get(\"target_metric\")\n deferred = [p[\"name\"] for p in due[1:]]\n # Check if the selected program is issue-based\n if selected in issue_programs:\n selected_issue = issue_programs[selected][\"issue_number\"]\n\nresult = {\n \"selected\": selected,\n \"selected_file\": selected_file,\n \"selected_issue\": selected_issue,\n \"selected_target_metric\": selected_target_metric,\n \"issue_programs\": {name: info[\"issue_number\"] for name, info in issue_programs.items()},\n \"deferred\": deferred,\n \"skipped\": skipped,\n \"unconfigured\": unconfigured,\n \"no_programs\": False,\n}\n\nos.makedirs(\"/tmp/gh-aw\", exist_ok=True)\nwith open(\"/tmp/gh-aw/autoloop.json\", \"w\") as f:\n json.dump(result, f, indent=2)\n\nprint(\"=== Autoloop Program Check ===\")\nprint(f\"Selected program: {selected or '(none)'} ({selected_file or 'n/a'})\")\nprint(f\"Deferred (next run): {deferred or '(none)'}\")\nprint(f\"Programs skipped: {[s['name'] for s in skipped] or '(none)'}\")\nprint(f\"Programs unconfigured: {unconfigured or '(none)'}\")\n\nif not selected and not unconfigured:\n print(\"\\nNo programs due this run. Exiting early.\")\n sys.exit(1) # Non-zero exit skips the agent step\nPYEOF\n"
+ run: "python3 - << 'PYEOF'\nimport os, json, re, glob, sys\nimport urllib.request, urllib.error\nfrom datetime import datetime, timezone, timedelta\n\nprograms_dir = \".autoloop/programs\"\nautoloop_dir = \".autoloop/programs\"\ntemplate_file = os.path.join(autoloop_dir, \"example.md\")\n\n# Read program state from repo-memory (persistent git-backed storage)\ngithub_token = os.environ.get(\"GITHUB_TOKEN\", \"\")\nrepo = os.environ.get(\"GITHUB_REPOSITORY\", \"\")\nforced_program = os.environ.get(\"AUTOLOOP_PROGRAM\", \"\").strip()\n\n# Repo-memory files are cloned to /tmp/gh-aw/repo-memory/{id}/ where {id}\n# is derived from the branch-name configured in the tools section (memory/autoloop → autoloop)\nrepo_memory_dir = \"/tmp/gh-aw/repo-memory/autoloop\"\n\ndef parse_machine_state(content):\n \"\"\"Parse the ⚙️ Machine State table from a state file. Returns a dict.\"\"\"\n state = {}\n m = re.search(r'## ⚙️ Machine State.*?\\n(.*?)(?=\\n## |\\Z)', content, re.DOTALL)\n if not m:\n return state\n section = m.group(0)\n for row in re.finditer(r'\\|\\s*(.+?)\\s*\\|\\s*(.+?)\\s*\\|', section):\n raw_key = row.group(1).strip()\n raw_val = row.group(2).strip()\n if raw_key.lower() in (\"field\", \"---\", \":---\", \":---:\", \"---:\"):\n continue\n key = raw_key.lower().replace(\" \", \"_\")\n val = None if raw_val in (\"—\", \"-\", \"\") else raw_val\n state[key] = val\n # Coerce types\n for int_field in (\"iteration_count\", \"consecutive_errors\"):\n if int_field in state:\n try:\n state[int_field] = int(state[int_field])\n except (ValueError, TypeError):\n state[int_field] = 0\n if \"paused\" in state:\n state[\"paused\"] = str(state.get(\"paused\", \"\")).lower() == \"true\"\n if \"completed\" in state:\n state[\"completed\"] = str(state.get(\"completed\", \"\")).lower() == \"true\"\n # recent_statuses: stored as comma-separated words (e.g. 
\"accepted, rejected, error\")\n rs_raw = state.get(\"recent_statuses\") or \"\"\n if rs_raw:\n state[\"recent_statuses\"] = [s.strip().lower() for s in rs_raw.split(\",\") if s.strip()]\n else:\n state[\"recent_statuses\"] = []\n return state\n\ndef read_program_state(program_name):\n \"\"\"Read scheduling state from the repo-memory state file.\"\"\"\n state_file = os.path.join(repo_memory_dir, f\"{program_name}.md\")\n if not os.path.isfile(state_file):\n print(f\" {program_name}: no state file found (first run)\")\n return {}\n with open(state_file, encoding=\"utf-8\") as f:\n content = f.read()\n return parse_machine_state(content)\n\n# Bootstrap: create autoloop programs directory and template if missing\nif not os.path.isdir(autoloop_dir):\n os.makedirs(autoloop_dir, exist_ok=True)\n bt = chr(96) # backtick — avoid literal backticks that break gh-aw compiler\n template = \"\\n\".join([\n \"\",\n \"\",\n \"\",\n \"\",\n \"# Autoloop Program\",\n \"\",\n \"\",\n \"\",\n \"## Goal\",\n \"\",\n \"\",\n \"\",\n \"REPLACE THIS with your optimization goal.\",\n \"\",\n \"## Target\",\n \"\",\n \"\",\n \"\",\n \"Only modify these files:\",\n f\"- {bt}REPLACE_WITH_FILE{bt} -- (describe what this file does)\",\n \"\",\n \"Do NOT modify:\",\n \"- (list files that must not be touched)\",\n \"\",\n \"## Evaluation\",\n \"\",\n \"\",\n \"\",\n f\"{bt}{bt}{bt}bash\",\n \"REPLACE_WITH_YOUR_EVALUATION_COMMAND\",\n f\"{bt}{bt}{bt}\",\n \"\",\n f\"The metric is {bt}REPLACE_WITH_METRIC_NAME{bt}. **Lower/Higher is better.** (pick one)\",\n \"\",\n ])\n with open(template_file, \"w\") as f:\n f.write(template)\n # Leave the template unstaged — the agent will create a draft PR with it\n print(f\"BOOTSTRAPPED: created {template_file} locally (agent will create a draft PR)\")\n\n# Find all program files from all locations:\n# 1. Directory-based programs: .autoloop/programs//program.md (preferred)\n# 2. Bare markdown programs: .autoloop/programs/.md (simple)\n# 3. 
Issue-based programs: GitHub issues with the 'autoloop-program' label\nprogram_files = []\nissue_programs = {} # name -> {issue_number, file}\n\n# Scan .autoloop/programs/ for directory-based programs\nif os.path.isdir(programs_dir):\n for entry in sorted(os.listdir(programs_dir)):\n prog_dir = os.path.join(programs_dir, entry)\n if os.path.isdir(prog_dir):\n # Look for program.md inside the directory\n prog_file = os.path.join(prog_dir, \"program.md\")\n if os.path.isfile(prog_file):\n program_files.append(prog_file)\n\n# Scan .autoloop/programs/ for bare markdown programs\nbare_programs = sorted(glob.glob(os.path.join(autoloop_dir, \"*.md\")))\nfor pf in bare_programs:\n program_files.append(pf)\n\n# Scan GitHub issues with the 'autoloop-program' label\nissue_programs_dir = \"/tmp/gh-aw/issue-programs\"\nos.makedirs(issue_programs_dir, exist_ok=True)\ntry:\n api_url = f\"https://api.github.com/repos/{repo}/issues?labels=autoloop-program&state=open&per_page=100\"\n req = urllib.request.Request(api_url, headers={\n \"Authorization\": f\"token {github_token}\",\n \"Accept\": \"application/vnd.github.v3+json\",\n })\n with urllib.request.urlopen(req, timeout=30) as resp:\n issues = json.loads(resp.read().decode())\n for issue in issues:\n if issue.get(\"pull_request\"):\n continue # skip PRs\n body = issue.get(\"body\") or \"\"\n title = issue.get(\"title\") or \"\"\n number = issue[\"number\"]\n # Derive program name from issue title: slugify to lowercase with hyphens\n slug = re.sub(r'[^a-z0-9]+', '-', title.lower()).strip('-')\n slug = re.sub(r'-+', '-', slug) # collapse consecutive hyphens\n if not slug:\n slug = f\"issue-{number}\"\n # Avoid slug collisions: if another issue already claimed this slug, append issue number\n if slug in issue_programs:\n print(f\" Warning: slug '{slug}' (issue #{number}) collides with issue #{issue_programs[slug]['issue_number']}, appending issue number\")\n slug = f\"{slug}-{number}\"\n # Write issue body to a temp file so the 
scheduling loop can process it\n issue_file = os.path.join(issue_programs_dir, f\"{slug}.md\")\n with open(issue_file, \"w\") as f:\n f.write(body)\n program_files.append(issue_file)\n issue_programs[slug] = {\"issue_number\": number, \"file\": issue_file, \"title\": title}\n print(f\" Found issue-based program: '{slug}' (issue #{number})\")\nexcept Exception as e:\n print(f\" Warning: could not fetch issue-based programs: {e}\")\n\nif not program_files:\n # Fallback to single-file locations\n for path in [\".autoloop/program.md\", \"program.md\"]:\n if os.path.isfile(path):\n program_files = [path]\n break\n\nif not program_files:\n print(\"NO_PROGRAMS_FOUND\")\n os.makedirs(\"/tmp/gh-aw\", exist_ok=True)\n with open(\"/tmp/gh-aw/autoloop.json\", \"w\") as f:\n json.dump({\"due\": [], \"skipped\": [], \"unconfigured\": [], \"no_programs\": True}, f)\n sys.exit(0)\n\nos.makedirs(\"/tmp/gh-aw\", exist_ok=True)\nnow = datetime.now(timezone.utc)\ndue = []\nskipped = []\nunconfigured = []\nall_programs = {} # name -> file path (populated during scanning)\n\n# Schedule string to timedelta\ndef parse_schedule(s):\n s = s.strip().lower()\n m = re.match(r\"every\\s+(\\d+)\\s*h\", s)\n if m:\n return timedelta(hours=int(m.group(1)))\n m = re.match(r\"every\\s+(\\d+)\\s*m\", s)\n if m:\n return timedelta(minutes=int(m.group(1)))\n if s == \"daily\":\n return timedelta(hours=24)\n if s == \"weekly\":\n return timedelta(days=7)\n return None # No per-program schedule — always due\n\ndef get_program_name(pf):\n \"\"\"Extract program name from file path.\n Directory-based: .autoloop/programs//program.md -> \n Bare markdown: .autoloop/programs/.md -> \n Issue-based: /tmp/gh-aw/issue-programs/.md -> \n \"\"\"\n if pf.endswith(\"/program.md\"):\n # Directory-based program: name is the parent directory\n return os.path.basename(os.path.dirname(pf))\n else:\n # Bare markdown or issue-based program: name is the filename without .md\n return 
os.path.splitext(os.path.basename(pf))[0]\n\nfor pf in program_files:\n name = get_program_name(pf)\n all_programs[name] = pf\n with open(pf) as f:\n content = f.read()\n\n # Check sentinel (skip for issue-based programs which use AUTOLOOP:ISSUE-PROGRAM)\n if \"\" in content:\n unconfigured.append(name)\n continue\n\n # Check for TODO/REPLACE placeholders\n if re.search(r'\\bTODO\\b|\\bREPLACE', content):\n unconfigured.append(name)\n continue\n\n # Parse optional YAML frontmatter for schedule and target-metric\n # Strip leading HTML comments before checking (issue-based programs may have them)\n content_stripped = re.sub(r'^(\\s*\\s*\\n)*', '', content, flags=re.DOTALL)\n schedule_delta = None\n target_metric = None\n fm_match = re.match(r\"^---\\s*\\n(.*?)\\n---\\s*\\n\", content_stripped, re.DOTALL)\n if fm_match:\n for line in fm_match.group(1).split(\"\\n\"):\n if line.strip().startswith(\"schedule:\"):\n schedule_str = line.split(\":\", 1)[1].strip()\n schedule_delta = parse_schedule(schedule_str)\n if line.strip().startswith(\"target-metric:\"):\n try:\n target_metric = float(line.split(\":\", 1)[1].strip())\n except (ValueError, TypeError):\n print(f\" Warning: {name} has invalid target-metric value: {line.split(':', 1)[1].strip()}\")\n\n # Read state from repo-memory\n state = read_program_state(name)\n if state:\n print(f\" {name}: last_run={state.get('last_run')}, iteration_count={state.get('iteration_count')}\")\n else:\n print(f\" {name}: no state found (first run)\")\n\n last_run = None\n lr = state.get(\"last_run\")\n if lr:\n try:\n last_run = datetime.fromisoformat(lr.replace(\"Z\", \"+00:00\"))\n except ValueError:\n pass\n\n # Check if completed (target metric was reached)\n if str(state.get(\"completed\", \"\")).lower() == \"true\":\n skipped.append({\"name\": name, \"reason\": f\"completed: target metric reached\"})\n continue\n\n # Check if paused (e.g., plateau or recurring errors)\n if state.get(\"paused\"):\n skipped.append({\"name\": name, 
\"reason\": f\"paused: {state.get('pause_reason', 'unknown')}\"})\n continue\n\n # Auto-pause on plateau: 5+ consecutive rejections\n recent = state.get(\"recent_statuses\", [])[-5:]\n if len(recent) >= 5 and all(s == \"rejected\" for s in recent):\n skipped.append({\"name\": name, \"reason\": \"plateau: 5 consecutive rejections\"})\n continue\n\n # Check if due based on per-program schedule\n if schedule_delta and last_run:\n if now - last_run < schedule_delta:\n skipped.append({\"name\": name, \"reason\": \"not due yet\",\n \"next_due\": (last_run + schedule_delta).isoformat()})\n continue\n\n due.append({\"name\": name, \"last_run\": lr, \"file\": pf, \"target_metric\": target_metric})\n\n# Pick the program to run\nselected = None\nselected_file = None\nselected_issue = None\nselected_target_metric = None\ndeferred = []\n\nif forced_program:\n # Manual dispatch requested a specific program — bypass scheduling\n # (paused, not-due, and plateau programs can still be forced)\n if forced_program not in all_programs:\n print(f\"ERROR: requested program '{forced_program}' not found.\")\n print(f\" Available programs: {list(all_programs.keys())}\")\n sys.exit(1)\n if forced_program in unconfigured:\n print(f\"ERROR: requested program '{forced_program}' is unconfigured (has placeholders).\")\n sys.exit(1)\n selected = forced_program\n selected_file = all_programs[forced_program]\n deferred = [p[\"name\"] for p in due if p[\"name\"] != forced_program]\n if selected in issue_programs:\n selected_issue = issue_programs[selected][\"issue_number\"]\n # Find target_metric: check the due list first, then parse from the program file\n for p in due:\n if p[\"name\"] == forced_program:\n selected_target_metric = p.get(\"target_metric\")\n break\n if selected_target_metric is None:\n # Program may have been skipped (completed/paused/plateau) — parse directly\n try:\n with open(selected_file) as _f:\n _content = _f.read()\n _content_stripped = re.sub(r'^(\\s*\\s*\\n)*', '', 
_content, flags=re.DOTALL)\n _fm = re.match(r\"^---\\s*\\n(.*?)\\n---\\s*\\n\", _content_stripped, re.DOTALL)\n if _fm:\n for _line in _fm.group(1).split(\"\\n\"):\n if _line.strip().startswith(\"target-metric:\"):\n selected_target_metric = float(_line.split(\":\", 1)[1].strip())\n break\n except (OSError, ValueError, TypeError):\n pass\n print(f\"FORCED: running program '{forced_program}' (manual dispatch)\")\nelif due:\n # Normal scheduling: pick the single most-overdue program\n due.sort(key=lambda p: p[\"last_run\"] or \"\") # None/empty sorts first (never run)\n selected = due[0][\"name\"]\n selected_file = due[0][\"file\"]\n selected_target_metric = due[0].get(\"target_metric\")\n deferred = [p[\"name\"] for p in due[1:]]\n # Check if the selected program is issue-based\n if selected in issue_programs:\n selected_issue = issue_programs[selected][\"issue_number\"]\n\n# Look up existing PR for the selected program's canonical branch\nexisting_pr = None\nhead_branch = None\nif selected:\n head_branch = f\"autoloop/{selected}\"\n owner = repo.split(\"/\")[0] if \"/\" in repo else \"\"\n if owner:\n try:\n pr_api_url = (\n f\"https://api.github.com/repos/{repo}/pulls\"\n f\"?state=open&head={owner}:{head_branch}&per_page=5\"\n )\n pr_req = urllib.request.Request(pr_api_url, headers={\n \"Authorization\": f\"token {github_token}\",\n \"Accept\": \"application/vnd.github.v3+json\",\n })\n with urllib.request.urlopen(pr_req, timeout=30) as pr_resp:\n open_prs = json.loads(pr_resp.read().decode())\n if open_prs:\n existing_pr = open_prs[0][\"number\"]\n print(f\" Found existing PR #{existing_pr} for branch {head_branch}\")\n else:\n print(f\" No existing PR found for branch {head_branch}\")\n except Exception as e:\n print(f\" Warning: could not check for existing PRs: {e}\")\n else:\n print(f\" Warning: could not parse owner from GITHUB_REPOSITORY='{repo}'\")\n\n # Also check the state file for a recorded PR number as fallback\n if existing_pr is None:\n state = 
read_program_state(selected)\n pr_field = state.get(\"pr\") or \"\"\n pr_match = re.match(r'^#?(\\d+)$', pr_field.strip())\n if pr_match:\n existing_pr = int(pr_match.group(1))\n print(f\" Found PR #{existing_pr} from state file for {selected}\")\n\nresult = {\n \"selected\": selected,\n \"selected_file\": selected_file,\n \"selected_issue\": selected_issue,\n \"selected_target_metric\": selected_target_metric,\n \"existing_pr\": existing_pr,\n \"head_branch\": head_branch,\n \"issue_programs\": {name: info[\"issue_number\"] for name, info in issue_programs.items()},\n \"deferred\": deferred,\n \"skipped\": skipped,\n \"unconfigured\": unconfigured,\n \"no_programs\": False,\n}\n\nos.makedirs(\"/tmp/gh-aw\", exist_ok=True)\nwith open(\"/tmp/gh-aw/autoloop.json\", \"w\") as f:\n json.dump(result, f, indent=2)\n\nprint(\"=== Autoloop Program Check ===\")\nprint(f\"Selected program: {selected or '(none)'} ({selected_file or 'n/a'})\")\nif existing_pr:\n print(f\"Existing PR: #{existing_pr} (branch: {head_branch})\")\nelse:\n print(f\"Existing PR: (none — will create on first accepted iteration)\")\nprint(f\"Deferred (next run): {deferred or '(none)'}\")\nprint(f\"Programs skipped: {[s['name'] for s in skipped] or '(none)'}\")\nprint(f\"Programs unconfigured: {unconfigured or '(none)'}\")\n\nif not selected and not unconfigured:\n print(\"\\nNo programs due this run. Exiting early.\")\n sys.exit(1) # Non-zero exit skips the agent step\nPYEOF\n"
# Repo memory git-based storage configuration from frontmatter processed below
- name: Clone repo-memory branch (default)
@@ -498,12 +498,12 @@ jobs:
mkdir -p ${RUNNER_TEMP}/gh-aw/safeoutputs
mkdir -p /tmp/gh-aw/safeoutputs
mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs
- cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/config.json << 'GH_AW_SAFE_OUTPUTS_CONFIG_05418b5b293ba2f8_EOF'
+ cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/config.json << 'GH_AW_SAFE_OUTPUTS_CONFIG_02af14f48bc5ba75_EOF'
{"add_comment":{"hide_older_comments":false,"max":7,"target":"*"},"add_labels":{"max":2,"target":"*"},"create_issue":{"labels":["automation","autoloop"],"max":2,"title_prefix":"[Autoloop] "},"create_pull_request":{"draft":true,"labels":["automation","autoloop"],"max":1,"max_patch_size":1024,"protected_files":["package.json","bun.lockb","bunfig.toml","deno.json","deno.jsonc","deno.lock","global.json","NuGet.Config","Directory.Packages.props","mix.exs","mix.lock","go.mod","go.sum","stack.yaml","stack.yaml.lock","pom.xml","build.gradle","build.gradle.kts","settings.gradle","settings.gradle.kts","gradle.properties","package-lock.json","yarn.lock","pnpm-lock.yaml","npm-shrinkwrap.json","requirements.txt","Pipfile","Pipfile.lock","pyproject.toml","setup.py","setup.cfg","Gemfile","Gemfile.lock","uv.lock","CODEOWNERS"],"protected_files_policy":"fallback-to-issue","protected_path_prefixes":[".github/",".agents/"],"title_prefix":"[Autoloop] "},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"true"},"push_repo_memory":{"memories":[{"dir":"/tmp/gh-aw/repo-memory/default","id":"default","max_file_count":100,"max_file_size":30720,"max_patch_size":10240}]},"push_to_pull_request_branch":{"if_no_changes":"warn","max":1,"max_patch_size":1024,"protected_files":["package.json","bun.lockb","bunfig.toml","deno.json","deno.jsonc","deno.lock","global.json","NuGet.Config","Directory.Packages.props","mix.exs","mix.lock","go.mod","go.sum","stack.yaml","stack.yaml.lock","pom.xml","build.gradle","build.gradle.kts","settings.gradle","settings.gradle.kts","gradle.properties","package-lock.json","yarn.lock","pnpm-lock.yaml","npm-shrinkwrap.json","requirements.txt","Pipfile","Pipfile.lock","pyproject.toml","setup.py","setup.cfg","Gemfile","Gemfile.lock","uv.lock","CODEOWNERS"],"protected_path_prefixes":[".github/",".agents/"],"target":"*","title_prefix":"[Autoloop] 
"},"remove_labels":{"max":2,"target":"*"},"update_issue":{"allow_body":true,"max":3,"target":"*","title_prefix":"[Autoloop] "}}
- GH_AW_SAFE_OUTPUTS_CONFIG_05418b5b293ba2f8_EOF
+ GH_AW_SAFE_OUTPUTS_CONFIG_02af14f48bc5ba75_EOF
- name: Write Safe Outputs Tools
run: |
- cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/tools_meta.json << 'GH_AW_SAFE_OUTPUTS_TOOLS_META_e9f0c97ff8e4b848_EOF'
+ cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/tools_meta.json << 'GH_AW_SAFE_OUTPUTS_TOOLS_META_e09cbb8e5f09e2a3_EOF'
{
"description_suffixes": {
"add_comment": " CONSTRAINTS: Maximum 7 comment(s) can be added. Target: *.",
@@ -517,8 +517,8 @@ jobs:
"repo_params": {},
"dynamic_tools": []
}
- GH_AW_SAFE_OUTPUTS_TOOLS_META_e9f0c97ff8e4b848_EOF
- cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/validation.json << 'GH_AW_SAFE_OUTPUTS_VALIDATION_c2504b2536e4b3d6_EOF'
+ GH_AW_SAFE_OUTPUTS_TOOLS_META_e09cbb8e5f09e2a3_EOF
+ cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/validation.json << 'GH_AW_SAFE_OUTPUTS_VALIDATION_ecaf271fbfb920d8_EOF'
{
"add_comment": {
"defaultMax": 1,
@@ -777,7 +777,7 @@ jobs:
"customValidation": "requiresOneOf:status,title,body"
}
}
- GH_AW_SAFE_OUTPUTS_VALIDATION_c2504b2536e4b3d6_EOF
+ GH_AW_SAFE_OUTPUTS_VALIDATION_ecaf271fbfb920d8_EOF
node ${RUNNER_TEMP}/gh-aw/actions/generate_safe_outputs_tools.cjs
- name: Generate Safe Outputs MCP Server Config
id: safe-outputs-config
@@ -847,7 +847,7 @@ jobs:
export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.2.11'
mkdir -p /home/runner/.copilot
- cat << GH_AW_MCP_CONFIG_be8a945ee3e28a8b_EOF | bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh
+ cat << GH_AW_MCP_CONFIG_757354268663f6b1_EOF | bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh
{
"mcpServers": {
"github": {
@@ -888,7 +888,7 @@ jobs:
"payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}"
}
}
- GH_AW_MCP_CONFIG_be8a945ee3e28a8b_EOF
+ GH_AW_MCP_CONFIG_757354268663f6b1_EOF
- name: Download activation artifact
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
with:
diff --git a/.github/workflows/autoloop.md b/.github/workflows/autoloop.md
index f3e796cc..ddc456a7 100644
--- a/.github/workflows/autoloop.md
+++ b/.github/workflows/autoloop.md
@@ -431,11 +431,50 @@ steps:
if selected in issue_programs:
selected_issue = issue_programs[selected]["issue_number"]
+ # Look up existing PR for the selected program's canonical branch
+ existing_pr = None
+ head_branch = None
+ if selected:
+ head_branch = f"autoloop/{selected}"
+ owner = repo.split("/")[0] if "/" in repo else ""
+ if owner:
+ try:
+ pr_api_url = (
+ f"https://api.github.com/repos/{repo}/pulls"
+ f"?state=open&head={owner}:{head_branch}&per_page=5"
+ )
+ pr_req = urllib.request.Request(pr_api_url, headers={
+ "Authorization": f"token {github_token}",
+ "Accept": "application/vnd.github.v3+json",
+ })
+ with urllib.request.urlopen(pr_req, timeout=30) as pr_resp:
+ open_prs = json.loads(pr_resp.read().decode())
+ if open_prs:
+ existing_pr = open_prs[0]["number"]
+ print(f" Found existing PR #{existing_pr} for branch {head_branch}")
+ else:
+ print(f" No existing PR found for branch {head_branch}")
+ except Exception as e:
+ print(f" Warning: could not check for existing PRs: {e}")
+ else:
+ print(f" Warning: could not parse owner from GITHUB_REPOSITORY='{repo}'")
+
+ # Also check the state file for a recorded PR number as fallback
+ if existing_pr is None:
+ state = read_program_state(selected)
+ pr_field = state.get("pr") or ""
+ pr_match = re.match(r'^#?(\d+)$', pr_field.strip())
+ if pr_match:
+ existing_pr = int(pr_match.group(1))
+ print(f" Found PR #{existing_pr} from state file for {selected}")
+
result = {
"selected": selected,
"selected_file": selected_file,
"selected_issue": selected_issue,
"selected_target_metric": selected_target_metric,
+ "existing_pr": existing_pr,
+ "head_branch": head_branch,
"issue_programs": {name: info["issue_number"] for name, info in issue_programs.items()},
"deferred": deferred,
"skipped": skipped,
@@ -449,6 +488,10 @@ steps:
print("=== Autoloop Program Check ===")
print(f"Selected program: {selected or '(none)'} ({selected_file or 'n/a'})")
+ if existing_pr:
+ print(f"Existing PR: #{existing_pr} (branch: {head_branch})")
+ else:
+ print(f"Existing PR: (none — will create on first accepted iteration)")
print(f"Deferred (next run): {deferred or '(none)'}")
print(f"Programs skipped: {[s['name'] for s in skipped] or '(none)'}")
print(f"Programs unconfigured: {unconfigured or '(none)'}")
@@ -538,6 +581,8 @@ The pre-step has already determined which program to run. Read `/tmp/gh-aw/autol
- **`selected_file`**: The full path to the program's markdown file (either `.autoloop/programs/<name>/program.md`, `.autoloop/programs/<name>.md`, or `/tmp/gh-aw/issue-programs/<name>.md` for issue-based programs).
- **`selected_issue`**: The GitHub issue number if the selected program came from an issue, or `null` if it came from a file.
- **`selected_target_metric`**: The `target-metric` value from the program's frontmatter (a number), or `null` if the program is open-ended. Used to check the [halting condition](#halting-condition) after each accepted iteration.
+- **`existing_pr`**: The PR number (e.g., `42`) of an already-open PR for this program's branch, or `null` if no open PR exists. **If this is not null, you MUST use `push-to-pull-request-branch` to push to this PR — do NOT call `create-pull-request`.**
+- **`head_branch`**: The canonical branch name for this program (e.g., `autoloop/coverage`). Always use this exact branch name — never append suffixes.
- **`issue_programs`**: A mapping of program name → issue number for all discovered issue-based programs.
- **`deferred`**: Other programs that were due but will be handled in future runs.
- **`unconfigured`**: Programs that still have the sentinel or placeholder content.
@@ -550,6 +595,7 @@ If `selected` is not null:
3. Read the current state of all target files.
4. Read the state file `{selected}.md` from the repo-memory folder for all state: the ⚙️ Machine State table (scheduling fields) plus the research sections (priorities, lessons, foreclosed avenues, iteration history).
5. If `selected_issue` is not null, this is an issue-based program — also read the issue comments for any human steering input.
+6. **Check `existing_pr`**: if it is not null, a PR already exists — use `push-to-pull-request-branch` to push commits to it. Only call `create-pull-request` when `existing_pr` is null.
## Multiple Programs
@@ -694,7 +740,7 @@ Each run executes **one iteration for the single selected program**:
If the state file does not yet exist, create it in the repo-memory folder using the template defined in the [Repo Memory](#repo-memory) section.
-3. Note the `PR` field from the Machine State table. If it contains a PR number (e.g., `#42`), that is the **existing draft PR** for this program — you must update it, not create a new one.
+3. Note the `existing_pr` field from `/tmp/gh-aw/autoloop.json`. If it is not null, that is the **existing draft PR** for this program — you must push to it using `push-to-pull-request-branch`, not create a new one. Also check the `PR` field from the Machine State table as a fallback.
### Step 2: Analyze and Propose
@@ -743,15 +789,15 @@ Each run executes **one iteration for the single selected program**:
- Commit message body (after a blank line): `Run: {run_url}` referencing the GitHub Actions run URL.
2. Push the commit to the long-running branch `autoloop/{program-name}`.
3. **Find the existing PR or create one** — follow these steps in order:
- a. Check the `PR` field in the state file's **⚙️ Machine State** table. If it contains a PR number (e.g., `#42`), that is the existing draft PR.
- b. If the state file has no PR number, search GitHub for open PRs with head branch `autoloop/{program-name}`. Use the GitHub API: `GET /repos/{owner}/{repo}/pulls?state=open&head={owner}:autoloop/{program-name}`.
- c. **If an existing PR is found** (from either step a or b): use `push-to-pull-request-branch` to push additional commits to the existing PR. Update the PR body with the latest metric and a summary of the most recent accepted iteration. Add a comment to the PR summarizing the iteration: what changed, old metric, new metric, improvement delta, and a link to the actions run. **Do NOT call `create-pull-request`.**
- d. **If NO PR exists** for `autoloop/{program-name}`: create one using `create-pull-request`:
+ a. **First, check `existing_pr` from `/tmp/gh-aw/autoloop.json`.** The pre-step has already looked up the open PR for this program. If `existing_pr` is not null, that is the existing draft PR — skip to step (c).
+ b. If `existing_pr` is null, also check the `PR` field in the state file's **⚙️ Machine State** table as a fallback. If it contains a PR number (e.g., `#42`), verify it is still open via the GitHub API.
+ c. **If an existing PR is found** (from step a or b): use `push-to-pull-request-branch` to push additional commits to the existing PR. Update the PR body with the latest metric and a summary of the most recent accepted iteration. Add a comment to the PR summarizing the iteration: what changed, old metric, new metric, improvement delta, and a link to the actions run. **Do NOT call `create-pull-request`.**
+ d. **If NO PR exists** for `autoloop/{program-name}` (both `existing_pr` is null AND the state file has no PR): create one using `create-pull-request`:
- Branch: `autoloop/{program-name}` (the branch you already created in Step 3 — do NOT let the framework auto-generate a branch name)
- Title: `[Autoloop: {program-name}]`
- Body includes: a summary of the program goal, link to the steering issue, the current best metric, and AI disclosure: `🤖 *This PR is maintained by Autoloop. Each accepted iteration adds a commit to this branch.*`
- > ⚠️ **Never create a new PR if one already exists for `autoloop/{program-name}`.** Each program must have exactly one draft PR at any time. If you are unsure whether a PR exists, check the GitHub API before calling `create-pull-request`.
+ > ⚠️ **Never create a new PR if one already exists for `autoloop/{program-name}`.** Each program must have exactly one draft PR at any time. The pre-step provides `existing_pr` in autoloop.json — always check it first. Only call `create-pull-request` when `existing_pr` is null AND the state file has no PR number.
4. Ensure the steering issue exists (see [Steering Issue](#steering-issue) below). Add a comment to the steering issue linking to the commit and actions run.
5. Add an entry to the experiment log issue.
6. Update the state file `{program-name}.md` in the repo-memory folder:
@@ -790,6 +836,13 @@ Maintain a single open issue **per program** titled `[Autoloop: {program-name}]
```markdown
🤖 *Autoloop — an iterative optimization agent for this repository.*
+| | |
+|---|---|
+| **Branch** | [`autoloop/{program-name}`](https://github.com/{owner}/{repo}/tree/autoloop/{program-name}) |
+| **Pull Request** | #{pr_number} |
+| **Steering Issue** | #{steering_issue_number} |
+| **State File** | [`{program-name}.md`](https://github.com/{owner}/{repo}/blob/memory/autoloop/{program-name}.md) |
+
## Program
**Goal**: {one-line summary from program.md}
@@ -817,6 +870,7 @@ Maintain a single open issue **per program** titled `[Autoloop: {program-name}]
- Iterations in **reverse chronological order** (newest first).
- Each iteration heading links to its GitHub Actions run.
- Use `${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}` for the current run URL.
+- The **links table at the top** must always show the current branch, PR, steering issue, and state file. Update the PR number when a new PR is created. When creating a continuation issue for a new month, copy the links table from the previous issue.
- Close the previous month's issue and create a new one at month boundaries.
- Maximum 50 iterations per issue; create a continuation issue if exceeded.
@@ -1148,9 +1202,10 @@ After each iteration, prepend an entry to the **📊 Iteration History** section
> ❌ **Do NOT create a new branch with a suffix for each iteration.**
> Correct: `autoloop/coverage`
> Wrong: `autoloop/coverage-abc123`, `autoloop/coverage-iter42`, `autoloop/coverage-deadbeef1234`
+> Use the `head_branch` field from `autoloop.json` — it is always the canonical name.
> ❌ **Do NOT create a new PR if one already exists for `autoloop/{program-name}`.**
-> Always check the state file's `PR` field and the GitHub API before calling `create-pull-request`. If a PR exists, use `push-to-pull-request-branch` instead.
+> The pre-step provides `existing_pr` in `autoloop.json`. If it is not null, **always** use `push-to-pull-request-branch` — never call `create-pull-request`. Only create a PR when `existing_pr` is null AND the state file has no PR number.
> ❌ **Do NOT let the gh-aw framework auto-generate a branch name when creating a PR.**
> Always specify the branch explicitly as `autoloop/{program-name}` when calling `create-pull-request`.
diff --git a/.github/workflows/evergreen.lock.yml b/.github/workflows/evergreen.lock.yml
index 703bf2c5..fbdca9b7 100644
--- a/.github/workflows/evergreen.lock.yml
+++ b/.github/workflows/evergreen.lock.yml
@@ -28,7 +28,7 @@
# Imports:
# - shared/reporting.md
#
-# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"fdc470cbbb93445cdc31d03533b3983d30603af2cbc207a06bc506a93d186f95","compiler_version":"v0.65.6","strict":true,"agent_id":"copilot"}
+# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"6c52512ee1dd9f0c424a7b5af5207b2d89e239e673df6f5ad79911a4820b75ab","compiler_version":"v0.65.6","strict":true,"agent_id":"copilot"}
name: "Evergreen — PR Health Keeper"
"on":
@@ -141,20 +141,20 @@ jobs:
run: |
bash ${RUNNER_TEMP}/gh-aw/actions/create_prompt_first.sh
{
- cat << 'GH_AW_PROMPT_c90257464e463e6f_EOF'
+ cat << 'GH_AW_PROMPT_1c58cbcd2bf82635_EOF'
- GH_AW_PROMPT_c90257464e463e6f_EOF
+ GH_AW_PROMPT_1c58cbcd2bf82635_EOF
cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/markdown.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/repo_memory_prompt.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_prompt.md"
- cat << 'GH_AW_PROMPT_c90257464e463e6f_EOF'
+ cat << 'GH_AW_PROMPT_1c58cbcd2bf82635_EOF'
Tools: add_comment(max:3), push_to_pull_request_branch(max:3), missing_tool, missing_data, noop
- GH_AW_PROMPT_c90257464e463e6f_EOF
+ GH_AW_PROMPT_1c58cbcd2bf82635_EOF
cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_push_to_pr_branch.md"
- cat << 'GH_AW_PROMPT_c90257464e463e6f_EOF'
+ cat << 'GH_AW_PROMPT_1c58cbcd2bf82635_EOF'
The following GitHub context information is available for this workflow:
@@ -187,13 +187,13 @@ jobs:
- **Note**: If a branch you need is not in the list above and is not listed as an additional fetched ref, it has NOT been checked out. For private repositories you cannot fetch it without proper authentication. If the branch is required and not available, exit with an error and ask the user to add it to the `fetch:` option of the `checkout:` configuration (e.g., `fetch: ["refs/pulls/open/*"]` for all open PR refs, or `fetch: ["main", "feature/my-branch"]` for specific branches).
- GH_AW_PROMPT_c90257464e463e6f_EOF
+ GH_AW_PROMPT_1c58cbcd2bf82635_EOF
cat "${RUNNER_TEMP}/gh-aw/prompts/github_mcp_tools_with_safeoutputs_prompt.md"
- cat << 'GH_AW_PROMPT_c90257464e463e6f_EOF'
+ cat << 'GH_AW_PROMPT_1c58cbcd2bf82635_EOF'
{{#runtime-import .github/workflows/shared/reporting.md}}
{{#runtime-import .github/workflows/evergreen.md}}
- GH_AW_PROMPT_c90257464e463e6f_EOF
+ GH_AW_PROMPT_1c58cbcd2bf82635_EOF
} > "$GH_AW_PROMPT"
- name: Interpolate variables and render templates
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
@@ -339,7 +339,7 @@ jobs:
GITHUB_REPOSITORY: ${{ github.repository }}
GITHUB_TOKEN: ${{ github.token }}
name: Find a PR that needs attention
- run: "python3 - << 'PYEOF'\nimport os, json, re, sys\nimport urllib.request, urllib.error\n\ntoken = os.environ.get(\"GITHUB_TOKEN\", \"\")\nrepo = os.environ.get(\"GITHUB_REPOSITORY\", \"\")\nforced_pr = os.environ.get(\"FORCED_PR\", \"\").strip()\n\nrepo_memory_dir = \"/tmp/gh-aw/repo-memory/evergreen\"\noutput_file = \"/tmp/gh-aw/evergreen.json\"\nos.makedirs(\"/tmp/gh-aw\", exist_ok=True)\n\nMAX_ATTEMPTS = 5\n\ndef api_get(url):\n \"\"\"Make an authenticated GET request to the GitHub API.\"\"\"\n req = urllib.request.Request(url, headers={\n \"Authorization\": f\"token {token}\",\n \"Accept\": \"application/vnd.github.v3+json\",\n })\n with urllib.request.urlopen(req, timeout=30) as resp:\n return json.loads(resp.read().decode())\n\ndef get_all_open_prs():\n \"\"\"Fetch all open PRs, paginated.\"\"\"\n prs = []\n page = 1\n while True:\n url = f\"https://api.github.com/repos/{repo}/pulls?state=open&per_page=100&page={page}&sort=number&direction=asc\"\n batch = api_get(url)\n if not batch:\n break\n prs.extend(batch)\n if len(batch) < 100:\n break\n page += 1\n return prs\n\ndef get_check_status(pr):\n \"\"\"Get combined CI check status for a PR's head commit.\"\"\"\n head_sha = pr[\"head\"][\"sha\"]\n url = f\"https://api.github.com/repos/{repo}/commits/{head_sha}/status\"\n try:\n status = api_get(url)\n return status.get(\"state\", \"unknown\")\n except Exception as e:\n print(f\" Warning: could not fetch status for PR #{pr['number']}: {e}\")\n return \"unknown\"\n\ndef get_check_runs(pr):\n \"\"\"Get check runs for a PR's head commit.\"\"\"\n head_sha = pr[\"head\"][\"sha\"]\n url = f\"https://api.github.com/repos/{repo}/commits/{head_sha}/check-runs\"\n try:\n data = api_get(url)\n return data.get(\"check_runs\", [])\n except Exception as e:\n print(f\" Warning: could not fetch check runs for PR #{pr['number']}: {e}\")\n return []\n\ndef read_attempt_state(pr_number):\n \"\"\"Read attempt tracking state from repo-memory.\"\"\"\n state_file = 
os.path.join(repo_memory_dir, f\"pr-{pr_number}.md\")\n if not os.path.isfile(state_file):\n return {\"attempts\": 0, \"head_sha\": None}\n with open(state_file, encoding=\"utf-8\") as f:\n content = f.read()\n state = {\"attempts\": 0, \"head_sha\": None}\n m = re.search(r'\\|\\s*head_sha\\s*\\|\\s*(\\S+)\\s*\\|', content)\n if m:\n state[\"head_sha\"] = m.group(1)\n m = re.search(r'\\|\\s*attempts\\s*\\|\\s*(\\d+)\\s*\\|', content)\n if m:\n state[\"attempts\"] = int(m.group(1))\n return state\n\ndef pr_needs_attention(pr):\n \"\"\"Check if a PR has merge conflicts or failing CI. Returns a list of issues.\"\"\"\n issues = []\n\n # Check mergeable state\n # Need to fetch full PR details for mergeable info\n pr_url = f\"https://api.github.com/repos/{repo}/pulls/{pr['number']}\"\n try:\n full_pr = api_get(pr_url)\n mergeable = full_pr.get(\"mergeable\")\n mergeable_state = full_pr.get(\"mergeable_state\", \"unknown\")\n if mergeable is False:\n issues.append(\"merge_conflict\")\n elif mergeable_state == \"dirty\":\n issues.append(\"merge_conflict\")\n except Exception as e:\n print(f\" Warning: could not fetch mergeable state for PR #{pr['number']}: {e}\")\n\n # Check CI status via check runs\n check_runs = get_check_runs(pr)\n failed_checks = []\n for cr in check_runs:\n conclusion = cr.get(\"conclusion\")\n status = cr.get(\"status\")\n name = cr.get(\"name\", \"unknown\")\n if conclusion in (\"failure\", \"timed_out\", \"action_required\"):\n failed_checks.append(name)\n elif status == \"completed\" and conclusion not in (\"success\", \"neutral\", \"skipped\"):\n if conclusion is not None:\n failed_checks.append(name)\n if failed_checks:\n issues.append(f\"failing_checks: {', '.join(failed_checks)}\")\n\n # Also check commit status API (some checks use the older status API)\n combined_status = get_check_status(pr)\n if combined_status == \"failure\":\n if not failed_checks:\n issues.append(\"failing_status\")\n\n return issues\n\n# --- Main logic 
---\n\nprint(\"=== Evergreen PR Health Check ===\")\nprint(f\"Repository: {repo}\")\n\nprs = get_all_open_prs()\nprint(f\"Found {len(prs)} open PR(s)\")\n\nif not prs:\n print(\"No open PRs. Exiting.\")\n with open(output_file, \"w\") as f:\n json.dump({\"selected\": None, \"reason\": \"no_open_prs\"}, f)\n sys.exit(1)\n\n# Evaluate each PR deterministically (sorted by PR number ascending)\ncandidates = []\nskipped = []\n\n# If a specific PR is forced, only check that one\nif forced_pr:\n prs = [pr for pr in prs if str(pr[\"number\"]) == forced_pr]\n if not prs:\n print(f\"ERROR: PR #{forced_pr} not found among open PRs.\")\n sys.exit(1)\n print(f\"FORCED: checking only PR #{forced_pr}\")\n\nfor pr in sorted(prs, key=lambda p: p[\"number\"]):\n pr_num = pr[\"number\"]\n head_sha = pr[\"head\"][\"sha\"]\n print(f\"\\nChecking PR #{pr_num}: {pr['title'][:60]}...\")\n print(f\" Head SHA: {head_sha[:12]}\")\n\n issues = pr_needs_attention(pr)\n if not issues:\n print(f\" Status: healthy (no issues)\")\n continue\n\n print(f\" Issues: {issues}\")\n\n # Check attempt tracking\n attempt_state = read_attempt_state(pr_num)\n if attempt_state[\"head_sha\"] == head_sha:\n attempts = attempt_state[\"attempts\"]\n print(f\" Attempts on this SHA: {attempts}/{MAX_ATTEMPTS}\")\n if attempts >= MAX_ATTEMPTS:\n skipped.append({\n \"pr\": pr_num,\n \"reason\": f\"max attempts ({MAX_ATTEMPTS}) reached on SHA {head_sha[:12]}\",\n })\n print(f\" SKIPPED: max attempts reached\")\n continue\n else:\n attempts = 0\n print(f\" New SHA detected — resetting attempt counter\")\n\n candidates.append({\n \"pr_number\": pr_num,\n \"title\": pr[\"title\"],\n \"head_sha\": head_sha,\n \"base_branch\": pr[\"base\"][\"ref\"],\n \"head_branch\": pr[\"head\"][\"ref\"],\n \"issues\": issues,\n \"attempts\": attempts,\n })\n\n# Select the first candidate (lowest PR number — deterministic)\nselected = candidates[0] if candidates else None\n\nresult = {\n \"selected\": selected,\n \"skipped\": skipped,\n 
\"total_open_prs\": len(prs),\n \"candidates_found\": len(candidates),\n}\n\nwith open(output_file, \"w\") as f:\n json.dump(result, f, indent=2)\n\nif selected:\n print(f\"\\n>>> Selected PR #{selected['pr_number']}: {selected['title']}\")\n print(f\" Issues: {selected['issues']}\")\n print(f\" Attempt: {selected['attempts'] + 1}/{MAX_ATTEMPTS}\")\nelse:\n print(\"\\nNo PRs need attention. Exiting.\")\n sys.exit(1)\nPYEOF\n"
+ run: "python3 - << 'PYEOF'\nimport os, json, re, subprocess, sys\nimport urllib.request, urllib.error\n\ntoken = os.environ.get(\"GITHUB_TOKEN\", \"\")\nrepo = os.environ.get(\"GITHUB_REPOSITORY\", \"\")\nforced_pr = os.environ.get(\"FORCED_PR\", \"\").strip()\n\nrepo_memory_dir = \"/tmp/gh-aw/repo-memory/evergreen\"\noutput_file = \"/tmp/gh-aw/evergreen.json\"\nos.makedirs(\"/tmp/gh-aw\", exist_ok=True)\n\nMAX_ATTEMPTS = 5\n\ndef api_get(url):\n \"\"\"Make an authenticated GET request to the GitHub API.\"\"\"\n req = urllib.request.Request(url, headers={\n \"Authorization\": f\"token {token}\",\n \"Accept\": \"application/vnd.github.v3+json\",\n })\n with urllib.request.urlopen(req, timeout=30) as resp:\n return json.loads(resp.read().decode())\n\ndef get_all_open_prs():\n \"\"\"Fetch all open PRs, paginated.\"\"\"\n prs = []\n page = 1\n while True:\n url = f\"https://api.github.com/repos/{repo}/pulls?state=open&per_page=100&page={page}&sort=number&direction=asc\"\n batch = api_get(url)\n if not batch:\n break\n prs.extend(batch)\n if len(batch) < 100:\n break\n page += 1\n return prs\n\ndef get_check_status(pr):\n \"\"\"Get combined CI check status for a PR's head commit.\"\"\"\n head_sha = pr[\"head\"][\"sha\"]\n url = f\"https://api.github.com/repos/{repo}/commits/{head_sha}/status\"\n try:\n status = api_get(url)\n return status.get(\"state\", \"unknown\")\n except Exception as e:\n print(f\" Warning: could not fetch status for PR #{pr['number']}: {e}\")\n return \"unknown\"\n\ndef get_check_runs(pr):\n \"\"\"Get check runs for a PR's head commit.\"\"\"\n head_sha = pr[\"head\"][\"sha\"]\n url = f\"https://api.github.com/repos/{repo}/commits/{head_sha}/check-runs\"\n try:\n data = api_get(url)\n return data.get(\"check_runs\", [])\n except Exception as e:\n print(f\" Warning: could not fetch check runs for PR #{pr['number']}: {e}\")\n return []\n\ndef read_attempt_state(pr_number):\n \"\"\"Read attempt tracking state from repo-memory.\"\"\"\n state_file = 
os.path.join(repo_memory_dir, f\"pr-{pr_number}.md\")\n if not os.path.isfile(state_file):\n return {\"attempts\": 0, \"head_sha\": None}\n with open(state_file, encoding=\"utf-8\") as f:\n content = f.read()\n state = {\"attempts\": 0, \"head_sha\": None}\n m = re.search(r'\\|\\s*head_sha\\s*\\|\\s*(\\S+)\\s*\\|', content)\n if m:\n state[\"head_sha\"] = m.group(1)\n m = re.search(r'\\|\\s*attempts\\s*\\|\\s*(\\d+)\\s*\\|', content)\n if m:\n state[\"attempts\"] = int(m.group(1))\n return state\n\ndef pr_needs_attention(pr):\n \"\"\"Check if a PR has merge conflicts or failing CI. Returns a list of issues.\"\"\"\n issues = []\n\n # Check mergeable state\n # Need to fetch full PR details for mergeable info\n pr_url = f\"https://api.github.com/repos/{repo}/pulls/{pr['number']}\"\n try:\n full_pr = api_get(pr_url)\n mergeable = full_pr.get(\"mergeable\")\n mergeable_state = full_pr.get(\"mergeable_state\", \"unknown\")\n if mergeable is False:\n issues.append(\"merge_conflict\")\n elif mergeable_state == \"dirty\":\n issues.append(\"merge_conflict\")\n except Exception as e:\n print(f\" Warning: could not fetch mergeable state for PR #{pr['number']}: {e}\")\n\n # Check CI status via check runs\n check_runs = get_check_runs(pr)\n failed_checks = []\n for cr in check_runs:\n conclusion = cr.get(\"conclusion\")\n status = cr.get(\"status\")\n name = cr.get(\"name\", \"unknown\")\n if conclusion in (\"failure\", \"timed_out\", \"action_required\"):\n failed_checks.append(name)\n elif status == \"completed\" and conclusion not in (\"success\", \"neutral\", \"skipped\"):\n if conclusion is not None:\n failed_checks.append(name)\n if failed_checks:\n issues.append(f\"failing_checks: {', '.join(failed_checks)}\")\n\n # Also check commit status API (some checks use the older status API)\n combined_status = get_check_status(pr)\n if combined_status == \"failure\":\n if not failed_checks:\n issues.append(\"failing_status\")\n\n return issues\n\n# --- Main logic 
---\n\nprint(\"=== Evergreen PR Health Check ===\")\nprint(f\"Repository: {repo}\")\n\nprs = get_all_open_prs()\nprint(f\"Found {len(prs)} open PR(s)\")\n\nif not prs:\n print(\"No open PRs. Nothing to do.\")\n with open(output_file, \"w\") as f:\n json.dump({\"selected\": None, \"reason\": \"no_open_prs\"}, f)\n sys.exit(0)\n\n# Evaluate each PR deterministically (sorted by PR number ascending)\ncandidates = []\nskipped = []\n\n# If a specific PR is forced, only check that one\nif forced_pr:\n prs = [pr for pr in prs if str(pr[\"number\"]) == forced_pr]\n if not prs:\n print(f\"ERROR: PR #{forced_pr} not found among open PRs.\")\n sys.exit(1)\n print(f\"FORCED: checking only PR #{forced_pr}\")\n\nfor pr in sorted(prs, key=lambda p: p[\"number\"]):\n pr_num = pr[\"number\"]\n head_sha = pr[\"head\"][\"sha\"]\n print(f\"\\nChecking PR #{pr_num}: {pr['title'][:60]}...\")\n print(f\" Head SHA: {head_sha[:12]}\")\n\n issues = pr_needs_attention(pr)\n if not issues:\n print(f\" Status: healthy (no issues)\")\n continue\n\n print(f\" Issues: {issues}\")\n\n # Check attempt tracking\n attempt_state = read_attempt_state(pr_num)\n if attempt_state[\"head_sha\"] == head_sha:\n attempts = attempt_state[\"attempts\"]\n print(f\" Attempts on this SHA: {attempts}/{MAX_ATTEMPTS}\")\n if attempts >= MAX_ATTEMPTS:\n skipped.append({\n \"pr\": pr_num,\n \"reason\": f\"max attempts ({MAX_ATTEMPTS}) reached on SHA {head_sha[:12]}\",\n })\n print(f\" SKIPPED: max attempts reached\")\n continue\n else:\n attempts = 0\n print(f\" New SHA detected — resetting attempt counter\")\n\n candidates.append({\n \"pr_number\": pr_num,\n \"title\": pr[\"title\"],\n \"head_sha\": head_sha,\n \"base_branch\": pr[\"base\"][\"ref\"],\n \"head_branch\": pr[\"head\"][\"ref\"],\n \"issues\": issues,\n \"attempts\": attempts,\n })\n\n# Select the first candidate (lowest PR number — deterministic)\nselected = candidates[0] if candidates else None\n\nresult = {\n \"selected\": selected,\n \"skipped\": 
skipped,\n \"total_open_prs\": len(prs),\n \"candidates_found\": len(candidates),\n}\n\nwith open(output_file, \"w\") as f:\n json.dump(result, f, indent=2)\n\nif selected:\n branch = selected[\"head_branch\"]\n print(f\"Checking out PR branch before agent run: {branch}\")\n subprocess.check_call([\"git\", \"checkout\", \"-B\", branch, f\"origin/{branch}\"])\n subprocess.check_call([\"git\", \"branch\", \"--set-upstream-to\", f\"origin/{branch}\", branch])\n print(f\"\\n>>> Selected PR #{selected['pr_number']}: {selected['title']}\")\n print(f\" Issues: {selected['issues']}\")\n print(f\" Attempt: {selected['attempts'] + 1}/{MAX_ATTEMPTS}\")\nelse:\n print(\"\\nNo PRs need attention. Nothing to do.\")\n sys.exit(0)\nPYEOF\n"
# Repo memory git-based storage configuration from frontmatter processed below
- name: Clone repo-memory branch (default)
@@ -398,12 +398,12 @@ jobs:
mkdir -p ${RUNNER_TEMP}/gh-aw/safeoutputs
mkdir -p /tmp/gh-aw/safeoutputs
mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs
- cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/config.json << 'GH_AW_SAFE_OUTPUTS_CONFIG_ec5103758147a5b8_EOF'
+ cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/config.json << 'GH_AW_SAFE_OUTPUTS_CONFIG_5d352d3a7dc8ac3d_EOF'
{"add_comment":{"max":3,"target":"*"},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"true"},"push_repo_memory":{"memories":[{"dir":"/tmp/gh-aw/repo-memory/default","id":"default","max_file_count":100,"max_file_size":10240,"max_patch_size":10240}]},"push_to_pull_request_branch":{"if_no_changes":"warn","max":3,"max_patch_size":1024,"protected_files":["package.json","bun.lockb","bunfig.toml","deno.json","deno.jsonc","deno.lock","global.json","NuGet.Config","Directory.Packages.props","mix.exs","mix.lock","go.mod","go.sum","stack.yaml","stack.yaml.lock","pom.xml","build.gradle","build.gradle.kts","settings.gradle","settings.gradle.kts","gradle.properties","package-lock.json","yarn.lock","pnpm-lock.yaml","npm-shrinkwrap.json","requirements.txt","Pipfile","Pipfile.lock","pyproject.toml","setup.py","setup.cfg","Gemfile","Gemfile.lock","uv.lock","CODEOWNERS"],"protected_files_policy":"allowed","protected_path_prefixes":[".github/",".agents/"],"target":"*"}}
- GH_AW_SAFE_OUTPUTS_CONFIG_ec5103758147a5b8_EOF
+ GH_AW_SAFE_OUTPUTS_CONFIG_5d352d3a7dc8ac3d_EOF
- name: Write Safe Outputs Tools
run: |
- cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/tools_meta.json << 'GH_AW_SAFE_OUTPUTS_TOOLS_META_0b11521b2b188ecd_EOF'
+ cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/tools_meta.json << 'GH_AW_SAFE_OUTPUTS_TOOLS_META_e94b9b0d12aa4571_EOF'
{
"description_suffixes": {
"add_comment": " CONSTRAINTS: Maximum 3 comment(s) can be added. Target: *.",
@@ -412,8 +412,8 @@ jobs:
"repo_params": {},
"dynamic_tools": []
}
- GH_AW_SAFE_OUTPUTS_TOOLS_META_0b11521b2b188ecd_EOF
- cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/validation.json << 'GH_AW_SAFE_OUTPUTS_VALIDATION_4f2d89a889ce19de_EOF'
+ GH_AW_SAFE_OUTPUTS_TOOLS_META_e94b9b0d12aa4571_EOF
+ cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/validation.json << 'GH_AW_SAFE_OUTPUTS_VALIDATION_7cc97c0128fe54d3_EOF'
{
"add_comment": {
"defaultMax": 1,
@@ -511,7 +511,7 @@ jobs:
}
}
}
- GH_AW_SAFE_OUTPUTS_VALIDATION_4f2d89a889ce19de_EOF
+ GH_AW_SAFE_OUTPUTS_VALIDATION_7cc97c0128fe54d3_EOF
node ${RUNNER_TEMP}/gh-aw/actions/generate_safe_outputs_tools.cjs
- name: Generate Safe Outputs MCP Server Config
id: safe-outputs-config
@@ -581,7 +581,7 @@ jobs:
export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.2.11'
mkdir -p /home/runner/.copilot
- cat << GH_AW_MCP_CONFIG_e92a5aad7336713f_EOF | bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh
+ cat << GH_AW_MCP_CONFIG_df1a40d4ce900f98_EOF | bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh
{
"mcpServers": {
"github": {
@@ -622,7 +622,7 @@ jobs:
"payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}"
}
}
- GH_AW_MCP_CONFIG_e92a5aad7336713f_EOF
+ GH_AW_MCP_CONFIG_df1a40d4ce900f98_EOF
- name: Download activation artifact
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
with:
diff --git a/.github/workflows/evergreen.md b/.github/workflows/evergreen.md
index 481eae1c..147b912b 100644
--- a/.github/workflows/evergreen.md
+++ b/.github/workflows/evergreen.md
@@ -54,7 +54,7 @@ steps:
FORCED_PR: ${{ github.event.inputs.pr_number }}
run: |
python3 - << 'PYEOF'
- import os, json, re, sys
+ import os, json, re, subprocess, sys
import urllib.request, urllib.error
token = os.environ.get("GITHUB_TOKEN", "")
@@ -179,10 +179,10 @@ steps:
print(f"Found {len(prs)} open PR(s)")
if not prs:
- print("No open PRs. Exiting.")
+ print("No open PRs. Nothing to do.")
with open(output_file, "w") as f:
json.dump({"selected": None, "reason": "no_open_prs"}, f)
- sys.exit(1)
+ sys.exit(0)
# Evaluate each PR deterministically (sorted by PR number ascending)
candidates = []
@@ -249,12 +249,16 @@ steps:
json.dump(result, f, indent=2)
if selected:
+ branch = selected["head_branch"]
+ print(f"Checking out PR branch before agent run: {branch}")
+ subprocess.check_call(["git", "checkout", "-B", branch, f"origin/{branch}"])
+ subprocess.check_call(["git", "branch", "--set-upstream-to", f"origin/{branch}", branch])
print(f"\n>>> Selected PR #{selected['pr_number']}: {selected['title']}")
print(f" Issues: {selected['issues']}")
print(f" Attempt: {selected['attempts'] + 1}/{MAX_ATTEMPTS}")
else:
- print("\nNo PRs need attention. Exiting.")
- sys.exit(1)
+ print("\nNo PRs need attention. Nothing to do.")
+ sys.exit(0)
PYEOF
features:
@@ -279,11 +283,9 @@ A pre-flight step has already identified a PR that needs attention. Read the sel
- `selected.base_branch` — the target branch (usually `main`)
- `selected.attempts` — how many times we've already tried on this SHA
-2. **Check out the PR branch** as a local tracking branch so the push tool can find it:
- ```bash
- git checkout -b origin/
- ```
- where `` is `selected.head_branch` from the selection file. **Do not** use a detached HEAD checkout — the `push-to-pull-request-branch` tool requires a named local branch.
+ > If `selected` is `null`, no PRs need attention right now. Call the **noop** tool with a message like "All PRs are healthy — nothing to fix." and stop.
+
+2. The pre-flight step already checks out `selected.head_branch` as a named local tracking branch before you start. Keep working on that branch (do not switch back to `main` or use detached HEAD).
3. **Fix the issues**:
diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
index a7ede9cd..127a90d6 100644
--- a/.github/workflows/pages.yml
+++ b/.github/workflows/pages.yml
@@ -36,6 +36,13 @@ jobs:
- name: Bundle TypeScript compiler for offline playground
run: cp node_modules/typescript/lib/typescript.js ./playground/dist/typescript.js
+ - name: Copy benchmark results to playground
+ run: |
+ mkdir -p ./playground/benchmarks
+ if [ -f benchmarks/results.json ]; then
+ cp benchmarks/results.json ./playground/benchmarks/results.json
+ fi
+
- name: Setup Python
uses: actions/setup-python@v5
with:
diff --git a/.github/workflows/sync-branches.lock.yml b/.github/workflows/sync-branches.lock.yml
index 0183de8c..78f6887e 100644
--- a/.github/workflows/sync-branches.lock.yml
+++ b/.github/workflows/sync-branches.lock.yml
@@ -24,7 +24,7 @@
# Runs whenever the default branch changes and merges it into all active
# autoloop/* branches so that program iterations always build on the latest code.
#
-# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"719939a8013705db572524495d231d61b5652aa8fa86506426ccbe84aade70e1","compiler_version":"v0.65.6","strict":true,"agent_id":"copilot"}
+# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"42baaebd1818fa54f67dfaadbc42d425fcd44388126d27496222c26a7fcdd745","compiler_version":"v0.65.6","strict":true,"agent_id":"copilot"}
name: "Sync Branches"
"on":
@@ -133,13 +133,13 @@ jobs:
run: |
bash ${RUNNER_TEMP}/gh-aw/actions/create_prompt_first.sh
{
- cat << 'GH_AW_PROMPT_c6c6ca09724d546c_EOF'
+ cat << 'GH_AW_PROMPT_6ce21e657f0d715b_EOF'
- GH_AW_PROMPT_c6c6ca09724d546c_EOF
+ GH_AW_PROMPT_6ce21e657f0d715b_EOF
cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/markdown.md"
- cat << 'GH_AW_PROMPT_c6c6ca09724d546c_EOF'
+ cat << 'GH_AW_PROMPT_6ce21e657f0d715b_EOF'
The following GitHub context information is available for this workflow:
{{#if __GH_AW_GITHUB_ACTOR__ }}
@@ -168,12 +168,12 @@ jobs:
{{/if}}
- GH_AW_PROMPT_c6c6ca09724d546c_EOF
+ GH_AW_PROMPT_6ce21e657f0d715b_EOF
cat "${RUNNER_TEMP}/gh-aw/prompts/github_mcp_tools_prompt.md"
- cat << 'GH_AW_PROMPT_c6c6ca09724d546c_EOF'
+ cat << 'GH_AW_PROMPT_6ce21e657f0d715b_EOF'
{{#runtime-import .github/workflows/sync-branches.md}}
- GH_AW_PROMPT_c6c6ca09724d546c_EOF
+ GH_AW_PROMPT_6ce21e657f0d715b_EOF
} > "$GH_AW_PROMPT"
- name: Interpolate variables and render templates
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
@@ -285,7 +285,7 @@ jobs:
DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
GITHUB_REPOSITORY: ${{ github.repository }}
name: Merge default branch into all autoloop program branches
- run: "python3 - << 'PYEOF'\nimport os, subprocess, sys\n\ntoken = os.environ.get(\"GITHUB_TOKEN\", \"\")\nrepo = os.environ.get(\"GITHUB_REPOSITORY\", \"\")\ndefault_branch = os.environ.get(\"DEFAULT_BRANCH\", \"main\")\n\n# List all remote branches matching the autoloop/* pattern\nresult = subprocess.run(\n [\"git\", \"branch\", \"-r\", \"--list\", \"origin/autoloop/*\"],\n capture_output=True, text=True\n)\nif result.returncode != 0:\n print(f\"Failed to list remote branches: {result.stderr}\")\n sys.exit(0)\n\nbranches = [b.strip().replace(\"origin/\", \"\") for b in result.stdout.strip().split(\"\\n\") if b.strip()]\n\nif not branches:\n print(\"No autoloop/* branches found. Nothing to sync.\")\n sys.exit(0)\n\nprint(f\"Found {len(branches)} autoloop branch(es) to sync: {branches}\")\n\nfailed = []\nfor branch in branches:\n print(f\"\\n--- Syncing {branch} with {default_branch} ---\")\n\n # Fetch both branches\n subprocess.run([\"git\", \"fetch\", \"origin\", branch], capture_output=True)\n subprocess.run([\"git\", \"fetch\", \"origin\", default_branch], capture_output=True)\n\n # Check out the program branch\n checkout = subprocess.run(\n [\"git\", \"checkout\", branch],\n capture_output=True, text=True\n )\n if checkout.returncode != 0:\n # Try creating a local tracking branch\n checkout = subprocess.run(\n [\"git\", \"checkout\", \"-b\", branch, f\"origin/{branch}\"],\n capture_output=True, text=True\n )\n if checkout.returncode != 0:\n print(f\" Failed to checkout {branch}: {checkout.stderr}\")\n failed.append(branch)\n continue\n\n # Merge the default branch into the program branch\n merge = subprocess.run(\n [\"git\", \"merge\", f\"origin/{default_branch}\", \"--no-edit\",\n \"-m\", f\"Merge {default_branch} into {branch}\"],\n capture_output=True, text=True\n )\n if merge.returncode != 0:\n print(f\" Merge conflict or failure for {branch}: {merge.stderr}\")\n # Abort the merge to leave a clean state\n subprocess.run([\"git\", \"merge\", \"--abort\"], 
capture_output=True)\n failed.append(branch)\n continue\n\n # Push the updated branch\n push = subprocess.run(\n [\"git\", \"push\", \"origin\", branch],\n capture_output=True, text=True\n )\n if push.returncode != 0:\n print(f\" Failed to push {branch}: {push.stderr}\")\n failed.append(branch)\n continue\n\n print(f\" Successfully synced {branch}\")\n\n# Return to default branch\nsubprocess.run([\"git\", \"checkout\", default_branch], capture_output=True)\n\nif failed:\n print(f\"\\n⚠️ Failed to sync {len(failed)} branch(es): {failed}\")\n print(\"These branches may need manual conflict resolution.\")\n # Don't fail the workflow — log the issue but continue\nelse:\n print(f\"\\n✅ All {len(branches)} branch(es) synced successfully.\")\nPYEOF\n"
+ run: "python3 - << 'PYEOF'\nimport os, re, subprocess, sys\n\ntoken = os.environ.get(\"GITHUB_TOKEN\", \"\")\nrepo = os.environ.get(\"GITHUB_REPOSITORY\", \"\")\ndefault_branch = os.environ.get(\"DEFAULT_BRANCH\", \"main\")\n\n# List all remote branches matching the autoloop/* pattern\nresult = subprocess.run(\n [\"git\", \"branch\", \"-r\", \"--list\", \"origin/autoloop/*\"],\n capture_output=True, text=True\n)\nif result.returncode != 0:\n print(f\"Failed to list remote branches: {result.stderr}\")\n sys.exit(0)\n\nall_branches = [b.strip().replace(\"origin/\", \"\") for b in result.stdout.strip().split(\"\\n\") if b.strip()]\n\n# Filter to canonical branches only: autoloop/{name} without hash suffixes.\n# Stale branches created by the framework (e.g. autoloop/name-a1b2c3d4e5f6g7h8)\n# are skipped — they are not the long-running program branches.\n_hash_suffix = re.compile(r'-[0-9a-f]{16}$')\nbranches = [b for b in all_branches if not _hash_suffix.search(b)]\nskipped_branches = [b for b in all_branches if _hash_suffix.search(b)]\n\nif skipped_branches:\n print(f\"Skipping {len(skipped_branches)} stale branch(es) with hash suffixes: {skipped_branches}\")\n\nif not branches:\n print(\"No canonical autoloop/* branches found. 
Nothing to sync.\")\n sys.exit(0)\n\nprint(f\"Found {len(branches)} canonical autoloop branch(es) to sync: {branches}\")\n\nfailed = []\nfor branch in branches:\n print(f\"\\n--- Syncing {branch} with {default_branch} ---\")\n\n # Fetch both branches\n subprocess.run([\"git\", \"fetch\", \"origin\", branch], capture_output=True)\n subprocess.run([\"git\", \"fetch\", \"origin\", default_branch], capture_output=True)\n\n # Check out the program branch\n checkout = subprocess.run(\n [\"git\", \"checkout\", branch],\n capture_output=True, text=True\n )\n if checkout.returncode != 0:\n # Try creating a local tracking branch\n checkout = subprocess.run(\n [\"git\", \"checkout\", \"-b\", branch, f\"origin/{branch}\"],\n capture_output=True, text=True\n )\n if checkout.returncode != 0:\n print(f\" Failed to checkout {branch}: {checkout.stderr}\")\n failed.append(branch)\n continue\n\n # Merge the default branch into the program branch\n merge = subprocess.run(\n [\"git\", \"merge\", f\"origin/{default_branch}\", \"--no-edit\",\n \"-m\", f\"Merge {default_branch} into {branch}\"],\n capture_output=True, text=True\n )\n if merge.returncode != 0:\n print(f\" Merge conflict or failure for {branch}: {merge.stderr}\")\n # Abort the merge to leave a clean state\n subprocess.run([\"git\", \"merge\", \"--abort\"], capture_output=True)\n failed.append(branch)\n continue\n\n # Push the updated branch\n push = subprocess.run(\n [\"git\", \"push\", \"origin\", branch],\n capture_output=True, text=True\n )\n if push.returncode != 0:\n print(f\" Failed to push {branch}: {push.stderr}\")\n failed.append(branch)\n continue\n\n print(f\" Successfully synced {branch}\")\n\n# Return to default branch\nsubprocess.run([\"git\", \"checkout\", default_branch], capture_output=True)\n\nif failed:\n print(f\"\\n⚠️ Failed to sync {len(failed)} branch(es): {failed}\")\n print(\"These branches may need manual conflict resolution.\")\n # Don't fail the workflow — log the issue but continue\nelse:\n 
print(f\"\\n✅ All {len(branches)} branch(es) synced successfully.\")\nPYEOF\n"
- name: Configure Git credentials
env:
@@ -354,7 +354,7 @@ jobs:
export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.2.11'
mkdir -p /home/runner/.copilot
- cat << GH_AW_MCP_CONFIG_f2267ff9994f362a_EOF | bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh
+ cat << GH_AW_MCP_CONFIG_6e54b48a11cd24bb_EOF | bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh
{
"mcpServers": {
"github": {
@@ -381,7 +381,7 @@ jobs:
"payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}"
}
}
- GH_AW_MCP_CONFIG_f2267ff9994f362a_EOF
+ GH_AW_MCP_CONFIG_6e54b48a11cd24bb_EOF
- name: Download activation artifact
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
with:
diff --git a/.github/workflows/sync-branches.md b/.github/workflows/sync-branches.md
index d6775100..772e2438 100644
--- a/.github/workflows/sync-branches.md
+++ b/.github/workflows/sync-branches.md
@@ -25,7 +25,7 @@ steps:
DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
run: |
python3 - << 'PYEOF'
- import os, subprocess, sys
+ import os, re, subprocess, sys
token = os.environ.get("GITHUB_TOKEN", "")
repo = os.environ.get("GITHUB_REPOSITORY", "")
@@ -40,13 +40,23 @@ steps:
print(f"Failed to list remote branches: {result.stderr}")
sys.exit(0)
- branches = [b.strip().replace("origin/", "") for b in result.stdout.strip().split("\n") if b.strip()]
+ all_branches = [b.strip().replace("origin/", "") for b in result.stdout.strip().split("\n") if b.strip()]
+
+ # Filter to canonical branches only: autoloop/{name} without hash suffixes.
+ # Stale branches created by the framework (e.g. autoloop/name-a1b2c3d4e5f6g7h8)
+ # are skipped — they are not the long-running program branches.
+ _hash_suffix = re.compile(r'-[0-9a-f]{16}$')
+ branches = [b for b in all_branches if not _hash_suffix.search(b)]
+ skipped_branches = [b for b in all_branches if _hash_suffix.search(b)]
+
+ if skipped_branches:
+ print(f"Skipping {len(skipped_branches)} stale branch(es) with hash suffixes: {skipped_branches}")
if not branches:
- print("No autoloop/* branches found. Nothing to sync.")
+ print("No canonical autoloop/* branches found. Nothing to sync.")
sys.exit(0)
- print(f"Found {len(branches)} autoloop branch(es) to sync: {branches}")
+ print(f"Found {len(branches)} canonical autoloop branch(es) to sync: {branches}")
failed = []
for branch in branches:
diff --git a/benchmarks/pandas/bench_concat.py b/benchmarks/pandas/bench_concat.py
new file mode 100644
index 00000000..3533109e
--- /dev/null
+++ b/benchmarks/pandas/bench_concat.py
@@ -0,0 +1,28 @@
+"""Benchmark: concat — concatenate two 50k-row DataFrames"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 50_000
+WARMUP = 5
+ITERATIONS = 20
+
+vals1 = np.arange(ROWS, dtype=np.float64)
+vals2 = np.arange(ROWS, dtype=np.float64) * 2.0
+df1 = pd.DataFrame({"value": vals1})
+df2 = pd.DataFrame({"value": vals2})
+
+for _ in range(WARMUP):
+ pd.concat([df1, df2], ignore_index=True)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ pd.concat([df1, df2], ignore_index=True)
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "concat",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_dataframe_apply.py b/benchmarks/pandas/bench_dataframe_apply.py
new file mode 100644
index 00000000..6788d422
--- /dev/null
+++ b/benchmarks/pandas/bench_dataframe_apply.py
@@ -0,0 +1,27 @@
+"""Benchmark: dataframe_apply — apply a function across rows of a 10k-row DataFrame"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 10_000
+WARMUP = 3
+ITERATIONS = 10
+
+a = np.arange(ROWS, dtype=np.float64)
+b = np.arange(ROWS, dtype=np.float64) * 2.0
+df = pd.DataFrame({"a": a, "b": b})
+
+for _ in range(WARMUP):
+ df.apply(lambda row: row["a"] + row["b"], axis=1)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ df.apply(lambda row: row["a"] + row["b"], axis=1)
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "dataframe_apply",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_dataframe_creation.py b/benchmarks/pandas/bench_dataframe_creation.py
new file mode 100644
index 00000000..706c8b13
--- /dev/null
+++ b/benchmarks/pandas/bench_dataframe_creation.py
@@ -0,0 +1,27 @@
+"""Benchmark: DataFrame creation from arrays (pandas equivalent)"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+nums1 = np.arange(ROWS, dtype=np.float64) * 1.1
+nums2 = np.arange(ROWS, dtype=np.float64) * 2.2
+strs = [f"label_{i % 100}" for i in range(ROWS)]
+
+for _ in range(WARMUP):
+ pd.DataFrame({"a": nums1, "b": nums2, "c": strs})
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ pd.DataFrame({"a": nums1, "b": nums2, "c": strs})
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "dataframe_creation",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_dataframe_dropna.py b/benchmarks/pandas/bench_dataframe_dropna.py
new file mode 100644
index 00000000..08a11895
--- /dev/null
+++ b/benchmarks/pandas/bench_dataframe_dropna.py
@@ -0,0 +1,27 @@
+"""Benchmark: dataframe_dropna — drop rows with NaN values from 100k-row DataFrame"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 5
+ITERATIONS = 20
+
+a = np.where(np.arange(ROWS) % 10 == 0, np.nan, np.arange(ROWS) * 1.1)
+b = np.where(np.arange(ROWS) % 7 == 0, np.nan, np.arange(ROWS) * 2.2)
+df = pd.DataFrame({"a": a, "b": b})
+
+for _ in range(WARMUP):
+ df.dropna()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ df.dropna()
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "dataframe_dropna",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_dataframe_filter.py b/benchmarks/pandas/bench_dataframe_filter.py
new file mode 100644
index 00000000..112384f8
--- /dev/null
+++ b/benchmarks/pandas/bench_dataframe_filter.py
@@ -0,0 +1,26 @@
+"""Benchmark: DataFrame filter (boolean mask on 100k-row DataFrame)"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 5
+ITERATIONS = 20
+
+vals = np.arange(ROWS, dtype=np.float64) * 0.1
+df = pd.DataFrame({"value": vals})
+
+for _ in range(WARMUP):
+ df[df["value"] > 5000]
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ df[df["value"] > 5000]
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "dataframe_filter",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_dataframe_rename.py b/benchmarks/pandas/bench_dataframe_rename.py
new file mode 100644
index 00000000..65e44626
--- /dev/null
+++ b/benchmarks/pandas/bench_dataframe_rename.py
@@ -0,0 +1,27 @@
+"""Benchmark: dataframe_rename — rename columns in a 100k-row DataFrame"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 5
+ITERATIONS = 20
+
+a = np.arange(ROWS, dtype=np.float64) * 1.1
+b = np.arange(ROWS, dtype=np.float64) * 2.2
+df = pd.DataFrame({"old_a": a, "old_b": b})
+
+for _ in range(WARMUP):
+ df.rename(columns={"old_a": "new_a", "old_b": "new_b"})
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ df.rename(columns={"old_a": "new_a", "old_b": "new_b"})
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "dataframe_rename",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_dataframe_sort.py b/benchmarks/pandas/bench_dataframe_sort.py
new file mode 100644
index 00000000..6ef3c84d
--- /dev/null
+++ b/benchmarks/pandas/bench_dataframe_sort.py
@@ -0,0 +1,28 @@
+"""Benchmark: dataframe_sort — sort a 100k-row DataFrame by two columns"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+rng = np.random.default_rng(42)
+a = [f"group_{i % 100}" for i in range(ROWS)]
+b = rng.random(ROWS) * 1000
+df = pd.DataFrame({"a": a, "b": b})
+
+for _ in range(WARMUP):
+ df.sort_values(["a", "b"])
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ df.sort_values(["a", "b"])
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "dataframe_sort",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_describe.py b/benchmarks/pandas/bench_describe.py
new file mode 100644
index 00000000..b9e84dcc
--- /dev/null
+++ b/benchmarks/pandas/bench_describe.py
@@ -0,0 +1,27 @@
+"""Benchmark: describe — summary statistics on a 100k-row DataFrame"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+a = np.arange(ROWS, dtype=np.float64) * 1.1
+b = np.sqrt(np.arange(1, ROWS + 1, dtype=np.float64))
+df = pd.DataFrame({"a": a, "b": b})
+
+for _ in range(WARMUP):
+ df.describe()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ df.describe()
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "describe",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_ewm_mean.py b/benchmarks/pandas/bench_ewm_mean.py
new file mode 100644
index 00000000..4e6cbadd
--- /dev/null
+++ b/benchmarks/pandas/bench_ewm_mean.py
@@ -0,0 +1,26 @@
+"""Benchmark: ewm_mean — exponentially weighted mean on 100k-element Series"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+data = np.sin(np.arange(ROWS) * 0.05)
+s = pd.Series(data)
+
+for _ in range(WARMUP):
+ s.ewm(span=20).mean()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.ewm(span=20).mean()
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "ewm_mean",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_groupby_mean.py b/benchmarks/pandas/bench_groupby_mean.py
new file mode 100644
index 00000000..050959af
--- /dev/null
+++ b/benchmarks/pandas/bench_groupby_mean.py
@@ -0,0 +1,27 @@
+"""Benchmark: GroupBy mean on 100k-row DataFrame"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+keys = [f"group_{i % 100}" for i in range(ROWS)]
+vals = np.arange(ROWS, dtype=np.float64) * 0.1
+df = pd.DataFrame({"key": keys, "value": vals})
+
+for _ in range(WARMUP):
+ df.groupby("key")["value"].mean()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ df.groupby("key")["value"].mean()
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "groupby_mean",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_merge.py b/benchmarks/pandas/bench_merge.py
new file mode 100644
index 00000000..9775f4a2
--- /dev/null
+++ b/benchmarks/pandas/bench_merge.py
@@ -0,0 +1,29 @@
+"""Benchmark: merge — inner join two 50k-row DataFrames on a key column"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 50_000
+WARMUP = 3
+ITERATIONS = 10
+
+keys = np.arange(ROWS) % 1000
+vals1 = np.arange(ROWS, dtype=np.float64)
+vals2 = np.arange(ROWS, dtype=np.float64) * 2.0
+df1 = pd.DataFrame({"key": keys, "val1": vals1})
+df2 = pd.DataFrame({"key": keys, "val2": vals2})
+
+for _ in range(WARMUP):
+ pd.merge(df1, df2, on="key", how="inner")
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ pd.merge(df1, df2, on="key", how="inner")
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "merge",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_pivot_table.py b/benchmarks/pandas/bench_pivot_table.py
new file mode 100644
index 00000000..f65f9321
--- /dev/null
+++ b/benchmarks/pandas/bench_pivot_table.py
@@ -0,0 +1,28 @@
+"""Benchmark: pivot_table — pivot aggregation on 100k-row DataFrame"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+rows = [f"row_{i % 100}" for i in range(ROWS)]
+cols = [f"col_{i % 50}" for i in range(ROWS)]
+vals = np.arange(ROWS, dtype=np.float64) * 0.1
+df = pd.DataFrame({"row": rows, "col": cols, "value": vals})
+
+for _ in range(WARMUP):
+ df.pivot_table(values="value", index="row", columns="col", aggfunc="mean")
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ df.pivot_table(values="value", index="row", columns="col", aggfunc="mean")
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "pivot_table",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_read_csv.py b/benchmarks/pandas/bench_read_csv.py
new file mode 100644
index 00000000..d6aa816a
--- /dev/null
+++ b/benchmarks/pandas/bench_read_csv.py
@@ -0,0 +1,31 @@
+"""Benchmark: read_csv — parse a 100k-row CSV file"""
+import json, time, os, tempfile
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 2
+ITERATIONS = 5
+
+# Build CSV file (create the scratch directory first so open() cannot fail)
+tmp_path = "/tmp/gh-aw/agent/bench_read_csv.csv"
+os.makedirs(os.path.dirname(tmp_path), exist_ok=True)
+with open(tmp_path, "w") as f:
+    f.write("id,value,label\n")
+    for i in range(ROWS):
+        f.write(f"{i},{i * 1.1:.4f},cat_{i % 50}\n")
+
+for _ in range(WARMUP):
+    pd.read_csv(tmp_path)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    pd.read_csv(tmp_path)
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+    "function": "read_csv",
+    "mean_ms": total / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_rolling_mean.py b/benchmarks/pandas/bench_rolling_mean.py
new file mode 100644
index 00000000..5258fca4
--- /dev/null
+++ b/benchmarks/pandas/bench_rolling_mean.py
@@ -0,0 +1,26 @@
+"""Benchmark: rolling mean with window=100 on 100k-element Series"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+data = np.sin(np.arange(ROWS) * 0.01)
+s = pd.Series(data)
+
+for _ in range(WARMUP):
+ s.rolling(100).mean()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.rolling(100).mean()
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "rolling_mean",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_series_arithmetic.py b/benchmarks/pandas/bench_series_arithmetic.py
new file mode 100644
index 00000000..4f0325b0
--- /dev/null
+++ b/benchmarks/pandas/bench_series_arithmetic.py
@@ -0,0 +1,26 @@
+"""Benchmark: Series arithmetic (add + multiply on 100k-element Series)"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 5
+ITERATIONS = 20
+
+data = np.arange(ROWS, dtype=np.float64) * 0.5
+s = pd.Series(data)
+
+for _ in range(WARMUP):
+ (s + 2.0) * 0.5
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ (s + 2.0) * 0.5
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "series_arithmetic",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_series_creation.py b/benchmarks/pandas/bench_series_creation.py
new file mode 100644
index 00000000..c27fcf87
--- /dev/null
+++ b/benchmarks/pandas/bench_series_creation.py
@@ -0,0 +1,47 @@
+"""
+Benchmark: Series creation
+
+Creates a Series from a large numeric array and measures the time.
+Outputs JSON: {"function": "series_creation", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+"""
+
+import json
+import time
+
+import pandas as pd
+
+SIZE = 100_000
+WARMUP = 5
+ITERATIONS = 50
+
+
+def generate_data(n: int) -> "list[float]":
+ """Generate a deterministic numeric array of the given size."""
+ return [i * 1.1 + 0.5 for i in range(n)]
+
+
+data = generate_data(SIZE)
+
+# Warm-up
+for _ in range(WARMUP):
+ pd.Series(list(data))
+
+# Measured runs
+times: "list[float]" = []
+for _ in range(ITERATIONS):
+ start = time.perf_counter()
+ pd.Series(list(data))
+ end = time.perf_counter()
+ times.append((end - start) * 1000) # convert to ms
+
+total_ms = sum(times)
+mean_ms = total_ms / ITERATIONS
+
+result = {
+ "function": "series_creation",
+ "mean_ms": round(mean_ms, 3),
+ "iterations": ITERATIONS,
+ "total_ms": round(total_ms, 3),
+}
+
+print(json.dumps(result))
diff --git a/benchmarks/pandas/bench_series_cumsum.py b/benchmarks/pandas/bench_series_cumsum.py
new file mode 100644
index 00000000..556e3ebd
--- /dev/null
+++ b/benchmarks/pandas/bench_series_cumsum.py
@@ -0,0 +1,26 @@
+"""Benchmark: series_cumsum — cumulative sum on 100k-element Series"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 5
+ITERATIONS = 20
+
+data = np.arange(ROWS, dtype=np.float64) * 0.001
+s = pd.Series(data)
+
+for _ in range(WARMUP):
+ s.cumsum()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.cumsum()
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "series_cumsum",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_series_fillna.py b/benchmarks/pandas/bench_series_fillna.py
new file mode 100644
index 00000000..6b62f6ad
--- /dev/null
+++ b/benchmarks/pandas/bench_series_fillna.py
@@ -0,0 +1,26 @@
+"""Benchmark: series_fillna — fill NaN values in a 100k-element Series"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 5
+ITERATIONS = 20
+
+data = np.where(np.arange(ROWS) % 5 == 0, np.nan, np.arange(ROWS) * 1.1)
+s = pd.Series(data)
+
+for _ in range(WARMUP):
+ s.fillna(0.0)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.fillna(0.0)
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "series_fillna",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_series_shift.py b/benchmarks/pandas/bench_series_shift.py
new file mode 100644
index 00000000..0b294485
--- /dev/null
+++ b/benchmarks/pandas/bench_series_shift.py
@@ -0,0 +1,26 @@
+"""Benchmark: series_shift — shift values by 1 position in a 100k-element Series"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 5
+ITERATIONS = 20
+
+data = np.arange(ROWS, dtype=np.float64)
+s = pd.Series(data)
+
+for _ in range(WARMUP):
+ s.shift(1)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.shift(1)
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "series_shift",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_series_sort.py b/benchmarks/pandas/bench_series_sort.py
new file mode 100644
index 00000000..c31de4aa
--- /dev/null
+++ b/benchmarks/pandas/bench_series_sort.py
@@ -0,0 +1,27 @@
+"""Benchmark: Series sort (sort_values on 100k-element numeric Series)"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+rng = np.random.default_rng(42)
+data = rng.random(ROWS) * 1000
+s = pd.Series(data)
+
+for _ in range(WARMUP):
+ s.sort_values()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.sort_values()
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "series_sort",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_series_string_ops.py b/benchmarks/pandas/bench_series_string_ops.py
new file mode 100644
index 00000000..8744ddcc
--- /dev/null
+++ b/benchmarks/pandas/bench_series_string_ops.py
@@ -0,0 +1,27 @@
+"""Benchmark: series_string_ops — str.upper and str.contains on 100k strings"""
+import json, time
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+data = [f"hello_world_{i % 200}" for i in range(ROWS)]
+s = pd.Series(data)
+
+for _ in range(WARMUP):
+ s.str.upper()
+ s.str.contains("world")
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.str.upper()
+ s.str.contains("world")
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "series_string_ops",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_series_value_counts.py b/benchmarks/pandas/bench_series_value_counts.py
new file mode 100644
index 00000000..c156a1eb
--- /dev/null
+++ b/benchmarks/pandas/bench_series_value_counts.py
@@ -0,0 +1,25 @@
+"""Benchmark: value_counts on a 100k-element Series with 100 distinct values"""
+import json, time
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+data = [f"cat_{i % 100}" for i in range(ROWS)]
+s = pd.Series(data)
+
+for _ in range(WARMUP):
+ s.value_counts()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.value_counts()
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "series_value_counts",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/results.json b/benchmarks/results.json
new file mode 100644
index 00000000..c883f334
--- /dev/null
+++ b/benchmarks/results.json
@@ -0,0 +1,247 @@
+{
+ "benchmarks": [
+ {
+ "function": "concat",
+ "tsb": null,
+ "pandas": {
+ "function": "concat",
+ "mean_ms": 0.11375509999993483,
+ "iterations": 20,
+ "total_ms": 2.2751019999986966
+ },
+ "ratio": null
+ },
+ {
+ "function": "dataframe_apply",
+ "tsb": null,
+ "pandas": {
+ "function": "dataframe_apply",
+ "mean_ms": 47.161531699998704,
+ "iterations": 10,
+ "total_ms": 471.61531699998704
+ },
+ "ratio": null
+ },
+ {
+ "function": "dataframe_creation",
+ "tsb": null,
+ "pandas": {
+ "function": "dataframe_creation",
+ "mean_ms": 5.148059900000135,
+ "iterations": 10,
+ "total_ms": 51.48059900000135
+ },
+ "ratio": null
+ },
+ {
+ "function": "dataframe_dropna",
+ "tsb": null,
+ "pandas": {
+ "function": "dataframe_dropna",
+ "mean_ms": 2.42739894999886,
+ "iterations": 20,
+ "total_ms": 48.547978999977204
+ },
+ "ratio": null
+ },
+ {
+ "function": "dataframe_filter",
+ "tsb": null,
+ "pandas": {
+ "function": "dataframe_filter",
+ "mean_ms": 0.4964389500003108,
+ "iterations": 20,
+ "total_ms": 9.928779000006216
+ },
+ "ratio": null
+ },
+ {
+ "function": "dataframe_rename",
+ "tsb": null,
+ "pandas": {
+ "function": "dataframe_rename",
+ "mean_ms": 0.17103454999869427,
+ "iterations": 20,
+ "total_ms": 3.4206909999738855
+ },
+ "ratio": null
+ },
+ {
+ "function": "dataframe_sort",
+ "tsb": null,
+ "pandas": {
+ "function": "dataframe_sort",
+ "mean_ms": 33.301584399998774,
+ "iterations": 10,
+ "total_ms": 333.01584399998774
+ },
+ "ratio": null
+ },
+ {
+ "function": "describe",
+ "tsb": null,
+ "pandas": {
+ "function": "describe",
+ "mean_ms": 5.521558600003118,
+ "iterations": 10,
+ "total_ms": 55.21558600003118
+ },
+ "ratio": null
+ },
+ {
+ "function": "ewm_mean",
+ "tsb": null,
+ "pandas": {
+ "function": "ewm_mean",
+ "mean_ms": 1.7652839999982461,
+ "iterations": 10,
+ "total_ms": 17.65283999998246
+ },
+ "ratio": null
+ },
+ {
+ "function": "groupby_mean",
+ "tsb": null,
+ "pandas": {
+ "function": "groupby_mean",
+ "mean_ms": 8.079756900002621,
+ "iterations": 10,
+ "total_ms": 80.79756900002621
+ },
+ "ratio": null
+ },
+ {
+ "function": "merge",
+ "tsb": null,
+ "pandas": {
+ "function": "merge",
+ "mean_ms": 60.42320619999941,
+ "iterations": 10,
+ "total_ms": 604.2320619999941
+ },
+ "ratio": null
+ },
+ {
+ "function": "pivot_table",
+ "tsb": null,
+ "pandas": {
+ "function": "pivot_table",
+ "mean_ms": 22.500251999997545,
+ "iterations": 10,
+ "total_ms": 225.00251999997545
+ },
+ "ratio": null
+ },
+ {
+ "function": "read_csv",
+ "tsb": null,
+ "pandas": {
+ "function": "read_csv",
+ "mean_ms": 29.951929399999244,
+ "iterations": 5,
+ "total_ms": 149.75964699999622
+ },
+ "ratio": null
+ },
+ {
+ "function": "rolling_mean",
+ "tsb": null,
+ "pandas": {
+ "function": "rolling_mean",
+ "mean_ms": 1.71982609999759,
+ "iterations": 10,
+ "total_ms": 17.1982609999759
+ },
+ "ratio": null
+ },
+ {
+ "function": "series_arithmetic",
+ "tsb": null,
+ "pandas": {
+ "function": "series_arithmetic",
+ "mean_ms": 0.764571400000591,
+ "iterations": 20,
+ "total_ms": 15.29142800001182
+ },
+ "ratio": null
+ },
+ {
+ "function": "series_creation",
+ "tsb": null,
+ "pandas": {
+ "function": "series_creation",
+ "mean_ms": 7.607,
+ "iterations": 50,
+ "total_ms": 380.349
+ },
+ "ratio": null
+ },
+ {
+ "function": "series_cumsum",
+ "tsb": null,
+ "pandas": {
+ "function": "series_cumsum",
+ "mean_ms": 1.1250383499998406,
+ "iterations": 20,
+ "total_ms": 22.500766999996813
+ },
+ "ratio": null
+ },
+ {
+ "function": "series_fillna",
+ "tsb": null,
+ "pandas": {
+ "function": "series_fillna",
+ "mean_ms": 0.18527670000025864,
+ "iterations": 20,
+ "total_ms": 3.705534000005173
+ },
+ "ratio": null
+ },
+ {
+ "function": "series_shift",
+ "tsb": null,
+ "pandas": {
+ "function": "series_shift",
+ "mean_ms": 0.07249699999931636,
+ "iterations": 20,
+ "total_ms": 1.4499399999863272
+ },
+ "ratio": null
+ },
+ {
+ "function": "series_sort",
+ "tsb": null,
+ "pandas": {
+ "function": "series_sort",
+ "mean_ms": 5.127767300001551,
+ "iterations": 10,
+ "total_ms": 51.27767300001551
+ },
+ "ratio": null
+ },
+ {
+ "function": "series_string_ops",
+ "tsb": null,
+ "pandas": {
+ "function": "series_string_ops",
+ "mean_ms": 34.08206670000027,
+ "iterations": 10,
+ "total_ms": 340.8206670000027
+ },
+ "ratio": null
+ },
+ {
+ "function": "series_value_counts",
+ "tsb": null,
+ "pandas": {
+ "function": "series_value_counts",
+ "mean_ms": 9.212644899997713,
+ "iterations": 10,
+ "total_ms": 92.12644899997713
+ },
+ "ratio": null
+ }
+ ],
+ "timestamp": "2026-04-12T15:46:00Z"
+}
\ No newline at end of file
diff --git a/benchmarks/run_benchmarks.sh b/benchmarks/run_benchmarks.sh
new file mode 100644
index 00000000..0f800de0
--- /dev/null
+++ b/benchmarks/run_benchmarks.sh
@@ -0,0 +1,129 @@
+#!/usr/bin/env bash
+#
+# Run all tsb (TypeScript) and pandas (Python) benchmarks and collect results.
+#
+# Usage: ./benchmarks/run_benchmarks.sh
+#
+# Outputs: benchmarks/results.json with all benchmark results
+#
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+
+# Ensure Python and pandas are available
+if ! command -v python3 &>/dev/null; then
+ echo "ERROR: python3 is required but not found" >&2
+ exit 1
+fi
+
+python3 -c "import pandas" 2>/dev/null || {
+ echo "Installing pandas..."
+ pip3 install pandas --quiet
+}
+
+# Ensure Bun is available
+if ! command -v bun &>/dev/null; then
+ echo "ERROR: bun is required but not found" >&2
+ exit 1
+fi
+
+# Collect results
+results='{"benchmarks": [], "timestamp": "'$(date -u +"%Y-%m-%dT%H:%M:%SZ")'"}'
+
+echo "=== Running Performance Benchmarks ==="
+echo ""
+
+# Find all TypeScript benchmark files
+for ts_bench in "$SCRIPT_DIR"/tsb/bench_*.ts; do
+ [ -f "$ts_bench" ] || continue
+ bench_name=$(basename "$ts_bench" .ts | sed 's/^bench_//')
+
+ # Check for matching Python benchmark
+ py_bench="$SCRIPT_DIR/pandas/bench_${bench_name}.py"
+ if [ ! -f "$py_bench" ]; then
+ echo "SKIP: $bench_name (no matching Python benchmark)"
+ continue
+ fi
+
+ echo "--- Benchmarking: $bench_name ---"
+
+ # Run TypeScript benchmark
+ echo " Running tsb (TypeScript)..."
+ ts_result=$(cd "$REPO_ROOT" && bun run "$ts_bench" 2>/dev/null) || {
+ echo " ERROR: TypeScript benchmark failed"
+ continue
+ }
+ echo " tsb result: $ts_result"
+
+ # Run Python benchmark
+ echo " Running pandas (Python)..."
+ py_result=$(cd "$REPO_ROOT" && python3 "$py_bench" 2>/dev/null) || {
+ echo " ERROR: Python benchmark failed"
+ continue
+ }
+ echo " pandas result: $py_result"
+
+ # Extract mean_ms from both
+ ts_mean=$(echo "$ts_result" | python3 -c "import sys, json; d=json.load(sys.stdin); print(d['mean_ms'])" 2>/dev/null) || {
+ echo " ERROR: could not parse tsb benchmark result"
+ continue
+ }
+ py_mean=$(echo "$py_result" | python3 -c "import sys, json; d=json.load(sys.stdin); print(d['mean_ms'])" 2>/dev/null) || {
+ echo " ERROR: could not parse pandas benchmark result"
+ continue
+ }
+
+ # Calculate ratio (tsb / pandas) — < 1.0 means tsb is faster
+ ratio=$(python3 -c "
+ts, py = $ts_mean, $py_mean
+if py <= 0:
+ print('null')
+else:
+ print(round(ts / py, 3))
+")
+ if [ "$ratio" = "null" ]; then
+ echo " ERROR: pandas mean_ms is zero, cannot compute ratio"
+ continue
+ fi
+
+ echo " Ratio (tsb/pandas): ${ratio}x"
+ echo ""
+
+ # Add to results JSON
+ results=$(echo "$results" | python3 -c "
+import sys, json
+data = json.load(sys.stdin)
+data['benchmarks'].append({
+ 'function': '$bench_name',
+ 'tsb': $ts_result,
+ 'pandas': $py_result,
+ 'ratio': $ratio
+})
+print(json.dumps(data, indent=2))
+")
+done
+
+# Write results
+echo "$results" > "$SCRIPT_DIR/results.json"
+echo "=== Results written to benchmarks/results.json ==="
+echo ""
+
+# Summary
+echo "=== Summary ==="
+echo "$results" | python3 -c "
+import sys, json
+data = json.load(sys.stdin)
+benchmarks = data.get('benchmarks', [])
+if not benchmarks:
+ print('No benchmarks found.')
+else:
+ print(f'Functions benchmarked: {len(benchmarks)}')
+ for b in benchmarks:
+ fn = b['function']
+ ts = b['tsb']['mean_ms']
+ py = b['pandas']['mean_ms']
+ ratio = b['ratio']
+ faster = 'tsb' if ratio < 1 else 'pandas'
+ print(f' {fn}: tsb={ts}ms, pandas={py}ms, ratio={ratio}x ({faster} is faster)')
+"
diff --git a/benchmarks/tsb/bench_concat.ts b/benchmarks/tsb/bench_concat.ts
new file mode 100644
index 00000000..7a72f777
--- /dev/null
+++ b/benchmarks/tsb/bench_concat.ts
@@ -0,0 +1,32 @@
+/**
+ * Benchmark: concat — concatenate two 50k-row DataFrames
+ */
+import { DataFrame, concat } from "../../src/index.js";
+
+const ROWS = 50_000;
+const WARMUP = 5;
+const ITERATIONS = 20;
+
+const vals1 = Float64Array.from({ length: ROWS }, (_, i) => i * 1.0);
+const vals2 = Float64Array.from({ length: ROWS }, (_, i) => i * 2.0);
+const df1 = new DataFrame({ value: vals1 });
+const df2 = new DataFrame({ value: vals2 });
+
+for (let i = 0; i < WARMUP; i++) {
+ concat([df1, df2]);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ concat([df1, df2]);
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "concat",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_dataframe_apply.ts b/benchmarks/tsb/bench_dataframe_apply.ts
new file mode 100644
index 00000000..32a99a68
--- /dev/null
+++ b/benchmarks/tsb/bench_dataframe_apply.ts
@@ -0,0 +1,32 @@
+/**
+ * Benchmark: dataframe_apply — apply a function across rows of a 10k-row DataFrame
+ * (reduced size due to JS per-row overhead)
+ */
+import { DataFrame } from "../../src/index.js";
+
+const ROWS = 10_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+const a = Float64Array.from({ length: ROWS }, (_, i) => i * 1.0);
+const b = Float64Array.from({ length: ROWS }, (_, i) => i * 2.0);
+const df = new DataFrame({ a, b });
+
+for (let i = 0; i < WARMUP; i++) {
+ df.apply((row) => (row["a"] as number) + (row["b"] as number), { axis: 1 });
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ df.apply((row) => (row["a"] as number) + (row["b"] as number), { axis: 1 });
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "dataframe_apply",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_dataframe_creation.ts b/benchmarks/tsb/bench_dataframe_creation.ts
new file mode 100644
index 00000000..2eb8fd56
--- /dev/null
+++ b/benchmarks/tsb/bench_dataframe_creation.ts
@@ -0,0 +1,33 @@
+/**
+ * Benchmark: DataFrame creation from arrays
+ * Creates a 3-column (2 numeric + 1 string) 100k-row DataFrame
+ */
+import { DataFrame } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+const nums1 = Float64Array.from({ length: ROWS }, (_, i) => i * 1.1);
+const nums2 = Float64Array.from({ length: ROWS }, (_, i) => i * 2.2);
+const strs = Array.from({ length: ROWS }, (_, i) => `label_${i % 100}`);
+
+// Warm up
+for (let i = 0; i < WARMUP; i++) {
+ new DataFrame({ a: nums1, b: nums2, c: strs });
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ new DataFrame({ a: nums1, b: nums2, c: strs });
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "dataframe_creation",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_dataframe_dropna.ts b/benchmarks/tsb/bench_dataframe_dropna.ts
new file mode 100644
index 00000000..e4fef46b
--- /dev/null
+++ b/benchmarks/tsb/bench_dataframe_dropna.ts
@@ -0,0 +1,31 @@
+/**
+ * Benchmark: dataframe_dropna — drop rows with NaN values from 100k-row DataFrame
+ */
+import { DataFrame } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 5;
+const ITERATIONS = 20;
+
+const a = Float64Array.from({ length: ROWS }, (_, i) => (i % 10 === 0 ? NaN : i * 1.1));
+const b = Float64Array.from({ length: ROWS }, (_, i) => (i % 7 === 0 ? NaN : i * 2.2));
+const df = new DataFrame({ a, b });
+
+for (let i = 0; i < WARMUP; i++) {
+ df.dropna();
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ df.dropna();
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "dataframe_dropna",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_dataframe_filter.ts b/benchmarks/tsb/bench_dataframe_filter.ts
new file mode 100644
index 00000000..57d78bd7
--- /dev/null
+++ b/benchmarks/tsb/bench_dataframe_filter.ts
@@ -0,0 +1,30 @@
+/**
+ * Benchmark: DataFrame filter (boolean mask on 100k-row DataFrame)
+ */
+import { DataFrame } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 5;
+const ITERATIONS = 20;
+
+const vals = Float64Array.from({ length: ROWS }, (_, i) => i * 0.1);
+const df = new DataFrame({ value: vals });
+
+for (let i = 0; i < WARMUP; i++) {
+ df.filter((row) => (row["value"] as number) > 5000);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ df.filter((row) => (row["value"] as number) > 5000);
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "dataframe_filter",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_dataframe_rename.ts b/benchmarks/tsb/bench_dataframe_rename.ts
new file mode 100644
index 00000000..807b63c9
--- /dev/null
+++ b/benchmarks/tsb/bench_dataframe_rename.ts
@@ -0,0 +1,31 @@
+/**
+ * Benchmark: dataframe_rename — rename columns in a 100k-row DataFrame
+ */
+import { DataFrame } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 5;
+const ITERATIONS = 20;
+
+const a = Float64Array.from({ length: ROWS }, (_, i) => i * 1.1);
+const b = Float64Array.from({ length: ROWS }, (_, i) => i * 2.2);
+const df = new DataFrame({ old_a: a, old_b: b });
+
+for (let i = 0; i < WARMUP; i++) {
+ df.rename({ old_a: "new_a", old_b: "new_b" });
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ df.rename({ old_a: "new_a", old_b: "new_b" });
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "dataframe_rename",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_dataframe_sort.ts b/benchmarks/tsb/bench_dataframe_sort.ts
new file mode 100644
index 00000000..707e4ecf
--- /dev/null
+++ b/benchmarks/tsb/bench_dataframe_sort.ts
@@ -0,0 +1,39 @@
+/**
+ * Benchmark: dataframe_sort — sort a 100k-row DataFrame by two columns
+ */
+import { DataFrame } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+// Deterministic LCG (seed 42) so the generated data — and therefore the
+// timings — are reproducible, matching the seeded RNGs in the pandas benchmarks.
+let lcgState = 42 >>> 0;
+const nextRand = (): number => {
+  lcgState = (lcgState * 1664525 + 1013904223) >>> 0;
+  return lcgState / 4294967296;
+};
+
+const a = Array.from({ length: ROWS }, (_, i) => `group_${i % 100}`);
+const b = Float64Array.from({ length: ROWS }, () => nextRand() * 1000);
+const df = new DataFrame({ a, b });
+
+for (let i = 0; i < WARMUP; i++) {
+  df.sort_values(["a", "b"]);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+  df.sort_values(["a", "b"]);
+}
+const total = performance.now() - start;
+
+console.log(
+  JSON.stringify({
+    function: "dataframe_sort",
+    mean_ms: total / ITERATIONS,
+    iterations: ITERATIONS,
+    total_ms: total,
+  }),
+);
diff --git a/benchmarks/tsb/bench_describe.ts b/benchmarks/tsb/bench_describe.ts
new file mode 100644
index 00000000..368156a3
--- /dev/null
+++ b/benchmarks/tsb/bench_describe.ts
@@ -0,0 +1,31 @@
+/**
+ * Benchmark: describe — summary statistics on a 100k-row DataFrame
+ */
+import { DataFrame } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+const a = Float64Array.from({ length: ROWS }, (_, i) => i * 1.1);
+const b = Float64Array.from({ length: ROWS }, (_, i) => Math.sqrt(i + 1));
+const df = new DataFrame({ a, b });
+
+for (let i = 0; i < WARMUP; i++) {
+ df.describe();
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ df.describe();
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "describe",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_ewm_mean.ts b/benchmarks/tsb/bench_ewm_mean.ts
new file mode 100644
index 00000000..8e6597f7
--- /dev/null
+++ b/benchmarks/tsb/bench_ewm_mean.ts
@@ -0,0 +1,30 @@
+/**
+ * Benchmark: ewm_mean — exponentially weighted mean on 100k-element Series
+ */
+import { Series } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+const data = Float64Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.05));
+const s = new Series(data);
+
+for (let i = 0; i < WARMUP; i++) {
+ s.ewm({ span: 20 }).mean();
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ s.ewm({ span: 20 }).mean();
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "ewm_mean",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_groupby_mean.ts b/benchmarks/tsb/bench_groupby_mean.ts
new file mode 100644
index 00000000..efecfddb
--- /dev/null
+++ b/benchmarks/tsb/bench_groupby_mean.ts
@@ -0,0 +1,31 @@
+/**
+ * Benchmark: GroupBy mean on 100k-row DataFrame
+ */
+import { DataFrame } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+const keys = Array.from({ length: ROWS }, (_, i) => `group_${i % 100}`);
+const vals = Float64Array.from({ length: ROWS }, (_, i) => i * 0.1);
+const df = new DataFrame({ key: keys, value: vals });
+
+for (let i = 0; i < WARMUP; i++) {
+ df.groupby("key").mean();
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ df.groupby("key").mean();
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "groupby_mean",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_merge.ts b/benchmarks/tsb/bench_merge.ts
new file mode 100644
index 00000000..da68b52b
--- /dev/null
+++ b/benchmarks/tsb/bench_merge.ts
@@ -0,0 +1,33 @@
+/**
+ * Benchmark: merge — inner join two 50k-row DataFrames on a key column
+ */
+import { DataFrame, merge } from "../../src/index.js";
+
+const ROWS = 50_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+const keys = Array.from({ length: ROWS }, (_, i) => i % 1000);
+const vals1 = Float64Array.from({ length: ROWS }, (_, i) => i * 1.0);
+const vals2 = Float64Array.from({ length: ROWS }, (_, i) => i * 2.0);
+const df1 = new DataFrame({ key: keys, val1: vals1 });
+const df2 = new DataFrame({ key: keys, val2: vals2 });
+
+for (let i = 0; i < WARMUP; i++) {
+ merge(df1, df2, { on: "key", how: "inner" });
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ merge(df1, df2, { on: "key", how: "inner" });
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "merge",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_pivot_table.ts b/benchmarks/tsb/bench_pivot_table.ts
new file mode 100644
index 00000000..78b94702
--- /dev/null
+++ b/benchmarks/tsb/bench_pivot_table.ts
@@ -0,0 +1,32 @@
+/**
+ * Benchmark: pivot_table — pivot aggregation on 100k-row DataFrame
+ */
+import { DataFrame } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+const rows = Array.from({ length: ROWS }, (_, i) => `row_${i % 100}`);
+const cols = Array.from({ length: ROWS }, (_, i) => `col_${i % 50}`);
+const vals = Float64Array.from({ length: ROWS }, (_, i) => i * 0.1);
+const df = new DataFrame({ row: rows, col: cols, value: vals });
+
+for (let i = 0; i < WARMUP; i++) {
+ df.pivot_table({ values: "value", index: "row", columns: "col", aggfunc: "mean" });
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ df.pivot_table({ values: "value", index: "row", columns: "col", aggfunc: "mean" });
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "pivot_table",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_read_csv.ts b/benchmarks/tsb/bench_read_csv.ts
new file mode 100644
index 00000000..0d9462bf
--- /dev/null
+++ b/benchmarks/tsb/bench_read_csv.ts
@@ -0,0 +1,40 @@
+/**
+ * Benchmark: read_csv — parse a 100k-row CSV string
+ */
+import { read_csv } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 2;
+const ITERATIONS = 5;
+
+// Build CSV string
+const lines = ["id,value,label"];
+for (let i = 0; i < ROWS; i++) {
+  lines.push(`${i},${(i * 1.1).toFixed(4)},cat_${i % 50}`);
+}
+const csvContent = lines.join("\n");
+
+// Write to a temp file (create the directory first so the write cannot fail)
+import { mkdirSync, writeFileSync } from "node:fs";
+const tmpPath = "/tmp/gh-aw/agent/bench_read_csv.csv";
+mkdirSync("/tmp/gh-aw/agent", { recursive: true });
+writeFileSync(tmpPath, csvContent, "utf8");
+
+for (let i = 0; i < WARMUP; i++) {
+  read_csv(tmpPath);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+  read_csv(tmpPath);
+}
+const total = performance.now() - start;
+
+console.log(
+  JSON.stringify({
+    function: "read_csv",
+    mean_ms: total / ITERATIONS,
+    iterations: ITERATIONS,
+    total_ms: total,
+  }),
+);
diff --git a/benchmarks/tsb/bench_rolling_mean.ts b/benchmarks/tsb/bench_rolling_mean.ts
new file mode 100644
index 00000000..646d3100
--- /dev/null
+++ b/benchmarks/tsb/bench_rolling_mean.ts
@@ -0,0 +1,30 @@
+/**
+ * Benchmark: rolling mean with window=100 on 100k-element Series
+ */
+import { Series } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+const data = Float64Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.01));
+const s = new Series(data);
+
+for (let i = 0; i < WARMUP; i++) {
+ s.rolling(100).mean();
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ s.rolling(100).mean();
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "rolling_mean",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_series_arithmetic.ts b/benchmarks/tsb/bench_series_arithmetic.ts
new file mode 100644
index 00000000..552be2ca
--- /dev/null
+++ b/benchmarks/tsb/bench_series_arithmetic.ts
@@ -0,0 +1,30 @@
+/**
+ * Benchmark: Series arithmetic (add + multiply on 100k-element Series)
+ */
+import { Series } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 5;
+const ITERATIONS = 20;
+
+const data = Float64Array.from({ length: ROWS }, (_, i) => i * 0.5);
+const s = new Series(data);
+
+for (let i = 0; i < WARMUP; i++) {
+ s.add(2.0).mul(0.5);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ s.add(2.0).mul(0.5);
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "series_arithmetic",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_series_creation.ts b/benchmarks/tsb/bench_series_creation.ts
new file mode 100644
index 00000000..c7b4e145
--- /dev/null
+++ b/benchmarks/tsb/bench_series_creation.ts
@@ -0,0 +1,49 @@
+/**
+ * Benchmark: Series creation
+ *
+ * Creates a Series from a large numeric array and measures the time.
+ * Outputs JSON: {"function": "series_creation", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+
+import { Series } from "../../src/index.ts";
+
+const SIZE = 100_000;
+const WARMUP = 5;
+const ITERATIONS = 50;
+
+/** Generate a deterministic numeric array of the given size. */
+function generateData(n: number): readonly number[] {
+ const arr: number[] = [];
+ for (let i = 0; i < n; i++) {
+ arr.push(i * 1.1 + 0.5);
+ }
+ return arr;
+}
+
+const data = generateData(SIZE);
+
+// Warm-up
+for (let i = 0; i < WARMUP; i++) {
+ new Series({ data: [...data] });
+}
+
+// Measured runs
+const times: number[] = [];
+for (let i = 0; i < ITERATIONS; i++) {
+ const start = performance.now();
+ new Series({ data: [...data] });
+ const end = performance.now();
+ times.push(end - start);
+}
+
+const totalMs = times.reduce((a, b) => a + b, 0);
+const meanMs = totalMs / ITERATIONS;
+
+const result = {
+ function: "series_creation",
+ mean_ms: Math.round(meanMs * 1000) / 1000,
+ iterations: ITERATIONS,
+ total_ms: Math.round(totalMs * 1000) / 1000,
+};
+
+console.log(JSON.stringify(result));
diff --git a/benchmarks/tsb/bench_series_cumsum.ts b/benchmarks/tsb/bench_series_cumsum.ts
new file mode 100644
index 00000000..3eeba5b0
--- /dev/null
+++ b/benchmarks/tsb/bench_series_cumsum.ts
@@ -0,0 +1,30 @@
+/**
+ * Benchmark: series_cumsum — cumulative sum on 100k-element Series
+ */
+import { Series } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 5;
+const ITERATIONS = 20;
+
+const data = Float64Array.from({ length: ROWS }, (_, i) => i * 0.001);
+const s = new Series(data);
+
+for (let i = 0; i < WARMUP; i++) {
+ s.cumsum();
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ s.cumsum();
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "series_cumsum",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_series_fillna.ts b/benchmarks/tsb/bench_series_fillna.ts
new file mode 100644
index 00000000..3e658b01
--- /dev/null
+++ b/benchmarks/tsb/bench_series_fillna.ts
@@ -0,0 +1,31 @@
+/**
+ * Benchmark: series_fillna — fill NaN/null values in a 100k-element Series
+ */
+import { Series } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 5;
+const ITERATIONS = 20;
+
+// Create series with every 5th value as NaN
+const data = Float64Array.from({ length: ROWS }, (_, i) => (i % 5 === 0 ? NaN : i * 1.1));
+const s = new Series(data);
+
+for (let i = 0; i < WARMUP; i++) {
+ s.fillna(0.0);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ s.fillna(0.0);
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "series_fillna",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_series_shift.ts b/benchmarks/tsb/bench_series_shift.ts
new file mode 100644
index 00000000..46e79d19
--- /dev/null
+++ b/benchmarks/tsb/bench_series_shift.ts
@@ -0,0 +1,30 @@
+/**
+ * Benchmark: series_shift — shift values by 1 position in a 100k-element Series
+ */
+import { Series } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 5;
+const ITERATIONS = 20;
+
+const data = Float64Array.from({ length: ROWS }, (_, i) => i * 1.0);
+const s = new Series(data);
+
+for (let i = 0; i < WARMUP; i++) {
+ s.shift(1);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ s.shift(1);
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "series_shift",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_series_sort.ts b/benchmarks/tsb/bench_series_sort.ts
new file mode 100644
index 00000000..c6aedb93
--- /dev/null
+++ b/benchmarks/tsb/bench_series_sort.ts
@@ -0,0 +1,30 @@
+/**
+ * Benchmark: Series sort (argsort on 100k-element numeric Series)
+ */
+import { Series } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+const data = Float64Array.from({ length: ROWS }, () => Math.random() * 1000);
+const s = new Series(data);
+
+for (let i = 0; i < WARMUP; i++) {
+ s.sort_values();
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ s.sort_values();
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "series_sort",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_series_string_ops.ts b/benchmarks/tsb/bench_series_string_ops.ts
new file mode 100644
index 00000000..c44cdefe
--- /dev/null
+++ b/benchmarks/tsb/bench_series_string_ops.ts
@@ -0,0 +1,32 @@
+/**
+ * Benchmark: series_string_ops — str.upper and str.contains on 100k strings
+ */
+import { Series } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+const data = Array.from({ length: ROWS }, (_, i) => `hello_world_${i % 200}`);
+const s = new Series(data);
+
+for (let i = 0; i < WARMUP; i++) {
+ s.str.upper();
+ s.str.contains("world");
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ s.str.upper();
+ s.str.contains("world");
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "series_string_ops",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_series_value_counts.ts b/benchmarks/tsb/bench_series_value_counts.ts
new file mode 100644
index 00000000..b5352f54
--- /dev/null
+++ b/benchmarks/tsb/bench_series_value_counts.ts
@@ -0,0 +1,30 @@
+/**
+ * Benchmark: value_counts on a 100k-element Series with 100 distinct values
+ */
+import { Series } from "../../src/index.js";
+
+const ROWS = 100_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+const data = Array.from({ length: ROWS }, (_, i) => `cat_${i % 100}`);
+const s = new Series(data);
+
+for (let i = 0; i < WARMUP; i++) {
+ s.value_counts();
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ s.value_counts();
+}
+const total = performance.now() - start;
+
+console.log(
+ JSON.stringify({
+ function: "series_value_counts",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/docs/playground.md b/docs/playground.md
index b2e64183..7f08e62b 100644
--- a/docs/playground.md
+++ b/docs/playground.md
@@ -120,9 +120,6 @@ The CI pipeline (`pages.yml`) runs this automatically during deployment.
## Non-Goals (Current Scope)
-- **Syntax highlighting** in the editor: the current implementation uses a
- plain `
Insert and remove DataFrame columns at precise positions. insertColumn(df, loc, col, values) inserts at integer position, popColumn(df, col) returns { series, df }. Also includes reorderColumns and moveColumn. Mirrors pandas.DataFrame.insert() and .pop().
Bin continuous numeric data into discrete intervals. cut() uses fixed-width or explicit bin edges; qcut() uses quantile-based bins of equal population. Both return codes, labels, and bin edges. Mirrors pandas.cut and pandas.qcut.
Standalone custom rolling-window functions: rollingApply (custom fn per window), rollingAgg (multiple named aggregations → DataFrame), dataFrameRollingApply, dataFrameRollingAgg. Supports minPeriods, center, and raw mode. Mirrors pandas.Rolling.apply() and Rolling.agg().
Attach arbitrary key→value metadata to any Series or DataFrame via a WeakMap registry. Provides getAttrs, setAttrs, updateAttrs, copyAttrs, withAttrs, mergeAttrs, clearAttrs, getAttr, setAttr, deleteAttr, attrsCount, attrsKeys. Mirrors pandas.DataFrame.attrs / pandas.Series.attrs.
+ Because tsb DataFrames are immutable, both functions return a new DataFrame
+ rather than mutating the original. popColumn returns both the extracted
+ Series and the resulting DataFrame.
+
// Duplicate column name (default: not allowed)
+insertColumn(df, 1, "a", [1, 2, 3]);
+// → RangeError: Column "a" already exists. Use allowDuplicates=true to permit...
+
+// Out-of-range loc
+insertColumn(df, 99, "x", [1, 2, 3]);
+// → RangeError: loc=99 is out of range [0, 2].
+
+// Wrong number of values
+insertColumn(df, 0, "x", [1]); // df has 3 rows
+// → RangeError: values length 1 does not match DataFrame row count 3.
+
+// Column not found
+popColumn(df, "missing");
+// → RangeError: Column "missing" not found in DataFrame.
+
+
+
+ Immutability: Like all tsb DataFrame operations, these functions never
+ mutate the original DataFrame. Always assign the return value to a new variable.
+
+ Standalone equivalents of the pandas
+ DataFrame.pipe()
+ /
+ Series.pipe()
+ chaining pattern plus various
+ apply()
+ /
+ applymap()
+ operations — usable without method-call syntax.
+
+
+
+ Why standalone? pandas chains operations via methods:
+ df.pipe(fn1).pipe(fn2). tsb provides a module-level
+ pipe(value, fn1, fn2, …) that works on any value,
+ not just DataFrames. All functions are pure — inputs are never mutated.
+
+
+
API Summary
+
+
+
Function
Pandas equivalent
Description
+
+
+
+
pipe(value, fn1, fn2, …)
+
df.pipe(fn).pipe(fn2)
+
Variadic type-safe pipeline — passes value through fns left-to-right
+ Apply any aggregation function to each rolling window. The function
+ receives the valid (non-null, non-NaN) numeric values
+ in the window and must return a single number.
+
+
import { rollingApply } from "tsb";
+
+const prices = new Series({ data: [10, 12, 11, 15, 14, 16], name: "price" });
+
+// Custom: range (max - min) over each 3-day window
+const range = (w) => Math.max(...w) - Math.min(...w);
+
+rollingApply(prices, 3, range).toArray();
+// [null, null, 2, 4, 4, 2]
+// ↑↑ insufficient data (need 3 observations)
+
+
+
Options
+
+
+
Option
Default
Description
+
+
+
minPeriods
window
Minimum valid observations to compute (null otherwise)
+
center
false
Centre the window (symmetric) instead of trailing
+
raw
false
Pass full window including nulls (filtered to valid nums before fn call)
+
+
+
+
+
// minPeriods=1 → start computing from the very first position
+rollingApply(prices, 3, range, { minPeriods: 1 }).toArray();
+// [0, 2, 2, 4, 4, 2]
+
+// center=true → symmetric window around each point
+rollingApply(prices, 3, range, { center: true }).toArray();
+// [null, 2, 4, 4, 2, null]
+
+
2. rollingAgg — Multiple Aggregations at Once
+
+ Apply several named aggregation functions in a single pass over a Series,
+ returning a DataFrame where each column holds one
+ aggregation result.
+
+
import { rollingAgg } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4, 5, 6, 7, 8] });
+
+const result = rollingAgg(s, 3, {
+ mean: (w) => w.reduce((a, b) => a + b, 0) / w.length,
+ max: (w) => Math.max(...w),
+ min: (w) => Math.min(...w),
+ range: (w) => Math.max(...w) - Math.min(...w),
+});
+
+// result is a DataFrame with columns: "mean", "max", "min", "range"
+// result.col("mean").toArray() → [null, null, 2, 3, 4, 5, 6, 7]
+// result.col("range").toArray() → [null, null, 2, 2, 2, 2, 2, 2]
+ string_ops
+ Standalone string operations for Series and arrays
+
+
+
+ string_ops provides module-level string functions that complement the
+ Series.str accessor. All functions accept a Series, a
+ string[], or a scalar string.
+
+
+
+
+
strNormalize — Unicode normalisation
+
Normalise every element to NFC, NFD, NFKC, or NFKD. Useful when mixing text
+ from different sources (e.g. macOS NFD vs Windows NFC).
+
+
+
+
é
+café
+file
+
+
+
+
+
+
+
+
+
+
+
+
+
strGetDummies — one-hot encode by delimiter
+
Split each string by a delimiter and produce a binary indicator DataFrame —
+ one column per unique token. Equivalent to pandas.Series.str.get_dummies().
+
+
+
+
a|b
+b|c
+a
+a|b|c
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
strExtractAll — extract all regex matches
+
Find every non-overlapping regex match in each element. Returns a JSON-encoded
+ array of match arrays per element — parse with JSON.parse.
+
+
+
+
abc 123 def 456
+foo bar
+hello 99 world 42
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
strRemovePrefix / strRemoveSuffix
+
Strip a leading or trailing string from elements only when it is present.
+
+
+
+
pre_alpha
+pre_beta
+gamma
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
strTranslate — character-level substitution
+
Replace or delete individual characters using a lookup table.
+ Format: one mapping per line as from=to or from=
+ to delete.
+ string_ops_extended adds advanced string utilities that complement
+ string_ops and the Series.str accessor. All functions accept
+ a Series, an array, or a scalar string.
+
+
+
+
+
strSplitExpand — split and expand to DataFrame columns
+
+ Split each element by a delimiter and expand the parts into a DataFrame
+ with one column per position. Mirrors pandas.Series.str.split(expand=True).
+ Shorter rows are padded with null.
+
+
+
+
+
2024-01-15
+2025-12-31
+1999-07-04
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
strExtractGroups — extract regex capture groups
+
+ Extract regex capture groups from each element into a DataFrame.
+ Named groups ((?<name>...)) become column names; unnamed groups
+ become 0, 1, … Non-matching rows produce null.
+
+
+
+
+
John 42
+Alice 30
+Bob invalid
+
+
+
+
+
Use (?<name>...) for named capture groups.
+
+
+
+
+
+
+
+
+
strPartition / strRPartition — split into (before, sep, after)
+
+ strPartition splits at the first occurrence of the separator;
+ strRPartition splits at the last. When the separator is not
+ found, strPartition returns [s, "", ""] and
+ strRPartition returns ["", "", s].
+
+
+
+
+
hello.world.foo
+example.com
+no-separator-here
+
+
+
+
+
+
+
+
+
+
+
+
+
strMultiReplace — apply multiple replacements in sequence
+
+ Apply an ordered list of {pat, repl} pairs to each element.
+ Each replacement is applied to the result of the previous one.
+ Patterns can be string literals (replaced globally) or RegExp objects.
+
+ strIndent adds a prefix to every non-empty line (mirrors
+ textwrap.indent).
+ strDedent removes the common leading whitespace from all lines
+ (mirrors textwrap.dedent).
+
+ Missing values (null / undefined) are preserved as null
+ in all orientations. When using fromDictOriented with "index"
+ orientation, any column that is absent from a given row object is filled with null.
+
+ seriesWhere / seriesMask and their DataFrame equivalents
+ allow element-wise conditional replacement — the TypeScript equivalents of
+ pandas.Series.where
+ and
+ pandas.Series.mask.
+
+
+
+ Quick rule:
+ where(cond) — keep where cond is true, replace elsewhere.
+ mask(cond) — keep where cond is false, replace elsewhere.
+ They are exact inverses of each other.
+
+
+
1. seriesWhere — Boolean Array Condition
+
+ Pass a boolean[] to keep values at true positions, replace
+ the rest with null (or a custom other value).
+
+ When you pass a Series<boolean> as the condition, values are aligned
+ by label, not position. Labels absent from the condition series are treated
+ as false.
+
+
import { Series, seriesWhere } from "tsb";
+
+const prices = new Series({ data: [10, 20, 30], index: ["a", "b", "c"] });
+const valid = new Series<boolean>({ data: [false, true], index: ["a", "b"] });
+
+// Only "b" is in the condition with value=true; "a"=false, "c" missing→false
+const result = seriesWhere(prices, valid, { other: -1 });
+// Series { a: -1, b: 20, c: -1 }
+ Reshape a wide-format DataFrame to long format by collapsing stub-prefixed column
+ groups into rows — mirrors
+
+ pandas.wide_to_long().
+
+
+
Concept
+
+ Given a wide DataFrame where repeated measurements are spread across columns with a
+ common stub prefix and a numeric (or other) suffix — e.g. score_2021,
+ score_2022 — wideToLong pivots those column groups into rows.
+ One row per original row per unique suffix is produced.
+
+
+
Example — numeric suffixes
+
import { DataFrame } from "tsb";
+import { wideToLong } from "tsb";
+
+const df = DataFrame.fromColumns({
+ id: ["x", "y"],
+ A1: [1, 2],
+ A2: [3, 4],
+ B1: [5, 6],
+ B2: [7, 8],
+});
+
+const long = wideToLong(df, ["A", "B"], "id", "num");
+
+// long.columns.values → ["id", "num", "A", "B"]
+// long.shape → [4, 4]
+//
+// id num A B
+// x 1 1 5
+// y 1 2 6
+// x 2 3 7
+// y 2 4 8
+
Column(s) to keep as id variables (repeated per suffix)
+
j
string
Name of the new column holding the suffix values
+
options.sep
string
Separator between stub and suffix (default: "")
+
options.suffix
string
Regex string matching the suffix (default: "\\d+")
+
+
+
+
Output layout
+
+ Output columns are always ordered: id cols, j, stub cols
+ (in the same order the stubs were passed). Suffixes are sorted numerically when they are all
+ integers, otherwise lexicographically. Wide columns that are absent from the DataFrame are
+ filled with null.
+
+
+
diff --git a/playground/window_extended.html b/playground/window_extended.html
new file mode 100644
index 00000000..4232fa5d
--- /dev/null
+++ b/playground/window_extended.html
@@ -0,0 +1,304 @@
+
+
+
+
+
+ tsb — Rolling Extended Stats: sem, skew, kurt, quantile
+
+
+
+
tsb — Rolling Extended Statistics
+
+ Higher-order rolling window statistics extending the core
+
+ pandas.Series.rolling()
+
+ API:
+ sem, skew, kurt, and
+ quantile.
+
+
+
1. rollingSem — Standard Error of the Mean
+
+ The standard error of the mean measures how much the sample mean
+ would vary across repeated samples. For a window of n values:
+
+
sem = std(ddof=1) / √n
+
Requires at least 2 valid observations per window.
+
+
import { rollingSem, Series } from "tsb";
+
+const s = new Series({ data: [2, 4, 4, 4, 5, 5, 7, 9], name: "x" });
+const sem3 = rollingSem(s, 3);
+// [null, null, 0.667, 0, 0.333, 0.333, 0.667, 1.155]
+
+
+
+
Live demo — sem with window=3
+
Comma-separated numbers (nulls accepted):
+
+
+
+
+
+
+
+
2. rollingSkew — Fisher-Pearson Skewness
+
+ Skewness measures asymmetry of the distribution in each window.
+ Positive = right tail heavier; negative = left tail heavier.
+ Uses the unbiased Fisher-Pearson formula (same as pandas):
+
+
skew = [n/((n-1)(n-2))] × Σ[(xᵢ−x̄)/s]³
+
Requires ≥ 3 valid observations.
+
+
import { rollingSkew, Series } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4, 5] });
+rollingSkew(s, 3);
+// [null, null, 0, 0, 0] ← symmetric windows → zero skew
+
+
+
+
Live demo — skewness with window=4
+
+
+
+
+
+
+
3. rollingKurt — Excess Kurtosis
+
+ Kurtosis measures how heavy the tails are relative to a normal distribution.
+ The excess kurtosis subtracts 3, so a normal distribution gives 0.
+ Uses the Fisher (1930) unbiased formula:
+
+
kurt = [n(n+1)/((n-1)(n-2)(n-3))] × Σ[(xᵢ−x̄)/s]⁴ − 3(n-1)²/((n-2)(n-3))
+
Requires ≥ 4 valid observations.
+
+
import { rollingKurt, Series } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4] });
+rollingKurt(s, 4);
+// [null, null, null, -1.2] ← uniform distribution has kurt = -1.2
+
+
+
+
Live demo — excess kurtosis with window=5
+
+
+
+
+
+
+
4. rollingQuantile — Rolling Quantile
+
+ Computes any quantile within each sliding window using configurable
+ interpolation. When q = 0.5 this is identical to
+ rolling.median().
+
Linear interpolation — same as NumPy / pandas default
+
lower
Take the lower of the two surrounding values
+
higher
Take the higher of the two surrounding values
+
midpoint
Arithmetic mean of the two surrounding values
+
nearest
Whichever surrounding value is closest
+
+
+
+
+
Live demo — rolling quantile
+
+
+
+
+
+
+
+
+
Common Options
+
+
Option
Type
Default
Description
+
+
minPeriods
number
= window
Minimum valid obs required per window
+
center
boolean
false
Centre the window around each position
+
+
+
+
+ Note: Functions are pure — they return new Series objects
+ without modifying the input. Missing values (null, NaN)
+ are excluded from each window calculation.
+
+
+
+
+
diff --git a/src/core/api_types.ts b/src/core/api_types.ts
new file mode 100644
index 00000000..860d2050
--- /dev/null
+++ b/src/core/api_types.ts
@@ -0,0 +1,629 @@
+/**
+ * api_types — runtime type-checking predicates, mirroring `pandas.api.types`.
+ *
+ * Two groups of functions are provided:
+ *
+ * **Value-level predicates** — operate on arbitrary JavaScript values, equivalent
+ * to `pandas.api.types.is_scalar`, `is_list_like`, `is_number`, etc.
+ *
+ * **Dtype-level predicates** — accept a `Dtype` instance or a `DtypeName` string
+ * and answer questions about the dtype's kind, equivalent to
+ * `pandas.api.types.is_numeric_dtype`, `is_float_dtype`, etc.
+ *
+ * @example
+ * ```ts
+ * import { isScalar, isNumericDtype, Dtype } from "tsb";
+ * isScalar(42); // true
+ * isScalar([1, 2, 3]); // false
+ * isListLike([1, 2, 3]); // true
+ * isNumericDtype(Dtype.float64); // true
+ * isStringDtype("string"); // true
+ * ```
+ *
+ * @module
+ */
+
+import { Dtype } from "./dtype.ts";
+import type { DtypeName } from "../types.ts";
+
+// ─── internal helper ──────────────────────────────────────────────────────────
+
+/** Resolve a Dtype | DtypeName to a Dtype instance. */
+function resolveDtype(dtype: Dtype | DtypeName): Dtype {
+ if (dtype instanceof Dtype) {
+ return dtype;
+ }
+ return Dtype.from(dtype);
+}
+
+// ═════════════════════════════════════════════════════════════════════════════
+// VALUE-LEVEL PREDICATES
+// ═════════════════════════════════════════════════════════════════════════════
+
+/**
+ * Return `true` if `val` is a scalar (not a collection).
+ *
+ * Scalars: `string`, `number`, `bigint`, `boolean`, `symbol`, `null`,
+ * `undefined`, and `Date` objects. Arrays, plain objects, `Map`, `Set`,
+ * iterables, and class instances other than `Date` are **not** scalars.
+ *
+ * Mirrors `pandas.api.types.is_scalar`.
+ *
+ * @example
+ * ```ts
+ * isScalar(42); // true
+ * isScalar("hello"); // true
+ * isScalar(null); // true
+ * isScalar([1, 2]); // false
+ * isScalar({ a: 1 }); // false
+ * ```
+ */
+export function isScalar(val: unknown): boolean {
+ if (val === null || val === undefined) {
+ return true;
+ }
+ const t = typeof val;
+ if (t === "string" || t === "number" || t === "bigint" || t === "boolean" || t === "symbol") {
+ return true;
+ }
+ if (val instanceof Date) {
+ return true;
+ }
+ return false;
+}
+
+/**
+ * Return `true` if `val` is "list-like" — i.e. iterable (but not a string)
+ * or has a non-negative integer `length` property.
+ *
+ * Mirrors `pandas.api.types.is_list_like`.
+ *
+ * @example
+ * ```ts
+ * isListLike([1, 2, 3]); // true
+ * isListLike(new Set([1])); // true
+ * isListLike("abc"); // false (strings excluded)
+ * isListLike(42); // false
+ * isListLike({ a: 1 }); // false
+ * ```
+ */
+export function isListLike(val: unknown): boolean {
+ if (val === null || val === undefined) {
+ return false;
+ }
+ if (typeof val === "string") {
+ return false;
+ }
+ // Has Symbol.iterator and is not a plain number/boolean/bigint/symbol
+ if (typeof val === "number" || typeof val === "boolean" || typeof val === "bigint" || typeof val === "symbol") {
+ return false;
+ }
+ if (typeof val === "object" || typeof val === "function") {
+ if (Symbol.iterator in (val as object)) {
+ return true;
+ }
+ const len = (val as Record)["length"];
+ if (typeof len === "number" && len >= 0 && Number.isInteger(len)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/**
+ * Return `true` if `val` is array-like — i.e. has a non-negative integer
+ * `length` property.
+ *
+ * Mirrors `pandas.api.types.is_array_like`.
+ *
+ * @example
+ * ```ts
+ * isArrayLike([1, 2]); // true
+ * isArrayLike("abc"); // true (strings have .length)
+ * isArrayLike(42); // false
+ * isArrayLike({}); // false
+ * ```
+ */
+export function isArrayLike(val: unknown): boolean {
+ if (val === null || val === undefined) {
+ return false;
+ }
+ if (typeof val === "string") {
+ return true;
+ }
+ if (typeof val !== "object" && typeof val !== "function") {
+ return false;
+ }
+ const len = (val as Record)["length"];
+ return typeof len === "number" && len >= 0 && Number.isInteger(len);
+}
+
+/**
+ * Return `true` if `val` is dict-like — a plain object (not an array, not a
+ * `Date`, not a class instance).
+ *
+ * Mirrors `pandas.api.types.is_dict_like`.
+ *
+ * @example
+ * ```ts
+ * isDictLike({ a: 1 }); // true
+ * isDictLike(new Map()); // true (has .get / .set)
+ * isDictLike([1, 2]); // false
+ * isDictLike("abc"); // false
+ * ```
+ */
+export function isDictLike(val: unknown): boolean {
+ if (val === null || val === undefined) {
+ return false;
+ }
+ if (typeof val !== "object") {
+ return false;
+ }
+ if (Array.isArray(val)) {
+ return false;
+ }
+ // Treat Map as dict-like (supports key lookup)
+ if (val instanceof Map) {
+ return true;
+ }
+ // Date is not dict-like
+ if (val instanceof Date) {
+ return false;
+ }
+ // Plain objects and other objects with properties
+ return true;
+}
+
+/**
+ * Return `true` if `val` is an iterator — i.e. has a callable `next` method.
+ *
+ * Mirrors `pandas.api.types.is_iterator`.
+ *
+ * @example
+ * ```ts
+ * isIterator([1, 2][Symbol.iterator]()); // true
+ * isIterator([1, 2]); // false
+ * ```
+ */
+export function isIterator(val: unknown): boolean {
+ if (val === null || val === undefined) {
+ return false;
+ }
+ if (typeof val !== "object" && typeof val !== "function") {
+ return false;
+ }
+ return typeof (val as Record)["next"] === "function";
+}
+
+/**
+ * Return `true` if `val` is a `number` (including `NaN` and `±Infinity`).
+ *
+ * Mirrors `pandas.api.types.is_number`.
+ *
+ * @example
+ * ```ts
+ * isNumber(3.14); // true
+ * isNumber(NaN); // true
+ * isNumber("3"); // false
+ * ```
+ */
+export function isNumber(val: unknown): val is number {
+ return typeof val === "number";
+}
+
+/**
+ * Return `true` if `val` is a `boolean`.
+ *
+ * Mirrors `pandas.api.types.is_bool`.
+ *
+ * @example
+ * ```ts
+ * isBool(true); // true
+ * isBool(1); // false
+ * ```
+ */
+export function isBool(val: unknown): val is boolean {
+ return typeof val === "boolean";
+}
+
+/**
+ * Return `true` if `val` is a `string`.
+ *
+ * Named `isStringValue` to distinguish from the dtype-level `isStringDtype`.
+ * Mirrors `pandas.api.types.is_string` (not to be confused with dtype checks).
+ *
+ * @example
+ * ```ts
+ * isStringValue("hello"); // true
+ * isStringValue(42); // false
+ * ```
+ */
+export function isStringValue(val: unknown): val is string {
+ return typeof val === "string";
+}
+
+/**
+ * Return `true` if `val` is a finite floating-point number (has a fractional
+ * component or is finite non-integer). `NaN`, `±Infinity` are **not** floats
+ * in the pandas sense.
+ *
+ * Mirrors `pandas.api.types.is_float`.
+ *
+ * @example
+ * ```ts
+ * isFloat(3.14); // true
+ * isFloat(3.0); // false (integer value)
+ * isFloat(NaN); // false
+ * isFloat(Infinity); // false
+ * ```
+ */
+export function isFloat(val: unknown): boolean {
+ if (typeof val !== "number") {
+ return false;
+ }
+ if (!Number.isFinite(val)) {
+ return false;
+ }
+ return val !== Math.trunc(val);
+}
+
+/**
+ * Return `true` if `val` is a finite integer-valued number.
+ *
+ * Mirrors `pandas.api.types.is_integer`.
+ *
+ * @example
+ * ```ts
+ * isInteger(3); // true
+ * isInteger(3.0); // true (integer value stored as float)
+ * isInteger(3.14); // false
+ * isInteger(NaN); // false
+ * ```
+ */
+export function isInteger(val: unknown): boolean {
+ return typeof val === "number" && Number.isInteger(val);
+}
+
+/**
+ * Return `true` if `val` is a `bigint`.
+ *
+ * @example
+ * ```ts
+ * isBigInt(42n); // true
+ * isBigInt(42); // false
+ * ```
+ */
+export function isBigInt(val: unknown): val is bigint {
+ return typeof val === "bigint";
+}
+
+/**
+ * Return `true` if `val` is a `RegExp`.
+ *
+ * Mirrors `pandas.api.types.is_re`.
+ *
+ * @example
+ * ```ts
+ * isRegExp(/abc/); // true
+ * isRegExp(new RegExp("x")); // true
+ * isRegExp("abc"); // false
+ * ```
+ */
+export function isRegExp(val: unknown): val is RegExp {
+ return val instanceof RegExp;
+}
+
+/**
+ * Return `true` if `val` can be compiled into a `RegExp` — i.e. it is either
+ * a `string` or already a `RegExp`.
+ *
+ * Mirrors `pandas.api.types.is_re_compilable`.
+ *
+ * @example
+ * ```ts
+ * isReCompilable("abc"); // true
+ * isReCompilable(/abc/); // true
+ * isReCompilable(42); // false
+ * ```
+ */
+export function isReCompilable(val: unknown): boolean {
+ return typeof val === "string" || val instanceof RegExp;
+}
+
+/**
+ * Return `true` if `val` is a "missing" value in the pandas sense: `null`,
+ * `undefined`, or `NaN`.
+ *
+ * @example
+ * ```ts
+ * isMissing(null); // true
+ * isMissing(undefined); // true
+ * isMissing(NaN); // true
+ * isMissing(0); // false
+ * isMissing(""); // false
+ * ```
+ */
+export function isMissing(val: unknown): boolean {
+ if (val === null || val === undefined) {
+ return true;
+ }
+ if (typeof val === "number" && Number.isNaN(val)) {
+ return true;
+ }
+ return false;
+}
+
+/**
+ * Return `true` if `val` is "hashable" — usable as an object-key in
+ * JavaScript. In practice this means it is a primitive (`string`, `number`,
+ * `bigint`, `boolean`, `symbol`, `null`, `undefined`).
+ *
+ * Mirrors the spirit of `pandas.api.types.is_hashable`.
+ *
+ * @example
+ * ```ts
+ * isHashable("key"); // true
+ * isHashable(42); // true
+ * isHashable({}); // false
+ * isHashable([]); // false
+ * ```
+ */
+export function isHashable(val: unknown): boolean {
+ if (val === null || val === undefined) {
+ return true;
+ }
+ const t = typeof val;
+ return t === "string" || t === "number" || t === "bigint" || t === "boolean" || t === "symbol";
+}
+
+/**
+ * Return `true` if `val` is a `Date` instance.
+ *
+ * @example
+ * ```ts
+ * isDate(new Date()); // true
+ * isDate("2024-01-01"); // false
+ * ```
+ */
+export function isDate(val: unknown): val is Date {
+ return val instanceof Date;
+}
+
+// ═════════════════════════════════════════════════════════════════════════════
+// DTYPE-LEVEL PREDICATES
+// ═════════════════════════════════════════════════════════════════════════════
+
+/**
+ * Return `true` if the dtype is numeric (integer, unsigned integer, or float).
+ *
+ * Mirrors `pandas.api.types.is_numeric_dtype`.
+ *
+ * @example
+ * ```ts
+ * isNumericDtype(Dtype.float64); // true
+ * isNumericDtype("int32"); // true
+ * isNumericDtype("string"); // false
+ * ```
+ */
+export function isNumericDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isNumeric;
+}
+
+/**
+ * Return `true` if the dtype is any integer kind (signed or unsigned).
+ *
+ * Mirrors `pandas.api.types.is_integer_dtype`.
+ *
+ * @example
+ * ```ts
+ * isIntegerDtype("int64"); // true
+ * isIntegerDtype("uint8"); // true
+ * isIntegerDtype("float32"); // false
+ * ```
+ */
+export function isIntegerDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isInteger;
+}
+
+/**
+ * Return `true` if the dtype is a signed integer (`int8`–`int64`).
+ *
+ * Mirrors `pandas.api.types.is_signed_integer_dtype`.
+ *
+ * @example
+ * ```ts
+ * isSignedIntegerDtype("int32"); // true
+ * isSignedIntegerDtype("uint32"); // false
+ * ```
+ */
+export function isSignedIntegerDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isSignedInteger;
+}
+
+/**
+ * Return `true` if the dtype is an unsigned integer (`uint8`–`uint64`).
+ *
+ * Mirrors `pandas.api.types.is_unsigned_integer_dtype`.
+ *
+ * @example
+ * ```ts
+ * isUnsignedIntegerDtype("uint64"); // true
+ * isUnsignedIntegerDtype("int64"); // false
+ * ```
+ */
+export function isUnsignedIntegerDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isUnsignedInteger;
+}
+
+/**
+ * Return `true` if the dtype is a floating-point type (`float32` or `float64`).
+ *
+ * Mirrors `pandas.api.types.is_float_dtype`.
+ *
+ * @example
+ * ```ts
+ * isFloatDtype("float64"); // true
+ * isFloatDtype("float32"); // true
+ * isFloatDtype("int32"); // false
+ * ```
+ */
+export function isFloatDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isFloat;
+}
+
+/**
+ * Return `true` if the dtype is boolean.
+ *
+ * Mirrors `pandas.api.types.is_bool_dtype`.
+ *
+ * @example
+ * ```ts
+ * isBoolDtype("bool"); // true
+ * isBoolDtype("int8"); // false
+ * ```
+ */
+export function isBoolDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isBool;
+}
+
+/**
+ * Return `true` if the dtype is the `string` dtype.
+ *
+ * Mirrors `pandas.api.types.is_string_dtype`.
+ *
+ * @example
+ * ```ts
+ * isStringDtype("string"); // true
+ * isStringDtype("object"); // false
+ * ```
+ */
+export function isStringDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isString;
+}
+
+/**
+ * Return `true` if the dtype is a datetime type.
+ *
+ * Mirrors `pandas.api.types.is_datetime64_dtype`.
+ *
+ * @example
+ * ```ts
+ * isDatetimeDtype("datetime"); // true
+ * isDatetimeDtype("string"); // false
+ * ```
+ */
+export function isDatetimeDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isDatetime;
+}
+
+/**
+ * Return `true` if the dtype is a timedelta type.
+ *
+ * Mirrors `pandas.api.types.is_timedelta64_dtype`.
+ *
+ * @example
+ * ```ts
+ * isTimedeltaDtype("timedelta"); // true
+ * isTimedeltaDtype("datetime"); // false
+ * ```
+ */
+export function isTimedeltaDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isTimedelta;
+}
+
+/**
+ * Return `true` if the dtype is the categorical dtype.
+ *
+ * Mirrors `pandas.api.types.is_categorical_dtype`.
+ *
+ * @example
+ * ```ts
+ * isCategoricalDtype("category"); // true
+ * isCategoricalDtype("string"); // false
+ * ```
+ */
+export function isCategoricalDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isCategory;
+}
+
+/**
+ * Return `true` if the dtype is the object dtype.
+ *
+ * Mirrors `pandas.api.types.is_object_dtype`.
+ *
+ * @example
+ * ```ts
+ * isObjectDtype("object"); // true
+ * isObjectDtype("string"); // false
+ * ```
+ */
+export function isObjectDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isObject;
+}
+
+/**
+ * Return `true` if the dtype represents complex numbers.
+ *
+ * JavaScript has no native complex number type, so this always returns `false`
+ * (no complex dtype exists in the `tsb` dtype system). Provided for API
+ * parity with `pandas.api.types.is_complex_dtype`.
+ *
+ * @example
+ * ```ts
+ * isComplexDtype("float64"); // false (no complex dtype)
+ * ```
+ */
+export function isComplexDtype(_dtype: Dtype | DtypeName): boolean {
+ return false;
+}
+
+/**
+ * Return `true` if the dtype is an "extension array" dtype — i.e. any dtype
+ * beyond the numeric primitives: `string`, `object`, `datetime`, `timedelta`,
+ * `category`.
+ *
+ * Mirrors `pandas.api.types.is_extension_array_dtype`.
+ *
+ * @example
+ * ```ts
+ * isExtensionArrayDtype("category"); // true
+ * isExtensionArrayDtype("datetime"); // true
+ * isExtensionArrayDtype("int64"); // false
+ * ```
+ */
+export function isExtensionArrayDtype(dtype: Dtype | DtypeName): boolean {
+ const d = resolveDtype(dtype);
+ return d.isString || d.isObject || d.isDatetime || d.isTimedelta || d.isCategory;
+}
+
+/**
+ * Return `true` if the dtype can hold period (date period) data.
+ * In the current `tsb` dtype system this maps to the `datetime` kind.
+ *
+ * Mirrors `pandas.api.types.is_period_dtype`.
+ *
+ * @example
+ * ```ts
+ * isPeriodDtype("datetime"); // true
+ * isPeriodDtype("float64"); // false
+ * ```
+ */
+export function isPeriodDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isDatetime;
+}
+
+/**
+ * Return `true` if the dtype is suitable for interval data — float or integer.
+ *
+ * Mirrors `pandas.api.types.is_interval_dtype`.
+ *
+ * @example
+ * ```ts
+ * isIntervalDtype("float64"); // true
+ * isIntervalDtype("int32"); // true
+ * isIntervalDtype("string"); // false
+ * ```
+ */
+export function isIntervalDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isNumeric;
+}
diff --git a/src/core/attrs.ts b/src/core/attrs.ts
new file mode 100644
index 00000000..81c6be1c
--- /dev/null
+++ b/src/core/attrs.ts
@@ -0,0 +1,291 @@
+/**
+ * attrs — user-defined metadata dictionary for Series and DataFrame.
+ *
+ * Mirrors `pandas.DataFrame.attrs` / `pandas.Series.attrs`: an arbitrary
+ * key→value dictionary that travels with a data object and lets callers
+ * annotate it with provenance, units, descriptions, or any other metadata.
+ *
+ * Because the tsb Series and DataFrame classes are immutable by design, this
+ * module maintains a **WeakMap registry** that maps each object to its attrs
+ * record. The registry entries are garbage-collected automatically when the
+ * object itself is collected — there is no memory leak.
+ *
+ * ### Public surface
+ *
+ * ```ts
+ * import { getAttrs, setAttrs, updateAttrs, copyAttrs, withAttrs, clearAttrs,
+ * hasAttrs } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ x: [1, 2, 3] });
+ *
+ * // Annotate
+ * setAttrs(df, { source: "sensor_A", unit: "metres" });
+ * getAttrs(df); // { source: "sensor_A", unit: "metres" }
+ *
+ * // Merge additional keys
+ * updateAttrs(df, { version: 2 });
+ * getAttrs(df); // { source: "sensor_A", unit: "metres", version: 2 }
+ *
+ * // Fluent helper — sets attrs and returns the same object
+ * const annotated = withAttrs(df, { source: "sensor_B" });
+ * annotated === df; // true — same reference
+ *
+ * // Propagate to a derived object
+ * const df2 = DataFrame.fromColumns({ y: [4, 5, 6] });
+ * copyAttrs(df, df2);
+ * getAttrs(df2); // { source: "sensor_A", unit: "metres", version: 2 }
+ * ```
+ *
+ * @module
+ */
+
+// ─── types ────────────────────────────────────────────────────────────────────
+
+/**
+ * The attrs dictionary type. Keys are strings; values may be any JSON-safe
+ * primitive or nested structure. Mirrors the `dict` type of `pandas.attrs`.
+ */
+export type Attrs = Record;
+
+// ─── registry ─────────────────────────────────────────────────────────────────
+
+/** Internal WeakMap from any object to its attrs record. */
+const registry = new WeakMap