diff --git a/playground/infer_objects.html b/playground/infer_objects.html
new file mode 100644
index 00000000..0dc49004
--- /dev/null
+++ b/playground/infer_objects.html
@@ -0,0 +1,152 @@
+
+
+
+
+
+
tsb — infer_objects / convert_dtypes
+
+
+
+
← Back to tsb playground
+
infer_objects / convert_dtypes
+
+
+ pandas equivalent:
+ Series.infer_objects() /
+ DataFrame.infer_objects() /
+ Series.convert_dtypes() /
+ DataFrame.convert_dtypes()
+
+
+
What it does
+
+ These utilities refine dtypes automatically — useful after reading data from
+ CSV/JSON where everything starts as object or string:
+
+
+ inferObjectsSeries — promotes an object-typed Series to a
+ more specific dtype (int, float, bool, string) when all values have a consistent type.
+ inferObjectsDataFrame — applies per-column inference to every column.
+ convertDtypesSeries — like inferObjectsSeries but also
+ parses string columns as numbers when possible.
+ convertDtypesDataFrame — per-column convertDtypesSeries.
+
+
+
inferObjectsSeries — promote object → typed
+
import { Series, Dtype, inferObjectsSeries } from "tsb";
+
+// Object series holding integers
+const s = new Series({ data: [1, 2, 3], dtype: Dtype.object });
+s.dtype.kind; // "object"
+
+const better = inferObjectsSeries(s);
+better.dtype.kind; // "int"
+better.values; // [1, 2, 3]
+
+// Mixed types — cannot infer, returns original
+const mixed = new Series({ data: [1, "a", true], dtype: Dtype.object });
+inferObjectsSeries(mixed).dtype.kind; // "object"
+
+// All null — no inference possible
+const nulls = new Series({ data: [null, null], dtype: Dtype.object });
+inferObjectsSeries(nulls).dtype.kind; // "object"
+
+
inferObjectsDataFrame — all columns at once
+
import { DataFrame, inferObjectsDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+ ints: [1, 2, 3],
+ floats: [1.1, 2.2, 3.3],
+ strs: ["a", "b", "c"],
+ bools: [true, false, true],
+});
+
+const inferred = inferObjectsDataFrame(df);
+inferred.col("ints").dtype.kind; // "int"
+inferred.col("floats").dtype.kind; // "float"
+inferred.col("strs").dtype.kind; // "string"
+inferred.col("bools").dtype.kind; // "bool"
+
+
convertDtypesSeries — also parses numeric strings
+
import { Series, convertDtypesSeries } from "tsb";
+
+// String values that look like integers
+const ints = new Series({ data: ["1", "2", "3"] });
+const result = convertDtypesSeries(ints);
+result.dtype.kind; // "int"
+result.values; // [1, 2, 3]
+
+// String values that look like floats
+const floats = new Series({ data: ["1.5", "2.5", "3.5"] });
+convertDtypesSeries(floats).dtype.kind; // "float"
+
+// Non-numeric strings: unchanged
+const text = new Series({ data: ["apple", "banana"] });
+convertDtypesSeries(text); // same Series, dtype "string"
+
+// Int series with nulls → can convert to float for NA safety
+import { Dtype } from "tsb";
+const withNull = new Series({ data: [1, null, 3], dtype: Dtype.int64 });
+convertDtypesSeries(withNull, { convertIntegerToFloat: true }).dtype.kind;
+// "float" (null becomes NaN-compatible)
+
+
convertDtypesDataFrame — per-column conversion
+
import { DataFrame, convertDtypesDataFrame } from "tsb";
+
+// After reading a CSV, all columns come back as strings:
+const raw = DataFrame.fromColumns({
+ age: ["25", "30", "22"],
+ score: ["88.5", "92.1", "78.0"],
+ name: ["Alice", "Bob", "Charlie"],
+});
+
+const typed = convertDtypesDataFrame(raw);
+typed.col("age").dtype.kind; // "int"
+typed.col("score").dtype.kind; // "float"
+typed.col("name").dtype.kind; // "string" (unchanged — not numeric)
+
+
API reference
+
+ Function Description
+ inferObjectsSeries(s, options?)Infer better dtype for object-typed Series
+ inferObjectsDataFrame(df, options?)Infer better dtypes for all columns
+ convertDtypesSeries(s, options?)Convert to best dtype, including string→number parsing
+ convertDtypesDataFrame(df, options?)Per-column convertDtypesSeries
+
+
+
InferObjectsOptions
+
+ Option Type Default Description
+ objectOnlybooleantrueOnly infer for object-dtype Series (mirrors pandas default)
+
+
+
ConvertDtypesOptions
+
+ Option Type Default Description
+ convertStringbooleantrueParse string values as numbers when possible
+ convertIntegerToFloatbooleanfalseConvert int series with nulls to float
+
+
+
When to use which
+
+ Use case Function
+ Promote object columns after creation inferObjectsSeries / DataFrame
+ Parse CSV/JSON string columns to numbers convertDtypesSeries / DataFrame
+ Make int columns nullable (float) convertDtypesSeries(s, { convertIntegerToFloat: true })
+
+
+
+
+
diff --git a/playground/join.html b/playground/join.html
new file mode 100644
index 00000000..8a4b66be
--- /dev/null
+++ b/playground/join.html
@@ -0,0 +1,142 @@
+
+
+
+
+
+
tsb — join: label-based DataFrame join
+
+
+
+
← Back to tsb playground
+
join — label-based DataFrame join
+
+
+ pandas equivalent: DataFrame.join(other, on=None, how='left', lsuffix='', rsuffix='', sort=False)
+
+
+
What it does
+
+ join(left, right, options?) aligns two DataFrames by their index labels (or a key column).
+ Unlike the general-purpose merge(), join() defaults to a left join on index
+ — the idiomatic way to combine DataFrames that already share an index.
+
+
+
Left join (default)
+
import { DataFrame, join } from "tsb";
+
+const employees = DataFrame.fromColumns(
+ { dept: ["Engineering", "Marketing", "Engineering"] },
+ { index: ["alice", "bob", "charlie"] },
+);
+
+const salaries = DataFrame.fromColumns(
+ { salary: [90_000, 75_000] },
+ { index: ["alice", "charlie"] },
+);
+
+join(employees, salaries);
+// dept salary
+// alice Engineering 90000
+// bob Marketing null ← no salary for bob
+// charlie Engineering 75000
+
+
Inner / outer / right join
+
join(employees, salaries, { how: "inner" });
+// Only alice and charlie (keys in BOTH DataFrames)
+
+join(employees, salaries, { how: "outer" });
+// All keys from either DataFrame (nulls where absent)
+
+join(employees, salaries, { how: "right" });
+// All keys from salaries: alice and charlie
+
+
Overlapping columns — use lsuffix / rsuffix
+
const a = DataFrame.fromColumns({ score: [10, 20] }, { index: ["x", "y"] });
+const b = DataFrame.fromColumns({ score: [15, 25] }, { index: ["x", "y"] });
+
+// This would throw — 'score' exists in both without suffix disambiguation:
+// join(a, b);
+
+join(a, b, { lsuffix: "_a", rsuffix: "_b" });
+// score_a score_b
+// x 10 15
+// y 20 25
+
+
Join on a column key
+
const orders = DataFrame.fromColumns({
+ customerId: ["C1", "C2", "C1"],
+ amount: [100, 200, 150],
+});
+const customers = DataFrame.fromColumns(
+ { name: ["Alice", "Bob"] },
+ { index: ["C1", "C2"] },
+);
+
+// Join orders.customerId against customers index
+join(orders, customers, { on: "customerId", how: "left" });
+// customerId amount name
+// C1 100 Alice
+// C2 200 Bob
+// C1 150 Alice
+
+
joinAll — chain multiple joins
+
import { joinAll } from "tsb";
+
+const base = DataFrame.fromColumns({ A: [1,2,3] }, { index: ["K0","K1","K2"] });
+const b1 = DataFrame.fromColumns({ B: [10,20,30] }, { index: ["K0","K1","K2"] });
+const b2 = DataFrame.fromColumns({ C: [100,200,300] }, { index: ["K0","K1","K2"] });
+
+joinAll(base, [b1, b2]);
+// A B C
+// 1 10 100
+// 2 20 200
+// 3 30 300
+
+
crossJoin — Cartesian product
+
import { crossJoin } from "tsb";
+
+const colors = DataFrame.fromColumns({ color: ["red", "blue"] });
+const sizes = DataFrame.fromColumns({ size: ["S", "M", "L"] });
+
+crossJoin(colors, sizes);
+// color size
+// red S
+// red M
+// red L
+// blue S
+// blue M
+// blue L
+
+
API reference
+
+ Function Description
+ join(left, right, options?)Label-based join (default: left join on index)
+ joinAll(left, others[], options?)Chain joins left-to-right
+ crossJoin(left, right, options?)Cartesian product of two DataFrames
+
+
+
JoinOptions
+
+ Option Type Default Description
+ how"left" | "right" | "inner" | "outer""left"Join type
+ onstringindex Left column to use as join key
+ lsuffixstring""Suffix for overlapping left columns
+ rsuffixstring""Suffix for overlapping right columns
+ sortbooleanfalseSort result by join keys
+
+
+
+
+
diff --git a/playground/math_ops.html b/playground/math_ops.html
new file mode 100644
index 00000000..0d138cda
--- /dev/null
+++ b/playground/math_ops.html
@@ -0,0 +1,100 @@
+
+
+
+
+
+
math_ops — abs, round — tsb playground
+
+
+
+
math_ops — abs, round
+
+ Element-wise mathematical transformations for Series and DataFrame.
+ Mirrors pandas.Series.abs(), pandas.DataFrame.abs(),
+ pandas.Series.round(), and pandas.DataFrame.round().
+ Missing values (null, NaN) are preserved as-is.
+
+
+
Interactive Demo
+
absSeries
+
absDataFrame
+
roundSeries
+
roundDataFrame (per-column)
+
Click a button above to run an example.
+
+
Code Examples
+
import { Series, DataFrame, absSeries, absDataFrame, roundSeries, roundDataFrame } from "tsb";
+
+// ── absSeries ────────────────────────────────────────────────────────────────
+const s = new Series({ data: [-1, 2, -3, null] });
+absSeries(s).values; // [1, 2, 3, null]
+
+// ── absDataFrame ─────────────────────────────────────────────────────────────
+const df = DataFrame.fromColumns({ a: [-1, 2], b: [3, -4] });
+absDataFrame(df).col("a").values; // [1, 2]
+absDataFrame(df).col("b").values; // [3, 4]
+
+// ── roundSeries ──────────────────────────────────────────────────────────────
+const prices = new Series({ data: [1.234, 5.678, null] });
+roundSeries(prices, 2).values; // [1.23, 5.68, null]
+roundSeries(prices, 0).values; // [1, 6, null]
+roundSeries(prices, -1).values; // nearest 10: [0, 10, null]
+
+// ── roundDataFrame ────────────────────────────────────────────────────────────
+const data = DataFrame.fromColumns({ price: [1.111, 2.222], qty: [3.7, 4.4] });
+roundDataFrame(data, 2).col("price").values; // [1.11, 2.22]
+roundDataFrame(data, { price: 1, qty: 0 }).col("qty").values; // [4, 4]
+
+
+
+
+
diff --git a/playground/merge_asof.html b/playground/merge_asof.html
new file mode 100644
index 00000000..c532a50e
--- /dev/null
+++ b/playground/merge_asof.html
@@ -0,0 +1,126 @@
+
+
+
+
+
+
tsb — merge_asof (ordered nearest-key join)
+
+
+
+
← Back to tsb playground
+
merge_asof — Ordered Nearest-Key Join
+
+
+ pandas equivalent: pd.merge_asof(left, right, on="time")
+
+
+
+ mergeAsof is an ordered left-join that matches on the nearest key
+ rather than an exact key. It is especially useful for time-series data — e.g., matching
+ each trade to the most recent quote.
+
+
+
Key concepts
+
+ Both DataFrames must be sorted ascending by the key column before calling mergeAsof.
+ The result always has the same number of rows as the left DataFrame .
+ direction: controls whether to look backward (default), forward, or to the nearest key.
+ by: require additional columns to match exactly before doing the asof lookup (e.g. by ticker).
+ tolerance: ignore matches further than this numeric distance.
+
+
+
Basic example — backward (default)
+
import { DataFrame, mergeAsof } from "tsb";
+
+// Each trade is matched to the most recent quote (backward asof)
+const trades = DataFrame.fromColumns({
+ time: [1, 5, 10],
+ price: [100, 200, 300],
+});
+const quotes = DataFrame.fromColumns({
+ time: [2, 6],
+ bid: [98, 195],
+});
+
+const result = mergeAsof(trades, quotes, { on: "time" });
+// time | price | bid
+// 1 | 100 | null ← no quote ≤ 1
+// 5 | 200 | 98 ← most recent quote ≤ 5 is at time=2
+// 10 | 300 | 195 ← most recent quote ≤ 10 is at time=6
+
+
Forward direction
+
// Match each event to the next scheduled announcement
+const events = DataFrame.fromColumns({ t: [1, 3, 7], v: [10, 30, 70] });
+const schedule = DataFrame.fromColumns({ t: [2, 6, 10], w: [20, 60, 100] });
+
+const result = mergeAsof(events, schedule, {
+ on: "t",
+ direction: "forward",
+});
+// t=1 → t=2 (w=20), t=3 → t=6 (w=60), t=7 → t=10 (w=100)
+
+
Nearest direction
+
const result = mergeAsof(trades, quotes, {
+ on: "time",
+ direction: "nearest",
+});
+// Picks the quote with the smallest absolute time difference.
+
+
Grouping with by
+
// Match trades to quotes within the same ticker symbol
+const trades = DataFrame.fromColumns({
+ time: [1, 2, 3, 4],
+ ticker: ["AAPL","MSFT","AAPL","MSFT"],
+ price: [100, 200, 110, 210],
+});
+const quotes = DataFrame.fromColumns({
+ time: [1, 1, 3, 3],
+ ticker: ["AAPL","MSFT","AAPL","MSFT"],
+ bid: [99, 198, 109, 208],
+});
+
+mergeAsof(trades, quotes, { on: "time", by: "ticker" });
+
+
Tolerance
+
// Only match if the key distance is ≤ 2
+mergeAsof(left, right, { on: "t", tolerance: 2 });
+
+
Different key column names (left_on / right_on)
+
mergeAsof(left, right, {
+ left_on: "trade_time",
+ right_on: "quote_time",
+});
+
+
Using index as key
+
mergeAsof(left, right, {
+ left_index: true,
+ right_on: "timestamp",
+});
+
+
Options reference
+
+ Option Default Description
+ on— Shared key column name
+ left_on / right_on— Different key columns per side
+ left_index / right_indexfalseUse index as key
+ by— Column(s) that must match exactly
+ left_by / right_by— Different by-columns per side
+ direction"backward""backward", "forward", or "nearest"
+ tolerancenullMax numeric key distance for a match
+ allow_exact_matchestrueInclude exact key matches
+ suffixes["_x","_y"]Suffixes for overlapping column names
+
+
+
diff --git a/playground/merge_ordered.html b/playground/merge_ordered.html
new file mode 100644
index 00000000..661ce854
--- /dev/null
+++ b/playground/merge_ordered.html
@@ -0,0 +1,147 @@
+
+
+
+
+
+
tsb — merge_ordered (ordered fill merge)
+
+
+
+
← Back to tsb playground
+
merge_ordered — Ordered Fill Merge
+
+
+ pandas equivalent: pd.merge_ordered(left, right, on="date")
+
+
+
+ mergeOrdered is an ordered merge (default outer join) that
+ sorts the result by the key column(s). It is ideal for time-series and event data where
+ both DataFrames have partially overlapping key ranges and you want a complete timeline
+ with optional forward-fill (fill_method: "ffill") to carry values forward.
+
+
+
Key concepts
+
+ Default how: "outer" — keeps all keys from both DataFrames.
+ Result is always sorted ascending by the key column.
+ fill_method: "ffill" forward-fills null gaps in non-key columns after the merge.
+ left_by / right_by: perform the ordered merge independently per group and concatenate.
+ left_on / right_on: use different key column names per side.
+
+
+
Basic outer ordered merge
+
import { DataFrame, mergeOrdered } from "tsb";
+
+const left = DataFrame.fromColumns({
+ date: [1, 3, 5],
+ price: [10, 30, 50],
+});
+const right = DataFrame.fromColumns({
+ date: [2, 3, 6],
+ volume: [200, 300, 600],
+});
+
+const result = mergeOrdered(left, right, { on: "date" });
+// date | price | volume
+// 1 | 10 | null
+// 2 | null | 200
+// 3 | 30 | 300
+// 5 | 50 | null
+// 6 | null | 600
+
+
Forward-fill after merge
+
const result = mergeOrdered(left, right, {
+ on: "date",
+ fill_method: "ffill",
+});
+// date | price | volume
+// 1 | 10 | null ← no earlier price to fill
+// 2 | 10 | 200 ← price carried forward from date=1
+// 3 | 30 | 300
+// 5 | 50 | 300 ← volume carried forward from date=3
+// 6 | 50 | 600
+
+
Inner join variant
+
// Only rows where both DataFrames have a key
+mergeOrdered(left, right, { on: "date", how: "inner" });
+// date | price | volume
+// 3 | 30 | 300
+
+
Different key column names per side
+
const left2 = DataFrame.fromColumns({ t_left: [1, 3, 5], a: [10, 30, 50] });
+const right2 = DataFrame.fromColumns({ t_right: [2, 3, 6], b: [200, 300, 600] });
+
+mergeOrdered(left2, right2, { left_on: "t_left", right_on: "t_right" });
+// t_left | a | b
+// 1 | 10 | null
+// 2 | null | 200
+// 3 | 30 | 300
+// 5 | 50 | null
+// 6 | null | 600
+
+
Group-wise ordered merge (left_by / right_by)
+
// Perform the ordered merge independently for each group
+const left3 = DataFrame.fromColumns({
+ grp: ["A", "A", "B", "B"],
+ k: [1, 3, 1, 3],
+ a: [10, 30, 100, 300],
+});
+const right3 = DataFrame.fromColumns({
+ grp: ["A", "A", "B", "B"],
+ k: [2, 3, 2, 3],
+ b: [20, 30, 200, 300],
+});
+
+mergeOrdered(left3, right3, {
+ on: "k",
+ left_by: "grp",
+ right_by: "grp",
+});
+// grp | k | a | b
+// A | 1 | 10 | null
+// A | 2 | null | 20
+// A | 3 | 30 | 30
+// B | 1 | 100 | null
+// B | 2 | null | 200
+// B | 3 | 300 | 300
+
+
Overlapping non-key columns — suffixes
+
const left4 = DataFrame.fromColumns({ k: [1, 2, 3], val: [10, 20, 30] });
+const right4 = DataFrame.fromColumns({ k: [2, 3, 4], val: [200, 300, 400] });
+
+mergeOrdered(left4, right4, { on: "k", suffixes: ["_L", "_R"] });
+// k | val_L | val_R
+// 1 | 10 | null
+// 2 | 20 | 200
+// 3 | 30 | 300
+// 4 | null | 400
+
+
API reference
+
+ Option Type Default Description
+
+ onstring | string[]— Key column(s) present in both DataFrames
+ left_onstring | string[]— Key column(s) in the left DataFrame
+ right_onstring | string[]— Key column(s) in the right DataFrame
+ how"outer" | "inner" | "left" | "right""outer"Join type
+ fill_method"ffill" | nullnullForward-fill null gaps after merge
+ left_bystring | string[]— Group columns in left DataFrame
+ right_bystring | string[]— Group columns in right DataFrame
+ suffixes[string, string]["_x", "_y"]Suffixes for overlapping non-key columns
+
+
+
+
diff --git a/playground/notna_boolean.html b/playground/notna_boolean.html
new file mode 100644
index 00000000..1776fce0
--- /dev/null
+++ b/playground/notna_boolean.html
@@ -0,0 +1,104 @@
+
+
+
+
+
+
keepTrue / keepFalse / filterBy — Boolean Indexing — tsb playground
+
+
+
+
keepTrue / keepFalse / filterBy — Boolean Indexing
+
+ Boolean-mask selection helpers that mirror the pandas
+ series[mask] / df[mask] idiom.
+
+
+ keepTrue(series, mask) — keep elements where mask is truthy
+ keepFalse(series, mask) — keep elements where mask is falsy
+ filterBy(df, mask) — filter DataFrame rows by boolean mask
+
+
+
Interactive Demo
+
keepTrue
+
keepFalse
+
filterBy (DataFrame)
+
Series as mask
+
Click a button above to run an example.
+
+
Code Examples
+
import { Series, DataFrame, keepTrue, keepFalse, filterBy } from "tsb";
+
+const s = new Series({ data: [10, 20, 30, 40], index: ["a", "b", "c", "d"] });
+
+// Keep elements where mask is true
+keepTrue(s, [true, false, true, false]).values; // [10, 30]
+
+// Keep elements where mask is false (complement)
+keepFalse(s, [true, false, true, false]).values; // [20, 40]
+
+// Filter DataFrame rows
+const df = DataFrame.fromColumns(
+ { age: [25, 30, 35, 40], score: [88, 72, 95, 60] },
+ { index: ["alice", "bob", "carol", "dave"] },
+);
+const highScore = df.col("score").values.map((v) => (v as number) >= 80);
+filterBy(df, highScore).col("age").values; // [25, 35]
+
+// Use a Series as a mask
+const mask = new Series({ data: [true, null, true, false], index: ["a", "b", "c", "d"] });
+keepTrue(s, mask).values; // [10, 30] (null treated as false)
+
+
+
+
+
diff --git a/playground/rename_ops.html b/playground/rename_ops.html
new file mode 100644
index 00000000..949447d9
--- /dev/null
+++ b/playground/rename_ops.html
@@ -0,0 +1,144 @@
+
+
+
+
+
+
rename_ops — Rename, Prefix/Suffix, set_axis, to_frame — tsb playground
+
+
+
+
rename_ops — Rename, add_prefix/suffix, set_axis, to_frame
+
+ Functions for renaming labels, adding prefix/suffix to column or index labels,
+ replacing an axis entirely (set_axis), and converting a Series to a
+ single-column DataFrame (to_frame). Mirrors the corresponding
+ pandas methods.
+
+
+
Interactive Demo
+
renameDataFrame (columns)
+
renameSeriesIndex
+
addPrefixDataFrame
+
addSuffixSeries
+
setAxisDataFrame (axis=1)
+
seriesToFrame
+
Click a button above to run an example.
+
+
Code Examples
+
import {
+ Series, DataFrame,
+ renameSeriesIndex, renameDataFrame,
+ addPrefixDataFrame, addSuffixDataFrame,
+ addPrefixSeries, addSuffixSeries,
+ setAxisSeries, setAxisDataFrame,
+ seriesToFrame,
+} from "tsb";
+
+// ── renameSeriesIndex ────────────────────────────────────────────────────────
+const s = new Series({ data: [1, 2, 3], index: ["a", "b", "c"] });
+
+// Record mapping — only matched labels are changed
+renameSeriesIndex(s, { a: "x", c: "z" }).index.values; // ["x", "b", "z"]
+
+// Function mapper — called for every index label
+renameSeriesIndex(s, l => l.toUpperCase()).index.values; // ["A", "B", "C"]
+
+// ── renameDataFrame ──────────────────────────────────────────────────────────
+const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] },
+ { index: ["r0", "r1"] });
+
+// Rename columns
+renameDataFrame(df, { columns: { a: "x", b: "y" } }).columns.values; // ["x","y"]
+
+// Rename row index
+renameDataFrame(df, { index: { r0: "row0", r1: "row1" } }).index.values;
+
+// ── add_prefix / add_suffix ──────────────────────────────────────────────────
+addPrefixDataFrame(df, "col_").columns.values; // ["col_a","col_b"]
+addSuffixDataFrame(df, "_v1").columns.values; // ["a_v1","b_v1"]
+
+addPrefixSeries(s, "idx_").index.values; // ["idx_a","idx_b","idx_c"]
+addSuffixSeries(s, "_end").index.values; // ["a_end","b_end","c_end"]
+
+// ── set_axis ─────────────────────────────────────────────────────────────────
+setAxisSeries(s, ["x", "y", "z"]).index.values; // ["x","y","z"]
+setAxisDataFrame(df, ["col1","col2"], 1).columns.values; // ["col1","col2"]
+setAxisDataFrame(df, ["rowA","rowB"], 0).index.values; // ["rowA","rowB"]
+
+// ── to_frame ─────────────────────────────────────────────────────────────────
+const score = new Series({ data: [90, 80, 70], name: "score" });
+seriesToFrame(score).columns.values; // ["score"]
+seriesToFrame(score, "points").columns.values; // ["points"]
+
+
+
+
+
diff --git a/playground/resample.html b/playground/resample.html
new file mode 100644
index 00000000..aa3509b6
--- /dev/null
+++ b/playground/resample.html
@@ -0,0 +1,323 @@
+
+
+
+
+
+
tsb — resample()
+
+
+
+
+
+
+
+
+
+ Overview
+
+ resample groups a time-indexed Series or DataFrame into fixed-size time bins
+ (seconds, minutes, hours, days, weeks, months, quarters, or years) and applies an aggregation
+ function to each bin. Empty bins are automatically included in the output, filled with
+ NaN for numeric aggregations or 0 for count/size.
+
+ Supported frequencies
+
+ String Interval Default label
+ "S"Second left (bin start)
+ "T" / "min"Minute left
+ "H"Hour left
+ "D"Calendar day (UTC) left — UTC midnight
+ "W" / "W-SUN"Week ending Sunday right — Sunday
+ "W-MON" … "W-SAT"Week ending on weekday right — anchor day
+ "MS"Month start (1st) left
+ "ME"Month end (last day) right — last day
+ "QS"Quarter start left
+ "QE"Quarter end right — last day of quarter
+ "AS" / "YS"Year start (Jan 1) left
+ "AE" / "YE"Year end (Dec 31) right
+
+
+
+
+
+ Example 1 — Daily sum of a price Series
+ import { Series, resampleSeries } from "tsb";
+
+const dates = [
+ new Date("2024-01-01T09:00Z"),
+ new Date("2024-01-01T15:00Z"),
+ new Date("2024-01-02T10:00Z"),
+ new Date("2024-01-02T16:00Z"),
+ new Date("2024-01-04T09:00Z"), // note: Jan 3 is empty
+];
+const prices = new Series({ data: [100, 105, 98, 110, 120], index: dates, name: "price" });
+
+const daily = resampleSeries(prices, "D").sum();
+// Jan 1: 205 Jan 2: 208 Jan 3: NaN (empty) Jan 4: 120
+console.log(daily.index.values.map(d => d.toISOString().slice(0,10)));
+console.log(daily.toArray());
+ ▶ Run
+ Click "Run" to execute.
+
+
+
+
+ Example 2 — Monthly mean with month-start labels
+ import { Series, resampleSeries } from "tsb";
+
+const timestamps = [
+ new Date("2024-01-05Z"), new Date("2024-01-20Z"),
+ new Date("2024-02-10Z"), new Date("2024-02-25Z"),
+ new Date("2024-03-15Z"),
+];
+const values = new Series({ data: [10, 20, 30, 40, 50], index: timestamps });
+
+const monthly = resampleSeries(values, "MS").mean();
+// Jan: 15 Feb: 35 Mar: 50
+console.log(monthly.index.values.map(d => d.toISOString().slice(0,7)));
+console.log(monthly.toArray());
+ ▶ Run
+ Click "Run" to execute.
+
+
+
+
+ Example 3 — OHLC (Open-High-Low-Close) aggregation
+ import { Series, resampleSeries } from "tsb";
+
+const ticks = [
+ new Date("2024-01-01T09:00Z"), new Date("2024-01-01T10:00Z"),
+ new Date("2024-01-01T11:00Z"), new Date("2024-01-01T15:00Z"),
+];
+const px = new Series({ data: [100, 108, 95, 103], index: ticks, name: "AAPL" });
+
+const ohlc = resampleSeries(px, "D").ohlc();
+console.log("open :", ohlc.col("open").toArray());
+console.log("high :", ohlc.col("high").toArray());
+console.log("low :", ohlc.col("low").toArray());
+console.log("close:", ohlc.col("close").toArray());
+ ▶ Run
+ Click "Run" to execute.
+
+
+
+
+ Example 4 — DataFrame resample with per-column aggregations
+ import { DataFrame, Index, resampleDataFrame } from "tsb";
+
+const idx = new Index([
+ new Date("2024-01-01Z"), new Date("2024-01-01T12:00Z"),
+ new Date("2024-01-02Z"), new Date("2024-01-02T18:00Z"),
+]);
+const df = DataFrame.fromColumns(
+ { revenue: [100, 200, 150, 50], visits: [10, 20, 5, 15] },
+ { index: idx },
+);
+
+// Different aggregation per column
+const result = resampleDataFrame(df, "D").agg({
+ revenue: "sum",
+ visits: "mean",
+});
+console.log("revenue:", result.col("revenue").toArray()); // [300, 200]
+console.log("visits :", result.col("visits").toArray()); // [15, 10]
+console.log("index :", result.index.values.map(d => d.toISOString().slice(0,10)));
+ ▶ Run
+ Click "Run" to execute.
+
+
+
+
+ Example 5 — Weekly resample (labeled by Sunday)
+ import { Series, resampleSeries } from "tsb";
+
+// Jan 8 2024 = Monday, Jan 14 = Sunday
+const dates = [
+ new Date("2024-01-08Z"), new Date("2024-01-10Z"), new Date("2024-01-14Z"),
+ new Date("2024-01-15Z"), new Date("2024-01-18Z"),
+];
+const s = new Series({ data: [1, 2, 3, 4, 5], index: dates });
+const weekly = resampleSeries(s, "W").sum();
+
+// Week 1 (ends Jan 14): 1+2+3=6 Week 2 (ends Jan 21): 4+5=9
+console.log(weekly.index.values.map(d => d.toISOString().slice(0,10)));
+console.log(weekly.toArray());
+ ▶ Run
+ Click "Run" to execute.
+
+
+
+
+ Example 6 — Custom aggregation function
+ import { Series, resampleSeries } from "tsb";
+
+const dates = [
+ new Date("2024-01-01Z"), new Date("2024-01-01T12:00Z"),
+ new Date("2024-01-02Z"),
+];
+const s = new Series({ data: [2, 4, 8], index: dates });
+
+// Product of each bin
+const product = resampleSeries(s, "D").agg((vals) =>
+ vals.reduce((acc, v) => (typeof v === "number" ? acc * v : acc), 1)
+);
+console.log(product.toArray()); // [8, 8]
+ ▶ Run
+ Click "Run" to execute.
+
+
+
+ API Reference
+ resampleSeries(series, freq, options?)
+ Returns a SeriesResampler with methods: .sum(), .mean(), .min(), .max(), .count(), .first(), .last(), .std(), .var(), .size(), .ohlc(), .agg(spec).
+ resampleDataFrame(df, freq, options?)
+ Returns a DataFrameResampler with the same numeric aggregation methods (each returning a DataFrame), plus .size() (returns a Series), and .agg(spec) where spec can be a per-column object.
+ options
+
+ Option Type Description
+ label"left" | "right"Override the default label side for the output index.
+
+
+
+
+ See also
+
+ groupby — label-based grouping ·
+ rolling — rolling window ·
+ date_range — generate datetime indices
+
+
+
+
+
+
+
+
diff --git a/playground/scalar_extract.html b/playground/scalar_extract.html
new file mode 100644
index 00000000..16615590
--- /dev/null
+++ b/playground/scalar_extract.html
@@ -0,0 +1,156 @@
+
+
+
+
+
+
scalar_extract — tsb playground
+
+
+
+
← Back to playground index
+
scalar_extract — squeeze / item / bool / first_valid_index / last_valid_index
+
+ Utilities to extract scalar values from Series and DataFrames.
+ Mirrors pandas.Series.squeeze(), Series.item(),
+ Series.bool(), Series.first_valid_index(),
+ Series.last_valid_index(), and their DataFrame equivalents.
+
+
+
squeezeSeries — extract scalar from a single-element Series
+
Python pandas equivalent:
+
import pandas as pd
+
+s = pd.Series([42])
+print(s.squeeze()) # 42
+
+s2 = pd.Series([1, 2, 3])
+print(s2.squeeze()) # Series unchanged
+
+
tsb equivalent:
+
import { Series, squeezeSeries } from "tsb";
+
+squeezeSeries(new Series({ data: [42] })); // 42
+squeezeSeries(new Series({ data: [1, 2, 3] })); // Series([1, 2, 3])
+
+
+
squeezeDataFrame — squeeze 1-D axis objects
+
Python pandas equivalent:
+
import pandas as pd
+
+df1x1 = pd.DataFrame({"A": [10]})
+print(df1x1.squeeze()) # 10 (scalar)
+
+df1xN = pd.DataFrame({"A": [1], "B": [2], "C": [3]})
+print(df1xN.squeeze()) # Series indexed by column names
+
+dfNx1 = pd.DataFrame({"A": [1, 2, 3]})
+print(dfNx1.squeeze()) # Series indexed by row labels
+print(dfNx1.squeeze(axis=1)) # same as above
+
+
tsb equivalent:
+
import { DataFrame, squeezeDataFrame } from "tsb";
+
+// 1×1 → scalar
+squeezeDataFrame(DataFrame.fromColumns({ A: [10] })); // 10
+
+// 1 row, N cols → Series over columns
+squeezeDataFrame(DataFrame.fromColumns({ A: [1], B: [2] })); // Series([1, 2])
+
+// N rows, 1 col → Series over rows
+squeezeDataFrame(DataFrame.fromColumns({ A: [1, 2, 3] })); // Series([1, 2, 3])
+
+// axis=1: force squeeze along columns axis
+squeezeDataFrame(DataFrame.fromColumns({ A: [1, 2, 3] }), 1); // Series([1, 2, 3])
+
+
+
itemSeries — return the single element of a Series
+
Python pandas equivalent:
+
import pandas as pd
+
+s = pd.Series([7])
+print(s.item()) # 7
+
+s2 = pd.Series([1, 2])
+s2.item() # ValueError
+
+
tsb equivalent:
+
import { Series, itemSeries } from "tsb";
+
+itemSeries(new Series({ data: [7] })); // 7
+itemSeries(new Series({ data: [1, 2] })); // throws RangeError
+
+
+
boolSeries / boolDataFrame — convert to boolean
+
Python pandas equivalent:
+
import pandas as pd
+
+pd.Series([True]).bool() # True
+pd.Series([False]).bool() # False
+pd.DataFrame({"A": [True]}).bool() # True
# (pandas requires the single element to be an actual boolean;
# a non-boolean like pd.Series([1]).bool() raises ValueError)
+
+
tsb equivalent:
+
import { Series, DataFrame, boolSeries, boolDataFrame } from "tsb";
+
+boolSeries(new Series({ data: [1] })); // true
+boolSeries(new Series({ data: [0] })); // false
+boolDataFrame(DataFrame.fromColumns({ A: [1] })); // true
+boolDataFrame(DataFrame.fromColumns({ A: [false] })); // false
+
+
+
firstValidIndex / lastValidIndex — find first/last non-NA label
+
Python pandas equivalent:
+
import pandas as pd
+import numpy as np
+
+s = pd.Series([None, np.nan, 3.0, 4.0], index=["a", "b", "c", "d"])
+print(s.first_valid_index()) # "c"
+print(s.last_valid_index()) # "d"
+
+s_all_na = pd.Series([None, None])
+print(s_all_na.first_valid_index()) # None
+
+
tsb equivalent:
+
import { Series, firstValidIndex, lastValidIndex } from "tsb";
+
+const s = new Series({ data: [null, NaN, 3, 4], index: ["a", "b", "c", "d"] });
+firstValidIndex(s); // "c"
+lastValidIndex(s); // "d"
+
+const allNA = new Series({ data: [null, null] });
+firstValidIndex(allNA); // null
+
+
+
dataFrameFirstValidIndex / dataFrameLastValidIndex
+
Python pandas equivalent:
+
import pandas as pd
+import numpy as np
+
+df = pd.DataFrame({"A": [None, None, 1], "B": [None, 2, 3]})
+print(df.first_valid_index()) # 1 (row 1 has B=2)
+print(df.last_valid_index()) # 2 (row 2 has A=1, B=3)
+
+
tsb equivalent:
+
import { DataFrame, dataFrameFirstValidIndex, dataFrameLastValidIndex } from "tsb";
+
+const df = DataFrame.fromColumns({
+ A: [null, null, 1],
+ B: [null, 2, 3],
+});
+dataFrameFirstValidIndex(df); // 1
+dataFrameLastValidIndex(df); // 2
+
+
+
+
+
diff --git a/playground/sort_ops.html b/playground/sort_ops.html
new file mode 100644
index 00000000..4e048de9
--- /dev/null
+++ b/playground/sort_ops.html
@@ -0,0 +1,126 @@
+
+
+
+
+
+
sort_ops — tsb playground
+
+
+
+
← Back to playground index
+
sort_ops — sort_values and sort_index for Series and DataFrame
+
+ Sorting utilities that mirror pandas' sort_values and
+ sort_index methods. All functions are pure — they return a
+ new object without modifying the input.
+
+
+
sortValuesSeries — sort a Series by its values
+
Python pandas equivalent:
+
import pandas as pd
+s = pd.Series([3, 1, 2], index=["b", "a", "c"])
+s.sort_values()
+# a 1
+# c 2
+# b 3
+s.sort_values(ascending=False)
+# b 3
+# c 2
+# a 1
+
+
tsb equivalent:
+
import { Series, sortValuesSeries } from "tsb";
+
+const s = new Series({ data: [3, 1, 2], index: ["b", "a", "c"] });
+
+sortValuesSeries(s); // [1, 2, 3] index: ["a","c","b"]
+sortValuesSeries(s, { ascending: false }); // [3, 2, 1] index: ["b","c","a"]
+sortValuesSeries(s, { ignoreIndex: true }); // resets index to [0, 1, 2]
+
+
+
NaN / null handling
+
Python pandas equivalent:
+
s = pd.Series([3.0, None, 1.0])
+s.sort_values() # 1, 3, NaN (NaN last by default)
+s.sort_values(na_position="first") # NaN, 1, 3
+
+
tsb equivalent:
+
const s2 = new Series({ data: [3, null, 1] });
+sortValuesSeries(s2); // [1, 3, null]
+sortValuesSeries(s2, { naPosition: "first" }); // [null, 1, 3]
+
+
+
sortIndexSeries — sort a Series by its index labels
+
Python pandas equivalent:
+
s.sort_index() # sort by label alphabetically / numerically
+
+
tsb equivalent:
+
import { sortIndexSeries } from "tsb";
+
+const s = new Series({ data: [3, 1, 2], index: ["b", "a", "c"] });
+sortIndexSeries(s);
+// values: [1, 3, 2], index: ["a", "b", "c"]
+
+
+
sortValuesDataFrame — sort DataFrame rows by column values
+
Python pandas equivalent:
+
df = pd.DataFrame({"a": [3, 1, 2], "b": [10, 30, 20]})
+df.sort_values("a")
+# a b
+# 1 1 30
+# 2 2 20
+# 0 3 10
+
+df.sort_values(["a", "b"], ascending=[True, False])
+
+
tsb equivalent:
+
import { DataFrame, sortValuesDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({ a: [3, 1, 2], b: [10, 30, 20] });
+
+sortValuesDataFrame(df, "a");
+// col a: [1, 2, 3] col b: [30, 20, 10]
+
+sortValuesDataFrame(df, ["a", "b"], { ascending: [true, false] });
+// compound sort: by a ascending, then b descending
+
+
+
sortIndexDataFrame — sort DataFrame rows (or columns) by index
+
Python pandas equivalent:
+
df.sort_index() # sort rows by row-index labels
+df.sort_index(axis=1) # sort columns alphabetically
+
+
tsb equivalent:
+
import { sortIndexDataFrame } from "tsb";
+
+const df2 = DataFrame.fromColumns({ z: [1], a: [2], m: [3] });
+sortIndexDataFrame(df2, { axis: 1 });
+// columns in alphabetical order: "a", "m", "z"
+
+const df3 = DataFrame.fromColumns({ v: [1, 2, 3] }, { index: ["c", "a", "b"] });
+sortIndexDataFrame(df3);
+// rows in index order: "a" (2), "b" (3), "c" (1)
+
+
+
Summary of options
+
// sortValuesSeries(s, { ascending?, naPosition?, ignoreIndex? })
+// sortIndexSeries(s, { ascending?, naPosition?, ignoreIndex? })
+// sortValuesDataFrame(df, by, { ascending?, naPosition?, ignoreIndex? })
+// sortIndexDataFrame(df, { ascending?, axis?, naPosition?, ignoreIndex? })
+//
+// ascending — true (default) or false
+// naPosition — "last" (default) or "first"
+// ignoreIndex — false (default) or true (resets index to 0, 1, 2, ...)
+// axis — 0 (rows, default) or 1 (columns, sortIndexDataFrame only)
+
+
+
diff --git a/playground/str_findall_and_json_denormalize.html b/playground/str_findall_and_json_denormalize.html
new file mode 100644
index 00000000..c0e915e8
--- /dev/null
+++ b/playground/str_findall_and_json_denormalize.html
@@ -0,0 +1,258 @@
+
+
+
+
+
+
tsb — str.findall & toJsonDenormalize
+
+
+
+
🔍 tsb — str.findall & toJsonDenormalize
+
+ Two new features in tsb:
+ strFindall / strFindallCount / strFindFirst / strFindallExpand
+ (mirrors pandas.Series.str.findall)
+ and
+ toJsonDenormalize / toJsonRecords / toJsonSplit / toJsonIndex
+ (the inverse of jsonNormalize).
+
+
← Back to feature index
+
+
+
+
1. strFindall — all regex matches per element
+
Mirrors pandas.Series.str.findall(pat). Returns a Series where each value is a JSON-encoded array of all non-overlapping matches.
+
// pandas equivalent:
+// s.str.findall(r'\d+')
+
+import { Series } from 'tsb';
+import { strFindall, strFindallCount, strFindFirst } from 'tsb';
+
+const prices = new Series({ data: ['$10.99 and $5.00', 'free!', '$3.50'] });
+
+const allPrices = strFindall(prices, /\$[\d.]+/);
+// Series [
+// '["$10.99","$5.00"]', ← JSON string
+// '[]',
+// '["$3.50"]'
+// ]
+
+// Parse the JSON to get actual arrays:
+JSON.parse(allPrices.values[0]); // ["$10.99", "$5.00"]
+JSON.parse(allPrices.values[1]); // []
+
✅ Each element contains a JSON.stringify(string[]) result.
+
+
With capture groups
+
// When the pattern has a capture group, returns the captured value
+const s = new Series({ data: ['name: Alice', 'name: Bob', 'unknown'] });
+const names = strFindall(s, /name: (\w+)/);
+// Series ['["Alice"]', '["Bob"]', '[]']
+
+// First capture group is extracted (pandas behaviour)
+
+
Null / NaN handling
+
const s = new Series({ data: ['hello', null, NaN, 'world'] });
+const result = strFindall(s, /\w+/);
+// Series ['["hello"]', null, null, '["world"]']
+// Null/NaN elements return null (not []) — matches pandas
+
+
+
+
+
2. strFindallCount — count matches per element
+
import { strFindallCount } from 'tsb';
+
+const words = new Series({ data: ['one two three', 'four', 'five six'] });
+const counts = strFindallCount(words, /\b\w+\b/);
+// Series [3, 1, 2]
+
+// Count vowels per word
+const vowels = new Series({ data: ['beautiful', 'rhythm', 'aeiou'] });
+strFindallCount(vowels, /[aeiou]/i);
+// Series [5, 0, 5]
+
💡 More efficient than strFindall when you only need the count, not the matches themselves.
+
+
+
+
+
3. strFindFirst — first match per element
+
import { strFindFirst } from 'tsb';
+
+const logs = new Series({ data: [
+ '2024-01-15: ERROR occurred',
+ '2024-02-20: INFO ok',
+ 'no date here',
+] });
+
+const dates = strFindFirst(logs, /\d{4}-\d{2}-\d{2}/);
+// Series ['2024-01-15', '2024-02-20', null]
+
+// Extract just the year (first capture group)
+const years = strFindFirst(logs, /(\d{4})-\d{2}-\d{2}/);
+// Series ['2024', '2024', null]
+
+
+
+
+
4. strFindallExpand — expand capture groups into a DataFrame
+
Mirrors pandas.Series.str.extract(pat, expand=True).
+
import { strFindallExpand } from 'tsb';
+
+const people = new Series({ data: ['John 30', 'Jane 25', 'unknown'] });
+
+// Named capture groups → column names
+const df = strFindallExpand(people, /(?<name>\w+)\s+(?<age>\d+)/);
+// name age
+// 0 John 30
+// 1 Jane 25
+// 2 null null
+
+// Unnamed groups → numbered columns "0", "1", ...
+const df2 = strFindallExpand(people, /(\w+)\s+(\d+)/);
+// 0 1
+// 0 John 30
+// 1 Jane 25
+// 2 null null
+
+
+
+
+
5. toJsonDenormalize — flat DataFrame → nested JSON
+
The inverse of jsonNormalize: takes a DataFrame with dot-separated column names and reconstructs nested JSON objects.
+
import { DataFrame } from 'tsb';
+import { toJsonDenormalize } from 'tsb';
+
+// Start with a flattened DataFrame (as jsonNormalize would produce)
+const flat = DataFrame.fromColumns({
+ name: ['Alice', 'Bob'],
+ 'address.city': ['New York', 'Los Angeles'],
+ 'address.zip': ['10001', '90001'],
+ 'address.country':['US', 'US'],
+});
+
+// Reconstruct nested JSON
+const records = toJsonDenormalize(flat);
+// [
+// { name: 'Alice', address: { city: 'New York', zip: '10001', country: 'US' } },
+// { name: 'Bob', address: { city: 'Los Angeles', zip: '90001', country: 'US' } },
+// ]
+
+// Round-trip: jsonNormalize → toJsonDenormalize
+import { jsonNormalize } from 'tsb';
+const original = [
+ { user: { name: 'Alice', age: 30 }, score: 100 },
+ { user: { name: 'Bob', age: 25 }, score: 200 },
+];
+const df = jsonNormalize(original);
+const recovered = toJsonDenormalize(df);
+// recovered ≈ original (with the same structure)
+
+
Custom separator
+
// If jsonNormalize was called with sep='__'
+const df2 = DataFrame.fromColumns({
+ 'user__name': ['Alice'],
+ 'user__city': ['NYC'],
+});
+toJsonDenormalize(df2, { sep: '__' });
+// [{ user: { name: 'Alice', city: 'NYC' } }]
+
+
Drop null values
+
const df3 = DataFrame.fromColumns({ a: [1, null], b: [null, 2] });
+toJsonDenormalize(df3, { dropNull: true });
+// [{ a: 1 }, { b: 2 }] ← null fields are omitted
+
+
+
+
+
6. JSON serialization utilities
+
+
toJsonRecords — orient="records"
+
import { toJsonRecords } from 'tsb';
+const df = DataFrame.fromColumns({ a: [1, 2], b: ['x', 'y'] });
+toJsonRecords(df);
+// [{ a: 1, b: 'x' }, { a: 2, b: 'y' }]
+
+
toJsonSplit — orient="split"
+
import { toJsonSplit } from 'tsb';
+toJsonSplit(df);
+// { columns: ['a', 'b'], index: [0, 1], data: [[1, 'x'], [2, 'y']] }
+
+toJsonSplit(df, { includeIndex: false });
+// { columns: ['a', 'b'], data: [[1, 'x'], [2, 'y']] }
+
+
toJsonIndex — orient="index"
+
import { toJsonIndex } from 'tsb';
+toJsonIndex(df);
+// { '0': { a: 1, b: 'x' }, '1': { a: 2, b: 'y' } }
+
+// With custom string index
+const df2 = DataFrame.fromColumns(
+ { v: [10, 20] },
+ { index: ['alice', 'bob'] }
+);
+toJsonIndex(df2);
+// { alice: { v: 10 }, bob: { v: 20 } }
+
+
+
+
API reference
+
+
+ Function Signature pandas equivalent
+
+
+
+ strFindall
+ (input, pat, flags?) → Series<Scalar>
+ s.str.findall(pat)
+
+
+ strFindallCount
+ (input, pat, flags?) → Series<Scalar>
+ s.str.findall(pat).map(len)
+
+
+ strFindFirst
+ (input, pat, flags?) → Series<Scalar>
+ s.str.extract(pat)[0]
+
+
+ strFindallExpand
+ (input, pat, flags?) → DataFrame
+ s.str.extract(pat, expand=True)
+
+
+ toJsonDenormalize
+ (df, options?) → JsonRecord[]
+ inverse of json_normalize
+
+
+ toJsonRecords
+ (df) → JsonRecord[]
+ df.to_json(orient='records')
+
+
+ toJsonSplit
+ (df, options?) → JsonSplitResult
+ df.to_json(orient='split')
+
+
+ toJsonIndex
+ (df) → JsonRecord
+ df.to_json(orient='index')
+
+
+
+
+
+
diff --git a/playground/swaplevel.html b/playground/swaplevel.html
new file mode 100644
index 00000000..52673cd1
--- /dev/null
+++ b/playground/swaplevel.html
@@ -0,0 +1,124 @@
+
+
+
+
+
+
swapLevel / reorderLevels — tsb playground
+
+
+
+
← Back to playground index
+
swapLevel & reorderLevels
+
+ Reorder the levels of a MultiIndex on a Series or DataFrame.
+ Mirrors pandas.Series.swaplevel,
+ pandas.DataFrame.swaplevel,
+ pandas.Series.reorder_levels, and
+ pandas.DataFrame.reorder_levels.
+
+
+
swapLevelSeries — swap two levels
+
Python pandas equivalent:
+
import pandas as pd
+
+idx = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)])
+s = pd.Series([10, 20, 30], index=idx)
+
+swapped = s.swaplevel(0, 1)
+print(swapped.index.tolist())
+# [(1, 'a'), (2, 'a'), (1, 'b')]
+
+
tsb equivalent:
+
import { MultiIndex, Series, swapLevelSeries } from "tsb";
+import type { Index, Label } from "tsb";
+
+const mi = MultiIndex.fromTuples([["a", 1], ["a", 2], ["b", 1]]);
+const s = new Series({ data: [10, 20, 30], index: mi as unknown as Index<Label> });
+
+const swapped = swapLevelSeries(s, 0, 1);
+// index tuples: [(1,"a"), (2,"a"), (1,"b")]
+// values: [10, 20, 30]
+
+
+
swapLevelDataFrame — swap row-index levels
+
Python pandas equivalent:
+
import pandas as pd
+
+idx = pd.MultiIndex.from_tuples([("a", 1), ("b", 2)], names=["letter", "number"])
+df = pd.DataFrame({"x": [10, 20]}, index=idx)
+
+swapped = df.swaplevel("letter", "number")
+print(swapped.index.tolist())
+# [(1, 'a'), (2, 'b')]
+
+
tsb equivalent:
+
import { DataFrame, MultiIndex, swapLevelDataFrame } from "tsb";
+import type { Index, Label } from "tsb";
+
+const mi = MultiIndex.fromTuples([["a", 1], ["b", 2]], { names: ["letter", "number"] });
+const df = DataFrame.fromColumns(
+ { x: [10, 20] },
+ { index: mi as unknown as Index<Label> },
+);
+
+const swapped = swapLevelDataFrame(df, "letter", "number");
+// row index tuples: [(1,"a"), (2,"b")]
+
+
+
reorderLevelsSeries — arbitrary level reordering
+
Python pandas equivalent:
+
import pandas as pd
+
+idx = pd.MultiIndex.from_arrays([["a", "b"], [1, 2], ["x", "y"]])
+s = pd.Series([10, 20], index=idx)
+
+reordered = s.reorder_levels([2, 0, 1])
+print(reordered.index.tolist())
+# [("x", "a", 1), ("y", "b", 2)]
+
+
tsb equivalent:
+
import { MultiIndex, Series, reorderLevelsSeries } from "tsb";
+import type { Index, Label } from "tsb";
+
+const mi = MultiIndex.fromArrays([["a", "b"], [1, 2], ["x", "y"]]);
+const s = new Series({ data: [10, 20], index: mi as unknown as Index<Label> });
+
+const reordered = reorderLevelsSeries(s, [2, 0, 1]);
+// index tuples: [("x","a",1), ("y","b",2)]
+
+
+
reorderLevelsDataFrame
+
Python pandas equivalent:
+
import pandas as pd
+
+idx = pd.MultiIndex.from_tuples([("a", 1), ("b", 2)])
+df = pd.DataFrame({"v": [5, 15]}, index=idx)
+
+reordered = df.reorder_levels([1, 0])
+print(reordered.index.tolist())
+# [(1, 'a'), (2, 'b')]
+
+
tsb equivalent:
+
import { DataFrame, MultiIndex, reorderLevelsDataFrame } from "tsb";
+import type { Index, Label } from "tsb";
+
+const mi = MultiIndex.fromTuples([["a", 1], ["b", 2]]);
+const df = DataFrame.fromColumns(
+ { v: [5, 15] },
+ { index: mi as unknown as Index<Label> },
+);
+
+const reordered = reorderLevelsDataFrame(df, [1, 0]);
+// row index tuples: [(1,"a"), (2,"b")]
+
+
+
diff --git a/playground/testing.html b/playground/testing.html
new file mode 100644
index 00000000..d202c5ba
--- /dev/null
+++ b/playground/testing.html
@@ -0,0 +1,176 @@
+
+
+
+
+
+
tsb — testing utilities
+
+
+
+
+
+
+
+
+ Overview
+
+ The tsb testing module provides assertion helpers for comparing tsb objects
+ in test suites — analogous to pandas.testing.assert_series_equal,
+ assert_frame_equal, and assert_index_equal.
+
+
+ When a check fails, a descriptive AssertionError is thrown with information about
+ which element differed and at which position — making test failures easy to diagnose.
+
+
+
+
+ Import
+ import {
+ assertSeriesEqual,
+ assertFrameEqual,
+ assertIndexEqual,
+ AssertionError,
+} from "tsb";
+
+
+
+ assertSeriesEqual(left, right, options?)
+ Assert that two Series contain identical values (with optional tolerance for floats).
+ Passing example
+ import { Series, assertSeriesEqual } from "tsb";
+
+const a = new Series({ data: [1, 2, 3], name: "x" });
+const b = new Series({ data: [1, 2, 3], name: "x" });
+assertSeriesEqual(a, b);
+// ✅ no exception thrown
+
+ Failing example
+ const c = new Series({ data: [1, 2, 99], name: "x" });
+assertSeriesEqual(a, c);
+// ❌ AssertionError: Series: values differ at index 2 (position 2).
+// left=3, right=99
+
+ Float tolerance
+ const p = new Series({ data: [1.0, 2.0] });
+const q = new Series({ data: [1.0 + 1e-9, 2.0] }); // tiny rounding error
+assertSeriesEqual(p, q); // ✅ passes (within default atol=1e-8)
+
+assertSeriesEqual(p, q, { checkExact: true }); // ❌ exact comparison fails
+
+ Options
+
+ Option Type Default Description
+ checkDtypesboolean true Compare dtype of both Series
+ checkIndexboolean true Compare row index labels
+ checkNamesboolean true Compare Series name and index name
+ checkExactboolean false Exact numeric equality (no tolerance)
+ rtolnumber 1e-5 Relative tolerance
+ atolnumber 1e-8 Absolute tolerance
+ objLabelstring "Series" Error message prefix
+
+
+
+
+ assertFrameEqual(left, right, options?)
+ Assert that two DataFrames are structurally and value-identical.
+
+ Passing example
+ import { DataFrame, assertFrameEqual } from "tsb";
+
+const a = DataFrame.fromColumns({ x: [1, 2], y: [3, 4] });
+const b = DataFrame.fromColumns({ x: [1, 2], y: [3, 4] });
+assertFrameEqual(a, b); // ✅
+
+ Ignore column order
+ const c = DataFrame.fromColumns({ y: [3, 4], x: [1, 2] }); // columns reversed
+assertFrameEqual(a, c, { checkLike: true }); // ✅ order ignored
+
+ Options
+
+ Option Type Default Description
+ checkDtypesboolean true Compare column dtypes
+ checkIndexboolean true Compare row index labels
+ checkNamesboolean true Compare index and column names
+ checkLikeboolean false Ignore column order
+ checkExactboolean false Exact numeric equality
+ rtolnumber 1e-5 Relative tolerance
+ atolnumber 1e-8 Absolute tolerance
+ objLabelstring "DataFrame" Error message prefix
+
+
+
+
+ assertIndexEqual(left, right, options?)
+ Assert that two Index objects have identical labels.
+ import { Index, assertIndexEqual } from "tsb";
+
+const a = new Index(["a", "b", "c"]);
+const b = new Index(["a", "b", "c"]);
+assertIndexEqual(a, b); // ✅
+
+const c = new Index(["a", "b", "z"]);
+assertIndexEqual(a, c);
+// ❌ AssertionError: Index: Index values differ at position 2. left=c, right=z
+
+
+
+ AssertionError
+
+ All failed assertions throw an AssertionError instance (extends Error).
+ It can be caught explicitly or used with expect().toThrow(AssertionError) in bun:test.
+
+ import { AssertionError, assertSeriesEqual, Series } from "tsb";
+
+try {
+ assertSeriesEqual(
+ new Series({ data: [1, 2, 3] }),
+ new Series({ data: [1, 2, 4] }),
+ );
+} catch (e) {
+ if (e instanceof AssertionError) {
+ console.error("Assertion failed:", e.message);
+ }
+}
+
+ 💡 In bun:test, use expect(() => assertSeriesEqual(a, b)).toThrow(AssertionError)
+ to write negative assertions.
+
+
+
+
+ pandas equivalents
+
+ tsb pandas
+ assertSeriesEqual(a, b)pd.testing.assert_series_equal(a, b)
+ assertFrameEqual(a, b)pd.testing.assert_frame_equal(a, b)
+ assertIndexEqual(a, b)pd.testing.assert_index_equal(a, b)
+ AssertionErrorAssertionError (Python built-in)
+
+
+
+
+
+
diff --git a/playground/timedelta_range.html b/playground/timedelta_range.html
new file mode 100644
index 00000000..f06e0bdf
--- /dev/null
+++ b/playground/timedelta_range.html
@@ -0,0 +1,209 @@
+
+
+
+
+
+
tsb — timedelta_range
+
+
+
+
+
+ ← back to index
+
+ Frequency Reference
+
+
+ String Duration Example
+
+ W 1 week (7 days) "2W" → 14 days per step
+ D 1 calendar day "3D" → 3 days per step
+ H 1 hour "6H" → 6 hours per step
+ T / min 1 minute "30min" → 30 minutes
+ S 1 second "10S" → 10 seconds
+ L / ms 1 millisecond "500ms" → 500 ms
+
+
+
+
+ Interactive Builder
+
+
Provide at least 2 of: start, end, periods, freq.
+
+
+
+
Generate
+
Click Generate to produce the TimedeltaIndex.
+
+
+ Preset Examples
+
+
+ Description Code Action
+
+
+ 5 daily intervals from 0
+ timedelta_range({ start:"0 days", periods:5, freq:"D" })
+ Run
+
+
+ 1-to-3 days in daily steps
+ timedelta_range({ start:"1 days", end:"3 days", freq:"D" })
+ Run
+
+
+ 4 entries ending at 3 days (freq D)
+ timedelta_range({ end:"3 days", periods:4, freq:"D" })
+ Run
+
+
+ Linear space 0→2 days, 5 points
+ timedelta_range({ start:"0 days", end:"2 days", periods:5 })
+ Run
+
+
+ 6-hour steps, closed=left
+ timedelta_range({ start:"0 days", end:"1 days", freq:"6H", closed:"left" })
+ Run
+
+
+ 30-minute intervals, 8 periods
+ timedelta_range({ start:"0 days", periods:8, freq:"30min" })
+ Run
+
+
+
+
+
+
+
+
+
diff --git a/playground/transform_agg.html b/playground/transform_agg.html
new file mode 100644
index 00000000..39f2e2c6
--- /dev/null
+++ b/playground/transform_agg.html
@@ -0,0 +1,147 @@
+
+
+
+
+
+
transform — Series.transform / DataFrame.transform — tsb playground
+
+
+
+
transform — Series.transform / DataFrame.transform
+
+ Apply one or more functions to a Series or DataFrame and return a result with the
+ same index (broadcast scalars to full length).
+ Mirrors pandas.Series.transform() and pandas.DataFrame.transform().
+
+
+
API
+
+import { seriesTransform, dataFrameTransform } from "tsb";
+
+// single function or built-in name → Series
+seriesTransform(s, "cumsum");
+seriesTransform(s, (x) => x);
+
+// array → DataFrame (one column per function)
+seriesTransform(s, ["sum", "cumsum", "mean"]);
+
+// Record → DataFrame with named columns
+seriesTransform(s, { total: "sum", running: "cumsum" });
+
+// DataFrame transform (column-wise by default)
+dataFrameTransform(df, "cumsum");
+dataFrameTransform(df, { a: "sum", b: "cummin" }); // per-column
+dataFrameTransform(df, "cumsum", { axis: 1 }); // row-wise
+
+
+
Built-in names
+
+Aggregating (broadcast): "sum", "mean", "min", "max", "std", "var", "median",
+ "count", "first", "last", "prod", "any", "all", "nunique"
+Cumulative (same shape): "cumsum", "cumprod", "cummin", "cummax"
+
+
+
Interactive Demo
+
Series cumsum
+
Series sum (broadcast)
+
Series multi-func → DataFrame
+
Series record → DataFrame
+
DataFrame transform
+
DataFrame per-column
+
Click a button above to run an example.
+
+
Examples
+
+import { Series, DataFrame, seriesTransform, dataFrameTransform } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4] });
+
+// cumulative sum
+seriesTransform(s, "cumsum").values; // [1, 3, 6, 10]
+
+// broadcast aggregate
+seriesTransform(s, "sum").values; // [10, 10, 10, 10]
+
+// multiple functions
+const df = seriesTransform(s, ["sum", "cumsum", "mean"]);
+df.col("sum").values; // [10, 10, 10, 10]
+df.col("cumsum").values; // [1, 3, 6, 10]
+df.col("mean").values; // [2.5, 2.5, 2.5, 2.5]
+
+// DataFrame transform
+const frame = DataFrame.fromColumns({ a: [1,2,3], b: [10,20,30] });
+dataFrameTransform(frame, "cumsum").col("b").values; // [10, 30, 60]
+
+
+
+
+
diff --git a/playground/truncate.html b/playground/truncate.html
new file mode 100644
index 00000000..ee4cb76b
--- /dev/null
+++ b/playground/truncate.html
@@ -0,0 +1,132 @@
+
+
+
+
+
+
truncate — tsb playground
+
+
+
+
← Back to playground index
+
truncate
+
+ Truncate a Series or DataFrame to keep only the elements within a label window
+ [before, after] (both bounds inclusive).
+ Mirrors pandas.Series.truncate and
+ pandas.DataFrame.truncate.
+
+
+
truncateSeries — keep rows within [before, after]
+
Python pandas equivalent:
+
import pandas as pd
+
+s = pd.Series([10, 20, 30, 40, 50], index=[0, 1, 2, 3, 4])
+
+print(s.truncate(before=1, after=3))
+# 1 20
+# 2 30
+# 3 40
+
+print(s.truncate(before=2))
+# 2 30
+# 3 40
+# 4 50
+
+print(s.truncate(after=2))
+# 0 10
+# 1 20
+# 2 30
+
+
tsb equivalent:
+
import { Series, truncateSeries } from "tsb";
+
+const s = new Series({ data: [10, 20, 30, 40, 50], index: [0, 1, 2, 3, 4] });
+
+truncateSeries(s, 1, 3).values; // [20, 30, 40]
+truncateSeries(s, 2).values; // [30, 40, 50]
+truncateSeries(s, undefined, 2).values; // [10, 20, 30]
+
+
+
truncateDataFrame — truncate rows
+
Python pandas equivalent:
+
import pandas as pd
+
+df = pd.DataFrame(
+ {"a": [10, 20, 30, 40, 50], "b": [1, 2, 3, 4, 5]},
+ index=[0, 1, 2, 3, 4],
+)
+
+print(df.truncate(before=1, after=3))
+# a b
+# 1 20 2
+# 2 30 3
+# 3 40 4
+
+
tsb equivalent:
+
import { DataFrame, truncateDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns(
+ { a: [10, 20, 30, 40, 50], b: [1, 2, 3, 4, 5] },
+ { index: [0, 1, 2, 3, 4] },
+);
+
+const result = truncateDataFrame(df, 1, 3);
+result.col("a").values; // [20, 30, 40]
+result.index.values; // [1, 2, 3]
+
+
+
truncateDataFrame — truncate columns (axis=1)
+
Python pandas equivalent:
+
import pandas as pd
+
+df = pd.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]})
+
+print(df.truncate(before="a", after="b", axis=1))
+# a b
+# 0 1 3
+# 1 2 4
+
+
tsb equivalent:
+
import { DataFrame, truncateDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4], c: [5, 6] });
+
+const result = truncateDataFrame(df, "a", "b", { axis: 1 });
+result.columns.values; // ["a", "b"]
+
+
+
String index truncation
+
Python pandas equivalent:
+
import pandas as pd
+
+s = pd.Series(
+ [1, 2, 3, 4, 5],
+ index=["apple", "banana", "cherry", "date", "elderberry"],
+)
+
+print(s.truncate(before="banana", after="date"))
+# banana 2
+# cherry 3
+# date 4
+
+
tsb equivalent:
+
import { Series, truncateSeries } from "tsb";
+
+const s = new Series({
+ data: [1, 2, 3, 4, 5],
+ index: ["apple", "banana", "cherry", "date", "elderberry"],
+});
+
+truncateSeries(s, "banana", "date").values; // [2, 3, 4]
+
+
+
diff --git a/playground/update.html b/playground/update.html
new file mode 100644
index 00000000..fd452d63
--- /dev/null
+++ b/playground/update.html
@@ -0,0 +1,101 @@
+
+
+
+
+
+
update — tsb playground
+
+
+
+
← Back to playground index
+
update
+
+ Update a Series or DataFrame in-place using non-NA values from another object.
+ Mirrors pandas.DataFrame.update and pandas.Series.update.
+
+
+
seriesUpdate — basic overwrite
+
Python pandas equivalent:
+
import pandas as pd
+import numpy as np
+
+s = pd.Series([1, np.nan, 3], index=[0, 1, 2])
+other = pd.Series([np.nan, 20, np.nan], index=[0, 1, 2])
+s.update(other)
+print(s.tolist())
+# [1.0, 20.0, 3.0]
+
+
tsb equivalent:
+
import { Series, seriesUpdate } from "tsb";
+
+const s = new Series({ data: [1, null, 3], index: [0, 1, 2] });
+const other = new Series({ data: [null, 20, null], index: [0, 1, 2] });
+seriesUpdate(s, other).values;
+// [1, 20, 3]
+
+
+
overwrite=false — only fill NA
+
Python pandas equivalent:
+
import pandas as pd
+import numpy as np
+
+s = pd.Series([1, np.nan, 3])
+other = pd.Series([10, 20, 30])
+s.update(other, overwrite=False)
+print(s.tolist())
+# [1.0, 20.0, 3.0]
+
+
tsb equivalent:
+
import { Series, seriesUpdate } from "tsb";
+
+const s = new Series({ data: [1, null, 3] });
+const other = new Series({ data: [10, 20, 30] });
+seriesUpdate(s, other, { overwrite: false }).values;
+// [1, 20, 3]
+
+
+
dataFrameUpdate — update from another DataFrame
+
Python pandas equivalent:
+
import pandas as pd
+import numpy as np
+
+df = pd.DataFrame({"a": [1, np.nan, 3], "b": [10, 20, 30]})
+other = pd.DataFrame({"a": [np.nan, 99, np.nan]})
+df.update(other)
+print(df)
+# a b
+# 0 1.0 10.0
+# 1 99.0 20.0
+# 2 3.0 30.0
+
+
tsb equivalent:
+
import { DataFrame, dataFrameUpdate } from "tsb";
+
+const df = DataFrame.fromColumns({ a: [1, null, 3], b: [10, 20, 30] });
+const other = DataFrame.fromColumns({ a: [null, 99, null] });
+const result = dataFrameUpdate(df, other);
+result.col("a").values; // [1, 99, 3]
+result.col("b").values; // [10, 20, 30]
+
+
+
Label alignment
+
tsb equivalent:
+
import { Series, seriesUpdate } from "tsb";
+
+const s = new Series({ data: [1, 2, 3], index: [0, 1, 2] });
+// other only has label 1 — other labels unchanged
+const other = new Series({ data: [99], index: [1] });
+seriesUpdate(s, other).values;
+// [1, 99, 3]
+
+
+
diff --git a/playground/xs.html b/playground/xs.html
new file mode 100644
index 00000000..76af21ad
--- /dev/null
+++ b/playground/xs.html
@@ -0,0 +1,109 @@
+
+
+
+
+
+
xs — Cross-Section Selection — tsb playground
+
+
+
+
xs — Cross-Section Selection
+
+ xsDataFrame(df, key) extracts a row by label as a Series, or
+ a column by name (with axis: 1). Works with both flat and
+ MultiIndex DataFrames.
+
+
+
Interactive Demo
+
Row cross-section
+
Column cross-section
+
MultiIndex cross-section
+
Click a button above to run an example.
+
+
Code Examples
+
import { DataFrame, xsDataFrame, xsSeries, MultiIndex } from "tsb";
+
+// ── flat index ──────────────────────────────────────────────────────────────
+const df = DataFrame.fromColumns(
+ { a: [1, 2, 3], b: [4, 5, 6] },
+ { index: ["x", "y", "z"] },
+);
+
+// Select row "y" → Series { a: 2, b: 5 }
+xsDataFrame(df, "y");
+
+// Select column "b" → Series { x: 4, y: 5, z: 6 }
+xsDataFrame(df, "b", { axis: 1 });
+
+// ── MultiIndex ─────────────────────────────────────────────────────────────
+const mi = MultiIndex.fromTuples([
+ ["A", 1], ["A", 2],
+ ["B", 1], ["B", 2],
+]);
+const miDf = new DataFrame( ... , mi);
+
+// All "A" rows → DataFrame with 2 rows
+xsDataFrame(miDf, "A");
+
+// ── Series ─────────────────────────────────────────────────────────────────
+const s = new Series({ data: [10, 20, 30], index: ["a", "b", "c"] });
+xsSeries(s, "b"); // → 20
+
+
+
+
+
diff --git a/src/core/date_offset.ts b/src/core/date_offset.ts
index 4eda968e..f4c80e9f 100644
--- a/src/core/date_offset.ts
+++ b/src/core/date_offset.ts
@@ -304,7 +304,10 @@ function applyWeek(date: Date, n: number, jsDow: number | null): Date {
export class Day implements DateOffset {
readonly name = "Day";
- constructor(readonly n = 1) {}
+ readonly n: number;
+ constructor(n = 1) {
+ this.n = n;
+ }
/** Convenience factory: `Day.of(3)` equivalent to `new Day(3)`. */
static of(n = 1): Day {
@@ -347,7 +350,10 @@ export class Day implements DateOffset {
export class Hour implements DateOffset {
readonly name = "Hour";
- constructor(readonly n = 1) {}
+ readonly n: number;
+ constructor(n = 1) {
+ this.n = n;
+ }
static of(n = 1): Hour {
return new Hour(n);
@@ -386,7 +392,10 @@ export class Hour implements DateOffset {
export class Minute implements DateOffset {
readonly name = "Minute";
- constructor(readonly n = 1) {}
+ readonly n: number;
+ constructor(n = 1) {
+ this.n = n;
+ }
static of(n = 1): Minute {
return new Minute(n);
@@ -425,7 +434,10 @@ export class Minute implements DateOffset {
export class Second implements DateOffset {
readonly name = "Second";
- constructor(readonly n = 1) {}
+ readonly n: number;
+ constructor(n = 1) {
+ this.n = n;
+ }
static of(n = 1): Second {
return new Second(n);
@@ -464,7 +476,10 @@ export class Second implements DateOffset {
export class Milli implements DateOffset {
readonly name = "Milli";
- constructor(readonly n = 1) {}
+ readonly n: number;
+ constructor(n = 1) {
+ this.n = n;
+ }
static of(n = 1): Milli {
return new Milli(n);
@@ -522,12 +537,11 @@ export class Week implements DateOffset {
* Weekday anchor (pandas convention: 0 = Monday, …, 6 = Sunday).
* `null` means no alignment.
*/
+ readonly n: number;
readonly weekday: number | null;
- constructor(
- readonly n = 1,
- options: WeekOptions = {},
- ) {
+ constructor(n = 1, options: WeekOptions = {}) {
+ this.n = n;
this.weekday = options.weekday ?? null;
}
@@ -591,7 +605,10 @@ export class Week implements DateOffset {
export class MonthEnd implements DateOffset {
readonly name = "MonthEnd";
- constructor(readonly n = 1) {}
+ readonly n: number;
+ constructor(n = 1) {
+ this.n = n;
+ }
static of(n = 1): MonthEnd {
return new MonthEnd(n);
@@ -652,7 +669,10 @@ export class MonthEnd implements DateOffset {
export class MonthBegin implements DateOffset {
readonly name = "MonthBegin";
- constructor(readonly n = 1) {}
+ readonly n: number;
+ constructor(n = 1) {
+ this.n = n;
+ }
static of(n = 1): MonthBegin {
return new MonthBegin(n);
@@ -709,7 +729,10 @@ export class MonthBegin implements DateOffset {
export class YearEnd implements DateOffset {
readonly name = "YearEnd";
- constructor(readonly n = 1) {}
+ readonly n: number;
+ constructor(n = 1) {
+ this.n = n;
+ }
static of(n = 1): YearEnd {
return new YearEnd(n);
@@ -761,7 +784,10 @@ export class YearEnd implements DateOffset {
export class YearBegin implements DateOffset {
readonly name = "YearBegin";
- constructor(readonly n = 1) {}
+ readonly n: number;
+ constructor(n = 1) {
+ this.n = n;
+ }
static of(n = 1): YearBegin {
return new YearBegin(n);
@@ -816,7 +842,10 @@ export class YearBegin implements DateOffset {
export class BusinessDay implements DateOffset {
readonly name = "BusinessDay";
- constructor(readonly n = 1) {}
+ readonly n: number;
+ constructor(n = 1) {
+ this.n = n;
+ }
static of(n = 1): BusinessDay {
return new BusinessDay(n);
diff --git a/src/core/frame.ts b/src/core/frame.ts
index 91b28377..ddb641a1 100644
--- a/src/core/frame.ts
+++ b/src/core/frame.ts
@@ -131,6 +131,21 @@ export class DataFrame {
return new DataFrame(colMap, rowIndex);
}
+ /**
+ * Alias for {@link fromColumns}. Create a DataFrame from an object mapping column names to value arrays.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromArrays({ a: [1, 2, 3], b: [4, 5, 6] });
+ * ```
+ */
+ static fromArrays(
+    data: Readonly<Record<string, readonly Scalar[]>>,
+ options?: DataFrameOptions,
+ ): DataFrame {
+ return DataFrame.fromColumns(data, options);
+ }
+
/**
* Create a DataFrame from an array of row objects.
*
@@ -758,11 +773,23 @@ export class DataFrame {
// ─── module-level helpers (extracted to keep methods lean) ───────────────────
+function isIndexLike(v: unknown): v is Index {
+ if (typeof v !== "object" || v === null) {
+ return false;
+ }
+  const rec = v as Record<string, unknown>;
+ return (
+ typeof rec["size"] === "number" &&
+ typeof rec["at"] === "function" &&
+ typeof rec["getLoc"] === "function"
+ );
+}
+
function resolveRowIndex(nRows: number, supplied?: Index | readonly Label[]): Index {
if (supplied === undefined) {
return defaultRowIndex(nRows);
}
- if (supplied instanceof Index) {
+ if (isIndexLike(supplied)) {
return supplied;
}
return new Index(supplied as Label[]);
diff --git a/src/core/index.ts b/src/core/index.ts
index 3fd31e7c..dc9437dd 100644
--- a/src/core/index.ts
+++ b/src/core/index.ts
@@ -23,6 +23,12 @@ export { Period, PeriodIndex } from "./period.ts";
export type { PeriodFreq, PeriodIndexOptions } from "./period.ts";
export { Timedelta, TimedeltaIndex } from "./timedelta.ts";
export type { TimedeltaComponents, TimedeltaIndexOptions } from "./timedelta.ts";
+export { timedelta_range } from "./timedelta_range.ts";
+export type {
+ TimedeltaFreq,
+ TimedeltaRangeClosed,
+ TimedeltaRangeOptions,
+} from "./timedelta_range.ts";
export {
Day,
Hour,
diff --git a/src/core/multi_index.ts b/src/core/multi_index.ts
index 13a40fa5..433c50c2 100644
--- a/src/core/multi_index.ts
+++ b/src/core/multi_index.ts
@@ -633,10 +633,22 @@ export class MultiIndex {
// ─── tuple comparison helpers ────────────────────────────────────────────────
/** Compare two non-null scalar labels; return -1 / 0 / 1. */
-function compareScalars(av: number | string | boolean, bv: number | string | boolean): number {
+function compareScalars(
+ av: number | string | boolean | Date,
+ bv: number | string | boolean | Date,
+): number {
if (av === bv) {
return 0;
}
+ if (av instanceof Date && bv instanceof Date) {
+ return av.getTime() < bv.getTime() ? -1 : 1;
+ }
+ if (av instanceof Date) {
+ return 1;
+ }
+ if (bv instanceof Date) {
+ return -1;
+ }
return av < bv ? -1 : 1;
}
diff --git a/src/core/sample.ts b/src/core/sample.ts
index 7cd8d529..76d87ad9 100644
--- a/src/core/sample.ts
+++ b/src/core/sample.ts
@@ -131,13 +131,13 @@ function normalizeWeights(
* Falls back to basic weighted sampling when `replace=true`.
*/
function weightedSampleWithoutReplacement(
- poolSize: number,
+ _poolSize: number,
k: number,
probs: number[],
rng: () => number,
): number[] {
// Use reservoir sampling with exponential keys: assign key = rand^(1/w), take top-k
- const keys: Array<[number, number]> = probs.map((p, i) => {
+ const keys: [number, number][] = probs.map((p, i) => {
const r = rng();
const key = p > 0 ? r ** (1 / p) : 0;
return [key, i];
diff --git a/src/core/series.ts b/src/core/series.ts
index 3dd5ee16..f9ed23dc 100644
--- a/src/core/series.ts
+++ b/src/core/series.ts
@@ -172,7 +172,7 @@ export class Series {
if (index === undefined) {
this.index = defaultIndex(data.length);
- } else if (index instanceof Index) {
+ } else if (isIndexLike(index)) {
if (index.size !== data.length) {
throw new RangeError(
`Index length ${index.size} does not match data length ${data.length}`,
@@ -967,3 +967,15 @@ export class Series {
return new SeriesGroupBy(this as Series, by);
}
}
+
+function isIndexLike(v: unknown): v is Index {
+ if (typeof v !== "object" || v === null) {
+ return false;
+ }
+ const rec = v as Record;
+ return (
+ typeof rec["size"] === "number" &&
+ typeof rec["at"] === "function" &&
+ typeof rec["getLoc"] === "function"
+ );
+}
diff --git a/src/core/timedelta.ts b/src/core/timedelta.ts
index 4eb9c788..41c8cd6e 100644
--- a/src/core/timedelta.ts
+++ b/src/core/timedelta.ts
@@ -69,8 +69,8 @@ const MS_PER_WEEK = 7 * MS_PER_DAY;
const RE_ISO =
/^-?P(?:(\d+(?:\.\d+)?)W)?(?:(\d+(?:\.\d+)?)D)?(?:T(?:(\d+(?:\.\d+)?)H)?(?:(\d+(?:\.\d+)?)M)?(?:(\d+(?:\.\d+)?)S)?)?$/i;
-/** pandas-style: "N days HH:MM:SS[.mmm]" */
-const RE_PANDAS = /^(-)?(\d+) days? (\d{2}):(\d{2}):(\d{2})(?:\.(\d+))?$/i;
+/** pandas-style: "N days[ HH:MM:SS[.mmm]]" — time part is optional */
+const RE_PANDAS = /^(-)?(\d+) days?(?:\s+(\d{2}):(\d{2}):(\d{2})(?:\.(\d+))?)?$/i;
/** Simple "HH:MM:SS[.mmm]" with optional sign */
const RE_HHMMSS = /^(-)?(\d{2}):(\d{2}):(\d{2})(?:\.(\d+))?$/;
@@ -97,6 +97,47 @@ function pad2(n: number): string {
return String(Math.abs(n)).padStart(2, "0");
}
+/** Parse ISO 8601 match array into milliseconds. */
+function parseIsoMatch(trimmed: string, m: RegExpExecArray): number {
+ const sign = trimmed.startsWith("-") ? -1 : 1;
+ const [, wStr, dStr, hStr, mStr, sStr] = m;
+ return (
+ sign *
+ (Number(wStr ?? 0) * MS_PER_WEEK +
+ Number(dStr ?? 0) * MS_PER_DAY +
+ Number(hStr ?? 0) * MS_PER_HOUR +
+ Number(mStr ?? 0) * MS_PER_MINUTE +
+ Number(sStr ?? 0) * MS_PER_SECOND)
+ );
+}
+
+/** Parse pandas-style match array into milliseconds. */
+function parsePandasMatch(m: RegExpExecArray): number {
+ const [, signStr, daysStr, hStr, mStr, sStr, fracStr] = m;
+ const sign = signStr === "-" ? -1 : 1;
+ return (
+ sign *
+ (Number(daysStr) * MS_PER_DAY +
+ Number(hStr ?? "0") * MS_PER_HOUR +
+ Number(mStr ?? "0") * MS_PER_MINUTE +
+ Number(sStr ?? "0") * MS_PER_SECOND +
+ parseFrac(fracStr))
+ );
+}
+
+/** Parse HH:MM:SS match array into milliseconds. */
+function parseHhmmssMatch(m: RegExpExecArray): number {
+ const [, signStr, hStr, mStr, sStr, fracStr] = m;
+ const sign = signStr === "-" ? -1 : 1;
+ return (
+ sign *
+ (Number(hStr) * MS_PER_HOUR +
+ Number(mStr) * MS_PER_MINUTE +
+ Number(sStr) * MS_PER_SECOND +
+ parseFrac(fracStr))
+ );
+}
+
// ─── Timedelta ────────────────────────────────────────────────────────────────
/**
@@ -163,48 +204,18 @@ export class Timedelta {
*/
static parse(s: string): Timedelta {
const trimmed = s.trim();
-
- // ISO 8601
const iso = RE_ISO.exec(trimmed);
if (iso !== null) {
- const sign = trimmed.startsWith("-") ? -1 : 1;
- const [, wStr, dStr, hStr, mStr, sStr] = iso;
- const ms =
- Number(wStr ?? 0) * MS_PER_WEEK +
- Number(dStr ?? 0) * MS_PER_DAY +
- Number(hStr ?? 0) * MS_PER_HOUR +
- Number(mStr ?? 0) * MS_PER_MINUTE +
- Number(sStr ?? 0) * MS_PER_SECOND;
- return new Timedelta(sign * ms);
+ return new Timedelta(parseIsoMatch(trimmed, iso));
}
-
- // pandas-style "N days HH:MM:SS[.mmm]"
const pandas = RE_PANDAS.exec(trimmed);
if (pandas !== null) {
- const [, signStr, daysStr, hStr, mStr, sStr, fracStr] = pandas;
- const sign = signStr === "-" ? -1 : 1;
- const ms =
- Number(daysStr) * MS_PER_DAY +
- Number(hStr) * MS_PER_HOUR +
- Number(mStr) * MS_PER_MINUTE +
- Number(sStr) * MS_PER_SECOND +
- parseFrac(fracStr);
- return new Timedelta(sign * ms);
+ return new Timedelta(parsePandasMatch(pandas));
}
-
- // HH:MM:SS[.mmm]
const hms = RE_HHMMSS.exec(trimmed);
if (hms !== null) {
- const [, signStr, hStr, mStr, sStr, fracStr] = hms;
- const sign = signStr === "-" ? -1 : 1;
- const ms =
- Number(hStr) * MS_PER_HOUR +
- Number(mStr) * MS_PER_MINUTE +
- Number(sStr) * MS_PER_SECOND +
- parseFrac(fracStr);
- return new Timedelta(sign * ms);
+ return new Timedelta(parseHhmmssMatch(hms));
}
-
throw new SyntaxError(`Timedelta.parse: cannot parse "${s}"`);
}
@@ -239,8 +250,28 @@ export class Timedelta {
return Math.abs(this.totalMilliseconds) % MS_PER_SECOND;
}
  /** Alias for {@link milliseconds} — backward compatibility (`pandas.Timedelta.ms`). */
  get ms(): number {
    return this.milliseconds;
  }

  /** Absolute value of the total duration in milliseconds — backward compatibility. */
  get absMs(): number {
    return Math.abs(this.totalMilliseconds);
  }

  /** Sign of the duration: `+1` for non-negative (zero included), `-1` for negative. */
  get sign(): number {
    return this.totalMilliseconds < 0 ? -1 : 1;
  }

  // ── total-unit conversions ────────────────────────────────────────────────

  /** Alias for {@link totalMilliseconds} — backward compatibility. */
  get totalMs(): number {
    return this.totalMilliseconds;
  }
+
/** Duration expressed in whole + fractional days. */
get totalDays(): number {
return this.totalMilliseconds / MS_PER_DAY;
@@ -291,6 +322,11 @@ export class Timedelta {
return new Timedelta(this.totalMilliseconds - other.totalMilliseconds);
}
  /**
   * Alias for {@link sub} — backward compatibility.
   *
   * @param other - Duration to subtract from this one.
   * @returns A new Timedelta equal to `this - other`.
   */
  subtract(other: Timedelta): Timedelta {
    return this.sub(other);
  }
+
/**
* Return `this * scalar`.
*
@@ -303,6 +339,11 @@ export class Timedelta {
return new Timedelta(this.totalMilliseconds * scalar);
}
  /**
   * Alias for {@link mul} — backward compatibility.
   *
   * @param factor - Scalar multiplier (may be fractional or negative).
   * @returns A new Timedelta equal to `this * factor`.
   */
  scale(factor: number): Timedelta {
    return this.mul(factor);
  }
+
/**
* Return the negation of this duration.
*
@@ -355,6 +396,21 @@ export class Timedelta {
return this.totalMilliseconds === other.totalMilliseconds;
}
  /** True when this duration is strictly shorter than `other` (i.e. `compareTo(other) < 0`). */
  lt(other: Timedelta): boolean {
    return this.totalMilliseconds < other.totalMilliseconds;
  }

  /** True when this duration is strictly longer than `other` (i.e. `compareTo(other) > 0`). */
  gt(other: Timedelta): boolean {
    return this.totalMilliseconds > other.totalMilliseconds;
  }

  /** Alias for {@link equals} — millisecond-exact equality. */
  eq(other: Timedelta): boolean {
    return this.equals(other);
  }
+
// ── string representation ─────────────────────────────────────────────────
/**
diff --git a/src/core/timedelta_range.ts b/src/core/timedelta_range.ts
new file mode 100644
index 00000000..c639ea3a
--- /dev/null
+++ b/src/core/timedelta_range.ts
@@ -0,0 +1,327 @@
+/**
+ * timedelta_range — factory for evenly-spaced TimedeltaIndex sequences.
+ *
+ * Mirrors `pandas.timedelta_range`.
+ *
+ * Generate a fixed-frequency {@link TimedeltaIndex} by specifying at least
+ * two of the four parameters: `start`, `end`, `periods`, and `freq`.
+ *
+ * **Freq string aliases:**
+ *
+ * | String | Duration |
+ * |--------|----------|
+ * | `"W"` | 1 week (7 days) |
+ * | `"D"` | 1 calendar day |
+ * | `"H"` | 1 hour |
+ * | `"T"` / `"min"` | 1 minute |
+ * | `"S"` | 1 second |
+ * | `"L"` / `"ms"` | 1 millisecond |
+ * | `"U"` / `"us"` | 1 microsecond (rounded to nearest ms) |
+ * | `"N"` / `"ns"` | 1 nanosecond (rounded to nearest ms) |
+ *
+ * Multiplier prefixes are supported: `"2H"`, `"30min"`, `"500ms"`, etc.
+ *
+ * @example
+ * ```ts
+ * // 5 one-hour periods starting from 0
+ * const idx = timedelta_range({ start: "0 days", periods: 5, freq: "H" });
+ * idx.size; // 5
+ * idx.at(0).totalHours; // 0
+ * idx.at(4).totalHours; // 4
+ *
+ * // Start and end with freq
+ * const idx2 = timedelta_range({ start: "1 days", end: "3 days", freq: "D" });
+ * idx2.size; // 3
+ *
+ * // Start and end with periods (linear space)
+ * const idx3 = timedelta_range({ start: "0 days", end: "4 days", periods: 5 });
+ * idx3.at(2).totalDays; // 2
+ * ```
+ *
+ * @module
+ */
+
+import { Timedelta, TimedeltaIndex } from "./timedelta.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/**
+ * Supported frequency alias strings for {@link timedelta_range}.
+ *
+ * Optionally prefixed with a positive integer multiplier, e.g. `"2H"`, `"30min"`.
+ */
+export type TimedeltaFreq =
+ | "W"
+ | "D"
+ | "H"
+ | "T"
+ | "min"
+ | "S"
+ | "L"
+ | "ms"
+ | "U"
+ | "us"
+ | "N"
+ | "ns"
+ | string; // allows "2H", "30min", etc.
+
+/** Closed endpoint specification. */
+export type TimedeltaRangeClosed = "left" | "right" | "both" | "neither" | null;
+
+/** Options for {@link timedelta_range}. */
+export interface TimedeltaRangeOptions {
+ /**
+ * First value of the sequence.
+ * May be a {@link Timedelta}, a parseable string, or a number of milliseconds.
+ */
+ readonly start?: Timedelta | string | number;
+ /**
+ * Last value of the sequence (inclusive unless `closed` excludes it).
+ * May be a {@link Timedelta}, a parseable string, or a number of milliseconds.
+ */
+ readonly end?: Timedelta | string | number;
+ /** Number of values to generate. */
+ readonly periods?: number;
+ /**
+ * Frequency (step size) between values.
+ * A {@link TimedeltaFreq} string such as `"H"`, `"2D"`, `"30min"`,
+ * or a plain `number` of milliseconds.
+ */
+ readonly freq?: TimedeltaFreq | number;
+ /** Optional name label for the resulting index. */
+ readonly name?: string | null;
+ /**
+ * Which endpoints to include.
+ * - `"both"` (default): include both `start` and `end`.
+ * - `"left"` : include `start`, exclude `end`.
+ * - `"right"` : exclude `start`, include `end`.
+ * - `"neither"`: exclude both endpoints.
+ * - `null` : same as `"both"`.
+ */
+ readonly closed?: TimedeltaRangeClosed;
+}
+
+// ─── frequency parsing ────────────────────────────────────────────────────────
+
+/** Map of bare unit aliases to milliseconds. */
+const UNIT_MS: Record = {
+ W: 7 * 86_400_000,
+ D: 86_400_000,
+ H: 3_600_000,
+ T: 60_000,
+ min: 60_000,
+ S: 1_000,
+ L: 1,
+ ms: 1,
+ U: 0.001, // microseconds → ms (rounded later)
+ us: 0.001,
+ N: 1e-6, // nanoseconds → ms (rounded later)
+ ns: 1e-6,
+};
+
+/** Regex: optional integer multiplier followed by unit alias. */
+const RE_FREQ =
+ /^(\d+(?:\.\d+)?)\s*(W|D|H|T|min|S|L|ms|U|us|N|ns)$|^(W|D|H|T|min|S|L|ms|U|us|N|ns)$/;
+
+/**
+ * Parse a freq string or number into milliseconds.
+ *
+ * @throws {Error} on unrecognised format.
+ */
+function freqToMs(freq: TimedeltaFreq | number): number {
+ if (typeof freq === "number") {
+ return freq;
+ }
+ const m = RE_FREQ.exec(freq);
+ if (!m) {
+ throw new Error(`timedelta_range: unrecognised freq "${freq}"`);
+ }
+ if (m[3] !== undefined) {
+ // bare unit, no multiplier
+ const base = UNIT_MS[m[3]];
+ if (base === undefined) {
+ throw new Error(`timedelta_range: unknown unit "${m[3]}"`);
+ }
+ return base;
+ }
+ // multiplier + unit
+ const multiplier = Number(m[1]);
+ const unit = m[2] as string;
+ const base = UNIT_MS[unit];
+ if (base === undefined) {
+ throw new Error(`timedelta_range: unknown unit "${unit}"`);
+ }
+ return multiplier * base;
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** Coerce start/end input to milliseconds. */
+function toMs(v: Timedelta | string | number): number {
+ if (typeof v === "number") {
+ return v;
+ }
+ if (v instanceof Timedelta) {
+ return v.totalMilliseconds;
+ }
+ return Timedelta.parse(v).totalMilliseconds;
+}
+
+/** Apply closed endpoint filtering. */
+function applyClosedFilter(
+ values: number[],
+ startMs: number | null,
+ endMs: number | null,
+ closed: TimedeltaRangeClosed,
+): number[] {
+ if (closed === null || closed === "both") {
+ return values;
+ }
+ return values.filter((v) => {
+ if (closed === "left") {
+ return endMs === null || v < endMs || v === startMs;
+ }
+ if (closed === "right") {
+ return startMs === null || v > startMs || v === endMs;
+ }
+ // "neither"
+ const excludeStart = startMs !== null && v === startMs;
+ const excludeEnd = endMs !== null && v === endMs;
+ return !(excludeStart || excludeEnd);
+ });
+}
+
+// ─── public API ───────────────────────────────────────────────────────────────
+
+/**
+ * Return a fixed-frequency {@link TimedeltaIndex}.
+ *
+ * At least **two** of `start`, `end`, `periods`, and `freq` must be provided.
+ * When `start` and `end` are both given without `freq`, the values are linearly
+ * spaced (i.e. `periods` determines the step size).
+ *
+ * @example
+ * ```ts
+ * timedelta_range({ start: "0 days", periods: 4, freq: "D" });
+ * // TimedeltaIndex: [0, 1, 2, 3] days
+ *
+ * timedelta_range({ start: "1 days", end: "3 days", freq: "D" });
+ * // TimedeltaIndex: [1, 2, 3] days
+ *
+ * timedelta_range({ start: "0 days", end: "2 days", periods: 5 });
+ * // TimedeltaIndex: [0, 12h, 1d, 1d12h, 2d]
+ * ```
+ */
export function timedelta_range(options: TimedeltaRangeOptions): TimedeltaIndex {
  const { periods, name = null, closed = "both" } = options;
  const hasStart = options.start !== undefined;
  const hasEnd = options.end !== undefined;
  const hasFreq = options.freq !== undefined;
  const hasPeriods = periods !== undefined;

  // Validate: at least two of the four parameters must be provided.
  // NOTE(review): pandas requires exactly three — this implementation is
  // deliberately laxer and also accepts two-parameter combinations.
  const given = [hasStart, hasEnd, hasPeriods, hasFreq].filter(Boolean).length;
  if (given < 2) {
    throw new Error(
      "timedelta_range: must specify at least two of 'start', 'end', 'periods', 'freq'",
    );
  }

  let values: number[];
  const startMs = hasStart ? toMs(options.start as Timedelta | string | number) : null;
  const endMs = hasEnd ? toMs(options.end as Timedelta | string | number) : null;

  if (hasPeriods && periods !== undefined && periods < 0) {
    throw new RangeError("timedelta_range: periods must be non-negative");
  }

  // Dispatch on the supplied parameter combination. Branch ORDER matters:
  // the linear-spacing case (start+end+periods, no freq) must be tested
  // before start+end+freq. When all four parameters are supplied, the
  // start+end+freq branch wins and `periods` is silently ignored
  // (pandas raises on over-specification — confirm this is intended).
  if (hasStart && hasEnd && !hasFreq && hasPeriods && periods !== undefined) {
    // Linear spacing between start and end with exactly `periods` points
    values = buildLinear(startMs as number, endMs as number, periods);
  } else if (hasStart && hasEnd && hasFreq) {
    // Build from start to end stepping by freq
    const stepMs = freqToMs(options.freq as TimedeltaFreq | number);
    values = buildStartEnd(startMs as number, endMs as number, stepMs);
  } else if (hasStart && hasFreq && hasPeriods && periods !== undefined) {
    // Build forward from start for `periods` items
    const stepMs = freqToMs(options.freq as TimedeltaFreq | number);
    values = buildStartPeriods(startMs as number, stepMs, periods);
  } else if (hasEnd && hasFreq && hasPeriods && periods !== undefined) {
    // Build backward from end for `periods` items (result is ascending)
    const stepMs = freqToMs(options.freq as TimedeltaFreq | number);
    values = buildEndPeriods(endMs as number, stepMs, periods);
  } else if (hasStart && hasEnd && !hasFreq && !hasPeriods) {
    // Only start and end given — include both endpoints (single step if equal)
    values = startMs === endMs ? [startMs as number] : [startMs as number, endMs as number];
  } else if (hasStart && hasPeriods && !hasFreq && periods !== undefined) {
    // start + periods with no freq: default to 1-day step
    values = buildStartPeriods(startMs as number, 86_400_000, periods);
  } else {
    throw new Error(
      "timedelta_range: unsupported combination of parameters — " +
        "provide start+end+freq, start+periods+freq, end+periods+freq, or start+end+periods",
    );
  }

  // Endpoint filtering happens last so every builder can produce the full
  // closed range; fractional milliseconds (if any) pass straight through.
  const filtered = applyClosedFilter(values, startMs, endMs, closed);
  const deltas = filtered.map((ms) => Timedelta.fromMilliseconds(ms));
  return TimedeltaIndex.fromTimedeltas(deltas, { name });
}
+
+// ─── internal builders ────────────────────────────────────────────────────────
+
+/** Linearly space `n` values from `startMs` to `endMs` inclusive. */
+function buildLinear(startMs: number, endMs: number, n: number): number[] {
+ if (n === 0) {
+ return [];
+ }
+ if (n === 1) {
+ return [startMs];
+ }
+ const step = (endMs - startMs) / (n - 1);
+ const values: number[] = [];
+ for (let i = 0; i < n; i++) {
+ values.push(startMs + i * step);
+ }
+ return values;
+}
+
+/** Build from `startMs` up to (inclusive) `endMs` with step `stepMs`. */
+function buildStartEnd(startMs: number, endMs: number, stepMs: number): number[] {
+ if (stepMs === 0) {
+ throw new RangeError("timedelta_range: freq must be non-zero");
+ }
+ const values: number[] = [];
+ const forward = stepMs > 0;
+ let cur = startMs;
+ const MAX = 1_000_000;
+ while (values.length < MAX) {
+ if (forward ? cur > endMs : cur < endMs) {
+ break;
+ }
+ values.push(cur);
+ cur += stepMs;
+ }
+ return values;
+}
+
+/** Build `n` values from `startMs` stepping by `stepMs`. */
+function buildStartPeriods(startMs: number, stepMs: number, n: number): number[] {
+ const values: number[] = [];
+ for (let i = 0; i < n; i++) {
+ values.push(startMs + i * stepMs);
+ }
+ return values;
+}
+
+/** Build `n` values ending at `endMs` stepping by `stepMs`, in ascending order. */
+function buildEndPeriods(endMs: number, stepMs: number, n: number): number[] {
+ if (stepMs === 0) {
+ throw new RangeError("timedelta_range: freq must be non-zero");
+ }
+ const values: number[] = [];
+ for (let i = n - 1; i >= 0; i--) {
+ values.push(endMs - i * stepMs);
+ }
+ return values;
+}
diff --git a/src/index.ts b/src/index.ts
index 8f62a18d..572ed113 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -56,6 +56,8 @@ export { readJson, toJson } from "./io/index.ts";
export type { ReadJsonOptions, ToJsonOptions, JsonOrient } from "./io/index.ts";
export { jsonNormalize } from "./io/index.ts";
export type { JsonNormalizeOptions, JsonPath } from "./io/index.ts";
+export { toJsonDenormalize, toJsonRecords, toJsonSplit, toJsonIndex } from "./io/index.ts";
+export type { JsonDenormalizeOptions, JsonSplitOptions, JsonSplitResult } from "./io/index.ts";
export { pearsonCorr, dataFrameCorr, dataFrameCov } from "./stats/index.ts";
export type { CorrMethod, CorrOptions, CovOptions } from "./stats/index.ts";
export { Rolling } from "./window/index.ts";
@@ -182,8 +184,10 @@ export {
export { Period, PeriodIndex } from "./core/index.ts";
export type { PeriodFreq, PeriodIndexOptions } from "./core/index.ts";
-export { TimedeltaIndex } from "./core/index.ts";
+export { TimedeltaIndex, Timedelta } from "./core/index.ts";
export type { TimedeltaComponents, TimedeltaIndexOptions } from "./core/index.ts";
+export { timedelta_range } from "./core/index.ts";
+export type { TimedeltaFreq, TimedeltaRangeClosed, TimedeltaRangeOptions } from "./core/index.ts";
export {
Day,
Hour,
@@ -344,9 +348,7 @@ export type { Attrs } from "./core/index.ts";
export {
pipe,
seriesApply,
- seriesTransform,
dataFrameApplyMap,
- dataFrameTransform,
dataFrameTransformRows,
} from "./core/index.ts";
export {
@@ -537,7 +539,7 @@ export { toDatetime } from "./stats/index.ts";
export type { DatetimeUnit, DatetimeErrors, ToDatetimeOptions } from "./stats/index.ts";
// Branch-unique exports not yet in main
-export { toTimedelta, parseFrac, formatTimedelta, Timedelta } from "./stats/index.ts";
+export { toTimedelta, parseFrac, formatTimedelta } from "./stats/index.ts";
export type { TimedeltaUnit, TimedeltaErrors, ToTimedeltaOptions } from "./stats/index.ts";
export { dateRange, parseFreq, advanceDate, toDateInput } from "./stats/index.ts";
export type { DateRangeInclusive, ParsedFreq } from "./stats/index.ts";
@@ -553,3 +555,116 @@ export type { FillDirectionOptions, DataFrameFillOptions } from "./stats/index.t
export { intervalRange } from "./stats/index.ts";
export type { ClosedType } from "./stats/index.ts";
export { nunique } from "./stats/index.ts";
+export { queryDataFrame, evalDataFrame } from "./stats/index.ts";
+export { strFindall, strFindallCount, strFindFirst, strFindallExpand } from "./stats/index.ts";
+export { cutBinsToFrame, cutBinCounts, binEdges } from "./stats/index.ts";
+export type { CutBinsToFrameOptions } from "./stats/index.ts";
+export { xsDataFrame, xsSeries } from "./stats/index.ts";
+export type { XsDataFrameOptions, XsSeriesOptions } from "./stats/index.ts";
+export {
+ swapLevelSeries,
+ swapLevelDataFrame,
+ reorderLevelsSeries,
+ reorderLevelsDataFrame,
+} from "./stats/swaplevel.ts";
+export type {
+ SwapLevelDataFrameOptions,
+ ReorderLevelsDataFrameOptions,
+} from "./stats/swaplevel.ts";
+export { truncateSeries, truncateDataFrame } from "./stats/truncate.ts";
+export type { TruncateOptions } from "./stats/truncate.ts";
+export { seriesBetween } from "./stats/index.ts";
+export type { BetweenInclusive, BetweenOptions } from "./stats/index.ts";
+export { seriesUpdate, dataFrameUpdate } from "./stats/index.ts";
+export type { UpdateOptions } from "./stats/index.ts";
+export { filterDataFrame, filterSeries } from "./stats/index.ts";
+export type { FilterLabelsOptions } from "./stats/index.ts";
+export { combineSeries, combineDataFrame } from "./stats/index.ts";
+export type { CombineDataFrameOptions } from "./stats/index.ts";
+export { keepTrue, keepFalse, filterBy } from "./stats/index.ts";
+export {
+ squeezeSeries,
+ squeezeDataFrame,
+ itemSeries,
+ boolSeries,
+ boolDataFrame,
+ firstValidIndex,
+ lastValidIndex,
+ dataFrameFirstValidIndex,
+ dataFrameLastValidIndex,
+} from "./stats/index.ts";
+export type { SqueezeResult } from "./stats/index.ts";
+export { autoCorr, corrWith } from "./stats/index.ts";
+export type { CorrWithOptions } from "./stats/index.ts";
+export {
+ renameSeriesIndex,
+ renameDataFrame,
+ addPrefixDataFrame,
+ addSuffixDataFrame,
+ addPrefixSeries,
+ addSuffixSeries,
+ setAxisSeries,
+ setAxisDataFrame,
+ seriesToFrame,
+} from "./stats/index.ts";
+export type { LabelMapper, RenameDataFrameOptions } from "./stats/index.ts";
+export { absSeries, absDataFrame, roundSeries, roundDataFrame } from "./stats/index.ts";
+export type { RoundDataFrameSpec } from "./stats/index.ts";
+export {
+ seriesDotSeries,
+ seriesDotDataFrame,
+ dataFrameDotSeries,
+ dataFrameDotDataFrame,
+} from "./stats/index.ts";
+export { seriesTransform, dataFrameTransform } from "./stats/index.ts";
+export type { TransformFunc, TransformFuncName, DataFrameTransformOptions } from "./stats/index.ts";
+export { seriesAt, seriesIat, dataFrameAt, dataFrameIat } from "./stats/index.ts";
+export {
+ sortValuesSeries,
+ sortIndexSeries,
+ sortValuesDataFrame,
+ sortIndexDataFrame,
+} from "./stats/index.ts";
+export type {
+ SortValuesSeriesOptions,
+ SortIndexSeriesOptions,
+ SortValuesDataFrameOptions,
+ SortIndexDataFrameOptions,
+} from "./stats/index.ts";
+export { join, joinAll, crossJoin } from "./merge/index.ts";
+export type { JoinOptions } from "./merge/index.ts";
+export {
+ inferObjectsSeries,
+ inferObjectsDataFrame,
+ convertDtypesSeries,
+ convertDtypesDataFrame,
+} from "./stats/index.ts";
+export type { InferObjectsOptions, ConvertDtypesOptions } from "./stats/index.ts";
+export { mergeAsof } from "./merge/index.ts";
+export type { MergeAsofOptions, AsofDirection } from "./merge/index.ts";
+export { mergeOrdered } from "./merge/index.ts";
+export type { MergeOrderedOptions, OrderedFillMethod } from "./merge/index.ts";
+export {
+ resampleSeries,
+ resampleDataFrame,
+ SeriesResampler,
+ DataFrameResampler,
+} from "./stats/index.ts";
+export type {
+ ResampleFreq,
+ ResampleLabel,
+ ResampleAggName,
+ ResampleAggFn,
+ ResampleOptions,
+} from "./stats/index.ts";
+export {
+ AssertionError,
+ assertSeriesEqual,
+ assertFrameEqual,
+ assertIndexEqual,
+} from "./testing/index.ts";
+export type {
+ AssertSeriesEqualOptions,
+ AssertFrameEqualOptions,
+ AssertIndexEqualOptions,
+} from "./testing/index.ts";
diff --git a/src/io/index.ts b/src/io/index.ts
index e868c4c8..afb4ac57 100644
--- a/src/io/index.ts
+++ b/src/io/index.ts
@@ -10,6 +10,17 @@ export { readJson, toJson } from "./json.ts";
export type { ReadJsonOptions, ToJsonOptions, JsonOrient } from "./json.ts";
export { jsonNormalize } from "./json_normalize.ts";
export type { JsonPath, JsonNormalizeOptions } from "./json_normalize.ts";
+export {
+ toJsonDenormalize,
+ toJsonRecords,
+ toJsonSplit,
+ toJsonIndex,
+} from "./to_json_normalize.ts";
+export type {
+ JsonDenormalizeOptions,
+ JsonSplitOptions,
+ JsonSplitResult,
+} from "./to_json_normalize.ts";
// readExcel / xlsxSheetNames use node:zlib and cannot be bundled for the
// browser. Import them directly from "tsb/io/read_excel" when running in
// Node / Bun.
diff --git a/src/io/to_json_normalize.ts b/src/io/to_json_normalize.ts
new file mode 100644
index 00000000..c18b62cb
--- /dev/null
+++ b/src/io/to_json_normalize.ts
@@ -0,0 +1,288 @@
+/**
+ * toJsonDenormalize — convert a flat DataFrame back to nested JSON records.
+ *
+ * This is the inverse operation of `jsonNormalize`: given a flat DataFrame
+ * whose column names use a separator (e.g. `"."`) to encode nesting depth,
+ * reconstruct an array of nested JSON objects.
+ *
+ * For example, a DataFrame with columns `["name", "address.city", "address.zip"]`
+ * produces records like `{ name: "Alice", address: { city: "NY", zip: "10001" } }`.
+ *
+ * Additional utilities:
+ *
+ * - `toJsonDenormalize` — main function; mirrors inverting `pandas.json_normalize`
+ * - `toJsonRecords` — simple orient="records" serialisation (no nesting)
+ * - `toJsonSplit` — orient="split" (columns + data + index)
+ * - `toJsonIndex` — orient="index" (keyed by index label)
+ *
+ * @module
+ */
+
+import type { DataFrame } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── JSON value types (no `any`) ──────────────────────────────────────────────
+
+/** A JSON primitive (leaf value). */
+type JsonPrimitive = string | number | boolean | null;
+
+/** Any valid JSON value. */
+type JsonValue = JsonPrimitive | JsonValue[] | JsonRecord;
+
+/** A JSON object (dict). */
+interface JsonRecord {
+ [key: string]: JsonValue;
+}
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/** Options for {@link toJsonDenormalize}. */
+export interface JsonDenormalizeOptions {
+ /**
+ * Separator used in column names to encode nesting depth.
+ * Must match the separator used when `jsonNormalize` was called.
+ * @default "."
+ */
+ readonly sep?: string;
+
+ /**
+ * When `true`, omit keys whose value is `null`.
+ * @default false
+ */
+ readonly dropNull?: boolean;
+}
+
+/** Options for {@link toJsonSplit}. */
+export interface JsonSplitOptions {
+ /**
+ * When `true`, include the DataFrame index in the output.
+ * @default true
+ */
+ readonly includeIndex?: boolean;
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** Convert a Scalar to a JSON-compatible value. */
+function scalarToJson(v: Scalar): JsonPrimitive {
+ if (v === null || v === undefined) {
+ return null;
+ }
+ if (typeof v === "number") {
+ if (Number.isNaN(v) || !Number.isFinite(v)) {
+ return null;
+ }
+ return v;
+ }
+ if (typeof v === "boolean") {
+ return v;
+ }
+ return String(v);
+}
+
+/**
+ * Set a value in a nested object using a dot-separated path.
+ * Intermediate objects are created as needed.
+ */
+function setNested(obj: JsonRecord, keys: readonly string[], value: JsonPrimitive): void {
+ let current: JsonRecord = obj;
+ for (let i = 0; i < keys.length - 1; i++) {
+ const k = keys[i] as string;
+ if (
+ !(k in current) ||
+ typeof current[k] !== "object" ||
+ current[k] === null ||
+ Array.isArray(current[k])
+ ) {
+ current[k] = {};
+ }
+ current = current[k] as JsonRecord;
+ }
+ const lastKey = keys.at(-1) as string;
+ current[lastKey] = value;
+}
+
+// ─── toJsonDenormalize ────────────────────────────────────────────────────────
+
+/**
+ * Convert a flat DataFrame to an array of nested JSON objects.
+ *
+ * Reverses the flattening performed by `jsonNormalize`: column names
+ * containing the separator (default `"."`) are split into nested keys.
+ *
+ * @param df - Input DataFrame.
+ * @param options - Configuration options.
+ * @returns An array of nested `JsonRecord` objects, one per row.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({
+ * name: ["Alice", "Bob"],
+ * "address.city": ["NY", "LA"],
+ * "address.zip": ["10001", "90001"],
+ * });
+ * toJsonDenormalize(df);
+ * // [
+ * // { name: "Alice", address: { city: "NY", zip: "10001" } },
+ * // { name: "Bob", address: { city: "LA", zip: "90001" } },
+ * // ]
+ * ```
+ */
+export function toJsonDenormalize(
+ df: DataFrame,
+ options: JsonDenormalizeOptions = {},
+): JsonRecord[] {
+ const sep = options.sep ?? ".";
+ const dropNull = options.dropNull ?? false;
+
+ const columns = df.columns.values;
+ // Pre-split all column names into key paths.
+ const paths: string[][] = columns.map((col) => col.split(sep));
+
+ const nRows = df.index.size;
+ const result: JsonRecord[] = [];
+
+ for (let r = 0; r < nRows; r++) {
+ const record: JsonRecord = {};
+
+ for (let c = 0; c < columns.length; c++) {
+ const colName = columns[c] as string;
+ const col = df.col(colName);
+ const raw = col.values[r] as Scalar;
+ const value = scalarToJson(raw);
+
+ if (dropNull && value === null) {
+ continue;
+ }
+
+ const keys = paths[c] as string[];
+ setNested(record, keys, value);
+ }
+
+ result.push(record);
+ }
+
+ return result;
+}
+
+// ─── toJsonRecords ────────────────────────────────────────────────────────────
+
+/**
+ * Serialize a DataFrame as an array of flat record objects.
+ *
+ * This is equivalent to `df.to_json(orient="records")` in pandas.
+ * Column names are NOT split on any separator — the output is always flat.
+ *
+ * @param df - Input DataFrame.
+ * @returns An array of `JsonRecord` objects, one per row.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 2], b: ["x", "y"] });
+ * toJsonRecords(df);
+ * // [{ a: 1, b: "x" }, { a: 2, b: "y" }]
+ * ```
+ */
+export function toJsonRecords(df: DataFrame): JsonRecord[] {
+ const columns = df.columns.values;
+ const nRows = df.index.size;
+ const result: JsonRecord[] = [];
+
+ for (let r = 0; r < nRows; r++) {
+ const record: JsonRecord = {};
+ for (const col of columns) {
+ const series = df.col(col);
+ record[col] = scalarToJson(series.values[r] as Scalar);
+ }
+ result.push(record);
+ }
+
+ return result;
+}
+
+// ─── toJsonSplit ──────────────────────────────────────────────────────────────
+
+/**
+ * Serialize a DataFrame in "split" orientation.
+ *
+ * Mirrors `df.to_json(orient="split")` in pandas.
+ *
+ * @param df - Input DataFrame.
+ * @param options - Configuration options.
+ * @returns An object with `{ columns, index?, data }` keys.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 2], b: ["x", "y"] });
+ * toJsonSplit(df);
+ * // {
+ * // columns: ["a", "b"],
+ * // index: [0, 1],
+ * // data: [[1, "x"], [2, "y"]],
+ * // }
+ * ```
+ */
+export interface JsonSplitResult {
+ columns: string[];
+ index?: JsonPrimitive[];
+ data: JsonPrimitive[][];
+}
+
+export function toJsonSplit(df: DataFrame, options: JsonSplitOptions = {}): JsonSplitResult {
+ const includeIndex = options.includeIndex ?? true;
+ const columns = df.columns.values;
+ const nRows = df.index.size;
+
+ const data: JsonPrimitive[][] = [];
+ for (let r = 0; r < nRows; r++) {
+ const row: JsonPrimitive[] = [];
+ for (const col of columns) {
+ const series = df.col(col);
+ row.push(scalarToJson(series.values[r] as Scalar));
+ }
+ data.push(row);
+ }
+
+ const result: JsonSplitResult = { columns: [...columns], data };
+ if (includeIndex) {
+ result.index = df.index.toArray().map(scalarToJson);
+ }
+ return result;
+}
+
+// ─── toJsonIndex ──────────────────────────────────────────────────────────────
+
+/**
+ * Serialize a DataFrame in "index" orientation.
+ *
+ * Mirrors `df.to_json(orient="index")` in pandas.
+ * Rows are keyed by their index label (converted to string).
+ *
+ * @param df - Input DataFrame.
+ * @returns An object mapping index label → flat record.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 2], b: ["x", "y"] });
+ * toJsonIndex(df);
+ * // { "0": { a: 1, b: "x" }, "1": { a: 2, b: "y" } }
+ * ```
+ */
+export function toJsonIndex(df: DataFrame): JsonRecord {
+ const columns = df.columns.values;
+ const indexLabels = df.index.toArray();
+ const nRows = indexLabels.length;
+ const result: JsonRecord = {};
+
+ for (let r = 0; r < nRows; r++) {
+ const label = String(indexLabels[r]);
+ const record: JsonRecord = {};
+ for (const col of columns) {
+ const series = df.col(col);
+ record[col] = scalarToJson(series.values[r] as Scalar);
+ }
+ result[label] = record;
+ }
+
+ return result;
+}
diff --git a/src/merge/index.ts b/src/merge/index.ts
index 06f3f025..defec380 100644
--- a/src/merge/index.ts
+++ b/src/merge/index.ts
@@ -8,3 +8,9 @@ export { concat } from "./concat.ts";
export type { ConcatOptions } from "./concat.ts";
export { merge } from "./merge.ts";
export type { MergeOptions } from "./merge.ts";
+export { join, joinAll, crossJoin } from "./join.ts";
+export type { JoinOptions } from "./join.ts";
+export { mergeAsof } from "./merge_asof.ts";
+export type { MergeAsofOptions, AsofDirection } from "./merge_asof.ts";
+export { mergeOrdered } from "./merge_ordered.ts";
+export type { MergeOrderedOptions, OrderedFillMethod } from "./merge_ordered.ts";
diff --git a/src/merge/join.ts b/src/merge/join.ts
new file mode 100644
index 00000000..57f3856f
--- /dev/null
+++ b/src/merge/join.ts
@@ -0,0 +1,254 @@
+/**
+ * join — label-based join of two DataFrames.
+ *
+ * Mirrors `pandas.DataFrame.join`:
+ * - Joins `left` to `right` using **index labels** by default
+ * - `how`: `"left"` (default), `"right"`, `"inner"`, `"outer"`
+ * - `on`: use a column from `left` as the join key (matched against `right`'s index)
+ * - `lsuffix` / `rsuffix`: applied to overlapping column names
+ * - `sort`: sort result by join keys
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, join } from "tsb";
+ *
+ * const left = DataFrame.fromColumns(
+ * { A: [1, 2, 3] },
+ * { index: ["K0", "K1", "K2"] },
+ * );
+ * const right = DataFrame.fromColumns(
+ * { B: [4, 5, 6] },
+ * { index: ["K0", "K2", "K3"] },
+ * );
+ *
+ * join(left, right);
+ * // Left join (default):
+ * // A B
+ * // K0 1 4
+ * // K1 2 null
+ * // K2 3 5
+ *
+ * join(left, right, { how: "inner" });
+ * // A B
+ * // K0 1 4
+ * // K2 3 5
+ * ```
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+import { merge } from "./merge.ts";
+import type { MergeOptions } from "./merge.ts";
+
+// ─── public API types ─────────────────────────────────────────────────────────
+
+/** Options for {@link join}. */
+export interface JoinOptions {
+ /**
+ * Column in `left` to use as the join key (matched against `right`'s index).
+ * When omitted, `left`'s index is used as the join key.
+ */
+ readonly on?: string;
+ /**
+ * Join type:
+ * - `"left"` (default): all rows from `left`; non-matching `right` rows dropped
+ * - `"right"`: all rows from `right`; non-matching `left` rows dropped
+ * - `"inner"`: only rows with matching keys in **both** DataFrames
+ * - `"outer"`: all rows; missing values filled with `null`
+ */
+ readonly how?: "left" | "right" | "inner" | "outer";
+ /**
+ * Suffix appended to overlapping column names from `left`.
+ * Default: `""` (empty — raise if overlap and both suffixes are empty).
+ */
+ readonly lsuffix?: string;
+ /**
+ * Suffix appended to overlapping column names from `right`.
+ * Default: `""` (empty — raise if overlap and both suffixes are empty).
+ */
+ readonly rsuffix?: string;
+ /**
+ * Sort result rows by the join keys.
+ * Default: `false`.
+ */
+ readonly sort?: boolean;
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/**
+ * Find column names that exist in both DataFrames (excluding any `on` key
+ * that will become a join key and won't appear in both outputs).
+ */
+function overlappingCols(left: DataFrame, right: DataFrame, on: string | undefined): string[] {
+ const leftCols = new Set(left.columns.values);
+ const rightCols = right.columns.values;
+ const overlap: string[] = [];
+ for (const c of rightCols) {
+ if (leftCols.has(c) && c !== on) {
+ overlap.push(c);
+ }
+ }
+ return overlap;
+}
+
+// ─── public API ───────────────────────────────────────────────────────────────
+
+/**
+ * Join two DataFrames on their index (or a column of the left DataFrame).
+ *
+ * This is a convenience wrapper around {@link merge} that defaults to a
+ * **left join on index labels**, matching pandas `DataFrame.join`.
+ *
+ * @param left - The primary DataFrame.
+ * @param right - The DataFrame to join to `left`.
+ * @param options - Join options.
+ * @returns A new DataFrame with rows aligned by the join keys.
+ *
+ * @example
+ * ```ts
+ * const result = join(employees, departments, { how: "left" });
+ * ```
+ */
+export function join(left: DataFrame, right: DataFrame, options?: JoinOptions): DataFrame {
+ const how = options?.how ?? "left";
+ const on = options?.on;
+ const lsuffix = options?.lsuffix ?? "";
+ const rsuffix = options?.rsuffix ?? "";
+ const sort = options?.sort ?? false;
+
+ // Validate suffixes when there are overlapping columns.
+ const overlap = overlappingCols(left, right, on);
+ if (overlap.length > 0 && lsuffix === "" && rsuffix === "") {
+ throw new Error(
+ `join: columns overlap but no suffix specified: ${overlap.join(", ")}. Pass lsuffix or rsuffix to disambiguate.`,
+ );
+ }
+
+ // Build suffixes tuple — if both are empty the overlap guard above already threw.
+ const suffixes: readonly [string, string] = [lsuffix, rsuffix];
+
+ const mergeOpts: MergeOptions = {
+ how,
+ suffixes,
+ sort,
+ ...(on !== undefined ? { left_on: on } : { left_index: true }),
+ right_index: true,
+ };
+
+ return merge(left, right, mergeOpts);
+}
+
+// ─── multi-join helper ────────────────────────────────────────────────────────
+
+/**
+ * Join multiple DataFrames together (left-to-right chain).
+ *
+ * Equivalent to `pandas.DataFrame.join([other1, other2, ...])` when called
+ * as `joinAll(base, [df1, df2], options)`.
+ *
+ * Each join in the chain uses the same `options`; index alignment propagates
+ * from left to right.
+ *
+ * @example
+ * ```ts
+ * const result = joinAll(base, [costs, names], { how: "left" });
+ * ```
+ */
+export function joinAll(
+ left: DataFrame,
+ others: readonly DataFrame[],
+ options?: Omit,
+): DataFrame {
+ let result = left;
+ for (const other of others) {
+ result = join(result, other, options);
+ }
+ return result;
+}
+
+// ─── cross join ───────────────────────────────────────────────────────────────
+
+/**
+ * Produce the Cartesian product of two DataFrames (cross join).
+ *
+ * Equivalent to `pandas.merge(left, right, how="cross")`. Every row in
+ * `left` is paired with every row in `right`. The result has
+ * `left.shape[0] * right.shape[0]` rows.
+ *
+ * Column name conflicts are resolved with `lsuffix` / `rsuffix`.
+ *
+ * @example
+ * ```ts
+ * const colors = DataFrame.fromColumns({ color: ["red", "blue"] });
+ * const sizes = DataFrame.fromColumns({ size: ["S", "M", "L"] });
+ * crossJoin(colors, sizes);
+ * // color size
+ * // red S
+ * // red M
+ * // red L
+ * // blue S
+ * // blue M
+ * // blue L
+ * ```
+ */
+export function crossJoin(
+ left: DataFrame,
+ right: DataFrame,
+ options?: { readonly lsuffix?: string; readonly rsuffix?: string },
+): DataFrame {
+ const lsuffix = options?.lsuffix ?? "";
+ const rsuffix = options?.rsuffix ?? "";
+
+ const overlap = overlappingCols(left, right, undefined);
+ if (overlap.length > 0 && lsuffix === "" && rsuffix === "") {
+ throw new Error(
+ `crossJoin: columns overlap but no suffix specified: ${overlap.join(", ")}. Pass lsuffix or rsuffix to disambiguate.`,
+ );
+ }
+
+ const nLeft = left.shape[0];
+ const nRight = right.shape[0];
+ const total = nLeft * nRight;
+
+ // Build result columns.
+ const leftColNames = left.columns.values;
+ const rightColNames = right.columns.values;
+
+ const rightColSet = new Set(rightColNames);
+ const leftColSet = new Set(leftColNames);
+ const resultCols: Record = {};
+
+ // Left columns: row i*nRight+j gets leftVals[i]
+ for (const col of leftColNames) {
+ const vals = left.col(col).values;
+ // Apply lsuffix to left cols that overlap with right cols
+ const outName = rightColSet.has(col) && lsuffix !== "" ? col + lsuffix : col;
+ const data: Scalar[] = new Array(total);
+ for (let i = 0; i < nLeft; i++) {
+ const v = vals[i] ?? null;
+ for (let j = 0; j < nRight; j++) {
+ data[i * nRight + j] = v;
+ }
+ }
+ resultCols[outName] = data;
+ }
+
+ // Right columns: row i*nRight+j gets rightVals[j]
+ for (const col of rightColNames) {
+ const vals = right.col(col).values;
+ // Apply rsuffix to right cols that overlap with left cols
+ const outName = leftColSet.has(col) ? col + rsuffix : col;
+ const data: Scalar[] = new Array(total);
+ for (let i = 0; i < nLeft; i++) {
+ for (let j = 0; j < nRight; j++) {
+ data[i * nRight + j] = vals[j] ?? null;
+ }
+ }
+ resultCols[outName] = data;
+ }
+
+ return DataFrame.fromColumns(resultCols);
+}
diff --git a/src/merge/merge.ts b/src/merge/merge.ts
index d2c52a63..1eb5f94a 100644
--- a/src/merge/merge.ts
+++ b/src/merge/merge.ts
@@ -32,8 +32,7 @@
* @module
*/
-import { DataFrame } from "../core/index.ts";
-import type { Index } from "../core/index.ts";
+import { DataFrame, Index } from "../core/index.ts";
import { RangeIndex } from "../core/index.ts";
import type { JoinHow, Label, Scalar } from "../types.ts";
@@ -208,12 +207,48 @@ function computePairs(
rightMap: Map,
how: JoinHow,
): RowPair[] {
+ if (how === "left") {
+ const pairs: RowPair[] = [];
+ for (const [key, leftIdxs] of leftMap) {
+ const rightIdxs = rightMap.get(key);
+ if (rightIdxs === undefined) {
+ for (const li of leftIdxs) {
+ pairs.push({ left: li, right: null });
+ }
+ continue;
+ }
+ for (const li of leftIdxs) {
+ for (const ri of rightIdxs) {
+ pairs.push({ left: li, right: ri });
+ }
+ }
+ }
+ return pairs;
+ }
+ if (how === "right") {
+ const pairs: RowPair[] = [];
+ for (const [key, rightIdxs] of rightMap) {
+ const leftIdxs = leftMap.get(key);
+ if (leftIdxs === undefined) {
+ for (const ri of rightIdxs) {
+ pairs.push({ left: null, right: ri });
+ }
+ continue;
+ }
+ for (const li of leftIdxs) {
+ for (const ri of rightIdxs) {
+ pairs.push({ left: li, right: ri });
+ }
+ }
+ }
+ return pairs;
+ }
const pairs: RowPair[] = [];
const matched = addMatchedPairs(leftMap, rightMap, pairs);
- if (how === "left" || how === "outer") {
+ if (how === "outer") {
addLeftUnmatched(leftMap, matched, pairs);
}
- if (how === "right" || how === "outer") {
+ if (how === "outer") {
addRightUnmatched(rightMap, matched, pairs);
}
return pairs;
@@ -420,12 +455,25 @@ function buildResultDataFrame(
right: DataFrame,
pairs: readonly RowPair[],
plan: readonly ColPlanEntry[],
+ keySpec: KeySpec,
): DataFrame {
const colData: Record = {};
for (const entry of plan) {
colData[entry.outputName] = buildResultColumn(left, right, pairs, entry);
}
- const index = new RangeIndex(pairs.length) as unknown as Index;
+ let index = new RangeIndex(pairs.length) as unknown as Index;
+ if (keySpec.leftUseIndex && keySpec.rightUseIndex) {
+ const labels: Label[] = pairs.map((p) => {
+ if (p.left !== null) {
+ return (left.index.values[p.left] ?? null) as Label;
+ }
+ if (p.right !== null) {
+ return (right.index.values[p.right] ?? null) as Label;
+ }
+ return null;
+ });
+ index = new Index(labels);
+ }
return DataFrame.fromColumns(colData as Record, { index });
}
@@ -499,7 +547,7 @@ export function merge(left: DataFrame, right: DataFrame, options?: MergeOptions)
const plan = buildColPlan(left, right, keySpec, suffixes);
- const result = buildResultDataFrame(left, right, pairs, plan);
+ const result = buildResultDataFrame(left, right, pairs, plan, keySpec);
if (sort && plan.length > 0) {
const sortCols = keySortCols(plan);
diff --git a/src/merge/merge_asof.ts b/src/merge/merge_asof.ts
new file mode 100644
index 00000000..f2e2c312
--- /dev/null
+++ b/src/merge/merge_asof.ts
@@ -0,0 +1,477 @@
+/**
+ * merge_asof — ordered (nearest-key) left-join of two DataFrames.
+ *
+ * Mirrors `pandas.merge_asof`:
+ * - Performs a left join on the **nearest** key rather than an exact match
+ * - Both DataFrames **must** be sorted by the key column ascending
+ * - `direction`: `"backward"` (default), `"forward"`, `"nearest"`
+ * - `by`: additional columns that must match exactly before the asof key lookup
+ * - `tolerance`: maximum numeric distance allowed between matched keys
+ * - `allow_exact_matches`: if `false`, only strictly less-than (backward) or
+ * strictly greater-than (forward) matches are allowed
+ * - `suffixes`: column-name suffixes applied to overlapping non-key columns
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, mergeAsof } from "tsb";
+ *
+ * const trades = DataFrame.fromColumns({
+ * time: [1, 5, 10],
+ * price: [100, 200, 300],
+ * });
+ * const quotes = DataFrame.fromColumns({
+ * time: [2, 6],
+ * bid: [98, 195],
+ * });
+ *
+ * mergeAsof(trades, quotes, { on: "time" });
+ * // time | price | bid
+ * // 1 | 100 | null ← no quote ≤ 1
+ * // 5 | 200 | 98 ← most recent quote ≤ 5 is at time=2
+ * // 10 | 300 | 195 ← most recent quote ≤ 10 is at time=6
+ * ```
+ *
+ * @module
+ */
+
+import { DataFrame, RangeIndex } from "../core/index.ts";
+import type { Index } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── public API types ─────────────────────────────────────────────────────────
+
/**
 * Direction for the asof key search:
 * - `"backward"`: match the largest right key ≤ the left key
 * - `"forward"`: match the smallest right key ≥ the left key
 * - `"nearest"`: match the closest right key (ties broken backward)
 */
export type AsofDirection = "backward" | "forward" | "nearest";

/** Options for {@link mergeAsof}. Mirrors the keyword arguments of `pandas.merge_asof`. */
export interface MergeAsofOptions {
  /**
   * Column name present in **both** DataFrames to use as the ordered key.
   * Mutually exclusive with `left_on` / `right_on` / `left_index` / `right_index`.
   */
  readonly on?: string;
  /** Key column in the left DataFrame (use with `right_on`). */
  readonly left_on?: string;
  /** Key column in the right DataFrame (use with `left_on`). */
  readonly right_on?: string;
  /** Use left DataFrame's index as the key. */
  readonly left_index?: boolean;
  /** Use right DataFrame's index as the key. */
  readonly right_index?: boolean;
  /**
   * Column(s) that must match **exactly** before the asof key lookup.
   * Equivalent to `by` in both DataFrames.
   */
  readonly by?: string | readonly string[];
  /** `by` override for the left DataFrame only. */
  readonly left_by?: string | readonly string[];
  /** `by` override for the right DataFrame only. */
  readonly right_by?: string | readonly string[];
  /**
   * Suffixes applied to overlapping non-key column names.
   * Default: `["_x", "_y"]`.
   */
  readonly suffixes?: readonly [string, string];
  /**
   * Maximum distance (numeric) allowed between matched keys.
   * A matched row is nulled-out when `|leftKey - rightKey| > tolerance`.
   * Default: `null` (no limit).
   */
  readonly tolerance?: number | null;
  /**
   * Whether an exact key match is allowed.
   * - `true` (default): `leftKey === rightKey` is a valid match
   * - `false`: only strictly less-than (backward) / greater-than (forward) matches
   */
  readonly allow_exact_matches?: boolean;
  /**
   * Direction for the nearest-key search:
   * - `"backward"` (default): largest right key ≤ left key
   * - `"forward"`: smallest right key ≥ left key
   * - `"nearest"`: closest right key (ties broken backward)
   */
  readonly direction?: AsofDirection;
}
+
+// ─── internal helpers ─────────────────────────────────────────────────────────
+
+/** Extract numeric/string key for a given row from a DataFrame column. */
+function getKeyValue(df: DataFrame, colName: string | null, rowIdx: number): Scalar {
+ if (colName === null) {
+ return df.index.at(rowIdx) as Scalar;
+ }
+ return df.col(colName).iat(rowIdx);
+}
+
+/** Convert Label/Scalar to a comparable number for asof matching. */
+function toNum(v: Scalar): number {
+ if (v instanceof Date) {
+ return v.getTime();
+ }
+ if (typeof v === "number") {
+ return v;
+ }
+ if (typeof v === "bigint") {
+ return Number(v);
+ }
+ if (typeof v === "string") {
+ const n = Number(v);
+ return Number.isNaN(n) ? Number.NaN : n;
+ }
+ return Number.NaN;
+}
+
+/**
+ * Binary search helpers.
+ * Returns the insertion index for `target` in the sorted array `arr`.
+ */
+function lowerBound(arr: readonly number[], target: number): number {
+ let lo = 0;
+ let hi = arr.length;
+ while (lo < hi) {
+ const mid = (lo + hi) >>> 1;
+ if ((arr[mid] as number) < target) {
+ lo = mid + 1;
+ } else {
+ hi = mid;
+ }
+ }
+ return lo;
+}
+
+function upperBound(arr: readonly number[], target: number): number {
+ let lo = 0;
+ let hi = arr.length;
+ while (lo < hi) {
+ const mid = (lo + hi) >>> 1;
+ if ((arr[mid] as number) <= target) {
+ lo = mid + 1;
+ } else {
+ hi = mid;
+ }
+ }
+ return lo;
+}
+
+/**
+ * Find the right-side row index for a single left key value using the
+ * pre-sorted key array and direction.
+ */
+function findMatch(
+ leftKeyNum: number,
+ rightKeys: readonly number[],
+ direction: AsofDirection,
+ allowExact: boolean,
+): number {
+ if (rightKeys.length === 0) {
+ return -1;
+ }
+
+ if (direction === "backward") {
+ // largest right key that is <= leftKey (or < if !allowExact)
+ const bound = allowExact
+ ? upperBound(rightKeys, leftKeyNum)
+ : lowerBound(rightKeys, leftKeyNum);
+ return bound - 1; // -1 means no match
+ }
+
+ if (direction === "forward") {
+ // smallest right key that is >= leftKey (or > if !allowExact)
+ const bound = allowExact
+ ? lowerBound(rightKeys, leftKeyNum)
+ : upperBound(rightKeys, leftKeyNum);
+ return bound < rightKeys.length ? bound : -1;
+ }
+
+ // direction === "nearest": closest key; ties broken backward
+ const bwdBound = upperBound(rightKeys, leftKeyNum) - 1;
+ const fwdBound = lowerBound(rightKeys, leftKeyNum);
+
+ const hasBwd = bwdBound >= 0;
+ const hasFwd = fwdBound < rightKeys.length;
+
+ // If exact match exists and allow_exact_matches, it satisfies both directions
+ if (!(hasBwd || hasFwd)) {
+ return -1;
+ }
+ if (!hasBwd) {
+ return fwdBound;
+ }
+ if (!hasFwd) {
+ return bwdBound;
+ }
+
+ const bwdDist = leftKeyNum - (rightKeys[bwdBound] as number);
+ const fwdDist = (rightKeys[fwdBound] as number) - leftKeyNum;
+
+ // Exact match: both distances are 0
+ if (bwdDist === 0 && fwdDist === 0) {
+ return allowExact ? bwdBound : -1;
+ }
+ if (bwdDist === 0) {
+ return allowExact ? bwdBound : fwdBound;
+ }
+ if (fwdDist === 0) {
+ return allowExact ? fwdBound : bwdBound;
+ }
+
+ return fwdDist < bwdDist ? fwdBound : bwdBound;
+}
+
+// ─── resolve key spec ─────────────────────────────────────────────────────────
+
/**
 * Resolved key specification for the asof merge: the key column on each side
 * (null → use that side's index) plus the exact-match `by` column lists.
 */
interface KeySpec {
  readonly leftKey: string | null; // null → use index
  readonly rightKey: string | null; // null → use index
  readonly leftBy: readonly string[];
  readonly rightBy: readonly string[];
}
+
+function resolveKeySpec(left: DataFrame, right: DataFrame, opts: MergeAsofOptions): KeySpec {
+ let leftKey: string | null;
+ let rightKey: string | null;
+
+ if (opts.left_index) {
+ leftKey = null;
+ } else if (opts.left_on != null) {
+ leftKey = opts.left_on;
+ } else if (opts.on != null) {
+ leftKey = opts.on;
+ } else {
+ // infer: find common numeric column
+ const common = left.columns.values.filter((c) => right.columns.values.includes(c));
+ if (common.length === 0) {
+ throw new Error(
+ "merge_asof: no common columns found and no key specified via `on`, `left_on`/`right_on`, or `*_index`",
+ );
+ }
+ leftKey = common[0] as string;
+ }
+
+ if (opts.right_index) {
+ rightKey = null;
+ } else if (opts.right_on != null) {
+ rightKey = opts.right_on;
+ } else if (opts.on != null) {
+ rightKey = opts.on;
+ } else {
+ rightKey = leftKey; // inferred common column
+ }
+
+ // by columns
+ const toArray = (v: string | readonly string[] | undefined): readonly string[] => {
+ if (v === undefined) {
+ return [];
+ }
+ return typeof v === "string" ? [v] : v;
+ };
+
+ const globalBy = toArray(opts.by);
+ const leftBy = opts.left_by != null ? toArray(opts.left_by) : globalBy;
+ const rightBy = opts.right_by != null ? toArray(opts.right_by) : globalBy;
+
+ // Validate that left/right DataFrames actually have the by columns
+ for (const col of leftBy) {
+ if (!left.columns.values.includes(col)) {
+ throw new Error(`merge_asof: left_by column "${col}" not found in left DataFrame`);
+ }
+ }
+ for (const col of rightBy) {
+ if (!right.columns.values.includes(col)) {
+ throw new Error(`merge_asof: right_by column "${col}" not found in right DataFrame`);
+ }
+ }
+
+ // Validate key columns exist
+ if (leftKey !== null && !left.columns.values.includes(leftKey)) {
+ throw new Error(`merge_asof: left key column "${leftKey}" not found in left DataFrame`);
+ }
+ if (rightKey !== null && !right.columns.values.includes(rightKey)) {
+ throw new Error(`merge_asof: right key column "${rightKey}" not found in right DataFrame`);
+ }
+
+ return { leftKey, rightKey, leftBy, rightBy };
+}
+
+// ─── column plan ──────────────────────────────────────────────────────────────
+
/** One output column in the merge plan: its source side, source column, and final name. */
interface ColEntry {
  // NOTE(review): "key" is never produced by buildColPlan below — confirm
  // whether it is used elsewhere before removing it from the union.
  readonly side: "left" | "right" | "key";
  readonly srcCol: string | null; // null → index
  readonly outCol: string;
}
+
/**
 * Build the output-column plan for mergeAsof: all left columns first, then
 * right columns, skipping a shared key column and suffixing overlapping names.
 *
 * Note: left entries already pushed into `plan` are renamed *retroactively*
 * when a later right column overlaps them, and `leftOutNames` is kept in sync
 * so subsequent right columns compare against the updated name set — the
 * mutation order here is load-bearing.
 */
function buildColPlan(
  left: DataFrame,
  right: DataFrame,
  keySpec: KeySpec,
  suffixes: readonly [string, string],
): readonly ColEntry[] {
  const plan: ColEntry[] = [];

  // All left columns
  for (const c of left.columns.values) {
    plan.push({ side: "left", srcCol: c, outCol: c });
  }

  // Right columns: skip the right key column; apply suffixes for overlaps
  const leftOutNames = new Set(left.columns.values);

  for (const c of right.columns.values) {
    if (c === keySpec.rightKey && keySpec.leftKey !== null) {
      // Skip the right key column when it's a named column (to avoid duplication)
      // unless left_on/right_on differ, in which case both are kept
      if (keySpec.leftKey === keySpec.rightKey) {
        continue;
      }
    }
    // Check overlap with left output names (after accounting for suffixes)
    let outCol = c;
    if (leftOutNames.has(c)) {
      // Apply suffix to both left and right
      const leftIdx = plan.findIndex((e) => e.outCol === c && e.side === "left");
      if (leftIdx >= 0) {
        const existing = plan[leftIdx];
        if (existing !== undefined) {
          // Rename the already-planned left entry with the left suffix.
          plan[leftIdx] = { side: existing.side, srcCol: existing.srcCol, outCol: c + suffixes[0] };
          leftOutNames.delete(c);
          leftOutNames.add(c + suffixes[0]);
        }
      }
      // The right column itself takes the right suffix.
      outCol = c + suffixes[1];
    }
    plan.push({ side: "right", srcCol: c, outCol });
  }

  return plan;
}
+
+// ─── public function ──────────────────────────────────────────────────────────
+
+/**
+ * Perform an ordered (nearest-key) left-join of two DataFrames.
+ *
+ * Mirrors `pandas.merge_asof`.
+ *
+ * Both DataFrames must be sorted ascending by their key column(s) before
+ * calling this function.
+ *
+ * @param left - Left DataFrame (must be sorted by key).
+ * @param right - Right DataFrame (must be sorted by key).
+ * @param options - Join specification (see {@link MergeAsofOptions}).
+ * @returns A new `DataFrame` with the same number of rows as `left`.
+ *
+ * @example
+ * ```ts
+ * // Match each trade to the most-recent quote (backward asof)
+ * mergeAsof(trades, quotes, { on: "time" });
+ *
+ * // Forward asof: find the first quote after each trade
+ * mergeAsof(trades, quotes, { on: "time", direction: "forward" });
+ *
+ * // Nearest: find the closest quote, with per-ticker grouping
+ * mergeAsof(trades, quotes, { on: "time", by: "ticker", direction: "nearest" });
+ * ```
+ */
+export function mergeAsof(
+ left: DataFrame,
+ right: DataFrame,
+ options?: MergeAsofOptions,
+): DataFrame {
+ const opts = options ?? {};
+ const suffixes: readonly [string, string] = opts.suffixes ?? ["_x", "_y"];
+ const direction: AsofDirection = opts.direction ?? "backward";
+ const allowExact: boolean = opts.allow_exact_matches ?? true;
+ const tolerance: number | null = opts.tolerance ?? null;
+
+ const keySpec = resolveKeySpec(left, right, opts);
+ const plan = buildColPlan(left, right, keySpec, suffixes);
+
+ const nLeft = left.shape[0];
+ const nRight = right.shape[0];
+
+ // Pre-extract right keys as numbers
+ const rightKeyNums: number[] = new Array(nRight) as number[];
+ for (let i = 0; i < nRight; i++) {
+ rightKeyNums[i] = toNum(getKeyValue(right, keySpec.rightKey, i));
+ }
+
+ // For each left row, find the matching right row
+ const rightMatchIdx: number[] = new Array(nLeft).fill(-1) as number[];
+
+ if (keySpec.leftBy.length === 0) {
+ // No by-groups: single sorted search over all of right
+ for (let li = 0; li < nLeft; li++) {
+ const lkNum = toNum(getKeyValue(left, keySpec.leftKey, li));
+ rightMatchIdx[li] = findMatch(lkNum, rightKeyNums, direction, allowExact);
+ }
+ } else {
+ // by-groups: group right rows by their by-key tuple, then search within each group
+ // Build a map: byKey → sorted list of {rightKeyNum, rightRowIdx}
+ type GroupEntry = { keyNum: number; rowIdx: number };
+ const groups = new Map();
+
+ for (let ri = 0; ri < nRight; ri++) {
+ const byVals: Scalar[] = keySpec.rightBy.map((col) => right.col(col).iat(ri));
+ const groupKey = JSON.stringify(byVals);
+ let group = groups.get(groupKey);
+ if (group === undefined) {
+ group = [];
+ groups.set(groupKey, group);
+ }
+ group.push({ keyNum: rightKeyNums[ri] as number, rowIdx: ri });
+ }
+
+ for (let li = 0; li < nLeft; li++) {
+ const byVals: Scalar[] = keySpec.leftBy.map((col) => left.col(col).iat(li));
+ const groupKey = JSON.stringify(byVals);
+ const group = groups.get(groupKey);
+ if (group === undefined || group.length === 0) {
+ rightMatchIdx[li] = -1;
+ continue;
+ }
+ const groupKeys = group.map((e) => e.keyNum);
+ const lkNum = toNum(getKeyValue(left, keySpec.leftKey, li));
+ const posInGroup = findMatch(lkNum, groupKeys, direction, allowExact);
+ rightMatchIdx[li] = posInGroup >= 0 ? (group[posInGroup]?.rowIdx ?? -1) : -1;
+ }
+ }
+
+ // Apply tolerance filter
+ if (tolerance !== null) {
+ for (let li = 0; li < nLeft; li++) {
+ const ri = rightMatchIdx[li] as number;
+ if (ri < 0) {
+ continue;
+ }
+ const lkNum = toNum(getKeyValue(left, keySpec.leftKey, li));
+ const rkNum = rightKeyNums[ri] as number;
+ if (Math.abs(lkNum - rkNum) > tolerance) {
+ rightMatchIdx[li] = -1;
+ }
+ }
+ }
+
+ // Build output columns
+ const colData: Record = {};
+ for (const entry of plan) {
+ const col: Scalar[] = new Array(nLeft) as Scalar[];
+ if (entry.side === "left") {
+ const series = left.col(entry.srcCol as string);
+ for (let li = 0; li < nLeft; li++) {
+ col[li] = series.iat(li);
+ }
+ } else {
+ // right side — use matched row or null
+ const series = right.col(entry.srcCol as string);
+ for (let li = 0; li < nLeft; li++) {
+ const ri = rightMatchIdx[li] as number;
+ col[li] = ri >= 0 ? series.iat(ri) : null;
+ }
+ }
+ colData[entry.outCol] = col;
+ }
+
+ const index = new RangeIndex(nLeft) as unknown as Index;
+ return DataFrame.fromColumns(colData as Record, { index });
+}
diff --git a/src/merge/merge_ordered.ts b/src/merge/merge_ordered.ts
new file mode 100644
index 00000000..b932abd2
--- /dev/null
+++ b/src/merge/merge_ordered.ts
@@ -0,0 +1,647 @@
+/**
+ * merge_ordered — ordered merge of two DataFrames with optional fill.
+ *
+ * Mirrors `pandas.merge_ordered`:
+ * - Performs an ordered (sorted) merge — default `how: "outer"`
+ * - Result is sorted ascending by the merge key column(s)
+ * - `fill_method`: optional `"ffill"` to forward-fill NaN/null gaps in
+ * non-key columns after merging
+ * - `left_by` / `right_by`: group columns — the merge is applied
+ * independently within each group combination and results are
+ * concatenated in group order
+ * - `suffixes`: applied to overlapping non-key column names (default
+ * `["_x", "_y"]`)
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, mergeOrdered } from "tsb";
+ *
+ * const left = DataFrame.fromColumns({
+ * date: [1, 3, 5],
+ * price: [10, 30, 50],
+ * });
+ * const right = DataFrame.fromColumns({
+ * date: [2, 3, 6],
+ * volume: [200, 300, 600],
+ * });
+ *
+ * mergeOrdered(left, right, { on: "date" });
+ * // date | price | volume
+ * // 1 | 10 | null
+ * // 2 | null | 200
+ * // 3 | 30 | 300
+ * // 5 | 50 | null
+ * // 6 | null | 600
+ *
+ * mergeOrdered(left, right, { on: "date", fill_method: "ffill" });
+ * // date | price | volume
+ * // 1 | 10 | null ← nothing before to fill
+ * // 2 | 10 | 200 ← price carried forward from row 0
+ * // 3 | 30 | 300
+ * // 5 | 50 | 300 ← volume carried forward from row 3
+ * // 6 | 50 | 600
+ * ```
+ *
+ * @module
+ */
+
+import { DataFrame, RangeIndex } from "../core/index.ts";
+import type { Index } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── public API types ─────────────────────────────────────────────────────────
+
+/** Fill method applied to non-key columns after the ordered merge. */
+export type OrderedFillMethod = "ffill";
+
+/** Options for {@link mergeOrdered}. */
+export interface MergeOrderedOptions {
+  /**
+   * Column name present in **both** DataFrames to use as the ordered key.
+   * Mutually exclusive with `left_on` / `right_on`.
+   */
+  readonly on?: string | readonly string[];
+  /** Key column(s) in the left DataFrame (use with `right_on`). */
+  readonly left_on?: string | readonly string[];
+  /** Key column(s) in the right DataFrame (use with `left_on`). */
+  readonly right_on?: string | readonly string[];
+  /**
+   * Column(s) in the left DataFrame to group by before merging.
+   * The merge is applied independently per group and results concatenated.
+   */
+  readonly left_by?: string | readonly string[];
+  /**
+   * Column(s) in the right DataFrame to group by before merging.
+   * Must have the same number of columns as `left_by` when both are provided.
+   */
+  readonly right_by?: string | readonly string[];
+  /**
+   * How to join the two DataFrames.
+   * Default: `"outer"` (matching `pandas.merge_ordered`).
+   */
+  readonly how?: "inner" | "outer" | "left" | "right";
+  /**
+   * Fill method to apply to non-key columns after merging.
+   * - `"ffill"`: forward-fill null/undefined values within each column
+   *   (applied per group when `left_by` / `right_by` are used)
+   * - `null` / omitted: no filling (default)
+   */
+  readonly fill_method?: OrderedFillMethod | null;
+  /**
+   * Suffixes applied to overlapping non-key column names.
+   * Default: `["_x", "_y"]`.
+   */
+  readonly suffixes?: readonly [string, string];
+}
+
+// ─── internal helpers ─────────────────────────────────────────────────────────
+
+/** Normalise a string | readonly string[] | undefined into string[]. */
+function toCols(v: string | readonly string[] | undefined): string[] {
+ if (v === undefined) {
+ return [];
+ }
+ return typeof v === "string" ? [v] : [...v];
+}
+
+/** Fetch a single scalar from column `col` of `df` at integer row `row`. */
+function getVal(df: DataFrame, col: string, row: number): Scalar {
+  const series = df.col(col);
+  return series.at(row) as Scalar;
+}
+
+/** Compose a single string key out of several column values for one row (NUL-joined). */
+function makeGroupKey(df: DataFrame, cols: readonly string[], row: number): string {
+  const parts: string[] = [];
+  for (const c of cols) {
+    parts.push(String(getVal(df, c, row)));
+  }
+  return parts.join("\x00");
+}
+
+/**
+ * Ascending comparator for two Scalar values.
+ *
+ * Missing values (null/undefined) compare greater than everything else, so
+ * they sort to the end; numbers compare numerically; everything else falls
+ * back to string comparison.
+ */
+function compareScalar(a: Scalar, b: Scalar): number {
+  const aMissing = a === null || a === undefined;
+  const bMissing = b === null || b === undefined;
+  if (aMissing) {
+    return bMissing ? 0 : 1;
+  }
+  if (bMissing) {
+    return -1;
+  }
+  if (typeof a === "number" && typeof b === "number") {
+    return a - b;
+  }
+  const as = typeof a === "string" ? a : String(a);
+  const bs = typeof b === "string" ? b : String(b);
+  if (as === bs) {
+    return 0;
+  }
+  return as < bs ? -1 : 1;
+}
+
+/** Forward-fill null/undefined entries of `arr` in place with the last seen value. */
+function ffillArray(arr: Scalar[]): void {
+  let carried: Scalar = null;
+  arr.forEach((v, i) => {
+    if (v === null || v === undefined) {
+      arr[i] = carried;
+    } else {
+      carried = v;
+    }
+  });
+}
+
+// ─── core ordered merge ───────────────────────────────────────────────────────
+
+/**
+ * Column plan entry: which side provides a column, what source column it
+ * reads, and what name it gets in the output.
+ */
+interface ColEntry {
+  /** Column name in the merged output. */
+  readonly outputName: string;
+  /** Which DataFrame supplies the value ("coalesce" = left first, then right). */
+  readonly side: "left" | "right" | "coalesce";
+  /** Source column in the left DataFrame, or null when unused. */
+  readonly leftCol: string | null;
+  /** Source column in the right DataFrame, or null when unused. */
+  readonly rightCol: string | null;
+}
+
+/** Determine the key column names on each side (the two sides may differ). */
+function resolveKeys(
+  left: DataFrame,
+  right: DataFrame,
+  opts: MergeOrderedOptions,
+): { leftKeys: string[]; rightKeys: string[] } {
+  const on = toCols(opts.on);
+  if (on.length > 0) {
+    // `on` names columns that exist in both frames.
+    return { leftKeys: on, rightKeys: on };
+  }
+  const lk = toCols(opts.left_on);
+  const rk = toCols(opts.right_on);
+  if (lk.length > 0 && rk.length > 0) {
+    return { leftKeys: lk, rightKeys: rk };
+  }
+  // Fall back to the columns shared by both frames (in right-column order).
+  const inLeft = new Set(left.columns.values as string[]);
+  const shared: string[] = [];
+  for (const c of right.columns.values as string[]) {
+    if (inLeft.has(c)) {
+      shared.push(c);
+    }
+  }
+  if (shared.length === 0) {
+    throw new Error("mergeOrdered: no common columns and no on/left_on/right_on specified");
+  }
+  return { leftKeys: shared, rightKeys: shared };
+}
+
+/** Construct the ordered list of output columns for the merge. */
+function buildPlan(
+  left: DataFrame,
+  right: DataFrame,
+  leftKeys: readonly string[],
+  rightKeys: readonly string[],
+  leftBy: readonly string[],
+  rightBy: readonly string[],
+  suffixes: readonly [string, string],
+): ColEntry[] {
+  const plan: ColEntry[] = [];
+
+  // 1. Key columns are coalesced (left value preferred) under the left name.
+  leftKeys.forEach((lk, i) => {
+    plan.push({ outputName: lk, side: "coalesce", leftCol: lk, rightCol: rightKeys[i]! });
+  });
+
+  // 2. Group (by) columns: same name on both sides → coalesce; otherwise the
+  //    left name is taken from the left frame only…
+  leftBy.forEach((lc, i) => {
+    const rc = rightBy[i];
+    if (rc === lc) {
+      plan.push({ outputName: lc, side: "coalesce", leftCol: lc, rightCol: rc });
+    } else {
+      plan.push({ outputName: lc, side: "left", leftCol: lc, rightCol: null });
+    }
+  });
+
+  // 3. …and differently-named right by-columns get their own right-only column.
+  rightBy.forEach((rc, i) => {
+    if (leftBy[i] !== rc) {
+      plan.push({ outputName: rc, side: "right", leftCol: null, rightCol: rc });
+    }
+  });
+
+  // 4./5. Remaining (non-key, non-by) data columns; names present on both
+  // sides receive the configured suffixes.
+  const skipLeft = new Set<string>([...leftKeys, ...leftBy]);
+  const skipRight = new Set<string>([...rightKeys, ...rightBy]);
+  const leftData = (left.columns.values as string[]).filter((c) => !skipLeft.has(c));
+  const rightData = (right.columns.values as string[]).filter((c) => !skipRight.has(c));
+  const leftDataSet = new Set(leftData);
+  const rightDataSet = new Set(rightData);
+
+  for (const c of leftData) {
+    const name = rightDataSet.has(c) ? c + suffixes[0] : c;
+    plan.push({ outputName: name, side: "left", leftCol: c, rightCol: null });
+  }
+  for (const c of rightData) {
+    const name = leftDataSet.has(c) ? c + suffixes[1] : c;
+    plan.push({ outputName: name, side: "right", leftCol: null, rightCol: c });
+  }
+
+  return plan;
+}
+
+/**
+ * Merge a subset of rows from left and right into an ordered result.
+ * Both subsets are already sorted lexicographically by the key columns.
+ *
+ * Returns a record of output-column arrays laid out per `plan`.
+ *
+ * Fix: key equality/ordering now compares **all** key dimensions
+ * lexicographically. The previous implementation compared only the first
+ * key column, so multi-key merges treated any two rows agreeing on the
+ * first key as equal and emitted spurious cartesian matches.
+ */
+function mergeSubset(
+  left: DataFrame,
+  right: DataFrame,
+  leftRows: readonly number[],
+  rightRows: readonly number[],
+  leftKeys: readonly string[],
+  rightKeys: readonly string[],
+  plan: readonly ColEntry[],
+  how: "inner" | "outer" | "left" | "right",
+): Record<string, Scalar[]> {
+  // Build merged key + row-pair list via a sorted merge of the two row-sets
+  type RowPair = { leftRow: number | null; rightRow: number | null; keyVal: Scalar[] };
+
+  // Lexicographic comparison across every key dimension (not just the first).
+  const compareKeys = (a: readonly Scalar[], b: readonly Scalar[]): number => {
+    for (let d = 0; d < a.length; d++) {
+      const c = compareScalar(a[d] ?? null, b[d] ?? null);
+      if (c !== 0) {
+        return c;
+      }
+    }
+    return 0;
+  };
+
+  const pairs: RowPair[] = [];
+
+  let li = 0;
+  let ri = 0;
+
+  while (li < leftRows.length && ri < rightRows.length) {
+    const lr = leftRows[li]!;
+    const rr = rightRows[ri]!;
+
+    // Build composite key arrays
+    const lKeyVals = leftKeys.map((k) => getVal(left, k, lr));
+    const rKeyVals = rightKeys.map((k) => getVal(right, k, rr));
+
+    const cmp = compareKeys(lKeyVals, rKeyVals);
+
+    if (cmp === 0) {
+      // Exact match — collect the full runs of equal composite keys on both
+      // sides so many-to-many matches produce a cartesian product.
+      let li2 = li + 1;
+      while (li2 < leftRows.length) {
+        const nextKey = leftKeys.map((k) => getVal(left, k, leftRows[li2]!));
+        if (compareKeys(nextKey, lKeyVals) !== 0) {
+          break;
+        }
+        li2++;
+      }
+      let ri2 = ri + 1;
+      while (ri2 < rightRows.length) {
+        const nextKey = rightKeys.map((k) => getVal(right, k, rightRows[ri2]!));
+        if (compareKeys(nextKey, rKeyVals) !== 0) {
+          break;
+        }
+        ri2++;
+      }
+      // Cartesian product of matching rows
+      for (let a = li; a < li2; a++) {
+        for (let b = ri; b < ri2; b++) {
+          pairs.push({
+            leftRow: leftRows[a] ?? null,
+            rightRow: rightRows[b] ?? null,
+            keyVal: lKeyVals,
+          });
+        }
+      }
+      li = li2;
+      ri = ri2;
+    } else if (cmp < 0) {
+      // left key is smaller
+      if (how === "outer" || how === "left") {
+        pairs.push({ leftRow: lr, rightRow: null, keyVal: lKeyVals });
+      }
+      li++;
+    } else {
+      // right key is smaller
+      if (how === "outer" || how === "right") {
+        pairs.push({ leftRow: null, rightRow: rr, keyVal: rKeyVals });
+      }
+      ri++;
+    }
+  }
+
+  // Remaining left rows
+  if (how === "outer" || how === "left") {
+    while (li < leftRows.length) {
+      const lr = leftRows[li]!;
+      const lKeyVals = leftKeys.map((k) => getVal(left, k, lr));
+      pairs.push({ leftRow: lr, rightRow: null, keyVal: lKeyVals });
+      li++;
+    }
+  }
+
+  // Remaining right rows
+  if (how === "outer" || how === "right") {
+    while (ri < rightRows.length) {
+      const rr = rightRows[ri]!;
+      const rKeyVals = rightKeys.map((k) => getVal(right, k, rr));
+      pairs.push({ leftRow: null, rightRow: rr, keyVal: rKeyVals });
+      ri++;
+    }
+  }
+
+  // Materialise output columns from the pair list, per the column plan.
+  const outCols: Record<string, Scalar[]> = {};
+  for (const e of plan) {
+    outCols[e.outputName] = [];
+  }
+
+  for (const pair of pairs) {
+    for (const e of plan) {
+      let val: Scalar = null;
+      if (e.side === "coalesce") {
+        // Prefer the left value; fall back to the right side.
+        if (pair.leftRow !== null && e.leftCol !== null) {
+          val = getVal(left, e.leftCol, pair.leftRow);
+        } else if (pair.rightRow !== null && e.rightCol !== null) {
+          val = getVal(right, e.rightCol, pair.rightRow);
+        }
+      } else if (e.side === "left") {
+        if (pair.leftRow !== null && e.leftCol !== null) {
+          val = getVal(left, e.leftCol, pair.leftRow);
+        }
+      } else {
+        // right side — null when this pair has no matching right row
+        if (pair.rightRow !== null && e.rightCol !== null) {
+          val = getVal(right, e.rightCol, pair.rightRow);
+        }
+      }
+      (outCols[e.outputName] as Scalar[]).push(val);
+    }
+  }
+
+  return outCols;
+}
+
+/** Append the rows of `src` onto `dest`, column by column (skips absent keys). */
+function appendRows(
+  dest: Record<string, Scalar[]>,
+  src: Record<string, Scalar[]>,
+  keys: readonly string[],
+): void {
+  for (const k of keys) {
+    const target = dest[k];
+    const source = src[k];
+    if (target === undefined || source === undefined) {
+      continue;
+    }
+    for (const v of source) {
+      target.push(v);
+    }
+  }
+}
+
+// ─── public function ──────────────────────────────────────────────────────────
+
+/**
+ * Perform an ordered merge of two DataFrames, optionally filling gaps.
+ *
+ * Mirrors `pandas.merge_ordered`.
+ *
+ * @param left - Left DataFrame; it does not need to be pre-sorted (rows are sorted by the key column(s) internally).
+ * @param right - Right DataFrame; it does not need to be pre-sorted (rows are sorted by the key column(s) internally).
+ * @param options - Merge specification (see {@link MergeOrderedOptions}).
+ * @returns A new `DataFrame` with rows sorted ascending by the key column(s).
+ *
+ * @throws {Error} When no join keys can be determined.
+ *
+ * @example
+ * ```ts
+ * const left = DataFrame.fromColumns({
+ * k: [1, 3, 5],
+ * a: [10, 30, 50],
+ * });
+ * const right = DataFrame.fromColumns({
+ * k: [2, 3, 6],
+ * b: [20, 30, 60],
+ * });
+ *
+ * mergeOrdered(left, right, { on: "k" });
+ * // k | a | b
+ * // 1 | 10 | null
+ * // 2 | null | 20
+ * // 3 | 30 | 30
+ * // 5 | 50 | null
+ * // 6 | null | 60
+ *
+ * mergeOrdered(left, right, { on: "k", fill_method: "ffill" });
+ * // k | a | b
+ * // 1 | 10 | null
+ * // 2 | 10 | 20
+ * // 3 | 30 | 30
+ * // 5 | 50 | 30
+ * // 6 | 50 | 60
+ * ```
+ */
+export function mergeOrdered(
+  left: DataFrame,
+  right: DataFrame,
+  options?: MergeOrderedOptions,
+): DataFrame {
+  const opts = options ?? {};
+  const how = opts.how ?? "outer";
+  const suffixes: readonly [string, string] = opts.suffixes ?? ["_x", "_y"];
+  const fillMethod = opts.fill_method ?? null;
+
+  const { leftKeys, rightKeys } = resolveKeys(left, right, opts);
+
+  const leftBy = toCols(opts.left_by);
+  const rightBy = toCols(opts.right_by);
+
+  // Validate by columns
+  if (leftBy.length > 0 && rightBy.length > 0 && leftBy.length !== rightBy.length) {
+    throw new Error("mergeOrdered: left_by and right_by must have the same number of columns");
+  }
+  const hasBy = leftBy.length > 0 || rightBy.length > 0;
+  // When only one side supplies by-columns, the same names are reused for the
+  // other side. NOTE(review): this assumes those column names also exist in
+  // the other DataFrame — confirm callers guarantee that, otherwise the
+  // group lookup below will fail.
+  const effectiveLeftBy = leftBy.length > 0 ? leftBy : rightBy;
+  const effectiveRightBy = rightBy.length > 0 ? rightBy : leftBy;
+
+  // One column plan is shared by every group merge below.
+  const plan = buildPlan(
+    left,
+    right,
+    leftKeys,
+    rightKeys,
+    effectiveLeftBy,
+    effectiveRightBy,
+    suffixes,
+  );
+  const outputColNames = plan.map((e) => e.outputName);
+
+  // ── No-group case ─────────────────────────────────────────────────────────
+
+  if (!hasBy) {
+    // Sort both DFs by key then merge
+    const leftSorted = sortByKeys(left, leftKeys);
+    const rightSorted = sortByKeys(right, rightKeys);
+
+    // All row positions of each sorted frame, in order.
+    const leftAllRows = Array.from({ length: leftSorted.shape[0] }, (_, i) => i);
+    const rightAllRows = Array.from({ length: rightSorted.shape[0] }, (_, i) => i);
+
+    const colData = mergeSubset(
+      leftSorted,
+      rightSorted,
+      leftAllRows,
+      rightAllRows,
+      leftKeys,
+      rightKeys,
+      plan,
+      how,
+    );
+
+    if (fillMethod === "ffill") {
+      for (const name of outputColNames) {
+        // Don't fill key columns (key output columns use the left key names)
+        if (!leftKeys.includes(name)) {
+          const arr = colData[name];
+          if (arr !== undefined) {
+            ffillArray(arr);
+          }
+        }
+      }
+    }
+
+    return buildDataFrame(colData, outputColNames);
+  }
+
+  // ── Group-by case ─────────────────────────────────────────────────────────
+
+  // Group left and right rows by their by-column keys
+  const leftGroups = groupRows(left, effectiveLeftBy);
+  const rightGroups = groupRows(right, effectiveRightBy);
+
+  // Collect all group keys from both sides. Iteration follows Set insertion
+  // order: left-side groups first (in row order), then right-only groups —
+  // groups are concatenated in that order, not re-sorted globally.
+  const allGroupKeys = new Set([...leftGroups.keys(), ...rightGroups.keys()]);
+
+  // Initialise output column arrays
+  const colData: Record = {};
+  for (const name of outputColNames) {
+    colData[name] = [];
+  }
+
+  for (const gk of allGroupKeys) {
+    // A group may exist on only one side; the other contributes no rows.
+    const lRows = leftGroups.get(gk) ?? [];
+    const rRows = rightGroups.get(gk) ?? [];
+
+    // Sort row indices by key columns within each group
+    const leftSortedRows = sortRowIndices(left, lRows, leftKeys);
+    const rightSortedRows = sortRowIndices(right, rRows, rightKeys);
+
+    const groupCols = mergeSubset(
+      left,
+      right,
+      leftSortedRows,
+      rightSortedRows,
+      leftKeys,
+      rightKeys,
+      plan,
+      how,
+    );
+
+    // Forward-fill is applied per group so values never leak across groups;
+    // key and by columns are left untouched.
+    if (fillMethod === "ffill") {
+      for (const name of outputColNames) {
+        if (
+          !(
+            leftKeys.includes(name) ||
+            effectiveLeftBy.includes(name) ||
+            effectiveRightBy.includes(name)
+          )
+        ) {
+          const arr = groupCols[name];
+          if (arr !== undefined) {
+            ffillArray(arr);
+          }
+        }
+      }
+    }
+
+    appendRows(colData, groupCols, outputColNames);
+  }
+
+  return buildDataFrame(colData, outputColNames);
+}
+
+// ─── sorting helpers ──────────────────────────────────────────────────────────
+
+/** Return a copy of `df` with rows ordered ascending by `keys` (no-op for empty keys). */
+function sortByKeys(df: DataFrame, keys: readonly string[]): DataFrame {
+  if (keys.length === 0) {
+    return df;
+  }
+  const n = df.shape[0];
+  // Sort row positions rather than moving data, then gather each column once.
+  const order = Array.from({ length: n }, (_, i) => i).sort((x, y) => {
+    for (const k of keys) {
+      const c = compareScalar(getVal(df, k, x), getVal(df, k, y));
+      if (c !== 0) {
+        return c;
+      }
+    }
+    return 0;
+  });
+
+  const colData: Record<string, Scalar[]> = {};
+  for (const name of df.columns.values as string[]) {
+    colData[name] = order.map((r) => getVal(df, name, r));
+  }
+
+  const idx = new RangeIndex(n) as unknown as Index;
+  return DataFrame.fromColumns(colData as Record<Label, Scalar[]>, { index: idx });
+}
+
+/** Sort a list of row positions of `df` ascending by the given key columns. */
+function sortRowIndices(df: DataFrame, rows: readonly number[], keys: readonly string[]): number[] {
+  return [...rows].sort((x, y) => {
+    for (const k of keys) {
+      const c = compareScalar(getVal(df, k, x), getVal(df, k, y));
+      if (c !== 0) {
+        return c;
+      }
+    }
+    return 0;
+  });
+}
+
+/** Bucket row positions of `df` by the composite value of `byCols`. */
+function groupRows(df: DataFrame, byCols: readonly string[]): Map<string, number[]> {
+  const buckets = new Map<string, number[]>();
+  const n = df.shape[0];
+  for (let row = 0; row < n; row++) {
+    const key = makeGroupKey(df, byCols, row);
+    const bucket = buckets.get(key);
+    if (bucket === undefined) {
+      buckets.set(key, [row]);
+    } else {
+      bucket.push(row);
+    }
+  }
+  return buckets;
+}
+
+/** Materialise a DataFrame from a column-data record in the given column order. */
+function buildDataFrame(colData: Record<string, Scalar[]>, colNames: readonly string[]): DataFrame {
+  // Row count is taken from the first column (0 when there are no columns).
+  const n = colNames.length === 0 ? 0 : (colData[colNames[0]!] as Scalar[]).length;
+  const data: Record<string, Scalar[]> = {};
+  for (const name of colNames) {
+    data[name] = colData[name] as Scalar[];
+  }
+  const idx = new RangeIndex(n) as unknown as Index;
+  return DataFrame.fromColumns(data, { index: idx });
+}
diff --git a/src/reshape/explode.ts b/src/reshape/explode.ts
index 0b19c2e5..6825ec17 100644
--- a/src/reshape/explode.ts
+++ b/src/reshape/explode.ts
@@ -192,7 +192,9 @@ export function explodeDataFrame(
outLabels.push(label);
for (const col of allCols) {
const colArr = outData[col];
- if (colArr === undefined) continue;
+ if (colArr === undefined) {
+ continue;
+ }
if (explodeCols.includes(col)) {
const val = df.col(col).iat(i);
const cells = expandCell(val);
diff --git a/src/stats/at_iat.ts b/src/stats/at_iat.ts
new file mode 100644
index 00000000..cb8a4162
--- /dev/null
+++ b/src/stats/at_iat.ts
@@ -0,0 +1,136 @@
+/**
+ * at_iat — fast scalar access for Series and DataFrame.
+ *
+ * Mirrors the pandas `.at` and `.iat` indexers:
+ *
+ * - `seriesAt(s, label)` — value by label (`Series.at[label]`)
+ * - `seriesIat(s, i)` — value by integer position (`Series.iat[i]`)
+ * - `dataFrameAt(df, rowLabel, colLabel)` — scalar by row-label × col-label (`DataFrame.at[row, col]`)
+ * - `dataFrameIat(df, rowInt, colInt)` — scalar by integer position (`DataFrame.iat[row, col]`)
+ *
+ * All four are **read-only** accessors optimised for single-cell lookups.
+ * They behave like the corresponding `.loc`/`.iloc` calls but make the
+ * intent clearer and avoid the overhead of allocating a new Series or
+ * DataFrame for a single value.
+ *
+ * @example
+ * ```ts
+ * import { Series, DataFrame, seriesAt, seriesIat, dataFrameAt, dataFrameIat } from "tsb";
+ *
+ * const s = new Series({ data: [10, 20, 30], index: ["a", "b", "c"] });
+ * seriesAt(s, "b"); // 20
+ * seriesIat(s, 2); // 30
+ *
+ * const df = DataFrame.fromColumns(
+ * { x: [1, 2], y: [3, 4] },
+ * { index: ["r0", "r1"] },
+ * );
+ * dataFrameAt(df, "r1", "x"); // 2
+ * dataFrameIat(df, 0, 1); // 3
+ * ```
+ *
+ * @module
+ */
+
+import type { DataFrame, Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── Series accessors ─────────────────────────────────────────────────────────
+
+/**
+ * Fast label-based scalar access: the value of `s` at `label`.
+ *
+ * Mirrors `pandas.Series.at[label]`.
+ *
+ * @throws {RangeError} when `label` is not in the index.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [10, 20, 30], index: ["a", "b", "c"] });
+ * seriesAt(s, "b"); // 20
+ * ```
+ */
+export function seriesAt(s: Series, label: Label): Scalar {
+  try {
+    return s.at(label);
+  } catch (error) {
+    // Surface missing-label errors as RangeError, like the other accessors.
+    const isKeyError = error instanceof Error && error.message.startsWith("KeyError:");
+    if (!isKeyError) {
+      throw error;
+    }
+    throw new RangeError((error as Error).message);
+  }
+}
+
+/**
+ * Fast positional scalar access: the value of `s` at integer position `i`.
+ *
+ * Mirrors `pandas.Series.iat[i]`; negative positions count from the end
+ * (`-1` is the last element).
+ *
+ * @throws {RangeError} when `i` is out of bounds.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [10, 20, 30] });
+ * seriesIat(s, 2);  // 30
+ * seriesIat(s, -1); // 30
+ * ```
+ */
+export function seriesIat(s: Series, i: number): Scalar {
+  // Series.iat already implements bounds checking and negative indexing.
+  return s.iat(i);
+}
+
+// ─── DataFrame accessors ──────────────────────────────────────────────────────
+
+/**
+ * Fast label-based scalar access on a DataFrame: the value at row `rowLabel`
+ * of column `colLabel`.
+ *
+ * Mirrors `pandas.DataFrame.at[rowLabel, colLabel]`.
+ *
+ * @throws {RangeError} when `rowLabel` is not in the row index.
+ * @throws {Error} when `colLabel` is not a column name.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ x: [1, 2], y: [3, 4] }, { index: ["r0", "r1"] });
+ * dataFrameAt(df, "r1", "x"); // 2
+ * ```
+ */
+export function dataFrameAt(df: DataFrame, rowLabel: Label, colLabel: string): Scalar {
+  try {
+    const column = df.col(colLabel);
+    return column.at(rowLabel);
+  } catch (error) {
+    // Missing labels surface as RangeError; everything else is rethrown.
+    const isKeyError = error instanceof Error && error.message.startsWith("KeyError:");
+    if (!isKeyError) {
+      throw error;
+    }
+    throw new RangeError((error as Error).message);
+  }
+}
+
+/**
+ * Fast positional scalar access on a DataFrame: the value at integer row
+ * `rowInt` of integer column `colInt`.
+ *
+ * Mirrors `pandas.DataFrame.iat[rowInt, colInt]`. Both positions support
+ * negative indexing (counting from the end).
+ *
+ * @throws {RangeError} when either position is out of bounds.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ x: [1, 2], y: [3, 4] }, { index: ["r0", "r1"] });
+ * dataFrameIat(df, 0, 1); // 3 (row 0, col 1 = "y")
+ * ```
+ */
+export function dataFrameIat(df: DataFrame, rowInt: number, colInt: number): Scalar {
+  const nCols = df.columns.size;
+  // Normalise a negative column position before bounds-checking it.
+  const normCol = colInt >= 0 ? colInt : nCols + colInt;
+  if (normCol < 0 || normCol >= nCols) {
+    throw new RangeError(
+      `Column index ${colInt} out of bounds for DataFrame with ${nCols} columns`,
+    );
+  }
+  // Row bounds (including negative rowInt) are handled by Series.iat.
+  const colName = df.columns.at(normCol);
+  return df.col(colName).iat(rowInt);
+}
diff --git a/src/stats/between.ts b/src/stats/between.ts
new file mode 100644
index 00000000..964a8a5d
--- /dev/null
+++ b/src/stats/between.ts
@@ -0,0 +1,127 @@
+/**
+ * between — element-wise range check for Series values.
+ *
+ * Mirrors `pandas.Series.between(left, right, inclusive='both')`.
+ *
+ * Returns a boolean Series indicating whether each element falls within the
+ * interval `[left, right]` (by default inclusive on both ends).
+ *
+ * - {@link seriesBetween} — element-wise range check
+ *
+ * @example
+ * ```ts
+ * import { Series, seriesBetween } from "tsb";
+ *
+ * const s = new Series({ data: [1, 2, 3, 4, 5] });
+ * seriesBetween(s, 2, 4).values; // [false, true, true, true, false]
+ *
+ * seriesBetween(s, 2, 4, { inclusive: "left" }).values;
+ * // [false, true, true, false, false]
+ * ```
+ *
+ * @module
+ */
+
+import { Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/**
+ * Controls which endpoints of the interval are included.
+ * - `"both"` (default): left ≤ x ≤ right
+ * - `"left"`: left ≤ x < right
+ * - `"right"`: left < x ≤ right
+ * - `"neither"`: left < x < right
+ */
+export type BetweenInclusive = "both" | "left" | "right" | "neither";
+
+/** Options for {@link seriesBetween}. */
+export interface BetweenOptions {
+  /**
+   * Which endpoints to include. Mirrors the `inclusive` argument of
+   * `pandas.Series.between`.
+   * @default "both"
+   */
+  readonly inclusive?: BetweenInclusive;
+}
+
+// ─── internal helpers ─────────────────────────────────────────────────────────
+
+/** True when `v` is a missing value: null, undefined, or NaN. */
+function isMissing(v: unknown): boolean {
+  return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+}
+
+/** Strict less-than on two scalars (relies on JS `<` for numbers/strings). */
+function scalarLt(a: Scalar, b: Scalar): boolean {
+  const x = a as unknown as number;
+  const y = b as unknown as number;
+  return x < y;
+}
+
+/** Less-than-or-equal counterpart of {@link scalarLt}. */
+function scalarLte(a: Scalar, b: Scalar): boolean {
+  const x = a as unknown as number;
+  const y = b as unknown as number;
+  return x <= y;
+}
+
+/**
+ * Whether scalar `v` lies in the interval [left, right] under the given
+ * `inclusive` mode. Any missing operand (value or bound) yields `false`.
+ */
+function inRange(v: Scalar, left: Scalar, right: Scalar, inclusive: BetweenInclusive): boolean {
+  if (isMissing(v) || isMissing(left) || isMissing(right)) {
+    return false;
+  }
+  const includeLeft = inclusive === "both" || inclusive === "left";
+  const includeRight = inclusive === "both" || inclusive === "right";
+  if (!(includeLeft ? scalarLte(left, v) : scalarLt(left, v))) {
+    return false;
+  }
+  return includeRight ? scalarLte(v, right) : scalarLt(v, right);
+}
+
+// ─── seriesBetween ─────────────────────────────────────────────────────────────
+
+/**
+ * Element-wise range check: a boolean Series marking which elements of `s`
+ * fall within `[left, right]`.
+ *
+ * Missing values in `s` always produce `false` (matching pandas behaviour).
+ * The result keeps the index and name of `s`.
+ *
+ * @param s - Source Series.
+ * @param left - Left bound of the interval.
+ * @param right - Right bound of the interval.
+ * @param options - See {@link BetweenOptions}.
+ * @returns Boolean Series with the same index as `s`.
+ *
+ * @example
+ * ```ts
+ * import { Series, seriesBetween } from "tsb";
+ *
+ * const s = new Series({ data: [1, 2, 3, 4, 5] });
+ * seriesBetween(s, 2, 4).values;
+ * // [false, true, true, true, false]
+ *
+ * seriesBetween(s, 2, 4, { inclusive: "neither" }).values;
+ * // [false, false, true, false, false]
+ * ```
+ */
+export function seriesBetween(
+  s: Series,
+  left: Scalar,
+  right: Scalar,
+  options: BetweenOptions = {},
+): Series {
+  const mode: BetweenInclusive = options.inclusive ?? "both";
+  const flags = Array.from({ length: s.size }, (_, i) =>
+    inRange(s.values[i] as Scalar, left, right, mode),
+  );
+  return new Series({ data: flags, index: s.index, name: s.name });
+}
diff --git a/src/stats/combine.ts b/src/stats/combine.ts
new file mode 100644
index 00000000..646cae76
--- /dev/null
+++ b/src/stats/combine.ts
@@ -0,0 +1,255 @@
+/**
+ * combine — element-wise combination of two Series or two DataFrames
+ * using a caller-supplied binary function.
+ *
+ * Mirrors `pandas.Series.combine()` / `pandas.DataFrame.combine()`.
+ *
+ * - {@link combineSeries} — combine two Series element-wise
+ * - {@link combineDataFrame} — combine two DataFrames column-by-column
+ *
+ * ### Semantics
+ *
+ * For `combineSeries(self, other, func, fillValue?)`:
+ * - The result index is the **union** of `self.index` and `other.index`.
+ * - For each index label, the value is `func(a, b)` where `a` is from `self`
+ * and `b` is from `other`.
+ * - When only one side has a value for a label, `fillValue` (default `null`)
+ * is used for the missing side.
+ *
+ * For `combineDataFrame(self, other, func, fillValue?, overwrite?)`:
+ * - The result columns are the **union** of the two sets.
+ * - For each column that exists in **both**, the result is `combineSeries(a, b, func, fillValue)`.
+ * - For columns only in `self`: when `overwrite` is `true` (default), the
+ * result is `func(v, fillValue)` for each element; when `false`, the column
+ * from `self` is kept as-is.
+ * - For columns only in `other`: same rule from `other`'s perspective.
+ *
+ * @example
+ * ```ts
+ * import { Series, combineSeries } from "tsb";
+ *
+ * const a = new Series({ data: [1, 2, 3], index: [0, 1, 2] });
+ * const b = new Series({ data: [10, 20, 30], index: [0, 1, 2] });
+ * combineSeries(a, b, (x, y) => Math.max(x as number, y as number)).values;
+ * // [10, 20, 30]
+ * ```
+ *
+ * @module
+ */
+
+import { DataFrame, Series } from "../core/index.ts";
+import type { Index } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/** Options for {@link combineDataFrame}. */
+export interface CombineDataFrameOptions {
+  /**
+   * Scalar used as a placeholder for missing values when only one side has a
+   * given index label or column. Default `null`.
+   */
+  readonly fillValue?: Scalar;
+
+  /**
+   * When `true` (default) columns that exist in only one DataFrame are still
+   * processed by `func` (using `fillValue` for the missing side). When
+   * `false`, those columns are passed through unchanged from whichever
+   * DataFrame contains them. Mirrors the `overwrite` flag of
+   * `pandas.DataFrame.combine`.
+   */
+  readonly overwrite?: boolean;
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** Map each stringified index label to the list of positions where it occurs. */
+function buildLabelMap(idx: Index): Map<string, number[]> {
+  const map = new Map<string, number[]>();
+  for (let pos = 0; pos < idx.size; pos++) {
+    const label = String(idx.at(pos));
+    const positions = map.get(label);
+    if (positions === undefined) {
+      map.set(label, [pos]);
+    } else {
+      positions.push(pos);
+    }
+  }
+  return map;
+}
+
+/** Union of two iterables of string keys, sorted lexicographically. */
+function unionKeys(a: Iterable<string>, b: Iterable<string>): string[] {
+  return [...new Set<string>([...a, ...b])].sort();
+}
+
+// ─── Series ───────────────────────────────────────────────────────────────────
+
+/**
+ * Combine two Series element-wise with a binary function.
+ *
+ * The result index is the union of the two indices. When a label exists in
+ * only one Series, `fillValue` (default `null`) is used for the missing side.
+ *
+ * @param self - Left-hand Series.
+ * @param other - Right-hand Series.
+ * @param func - Binary function `(a, b) → Scalar`.
+ * @param fillValue - Placeholder for missing values. Default `null`.
+ *
+ * @example
+ * ```ts
+ * const a = new Series({ data: [1, 2, null], index: [0, 1, 2] });
+ * const b = new Series({ data: [10, null, 30], index: [0, 1, 3] });
+ * combineSeries(a, b, (x, y) => (x as number) + (y as number), 0).values;
+ * // [11, 2, 0, 30] — index [0, 1, 2, 3]
+ * ```
+ */
+export function combineSeries(
+  self: Series,
+  other: Series,
+  func: (a: Scalar, b: Scalar) => Scalar,
+  fillValue: Scalar = null,
+): Series {
+  const selfIdx = self.index as Index;
+  const otherIdx = other.index as Index;
+
+  // Label → positions maps allow duplicate index labels on either side.
+  const selfMap = buildLabelMap(selfIdx);
+  const otherMap = buildLabelMap(otherIdx);
+
+  // NOTE(review): union keys are sorted as strings, so numeric labels come
+  // out in lexicographic order ("10" before "2") — confirm this is intended.
+  const allKeys = unionKeys(selfMap.keys(), otherMap.keys());
+
+  const resultData: Scalar[] = [];
+  const resultIndex: Label[] = [];
+
+  for (const key of allKeys) {
+    const selfPositions = selfMap.get(key) ?? [];
+    const otherPositions = otherMap.get(key) ?? [];
+
+    // Pair them up; if one side has more, use fillValue for extras
+    const maxLen = Math.max(selfPositions.length, otherPositions.length, 1);
+    for (let k = 0; k < maxLen; k++) {
+      const si = selfPositions[k];
+      const oi = otherPositions[k];
+
+      // Since `key` came from the union, at least one of si/oi is defined for
+      // every k < maxLen; the missing side falls back to fillValue.
+      const aVal: Scalar = si !== undefined ? (self.values[si] ?? null) : fillValue;
+      const bVal: Scalar = oi !== undefined ? (other.values[oi] ?? null) : fillValue;
+
+      resultData.push(func(aVal, bVal));
+
+      // Use the label from whichever side has a value at this position
+      const rawLabel: Label =
+        si !== undefined
+          ? (selfIdx.at(si) ?? (key as Label))
+          : (otherIdx.at(oi ?? 0) ?? (key as Label));
+      resultIndex.push(rawLabel);
+    }
+  }
+
+  return new Series({ data: resultData, index: resultIndex });
+}
+
+// ─── DataFrame ────────────────────────────────────────────────────────────────
+
+/**
+ * Combine two DataFrames column-by-column with a binary function.
+ *
+ * For columns present in **both** DataFrames, each column pair is merged via
+ * {@link combineSeries}. For columns present in only **one** DataFrame, the
+ * behaviour depends on `overwrite`:
+ * - When `overwrite` is `true` (default), `func` is applied with `fillValue`
+ * for the missing side, producing a new column.
+ * - When `overwrite` is `false`, the original column is preserved as-is.
+ *
+ * @param self - Left-hand DataFrame.
+ * @param other - Right-hand DataFrame.
+ * @param func - Binary function `(a, b) → Scalar` applied element-wise.
+ * @param options - {@link CombineDataFrameOptions}
+ *
+ * @example
+ * ```ts
+ * const a = DataFrame.fromColumns({ x: [1, 2], y: [10, 20] });
+ * const b = DataFrame.fromColumns({ x: [100, 200], z: [1000, 2000] });
+ * combineDataFrame(a, b, (p, q) => Math.min(p as number, q as number)).col("x").values;
+ * // [1, 2]
+ * ```
+ */
+export function combineDataFrame(
+ self: DataFrame,
+ other: DataFrame,
+ func: (a: Scalar, b: Scalar) => Scalar,
+ options: CombineDataFrameOptions = {},
+): DataFrame {
+ const fillValue: Scalar = options.fillValue ?? null;
+ const overwrite: boolean = options.overwrite !== false;
+
+ const selfCols = new Set(self.columns.values);
+ const otherCols = new Set(other.columns.values);
+ const allCols = unionKeys(selfCols, otherCols);
+
+ const resultCols: Record = {};
+ // Track the row index from the first shared column combination.
+ let resultRowIndex: readonly Label[] | null = null;
+
+ for (const col of allCols) {
+ const inSelf = selfCols.has(col);
+ const inOther = otherCols.has(col);
+
+ if (inSelf && inOther) {
+ const merged = combineSeries(self.col(col), other.col(col), func, fillValue);
+ resultCols[col] = [...merged.values] as Scalar[];
+ if (resultRowIndex === null) {
+ resultRowIndex = merged.index.values as readonly Label[];
+ }
+ } else if (inSelf) {
+ if (overwrite) {
+ const selfSeries = self.col(col);
+ const data = (selfSeries.values as readonly Scalar[]).map((v) =>
+ func(v ?? null, fillValue),
+ );
+ resultCols[col] = data as Scalar[];
+ } else {
+ resultCols[col] = [...(self.col(col).values as readonly Scalar[])] as Scalar[];
+ }
+ } else {
+ // inOther only
+ if (overwrite) {
+ const otherSeries = other.col(col);
+ const data = (otherSeries.values as readonly Scalar[]).map((v) =>
+ func(fillValue, v ?? null),
+ );
+ resultCols[col] = data as Scalar[];
+ } else {
+ resultCols[col] = [...(other.col(col).values as readonly Scalar[])] as Scalar[];
+ }
+ }
+ }
+
+ // Determine row index: from shared column combination, or from self/other directly.
+ const rowIndex: readonly Label[] =
+ resultRowIndex ??
+ (selfCols.size > 0
+ ? (self.index.values as readonly Label[])
+ : (other.index.values as readonly Label[]));
+
+ const nRows = rowIndex.length;
+ // Ensure all columns match the row count (pad / truncate if needed)
+ const alignedCols: Record = {};
+ for (const col of allCols) {
+ const arr = resultCols[col] ?? [];
+ if (arr.length === nRows) {
+ alignedCols[col] = arr;
+ } else {
+ // Pad with null to match row count
+ const padded: Scalar[] = [...arr];
+ while (padded.length < nRows) {
+ padded.push(null);
+ }
+ alignedCols[col] = padded.slice(0, nRows);
+ }
+ }
+
+ return DataFrame.fromColumns(alignedCols, { index: rowIndex });
+}
diff --git a/src/stats/corrwith.ts b/src/stats/corrwith.ts
new file mode 100644
index 00000000..276ecf5f
--- /dev/null
+++ b/src/stats/corrwith.ts
@@ -0,0 +1,273 @@
+/**
+ * corrwith — pairwise correlation of a DataFrame with a Series or another DataFrame.
+ * autocorr — lag-N autocorrelation for a numeric Series.
+ *
+ * Mirrors:
+ * - `pandas.Series.autocorr(lag=1)` — Pearson correlation of the Series with
+ * itself shifted by `lag` positions (positional shift, not label-aligned).
+ * - `pandas.DataFrame.corrwith(other, axis=0, drop=False, method="pearson")` —
+ * compute the pairwise column-wise (or row-wise) Pearson correlation between
+ * a DataFrame and a Series or another DataFrame.
+ *
+ * ### autoCorr
+ *
+ * The autocorrelation at lag `k` is `pearsonCorr(s, s.shift(k))`. The shift
+ * is positional — i.e. the first `k` elements of the shifted copy become `null`
+ * (dropped from the correlation computation). This matches pandas' behaviour.
+ *
+ * ### corrWith
+ *
+ * When `other` is a **Series** (axis=0):
+ * - Each *column* of `df` is correlated with `other` using label alignment.
+ * - The result is a Series indexed by the column names of `df`.
+ *
+ * When `other` is a **DataFrame** (axis=0):
+ * - Columns present in both DataFrames are correlated pairwise.
+ * - If `drop=false` (default), columns present in only one DataFrame receive
+ * `NaN` in the result. If `drop=true`, those columns are omitted.
+ * - The result is a Series indexed by the union (or intersection) of column
+ * names.
+ *
+ * When `axis=1` the same logic applies along rows instead of columns.
+ *
+ * @module
+ */
+
+import { DataFrame, Index, Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+import { pearsonCorr } from "./corr.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/** Options for {@link corrWith}. */
+export interface CorrWithOptions {
+ /**
+ * Axis along which to align and correlate.
+ * - `0` / `"index"` (default): correlate columns
+ * - `1` / `"columns"`: correlate rows (both inputs are transposed first)
+ */
+ readonly axis?: 0 | 1 | "index" | "columns";
+ /**
+ * When `true`, drop columns/rows that appear in only one of the two objects.
+ * When `false` (default), those labels receive `NaN`.
+ * Only consulted when `other` is a DataFrame; ignored for a Series.
+ */
+ readonly drop?: boolean;
+ /**
+ * Minimum number of non-NaN observation pairs required to compute a valid
+ * correlation. Defaults to `1`. Forwarded unchanged to `pearsonCorr`.
+ */
+ readonly minPeriods?: number;
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** True iff `v` is null, undefined, or NaN. */
+function isMissing(v: Scalar): boolean {
+ return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+}
+
+/** Transpose a DataFrame — rows become columns, columns become rows. */
+function transpose(df: DataFrame): DataFrame {
+ const rowLabels = df.index.toArray();
+ const colLabels = df.columns.toArray();
+
+ const newCols: Record = {};
+ for (const r of rowLabels) {
+ newCols[String(r)] = [];
+ }
+ for (const col of colLabels) {
+ const vals = df.col(col).values;
+ for (let i = 0; i < rowLabels.length; i++) {
+ const r = rowLabels[i];
+ if (r !== null && r !== undefined) {
+ const arr = newCols[String(r)];
+ if (arr !== undefined) {
+ const v = vals[i];
+ arr.push(v !== undefined ? v : null);
+ }
+ }
+ }
+ }
+ return DataFrame.fromColumns(newCols, { index: colLabels });
+}
+
+// ─── autoCorr ─────────────────────────────────────────────────────────────────
+
+/**
+ * Compute the lag-N autocorrelation of a numeric Series.
+ *
+ * The autocorrelation at lag `k` is the Pearson correlation coefficient
+ * between the Series and the same Series shifted by `k` positions.
+ * The first `k` values of the shifted copy are `null` (excluded from
+ * the correlation).
+ *
+ * Returns `NaN` when:
+ * - There are fewer than 2 valid observation pairs.
+ * - All valid values are identical (zero variance).
+ *
+ * Mirrors `pandas.Series.autocorr(lag=1)`.
+ *
+ * @param s - Input numeric Series.
+ * @param lag - Shift amount (default `1`). Must be a non-negative integer.
+ *
+ * @example
+ * ```ts
+ * import { Series, autoCorr } from "tsb";
+ *
+ * const s = new Series({ data: [1, 2, 3, 4, 5] });
+ * autoCorr(s); // lag=1 → 1.0 (perfectly correlated with itself)
+ * autoCorr(s, 0); // lag=0 → 1.0
+ * autoCorr(s, 2); // lag=2 → 1.0
+ * ```
+ */
+export function autoCorr(s: Series, lag = 1): number {
+ if (lag < 0 || !Number.isInteger(lag)) {
+ throw new RangeError(`autoCorr: lag must be a non-negative integer, got ${lag}`);
+ }
+
+ if (lag === 0) {
+ // lag=0 → corr with itself = 1 if any valid value exists
+ for (const v of s.values) {
+ if (!isMissing(v !== undefined ? v : null)) {
+ return 1;
+ }
+ }
+ return Number.NaN;
+ }
+
+ const vals = s.values;
+ const n = vals.length;
+ if (lag >= n) {
+ return Number.NaN;
+ }
+
+ // Collect aligned (original[i], original[i-lag]) pairs — drop if either is NA
+ const xs: number[] = [];
+ const ys: number[] = [];
+ for (let i = lag; i < n; i++) {
+ const rawA = vals[i];
+ const rawB = vals[i - lag];
+ const a: Scalar = rawA !== undefined ? rawA : null;
+ const b: Scalar = rawB !== undefined ? rawB : null;
+ if (isMissing(a) || isMissing(b)) {
+ continue;
+ }
+ if (typeof a !== "number" || typeof b !== "number") {
+ continue;
+ }
+ xs.push(a);
+ ys.push(b);
+ }
+
+ if (xs.length < 2) {
+ return Number.NaN;
+ }
+
+ const meanX = xs.reduce((acc, v) => acc + v, 0) / xs.length;
+ const meanY = ys.reduce((acc, v) => acc + v, 0) / ys.length;
+ let num = 0;
+ let varX = 0;
+ let varY = 0;
+ for (let i = 0; i < xs.length; i++) {
+ const dx = (xs[i] as number) - meanX;
+ const dy = (ys[i] as number) - meanY;
+ num += dx * dy;
+ varX += dx * dx;
+ varY += dy * dy;
+ }
+ const denom = Math.sqrt(varX * varY);
+ return denom === 0 ? Number.NaN : num / denom;
+}
+
+// ─── corrWith ─────────────────────────────────────────────────────────────────
+
+/**
+ * Compute the pairwise Pearson correlation of `df` columns with a Series or
+ * another DataFrame.
+ *
+ * Mirrors `pandas.DataFrame.corrwith(other, axis=0, drop=False, method="pearson")`.
+ *
+ * **When `other` is a Series (axis=0):**
+ * Each column of `df` is correlated individually with `other` using
+ * label-based alignment. The result is a Series indexed by `df`'s column
+ * names.
+ *
+ * **When `other` is a DataFrame (axis=0):**
+ * Columns present in both DataFrames are correlated pairwise. Columns
+ * appearing in only one are set to `NaN` unless `drop=true`, in which case
+ * they are excluded from the result.
+ *
+ * **axis=1:**
+ * The same logic applies along rows. Each *row* of `df` is correlated with
+ * the corresponding element in `other` (by row-label alignment). The result
+ * is a Series indexed by `df`'s row index.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, Series, corrWith } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({
+ * A: [1, 2, 3, 4, 5],
+ * B: [5, 4, 3, 2, 1],
+ * });
+ * const s = new Series({ data: [1, 2, 3, 4, 5] });
+ * corrWith(df, s).values;
+ * // A → 1.0, B → -1.0
+ * ```
+ */
+export function corrWith(
+ df: DataFrame,
+ other: DataFrame | Series,
+ options: CorrWithOptions = {},
+): Series {
+ const axis = options.axis === 1 || options.axis === "columns" ? 1 : 0;
+ const drop = options.drop ?? false;
+ const minPeriods = options.minPeriods ?? 1;
+
+ const dfWork = axis === 1 ? transpose(df) : df;
+
+ if (other instanceof Series) {
+ if (axis === 1) {
+ const aligned = new Series({
+ data: dfWork.index.toArray().map((_, i) => other.values[i] ?? null),
+ index: dfWork.index,
+ });
+ return _corrWithSeries(dfWork, aligned, minPeriods);
+ }
+ return _corrWithSeries(dfWork, other, minPeriods);
+ }
+
+ const otherWork = axis === 1 ? transpose(other) : other;
+ return _corrWithDataFrame(dfWork, otherWork, drop, minPeriods);
+}
+
+/** Correlate each column of `df` with a single Series. */
+function _corrWithSeries(df: DataFrame, other: Series, minPeriods: number): Series {
+ const cols = df.columns.toArray();
+ const results: Scalar[] = cols.map((c) => pearsonCorr(df.col(c), other, { minPeriods }));
+ return new Series({ data: results, index: new Index(cols) });
+}
+
+/** Correlate each common column of `df` with the matching column of `other`. */
+function _corrWithDataFrame(
+ df: DataFrame,
+ other: DataFrame,
+ drop: boolean,
+ minPeriods: number,
+): Series {
+ const dfCols = new Set(df.columns.toArray());
+ const otherCols = new Set(other.columns.toArray());
+
+ const allCols = drop
+ ? [...dfCols].filter((c) => otherCols.has(c))
+ : [...new Set([...dfCols, ...otherCols])];
+
+ const results: Scalar[] = allCols.map((c) => {
+ if (!(dfCols.has(c) && otherCols.has(c))) {
+ return Number.NaN;
+ }
+ return pearsonCorr(df.col(c), other.col(c), { minPeriods });
+ });
+
+ return new Series({ data: results, index: new Index(allCols) });
+}
diff --git a/src/stats/cut_bins_to_frame.ts b/src/stats/cut_bins_to_frame.ts
new file mode 100644
index 00000000..6a4e68ef
--- /dev/null
+++ b/src/stats/cut_bins_to_frame.ts
@@ -0,0 +1,158 @@
+/**
+ * cutBinsToFrame — summarise the bins produced by `cut` or `qcut` as a DataFrame.
+ *
+ * Given a {@link BinResult} (as returned by {@link cut} or {@link qcut}) and an
+ * optional array of original data values, `cutBinsToFrame` builds a tidy
+ * summary DataFrame with one row per bin and the following columns:
+ *
+ * | column | description |
+ * |-------------|-----------------------------------------------------------|
+ * | `bin` | bin label string (e.g. `"(0.0, 1.0]"`) |
+ * | `left` | lower (left) bin edge |
+ * | `right` | upper (right) bin edge |
+ * | `count` | number of observations that fell in each bin |
+ * | `frequency` | proportion of observations (`count / total`) |
+ *
+ * When no `data` argument is supplied `count` and `frequency` are both `0`.
+ *
+ * @example
+ * ```ts
+ * import { cut, cutBinsToFrame } from "tsb";
+ *
+ * const result = cut([1, 2, 3, 4, 5], 2);
+ * const df = cutBinsToFrame(result);
+ * // df.columns → ["bin", "left", "right", "count", "frequency"]
+ * // df.shape → [2, 5]
+ * ```
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import type { BinResult } from "./cut_qcut.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/** Options for {@link cutBinsToFrame}. */
+export interface CutBinsToFrameOptions {
+ /**
+ * Original data values used to compute bin counts.
+ * When provided, `count` and `frequency` columns are populated.
+ * NOTE(review): only `data.length` is inspected — the counts themselves are
+ * derived from `result.codes`; confirm the values are intentionally unused.
+ * @default []
+ */
+ readonly data?: readonly (number | null | undefined)[];
+}
+
+// ─── implementation ───────────────────────────────────────────────────────────
+
+/**
+ * Convert a {@link BinResult} into a summary DataFrame.
+ *
+ * @param result Result of {@link cut} or {@link qcut}.
+ * @param options See {@link CutBinsToFrameOptions}.
+ */
+export function cutBinsToFrame(result: BinResult, options: CutBinsToFrameOptions = {}): DataFrame {
+ const { data = [] } = options;
+
+ const { labels, bins, codes } = result;
+ const numBins = labels.length;
+
+ // ── bin edges ────────────────────────────────────────────────────────────────
+ const leftEdges: number[] = [];
+ const rightEdges: number[] = [];
+ for (let i = 0; i < numBins; i++) {
+ leftEdges.push(bins[i] as number);
+ rightEdges.push(bins[i + 1] as number);
+ }
+
+ // ── counts ────────────────────────────────────────────────────────────────────
+ const counts: number[] = Array.from({ length: numBins }, () => 0);
+
+ // Count using supplied codes array (same length as data)
+ const codeSource: ReadonlyArray =
+ data.length > 0 ? codes : ([] as Array);
+
+ let total = 0;
+ for (const code of codeSource) {
+ if (code !== null && code >= 0 && code < numBins) {
+ (counts[code] as number)++;
+ total++;
+ }
+ }
+
+ // ── frequency ─────────────────────────────────────────────────────────────────
+ const frequencies: number[] = counts.map((c) => (total > 0 ? c / total : 0));
+
+ return DataFrame.fromColumns({
+ bin: labels as readonly string[],
+ left: leftEdges,
+ right: rightEdges,
+ count: counts,
+ frequency: frequencies,
+ });
+}
+
+// ─── cutBinCounts ─────────────────────────────────────────────────────────────
+
+/**
+ * Return the per-bin observation counts from a {@link BinResult} as a plain
+ * `Record` mapping label → count.
+ *
+ * This is a lightweight alternative to {@link cutBinsToFrame} when you only
+ * need the count dictionary and not the full DataFrame.
+ *
+ * @example
+ * ```ts
+ * import { cut, cutBinCounts } from "tsb";
+ *
+ * const result = cut([1, 2, 3, 4, 5], 2);
+ * cutBinCounts(result);
+ * // { "(1.0, 3.0]": 3, "(3.0, 5.0]": 2 }
+ * ```
+ */
+export function cutBinCounts(result: BinResult): Record {
+ const { labels, codes } = result;
+ const out: Record = {};
+ for (const label of labels) {
+ out[label] = 0;
+ }
+ for (const code of codes) {
+ if (code !== null) {
+ const label = labels[code];
+ if (label !== undefined) {
+ out[label] = (out[label] as number) + 1;
+ }
+ }
+ }
+ return out;
+}
+
+// ─── binEdges ────────────────────────────────────────────────────────────────
+
+/**
+ * Extract a DataFrame of bin edges and labels from a {@link BinResult}.
+ *
+ * Produces a two-column DataFrame with `left` and `right` columns indexed
+ * by the bin label.
+ *
+ * @example
+ * ```ts
+ * import { cut, binEdges } from "tsb";
+ *
+ * const result = cut([1, 2, 3, 4, 5], 2);
+ * const edges = binEdges(result);
+ * // edges.index → Index ["(1.0, 3.0]", "(3.0, 5.0]"]
+ * // edges.columns → ["left", "right"]
+ * ```
+ */
+export function binEdges(result: BinResult): DataFrame {
+ const { labels, bins } = result;
+ const numBins = labels.length;
+ const left: number[] = [];
+ const right: number[] = [];
+ for (let i = 0; i < numBins; i++) {
+ left.push(bins[i] as number);
+ right.push(bins[i + 1] as number);
+ }
+ return DataFrame.fromColumns({ left, right }, { index: labels as unknown as number[] });
+}
diff --git a/src/stats/date_range.ts b/src/stats/date_range.ts
index 14c01860..d2e49e82 100644
--- a/src/stats/date_range.ts
+++ b/src/stats/date_range.ts
@@ -126,9 +126,15 @@ const UNIT_NORM: Readonly> = {
function normaliseUnit(raw: string): string {
// Case-sensitive lowercase tokens — millisecond aliases must come first
// so they are not confused with "MS" (month-start) after uppercasing.
- if (raw === "ms" || raw === "L") return "ms";
- if (raw === "us") return "us";
- if (raw === "ns") return "ns";
+ if (raw === "ms" || raw === "L") {
+ return "ms";
+ }
+ if (raw === "us") {
+ return "us";
+ }
+ if (raw === "ns") {
+ return "ns";
+ }
const u = raw.toUpperCase();
// Tokens that are passed through unchanged (already canonical)
if (u === "MS" || u === "QS" || u === "D" || u === "B") {
@@ -447,36 +453,51 @@ function snapToCalendarBoundary(d: Date, unit: string): Date {
const m = d.getUTCMonth();
const day = d.getUTCDate();
switch (unit) {
- case "MS":
+ case "MS": {
// If not already month-start, advance to first day of next month.
- if (day === 1) return d;
+ if (day === 1) {
+ return d;
+ }
return new Date(Date.UTC(y, m + 1, 1));
+ }
case "ME": {
// If not already month-end, snap to end of the current month.
const lastDay = daysInMonth(y, m);
- if (day === lastDay) return d;
+ if (day === lastDay) {
+ return d;
+ }
return new Date(Date.UTC(y, m, lastDay));
}
case "QS": {
// Quarter-starts are Jan/Apr/Jul/Oct 1.
const isQS = (m === 0 || m === 3 || m === 6 || m === 9) && day === 1;
- if (isQS) return d;
+ if (isQS) {
+ return d;
+ }
return nextQStart(d);
}
case "QE": {
// Quarter-ends are Mar 31, Jun 30, Sep 30, Dec 31.
const isQE = (m === 2 || m === 5 || m === 8 || m === 11) && day === daysInMonth(y, m);
- if (isQE) return d;
+ if (isQE) {
+ return d;
+ }
return nextQEnd(d);
}
- case "YS":
+ case "YS": {
// Year-start is Jan 1.
- if (m === 0 && day === 1) return d;
+ if (m === 0 && day === 1) {
+ return d;
+ }
return new Date(Date.UTC(y + 1, 0, 1));
- case "YE":
+ }
+ case "YE": {
// Year-end is Dec 31.
- if (m === 11 && day === 31) return d;
+ if (m === 11 && day === 31) {
+ return d;
+ }
return new Date(Date.UTC(y, 11, 31));
+ }
default:
return d;
}
diff --git a/src/stats/dot_matmul.ts b/src/stats/dot_matmul.ts
new file mode 100644
index 00000000..46e35b3a
--- /dev/null
+++ b/src/stats/dot_matmul.ts
@@ -0,0 +1,291 @@
+/**
+ * dot_matmul — dot product and matrix multiplication for Series and DataFrame.
+ *
+ * Mirrors the following pandas methods:
+ * - `Series.dot(other)` — dot product with a Series (→ scalar) or
+ * matrix-multiply with a DataFrame (→ Series).
+ * - `DataFrame.dot(other)` — matrix-multiply with a Series (→ Series) or
+ * another DataFrame (→ DataFrame).
+ *
+ * Index alignment is performed automatically (inner join on shared labels).
+ * Missing values (`null`, `undefined`, `NaN`) are treated as `0` during
+ * multiplication (same as pandas with fill_value=0 semantics for missing
+ * intersection items, but NaN propagates within aligned pairs).
+ *
+ * All functions are **pure** — inputs are never mutated.
+ *
+ * @module
+ */
+
+import { DataFrame, Index, Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** True when a scalar value is missing (null, undefined, or NaN). */
+function isMissing(v: Scalar): boolean {
+ return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+}
+
+/**
+ * Build a label → position map for quick lookup.
+ */
+function buildLabelMap(idx: Index): Map {
+ const m = new Map();
+ for (let i = 0; i < idx.size; i++) {
+ m.set(String(idx.at(i)), i);
+ }
+ return m;
+}
+
+/**
+ * Align two arrays by shared labels (inner join on index).
+ * Returns the aligned numeric values (NaN for missing scalars).
+ */
+function alignTwoSeries(
+ a: Series,
+ b: Series,
+): { aVals: number[]; bVals: number[]; labels: Label[] } {
+ const bMap = buildLabelMap(b.index);
+ const aVals: number[] = [];
+ const bVals: number[] = [];
+ const labels: Label[] = [];
+
+ for (let i = 0; i < a.index.size; i++) {
+ const label = String(a.index.at(i));
+ const j = bMap.get(label);
+ if (j === undefined) {
+ continue;
+ }
+ const av = a.values[i] as Scalar;
+ const bv = b.values[j] as Scalar;
+ aVals.push(isMissing(av) ? Number.NaN : (av as number));
+ bVals.push(isMissing(bv) ? Number.NaN : (bv as number));
+ labels.push(a.index.at(i));
+ }
+ return { aVals, bVals, labels };
+}
+
+// ─── seriesDotSeries ──────────────────────────────────────────────────────────
+
+/**
+ * Compute the dot product of two Series after aligning on their shared index.
+ *
+ * Mirrors the core of `pandas.Series.dot(other: Series)`.
+ *
+ * - Labels present in both Series participate in the sum.
+ * - If either aligned value is `NaN`, the result is `NaN`.
+ * - Returns `0` when there are no common labels.
+ *
+ * @example
+ * ```ts
+ * import { Series, seriesDotSeries } from "tsb";
+ *
+ * const a = new Series({ data: [1, 2, 3], index: ["x", "y", "z"] });
+ * const b = new Series({ data: [4, 5, 6], index: ["x", "y", "z"] });
+ * seriesDotSeries(a, b); // 1*4 + 2*5 + 3*6 = 32
+ * ```
+ */
+export function seriesDotSeries(a: Series, b: Series): number {
+ const { aVals, bVals } = alignTwoSeries(a, b);
+ let sum = 0;
+ for (let i = 0; i < aVals.length; i++) {
+ const av = aVals[i] as number;
+ const bv = bVals[i] as number;
+ if (Number.isNaN(av) || Number.isNaN(bv)) {
+ return Number.NaN;
+ }
+ sum += av * bv;
+ }
+ return sum;
+}
+
+// ─── seriesDotDataFrame ────────────────────────────────────────────────────────
+
+/**
+ * Multiply a row-vector Series against a DataFrame (inner join on index ↔ row labels).
+ *
+ * Mirrors `pandas.Series.dot(other: DataFrame)`.
+ *
+ * For each column `c` of `other`, computes:
+ * `result[c] = Σ s[k] * other.loc[k, c]` for all `k` in `s.index ∩ other.index`.
+ *
+ * The returned Series is indexed by the DataFrame's column names.
+ *
+ * @example
+ * ```ts
+ * import { Series, DataFrame, seriesDotDataFrame } from "tsb";
+ *
+ * const s = new Series({ data: [1, 2], index: ["r0", "r1"] });
+ * const df = DataFrame.fromColumns({ a: [1, 3], b: [2, 4] });
+ * // df index is [0,1]; s index is ["r0","r1"] → no overlap → [0, 0]
+ * ```
+ */
+export function seriesDotDataFrame(s: Series, df: DataFrame): Series {
+ // Build label → row index map for df
+ const dfRowMap = buildLabelMap(df.index);
+ const colNames = df.columns.values;
+
+ const result: Scalar[] = new Array(colNames.length).fill(0);
+
+ for (let si = 0; si < s.index.size; si++) {
+ const label = String(s.index.at(si));
+ const ri = dfRowMap.get(label);
+ if (ri === undefined) {
+ continue;
+ }
+ const sv = s.values[si] as Scalar;
+ const svNum = isMissing(sv) ? Number.NaN : (sv as number);
+
+ for (let ci = 0; ci < colNames.length; ci++) {
+ const colSeries = df.col(colNames[ci] as string);
+ const cv = colSeries.values[ri] as Scalar;
+ const cvNum = isMissing(cv) ? Number.NaN : (cv as number);
+ const cur = result[ci] as number;
+ result[ci] =
+ Number.isNaN(cur) || Number.isNaN(svNum) || Number.isNaN(cvNum)
+ ? Number.NaN
+ : cur + svNum * cvNum;
+ }
+ }
+
+ return new Series({
+ data: result,
+ index: new Index(colNames as readonly Label[]),
+ name: s.name,
+ });
+}
+
+// ─── dataFrameDotSeries ────────────────────────────────────────────────────────
+
+/**
+ * Multiply a DataFrame by a column-vector Series (inner join on columns ↔ index).
+ *
+ * Mirrors `pandas.DataFrame.dot(other: Series)`.
+ *
+ * For each row `r` of `df`, computes:
+ * `result[r] = Σ df.loc[r, k] * s[k]` for all `k` in `df.columns ∩ s.index`.
+ *
+ * The returned Series is indexed by the DataFrame's row index.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, Series, dataFrameDotSeries } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] });
+ * const s = new Series({ data: [1, 0], index: ["a", "b"] });
+ * dataFrameDotSeries(df, s).values; // [1, 2] (a column × 1 + b column × 0)
+ * ```
+ */
+export function dataFrameDotSeries(df: DataFrame, s: Series): Series {
+ const nRows = df.index.size;
+ const colNames = df.columns.values;
+
+ // Build column → series-index map
+ const sMap = buildLabelMap(s.index);
+
+ // Find the intersecting columns
+ const sharedCols: Array<{ colName: string; sIdx: number }> = [];
+ for (const colName of colNames) {
+ const si = sMap.get(String(colName));
+ if (si !== undefined) {
+ sharedCols.push({ colName: colName as string, sIdx: si });
+ }
+ }
+
+ const result: Scalar[] = new Array(nRows).fill(0);
+
+ for (const { colName, sIdx } of sharedCols) {
+ const col = df.col(colName);
+ const sv = s.values[sIdx] as Scalar;
+ const svNum = isMissing(sv) ? Number.NaN : (sv as number);
+
+ for (let ri = 0; ri < nRows; ri++) {
+ const cv = col.values[ri] as Scalar;
+ const cvNum = isMissing(cv) ? Number.NaN : (cv as number);
+ const cur = result[ri] as number;
+ result[ri] =
+ Number.isNaN(cur) || Number.isNaN(svNum) || Number.isNaN(cvNum)
+ ? Number.NaN
+ : cur + cvNum * svNum;
+ }
+ }
+
+ return new Series({ data: result, index: df.index });
+}
+
+// ─── dataFrameDotDataFrame ─────────────────────────────────────────────────────
+
+/**
+ * Matrix-multiply two DataFrames (inner join on left.columns ↔ right.index).
+ *
+ * Mirrors `pandas.DataFrame.dot(other: DataFrame)`.
+ *
+ * The result has:
+ * - Row index = `left.index`
+ * - Column names = `right.columns`
+ * - Shape = `[left.nRows, right.nCols]`
+ *
+ * Only the labels that appear in **both** `left.columns` **and** `right.index`
+ * participate in the sum (inner join).
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, dataFrameDotDataFrame } from "tsb";
+ *
+ * const A = DataFrame.fromColumns({ k: [1, 2] }); // 2×1
+ * const B = DataFrame.fromColumns({ x: [3], y: [4] }); // 1×2
+ * // A has columns ["k"], B has index [0] — no overlap → zeros
+ * ```
+ */
+export function dataFrameDotDataFrame(left: DataFrame, right: DataFrame): DataFrame {
+ const nRowsLeft = left.index.size;
+ const rightColNames = right.columns.values;
+
+ // Build left.columns → right.index map
+ const rightRowMap = buildLabelMap(right.index);
+ const leftColNames = left.columns.values;
+
+ const sharedKeys: Array<{ leftColName: string; rightRowIdx: number }> = [];
+ for (const lc of leftColNames) {
+ const ri = rightRowMap.get(String(lc));
+ if (ri !== undefined) {
+ sharedKeys.push({ leftColName: lc as string, rightRowIdx: ri });
+ }
+ }
+
+ // Result columns: one per right column
+ const colMap: Record = {};
+ for (const rc of rightColNames) {
+ colMap[rc as string] = new Array(nRowsLeft).fill(0);
+ }
+
+ for (const { leftColName, rightRowIdx } of sharedKeys) {
+ const leftCol = left.col(leftColName);
+ for (let ci = 0; ci < rightColNames.length; ci++) {
+ const rightColName = rightColNames[ci] as string;
+ const rightCol = right.col(rightColName);
+ const rv = rightCol.values[rightRowIdx] as Scalar;
+ const rvNum = isMissing(rv) ? Number.NaN : (rv as number);
+ const outCol = colMap[rightColName] as Scalar[];
+ for (let ri = 0; ri < nRowsLeft; ri++) {
+ const lv = leftCol.values[ri] as Scalar;
+ const lvNum = isMissing(lv) ? Number.NaN : (lv as number);
+ const cur = outCol[ri] as number;
+ outCol[ri] =
+ Number.isNaN(cur) || Number.isNaN(rvNum) || Number.isNaN(lvNum)
+ ? Number.NaN
+ : cur + lvNum * rvNum;
+ }
+ }
+ }
+
+ const finalColMap: Record> = {};
+ for (const rc of rightColNames) {
+ finalColMap[rc as string] = new Series({
+ data: colMap[rc as string] as Scalar[],
+ index: left.index,
+ });
+ }
+ return new DataFrame(new Map(Object.entries(finalColMap)), left.index);
+}
diff --git a/src/stats/eval_query.ts b/src/stats/eval_query.ts
new file mode 100644
index 00000000..a25172bd
--- /dev/null
+++ b/src/stats/eval_query.ts
@@ -0,0 +1,834 @@
+/**
+ * eval_query — `DataFrame.query()` and `DataFrame.eval()`.
+ *
+ * Mirrors `pandas.DataFrame.query(expr)` and `pandas.DataFrame.eval(expr)`:
+ *
+ * - {@link queryDataFrame} — filter rows using a boolean expression string
+ * - {@link evalDataFrame} — evaluate an expression, returning a new `Series`
+ *
+ * Supported expression syntax:
+ * - **Column references**: bare identifiers (`col`) or backtick-quoted (`` `col name` ``)
+ * - **Literals**: `42`, `3.14`, `"foo"`, `'bar'`, `True`/`False`, `None`/`null`/`NaN`
+ * - **Arithmetic**: `+ - * / % **` (standard precedence)
+ * - **Comparison**: `== != < <= > >=`
+ * - **Logical**: `and or not`
+ * - **Membership**: `col in [1, 2, 3]`, `col not in ("a", "b")`
+ * - **Functions**: `abs(x)`, `round(x, d)`, `str(x)`, `len(x)`, `lower(x)`,
+ * `upper(x)`, `isnull(x)` / `isna(x)`, `notnull(x)` / `notna(x)`
+ * - **Grouping**: parentheses
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, queryDataFrame, evalDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromArrays({ a: [1, 2, 3, 4], b: ["x", "y", "x", "y"] });
+ *
+ * // Filter rows
+ * queryDataFrame(df, "a > 2 and b == 'x'");
+ * // DataFrame with row: a=3, b="x"
+ *
+ * // Evaluate expression
+ * evalDataFrame(df, "a * 2");
+ * // Series [2, 4, 6, 8]
+ * ```
+ *
+ * @module
+ */
+
+import type { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── Token types ──────────────────────────────────────────────────────────────
+
/** Token kinds produced by the expression lexer. */
type TokKind =
  | "NUM" // numeric literal, e.g. 42, 3.14, 1e-3
  | "STR" // quoted string literal (value stored unquoted/unescaped)
  | "IDENT" // identifier or keyword (and/or/not/in/True/False/None/…)
  | "BACKTICK" // backtick-quoted column name
  | "EQ" // ==
  | "NEQ" // !=
  | "LT" // <
  | "LE" // <=
  | "GT" // >
  | "GE" // >=
  | "PLUS"
  | "MINUS"
  | "STAR"
  | "SLASH"
  | "PERCENT"
  | "POW" // **
  | "LPAREN"
  | "RPAREN"
  | "LBRACKET"
  | "RBRACKET"
  | "COMMA"
  | "EOF"; // sentinel appended by lex()

/** A single lexed token. */
interface Token {
  readonly kind: TokKind;
  // Raw token text; for STR this is the unquoted content, for BACKTICK the bare name.
  readonly value: string;
  // Offset of the token's first character in the source expression.
  readonly pos: number;
}
+
+// ─── Lexer ────────────────────────────────────────────────────────────────────
+
+/** Tokenise an expression string into a flat token array. */
+function lex(expr: string): readonly Token[] {
+ const tokens: Token[] = [];
+ let i = 0;
+ while (i < expr.length) {
+ i = lexOne(expr, i, tokens);
+ }
+ tokens.push({ kind: "EOF", value: "", pos: expr.length });
+ return tokens;
+}
+
+/** Lex one token starting at position `i`; return new position. */
+function lexOne(expr: string, i: number, out: Token[]): number {
+ const ch = expr.charAt(i);
+ if (ch === " " || ch === "\t" || ch === "\r" || ch === "\n") {
+ return i + 1;
+ }
+ if (ch === "`") {
+ return lexBacktick(expr, i, out);
+ }
+ if (ch === '"' || ch === "'") {
+ return lexString(expr, i, out);
+ }
+ if (ch >= "0" && ch <= "9") {
+ return lexNumber(expr, i, out);
+ }
+ if (ch === "." && isDigit(expr.charAt(i + 1))) {
+ return lexNumber(expr, i, out);
+ }
+ if ((ch >= "a" && ch <= "z") || (ch >= "A" && ch <= "Z") || ch === "_") {
+ return lexIdent(expr, i, out);
+ }
+ return lexSymbol(expr, i, out);
+}
+
+function isDigit(ch: string): boolean {
+ return ch >= "0" && ch <= "9";
+}
+
+function lexBacktick(expr: string, i: number, out: Token[]): number {
+ const start = i + 1;
+ let j = start;
+ while (j < expr.length && expr.charAt(j) !== "`") {
+ j++;
+ }
+ out.push({ kind: "BACKTICK", value: expr.slice(start, j), pos: i });
+ return j + 1;
+}
+
+function lexString(expr: string, i: number, out: Token[]): number {
+ const q = expr.charAt(i);
+ let j = i + 1;
+ let result = "";
+ while (j < expr.length && expr.charAt(j) !== q) {
+ if (expr.charAt(j) === "\\") {
+ j++;
+ result += expr.charAt(j);
+ } else {
+ result += expr.charAt(j);
+ }
+ j++;
+ }
+ out.push({ kind: "STR", value: result, pos: i });
+ return j + 1;
+}
+
+function lexNumber(expr: string, i: number, out: Token[]): number {
+ let pos = i;
+ const start = pos;
+ while (pos < expr.length) {
+ const c = expr.charAt(pos);
+ if (!((c >= "0" && c <= "9") || c === ".")) {
+ break;
+ }
+ pos++;
+ }
+ if (pos < expr.length && (expr.charAt(pos) === "e" || expr.charAt(pos) === "E")) {
+ pos++;
+ const sign = expr.charAt(pos);
+ if (sign === "+" || sign === "-") {
+ pos++;
+ }
+ while (pos < expr.length && isDigit(expr.charAt(pos))) {
+ pos++;
+ }
+ }
+ out.push({ kind: "NUM", value: expr.slice(start, pos), pos: start });
+ return pos;
+}
+
+function lexIdent(expr: string, i: number, out: Token[]): number {
+ let pos = i;
+ const start = pos;
+ while (pos < expr.length && /\w/.test(expr.charAt(pos))) {
+ pos++;
+ }
+ out.push({ kind: "IDENT", value: expr.slice(start, pos), pos: start });
+ return pos;
+}
+
+const SINGLE_CHAR_TOKENS: ReadonlyMap = new Map([
+ ["<", "LT"],
+ [">", "GT"],
+ ["+", "PLUS"],
+ ["-", "MINUS"],
+ ["*", "STAR"],
+ ["/", "SLASH"],
+ ["%", "PERCENT"],
+ ["(", "LPAREN"],
+ [")", "RPAREN"],
+ ["[", "LBRACKET"],
+ ["]", "RBRACKET"],
+ [",", "COMMA"],
+]);
+
+function lexSymbol(expr: string, i: number, out: Token[]): number {
+ const ch = expr.charAt(i);
+ const ch2 = expr.charAt(i + 1);
+ if (ch === "=" && ch2 === "=") {
+ out.push({ kind: "EQ", value: "==", pos: i });
+ return i + 2;
+ }
+ if (ch === "!" && ch2 === "=") {
+ out.push({ kind: "NEQ", value: "!=", pos: i });
+ return i + 2;
+ }
+ if (ch === "<" && ch2 === "=") {
+ out.push({ kind: "LE", value: "<=", pos: i });
+ return i + 2;
+ }
+ if (ch === ">" && ch2 === "=") {
+ out.push({ kind: "GE", value: ">=", pos: i });
+ return i + 2;
+ }
+ if (ch === "*" && ch2 === "*") {
+ out.push({ kind: "POW", value: "**", pos: i });
+ return i + 2;
+ }
+ const kind = SINGLE_CHAR_TOKENS.get(ch);
+ if (kind !== undefined) {
+ out.push({ kind, value: ch, pos: i });
+ return i + 1;
+ }
+ throw new SyntaxError(`Unexpected character '${ch}' at position ${i} in: ${expr}`);
+}
+
+// ─── AST ──────────────────────────────────────────────────────────────────────
+
/**
 * Parsed expression tree.
 *
 * - `BinOp`    — binary operator (`and`/`or`, comparisons, arithmetic)
 * - `UnaryOp`  — prefix `-` or `not` (unary `+` is folded away by the parser)
 * - `InOp`     — membership test `value [not] in (…)`
 * - `Literal`  — constant scalar (number, string, boolean, or null)
 * - `ColRef`   — reference to a DataFrame column by name
 * - `FuncCall` — builtin function application, e.g. `abs(x)`
 */
type AstNode =
  | { readonly type: "BinOp"; readonly op: string; readonly left: AstNode; readonly right: AstNode }
  | { readonly type: "UnaryOp"; readonly op: string; readonly operand: AstNode }
  | {
      readonly type: "InOp";
      readonly value: AstNode;
      readonly list: readonly AstNode[];
      readonly negated: boolean;
    }
  | { readonly type: "Literal"; readonly value: Scalar }
  | { readonly type: "ColRef"; readonly name: string }
  | { readonly type: "FuncCall"; readonly name: string; readonly args: readonly AstNode[] };
+
+// ─── Parser ───────────────────────────────────────────────────────────────────
+
/**
 * Recursive-descent parser producing an AstNode from the lexed token stream.
 *
 * Precedence, loosest-binding first:
 *   `or` → `and` → `not` → comparison / `[not] in` → `+ -` → `* / %`
 *   → unary `+ -` → `**` (right-associative) → primary.
 *
 * Comparisons do not chain: after one comparison is parsed, a leftover
 * `< 3` in `1 < a < 3` trips the EOF assertion in `parse()`.
 */
class ExprParser {
  private readonly tokens: readonly Token[];
  // Read cursor into `tokens`; only ever advances.
  private pos = 0;

  constructor(tokens: readonly Token[]) {
    this.tokens = tokens;
  }

  /** Current token without consuming it (EOF sentinel past the end). */
  private peek(): Token {
    return this.tokens[this.pos] ?? { kind: "EOF", value: "", pos: 0 };
  }

  /** Token after the current one (used for the `not in` two-token lookahead). */
  private peek2(): Token {
    return this.tokens[this.pos + 1] ?? { kind: "EOF", value: "", pos: 0 };
  }

  /** Consume and return the current token. */
  private consume(): Token {
    const t = this.peek();
    this.pos++;
    return t;
  }

  /** Consume the current token, asserting its kind. */
  private expect(kind: TokKind): Token {
    const t = this.consume();
    if (t.kind !== kind) {
      throw new SyntaxError(`Expected ${kind} but got ${t.kind} ('${t.value}')`);
    }
    return t;
  }

  /** True when the current token is the given keyword (case-insensitive). */
  private matchKw(word: string): boolean {
    const t = this.peek();
    return t.kind === "IDENT" && t.value.toLowerCase() === word;
  }

  /** Parse and consume the full expression, asserting EOF. */
  parse(): AstNode {
    const node = this.parseOr();
    if (this.peek().kind !== "EOF") {
      throw new SyntaxError(`Unexpected token '${this.peek().value}' after expression`);
    }
    return node;
  }

  // or-expr := and-expr ("or" and-expr)*   (left-associative)
  private parseOr(): AstNode {
    let left = this.parseAnd();
    while (this.matchKw("or")) {
      this.consume();
      const right = this.parseAnd();
      left = { type: "BinOp", op: "or", left, right };
    }
    return left;
  }

  // and-expr := not-expr ("and" not-expr)*   (left-associative)
  private parseAnd(): AstNode {
    let left = this.parseNot();
    while (this.matchKw("and")) {
      this.consume();
      const right = this.parseNot();
      left = { type: "BinOp", op: "and", left, right };
    }
    return left;
  }

  // not-expr := "not" not-expr | comparison
  private parseNot(): AstNode {
    if (this.matchKw("not")) {
      this.consume();
      return { type: "UnaryOp", op: "not", operand: this.parseNot() };
    }
    return this.parseComparison();
  }

  // comparison := add-expr (cmp-op add-expr | ["not"] "in" list)?
  private parseComparison(): AstNode {
    const left = this.parseAdd();
    return this.parseCmpRhs(left);
  }

  /** Parse the optional comparison / membership tail after `left`. */
  private parseCmpRhs(left: AstNode): AstNode {
    const CMP_KINDS: readonly TokKind[] = ["EQ", "NEQ", "LT", "LE", "GT", "GE"];
    // Two-token lookahead distinguishes `x not in (…)` from logical `not`.
    if (
      this.matchKw("not") &&
      this.peek2().kind === "IDENT" &&
      this.peek2().value.toLowerCase() === "in"
    ) {
      this.consume(); // "not"
      this.consume(); // "in"
      return { type: "InOp", value: left, list: this.parseListLiteral(), negated: true };
    }
    if (this.matchKw("in")) {
      this.consume(); // "in"
      return { type: "InOp", value: left, list: this.parseListLiteral(), negated: false };
    }
    if (!CMP_KINDS.includes(this.peek().kind)) {
      return left;
    }
    const op = this.consume().value;
    const right = this.parseAdd();
    return { type: "BinOp", op, left, right };
  }

  /**
   * Parse the right-hand side of `in`: a parenthesised/bracketed list of
   * primaries, or a single bare primary. Separating commas are optional.
   */
  private parseListLiteral(): readonly AstNode[] {
    const items: AstNode[] = [];
    const open = this.peek().kind;
    if (open !== "LPAREN" && open !== "LBRACKET") {
      items.push(this.parsePrimary());
      return items;
    }
    this.consume();
    const close: TokKind = open === "LPAREN" ? "RPAREN" : "RBRACKET";
    while (this.peek().kind !== close && this.peek().kind !== "EOF") {
      items.push(this.parsePrimary());
      if (this.peek().kind === "COMMA") {
        this.consume();
      }
    }
    this.expect(close);
    return items;
  }

  // add-expr := mul-expr (("+" | "-") mul-expr)*
  private parseAdd(): AstNode {
    let left = this.parseMul();
    while (this.peek().kind === "PLUS" || this.peek().kind === "MINUS") {
      const op = this.consume().value;
      const right = this.parseMul();
      left = { type: "BinOp", op, left, right };
    }
    return left;
  }

  // mul-expr := unary (("*" | "/" | "%") unary)*
  private parseMul(): AstNode {
    let left = this.parseUnary();
    while (
      this.peek().kind === "STAR" ||
      this.peek().kind === "SLASH" ||
      this.peek().kind === "PERCENT"
    ) {
      const op = this.consume().value;
      const right = this.parseUnary();
      left = { type: "BinOp", op, left, right };
    }
    return left;
  }

  // unary := ("-" | "+") unary | pow   (unary "+" is a no-op and is dropped)
  private parseUnary(): AstNode {
    if (this.peek().kind === "MINUS") {
      this.consume();
      return { type: "UnaryOp", op: "-", operand: this.parseUnary() };
    }
    if (this.peek().kind === "PLUS") {
      this.consume();
      return this.parseUnary();
    }
    return this.parsePow();
  }

  // pow := primary ("**" unary)?   — right-associative because the exponent
  // re-enters parseUnary, so `2 ** 3 ** 2` is 2 ** (3 ** 2) and `2 ** -3` parses.
  private parsePow(): AstNode {
    const base = this.parsePrimary();
    if (this.peek().kind === "POW") {
      this.consume();
      const exp = this.parseUnary();
      return { type: "BinOp", op: "**", left: base, right: exp };
    }
    return base;
  }

  // primary := "(" or-expr ")" | backtick-col | number | string | ident-or-call
  private parsePrimary(): AstNode {
    const t = this.peek();
    if (t.kind === "LPAREN") {
      return this.parseParenExpr();
    }
    if (t.kind === "BACKTICK") {
      this.consume();
      return { type: "ColRef", name: t.value };
    }
    if (t.kind === "NUM") {
      this.consume();
      return { type: "Literal", value: Number(t.value) };
    }
    if (t.kind === "STR") {
      this.consume();
      return { type: "Literal", value: t.value };
    }
    if (t.kind === "IDENT") {
      return this.parseIdentOrCall();
    }
    throw new SyntaxError(`Unexpected token '${t.value}' at position ${t.pos}`);
  }

  /** Parse a parenthesised sub-expression. */
  private parseParenExpr(): AstNode {
    this.consume(); // "("
    const node = this.parseOr();
    this.expect("RPAREN");
    return node;
  }

  /**
   * Resolve a bare identifier: keyword literal (True/False/None/null/NaN,
   * case-insensitive), a function call when followed by "(", otherwise a
   * column reference. Columns named like the literals are shadowed by them.
   */
  private parseIdentOrCall(): AstNode {
    const t = this.consume();
    const low = t.value.toLowerCase();
    if (low === "true") {
      return { type: "Literal", value: true };
    }
    if (low === "false") {
      return { type: "Literal", value: false };
    }
    if (low === "none" || low === "null" || low === "nan") {
      return { type: "Literal", value: null };
    }
    if (this.peek().kind === "LPAREN") {
      this.consume(); // "("
      const args = this.parseFuncArgs();
      this.expect("RPAREN");
      return { type: "FuncCall", name: t.value, args };
    }
    return { type: "ColRef", name: t.value };
  }

  /** Parse call arguments up to ")"; separating commas are optional. */
  private parseFuncArgs(): readonly AstNode[] {
    const args: AstNode[] = [];
    while (this.peek().kind !== "RPAREN" && this.peek().kind !== "EOF") {
      args.push(this.parseOr());
      if (this.peek().kind === "COMMA") {
        this.consume();
      }
    }
    return args;
  }
}
+
+// ─── Evaluator ────────────────────────────────────────────────────────────────
+
+/** Evaluate an AST node for one row, given column values in `row`. */
+function evalNode(node: AstNode, row: ReadonlyMap): Scalar {
+ switch (node.type) {
+ case "Literal":
+ return node.value;
+ case "ColRef": {
+ if (!row.has(node.name)) {
+ throw new Error(`Column '${node.name}' not found in DataFrame`);
+ }
+ return row.get(node.name) ?? null;
+ }
+ case "UnaryOp":
+ return evalUnary(node.op, evalNode(node.operand, row));
+ case "BinOp":
+ return evalBinOp(node.op, node.left, node.right, row);
+ case "InOp":
+ return evalInOp(node, row);
+ case "FuncCall":
+ return evalFuncCall(node.name, node.args, row);
+ }
+}
+
+function isTruthy(v: Scalar): boolean {
+ if (v === null || v === undefined) {
+ return false;
+ }
+ if (typeof v === "boolean") {
+ return v;
+ }
+ if (typeof v === "number") {
+ return v !== 0 && !Number.isNaN(v);
+ }
+ if (typeof v === "string") {
+ return v.length > 0;
+ }
+ if (typeof v === "bigint") {
+ return v !== 0n;
+ }
+ return true;
+}
+
+function evalUnary(op: string, val: Scalar): Scalar {
+ if (op === "-") {
+ return typeof val === "number" ? -val : null;
+ }
+ if (op === "+") {
+ return typeof val === "number" ? val : null;
+ }
+ if (op === "not") {
+ return !isTruthy(val);
+ }
+ return null;
+}
+
+function evalBinOp(
+ op: string,
+ leftNode: AstNode,
+ rightNode: AstNode,
+ row: ReadonlyMap,
+): Scalar {
+ // Short-circuit logical ops
+ if (op === "or") {
+ return isTruthy(evalNode(leftNode, row)) ? true : isTruthy(evalNode(rightNode, row));
+ }
+ if (op === "and") {
+ return isTruthy(evalNode(leftNode, row)) ? isTruthy(evalNode(rightNode, row)) : false;
+ }
+ return applyBinOp(op, evalNode(leftNode, row), evalNode(rightNode, row));
+}
+
+function scalarEq(l: Scalar, r: Scalar): boolean {
+ if (l === null || l === undefined) {
+ return r === null || r === undefined;
+ }
+ if (typeof l === "number" && Number.isNaN(l)) {
+ return false;
+ }
+ if (l instanceof Date && r instanceof Date) {
+ return l.getTime() === r.getTime();
+ }
+ return l === r;
+}
+
+function numericCmp(l: Scalar, r: Scalar): number {
+ if (l == null || r == null) {
+ return Number.NaN;
+ }
+ if (l instanceof Date && r instanceof Date) {
+ return l.getTime() - r.getTime();
+ }
+ if (typeof l === "number" && typeof r === "number") {
+ return l - r;
+ }
+ if (typeof l === "string" && typeof r === "string") {
+ return l < r ? -1 : l > r ? 1 : 0;
+ }
+ return Number.NaN;
+}
+
+function numericOp(l: Scalar, r: Scalar, fn: (a: number, b: number) => number): Scalar {
+ if (l == null || r == null) {
+ return null;
+ }
+ if (typeof l === "number" && typeof r === "number") {
+ return canonicalizeZero(fn(l, r));
+ }
+ return null;
+}
+
/**
 * Apply a binary operator to two already-evaluated scalars.
 *
 * Equality is null-aware via `scalarEq`; ordering goes through `numericCmp`,
 * which returns NaN for missing/incomparable operands, so every ordering
 * comparison involving a missing value evaluates to false. Arithmetic is
 * null-propagating (`numericOp` / `addScalar`).
 *
 * NOTE(review): `%` uses JS remainder semantics (result takes the dividend's
 * sign), while pandas/Python modulo takes the divisor's sign — e.g. `-1 % 3`
 * is -1 here but 2 in pandas. Confirm which behaviour is intended.
 */
function applyBinOp(op: string, l: Scalar, r: Scalar): Scalar {
  switch (op) {
    case "==":
      return scalarEq(l, r);
    case "!=":
      return !scalarEq(l, r);
    case "<":
      return numericCmp(l, r) < 0;
    case "<=":
      return numericCmp(l, r) <= 0;
    case ">":
      return numericCmp(l, r) > 0;
    case ">=":
      return numericCmp(l, r) >= 0;
    case "+":
      return addScalar(l, r);
    case "-":
      return numericOp(l, r, (a, b) => a - b);
    case "*":
      return numericOp(l, r, (a, b) => a * b);
    case "/":
      return numericOp(l, r, (a, b) => a / b);
    case "%":
      return numericOp(l, r, (a, b) => a % b);
    case "**":
      return numericOp(l, r, Math.pow);
    default:
      // Unknown operator — unreachable given the parser's operator set.
      return null;
  }
}
+
+function addScalar(l: Scalar, r: Scalar): Scalar {
+ if (l == null || r == null) {
+ return null;
+ }
+ if (typeof l === "string" || typeof r === "string") {
+ return String(l) + String(r);
+ }
+ if (typeof l === "number" && typeof r === "number") {
+ return canonicalizeZero(l + r);
+ }
+ return null;
+}
+
+function canonicalizeZero(value: number): number {
+ return Object.is(value, -0) ? 0 : value;
+}
+
+function evalInOp(
+ node: Extract,
+ row: ReadonlyMap,
+): boolean {
+ const val = evalNode(node.value, row);
+ const found = node.list.some((item) => scalarEq(val, evalNode(item, row)));
+ return node.negated ? !found : found;
+}
+
+type BuiltinFn = (args: readonly Scalar[]) => Scalar;
+
+const BUILTIN_FUNCS: ReadonlyMap = new Map([
+ [
+ "abs",
+ (a) => {
+ const x = a[0];
+ return typeof x === "number" ? Math.abs(x) : null;
+ },
+ ],
+ [
+ "round",
+ (a) => {
+ const x = a[0];
+ const d = a[1];
+ return typeof x === "number" ? Number(x.toFixed(typeof d === "number" ? d : 0)) : null;
+ },
+ ],
+ [
+ "str",
+ (a) => {
+ const x = a[0];
+ return x == null ? null : String(x);
+ },
+ ],
+ [
+ "len",
+ (a) => {
+ const x = a[0];
+ return typeof x === "string" ? x.length : null;
+ },
+ ],
+ [
+ "lower",
+ (a) => {
+ const x = a[0];
+ return typeof x === "string" ? x.toLowerCase() : null;
+ },
+ ],
+ [
+ "upper",
+ (a) => {
+ const x = a[0];
+ return typeof x === "string" ? x.toUpperCase() : null;
+ },
+ ],
+ [
+ "isnull",
+ (a) => {
+ const x = a[0];
+ return x == null || (typeof x === "number" && Number.isNaN(x));
+ },
+ ],
+ [
+ "isna",
+ (a) => {
+ const x = a[0];
+ return x == null || (typeof x === "number" && Number.isNaN(x));
+ },
+ ],
+ [
+ "notnull",
+ (a) => {
+ const x = a[0];
+ return x != null && !(typeof x === "number" && Number.isNaN(x));
+ },
+ ],
+ [
+ "notna",
+ (a) => {
+ const x = a[0];
+ return x != null && !(typeof x === "number" && Number.isNaN(x));
+ },
+ ],
+ [
+ "sqrt",
+ (a) => {
+ const x = a[0];
+ return typeof x === "number" ? Math.sqrt(x) : null;
+ },
+ ],
+ [
+ "log",
+ (a) => {
+ const x = a[0];
+ return typeof x === "number" ? Math.log(x) : null;
+ },
+ ],
+ [
+ "log2",
+ (a) => {
+ const x = a[0];
+ return typeof x === "number" ? Math.log2(x) : null;
+ },
+ ],
+ [
+ "log10",
+ (a) => {
+ const x = a[0];
+ return typeof x === "number" ? Math.log10(x) : null;
+ },
+ ],
+ [
+ "floor",
+ (a) => {
+ const x = a[0];
+ return typeof x === "number" ? Math.floor(x) : null;
+ },
+ ],
+ [
+ "ceil",
+ (a) => {
+ const x = a[0];
+ return typeof x === "number" ? Math.ceil(x) : null;
+ },
+ ],
+]);
+
+function evalFuncCall(
+ name: string,
+ argNodes: readonly AstNode[],
+ row: ReadonlyMap,
+): Scalar {
+ const fn = BUILTIN_FUNCS.get(name.toLowerCase());
+ if (fn === undefined) {
+ throw new Error(`Unknown function '${name}()'`);
+ }
+ return fn(argNodes.map((a) => evalNode(a, row)));
+}
+
+// ─── Row accessor ─────────────────────────────────────────────────────────────
+
+function buildRowMap(df: DataFrame, rowIdx: number): ReadonlyMap {
+ const map = new Map();
+ for (const col of df.columns.values) {
+ map.set(col, df.col(col).iat(rowIdx));
+ }
+ return map;
+}
+
+// ─── Public functions ─────────────────────────────────────────────────────────
+
+/**
+ * Filter rows of a DataFrame using a boolean expression string.
+ *
+ * Mirrors `pandas.DataFrame.query(expr)`.
+ *
+ * Column names with spaces or special characters can be quoted with backticks:
+ * `` `column name` == "value" ``.
+ *
+ * @param df - The input DataFrame.
+ * @param expr - Boolean expression string referencing column names.
+ * @returns A new DataFrame containing only the rows where `expr` is truthy.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromArrays({ a: [1, 2, 3, 4], score: [0.1, 0.9, 0.5, 0.8] });
+ * queryDataFrame(df, "a >= 2 and score > 0.7");
+ * // a=2,score=0.9 | a=4,score=0.8
+ * ```
+ */
+export function queryDataFrame(df: DataFrame, expr: string): DataFrame {
+ const tokens = lex(expr);
+ const ast = new ExprParser(tokens).parse();
+ const nRows = df.shape[0];
+ const keep: number[] = [];
+ for (let i = 0; i < nRows; i++) {
+ if (isTruthy(evalNode(ast, buildRowMap(df, i)))) {
+ keep.push(i);
+ }
+ }
+ return df.iloc(keep);
+}
+
+/**
+ * Evaluate an expression against a DataFrame, returning a new Series.
+ *
+ * Mirrors `pandas.DataFrame.eval(expr)`.
+ *
+ * @param df - The input DataFrame.
+ * @param expr - Expression string referencing column names.
+ * @returns A `Series` with one value per row.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromArrays({ price: [10, 20, 30], qty: [2, 3, 1] });
+ * evalDataFrame(df, "price * qty");
+ * // Series [20, 60, 30]
+ * ```
+ */
+export function evalDataFrame(df: DataFrame, expr: string): Series {
+ const tokens = lex(expr);
+ const ast = new ExprParser(tokens).parse();
+ const nRows = df.shape[0];
+ const results: Scalar[] = new Array(nRows);
+ for (let i = 0; i < nRows; i++) {
+ results[i] = evalNode(ast, buildRowMap(df, i));
+ }
+ return new Series({ data: results, index: df.index });
+}
diff --git a/src/stats/filter_labels.ts b/src/stats/filter_labels.ts
new file mode 100644
index 00000000..d75eb367
--- /dev/null
+++ b/src/stats/filter_labels.ts
@@ -0,0 +1,204 @@
+/**
+ * filter_labels — filter a Series or DataFrame by row/column labels.
+ *
+ * Mirrors `pandas.DataFrame.filter(items, like, regex, axis)`.
+ *
+ * Exactly one of `items`, `like`, or `regex` must be specified.
+ *
+ * - {@link filterDataFrame} — filter DataFrame rows or columns by label
+ * - {@link filterSeries} — filter Series index labels
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, filterDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4], c_x: [5, 6] });
+ *
+ * // Keep only columns whose name is in the list
+ * filterDataFrame(df, { items: ["a", "c_x"] }).columns.values;
+ * // ["a", "c_x"]
+ *
+ * // Keep columns whose name contains "_x"
+ * filterDataFrame(df, { like: "_x" }).columns.values;
+ * // ["c_x"]
+ *
+ * // Keep columns matching regex "^[ab]$"
+ * filterDataFrame(df, { regex: "^[ab]$" }).columns.values;
+ * // ["a", "b"]
+ * ```
+ *
+ * @module
+ */
+
+import { DataFrame, Index, Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
/**
 * Options for {@link filterDataFrame} and {@link filterSeries}.
 *
 * Exactly one of `items`, `like`, or `regex` must be provided; labels are
 * always matched against their `String(...)` representation.
 */
export interface FilterLabelsOptions {
  /**
   * Keep labels whose string representation appears in this list.
   * Mutually exclusive with `like` and `regex`.
   */
  readonly items?: readonly Label[];

  /**
   * Keep labels whose string representation **contains** this substring.
   * Mutually exclusive with `items` and `regex`.
   */
  readonly like?: string;

  /**
   * Keep labels whose string representation matches this regular expression.
   * Mutually exclusive with `items` and `like`.
   */
  readonly regex?: string;

  /**
   * Axis to filter along (DataFrame only; ignored by filterSeries).
   * - `0` or `"index"`: filter rows (default).
   * - `1` or `"columns"`: filter columns.
   * @default 1 (columns, matching pandas default for DataFrame.filter)
   */
  readonly axis?: 0 | 1 | "index" | "columns";
}
+
+// ─── internal helpers ─────────────────────────────────────────────────────────
+
+/**
+ * Build a predicate for a label given the filter options.
+ * Exactly one of `items`, `like`, or `regex` is expected to be set.
+ */
+function buildPredicate(options: FilterLabelsOptions): (label: Label) => boolean {
+ const { items, like, regex } = options;
+ const setCount =
+ (items !== undefined ? 1 : 0) + (like !== undefined ? 1 : 0) + (regex !== undefined ? 1 : 0);
+ if (setCount === 0) {
+ throw new TypeError("filterDataFrame: exactly one of items, like, or regex must be specified");
+ }
+ if (setCount > 1) {
+ throw new TypeError("filterDataFrame: only one of items, like, or regex may be specified");
+ }
+
+ if (items !== undefined) {
+ const set = new Set(items.map(String));
+ return (label: Label): boolean => set.has(String(label));
+ }
+ if (like !== undefined) {
+ return (label: Label): boolean => String(label).includes(like);
+ }
+ if (regex !== undefined) {
+ const re = new RegExp(regex);
+ return (label: Label): boolean => re.test(String(label));
+ }
+ // unreachable — setCount === 1 guarantees one branch was taken
+ throw new TypeError("filterDataFrame: internal error");
+}
+
+// ─── filterDataFrame ──────────────────────────────────────────────────────────
+
+/**
+ * Filter rows or columns of a DataFrame by label.
+ *
+ * Pass exactly one of `items`, `like`, or `regex` in `options`.
+ * The `axis` option controls whether rows (`0`/`"index"`) or columns
+ * (`1`/`"columns"`) are filtered; defaults to `1` (columns), matching the
+ * pandas default.
+ *
+ * @param df - Source DataFrame.
+ * @param options - See {@link FilterLabelsOptions}.
+ * @returns New DataFrame with only the matching rows or columns.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, filterDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns(
+ * { a: [1, 2, 3], b: [4, 5, 6], c: [7, 8, 9] },
+ * { index: [10, 20, 30] },
+ * );
+ *
+ * // Columns
+ * filterDataFrame(df, { items: ["a", "c"] }).columns.values; // ["a", "c"]
+ * filterDataFrame(df, { like: "b" }).columns.values; // ["b"]
+ * filterDataFrame(df, { regex: "[ac]" }).columns.values; // ["a", "c"]
+ *
+ * // Rows
+ * filterDataFrame(df, { items: [10, 30], axis: 0 }).index.values; // [10, 30]
+ * ```
+ */
+export function filterDataFrame(df: DataFrame, options: FilterLabelsOptions): DataFrame {
+ const axisSpec = options.axis ?? 1;
+ const filterRows = axisSpec === 0 || axisSpec === "index";
+ const predicate = buildPredicate(options);
+
+ if (filterRows) {
+ const positions: number[] = [];
+ for (let i = 0; i < df.index.size; i++) {
+ if (predicate(df.index.at(i))) {
+ positions.push(i);
+ }
+ }
+ const newIndexLabels = positions.map((i) => df.index.at(i));
+ const newIndex = new Index(newIndexLabels);
+ const colNames = df.columns.values as readonly string[];
+ const cols = new Map>();
+ for (const name of colNames) {
+ const col = df.col(name);
+ const data = positions.map((i) => col.values[i] as Scalar);
+ cols.set(name, new Series({ data, index: newIndex, dtype: col.dtype }));
+ }
+ return new DataFrame(cols, newIndex);
+ }
+ const colNames = df.columns.values as readonly string[];
+ const kept = colNames.filter((name) => predicate(name));
+ const cols = new Map>();
+ for (const name of kept) {
+ const col = df.col(name);
+ cols.set(
+ name,
+ new Series({ data: col.values as Scalar[], index: df.index, dtype: col.dtype }),
+ );
+ }
+ return new DataFrame(cols, df.index);
+}
+
+// ─── filterSeries ─────────────────────────────────────────────────────────────
+
+/**
+ * Filter a Series by its index labels.
+ *
+ * Pass exactly one of `items`, `like`, or `regex` in `options`.
+ * (The `axis` option is ignored for Series — only the index is filtered.)
+ *
+ * @param s - Source Series.
+ * @param options - See {@link FilterLabelsOptions}.
+ * @returns New Series with only the matching index positions.
+ *
+ * @example
+ * ```ts
+ * import { Series, filterSeries } from "tsb";
+ *
+ * const s = new Series({ data: [1, 2, 3], index: ["alpha", "beta", "gamma"] });
+ * filterSeries(s, { like: "a" }).index.values; // ["alpha", "gamma"]
+ * filterSeries(s, { items: ["beta"] }).values; // [2]
+ * ```
+ */
+export function filterSeries(s: Series, options: FilterLabelsOptions): Series {
+ const predicate = buildPredicate(options);
+ const positions: number[] = [];
+ for (let i = 0; i < s.size; i++) {
+ if (predicate(s.index.at(i))) {
+ positions.push(i);
+ }
+ }
+ const data = positions.map((i) => s.values[i] as Scalar);
+ const labels = positions.map((i) => s.index.at(i));
+ return new Series({
+ data,
+ index: new Index(labels),
+ dtype: s.dtype,
+ name: s.name,
+ });
+}
diff --git a/src/stats/index.ts b/src/stats/index.ts
index c09ad5ca..d5426bd1 100644
--- a/src/stats/index.ts
+++ b/src/stats/index.ts
@@ -361,7 +361,7 @@ export type {
export { toDatetime } from "./to_datetime.ts";
export type { DatetimeUnit, DatetimeErrors, ToDatetimeOptions } from "./to_datetime.ts";
-export { toTimedelta, parseFrac, formatTimedelta, Timedelta } from "./to_timedelta.ts";
+export { toTimedelta, parseFrac, formatTimedelta } from "./to_timedelta.ts";
export type { TimedeltaUnit, TimedeltaErrors, ToTimedeltaOptions } from "./to_timedelta.ts";
export { dateRange, parseFreq, advanceDate, toDateInput } from "./date_range.ts";
export type {
@@ -381,3 +381,108 @@ export type { FillDirectionOptions, DataFrameFillOptions } from "./na_ops.ts";
export { intervalRange } from "./interval.ts";
export type { ClosedType } from "./interval.ts";
export { nunique } from "./reduce_ops.ts";
+export { queryDataFrame, evalDataFrame } from "./eval_query.ts";
+export { strFindall, strFindallCount, strFindFirst, strFindallExpand } from "./str_findall.ts";
+export {
+ cutBinsToFrame,
+ cutBinCounts,
+ binEdges,
+} from "./cut_bins_to_frame.ts";
+export type { CutBinsToFrameOptions } from "./cut_bins_to_frame.ts";
+export { xsDataFrame, xsSeries } from "./xs.ts";
+export type { XsDataFrameOptions, XsSeriesOptions } from "./xs.ts";
+export {
+ swapLevelSeries,
+ swapLevelDataFrame,
+ reorderLevelsSeries,
+ reorderLevelsDataFrame,
+} from "./swaplevel.ts";
+export type { SwapLevelDataFrameOptions, ReorderLevelsDataFrameOptions } from "./swaplevel.ts";
+export { truncateSeries, truncateDataFrame } from "./truncate.ts";
+export type { TruncateOptions } from "./truncate.ts";
+export { seriesBetween } from "./between.ts";
+export type { BetweenInclusive, BetweenOptions } from "./between.ts";
+export { seriesUpdate, dataFrameUpdate } from "./update.ts";
+export type { UpdateOptions } from "./update.ts";
+export { filterDataFrame, filterSeries } from "./filter_labels.ts";
+export type { FilterLabelsOptions } from "./filter_labels.ts";
+
+export { combineSeries, combineDataFrame } from "./combine.ts";
+export type { CombineDataFrameOptions } from "./combine.ts";
+export { keepTrue, keepFalse, filterBy } from "./notna_boolean.ts";
+export {
+ squeezeSeries,
+ squeezeDataFrame,
+ itemSeries,
+ boolSeries,
+ boolDataFrame,
+ firstValidIndex,
+ lastValidIndex,
+ dataFrameFirstValidIndex,
+ dataFrameLastValidIndex,
+} from "./scalar_extract.ts";
+export type { SqueezeResult } from "./scalar_extract.ts";
+export { autoCorr, corrWith } from "./corrwith.ts";
+export type { CorrWithOptions } from "./corrwith.ts";
+export {
+ renameSeriesIndex,
+ renameDataFrame,
+ addPrefixDataFrame,
+ addSuffixDataFrame,
+ addPrefixSeries,
+ addSuffixSeries,
+ setAxisSeries,
+ setAxisDataFrame,
+ seriesToFrame,
+} from "./rename_ops.ts";
+export type { LabelMapper, RenameDataFrameOptions } from "./rename_ops.ts";
+export { absSeries, absDataFrame, roundSeries, roundDataFrame } from "./math_ops.ts";
+export type { RoundDataFrameSpec } from "./math_ops.ts";
+export {
+ seriesDotSeries,
+ seriesDotDataFrame,
+ dataFrameDotSeries,
+ dataFrameDotDataFrame,
+} from "./dot_matmul.ts";
+export { seriesTransform, dataFrameTransform } from "./transform_agg.ts";
+export type {
+ TransformFunc,
+ TransformFuncName,
+ DataFrameTransformOptions,
+} from "./transform_agg.ts";
+export { seriesAt, seriesIat, dataFrameAt, dataFrameIat } from "./at_iat.ts";
+export {
+ sortValuesSeries,
+ sortIndexSeries,
+ sortValuesDataFrame,
+ sortIndexDataFrame,
+} from "./sort_ops.ts";
+export type {
+ SortValuesSeriesOptions,
+ SortIndexSeriesOptions,
+ SortValuesDataFrameOptions,
+ SortIndexDataFrameOptions,
+} from "./sort_ops.ts";
+export {
+ inferObjectsSeries,
+ inferObjectsDataFrame,
+ convertDtypesSeries,
+ convertDtypesDataFrame,
+} from "./infer_objects.ts";
+export type {
+ InferObjectsOptions,
+ ConvertDtypesOptions,
+} from "./infer_objects.ts";
+export {
+ resampleSeries,
+ resampleDataFrame,
+ SeriesResampler,
+ DataFrameResampler,
+} from "./resample.ts";
+export type {
+ ResampleFreq,
+ ResampleLabel,
+ ResampleAggName,
+ ResampleAggFn,
+ ResampleOptions,
+} from "./resample.ts";
diff --git a/src/stats/infer_objects.ts b/src/stats/infer_objects.ts
new file mode 100644
index 00000000..28e7c53e
--- /dev/null
+++ b/src/stats/infer_objects.ts
@@ -0,0 +1,396 @@
+/**
+ * infer_objects — infer better dtypes for object-typed Series/DataFrame columns.
+ *
+ * Mirrors `pandas.Series.infer_objects` and `pandas.DataFrame.infer_objects`,
+ * plus the related `pandas.api.types.convert_dtypes`.
+ *
+ * - {@link inferObjectsSeries}: attempt to infer a better dtype for a Series
+ * - {@link inferObjectsDataFrame}: apply `inferObjectsSeries` to every column
+ * - {@link convertDtypesSeries}: convert a Series to the best possible dtype
+ * - {@link convertDtypesDataFrame}: apply `convertDtypesSeries` to every column
+ *
+ * @example
+ * ```ts
+ * import { Series, DataFrame, inferObjectsSeries, convertDtypesSeries } from "tsb";
+ *
+ * // Object-typed Series holding numeric strings → float
+ * const s = new Series({ data: [1, 2, 3], dtype: Dtype.object });
+ * inferObjectsSeries(s).dtype.kind; // "int"
+ *
+ * // All-null object series → remains object
+ * const nulls = new Series({ data: [null, null] });
+ * inferObjectsSeries(nulls).dtype.name; // "object"
+ *
+ * // String numerics → convert to float
+ * convertDtypesSeries(new Series({ data: ["1", "2.5", "3"] }));
+ * // Series([1, 2.5, 3], dtype=float64)
+ * ```
+ *
+ * @module
+ */
+
+import { DataFrame, Dtype, Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** True when the value is null/undefined/NaN. */
+function isMissing(v: Scalar): boolean {
+ return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+}
+
+/** True when the value is a whole-number finite number or bigint. */
+function isInteger(v: Scalar): boolean {
+ if (typeof v === "bigint") {
+ return true;
+ }
+ if (typeof v === "number") {
+ return Number.isFinite(v) && Math.floor(v) === v;
+ }
+ return false;
+}
+
+/** True when the value is a finite float (not whole). */
+function isFloat(v: Scalar): boolean {
+ return typeof v === "number" && Number.isFinite(v) && Math.floor(v) !== v;
+}
+
+/**
+ * Determine the best `Dtype` for an array of values.
+ *
+ * Returns `null` if the array is empty or all-null (no inference possible).
+ */
+function inferBestDtype(values: readonly Scalar[]): Dtype | null {
+ let hasInt = false;
+ let hasFloat = false;
+ let hasBool = false;
+ let hasString = false;
+ let hasOther = false;
+ let nonNullCount = 0;
+
+ for (const v of values) {
+ if (isMissing(v)) {
+ continue;
+ }
+ nonNullCount++;
+ if (typeof v === "boolean") {
+ hasBool = true;
+ } else if (typeof v === "string") {
+ hasString = true;
+ } else if (isFloat(v)) {
+ hasFloat = true;
+ } else if (isInteger(v)) {
+ hasInt = true;
+ } else {
+ hasOther = true;
+ }
+ }
+
+ if (nonNullCount === 0) {
+ return null;
+ }
+ if (hasOther) {
+ return null; // objects, dates, etc. — can't safely infer
+ }
+
+ const typeCount = (hasBool ? 1 : 0) + (hasString ? 1 : 0) + (hasInt ? 1 : 0) + (hasFloat ? 1 : 0);
+ if (typeCount > 1) {
+ return null; // mixed types
+ }
+
+ if (hasBool) {
+ return Dtype.from("bool");
+ }
+ if (hasInt) {
+ return Dtype.from("int64");
+ }
+ if (hasFloat) {
+ return Dtype.from("float64");
+ }
+ if (hasString) {
+ return Dtype.from("string");
+ }
+
+ return null;
+}
+
+/**
+ * Try to convert a string value to a number.
+ * Returns the number if successful, null otherwise.
+ */
+function tryParseNumber(v: string): number | null {
+ const trimmed = v.trim();
+ if (trimmed === "" || trimmed === "nan" || trimmed === "NaN") {
+ return Number.NaN;
+ }
+ if (trimmed === "inf" || trimmed === "Infinity") {
+ return Number.POSITIVE_INFINITY;
+ }
+ if (trimmed === "-inf" || trimmed === "-Infinity") {
+ return Number.NEGATIVE_INFINITY;
+ }
+ const n = Number(trimmed);
+ if (Number.isNaN(n)) {
+ return null;
+ }
+ return n;
+}
+
+// ─── infer_objects ────────────────────────────────────────────────────────────
+
/**
 * Options for {@link inferObjectsSeries} and {@link inferObjectsDataFrame}.
 */
export interface InferObjectsOptions {
  /**
   * Only convert `object`-dtype columns/Series; non-object inputs are
   * returned unchanged.
   * When `false`, attempt inference on all columns regardless of dtype.
   * Default: `true` (mirrors the pandas `infer_objects` behavior, which
   * only touches object-typed data).
   */
  readonly objectOnly?: boolean;
}
+
+/**
+ * Attempt to infer a better dtype for an object-typed Series.
+ *
+ * Mirrors `pandas.Series.infer_objects`. For non-object Series, returns the
+ * original unchanged (unless `options.objectOnly` is `false`).
+ *
+ * @param s - The Series to process.
+ * @param options - Optional settings.
+ * @returns A new Series with an inferred dtype, or the original if no better
+ * type can be determined.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [1, 2, 3], dtype: Dtype.object });
+ * inferObjectsSeries(s).dtype.kind; // "int"
+ * ```
+ */
+export function inferObjectsSeries(
+ s: Series,
+ options?: InferObjectsOptions,
+): Series {
+ const objectOnly = options?.objectOnly ?? true;
+
+ if (objectOnly && s.dtype.kind !== "object") {
+ return s;
+ }
+
+ const inferred = inferBestDtype(s.values);
+ if (inferred === null || inferred === s.dtype) {
+ return s;
+ }
+
+ return new Series({
+ data: s.values,
+ index: s.index,
+ dtype: inferred,
+ name: s.name,
+ });
+}
+
+/**
+ * Attempt to infer better dtypes for all columns in a DataFrame.
+ *
+ * Mirrors `pandas.DataFrame.infer_objects`. Each column is processed
+ * independently via {@link inferObjectsSeries}.
+ *
+ * @param df - The DataFrame to process.
+ * @param options - Optional settings.
+ * @returns A new DataFrame with inferred dtypes for each column.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 2], b: ["x", "y"] });
+ * inferObjectsDataFrame(df); // same data, refined dtypes
+ * ```
+ */
+export function inferObjectsDataFrame(df: DataFrame, options?: InferObjectsOptions): DataFrame {
+ const colData: Record = {};
+ for (const col of df.columns.values) {
+ const inferred = inferObjectsSeries(df.col(col), options);
+ colData[col] = inferred.values;
+ }
+ return DataFrame.fromColumns(colData, { index: df.index });
+}
+
+// ─── convert_dtypes ───────────────────────────────────────────────────────────
+
/**
 * Options for {@link convertDtypesSeries} and {@link convertDtypesDataFrame}.
 */
export interface ConvertDtypesOptions {
  /**
   * When `true`, attempt to parse string values as numbers (both for
   * string-typed Series and for object-typed Series whose values are all
   * strings or missing).
   * Default: `true`.
   */
  readonly convertString?: boolean;
  /**
   * When `true`, convert integer columns to float when nulls are present
   * (since null cannot be represented in integer arrays — mirrors pandas NA
   * handling for nullable integers).
   * Default: `false` (keep as int; null stays as null).
   */
  readonly convertIntegerToFloat?: boolean;
}
+
+/**
+ * Convert a Series to the best possible dtype.
+ *
+ * Mirrors `pandas.Series.convert_dtypes`:
+ * - `object` → tries bool, int, float, string
+ * - `string` → tries to parse as number (if `convertString`)
+ * - `int` or `float` → returns unchanged (already best numeric type)
+ * - `bool` → returns unchanged
+ *
+ * Unlike pandas, this does not require nullable-int or StringDtype extensions.
+ * All conversions stay within the existing tsb type system.
+ *
+ * @param s - The Series to convert.
+ * @param options - Conversion options.
+ * @returns A new Series with the best inferred dtype.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: ["1", "2.5", "3"] });
+ * convertDtypesSeries(s).dtype.kind; // "float"
+ * convertDtypesSeries(s).values; // [1, 2.5, 3]
+ * ```
+ */
+export function convertDtypesSeries(
+ s: Series,
+ options?: ConvertDtypesOptions,
+): Series {
+ const convertString = options?.convertString ?? true;
+ const convertIntToFloat = options?.convertIntegerToFloat ?? false;
+
+ const kind = s.dtype.kind;
+
+ // Numeric / bool: check if we need to convert ints to float for null values.
+ if (kind === "int" || kind === "uint") {
+ if (convertIntToFloat) {
+ const hasNull = s.values.some(isMissing);
+ if (hasNull) {
+ return new Series({
+ data: s.values.map((v) => (isMissing(v) ? null : (v as unknown as number))),
+ index: s.index,
+ dtype: Dtype.from("float64"),
+ name: s.name,
+ });
+ }
+ }
+ return s;
+ }
+
+ if (kind === "float" || kind === "bool") {
+ return s;
+ }
+
+ // String dtype: try numeric parse.
+ if (kind === "string") {
+ if (!convertString) {
+ return s;
+ }
+ return tryConvertStringToNumeric(s);
+ }
+
+ // Object dtype: try full inference, including string → numeric.
+ if (kind === "object") {
+ // First try direct type inference (handles int/float/bool already).
+ const inferred = inferObjectsSeries(s, { objectOnly: false });
+ if (inferred.dtype !== s.dtype) {
+ if (convertString && inferred.dtype.kind === "string") {
+ return tryConvertStringToNumeric(inferred);
+ }
+ return inferred;
+ }
+
+ // If the values are all strings (or null), try string → numeric.
+ if (convertString) {
+ const allStringOrNull = s.values.every((v) => isMissing(v) || typeof v === "string");
+ if (allStringOrNull) {
+ const asSeries = new Series({
+ data: s.values,
+ index: s.index,
+ dtype: Dtype.from("string"),
+ name: s.name,
+ });
+ return tryConvertStringToNumeric(asSeries);
+ }
+ }
+
+ return inferred;
+ }
+
+ // datetime, timedelta, category: return unchanged.
+ return s;
+}
+
+/** Internal: try converting a string-typed Series to float or int. */
+function tryConvertStringToNumeric(s: Series): Series {
+ const values = s.values;
+ const converted: Scalar[] = new Array(values.length);
+ let allInt = true;
+ let allNumeric = true;
+
+ for (let i = 0; i < values.length; i++) {
+ const v = values[i];
+ if (isMissing(v)) {
+ converted[i] = null;
+ continue;
+ }
+ if (typeof v !== "string") {
+ allNumeric = false;
+ break;
+ }
+ const n = tryParseNumber(v);
+ if (n === null) {
+ allNumeric = false;
+ break;
+ }
+ converted[i] = n;
+ if (!(Number.isNaN(n) || Number.isFinite(n))) {
+ // Infinity — treat as float
+ allInt = false;
+ } else if (Number.isFinite(n) && Math.floor(n) !== n) {
+ allInt = false;
+ }
+ }
+
+ if (!allNumeric) {
+ return s;
+ }
+
+ const dtype = allInt ? Dtype.from("int64") : Dtype.from("float64");
+ return new Series({
+ data: converted,
+ index: s.index,
+ dtype,
+ name: s.name,
+ });
+}
+
+/**
+ * Convert all columns in a DataFrame to their best possible dtypes.
+ *
+ * Mirrors `pandas.DataFrame.convert_dtypes`.
+ *
+ * @param df - The DataFrame to convert.
+ * @param options - Conversion options.
+ * @returns A new DataFrame with each column converted.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: ["1", "2"], b: [true, false] });
+ * convertDtypesDataFrame(df).col("a").dtype.kind; // "int"
+ * ```
+ */
+export function convertDtypesDataFrame(df: DataFrame, options?: ConvertDtypesOptions): DataFrame {
+ const colData: Record = {};
+ for (const col of df.columns.values) {
+ const converted = convertDtypesSeries(df.col(col), options);
+ colData[col] = converted.values;
+ }
+ return DataFrame.fromColumns(colData, { index: df.index });
+}
diff --git a/src/stats/math_ops.ts b/src/stats/math_ops.ts
new file mode 100644
index 00000000..34dcc013
--- /dev/null
+++ b/src/stats/math_ops.ts
@@ -0,0 +1,167 @@
+/**
+ * math_ops — element-wise mathematical transformations for Series and DataFrame.
+ *
+ * Mirrors the following pandas methods:
+ * - `Series.abs()` — absolute value of each element
+ * - `DataFrame.abs()` — element-wise absolute value
+ * - `Series.round(decimals?)` — round each element to N decimal places
+ * - `DataFrame.round(decimals?)` — round each column to N decimal places
+ * (or per-column decimals via a Record)
+ *
+ * All functions are **pure** — inputs are never mutated.
+ * `null` / `undefined` / `NaN` values propagate unchanged.
+ *
+ * @module
+ */
+
+import { DataFrame, Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+function isMissing(v: Scalar): boolean {
+ return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+}
+
/**
 * Round a number to `decimals` decimal places.
 *
 * Uses JavaScript `Math.round` semantics: exact halves round toward
 * +Infinity (2.5 → 3, but -2.5 → -2). NOTE(review): this is NOT
 * "round half away from zero", and it also differs from Python's built-in
 * `round()` (which rounds halves to even) and from numpy/pandas rounding.
 * Negative `decimals` round to tens, hundreds, … because `10 ** decimals`
 * becomes a fractional factor.
 */
function roundNum(v: number, decimals: number): number {
  if (decimals === 0) {
    return Math.round(v);
  }
  const factor = 10 ** decimals;
  return Math.round(v * factor) / factor;
}
+
+// ─── absSeries ────────────────────────────────────────────────────────────────
+
+/**
+ * Return a new Series with the absolute value of each numeric element.
+ *
+ * Mirrors `pandas.Series.abs()`.
+ *
+ * Non-numeric and missing values (`null`, `undefined`, `NaN`) are preserved
+ * as-is.
+ *
+ * @example
+ * ```ts
+ * import { Series, absSeries } from "tsb";
+ *
+ * absSeries(new Series({ data: [-1, 2, -3, null] })).values;
+ * // [1, 2, 3, null]
+ * ```
+ */
+export function absSeries(s: Series): Series {
+ const data: Scalar[] = s.values.map((v) => {
+ if (isMissing(v)) {
+ return v;
+ }
+ if (typeof v === "number") {
+ return Math.abs(v);
+ }
+ return v; // non-numeric (string, boolean) — pass through unchanged
+ });
+ return new Series({ data, index: s.index, name: s.name });
+}
+
+// ─── absDataFrame ─────────────────────────────────────────────────────────────
+
+/**
+ * Return a new DataFrame where every numeric cell has been replaced by its
+ * absolute value.
+ *
+ * Mirrors `pandas.DataFrame.abs()`.
+ *
+ * Non-numeric and missing values are preserved as-is.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, absDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [-1, 2], b: [3, -4] });
+ * absDataFrame(df).col("a").values; // [1, 2]
+ * absDataFrame(df).col("b").values; // [3, 4]
+ * ```
+ */
+export function absDataFrame(df: DataFrame): DataFrame {
+ const colNames = df.columns.values as readonly string[];
+ const newColMap = new Map>();
+ for (const name of colNames) {
+ newColMap.set(name, absSeries(df.col(name)));
+ }
+ return new DataFrame(newColMap, df.index, [...colNames]);
+}
+
+// ─── roundSeries ─────────────────────────────────────────────────────────────
+
+/**
+ * Return a new Series with each numeric element rounded to `decimals` decimal
+ * places.
+ *
+ * Mirrors `pandas.Series.round(decimals=0)`.
+ *
+ * Missing values (`null`, `undefined`, `NaN`) are preserved as-is.
+ *
+ * @param decimals - Number of decimal places (default `0`). Negative values
+ * round to tens, hundreds, etc. (e.g. `-1` rounds to the nearest 10).
+ *
+ * @example
+ * ```ts
+ * import { Series, roundSeries } from "tsb";
+ *
+ * roundSeries(new Series({ data: [1.234, 5.678] }), 2).values;
+ * // [1.23, 5.68]
+ * ```
+ */
+export function roundSeries(s: Series, decimals = 0): Series {
+ const data: Scalar[] = s.values.map((v) => {
+ if (isMissing(v)) {
+ return v;
+ }
+ if (typeof v === "number") {
+ return roundNum(v, decimals);
+ }
+ return v;
+ });
+ return new Series({ data, index: s.index, name: s.name });
+}
+
+// ─── roundDataFrame ───────────────────────────────────────────────────────────
+
+/**
+ * Options for {@link roundDataFrame}.
+ *
+ * Either a single `decimals` number (applied to all columns) or a per-column
+ * `Record` (unspecified columns default to `0`).
+ */
+export type RoundDataFrameSpec = number | Readonly>;
+
+/**
+ * Return a new DataFrame with each numeric cell rounded to the specified
+ * number of decimal places.
+ *
+ * Mirrors `pandas.DataFrame.round(decimals)`:
+ * - Pass a single number to apply the same precision to all columns.
+ * - Pass a `Record` to use per-column precision.
+ * Columns not listed default to `0`.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, roundDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [1.111, 2.222], b: [3.333, 4.444] });
+ * roundDataFrame(df, 2).col("a").values; // [1.11, 2.22]
+ * roundDataFrame(df, { a: 1, b: 2 }).col("b").values; // [3.33, 4.44]
+ * ```
+ */
+export function roundDataFrame(df: DataFrame, decimals: RoundDataFrameSpec = 0): DataFrame {
+ const colNames = df.columns.values as readonly string[];
+ const newColMap = new Map>();
+ for (const name of colNames) {
+ const d = typeof decimals === "number" ? decimals : (decimals[name] ?? 0);
+ newColMap.set(name, roundSeries(df.col(name), d));
+ }
+ return new DataFrame(newColMap, df.index, [...colNames]);
+}
diff --git a/src/stats/notna_boolean.ts b/src/stats/notna_boolean.ts
new file mode 100644
index 00000000..59053d06
--- /dev/null
+++ b/src/stats/notna_boolean.ts
@@ -0,0 +1,155 @@
+/**
+ * notna_boolean — boolean-mask indexing helpers for Series and DataFrames.
+ *
+ * Complements the existing `notna`/`isna` predicates by exposing
+ * easy selection helpers inspired by pandas boolean-indexing idioms:
+ *
+ * - {@link keepTrue} — keep elements where a boolean mask is `true`
+ * - {@link keepFalse} — keep elements where a boolean mask is `false`
+ * - {@link filterBy} — filter DataFrame rows by a boolean array / Series mask
+ *
+ * ### Usage
+ *
+ * ```ts
+ * import { Series, DataFrame, keepTrue, keepFalse, filterBy } from "tsb";
+ *
+ * const s = new Series({ data: [1, 2, 3, 4], index: [0, 1, 2, 3] });
+ * const mask = new Series({ data: [true, false, true, false], index: [0, 1, 2, 3] });
+ *
+ * keepTrue(s, mask).values; // [1, 3]
+ * keepFalse(s, mask).values; // [2, 4]
+ *
+ * const df = DataFrame.fromColumns({ a: [10, 20, 30], b: [1, 2, 3] });
+ * const dfMask = [true, false, true];
+ * filterBy(df, dfMask).col("a").values; // [10, 30]
+ * ```
+ *
+ * @module
+ */
+
+import { type DataFrame, Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** Convert a mask value to a boolean. Truthy = `true`, falsy = `false`. */
+function toBoolean(v: Scalar | boolean): boolean {
+ return v !== null && v !== undefined && v !== false && v !== 0 && v !== "";
+}
+
+// ─── public API ───────────────────────────────────────────────────────────────
+
+/**
+ * Keep Series elements where the corresponding mask value is truthy.
+ *
+ * The mask can be:
+ * - A `Series` aligned by position (same length as `series`).
+ * - A plain `boolean[]` (same length as `series`).
+ *
+ * Missing mask values (`null`, `undefined`, `NaN`) are treated as `false`.
+ *
+ * @param series - Source Series.
+ * @param mask - Boolean Series or `boolean[]`.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [10, 20, 30], index: ["a", "b", "c"] });
+ * keepTrue(s, [true, false, true]).values; // [10, 30]
+ * ```
+ */
+export function keepTrue(
+ series: Series,
+ mask: Series | readonly boolean[],
+): Series {
+ const values = series.values as readonly Scalar[];
+ const maskVals: readonly (Scalar | boolean)[] =
+ mask instanceof Series ? (mask.values as readonly Scalar[]) : mask;
+
+ const resultData: Scalar[] = [];
+ const resultIndex: Label[] = [];
+
+ for (let i = 0; i < values.length; i++) {
+ const mv = maskVals[i] ?? null;
+ if (toBoolean(mv)) {
+ resultData.push(values[i] ?? null);
+ resultIndex.push(series.index.at(i) ?? (i as Label));
+ }
+ }
+
+ return new Series({
+ data: resultData,
+ index: resultIndex,
+ name: series.name,
+ });
+}
+
+/**
+ * Keep Series elements where the corresponding mask value is falsy.
+ *
+ * This is the complement of {@link keepTrue}.
+ *
+ * @param series - Source Series.
+ * @param mask - Boolean Series or `boolean[]`.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [10, 20, 30], index: ["a", "b", "c"] });
+ * keepFalse(s, [true, false, true]).values; // [20]
+ * ```
+ */
+export function keepFalse(
+ series: Series,
+ mask: Series | readonly boolean[],
+): Series {
+ const values = series.values as readonly Scalar[];
+ const maskVals: readonly (Scalar | boolean)[] =
+ mask instanceof Series ? (mask.values as readonly Scalar[]) : mask;
+
+ const resultData: Scalar[] = [];
+ const resultIndex: Label[] = [];
+
+ for (let i = 0; i < values.length; i++) {
+ const mv = maskVals[i] ?? null;
+ if (!toBoolean(mv)) {
+ resultData.push(values[i] ?? null);
+ resultIndex.push(series.index.at(i) ?? (i as Label));
+ }
+ }
+
+ return new Series({
+ data: resultData,
+ index: resultIndex,
+ name: series.name,
+ });
+}
+
+/**
+ * Filter DataFrame rows by a boolean mask, keeping rows where the mask is truthy.
+ *
+ * The mask can be:
+ * - A `Series` aligned by position (same length as the DataFrame).
+ * - A plain `boolean[]` (same length as the DataFrame).
+ *
+ * @param df - Source DataFrame.
+ * @param mask - Boolean mask.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+ * filterBy(df, [true, false, true]).col("a").values; // [1, 3]
+ * ```
+ */
+export function filterBy(df: DataFrame, mask: Series | readonly boolean[]): DataFrame {
+ const maskVals: readonly (Scalar | boolean)[] =
+ mask instanceof Series ? (mask.values as readonly Scalar[]) : mask;
+
+ const keepPositions: number[] = [];
+ for (let i = 0; i < df.index.size; i++) {
+ const mv = maskVals[i] ?? null;
+ if (toBoolean(mv)) {
+ keepPositions.push(i);
+ }
+ }
+
+ return df.iloc(keepPositions);
+}
diff --git a/src/stats/rename_ops.ts b/src/stats/rename_ops.ts
new file mode 100644
index 00000000..4eb4c515
--- /dev/null
+++ b/src/stats/rename_ops.ts
@@ -0,0 +1,332 @@
+/**
+ * rename_ops — rename labels, add prefix/suffix, set axis, and convert Series
+ * to DataFrame.
+ *
+ * Mirrors the following pandas methods:
+ * - `Series.rename(index)` — rename index labels via mapping or function
+ * - `DataFrame.rename(columns?, index?)` — rename columns and/or index labels
+ * - `DataFrame.add_prefix(prefix)` — prefix all column labels
+ * - `DataFrame.add_suffix(suffix)` — suffix all column labels
+ * - `Series.add_prefix(prefix)` — prefix index labels
+ * - `Series.add_suffix(suffix)` — suffix index labels
+ * - `Series.set_axis(labels)` — replace the index of a Series
+ * - `DataFrame.set_axis(labels, axis)` — replace the column or row axis
+ * - `Series.to_frame(name?)` — convert a Series to a single-column DataFrame
+ *
+ * All functions are **pure** — inputs are never mutated.
+ *
+ * @module
+ */
+
+import { DataFrame, Index, Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── types ────────────────────────────────────────────────────────────────────
+
+/** A mapper: either a `Record` mapping or a `(label:Label)=>Label` function. */
+export type LabelMapper = Readonly> | ((label: Label) => Label);
+
/** Options for {@link renameDataFrame}. Both fields are optional; omitting one leaves that axis untouched. */
export interface RenameDataFrameOptions {
  /**
   * Rename column labels.
   * Pass a `Record` keyed by the stringified column name, or a
   * `(name: Label) => Label` function applied to every column.
   */
  readonly columns?: LabelMapper;
  /**
   * Rename row-index labels.
   * Pass a `Record` keyed by the stringified label, or a
   * `(label: Label) => Label` function applied to every row label.
   */
  readonly index?: LabelMapper;
}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** Apply a LabelMapper to a single label. */
+function applyMapper(mapper: LabelMapper, label: Label): Label {
+ if (typeof mapper === "function") {
+ return mapper(label);
+ }
+ // TypeScript narrows mapper to Record here
+ const key = String(label);
+ const mapped = mapper[key];
+ return mapped !== undefined ? mapped : label;
+}
+
+/** Apply a LabelMapper to every element of an array of labels. */
+function mapLabels(mapper: LabelMapper, labels: readonly Label[]): Label[] {
+ return labels.map((l) => applyMapper(mapper, l));
+}
+
+// ─── renameSeriesIndex ────────────────────────────────────────────────────────
+
+/**
+ * Return a new Series with renamed index labels.
+ *
+ * Mirrors `pandas.Series.rename(index=...)`.
+ *
+ * The `mapper` argument may be:
+ * - A `Record` — each matching label is replaced; others
+ * are kept as-is.
+ * - A `(label: Label) => Label` function — called for every index label.
+ *
+ * @example
+ * ```ts
+ * import { Series, renameSeriesIndex } from "tsb";
+ *
+ * const s = new Series({ data: [1, 2, 3], index: ["a", "b", "c"] });
+ * renameSeriesIndex(s, { a: "x", c: "z" }).index.values;
+ * // ["x", "b", "z"]
+ * ```
+ */
+export function renameSeriesIndex(s: Series, mapper: LabelMapper): Series {
+ const newLabels = mapLabels(mapper, s.index.values as readonly Label[]);
+ return new Series({
+ data: s.values,
+ index: new Index(newLabels),
+ name: s.name,
+ dtype: s.dtype,
+ });
+}
+
+// ─── renameDataFrame ──────────────────────────────────────────────────────────
+
+/**
+ * Return a new DataFrame with renamed column and/or row-index labels.
+ *
+ * Mirrors `pandas.DataFrame.rename(columns=..., index=...)`.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, renameDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] });
+ * renameDataFrame(df, { columns: { a: "x", b: "y" } }).columns.values;
+ * // ["x", "y"]
+ * ```
+ */
+export function renameDataFrame(df: DataFrame, options: RenameDataFrameOptions): DataFrame {
+ const colMapper = options.columns;
+ const idxMapper = options.index;
+
+ // Build new column map
+ const colNames = df.columns.values as readonly string[];
+ const newColNames: string[] = colMapper
+ ? mapLabels(colMapper, colNames as readonly Label[]).map(String)
+ : [...colNames];
+
+ // Build new row index
+ const rowLabels = df.index.values as readonly Label[];
+ const newRowLabels: Label[] = idxMapper ? mapLabels(idxMapper, rowLabels) : [...rowLabels];
+ const newRowIndex = new Index(newRowLabels);
+
+ // Rebuild column map with new names but same data (reindexed rows)
+ const newColMap = new Map>();
+ for (let i = 0; i < colNames.length; i++) {
+ const oldName = colNames[i];
+ const newName = newColNames[i];
+ if (oldName === undefined || newName === undefined) {
+ continue;
+ }
+ const col = df.col(oldName);
+ const newCol = new Series({
+ data: col.values,
+ index: newRowIndex,
+ });
+ newColMap.set(newName, newCol);
+ }
+
+ return new DataFrame(newColMap, newRowIndex, newColNames);
+}
+
+// ─── addPrefix / addSuffix ────────────────────────────────────────────────────
+
+/**
+ * Return a new DataFrame with `prefix` prepended to every column label.
+ *
+ * Mirrors `pandas.DataFrame.add_prefix(prefix)`.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, addPrefixDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [1], b: [2] });
+ * addPrefixDataFrame(df, "col_").columns.values;
+ * // ["col_a", "col_b"]
+ * ```
+ */
+export function addPrefixDataFrame(df: DataFrame, prefix: string): DataFrame {
+ return renameDataFrame(df, { columns: (label) => `${prefix}${String(label)}` });
+}
+
+/**
+ * Return a new DataFrame with `suffix` appended to every column label.
+ *
+ * Mirrors `pandas.DataFrame.add_suffix(suffix)`.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, addSuffixDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [1], b: [2] });
+ * addSuffixDataFrame(df, "_v1").columns.values;
+ * // ["a_v1", "b_v1"]
+ * ```
+ */
+export function addSuffixDataFrame(df: DataFrame, suffix: string): DataFrame {
+ return renameDataFrame(df, { columns: (label) => `${String(label)}${suffix}` });
+}
+
+/**
+ * Return a new Series with `prefix` prepended to every index label.
+ *
+ * Mirrors `pandas.Series.add_prefix(prefix)`.
+ *
+ * @example
+ * ```ts
+ * import { Series, addPrefixSeries } from "tsb";
+ *
+ * const s = new Series({ data: [1, 2], index: ["a", "b"] });
+ * addPrefixSeries(s, "x_").index.values;
+ * // ["x_a", "x_b"]
+ * ```
+ */
+export function addPrefixSeries(s: Series, prefix: string): Series {
+ return renameSeriesIndex(s, (label) => `${prefix}${String(label)}`);
+}
+
+/**
+ * Return a new Series with `suffix` appended to every index label.
+ *
+ * Mirrors `pandas.Series.add_suffix(suffix)`.
+ *
+ * @example
+ * ```ts
+ * import { Series, addSuffixSeries } from "tsb";
+ *
+ * const s = new Series({ data: [1, 2], index: ["a", "b"] });
+ * addSuffixSeries(s, "_end").index.values;
+ * // ["a_end", "b_end"]
+ * ```
+ */
+export function addSuffixSeries(s: Series, suffix: string): Series {
+ return renameSeriesIndex(s, (label) => `${String(label)}${suffix}`);
+}
+
+// ─── setAxisSeries ────────────────────────────────────────────────────────────
+
+/**
+ * Return a new Series with the given labels as its index.
+ *
+ * Mirrors `pandas.Series.set_axis(labels)`.
+ *
+ * @throws {RangeError} if `labels` length does not match the Series size.
+ *
+ * @example
+ * ```ts
+ * import { Series, setAxisSeries } from "tsb";
+ *
+ * const s = new Series({ data: [10, 20, 30] });
+ * setAxisSeries(s, ["x", "y", "z"]).index.values;
+ * // ["x", "y", "z"]
+ * ```
+ */
+export function setAxisSeries(s: Series, labels: readonly Label[]): Series {
+ if (labels.length !== s.size) {
+ throw new RangeError(
+ `set_axis: labels length ${labels.length} does not match Series size ${s.size}`,
+ );
+ }
+ return new Series({
+ data: s.values,
+ index: new Index(labels),
+ name: s.name,
+ dtype: s.dtype,
+ });
+}
+
+/**
+ * Return a new DataFrame with the given labels replacing the specified axis.
+ *
+ * Mirrors `pandas.DataFrame.set_axis(labels, axis=0|1)`:
+ * - `axis = 0` / `"index"` (default) — replace row index labels.
+ * - `axis = 1` / `"columns"` — replace column labels.
+ *
+ * @throws {RangeError} if `labels` length does not match the relevant axis size.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, setAxisDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] });
+ * setAxisDataFrame(df, ["r0", "r1"], 0).index.values; // ["r0", "r1"]
+ * setAxisDataFrame(df, ["x", "y"], 1).columns.values; // ["x", "y"]
+ * ```
+ */
+export function setAxisDataFrame(
+ df: DataFrame,
+ labels: readonly Label[],
+ axis: 0 | 1 | "index" | "columns" = 0,
+): DataFrame {
+ const isColumns = axis === 1 || axis === "columns";
+
+ if (isColumns) {
+ const colNames = df.columns.values as readonly string[];
+ if (labels.length !== colNames.length) {
+ throw new RangeError(
+ `set_axis: labels length ${labels.length} does not match columns count ${colNames.length}`,
+ );
+ }
+ return renameDataFrame(df, {
+ columns: (label) => {
+ const idx = colNames.indexOf(String(label));
+ if (idx < 0 || idx >= labels.length) {
+ return label;
+ }
+ const newLabel = labels[idx];
+ return newLabel !== undefined ? newLabel : label;
+ },
+ });
+ }
+
+ // axis = 0: replace row index
+ if (labels.length !== df.index.size) {
+ throw new RangeError(
+ `set_axis: labels length ${labels.length} does not match row count ${df.index.size}`,
+ );
+ }
+ const newRowIndex = new Index(labels);
+ const colNames = df.columns.values as readonly string[];
+ const newColMap = new Map>();
+ for (const name of colNames) {
+ const col = df.col(name);
+ newColMap.set(name, new Series({ data: col.values, index: newRowIndex }));
+ }
+ return new DataFrame(newColMap, newRowIndex, colNames);
+}
+
+// ─── seriesToFrame ────────────────────────────────────────────────────────────
+
+/**
+ * Convert a Series to a single-column DataFrame.
+ *
+ * Mirrors `pandas.Series.to_frame(name?)`:
+ * - The resulting DataFrame has one column whose name is `name` (if given)
+ * or the Series name, falling back to `0`.
+ * - The row index is the same as the Series index.
+ *
+ * @example
+ * ```ts
+ * import { Series, seriesToFrame } from "tsb";
+ *
+ * const s = new Series({ data: [1, 2, 3], name: "score" });
+ * seriesToFrame(s).columns.values; // ["score"]
+ * seriesToFrame(s, "points").columns.values; // ["points"]
+ * ```
+ */
+export function seriesToFrame(s: Series, name?: string | null): DataFrame {
+ const colName = name !== undefined && name !== null ? name : (s.name ?? "0");
+ const colMap = new Map>();
+ colMap.set(colName, new Series({ data: s.values as readonly Scalar[], index: s.index }));
+ return new DataFrame(colMap, s.index, [colName]);
+}
diff --git a/src/stats/resample.ts b/src/stats/resample.ts
new file mode 100644
index 00000000..e493c493
--- /dev/null
+++ b/src/stats/resample.ts
@@ -0,0 +1,737 @@
+/**
+ * resample — time-based resampling for Series and DataFrame.
+ *
+ * Mirrors `pandas.DataFrame.resample` / `pandas.Series.resample`.
+ *
+ * Supported frequency strings:
+ * | String | Interval |
+ * |--------|----------|
+ * | `"S"` | Second |
+ * | `"T"` / `"min"` | Minute |
+ * | `"H"` | Hour |
+ * | `"D"` | Calendar day (UTC) |
+ * | `"W"` / `"W-SUN"` | Week ending Sunday (closed right, labeled right) |
+ * | `"W-MON"` … `"W-SAT"` | Week ending on the specified weekday |
+ * | `"MS"` | Month start — 1st of each month (closed left, labeled left) |
+ * | `"ME"` | Month end — last day of each month (labeled right) |
+ * | `"QS"` | Quarter start — Jan/Apr/Jul/Oct 1 (labeled left) |
+ * | `"QE"` | Quarter end — Mar 31 / Jun 30 / Sep 30 / Dec 31 (labeled right) |
+ * | `"AS"` / `"YS"` | Year start — Jan 1 (labeled left) |
+ * | `"AE"` / `"YE"` | Year end — Dec 31 (labeled right) |
+ *
+ * @example
+ * ```ts
+ * const dates = [new Date("2024-01-01"), new Date("2024-01-02"), new Date("2024-02-01")];
+ * const s = new Series({ data: [1, 2, 3], index: dates });
+ * resampleSeries(s, "MS").sum().toArray(); // [3, 3]
+ * ```
+ *
+ * @module
+ */
+
+import { Index } from "../core/base-index.ts";
+import { DataFrame } from "../core/frame.ts";
+import { Series } from "../core/series.ts";
+import type { Label, Scalar } from "../types.ts";
+
// ─── public types ─────────────────────────────────────────────────────────────

/**
 * Recognised frequency abbreviations for {@link resampleSeries} and
 * {@link resampleDataFrame}.
 *
 * `"T"`/`"min"` are aliases for minute; `"AS"`/`"YS"` (year start) and
 * `"AE"`/`"YE"` (year end) are alias pairs. See the module doc for the full
 * interval table.
 */
export type ResampleFreq =
  | "S"
  | "T"
  | "min"
  | "H"
  | "D"
  | "W"
  | "W-SUN"
  | "W-MON"
  | "W-TUE"
  | "W-WED"
  | "W-THU"
  | "W-FRI"
  | "W-SAT"
  | "MS"
  | "ME"
  | "QS"
  | "QE"
  | "AS"
  | "YS"
  | "AE"
  | "YE";

/** Which end of the bin interval labels the output index. */
export type ResampleLabel = "left" | "right";

/** Built-in aggregation names understood by `agg()`. */
export type ResampleAggName =
  | "sum"
  | "mean"
  | "min"
  | "max"
  | "count"
  | "first"
  | "last"
  | "std"
  | "var"
  | "size";

/**
 * Custom aggregation function accepted by `agg()`.
 * Receives every raw value of a bin (missing values included) and returns the
 * aggregated scalar for that bin.
 */
export type ResampleAggFn = (values: readonly Scalar[]) => Scalar;

/** Options accepted by {@link resampleSeries} and {@link resampleDataFrame}. */
export interface ResampleOptions {
  /**
   * Which end of the bin interval labels the output index.
   * Defaults to `"right"` for `W*`, `ME`, `QE`, `YE`/`AE`; `"left"` for all others.
   */
  readonly label?: ResampleLabel;
}
+
+// ─── internal constants ───────────────────────────────────────────────────────
+
+const MS_S = 1_000;
+const MS_T = 60_000;
+const MS_H = 3_600_000;
+const MS_D = 86_400_000;
+const MS_W = 7 * MS_D;
+
+// ─── helpers: missing value ────────────────────────────────────────────────────
+
+function isMissing(v: Scalar): boolean {
+ return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+}
+
+// ─── helpers: date coercion ────────────────────────────────────────────────────
+
+function toDate(v: Label): Date | null {
+ if (v instanceof Date) return v;
+ if (typeof v === "string" || typeof v === "number") {
+ const d = new Date(v as string | number);
+ return Number.isNaN(d.getTime()) ? null : d;
+ }
+ return null;
+}
+
+// ─── helpers: default label side per frequency ─────────────────────────────────
+
+function freqDefaultLabel(freq: string): ResampleLabel {
+ return freq.startsWith("W") || freq === "ME" || freq === "QE" || freq === "AE" || freq === "YE"
+ ? "right"
+ : "left";
+}
+
+// ─── helpers: bin group key ────────────────────────────────────────────────────
+
+/**
+ * Returns the UTC ms timestamp of the canonical bin key for `d`.
+ *
+ * For "closed-left" frequencies (S, T, min, H, D, MS, QS, YS/AS): returns the
+ * left boundary (bin start) — i.e., the floor of `d` to that period.
+ *
+ * For "closed-right" / anchor frequencies (W*, ME, QE, YE/AE): returns the
+ * natural right anchor — e.g., the upcoming Sunday for W, the last-of-month for ME.
+ *
+ * This value uniquely identifies the bin AND, in the default label-setting,
+ * IS the output label.
+ */
+function binGroupKey(d: Date, freq: string): number {
+ const yr = d.getUTCFullYear();
+ const mo = d.getUTCMonth();
+ const day = d.getUTCDay();
+ const baseDay = Date.UTC(yr, mo, d.getUTCDate());
+
+ switch (freq) {
+ case "S":
+ return Math.floor(d.getTime() / MS_S) * MS_S;
+ case "T":
+ case "min":
+ return Math.floor(d.getTime() / MS_T) * MS_T;
+ case "H":
+ return Math.floor(d.getTime() / MS_H) * MS_H;
+ case "D":
+ return baseDay;
+
+ // Weekly — closed right, label = the anchor weekday
+ case "W":
+ case "W-SUN":
+ return baseDay + (day === 0 ? 0 : 7 - day) * MS_D;
+ case "W-MON":
+ return baseDay + (day === 1 ? 0 : (8 - day) % 7) * MS_D;
+ case "W-TUE":
+ return baseDay + (day === 2 ? 0 : (9 - day) % 7) * MS_D;
+ case "W-WED":
+ return baseDay + (day === 3 ? 0 : (10 - day) % 7) * MS_D;
+ case "W-THU":
+ return baseDay + (day === 4 ? 0 : (11 - day) % 7) * MS_D;
+ case "W-FRI":
+ return baseDay + (day === 5 ? 0 : (12 - day) % 7) * MS_D;
+ case "W-SAT":
+ return baseDay + (day === 6 ? 0 : (13 - day) % 7) * MS_D;
+
+ // Calendar — closed left
+ case "MS":
+ return Date.UTC(yr, mo, 1);
+ case "ME":
+ return Date.UTC(yr, mo + 1, 0); // last day of month
+ case "QS":
+ return Date.UTC(yr, Math.floor(mo / 3) * 3, 1);
+ case "QE": {
+ const qm = Math.floor(mo / 3) * 3 + 2;
+ return Date.UTC(yr, qm + 1, 0);
+ }
+ case "AS":
+ case "YS":
+ return Date.UTC(yr, 0, 1);
+ case "AE":
+ case "YE":
+ return Date.UTC(yr, 11, 31);
+
+ default:
+ throw new Error(`Unsupported resample frequency: "${freq}"`);
+ }
+}
+
+/** Advance a bin group key (UTC ms timestamp) by exactly one period. */
+function nextGroupKey(ts: number, freq: string): number {
+ const d = new Date(ts);
+ const yr = d.getUTCFullYear();
+ const mo = d.getUTCMonth();
+
+ switch (freq) {
+ case "S":
+ return ts + MS_S;
+ case "T":
+ case "min":
+ return ts + MS_T;
+ case "H":
+ return ts + MS_H;
+ case "D":
+ return ts + MS_D;
+ case "W":
+ case "W-SUN":
+ case "W-MON":
+ case "W-TUE":
+ case "W-WED":
+ case "W-THU":
+ case "W-FRI":
+ case "W-SAT":
+ return ts + MS_W;
+ case "MS":
+ return Date.UTC(yr, mo + 1, 1);
+ case "ME":
+ return Date.UTC(yr, mo + 2, 0);
+ case "QS":
+ return Date.UTC(yr, mo + 3, 1);
+ case "QE":
+ return Date.UTC(yr, mo + 4, 0);
+ case "AS":
+ case "YS":
+ return Date.UTC(yr + 1, 0, 1);
+ case "AE":
+ case "YE":
+ return Date.UTC(yr + 1, 11, 31);
+ default:
+ throw new Error(`Unsupported resample frequency: "${freq}"`);
+ }
+}
+
+/**
+ * Convert a group key to the final output label timestamp.
+ * When the user requests a label side different from the frequency default,
+ * the key is shifted by one period.
+ */
+function keyToLabel(key: number, freq: string, label: ResampleLabel): number {
+ const dflt = freqDefaultLabel(freq);
+ if (label === dflt) return key;
+
+ if (label === "right") {
+ // User wants right label on a left-default freq → next bin start
+ return nextGroupKey(key, freq);
+ }
+
+ // User wants left label on a right-default freq (W*, ME, QE, YE/AE)
+ if (freq.startsWith("W")) return key - 6 * MS_D; // anchor → Mon/+1
+ if (freq === "ME") {
+ const d = new Date(key);
+ return Date.UTC(d.getUTCFullYear(), d.getUTCMonth(), 1);
+ }
+ if (freq === "QE") {
+ const d = new Date(key);
+ return Date.UTC(d.getUTCFullYear(), d.getUTCMonth() - 2, 1);
+ }
+ if (freq === "AE" || freq === "YE") {
+ return Date.UTC(new Date(key).getUTCFullYear(), 0, 1);
+ }
+ return key;
+}
+
+// ─── helpers: grouping ────────────────────────────────────────────────────────
+
+interface Groups {
+ /** Sorted list of unique group-key timestamps. */
+ readonly keys: readonly number[];
+ /** Map from group key → sorted array of row positions. */
+ readonly map: ReadonlyMap;
+}
+
+function buildGroups(index: Index, freq: string): Groups {
+ const map = new Map();
+ for (let i = 0; i < index.size; i++) {
+ const label = index.at(i) as Label;
+ const d = toDate(label);
+ if (d === null) continue;
+ const key = binGroupKey(d, freq);
+ let arr = map.get(key);
+ if (arr === undefined) {
+ arr = [];
+ map.set(key, arr);
+ }
+ arr.push(i);
+ }
+ const keys = [...map.keys()].sort((a, b) => a - b);
+ return { keys, map };
+}
+
+/** All bin keys in the inclusive range [first, last]. */
+function allKeys(first: number, last: number, freq: string): number[] {
+ const result: number[] = [];
+ let cur = first;
+ while (cur <= last) {
+ result.push(cur);
+ cur = nextGroupKey(cur, freq);
+ }
+ return result;
+}
+
+// ─── helpers: aggregation functions ──────────────────────────────────────────
+
+type AggFn = (vals: readonly Scalar[]) => Scalar;
+
+function aggNums(vals: readonly Scalar[]): number[] {
+ return vals.filter((v): v is number => !isMissing(v) && typeof v === "number");
+}
+
+function aggSum(vals: readonly Scalar[]): Scalar {
+ const ns = aggNums(vals);
+ if (ns.length === 0) return Number.NaN;
+ return ns.reduce((a, b) => a + b, 0);
+}
+
+function aggMean(vals: readonly Scalar[]): Scalar {
+ const ns = aggNums(vals);
+ if (ns.length === 0) return Number.NaN;
+ return ns.reduce((a, b) => a + b, 0) / ns.length;
+}
+
+function aggMin(vals: readonly Scalar[]): Scalar {
+ const c = vals.filter((v): v is Exclude => !isMissing(v));
+ if (c.length === 0) return Number.NaN;
+ return c.reduce((a, b) => (a < b ? a : b));
+}
+
+function aggMax(vals: readonly Scalar[]): Scalar {
+ const c = vals.filter((v): v is Exclude => !isMissing(v));
+ if (c.length === 0) return Number.NaN;
+ return c.reduce((a, b) => (a > b ? a : b));
+}
+
+function aggCount(vals: readonly Scalar[]): Scalar {
+ return vals.filter((v) => !isMissing(v)).length;
+}
+
+function aggFirst(vals: readonly Scalar[]): Scalar {
+ for (const v of vals) if (!isMissing(v)) return v;
+ return Number.NaN;
+}
+
+function aggLast(vals: readonly Scalar[]): Scalar {
+ for (let i = vals.length - 1; i >= 0; i--) {
+ const v = vals[i]!;
+ if (!isMissing(v)) return v;
+ }
+ return Number.NaN;
+}
+
+function aggStd(vals: readonly Scalar[]): Scalar {
+ const ns = aggNums(vals);
+ if (ns.length < 2) return Number.NaN;
+ const m = ns.reduce((a, b) => a + b, 0) / ns.length;
+ return Math.sqrt(ns.reduce((s, v) => s + (v - m) ** 2, 0) / (ns.length - 1));
+}
+
+function aggVar(vals: readonly Scalar[]): Scalar {
+ const ns = aggNums(vals);
+ if (ns.length < 2) return Number.NaN;
+ const m = ns.reduce((a, b) => a + b, 0) / ns.length;
+ return ns.reduce((s, v) => s + (v - m) ** 2, 0) / (ns.length - 1);
+}
+
+const BUILTIN: Readonly> = {
+ sum: aggSum,
+ mean: aggMean,
+ min: aggMin,
+ max: aggMax,
+ count: aggCount,
+ first: aggFirst,
+ last: aggLast,
+ std: aggStd,
+ var: aggVar,
+ size: (vals) => vals.length,
+};
+
+function resolveAgg(spec: ResampleAggName | ResampleAggFn): AggFn {
+ if (typeof spec === "function") return spec;
+ const fn = BUILTIN[spec];
+ if (!fn) throw new Error(`Unknown resample aggregation: "${spec}"`);
+ return fn;
+}
+
+// ─── helpers: output index construction ───────────────────────────────────────
+
+function buildDateIndex(
+ groupKeys: readonly number[],
+ freq: string,
+ label: ResampleLabel,
+): Index {
+ return new Index(groupKeys.map((k) => new Date(keyToLabel(k, freq, label))));
+}
+
+// ─── SeriesResampler ──────────────────────────────────────────────────────────
+
+/**
+ * Time-based resampler for a {@link Series} with a datetime index.
+ *
+ * Obtained via {@link resampleSeries}.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({
+ * data: [1, 2, 3],
+ * index: [new Date("2024-01-01"), new Date("2024-01-01T12:00Z"), new Date("2024-01-02")],
+ * });
+ * resampleSeries(s, "D").sum().toArray(); // [3, 3]
+ * ```
+ */
+export class SeriesResampler {
+ private readonly _s: Series;
+ private readonly _freq: string;
+ private readonly _label: ResampleLabel;
+ private _cachedGroups: Groups | null = null;
+
+ constructor(series: Series, freq: string, options: ResampleOptions = {}) {
+ this._s = series;
+ this._freq = freq;
+ this._label = options.label ?? freqDefaultLabel(freq);
+ }
+
+ private _groups(): Groups {
+ if (this._cachedGroups === null) {
+ this._cachedGroups = buildGroups(this._s.index, this._freq);
+ }
+ return this._cachedGroups;
+ }
+
+ /**
+ * Apply an aggregation to each time bin.
+ *
+ * @param spec - Built-in name (e.g. `"sum"`) or a custom `(vals) => Scalar` function.
+ */
+ agg(spec: ResampleAggName | ResampleAggFn): Series {
+ const fn = resolveAgg(spec);
+ const { keys, map } = this._groups();
+ if (keys.length === 0) {
+ return new Series({ data: [], index: new Index([]), name: this._s.name });
+ }
+ const vals = this._s.values;
+ const binKeys = allKeys(keys[0]!, keys[keys.length - 1]!, this._freq);
+ const data: Scalar[] = binKeys.map((k) => {
+ const positions = map.get(k) ?? [];
+ return fn(positions.map((p) => vals[p] as Scalar));
+ });
+ return new Series({
+ data,
+ index: buildDateIndex(binKeys, this._freq, this._label),
+ name: this._s.name,
+ });
+ }
+
+ /** Sum of each bin (NaN for empty bins). */
+ sum(): Series {
+ return this.agg("sum");
+ }
+
+ /** Mean of each bin (NaN for empty bins). */
+ mean(): Series {
+ return this.agg("mean");
+ }
+
+ /** Minimum of each bin (NaN for empty bins). */
+ min(): Series {
+ return this.agg("min");
+ }
+
+ /** Maximum of each bin (NaN for empty bins). */
+ max(): Series {
+ return this.agg("max");
+ }
+
+ /** Count of non-missing values in each bin (0 for empty bins). */
+ count(): Series {
+ return this.agg("count");
+ }
+
+ /** First non-missing value in each bin (NaN for empty bins). */
+ first(): Series {
+ return this.agg("first");
+ }
+
+ /** Last non-missing value in each bin (NaN for empty bins). */
+ last(): Series {
+ return this.agg("last");
+ }
+
+ /** Sample standard deviation per bin (NaN if fewer than 2 values). */
+ std(): Series {
+ return this.agg("std");
+ }
+
+ /** Sample variance per bin (NaN if fewer than 2 values). */
+ var(): Series {
+ return this.agg("var");
+ }
+
+ /** Total number of observations (including missing) per bin. */
+ size(): Series {
+ return this.agg("size");
+ }
+
+ /**
+ * Open-High-Low-Close aggregation.
+ *
+ * Returns a DataFrame with columns `["open", "high", "low", "close"]` indexed
+ * by the bin labels.
+ */
+ ohlc(): DataFrame {
+ const { keys, map } = this._groups();
+ if (keys.length === 0) {
+ return DataFrame.fromColumns({ open: [], high: [], low: [], close: [] });
+ }
+ const vals = this._s.values;
+ const binKeys = allKeys(keys[0]!, keys[keys.length - 1]!, this._freq);
+
+ const open: Scalar[] = [];
+ const high: Scalar[] = [];
+ const low: Scalar[] = [];
+ const close: Scalar[] = [];
+
+ for (const k of binKeys) {
+ const positions = map.get(k) ?? [];
+ const binVals = positions.map((p) => vals[p] as Scalar);
+ open.push(aggFirst(binVals));
+ high.push(aggMax(binVals));
+ low.push(aggMin(binVals));
+ close.push(aggLast(binVals));
+ }
+
+ return DataFrame.fromColumns(
+ { open, high, low, close },
+ { index: buildDateIndex(binKeys, this._freq, this._label) },
+ );
+ }
+}
+
+// ─── DataFrameResampler ───────────────────────────────────────────────────────
+
+/**
+ * Time-based resampler for a {@link DataFrame} with a datetime row index.
+ *
+ * Obtained via {@link resampleDataFrame}.
+ *
+ * @example
+ * ```ts
+ * const idx = [new Date("2024-01-01"), new Date("2024-01-01T12:00Z"), new Date("2024-01-02")];
+ * const df = DataFrame.fromColumns({ v: [1, 2, 3] }, { index: new Index(idx) });
+ * resampleDataFrame(df, "D").sum();
+ * // DataFrame { v: [3, 3] }
+ * ```
+ */
+export class DataFrameResampler {
+ private readonly _df: DataFrame;
+ private readonly _freq: string;
+ private readonly _label: ResampleLabel;
+ private _cachedGroups: Groups | null = null;
+
+ constructor(df: DataFrame, freq: string, options: ResampleOptions = {}) {
+ this._df = df;
+ this._freq = freq;
+ this._label = options.label ?? freqDefaultLabel(freq);
+ }
+
+ private _groups(): Groups {
+ if (this._cachedGroups === null) {
+ this._cachedGroups = buildGroups(this._df.index, this._freq);
+ }
+ return this._cachedGroups;
+ }
+
+ /**
+ * Apply aggregation(s) to each time bin.
+ *
+ * @param spec
+ * - A single built-in name or function: applied to every column.
+ * - A `Record`: per-column aggregations; unmapped columns default to `"sum"`.
+ */
+ agg(
+ spec:
+ | ResampleAggName
+ | ResampleAggFn
+ | Readonly>,
+ ): DataFrame {
+ const { keys, map } = this._groups();
+ const colNames = this._df.columns.values as readonly string[];
+
+ if (keys.length === 0) {
+ const emptyCols: Record = {};
+ for (const c of colNames) emptyCols[c] = [];
+ return DataFrame.fromColumns(emptyCols);
+ }
+
+ const binKeys = allKeys(keys[0]!, keys[keys.length - 1]!, this._freq);
+ const idx = buildDateIndex(binKeys, this._freq, this._label);
+ const colData: Record = {};
+
+ for (const colName of colNames) {
+ let fn: AggFn;
+ if (typeof spec === "object" && spec !== null && typeof spec !== "function") {
+ const s = (spec as Readonly>)[colName];
+ fn = resolveAgg(s ?? "sum");
+ } else {
+ fn = resolveAgg(spec as ResampleAggName | ResampleAggFn);
+ }
+
+ const colVals = this._df.col(colName).values;
+ colData[colName] = binKeys.map((k) => {
+ const positions = map.get(k) ?? [];
+ return fn(positions.map((p) => colVals[p] as Scalar));
+ });
+ }
+
+ return DataFrame.fromColumns(colData, { index: idx });
+ }
+
+ /** Sum per column per bin (NaN for empty bins). */
+ sum(): DataFrame {
+ return this.agg("sum");
+ }
+
+ /** Mean per column per bin (NaN for empty bins). */
+ mean(): DataFrame {
+ return this.agg("mean");
+ }
+
+ /** Minimum per column per bin (NaN for empty bins). */
+ min(): DataFrame {
+ return this.agg("min");
+ }
+
+ /** Maximum per column per bin (NaN for empty bins). */
+ max(): DataFrame {
+ return this.agg("max");
+ }
+
+ /** Count of non-missing values per column per bin. */
+ count(): DataFrame {
+ return this.agg("count");
+ }
+
+ /** First non-missing value per column per bin. */
+ first(): DataFrame {
+ return this.agg("first");
+ }
+
+ /** Last non-missing value per column per bin. */
+ last(): DataFrame {
+ return this.agg("last");
+ }
+
+ /** Sample standard deviation per column per bin. */
+ std(): DataFrame {
+ return this.agg("std");
+ }
+
+ /** Sample variance per column per bin. */
+ var(): DataFrame {
+ return this.agg("var");
+ }
+
+ /**
+ * Number of observations per bin (across all columns; a Series of counts).
+ */
+ size(): Series {
+ const { keys, map } = this._groups();
+ if (keys.length === 0) {
+ return new Series({ data: [], index: new Index([]) });
+ }
+ const binKeys = allKeys(keys[0]!, keys[keys.length - 1]!, this._freq);
+ const data: Scalar[] = binKeys.map((k) => (map.get(k) ?? []).length);
+ return new Series({ data, index: buildDateIndex(binKeys, this._freq, this._label) });
+ }
+}
+
// ─── public factory functions ─────────────────────────────────────────────────

/**
 * Create a time-based resampler for a {@link Series}.
 *
 * The Series index must contain `Date` objects (or ISO-8601 strings / Unix
 * timestamps parseable by `new Date()`); rows with unparseable labels are
 * skipped when grouping.
 *
 * @param series - Source Series with a datetime index.
 * @param freq - Frequency string, e.g. `"D"`, `"H"`, `"MS"`, `"W"`.
 * @param options - Optional `{ label }` override of the default label side.
 * @returns A lazy resampler; no work happens until an aggregation is called.
 *
 * @example
 * ```ts
 * const dates = [new Date("2024-01-01"), new Date("2024-01-01T18:00Z"), new Date("2024-01-02")];
 * const s = new Series({ data: [10, 20, 30], index: dates });
 * resampleSeries(s, "D").sum().toArray(); // [30, 30]
 * resampleSeries(s, "D").mean().toArray(); // [15, 30]
 * resampleSeries(s, "D").ohlc().col("open").toArray(); // [10, 30]
 * ```
 */
export function resampleSeries(
  series: Series,
  freq: ResampleFreq | string,
  options?: ResampleOptions,
): SeriesResampler {
  return new SeriesResampler(series, freq, options);
}

/**
 * Create a time-based resampler for a {@link DataFrame}.
 *
 * The DataFrame row index must contain `Date` objects (or parseable values).
 *
 * @param df - Source DataFrame with a datetime row index.
 * @param freq - Frequency string, e.g. `"D"`, `"H"`, `"MS"`, `"W"`.
 * @param options - Optional `{ label }` override of the default label side.
 * @returns A lazy resampler; no work happens until an aggregation is called.
 *
 * @example
 * ```ts
 * const idx = new Index([new Date("2024-01-01"), new Date("2024-01-02"), new Date("2024-02-01")]);
 * const df = DataFrame.fromColumns({ val: [1, 2, 3] }, { index: idx });
 * resampleDataFrame(df, "MS").sum();
 * // DataFrame { val: [3, 3] }
 * ```
 */
export function resampleDataFrame(
  df: DataFrame,
  freq: ResampleFreq | string,
  options?: ResampleOptions,
): DataFrameResampler {
  return new DataFrameResampler(df, freq, options);
}
diff --git a/src/stats/scalar_extract.ts b/src/stats/scalar_extract.ts
new file mode 100644
index 00000000..8a2dd7f1
--- /dev/null
+++ b/src/stats/scalar_extract.ts
@@ -0,0 +1,329 @@
+/**
+ * scalar_extract — extract scalar / Series values from Series and DataFrame.
+ *
+ * Mirrors several pandas scalar-extraction utilities:
+ * - `Series.squeeze()` — return scalar if length == 1, else self
+ * - `DataFrame.squeeze(axis?)` — squeeze 1-D axis objects into scalars/Series
+ * - `Series.item()` — return the single element (throws if size != 1)
+ * - `Series.bool()` — return bool of single-element Series
+ * - `DataFrame.bool()` — return bool of single-element DataFrame
+ * - `Series.first_valid_index()` — index label of first non-NA value or null
+ * - `Series.last_valid_index()` — index label of last non-NA value or null
+ * - `DataFrame.first_valid_index()` — label of first row with any non-NA value
+ * - `DataFrame.last_valid_index()` — label of last row with any non-NA value
+ *
+ * @module
+ */
+
+import { type DataFrame, Index, Series } from "../core/index.ts";
+import type { Axis, Label, Scalar } from "../types.ts";
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+function isMissing(v: Scalar): boolean {
+ return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+}
+
+/** Safely read values[i] with noUncheckedIndexedAccess. */
+function atVal(arr: readonly T[], i: number): T | null {
+ const v = arr[i];
+ return v !== undefined ? v : null;
+}
+
+// ─── squeeze ──────────────────────────────────────────────────────────────────
+
+/**
+ * Return the single element of a one-element Series as a scalar.
+ * If the Series has more than one element, return the Series unchanged.
+ *
+ * Mirrors `pandas.Series.squeeze()`.
+ *
+ * @example
+ * ```ts
+ * import { Series, squeezeSeries } from "tsb";
+ *
+ * squeezeSeries(new Series({ data: [42] })); // 42
+ * squeezeSeries(new Series({ data: [1, 2] })); // Series([1, 2])
+ * ```
+ */
+export function squeezeSeries(s: Series): Scalar | Series {
+ if (s.size === 1) {
+ return atVal(s.values, 0);
+ }
+ return s;
+}
+
+/**
+ * Result type for {@link squeezeDataFrame}.
+ *
+ * - `scalar` — returned when the DataFrame is 1×1 and `axis` is not specified
+ * - `series` — returned when one axis has size 1
+ * - `dataframe` — returned when neither axis has size 1
+ */
+export type SqueezeResult = Scalar | Series | DataFrame;
+
+/**
+ * Squeeze 1-D axis objects from a DataFrame into a scalar or Series.
+ *
+ * Mirrors `pandas.DataFrame.squeeze(axis?)`:
+ * - `axis=undefined` (default): squeeze as many dimensions as possible.
+ * - 1 row AND 1 col → scalar
+ * - 1 row only → the single row as a Series (indexed by column names)
+ * - 1 col only → the single column as a Series (indexed by row labels)
+ * - Otherwise → DataFrame unchanged
+ * - `axis=0` / `"index"`: squeeze rows. If 1 row → Series; else → DataFrame.
+ * - `axis=1` / `"columns"`: squeeze columns. If 1 col → Series; else → DataFrame.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, squeezeDataFrame } from "tsb";
+ *
+ * const df1x1 = DataFrame.fromColumns({ A: [10] });
+ * squeezeDataFrame(df1x1); // 10
+ *
+ * const df1xN = DataFrame.fromColumns({ A: [1], B: [2] });
+ * squeezeDataFrame(df1xN); // Series([1, 2], index=["A", "B"])
+ *
+ * const dfNx1 = DataFrame.fromColumns({ A: [1, 2, 3] });
+ * squeezeDataFrame(dfNx1); // Series([1, 2, 3])
+ * ```
+ */
+export function squeezeDataFrame(df: DataFrame, axis?: Axis): SqueezeResult {
+ const [nRows, nCols] = df.shape;
+ const normalAxis = axis === "index" ? 0 : axis === "columns" ? 1 : axis;
+
+ if (normalAxis === 0) {
+ if (nRows === 1) {
+ return _rowSeries(df, 0);
+ }
+ return df;
+ }
+
+ if (normalAxis === 1) {
+ if (nCols === 1) {
+ return df.col(df.columns.at(0));
+ }
+ return df;
+ }
+
+ // axis === undefined — squeeze as many dimensions as possible
+ if (nRows === 1 && nCols === 1) {
+ const s = df.col(df.columns.at(0));
+ return atVal(s.values, 0);
+ }
+ if (nRows === 1) {
+ return _rowSeries(df, 0);
+ }
+ if (nCols === 1) {
+ return df.col(df.columns.at(0));
+ }
+ return df;
+}
+
+/** Extract row `i` as a Series indexed by column names. */
+function _rowSeries(df: DataFrame, row: number): Series {
+ const colLabels = df.columns.toArray();
+ const values: Scalar[] = colLabels.map((c) => atVal(df.col(c).values, row));
+ return new Series({ data: values, index: new Index(colLabels) });
+}
+
+// ─── item ─────────────────────────────────────────────────────────────────────
+
+/**
+ * Return the single element of a Series as a scalar value.
+ *
+ * Throws a `RangeError` if the Series does not have exactly one element.
+ *
+ * Mirrors `pandas.Series.item()`.
+ *
+ * @example
+ * ```ts
+ * import { Series, itemSeries } from "tsb";
+ *
+ * itemSeries(new Series({ data: [7] })); // 7
+ * ```
+ */
+export function itemSeries(s: Series): Scalar {
+ if (s.size !== 1) {
+ throw new RangeError(`itemSeries: Series must have exactly 1 element, got ${s.size}`);
+ }
+ return atVal(s.values, 0);
+}
+
+// ─── bool ─────────────────────────────────────────────────────────────────────
+
+/**
+ * Return the boolean value of a single-element Series.
+ *
+ * Throws if the Series does not contain exactly one element, or if that
+ * element is null/undefined.
+ *
+ * Mirrors `pandas.Series.bool()`.
+ *
+ * @example
+ * ```ts
+ * import { Series, boolSeries } from "tsb";
+ *
+ * boolSeries(new Series({ data: [1] })); // true
+ * boolSeries(new Series({ data: [0] })); // false
+ * boolSeries(new Series({ data: [true] })); // true
+ * ```
+ */
+export function boolSeries(s: Series): boolean {
+ if (s.size !== 1) {
+ throw new RangeError(
+ `boolSeries: only a single-element Series can be converted to a scalar boolean, got size ${s.size}`,
+ );
+ }
+ const v = atVal(s.values, 0);
+ if (v === null || v === undefined) {
+ throw new TypeError("boolSeries: element is null/undefined — cannot convert to bool");
+ }
+ return Boolean(v);
+}
+
+/**
+ * Return the boolean value of a single-element (1×1) DataFrame.
+ *
+ * Throws if the DataFrame shape is not exactly 1×1.
+ *
+ * Mirrors `pandas.DataFrame.bool()`.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, boolDataFrame } from "tsb";
+ *
+ * boolDataFrame(DataFrame.fromColumns({ A: [1] })); // true
+ * boolDataFrame(DataFrame.fromColumns({ A: [false] })); // false
+ * ```
+ */
+export function boolDataFrame(df: DataFrame): boolean {
+ const [nRows, nCols] = df.shape;
+ if (nRows !== 1 || nCols !== 1) {
+ throw new RangeError(
+ `boolDataFrame: only a 1×1 DataFrame can be converted to a scalar boolean, got shape [${nRows}, ${nCols}]`,
+ );
+ }
+ const s = df.col(df.columns.at(0));
+ const v = atVal(s.values, 0);
+ if (v === null || v === undefined) {
+ throw new TypeError("boolDataFrame: element is null/undefined — cannot convert to bool");
+ }
+ return Boolean(v);
+}
+
+// ─── first/last valid index ───────────────────────────────────────────────────
+
+/**
+ * Return the index label of the first non-NA value in a Series.
+ * Returns `null` if all values are NA (null / undefined / NaN).
+ *
+ * Mirrors `pandas.Series.first_valid_index()`.
+ *
+ * @example
+ * ```ts
+ * import { Series, firstValidIndex } from "tsb";
+ *
+ * firstValidIndex(new Series({ data: [null, NaN, 3, 4], index: ["a","b","c","d"] }));
+ * // "c"
+ * firstValidIndex(new Series({ data: [null, null] }));
+ * // null
+ * ```
+ */
+export function firstValidIndex(s: Series): Label | null {
+ for (let i = 0; i < s.size; i++) {
+ const v = atVal(s.values, i);
+ if (!isMissing(v)) {
+ return s.index.at(i);
+ }
+ }
+ return null;
+}
+
+/**
+ * Return the index label of the last non-NA value in a Series.
+ * Returns `null` if all values are NA.
+ *
+ * Mirrors `pandas.Series.last_valid_index()`.
+ *
+ * @example
+ * ```ts
+ * import { Series, lastValidIndex } from "tsb";
+ *
+ * lastValidIndex(new Series({ data: [1, 2, null, null], index: ["a","b","c","d"] }));
+ * // "b"
+ * ```
+ */
+export function lastValidIndex(s: Series): Label | null {
+ for (let i = s.size - 1; i >= 0; i--) {
+ const v = atVal(s.values, i);
+ if (!isMissing(v)) {
+ return s.index.at(i);
+ }
+ }
+ return null;
+}
+
+/**
+ * Return the row index label of the first row that contains at least one
+ * non-NA value across all columns.
+ * Returns `null` if every value in the DataFrame is NA.
+ *
+ * Mirrors `pandas.DataFrame.first_valid_index()`.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, dataFrameFirstValidIndex } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({
+ * A: [null, null, 1],
+ * B: [null, 2, 3],
+ * });
+ * dataFrameFirstValidIndex(df); // 1 (row 1 has B=2)
+ * ```
+ */
+export function dataFrameFirstValidIndex(df: DataFrame): Label | null {
+ const [nRows] = df.shape;
+ const colNames = df.columns.toArray();
+ for (let i = 0; i < nRows; i++) {
+ for (const col of colNames) {
+ const v = atVal(df.col(col).values, i);
+ if (!isMissing(v)) {
+ return df.index.at(i);
+ }
+ }
+ }
+ return null;
+}
+
+/**
+ * Return the row index label of the last row that contains at least one
+ * non-NA value across all columns.
+ * Returns `null` if every value in the DataFrame is NA.
+ *
+ * Mirrors `pandas.DataFrame.last_valid_index()`.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, dataFrameLastValidIndex } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({
+ * A: [1, null, null],
+ * B: [2, 3, null],
+ * });
+ * dataFrameLastValidIndex(df); // 1 (row 1 has B=3)
+ * ```
+ */
+export function dataFrameLastValidIndex(df: DataFrame): Label | null {
+ const [nRows] = df.shape;
+ const colNames = df.columns.toArray();
+ for (let i = nRows - 1; i >= 0; i--) {
+ for (const col of colNames) {
+ const v = atVal(df.col(col).values, i);
+ if (!isMissing(v)) {
+ return df.index.at(i);
+ }
+ }
+ }
+ return null;
+}
diff --git a/src/stats/sort_ops.ts b/src/stats/sort_ops.ts
new file mode 100644
index 00000000..8f5d3c29
--- /dev/null
+++ b/src/stats/sort_ops.ts
@@ -0,0 +1,384 @@
+/**
+ * sort_ops — value and index sorting for Series and DataFrame.
+ *
+ * Mirrors the pandas sort methods:
+ *
+ * - `sortValuesSeries(s, options)` — sort a Series by its values
+ * - `sortIndexSeries(s, options)` — sort a Series by its index labels
+ * - `sortValuesDataFrame(df, by, options)` — sort a DataFrame by one or more column values
+ * - `sortIndexDataFrame(df, options)` — sort a DataFrame by its row index (or column names)
+ *
+ * All functions return a **new** object — the original is never mutated.
+ *
+ * @example
+ * ```ts
+ * import { Series, DataFrame, sortValuesSeries, sortValuesDataFrame } from "tsb";
+ *
+ * const s = new Series({ data: [3, 1, null, 2], index: ["b", "d", "a", "c"] });
+ * sortValuesSeries(s);
+ * // Series([1, 2, 3, null], index=["d","c","b","a"]) ← NaN/null last by default
+ *
+ * const df = DataFrame.fromColumns({ a: [3, 1, 2], b: [10, 30, 20] });
+ * sortValuesDataFrame(df, "a");
+ * // rows in order: index 1 (a=1), index 2 (a=2), index 0 (a=3)
+ * ```
+ *
+ * @module
+ */
+
+import { DataFrame, Index, Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** True when v is null, undefined, or NaN. */
+function isMissing(v: Scalar): boolean {
+ return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+}
+
+/**
+ * Build an argsort permutation that places missing values at the specified
+ * position, and sorts non-missing values with the given comparator.
+ */
+function argsortWithNa(
+ values: readonly Scalar[],
+ ascending: boolean,
+ naPosition: "first" | "last",
+): number[] {
+ const n = values.length;
+ const missing: number[] = [];
+ const present: number[] = [];
+
+ for (let i = 0; i < n; i++) {
+ if (isMissing(values[i])) {
+ missing.push(i);
+ } else {
+ present.push(i);
+ }
+ }
+
+ // Sort the non-missing indices by their values.
+ present.sort((a, b) => {
+ const av = values[a] as Scalar;
+ const bv = values[b] as Scalar;
+ return compareNonNull(av, bv, ascending);
+ });
+
+ return naPosition === "last" ? [...present, ...missing] : [...missing, ...present];
+}
+
+/** Compare two non-null/non-NaN scalars. */
+function compareNonNull(a: Scalar, b: Scalar, ascending: boolean): number {
+ let result: number;
+ if (typeof a === "number" && typeof b === "number") {
+ result = a - b;
+ } else if (typeof a === "boolean" && typeof b === "boolean") {
+ result = (a ? 1 : 0) - (b ? 1 : 0);
+ } else {
+ const as_ = String(a);
+ const bs_ = String(b);
+ result = as_ < bs_ ? -1 : as_ > bs_ ? 1 : 0;
+ }
+ return ascending ? result : -result;
+}
+
+/** Build a default 0..n-1 index. */
+function rangeIndex(n: number): Index {
+ const labels: number[] = [];
+ for (let i = 0; i < n; i++) {
+ labels.push(i);
+ }
+ return new Index(labels);
+}
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
/** Options for {@link sortValuesSeries}. */
export interface SortValuesSeriesOptions {
  /**
   * Sort ascending. Pass `false` for descending order.
   * @defaultValue `true`
   */
  readonly ascending?: boolean;
  /**
   * Where to place `null` / `NaN` values.
   * @defaultValue `"last"`
   */
  readonly naPosition?: "first" | "last";
  /**
   * If `true`, reset the resulting index to `0, 1, 2, …`
   * @defaultValue `false`
   */
  readonly ignoreIndex?: boolean;
}
+
/** Options for {@link sortIndexSeries}. */
export interface SortIndexSeriesOptions {
  /**
   * Sort ascending. Pass `false` for descending order.
   * @defaultValue `true`
   */
  readonly ascending?: boolean;
  /**
   * Where to place index entries that are `null` / `NaN`.
   * @defaultValue `"last"`
   */
  readonly naPosition?: "first" | "last";
  /**
   * If `true`, reset the resulting index to `0, 1, 2, …`
   * @defaultValue `false`
   */
  readonly ignoreIndex?: boolean;
}
+
/** Options for {@link sortValuesDataFrame}. */
export interface SortValuesDataFrameOptions {
  /**
   * Sort ascending. Can be a single boolean (applies to all `by` columns) or
   * an array of booleans (one per column in `by`). When the array is shorter
   * than `by`, the missing entries default to `true`.
   * @defaultValue `true`
   */
  readonly ascending?: boolean | readonly boolean[];
  /**
   * Where to place rows whose sort-key column value is `null` / `NaN`.
   * @defaultValue `"last"`
   */
  readonly naPosition?: "first" | "last";
  /**
   * If `true`, reset the resulting row index to `0, 1, 2, …`
   * @defaultValue `false`
   */
  readonly ignoreIndex?: boolean;
}
+
/** Options for {@link sortIndexDataFrame}. */
export interface SortIndexDataFrameOptions {
  /**
   * Sort ascending. Pass `false` for descending order.
   * @defaultValue `true`
   */
  readonly ascending?: boolean;
  /**
   * Which axis to sort.
   * - `0` (default) — sort rows by row-index labels.
   * - `1` — sort columns by column names.
   */
  readonly axis?: 0 | 1;
  /**
   * Where to place index labels that are `null` / `NaN`
   * (for axis=1, applies to column names).
   * @defaultValue `"last"`
   */
  readonly naPosition?: "first" | "last";
  /**
   * If `true`, reset the resulting index to `0, 1, 2, …` (only valid for axis=0).
   * @defaultValue `false`
   */
  readonly ignoreIndex?: boolean;
}
+
+// ─── sortValuesSeries ─────────────────────────────────────────────────────────
+
+/**
+ * Sort a Series by its values.
+ *
+ * Mirrors `pandas.Series.sort_values()`.
+ *
+ * @example
+ * ```ts
+ * import { Series, sortValuesSeries } from "tsb";
+ *
+ * const s = new Series({ data: [3, 1, 2], index: ["b", "a", "c"] });
+ * sortValuesSeries(s);
+ * // Series([1, 2, 3], index=["a","c","b"])
+ *
+ * sortValuesSeries(s, { ascending: false });
+ * // Series([3, 2, 1], index=["b","c","a"])
+ * ```
+ */
+export function sortValuesSeries(
+ s: Series,
+ options?: SortValuesSeriesOptions,
+): Series {
+ const ascending = options?.ascending ?? true;
+ const naPosition = options?.naPosition ?? "last";
+ const ignoreIndex = options?.ignoreIndex ?? false;
+
+ const perm = argsortWithNa(s.values, ascending, naPosition);
+ const newData = perm.map((i) => s.values[i] as Scalar);
+ const newIndex = ignoreIndex
+ ? rangeIndex(perm.length)
+ : new Index(perm.map((i) => s.index.at(i)));
+
+ return new Series({ data: newData, index: newIndex, name: s.name });
+}
+
+// ─── sortIndexSeries ──────────────────────────────────────────────────────────
+
+/**
+ * Sort a Series by its index labels.
+ *
+ * Mirrors `pandas.Series.sort_index()`.
+ *
+ * @example
+ * ```ts
+ * import { Series, sortIndexSeries } from "tsb";
+ *
+ * const s = new Series({ data: [3, 1, 2], index: ["b", "a", "c"] });
+ * sortIndexSeries(s);
+ * // Series([1, 3, 2], index=["a","b","c"])
+ * ```
+ */
+export function sortIndexSeries(
+ s: Series,
+ options?: SortIndexSeriesOptions,
+): Series {
+ const ascending = options?.ascending ?? true;
+ const naPosition = options?.naPosition ?? "last";
+ const ignoreIndex = options?.ignoreIndex ?? false;
+
+ const labels = s.index.values;
+ const perm = argsortWithNa(labels, ascending, naPosition);
+ const newData = perm.map((i) => s.values[i] as Scalar);
+ const newIndex = ignoreIndex
+ ? rangeIndex(perm.length)
+ : new Index(perm.map((i) => labels[i] as Label));
+
+ return new Series({ data: newData, index: newIndex, name: s.name });
+}
+
+// ─── sortValuesDataFrame ──────────────────────────────────────────────────────
+
+/**
+ * Sort a DataFrame by the values of one or more columns.
+ *
+ * Mirrors `pandas.DataFrame.sort_values(by, ...)`.
+ *
+ * When multiple sort keys are provided (array `by`), later columns act as
+ * tie-breakers for earlier ones — matching pandas' stable-sort behaviour.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, sortValuesDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [3, 1, 2], b: [10, 30, 20] });
+ * sortValuesDataFrame(df, "a");
+ * // rows in order 1, 2, 0 (a=1, a=2, a=3)
+ *
+ * sortValuesDataFrame(df, ["b", "a"]);
+ * // rows in order 0, 2, 1 (b=10, b=20, b=30)
+ * ```
+ */
+export function sortValuesDataFrame(
+ df: DataFrame,
+ by: string | readonly string[],
+ options?: SortValuesDataFrameOptions,
+): DataFrame {
+ const byArr = typeof by === "string" ? [by] : [...by];
+ const ascending = options?.ascending ?? true;
+ const naPosition = options?.naPosition ?? "last";
+ const ignoreIndex = options?.ignoreIndex ?? false;
+
+ const nRows = df.index.size;
+ const perm: number[] = [];
+ for (let i = 0; i < nRows; i++) {
+ perm.push(i);
+ }
+
+ // Build ascending flags per key.
+ const ascFlags: boolean[] = byArr.map((_, ki) => {
+ if (typeof ascending === "boolean") {
+ return ascending;
+ }
+ return (ascending as readonly boolean[])[ki] ?? true;
+ });
+
+ // Stable sort: sort by last key first, then earlier keys (to get multi-key).
+ // Equivalent to a single pass sort with a compound comparator.
+ perm.sort((a, b) => {
+ for (let ki = 0; ki < byArr.length; ki++) {
+ const col = byArr[ki] as string;
+ const asc = ascFlags[ki] ?? true;
+ const av = df.col(col).values[a] as Scalar;
+ const bv = df.col(col).values[b] as Scalar;
+ const aMiss = isMissing(av);
+ const bMiss = isMissing(bv);
+ if (aMiss && bMiss) {
+ continue;
+ }
+ if (aMiss) {
+ return naPosition === "last" ? 1 : -1;
+ }
+ if (bMiss) {
+ return naPosition === "last" ? -1 : 1;
+ }
+ const cmp = compareNonNull(av, bv, asc);
+ if (cmp !== 0) {
+ return cmp;
+ }
+ }
+ return 0;
+ });
+
+ const result = df.iloc(perm);
+
+ if (ignoreIndex) {
+ const colsObj: Record = {};
+ for (const c of result.columns.values) {
+ colsObj[c] = [...result.col(c).values];
+ }
+ return DataFrame.fromColumns(colsObj);
+ }
+
+ return result;
+}
+
+// ─── sortIndexDataFrame ───────────────────────────────────────────────────────
+
+/**
+ * Sort a DataFrame by its row index labels (axis=0) or column names (axis=1).
+ *
+ * Mirrors `pandas.DataFrame.sort_index()`.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, sortIndexDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns(
+ * { x: [1, 2, 3] },
+ * { index: ["b", "a", "c"] },
+ * );
+ * sortIndexDataFrame(df);
+ * // rows reordered: "a" (row 1), "b" (row 0), "c" (row 2)
+ *
+ * const df2 = DataFrame.fromColumns({ z: [1], a: [2], m: [3] });
+ * sortIndexDataFrame(df2, { axis: 1 });
+ * // columns in alphabetical order: "a", "m", "z"
+ * ```
+ */
+export function sortIndexDataFrame(df: DataFrame, options?: SortIndexDataFrameOptions): DataFrame {
+ const ascending = options?.ascending ?? true;
+ const axis = options?.axis ?? 0;
+ const naPosition = options?.naPosition ?? "last";
+ const ignoreIndex = options?.ignoreIndex ?? false;
+
+ if (axis === 1) {
+ // Sort columns by their names.
+ const colLabels = df.columns.values;
+ const perm = argsortWithNa(colLabels, ascending, naPosition);
+ const sortedColNames = perm.map((i) => colLabels[i] as string);
+ return df.select(sortedColNames);
+ }
+
+ // Sort rows by index labels.
+ const labels = df.index.values;
+ const perm = argsortWithNa(labels, ascending, naPosition);
+
+ const result = df.iloc(perm);
+
+ if (ignoreIndex) {
+ const colsObj: Record = {};
+ for (const c of result.columns.values) {
+ colsObj[c] = [...result.col(c).values];
+ }
+ return DataFrame.fromColumns(colsObj);
+ }
+
+ return result;
+}
diff --git a/src/stats/str_findall.ts b/src/stats/str_findall.ts
new file mode 100644
index 00000000..b2c3dcb8
--- /dev/null
+++ b/src/stats/str_findall.ts
@@ -0,0 +1,353 @@
+/**
+ * str_findall — findall, findFirst, and findallCount for Series strings.
+ *
+ * Mirrors `pandas.Series.str.findall(pat)` and related helpers:
+ *
+ * - `strFindall` — all non-overlapping regex matches per element
+ * - `strFindallCount` — count of matches per element
+ * - `strFindFirst` — first match per element (or null if none)
+ * - `strFindallExpand` — expand capture groups of the first match into a DataFrame
+ *
+ * @module
+ */
+
+import { DataFrame, Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+import type { StrInput } from "./string_ops.ts";
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+function toStr(v: Scalar): string | null {
+ if (v === null || v === undefined || (typeof v === "number" && Number.isNaN(v))) {
+ return null;
+ }
+ return String(v);
+}
+
+function toInputStrings(input: StrInput): string[] {
+ if (typeof input === "string") {
+ return [input];
+ }
+ if (input instanceof Series) {
+ return input.values.map((v) => toStr(v) ?? "");
+ }
+ return (input as readonly Scalar[]).map((v) => toStr(v) ?? "");
+}
+
+function buildResult(data: Scalar[], input: StrInput): Series {
+ if (input instanceof Series) {
+ return new Series({ data, index: input.index });
+ }
+ return new Series({ data });
+}
+
+/** Build a global RegExp from a pattern, optionally with flags. */
+function makeGlobal(pat: string | RegExp, flags?: string): RegExp {
+ if (pat instanceof RegExp) {
+ const f = pat.flags.includes("g") ? pat.flags : `${pat.flags}g`;
+ return new RegExp(pat.source, f);
+ }
+ const f = `${flags ?? ""}g`.replace(/g{2,}/, "g");
+ return new RegExp(pat, f);
+}
+
+/** Extract named capture-group identifiers from a regex source pattern. */
+function extractNamedGroupNames(source: string): string[] {
+ const names: string[] = [];
+ const re = /\(\?<([A-Za-z_]\w*)>/g;
+ for (;;) {
+ const m = re.exec(source);
+ if (m === null) {
+ break;
+ }
+ const name = m[1];
+ if (name !== undefined) {
+ names.push(name);
+ }
+ }
+ return names;
+}
+
+// ─── strFindall ───────────────────────────────────────────────────────────────
+
+/**
+ * Find all non-overlapping regex matches in each element.
+ *
+ * Mirrors `pandas.Series.str.findall(pat, flags=0)`.
+ *
+ * Each element in the returned Series contains a `string[]` of matches
+ * (the full match if no capture groups; the single capture group string if
+ * exactly one group is present; a `string[]` per match if multiple groups).
+ * Null/NaN elements produce `null`.
+ *
+ * The `string[]` value is stored as a JSON-serialized string for compatibility
+ * with `Scalar`. Use `JSON.parse` to recover the array.
+ *
+ * @param input - Series, array, or scalar string.
+ * @param pat - Regular expression pattern (string or RegExp).
+ * @param flags - Regex flags (only used when `pat` is a string).
+ * @returns A `Series` where each value is a JSON string of `string[]`.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: ["one two three", "four five"] });
+ * const result = strFindall(s, /\w+/);
+ * // Series [
+ * // '["one","two","three"]',
+ * // '["four","five"]',
+ * // ]
+ * JSON.parse(result.values[0] as string); // ["one", "two", "three"]
+ * ```
+ */
+export function strFindall(input: StrInput, pat: string | RegExp, flags?: string): Series {
+ const strs = toInputStrings(input);
+ const re = makeGlobal(pat, flags);
+
+ const data: Scalar[] = strs.map((s, i) => {
+ // null/NaN elements: check original value
+ const orig =
+ input instanceof Series
+ ? input.values[i]
+ : typeof input === "string"
+ ? input
+ : (input as readonly Scalar[])[i];
+ if (orig === null || orig === undefined || (typeof orig === "number" && Number.isNaN(orig))) {
+ return null;
+ }
+
+ re.lastIndex = 0;
+ const matches: string[] = [];
+ for (;;) {
+ const m = re.exec(s);
+ if (m === null) {
+ break;
+ }
+ // If there are capture groups, use the first group (pandas behaviour).
+ matches.push(m.length > 1 ? (m[1] ?? "") : (m[0] ?? ""));
+ }
+ return JSON.stringify(matches);
+ });
+
+ return buildResult(data, input);
+}
+
+// ─── strFindallCount ──────────────────────────────────────────────────────────
+
+/**
+ * Count all non-overlapping regex matches in each element.
+ *
+ * This is equivalent to `strFindall(s, pat).map(x => JSON.parse(x).length)`
+ * but more efficient since it avoids allocating match arrays.
+ *
+ * @param input - Series, array, or scalar string.
+ * @param pat - Regular expression pattern.
+ * @param flags - Regex flags (only when `pat` is a string).
+ * @returns A `Series` of integer counts. Null elements return `null`.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: ["aaa", "bb", "c"] });
+ * strFindallCount(s, /a+/);
+ * // Series [1, 0, 0]
+ * ```
+ */
+export function strFindallCount(
+ input: StrInput,
+ pat: string | RegExp,
+ flags?: string,
+): Series {
+ const strs = toInputStrings(input);
+ const re = makeGlobal(pat, flags);
+
+ const data: Scalar[] = strs.map((s, i) => {
+ const orig =
+ input instanceof Series
+ ? input.values[i]
+ : typeof input === "string"
+ ? input
+ : (input as readonly Scalar[])[i];
+ if (orig === null || orig === undefined || (typeof orig === "number" && Number.isNaN(orig))) {
+ return null;
+ }
+
+ re.lastIndex = 0;
+ let count = 0;
+ for (;;) {
+ const m = re.exec(s);
+ if (m === null) {
+ break;
+ }
+ count++;
+ }
+ return count;
+ });
+
+ return buildResult(data, input);
+}
+
+// ─── strFindFirst ─────────────────────────────────────────────────────────────
+
+/**
+ * Return the first regex match in each element, or `null` if there is none.
+ *
+ * If the pattern has capture groups, returns the first capture group's value
+ * (mirrors pandas behaviour for single-group patterns).
+ *
+ * @param input - Series, array, or scalar string.
+ * @param pat - Regular expression pattern.
+ * @param flags - Regex flags (only when `pat` is a string).
+ * @returns A `Series` of strings (first match) or `null`.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: ["price: $10.99", "no price", "cost: $5.00"] });
+ * strFindFirst(s, /\$[\d.]+/);
+ * // Series ["$10.99", null, "$5.00"]
+ * ```
+ */
+export function strFindFirst(
+ input: StrInput,
+ pat: string | RegExp,
+ flags?: string,
+): Series {
+ const strs = toInputStrings(input);
+ const source = pat instanceof RegExp ? pat.source : pat;
+ const baseFlags = pat instanceof RegExp ? pat.flags.replace("g", "") : (flags ?? "");
+ const re = new RegExp(source, baseFlags);
+
+ const data: Scalar[] = strs.map((s, i) => {
+ const orig =
+ input instanceof Series
+ ? input.values[i]
+ : typeof input === "string"
+ ? input
+ : (input as readonly Scalar[])[i];
+ if (orig === null || orig === undefined || (typeof orig === "number" && Number.isNaN(orig))) {
+ return null;
+ }
+
+ const m = re.exec(s);
+ if (m === null) {
+ return null;
+ }
+ return m.length > 1 ? (m[1] ?? null) : (m[0] ?? null);
+ });
+
+ return buildResult(data, input);
+}
+
+// ─── strFindallExpand ─────────────────────────────────────────────────────────
+
+/**
+ * Extract capture groups from the **first** match of each element into a
+ * DataFrame, one column per capture group.
+ *
+ * This is a simplified variant of `str.extract(pat, expand=True)` limited
+ * to named or positional capture groups in the pattern.
+ *
+ * Column names are taken from named capture groups (`(?<name>...)`) where
+ * present; otherwise numbered as `"0"`, `"1"`, etc.
+ *
+ * @param input - Series or string array.
+ * @param pat - Regular expression with capture groups.
+ * @param flags - Regex flags (only when `pat` is a string).
+ * @returns A `DataFrame` with one row per input element and one column per
+ * capture group. Non-matching elements produce `null` in all columns.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: ["John 30", "Jane 25", "unknown"] });
+ * strFindallExpand(s, /(?<name>\w+)\s+(?<age>\d+)/);
+ * // DataFrame
+ * // name age
+ * // 0 John 30
+ * // 1 Jane 25
+ * // 2 null null
+ * ```
+ */
+export function strFindallExpand(
+ input: readonly string[] | Series,
+ pat: string | RegExp,
+ flags?: string,
+): DataFrame {
+ const source = pat instanceof RegExp ? pat.source : pat;
+ const baseFlags = pat instanceof RegExp ? pat.flags.replace("g", "") : (flags ?? "");
+ const re = new RegExp(source, baseFlags);
+
+ const strs = toInputStrings(input);
+
+ // Determine group names from pattern source.
+ const namedKeys = extractNamedGroupNames(source);
+
+ // Determine number of capture groups from source
+ // Count open parens that aren't non-capturing groups (?:
+ let groupCount = 0;
+ for (let i = 0; i < source.length; i++) {
+ if (source[i] === "(" && source[i + 1] !== "?" && source[i + 1] !== "*") {
+ groupCount++;
+ } else if (
+ source[i] === "(" &&
+ source[i + 1] === "?" &&
+ source[i + 2] !== ":" &&
+ source[i + 2] !== "=" &&
+ source[i + 2] !== "!" &&
+ source[i + 2] !== "<" // negative look-behind uses (? 0 ? namedKeys.length : Math.max(groupCount, 1);
+ const colNames: string[] =
+ namedKeys.length > 0 ? namedKeys : Array.from({ length: colCount }, (_, k) => String(k));
+
+ const columns: Record