diff --git a/playground/apply.html b/playground/apply.html
new file mode 100644
index 00000000..41fc7a65
--- /dev/null
+++ b/playground/apply.html
@@ -0,0 +1,186 @@
+
+
+
+
+
+ tsb — apply / map
+
+
+
+ tsb — apply / map
+
+ Apply functions element-wise or per-column/row.
+ applySeries maps a function over each element.
+ mapSeries supports function, Map, or plain-object lookup.
+ applyDataFrame reduces each column or row to a scalar.
+ applyExpandDataFrame transforms each column/row into a new Series.
+ mapDataFrame applies a function element-wise across the whole DataFrame.
+
+
+ Core concept
+ // Element-wise apply on a Series
+applySeries(s, (v) => Math.sqrt(v as number))
+
+// Map via lookup table
+mapSeries(s, { a: 1, b: 2, c: 3 })
+
+// Reduce each column to a scalar
+applyDataFrame(df, (col) => col.values.reduce((a, b) => a + b, 0))
+
+// Transform each column, return a DataFrame
+applyExpandDataFrame(df, (col) => new Series({ data: col.values.map(v => v * 2), index: col.index }))
+
+// Element-wise map on a DataFrame
+mapDataFrame(df, (v) => (v as number) ** 2)
+
+
+ pandas equivalent:
+ s.apply(func) / s.map(func_or_dict)
+ df.apply(func, axis=0) / df.applymap(func) (now df.map(func))
+
+
+
+ Demo 1 — applySeries element-wise
+
+
Code
+
const s = new Series({ data: [1, 4, 9, 16] });
+applySeries(s, (v) => Math.sqrt(v as number)).values;
+// → [1, 2, 3, 4]
+
Run
+
+
+
+
+ Demo 2 — mapSeries with object lookup
+
+
Code
+
const s = new Series({ data: ["a", "b", "c", "d"] });
+mapSeries(s, { a: 1, b: 2, c: 3 }).values;
+// → [1, 2, 3, null] ("d" not in lookup → null)
+
Run
+
+
+
+
+ Demo 3 — applyDataFrame: sum of each column (axis=0)
+
+
Code
+
const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [10, 20, 30] });
+applyDataFrame(df, (col) =>
+ (col.values as number[]).reduce((acc, v) => acc + v, 0)
+).values;
+// → [6, 60] (indexed by column names)
+
Run
+
+
+
+
+ Demo 4 — applyDataFrame: sum of each row (axis=1)
+
+
Code
+
const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+applyDataFrame(df, (row) =>
+ (row.values as number[]).reduce((acc, v) => acc + v, 0),
+ { axis: 1 }
+).values;
+// → [5, 7, 9]
+
Run
+
+
+
+
+ Demo 5 — applyExpandDataFrame: double each column
+
+
Code
+
const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+applyExpandDataFrame(df, (col) =>
+ new Series({ data: (col.values as number[]).map(v => v * 2), index: col.index })
+);
+// a: [2, 4, 6] b: [8, 10, 12]
+
Run
+
+
+
+
+ Demo 6 — mapDataFrame: element-wise square
+
+
Code
+
const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+mapDataFrame(df, (v) => (v as number) ** 2);
+// a: [1, 4, 9] b: [16, 25, 36]
+
Run
+
+
+
+
+
+
diff --git a/playground/astype.html b/playground/astype.html
new file mode 100644
index 00000000..efd9e5ed
--- /dev/null
+++ b/playground/astype.html
@@ -0,0 +1,438 @@
+
+
+
+
+
+ tsb — astype
+
+
+
+
+
+
Loading tsb runtime…
+
+
+ ← tsb playground
+ astype — dtype coercion
+
+ Cast Series and DataFrame values to a different dtype.
+ Mirrors pandas.Series.astype and pandas.DataFrame.astype.
+
+
+
+
+
1 · Series — float to int64
+
+ Cast floating-point values to integers via truncation (same as
+ pandas.Series.astype("int64")).
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
2 · Series — numbers to string
+
Convert every value to its string representation. Null/undefined values
+ become null (not the string "null").
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
3 · Overflow clamping for bounded integer dtypes
+
+ Values that overflow the target integer dtype's range are clamped to
+ [min, max] — e.g. uint8 is clamped to
+ [0, 255].
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
4 · DataFrame — cast all columns
+
Pass a single dtype name to cast every column to the same type.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
5 · DataFrame — per-column dtype mapping
+
Pass a Record<string, DtypeName> to cast individual
+ columns. Columns not listed are carried over unchanged.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
6 · Casting to bool
+
Zero, empty string, and NaN become false;
+ everything else (including non-zero numbers and non-empty strings)
+ becomes true.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
API Reference
+
// Series cast
+astypeSeries(
+ series: Series,
+ dtype: DtypeName | Dtype,
+ options?: AstypeOptions,
+): Series
+
+// DataFrame cast (all columns or per-column mapping)
+astype(
+ df: DataFrame,
+ dtype: DtypeName | Dtype | Record<string, DtypeName | Dtype>,
+ options?: DataFrameAstypeOptions,
+): DataFrame
+
+// Low-level scalar cast
+castScalar(value: Scalar, dtype: Dtype): Scalar
+
+// Options
+interface AstypeOptions {
+ errors?: "raise" | "ignore"; // default "raise"
+}
+
+// Supported dtype names
+type DtypeName =
+ | "int8" | "int16" | "int32" | "int64"
+ | "uint8" | "uint16" | "uint32" | "uint64"
+ | "float32" | "float64"
+ | "bool" | "string" | "object"
+ | "datetime" | "timedelta" | "category"
+
+
+
+
+
+
diff --git a/playground/clip_advanced.html b/playground/clip_advanced.html
new file mode 100644
index 00000000..eb200294
--- /dev/null
+++ b/playground/clip_advanced.html
@@ -0,0 +1,163 @@
+
+
+
+
+
+ tsb — clip_advanced (per-element clipping)
+
+
+
+ tsb — clip_advanced (per-element clipping)
+
+ Clip Series and DataFrame values to per-element bounds.
+ Unlike the simple scalar clip, clipAdvancedSeries and
+ clipAdvancedDataFrame support array, Series, and DataFrame bounds —
+ enabling per-position or element-wise bound specification.
+
+
+ Core concept
+ // Scalar bounds (like pandas s.clip(lower=0, upper=5))
+clipAdvancedSeries(s, { lower: 0, upper: 5 })
+
+// Per-element array bounds
+clipAdvancedSeries(s, { lower: [1, 2, 3], upper: [4, 5, 6] })
+
+// Series bounds (positional alignment)
+clipAdvancedSeries(s, { lower: loSeries, upper: hiSeries })
+
+// DataFrame element-wise bounds
+clipAdvancedDataFrame(df, { lower: loDf, upper: hiDf })
+
+// Series broadcast on DataFrame (axis=0: one bound per column; axis=1: one per row)
+clipAdvancedDataFrame(df, { lower: loSeries, axis: 1 })
+
+
+ pandas equivalent:
+ s.clip(lower=lo_array, upper=hi_array)
+ df.clip(lower=lo_df, upper=hi_df)
+
+
+
+ Demo 1 — clipAdvancedSeries with scalar bounds
+
+
Code
+
const s = new Series({ data: [-3, 1, 5, 10] });
+clipAdvancedSeries(s, { lower: 0, upper: 6 }).values;
+// → [0, 1, 5, 6]
+
Run
+
+
+
+
+ Demo 2 — clipAdvancedSeries with per-element array bounds
+
+
Code
+
const s = new Series({ data: [-1, 0, 5, 12] });
+const lo = [2, -1, 4, 10];
+const hi = [5, 3, 8, 11];
+clipAdvancedSeries(s, { lower: lo, upper: hi }).values;
+// → [2, 0, 5, 11]
+
Run
+
+
+
+
+ Demo 3 — clipAdvancedSeries with Series bounds
+
+
Code
+
const s = new Series({ data: [0, 5, 10, 15] });
+const loBound = new Series({ data: [1, 3, 8, 12] });
+const hiBound = new Series({ data: [2, 7, 9, 20] });
+clipAdvancedSeries(s, { lower: loBound, upper: hiBound }).values;
+// → [1, 5, 9, 15]
+
Run
+
+
+
+
+ Demo 4 — clipAdvancedDataFrame with DataFrame bounds
+
+
Code
+
const df = DataFrame.fromColumns({ a: [1, 5, 9], b: [2, 6, 10] });
+const lo = DataFrame.fromColumns({ a: [2, 3, 4], b: [1, 4, 8] });
+const hi = DataFrame.fromColumns({ a: [3, 7, 8], b: [5, 9, 12] });
+const result = clipAdvancedDataFrame(df, { lower: lo, upper: hi });
+result.col("a").values; // → [2, 5, 8]
+result.col("b").values; // → [2, 6, 10]
+
Run
+
+
+
+
+ Demo 5 — clipAdvancedDataFrame with Series broadcast (axis=1)
+
+
Code
+
// axis=1: one lower bound per row
+const df = DataFrame.fromColumns({ a: [1, 5, 9], b: [2, 6, 10] });
+const loPerRow = new Series({ data: [0, 4, 10] });
+const result = clipAdvancedDataFrame(df, { lower: loPerRow, axis: 1 });
+result.col("a").values; // → [1, 5, 10]
+result.col("b").values; // → [2, 6, 10]
+
Run
+
+
+
+
+
+
diff --git a/playground/crosstab.html b/playground/crosstab.html
new file mode 100644
index 00000000..82adadcc
--- /dev/null
+++ b/playground/crosstab.html
@@ -0,0 +1,242 @@
+
+
+
+
+
+ tsb — crosstab
+
+
+
+ tsb — crosstab
+
+ Cross-tabulation computes a frequency table of two (or more) categorical
+ variables. It mirrors pandas.crosstab.
+
+ ← back to index
+
+
+ 1 · Basic frequency count
+
+
Code
+
import { crosstab, Series } from "tsb";
+
+const a = new Series({ data: ["foo","foo","bar","bar","foo"], name: "A" });
+const b = new Series({ data: ["one","two","one","two","one"], name: "B" });
+const ct = crosstab(a, b);
+// rows = unique A values, cols = unique B values, cells = counts
+
Result
+
+
+
+
+ 2 · Margins (row / column totals)
+
+
Code
+
const ct = crosstab(a, b, { margins: true, margins_name: "All" });
+
Result
+
+
+
+
+ 3 · Normalize — proportions over all cells
+
+
Code
+
const ct = crosstab(a, b, { normalize: "all" });
+
Result
+
+
+
+
+ 4 · Normalize — row proportions (normalize="index")
+
+
Code
+
const ct = crosstab(a, b, { normalize: "index" });
+
Result
+
+
+
+
+ 5 · Normalize — column proportions (normalize="columns")
+
+
Code
+
const ct = crosstab(a, b, { normalize: "columns" });
+
Result
+
+
+
+
+ 6 · Values + aggfunc (aggregated cross-tab)
+
+
Code
+
const sales = new Series({ data: [10, 20, 5, 15, 30], name: "sales" });
+const ct = crosstab(a, b, { values: sales, aggfunc: "sum" });
+
Result
+
+
+
+
+ 7 · dropna — exclude / include missing values
+
+
Code
+
const x = ["a", null, "b", "a"];
+const y = ["x", "y", "x", "y"];
+const withDropna = crosstabSeries(x, y, { dropna: true }); // default
+const withoutDrop = crosstabSeries(x, y, { dropna: false });
+
dropna=true
+
+
dropna=false
+
+
+
+
+ 8 · Plain array inputs (crosstabSeries)
+
+
Code
+
import { crosstabSeries } from "tsb";
+const ct = crosstabSeries(
+ ["cat","dog","cat","dog","fish"],
+ ["male","male","female","female","female"],
+);
+
Result
+
+
+
+
+
+
diff --git a/playground/cut.html b/playground/cut.html
new file mode 100644
index 00000000..24ee65a1
--- /dev/null
+++ b/playground/cut.html
@@ -0,0 +1,343 @@
+
+
+
+
+
+ tsb — cut / qcut
+
+
+
+ tsb — cut / qcut
+
+ Bin continuous numeric data into discrete intervals.
+ cut uses equal-width (or user-defined) bins;
+ qcut uses equal-frequency (quantile-based) bins.
+ Both return a Series<string | null> of bin labels.
+
+
+ Core concept
+ // Equal-width bins
+cut(s, 4) // 4 bins of equal width
+cut(s, [0, 10, 50, 100]) // explicit edges
+
+// Equal-frequency bins (quartiles)
+qcut(s, 4) // 4 bins, each with ~25% of data
+qcut(s, [0, 0.25, 0.5, 0.75, 1]) // explicit quantile levels
+
+// Custom labels
+cut(s, 3, { labels: ["low", "mid", "high"] })
+
+// Return bin edges too
+const [binned, edges] = cut(s, 3, { retbins: true })
+
+// Integer bin codes
+cutCodes(s, 4) // → Series of 0, 1, 2, 3 integers
+
+
+ pandas equivalent:
+ pd.cut(x, bins, right=True, labels=None, retbins=False, precision=3, include_lowest=False)
+ pd.qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise')
+
+
+
+ Demo 1 — cut: equal-width bins
+
+
Code
+
const s = new Series({ data: [1, 7, 5, 4, 2, 3, 8, 6], name: "score" });
+cut(s, 4).values;
+// Each value assigned to one of 4 equal-width bins
+
Run
+
+
+
+
+ Demo 2 — cut: explicit bin edges
+
+
Code
+
const s = new Series({ data: [15, 35, 55, 75, 95] });
+cut(s, [0, 25, 50, 75, 100]).values;
+// → ["(0, 25]", "(25, 50]", "(50, 75]", "(50, 75]", "(75, 100]"]
+
Run
+
+
+
+
+ Demo 3 — cut: custom labels
+
+
Code
+
const grades = new Series({ data: [45, 62, 78, 91, 55] });
+cut(grades, [0, 60, 70, 80, 100], { labels: ["F", "D", "C", "B/A"] }).values;
+// → grade letter for each score
+
Run
+
+
+
+
+ Demo 4 — qcut: quartile bins
+
+
Code
+
const s = new Series({ data: [3, 1, 7, 2, 9, 4, 6, 8, 5, 10] });
+qcut(s, 4).values;
+// Equal-frequency quartile bins — each bin contains ~25% of values
+
Run
+
+
+
+
+ Demo 5 — retbins: get bin edges back
+
+
Code
+
const s = new Series({ data: [10, 30, 50, 70, 90] });
+const [binned, edges] = cut(s, 3, { retbins: true });
+// edges: the computed bin boundaries
+
Run
+
+
+
+
+ Demo 6 — cutCodes: integer bin codes
+
+
Code
+
const s = new Series({ data: [5, 15, 25, 35, 45] });
+cutCodes(s, [0, 10, 20, 30, 40, 50]).values;
+// → [0, 1, 2, 3, 4] (integer bin indices)
+
Run
+
+
+
+
+ Demo 7 — null / NaN handling
+
+
Code
+
const s = new Series({ data: [1, null, NaN, 5, 10] });
+cut(s, 3).values;
+// null and NaN stay as null in the output
+
Run
+
+
+
+
+ Demo 8 — qcut: handling duplicate edges with ties
+
+
Code
+
const s = new Series({ data: [1, 1, 1, 2, 3, 4, 5, 5, 5] });
+qcut(s, 4, { duplicates: "drop" }).values;
+// Ties cause duplicate quantile boundaries — "drop" removes them
+
Run
+
+
+
+
+
+
diff --git a/playground/diff_shift.html b/playground/diff_shift.html
new file mode 100644
index 00000000..3a300fbf
--- /dev/null
+++ b/playground/diff_shift.html
@@ -0,0 +1,443 @@
+
+
+
+
+
+ tsb — diff & shift (discrete difference and value shifting)
+
+
+
+
+
+
Loading tsb runtime…
+
+
+ ← Back to playground index
+
+ diff & shift — discrete difference and value shifting
+
+ diffSeries / diffDataFrame compute the element-wise discrete
+ difference (value[i] - value[i-periods]).
+ shiftSeries / shiftDataFrame shift values forward or backward
+ by a given number of periods, filling with a configurable value.
+ Mirrors Series.diff(), Series.shift(),
+ DataFrame.diff(), and DataFrame.shift() from pandas.
+
+
+
+
+
1 · Series diff — first discrete difference
+
+ Compute s[i] - s[i - periods] for each position.
+ The first periods entries are null.
+ Non-numeric values produce null.
+
+
+
+
+
+
Press ▶ Run to execute
+
+
💡 Tip: diffSeries is commonly used to compute returns, velocity, or changes over time.
+
+
+
+
+
2 · Series shift — lag and lead values
+
+ Shift values forward (positive periods) or backward (negative periods).
+ Vacated positions are filled with fillValue (default null).
+
+
+
+
+
+
Press ▶ Run to execute
+
+
💡 Tip: combine shiftSeries with arithmetic to compute returns, lags, or leads.
+
+
+
+
+
3 · DataFrame diff — column-wise and row-wise
+
+ axis=0 (default): diff each column independently (rows over time).
+ axis=1: diff across columns within each row.
+
+
+
+
+
+
Press ▶ Run to execute
+
+
+
+
+
+
4 · DataFrame shift — lagging a DataFrame
+
+ Shift all columns by the same number of periods.
+ Useful for creating lagged features in machine learning.
+
+
+
+
+
+
Press ▶ Run to execute
+
+
💡 Tip: creating multiple lagged columns is a common feature-engineering technique for time series forecasting.
+
+
+
+
+
API Reference
+
// Discrete difference
+diffSeries(series: Series<Scalar>, options?: DiffOptions): Series<Scalar>
+diffDataFrame(df: DataFrame, options?: DataFrameDiffOptions): DataFrame
+
+interface DiffOptions {
+ periods?: number; // default 1; negative = look forward
+}
+interface DataFrameDiffOptions extends DiffOptions {
+ axis?: 0 | 1 | "index" | "columns"; // default 0
+}
+
+// Value shifting
+shiftSeries(series: Series<Scalar>, options?: ShiftOptions): Series<Scalar>
+shiftDataFrame(df: DataFrame, options?: DataFrameShiftOptions): DataFrame
+
+interface ShiftOptions {
+ periods?: number; // default 1; negative = shift backward
+ fillValue?: Scalar; // default null
+}
+interface DataFrameShiftOptions extends ShiftOptions {
+ axis?: 0 | 1 | "index" | "columns"; // default 0
+}
+
+
+
+
+
+
+
diff --git a/playground/duplicated.html b/playground/duplicated.html
new file mode 100644
index 00000000..d562c003
--- /dev/null
+++ b/playground/duplicated.html
@@ -0,0 +1,198 @@
+
+
+
+
+
+ tsb — duplicated / drop_duplicates
+
+
+
+ tsb — duplicated / drop_duplicates
+
+ Detect and remove duplicate values or rows.
+ duplicatedSeries / duplicatedDataFrame return a boolean
+ Series marking which items are duplicates.
+ dropDuplicatesSeries / dropDuplicatesDataFrame return
+ a new object with duplicates removed.
+
+
+ Core concept
+ // keep="first" (default): mark later duplicates as true
+duplicatedSeries(s)
+
+// keep="last": mark earlier duplicates as true
+duplicatedSeries(s, { keep: "last" })
+
+// keep=false: mark ALL occurrences of any duplicate
+duplicatedSeries(s, { keep: false })
+
+
+ pandas equivalent:
+ s.duplicated(keep='first')
+ df.duplicated(subset=['a', 'b'], keep='first')
+ s.drop_duplicates() / df.drop_duplicates()
+
+
+
+ Demo 1 — duplicatedSeries with keep="first"
+
+
Code
+
const s = new Series({ data: [1, 2, 1, 3, 2] });
+duplicatedSeries(s).values;
+// → [false, false, true, false, true]
+
Run
+
+
+
+
+ Demo 2 — duplicatedSeries with keep=false (mark all)
+
+
Code
+
const s = new Series({ data: ["a", "b", "a", "c", "b"] });
+duplicatedSeries(s, { keep: false }).values;
+// → [true, true, true, false, true]
+
Run
+
+
+
+
+ Demo 3 — dropDuplicatesSeries
+
+
Code
+
const s = new Series({ data: [10, 20, 10, 30, 20], name: "prices" });
+dropDuplicatesSeries(s).values;
+// → [10, 20, 30]
+
Run
+
+
+
+
+ Demo 4 — duplicatedDataFrame with subset
+
+
Code
+
const df = DataFrame.fromRecords([
+ { name: "Alice", dept: "Eng" },
+ { name: "Bob", dept: "Eng" },
+ { name: "Alice", dept: "HR" },
+ { name: "Bob", dept: "Eng" }, // ← duplicate of row 1 on "name"+"dept"
+]);
+// Only consider "name" column for duplicates:
+duplicatedDataFrame(df, { subset: ["name"] }).values;
+// → [false, false, true, true] (Alice and Bob each appear twice)
+
Run
+
+
+
+
+ Demo 5 — dropDuplicatesDataFrame
+
+
Code
+
const df = DataFrame.fromRecords([
+ { a: 1, b: 2 },
+ { a: 1, b: 2 },
+ { a: 3, b: 4 },
+ { a: 3, b: 4 },
+]);
+const deduped = dropDuplicatesDataFrame(df);
+// shape: [2, 2]
+// a: [1, 3] b: [2, 4]
+
Run
+
+
+
+
+ Interactive editor
+
+
Edit and run:
+
+
Run
+
+
+
+
+
+
diff --git a/playground/get_dummies.html b/playground/get_dummies.html
new file mode 100644
index 00000000..efa302e9
--- /dev/null
+++ b/playground/get_dummies.html
@@ -0,0 +1,259 @@
+
+
+
+
+
+ tsb — getDummies / fromDummies
+
+
+
+ getDummies one-hot encoding
+ Convert categorical variables into binary indicator columns — mirrors pandas.get_dummies and pandas.from_dummies.
+
+
+
+
1. Basic Series → dummy DataFrame
+
+
+
Input Series
+
const s = new Series({
+ data: ["cat", "dog", "cat", "fish"],
+ name: "animal"
+});
+getDummies(s);
+
+
+
+
+
+
+
+
2. Custom prefix and separator
+
+
+
Code
+
getDummies(s, {
+ prefix: "pet",
+ prefixSep: "__"
+});
+
+
+
+
+
+
+
+
3. Drop first level (avoid multicollinearity)
+
+
+
Code
+
const s2 = new Series({
+ data: ["a","b","c","a"],
+ name: "x"
+});
+getDummies(s2, { dropFirst: true });
+
+
+
Columns (a dropped)
+
…
+
+
+
+
+
+
+
4. Include NaN indicator column
+
+
+
Code
+
const s3 = new Series({
+ data: ["a", null, "b", null],
+ name: "x"
+});
+getDummies(s3, { dummyNa: true });
+
+
+
Result (with x_nan column)
+
…
+
+
+
+
+
+
+
5. DataFrame — encode categorical columns automatically
+
+
+
Code
+
const df = DataFrame.fromColumns({
+ score: [90, 85, 72],
+ grade: ["A", "B", "C"],
+ pass: [true, true, false]
+});
+getDummies(df);
+
+
+
+
+
+
+
+
6. Encode only specified columns
+
+
+
Code
+
const df2 = DataFrame.fromColumns({
+ color: ["r","g","b"],
+ shape: ["sq","ci","sq"],
+ n: [1,2,3]
+});
+getDummies(df2, { columns: ["color"] });
+
+
+
+
+
+
+
+
7. fromDummies — reverse one-hot encoding
+
+
+
Code
+
const original = new Series({
+ data: ["cat","dog","cat","fish"],
+ name: "pet"
+});
+const dummies = getDummies(original);
+const recovered = fromDummies(dummies, { sep: "_" });
+
+
+
+
+
+
+
+
+
+
+
diff --git a/playground/idxmin_idxmax.html b/playground/idxmin_idxmax.html
new file mode 100644
index 00000000..b771dd36
--- /dev/null
+++ b/playground/idxmin_idxmax.html
@@ -0,0 +1,439 @@
+
+
+
+
+
+ tsb — idxmin / idxmax
+
+
+
+
+
+
Loading TypeScript compiler…
+
+
+ ← tsb playground
+ idxmin / idxmax
+
+ Return the index label of the minimum or maximum value in a
+ Series or each column of a DataFrame.
+ Mirrors pandas.Series.idxmin(), pandas.Series.idxmax(),
+ pandas.DataFrame.idxmin(), and pandas.DataFrame.idxmax().
+
+
+
+
+
1 · Series.idxmin — label of the minimum value
+
Returns the index label at the position of the minimum value.
+ NaN / null values are skipped by default.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
2 · Series.idxmax — label of the maximum value
+
Returns the index label at the position of the maximum value.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
3 · NaN handling — skipna option
+
By default NaN / null values are skipped. Set skipna: false
+ to propagate NaN (returns null if any value is NaN).
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
4 · DataFrame.idxmin — row label of column minima
+
Returns a Series indexed by column names. Each value is the row label
+ where that column achieves its minimum.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
5 · DataFrame.idxmax — row label of column maxima
+
Returns a Series indexed by column names, where each entry is the row
+ label of that column's maximum value.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
6 · Edge cases — empty, all-NaN, all-equal
+
Behavior for empty series, series where every value is NaN, and series
+ where all values are equal.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
API Reference
+
// Series
+idxminSeries(series, { skipna?: boolean }): Label // default skipna=true
+idxmaxSeries(series, { skipna?: boolean }): Label
+
+// DataFrame (axis=0 — min/max per column)
+idxminDataFrame(df, { skipna?: boolean }): Series // indexed by column names
+idxmaxDataFrame(df, { skipna?: boolean }): Series
+
+
+
+
+
+
diff --git a/playground/index.html b/playground/index.html
index 48bfbcb9..5074e12b 100644
--- a/playground/index.html
+++ b/playground/index.html
@@ -229,6 +229,11 @@
+ isna / notna — detect missing values in scalars,
+ Series, and DataFrames.
+ ffill / bfill — propagate the last (or next) valid
+ value to fill gaps.
+ Mirrors pd.isna(), Series.ffill(), and
+ DataFrame.bfill() from pandas.
+
+
+
+
+
1 · isna / notna on scalars
+
+ Returns true / false for individual values.
+ null, undefined, and NaN are all
+ considered "missing".
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
2 · isna on a Series
+
+ When passed a Series, isna returns a boolean Series of the
+ same length — true where values are missing.
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
3 · isna on a DataFrame
+
+ Returns a DataFrame of booleans with the same shape — one column per
+ original column, true where missing.
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
4 · Forward-fill (ffillSeries)
+
+ Propagates the last valid value forward to fill gaps. Leading
+ nulls that have no preceding value remain null.
+ Use the optional limit to cap consecutive fills.
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
5 · Backward-fill (bfillSeries)
+
+ Propagates the next valid value backward to fill gaps. Trailing
+ nulls that have no following value remain null.
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
6 · DataFrame forward-fill & backward-fill
+
+ dataFrameFfill and dataFrameBfill apply fill
+ column-wise by default (axis=0). Pass axis: 1 to fill
+ row-wise across columns.
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
API Reference
+
// Module-level missing-value detection
+isna(value: Scalar): boolean
+isna(value: Series): Series<boolean>
+isna(value: DataFrame): DataFrame
+
+notna(value: Scalar): boolean
+notna(value: Series): Series<boolean>
+notna(value: DataFrame): DataFrame
+
+// Aliases
+isnull(...) // same as isna
+notnull(...) // same as notna
+
+// Series forward / backward fill
+ffillSeries(series, options?: { limit?: number | null }): Series
+bfillSeries(series, options?: { limit?: number | null }): Series
+
+// DataFrame forward / backward fill
+dataFrameFfill(df, options?: {
+ limit?: number | null, // max consecutive fills (default: no limit)
+ axis?: 0 | 1 | "index" | "columns", // default 0 (column-wise)
+}): DataFrame
+
+dataFrameBfill(df, options?: {
+ limit?: number | null,
+ axis?: 0 | 1 | "index" | "columns",
+}): DataFrame
+
+
+
+
+
+
diff --git a/playground/pct_change.html b/playground/pct_change.html
new file mode 100644
index 00000000..3576797a
--- /dev/null
+++ b/playground/pct_change.html
@@ -0,0 +1,448 @@
+
+
+
+
+
+ tsb — pct_change
+
+
+
+
+
+
Initializing playground…
+
+ ← Back to roadmap
+ 📊 pct_change — Interactive Playground
+ Compute the fractional change between each element and a prior element.
+ Mirrors pandas.Series.pct_change() /
+ pandas.DataFrame.pct_change().
+ Edit any code block below and press ▶ Run
+ (or Ctrl+Enter) to execute it live in your browser.
+
+
+
+
+
1 · Basic pct_change on a Series
+
pctChangeSeries(series) returns the fractional (not percentage) change
+ from each previous element. The first element is always null.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
2 · Multi-period change
+
The periods option controls the lag. Use periods: 2 to
+ compare each value to the one two steps earlier — useful for
+ half-year-over-half-year comparisons in quarterly data.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
3 · Handling missing values
+
By default, pctChangeSeries forward-fills (fillMethod: "pad")
+ NaN/null values before computing the ratio — so gaps don't break the chain.
+ Set fillMethod: null to propagate NaN instead.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
4 · Limit consecutive fills
+
The limit option caps how many consecutive NaN values get forward-filled.
+ Useful when you want to tolerate short gaps but not bridge large ones.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
5 · DataFrame column-wise pct_change
+
pctChangeDataFrame(df) applies pctChangeSeries to every
+ column independently. Ideal for comparing multiple assets or metrics simultaneously.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
6 · Negative periods (look-forward change)
+
A negative periods value computes the forward change: how much will
+ this element change by the time we reach |periods| steps ahead.
+ Useful for computing returns on a "hold for N periods" strategy.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
API Reference
+
All functions return a new Series/DataFrame of the same shape — inputs are never mutated.
+
// Series
+pctChangeSeries(series, {
+ periods?: number, // default 1 (positive = look back, negative = look forward)
+ fillMethod?: "pad" | "bfill" | null, // default "pad"
+ limit?: number | null, // max consecutive fills; default unlimited
+}): Series
+
+// DataFrame
+pctChangeDataFrame(df, {
+ periods?: number,
+ fillMethod?: "pad" | "bfill" | null,
+ limit?: number | null,
+ axis?: 0 | 1 | "index" | "columns", // default 0 (column-wise)
+}): DataFrame
+
+
+
+
+
+
diff --git a/playground/pivot_table.html b/playground/pivot_table.html
new file mode 100644
index 00000000..252b88ba
--- /dev/null
+++ b/playground/pivot_table.html
@@ -0,0 +1,253 @@
+
+
+
+
+
+ tsb — pivotTableFull
+
+
+
+ tsb — pivotTableFull
+
+ Pivot table with margins aggregates a DataFrame using row and column
+ groupings, and optionally adds "All" row/column totals.
+ It mirrors
+ pandas.pivot_table().
+
+ ← back to index
+
+
+ 1 · Basic pivot table — sum
+
+
Code
+
import { DataFrame, pivotTableFull } from "tsb";
+
+const df = DataFrame.fromColumns({
+ A: ["foo","foo","foo","bar","bar","bar"],
+ C: ["small","large","large","small","small","large"],
+ D: [1, 2, 2, 3, 3, 4],
+});
+
+pivotTableFull(df, { index: "A", columns: "C", values: "D", aggfunc: "sum" });
+
Result
+
+
+
+
+ 2 · Margins — row and column totals
+
+
Code
+
pivotTableFull(df, {
+ index: "A", columns: "C", values: "D",
+ aggfunc: "sum",
+ margins: true, // add "All" row and column
+ margins_name: "All", // default label
+});
+
Result
+
+
+
+
+ 3 · Custom margins label
+
+
Code
+
pivotTableFull(df, {
+ index: "A", columns: "C", values: "D",
+ aggfunc: "sum",
+ margins: true,
+ margins_name: "Total",
+});
+
Result
+
+
+
+
+ 4 · Margins with mean — uses raw data, not cell means
+
+
Code
+
pivotTableFull(df, {
+ index: "A", columns: "C", values: "D",
+ aggfunc: "mean",
+ margins: true,
+});
+
Result
+
+
+
+
+ 5 · Count with margins
+
+
Code
+
pivotTableFull(df, {
+ index: "A", columns: "C", values: "D",
+ aggfunc: "count",
+ margins: true,
+});
+
Result
+
+
+
+
+ 6 · fill_value — replace empty cells
+
+
Code
+
const sparse = DataFrame.fromColumns({
+ product: ["A","A","B","B"],
+ region: ["North","South","North","East"],
+ sales: [100, 200, 300, 150],
+});
+pivotTableFull(sparse, {
+ index: "product", columns: "region", values: "sales",
+ aggfunc: "sum",
+ fill_value: 0, // cells with no data become 0
+ margins: true,
+});
+
Result
+
+
+
+
+ 7 · sort=false — preserve insertion order
+
+
Code
+
pivotTableFull(df, {
+ index: "A", columns: "C", values: "D",
+ aggfunc: "sum",
+ sort: false, // keep rows/columns in first-seen order
+});
+
Result (insertion order)
+
+
+
+
+ 8 · Multiple index columns
+
+
Code
+
const df2 = DataFrame.fromColumns({
+ A: ["foo","foo","foo","bar","bar","bar"],
+ B: ["one","one","two","two","one","one"],
+ C: ["small","large","large","small","small","large"],
+ D: [1, 2, 2, 3, 3, 4],
+});
+pivotTableFull(df2, {
+ index: ["A","B"], columns: "C", values: "D",
+ aggfunc: "sum", margins: true,
+});
+
Result
+
+
+
+
+
+
diff --git a/playground/replace.html b/playground/replace.html
new file mode 100644
index 00000000..19da518a
--- /dev/null
+++ b/playground/replace.html
@@ -0,0 +1,408 @@
+
+
+
+
+
+ tsb — replace (value substitution)
+
+
+
+
+
+
Loading tsb runtime…
+
+
+ ← Back to playground index
+
+ replace — value substitution
+
+ replaceSeries / replaceDataFrame substitute values
+ matching a pattern with a new value.
+ Supports scalar, array, and mapping (Record / Map) replacement specs.
+ Mirrors Series.replace() and DataFrame.replace() from pandas.
+
+
+
+
+
1 · Scalar → scalar replacement
+
+ Replace every occurrence of a single value with another value.
+ Works on numbers, strings, booleans, and null.
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
2 · Array replacement
+
+ Replace a list of values with a single target, or perform pair-wise
+ replacement using two equal-length arrays.
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
3 · Mapping (Record / Map) replacement
+
+ Pass a lookup table as either a plain object (Record<string, Scalar>)
+ or a JavaScript Map for full type flexibility.
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
4 · DataFrame replacement
+
+ replaceDataFrame applies the same spec to all columns by
+ default. Use the columns option to restrict which columns
+ are affected.
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
API Reference
+
// Replace values in a Series
+replaceSeries(
+ series: Series,
+ spec: ReplaceSpec,
+ options?: ReplaceOptions,
+): Series
+
+// Replace values in a DataFrame
+replaceDataFrame(
+ df: DataFrame,
+ spec: ReplaceSpec,
+ options?: DataFrameReplaceOptions,
+): DataFrame
+
+// Replacement spec variants
+type ReplaceSpec =
+ | { toReplace: Scalar; value: Scalar } // scalar → scalar
+ | { toReplace: Scalar[]; value: Scalar } // array → scalar
+ | { toReplace: Scalar[]; value: Scalar[] } // array → array (pair-wise)
+ | { toReplace: Record<string, Scalar> } // Record mapping
+ | { toReplace: Map<Scalar, Scalar> } // Map mapping
+
+// Options
+interface ReplaceOptions {
+ matchNaN?: boolean; // treat NaN===NaN for matching (default: true)
+}
+
+interface DataFrameReplaceOptions extends ReplaceOptions {
+ columns?: string[]; // only replace in these columns (default: all)
+}
+
+
+
+
+
+
diff --git a/playground/sample.html b/playground/sample.html
new file mode 100644
index 00000000..d29ed43a
--- /dev/null
+++ b/playground/sample.html
@@ -0,0 +1,187 @@
+
+
+
+
+
+ tsb — sample
+
+
+
+ tsb — sample
+
+ Randomly sample items from a Series or rows/columns from a DataFrame.
+ Supports fixed count (n), fractional sampling (frac),
+ sampling with replacement (replace), weighted sampling, and
+ deterministic seeding via randomState.
+
+
+ Core concept
+ // Sample 3 items (without replacement by default)
+sampleSeries(s, { n: 3 })
+
+// Sample 50% of rows
+sampleDataFrame(df, { frac: 0.5 })
+
+// Reproducible sample with seed
+sampleSeries(s, { n: 2, randomState: 42 })
+
+// Sample with replacement (bootstrap)
+sampleSeries(s, { n: 10, replace: true })
+
+// Sample columns instead of rows
+sampleDataFrame(df, { n: 2, axis: 1 })
+
+
+ pandas equivalent:
+ s.sample(n=3, random_state=42)
+ df.sample(frac=0.5, replace=False, axis=0)
+
+
+
+ Demo 1 — sampleSeries (n)
+
+
Code
+
const s = new Series({ data: [10, 20, 30, 40, 50], name: "scores" });
+sampleSeries(s, { n: 3, randomState: 7 }).values;
+// deterministic result with seed 7
+
Run
+
+
+
+
+ Demo 2 — sampleSeries with frac
+
+
Code
+
const s = new Series({ data: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] });
+sampleSeries(s, { frac: 0.3, randomState: 42 }).values;
+// 30% of 10 items = 3 items
+
Run
+
+
+
+
+ Demo 3 — bootstrap sampling (replace=true)
+
+
Code
+
const s = new Series({ data: ["a", "b", "c"] });
+// Sample more items than pool size — only possible with replace=true
+sampleSeries(s, { n: 7, replace: true, randomState: 0 }).values;
+
Run
+
+
+
+
+ Demo 4 — weighted sampling
+
+
Code
+
const s = new Series({ data: ["rare", "common", "very_common"] });
+// "very_common" has 10× the weight of "rare"
+sampleSeries(s, { n: 1, weights: [1, 5, 10], randomState: 3 }).values;
+// most likely: ["very_common"]
+
Run
+
+
+
+
+ Demo 5 — sampleDataFrame (rows)
+
+
Code
+
const df = DataFrame.fromRecords([
+ { city: "NYC", pop: 8_336_817 },
+ { city: "LA", pop: 3_979_576 },
+ { city: "Chicago",pop: 2_693_976 },
+ { city: "Houston",pop: 2_320_268 },
+ { city: "Phoenix",pop: 1_680_992 },
+]);
+const sample = sampleDataFrame(df, { n: 3, randomState: 1 });
+sample.col("city").values;
+
Run
+
+
+
+
+ Interactive editor
+
+
Edit and run:
+
+
Run
+
+
+
+
+
+
diff --git a/playground/where_mask.html b/playground/where_mask.html
new file mode 100644
index 00000000..8e3bba6a
--- /dev/null
+++ b/playground/where_mask.html
@@ -0,0 +1,199 @@
+
+
+
+
+
+ tsb — where / mask
+
+
+
+ tsb — where / mask
+
+ Conditional value selection: keep or replace elements based on a boolean
+ condition. These are the TypeScript equivalents of
+ pandas.Series.where / pandas.DataFrame.where and
+ pandas.Series.mask / pandas.DataFrame.mask.
+
+
+ Core concept
+ // where: keep where cond=true, replace with `other` where cond=false
+whereSeries(s, cond, { other: null })
+
+// mask: replace where cond=true with `other`, keep where cond=false
+maskSeries(s, cond, { other: null })
+
+
+ pandas equivalent:
+ s.where(cond, other=np.nan)
+ s.mask(cond, other=np.nan)
+
+
+
+ Demo 1 — whereSeries with boolean array
+
+
Code
+
const s = new Series({ data: [10, 20, 30, 40, 50], name: "prices" });
+whereSeries(s, [true, false, true, false, true]);
+// → [10, null, 30, null, 50]
+
Run
+
+
+
+
+ Demo 2 — maskSeries with callable condition
+
+
Code
+
const s = new Series({ data: [1, 2, 3, 4, 5] });
+// Replace values > 3 with -1
+maskSeries(s, (v) => v > 3, { other: -1 });
+// → [1, 2, 3, -1, -1]
+
Run
+
+
+
+
+ Demo 3 — whereDataFrame with 2-D condition
+
+
Code
+
const df = DataFrame.fromColumns({
+ a: [1, 2, 3],
+ b: [4, 5, 6],
+});
+const cond = [[true, false], [false, true], [true, true]];
+whereDataFrame(df, cond);
+// a: [1, null, 3]
+// b: [null, 5, 6]
+
Run
+
+
+
+
+ Demo 4 — whereDataFrame with Series condition (axis=0)
+
+
Code
+
const df = DataFrame.fromColumns({
+ a: [1, 2, 3],
+ b: [10, 20, 30],
+});
+// Keep rows 0 and 2 only, replace row 1 across all columns
+const rowCond = new Series({ data: [true, false, true], index: [0, 1, 2] });
+whereDataFrame(df, rowCond, { axis: 0, other: 0 });
+// a: [1, 0, 3]
+// b: [10, 0, 30]
+
Run
+
+
+
+
+ Demo 5 — maskDataFrame with DataFrame condition
+
+
Code
+
const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+const condDf = DataFrame.fromColumns({
+ a: [false, true, false],
+ b: [true, false, true],
+});
+maskDataFrame(df, condDf, { other: 99 });
+// a: [1, 99, 3]
+// b: [99, 5, 99]
+
Run
+
+
+
+
+ Demo 6 — Interactive editor
+
+
Edit and run
+
+
Run
+
+
+
+
+
+
diff --git a/src/core/astype.ts b/src/core/astype.ts
new file mode 100644
index 00000000..6a9403be
--- /dev/null
+++ b/src/core/astype.ts
@@ -0,0 +1,245 @@
+/**
+ * astype — dtype coercion for Series and DataFrame.
+ *
+ * Mirrors `pandas.Series.astype` and `pandas.DataFrame.astype`: cast values
+ * to a target dtype. Missing values (`null`/`undefined`) are never cast and
+ * always come out as `null`; by default (`errors: "raise"`, as in pandas)
+ * any error thrown while casting a value is rethrown.
+ *
+ * @module
+ */
+
+import { DataFrame } from "./frame.ts";
+import { Series } from "./series.ts";
+import { Dtype } from "./dtype.ts";
+import type { DtypeName, Scalar } from "../types.ts";
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** Type guard that is `true` only for `null` and `undefined`. */
+function isNull(v: Scalar): v is null | undefined {
+  // Loose equality against null matches exactly null and undefined.
+  return v == null;
+}
+
+/**
+ * Inclusive `[lo, hi]` clamp bounds for each integer dtype name, looked up
+ * by `dtype.name` in {@link castScalar}.
+ *
+ * The 64-bit entries are bounded by `Number.MIN_SAFE_INTEGER` /
+ * `Number.MAX_SAFE_INTEGER` rather than the true 64-bit limits.
+ *
+ * NOTE(review): the `unsigned` flag is not read by {@link castScalar};
+ * confirm whether any other consumer needs it.
+ */
+const INT_RANGES: Readonly<
+ Record
+> = {
+ int8: { lo: -128, hi: 127, unsigned: false },
+ int16: { lo: -32768, hi: 32767, unsigned: false },
+ int32: { lo: -2147483648, hi: 2147483647, unsigned: false },
+ int64: { lo: Number.MIN_SAFE_INTEGER, hi: Number.MAX_SAFE_INTEGER, unsigned: false },
+ uint8: { lo: 0, hi: 255, unsigned: true },
+ uint16: { lo: 0, hi: 65535, unsigned: true },
+ uint32: { lo: 0, hi: 4294967295, unsigned: true },
+ uint64: { lo: 0, hi: Number.MAX_SAFE_INTEGER, unsigned: true },
+};
+
+/**
+ * Cast a single scalar value to the target dtype.
+ *
+ * `null` and `undefined` always map to `null`. Otherwise the rule depends on
+ * `dtype.kind`:
+ * - **int/uint**: booleans become `1`/`0`; anything else is converted with
+ *   `Number(v)` (a `Date` contributes its epoch-millisecond timestamp),
+ *   truncated toward zero and clamped to the `INT_RANGES` entry for
+ *   `dtype.name` when there is one. Values that convert to `NaN` —
+ *   unparsable strings, `NaN` itself, invalid dates — become `null`.
+ * - **float**: booleans become `1`/`0`, a `Date` becomes its timestamp, and
+ *   everything else is `Number(v)`, so unparsable strings become `NaN`.
+ * - **bool**: numbers are `true` unless they are `0` or `NaN`; a `Date` is
+ *   always `true`; everything else uses `Boolean(v)`.
+ * - **string**: a `Date` becomes its ISO-8601 string; everything else is
+ *   `String(v)`.
+ * - **datetime**: a `Date` is returned as-is; numbers are read as epoch
+ *   milliseconds and other values are parsed from `String(v)`. Inputs that
+ *   do not produce a valid date become `null`.
+ * - any other kind (object/category/timedelta): returned unchanged.
+ *
+ * @param v - Value to convert.
+ * @param dtype - Target dtype; only `kind` and `name` are consulted.
+ * @returns The converted value, or `null` for missing/unconvertible input.
+ * @throws RangeError when an invalid `Date` is cast to a string dtype
+ *   (`Date.prototype.toISOString` rejects it).
+ */
+export function castScalar(v: Scalar, dtype: Dtype): Scalar {
+  if (isNull(v)) {
+    return null;
+  }
+
+  const k = dtype.kind;
+
+  if (k === "int" || k === "uint") {
+    if (typeof v === "boolean") {
+      return v ? 1 : 0;
+    }
+    // Dates share the numeric path so that invalid dates (NaN timestamp)
+    // become null and timestamps obey the same clamp as any other number.
+    const n = v instanceof Date ? v.getTime() : Number(v);
+    if (Number.isNaN(n)) {
+      return null;
+    }
+    const t = Math.trunc(n);
+    const range = INT_RANGES[dtype.name];
+    if (range === undefined) {
+      return t;
+    }
+    return Math.max(range.lo, Math.min(range.hi, t));
+  }
+
+  if (k === "float") {
+    if (typeof v === "boolean") {
+      return v ? 1.0 : 0.0;
+    }
+    if (v instanceof Date) {
+      return v.getTime();
+    }
+    return Number(v);
+  }
+
+  if (k === "bool") {
+    if (typeof v === "number") {
+      return !Number.isNaN(v) && v !== 0;
+    }
+    if (v instanceof Date) {
+      return true;
+    }
+    return Boolean(v);
+  }
+
+  if (k === "string") {
+    if (v instanceof Date) {
+      return v.toISOString();
+    }
+    return String(v);
+  }
+
+  if (k === "datetime") {
+    if (v instanceof Date) {
+      return v;
+    }
+    // Numbers are epoch milliseconds; everything else is parsed as text.
+    // Both routes are validated so a NaN/out-of-range number yields null
+    // exactly like an unparsable string does.
+    const d = typeof v === "number" ? new Date(v) : new Date(String(v));
+    return Number.isNaN(d.getTime()) ? null : d;
+  }
+
+  // object / category / timedelta — return unchanged
+  return v;
+}
+
+// ─── AstypeOptions ────────────────────────────────────────────────────────────
+
+/** Options accepted by {@link astypeSeries} and {@link astype}. */
+export interface AstypeOptions {
+ /**
+ * Error policy applied when casting a value throws.
+ * - `"raise"`: the error is rethrown to the caller.
+ * - `"ignore"`: the original, uncast value is kept in the result.
+ *
+ * @default "raise"
+ */
+ readonly errors?: "raise" | "ignore";
+}
+
+// ─── astypeSeries ─────────────────────────────────────────────────────────────
+
+/**
+ * Produce a copy of a Series with every value coerced to `dtype`.
+ *
+ * Each value goes through {@link castScalar}. The result reuses the source
+ * Series' index and name and is tagged with the target dtype.
+ *
+ * @param s - Series to convert.
+ * @param dtype - Target dtype, as a `Dtype` instance or a dtype name.
+ * @param options - `errors: "ignore"` keeps the original value whenever the
+ *   cast throws; otherwise the error propagates.
+ * @returns A new Series holding the converted values.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [1.9, 2.1, 3.7], name: "x" });
+ * const si = astypeSeries(s, "int64");
+ * si.values; // [1, 2, 3]
+ * si.dtype.name; // "int64"
+ * ```
+ */
+export function astypeSeries(
+  s: Series,
+  dtype: DtypeName | Dtype,
+  options: AstypeOptions = {},
+): Series {
+  const targetDtype = dtype instanceof Dtype ? dtype : Dtype.from(dtype as DtypeName);
+  const keepOnFailure = options.errors === "ignore";
+
+  const casted: Scalar[] = Array.from(s.values, (value) => {
+    try {
+      return castScalar(value, targetDtype);
+    } catch (e) {
+      if (keepOnFailure) {
+        return value;
+      }
+      throw e;
+    }
+  });
+
+  return new Series({
+    data: casted,
+    index: s.index,
+    dtype: targetDtype,
+    name: s.name,
+  });
+}
+
+// ─── DataFrame astype ─────────────────────────────────────────────────────────
+
+/**
+ * Options for {@link astype} (DataFrame variant).
+ */
+export interface DataFrameAstypeOptions extends AstypeOptions {
+ /**
+ * Accepted for pandas API compatibility.
+ *
+ * NOTE(review): neither {@link astype} nor {@link astypeSeries} reads this
+ * flag in this module. Columns that are missing from a `Record` dtype are
+ * carried over unchanged regardless of its value.
+ */
+ readonly copy?: boolean;
+}
+
+/**
+ * Cast one or more columns in a DataFrame to the specified dtype(s).
+ *
+ * - Pass a single `DtypeName` or `Dtype` to cast **all** columns.
+ * - Pass a `Record` to cast individual columns.
+ * Columns not listed are returned unchanged. Only the frame's own column
+ * names are looked up in the record, so record keys that match no column
+ * have no effect.
+ *
+ * `options` is forwarded unchanged to {@link astypeSeries} for every column
+ * that is cast. The result is built on the source frame's index.
+ *
+ * Returns a new DataFrame; the original is not modified.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1.5, 2.7], b: ["3", "4"] });
+ *
+ * // Cast all columns to float64
+ * astype(df, "float64");
+ *
+ * // Cast only column "b" to int64
+ * astype(df, { b: "int64" });
+ * ```
+ */
+export function astype(
+ df: DataFrame,
+ dtype:
+ | DtypeName
+ | Dtype
+ | Readonly>,
+ options: DataFrameAstypeOptions = {},
+): DataFrame {
+ const colMap = new Map>();
+
+ // A string or Dtype instance means "one dtype for every column";
+ // anything else is treated as a per-column mapping.
+ const isSingleDtype =
+ typeof dtype === "string" || dtype instanceof Dtype;
+
+ for (const name of df.columns.values) {
+ const col = df.col(name);
+ if (isSingleDtype) {
+ colMap.set(name, astypeSeries(col, dtype as DtypeName | Dtype, options));
+ } else {
+ const mapping = dtype as Readonly>;
+ const target = mapping[name];
+ if (target !== undefined) {
+ colMap.set(name, astypeSeries(col, target, options));
+ } else {
+ // No entry for this column: reuse the existing Series object as-is.
+ colMap.set(name, col);
+ }
+ }
+ }
+
+ return new DataFrame(colMap, df.index);
+}
diff --git a/src/core/index.ts b/src/core/index.ts
index ada43b65..b8513810 100644
--- a/src/core/index.ts
+++ b/src/core/index.ts
@@ -15,3 +15,7 @@ export { CategoricalAccessor } from "./cat_accessor.ts";
export type { CatSeriesLike } from "./cat_accessor.ts";
export { MultiIndex } from "./multi_index.ts";
export type { MultiIndexOptions } from "./multi_index.ts";
+export { astypeSeries, astype, castScalar } from "./astype.ts";
+export type { AstypeOptions, DataFrameAstypeOptions } from "./astype.ts";
+export { sampleSeries, sampleDataFrame } from "./sample.ts";
+export type { SampleOptions } from "./sample.ts";
diff --git a/src/core/sample.ts b/src/core/sample.ts
new file mode 100644
index 00000000..869ce7b8
--- /dev/null
+++ b/src/core/sample.ts
@@ -0,0 +1,334 @@
+/**
+ * sample — random sampling from Series and DataFrame.
+ *
+ * Mirrors:
+ * - `pandas.Series.sample(n, frac, replace, weights, random_state, axis)`
+ * - `pandas.DataFrame.sample(n, frac, replace, weights, random_state, axis)`
+ *
+ * @module
+ */
+
+import { DataFrame } from "./frame.ts";
+import { Index } from "./base-index.ts";
+import { Series } from "./series.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/** Options for {@link sampleSeries} and {@link sampleDataFrame}. */
+export interface SampleOptions {
+ /**
+ * Number of items to return. Mutually exclusive with `frac`; passing both
+ * throws.
+ * @defaultValue `1` (when neither `n` nor `frac` is provided)
+ */
+ readonly n?: number;
+ /**
+ * Fraction of items to return (e.g. `0.5` for 50%).
+ * Mutually exclusive with `n`. A fraction that resolves to more items than
+ * the pool holds requires `replace: true`.
+ */
+ readonly frac?: number;
+ /**
+ * Allow sampling with replacement (the same item may appear multiple times).
+ * @defaultValue `false`
+ */
+ readonly replace?: boolean;
+ /**
+ * Weights for each item. Must have the same length as the sampled axis
+ * (items of a Series, rows of a DataFrame, or columns when `axis` is 1).
+ * Weights do not need to sum to 1 — they are normalized internally.
+ * Missing weights (null/undefined/NaN) and negative weights are treated
+ * as 0; at least one weight must be positive.
+ */
+ readonly weights?: readonly (number | null | undefined)[];
+ /**
+ * Seed for the random number generator. When provided, sampling is
+ * deterministic (same seed + same data → same result).
+ * Uses a simple LCG (linear congruential generator); when omitted,
+ * `Math.random()` is used instead.
+ */
+ readonly randomState?: number;
+ /**
+ * Axis to sample along (DataFrame only).
+ * - `0` or `"index"` (default): sample rows.
+ * - `1` or `"columns"`: sample columns.
+ */
+ readonly axis?: 0 | 1 | "index" | "columns";
+}
+
+// ─── seeded RNG ───────────────────────────────────────────────────────────────
+
+/**
+ * Advance a 32-bit linear congruential generator by one step.
+ *
+ * Uses the Numerical Recipes multiplier/increment pair with modulus 2^32.
+ *
+ * @param seed - Current generator state.
+ * @returns `[nextState, sample]`, where `sample` is `nextState / 2^32`, a
+ *   float in [0, 1).
+ */
+function lcgNext(seed: number): [number, number] {
+  const MODULUS = 2 ** 32;
+  // `>>> 0` reduces the product to an unsigned 32-bit integer, which is the
+  // same as taking it modulo 2^32.
+  const nextSeed = (1664525 * seed + 1013904223) >>> 0;
+  return [nextSeed, nextSeed / MODULUS];
+}
+
+/**
+ * Create a generator of floats in [0, 1).
+ *
+ * With a seed, the generator steps a private LCG state (the seed coerced to
+ * an unsigned 32-bit integer), so the sequence is reproducible. Without a
+ * seed it delegates to `Math.random()`.
+ */
+function makeRng(seed: number | undefined): () => number {
+  if (seed !== undefined) {
+    let state = seed >>> 0; // ensure 32-bit unsigned
+    return () => {
+      const step = lcgNext(state);
+      state = step[0];
+      return step[1];
+    };
+  }
+  return () => Math.random();
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/**
+ * Resolve how many items to sample from a pool of size `poolSize`.
+ *
+ * - With `frac`, the count is `frac * poolSize` rounded to the nearest
+ *   integer, ties going to the even neighbour (the behaviour of Python's
+ *   `round`, which pandas applies here). Rounding rather than flooring
+ *   keeps floating-point error from dropping an item: `0.57 * 100`
+ *   evaluates to `56.99999999999999`.
+ * - With `n`, the count is `n` itself.
+ * - With neither, the count is `1`.
+ *
+ * @param poolSize - Number of items available.
+ * @param n - Requested item count, if any.
+ * @param frac - Requested fraction of the pool, if any.
+ * @returns A non-negative integer count.
+ * @throws Error when both `n` and `frac` are given.
+ * @throws RangeError when `frac` is negative or not finite, or when `n` is
+ *   negative or not an integer.
+ */
+function resolveN(poolSize: number, n: number | undefined, frac: number | undefined): number {
+  if (n !== undefined && frac !== undefined) {
+    throw new Error("Sample: specify either `n` or `frac`, not both.");
+  }
+  if (frac !== undefined) {
+    // NaN would silently yield an empty sample and Infinity an unbounded
+    // one, so both are rejected up front.
+    if (!Number.isFinite(frac)) {
+      throw new RangeError("Sample: `frac` must be a finite number.");
+    }
+    if (frac < 0) {
+      throw new RangeError("Sample: `frac` must be >= 0.");
+    }
+    const exact = frac * poolSize;
+    const lower = Math.floor(exact);
+    const remainder = exact - lower;
+    if (remainder < 0.5) {
+      return lower;
+    }
+    if (remainder > 0.5) {
+      return lower + 1;
+    }
+    // Exact tie: pick the even neighbour.
+    return lower % 2 === 0 ? lower : lower + 1;
+  }
+  if (n !== undefined) {
+    // A fractional count would make the individual samplers disagree on
+    // how many items to draw.
+    if (!Number.isInteger(n)) {
+      throw new RangeError("Sample: `n` must be an integer.");
+    }
+    if (n < 0) {
+      throw new RangeError("Sample: `n` must be >= 0.");
+    }
+    return n;
+  }
+  return 1;
+}
+
+/**
+ * Normalize raw weights to probabilities summing to 1.
+ *
+ * `null`, `undefined`, `NaN`, non-numeric and negative entries count as a
+ * weight of 0. Infinite weights are rejected, because dividing by an
+ * infinite total would produce `NaN` or all-zero probabilities.
+ *
+ * @param rawWeights - One weight per pool item.
+ * @param poolSize - Expected number of weights.
+ * @returns Probabilities in the same order as `rawWeights`.
+ * @throws RangeError when the length differs from `poolSize`, or when a
+ *   weight (or the sum of all weights) is infinite.
+ * @throws Error when every weight is zero.
+ */
+function normalizeWeights(
+  rawWeights: readonly (number | null | undefined)[],
+  poolSize: number,
+): number[] {
+  if (rawWeights.length !== poolSize) {
+    throw new RangeError(
+      `Sample: weights length (${rawWeights.length}) must equal pool size (${poolSize}).`,
+    );
+  }
+  const ws = rawWeights.map((w) => {
+    const v = w ?? 0;
+    if (typeof v !== "number" || Number.isNaN(v) || v < 0) {
+      return 0;
+    }
+    if (v === Number.POSITIVE_INFINITY) {
+      throw new RangeError("Sample: weights must be finite.");
+    }
+    return v;
+  });
+  const total = ws.reduce((s, v) => s + v, 0);
+  if (total === 0) {
+    throw new Error("Sample: all weights are zero.");
+  }
+  // Very large finite weights can still overflow the running sum.
+  if (!Number.isFinite(total)) {
+    throw new RangeError("Sample: sum of weights must be finite.");
+  }
+  return ws.map((w) => w / total);
+}
+
+/**
+ * Weighted random sample without replacement.
+ *
+ * Every item with a positive probability gets the key `ln(u) / p`, where
+ * `u` is a fresh draw from `rng`, and the `k` largest keys win. This ranks
+ * items exactly like the classic `u^(1/p)` keys, but the logarithmic form
+ * does not underflow to 0 for small probabilities, which would otherwise
+ * collapse large pools into index order.
+ *
+ * Items whose probability is 0 are never selected.
+ *
+ * @param poolSize - Pool size; kept for signature parity. The pool is
+ *   defined by `probs`.
+ * @param k - Number of distinct positions to draw.
+ * @param probs - Probability of each position.
+ * @param rng - Source of floats in [0, 1).
+ * @returns `k` distinct positions, highest key first.
+ * @throws RangeError when `k` exceeds the number of positions with a
+ *   positive probability.
+ */
+function weightedSampleWithoutReplacement(
+  poolSize: number,
+  k: number,
+  probs: number[],
+  rng: () => number,
+): number[] {
+  const keyed: Array<[number, number]> = [];
+  for (let i = 0; i < probs.length; i++) {
+    // Draw for every position, including zero-weight ones, so a seeded
+    // stream lines up with the positions the same way for any weights.
+    const r = rng();
+    const p = probs[i];
+    if (p !== undefined && p > 0) {
+      keyed.push([Math.log(r) / p, i]);
+    }
+  }
+  if (k > keyed.length) {
+    throw new RangeError(
+      `Sample: cannot sample ${k} items without replacement: only ${keyed.length} items have non-zero weight.`,
+    );
+  }
+  // Descending by key. Explicit comparisons stay well-defined when a draw
+  // of exactly 0 produces a key of -Infinity.
+  keyed.sort((a, b) => (a[0] < b[0] ? 1 : a[0] > b[0] ? -1 : 0));
+  return keyed.slice(0, k).map(([, i]) => i);
+}
+
+/**
+ * Weighted sample WITH replacement: pick `k` positions by inverting the
+ * cumulative distribution of `probs`.
+ *
+ * For each draw `r` the chosen position is the first one whose cumulative
+ * probability is strictly greater than `r`. The strict comparison keeps
+ * zero-weight positions from being selected even when `r` is exactly 0.
+ * If rounding leaves the total slightly below `r`, the last position with
+ * a positive probability is used.
+ *
+ * @param k - Number of draws.
+ * @param probs - Probability of each position (expected to sum to 1).
+ * @param rng - Source of floats in [0, 1).
+ * @returns `k` positions; repeats are possible.
+ * @throws Error when `k > 0` and no position has a positive probability.
+ */
+function weightedSampleWithReplacement(
+  k: number,
+  probs: number[],
+  rng: () => number,
+): number[] {
+  const cumulative: number[] = [];
+  let sum = 0;
+  let lastPositive = -1;
+  probs.forEach((p, i) => {
+    sum += p;
+    cumulative.push(sum);
+    if (p > 0) {
+      lastPositive = i;
+    }
+  });
+
+  const result: number[] = [];
+  if (k > 0 && lastPositive < 0) {
+    throw new Error("Sample: all weights are zero.");
+  }
+  for (let i = 0; i < k; i++) {
+    const r = rng();
+    // Binary search for the first cumulative value > r, capped at the last
+    // position that can legitimately be drawn.
+    let lo = 0;
+    let hi = lastPositive;
+    while (lo < hi) {
+      const mid = (lo + hi) >>> 1;
+      if ((cumulative[mid] ?? 0) > r) {
+        hi = mid;
+      } else {
+        lo = mid + 1;
+      }
+    }
+    result.push(lo);
+  }
+  return result;
+}
+
+/**
+ * Unweighted sample without replacement via a partial Fisher-Yates shuffle.
+ *
+ * Only the first `k` slots are shuffled: slot `i` is swapped with a slot
+ * chosen from `[i, poolSize)`, and the shuffled prefix is returned.
+ *
+ * @param poolSize - Number of positions to choose from.
+ * @param k - Number of positions to return.
+ * @param rng - Source of floats in [0, 1).
+ */
+function fisherYatesSample(poolSize: number, k: number, rng: () => number): number[] {
+  const order = Array.from({ length: poolSize }, (_, pos) => pos);
+  let cursor = 0;
+  while (cursor < k) {
+    const pick = cursor + Math.floor(rng() * (poolSize - cursor));
+    const here = order[cursor];
+    const there = order[pick];
+    // Both reads are in range for valid input; the guard only satisfies
+    // strict indexed-access checking.
+    if (here !== undefined && there !== undefined) {
+      order[cursor] = there;
+      order[pick] = here;
+    }
+    cursor += 1;
+  }
+  return order.slice(0, k);
+}
+
+/**
+ * Unweighted sample with replacement: each of the `k` draws is an
+ * independent integer in [0, poolSize).
+ */
+function uniformSampleWithReplacement(poolSize: number, k: number, rng: () => number): number[] {
+  const drawn: number[] = [];
+  let remaining = k;
+  while (remaining > 0) {
+    drawn.push(Math.floor(rng() * poolSize));
+    remaining -= 1;
+  }
+  return drawn;
+}
+
+/**
+ * Core sampling logic: choose which positions of a pool to return.
+ *
+ * Dispatches on the `weights` / `replace` combination to one of the four
+ * sampling routines. An empty pool or `k === 0` short-circuits to `[]`.
+ *
+ * @throws RangeError when `k` exceeds `poolSize` and `replace` is false.
+ */
+function samplePositions(
+  poolSize: number,
+  k: number,
+  replace: boolean,
+  weights: readonly (number | null | undefined)[] | undefined,
+  rng: () => number,
+): number[] {
+  const nothingToDraw = poolSize === 0 || k === 0;
+  if (nothingToDraw) {
+    return [];
+  }
+  if (k > poolSize && !replace) {
+    throw new RangeError(
+      `Sample: cannot sample ${k} items without replacement from a pool of ${poolSize}.`,
+    );
+  }
+
+  if (weights === undefined) {
+    return replace
+      ? uniformSampleWithReplacement(poolSize, k, rng)
+      : fisherYatesSample(poolSize, k, rng);
+  }
+
+  const probs = normalizeWeights(weights, poolSize);
+  return replace
+    ? weightedSampleWithReplacement(k, probs, rng)
+    : weightedSampleWithoutReplacement(poolSize, k, probs, rng);
+}
+
+// ─── Series sample ────────────────────────────────────────────────────────────
+
+/**
+ * Draw a random sample of items from a Series.
+ *
+ * The result keeps the source dtype and name, and carries the index labels
+ * of the sampled positions in the order they were drawn.
+ *
+ * @param series - Series to sample from.
+ * @param options - Sampling options; see {@link SampleOptions}.
+ * @returns A new Series containing the sampled items.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [10, 20, 30, 40, 50] });
+ * // Same seed and data always produce the same three items.
+ * sampleSeries(s, { n: 3, randomState: 42 }).values;
+ * ```
+ */
+export function sampleSeries(series: Series, options?: SampleOptions): Series {
+  const opts = options ?? {};
+  const size = series.values.length;
+  const k = resolveN(size, opts.n, opts.frac);
+  const rng = makeRng(opts.randomState);
+  const positions = samplePositions(size, k, opts.replace ?? false, opts.weights, rng);
+
+  const pickedValues: Scalar[] = [];
+  const pickedLabels: Label[] = [];
+  for (const pos of positions) {
+    pickedValues.push(series.values[pos] ?? null);
+    pickedLabels.push(series.index.at(pos) ?? null);
+  }
+
+  return new Series({
+    data: pickedValues,
+    index: new Index(pickedLabels),
+    name: series.name ?? undefined,
+    dtype: series.dtype,
+  });
+}
+
+// ─── DataFrame sample ──────────────────────────────────────────────────────────
+
+/**
+ * Draw a random sample of rows (or columns) from a DataFrame.
+ *
+ * `axis` of `1` or `"columns"` samples columns; any other value, including
+ * the default, samples rows.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromRecords([
+ *   { a: 1 }, { a: 2 }, { a: 3 }, { a: 4 }, { a: 5 },
+ * ]);
+ * sampleDataFrame(df, { n: 2, randomState: 0 }).shape; // [2, 1]
+ * ```
+ */
+export function sampleDataFrame(df: DataFrame, options?: SampleOptions): DataFrame {
+  const opts = options ?? {};
+  switch (opts.axis ?? 0) {
+    case 1:
+    case "columns":
+      return sampleDataFrameColumns(df, opts);
+    default:
+      return sampleDataFrameRows(df, opts);
+  }
+}
+
+/**
+ * Sample rows from a DataFrame.
+ *
+ * Row positions are drawn once and then applied to every column, so the
+ * sampled rows stay aligned across columns. The new frame's index holds
+ * the labels of the sampled rows in draw order, and each rebuilt column
+ * keeps its original dtype.
+ */
+function sampleDataFrameRows(df: DataFrame, opts: SampleOptions): DataFrame {
+ const nRows = df.shape[0];
+ const k = resolveN(nRows, opts.n, opts.frac);
+ const replace = opts.replace ?? false;
+ const rng = makeRng(opts.randomState);
+
+ const positions = samplePositions(nRows, k, replace, opts.weights, rng);
+ const newLabels: Label[] = positions.map((i) => df.index.at(i) ?? null);
+ const newIndex = new Index(newLabels);
+
+ const colMap = new Map>();
+ for (const name of df.columns.values) {
+ const col = df.col(name);
+ // Gather this column's values at the sampled positions.
+ const newVals: Scalar[] = positions.map((i) => col.values[i] ?? null);
+ colMap.set(
+ name,
+ new Series({
+ data: newVals,
+ index: newIndex,
+ dtype: col.dtype,
+ }),
+ );
+ }
+ return new DataFrame(colMap, newIndex);
+}
+
+/**
+ * Sample columns from a DataFrame.
+ *
+ * Column positions are drawn from the frame's column list, so `weights`,
+ * when given, must have one entry per column. The selected column Series
+ * are reused as-is and the row index is unchanged.
+ *
+ * NOTE(review): the result is keyed by column name in a Map, so when
+ * `replace` is true and the same column is drawn more than once it appears
+ * only once and the result has fewer than `k` columns.
+ */
+function sampleDataFrameColumns(df: DataFrame, opts: SampleOptions): DataFrame {
+ const allCols = df.columns.values;
+ const nCols = allCols.length;
+ const k = resolveN(nCols, opts.n, opts.frac);
+ const replace = opts.replace ?? false;
+ const rng = makeRng(opts.randomState);
+
+ const positions = samplePositions(nCols, k, replace, opts.weights, rng);
+
+ const colMap = new Map>();
+ for (const pos of positions) {
+ const name = allCols[pos];
+ if (name !== undefined) {
+ const col = df.col(name);
+ colMap.set(name, col);
+ }
+ }
+ return new DataFrame(colMap, df.index);
+}
diff --git a/src/index.ts b/src/index.ts
index 1dd0aa57..c1557e84 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -74,6 +74,8 @@ export type {
} from "./reshape/index.ts";
export { stack, unstack, STACK_DEFAULT_SEP } from "./reshape/index.ts";
export type { StackOptions, UnstackOptions } from "./reshape/index.ts";
+export { pivotTableFull } from "./reshape/index.ts";
+export type { PivotTableFullOptions, PivotAggFunc } from "./reshape/index.ts";
export { MultiIndex } from "./core/index.ts";
export type { MultiIndexOptions } from "./core/index.ts";
export { rankSeries, rankDataFrame } from "./stats/index.ts";
@@ -107,3 +109,86 @@ export {
export type { ClipOptions, RoundOptions, DataFrameElemOptions } from "./stats/index.ts";
export { valueCounts, dataFrameValueCounts } from "./stats/index.ts";
export type { ValueCountsOptions, DataFrameValueCountsOptions } from "./stats/index.ts";
+export {
+ isna,
+ notna,
+ isnull,
+ notnull,
+ ffillSeries,
+ bfillSeries,
+ dataFrameFfill,
+ dataFrameBfill,
+} from "./stats/index.ts";
+export type { FillDirectionOptions, DataFrameFillOptions } from "./stats/index.ts";
+export { pctChangeSeries, pctChangeDataFrame } from "./stats/index.ts";
+export type {
+ PctChangeFillMethod,
+ PctChangeOptions,
+ DataFramePctChangeOptions,
+} from "./stats/index.ts";
+export { idxminSeries, idxmaxSeries, idxminDataFrame, idxmaxDataFrame } from "./stats/index.ts";
+export type { IdxOptions, IdxDataFrameOptions } from "./stats/index.ts";
+export { astypeSeries, astype, castScalar } from "./core/index.ts";
+export type { AstypeOptions, DataFrameAstypeOptions } from "./core/index.ts";
+export { replaceSeries, replaceDataFrame } from "./stats/index.ts";
+export type {
+ ReplaceMapping,
+ ReplaceSpec,
+ ReplaceOptions,
+ DataFrameReplaceOptions,
+} from "./stats/index.ts";
+export { whereSeries, maskSeries, whereDataFrame, maskDataFrame } from "./stats/index.ts";
+export type {
+ SeriesCond,
+ DataFrameCond,
+ WhereOptions,
+ WhereDataFrameOptions,
+} from "./stats/index.ts";
+export { diffSeries, diffDataFrame, shiftSeries, shiftDataFrame } from "./stats/index.ts";
+export type {
+ DiffOptions,
+ DataFrameDiffOptions,
+ ShiftOptions,
+ DataFrameShiftOptions,
+} from "./stats/index.ts";
+export {
+ duplicatedSeries,
+ duplicatedDataFrame,
+ dropDuplicatesSeries,
+ dropDuplicatesDataFrame,
+} from "./stats/index.ts";
+export type { KeepPolicy, DuplicatedOptions, DataFrameDuplicatedOptions } from "./stats/index.ts";
+export { sampleSeries, sampleDataFrame } from "./core/index.ts";
+export type { SampleOptions } from "./core/index.ts";
+export { clipAdvancedSeries, clipAdvancedDataFrame } from "./stats/index.ts";
+export type {
+ SeriesBound,
+ DataFrameBound,
+ ClipAdvancedSeriesOptions,
+ ClipAdvancedDataFrameOptions,
+} from "./stats/index.ts";
+export {
+ applySeries,
+ mapSeries,
+ applyDataFrame,
+ applyExpandDataFrame,
+ mapDataFrame,
+} from "./stats/index.ts";
+export type {
+ MapLookup,
+ ApplyDataFrameOptions,
+ ApplyExpandDataFrameOptions,
+} from "./stats/index.ts";
+export { cut, qcut, cutCodes, cutCategories } from "./stats/index.ts";
+export type {
+ CutOptions,
+ QcutOptions,
+ CutResult,
+ CutResultWithBins,
+} from "./stats/index.ts";
+export { Interval, IntervalIndex, intervalRange } from "./stats/index.ts";
+export type { ClosedType, IntervalOptions, IntervalRangeOptions } from "./stats/index.ts";
+export { getDummies, getDummiesSeries, getDummiesDataFrame, fromDummies } from "./stats/index.ts";
+export type { GetDummiesOptions, FromDummiesOptions } from "./stats/index.ts";
+export { crosstab, crosstabSeries } from "./stats/index.ts";
+export type { CrosstabOptions, CrosstabAggFunc, CrosstabNormalize } from "./stats/index.ts";
diff --git a/src/reshape/index.ts b/src/reshape/index.ts
index f15320ca..176355d3 100644
--- a/src/reshape/index.ts
+++ b/src/reshape/index.ts
@@ -10,3 +10,5 @@ export { pivot, pivotTable } from "./pivot.ts";
export type { PivotOptions, PivotTableOptions, AggFuncName } from "./pivot.ts";
export { stack, unstack, STACK_DEFAULT_SEP } from "./stack_unstack.ts";
export type { StackOptions, UnstackOptions } from "./stack_unstack.ts";
+export { pivotTableFull } from "./pivot_table.ts";
+export type { PivotTableFullOptions, PivotAggFunc } from "./pivot_table.ts";
diff --git a/src/reshape/pivot_table.ts b/src/reshape/pivot_table.ts
new file mode 100644
index 00000000..d1268cc4
--- /dev/null
+++ b/src/reshape/pivot_table.ts
@@ -0,0 +1,396 @@
+/**
+ * pivot_table — enhanced pivot table with margins (row/column totals).
+ *
+ * Mirrors `pandas.pivot_table()` with full margins support:
+ * - All aggregation functions: mean, sum, min, max, count, first, last
+ * - `margins=true` adds an "All" row and "All" column with marginal aggregates
+ * - `margins_name` customises the All label (default `"All"`)
+ * - `sort=true` sorts row and column labels lexicographically (default `true`)
+ * - `fill_value` replaces empty cells
+ * - `dropna` omits result rows in which every cell is missing (the margins row is always kept)
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, pivotTableFull } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({
+ * A: ["foo","foo","foo","bar","bar","bar"],
+ * B: ["one","one","two","two","one","one"],
+ * C: ["small","large","large","small","small","large"],
+ * D: [1, 2, 2, 3, 3, 4],
+ * });
+ *
+ * pivotTableFull(df, { index: "A", columns: "C", values: "D",
+ * aggfunc: "sum", margins: true });
+ * // C large small All
+ * // A
+ * // bar 4 6 10
+ * // foo 4 1 5
+ * // All 8 7 15
+ * ```
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Index } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── public API types ─────────────────────────────────────────────────────────
+
+/**
+ * Aggregation function for {@link pivotTableFull}.
+ *
+ * Only non-missing numeric observations reach the aggregator. `"first"` and
+ * `"last"` follow source row order; `"count"` is the number of such observations.
+ */
+export type PivotAggFunc = "mean" | "sum" | "min" | "max" | "count" | "first" | "last";
+
+/**
+ * Options for {@link pivotTableFull}.
+ *
+ * Every name given in `index`, `columns` and `values` must be an existing
+ * column of the source DataFrame; otherwise a `RangeError` is thrown.
+ */
+export interface PivotTableFullOptions {
+ /** Column(s) whose values form the row labels of the result. */
+ readonly index: string | readonly string[];
+ /** Column(s) whose values form the column headers of the result. */
+ readonly columns: string | readonly string[];
+ /**
+ * Column(s) to aggregate. Defaults to every column not named in `index` or `columns`.
+ * Only non-missing numeric cells contribute to the aggregation.
+ */
+ readonly values?: string | readonly string[];
+ /** Aggregation function. Default `"mean"`. */
+ readonly aggfunc?: PivotAggFunc;
+ /** Value used for cells without observations. Default `null`; `"count"` cells report `0` instead. */
+ readonly fill_value?: Scalar;
+ /** Omit result rows in which every cell is missing; the margins row is always kept. Default `false`. */
+ readonly dropna?: boolean;
+ /** Add a totals row and a totals column computed with the same `aggfunc`. Default `false`. */
+ readonly margins?: boolean;
+ /** Label for the margins row/column. Default `"All"`. */
+ readonly margins_name?: string;
+ /** Sort row and column keys by string comparison of their stringified values. Default `true`. */
+ readonly sort?: boolean;
+}
+
+// ─── internal sentinel ────────────────────────────────────────────────────────
+
+/**
+ * Internal key used to represent the margins (All) group.
+ *
+ * It takes the place of a row key or a column key inside composite bucket
+ * keys, and is swapped for the user-facing margins name when output labels
+ * are produced.
+ */
+// biome-ignore lint/nursery/noSecrets: not a secret — composite delimiter for internal keying
+const MARGIN_SENTINEL = "\x02\x03MARGIN\x03\x02";
+
+// ─── utility helpers ──────────────────────────────────────────────────────────
+
+/** Normalise a single name or a list of names into a fresh, mutable `string[]`. */
+function toArr(v: string | readonly string[]): string[] {
+  if (typeof v === "string") {
+    return [v];
+  }
+  return Array.from(v);
+}
+
+/** Report whether a Scalar counts as missing: `null`, `undefined`, or a numeric `NaN`. */
+function isMissing(v: Scalar): boolean {
+  if (v === null || v === undefined) {
+    return true;
+  }
+  return typeof v === "number" && Number.isNaN(v);
+}
+
+/** Fetch the value at row position `ri` of column `col`, yielding `null` when nothing is stored there. */
+function readCell(df: DataFrame, col: string, ri: number): Scalar {
+  const columnValues = df.col(col).values as readonly Scalar[];
+  const cell = columnValues[ri];
+  return cell ?? null;
+}
+
+/**
+ * Build the composite grouping key for row `ri`: the stringified cells of
+ * `cols`, in order, joined by a NUL (`\x00`) separator.
+ */
+function makeKey(df: DataFrame, cols: string[], ri: number): string {
+  const parts: string[] = [];
+  for (const name of cols) {
+    parts.push(String(readCell(df, name, ri)));
+  }
+  return parts.join("\x00");
+}
+
+/**
+ * Turn a composite key into its display label: the NUL-separated parts are
+ * joined with `", "` (a single-part key is returned unchanged).
+ */
+function keyLabel(key: string): Label {
+  // Joining a one-element split yields that element, so no special case is needed.
+  const display = key.split("\x00").join(", ");
+  return display as Label;
+}
+
+/**
+ * Append `v` to the bucket stored under `key`, creating the bucket on first use.
+ *
+ * @param groups - Map from bucket key to the observations collected so far (mutated).
+ * @param key - Bucket key.
+ * @param v - Observation to append.
+ */
+function push(groups: Map<string, number[]>, key: string, v: number): void {
+  const bucket = groups.get(key);
+  if (bucket === undefined) {
+    groups.set(key, [v]);
+    return;
+  }
+  bucket.push(v);
+}
+
+/** Add `item` to the end of `arr` unless an equal entry is already there. */
+function pushUnique(arr: string[], item: string): void {
+  if (arr.includes(item)) {
+    return;
+  }
+  arr.push(item);
+}
+
+// ─── aggregation ──────────────────────────────────────────────────────────────
+
+/**
+ * Reduce a numeric array with the given aggregation function.
+ *
+ * Callers are expected to pass a non-empty array: for an empty one `"first"`
+ * and `"last"` yield `undefined`, `"min"` / `"max"` yield `±Infinity`, and
+ * `"mean"` yields `NaN`.
+ *
+ * `"min"` and `"max"` fold over the array instead of spreading it into
+ * `Math.min` / `Math.max`, so a very large bucket cannot exceed the engine's
+ * argument limit and throw a `RangeError`.
+ *
+ * @param nums - Observations of one cell, in source row order.
+ * @param fn - Aggregation to apply.
+ * @returns The aggregated value.
+ */
+function applyAggFunc(nums: number[], fn: PivotAggFunc): number {
+  switch (fn) {
+    case "count":
+      return nums.length;
+    case "first":
+      return nums[0] as number;
+    case "last":
+      return nums.at(-1) as number;
+    case "min":
+      return nums.reduce((lo, x) => Math.min(lo, x), Number.POSITIVE_INFINITY);
+    case "max":
+      return nums.reduce((hi, x) => Math.max(hi, x), Number.NEGATIVE_INFINITY);
+    case "sum":
+      return nums.reduce((acc, x) => acc + x, 0);
+    default:
+      // "mean"
+      return nums.reduce((acc, x) => acc + x, 0) / nums.length;
+  }
+}
+
+/**
+ * Aggregate the observations stored under `cellKey`.
+ *
+ * @param groups - Map from composite cell key to collected observations.
+ * @param cellKey - Key of the cell to aggregate.
+ * @param fn - Aggregation to apply.
+ * @param fillValue - Value returned for a cell without observations.
+ * @returns The aggregate, or for an empty cell `0` when counting and `fillValue` otherwise.
+ */
+function aggregateCell(
+  groups: Map<string, number[]>,
+  cellKey: string,
+  fn: PivotAggFunc,
+  fillValue: Scalar,
+): Scalar {
+  const bucket = groups.get(cellKey);
+  if (bucket !== undefined && bucket.length > 0) {
+    return applyAggFunc(bucket, fn);
+  }
+  // An empty cell holds zero observations; every other aggregation has no value for it.
+  return fn === "count" ? 0 : fillValue;
+}
+
+// ─── group collection ─────────────────────────────────────────────────────────
+
+/**
+ * Record observation `v` for value column `valCol` in the (rk, ck) cell bucket.
+ * With margins enabled it is additionally recorded in the row-total,
+ * column-total and grand-total buckets.
+ *
+ * @param groups - Map from composite cell key to collected observations (mutated).
+ * @param rk - Row key of the observation.
+ * @param ck - Column key of the observation.
+ * @param valCol - Name of the value column the observation came from.
+ * @param v - The numeric observation.
+ * @param withMargins - Whether margin buckets are maintained.
+ */
+function collectObservation(
+  groups: Map<string, number[]>,
+  rk: string,
+  ck: string,
+  valCol: string,
+  v: number,
+  withMargins: boolean,
+): void {
+  const rowParts = withMargins ? [rk, MARGIN_SENTINEL] : [rk];
+  const colParts = withMargins ? [ck, MARGIN_SENTINEL] : [ck];
+  // Visits (rk, ck), (rk, ALL), (ALL, ck), (ALL, ALL) in that order.
+  for (const rowPart of rowParts) {
+    for (const colPart of colParts) {
+      push(groups, `${rowPart}\x01${colPart}\x01${valCol}`, v);
+    }
+  }
+}
+
+/** Result of scanning the source DataFrame once: the distinct keys plus all observation buckets. */
+interface GroupsData {
+ /** Distinct composite row keys, in order of first appearance. */
+ readonly rowKeys: string[];
+ /** Distinct composite column keys, in order of first appearance. */
+ readonly colKeys: string[];
+ /** Observations per `rowKey \x01 colKey \x01 valueColumn` bucket key. */
+ readonly groups: Map;
+}
+
+/**
+ * Scan the DataFrame once and populate every observation bucket.
+ *
+ * Row and column keys are registered for every source row, even when none of
+ * its value cells is usable. Only non-NaN `number` cells are collected.
+ *
+ * @param df - Source DataFrame.
+ * @param idxCols - Columns forming the row key.
+ * @param colCols - Columns forming the column key.
+ * @param valuesCols - Columns whose cells are aggregated.
+ * @param withMargins - Whether margin buckets are maintained as well.
+ * @returns Distinct keys in order of first appearance, and the buckets.
+ */
+function collectGroups(
+  df: DataFrame,
+  idxCols: string[],
+  colCols: string[],
+  valuesCols: string[],
+  withMargins: boolean,
+): GroupsData {
+  const rowKeys: string[] = [];
+  const colKeys: string[] = [];
+  const groups = new Map<string, number[]>();
+  const total = df.index.size;
+
+  for (let ri = 0; ri < total; ri++) {
+    const rk = makeKey(df, idxCols, ri);
+    const ck = makeKey(df, colCols, ri);
+    pushUnique(rowKeys, rk);
+    pushUnique(colKeys, ck);
+    for (const valCol of valuesCols) {
+      const cell = readCell(df, valCol, ri);
+      // A `number` is never null/undefined, so NaN is the only missing case left to reject.
+      if (typeof cell !== "number" || Number.isNaN(cell)) {
+        continue;
+      }
+      collectObservation(groups, rk, ck, valCol, cell, withMargins);
+    }
+  }
+
+  return { rowKeys, colKeys, groups };
+}
+
+// ─── result construction ──────────────────────────────────────────────────────
+
+/**
+ * Output column name for one (column key, value column) pair.
+ *
+ * The margins key maps to `marginsName`; any other key is rendered through
+ * `keyLabel`, so a composite key shows as `"a, b"` rather than leaking the
+ * internal NUL separator. With several value columns the name is prefixed
+ * with the value column (`"<value>_<label>"`).
+ */
+function pivotColumnName(ck: string, vc: string, isSingle: boolean, marginsName: string): string {
+  const label = ck === MARGIN_SENTINEL ? marginsName : String(keyLabel(ck));
+  return isSingle ? label : `${vc}_${label}`;
+}
+
+/**
+ * Build the ordered list of output column names.
+ *
+ * @param colKeys - Column keys in output order.
+ * @param valuesCols - Value columns being aggregated.
+ * @param isSingle - True when exactly one value column is aggregated.
+ * @param withMargins - Whether a trailing margins column is appended.
+ * @param marginsName - Label of the margins column.
+ * @returns One name per (column key, value column) pair, column key varying slowest.
+ */
+function buildColumnNames(
+  colKeys: string[],
+  valuesCols: string[],
+  isSingle: boolean,
+  withMargins: boolean,
+  marginsName: string,
+): string[] {
+  const keys = withMargins ? [...colKeys, MARGIN_SENTINEL] : colKeys;
+  const names: string[] = [];
+  for (const ck of keys) {
+    for (const vc of valuesCols) {
+      names.push(pivotColumnName(ck, vc, isSingle, marginsName));
+    }
+  }
+  return names;
+}
+
+/**
+ * Compute one data row for a given row key.
+ *
+ * @param rk - Row key (or the margins sentinel for the totals row).
+ * @param colKeys - Column keys in output order.
+ * @param valuesCols - Value columns being aggregated.
+ * @param isSingle - True when exactly one value column is aggregated.
+ * @param groups - Map from composite cell key to collected observations.
+ * @param fn - Aggregation to apply to each cell.
+ * @param fillValue - Value used for cells without observations.
+ * @param withMargins - Whether a trailing margins cell is computed.
+ * @param marginsName - Label of the margins column.
+ * @returns Record keyed by the same names `buildColumnNames` produces.
+ */
+function computeRow(
+  rk: string,
+  colKeys: string[],
+  valuesCols: string[],
+  isSingle: boolean,
+  groups: Map<string, number[]>,
+  fn: PivotAggFunc,
+  fillValue: Scalar,
+  withMargins: boolean,
+  marginsName: string,
+): Record<string, Scalar> {
+  const keys = withMargins ? [...colKeys, MARGIN_SENTINEL] : colKeys;
+  const row: Record<string, Scalar> = {};
+  for (const ck of keys) {
+    for (const vc of valuesCols) {
+      const colName = pivotColumnName(ck, vc, isSingle, marginsName);
+      row[colName] = aggregateCell(groups, `${rk}\x01${ck}\x01${vc}`, fn, fillValue);
+    }
+  }
+  return row;
+}
+
+/**
+ * Report whether every cell of a row record is missing.
+ * A record without any cells counts as all-missing.
+ */
+function rowIsAllMissing(row: Record<string, Scalar>): boolean {
+  for (const cell of Object.values(row)) {
+    if (!isMissing(cell)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+/** Everything {@link assembleDataFrame} needs to turn collected buckets into the result table. */
+interface AssembleOptions {
+ /** Distinct composite row keys, in order of first appearance. */
+ readonly rowKeys: string[];
+ /** Distinct composite column keys, in order of first appearance. */
+ readonly colKeys: string[];
+ /** Value columns being aggregated. */
+ readonly valuesCols: string[];
+ /** Observations per composite cell key. */
+ readonly groups: Map;
+ /** Aggregation applied to every cell. */
+ readonly fn: PivotAggFunc;
+ /** Value used for cells without observations. */
+ readonly fillValue: Scalar;
+ /** Omit rows in which every cell is missing. */
+ readonly dropna: boolean;
+ /** Append a margins row and a margins column. */
+ readonly withMargins: boolean;
+ /** Label used for the margins row and column. */
+ readonly marginsName: string;
+ /** Sort row and column keys before output. */
+ readonly sort: boolean;
+}
+
+/**
+ * Build the output DataFrame from aggregated groups.
+ *
+ * Row and column keys are sorted with the default string ordering when
+ * `opts.sort` is set; the margins row/column, when requested, always comes last.
+ *
+ * `dropna` is evaluated on the un-filled aggregates, so a row without any
+ * observation is dropped even when a non-missing `fillValue` is supplied.
+ * The margins row is never dropped.
+ *
+ * @param opts - Keys, buckets and presentation options.
+ * @returns The pivot table, indexed by the display labels of the row keys.
+ */
+function assembleDataFrame(opts: AssembleOptions): DataFrame {
+  const orderedRows = opts.sort ? [...opts.rowKeys].sort() : opts.rowKeys;
+  const orderedCols = opts.sort ? [...opts.colKeys].sort() : opts.colKeys;
+  const isSingle = opts.valuesCols.length === 1;
+
+  const colNames = buildColumnNames(
+    orderedCols,
+    opts.valuesCols,
+    isSingle,
+    opts.withMargins,
+    opts.marginsName,
+  );
+
+  const buildRow = (rk: string, fillValue: Scalar): Record<string, Scalar> =>
+    computeRow(
+      rk,
+      orderedCols,
+      opts.valuesCols,
+      isSingle,
+      opts.groups,
+      opts.fn,
+      fillValue,
+      opts.withMargins,
+      opts.marginsName,
+    );
+
+  const dataRows: Record<string, Scalar>[] = [];
+  const rowLabels: Label[] = [];
+
+  const allRowKeys = opts.withMargins ? [...orderedRows, MARGIN_SENTINEL] : orderedRows;
+  for (const rk of allRowKeys) {
+    const isMarginRow = rk === MARGIN_SENTINEL;
+    // Probe with a null fill so that fill_value cannot mask an empty row.
+    const unfilled = opts.dropna && !isMarginRow ? buildRow(rk, null) : undefined;
+    if (unfilled !== undefined && rowIsAllMissing(unfilled)) {
+      continue;
+    }
+    // Reuse the probe when it already equals the final row (null fill).
+    const row =
+      unfilled !== undefined && opts.fillValue === null ? unfilled : buildRow(rk, opts.fillValue);
+    dataRows.push(row);
+    rowLabels.push(isMarginRow ? (opts.marginsName as Label) : keyLabel(rk));
+  }
+
+  const outCols: Record<string, Scalar[]> = {};
+  for (const name of colNames) {
+    outCols[name] = dataRows.map((r) => r[name] ?? null);
+  }
+
+  return DataFrame.fromColumns(outCols, { index: new Index(rowLabels) });
+}
+
+// ─── values resolution ────────────────────────────────────────────────────────
+
+/**
+ * Work out which columns are aggregated.
+ *
+ * An explicit `values` option is validated and returned as given; without it,
+ * every column that is neither an index column nor a header column is used.
+ *
+ * @throws RangeError when an explicitly requested column does not exist.
+ */
+function resolveValues(
+  df: DataFrame,
+  optValues: PivotTableFullOptions["values"],
+  idxCols: string[],
+  colCols: string[],
+): string[] {
+  if (optValues === undefined) {
+    const reserved = new Set([...idxCols, ...colCols]);
+    return df.columns.values.filter((name) => !reserved.has(name));
+  }
+
+  const requested = toArr(optValues);
+  const unknown = requested.find((name) => !df.has(name));
+  if (unknown !== undefined) {
+    throw new RangeError(`values column "${unknown}" does not exist.`);
+  }
+  return requested;
+}
+
+// ─── public API ───────────────────────────────────────────────────────────────
+
+/**
+ * Create a pivot table with optional row/column margin totals.
+ *
+ * Mirrors `pandas.pivot_table()` — an enhanced version of {@link pivotTable}
+ * that adds `margins`, `margins_name`, and `sort` options.
+ *
+ * @param df - Source DataFrame.
+ * @param options - Pivot table options.
+ * @returns Aggregated pivot DataFrame, with optional All row/column.
+ * @throws RangeError when a column named in `index`, `columns` or `values` does not exist.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, pivotTableFull } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({
+ *   A: ["foo","foo","foo","bar","bar","bar"],
+ *   C: ["small","large","large","small","small","large"],
+ *   D: [1, 2, 2, 3, 3, 4],
+ * });
+ *
+ * pivotTableFull(df, { index: "A", columns: "C", values: "D",
+ *   aggfunc: "sum", margins: true });
+ * // rows: bar, foo, All   (sorted by default; pass `sort: false` for first-seen order)
+ * // cols: large, small, All
+ * ```
+ */
+export function pivotTableFull(df: DataFrame, options: PivotTableFullOptions): DataFrame {
+  const idxCols = toArr(options.index);
+  const colCols = toArr(options.columns);
+
+  // Fail fast on the first unknown grouping column, index columns first.
+  const unknown = [...idxCols, ...colCols].find((name) => !df.has(name));
+  if (unknown !== undefined) {
+    throw new RangeError(`Column "${unknown}" does not exist.`);
+  }
+
+  const valuesCols = resolveValues(df, options.values, idxCols, colCols);
+  const withMargins = options.margins === true;
+  const collected = collectGroups(df, idxCols, colCols, valuesCols, withMargins);
+
+  return assembleDataFrame({
+    rowKeys: collected.rowKeys,
+    colKeys: collected.colKeys,
+    valuesCols,
+    groups: collected.groups,
+    fn: options.aggfunc ?? "mean",
+    fillValue: options.fill_value ?? null,
+    dropna: options.dropna === true,
+    withMargins,
+    marginsName: options.margins_name ?? "All",
+    sort: options.sort !== false,
+  });
+}
diff --git a/src/stats/apply.ts b/src/stats/apply.ts
new file mode 100644
index 00000000..51af45c8
--- /dev/null
+++ b/src/stats/apply.ts
@@ -0,0 +1,346 @@
+/**
+ * apply — function application and mapping for Series and DataFrame.
+ *
+ * Mirrors the following pandas methods:
+ * - `Series.apply(func)` — apply a function element-wise to a Series
+ * - `Series.map(func | dict)` — map values via function or lookup table
+ * - `DataFrame.apply(func, axis=0)` — apply a function to each column/row (returns Series)
+ * - `DataFrame.apply(func, axis=0, result_type="expand")` — apply returning a DataFrame
+ * - `DataFrame.applymap(func)` / `DataFrame.map(func)` — element-wise mapping
+ *
+ * All functions are **pure** (return new objects; inputs are unchanged).
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Axis, Label, Scalar } from "../types.ts";
+
+// ─── public types ──────────────────────────────────────────────────────────────
+
+/**
+ * A lookup table accepted by {@link mapSeries}: either a `Map` keyed by the
+ * Series values themselves, or a plain object (whose keys are always strings).
+ */
+export type MapLookup = ReadonlyMap<Scalar, Scalar> | Readonly<Record<string, Scalar>>;
+
+/** Options for {@link applyDataFrame}. */
+export interface ApplyDataFrameOptions {
+ /**
+ * Axis along which to apply the function.
+ * - `0` or `"index"` (default): apply to each **column** (function receives a column Series)
+ * - `1` or `"columns"`: apply to each **row** (function receives a row Series)
+ *
+ * Any value other than `0` / `"index"` is handled as the row-wise case.
+ */
+ readonly axis?: Axis;
+}
+
+/** Options for {@link applyExpandDataFrame}. */
+export interface ApplyExpandDataFrameOptions {
+ /**
+ * Axis along which to apply the function.
+ * - `0` or `"index"` (default): apply to each **column** (function receives a column Series)
+ * - `1` or `"columns"`: apply to each **row** (function receives a row Series)
+ *
+ * Any value other than `0` / `"index"` is handled as the row-wise case.
+ */
+ readonly axis?: Axis;
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/**
+ * Materialise row `r` of a DataFrame as a Series: one entry per column,
+ * labelled by column name and named after the row's index label.
+ */
+function rowSeries(df: DataFrame, r: number): Series {
+  const names = df.columns.values;
+  const cells: Scalar[] = new Array(names.length);
+  const idx: Label[] = new Array(names.length);
+  for (const [c, name] of names.entries()) {
+    if (name === undefined) {
+      // No column name at this position: emit an empty cell labelled by position.
+      cells[c] = null;
+      idx[c] = c;
+    } else {
+      cells[c] = df.col(name).iat(r);
+      idx[c] = name;
+    }
+  }
+  return new Series({ data: cells, index: idx, name: String(df.index.at(r)) });
+}
+
+/**
+ * Normalise a lookup to a `Map`.
+ *
+ * A `Map` is returned as is; a plain object is converted from its own
+ * enumerable entries, so the resulting keys are always strings.
+ */
+function toMap(lookup: MapLookup): ReadonlyMap<Scalar, Scalar> {
+  if (lookup instanceof Map) {
+    return lookup;
+  }
+  return new Map<Scalar, Scalar>(Object.entries(lookup as Readonly<Record<string, Scalar>>));
+}
+
+// ─── applySeries ──────────────────────────────────────────────────────────────
+
+/**
+ * Run `fn` over every element of a Series and collect the results.
+ *
+ * `fn` is called in positional order with `(value, label, position)` and sees
+ * each value as stored, whatever its type. The result keeps the input's index
+ * and name. Mirrors `pandas.Series.apply(func)`.
+ *
+ * @example
+ * ```ts
+ * import { Series, applySeries } from "tsb";
+ * const s = new Series({ data: [1, 4, 9] });
+ * applySeries(s, (v) => Math.sqrt(v as number)).values; // [1, 2, 3]
+ * ```
+ */
+export function applySeries(
+  series: Series,
+  fn: (value: Scalar, label: Label, index: number) => Scalar,
+): Series {
+  const mapped: Scalar[] = Array.from({ length: series.size }, (_, pos) =>
+    fn(series.iat(pos), series.index.at(pos), pos),
+  );
+  return new Series({ data: mapped, index: series.index, name: series.name });
+}
+
+// ─── mapSeries ────────────────────────────────────────────────────────────────
+
+/**
+ * Map values of a Series via a function, a `Map`, or a plain object lookup table.
+ *
+ * - **Function**: applied element-wise (same as {@link applySeries}).
+ * - **Map**: values are looked up as they are; values not found become `null`.
+ * - **Plain object**: object keys are always strings, so number, boolean and
+ *   bigint values are looked up by their string form (`1` matches key `"1"`).
+ *   Values not found become `null` (matching pandas NaN).
+ *
+ * Mirrors `pandas.Series.map(arg)`.
+ *
+ * @param series - Source Series; its index and name are kept.
+ * @param mapper - Function or lookup table.
+ * @returns A new Series holding the mapped values.
+ *
+ * @example
+ * ```ts
+ * import { Series, mapSeries } from "tsb";
+ * const s = new Series({ data: ["a", "b", "c"] });
+ * mapSeries(s, { a: 1, b: 2, c: 3 }).values; // [1, 2, 3]
+ * mapSeries(s, (v) => String(v).toUpperCase()).values; // ["A", "B", "C"]
+ * mapSeries(new Series({ data: [1, 2] }), { 1: "one" }).values; // ["one", null]
+ * ```
+ */
+export function mapSeries(
+  series: Series,
+  mapper: ((value: Scalar, label: Label, index: number) => Scalar) | MapLookup,
+): Series {
+  if (typeof mapper === "function") {
+    return applySeries(series, mapper);
+  }
+  const isPlainObject = !(mapper instanceof Map);
+  const lookup = toMap(mapper);
+  const n = series.size;
+  const out: Scalar[] = new Array(n);
+  for (let i = 0; i < n; i++) {
+    const v = series.iat(i);
+    // Plain-object keys are strings; stringify primitives so numeric keys can match.
+    const stringify =
+      isPlainObject && (typeof v === "number" || typeof v === "boolean" || typeof v === "bigint");
+    const key = stringify ? String(v) : v;
+    out[i] = lookup.get(key) ?? null;
+  }
+  return new Series({ data: out, index: series.index, name: series.name });
+}
+
+// ─── applyDataFrame ───────────────────────────────────────────────────────────
+
+/**
+ * Reduce every column (axis=0) or every row (axis=1) of a DataFrame to a scalar.
+ *
+ * `fn` receives the column or row as a `Series` together with its label and
+ * returns one `Scalar`. The results form a Series indexed by column names
+ * (axis=0) or by row labels (axis=1).
+ *
+ * Mirrors `pandas.DataFrame.apply(func, axis=0)`.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, applyDataFrame } from "tsb";
+ * const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+ * // Sum of each column:
+ * applyDataFrame(df, (col) => (col.values as number[]).reduce((a, b) => a + b, 0)).values;
+ * // → [6, 15] (index: ["a", "b"])
+ * ```
+ */
+export function applyDataFrame(
+  df: DataFrame,
+  fn: (slice: Series, label: Label) => Scalar,
+  options: ApplyDataFrameOptions = {},
+): Series {
+  const axis: Axis = options.axis ?? 0;
+  const columnWise = axis === 0 || axis === "index";
+  return columnWise ? applyDataFrameCols(df, fn) : applyDataFrameRows(df, fn);
+}
+
+/**
+ * Column-wise reduction: call `fn` once per column, in column order, and
+ * return the results as a Series indexed by column name.
+ */
+function applyDataFrameCols(
+  df: DataFrame,
+  fn: (slice: Series, label: Label) => Scalar,
+): Series {
+  const names = df.columns.values;
+  const results: Scalar[] = new Array(names.length);
+  const idx: Label[] = new Array(names.length);
+  for (const [c, name] of names.entries()) {
+    if (name === undefined) {
+      // No column name at this position: record an empty result labelled by position.
+      results[c] = null;
+      idx[c] = c;
+    } else {
+      results[c] = fn(df.col(name), name);
+      idx[c] = name;
+    }
+  }
+  return new Series({ data: results, index: idx });
+}
+
+/**
+ * Row-wise reduction: call `fn` once per row, top to bottom, and return the
+ * results as a Series indexed by the row labels.
+ */
+function applyDataFrameRows(
+  df: DataFrame,
+  fn: (slice: Series, label: Label) => Scalar,
+): Series {
+  const total = df.index.size;
+  const results: Scalar[] = [];
+  const idx: Label[] = [];
+  for (let r = 0; r < total; r++) {
+    const rowLabel = df.index.at(r);
+    results.push(fn(rowSeries(df, r), rowLabel));
+    idx.push(rowLabel);
+  }
+  return new Series({ data: results, index: idx });
+}
+
+// ─── applyExpandDataFrame ─────────────────────────────────────────────────────
+
+/**
+ * Transform every column (axis=0) or every row (axis=1) of a DataFrame with a
+ * function that returns a `Series`, and assemble the results into a new DataFrame.
+ *
+ * - **axis=0**: `fn` is called per column; each returned Series becomes the
+ *   column of the same name (same row index expected).
+ * - **axis=1**: `fn` is called per row; the returned Series become the rows
+ *   of the result.
+ *
+ * Mirrors `pandas.DataFrame.apply(func, axis=0, result_type="expand")`.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, Series, applyExpandDataFrame } from "tsb";
+ * const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] });
+ * // Double each column:
+ * applyExpandDataFrame(df, (col) =>
+ *   new Series({ data: col.values.map((v) => (v as number) * 2), index: col.index })
+ * ).col("a").values; // [2, 4]
+ * ```
+ */
+export function applyExpandDataFrame(
+  df: DataFrame,
+  fn: (slice: Series, label: Label) => Series,
+  options: ApplyExpandDataFrameOptions = {},
+): DataFrame {
+  const axis: Axis = options.axis ?? 0;
+  const columnWise = axis === 0 || axis === "index";
+  return columnWise ? applyExpandCols(df, fn) : applyExpandRows(df, fn);
+}
+
+/**
+ * Column-wise expand: call `fn` once per column and store each returned
+ * Series under the original column name. The result reuses the source index.
+ */
+function applyExpandCols(
+  df: DataFrame,
+  fn: (slice: Series, label: Label) => Series,
+): DataFrame {
+  const expanded = new Map<string, Series<Scalar>>();
+  for (const name of df.columns.values) {
+    if (name !== undefined) {
+      expanded.set(name, fn(df.col(name), name));
+    }
+  }
+  return new DataFrame(expanded, df.index);
+}
+
+/**
+ * Find the value of `row` whose stringified label equals `colKey`.
+ * The first match wins; `null` is returned when no label matches.
+ */
+function lookupRowValue(row: Series, colKey: string): Scalar {
+  let pos = 0;
+  while (pos < row.index.size) {
+    if (String(row.index.at(pos)) === colKey) {
+      return row.iat(pos);
+    }
+    pos += 1;
+  }
+  return null;
+}
+
+/**
+ * Row-wise expand: call `fn` once per row and assemble the returned Series
+ * as the rows of a new DataFrame.
+ *
+ * The output columns are the labels of the first row's result, stringified.
+ * Later results are matched to those columns by label, and a label that a
+ * result lacks yields `null`. The result always carries the source row index,
+ * as the other apply/map helpers do.
+ */
+function applyExpandRows(
+  df: DataFrame,
+  fn: (slice: Series, label: Label) => Series,
+): DataFrame {
+  const nRows = df.index.size;
+  const rowResults: Series[] = [];
+  for (let r = 0; r < nRows; r++) {
+    rowResults.push(fn(rowSeries(df, r), df.index.at(r)));
+  }
+
+  const firstResult = rowResults[0];
+  if (firstResult === undefined) {
+    // No rows: nothing to derive columns from.
+    return new DataFrame(new Map<string, Series<Scalar>>(), df.index);
+  }
+
+  const colMap = new Map<string, Series<Scalar>>();
+  for (let j = 0; j < firstResult.index.size; j++) {
+    const colKey = String(firstResult.index.at(j));
+    const data: Scalar[] = rowResults.map((row) => lookupRowValue(row, colKey));
+    colMap.set(colKey, new Series({ data, index: df.index, name: colKey }));
+  }
+
+  return new DataFrame(colMap, df.index);
+}
+
+// ─── mapDataFrame ─────────────────────────────────────────────────────────────
+
+/**
+ * Apply a function element-wise to every cell of a DataFrame.
+ *
+ * `fn` receives `(value, rowLabel, columnName)` and returns a `Scalar`. Cells
+ * are visited column by column, top to bottom. The result is a new DataFrame
+ * with the same shape, index, and columns.
+ *
+ * Mirrors `pandas.DataFrame.applymap(func)` (renamed to `map` in pandas ≥ 2.1).
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, mapDataFrame } from "tsb";
+ * const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+ * mapDataFrame(df, (v) => (v as number) ** 2).col("b").values; // [16, 25, 36]
+ * ```
+ */
+export function mapDataFrame(
+  df: DataFrame,
+  fn: (value: Scalar, rowLabel: Label, colName: string) => Scalar,
+): DataFrame {
+  const nRows = df.index.size;
+  const mapped = new Map<string, Series<Scalar>>();
+
+  for (const colName of df.columns.values) {
+    if (colName === undefined) {
+      continue;
+    }
+    const col = df.col(colName);
+    const out: Scalar[] = [];
+    for (let r = 0; r < nRows; r++) {
+      out.push(fn(col.iat(r), df.index.at(r), colName));
+    }
+    mapped.set(colName, new Series({ data: out, index: df.index, name: colName }));
+  }
+
+  return new DataFrame(mapped, df.index);
+}
diff --git a/src/stats/clip_advanced.ts b/src/stats/clip_advanced.ts
new file mode 100644
index 00000000..032bce5b
--- /dev/null
+++ b/src/stats/clip_advanced.ts
@@ -0,0 +1,290 @@
+/**
+ * clip_advanced — per-element clipping for Series and DataFrame.
+ *
+ * Mirrors the following pandas methods with array/Series/DataFrame bounds:
+ * - `Series.clip(lower, upper)` — per-element bounds from scalar, array, or Series
+ * - `DataFrame.clip(lower, upper, axis?)` — per-element bounds with broadcast support
+ *
+ * Unlike the simple scalar `clip` in `elem_ops`, this module supports:
+ * - Per-position bounds (array or positionally-aligned Series)
+ * - DataFrame-shaped bounds for element-wise clipping
+ * - Axis-based broadcasting when bounds is a Series
+ *
+ * All functions are **pure** (return new objects; inputs are unchanged).
+ * Missing values (null / NaN) are propagated through every operation.
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Axis, Scalar } from "../types.ts";
+
+// ─── public types ──────────────────────────────────────────────────────────────
+
+/**
+ * Scalar or per-element bound accepted by {@link clipAdvancedSeries}.
+ *
+ * Arrays and Series are read by position. A position beyond the end of the
+ * bound, or a Series entry that is not a usable number, means "no bound" there.
+ */
+export type SeriesBound = number | null | undefined | readonly number[] | Series;
+
+/**
+ * Scalar or per-element bound accepted by {@link clipAdvancedDataFrame}.
+ *
+ * A plain array supplies one bound per row; a Series is broadcast according to
+ * the `axis` option; a DataFrame is matched by column name and row position.
+ */
+export type DataFrameBound =
+ | number
+ | null
+ | undefined
+ | readonly number[]
+ | Series
+ | DataFrame;
+
+/** Options for {@link clipAdvancedSeries}. Both bounds are optional and independent of each other. */
+export interface ClipAdvancedSeriesOptions {
+ /**
+ * Lower bound — scalar, array, or positionally-aligned Series.
+ * `null` / `undefined` means no lower bound.
+ * A NaN bound value is treated as "no bound" for that element.
+ */
+ readonly lower?: SeriesBound;
+ /**
+ * Upper bound — scalar, array, or positionally-aligned Series.
+ * `null` / `undefined` means no upper bound.
+ * A NaN bound value is treated as "no bound" for that element.
+ */
+ readonly upper?: SeriesBound;
+}
+
+/** Options for {@link clipAdvancedDataFrame}. Both bounds are optional and independent of each other. */
+export interface ClipAdvancedDataFrameOptions {
+ /**
+ * Lower bound — scalar, array, Series, or element-wise DataFrame.
+ * `null` / `undefined` means no lower bound.
+ * A plain array is read per row; a NaN bound value means "no bound" for that cell.
+ */
+ readonly lower?: DataFrameBound;
+ /**
+ * Upper bound — scalar, array, Series, or element-wise DataFrame.
+ * `null` / `undefined` means no upper bound.
+ * A plain array is read per row; a NaN bound value means "no bound" for that cell.
+ */
+ readonly upper?: DataFrameBound;
+ /**
+ * When `lower` or `upper` is a Series, this axis controls broadcasting.
+ * - `0` or `"index"` (default): broadcast Series along rows (one bound per column).
+ * - `1` or `"columns"`: broadcast Series along columns (one bound per row).
+ *
+ * The Series is read by position (column position or row position), not by label.
+ */
+ readonly axis?: Axis;
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/**
+ * Type guard for a usable number: `v` has type `number` and is not NaN.
+ * Despite the name, `Infinity` and `-Infinity` are accepted.
+ */
+function isFiniteNum(v: Scalar): v is number {
+  if (typeof v !== "number") {
+    return false;
+  }
+  return !Number.isNaN(v);
+}
+
+/**
+ * Clamp a numeric value into `[lo, hi]`; anything that is not a usable number
+ * is handed back untouched. The lower bound is tested first, so it wins when
+ * the bounds cross.
+ */
+function clipValue(v: Scalar, lo: number, hi: number): Scalar {
+  if (!isFiniteNum(v)) {
+    return v;
+  }
+  let clipped = v;
+  if (v < lo) {
+    clipped = lo;
+  } else if (v > hi) {
+    clipped = hi;
+  }
+  return clipped;
+}
+
+/**
+ * Look up the bound that applies at position `i`.
+ *
+ * A scalar applies everywhere; arrays and Series are read by position.
+ * `NaN` is the "no bound" sentinel: it is returned for a `null`/`undefined`
+ * bound, for a position past the end, and for a Series entry that is not a number.
+ */
+function resolveSeriesBound(bound: SeriesBound, i: number): number {
+  if (bound === null || bound === undefined) {
+    return Number.NaN; // sentinel: no bound
+  }
+  if (typeof bound === "number") {
+    return bound;
+  }
+  if (Array.isArray(bound)) {
+    const entry = (bound as readonly number[])[i];
+    return entry === undefined ? Number.NaN : entry;
+  }
+  // Series — positional access
+  const boundSeries = bound as Series;
+  if (i >= boundSeries.size) {
+    return Number.NaN;
+  }
+  const entry = boundSeries.iat(i);
+  // A NaN entry is already the sentinel, so only non-numbers need replacing.
+  return typeof entry === "number" ? entry : Number.NaN;
+}
+
+// ─── clipAdvancedSeries ────────────────────────────────────────────────────────
+
+/**
+ * Clip every element of a Series to its own `[lower, upper]` bounds.
+ *
+ * Each bound may be:
+ * - a scalar `number` — the same bound for every element
+ * - a `number[]` — one bound per position
+ * - a `Series` — one bound per position (labels are ignored)
+ * - `null` / `undefined` — no bound in that direction
+ *
+ * Values that are not usable numbers (null, NaN, strings, …) pass through
+ * unchanged. The result keeps the input's index and name.
+ * Mirrors `pandas.Series.clip(lower, upper)` with array bounds.
+ *
+ * @example
+ * ```ts
+ * import { Series, clipAdvancedSeries } from "tsb";
+ * const s = new Series({ data: [-3, 1, 5, 10] });
+ * const lo = new Series({ data: [-1, 0, 2, 8] });
+ * clipAdvancedSeries(s, { lower: lo }).values; // [-1, 1, 5, 10]
+ * ```
+ */
+export function clipAdvancedSeries(
+  series: Series,
+  options: ClipAdvancedSeriesOptions = {},
+): Series {
+  const lowerBound = options.lower;
+  const upperBound = options.upper;
+  const count = series.size;
+  const clipped: Scalar[] = [];
+
+  for (let i = 0; i < count; i++) {
+    const value = series.iat(i);
+    if (isFiniteNum(value)) {
+      const lo = resolveSeriesBound(lowerBound, i);
+      const hi = resolveSeriesBound(upperBound, i);
+      // NaN means "no bound": widen to the matching infinity.
+      const floor = Number.isNaN(lo) ? Number.NEGATIVE_INFINITY : lo;
+      const ceiling = Number.isNaN(hi) ? Number.POSITIVE_INFINITY : hi;
+      clipped.push(clipValue(value, floor, ceiling));
+    } else {
+      clipped.push(value);
+    }
+  }
+
+  return new Series({ data: clipped, index: series.index, name: series.name });
+}
+
+// ─── DataFrame bound helpers ───────────────────────────────────────────────────
+
+/**
+ * Pick the bound for cell (r, c) from a Series bound.
+ *
+ * With axis `0` / `"index"` the Series holds one bound per column and is read
+ * at column position `c`; otherwise it holds one bound per row and is read at
+ * row position `r`. `NaN` ("no bound") is returned when the position is out of
+ * range or the entry is not a usable number.
+ */
+function resolveSeriesBoundForDf(s: Series, r: number, c: number, axis: Axis): number {
+  const perColumn = axis === 0 || axis === "index";
+  const pos = perColumn ? c : r;
+  if (pos >= s.size) {
+    return Number.NaN;
+  }
+  const entry = s.iat(pos);
+  return isFiniteNum(entry) ? entry : Number.NaN;
+}
+
+/**
+ * Pick the bound for row `r` of column `colName` from a DataFrame bound.
+ *
+ * Any failure while reading the cell is treated as "no bound" and reported
+ * as `NaN`, as is a cell that is not a usable number.
+ */
+function resolveDataFrameBoundFromDf(bound: DataFrame, r: number, colName: string): number {
+  try {
+    const cell: Scalar = bound.col(colName).iat(r);
+    return isFiniteNum(cell) ? cell : Number.NaN;
+  } catch {
+    return Number.NaN;
+  }
+}
+
+/**
+ * Work out the bound that applies to cell (row `r`, column `c`).
+ *
+ * Dispatches on the kind of bound: scalar (applies everywhere), DataFrame
+ * (element-wise by column name), Series (broadcast by `axis`), or plain array
+ * (one bound per row). `NaN` is returned whenever there is no bound.
+ */
+function resolveDataFrameBound(
+  bound: DataFrameBound,
+  r: number,
+  c: number,
+  colName: string,
+  axis: Axis,
+): number {
+  if (typeof bound === "number") {
+    return bound;
+  }
+  if (bound === null || bound === undefined) {
+    return Number.NaN;
+  }
+  if (bound instanceof DataFrame) {
+    return resolveDataFrameBoundFromDf(bound, r, colName);
+  }
+  if (bound instanceof Series) {
+    return resolveSeriesBoundForDf(bound as Series, r, c, axis);
+  }
+  if (!Array.isArray(bound)) {
+    return Number.NaN;
+  }
+  // plain array: treat as row-indexed (one bound per row)
+  const perRow = (bound as readonly number[])[r];
+  return perRow === undefined ? Number.NaN : perRow;
+}
+
+// ─── clipAdvancedDataFrame ─────────────────────────────────────────────────────
+
+/**
+ * Clip each element of a DataFrame to per-element [lower, upper] bounds.
+ *
+ * Bounds may be:
+ * - A scalar `number` — same bound applied to every cell
+ * - A `number[]` array — per-row bounds (one per row, broadcast across columns)
+ * - A `Series` — broadcast by `axis`, read by position:
+ *   - `axis=0` (default): one bound per **column** (entry position = column position)
+ *   - `axis=1`: one bound per **row** (entry position = row position)
+ * - A `DataFrame` — element-wise bounds (matched by column name and row position)
+ * - `null` / `undefined` — no bound in that direction
+ *
+ * Values that are not usable numbers (null, NaN, strings, …) pass through unchanged.
+ * Mirrors `pandas.DataFrame.clip(lower, upper, axis=0)` with array/Series/DF bounds.
+ *
+ * @param df - Source DataFrame (not modified).
+ * @param options - Bounds and broadcast axis.
+ * @returns A new DataFrame with the same index and columns.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, clipAdvancedDataFrame } from "tsb";
+ * const df = DataFrame.fromColumns({ a: [1, 5, 9], b: [2, 6, 10] });
+ * const loBound = DataFrame.fromColumns({ a: [2, 3, 4], b: [1, 4, 8] });
+ * clipAdvancedDataFrame(df, { lower: loBound }).col("a").values; // [2, 5, 9]
+ * ```
+ */
+export function clipAdvancedDataFrame(
+  df: DataFrame,
+  options: ClipAdvancedDataFrameOptions = {},
+): DataFrame {
+  const { lower, upper } = options;
+  const axis: Axis = options.axis ?? 0;
+  const colNames = df.columns.values;
+  const nRows = df.index.size;
+  const colMap = new Map<string, Series<Scalar>>();
+
+  for (let c = 0; c < colNames.length; c++) {
+    const colName = colNames[c];
+    if (colName === undefined) {
+      continue;
+    }
+    const col = df.col(colName);
+    const out: Scalar[] = new Array(nRows);
+
+    for (let r = 0; r < nRows; r++) {
+      const v = col.iat(r);
+      if (!isFiniteNum(v)) {
+        out[r] = v;
+        continue;
+      }
+
+      const lo = resolveDataFrameBound(lower, r, c, colName, axis);
+      const hi = resolveDataFrameBound(upper, r, c, colName, axis);
+
+      // NaN means "no bound": widen to the matching infinity.
+      const effectiveLo = Number.isNaN(lo) ? Number.NEGATIVE_INFINITY : lo;
+      const effectiveHi = Number.isNaN(hi) ? Number.POSITIVE_INFINITY : hi;
+
+      out[r] = clipValue(v, effectiveLo, effectiveHi);
+    }
+
+    colMap.set(colName, new Series({ data: out, index: df.index, name: colName }));
+  }
+
+  return new DataFrame(colMap, df.index);
+}
diff --git a/src/stats/crosstab.ts b/src/stats/crosstab.ts
new file mode 100644
index 00000000..b09f358b
--- /dev/null
+++ b/src/stats/crosstab.ts
@@ -0,0 +1,387 @@
+/**
+ * crosstab — cross-tabulation of two or more factors.
+ *
+ * Mirrors `pandas.crosstab`:
+ * - `crosstab(index, columns)` → frequency table (count of co-occurrences)
+ * - Supports `values` + `aggfunc` for aggregated cross-tabulations
+ * - Supports `normalize` (all / index / columns) for proportion tables
+ * - Supports `margins` for row/column totals
+ * - Supports `dropna` to exclude NaN combinations
+ *
+ * @example
+ * ```ts
+ * import { crosstab, Series } from "tsb";
+ * const a = new Series({ data: ["foo","foo","bar","bar"], name: "A" });
+ * const b = new Series({ data: ["one","two","one","two"], name: "B" });
+ * const ct = crosstab(a, b);
+ * // Rows and columns appear in first-seen order:
+ * //      one  two
+ * // foo    1    1
+ * // bar    1    1
+ * ```
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Index } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── public API types ─────────────────────────────────────────────────────────
+
+/** Aggregation function name for {@link crosstab}. */
+export type CrosstabAggFunc = "count" | "sum" | "mean" | "min" | "max";
+
+/** Normalize mode: proportions over all cells, rows, or columns. */
+export type CrosstabNormalize = boolean | "all" | "index" | "columns";
+
+/** Options for {@link crosstab}. */
+export interface CrosstabOptions {
+ /**
+ * Values to aggregate. If omitted, counts co-occurrences.
+ */
+ readonly values?: Series | readonly Scalar[];
+ /**
+ * Aggregation function applied when `values` is provided.
+ * Defaults to `"mean"` in that case. When `values` is omitted the option
+ * is ignored and cells are always counts.
+ */
+ readonly aggfunc?: CrosstabAggFunc;
+ /**
+ * If `true`, add a totals row and a totals column.
+ * Only the literal `true` enables margins. Default `false`.
+ */
+ readonly margins?: boolean;
+ /**
+ * Label for the margins row/column. Default `"All"`.
+ */
+ readonly margins_name?: string;
+ /**
+ * Normalise values:
+ * - `"all"` or `true` → divide by grand total
+ * - `"index"` → divide each row by its row total
+ * - `"columns"` → divide each column by its column total
+ * - `false` (default) → no normalisation
+ */
+ readonly normalize?: CrosstabNormalize;
+ /**
+ * If `true` (default), exclude combinations where either factor is
+ * null, undefined or NaN.
+ */
+ readonly dropna?: boolean;
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** Return the values of a Series, or the array itself when an array is passed. */
+function toScalarArray(x: Series | readonly Scalar[]): readonly Scalar[] {
+  return x instanceof Series ? (x.values as readonly Scalar[]) : x;
+}
+
+/** Report whether `v` counts as missing: `null`, `undefined` or `NaN`. */
+function isMissing(v: Scalar): boolean {
+  if (v === null || v === undefined) {
+    return true;
+  }
+  return typeof v === "number" && Number.isNaN(v);
+}
+
+/**
+ * Aggregate a numeric bucket according to `aggfunc`.
+ *
+ * @param nums - Observations collected for one cell.
+ * @param fn - Aggregation to apply.
+ * @returns The bucket size for `"count"`; `NaN` for any other aggregation of
+ *   an empty bucket; otherwise the sum, mean, minimum or maximum.
+ */
+function aggregateBucket(nums: number[], fn: CrosstabAggFunc): number {
+  if (fn === "count") {
+    return nums.length;
+  }
+  if (nums.length === 0) {
+    return Number.NaN;
+  }
+  if (fn === "sum") {
+    return nums.reduce((s, v) => s + v, 0);
+  }
+  if (fn === "mean") {
+    return nums.reduce((s, v) => s + v, 0) / nums.length;
+  }
+  // Fold pairwise instead of spreading: `Math.min(...nums)` passes every
+  // element as a call argument and throws RangeError on very large buckets.
+  if (fn === "min") {
+    return nums.reduce((m, v) => Math.min(m, v), Number.POSITIVE_INFINITY);
+  }
+  // max
+  return nums.reduce((m, v) => Math.max(m, v), Number.NEGATIVE_INFINITY);
+}
+
+/**
+ * Add an observation to the cell map.
+ *
+ * The bucket for a (row, column) pair is stored under the composite key
+ * `rowKey`, a `\x00` separator, then `colKey`, and is created on first use.
+ *
+ * @param cellMap - Map from composite key to the bucket of observations (mutated in place).
+ * @param rowKey - Row key.
+ * @param colKey - Column key.
+ * @param value - Observation appended to the bucket.
+ */
+function pushObservation(
+ cellMap: Map,
+ rowKey: string,
+ colKey: string,
+ value: number,
+): void {
+ const key = `${rowKey}\x00${colKey}`; // NUL separates the two halves of the key
+ let bucket = cellMap.get(key);
+ if (bucket === undefined) { // first observation for this cell: create its bucket
+ bucket = [];
+ cellMap.set(key, bucket);
+ }
+ bucket.push(value);
+}
+
+/**
+ * Scan the paired factor arrays once, recording the distinct row/column keys
+ * in first-seen order and collecting the observations for every cell.
+ *
+ * Keys are the `String(...)` form of the factor values. Without `valVals`
+ * every retained position contributes the observation `1`. With `valVals`
+ * only usable numbers are collected: non-NaN numbers as-is and booleans as
+ * 0/1. Missing or non-numeric values are skipped so they neither inflate
+ * counts nor poison sums; the row/column key is still registered.
+ *
+ * @param rowVals - Row-factor values.
+ * @param colVals - Column-factor values (same length as `rowVals`).
+ * @param valVals - Values to aggregate, or `null` for plain counting.
+ * @param dropna - When `true`, skip positions where either factor is missing.
+ * @returns The per-position keys, the distinct keys in first-seen order, and
+ *   the map from composite cell key to its bucket of observations.
+ */
+function buildCellMap(
+  rowVals: readonly Scalar[],
+  colVals: readonly Scalar[],
+  valVals: readonly Scalar[] | null,
+  dropna: boolean,
+): {
+  rowKeys: string[];
+  colKeys: string[];
+  rowOrder: string[];
+  colOrder: string[];
+  cellMap: Map<string, number[]>;
+} {
+  const rowKeys: string[] = rowVals.map((v) => String(v));
+  const colKeys: string[] = colVals.map((v) => String(v));
+  const rowOrder: string[] = [];
+  const colOrder: string[] = [];
+  const seenRow = new Set<string>();
+  const seenCol = new Set<string>();
+  const cellMap = new Map<string, number[]>();
+
+  const n = rowKeys.length;
+  for (let i = 0; i < n; i++) {
+    const rv = rowVals[i];
+    const cv = colVals[i];
+    if (dropna && (isMissing(rv) || isMissing(cv))) {
+      continue;
+    }
+    const rk = rowKeys[i] ?? "";
+    const ck = colKeys[i] ?? "";
+    if (!seenRow.has(rk)) {
+      seenRow.add(rk);
+      rowOrder.push(rk);
+    }
+    if (!seenCol.has(ck)) {
+      seenCol.add(ck);
+      colOrder.push(ck);
+    }
+
+    if (valVals === null) {
+      // Plain frequency table: each retained position counts once.
+      pushObservation(cellMap, rk, ck, 1);
+      continue;
+    }
+    const raw = valVals[i];
+    if (typeof raw === "number" && !Number.isNaN(raw)) {
+      pushObservation(cellMap, rk, ck, raw);
+    } else if (typeof raw === "boolean") {
+      pushObservation(cellMap, rk, ck, raw ? 1 : 0);
+    }
+    // Anything else (null, undefined, NaN, strings, …) is not an observation.
+  }
+
+  return { rowKeys, colKeys, rowOrder, colOrder, cellMap };
+}
+
+/**
+ * Build matrix using direct key lookup.
+ *
+ * Produces one row per entry of `rowOrder` and one cell per entry of
+ * `colOrder`, looking each cell's bucket up under the composite key built
+ * from the row key, a `\x00` separator and the column key.
+ *
+ * @param rowOrder - Row keys, in output order.
+ * @param colOrder - Column keys, in output order.
+ * @param cellMap - Map from composite key to the bucket of observations.
+ * @param aggfunc - Aggregation applied to each non-empty bucket.
+ * @returns Matrix of aggregated cells; a missing or empty bucket yields `0`
+ *   for `"count"` and `NaN` for every other aggregation.
+ */
+function buildMatrixDirect(
+ rowOrder: readonly string[],
+ colOrder: readonly string[],
+ cellMap: Map,
+ aggfunc: CrosstabAggFunc,
+): number[][] {
+ return rowOrder.map((rk) =>
+ colOrder.map((ck) => {
+ const key = `${rk}\x00${ck}`; // same composite-key format used when filling the map
+ const bucket = cellMap.get(key);
+ if (bucket === undefined || bucket.length === 0) { // this (row, column) pair was never observed
+ return aggfunc === "count" ? 0 : Number.NaN;
+ }
+ return aggregateBucket(bucket, aggfunc);
+ }),
+ );
+}
+
+/** Add up every cell of `matrix` in row-major order, counting NaN cells as zero. */
+function sumAll(matrix: number[][]): number {
+  return matrix.reduce(
+    (running, row) => row.reduce((acc, cell) => acc + (Number.isNaN(cell) ? 0 : cell), running),
+    0,
+  );
+}
+
+/**
+ * Total of the interior cells of a matrix that carries margins: the last row
+ * and the last column are left out, and NaN or absent cells count as zero.
+ * The column count is taken from the first row.
+ */
+function sumExcludeMargins(matrix: number[][]): number {
+  const interiorRows = matrix.length - 1;
+  const interiorCols = (matrix[0]?.length ?? 0) - 1;
+  let acc = 0;
+  for (let r = 0; r < interiorRows; r++) {
+    const row = matrix[r];
+    for (let c = 0; c < interiorCols; c++) {
+      const cell = row?.[c];
+      if (cell !== undefined && !Number.isNaN(cell)) {
+        acc += cell;
+      }
+    }
+  }
+  return acc;
+}
+
+/** Return a new matrix with each cell divided by `total`; NaN cells stay NaN. */
+function divideMatrix(matrix: number[][], total: number): number[][] {
+  const scaled: number[][] = [];
+  for (const row of matrix) {
+    scaled.push(row.map((cell) => (Number.isNaN(cell) ? Number.NaN : cell / total)));
+  }
+  return scaled;
+}
+
+/**
+ * Divide every cell by the grand total. When the matrix already carries a
+ * margins row/column, the total is taken from the interior cells only.
+ */
+function normalizeAll(matrix: number[][], withMargins: boolean): number[][] {
+  let grandTotal: number;
+  if (withMargins) {
+    grandTotal = sumExcludeMargins(matrix);
+  } else {
+    grandTotal = sumAll(matrix);
+  }
+  return divideMatrix(matrix, grandTotal);
+}
+
+/** Divide each cell by the total of its own row (NaN cells count as zero and stay NaN). */
+function normalizeByIndex(matrix: number[][]): number[][] {
+  const result: number[][] = [];
+  for (const row of matrix) {
+    let rowTotal = 0;
+    for (const cell of row) {
+      rowTotal += Number.isNaN(cell) ? 0 : cell;
+    }
+    result.push(row.map((cell) => (Number.isNaN(cell) ? Number.NaN : cell / rowTotal)));
+  }
+  return result;
+}
+
+/** Divide each cell by the total of its own column (NaN cells count as zero and stay NaN). */
+function normalizeByColumns(matrix: number[][]): number[][] {
+  const width = matrix[0]?.length ?? 0;
+  const totals: number[] = Array.from({ length: width }, () => 0);
+  for (const row of matrix) {
+    row.forEach((cell, ci) => {
+      const contribution = Number.isNaN(cell) ? 0 : cell;
+      totals[ci] = (totals[ci] ?? 0) + contribution;
+    });
+  }
+  return matrix.map((row) =>
+    row.map((cell, ci) => {
+      if (Number.isNaN(cell)) {
+        return Number.NaN;
+      }
+      return cell / (totals[ci] ?? 1);
+    }),
+  );
+}
+
+/**
+ * Apply normalisation to a matrix.
+ *
+ * @param matrix - Cell matrix, optionally already extended with a totals row and column.
+ * @param mode - `false` returns the matrix unchanged; `true` / `"all"` divide by
+ *   the grand total; `"index"` divides by row totals; `"columns"` by column totals.
+ * @param withMargins - Whether the last row and last column of `matrix` are margins.
+ * @returns The normalised matrix (the input itself when `mode` is `false`).
+ */
+function normalizeMatrix(
+  matrix: number[][],
+  mode: CrosstabNormalize,
+  withMargins: boolean,
+): number[][] {
+  if (mode === false) {
+    return matrix;
+  }
+  const actualMode = mode === true ? "all" : mode;
+  if (actualMode === "all") {
+    return normalizeAll(matrix, withMargins);
+  }
+  if (!withMargins) {
+    return actualMode === "index" ? normalizeByIndex(matrix) : normalizeByColumns(matrix);
+  }
+
+  // With margins present, the last entry of each row (or column) already IS
+  // that row's (column's) total. Summing the whole row again would count the
+  // margin a second time and halve every proportion, so divide by the margin.
+  if (actualMode === "index") {
+    return matrix.map((row) => {
+      const rowTotal = row[row.length - 1] ?? Number.NaN;
+      return row.map((v) => (Number.isNaN(v) ? Number.NaN : v / rowTotal));
+    });
+  }
+  const totalsRow = matrix[matrix.length - 1] ?? [];
+  return matrix.map((row) =>
+    row.map((v, ci) => (Number.isNaN(v) ? Number.NaN : v / (totalsRow[ci] ?? Number.NaN))),
+  );
+}
+
+/**
+ * Extend a matrix with a totals column (row sums) and a totals row (column
+ * sums plus the grand total), and append `marginsName` to both label lists.
+ * NaN cells contribute zero to every total. Inputs are not modified.
+ */
+function addMargins(
+  matrix: number[][],
+  rowOrder: readonly string[],
+  colOrder: readonly string[],
+  marginsName: string,
+): { matrix: number[][]; rowOrder: string[]; colOrder: string[] } {
+  const columnTotals: number[] = Array.from({ length: colOrder.length }, () => 0);
+  const extended: number[][] = [];
+
+  for (const row of matrix) {
+    let rowTotal = 0;
+    row.forEach((cell, ci) => {
+      const contribution = Number.isNaN(cell) ? 0 : cell;
+      rowTotal += contribution;
+      columnTotals[ci] = (columnTotals[ci] ?? 0) + contribution;
+    });
+    extended.push([...row, rowTotal]);
+  }
+
+  let grandTotal = 0;
+  for (const columnTotal of columnTotals) {
+    grandTotal += columnTotal;
+  }
+  extended.push([...columnTotals, grandTotal]);
+
+  return {
+    matrix: extended,
+    rowOrder: [...rowOrder, marginsName],
+    colOrder: [...colOrder, marginsName],
+  };
+}
+
+/**
+ * Resolve final layout (optionally applying margins then normalization).
+ *
+ * Margins are appended first; normalisation then runs on the extended
+ * matrix, so the margins row/column is normalised as well.
+ *
+ * @param matrix - Aggregated cell matrix.
+ * @param rowOrder - Row labels matching the rows of `matrix`.
+ * @param colOrder - Column labels matching the columns of `matrix`.
+ * @param opts - Resolved `margins`, `margins_name` and `normalize` settings.
+ * @returns The final matrix with its row and column labels.
+ */
+function resolveFinalLayout(
+ matrix: number[][],
+ rowOrder: string[],
+ colOrder: string[],
+ opts: Required>,
+): { matrix: number[][]; rowOrder: string[]; colOrder: string[] } {
+ const { margins, margins_name: marginsName, normalize } = opts;
+ const withMargins = margins === true; // only the literal `true` turns margins on
+
+ let mat = matrix;
+ let ro = rowOrder;
+ let co = colOrder;
+
+ if (withMargins) { // step 1: append the totals row and column
+ const result = addMargins(mat, ro, co, marginsName);
+ mat = result.matrix;
+ ro = result.rowOrder;
+ co = result.colOrder;
+ }
+
+ if (normalize !== false) { // step 2: normalise, telling the normaliser whether margins are present
+ mat = normalizeMatrix(mat, normalize, withMargins);
+ }
+
+ return { matrix: mat, rowOrder: ro, colOrder: co };
+}
+
+// ─── main export ──────────────────────────────────────────────────────────────
+
+/**
+ * Compute a cross-tabulation of two factors — mirrors `pandas.crosstab`.
+ *
+ * Without `values`, each cell is the number of positions at which the row
+ * and column factor values co-occur. With `values`, each cell is `aggfunc`
+ * (default `"mean"`) applied to the values observed for that pair.
+ * Row and column labels are the stringified factor values.
+ *
+ * @param rowSeries - Series (or array) to use as row factor.
+ * @param colSeries - Series (or array) to use as column factor.
+ * @param options - Optional configuration, see {@link CrosstabOptions}.
+ * @returns A DataFrame where rows = unique row-factor values,
+ *   columns = unique column-factor values, cells = counts
+ *   (or aggregated values when `values` is provided).
+ * @throws {RangeError} If the two factors differ in length, or if `values`
+ *   is given and its length differs from the factors'.
+ */
+export function crosstab(
+  rowSeries: Series | readonly Scalar[],
+  colSeries: Series | readonly Scalar[],
+  options: CrosstabOptions = {},
+): DataFrame {
+  const rowVals = toScalarArray(rowSeries);
+  const colVals = toScalarArray(colSeries);
+  if (rowVals.length !== colVals.length) {
+    throw new RangeError("crosstab: index and columns must have the same length.");
+  }
+
+  const valVals: readonly Scalar[] | null =
+    options.values !== undefined ? toScalarArray(options.values) : null;
+  // A shorter `values` would otherwise be read past its end and silently
+  // aggregate `undefined`; a longer one would be silently truncated.
+  if (valVals !== null && valVals.length !== rowVals.length) {
+    throw new RangeError("crosstab: values must have the same length as index and columns.");
+  }
+
+  // `aggfunc` is only honoured together with `values`; plain tables count.
+  const aggfunc: CrosstabAggFunc =
+    options.values !== undefined ? (options.aggfunc ?? "mean") : "count";
+  const dropna: boolean = options.dropna ?? true;
+  const margins: boolean = options.margins === true;
+  const marginsName: string = options.margins_name ?? "All";
+  const normalize: CrosstabNormalize = options.normalize ?? false;
+
+  const { rowOrder, colOrder, cellMap } = buildCellMap(rowVals, colVals, valVals, dropna);
+
+  const matrix = buildMatrixDirect(rowOrder, colOrder, cellMap, aggfunc);
+
+  const layout = resolveFinalLayout(matrix, rowOrder, colOrder, {
+    margins,
+    margins_name: marginsName,
+    normalize,
+  });
+
+  // Transpose the row-major matrix into per-column arrays for the DataFrame.
+  const outCols: Record<string, (number | null)[]> = {};
+  for (let ci = 0; ci < layout.colOrder.length; ci++) {
+    const colName = layout.colOrder[ci] ?? "";
+    outCols[colName] = layout.matrix.map((row) => row[ci] ?? null);
+  }
+
+  return DataFrame.fromColumns(outCols, {
+    index: new Index(layout.rowOrder as Label[]),
+  });
+}
+
+/**
+ * Compute a cross-tabulation directly from two same-length arrays
+ * (convenience wrapper for array inputs).
+ *
+ * Forwards all three arguments unchanged to {@link crosstab}.
+ *
+ * @param rowData - Row-factor values.
+ * @param colData - Column-factor values.
+ * @param options - Optional configuration, see {@link CrosstabOptions}.
+ * @returns The DataFrame produced by {@link crosstab}.
+ */
+export function crosstabSeries(
+ rowData: readonly Scalar[],
+ colData: readonly Scalar[],
+ options: CrosstabOptions = {},
+): DataFrame {
+ return crosstab(rowData, colData, options);
+}
diff --git a/src/stats/cut.ts b/src/stats/cut.ts
new file mode 100644
index 00000000..ffd3fb19
--- /dev/null
+++ b/src/stats/cut.ts
@@ -0,0 +1,453 @@
+/**
+ * cut / qcut — bin continuous data into discrete intervals.
+ *
+ * Mirrors `pandas.cut()` and `pandas.qcut()`:
+ * - `cut(x, bins, options)` — uniform or user-defined bin edges
+ * - `qcut(x, q, options)` — quantile-based (equal-frequency) bins
+ *
+ * Each function returns:
+ * - a `Series` of bin-label strings (or custom labels)
+ * - optionally the bin edges used (via `retbins: true`)
+ *
+ * @module
+ */
+
+import { Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/** Options for {@link cut}. */
+export interface CutOptions {
+ /**
+ * Whether the right edge of each interval is closed.
+ * Default `true` — `(lo, hi]` (half-open on left, closed on right).
+ * When `false` — `[lo, hi)`.
+ */
+ readonly right?: boolean;
+ /**
+ * Custom labels for the resulting bins.
+ * - `readonly string[]` — one label per bin interval; the length must
+ * equal the number of bins, otherwise a `RangeError` is thrown.
+ * - `false` — use the stringified integer codes (`"0"`, `"1"`, `"2"`, …) as labels.
+ * - `undefined` (default) — auto-generate `"(lo, hi]"` style labels.
+ */
+ readonly labels?: readonly string[] | false;
+ /**
+ * When `true`, return a `[series, binEdges]` tuple.
+ * When `false` (default), return only the Series.
+ */
+ readonly retbins?: boolean;
+ /**
+ * Number of decimal places for auto-generated interval labels.
+ * Default `3`.
+ */
+ readonly precision?: number;
+ /**
+ * Widen the left-most bin edge by a tiny epsilon so that a value equal to
+ * the first edge is placed in the first bin. `cut` applies this both to an
+ * integer bin count and to explicit bin edges. Default `true`.
+ */
+ readonly includeLowest?: boolean;
+ /**
+ * Accepted for parity with pandas. `cut` does not currently read this
+ * option, so it has no effect on the result.
+ */
+ readonly ordered?: boolean;
+}
+
+/** Options for {@link qcut}. */
+export interface QcutOptions {
+ /**
+ * Custom labels for the resulting bins.
+ * - `readonly string[]` — one label per quantile interval.
+ * - `false` — use the stringified integer codes (`"0"`, `"1"`, `"2"`, …).
+ * - `undefined` (default) — auto-generate `"(lo, hi]"` labels from the
+ * computed quantile edges (value ranges, not percentile ranges).
+ */
+ readonly labels?: readonly string[] | false;
+ /** When `true`, return a `[series, binEdges]` tuple. Default `false`. */
+ readonly retbins?: boolean;
+ /** Decimal places for auto-generated labels. Default `3`. */
+ readonly precision?: number;
+ /**
+ * Whether to allow duplicate bin edges (non-unique quantile boundaries).
+ * When `"raise"` (default), throws if duplicates are found.
+ * When `"drop"`, silently removes duplicates; throws if fewer than two
+ * unique edges remain.
+ */
+ readonly duplicates?: "raise" | "drop";
+}
+
+// ─── helper types ─────────────────────────────────────────────────────────────
+
+/** Result when `retbins` is `false` (default). */
+export type CutResult = Series;
+
+/** Result when `retbins` is `true`. */
+export type CutResultWithBins = [Series, readonly number[]];
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** Report whether `v` is `null`, `undefined` or `NaN`. */
+function isMissing(v: Scalar): boolean {
+  if (typeof v === "number") {
+    return Number.isNaN(v);
+  }
+  return v === null || v === undefined;
+}
+
+/** Round `n` to `precision` decimal places and render it without trailing zeros. */
+function fmt(n: number, precision: number): string {
+  const fixed = n.toFixed(precision);
+  return String(Number(fixed));
+}
+
+/**
+ * Render one interval as text: `"(lo, hi]"` when right-closed, `"[lo, hi)"`
+ * otherwise. Both bounds are formatted with `precision` decimal places.
+ */
+function intervalLabel(lo: number, hi: number, right: boolean, precision: number): string {
+  const bounds = `${fmt(lo, precision)}, ${fmt(hi, precision)}`;
+  if (right) {
+    return `(${bounds}]`;
+  }
+  return `[${bounds})`;
+}
+
+/**
+ * Quantile of an ascending-sorted sample using linear interpolation between
+ * the two nearest ranks.
+ *
+ * @param sorted ascending-sorted array of numbers
+ * @param q quantile in [0, 1]
+ * @returns the interpolated quantile, or `NaN` for an empty sample
+ */
+function linearQuantile(sorted: readonly number[], q: number): number {
+  if (sorted.length === 0) {
+    return Number.NaN;
+  }
+  const position = q * (sorted.length - 1);
+  const below = Math.floor(position);
+  const above = Math.ceil(position);
+  const lower = sorted[below] as number;
+  if (below === above) {
+    // The position falls exactly on a rank; no interpolation needed.
+    return lower;
+  }
+  const weight = position - below;
+  return lower * (1 - weight) + (sorted[above] as number) * weight;
+}
+
+/**
+ * Return a sorted copy of user-supplied bin edges.
+ * Throws `RangeError` when fewer than two edges are given or when any edge repeats.
+ */
+function normaliseBinEdges(edges: readonly number[]): readonly number[] {
+  if (edges.length < 2) {
+    throw new RangeError("At least 2 bin edges required.");
+  }
+  const ordered = [...edges].sort((a, b) => a - b);
+  // After sorting, equal edges are adjacent.
+  const repeatAt = ordered.findIndex((edge, i) => i > 0 && edge === ordered[i - 1]);
+  if (repeatAt !== -1) {
+    throw new RangeError(
+      `Bin edge ${ordered[repeatAt]} appears more than once. Bin edges must be unique.`,
+    );
+  }
+  return ordered;
+}
+
+/**
+ * Locate, by binary search over sorted `edges`, the index of the first bin
+ * whose upper edge admits `v` (`v <= upper` when `right`, `v < upper`
+ * otherwise). Falls back to the last bin when no upper edge admits `v`.
+ */
+function findBin(v: number, edges: readonly number[], right: boolean): number {
+  let first = 0;
+  let last = edges.length - 2; // index of the final bin
+
+  while (first < last) {
+    const middle = Math.floor((first + last) / 2);
+    const upper = edges[middle + 1] as number;
+    const fits = right ? v <= upper : v < upper;
+    if (fits) {
+      last = middle;
+    } else {
+      first = middle + 1;
+    }
+  }
+  return first;
+}
+
+/**
+ * Produce one label per bin.
+ * - `labels === undefined`: interval labels derived from consecutive edges.
+ * - `labels === false`: the stringified bin codes `"0"`, `"1"`, ….
+ * - otherwise: the caller's labels, which must number exactly `numBins`
+ *   (a `RangeError` is thrown if not).
+ */
+function buildLabels(
+  edges: readonly number[],
+  right: boolean,
+  labels: readonly string[] | false | undefined,
+  precision: number,
+  numBins: number,
+): readonly (string | null)[] {
+  if (labels === undefined) {
+    return Array.from({ length: numBins }, (_unused, bin) =>
+      intervalLabel(edges[bin] as number, edges[bin + 1] as number, right, precision),
+    );
+  }
+  if (labels === false) {
+    return Array.from({ length: numBins }, (_unused, code) => String(code));
+  }
+  if (labels.length === numBins) {
+    return labels;
+  }
+  throw new RangeError(
+    `labels length (${labels.length}) must equal number of bins (${numBins}).`,
+  );
+}
+
+/**
+ * Test whether `v` lies inside the overall bin range: `(lo0, hiN]` when the
+ * intervals are right-closed, `[lo0, hiN)` otherwise.
+ */
+function isInRange(v: number, lo0: number, hiN: number, right: boolean): boolean {
+  return right ? v > lo0 && v <= hiN : v >= lo0 && v < hiN;
+}
+
+/**
+ * Map every element of `data` to the label of the bin that contains it.
+ * Elements that are not numbers, are NaN, or fall outside the overall bin
+ * range map to `null`.
+ */
+function assignBins(
+  data: readonly Scalar[],
+  edges: readonly number[],
+  right: boolean,
+  labels: readonly string[] | false | undefined,
+  precision: number,
+  includeLowest: boolean,
+): readonly (string | null)[] {
+  const binCount = edges.length - 1;
+  const binLabels = buildLabels(edges, right, labels, precision, binCount);
+
+  const firstEdge = edges[0] as number;
+  const lastEdge = edges[binCount] as number;
+  // With includeLowest, nudge the lower limit down by a tiny relative epsilon
+  // so a value equal to the first edge passes the range test.
+  let lowerLimit = firstEdge;
+  if (includeLowest) {
+    lowerLimit = firstEdge - 1e-10 * (Math.abs(firstEdge) + 1);
+  }
+
+  const labelFor = (raw: Scalar): string | null => {
+    if (typeof raw !== "number" || Number.isNaN(raw)) {
+      return null;
+    }
+    if (!isInRange(raw, lowerLimit, lastEdge, right)) {
+      return null;
+    }
+    return binLabels[findBin(raw, edges, right)] ?? null;
+  };
+  return data.map(labelFor);
+}
+
+/**
+ * Compute `bins + 1` equal-width edges spanning a numeric range.
+ *
+ * When `minVal === maxVal` the range is widened by 0.5 on each side so the
+ * bins have non-zero width. The final edge is always returned as the exact
+ * upper bound rather than `min + bins * step`: accumulated rounding in that
+ * product can land a hair below the maximum, which would leave the largest
+ * data value outside every bin.
+ *
+ * @param minVal lower end of the range
+ * @param maxVal upper end of the range
+ * @param bins number of intervals
+ * @returns ascending array of `bins + 1` edges
+ */
+function equalWidthEdges(minVal: number, maxVal: number, bins: number): readonly number[] {
+  const degenerate = minVal === maxVal;
+  const lo = degenerate ? minVal - 0.5 : minVal;
+  const hi = degenerate ? maxVal + 0.5 : maxVal;
+  const step = (hi - lo) / bins;
+  return Array.from({ length: bins + 1 }, (_, i) => {
+    if (i === bins) {
+      return hi; // pin the endpoint exactly
+    }
+    return degenerate ? lo + (i * (hi - lo)) / bins : lo + i * step;
+  });
+}
+
+/**
+ * Extract the finite numbers from a scalar array.
+ *
+ * NaN and ±Infinity are dropped along with every non-number: an infinite
+ * value would otherwise become the range minimum/maximum and turn the
+ * derived bin edges into Infinity/NaN.
+ */
+function numericOnly(vals: readonly Scalar[]): number[] {
+  return vals.filter((v): v is number => typeof v === "number" && Number.isFinite(v));
+}
+
+/**
+ * Build equal-width bin edges from an integer bin count.
+ *
+ * @param nums numeric data values that define the range
+ * @param bins number of bins; must be a positive integer
+ * @returns `bins + 1` edges spanning the minimum to the maximum of `nums`
+ * @throws {RangeError} if `bins` is not a positive integer or `nums` is empty
+ */
+function edgesFromCount(nums: readonly number[], bins: number): readonly number[] {
+  if (!Number.isInteger(bins) || bins < 1) {
+    throw new RangeError("`bins` must be a positive integer when given as a number.");
+  }
+  if (nums.length === 0) {
+    throw new RangeError("Cannot determine bin edges: no finite numeric values in x.");
+  }
+  // Scan once instead of `Math.min(...nums)`: spreading passes every element
+  // as a call argument and throws RangeError on large inputs.
+  let minVal = Number.POSITIVE_INFINITY;
+  let maxVal = Number.NEGATIVE_INFINITY;
+  for (const v of nums) {
+    minVal = Math.min(minVal, v);
+    maxVal = Math.max(maxVal, v);
+  }
+  return equalWidthEdges(minVal, maxVal, bins);
+}
+
+/** Pair the series with its bin edges when `retbins` is set; otherwise return the series alone. */
+function wrapResult(
+  series: Series,
+  edges: readonly number[],
+  retbins: boolean,
+): CutResult | CutResultWithBins {
+  return retbins ? [series, edges] : series;
+}
+
+// ─── public API ───────────────────────────────────────────────────────────────
+
+/**
+ * Push the last edge outwards when the data maximum would otherwise sit on
+ * (or past) it. Used for left-closed bins built from an integer count, where
+ * `[lo, hi)` would exclude the maximum. The pad is 0.1% of the edge span,
+ * as `pandas.cut` does, with a relative-epsilon floor so the edge always moves.
+ */
+function widenLastEdge(edges: readonly number[], nums: readonly number[]): readonly number[] {
+  const lastIdx = edges.length - 1;
+  const first = edges[0] as number;
+  const last = edges[lastIdx] as number;
+  let dataMax = Number.NEGATIVE_INFINITY;
+  for (const v of nums) {
+    if (v > dataMax) {
+      dataMax = v;
+    }
+  }
+  if (dataMax < last) {
+    return edges; // the maximum is already strictly inside the last bin
+  }
+  const base = Math.max(last, dataMax);
+  const pad = Math.max(0.001 * (last - first), Math.abs(base) * Number.EPSILON * 2);
+  const widened = [...edges];
+  widened[lastIdx] = base + pad;
+  return widened;
+}
+
+/**
+ * Bin values in `x` into discrete intervals — mirrors `pandas.cut()`.
+ *
+ * With an integer `bins` and `right: false`, the last edge is extended
+ * slightly so that the maximum value falls into the last bin; the extended
+ * edge is what appears in the labels and in the returned bin edges.
+ *
+ * @param x Input Series of numeric values.
+ * @param bins Either an integer number of equal-width bins, or an explicit
+ * array of bin edges (length ≥ 2, unique; sorted internally).
+ * @param options See {@link CutOptions}.
+ * @returns A `Series` with bin-label for each element (`null` for missing,
+ * non-numeric or out-of-range values),
+ * or a `[Series, binEdges]` tuple when `retbins: true`.
+ * @throws {RangeError} If `bins` is an invalid count or edge list, if `x` has
+ * no usable numeric values for a bin count, or if `labels` has the wrong length.
+ *
+ * @example
+ * ```ts
+ * import { cut, Series } from "tsb";
+ *
+ * const s = new Series({ data: [1, 7, 5, 4, 2, 3], name: "x" });
+ * const binned = cut(s, 3);
+ * ```
+ */
+export function cut(
+  x: Series,
+  bins: number | readonly number[],
+  options: CutOptions & { readonly retbins: true },
+): CutResultWithBins;
+export function cut(x: Series, bins: number, options?: CutOptions): CutResult;
+export function cut(x: Series, bins: readonly number[], options?: CutOptions): CutResult;
+export function cut(
+  x: Series,
+  bins: number | readonly number[],
+  options: CutOptions = {},
+): CutResult | CutResultWithBins {
+  const right = options.right ?? true;
+  const labels = options.labels;
+  const retbins = options.retbins ?? false;
+  const precision = options.precision ?? 3;
+  const includeLowest = options.includeLowest ?? true;
+
+  const vals = x.values;
+  const nums = numericOnly(vals);
+  let edges = typeof bins === "number" ? edgesFromCount(nums, bins) : normaliseBinEdges(bins);
+  if (typeof bins === "number" && !right) {
+    // Left-closed bins exclude their upper edge, which here is the data maximum.
+    edges = widenLastEdge(edges, nums);
+  }
+
+  const resultVals = assignBins(vals, edges, right, labels, precision, includeLowest);
+  const series = new Series({
+    data: [...resultVals],
+    index: x.index,
+    name: x.name ?? null,
+  });
+  return wrapResult(series, edges, retbins);
+}
+
+/**
+ * Expand an integer quantile count into the `q + 1` evenly spaced levels
+ * `0, 1/q, …, 1`. Throws `RangeError` unless `q` is an integer of at least 2.
+ */
+function quantileLevelsFromInt(q: number): readonly number[] {
+  const usable = Number.isInteger(q) && q >= 2;
+  if (!usable) {
+    throw new RangeError("`q` must be an integer ≥ 2 when given as a number.");
+  }
+  const levels: number[] = [];
+  for (let i = 0; i <= q; i++) {
+    levels.push(i / q);
+  }
+  return levels;
+}
+
+/**
+ * Handle repeated values in sorted bin edges.
+ * With no repeats the input array is returned as-is. Otherwise `"drop"`
+ * returns the sorted unique edges (throwing if fewer than two remain) and
+ * `"raise"` throws, naming the first repeated edge.
+ */
+function deduplicateEdges(rawEdges: number[], duplicates: "raise" | "drop"): readonly number[] {
+  // Edges are sorted, so a repeat always sits next to its twin.
+  const repeatAt = rawEdges.findIndex((edge, i) => i > 0 && edge === rawEdges[i - 1]);
+  if (repeatAt === -1) {
+    return rawEdges;
+  }
+  if (duplicates !== "drop") {
+    throw new RangeError(
+      `Duplicate bin edges found: ${rawEdges[repeatAt]}. Use duplicates="drop" to handle.`,
+    );
+  }
+  const unique = Array.from(new Set(rawEdges)).sort((a, b) => a - b);
+  if (unique.length < 2) {
+    throw new RangeError(
+      "After dropping duplicate bin edges, fewer than 2 unique edges remain.",
+    );
+  }
+  return unique;
+}
+
+/**
+ * Bin values in `x` into quantile-based (equal-frequency) intervals —
+ * mirrors `pandas.qcut()`.
+ *
+ * Bin edges are the linearly interpolated quantiles of the numeric values
+ * of `x`; bins are right-closed and the lowest value is included.
+ *
+ * @param x Input Series of numeric values.
+ * @param q Either an integer number of quantiles (≥ 2), or an explicit array
+ * of quantile levels in [0, 1] (e.g. `[0, 0.25, 0.5, 0.75, 1]`).
+ * @param options See {@link QcutOptions}.
+ * @returns A `Series` with quantile-bin label for each element,
+ * or a `[Series, binEdges]` tuple when `retbins: true`.
+ * @throws {RangeError} If `x` has no usable numeric values, if `q` is invalid
+ * (bad count, fewer than two levels, or a level outside [0, 1]), or if
+ * duplicate edges are found and `duplicates` is `"raise"`.
+ *
+ * @example
+ * ```ts
+ * import { qcut, Series } from "tsb";
+ *
+ * const s = new Series({ data: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], name: "v" });
+ * const binned = qcut(s, 4); // 4 equal-frequency quartile bins
+ * ```
+ */
+export function qcut(
+  x: Series,
+  q: number | readonly number[],
+  options: QcutOptions & { readonly retbins: true },
+): CutResultWithBins;
+export function qcut(x: Series, q: number, options?: QcutOptions): CutResult;
+export function qcut(x: Series, q: readonly number[], options?: QcutOptions): CutResult;
+export function qcut(
+  x: Series,
+  q: number | readonly number[],
+  options: QcutOptions = {},
+): CutResult | CutResultWithBins {
+  const labels = options.labels;
+  const retbins = options.retbins ?? false;
+  const precision = options.precision ?? 3;
+  const duplicates = options.duplicates ?? "raise";
+
+  const vals = x.values;
+  const nums = numericOnly(vals);
+
+  if (nums.length === 0) {
+    throw new RangeError("Cannot compute quantiles: no finite numeric values in x.");
+  }
+
+  const sorted = [...nums].sort((a, b) => a - b);
+
+  let qLevels: readonly number[];
+  if (typeof q === "number") {
+    qLevels = quantileLevelsFromInt(q);
+  } else {
+    if (q.length < 2) {
+      throw new RangeError("`q` array must have at least 2 elements.");
+    }
+    // A level outside [0, 1] (or NaN) would index past the sorted sample and
+    // silently yield NaN edges, so reject it up front.
+    for (const level of q) {
+      if (!(level >= 0 && level <= 1)) {
+        throw new RangeError("`q` quantile levels must all lie in [0, 1].");
+      }
+    }
+    qLevels = [...q].sort((a, b) => a - b);
+  }
+
+  const rawEdges = qLevels.map((qLevel) => linearQuantile(sorted, qLevel));
+  const edges = deduplicateEdges(rawEdges, duplicates);
+
+  // Quantile bins are always right-closed with the lowest value included.
+  const resultVals = assignBins(vals, edges, true, labels, precision, true);
+  const series = new Series({
+    data: [...resultVals],
+    index: x.index,
+    name: x.name ?? null,
+  });
+  return wrapResult(series, edges, retbins);
+}
+
+/**
+ * Return the integer bin code (0-based) for each element of `x`.
+ *
+ * Equivalent to `cut(x, bins, { labels: false })` but returns `number | null`:
+ * the stringified codes produced by `cut` are parsed back into integers.
+ *
+ * @param x Input Series of numeric values.
+ * @param bins Integer number of equal-width bins or explicit bin edges.
+ * @param options Remaining cut options; they are spread into the underlying
+ * `cut` call, with `labels: false` always forced.
+ * @returns Series of integer bin codes (or `null` for missing/out-of-range),
+ * carrying the index and name of `x`.
+ */
+export function cutCodes(
+ x: Series,
+ bins: number | readonly number[],
+ options?: Omit,
+): Series {
+ const strSeries = cut(x, bins as number, { ...options, labels: false }) as CutResult; // `as number` only selects an overload; arrays are passed through unchanged
+ const data = strSeries.values.map((v): number | null =>
+ v === null ? null : Number.parseInt(v, 10), // null stays null; "0", "1", … become integers
+ );
+ return new Series({
+ data: [...data],
+ index: x.index,
+ name: x.name ?? null,
+ });
+}
+
+/**
+ * List the interval labels, in interval order, for a given binning.
+ *
+ * @param bins integer bin count or explicit edge array
+ * @param minVal lower end of the data range (used only when `bins` is an integer)
+ * @param maxVal upper end of the data range (used only when `bins` is an integer)
+ * @param right whether intervals are right-closed (default `true`)
+ * @param precision decimal places in the labels (default `3`)
+ * @returns one `"(lo, hi]"` / `"[lo, hi)"` label per bin
+ */
+export function cutCategories(
+  bins: number | readonly number[],
+  minVal: number,
+  maxVal: number,
+  right = true,
+  precision = 3,
+): readonly string[] {
+  let edges: readonly number[];
+  if (typeof bins === "number") {
+    edges = equalWidthEdges(minVal, maxVal, bins);
+  } else {
+    edges = normaliseBinEdges(bins);
+  }
+  const binCount = edges.length - 1;
+  return Array.from({ length: binCount }, (_unused, bin) =>
+    intervalLabel(edges[bin] as number, edges[bin + 1] as number, right, precision),
+  );
+}
diff --git a/src/stats/diff_shift.ts b/src/stats/diff_shift.ts
new file mode 100644
index 00000000..4f62825f
--- /dev/null
+++ b/src/stats/diff_shift.ts
@@ -0,0 +1,368 @@
+/**
+ * diff_shift — discrete difference and value-shift for Series and DataFrame.
+ *
+ * Mirrors the following pandas methods:
+ * - `Series.diff(periods=1)` — first discrete difference shifted by `periods`
+ * - `Series.shift(periods=1, fill_value=NaN)` — shift index by `periods`
+ * - `DataFrame.diff(periods=1, axis=0)` — column-wise or row-wise diff
+ * - `DataFrame.shift(periods=1, fill_value=NaN, axis=0)` — column-wise or row-wise shift
+ *
+ * All functions are **pure** (return new objects; inputs are unchanged).
+ * Non-numeric values in `diff` yield `null`.
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Axis, Scalar } from "../types.ts";
+
+// ─── public types ──────────────────────────────────────────────────────────────
+
+/** Options for {@link diffSeries} and {@link diffDataFrame}. */
+export interface DiffOptions {
+ /**
+ * Number of periods to shift for calculating difference: each output
+ * element is `value[i] - value[i - periods]`.
+ * Negative values shift in the opposite direction (the partner element is
+ * the one `|periods|` positions later).
+ * Positions whose partner falls outside the data are `null`.
+ * Default `1`.
+ */
+ readonly periods?: number;
+}
+
+/** Options for {@link diffDataFrame}. */
+export interface DataFrameDiffOptions extends DiffOptions {
+ /**
+ * Axis along which to compute the difference.
+ * - `0` or `"index"` (default): diff down each **column**.
+ * - `1` or `"columns"`: diff across each **row**.
+ *
+ * Only `1` / `"columns"` select the row-wise path; every other value is
+ * handled column-wise.
+ */
+ readonly axis?: Axis;
+}
+
+/** Options for {@link shiftSeries} and {@link shiftDataFrame}. */
+export interface ShiftOptions {
+ /**
+ * Number of periods to shift.
+ * Positive: shift forward (later rows get earlier values).
+ * Negative: shift backward.
+ * A shift at least as large as the data length leaves only fill values.
+ * Default `1`.
+ */
+ readonly periods?: number;
+ /**
+ * Value to fill positions that fall outside the original range.
+ * Default `null` (treated as missing, like pandas NaN); an explicit
+ * `undefined` is treated the same as omitting the option.
+ */
+ readonly fillValue?: Scalar;
+}
+
+/** Options for {@link shiftDataFrame}. */
+export interface DataFrameShiftOptions extends ShiftOptions {
+ /**
+ * Axis along which to shift.
+ * - `0` or `"index"` (default): shift down each **column**.
+ * - `1` or `"columns"`: shift across each **row**.
+ *
+ * Only `1` / `"columns"` select the row-wise path; every other value is
+ * handled column-wise.
+ */
+ readonly axis?: Axis;
+}
+
+// ─── helpers ───────────────────────────────────────────────────────────────────
+
+/**
+ * True when `v` is a number other than NaN (not null / undefined / NaN).
+ *
+ * Despite the name, `Infinity` and `-Infinity` pass this check: only
+ * non-number values and NaN are rejected.
+ */
+function isFiniteNum(v: Scalar): v is number {
+ return typeof v === "number" && !Number.isNaN(v);
+}
+
+/**
+ * Element-wise discrete difference of an array of scalars.
+ *
+ * Position `i` holds `vals[i] - vals[i - periods]`. It is `null` when the
+ * partner position lies outside the array, or when either operand fails
+ * `isFiniteNum`.
+ */
+function diffArray(vals: readonly Scalar[], periods: number): Scalar[] {
+  const size = vals.length;
+  return Array.from({ length: size }, (_, pos): Scalar => {
+    const partner = pos - periods;
+    // No partner inside the array → nothing to subtract from.
+    if (partner < 0 || partner >= size) {
+      return null;
+    }
+    const here = vals[pos] as Scalar;
+    const there = vals[partner] as Scalar;
+    return isFiniteNum(here) && isFiniteNum(there) ? here - there : null;
+  });
+}
+
+/**
+ * Move every element of `vals` by `periods` positions.
+ *
+ * A positive `periods` pushes values toward the end (position `i` receives
+ * `vals[i - periods]`); a negative one pulls them toward the start. Slots
+ * with no source element keep `fillValue`.
+ */
+function shiftArray(vals: readonly Scalar[], periods: number, fillValue: Scalar): Scalar[] {
+  const size = vals.length;
+  const shifted: Scalar[] = new Array(size).fill(fillValue);
+  // Destination window that has a source element:
+  //   forward shift  → [periods, size)
+  //   backward shift → [0, size + periods)
+  const forward = periods >= 0;
+  const first = forward ? periods : 0;
+  const stop = forward ? size : size + periods;
+  for (let dest = first; dest < stop; dest++) {
+    shifted[dest] = vals[dest - periods] as Scalar;
+  }
+  return shifted;
+}
+
+// ─── Series: diff ──────────────────────────────────────────────────────────────
+
+/**
+ * First discrete difference of a Series.
+ *
+ * Each element becomes `series[i] - series[i - periods]`. Positions without
+ * a partner (the first `|periods|` for a positive lag, the last `|periods|`
+ * for a negative one) and positions involving a non-numeric value are `null`.
+ * The result keeps the index and name of the input.
+ *
+ * Mirrors `pandas.Series.diff(periods=1)`.
+ *
+ * @example
+ * ```ts
+ * import { Series } from "tsb";
+ * import { diffSeries } from "tsb";
+ *
+ * const s = new Series({ data: [1, 3, 6, 10, 15] });
+ * diffSeries(s).values;                 // [null, 2, 3, 4, 5]
+ * diffSeries(s, { periods: 2 }).values; // [null, null, 5, 7, 9]
+ * ```
+ */
+export function diffSeries(series: Series, options: DiffOptions = {}): Series {
+  const lag = options.periods ?? 1;
+  return new Series({
+    data: diffArray(series.values as readonly Scalar[], lag),
+    index: series.index,
+    name: series.name,
+  });
+}
+
+// ─── Series: shift ─────────────────────────────────────────────────────────────
+
+/**
+ * Shift the values of a Series by `periods` positions, leaving the index
+ * and name untouched.
+ *
+ * A positive `periods` moves values forward (down) and fills the vacated
+ * leading positions with `fillValue`; a negative `periods` moves them
+ * backward (up) and fills the trailing positions instead.
+ *
+ * Mirrors `pandas.Series.shift(periods=1, fill_value=NaN)`.
+ *
+ * @example
+ * ```ts
+ * import { Series } from "tsb";
+ * import { shiftSeries } from "tsb";
+ *
+ * const s = new Series({ data: [1, 2, 3, 4, 5] });
+ * shiftSeries(s).values;                             // [null, 1, 2, 3, 4]
+ * shiftSeries(s, { periods: -1 }).values;            // [2, 3, 4, 5, null]
+ * shiftSeries(s, { periods: 2, fillValue: 0 }).values; // [0, 0, 1, 2, 3]
+ * ```
+ */
+export function shiftSeries(series: Series, options: ShiftOptions = {}): Series {
+  const lag = options.periods ?? 1;
+  // An omitted (or explicitly undefined) fill value means "missing".
+  const filler = options.fillValue ?? null;
+  return new Series({
+    data: shiftArray(series.values as readonly Scalar[], lag, filler),
+    index: series.index,
+    name: series.name,
+  });
+}
+
+// ─── DataFrame: diff ──────────────────────────────────────────────────────────
+
+/**
+ * First discrete difference of a DataFrame.
+ *
+ * With `axis=0` (the default) every column is differenced on its own, down
+ * the rows. With `axis=1` / `"columns"` the difference runs across each row
+ * (column N minus column N-periods).
+ *
+ * Mirrors `pandas.DataFrame.diff(periods=1, axis=0)`.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame } from "tsb";
+ * import { diffDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [1, 3, 6], b: [10, 20, 35] });
+ * diffDataFrame(df).col("a").values; // [null, 2, 3]
+ * diffDataFrame(df).col("b").values; // [null, 10, 15]
+ * ```
+ */
+export function diffDataFrame(df: DataFrame, options: DataFrameDiffOptions = {}): DataFrame {
+  const lag = options.periods ?? 1;
+  const direction = options.axis ?? 0;
+  const names = df.columns.values;
+
+  switch (direction) {
+    case 1:
+    case "columns":
+      return diffDataFrameRowWise(df, names, lag);
+    default:
+      return diffDataFrameColWise(df, names, lag);
+  }
+}
+
+/**
+ * Diff each column independently (axis=0).
+ *
+ * Every column named in `colNames` is run through `diffArray` with the same
+ * `periods`; the result reuses the row index of `df` and the given column
+ * names, in the order supplied.
+ */
+function diffDataFrameColWise(
+ df: DataFrame,
+ colNames: readonly string[],
+ periods: number,
+): DataFrame {
+ const colMap = new Map>();
+ for (const name of colNames) {
+ const col = df.col(name) as Series;
+ const data = diffArray(col.values as readonly Scalar[], periods);
+ colMap.set(name, new Series({ data, index: df.index, name }));
+ }
+ return new DataFrame(colMap, df.index);
+}
+
+/**
+ * Diff across columns (axis=1).
+ *
+ * The output column at position `c` holds, row by row, the value of column
+ * `c` minus the value of column `c - periods`. Columns with no partner
+ * column inside the frame are entirely `null`, as are individual cells where
+ * either operand fails `isFiniteNum`.
+ */
+function diffDataFrameRowWise(
+ df: DataFrame,
+ colNames: readonly string[],
+ periods: number,
+): DataFrame {
+ const nRows = df.index.size;
+ const nCols = colNames.length;
+ const colMap = new Map>();
+
+ for (let c = 0; c < nCols; c++) {
+ const name = colNames[c];
+ if (name === undefined) {
+ continue;
+ }
+ // Start from an all-null column; cells are overwritten only when a
+ // partner column exists.
+ const rowData: Scalar[] = new Array(nRows).fill(null);
+ const priorIdx = c - periods;
+ if (priorIdx < 0 || priorIdx >= nCols) {
+ // Partner column position is outside the frame → column stays all null.
+ colMap.set(name, new Series({ data: rowData, index: df.index, name }));
+ continue;
+ }
+ const priorName = colNames[priorIdx];
+ if (priorName === undefined) {
+ colMap.set(name, new Series({ data: rowData, index: df.index, name }));
+ continue;
+ }
+ const curCol = df.col(name) as Series;
+ const priorCol = df.col(priorName) as Series;
+ for (let r = 0; r < nRows; r++) {
+ const cur = curCol.iat(r);
+ const prev = priorCol.iat(r);
+ if (isFiniteNum(cur) && isFiniteNum(prev)) {
+ rowData[r] = cur - prev;
+ } else {
+ rowData[r] = null;
+ }
+ }
+ colMap.set(name, new Series({ data: rowData, index: df.index, name }));
+ }
+ return new DataFrame(colMap, df.index);
+}
+
+// ─── DataFrame: shift ─────────────────────────────────────────────────────────
+
+/**
+ * Shift the values of a DataFrame by `periods` positions.
+ *
+ * With `axis=0` (the default) each column is shifted on its own, down the
+ * rows. With `axis=1` / `"columns"` values move sideways across columns
+ * within each row. Vacated cells receive `fillValue` (default `null`).
+ *
+ * Mirrors `pandas.DataFrame.shift(periods=1, fill_value=NaN, axis=0)`.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame } from "tsb";
+ * import { shiftDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+ * shiftDataFrame(df).col("a").values;                  // [null, 1, 2]
+ * shiftDataFrame(df, { periods: -1 }).col("b").values; // [5, 6, null]
+ * ```
+ */
+export function shiftDataFrame(df: DataFrame, options: DataFrameShiftOptions = {}): DataFrame {
+  const lag = options.periods ?? 1;
+  // An omitted (or explicitly undefined) fill value means "missing".
+  const filler = options.fillValue ?? null;
+  const direction = options.axis ?? 0;
+  const names = df.columns.values;
+
+  switch (direction) {
+    case 1:
+    case "columns":
+      return shiftDataFrameRowWise(df, names, lag, filler);
+    default:
+      return shiftDataFrameColWise(df, names, lag, filler);
+  }
+}
+
+/**
+ * Shift each column independently (axis=0).
+ *
+ * Every column named in `colNames` is run through `shiftArray` with the same
+ * `periods` and `fillValue`; the result reuses the row index of `df` and the
+ * given column names, in the order supplied.
+ */
+function shiftDataFrameColWise(
+ df: DataFrame,
+ colNames: readonly string[],
+ periods: number,
+ fillValue: Scalar,
+): DataFrame {
+ const colMap = new Map>();
+ for (const name of colNames) {
+ const col = df.col(name) as Series;
+ const data = shiftArray(col.values as readonly Scalar[], periods, fillValue);
+ colMap.set(name, new Series({ data, index: df.index, name }));
+ }
+ return new DataFrame(colMap, df.index);
+}
+
+/**
+ * Shift each row across columns (axis=1).
+ *
+ * Column names and their order are unchanged; only the values move. With a
+ * positive `periods` the column at position `c` receives the values of the
+ * column at position `c - periods`; with a negative `periods` it receives
+ * those of position `c + |periods|`. Cells with no source column keep
+ * `fillValue`.
+ */
+function shiftDataFrameRowWise(
+ df: DataFrame,
+ colNames: readonly string[],
+ periods: number,
+ fillValue: Scalar,
+): DataFrame {
+ const nRows = df.index.size;
+ const nCols = colNames.length;
+
+ // Build a 2D matrix [row][col] of shifted values
+ // (pre-filled with `fillValue`, so untouched cells are already correct).
+ const matrix: Scalar[][] = Array.from({ length: nRows }, () =>
+ new Array(nCols).fill(fillValue),
+ );
+
+ if (periods >= 0) {
+ // Forward shift: destination column c reads from column c - periods.
+ for (let c = periods; c < nCols; c++) {
+ const srcName = colNames[c - periods];
+ if (srcName === undefined) {
+ continue;
+ }
+ const src = df.col(srcName) as Series;
+ for (let r = 0; r < nRows; r++) {
+ const row = matrix[r];
+ if (row !== undefined) {
+ row[c] = src.iat(r);
+ }
+ }
+ }
+ } else {
+ // Backward shift: destination column c reads from column c + |periods|.
+ const offset = -periods;
+ for (let c = 0; c < nCols - offset; c++) {
+ const srcName = colNames[c + offset];
+ if (srcName === undefined) {
+ continue;
+ }
+ const src = df.col(srcName) as Series;
+ for (let r = 0; r < nRows; r++) {
+ const row = matrix[r];
+ if (row !== undefined) {
+ row[c] = src.iat(r);
+ }
+ }
+ }
+ }
+
+ // Slice the matrix back into one Series per original column name.
+ const colMap = new Map>();
+ for (let c = 0; c < nCols; c++) {
+ const name = colNames[c];
+ if (name === undefined) {
+ continue;
+ }
+ const data = matrix.map((row) => row[c] as Scalar);
+ colMap.set(name, new Series({ data, index: df.index, name }));
+ }
+ return new DataFrame(colMap, df.index);
+}
diff --git a/src/stats/duplicated.ts b/src/stats/duplicated.ts
new file mode 100644
index 00000000..cdf9377c
--- /dev/null
+++ b/src/stats/duplicated.ts
@@ -0,0 +1,274 @@
+/**
+ * duplicated — detect and remove duplicate rows/values in Series and DataFrame.
+ *
+ * Mirrors:
+ * - `pandas.Series.duplicated(keep='first')`
+ * - `pandas.DataFrame.duplicated(subset, keep='first')`
+ * - `pandas.Series.drop_duplicates(keep='first')`
+ * - `pandas.DataFrame.drop_duplicates(subset, keep='first')`
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Index } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/**
+ * Controls which duplicate to mark:
+ * - `"first"` — mark all duplicates except the first occurrence.
+ * - `"last"` — mark all duplicates except the last occurrence.
+ * - `false` — mark ALL occurrences (i.e., any row that appears >1 time).
+ *
+ * Values that occur exactly once are never marked, whatever the policy.
+ */
+export type KeepPolicy = "first" | "last" | false;
+
+/** Options for {@link duplicatedSeries} and {@link dropDuplicatesSeries}. */
+export interface DuplicatedOptions {
+ /**
+ * Which duplicates to mark/keep. For the drop functions, the occurrences
+ * that are NOT marked are the ones retained.
+ * @defaultValue `"first"`
+ */
+ readonly keep?: KeepPolicy;
+}
+
+/** Options for {@link duplicatedDataFrame} and {@link dropDuplicatesDataFrame}. */
+export interface DataFrameDuplicatedOptions extends DuplicatedOptions {
+ /**
+ * Subset of column names to consider. When omitted (or given as an empty
+ * array), all columns are used.
+ */
+ readonly subset?: readonly string[];
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/**
+ * Map a Scalar to a stable string key suitable for Map/Set membership.
+ *
+ * - `null` and `undefined` share one key.
+ * - NaN gets its own key, so all NaNs compare equal to each other.
+ * - Dates are keyed by their epoch-millisecond timestamp.
+ * - Everything else is keyed by `typeof` plus its string form, so `1` and
+ *   `"1"` stay distinct.
+ */
+function scalarKey(v: Scalar): string {
+  if (v === undefined || v === null) {
+    return "\x00null";
+  }
+  if (v instanceof Date) {
+    return `\x01date:${v.getTime().toString()}`;
+  }
+  const isNaNValue = typeof v === "number" && Number.isNaN(v);
+  return isNaNValue ? "\x00nan" : `\x02${typeof v}:${String(v)}`;
+}
+
+/**
+ * Build a composite row key from the values of the selected columns at row `i`.
+ *
+ * Each cell is converted with `scalarKey`; a column that does not exist in
+ * `df`, or a missing cell, is keyed as `null`. The per-cell keys are then
+ * serialized as a JSON array rather than joined with a separator: a plain
+ * join is ambiguous when a string cell itself contains the separator, which
+ * would let two different rows produce the same key and be reported as
+ * duplicates of each other.
+ *
+ * @param df Source DataFrame.
+ * @param colNames Columns that take part in the comparison, in order.
+ * @param i Zero-based row position.
+ * @returns A string that is equal for two rows exactly when all their
+ *   per-cell keys are equal.
+ */
+function rowKey(df: DataFrame, colNames: readonly string[], i: number): string {
+  const parts: string[] = [];
+  for (const name of colNames) {
+    const s = df.get(name);
+    const v: Scalar = s !== undefined ? (s.values[i] ?? null) : null;
+    parts.push(scalarKey(v));
+  }
+  // JSON array encoding quotes and escapes every part, so part boundaries
+  // can never be forged by cell contents.
+  return JSON.stringify(parts);
+}
+
+/**
+ * Core algorithm: flag which positions hold a duplicate key.
+ *
+ * - `keep === false`: every position whose key occurs more than once is flagged.
+ * - `keep === "first"`: scanning left to right, every repeat of an
+ *   already-seen key is flagged.
+ * - otherwise (`"last"`): the same scan runs right to left, so the final
+ *   occurrence is the one left unflagged.
+ *
+ * @param keys Array of string keys (one per element/row)
+ * @param keep Keep policy
+ * @returns boolean array of the same length, `true` = duplicate
+ */
+function markDuplicates(keys: readonly string[], keep: KeepPolicy): boolean[] {
+  const total = keys.length;
+  const flags: boolean[] = new Array(total).fill(false);
+
+  if (keep === false) {
+    // First pass: tally occurrences; second pass: flag anything seen twice or more.
+    const tally = new Map<string, number>();
+    for (const key of keys) {
+      tally.set(key, (tally.get(key) ?? 0) + 1);
+    }
+    for (let pos = 0; pos < total; pos++) {
+      const key = keys[pos];
+      if (key === undefined) {
+        continue;
+      }
+      flags[pos] = (tally.get(key) ?? 0) > 1;
+    }
+    return flags;
+  }
+
+  // "first" scans forward, anything else ("last") scans backward.
+  const step = keep === "first" ? 1 : -1;
+  const start = step === 1 ? 0 : total - 1;
+  const visited = new Set<string>();
+  for (let pos = start; pos >= 0 && pos < total; pos += step) {
+    const key = keys[pos];
+    if (key === undefined) {
+      continue;
+    }
+    if (visited.has(key)) {
+      flags[pos] = true;
+    } else {
+      visited.add(key);
+    }
+  }
+  return flags;
+}
+
+// ─── Series duplicated ────────────────────────────────────────────────────────
+
+/**
+ * Flag duplicated values of a Series.
+ *
+ * The result is a boolean Series on the same index (and with the same name)
+ * where `true` marks a value as a duplicate according to `keep`.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [1, 2, 1, 3, 2] });
+ * duplicatedSeries(s).values; // [false, false, true, false, true]
+ * ```
+ */
+export function duplicatedSeries(
+  series: Series,
+  options?: DuplicatedOptions,
+): Series {
+  const policy = options?.keep ?? "first";
+  const flags = markDuplicates(
+    series.values.map((value) => scalarKey(value)),
+    policy,
+  );
+  return new Series({
+    data: flags,
+    index: series.index,
+    name: series.name ?? undefined,
+  });
+}
+
+// ─── DataFrame duplicated ─────────────────────────────────────────────────────
+
+/**
+ * Flag duplicated rows of a DataFrame.
+ *
+ * Rows are compared on the columns given by `subset` (all columns when it
+ * is omitted or empty). The result is a boolean Series on the row index of
+ * `df`, where `true` marks a row as a duplicate according to `keep`.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromRecords([
+ *   { a: 1, b: 2 }, { a: 1, b: 2 }, { a: 3, b: 4 },
+ * ]);
+ * duplicatedDataFrame(df).values; // [false, true, false]
+ * ```
+ */
+export function duplicatedDataFrame(
+  df: DataFrame,
+  options?: DataFrameDuplicatedOptions,
+): Series {
+  const policy = options?.keep ?? "first";
+  const compared = resolveSubset(df, options?.subset);
+  const rowCount = df.shape[0];
+
+  // One composite key per row, in row order.
+  const keys = Array.from({ length: rowCount }, (_, row) => rowKey(df, compared, row));
+
+  return new Series({
+    data: markDuplicates(keys, policy),
+    index: df.index,
+  });
+}
+
+// ─── Series drop_duplicates ───────────────────────────────────────────────────
+
+/**
+ * Return a new Series without the values flagged as duplicates.
+ *
+ * Surviving values keep their original index labels and relative order;
+ * the Series name is carried over.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [1, 2, 1, 3, 2] });
+ * dropDuplicatesSeries(s).values; // [1, 2, 3]
+ * ```
+ */
+export function dropDuplicatesSeries(
+  series: Series,
+  options?: DuplicatedOptions,
+): Series {
+  const flags = duplicatedSeries(series, options).values;
+
+  // Positions whose flag is exactly `false` survive.
+  const survivors: number[] = [];
+  for (let pos = 0; pos < flags.length; pos++) {
+    if (flags[pos] === false) {
+      survivors.push(pos);
+    }
+  }
+
+  const keptValues: Scalar[] = survivors.map((pos) => series.values[pos] ?? null);
+  const keptLabels: Label[] = survivors.map((pos) => series.index.at(pos) ?? null);
+  return new Series({
+    data: keptValues,
+    index: new Index(keptLabels),
+    name: series.name ?? undefined,
+  });
+}
+
+// ─── DataFrame drop_duplicates ────────────────────────────────────────────────
+
+/**
+ * Return a new DataFrame without the rows flagged as duplicates.
+ *
+ * Rows are compared as in {@link duplicatedDataFrame}; surviving rows keep
+ * their original order.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromRecords([
+ *   { a: 1, b: 2 }, { a: 1, b: 2 }, { a: 3, b: 4 },
+ * ]);
+ * dropDuplicatesDataFrame(df).shape; // [2, 2]
+ * ```
+ */
+export function dropDuplicatesDataFrame(
+  df: DataFrame,
+  options?: DataFrameDuplicatedOptions,
+): DataFrame {
+  const flags = duplicatedDataFrame(df, options).values;
+
+  // Positions whose flag is exactly `false` survive.
+  const survivors: number[] = [];
+  for (let pos = 0; pos < flags.length; pos++) {
+    if (flags[pos] === false) {
+      survivors.push(pos);
+    }
+  }
+  return selectRows(df, survivors);
+}
+
+// ─── internal helpers ─────────────────────────────────────────────────────────
+
+/**
+ * Pick the columns used for row comparison: the caller's `subset` when it
+ * names at least one column, otherwise every column of `df`.
+ */
+function resolveSubset(df: DataFrame, subset: readonly string[] | undefined): readonly string[] {
+  const useAllColumns = subset === undefined || subset.length === 0;
+  return useAllColumns ? df.columns.values : subset;
+}
+
+/**
+ * Build a new DataFrame containing only the specified row positions.
+ *
+ * Rows appear in the order given by `positions`. The new index is built
+ * from the labels at those positions, and every column keeps its original
+ * dtype. A position with no label or no cell value yields `null`.
+ */
+function selectRows(df: DataFrame, positions: readonly number[]): DataFrame {
+ const colMap = new Map>();
+ const newLabels: Label[] = positions.map((i) => df.index.at(i) ?? null);
+ const newIndex = new Index(newLabels);
+
+ for (const name of df.columns.values) {
+ const col = df.col(name);
+ const newVals: Scalar[] = positions.map((i) => col.values[i] ?? null);
+ // NOTE(review): unlike other Series built in this codebase, no `name` is
+ // passed here — presumably the DataFrame takes names from the map keys; confirm.
+ colMap.set(
+ name,
+ new Series({
+ data: newVals,
+ index: newIndex,
+ dtype: col.dtype,
+ }),
+ );
+ }
+ return new DataFrame(colMap, newIndex);
+}
diff --git a/src/stats/get_dummies.ts b/src/stats/get_dummies.ts
new file mode 100644
index 00000000..49daea71
--- /dev/null
+++ b/src/stats/get_dummies.ts
@@ -0,0 +1,383 @@
+/**
+ * get_dummies — one-hot encoding of categorical variables.
+ *
+ * Mirrors `pandas.get_dummies` and `pandas.from_dummies`:
+ * - `getDummies(series)` → DataFrame of 0/1 indicator columns
+ * - `getDummies(dataframe)` → DataFrame with categorical columns expanded
+ * - `fromDummies(df)` → Series of category labels (reverse operation)
+ *
+ * @example
+ * ```ts
+ * import { getDummies, Series } from "tsb";
+ * const s = new Series({ data: ["a", "b", "a", "c"], name: "color" });
+ * const dummies = getDummies(s);
+ * // DataFrame { color_a: [1,0,1,0], color_b: [0,1,0,0], color_c: [0,0,0,1] }
+ * ```
+ */
+
+import { Dtype } from "../core/index.ts";
+import { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── option types ─────────────────────────────────────────────────────────────
+
+/** Options for {@link getDummies}. */
+export interface GetDummiesOptions {
+ /**
+ * String to prepend to each dummy column name.
+ * - For Series input: a single string (default: the series name or "").
+ *   Array and record forms are ignored for Series input.
+ * - For DataFrame input: a single string applied to all encoded columns,
+ * an array aligned with `columns`, or a record mapping column→prefix.
+ *   Columns without a matching entry fall back to their own name.
+ */
+ readonly prefix?: string | readonly string[] | Readonly> | null;
+ /** Separator between prefix and value label (default `"_"`). */
+ readonly prefixSep?: string;
+ /**
+ * If `true`, include an extra `{prefix}{prefixSep}nan` column flagging
+ * missing values (null, undefined, NaN). Default `false`.
+ */
+ readonly dummyNa?: boolean;
+ /**
+ * For DataFrame input: which columns to one-hot encode.
+ * Defaults to all object/string/category/boolean columns.
+ */
+ readonly columns?: readonly string[];
+ /**
+ * Drop the first level dummy for each variable to avoid multicollinearity
+ * (default `false`).
+ */
+ readonly dropFirst?: boolean;
+ /**
+ * Dtype of the indicator columns (default `Dtype.uint8`).
+ * A dtype named `"bool"` produces `false`/`true` indicators; any other
+ * dtype produces `0`/`1`.
+ */
+ readonly dtype?: Dtype;
+}
+
+/** Options for {@link fromDummies}. */
+export interface FromDummiesOptions {
+ /** Separator used when splitting column names to recover the original column name (default `"_"`). */
+ readonly sep?: string;
+ /**
+ * Value assigned to rows where no dummy column is active (all dummies are 0).
+ * When omitted, such rows are mapped to `null` (missing).
+ */
+ readonly defaultCategory?: Scalar;
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/**
+ * Render a scalar as the text used for its level label in a column name.
+ *
+ * `null`/`undefined` become `"nan"`, Dates become their ISO-8601 string,
+ * and every other value goes through `String()`.
+ */
+function labelStr(v: Scalar): string {
+  if (v === undefined || v === null) {
+    return "nan";
+  }
+  return v instanceof Date ? v.toISOString() : String(v);
+}
+
+/**
+ * Decide whether a column of this dtype is one-hot encoded when the caller
+ * does not list columns explicitly: true for dtypes named `"string"`,
+ * `"object"`, `"category"` or `"bool"`.
+ */
+function isCategoricalDtype(dtype: Dtype): boolean {
+  switch (dtype.name) {
+    case "string":
+    case "object":
+    case "category":
+    case "bool":
+      return true;
+    default:
+      return false;
+  }
+}
+
+/**
+ * Build the prefix string for a given column name given the prefix option.
+ *
+ * - `null` / `undefined` → the column's own name.
+ * - a string → that string, for every column.
+ * - an array → the entry at `colIndex`, or the column name when absent.
+ * - anything else is treated as a record keyed by column name, again
+ *   falling back to the column name when there is no entry.
+ *
+ * @param colName Name of the column being encoded.
+ * @param prefixOpt The `prefix` option as supplied by the caller.
+ * @param colIndex Position used to look up array-form prefixes.
+ */
+function resolvePrefix(
+ colName: string,
+ prefixOpt: GetDummiesOptions["prefix"],
+ colIndex: number,
+): string {
+ if (prefixOpt === null || prefixOpt === undefined) {
+ return colName;
+ }
+ if (typeof prefixOpt === "string") {
+ return prefixOpt;
+ }
+ if (Array.isArray(prefixOpt)) {
+ return (prefixOpt as readonly string[])[colIndex] ?? colName;
+ }
+ const map = prefixOpt as Readonly>;
+ return map[colName] ?? colName;
+}
+
+/**
+ * Collect the distinct level labels present in `values`.
+ *
+ * Missing values (null, undefined, NaN) are skipped. Each remaining value
+ * is converted with `labelStr`, de-duplicated, and the labels are returned
+ * sorted with `localeCompare`.
+ */
+function collectLevels(values: readonly Scalar[]): string[] {
+  const distinct = new Set<string>();
+  for (const value of values) {
+    if (value === null || value === undefined) {
+      continue;
+    }
+    if (typeof value === "number" && Number.isNaN(value)) {
+      continue;
+    }
+    distinct.add(labelStr(value));
+  }
+  return Array.from(distinct).sort((left, right) => left.localeCompare(right));
+}
+
+/**
+ * Build a single indicator column array.
+ *
+ * Position `i` is `oneVal` when `values[i]` is a non-missing value whose
+ * label (via `labelStr`) equals `level`, and `zeroVal` otherwise.
+ *
+ * Missing values (null, undefined, NaN) never match a level. Without this
+ * guard a null would stringify to `"nan"` (and NaN to `"NaN"`) and be
+ * flagged as a member of a genuine level that happens to carry that literal
+ * label.
+ *
+ * @param values Raw column values.
+ * @param level Level label to flag.
+ * @param zeroVal Value written where the level is absent.
+ * @param oneVal Value written where the level is present.
+ */
+function buildIndicatorCol(
+  values: readonly Scalar[],
+  level: string,
+  zeroVal: Scalar,
+  oneVal: Scalar,
+): Scalar[] {
+  const arr: Scalar[] = new Array(values.length).fill(zeroVal);
+  for (let i = 0; i < values.length; i++) {
+    const v = values[i] as Scalar;
+    const isNa = v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+    if (!isNa && labelStr(v) === level) {
+      arr[i] = oneVal;
+    }
+  }
+  return arr;
+}
+
+/**
+ * Build the missing-value indicator column: `oneVal` wherever the input is
+ * null, undefined or NaN, `zeroVal` everywhere else.
+ */
+function buildNaCol(values: readonly Scalar[], zeroVal: Scalar, oneVal: Scalar): Scalar[] {
+  return Array.from(values, (value): Scalar => {
+    if (value === null || value === undefined) {
+      return oneVal;
+    }
+    return typeof value === "number" && Number.isNaN(value) ? oneVal : zeroVal;
+  });
+}
+
+/**
+ * Encode a single array of values into dummy columns.
+ *
+ * Returns a map of `columnName → indicator array`, one entry per level in
+ * the order produced by `collectLevels`, named `{colPrefix}{sep}{level}`.
+ *
+ * @param values Raw column values.
+ * @param colPrefix Prefix placed before every generated column name.
+ * @param sep Separator between prefix and level label.
+ * @param dummyNa When true, a `{colPrefix}{sep}nan` missing-value column is added last.
+ * @param dropFirst When true, the first level is omitted.
+ * @param dtype A dtype named `"bool"` yields `false`/`true` indicators; any other yields `0`/`1`.
+ */
+function encodeSingleColumn(
+ values: readonly Scalar[],
+ colPrefix: string,
+ sep: string,
+ dummyNa: boolean,
+ dropFirst: boolean,
+ dtype: Dtype,
+): Map {
+ let levels = collectLevels(values);
+ if (dropFirst && levels.length > 0) {
+ levels = levels.slice(1);
+ }
+
+ const zeroVal: Scalar = dtype.name === "bool" ? false : 0;
+ const oneVal: Scalar = dtype.name === "bool" ? true : 1;
+ const result = new Map();
+
+ for (const level of levels) {
+ result.set(`${colPrefix}${sep}${level}`, buildIndicatorCol(values, level, zeroVal, oneVal));
+ }
+
+ if (dummyNa) {
+ // NOTE(review): if a real level is literally labelled "nan", this entry
+ // replaces that level's column because both share the same map key.
+ result.set(`${colPrefix}${sep}nan`, buildNaCol(values, zeroVal, oneVal));
+ }
+
+ return result;
+}
+
+// ─── public API ───────────────────────────────────────────────────────────────
+
+/**
+ * One-hot encode a Series into a DataFrame of binary indicator columns.
+ *
+ * Each unique value in the series becomes a column. Column names are
+ * `{prefix}{prefixSep}{value}`, defaulting to `{seriesName}_{value}`.
+ * When the series has no name the prefix is the empty string, so names
+ * start with the separator (e.g. `_cat`).
+ *
+ * Only a string `prefix` option is honoured here; array and record forms
+ * are ignored and the default prefix is used instead.
+ *
+ * @param series Series to encode.
+ * @param options Encoding options; see {@link GetDummiesOptions}.
+ * @returns DataFrame of indicator columns on the same index as `series`.
+ *
+ * @example
+ * ```ts
+ * import { getDummiesSeries, Series } from "tsb";
+ * const s = new Series({ data: ["cat", "dog", "cat"], name: "animal" });
+ * getDummiesSeries(s);
+ * // DataFrame { animal_cat: [1,0,1], animal_dog: [0,1,0] }
+ * ```
+ */
+export function getDummiesSeries(series: Series, options?: GetDummiesOptions): DataFrame {
+ const sep = options?.prefixSep ?? "_";
+ const dummyNa = options?.dummyNa ?? false;
+ const dropFirst = options?.dropFirst ?? false;
+ const dtype = options?.dtype ?? Dtype.uint8;
+
+ const defaultPrefix = series.name !== null ? series.name : "";
+ let prefix = defaultPrefix;
+ // Override the default only with an explicit string prefix.
+ if (
+ options?.prefix !== undefined &&
+ options.prefix !== null &&
+ typeof options.prefix === "string"
+ ) {
+ prefix = options.prefix;
+ }
+
+ const encoded = encodeSingleColumn(series.values, prefix, sep, dummyNa, dropFirst, dtype);
+
+ // Copy the encoded map into a plain record for DataFrame.fromColumns.
+ const colData: Record = {};
+ for (const [k, v] of encoded) {
+ colData[k] = v;
+ }
+
+ return DataFrame.fromColumns(colData, { index: series.index.values });
+}
+
+/**
+ * One-hot encode categorical columns in a DataFrame.
+ *
+ * Non-categorical columns are kept as-is; each encoded column is replaced by
+ * its set of dummy columns, inserted at the same position.
+ *
+ * Which columns are encoded is decided by `options.columns`, or — when that
+ * is omitted — by dtype (string/object/category/bool columns).
+ *
+ * When `options.prefix` is an array, its entries are matched by position
+ * against `options.columns` if that list was supplied, and otherwise against
+ * the auto-detected columns in DataFrame order. This keeps the array
+ * "aligned with `columns`" even when `columns` lists names in a different
+ * order than they appear in the DataFrame.
+ *
+ * @param df DataFrame to encode.
+ * @param options Encoding options; see {@link GetDummiesOptions}.
+ * @returns A new DataFrame on the same row index as `df`.
+ *
+ * @example
+ * ```ts
+ * import { getDummiesDataFrame, DataFrame } from "tsb";
+ * const df = DataFrame.fromColumns({ x: [1, 2], color: ["red", "blue"] });
+ * getDummiesDataFrame(df);
+ * // DataFrame { x: [1,2], color_blue: [0,1], color_red: [1,0] }
+ * ```
+ */
+export function getDummiesDataFrame(df: DataFrame, options?: GetDummiesOptions): DataFrame {
+  const sep = options?.prefixSep ?? "_";
+  const dummyNa = options?.dummyNa ?? false;
+  const dropFirst = options?.dropFirst ?? false;
+  const dtype = options?.dtype ?? Dtype.uint8;
+
+  // Determine which columns to encode.
+  const allCols = [...df.columns.values];
+  const requested = options?.columns;
+  const encodeSet = new Set<string>(
+    requested !== undefined
+      ? requested
+      : allCols.filter((c) => isCategoricalDtype(df.col(c).dtype)),
+  );
+
+  // Position of each encoded column inside the list a prefix array is
+  // aligned with: the caller's `columns` when given, otherwise the encoded
+  // columns in DataFrame order. The first occurrence wins for repeated names.
+  const prefixOrder = requested !== undefined ? requested : allCols.filter((c) => encodeSet.has(c));
+  const prefixPos = new Map<string, number>();
+  prefixOrder.forEach((c, pos) => {
+    if (!prefixPos.has(c)) {
+      prefixPos.set(c, pos);
+    }
+  });
+
+  const colData: Record<string, readonly Scalar[]> = {};
+
+  for (const colName of allCols) {
+    if (!encodeSet.has(colName)) {
+      // Pass-through column: copied unchanged at its original position.
+      colData[colName] = df.col(colName).values;
+      continue;
+    }
+    const colPrefix = resolvePrefix(colName, options?.prefix, prefixPos.get(colName) ?? 0);
+    const encoded = encodeSingleColumn(
+      df.col(colName).values,
+      colPrefix,
+      sep,
+      dummyNa,
+      dropFirst,
+      dtype,
+    );
+    for (const [k, v] of encoded) {
+      colData[k] = v;
+    }
+  }
+
+  return DataFrame.fromColumns(colData, { index: df.index.values });
+}
+
+/**
+ * One-hot encode a Series or DataFrame.
+ *
+ * Dispatches on the runtime type of `data`:
+ * - a `Series` is handled by {@link getDummiesSeries};
+ * - anything else is handled by {@link getDummiesDataFrame}.
+ *
+ * @example
+ * ```ts
+ * import { getDummies, Series } from "tsb";
+ * getDummies(new Series({ data: ["a","b","a"], name: "x" }));
+ * // DataFrame { x_a: [1,0,1], x_b: [0,1,0] }
+ * ```
+ */
+export function getDummies(
+  data: Series | DataFrame,
+  options?: GetDummiesOptions,
+): DataFrame {
+  return data instanceof Series
+    ? getDummiesSeries(data, options)
+    : getDummiesDataFrame(data, options);
+}
+
+/**
+ * Break a dummy column name into `{ prefix, label }` at the LAST occurrence
+ * of `sep`. A name that does not contain `sep` has an empty prefix and is
+ * returned whole as the label.
+ */
+function splitColName(colName: string, sep: string): { prefix: string; label: string } {
+  const cut = colName.lastIndexOf(sep);
+  if (cut >= 0) {
+    const prefix = colName.slice(0, cut);
+    const label = colName.slice(cut + sep.length);
+    return { prefix, label };
+  }
+  return { prefix: "", label: colName };
+}
+
+/**
+ * Derive the Series name from already-split column names: the shared prefix
+ * when every column has the same non-empty prefix, otherwise `null`.
+ */
+function inferSeriesName(
+  splitCols: ReadonlyArray<{ prefix: string; label: string }>,
+): string | null {
+  const candidate = splitCols[0]?.prefix ?? "";
+  if (candidate === "") {
+    return null;
+  }
+  const mismatch = splitCols.some((entry) => entry.prefix !== candidate);
+  return mismatch ? null : candidate;
+}
+
+/**
+ * Inspect one row of a dummy-encoded DataFrame.
+ *
+ * A column is "active" when its cell is exactly `1` or `true`. Returns how
+ * many columns are active and the label of the last active one (`null`
+ * when none is).
+ */
+function findActiveLabel(
+  rowIndex: number,
+  cols: readonly string[],
+  splitCols: ReadonlyArray<{ prefix: string; label: string }>,
+  df: DataFrame,
+): { label: Scalar; count: number } {
+  let active: Scalar = null;
+  let hits = 0;
+  for (const [position, colName] of cols.entries()) {
+    if (colName === undefined) {
+      continue;
+    }
+    const cell = df.col(colName).values[rowIndex];
+    const isOn = cell === 1 || cell === true;
+    if (!isOn) {
+      continue;
+    }
+    hits += 1;
+    active = splitCols[position]?.label ?? null;
+  }
+  return { label: active, count: hits };
+}
+
+/**
+ * Reverse a one-hot encoding — rebuild a categorical Series from a set of
+ * binary dummy columns.
+ *
+ * A row with exactly one active column (cell equal to `1` or `true`) takes
+ * that column's label. A row with no active column takes
+ * `options.defaultCategory`, or `null` when that is not supplied.
+ *
+ * Column names are expected to be `{prefix}{sep}{category}` and are split
+ * at the last occurrence of `sep`. The resulting Series is named after the
+ * prefix when every column shares the same non-empty prefix; otherwise its
+ * name is `null`. A DataFrame with no columns yields an empty, unnamed Series.
+ *
+ * @throws {RangeError} If a row has more than one active dummy (ambiguous encoding).
+ *
+ * @example
+ * ```ts
+ * import { fromDummies, DataFrame } from "tsb";
+ * const df = DataFrame.fromColumns({ x_a: [1,0,1], x_b: [0,1,0] });
+ * fromDummies(df, { sep: "_" });
+ * // Series { data: ["a", "b", "a"], name: "x" }
+ * ```
+ */
+export function fromDummies(df: DataFrame, options?: FromDummiesOptions): Series {
+  const sep = options?.sep ?? "_";
+  const cols = [...df.columns.values];
+  if (cols.length === 0) {
+    return new Series({ data: [], name: null });
+  }
+
+  const parsed = cols.map((c) => splitColName(c, sep));
+  const seriesName = inferSeriesName(parsed);
+  // Value used for rows in which no dummy column is active.
+  const fallback: Scalar = options?.defaultCategory ?? null;
+
+  const decoded = Array.from({ length: df.index.size }, (_, i): Scalar => {
+    const { label, count } = findActiveLabel(i, cols, parsed, df);
+    if (count > 1) {
+      throw new RangeError(
+        `fromDummies: row ${i} has ${count} active dummy columns (expected 0 or 1).`,
+      );
+    }
+    return count === 0 ? fallback : label;
+  });
+
+  return new Series({ data: decoded, index: df.index.values, name: seriesName });
+}
diff --git a/src/stats/idxmin_idxmax.ts b/src/stats/idxmin_idxmax.ts
new file mode 100644
index 00000000..6ee745f9
--- /dev/null
+++ b/src/stats/idxmin_idxmax.ts
@@ -0,0 +1,234 @@
+/**
+ * idxmin / idxmax — return the index label of the minimum or maximum value.
+ *
+ * Mirrors `pandas.Series.idxmin()` / `pandas.Series.idxmax()` and
+ * `pandas.DataFrame.idxmin()` / `pandas.DataFrame.idxmax()`:
+ *
+ * - `idxminSeries(series)` — label of the minimum value (NaN/null excluded)
+ * - `idxmaxSeries(series)` — label of the maximum value (NaN/null excluded)
+ * - `idxminDataFrame(df)` — Series of row labels where each column achieves its min
+ * - `idxmaxDataFrame(df)` — Series of row labels where each column achieves its max
+ *
+ * When `skipna` is true (the default), NaN / null values are ignored.
+ * When `skipna` is false, any NaN / null causes the result to be `null`.
+ *
+ * @module
+ */
+
+import type { DataFrame } from "../core/index.ts";
+import { Dtype, Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/** Options for {@link idxminSeries}, {@link idxmaxSeries}. */
+export interface IdxOptions {
+ /**
+ * Whether to skip NaN / null values.
+ * When `false`, a single missing value (`null`, `undefined` or `NaN`)
+ * anywhere in the input makes the result `null`.
+ * @defaultValue `true`
+ */
+ readonly skipna?: boolean;
+}
+
+/** Options for {@link idxminDataFrame}, {@link idxmaxDataFrame}. */
+export interface IdxDataFrameOptions {
+ /**
+ * Whether to skip NaN / null values.
+ * When `false`, any column containing a missing value (`null`, `undefined`
+ * or `NaN`) yields `null` for that column.
+ * @defaultValue `true`
+ */
+ readonly skipna?: boolean;
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** Report whether `v` counts as missing: `null`, `undefined`, or a numeric `NaN`. */
+function isMissing(v: Scalar): boolean {
+  if (v === null || v === undefined) {
+    return true;
+  }
+  return typeof v === "number" && Number.isNaN(v);
+}
+
+/**
+ * Locate the position of the extreme element of `values`.
+ *
+ * `isBetter(candidate, currentBest)` decides whether a candidate replaces the
+ * current best; the first non-missing value always becomes the initial best,
+ * so ties keep the earliest position as long as `isBetter` is strict.
+ *
+ * @param values - Values to scan.
+ * @param skipna - When `true`, missing values are ignored; when `false`, the
+ *   first missing value encountered makes the result `null`.
+ * @param isBetter - Strict ordering predicate (e.g. "less than" for a minimum).
+ * @returns Position of the extreme value, or `null` when there is no usable value.
+ */
+function findExtreme(
+  values: readonly Scalar[],
+  skipna: boolean,
+  isBetter: (a: Scalar, b: Scalar) => boolean,
+): number | null {
+  let winnerPos: number | null = null;
+  let winnerVal: Scalar = null;
+
+  for (const [pos, raw] of values.entries()) {
+    const candidate = raw as Scalar;
+    if (isMissing(candidate)) {
+      if (skipna) {
+        continue;
+      }
+      return null;
+    }
+    const takesOver = winnerPos === null || isBetter(candidate, winnerVal);
+    if (takesOver) {
+      winnerPos = pos;
+      winnerVal = candidate;
+    }
+  }
+  return winnerPos;
+}
+
+/**
+ * Strict "less than" for scalars.
+ * A `null` / `undefined` right-hand side never loses, so the result is `false`.
+ */
+function isLess(a: Scalar, b: Scalar): boolean {
+  type Comparable = number | string | boolean;
+  return b !== null && b !== undefined && (a as Comparable) < (b as Comparable);
+}
+
+/**
+ * Strict "greater than" for scalars.
+ * A `null` / `undefined` right-hand side never loses, so the result is `false`.
+ */
+function isGreater(a: Scalar, b: Scalar): boolean {
+  type Comparable = number | string | boolean;
+  return b !== null && b !== undefined && (a as Comparable) > (b as Comparable);
+}
+
+// ─── public API — Series ──────────────────────────────────────────────────────
+
+/**
+ * Index label at which `series` attains its minimum.
+ *
+ * With `skipna` enabled (the default) missing values (NaN / null) are ignored;
+ * with `skipna: false` any missing value makes the result `null`.
+ * The result is also `null` for an empty series or one holding only missing
+ * values. On ties the first occurrence wins.
+ *
+ * Mirrors `pandas.Series.idxmin()`.
+ *
+ * @param series - Input Series.
+ * @param options - Options (skipna).
+ * @returns The label of the minimum value, or `null` if no valid value exists.
+ *
+ * @example
+ * ```ts
+ * import { Series, idxminSeries } from "tsb";
+ *
+ * const s = new Series({ data: [3, 1, 4, 1, 5], index: ["a", "b", "c", "d", "e"] });
+ * idxminSeries(s); // "b" (first occurrence of 1)
+ * ```
+ */
+export function idxminSeries(series: Series, options: IdxOptions = {}): Label {
+  const pos = findExtreme(series.values, options.skipna ?? true, isLess);
+  return pos === null ? null : series.index.at(pos);
+}
+
+/**
+ * Index label at which `series` attains its maximum.
+ *
+ * With `skipna` enabled (the default) missing values (NaN / null) are ignored;
+ * with `skipna: false` any missing value makes the result `null`.
+ * The result is also `null` for an empty series or one holding only missing
+ * values. On ties the first occurrence wins.
+ *
+ * Mirrors `pandas.Series.idxmax()`.
+ *
+ * @param series - Input Series.
+ * @param options - Options (skipna).
+ * @returns The label of the maximum value, or `null` if no valid value exists.
+ *
+ * @example
+ * ```ts
+ * import { Series, idxmaxSeries } from "tsb";
+ *
+ * const s = new Series({ data: [3, 1, 4, 1, 5], index: ["a", "b", "c", "d", "e"] });
+ * idxmaxSeries(s); // "e"
+ * ```
+ */
+export function idxmaxSeries(series: Series, options: IdxOptions = {}): Label {
+  const pos = findExtreme(series.values, options.skipna ?? true, isGreater);
+  return pos === null ? null : series.index.at(pos);
+}
+
+// ─── public API — DataFrame ───────────────────────────────────────────────────
+
+/**
+ * For every column of `df`, find the row label at which the column is minimal.
+ *
+ * The returned Series is indexed by the column names, is unnamed, and uses the
+ * `object` dtype. A column with no usable value — all missing, or any missing
+ * while `skipna` is `false` — contributes `null`.
+ *
+ * Mirrors `pandas.DataFrame.idxmin()` (axis=0).
+ *
+ * @param df - Input DataFrame.
+ * @param options - Options (skipna, default `true`).
+ * @returns A Series indexed by column names, containing the row label of each column's min.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, idxminDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [3, 1, 4], b: [10, 20, 5] }, { index: ["x", "y", "z"] });
+ * idxminDataFrame(df).values; // ["y", "z"]
+ * ```
+ */
+export function idxminDataFrame(df: DataFrame, options: IdxDataFrameOptions = {}): Series {
+  const skipna = options.skipna ?? true;
+  const colNames = df.columns.values;
+
+  const labels: Label[] = [];
+  for (const colName of colNames) {
+    const pos = findExtreme(df.col(colName).values, skipna, isLess);
+    labels.push(pos === null ? null : df.index.at(pos));
+  }
+
+  return new Series({
+    data: labels,
+    index: colNames as unknown as Label[],
+    name: null,
+    dtype: Dtype.from("object"),
+  });
+}
+
+/**
+ * For every column of `df`, find the row label at which the column is maximal.
+ *
+ * The returned Series is indexed by the column names, is unnamed, and uses the
+ * `object` dtype. A column with no usable value — all missing, or any missing
+ * while `skipna` is `false` — contributes `null`.
+ *
+ * Mirrors `pandas.DataFrame.idxmax()` (axis=0).
+ *
+ * @param df - Input DataFrame.
+ * @param options - Options (skipna, default `true`).
+ * @returns A Series indexed by column names, containing the row label of each column's max.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, idxmaxDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [3, 1, 4], b: [10, 20, 5] }, { index: ["x", "y", "z"] });
+ * idxmaxDataFrame(df).values; // ["z", "y"]
+ * ```
+ */
+export function idxmaxDataFrame(df: DataFrame, options: IdxDataFrameOptions = {}): Series {
+  const skipna = options.skipna ?? true;
+  const colNames = df.columns.values;
+
+  const labels: Label[] = [];
+  for (const colName of colNames) {
+    const pos = findExtreme(df.col(colName).values, skipna, isGreater);
+    labels.push(pos === null ? null : df.index.at(pos));
+  }
+
+  return new Series({
+    data: labels,
+    index: colNames as unknown as Label[],
+    name: null,
+    dtype: Dtype.from("object"),
+  });
+}
diff --git a/src/stats/index.ts b/src/stats/index.ts
index b1de48eb..a1c9bf51 100644
--- a/src/stats/index.ts
+++ b/src/stats/index.ts
@@ -39,3 +39,82 @@ export {
nsmallestDataFrame,
} from "./nlargest.ts";
export type { NKeep, NTopOptions, NTopDataFrameOptions } from "./nlargest.ts";
+export {
+ isna,
+ notna,
+ isnull,
+ notnull,
+ ffillSeries,
+ bfillSeries,
+ dataFrameFfill,
+ dataFrameBfill,
+} from "./na_ops.ts";
+export type { FillDirectionOptions, DataFrameFillOptions } from "./na_ops.ts";
+export { pctChangeSeries, pctChangeDataFrame } from "./pct_change.ts";
+export type {
+ PctChangeFillMethod,
+ PctChangeOptions,
+ DataFramePctChangeOptions,
+} from "./pct_change.ts";
+export { idxminSeries, idxmaxSeries, idxminDataFrame, idxmaxDataFrame } from "./idxmin_idxmax.ts";
+export type { IdxOptions, IdxDataFrameOptions } from "./idxmin_idxmax.ts";
+export { replaceSeries, replaceDataFrame } from "./replace.ts";
+export type {
+ ReplaceMapping,
+ ReplaceSpec,
+ ReplaceOptions,
+ DataFrameReplaceOptions,
+} from "./replace.ts";
+export { whereSeries, maskSeries, whereDataFrame, maskDataFrame } from "./where_mask.ts";
+export type {
+ SeriesCond,
+ DataFrameCond,
+ WhereOptions,
+ WhereDataFrameOptions,
+} from "./where_mask.ts";
+export { diffSeries, diffDataFrame, shiftSeries, shiftDataFrame } from "./diff_shift.ts";
+export type {
+ DiffOptions,
+ DataFrameDiffOptions,
+ ShiftOptions,
+ DataFrameShiftOptions,
+} from "./diff_shift.ts";
+export {
+ duplicatedSeries,
+ duplicatedDataFrame,
+ dropDuplicatesSeries,
+ dropDuplicatesDataFrame,
+} from "./duplicated.ts";
+export type { KeepPolicy, DuplicatedOptions, DataFrameDuplicatedOptions } from "./duplicated.ts";
+export { clipAdvancedSeries, clipAdvancedDataFrame } from "./clip_advanced.ts";
+export type {
+ SeriesBound,
+ DataFrameBound,
+ ClipAdvancedSeriesOptions,
+ ClipAdvancedDataFrameOptions,
+} from "./clip_advanced.ts";
+export {
+ applySeries,
+ mapSeries,
+ applyDataFrame,
+ applyExpandDataFrame,
+ mapDataFrame,
+} from "./apply.ts";
+export type {
+ MapLookup,
+ ApplyDataFrameOptions,
+ ApplyExpandDataFrameOptions,
+} from "./apply.ts";
+export { cut, qcut, cutCodes, cutCategories } from "./cut.ts";
+export type {
+ CutOptions,
+ QcutOptions,
+ CutResult,
+ CutResultWithBins,
+} from "./cut.ts";
+export { Interval, IntervalIndex, intervalRange } from "./interval.ts";
+export type { ClosedType, IntervalOptions, IntervalRangeOptions } from "./interval.ts";
+export { getDummies, getDummiesSeries, getDummiesDataFrame, fromDummies } from "./get_dummies.ts";
+export type { GetDummiesOptions, FromDummiesOptions } from "./get_dummies.ts";
+export { crosstab, crosstabSeries } from "./crosstab.ts";
+export type { CrosstabOptions, CrosstabAggFunc, CrosstabNormalize } from "./crosstab.ts";
diff --git a/src/stats/interval.ts b/src/stats/interval.ts
new file mode 100644
index 00000000..7fab0bc1
--- /dev/null
+++ b/src/stats/interval.ts
@@ -0,0 +1,413 @@
+/**
+ * Interval — pandas-compatible interval type and IntervalIndex.
+ *
+ * Mirrors `pandas.Interval` and `pandas.IntervalIndex`:
+ * - `Interval` — a single bounded interval `(left, right]`, `[left, right)`,
+ * `[left, right]`, or `(left, right)`.
+ * - `IntervalIndex` — an ordered array of `Interval` objects used as an axis label.
+ * - `intervalRange()` — construct a sequence of equal-length intervals (like
+ * `pd.interval_range`).
+ *
+ * @example
+ * ```ts
+ * const iv = new Interval(0, 5); // (0, 5]
+ * iv.contains(3); // true
+ * iv.overlaps(new Interval(4, 10)); // true
+ *
+ * const idx = IntervalIndex.fromBreaks([0, 1, 2, 3]);
+ * // IntervalIndex([(0, 1], (1, 2], (2, 3]])
+ *
+ * const rng = intervalRange(0, 1, { periods: 4 });
+ * // IntervalIndex([(0, 0.25], (0.25, 0.5], (0.5, 0.75], (0.75, 1]])
+ * ```
+ *
+ * @module
+ */
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/**
+ * Specifies which endpoint(s) of an interval are closed (inclusive).
+ *
+ * - `"right"` (default) — `(left, right]`
+ * - `"left"` — `[left, right)`
+ * - `"both"` — `[left, right]`
+ * - `"neither"` — `(left, right)`
+ */
+export type ClosedType = "left" | "right" | "both" | "neither";
+
+/**
+ * Options for {@link IntervalIndex.fromBreaks}, {@link IntervalIndex.fromArrays}
+ * and (via {@link IntervalRangeOptions}) {@link intervalRange}.
+ */
+export interface IntervalOptions {
+ /** Which endpoints are closed. Default `"right"`. */
+ readonly closed?: ClosedType;
+ /** Human-readable name for the index axis. Default `null`. */
+ readonly name?: string | null;
+}
+
+/**
+ * Options for {@link intervalRange}.
+ *
+ * `intervalRange` throws a `RangeError` when both `periods` and `freq` are
+ * given, and also when neither is given.
+ */
+export interface IntervalRangeOptions extends IntervalOptions {
+ /**
+ * Number of intervals to generate. Must be a positive integer.
+ * Exactly one of `periods` or `freq` must be provided.
+ */
+ readonly periods?: number;
+ /**
+ * Step size between interval edges. Must be greater than 0.
+ * Exactly one of `periods` or `freq` must be provided.
+ */
+ readonly freq?: number;
+}
+
+// ─── Interval ─────────────────────────────────────────────────────────────────
+
+/**
+ * An immutable bounded interval.
+ *
+ * Mirrors `pandas.Interval`. Endpoints are numbers.
+ */
+export class Interval {
+  /** Left (lower) endpoint. */
+  readonly left: number;
+
+  /** Right (upper) endpoint. */
+  readonly right: number;
+
+  /** Which endpoints are closed (inclusive). */
+  readonly closed: ClosedType;
+
+  /**
+   * @param left - Lower endpoint.
+   * @param right - Upper endpoint; must not be smaller than `left`.
+   * @param closed - Which endpoints are inclusive (default `"right"`).
+   * @throws {RangeError} If `left > right`.
+   */
+  constructor(left: number, right: number, closed: ClosedType = "right") {
+    if (left > right) {
+      throw new RangeError(`Interval: left (${left}) must be ≤ right (${right})`);
+    }
+    this.left = left;
+    this.right = right;
+    this.closed = closed;
+  }
+
+  // ─── derived properties ─────────────────────────────────────────
+
+  /** Length of the interval (`right − left`). */
+  get length(): number {
+    return this.right - this.left;
+  }
+
+  /** Mid-point of the interval. */
+  get mid(): number {
+    return (this.left + this.right) / 2;
+  }
+
+  /** True when left endpoint is closed. */
+  get closedLeft(): boolean {
+    return this.closed === "left" || this.closed === "both";
+  }
+
+  /** True when right endpoint is closed. */
+  get closedRight(): boolean {
+    return this.closed === "right" || this.closed === "both";
+  }
+
+  /** True when neither endpoint is closed. */
+  get isOpen(): boolean {
+    return this.closed === "neither";
+  }
+
+  /** True when both endpoints are closed. */
+  get isClosed(): boolean {
+    return this.closed === "both";
+  }
+
+  // ─── membership ─────────────────────────────────────────────────
+
+  /**
+   * Return `true` if `value` falls within this interval.
+   *
+   * @example
+   * ```ts
+   * new Interval(0, 5).contains(5);          // true  (right-closed)
+   * new Interval(0, 5).contains(0);          // false (right-closed, 0 excluded)
+   * new Interval(0, 5, "both").contains(0);  // true
+   * ```
+   */
+  contains(value: number): boolean {
+    const leftOk = this.closedLeft ? value >= this.left : value > this.left;
+    const rightOk = this.closedRight ? value <= this.right : value < this.right;
+    return leftOk && rightOk;
+  }
+
+  // ─── comparison / set operations ────────────────────────────────
+
+  /**
+   * Return `true` if this interval overlaps with `other`.
+   *
+   * Uses the same two-sided test as `pandas.Interval.overlaps`: this interval
+   * must start before `other` ends AND `other` must start before this interval
+   * ends. A shared endpoint only counts when it is closed in both intervals.
+   *
+   * Both sides are always checked, so the result is symmetric
+   * (`a.overlaps(b) === b.overlaps(a)`), including for zero-length intervals
+   * such as `[0, 0]` versus `(0, 0]`. Intervals with `NaN` endpoints never
+   * overlap anything.
+   *
+   * @param other - Interval to compare against.
+   * @returns Whether the two intervals overlap.
+   */
+  overlaps(other: Interval): boolean {
+    // Non-strict comparison is allowed only where the touching endpoint is
+    // closed on both sides; otherwise the comparison must be strict.
+    const startsBeforeOtherEnds =
+      this.closedLeft && other.closedRight ? this.left <= other.right : this.left < other.right;
+    const otherStartsBeforeEnd =
+      other.closedLeft && this.closedRight ? other.left <= this.right : other.left < this.right;
+    return startsBeforeOtherEnds && otherStartsBeforeEnd;
+  }
+
+  /**
+   * Return `true` if this interval is equal to `other`
+   * (same endpoints and same `closed` type).
+   */
+  equals(other: Interval): boolean {
+    return this.left === other.left && this.right === other.right && this.closed === other.closed;
+  }
+
+  // ─── display ────────────────────────────────────────────────────
+
+  /**
+   * Render with pandas-style brackets, e.g. `(0, 1.5]`.
+   * Endpoints use JavaScript's default number formatting.
+   */
+  toString(): string {
+    const l = this.closedLeft ? "[" : "(";
+    const r = this.closedRight ? "]" : ")";
+    return `${l}${this.left}, ${this.right}${r}`;
+  }
+}
+
+// ─── IntervalIndex ────────────────────────────────────────────────────────────
+
+/**
+ * An immutable, ordered collection of `Interval` objects.
+ *
+ * Mirrors `pandas.IntervalIndex`. The intervals passed in are copied and the
+ * copy is frozen, so later changes to the caller's array are not observed.
+ */
+export class IntervalIndex {
+  private readonly _intervals: readonly Interval[];
+
+  /** Human-readable axis name. */
+  readonly name: string | null;
+
+  /**
+   * @param intervals - Intervals to store, in order.
+   * @param name - Optional axis name (default `null`).
+   */
+  constructor(intervals: readonly Interval[], name: string | null = null) {
+    this._intervals = Object.freeze(Array.from(intervals));
+    this.name = name;
+  }
+
+  // ─── factories ──────────────────────────────────────────────────
+
+  /**
+   * Build an `IntervalIndex` whose intervals are the gaps between consecutive
+   * break points, so `n` breaks give `n − 1` intervals.
+   *
+   * @throws {RangeError} If fewer than 2 break points are supplied.
+   *
+   * @example
+   * ```ts
+   * IntervalIndex.fromBreaks([0, 1, 2, 3]);
+   * // IntervalIndex([(0, 1], (1, 2], (2, 3]])
+   * ```
+   */
+  static fromBreaks(breaks: readonly number[], options?: IntervalOptions): IntervalIndex {
+    if (breaks.length < 2) {
+      throw new RangeError("fromBreaks: at least 2 break points are required");
+    }
+    const closed = options?.closed ?? "right";
+    const intervals = Array.from(
+      { length: breaks.length - 1 },
+      (_, i) => new Interval(breaks[i] as number, breaks[i + 1] as number, closed),
+    );
+    return new IntervalIndex(intervals, options?.name ?? null);
+  }
+
+  /**
+   * Build an `IntervalIndex` by pairing `left[i]` with `right[i]`.
+   *
+   * @throws {RangeError} If the two arrays differ in length.
+   */
+  static fromArrays(
+    left: readonly number[],
+    right: readonly number[],
+    options?: IntervalOptions,
+  ): IntervalIndex {
+    if (left.length !== right.length) {
+      throw new RangeError("fromArrays: left and right arrays must have the same length");
+    }
+    const closed = options?.closed ?? "right";
+    const intervals: Interval[] = [];
+    for (let i = 0; i < left.length; i++) {
+      intervals.push(new Interval(left[i] as number, right[i] as number, closed));
+    }
+    return new IntervalIndex(intervals, options?.name ?? null);
+  }
+
+  /**
+   * Wrap existing `Interval` objects in an `IntervalIndex`.
+   */
+  static fromIntervals(intervals: readonly Interval[], name?: string | null): IntervalIndex {
+    return new IntervalIndex(intervals, name ?? null);
+  }
+
+  // ─── properties ─────────────────────────────────────────────────
+
+  /** Number of intervals. */
+  get size(): number {
+    return this._intervals.length;
+  }
+
+  /** All intervals in order (the frozen internal array). */
+  get values(): readonly Interval[] {
+    return this._intervals;
+  }
+
+  /** Left endpoints (a new array on every access). */
+  get left(): readonly number[] {
+    return this._intervals.map(({ left }) => left);
+  }
+
+  /** Right endpoints (a new array on every access). */
+  get right(): readonly number[] {
+    return this._intervals.map(({ right }) => right);
+  }
+
+  /** Mid-points (a new array on every access). */
+  get mid(): readonly number[] {
+    return this._intervals.map(({ mid }) => mid);
+  }
+
+  /** Lengths (`right − left`) of each interval (a new array on every access). */
+  get length(): readonly number[] {
+    return this._intervals.map(({ length }) => length);
+  }
+
+  /**
+   * Which endpoints are closed. Read from the first interval only (the index
+   * is assumed to be homogeneous); `"right"` when the index is empty.
+   */
+  get closed(): ClosedType {
+    const [first] = this._intervals;
+    return first?.closed ?? "right";
+  }
+
+  /**
+   * True when no interval starts before its predecessor ends
+   * (`prev.right > curr.left` never holds for adjacent intervals).
+   */
+  get isMonotonic(): boolean {
+    return this._intervals.every((curr, i, all) => {
+      if (i === 0) {
+        return true;
+      }
+      const prev = all[i - 1] as Interval;
+      // Written as a negated `>` so that NaN endpoints do not fail the check.
+      return !(prev.right > curr.left);
+    });
+  }
+
+  // ─── lookup ─────────────────────────────────────────────────────
+
+  /**
+   * Return the interval at position `i` (0-based).
+   *
+   * @throws {RangeError} If no interval is stored at position `i`.
+   */
+  get(i: number): Interval {
+    const found = this._intervals[i];
+    if (found !== undefined) {
+      return found;
+    }
+    throw new RangeError(`Index ${i} out of range [0, ${this.size})`);
+  }
+
+  /**
+   * Return the 0-based position of the first interval that {@link Interval.contains}
+   * `value`, or `-1` if none.
+   */
+  indexOf(value: number): number {
+    return this._intervals.findIndex((iv) => iv.contains(value));
+  }
+
+  /**
+   * Return a new index (same name) holding the intervals that overlap `other`.
+   */
+  overlapping(other: Interval): IntervalIndex {
+    const hits = this._intervals.filter((iv) => iv.overlaps(other));
+    return new IntervalIndex(hits, this.name);
+  }
+
+  // ─── set operations ─────────────────────────────────────────────
+
+  /**
+   * Return a new index with the intervals of `other` after this index's own.
+   * The result keeps this index's name.
+   */
+  append(other: IntervalIndex): IntervalIndex {
+    return new IntervalIndex(this._intervals.concat(other._intervals), this.name);
+  }
+
+  // ─── display ────────────────────────────────────────────────────
+
+  /** Render as `IntervalIndex([...], closed='...')`. */
+  toString(): string {
+    const inner = this._intervals.join(", ");
+    return `IntervalIndex([${inner}], closed='${this.closed}')`;
+  }
+}
+
+// ─── intervalRange ────────────────────────────────────────────────────────────
+
+/**
+ * Return an `IntervalIndex` of consecutive intervals spanning `start` to `end`.
+ *
+ * Modelled on `pandas.interval_range`. Exactly one of `options.periods` or
+ * `options.freq` must be specified.
+ *
+ * - `periods`: the span is divided into that many equal-length intervals; the
+ *   final edge is pinned to `end` exactly.
+ * - `freq`: edges are placed at `start + i * freq` while they lie below `end`
+ *   (within a small relative tolerance), then `end` is appended as the last
+ *   edge. If the span is not a multiple of `freq`, the last interval is
+ *   shorter than the others.
+ *
+ * @param start   Left edge of the first interval.
+ * @param end     Right edge of the last interval; must be greater than `start`.
+ * @param options `periods` (number of intervals) or `freq` (interval length),
+ *                plus optional `closed` (default `"right"`) and `name`.
+ * @returns The generated `IntervalIndex`.
+ * @throws {RangeError} If `end <= start`; if both or neither of `periods` /
+ *   `freq` are given; if `periods` is not a positive integer; if `freq` is not
+ *   greater than 0; or, when `freq` is used, if `start`, `end` or `freq` is
+ *   not a finite number.
+ *
+ * @example
+ * ```ts
+ * intervalRange(0, 1, { periods: 4 });
+ * // IntervalIndex([(0, 0.25], (0.25, 0.5], (0.5, 0.75], (0.75, 1]])
+ *
+ * intervalRange(0, 10, { freq: 2.5 });
+ * // IntervalIndex([(0, 2.5], (2.5, 5], (5, 7.5], (7.5, 10]])
+ * ```
+ */
+export function intervalRange(
+  start: number,
+  end: number,
+  options: IntervalRangeOptions,
+): IntervalIndex {
+  if (end <= start) {
+    throw new RangeError(`intervalRange: end (${end}) must be > start (${start})`);
+  }
+  const closed = options.closed ?? "right";
+  const name = options.name ?? null;
+
+  let breaks: number[];
+
+  if (options.periods !== undefined && options.freq !== undefined) {
+    throw new RangeError("intervalRange: specify exactly one of periods or freq");
+  }
+  if (options.periods !== undefined) {
+    const n = options.periods;
+    if (!Number.isInteger(n) || n < 1) {
+      throw new RangeError("intervalRange: periods must be a positive integer");
+    }
+    const step = (end - start) / n;
+    breaks = Array.from({ length: n + 1 }, (_, i) => start + i * step);
+    // Pin the last edge so rounding in `i * step` cannot move it off `end`.
+    breaks[n] = end;
+  } else if (options.freq !== undefined) {
+    const freq = options.freq;
+    if (freq <= 0) {
+      throw new RangeError("intervalRange: freq must be > 0");
+    }
+    // Non-finite inputs would otherwise loop forever (e.g. end = Infinity) or
+    // surface later as an unrelated "fromBreaks" error (e.g. freq = NaN).
+    if (!(Number.isFinite(start) && Number.isFinite(end) && Number.isFinite(freq))) {
+      throw new RangeError(
+        "intervalRange: start, end and freq must be finite numbers when freq is used",
+      );
+    }
+    // Tolerance keeps an edge that lands (almost) on `end` from being emitted
+    // twice — once by the loop and once by the final push.
+    const stop = end - freq * 1e-10;
+    breaks = [];
+    let i = 0;
+    let edge = start;
+    while (edge < stop) {
+      breaks.push(edge);
+      i += 1;
+      // Computed from the index rather than by repeated addition: no rounding
+      // drift accumulates, and the loop still advances when `freq` is tiny
+      // relative to `start`.
+      edge = start + i * freq;
+    }
+    breaks.push(end);
+  } else {
+    throw new RangeError("intervalRange: one of periods or freq must be specified");
+  }
+
+  return IntervalIndex.fromBreaks(breaks, { closed, name });
+}
diff --git a/src/stats/na_ops.ts b/src/stats/na_ops.ts
new file mode 100644
index 00000000..c776bb1f
--- /dev/null
+++ b/src/stats/na_ops.ts
@@ -0,0 +1,336 @@
+/**
+ * na_ops — missing-value utilities for Series and DataFrame.
+ *
+ * Mirrors the following pandas module-level functions and methods:
+ * - `pd.isna(obj)` / `pd.isnull(obj)` — detect missing values
+ * - `pd.notna(obj)` / `pd.notnull(obj)` — detect non-missing values
+ * - `Series.ffill()` / `DataFrame.ffill()` — forward-fill missing values
+ * - `Series.bfill()` / `DataFrame.bfill()` — backward-fill missing values
+ *
+ * All functions are **pure** (return new objects; inputs are unchanged).
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/** Options for {@link ffillSeries} and {@link bfillSeries}. */
+export interface FillDirectionOptions {
+ /**
+ * Maximum number of consecutive NaN/null values to fill.
+ * The count starts over after every non-missing value, so each gap is
+ * limited independently.
+ * `null` means no limit (default).
+ */
+ readonly limit?: number | null;
+}
+
+/** Options for {@link dataFrameFfill} and {@link dataFrameBfill}. */
+export interface DataFrameFillOptions extends FillDirectionOptions {
+ /**
+ * - `0` or `"index"` (default): fill missing values down each **column**.
+ * - `1` or `"columns"`: fill missing values across each **row**.
+ */
+ readonly axis?: 0 | 1 | "index" | "columns";
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** Report whether `v` is a missing value: `null`, `undefined`, or a numeric `NaN`. */
+function isMissing(v: Scalar): boolean {
+  const isNaNNumber = typeof v === "number" && Number.isNaN(v);
+  return isNaNNumber || v === null || v === undefined;
+}
+
+/**
+ * Forward-fill a list of scalars. The input is left untouched; a filled copy
+ * is returned.
+ *
+ * Each missing entry takes the most recent non-missing value before it, as
+ * long as fewer than `limit` entries have been filled since that value
+ * (`limit === null` means unlimited). Entries before the first non-missing
+ * value stay as they are.
+ */
+function ffillArray(vals: readonly Scalar[], limit: number | null): Scalar[] {
+  const filled: Scalar[] = [...vals];
+  let carry: Scalar = null;
+  let run = 0;
+  for (const [pos, cell] of filled.entries()) {
+    if (!isMissing(cell)) {
+      // A real value: remember it and start a fresh fill budget.
+      carry = cell as Scalar;
+      run = 0;
+      continue;
+    }
+    const haveSource = !isMissing(carry);
+    const underLimit = limit === null || run < limit;
+    if (haveSource && underLimit) {
+      filled[pos] = carry;
+      run += 1;
+    }
+  }
+  return filled;
+}
+
+/**
+ * Backward-fill a list of scalars. The input is left untouched; a filled copy
+ * is returned.
+ *
+ * Scanning from the end, each missing entry takes the nearest non-missing
+ * value after it, as long as fewer than `limit` entries have been filled
+ * since that value (`limit === null` means unlimited). Entries after the last
+ * non-missing value stay as they are.
+ */
+function bfillArray(vals: readonly Scalar[], limit: number | null): Scalar[] {
+  const filled: Scalar[] = [...vals];
+  let carry: Scalar = null;
+  let run = 0;
+  for (let pos = filled.length; pos-- > 0; ) {
+    const cell = filled[pos];
+    if (!isMissing(cell)) {
+      // A real value: remember it and start a fresh fill budget.
+      carry = cell as Scalar;
+      run = 0;
+      continue;
+    }
+    const haveSource = !isMissing(carry);
+    const underLimit = limit === null || run < limit;
+    if (haveSource && underLimit) {
+      filled[pos] = carry;
+      run += 1;
+    }
+  }
+  return filled;
+}
+
+// ─── isna / notna ─────────────────────────────────────────────────────────────
+
+/**
+ * Detect missing values in a scalar, Series, or DataFrame.
+ *
+ * - **Scalar**: `true` when the value is `null`, `undefined`, or `NaN`.
+ * - **Series**: delegates to the Series' own `isna()` method.
+ * - **DataFrame**: delegates to the DataFrame's own `isna()` method.
+ *
+ * Mirrors `pandas.isna()` / `pandas.isnull()`.
+ *
+ * @example
+ * ```ts
+ * import { isna } from "tsb";
+ * isna(null);      // true
+ * isna(42);        // false
+ * isna(NaN);       // true
+ *
+ * const s = new Series({ data: [1, null, NaN, 4] });
+ * isna(s);         // Series([false, true, true, false])
+ * ```
+ */
+export function isna(value: Scalar): boolean;
+export function isna(value: Series): Series;
+export function isna(value: DataFrame): DataFrame;
+export function isna(
+  value: Scalar | Series | DataFrame,
+): boolean | Series | DataFrame {
+  if (value instanceof Series) {
+    return value.isna();
+  }
+  return value instanceof DataFrame ? value.isna() : isMissing(value as Scalar);
+}
+
+/**
+ * Detect non-missing values in a scalar, Series, or DataFrame.
+ *
+ * - **Scalar**: `false` when the value is `null`, `undefined`, or `NaN`.
+ * - **Series** / **DataFrame**: delegates to the object's own `notna()` method.
+ *
+ * Mirrors `pandas.notna()` / `pandas.notnull()`.
+ *
+ * @example
+ * ```ts
+ * import { notna } from "tsb";
+ * notna(null);  // false
+ * notna(42);    // true
+ * ```
+ */
+export function notna(value: Scalar): boolean;
+export function notna(value: Series): Series;
+export function notna(value: DataFrame): DataFrame;
+export function notna(
+  value: Scalar | Series | DataFrame,
+): boolean | Series | DataFrame {
+  if (value instanceof Series) {
+    return value.notna();
+  }
+  return value instanceof DataFrame ? value.notna() : !isMissing(value as Scalar);
+}
+
+/** Alias for {@link isna}. Mirrors `pandas.isnull()`. */
+export const isnull = isna;
+
+/** Alias for {@link notna}. Mirrors `pandas.notnull()`. */
+export const notnull = notna;
+
+// ─── ffill ────────────────────────────────────────────────────────────────────
+
+/**
+ * Forward-fill missing values in a Series.
+ *
+ * Each `null`/`NaN` value is replaced with the last non-missing value
+ * that precedes it (if any). Values before the first non-missing value
+ * remain missing.
+ *
+ * The result is a new Series built from the filled values together with the
+ * input's index, dtype and name.
+ *
+ * Mirrors `pandas.Series.ffill()`.
+ *
+ * @param series - Input Series (unchanged).
+ * @param options - Optional `{ limit }` — max consecutive fills.
+ * @returns New Series with forward-filled values.
+ *
+ * @example
+ * ```ts
+ * import { ffillSeries } from "tsb";
+ * const s = new Series({ data: [1, null, null, 4] });
+ * ffillSeries(s); // Series([1, 1, 1, 4])
+ * ```
+ */
+// NOTE(review): `T` is used in the body but is not declared in the signature
+// as shown here — presumably a generic parameter on this function; confirm.
+export function ffillSeries(
+ series: Series,
+ options?: FillDirectionOptions,
+): Series {
+ // A missing or `null` limit means "fill without limit".
+ const limit = options?.limit ?? null;
+ const filled = ffillArray(series.values as readonly Scalar[], limit) as T[];
+ return new Series({
+ data: filled,
+ index: series.index,
+ dtype: series.dtype,
+ // A nullish name is passed on as `undefined`.
+ name: series.name ?? undefined,
+ });
+}
+
+/**
+ * Backward-fill missing values in a Series.
+ *
+ * Each `null`/`NaN` value is replaced with the next non-missing value
+ * that follows it (if any). Values after the last non-missing value
+ * remain missing.
+ *
+ * The result is a new Series built from the filled values together with the
+ * input's index, dtype and name.
+ *
+ * Mirrors `pandas.Series.bfill()`.
+ *
+ * @param series - Input Series (unchanged).
+ * @param options - Optional `{ limit }` — max consecutive fills.
+ * @returns New Series with backward-filled values.
+ *
+ * @example
+ * ```ts
+ * import { bfillSeries } from "tsb";
+ * const s = new Series({ data: [1, null, null, 4] });
+ * bfillSeries(s); // Series([1, 4, 4, 4])
+ * ```
+ */
+// NOTE(review): `T` is used in the body but is not declared in the signature
+// as shown here — presumably a generic parameter on this function; confirm.
+export function bfillSeries(
+ series: Series,
+ options?: FillDirectionOptions,
+): Series {
+ // A missing or `null` limit means "fill without limit".
+ const limit = options?.limit ?? null;
+ const filled = bfillArray(series.values as readonly Scalar[], limit) as T[];
+ return new Series({
+ data: filled,
+ index: series.index,
+ dtype: series.dtype,
+ // A nullish name is passed on as `undefined`.
+ name: series.name ?? undefined,
+ });
+}
+
+// ─── DataFrame ffill / bfill ──────────────────────────────────────────────────
+
+/**
+ * Forward-fill missing values in a DataFrame.
+ *
+ * By default operates **column-wise** (axis=0): each column is independently
+ * forward-filled. With `axis=1` each row is forward-filled across columns.
+ *
+ * The input is not modified; a new DataFrame with the same row index is
+ * returned, and every column keeps its original dtype.
+ *
+ * Mirrors `pandas.DataFrame.ffill()`.
+ *
+ * @param df - Input DataFrame (unchanged).
+ * @param options - Optional `limit` (max consecutive fills, `null` = no limit)
+ * and `axis` (`0`/`"index"` column-wise, `1`/`"columns"` row-wise).
+ * @returns New DataFrame with forward-filled values.
+ *
+ * @example
+ * ```ts
+ * import { dataFrameFfill } from "tsb";
+ * const df = DataFrame.fromColumns({ a: [1, null, 3], b: [null, 2, null] });
+ * dataFrameFfill(df);
+ * // a: [1, 1, 3]
+ * // b: [null, 2, 2]
+ * ```
+ */
+export function dataFrameFfill(df: DataFrame, options?: DataFrameFillOptions): DataFrame {
+ const limit = options?.limit ?? null;
+ const axis = options?.axis ?? 0;
+ const byRow = axis === 1 || axis === "columns";
+
+ if (!byRow) {
+ // column-wise: fill each column independently
+ // NOTE(review): the type arguments of `Map` look truncated in this excerpt — confirm against the real file.
+ const colMap = new Map>();
+ for (const name of df.columns.values) {
+ const col = df.col(name);
+ const filled = ffillArray(col.values, limit) as Scalar[];
+ colMap.set(name, new Series({ data: filled, index: col.index, dtype: col.dtype }));
+ }
+ return new DataFrame(colMap, df.index);
+ }
+
+ // row-wise: fill across columns for each row
+ const nRows = df.shape[0];
+ const cols = df.columns.values;
+ const columns = cols.map((name) => df.col(name));
+ // One mutable copy of the values per column; `rowsFilled[ci][r]` is the cell at row r, column ci.
+ const rowsFilled: Scalar[][] = columns.map((c) => Array.from(c.values));
+ for (let r = 0; r < nRows; r++) {
+ // Gather row r across all columns (a nullish cell becomes `null`), fill it left to right, then write it back.
+ const rowVals: Scalar[] = columns.map((_, ci) => rowsFilled[ci]?.[r] ?? null);
+ const filled = ffillArray(rowVals, limit);
+ for (let ci = 0; ci < cols.length; ci++) {
+ const rowsFilledCI = rowsFilled[ci];
+ if (rowsFilledCI !== undefined) {
+ rowsFilledCI[r] = filled[ci] ?? null;
+ }
+ }
+ }
+ // Rebuild each column from its filled values, keeping the original index and dtype.
+ const colMap = new Map>();
+ for (let ci = 0; ci < cols.length; ci++) {
+ const name = cols[ci] as string;
+ const col = columns[ci] as Series;
+ colMap.set(
+ name,
+ new Series({
+ data: rowsFilled[ci] ?? [],
+ index: col.index,
+ dtype: col.dtype,
+ }),
+ );
+ }
+ return new DataFrame(colMap, df.index);
+}
+
+/**
+ * Backward-fill missing values in a DataFrame.
+ *
+ * By default operates **column-wise** (axis=0): each column is independently
+ * backward-filled. With `axis=1` each row is backward-filled across columns.
+ *
+ * The input is not modified; a new DataFrame with the same row index is
+ * returned, and every column keeps its original dtype.
+ *
+ * Mirrors `pandas.DataFrame.bfill()`.
+ *
+ * @param df - Input DataFrame (unchanged).
+ * @param options - Optional `limit` (max consecutive fills, `null` = no limit)
+ * and `axis` (`0`/`"index"` column-wise, `1`/`"columns"` row-wise).
+ * @returns New DataFrame with backward-filled values.
+ */
+export function dataFrameBfill(df: DataFrame, options?: DataFrameFillOptions): DataFrame {
+ const limit = options?.limit ?? null;
+ const axis = options?.axis ?? 0;
+ const byRow = axis === 1 || axis === "columns";
+
+ if (!byRow) {
+ // column-wise: fill each column independently
+ // NOTE(review): the type arguments of `Map` look truncated in this excerpt — confirm against the real file.
+ const colMap = new Map>();
+ for (const name of df.columns.values) {
+ const col = df.col(name);
+ const filled = bfillArray(col.values, limit) as Scalar[];
+ colMap.set(name, new Series({ data: filled, index: col.index, dtype: col.dtype }));
+ }
+ return new DataFrame(colMap, df.index);
+ }
+
+ // row-wise: fill across columns for each row
+ const nRows = df.shape[0];
+ const cols = df.columns.values;
+ const columns = cols.map((name) => df.col(name));
+ // One mutable copy of the values per column; `rowsFilled[ci][r]` is the cell at row r, column ci.
+ const rowsFilled: Scalar[][] = columns.map((c) => Array.from(c.values));
+ for (let r = 0; r < nRows; r++) {
+ // Gather row r across all columns (a nullish cell becomes `null`), fill it right to left, then write it back.
+ const rowVals: Scalar[] = columns.map((_, ci) => rowsFilled[ci]?.[r] ?? null);
+ const filled = bfillArray(rowVals, limit);
+ for (let ci = 0; ci < cols.length; ci++) {
+ const rowsFilledCI = rowsFilled[ci];
+ if (rowsFilledCI !== undefined) {
+ rowsFilledCI[r] = filled[ci] ?? null;
+ }
+ }
+ }
+ // Rebuild each column from its filled values, keeping the original index and dtype.
+ const colMap = new Map>();
+ for (let ci = 0; ci < cols.length; ci++) {
+ const name = cols[ci] as string;
+ const col = columns[ci] as Series;
+ colMap.set(
+ name,
+ new Series({
+ data: rowsFilled[ci] ?? [],
+ index: col.index,
+ dtype: col.dtype,
+ }),
+ );
+ }
+ return new DataFrame(colMap, df.index);
+}
diff --git a/src/stats/pct_change.ts b/src/stats/pct_change.ts
new file mode 100644
index 00000000..c46c9e84
--- /dev/null
+++ b/src/stats/pct_change.ts
@@ -0,0 +1,231 @@
+/**
+ * pct_change — percentage change between current and prior element.
+ *
+ * Mirrors `pandas.Series.pct_change()` / `pandas.DataFrame.pct_change()`:
+ * - `pctChangeSeries(series, options)` — per-element % change
+ * - `pctChangeDataFrame(df, options)` — column-wise % change
+ *
+ * Formula (per element i, with shift=periods):
+ * `result[i] = (x[i] - x[i-periods]) / x[i-periods]`
+ *
+ * Unless `fillMethod` is `null`, NaN/null values in the source are filled *before* computing the
+ * ratio; the default is `"pad"` (matching pandas' default behaviour of `fill_method="pad"`).
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+ /** Fill applied to NaN/null before computing pct_change: `"pad"` = forward-fill, `"bfill"` = backward-fill. */
+ export type PctChangeFillMethod = "pad" | "bfill";
+
+ /** Options for {@link pctChangeSeries} and {@link pctChangeDataFrame}. */
+ export interface PctChangeOptions {
+ /**
+ * Number of positions between the two elements being compared.
+ * Positive: compare with the element `periods` earlier; negative: with the element `|periods|` later.
+ * Default `1`.
+ */
+ readonly periods?: number;
+ /**
+ * How to fill NaN/null/undefined values *before* computing the ratio.
+ * - `"pad"` (default, also used when the option is `undefined`): forward-fill from the last valid value.
+ * - `"bfill"`: backward-fill from the next valid value.
+ * - `null`: no filling — missing values stay as-is and produce `null` wherever they enter a ratio.
+ */
+ readonly fillMethod?: PctChangeFillMethod | null;
+ /**
+ * Maximum number of consecutive missing values to fill per gap when a fill method is active.
+ * `undefined` / `null` means no limit; the value has no effect when `fillMethod` is `null`.
+ */
+ readonly limit?: number | null;
+ }
+
+ /** Options for {@link pctChangeDataFrame}: every {@link PctChangeOptions} field plus an axis selector. */
+ export interface DataFramePctChangeOptions extends PctChangeOptions {
+ /**
+ * - `0` or `"index"` (default): each column is processed on its own, comparing values down the rows.
+ * - `1` or `"columns"`: each row is processed on its own, comparing values across columns in column order.
+ */
+ readonly axis?: 0 | 1 | "index" | "columns";
+ }
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+ /** Type guard: true only for a `number` that is not NaN (null, undefined and non-numeric scalars fail; ±Infinity passes). */
+ function isNum(v: Scalar): v is number {
+ return typeof v === "number" && !Number.isNaN(v) && v !== null; // the null test is redundant after the typeof check
+ }
+
+/**
+ * Forward-fill an array of scalars in place, respecting an optional limit.
+ * Returns a NEW array.
+ */
+function padFill(vals: readonly Scalar[], limit: number | null | undefined): Scalar[] {
+ const out: Scalar[] = [...vals];
+ let run = 0;
+ let lastValid: Scalar = null;
+ for (let i = 0; i < out.length; i++) {
+ const v = out[i] as Scalar;
+ if (v !== null && v !== undefined && !(typeof v === "number" && Number.isNaN(v))) {
+ lastValid = v;
+ run = 0;
+ } else if (lastValid !== null && (limit == null || run < limit)) {
+ out[i] = lastValid;
+ run++;
+ }
+ }
+ return out;
+}
+
+/**
+ * Backward-fill an array of scalars, respecting an optional limit.
+ * Returns a NEW array.
+ */
+function bfillFill(vals: readonly Scalar[], limit: number | null | undefined): Scalar[] {
+ const tmp = padFill([...vals].reverse(), limit);
+ return tmp.reverse();
+}
+
+ /** Return a new array: a plain copy of `vals` when `method` is null/undefined, else the `"pad"` or backward-filled copy. */
+ function applyFill(
+ vals: readonly Scalar[],
+ method: PctChangeFillMethod | null | undefined,
+ limit: number | null | undefined,
+ ): Scalar[] {
+ if (!method) return [...vals]; // no fill requested; `limit` is ignored
+ return method === "pad" ? padFill(vals, limit) : bfillFill(vals, limit);
+ }
+
+ /** Compute `out[i] = vals[i] / vals[i - periods] - 1` for any sign of `periods`; slots without a valid numeric pair are `null`. */
+ function computePct(vals: readonly Scalar[], periods: number): Scalar[] {
+ const n = vals.length;
+ const out: Scalar[] = new Array(n).fill(null);
+ const shift = periods;
+ if (shift >= 0) {
+ for (let i = shift; i < n; i++) {
+ const curr = vals[i] as Scalar;
+ const prev = vals[i - shift] as Scalar;
+ if (isNum(curr) && isNum(prev) && prev !== 0) {
+ out[i] = curr / prev - 1;
+ } else if (isNum(curr) && isNum(prev) && prev === 0) {
+ // 0 denominator → ±Infinity by the numerator's sign, NaN for 0/0 (same as pandas)
+ out[i] = curr === 0 ? Number.NaN : curr > 0 ? Infinity : -Infinity;
+ } else {
+ out[i] = null;
+ }
+ }
+ } else {
+ // Negative periods: the reference (denominator) is the element |periods| positions AHEAD of i,
+ const absShift = -shift; // i.e. out[i] = vals[i] / vals[i + |periods|] - 1, as in pandas' x / x.shift(periods) - 1
+ for (let i = 0; i < n - absShift; i++) {
+ const curr = vals[i] as Scalar;
+ const fwd = vals[i + absShift] as Scalar;
+ if (isNum(curr) && isNum(fwd) && fwd !== 0) {
+ out[i] = curr / fwd - 1;
+ } else if (isNum(curr) && isNum(fwd) && fwd === 0) {
+ out[i] = curr === 0 ? Number.NaN : curr > 0 ? Infinity : -Infinity; // same zero-denominator rule as above
+ } else {
+ out[i] = null;
+ }
+ }
+ }
+ return out;
+ }
+
+// ─── public API ───────────────────────────────────────────────────────────────
+
+/**
+ * Compute the fractional change between a Series element and the element
+ * `periods` positions earlier (or later, for negative `periods`).
+ *
+ * Matches `pandas.Series.pct_change()`.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [100, 110, 99, 121] });
+ * pctChangeSeries(s); // [null, 0.1, -0.1, 0.2222…]
+ * ```
+ */
+export function pctChangeSeries(series: Series, options: PctChangeOptions = {}): Series {
+ const periods = options.periods ?? 1;
+ const fillMethod = options.fillMethod !== undefined ? options.fillMethod : "pad";
+ const limit = options.limit ?? null;
+
+ const filled = applyFill(series.values, fillMethod, limit);
+ const result = computePct(filled, periods);
+
+ return new Series({
+ data: result,
+ index: series.index,
+ name: series.name ?? undefined,
+ });
+}
+
+/**
+ * Compute percentage change for every column (or row) of a DataFrame.
+ *
+ * Matches `pandas.DataFrame.pct_change()`.
+ *
+ * @example
+ * ```ts
+ * const df = new DataFrame(new Map([
+ * ["a", new Series({ data: [100, 110, 121] })],
+ * ["b", new Series({ data: [200, 180, 198] })],
+ * ]));
+ * pctChangeDataFrame(df); // fractional change per column
+ * ```
+ */
+export function pctChangeDataFrame(
+ df: DataFrame,
+ options: DataFramePctChangeOptions = {},
+): DataFrame {
+ const axis = options.axis ?? 0;
+ const colWise = axis === 0 || axis === "index";
+
+ if (colWise) {
+ const colMap = new Map>();
+ for (const name of df.columns.values) {
+ colMap.set(name, pctChangeSeries(df.col(name), options));
+ }
+ return new DataFrame(colMap, df.index);
+ }
+
+ // Row-wise: each row across columns
+ const periods = options.periods ?? 1;
+ const fillMethod = options.fillMethod !== undefined ? options.fillMethod : "pad";
+ const limit = options.limit ?? null;
+ const nRows = df.index.length;
+ const cols = df.columns.values;
+ const nCols = cols.length;
+
+ const resultCols = new Map();
+ for (const name of cols) {
+ resultCols.set(name, new Array(nRows).fill(null));
+ }
+
+ for (let r = 0; r < nRows; r++) {
+ const row: Scalar[] = [];
+ for (const name of cols) {
+ row.push(df.col(name).values[r] as Scalar);
+ }
+ const filled = applyFill(row, fillMethod, limit);
+ const pct = computePct(filled, periods);
+ for (let c = 0; c < nCols; c++) {
+ (resultCols.get(cols[c] as string) as Scalar[])[r] = pct[c] as Scalar;
+ }
+ }
+
+ const colMap = new Map>();
+ for (const name of cols) {
+ colMap.set(
+ name,
+ new Series({ data: resultCols.get(name) as Scalar[], index: df.index, name }),
+ );
+ }
+ return new DataFrame(colMap, df.index);
+}
diff --git a/src/stats/replace.ts b/src/stats/replace.ts
new file mode 100644
index 00000000..54c2662e
--- /dev/null
+++ b/src/stats/replace.ts
@@ -0,0 +1,237 @@
+/**
+ * replace — value substitution for Series and DataFrame.
+ *
+ * Mirrors the following pandas methods:
+ * - `Series.replace(to_replace, value)` / `Series.replace(mapping)`
+ * - `DataFrame.replace(to_replace, value)` / `DataFrame.replace(mapping)`
+ *
+ * Supported replacement specs:
+ * - **Scalar → Scalar**: replace every occurrence of one value with another.
+ * - **Array → Scalar**: replace every value in the array with a single value.
+ * - **Array → Array**: pair-wise replacement (must be same length).
+ * - **Record / Map**: lookup-table replacement (`{ old: new, ... }`).
+ *
+ * All functions are **pure** (return new objects; inputs are unchanged).
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── types ────────────────────────────────────────────────────────────────────
+
+ /** A lookup table mapping old values to new values: a plain object (keys matched against `String(value)`) or a Map. */
+ export type ReplaceMapping = Readonly> | ReadonlyMap;
+
+/**
+ * Replacement specification accepted by {@link replaceSeries} /
+ * {@link replaceDataFrame}.
+ *
+ * Mirrors the first two positional args of `pandas.Series.replace`.
+ */
+export type ReplaceSpec =
+ | { readonly toReplace: Scalar; readonly value: Scalar }
+ | { readonly toReplace: readonly Scalar[]; readonly value: Scalar }
+ | { readonly toReplace: readonly Scalar[]; readonly value: readonly Scalar[] }
+ | { readonly toReplace: ReplaceMapping };
+
+ /** Options shared by {@link replaceSeries} and {@link replaceDataFrame}. */
+ export interface ReplaceOptions {
+ /**
+ * When `true` (the default), a `NaN` in the data matches a `NaN` in `toReplace` or among Map keys.
+ * Plain-object mappings do not consult this flag because they match on `String(value)`.
+ */
+ readonly matchNaN?: boolean;
+ }
+
+ /** Options for {@link replaceDataFrame}: {@link ReplaceOptions} plus an optional column filter. */
+ export interface DataFrameReplaceOptions extends ReplaceOptions {
+ /**
+ * If provided, only these columns are rewritten; every other column is carried over unmodified.
+ * Names that are not columns of the frame are ignored. By default all columns are processed.
+ */
+ readonly columns?: readonly string[];
+ }
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+ /** Equality used for matching: `===`, except two Dates compare by timestamp and, when `matchNaN`, NaN equals NaN. */
+ function scalarEq(a: Scalar, b: Scalar, matchNaN: boolean): boolean {
+ if (
+ matchNaN &&
+ typeof a === "number" &&
+ typeof b === "number" &&
+ Number.isNaN(a) &&
+ Number.isNaN(b)
+ ) {
+ return true;
+ }
+ if (a instanceof Date && b instanceof Date) {
+ return a.getTime() === b.getTime();
+ }
+ return a === b; // with matchNaN off, NaN never matches because NaN !== NaN
+ }
+
+/**
+ * Build a replacement function from a {@link ReplaceSpec}.
+ * Returns `(v) => new_value` or `v` unchanged if no match.
+ */
+function buildReplacer(spec: ReplaceSpec, matchNaN: boolean): (v: Scalar) => Scalar {
+ // Mapping variant
+ if (
+ "toReplace" in spec &&
+ !Array.isArray(spec.toReplace) &&
+ typeof spec.toReplace === "object" &&
+ spec.toReplace !== null &&
+ !(spec.toReplace instanceof Map) &&
+ !("value" in spec)
+ ) {
+ // Record
+ const rec = spec.toReplace as Readonly>;
+ return (v: Scalar): Scalar => {
+ const key = String(v);
+ return Object.prototype.hasOwnProperty.call(rec, key) ? (rec[key] as Scalar) : v;
+ };
+ }
+
+ if ("toReplace" in spec && spec.toReplace instanceof Map) {
+ const map = spec.toReplace as ReadonlyMap;
+ return (v: Scalar): Scalar => {
+ for (const [k, val] of map) {
+ if (scalarEq(v, k, matchNaN)) {
+ return val;
+ }
+ }
+ return v;
+ };
+ }
+
+ // Mapping passed via { toReplace: mapping } shape
+ if ("toReplace" in spec && !("value" in spec)) {
+ const mapping = spec.toReplace as ReplaceMapping;
+ if (mapping instanceof Map) {
+ const map = mapping as ReadonlyMap;
+ return (v: Scalar): Scalar => {
+ for (const [k, val] of map) {
+ if (scalarEq(v, k, matchNaN)) {
+ return val;
+ }
+ }
+ return v;
+ };
+ }
+ const rec = mapping as Readonly>;
+ return (v: Scalar): Scalar => {
+ const key = String(v);
+ return Object.prototype.hasOwnProperty.call(rec, key) ? (rec[key] as Scalar) : v;
+ };
+ }
+
+ const s = spec as { toReplace: Scalar | readonly Scalar[]; value: Scalar | readonly Scalar[] };
+
+ if (!Array.isArray(s.toReplace)) {
+ // Scalar → Scalar
+ const old = s.toReplace as Scalar;
+ const newVal = s.value as Scalar;
+ return (v: Scalar): Scalar => (scalarEq(v, old, matchNaN) ? newVal : v);
+ }
+
+ const oldArr = s.toReplace as readonly Scalar[];
+
+ if (!Array.isArray(s.value)) {
+ // Array → Scalar
+ const newVal = s.value as Scalar;
+ return (v: Scalar): Scalar => {
+ for (const old of oldArr) {
+ if (scalarEq(v, old, matchNaN)) {
+ return newVal;
+ }
+ }
+ return v;
+ };
+ }
+
+ // Array → Array (pair-wise)
+ const newArr = s.value as readonly Scalar[];
+ if (oldArr.length !== newArr.length) {
+ throw new RangeError(
+ `replace: toReplace and value arrays must have the same length (got ${oldArr.length} and ${newArr.length})`,
+ );
+ }
+ return (v: Scalar): Scalar => {
+ for (let i = 0; i < oldArr.length; i++) {
+ if (scalarEq(v, oldArr[i] as Scalar, matchNaN)) {
+ return newArr[i] as Scalar;
+ }
+ }
+ return v;
+ };
+}
+
+// ─── Series ───────────────────────────────────────────────────────────────────
+
+/**
+ * Replace values in a Series according to `spec`.
+ *
+ * @example
+ * ```ts
+ * import { Series } from "tsb";
+ * import { replaceSeries } from "tsb";
+ *
+ * const s = new Series({ data: [1, 2, 3, 2, 1] });
+ * const r = replaceSeries(s, { toReplace: 2, value: 99 });
+ * // r.values → [1, 99, 3, 99, 1]
+ * ```
+ */
+export function replaceSeries(
+ series: Series,
+ spec: ReplaceSpec,
+ options: ReplaceOptions = {},
+): Series {
+ const matchNaN = options.matchNaN ?? true;
+ const replacer = buildReplacer(spec, matchNaN);
+ const newData = Array.from({ length: series.size }, (_, i) =>
+ replacer(series.values[i] as Scalar),
+ );
+ return new Series({ data: newData, index: series.index, name: series.name });
+}
+
+// ─── DataFrame ────────────────────────────────────────────────────────────────
+
+/**
+ * Replace values in a DataFrame according to `spec`.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame } from "tsb";
+ * import { replaceDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [2, 2, 4] });
+ * const r = replaceDataFrame(df, { toReplace: 2, value: 0 });
+ * // r.col("a").values → [1, 0, 3]
+ * // r.col("b").values → [0, 0, 4]
+ * ```
+ */
+export function replaceDataFrame(
+ df: DataFrame,
+ spec: ReplaceSpec,
+ options: DataFrameReplaceOptions = {},
+): DataFrame {
+ const matchNaN = options.matchNaN ?? true;
+ const replacer = buildReplacer(spec, matchNaN);
+ const targetCols = new Set(options.columns ?? df.columns.values);
+
+ const colMap = new Map>();
+ for (const name of df.columns.values) {
+ const col = df.col(name) as Series;
+ if (targetCols.has(name)) {
+ const newData = Array.from({ length: col.size }, (_, i) => replacer(col.values[i] as Scalar));
+ colMap.set(name, new Series({ data: newData, index: col.index, name: col.name }));
+ } else {
+ colMap.set(name, col);
+ }
+ }
+ return new DataFrame(colMap, df.index);
+}
diff --git a/src/stats/where_mask.ts b/src/stats/where_mask.ts
new file mode 100644
index 00000000..ecbefd72
--- /dev/null
+++ b/src/stats/where_mask.ts
@@ -0,0 +1,430 @@
+/**
+ * where_mask — conditional value selection for Series and DataFrame.
+ *
+ * Mirrors the following pandas methods:
+ * - `Series.where(cond, other=NaN)` — keep values where `cond` is True, replace with `other` where False
+ * - `Series.mask(cond, other=NaN)` — replace values where `cond` is True with `other`, keep where False
+ * - `DataFrame.where(cond, other=NaN, axis?)` — same but for DataFrames
+ * - `DataFrame.mask(cond, other=NaN, axis?)` — same but for DataFrames
+ *
+ * The condition can be:
+ * - A `boolean[]` array aligned by position
+ * - A `Series` (aligned by stringified index label; labels missing from the condition count as `false`)
+ * - A callable `(value: Scalar, label: Label) => boolean`
+ *
+ * For DataFrames, `cond` may additionally be:
+ * - A `DataFrame` of booleans (same shape)
+ * - A `boolean[][]` 2-D array
+ *
+ * All functions are **pure** (return new objects; inputs are unchanged).
+ * Missing values in `cond` are treated as `false`.
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Axis, Label, Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+ /** Conditions for {@link whereSeries} / {@link maskSeries}: positional boolean array, label-aligned Series, or predicate. */
+ export type SeriesCond =
+ | readonly boolean[]
+ | Series
+ | Series
+ | ((value: Scalar, label: Label) => boolean);
+
+ /** Conditions for {@link whereDataFrame} / {@link maskDataFrame}: 2-D boolean array, DataFrame, or any {@link SeriesCond}. */
+ export type DataFrameCond = readonly (readonly boolean[])[] | DataFrame | SeriesCond;
+
+ /** Options for {@link whereSeries} and {@link maskSeries}. */
+ export interface WhereOptions {
+ /**
+ * Replacement value written where the condition is `false` (for `where`) or `true` (for `mask`).
+ * Omitted or `undefined` means `null`, which stands in here for pandas' NaN default.
+ */
+ readonly other?: Scalar;
+ }
+
+ /** Options for {@link whereDataFrame} and {@link maskDataFrame}. */
+ export interface WhereDataFrameOptions extends WhereOptions {
+ /**
+ * Axis along which a 1-D condition (Series or flat boolean array) is aligned; other condition kinds ignore it.
+ * - `0` or `"index"` (default): one flag per **row**, broadcast across columns.
+ * - `1` or `"columns"`: one flag per **column**, broadcast across rows.
+ */
+ readonly axis?: Axis;
+ }
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+ /** True when a scalar counts as missing: `null`, `undefined`, or a numeric `NaN`. */
+ function isMissing(v: Scalar): boolean {
+ return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+ }
+
+/**
+ * Resolve a boolean condition value from a position + label, given the
+ * various condition types for Series.
+ */
+function resolveSeriesCond(cond: SeriesCond, i: number, label: Label, value: Scalar): boolean {
+ if (typeof cond === "function") {
+ return cond(value, label);
+ }
+ if (Array.isArray(cond)) {
+ const v = (cond as readonly boolean[])[i];
+ return v === true;
+ }
+ // Series or Series
+ const s = cond as Series;
+ // Try label-based lookup first, fall back to positional
+ const strLabel = String(label);
+ for (let j = 0; j < s.index.size; j++) {
+ if (String(s.index.at(j)) === strLabel) {
+ return s.iat(j) === true;
+ }
+ }
+ return false;
+}
+
+// ─── Series: where ────────────────────────────────────────────────────────────
+
+/**
+ * Return a new Series keeping values where `cond` is `true`, replacing with
+ * `other` (default `null`) where `cond` is `false`.
+ *
+ * Mirrors `pandas.Series.where(cond, other=np.nan)`.
+ *
+ * @example
+ * ```ts
+ * import { Series } from "tsb";
+ * import { whereSeries } from "tsb";
+ *
+ * const s = new Series({ data: [1, 2, 3, 4, 5] });
+ * whereSeries(s, [true, false, true, false, true]); // [1, null, 3, null, 5]
+ * whereSeries(s, (v) => (v as number) > 2, { other: 0 }); // [0, 0, 3, 4, 5]
+ * ```
+ */
+export function whereSeries(
+ series: Series,
+ cond: SeriesCond,
+ options?: WhereOptions,
+): Series {
+ const other: Scalar = options?.other !== undefined ? options.other : null;
+ const newData: Scalar[] = [];
+ for (let i = 0; i < series.size; i++) {
+ const label = series.index.at(i);
+ const value = series.iat(i);
+ const keep = resolveSeriesCond(cond, i, label, value);
+ newData.push(keep ? value : other);
+ }
+ return new Series({
+ data: newData,
+ index: series.index,
+ name: series.name,
+ });
+}
+
+// ─── Series: mask ─────────────────────────────────────────────────────────────
+
+/**
+ * Return a new Series replacing values where `cond` is `true` with `other`
+ * (default `null`), keeping values where `cond` is `false`.
+ *
+ * Mirrors `pandas.Series.mask(cond, other=np.nan)`.
+ *
+ * @example
+ * ```ts
+ * import { Series } from "tsb";
+ * import { maskSeries } from "tsb";
+ *
+ * const s = new Series({ data: [1, 2, 3, 4, 5] });
+ * maskSeries(s, [true, false, true, false, true]); // [null, 2, null, 4, null]
+ * maskSeries(s, (v) => (v as number) > 3, { other: -1 }); // [1, 2, 3, -1, -1]
+ * ```
+ */
+export function maskSeries(
+ series: Series,
+ cond: SeriesCond,
+ options?: WhereOptions,
+): Series {
+ const other: Scalar = options?.other !== undefined ? options.other : null;
+ const newData: Scalar[] = [];
+ for (let i = 0; i < series.size; i++) {
+ const label = series.index.at(i);
+ const value = series.iat(i);
+ const replace = resolveSeriesCond(cond, i, label, value);
+ newData.push(replace ? other : value);
+ }
+ return new Series({
+ data: newData,
+ index: series.index,
+ name: series.name,
+ });
+}
+
+// ─── DataFrame: helpers ───────────────────────────────────────────────────────
+
+ /** Write `v` at `matrix[r][c]`; silently does nothing when row `r` does not exist. */
+ function setCell(matrix: boolean[][], r: number, c: number, v: boolean): void {
+ const row = matrix[r];
+ if (row !== undefined) {
+ row[c] = v;
+ }
+ }
+
+ /** Fill `matrix` from a DataFrame condition: columns matched by name, rows by position; only an exact `true` counts. */
+ function buildFromDataFrameCond(
+ df: DataFrame,
+ cond: DataFrame,
+ matrix: boolean[][],
+ invert: boolean,
+ ): void {
+ const colNames = df.columns.values;
+ for (let r = 0; r < df.index.size; r++) {
+ for (let c = 0; c < colNames.length; c++) {
+ const colName = colNames[c];
+ if (colName === undefined) {
+ continue;
+ }
+ let val: Scalar = null;
+ try {
+ val = cond.col(colName).iat(r); // NOTE(review): cond.col() runs once per cell; it could be hoisted per column
+ } catch {
+ val = null; // any lookup failure (presumably a column or row missing from cond) counts as "not true"
+ }
+ const condTrue = val === true;
+ setCell(matrix, r, c, invert ? !condTrue : condTrue);
+ }
+ }
+ }
+
+ /** Fill `matrix` from a 2-D boolean array read by `[row][column]` position; absent entries count as false. */
+ function buildFrom2DArray(
+ df: DataFrame,
+ cond2d: readonly (readonly boolean[])[],
+ matrix: boolean[][],
+ invert: boolean,
+ ): void {
+ const colNames = df.columns.values;
+ for (let r = 0; r < df.index.size; r++) {
+ for (let c = 0; c < colNames.length; c++) {
+ const condTrue = cond2d[r]?.[c] === true; // a short or ragged cond yields undefined, i.e. false
+ setCell(matrix, r, c, invert ? !condTrue : condTrue);
+ }
+ }
+ }
+
+ /** Fill `matrix` from a 1-D condition on axis=0: one flag per row (via `resolveSeriesCond`), copied to every column. */
+ function buildFromSeriesAxis0(
+ df: DataFrame,
+ cond: SeriesCond,
+ matrix: boolean[][],
+ invert: boolean,
+ ): void {
+ const nCols = df.columns.values.length;
+ for (let r = 0; r < df.index.size; r++) {
+ const label = df.index.at(r);
+ const condTrue = resolveSeriesCond(cond, r, label, null); // value is passed as null: the flag applies to the whole row
+ for (let c = 0; c < nCols; c++) {
+ setCell(matrix, r, c, invert ? !condTrue : condTrue);
+ }
+ }
+ }
+
+ /** Flag for column `colName` from a Series (axis=1): the first index label whose string form equals the name; none → false. */
+ function seriesCondForColumn(s: Series, colName: string): boolean {
+ for (let j = 0; j < s.index.size; j++) {
+ if (String(s.index.at(j)) === colName) {
+ return s.iat(j) === true;
+ }
+ }
+ return false;
+ }
+
+ /** Resolve the axis=1 flag for one column: a Series by column name, an array by column position, anything else false. */
+ function resolveAxis1Cond(cond: SeriesCond, c: number, colName: string): boolean {
+ if (cond instanceof Series) {
+ return seriesCondForColumn(cond as Series, colName);
+ }
+ if (Array.isArray(cond)) {
+ return (cond as readonly boolean[])[c] === true;
+ }
+ return false; // a callable reaching this point is never invoked
+ }
+
+ /** Fill `matrix` from a 1-D condition on axis=1: one flag per column (via `resolveAxis1Cond`), copied to every row. */
+ function buildFromSeriesAxis1(
+ df: DataFrame,
+ cond: SeriesCond,
+ matrix: boolean[][],
+ invert: boolean,
+ ): void {
+ const colNames = df.columns.values;
+ for (let c = 0; c < colNames.length; c++) {
+ const colName = colNames[c];
+ if (colName === undefined) {
+ continue;
+ }
+ const condTrue = resolveAxis1Cond(cond, c, colName); // resolved once per column, then broadcast down the rows
+ for (let r = 0; r < df.index.size; r++) {
+ setCell(matrix, r, c, invert ? !condTrue : condTrue);
+ }
+ }
+ }
+
+ /** Fill `matrix` by calling `cond(cellValue, rowLabel)` once for every cell of `df`. */
+ function buildFromCallable(
+ df: DataFrame,
+ cond: (v: Scalar, l: Label) => boolean,
+ matrix: boolean[][],
+ invert: boolean,
+ ): void {
+ const colNames = df.columns.values;
+ for (let r = 0; r < df.index.size; r++) {
+ for (let c = 0; c < colNames.length; c++) {
+ const colName = colNames[c];
+ if (colName === undefined) {
+ continue;
+ }
+ const value = df.col(colName).iat(r); // NOTE(review): df.col() runs once per cell; it could be hoisted per column
+ const label = df.index.at(r); // the callable receives the row label, not the column name
+ const condTrue = cond(value, label);
+ setCell(matrix, r, c, invert ? !condTrue : condTrue);
+ }
+ }
+ }
+
+/**
+ * Build a 2-D boolean matrix (nRows × nCols) from the condition, where
+ * matrix[row][col] = true means "keep original value" (for `where`) or
+ * "replace with other" (for `mask`, where invert=true flips the meaning).
+ */
+function buildKeepMatrix(
+ df: DataFrame,
+ cond: DataFrameCond,
+ axis: Axis,
+ invert: boolean,
+): boolean[][] {
+ const nRows = df.index.size;
+ const nCols = df.columns.values.length;
+
+ const matrix: boolean[][] = Array.from({ length: nRows }, () =>
+ Array.from({ length: nCols }, () => false),
+ );
+
+ if (cond instanceof DataFrame) {
+ buildFromDataFrameCond(df, cond, matrix, invert);
+ } else if (Array.isArray(cond) && cond.length > 0 && Array.isArray(cond[0])) {
+ buildFrom2DArray(df, cond as readonly (readonly boolean[])[], matrix, invert);
+ } else if (typeof cond === "function") {
+ buildFromCallable(df, cond as (v: Scalar, l: Label) => boolean, matrix, invert);
+ } else {
+ const isRowAxis = axis === 0 || axis === "index";
+ if (isRowAxis) {
+ buildFromSeriesAxis0(df, cond as SeriesCond, matrix, invert);
+ } else {
+ buildFromSeriesAxis1(df, cond as SeriesCond, matrix, invert);
+ }
+ }
+
+ return matrix;
+}
+
+// ─── DataFrame: where ─────────────────────────────────────────────────────────
+
+/**
+ * Return a new DataFrame keeping values where `cond` is `true`, replacing
+ * with `other` (default `null`) where `cond` is `false`.
+ *
+ * Mirrors `pandas.DataFrame.where(cond, other=np.nan, axis=None)`.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame } from "tsb";
+ * import { whereDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+ * const cond = [[true, false], [false, true], [true, true]];
+ * whereDataFrame(df, cond); // a=[1,null,3], b=[null,5,6]
+ * ```
+ */
+export function whereDataFrame(
+ df: DataFrame,
+ cond: DataFrameCond,
+ options?: WhereDataFrameOptions,
+): DataFrame {
+ const other: Scalar = options?.other !== undefined ? options.other : null;
+ const axis: Axis = options?.axis ?? 0;
+
+ const keepMatrix = buildKeepMatrix(df, cond, axis, false);
+ const colNames = df.columns.values;
+
+ const colMap = new Map>();
+ for (let c = 0; c < colNames.length; c++) {
+ const colName = colNames[c];
+ if (colName === undefined) {
+ continue;
+ }
+ const col = df.col(colName);
+ const newData: Scalar[] = [];
+ for (let r = 0; r < df.index.size; r++) {
+ const keep = keepMatrix[r]?.[c] === true;
+ newData.push(keep ? col.iat(r) : other);
+ }
+ colMap.set(colName, new Series