diff --git a/playground/na_ops.html b/playground/na_ops.html
new file mode 100644
index 00000000..c321438f
--- /dev/null
+++ b/playground/na_ops.html
@@ -0,0 +1,480 @@
+
+
+
+
+
+ tsb — missing-value operations (isna, ffill, bfill)
+
+
+
+
+
+
Loading tsb runtime…
+
+
+ ← Back to playground index
+
+ Missing-value operations
+
+ isna / notna — detect missing values in scalars,
+ Series, and DataFrames.
+ ffill / bfill — propagate the last (or next) valid
+ value to fill gaps.
+ Mirrors pd.isna(), Series.ffill(), and
+ DataFrame.bfill() from pandas.
+
+
+
+
+
1 · isna / notna on scalars
+
+ Returns true / false for individual values.
+ null, undefined, and NaN are all
+ considered "missing".
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
2 · isna on a Series
+
+ When passed a Series, isna returns a boolean Series of the
+ same length — true where values are missing.
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
3 · isna on a DataFrame
+
+ Returns a DataFrame of booleans with the same shape — one column per
+ original column, true where missing.
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
4 · Forward-fill (ffillSeries)
+
+ Propagates the last valid value forward to fill gaps. Leading
+ nulls that have no preceding value remain null.
+ Use the optional limit to cap consecutive fills.
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
5 · Backward-fill (bfillSeries)
+
+ Propagates the next valid value backward to fill gaps. Trailing
+ nulls that have no following value remain null.
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
6 · DataFrame forward-fill & backward-fill
+
+ dataFrameFfill and dataFrameBfill apply fill
+ column-wise by default (axis=0). Pass axis: 1 to fill
+ row-wise across columns.
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
API Reference
+
// Module-level missing-value detection
+isna(value: Scalar): boolean
+isna(value: Series): Series<boolean>
+isna(value: DataFrame): DataFrame
+
+notna(value: Scalar): boolean
+notna(value: Series): Series<boolean>
+notna(value: DataFrame): DataFrame
+
+// Aliases
+isnull(...) // same as isna
+notnull(...) // same as notna
+
+// Series forward / backward fill
+ffillSeries(series, options?: { limit?: number | null }): Series
+bfillSeries(series, options?: { limit?: number | null }): Series
+
+// DataFrame forward / backward fill
+dataFrameFfill(df, options?: {
+ limit?: number | null, // max consecutive fills (default: no limit)
+ axis?: 0 | 1 | "index" | "columns", // default 0 (column-wise)
+}): DataFrame
+
+dataFrameBfill(df, options?: {
+ limit?: number | null,
+ axis?: 0 | 1 | "index" | "columns",
+}): DataFrame
+
+
+
+
+
+
diff --git a/playground/reduce_ops.html b/playground/reduce_ops.html
new file mode 100644
index 00000000..bafc9bf8
--- /dev/null
+++ b/playground/reduce_ops.html
@@ -0,0 +1,128 @@
+
+
+
+
+
+ tsb — nunique / any / all
+
+
+
+ tsb — nunique / any / all
+
+ Reduction operations that summarise a Series or DataFrame column/row into a scalar or
+ boolean result — mirroring
+ pandas.Series.nunique ,
+ DataFrame.any ,
+ and
+ DataFrame.all .
+
+
+ nuniqueSeries — count distinct values
+ import { Series, nuniqueSeries } from "tsb";
+
+const s = new Series([1, 2, 2, null, 3]);
+nuniqueSeries(s); // 3 (null excluded by default)
+nuniqueSeries(s, { dropna: false }); // 4 (null counted as a distinct value)
+
+
+ nunique — count distinct per column (axis=0, default)
+ import { DataFrame, nunique } from "tsb";
+
+const df = DataFrame.fromColumns({
+ brand: ["apple", "banana", "apple", "cherry"],
+ rating: [5, 3, 5, 4],
+ flag: [true, false, true, null],
+});
+nunique(df);
+// Series { brand: 3, rating: 3, flag: 2 }
+
+
+ nunique — count distinct per row (axis=1)
+ nunique(df, { axis: 1 });
+// Series [3, 3, 3, 2] — the last row's null flag is excluded by default, leaving 2 distinct values
+
+
+ anySeries / allSeries
+ import { Series, anySeries, allSeries } from "tsb";
+
+const flags = new Series([false, false, true]);
+anySeries(flags); // true — at least one truthy
+allSeries(flags); // false — not all truthy
+
+const ones = new Series([1, 2, 3]);
+allSeries(ones); // true — all truthy
+
+// skipna option
+const withNull = new Series([1, null, 2]);
+allSeries(withNull); // true (null skipped)
+allSeries(withNull, { skipna: false }); // false (null is falsy)
+
+
+ anyDataFrame / allDataFrame
+ import { DataFrame, anyDataFrame, allDataFrame } from "tsb";
+
+const df2 = DataFrame.fromColumns({
+ a: [0, 0, 1],
+ b: [1, 1, 1],
+ c: [0, 0, 0],
+});
+
+anyDataFrame(df2);
+// Series { a: true, b: true, c: false }
+
+allDataFrame(df2);
+// Series { a: false, b: true, c: false }
+
+// axis=1: reduce across columns → one boolean per row
+anyDataFrame(df2, { axis: 1 });
+// Series [true, true, true] (row 0: 0,1,0 → any=true via b)
+
+
+ boolOnly option
+ const mixed = DataFrame.fromColumns({
+ nums: [1, 2, 3],
+ flag: [true, false, true],
+});
+
+// Only consider boolean columns
+anyDataFrame(mixed, { boolOnly: true });
+// Series { flag: true } — 'nums' column excluded
+
+
+
+ Pandas parity note:
+ nunique, any, and all follow pandas' default
+ behaviour: missing values (null, undefined, NaN)
+ are excluded by default (dropna/skipna = true). Use
+ { dropna: false } or { skipna: false } to include them.
+
+
+ ← Back to playground index
+
+
+
+
diff --git a/playground/to_timedelta.html b/playground/to_timedelta.html
new file mode 100644
index 00000000..a09b42ca
--- /dev/null
+++ b/playground/to_timedelta.html
@@ -0,0 +1,132 @@
+
+
+
+
+
+ tsb — toTimedelta
+
+
+
+ ← tsb playground
+ toTimedelta stats
+
+ Convert scalars, arrays, or Series values to
+ Timedelta objects — mirroring
+ pandas.to_timedelta() .
+
+
+ Supported input formats
+
+ Format Example Result (ms)
+ Pandas-style "1 days 02:03:04"93 784 000 ms
+ Clock (HH:MM:SS) "01:30:00"5 400 000 ms
+ ISO 8601 "P1DT2H"93 600 000 ms
+ Human-readable "1h 30m 20s"5 420 000 ms
+ number (unit="ns") 1_000_000_0001 000 ms
+ number (unit="ms") 50005 000 ms
+ Timedelta new Timedelta(1000)unchanged
+ null / undefined / NaN nullnull
+
+
+ Timedelta class
+
+ Property / Method Description
+ .totalMsTotal duration in milliseconds (signed)
+ .daysWhole days
+ .hoursHours within the current day (0–23)
+ .minutesMinutes within the current hour (0–59)
+ .secondsSeconds within the current minute (0–59)
+ .msMilliseconds within the current second (0–999)
+ .abs()Absolute value
+ .add(other)Add two Timedeltas
+ .subtract(other)Subtract a Timedelta
+ .scale(n)Multiply by a scalar
+ .lt(other)Less-than comparison
+ .gt(other)Greater-than comparison
+ .eq(other)Equality comparison
+ .toString()Pandas-style string representation
+
+
+ Error handling
+
+ errors= Behaviour
+ "raise" (default)Throws TypeError on unparseable input
+ "coerce"Returns null on unparseable input
+ "ignore"Returns the original value unchanged
+
+
+ Quick examples
+ import { toTimedelta, Timedelta, formatTimedelta, Series } from "tsb";
+
+// Scalar — various string formats
+toTimedelta("1 days 02:03:04"); // Timedelta(93_784_000 ms)
+toTimedelta("01:30:00"); // Timedelta(5_400_000 ms)
+toTimedelta("P1DT2H3M4S"); // ISO 8601
+toTimedelta("1h 30m 20s 500ms"); // human-readable
+
+// Scalar — numeric
+toTimedelta(1_000_000_000); // default unit "ns" → 1000 ms
+toTimedelta(5000, { unit: "ms" }); // 5000 ms
+toTimedelta(2, { unit: "D" }); // 2 days
+
+// Missing values
+toTimedelta(null); // null
+toTimedelta("nope", { errors: "coerce" }); // null
+toTimedelta("nope", { errors: "ignore" }); // "nope" (unchanged)
+
+// Timedelta arithmetic
+const a = toTimedelta("1h") as Timedelta;
+const b = toTimedelta("30m") as Timedelta;
+a.add(b).toString(); // "0 days 01:30:00"
+a.subtract(b).totalMs; // 1_800_000
+
+// Array
+toTimedelta(["1h", "30m", null]);
+// => [Timedelta(3_600_000), Timedelta(1_800_000), null]
+
+// Series
+const s = new Series({ data: ["1h", "30m", null] });
+toTimedelta(s);
+// => Series<Timedelta | null> with dtype=timedelta
+
+// formatTimedelta
+formatTimedelta(new Timedelta(86_400_000 + 3_661_000));
+// => "1 day 01:01:01"
+
+ Python / pandas equivalent
+
+
+
diff --git a/src/core/sample.ts b/src/core/sample.ts
new file mode 100644
index 00000000..7cd8d529
--- /dev/null
+++ b/src/core/sample.ts
@@ -0,0 +1,328 @@
+/**
+ * sample — random sampling from Series and DataFrame.
+ *
+ * Mirrors:
+ * - `pandas.Series.sample(n, frac, replace, weights, random_state, axis)`
+ * - `pandas.DataFrame.sample(n, frac, replace, weights, random_state, axis)`
+ *
+ * @module
+ */
+
+import type { Label, Scalar } from "../types.ts";
+import { Index } from "./base-index.ts";
+import { DataFrame } from "./frame.ts";
+import { Series } from "./series.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/** Options for {@link sampleSeries} and {@link sampleDataFrame}. */
+export interface SampleOptions {
+ /**
+ * Number of items to return. Mutually exclusive with `frac`.
+ * @defaultValue `1` (when neither `n` nor `frac` is provided)
+ */
+ readonly n?: number;
+ /**
+ * Fraction of items to return (e.g. `0.5` for 50%); the count is rounded down.
+ * Mutually exclusive with `n`.
+ */
+ readonly frac?: number;
+ /**
+ * Allow sampling with replacement (the same item may appear multiple times).
+ * @defaultValue `false`
+ */
+ readonly replace?: boolean;
+ /**
+ * Weights for each item: one entry per item on the sampled axis.
+ * Weights do not need to sum to 1 — they are normalized internally.
+ * Missing (null/undefined/NaN) and negative weights are treated as 0.
+ */
+ readonly weights?: readonly (number | null | undefined)[];
+ /**
+ * Seed for the random number generator (coerced to an unsigned 32-bit integer).
+ * When provided, sampling is deterministic (same seed + same data → same result)
+ * via a simple LCG (linear congruential generator); otherwise `Math.random()` is used.
+ */
+ readonly randomState?: number;
+ /**
+ * Axis to sample along (DataFrame only).
+ * - `0` or `"index"` (default): sample rows.
+ * - `1` or `"columns"`: sample columns.
+ */
+ readonly axis?: 0 | 1 | "index" | "columns";
+}
+
+// ─── seeded RNG ───────────────────────────────────────────────────────────────
+
+/**
+ * One step of a 32-bit linear congruential generator:
+ * `next = (1664525 * seed + 1013904223) mod 2^32`.
+ * Returns `[next, next / 2^32]`; the second element lies in [0, 1).
+ */
+function lcgNext(seed: number): [number, number] {
+  const modulus = 2 ** 32;
+  // `>>> 0` reduces the sum to an unsigned 32-bit integer, i.e. modulo 2^32.
+  const advanced = ((1664525 * seed + 1013904223) >>> 0) % modulus;
+  const unit = advanced / modulus;
+  return [advanced, unit];
+}
+
+/** Create a `[0, 1)` float source: `Math.random` when unseeded, otherwise a reproducible LCG stream. */
+function makeRng(seed: number | undefined): () => number {
+  if (seed === undefined) {
+    return () => Math.random();
+  }
+  let state = seed >>> 0; // unsigned 32-bit LCG state
+  return () => {
+    const step = lcgNext(state);
+    state = step[0];
+    return step[1];
+  };
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** Resolve how many items to sample from a pool of size `poolSize` (default 1); throws on bad input. */
+function resolveN(poolSize: number, n: number | undefined, frac: number | undefined): number {
+  if (n !== undefined && frac !== undefined) {
+    throw new Error("Sample: specify either `n` or `frac`, not both.");
+  }
+  if (frac !== undefined) {
+    // `!(frac >= 0)` also rejects NaN, which would otherwise propagate as a NaN count.
+    if (!(frac >= 0)) {
+      throw new RangeError("Sample: `frac` must be >= 0.");
+    }
+    return Math.floor(frac * poolSize);
+  }
+  if (n === undefined) return 1;
+  if (!(n >= 0)) {
+    throw new RangeError("Sample: `n` must be >= 0.");
+  }
+  // Truncate so every sampler agrees on the count when `n` is fractional.
+  return Math.floor(n);
+}
+
+/** Turn raw weights into probabilities summing to 1; null/undefined/NaN/negative entries count as 0. */
+function normalizeWeights(
+  rawWeights: readonly (number | null | undefined)[],
+  poolSize: number,
+): number[] {
+  const given = rawWeights.length;
+  if (given !== poolSize) {
+    throw new RangeError(`Sample: weights length (${given}) must equal pool size (${poolSize}).`);
+  }
+  const cleaned: number[] = [];
+  let total = 0;
+  for (const raw of rawWeights) {
+    // Anything that is not a non-negative number contributes no probability
+    // mass, so it is replaced by 0 before summing.
+    const weight = typeof raw === "number" && !Number.isNaN(raw) && raw >= 0 ? raw : 0;
+    cleaned.push(weight);
+    total += weight;
+  }
+  if (total === 0) {
+    throw new Error("Sample: all weights are zero.");
+  }
+  return cleaned.map((weight) => weight / total);
+}
+
+/**
+ * Weighted sampling without replacement (Efraimidis–Spirakis keys `ln(u) / p`, largest `k` win).
+ * Zero-weight items are never selected; throws RangeError if fewer than `k` weights are positive.
+ */
+function weightedSampleWithoutReplacement(
+  poolSize: number,
+  k: number,
+  probs: number[],
+  rng: () => number,
+): number[] {
+  // Log-domain keys order exactly like `u ** (1 / p)` but cannot underflow to 0 for tiny `p`.
+  // `rng()` is still called once per item so seeded streams keep their alignment.
+  const keyed = probs.map((p, i): [number, number] => [Math.log(rng()) / p, i]);
+  const eligible = keyed.filter(([, i]) => (probs[i] ?? 0) > 0);
+  if (eligible.length < k) {
+    throw new RangeError(`Sample: only ${eligible.length} of ${poolSize} items have positive weight; cannot draw ${k}.`);
+  }
+  return eligible.sort((a, b) => b[0] - a[0]).slice(0, k).map(([, i]) => i);
+}
+
+/**
+ * Weighted sample WITH replacement via the cumulative distribution; zero-probability items are never picked.
+ */
+function weightedSampleWithReplacement(k: number, probs: number[], rng: () => number): number[] {
+  const cumulative: number[] = [];
+  let sum = 0;
+  let lastPositive = probs.length - 1;
+  probs.forEach((p, i) => {
+    sum += p;
+    cumulative.push(sum);
+    if (p > 0) lastPositive = i;
+  });
+  const result: number[] = [];
+  for (let draw = 0; draw < k; draw++) {
+    const r = rng();
+    // Strict `>`: item i owns [cumulative[i-1], cumulative[i]), which is empty when its p is 0.
+    const idx = cumulative.findIndex((c) => c > r);
+    // Rounding can leave the final sum just below 1; fall back to the last drawable item.
+    result.push(idx < 0 ? lastPositive : idx);
+  }
+  return result;
+}
+
+/**
+ * Partial Fisher-Yates shuffle: settle the first `k` slots of `0..poolSize-1` and return them.
+ */
+function fisherYatesSample(poolSize: number, k: number, rng: () => number): number[] {
+  const order = Array.from({ length: poolSize }, (_, n) => n);
+  for (let slot = 0; slot < k; slot++) {
+    const pick = slot + Math.floor(rng() * (poolSize - slot));
+    const here = order[slot];
+    const there = order[pick];
+    // The guard narrows away `undefined` from the indexed reads; skip the swap if either is missing.
+    if (here === undefined || there === undefined) continue;
+    order[slot] = there;
+    order[pick] = here;
+  }
+  return order.slice(0, k);
+}
+
+/**
+ * Unweighted draw with replacement: `k` independent indices, each `floor(rng() * poolSize)`.
+ */
+function uniformSampleWithReplacement(poolSize: number, k: number, rng: () => number): number[] {
+  const drawn: number[] = [];
+  for (let remaining = k; remaining > 0; remaining--) {
+    drawn.push(Math.floor(rng() * poolSize));
+  }
+  return drawn;
+}
+
+/**
+ * Dispatch to the matching sampler and return the chosen positions.
+ * An empty pool or `k === 0` yields `[]`; `k > poolSize` without replacement throws RangeError.
+ */
+function samplePositions(
+  poolSize: number,
+  k: number,
+  replace: boolean,
+  weights: readonly (number | null | undefined)[] | undefined,
+  rng: () => number,
+): number[] {
+  if (poolSize === 0 || k === 0) {
+    return [];
+  }
+  const oversized = k > poolSize;
+  if (oversized && !replace) {
+    throw new RangeError(
+      `Sample: cannot sample ${k} items without replacement from a pool of ${poolSize}.`,
+    );
+  }
+  if (weights === undefined) {
+    return replace
+      ? uniformSampleWithReplacement(poolSize, k, rng)
+      : fisherYatesSample(poolSize, k, rng);
+  }
+  const probs = normalizeWeights(weights, poolSize);
+  return replace
+    ? weightedSampleWithReplacement(k, probs, rng)
+    : weightedSampleWithoutReplacement(poolSize, k, probs, rng);
+}
+
+// ─── Series sample ────────────────────────────────────────────────────────────
+
+/**
+ * Draw a random sample of a Series' items, keeping each item's index label plus the name and dtype.
+ * @example
+ * ```ts
+ * const s = new Series({ data: [10, 20, 30, 40, 50] });
+ * sampleSeries(s, { n: 3, randomState: 42 }).values; // 3 values, identical on every run
+ * ```
+ */
+export function sampleSeries(series: Series, options?: SampleOptions): Series {
+  const size = series.values.length;
+  const count = resolveN(size, options?.n, options?.frac);
+  const rng = makeRng(options?.randomState);
+  const positions = samplePositions(size, count, options?.replace ?? false, options?.weights, rng);
+  const data: Scalar[] = [];
+  const labels: Label[] = [];
+  for (const pos of positions) {
+    data.push(series.values[pos] ?? null);
+    labels.push(series.index.at(pos) ?? null);
+  }
+  return new Series({
+    data,
+    index: new Index(labels),
+    name: series.name ?? null,
+    dtype: series.dtype,
+  });
+}
+
+/**
+ * Randomly sample a DataFrame along one axis: rows by default, columns when
+ * `axis` is `1` or `"columns"`.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromRecords([{ a: 1 }, { a: 2 }, { a: 3 }, { a: 4 }, { a: 5 }]);
+ * sampleDataFrame(df, { n: 2, randomState: 0 }).shape; // [2, 1]
+ * ```
+ */
+export function sampleDataFrame(df: DataFrame, options?: SampleOptions): DataFrame {
+  const settings: SampleOptions = options ?? {};
+  switch (settings.axis ?? 0) {
+    case 1:
+    case "columns":
+      return sampleDataFrameColumns(df, settings);
+    default:
+      // 0, "index", or any other value samples rows.
+      return sampleDataFrameRows(df, settings);
+  }
+}
+
+/**
+ * Sample rows: draw row positions once, then gather them from every column so rows stay aligned.
+ * Each output column keeps its dtype and shares the new index built from the sampled row labels.
+ */
+function sampleDataFrameRows(df: DataFrame, opts: SampleOptions): DataFrame {
+  const nRows = df.shape[0];
+  const k = resolveN(nRows, opts.n, opts.frac);
+  const replace = opts.replace ?? false;
+  const rng = makeRng(opts.randomState);
+
+  const positions = samplePositions(nRows, k, replace, opts.weights, rng);
+  const newLabels: Label[] = positions.map((i) => df.index.at(i) ?? null);
+  const newIndex = new Index(newLabels);
+
+  // Column name → resampled column; Map insertion order keeps the original column order.
+  const colMap = new Map<string, Series<Scalar>>();
+  for (const name of df.columns.values) {
+    const col = df.col(name);
+    const newVals: Scalar[] = positions.map((i) => col.values[i] ?? null);
+    colMap.set(
+      name,
+      new Series({ data: newVals, index: newIndex, dtype: col.dtype }),
+    );
+  }
+  return new DataFrame(colMap, newIndex);
+}
+
+/**
+ * Sample columns, keeping every row and the original index.
+ * With `replace: true` a column drawn twice appears once, because columns are keyed by name.
+ */
+function sampleDataFrameColumns(df: DataFrame, opts: SampleOptions): DataFrame {
+  const allCols = df.columns.values;
+  const nCols = allCols.length;
+  const k = resolveN(nCols, opts.n, opts.frac);
+  const replace = opts.replace ?? false;
+  const rng = makeRng(opts.randomState);
+  const positions = samplePositions(nCols, k, replace, opts.weights, rng);
+
+  const colMap = new Map<string, Series<Scalar>>();
+  for (const pos of positions) {
+    const name = allCols[pos];
+    if (name === undefined) continue;
+    colMap.set(name, df.col(name));
+  }
+  return new DataFrame(colMap, df.index);
+}
diff --git a/src/index.ts b/src/index.ts
index 37c6e62e..8f62a18d 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -182,7 +182,7 @@ export {
export { Period, PeriodIndex } from "./core/index.ts";
export type { PeriodFreq, PeriodIndexOptions } from "./core/index.ts";
-export { Timedelta, TimedeltaIndex } from "./core/index.ts";
+export { TimedeltaIndex } from "./core/index.ts";
export type { TimedeltaComponents, TimedeltaIndexOptions } from "./core/index.ts";
export {
Day,
@@ -535,3 +535,21 @@ export type {
} from "./stats/index.ts";
export { toDatetime } from "./stats/index.ts";
export type { DatetimeUnit, DatetimeErrors, ToDatetimeOptions } from "./stats/index.ts";
+
+// Branch-unique exports not yet in main
+export { toTimedelta, parseFrac, formatTimedelta, Timedelta } from "./stats/index.ts";
+export type { TimedeltaUnit, TimedeltaErrors, ToTimedeltaOptions } from "./stats/index.ts";
+export { dateRange, parseFreq, advanceDate, toDateInput } from "./stats/index.ts";
+export type { DateRangeInclusive, ParsedFreq } from "./stats/index.ts";
+export { diffDataFrame, shiftDataFrame } from "./stats/index.ts";
+export type {
+ DiffOptions,
+ DataFrameDiffOptions,
+ ShiftOptions,
+ DataFrameShiftOptions,
+} from "./stats/index.ts";
+export { ffillSeries, bfillSeries, dataFrameFfill, dataFrameBfill } from "./stats/index.ts";
+export type { FillDirectionOptions, DataFrameFillOptions } from "./stats/index.ts";
+export { intervalRange } from "./stats/index.ts";
+export type { ClosedType } from "./stats/index.ts";
+export { nunique } from "./stats/index.ts";
diff --git a/src/io/read_excel.ts b/src/io/read_excel.ts
index 97d06065..07d0d0d4 100644
--- a/src/io/read_excel.ts
+++ b/src/io/read_excel.ts
@@ -517,7 +517,7 @@ function buildDataFrame(rows: readonly RawRow[], options: ReadExcelOptions): Dat
colMap.set(colName, new Series({ data: colData, dtype: Dtype.from(dtypeName), name: colName }));
}
const toLabel = (v: Scalar): Label =>
- v === undefined || typeof v === "bigint" || v instanceof Date ? null : v;
+ v === undefined || typeof v === "bigint" || (typeof v === "object" && v !== null) ? null : v;
const rowIndex =
indexColIdx >= 0
? new Index((data[indexColIdx] ?? []).map(toLabel))
diff --git a/src/reshape/explode.ts b/src/reshape/explode.ts
new file mode 100644
index 00000000..fadc0b6e
--- /dev/null
+++ b/src/reshape/explode.ts
@@ -0,0 +1,215 @@
+/**
+ * explode — transform list-like column/Series cells into multiple rows.
+ *
+ * Mirrors `pandas.DataFrame.explode` / `pandas.Series.explode`:
+ * - Each element of a list-valued cell becomes its own row.
+ * - All other columns repeat their value for each exploded row.
+ * - Scalar (non-list) values are treated as single-element lists.
+ * - Empty arrays produce a single row with `null`.
+ * - `null`/`undefined` values produce a single `null` row (preserved).
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 2], b: [[10, 20], [30]] as unknown as Scalar[] });
+ * explodeDataFrame(df, "b");
+ * // a b
+ * // 1 10
+ * // 1 20
+ * // 2 30
+ * ```
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Index } from "../core/index.ts";
+import { RangeIndex } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── public types ──────────────────────────────────────────────────────────────
+
+/** Options for {@link explodeSeries} and {@link explodeDataFrame}. */
+export interface ExplodeOptions {
+ /**
+ * When `true`, the result gets a fresh `RangeIndex` sized to the output rows.
+ * When `false` or omitted (default), the original row labels are propagated
+ * (each label repeated once per output row produced from its input row).
+ */
+ readonly ignore_index?: boolean;
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/**
+ * Turn one cell into the list of values it contributes to the exploded output.
+ *
+ * - `null` / `undefined` → `[null]`
+ * - empty array → `[null]`
+ * - non-empty array → its elements, with `null`/`undefined` entries as `null`
+ * - anything else → `[value]`
+ *
+ * The static type is `Scalar`, but a cell may hold an array at runtime, so the
+ * value is widened to `unknown` before the `Array.isArray` check.
+ *
+ * @returns A non-empty array of scalars for this cell.
+ */
+function expandCell(value: Scalar): Scalar[] {
+  const widened: unknown = value;
+  if (Array.isArray(widened)) {
+    const items: unknown[] = widened;
+    if (items.length === 0) {
+      // An empty list still produces one (null) row.
+      return [null];
+    }
+    return items.map((item) => (item ?? null) as Scalar);
+  }
+  // Not a list: a missing value becomes a single `null`, any other scalar passes through.
+  return [value ?? null];
+}
+
+// ─── explodeSeries ─────────────────────────────────────────────────────────────
+
+/**
+ * Flatten a Series whose cells may be lists into one row per list element.
+ *
+ * Per input cell:
+ * - array → one output row per element
+ * - empty array, `null` or `undefined` → a single `null` row
+ * - any other scalar → a single row holding that value
+ *
+ * Every output row carries the index label of the cell it came from unless
+ * `ignore_index` is set. The Series name is kept; no dtype is passed on.
+ *
+ * @param series - The Series to explode.
+ * @param options - {@link ExplodeOptions}
+ * @returns A new Series with list-cells expanded to individual rows.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [[1, 2], [3]] as unknown as Scalar[], name: "x" });
+ * explodeSeries(s).toArray(); // [1, 2, 3]
+ * ```
+ */
+export function explodeSeries(series: Series, options?: ExplodeOptions): Series {
+  const labelsIn = series.index.values;
+  const data: Scalar[] = [];
+  const labels: Label[] = [];
+
+  const rowCount = series.values.length;
+  let row = 0;
+  while (row < rowCount) {
+    const label: Label = labelsIn[row] ?? null;
+    for (const piece of expandCell(series.values[row] ?? null)) {
+      data.push(piece);
+      labels.push(label);
+    }
+    row += 1;
+  }
+
+  let index: Index;
+  if (options?.ignore_index ?? false) {
+    index = new RangeIndex(data.length) as unknown as Index;
+  } else {
+    index = new Index(labels);
+  }
+  return new Series({ data, index, name: series.name });
+}
+
+// ─── explodeDataFrame ──────────────────────────────────────────────────────────
+
+/**
+ * Explode one or more list-valued columns of a DataFrame into multiple rows.
+ *
+ * All other columns have their values repeated to match the expanded rows.
+ * Row labels are propagated (duplicated) unless `ignore_index` is `true`.
+ *
+ * Multi-column behaviour differs from pandas: pandas raises a `ValueError` when
+ * the listed columns hold lists of different lengths in the same row. Here each
+ * row expands to the longest list among the exploded columns and shorter lists
+ * are padded with `null` (zip-longest).
+ *
+ * @param df - The DataFrame to explode.
+ * @param column - Column name or array of column names to explode.
+ * @param options - {@link ExplodeOptions}
+ * @returns A new DataFrame with the specified column(s) exploded.
+ * @throws Error if a requested column does not exist in `df`.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({
+ *   a: [1, 2],
+ *   b: [[10, 20], [30]] as unknown as Scalar[],
+ * });
+ * explodeDataFrame(df, "b").toRecords();
+ * // [{ a: 1, b: 10 }, { a: 1, b: 20 }, { a: 2, b: 30 }]
+ * ```
+ */
+export function explodeDataFrame(
+  df: DataFrame,
+  column: string | readonly string[],
+  options?: ExplodeOptions,
+): DataFrame {
+  const ignoreIndex = options?.ignore_index ?? false;
+  const explodeCols: readonly string[] = typeof column === "string" ? [column] : column;
+
+  // Validate column names
+  for (const col of explodeCols) {
+    if (!df.columns.values.includes(col)) {
+      throw new Error(`Column '${col}' not found in DataFrame`);
+    }
+  }
+
+  const allCols = df.columns.values;
+  const nRows = df.index.size;
+  const idxVals = df.index.values;
+
+  // Look every column up once instead of once per output cell.
+  const sources = allCols.map((name) => ({
+    name,
+    series: df.col(name),
+    exploded: explodeCols.includes(name),
+  }));
+
+  // Build output column arrays
+  const outData: Record<string, Scalar[]> = {};
+  for (const { name } of sources) {
+    outData[name] = [];
+  }
+  const outLabels: Label[] = [];
+
+  for (let i = 0; i < nRows; i++) {
+    const label: Label = idxVals[i] ?? null;
+    // Expand this row's list cells once; a row yields as many output rows as its longest list.
+    const expanded = new Map<string, Scalar[]>();
+    let expansion = 1;
+    for (const { name, series, exploded } of sources) {
+      if (!exploded) continue;
+      const cells = expandCell(series.iat(i));
+      expanded.set(name, cells);
+      expansion = Math.max(expansion, cells.length);
+    }
+
+    for (let k = 0; k < expansion; k++) {
+      outLabels.push(label);
+      for (const { name, series } of sources) {
+        const colArr = outData[name];
+        if (colArr === undefined) continue;
+        const cells = expanded.get(name);
+        if (cells === undefined) {
+          colArr.push(series.iat(i)); // not exploded: repeat the row's value
+        } else {
+          colArr.push(cells[k] ?? null); // exploded: k-th element, null past the end
+        }
+      }
+    }
+  }
+
+  const resultIndex: Index = ignoreIndex
+    ? (new RangeIndex(outLabels.length) as unknown as Index)
+    : new Index(outLabels);
+
+  return DataFrame.fromColumns(outData, {
+    index: resultIndex,
+  });
+}
diff --git a/src/stats/date_range.ts b/src/stats/date_range.ts
new file mode 100644
index 00000000..14c01860
--- /dev/null
+++ b/src/stats/date_range.ts
@@ -0,0 +1,665 @@
+/**
+ * date_range — generate a fixed-frequency sequence of Date objects.
+ *
+ * Mirrors `pandas.date_range()`:
+ * - Specify at least two of `start`, `end`, `periods`
+ * - `freq` controls step size (default `"D"`)
+ * - `inclusive` controls endpoint inclusion (default `"both"`)
+ *
+ * Supported frequencies:
+ * - `"D"` — calendar day
+ * - `"B"` — business day (Mon–Fri)
+ * - `"h"` / `"H"` — hour
+ * - `"min"` / `"T"` — minute
+ * - `"s"` / `"S"` — second
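+ * - `"ms"` / `"L"` — millisecond
+ * - `"us"` / `"U"`, `"ns"` — microsecond / nanosecond (accepted; each step is rounded to whole milliseconds)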
+ * - `"W"` / `"W-SUN"` … `"W-SAT"` — weekly, anchored to a weekday (default Sun)
+ * - `"MS"` — month start; `"ME"` / `"M"` — month end
+ * - `"QS"` — quarter start; `"QE"` / `"Q"` — quarter end
+ * - `"YS"` / `"AS"` — year start; `"YE"` / `"A"` / `"Y"` — year end
+ * - Multiplier prefix: `"2D"`, `"3H"`, etc.
+ *
+ * @module
+ */
+
+// ─── top-level regex constants (biome: useTopLevelRegex) ──────────────────────
+
+/**
+ * Parse frequency string: optional integer prefix + unit token.
+ *
+ * Capture group 1 is the optional multiplier, group 2 the unit token. The
+ * pattern is anchored at both ends and carries the `i` flag, so group 2 holds
+ * the caller's original casing; `normaliseUnit` relies on that casing to tell
+ * `"ms"` (milliseconds) from `"MS"` (month-start).
+ */
+const RE_FREQ =
+ /^(\d+)?(D|B|h|H|T|min|s|S|ms|L|us|U|ns|W(?:-[A-Z]{3})?|MS|ME|M|QS|QE|Q|YS|YE|AS|A|Y)$/i;
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/** Frequency string for `dateRange`; validated at runtime by `parseFreq`. */
+export type DateRangeFreq = string;
+
+/** Which endpoints to include in the generated range. */
+export type DateRangeInclusive = "both" | "neither" | "left" | "right";
+
+/** Options for `dateRange`. */
+export interface DateRangeOptions {
+ /**
+ * Range start. At least two of start / end / periods must be provided.
+ * Strings and numbers are converted with `new Date(value)`.
+ */
+ start?: Date | string | number | null;
+ /**
+ * Range end. At least two of start / end / periods must be provided.
+ * Strings and numbers are converted with `new Date(value)`.
+ */
+ end?: Date | string | number | null;
+ /**
+ * Number of periods to generate.
+ * A value that is `null`, `undefined`, or not greater than zero counts as
+ * "not provided" when checking that two of start / end / periods are given.
+ */
+ periods?: number | null;
+ /** Step frequency (default `"D"`). */
+ freq?: DateRangeFreq | null;
+ /** Which endpoints to include (default `"both"`). */
+ inclusive?: DateRangeInclusive | null;
+ /**
+ * Normalize start/end to midnight UTC before generating.
+ * Equivalent to `pandas.date_range(normalize=True)`.
+ * Only the literal value `true` enables it.
+ */
+ normalize?: boolean | null;
+}
+
+// ─── internal types ────────────────────────────────────────────────────────────
+
+/** Parsed representation of a frequency string, as produced by `parseFreq`. */
+export interface ParsedFreq {
+ /** Multiplier (e.g. 2 for "2D"); 1 when the string has no numeric prefix. */
+ n: number;
+ /** Normalised unit string; every weekly form ("W", "W-MON", …) is stored as "W". */
+ unit: string;
+ /** Weekday anchor for "W" unit (JS: 0=Sun … 6=Sat); 0 for non-weekly units. */
+ anchor: number;
+}
+
+// ─── constants ────────────────────────────────────────────────────────────────
+
+/** Weekday abbreviation → weekday number in `Date.prototype.getUTCDay()` numbering (0 = Sunday). */
+// NOTE(review): `Readonly>` looks like it lost its type arguments — confirm the intended type.
+const DOW_MAP: Readonly> = {
+ SUN: 0,
+ MON: 1,
+ TUE: 2,
+ WED: 3,
+ THU: 4,
+ FRI: 5,
+ SAT: 6,
+};
+
+/** Milliseconds in one day. */
+const MS_DAY = 86_400_000;
+/** Milliseconds in one hour. */
+const MS_HOUR = 3_600_000;
+/** Milliseconds in one minute. */
+const MS_MIN = 60_000;
+
+/** Fixed-length unit → milliseconds (for arithmetic advance). */
+const MS_TABLE: Readonly> = {
+ D: MS_DAY,
+ h: MS_HOUR,
+ min: MS_MIN,
+ s: 1_000,
+ ms: 1,
+};
+
+/**
+ * Lookup table: raw unit string → canonical unit token.
+ * Keys are uppercase because `normaliseUnit` uppercases the token before the lookup.
+ */
+const UNIT_NORM: Readonly> = {
+ H: "h",
+ T: "min",
+ MIN: "min",
+ S: "s",
+ L: "ms",
+ // Not reached through `normaliseUnit`: exact-case "ms" returns before the
+ // lookup, and any token that uppercases to "MS" is returned as month-start.
+ MS: "ms",
+ U: "us",
+ US: "us",
+ NS: "ns",
+ A: "YE",
+ Y: "YE",
+ YE: "YE",
+ AS: "YS",
+ YS: "YS",
+ Q: "QE",
+ QE: "QE",
+ M: "ME",
+ ME: "ME",
+};
+
+// ─── frequency parsing ─────────────────────────────────────────────────────────
+
+/**
+ * Canonicalise the unit token captured by the frequency regex.
+ *
+ * Exact-case `"ms"`, `"L"`, `"us"` and `"ns"` are resolved before anything is
+ * uppercased, because uppercase `"MS"` means month-start, not milliseconds.
+ * Every other token is uppercased: `MS`, `QS`, `D` and `B` are returned as-is,
+ * the rest go through the alias table and fall back to the uppercased token
+ * when no alias exists.
+ */
+function normaliseUnit(raw: string): string {
+  switch (raw) {
+    case "ms":
+    case "L":
+      return "ms";
+    case "us":
+    case "ns":
+      return raw;
+    default:
+      break;
+  }
+  const upper = raw.toUpperCase();
+  const alreadyCanonical = upper === "MS" || upper === "QS" || upper === "D" || upper === "B";
+  return alreadyCanonical ? upper : (UNIT_NORM[upper] ?? upper);
+}
+
+/**
+ * Parse a pandas-style frequency string into a `ParsedFreq`.
+ *
+ * The multiplier defaults to 1 when the string has no numeric prefix. Weekly
+ * strings (`"W"`, `"W-MON"`, …) are reduced to unit `"W"` with the weekday
+ * stored in `anchor`; a bare `"W"` anchors on Sunday (0).
+ *
+ * @param freq - Frequency string such as `"D"`, `"2h"` or `"W-FRI"`.
+ * @returns The multiplier, canonical unit and weekday anchor.
+ * @throws RangeError when the string does not match the frequency pattern, or
+ *   when a weekly anchor is not one of SUN…SAT.
+ */
+export function parseFreq(freq: string): ParsedFreq {
+  const m = RE_FREQ.exec(freq);
+  if (!m) {
+    throw new RangeError(`Unrecognised frequency string: "${freq}"`);
+  }
+  const n = m[1] !== undefined ? Number.parseInt(m[1], 10) : 1;
+  const unit = normaliseUnit(m[2] ?? "D");
+
+  if (!unit.startsWith("W")) {
+    return { n, unit, anchor: 0 };
+  }
+
+  const dash = unit.indexOf("-");
+  if (dash === -1) {
+    return { n, unit: "W", anchor: 0 };
+  }
+
+  // The regex accepts any three letters after "W-", so an unknown weekday
+  // must be rejected here rather than silently treated as Sunday.
+  const anchor = DOW_MAP[unit.slice(dash + 1)];
+  if (anchor === undefined) {
+    throw new RangeError(`Unrecognised frequency string: "${freq}"`);
+  }
+  return { n, unit: "W", anchor };
+}
+
+// ─── date-input helper ─────────────────────────────────────────────────────────
+
+/**
+ * Convert a `Date | string | number` to a `Date`.
+ *
+ * A `Date` argument is returned as the same instance (not copied); strings and
+ * numbers are passed to `new Date(value)`.
+ *
+ * @param v - Date instance, date string, or epoch-millisecond number.
+ * @returns A valid `Date`.
+ * @throws TypeError if the value does not represent a valid date. This covers
+ *   unparseable strings, `NaN` / out-of-range numbers, and `Date` instances
+ *   that are already invalid.
+ */
+export function toDateInput(v: Date | string | number): Date {
+  const d = v instanceof Date ? v : new Date(v);
+  // Validate every input kind: `new Date(NaN)` and invalid Date instances do
+  // not throw on their own and would otherwise leak into the generators.
+  if (Number.isNaN(d.getTime())) {
+    throw new TypeError(`Cannot parse date: "${String(v)}"`);
+  }
+  return d;
+}
+
+/** Return a new Date at 00:00:00.000 UTC on the UTC calendar day of `d`. */
+function normToMidnight(d: Date): Date {
+  const year = d.getUTCFullYear();
+  const month = d.getUTCMonth();
+  const dayOfMonth = d.getUTCDate();
+  return new Date(Date.UTC(year, month, dayOfMonth));
+}
+
+// ─── arithmetic advance (fixed-ms units) ──────────────────────────────────────
+
+/**
+ * Shift `d` by `n` steps of the fixed-length unit `unit`.
+ * A unit missing from the millisecond table is treated as 1 ms per step.
+ */
+function advanceMs(d: Date, n: number, unit: string): Date {
+  const msPerStep = MS_TABLE[unit] ?? 1;
+  const shifted = d.getTime() + msPerStep * n;
+  return new Date(shifted);
+}
+
+// ─── business-day advance ──────────────────────────────────────────────────────
+
+/**
+ * Move one calendar day forward, then skip ahead to Monday if that lands on a
+ * Saturday or Sunday (UTC).
+ */
+function addOneBizDay(d: Date): Date {
+  let nextMs = d.getTime() + MS_DAY;
+  const weekday = new Date(nextMs).getUTCDay();
+  if (weekday === 0) {
+    nextMs += MS_DAY; // Sunday → Monday
+  } else if (weekday === 6) {
+    nextMs += 2 * MS_DAY; // Saturday → Monday
+  }
+  return new Date(nextMs);
+}
+
+/** Apply `addOneBizDay` repeatedly, `n` times; `d` is returned unchanged when `n <= 0`. */
+function addBizDays(d: Date, n: number): Date {
+  let result = d;
+  let remaining = n;
+  while (remaining > 0) {
+    result = addOneBizDay(result);
+    remaining -= 1;
+  }
+  return result;
+}
+
+/**
+ * Move one calendar day backward, then fall back to Friday if that lands on a
+ * Saturday or Sunday (UTC).
+ */
+function subOneBizDay(d: Date): Date {
+  let prevMs = d.getTime() - MS_DAY;
+  const weekday = new Date(prevMs).getUTCDay();
+  if (weekday === 0) {
+    prevMs -= 2 * MS_DAY; // Sunday → Friday
+  } else if (weekday === 6) {
+    prevMs -= MS_DAY; // Saturday → Friday
+  }
+  return new Date(prevMs);
+}
+
+/** Apply `subOneBizDay` repeatedly, `n` times; `d` is returned unchanged when `n <= 0`. */
+function subBizDays(d: Date, n: number): Date {
+  let result = d;
+  let remaining = n;
+  while (remaining > 0) {
+    result = subOneBizDay(result);
+    remaining -= 1;
+  }
+  return result;
+}
+
+// ─── weekly advance ───────────────────────────────────────────────────────────
+
+/**
+ * Step `n` weekly periods forward from `d`, landing on weekday `anchor`
+ * (0=Sun…6=Sat, UTC).
+ *
+ * The first step goes to the next occurrence of `anchor` strictly after `d`
+ * (a full 7 days when `d` is already on the anchor); the remaining `n - 1`
+ * steps are whole weeks.
+ */
+function addWeeks(d: Date, n: number, anchor: number): Date {
+  const gap = (anchor - d.getUTCDay() + 7) % 7;
+  const daysToFirst = gap === 0 ? 7 : gap;
+  const firstAnchor = new Date(d.getTime() + daysToFirst * MS_DAY);
+  const extraWeeks = n - 1;
+  return new Date(firstAnchor.getTime() + extraWeeks * 7 * MS_DAY);
+}
+
+// ─── month helpers ─────────────────────────────────────────────────────────────
+
+/** Number of days in the given UTC month (`month` is 0-based). */
+function daysInMonth(year: number, month: number): number {
+  // Day 0 of the following month is the last day of `month`.
+  const lastDay = new Date(Date.UTC(year, month + 1, 0));
+  return lastDay.getUTCDate();
+}
+
+/**
+ * Shift `d` by `n` months (negative `n` moves backward) and return the first
+ * day of the resulting month at midnight UTC.
+ */
+function addMonthStarts(d: Date, n: number): Date {
+  const monthTotal = d.getUTCMonth() + n;
+  const yearCarry = Math.floor(monthTotal / 12);
+  // Double modulo keeps the month in 0..11 for negative totals.
+  const month = ((monthTotal % 12) + 12) % 12;
+  const year = d.getUTCFullYear() + yearCarry;
+  return new Date(Date.UTC(year, month, 1));
+}
+
+/**
+ * Shift `d` by `n` months (negative `n` moves backward) and return the last
+ * day of the resulting month at midnight UTC.
+ */
+function addMonthEnds(d: Date, n: number): Date {
+  const monthTotal = d.getUTCMonth() + n;
+  const yearCarry = Math.floor(monthTotal / 12);
+  // Double modulo keeps the month in 0..11 for negative totals.
+  const month = ((monthTotal % 12) + 12) % 12;
+  const year = d.getUTCFullYear() + yearCarry;
+  return new Date(Date.UTC(year, month, daysInMonth(year, month)));
+}
+
+// ─── quarter helpers ───────────────────────────────────────────────────────────
+
+/**
+ * Return the first quarter-start (Jan/Apr/Jul/Oct 1, midnight UTC) whose month
+ * is after the UTC month of `d`; rolls over to Jan 1 of the next year for
+ * October through December.
+ */
+function nextQStart(d: Date): Date {
+  const year = d.getUTCFullYear();
+  const month = d.getUTCMonth();
+  const startMonth = [3, 6, 9].find((qm) => month < qm);
+  if (startMonth === undefined) {
+    return new Date(Date.UTC(year + 1, 0, 1));
+  }
+  return new Date(Date.UTC(year, startMonth, 1));
+}
+
+/**
+ * Return the next quarter-end date (Mar 31, Jun 30, Sep 30 or Dec 31 at
+ * midnight UTC) strictly after `d`.
+ *
+ * Candidates are compared by timestamp rather than by month, so a date inside
+ * a quarter-end month but before its last day (e.g. Mar 15) resolves to that
+ * same month's end instead of skipping a whole quarter.
+ */
+function nextQEnd(d: Date): Date {
+  const y = d.getUTCFullYear();
+  const t = d.getTime();
+  for (const qm of [2, 5, 8, 11] as const) {
+    // Day 0 of the following month is the last day of month `qm`.
+    const quarterEnd = Date.UTC(y, qm + 1, 0);
+    if (quarterEnd > t) {
+      return new Date(quarterEnd);
+    }
+  }
+  // `d` is at or after Dec 31 midnight: the next quarter-end is in the following year.
+  return new Date(Date.UTC(y + 1, 2, 31));
+}
+
+/**
+ * Step `n` quarter-starts forward from `d` by repeated `nextQStart`.
+ * A negative `n` is delegated to `addMonthStarts` with `n * 3` months.
+ */
+function addQStarts(d: Date, n: number): Date {
+  if (n < 0) {
+    return addMonthStarts(d, n * 3);
+  }
+  let result = d;
+  let remaining = n;
+  while (remaining > 0) {
+    result = nextQStart(result);
+    remaining -= 1;
+  }
+  return result;
+}
+
+/**
+ * Step `n` quarter-ends forward from `d` by repeated `nextQEnd`.
+ * A negative `n` is delegated to `addMonthEnds` with `n * 3` months.
+ */
+function addQEnds(d: Date, n: number): Date {
+  if (n < 0) {
+    return addMonthEnds(d, n * 3);
+  }
+  let result = d;
+  let remaining = n;
+  while (remaining > 0) {
+    result = nextQEnd(result);
+    remaining -= 1;
+  }
+  return result;
+}
+
+// ─── year helpers ──────────────────────────────────────────────────────────────
+
+/** Return Jan 1 (midnight UTC) of the UTC year of `d` shifted by `n` years; `n` may be negative. */
+function addYearStarts(d: Date, n: number): Date {
+  const targetYear = d.getUTCFullYear() + n;
+  return new Date(Date.UTC(targetYear, 0, 1));
+}
+
+/** Return Dec 31 (midnight UTC) of the UTC year of `d` shifted by `n` years; `n` may be negative. */
+function addYearEnds(d: Date, n: number): Date {
+  const targetYear = d.getUTCFullYear() + n;
+  return new Date(Date.UTC(targetYear, 11, 31));
+}
+
+// ─── advance dispatcher ────────────────────────────────────────────────────────
+
+/**
+ * Produce the date one `pf` step after `d`; `d` itself is not modified.
+ *
+ * Units listed in the millisecond table advance by plain arithmetic. Business,
+ * weekly, month, quarter and year units are delegated to their calendar
+ * helpers. Anything else is handled as a sub-millisecond unit (`us`, otherwise
+ * treated as `ns`) and rounded to the nearest whole millisecond.
+ */
+export function advanceDate(d: Date, pf: ParsedFreq): Date {
+  const { n, unit, anchor } = pf;
+  if (unit in MS_TABLE) {
+    return advanceMs(d, n, unit);
+  }
+  switch (unit) {
+    case "B":
+      return addBizDays(d, n);
+    case "W":
+      return addWeeks(d, n, anchor);
+    case "MS":
+      return addMonthStarts(d, n);
+    case "ME":
+      return addMonthEnds(d, n);
+    case "QS":
+      return addQStarts(d, n);
+    case "QE":
+      return addQEnds(d, n);
+    case "YS":
+      return addYearStarts(d, n);
+    case "YE":
+      return addYearEnds(d, n);
+    default: {
+      // sub-ms (us, ns): JS has no sub-ms precision — round to nearest ms
+      const stepMs = unit === "us" ? n / 1_000 : n / 1_000_000;
+      return new Date(d.getTime() + Math.round(stepMs));
+    }
+  }
+}
+
+/**
+ * Produce the date one `pf` step before `d`.
+ *
+ * Mirrors `advanceDate`, except that the weekly case subtracts whole weeks
+ * without consulting the anchor weekday.
+ */
+function retreatDate(d: Date, pf: ParsedFreq): Date {
+  const { n, unit } = pf;
+  if (unit in MS_TABLE) {
+    return advanceMs(d, -n, unit);
+  }
+  switch (unit) {
+    case "B":
+      return subBizDays(d, n);
+    case "W":
+      return new Date(d.getTime() - 7 * n * MS_DAY);
+    case "MS":
+      return addMonthStarts(d, -n);
+    case "ME":
+      return addMonthEnds(d, -n);
+    case "QS":
+      return addQStarts(d, -n);
+    case "QE":
+      return addQEnds(d, -n);
+    case "YS":
+      return addYearStarts(d, -n);
+    case "YE":
+      return addYearEnds(d, -n);
+    default: {
+      // Sub-millisecond units are rounded to the nearest whole millisecond.
+      const stepMs = unit === "us" ? n / 1_000 : n / 1_000_000;
+      return new Date(d.getTime() - Math.round(stepMs));
+    }
+  }
+}
+
+// ─── generation helpers ────────────────────────────────────────────────────────
+
+/**
+ * Align `d` with the weekday anchor of a weekly frequency.
+ *
+ * For unit "W" the result is the first date on or after `d` whose UTC weekday
+ * equals `pf.anchor`; `d` is returned as-is when it already matches. Every
+ * other unit returns `d` unchanged.
+ */
+function snapToAnchor(d: Date, pf: ParsedFreq): Date {
+  if (pf.unit !== "W") {
+    return d;
+  }
+  const gap = (pf.anchor - d.getUTCDay() + 7) % 7;
+  if (gap === 0) {
+    return d;
+  }
+  return new Date(d.getTime() + gap * MS_DAY);
+}
+
+/**
+ * Align `d` with the calendar boundary implied by `unit`.
+ *
+ * A date already on its boundary is returned as-is (same instance). Otherwise:
+ * - `MS` / `QS` / `YS` move forward to the next month / quarter / year start;
+ * - `ME` / `YE` move to the end of the current month / year;
+ * - `QE` defers to `nextQEnd`.
+ * Units without a calendar boundary return `d` unchanged.
+ */
+function snapToCalendarBoundary(d: Date, unit: string): Date {
+  const year = d.getUTCFullYear();
+  const month = d.getUTCMonth();
+  const dayOfMonth = d.getUTCDate();
+
+  if (unit === "MS") {
+    return dayOfMonth === 1 ? d : new Date(Date.UTC(year, month + 1, 1));
+  }
+  if (unit === "ME") {
+    const lastDay = daysInMonth(year, month);
+    return dayOfMonth === lastDay ? d : new Date(Date.UTC(year, month, lastDay));
+  }
+  if (unit === "QS") {
+    // Quarter-start months are 0, 3, 6, 9.
+    const onQuarterStart = month % 3 === 0 && dayOfMonth === 1;
+    return onQuarterStart ? d : nextQStart(d);
+  }
+  if (unit === "QE") {
+    // Quarter-end months are 2, 5, 8, 11.
+    const onQuarterEnd = month % 3 === 2 && dayOfMonth === daysInMonth(year, month);
+    return onQuarterEnd ? d : nextQEnd(d);
+  }
+  if (unit === "YS") {
+    const onYearStart = month === 0 && dayOfMonth === 1;
+    return onYearStart ? d : new Date(Date.UTC(year + 1, 0, 1));
+  }
+  if (unit === "YE") {
+    const onYearEnd = month === 11 && dayOfMonth === 31;
+    return onYearEnd ? d : new Date(Date.UTC(year, 11, 31));
+  }
+  return d;
+}
+
+/**
+ * Produce `count` dates, stepping by `pf`.
+ * The first date is `start` after weekday-anchor and calendar-boundary snapping.
+ */
+function genFromStart(start: Date, count: number, pf: ParsedFreq): Date[] {
+  const dates: Date[] = [];
+  let cursor = snapToCalendarBoundary(snapToAnchor(start, pf), pf.unit);
+  for (let produced = 0; produced < count; produced += 1) {
+    dates.push(cursor);
+    cursor = advanceDate(cursor, pf);
+  }
+  return dates;
+}
+
+/**
+ * Generate all dates advancing from `start` while `≤ end`.
+ *
+ * `start` is first snapped to the weekday anchor and then to the calendar
+ * boundary of `pf.unit`, exactly as `genFromStart` does, so an off-boundary
+ * start (e.g. Jan 15 with `"ME"`) is never emitted as the first element.
+ *
+ * @returns The generated dates; empty if the snapped start is after `end`.
+ */
+function genBetween(start: Date, end: Date, pf: ParsedFreq): Date[] {
+  const out: Date[] = [];
+  const endMs = end.getTime();
+  let cur = snapToCalendarBoundary(snapToAnchor(start, pf), pf.unit);
+  while (cur.getTime() <= endMs) {
+    out.push(cur);
+    const next = advanceDate(cur, pf);
+    if (next.getTime() <= cur.getTime()) {
+      break; // guard against infinite loop when a step does not move forward
+    }
+    cur = next;
+  }
+  return out;
+}
+
+/**
+ * Walk backward from `end` by `count - 1` steps of `pf` and return the date
+ * reached; `end` itself is returned when `count` is 1 or less.
+ */
+function startFromEnd(end: Date, count: number, pf: ParsedFreq): Date {
+  let cursor = end;
+  let stepsLeft = count - 1;
+  while (stepsLeft > 0) {
+    cursor = retreatDate(cursor, pf);
+    stepsLeft -= 1;
+  }
+  return cursor;
+}
+
+/**
+ * Filter out dates whose timestamp equals an excluded endpoint.
+ *
+ * The start endpoint is kept for `"both"` / `"left"`, the end endpoint for
+ * `"both"` / `"right"`. Dates that match neither endpoint always pass.
+ */
+function applyInclusive(
+  dates: Date[],
+  start: Date,
+  end: Date,
+  inclusive: DateRangeInclusive,
+): Date[] {
+  const keepStart = inclusive === "both" || inclusive === "left";
+  const keepEnd = inclusive === "both" || inclusive === "right";
+  const startMs = start.getTime();
+  const endMs = end.getTime();
+  return dates.filter((d) => {
+    const t = d.getTime();
+    return (keepStart || t !== startMs) && (keepEnd || t !== endMs);
+  });
+}
+
+// ─── case handlers ─────────────────────────────────────────────────────────────
+
+/**
+ * Start + periods (no end): generate `periods` dates from `startDate`, then
+ * apply `incl` using the last generated date as the end endpoint.
+ */
+function caseStartPeriods(
+  startDate: Date,
+  periods: number,
+  pf: ParsedFreq,
+  incl: DateRangeInclusive,
+): Date[] {
+  const generated = genFromStart(startDate, periods, pf);
+  const finalDate = generated[generated.length - 1];
+  return finalDate === undefined ? [] : applyInclusive(generated, startDate, finalDate, incl);
+}
+
+/**
+ * End + periods (no start): derive a start by stepping back from `endDate`,
+ * generate forward from it, then apply `incl` against that derived start and
+ * `endDate`.
+ */
+function caseEndPeriods(
+  endDate: Date,
+  periods: number,
+  pf: ParsedFreq,
+  incl: DateRangeInclusive,
+): Date[] {
+  const derivedStart = startFromEnd(endDate, periods, pf);
+  const generated = genFromStart(derivedStart, periods, pf);
+  return applyInclusive(generated, derivedStart, endDate, incl);
+}
+
+/** Start + end: generate every step between the two dates, then apply `incl`. */
+function caseStartEnd(
+  startDate: Date,
+  endDate: Date,
+  pf: ParsedFreq,
+  incl: DateRangeInclusive,
+): Date[] {
+  const generated = genBetween(startDate, endDate, pf);
+  return applyInclusive(generated, startDate, endDate, incl);
+}
+
+// ─── input parsing helper ──────────────────────────────────────────────────────
+
+/** Report whether `v` holds a value, i.e. is not `null` and not `undefined`. */
+function hasValue(v: unknown): boolean {
+  const missing = v === undefined || v === null;
+  return !missing;
+}
+
+/**
+ * Ensure at least two of start / end / periods were supplied.
+ *
+ * @throws RangeError when fewer than two of the flags are true.
+ */
+function requireTwoOf(hasStart: boolean, hasEnd: boolean, hasPeriods: boolean): void {
+  const provided = [hasStart, hasEnd, hasPeriods].filter(Boolean).length;
+  if (provided < 2) {
+    throw new RangeError("dateRange: at least two of start, end, and periods must be provided");
+  }
+}
+
+/**
+ * Convert the raw start/end options to `Date` objects (or `null` when absent).
+ * When `doNorm` is true, each present date is truncated to midnight UTC.
+ */
+function parseDateInputs(
+  startRaw: Date | string | number | null | undefined,
+  endRaw: Date | string | number | null | undefined,
+  doNorm: boolean,
+): { startDate: Date | null; endDate: Date | null } {
+  const toDateOrNull = (raw: Date | string | number | null | undefined): Date | null =>
+    hasValue(raw) ? toDateInput(raw as Date | string | number) : null;
+
+  // Both inputs are parsed (start first) before any normalisation happens.
+  const parsedStart = toDateOrNull(startRaw);
+  const parsedEnd = toDateOrNull(endRaw);
+  if (!doNorm) {
+    return { startDate: parsedStart, endDate: parsedEnd };
+  }
+  return {
+    startDate: parsedStart === null ? null : normToMidnight(parsedStart),
+    endDate: parsedEnd === null ? null : normToMidnight(parsedEnd),
+  };
+}
+
+// ─── public API ────────────────────────────────────────────────────────────────
+
+/**
+ * Build an array of `Date` objects spaced at a fixed frequency.
+ *
+ * At least **two** of `start`, `end` and `periods` are required; a `periods`
+ * value that is not greater than zero counts as absent. When `start` and
+ * `end` are both given, `periods` is ignored.
+ *
+ * ```ts
+ * import { dateRange } from "tsb";
+ *
+ * // 5 daily dates starting 2024-01-01
+ * dateRange({ start: "2024-01-01", periods: 5 });
+ * // → [Jan 1, Jan 2, Jan 3, Jan 4, Jan 5]
+ *
+ * // Hourly between two timestamps
+ * dateRange({ start: "2024-01-01T00:00:00Z", end: "2024-01-01T06:00:00Z", freq: "h" });
+ *
+ * // Monthly (month-start) for 6 months
+ * dateRange({ start: "2024-01-01", periods: 6, freq: "MS" });
+ * ```
+ */
+export function dateRange(options: DateRangeOptions): Date[] {
+  const { start, end, periods, freq, inclusive, normalize } = options;
+
+  // The frequency is validated first, before the start/end/periods check.
+  const pf = parseFreq(freq ?? "D");
+  const incl: DateRangeInclusive = inclusive ?? "both";
+
+  const hasStart = hasValue(start);
+  const hasEnd = hasValue(end);
+  const hasPeriods = hasValue(periods) && (periods as number) > 0;
+  requireTwoOf(hasStart, hasEnd, hasPeriods);
+
+  const { startDate, endDate } = parseDateInputs(start, end, normalize === true);
+
+  // Exactly one endpoint plus a period count: generate from that endpoint.
+  if (hasPeriods && hasStart !== hasEnd) {
+    return hasStart
+      ? caseStartPeriods(startDate as Date, periods as number, pf, incl)
+      : caseEndPeriods(endDate as Date, periods as number, pf, incl);
+  }
+  return caseStartEnd(startDate as Date, endDate as Date, pf, incl);
+}
diff --git a/src/stats/diff_shift.ts b/src/stats/diff_shift.ts
new file mode 100644
index 00000000..4f62825f
--- /dev/null
+++ b/src/stats/diff_shift.ts
@@ -0,0 +1,368 @@
+/**
+ * diff_shift — discrete difference and value-shift for Series and DataFrame.
+ *
+ * Mirrors the following pandas methods:
+ * - `Series.diff(periods=1)` — first discrete difference shifted by `periods`
+ * - `Series.shift(periods=1, fill_value=NaN)` — shift index by `periods`
+ * - `DataFrame.diff(periods=1, axis=0)` — column-wise or row-wise diff
+ * - `DataFrame.shift(periods=1, fill_value=NaN, axis=0)` — column-wise or row-wise shift
+ *
+ * All functions are **pure** (return new objects; inputs are unchanged).
+ * Non-numeric values in `diff` yield `null`.
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Axis, Scalar } from "../types.ts";
+
+// ─── public types ──────────────────────────────────────────────────────────────
+
+/** Options for {@link diffSeries} and {@link diffDataFrame}. */
+export interface DiffOptions {
+ /**
+ * Number of periods to shift for calculating difference.
+ * Negative values shift in the opposite direction.
+ * Default `1`.
+ */
+ readonly periods?: number;
+}
+
+/** Options for {@link diffDataFrame}: {@link DiffOptions} plus an `axis` selector. */
+export interface DataFrameDiffOptions extends DiffOptions {
+ /**
+ * Axis along which to compute the difference.
+ * - `0` or `"index"` (default): diff down each **column**.
+ * - `1` or `"columns"`: diff across each **row**.
+ */
+ readonly axis?: Axis;
+}
+
+/** Options for {@link shiftSeries} and {@link shiftDataFrame}. */
+export interface ShiftOptions {
+ /**
+ * Number of periods to shift.
+ * Positive: shift forward (later rows get earlier values).
+ * Negative: shift backward.
+ * Default `1`.
+ */
+ readonly periods?: number;
+ /**
+ * Value to fill positions that fall outside the original range.
+ * Default `null` (treated as missing, like pandas NaN).
+ * Passing `undefined` is the same as omitting the option.
+ */
+ readonly fillValue?: Scalar;
+}
+
+/** Options for {@link shiftDataFrame}: {@link ShiftOptions} plus an `axis` selector. */
+export interface DataFrameShiftOptions extends ShiftOptions {
+ /**
+ * Axis along which to shift.
+ * - `0` or `"index"` (default): shift down each **column**.
+ * - `1` or `"columns"`: shift across each **row**.
+ */
+ readonly axis?: Axis;
+}
+
+// ─── helpers ───────────────────────────────────────────────────────────────────
+
+/**
+ * True when `v` is a number other than NaN (null / undefined / non-numbers are rejected).
+ * Despite the name, `Infinity` and `-Infinity` pass this check.
+ */
+function isFiniteNum(v: Scalar): v is number {
+ return typeof v === "number" && !Number.isNaN(v);
+}
+
+/**
+ * Element-wise lagged difference over an array of scalars:
+ * `result[i] = vals[i] - vals[i - periods]`.
+ *
+ * A position yields `null` when the lagged index falls outside the array or
+ * when either operand fails the `isFiniteNum` check.
+ */
+function diffArray(vals: readonly Scalar[], periods: number): Scalar[] {
+  const size = vals.length;
+  return Array.from({ length: size }, (_, i): Scalar => {
+    const lagged = i - periods;
+    if (lagged < 0 || lagged >= size) {
+      return null;
+    }
+    const current = vals[i] as Scalar;
+    const earlier = vals[lagged] as Scalar;
+    return isFiniteNum(current) && isFiniteNum(earlier) ? current - earlier : null;
+  });
+}
+
+/**
+ * Return a copy of `vals` displaced by `periods` positions.
+ *
+ * With a non-negative `periods`, position `i` receives `vals[i - periods]`, so
+ * the leading positions keep `fillValue`; with a negative `periods` the values
+ * move toward the front and the trailing positions keep `fillValue`.
+ */
+function shiftArray(vals: readonly Scalar[], periods: number, fillValue: Scalar): Scalar[] {
+  const size = vals.length;
+  const shifted: Scalar[] = new Array<Scalar>(size).fill(fillValue);
+  // Destination positions that have a source element inside the array.
+  const first = periods >= 0 ? periods : 0;
+  const stop = periods >= 0 ? size : size + periods;
+  for (let dest = first; dest < stop; dest++) {
+    shifted[dest] = vals[dest - periods] as Scalar;
+  }
+  return shifted;
+}
+
+// ─── Series: diff ──────────────────────────────────────────────────────────────
+
+/**
+ * Lagged discrete difference of a Series:
+ * `result[i] = series[i] - series[i - periods]`.
+ *
+ * Positions without a lagged partner (the first `|periods|` for a positive
+ * lag, the last for a negative one) and positions with a non-numeric operand
+ * are `null`. The result reuses the input's index and name.
+ *
+ * Mirrors `pandas.Series.diff(periods=1)`.
+ *
+ * @example
+ * ```ts
+ * import { Series } from "tsb";
+ * import { diffSeries } from "tsb";
+ *
+ * const s = new Series({ data: [1, 3, 6, 10, 15] });
+ * diffSeries(s).values; // [null, 2, 3, 4, 5]
+ * diffSeries(s, { periods: 2 }).values; // [null, null, 5, 7, 9]
+ * ```
+ */
+export function diffSeries(series: Series, options: DiffOptions = {}): Series {
+  const lag = options.periods ?? 1;
+  return new Series({
+    data: diffArray(series.values as readonly Scalar[], lag),
+    index: series.index,
+    name: series.name,
+  });
+}
+
+// ─── Series: shift ─────────────────────────────────────────────────────────────
+
+/**
+ * Displace the values of a Series by `periods` positions, keeping its index
+ * and name.
+ *
+ * A positive `periods` moves values toward later positions and fills the
+ * vacated leading positions with `fillValue` (default `null`); a negative
+ * `periods` moves them toward earlier positions.
+ *
+ * Mirrors `pandas.Series.shift(periods=1, fill_value=NaN)`.
+ *
+ * @example
+ * ```ts
+ * import { Series } from "tsb";
+ * import { shiftSeries } from "tsb";
+ *
+ * const s = new Series({ data: [1, 2, 3, 4, 5] });
+ * shiftSeries(s).values; // [null, 1, 2, 3, 4]
+ * shiftSeries(s, { periods: -1 }).values; // [2, 3, 4, 5, null]
+ * shiftSeries(s, { periods: 2, fillValue: 0 }).values; // [0, 0, 1, 2, 3]
+ * ```
+ */
+export function shiftSeries(series: Series, options: ShiftOptions = {}): Series {
+  const offset = options.periods ?? 1;
+  const filler: Scalar = options.fillValue ?? null;
+  return new Series({
+    data: shiftArray(series.values as readonly Scalar[], offset, filler),
+    index: series.index,
+    name: series.name,
+  });
+}
+
+// ─── DataFrame: diff ──────────────────────────────────────────────────────────
+
+/**
+ * Lagged discrete difference of a DataFrame.
+ *
+ * With `axis` `1` or `"columns"` the difference runs across each row (column N
+ * minus column N-periods); any other axis value, including the default `0`,
+ * diffs down each column independently.
+ *
+ * Mirrors `pandas.DataFrame.diff(periods=1, axis=0)`.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame } from "tsb";
+ * import { diffDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [1, 3, 6], b: [10, 20, 35] });
+ * diffDataFrame(df).col("a").values; // [null, 2, 3]
+ * diffDataFrame(df).col("b").values; // [null, 10, 15]
+ * ```
+ */
+export function diffDataFrame(df: DataFrame, options: DataFrameDiffOptions = {}): DataFrame {
+  const lag = options.periods ?? 1;
+  const axis = options.axis ?? 0;
+  const names = df.columns.values;
+  const acrossRows = axis === 1 || axis === "columns";
+  return acrossRows
+    ? diffDataFrameRowWise(df, names, lag)
+    : diffDataFrameColWise(df, names, lag);
+}
+
+/**
+ * Diff each column independently (axis=0).
+ * Every column is run through `diffArray` and rebuilt as a Series that shares
+ * the frame's index and keeps the column's name.
+ */
+function diffDataFrameColWise(
+ df: DataFrame,
+ colNames: readonly string[],
+ periods: number,
+): DataFrame {
+ // NOTE(review): `new Map>()` looks like it lost its type arguments — confirm.
+ const colMap = new Map>();
+ for (const name of colNames) {
+ const col = df.col(name) as Series;
+ const data = diffArray(col.values as readonly Scalar[], periods);
+ colMap.set(name, new Series({ data, index: df.index, name }));
+ }
+ return new DataFrame(colMap, df.index);
+}
+
+/**
+ * Diff across columns (axis=1).
+ * Output column `c` holds, row by row, column `c` minus column `c - periods`
+ * (by position in `colNames`). Columns with no partner inside the frame are
+ * entirely `null`.
+ */
+function diffDataFrameRowWise(
+ df: DataFrame,
+ colNames: readonly string[],
+ periods: number,
+): DataFrame {
+ const nRows = df.index.size;
+ const nCols = colNames.length;
+ // NOTE(review): `new Map>()` looks like it lost its type arguments — confirm.
+ const colMap = new Map>();
+
+ for (let c = 0; c < nCols; c++) {
+ const name = colNames[c];
+ if (name === undefined) {
+ continue;
+ }
+ // Start from an all-null column; it is stored unchanged when there is no partner column.
+ const rowData: Scalar[] = new Array(nRows).fill(null);
+ const priorIdx = c - periods;
+ if (priorIdx < 0 || priorIdx >= nCols) {
+ colMap.set(name, new Series({ data: rowData, index: df.index, name }));
+ continue;
+ }
+ const priorName = colNames[priorIdx];
+ if (priorName === undefined) {
+ colMap.set(name, new Series({ data: rowData, index: df.index, name }));
+ continue;
+ }
+ const curCol = df.col(name) as Series;
+ const priorCol = df.col(priorName) as Series;
+ for (let r = 0; r < nRows; r++) {
+ const cur = curCol.iat(r);
+ const prev = priorCol.iat(r);
+ // Subtract only when both operands pass the numeric check; otherwise the cell is null.
+ if (isFiniteNum(cur) && isFiniteNum(prev)) {
+ rowData[r] = cur - prev;
+ } else {
+ rowData[r] = null;
+ }
+ }
+ colMap.set(name, new Series({ data: rowData, index: df.index, name }));
+ }
+ return new DataFrame(colMap, df.index);
+}
+
+// ─── DataFrame: shift ─────────────────────────────────────────────────────────
+
+/**
+ * Displace the values of a DataFrame by `periods` positions.
+ *
+ * With `axis` `1` or `"columns"` each row is shifted across the columns; any
+ * other axis value, including the default `0`, shifts every column
+ * independently. Vacated cells receive `fillValue` (default `null`).
+ *
+ * Mirrors `pandas.DataFrame.shift(periods=1, fill_value=NaN, axis=0)`.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame } from "tsb";
+ * import { shiftDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+ * shiftDataFrame(df).col("a").values; // [null, 1, 2]
+ * shiftDataFrame(df, { periods: -1 }).col("b").values; // [5, 6, null]
+ * ```
+ */
+export function shiftDataFrame(df: DataFrame, options: DataFrameShiftOptions = {}): DataFrame {
+  const offset = options.periods ?? 1;
+  const filler: Scalar = options.fillValue ?? null;
+  const axis = options.axis ?? 0;
+  const names = df.columns.values;
+  const acrossRows = axis === 1 || axis === "columns";
+  return acrossRows
+    ? shiftDataFrameRowWise(df, names, offset, filler)
+    : shiftDataFrameColWise(df, names, offset, filler);
+}
+
+/**
+ * Shift each column independently (axis=0).
+ * Every column is run through `shiftArray` and rebuilt as a Series that shares
+ * the frame's index and keeps the column's name.
+ */
+function shiftDataFrameColWise(
+ df: DataFrame,
+ colNames: readonly string[],
+ periods: number,
+ fillValue: Scalar,
+): DataFrame {
+ // NOTE(review): `new Map>()` looks like it lost its type arguments — confirm.
+ const colMap = new Map>();
+ for (const name of colNames) {
+ const col = df.col(name) as Series;
+ const data = shiftArray(col.values as readonly Scalar[], periods, fillValue);
+ colMap.set(name, new Series({ data, index: df.index, name }));
+ }
+ return new DataFrame(colMap, df.index);
+}
+
+/**
+ * Shift each row across columns (axis=1).
+ * Column names and their order are unchanged; only the values move, by
+ * `periods` column positions. Cells with no source column keep `fillValue`.
+ */
+function shiftDataFrameRowWise(
+ df: DataFrame,
+ colNames: readonly string[],
+ periods: number,
+ fillValue: Scalar,
+): DataFrame {
+ const nRows = df.index.size;
+ const nCols = colNames.length;
+
+ // Build a 2D matrix [row][col] of shifted values
+ // Every cell starts as `fillValue`; only cells that have a source column are overwritten.
+ const matrix: Scalar[][] = Array.from({ length: nRows }, () =>
+ new Array(nCols).fill(fillValue),
+ );
+
+ if (periods >= 0) {
+ // Destination column `c` takes its values from the column `periods` positions earlier.
+ for (let c = periods; c < nCols; c++) {
+ const srcName = colNames[c - periods];
+ if (srcName === undefined) {
+ continue;
+ }
+ const src = df.col(srcName) as Series;
+ for (let r = 0; r < nRows; r++) {
+ const row = matrix[r];
+ if (row !== undefined) {
+ row[c] = src.iat(r);
+ }
+ }
+ }
+ } else {
+ // Negative shift: destination column `c` takes its values from the column `-periods` positions later.
+ const offset = -periods;
+ for (let c = 0; c < nCols - offset; c++) {
+ const srcName = colNames[c + offset];
+ if (srcName === undefined) {
+ continue;
+ }
+ const src = df.col(srcName) as Series;
+ for (let r = 0; r < nRows; r++) {
+ const row = matrix[r];
+ if (row !== undefined) {
+ row[c] = src.iat(r);
+ }
+ }
+ }
+ }
+
+ // Re-assemble the matrix column by column into Series sharing the frame's index.
+ // NOTE(review): `new Map>()` looks like it lost its type arguments — confirm.
+ const colMap = new Map>();
+ for (let c = 0; c < nCols; c++) {
+ const name = colNames[c];
+ if (name === undefined) {
+ continue;
+ }
+ const data = matrix.map((row) => row[c] as Scalar);
+ colMap.set(name, new Series({ data, index: df.index, name }));
+ }
+ return new DataFrame(colMap, df.index);
+}
diff --git a/src/stats/explode.ts b/src/stats/explode.ts
index c28df9d3..fd3c44c9 100644
--- a/src/stats/explode.ts
+++ b/src/stats/explode.ts
@@ -175,7 +175,7 @@ export function explodeDataFrame(
// Validate column names
for (const c of colNames) {
if (!df.has(c)) {
- throw new Error(`Column "${c}" not found in DataFrame`);
+ throw new Error(`Column '${c}' not found`);
}
}
@@ -219,6 +219,12 @@ export function explodeDataFrame(
const explodedCols = new Map();
explodedCols.set(firstCol, firstOut);
+ // Compute per-row output count from the primary column explosion.
+ const rowCounts: number[] = new Array(nRows).fill(0);
+ for (const p of firstPos) {
+ rowCounts[p] = (rowCounts[p] ?? 0) + 1;
+ }
+
for (let ci = 1; ci < colNames.length; ci++) {
const cname = colNames[ci] as string;
const wideColVals: readonly unknown[] = df.col(cname).values;
@@ -226,16 +232,17 @@ export function explodeDataFrame(
for (let row = 0; row < nRows; row++) {
const v = wideColVals[row];
+ const expectedCount = rowCounts[row] ?? 1;
if (isListLike(v)) {
- if (v.length === 0) {
- out.push(null);
- } else {
- for (const item of v) {
- out.push(item as Scalar);
- }
+ // Push items, padding with null if the list is shorter than expected.
+ for (let k = 0; k < expectedCount; k++) {
+ out.push(k < v.length ? (v[k] as Scalar) : null);
}
} else {
- out.push(v as Scalar);
+ // Scalar: repeat (or pad) to fill the expected slot count.
+ for (let k = 0; k < expectedCount; k++) {
+ out.push(v as Scalar);
+ }
}
}
explodedCols.set(cname, out);
diff --git a/src/stats/index.ts b/src/stats/index.ts
index 63864005..c09ad5ca 100644
--- a/src/stats/index.ts
+++ b/src/stats/index.ts
@@ -61,7 +61,7 @@ export {
dataFrameGe,
} from "./compare.ts";
export type { CompareOp, SeriesOther, DataFrameOther } from "./compare.ts";
-export { shiftSeries, diffSeries, dataFrameShift, dataFrameDiff } from "./shift_diff.ts";
+export { dataFrameShift, dataFrameDiff } from "./shift_diff.ts";
export type { ShiftDiffDataFrameOptions } from "./shift_diff.ts";
export { interpolateSeries, dataFrameInterpolate } from "./interpolate.ts";
export type {
@@ -360,3 +360,24 @@ export type {
export { toDatetime } from "./to_datetime.ts";
export type { DatetimeUnit, DatetimeErrors, ToDatetimeOptions } from "./to_datetime.ts";
+
+export { toTimedelta, parseFrac, formatTimedelta, Timedelta } from "./to_timedelta.ts";
+export type { TimedeltaUnit, TimedeltaErrors, ToTimedeltaOptions } from "./to_timedelta.ts";
+export { dateRange, parseFreq, advanceDate, toDateInput } from "./date_range.ts";
+export type {
+ DateRangeInclusive,
+ DateRangeOptions,
+ ParsedFreq,
+} from "./date_range.ts";
+export { diffDataFrame, shiftDataFrame, diffSeries, shiftSeries } from "./diff_shift.ts";
+export type {
+ DiffOptions,
+ DataFrameDiffOptions,
+ ShiftOptions,
+ DataFrameShiftOptions,
+} from "./diff_shift.ts";
+export { ffillSeries, bfillSeries, dataFrameFfill, dataFrameBfill } from "./na_ops.ts";
+export type { FillDirectionOptions, DataFrameFillOptions } from "./na_ops.ts";
+export { intervalRange } from "./interval.ts";
+export type { ClosedType } from "./interval.ts";
+export { nunique } from "./reduce_ops.ts";
diff --git a/src/stats/interval.ts b/src/stats/interval.ts
new file mode 100644
index 00000000..7fab0bc1
--- /dev/null
+++ b/src/stats/interval.ts
@@ -0,0 +1,413 @@
+/**
+ * Interval — pandas-compatible interval type and IntervalIndex.
+ *
+ * Mirrors `pandas.Interval` and `pandas.IntervalIndex`:
+ * - `Interval` — a single bounded interval `(left, right]`, `[left, right)`,
+ * `[left, right]`, or `(left, right)`.
+ * - `IntervalIndex` — an ordered array of `Interval` objects used as an axis label.
+ * - `intervalRange()` — construct a sequence of equal-length intervals (like
+ * `pd.interval_range`).
+ *
+ * @example
+ * ```ts
+ * const iv = new Interval(0, 5); // (0, 5]
+ * iv.contains(3); // true
+ * iv.overlaps(new Interval(4, 10)); // true
+ *
+ * const idx = IntervalIndex.fromBreaks([0, 1, 2, 3]);
+ * // IntervalIndex([(0, 1], (1, 2], (2, 3]])
+ *
+ * const rng = intervalRange(0, 1, { periods: 4 });
+ * // [(0.0, 0.25], (0.25, 0.5], (0.5, 0.75], (0.75, 1.0]]
+ * ```
+ *
+ * @module
+ */
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/**
+ * Specifies which endpoint(s) of an interval are closed (inclusive).
+ *
+ * - `"right"` (default) — `(left, right]`
+ * - `"left"` — `[left, right)`
+ * - `"both"` — `[left, right]`
+ * - `"neither"` — `(left, right)`
+ */
+export type ClosedType = "left" | "right" | "both" | "neither";
+
+ /**
+ * Options for {@link IntervalIndex.fromBreaks} and {@link intervalRange}.
+ *
+ * Also accepted by {@link IntervalIndex.fromArrays}. Every field is optional.
+ */
+ export interface IntervalOptions {
+ /** Which endpoints are closed. Default `"right"`. */
+ readonly closed?: ClosedType;
+ /** Human-readable name for the index axis. Omitted is treated the same as `null`. */
+ readonly name?: string | null;
+ }
+
+ /**
+ * Options for {@link intervalRange}.
+ *
+ * Extends {@link IntervalOptions} with the two mutually exclusive ways of
+ * choosing the break points.
+ */
+ export interface IntervalRangeOptions extends IntervalOptions {
+ /**
+ * Number of intervals to generate.
+ * Exactly one of `periods` or `freq` must be provided.
+ * `intervalRange` rejects values that are not integers ≥ 1.
+ */
+ readonly periods?: number;
+ /**
+ * Step size between interval edges.
+ * Exactly one of `periods` or `freq` must be provided.
+ * `intervalRange` rejects values ≤ 0.
+ */
+ readonly freq?: number;
+ }
+
+// ─── Interval ─────────────────────────────────────────────────────────────────
+
+/**
+ * An immutable bounded interval.
+ *
+ * Mirrors `pandas.Interval`. Endpoints are numbers.
+ */
+export class Interval {
+ /** Left (lower) endpoint. */
+ readonly left: number;
+
+ /** Right (upper) endpoint. */
+ readonly right: number;
+
+ /** Which endpoints are closed (inclusive). */
+ readonly closed: ClosedType;
+
+ constructor(left: number, right: number, closed: ClosedType = "right") {
+ if (left > right) {
+ throw new RangeError(`Interval: left (${left}) must be ≤ right (${right})`);
+ }
+ this.left = left;
+ this.right = right;
+ this.closed = closed;
+ }
+
+ // ─── derived properties ─────────────────────────────────────────
+
+ /** Length of the interval (`right − left`). */
+ get length(): number {
+ return this.right - this.left;
+ }
+
+ /** Mid-point of the interval. */
+ get mid(): number {
+ return (this.left + this.right) / 2;
+ }
+
+ /** True when left endpoint is closed. */
+ get closedLeft(): boolean {
+ return this.closed === "left" || this.closed === "both";
+ }
+
+ /** True when right endpoint is closed. */
+ get closedRight(): boolean {
+ return this.closed === "right" || this.closed === "both";
+ }
+
+ /** True when neither endpoint is closed. */
+ get isOpen(): boolean {
+ return this.closed === "neither";
+ }
+
+ /** True when both endpoints are closed. */
+ get isClosed(): boolean {
+ return this.closed === "both";
+ }
+
+ // ─── membership ─────────────────────────────────────────────────
+
+ /**
+ * Return `true` if `value` falls within this interval.
+ *
+ * @example
+ * ```ts
+ * new Interval(0, 5).contains(5); // true (right-closed)
+ * new Interval(0, 5).contains(0); // false (right-closed, 0 excluded)
+ * new Interval(0, 5, "both").contains(0); // true
+ * ```
+ */
+ contains(value: number): boolean {
+ const leftOk = this.closedLeft ? value >= this.left : value > this.left;
+ const rightOk = this.closedRight ? value <= this.right : value < this.right;
+ return leftOk && rightOk;
+ }
+
+ // ─── comparison / set operations ────────────────────────────────
+
+ /**
+ * Return `true` if this interval overlaps with `other`.
+ *
+ * Two intervals overlap when they share any interior point.
+ * Touching at a single endpoint is considered overlapping when that endpoint
+ * is closed in both intervals.
+ */
+ overlaps(other: Interval): boolean {
+ if (this.left > other.right || other.left > this.right) {
+ return false;
+ }
+ if (this.left === other.right) {
+ return this.closedLeft && other.closedRight;
+ }
+ if (other.left === this.right) {
+ return other.closedLeft && this.closedRight;
+ }
+ return true;
+ }
+
+ /**
+ * Return `true` if this interval is equal to `other`
+ * (same endpoints and same `closed` type).
+ */
+ equals(other: Interval): boolean {
+ return this.left === other.left && this.right === other.right && this.closed === other.closed;
+ }
+
+ // ─── display ────────────────────────────────────────────────────
+
+ /** Render as a pandas-style string, e.g. `(0.0, 1.5]`. */
+ toString(): string {
+ const l = this.closedLeft ? "[" : "(";
+ const r = this.closedRight ? "]" : ")";
+ return `${l}${this.left}, ${this.right}${r}`;
+ }
+}
+
+// ─── IntervalIndex ────────────────────────────────────────────────────────────
+
+/**
+ * An immutable index of `Interval` objects.
+ *
+ * Mirrors `pandas.IntervalIndex`.
+ */
+export class IntervalIndex {
+ private readonly _intervals: readonly Interval[];
+
+ /** Human-readable axis name. */
+ readonly name: string | null;
+
+ constructor(intervals: readonly Interval[], name: string | null = null) {
+ this._intervals = Object.freeze([...intervals]);
+ this.name = name;
+ }
+
+ // ─── factories ──────────────────────────────────────────────────
+
+ /**
+ * Build an `IntervalIndex` from an array of break points.
+ *
+ * `breaks` must have at least 2 elements. The resulting index contains
+ * `breaks.length − 1` intervals.
+ *
+ * @example
+ * ```ts
+ * IntervalIndex.fromBreaks([0, 1, 2, 3]);
+ * // IntervalIndex([(0, 1], (1, 2], (2, 3]])
+ * ```
+ */
+ static fromBreaks(breaks: readonly number[], options?: IntervalOptions): IntervalIndex {
+ if (breaks.length < 2) {
+ throw new RangeError("fromBreaks: at least 2 break points are required");
+ }
+ const closed = options?.closed ?? "right";
+ const name = options?.name ?? null;
+ const intervals: Interval[] = [];
+ for (let i = 0; i < breaks.length - 1; i++) {
+ intervals.push(new Interval(breaks[i] as number, breaks[i + 1] as number, closed));
+ }
+ return new IntervalIndex(intervals, name);
+ }
+
+ /**
+ * Build an `IntervalIndex` from explicit arrays of left and right endpoints.
+ *
+ * Both arrays must have the same length.
+ */
+ static fromArrays(
+ left: readonly number[],
+ right: readonly number[],
+ options?: IntervalOptions,
+ ): IntervalIndex {
+ if (left.length !== right.length) {
+ throw new RangeError("fromArrays: left and right arrays must have the same length");
+ }
+ const closed = options?.closed ?? "right";
+ const name = options?.name ?? null;
+ const intervals: Interval[] = left.map((l, i) => new Interval(l, right[i] as number, closed));
+ return new IntervalIndex(intervals, name);
+ }
+
+ /**
+ * Build an `IntervalIndex` from an array of `Interval` objects.
+ */
+ static fromIntervals(intervals: readonly Interval[], name?: string | null): IntervalIndex {
+ return new IntervalIndex(intervals, name ?? null);
+ }
+
+ // ─── properties ─────────────────────────────────────────────────
+
+ /** Number of intervals. */
+ get size(): number {
+ return this._intervals.length;
+ }
+
+ /** All intervals in order. */
+ get values(): readonly Interval[] {
+ return this._intervals;
+ }
+
+ /** Left endpoints. */
+ get left(): readonly number[] {
+ return this._intervals.map((iv) => iv.left);
+ }
+
+ /** Right endpoints. */
+ get right(): readonly number[] {
+ return this._intervals.map((iv) => iv.right);
+ }
+
+ /** Mid-points. */
+ get mid(): readonly number[] {
+ return this._intervals.map((iv) => iv.mid);
+ }
+
+ /** Lengths (`right − left`) of each interval. */
+ get length(): readonly number[] {
+ return this._intervals.map((iv) => iv.length);
+ }
+
+ /** Which endpoints are closed (taken from the first interval; homogeneous index assumed). */
+ get closed(): ClosedType {
+ return this._intervals[0]?.closed ?? "right";
+ }
+
+ /** True when all intervals are non-overlapping and sorted. */
+ get isMonotonic(): boolean {
+ for (let i = 1; i < this._intervals.length; i++) {
+ const prev = this._intervals[i - 1] as Interval;
+ const curr = this._intervals[i] as Interval;
+ if (prev.right > curr.left) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ // ─── lookup ─────────────────────────────────────────────────────
+
+ /**
+ * Return the interval at position `i` (0-based).
+ */
+ get(i: number): Interval {
+ const iv = this._intervals[i];
+ if (iv === undefined) {
+ throw new RangeError(`Index ${i} out of range [0, ${this.size})`);
+ }
+ return iv;
+ }
+
+ /**
+ * Return the 0-based position of the first interval that {@link Interval.contains}
+ * `value`, or `-1` if none.
+ */
+ indexOf(value: number): number {
+ for (let i = 0; i < this._intervals.length; i++) {
+ if ((this._intervals[i] as Interval).contains(value)) {
+ return i;
+ }
+ }
+ return -1;
+ }
+
+ /**
+ * Return all intervals that overlap with `other`.
+ */
+ overlapping(other: Interval): IntervalIndex {
+ return new IntervalIndex(
+ this._intervals.filter((iv) => iv.overlaps(other)),
+ this.name,
+ );
+ }
+
+ // ─── set operations ─────────────────────────────────────────────
+
+ /**
+ * Append another `IntervalIndex` to this one.
+ */
+ append(other: IntervalIndex): IntervalIndex {
+ return new IntervalIndex([...this._intervals, ...other._intervals], this.name);
+ }
+
+ // ─── display ────────────────────────────────────────────────────
+
+ /** Render as a pandas-style string. */
+ toString(): string {
+ const inner = this._intervals.map((iv) => iv.toString()).join(", ");
+ return `IntervalIndex([${inner}], closed='${this.closed}')`;
+ }
+}
+
+// ─── intervalRange ────────────────────────────────────────────────────────────
+
+/**
+ * Return an `IntervalIndex` of equal-length intervals.
+ *
+ * Mirrors `pandas.interval_range`. Exactly one of `options.periods` or
+ * `options.freq` must be specified.
+ *
+ * @param start Left edge of the first interval.
+ * @param end Right edge of the last interval.
+ * @param options `periods` (number of intervals) or `freq` (interval length).
+ *
+ * @example
+ * ```ts
+ * intervalRange(0, 1, { periods: 4 });
+ * // IntervalIndex([(0.0, 0.25], (0.25, 0.5], (0.5, 0.75], (0.75, 1.0]])
+ *
+ * intervalRange(0, 10, { freq: 2.5 });
+ * // IntervalIndex([(0.0, 2.5], (2.5, 5.0], (5.0, 7.5], (7.5, 10.0]])
+ * ```
+ */
+export function intervalRange(
+ start: number,
+ end: number,
+ options: IntervalRangeOptions,
+): IntervalIndex {
+ if (end <= start) {
+ throw new RangeError(`intervalRange: end (${end}) must be > start (${start})`);
+ }
+ const closed = options.closed ?? "right";
+ const name = options.name ?? null;
+
+ let breaks: number[];
+
+ if (options.periods !== undefined && options.freq !== undefined) {
+ throw new RangeError("intervalRange: specify exactly one of periods or freq");
+ }
+ if (options.periods !== undefined) {
+ const n = options.periods;
+ if (!Number.isInteger(n) || n < 1) {
+ throw new RangeError("intervalRange: periods must be a positive integer");
+ }
+ const step = (end - start) / n;
+ breaks = Array.from({ length: n + 1 }, (_, i) => start + i * step);
+ breaks[n] = end;
+ } else if (options.freq !== undefined) {
+ const freq = options.freq;
+ if (freq <= 0) {
+ throw new RangeError("intervalRange: freq must be > 0");
+ }
+ breaks = [];
+ let cur = start;
+ while (cur < end - freq * 1e-10) {
+ breaks.push(cur);
+ cur += freq;
+ }
+ breaks.push(end);
+ } else {
+ throw new RangeError("intervalRange: one of periods or freq must be specified");
+ }
+
+ return IntervalIndex.fromBreaks(breaks, { closed, name });
+}
diff --git a/src/stats/na_ops.ts b/src/stats/na_ops.ts
new file mode 100644
index 00000000..37c9a208
--- /dev/null
+++ b/src/stats/na_ops.ts
@@ -0,0 +1,336 @@
+/**
+ * na_ops — missing-value utilities for Series and DataFrame.
+ *
+ * Mirrors the following pandas module-level functions and methods:
+ * - `pd.isna(obj)` / `pd.isnull(obj)` — detect missing values
+ * - `pd.notna(obj)` / `pd.notnull(obj)` — detect non-missing values
+ * - `Series.ffill()` / `DataFrame.ffill()` — forward-fill missing values
+ * - `Series.bfill()` / `DataFrame.bfill()` — backward-fill missing values
+ *
+ * All functions are **pure** (return new objects; inputs are unchanged).
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+ /** Options for {@link ffillSeries} and {@link bfillSeries}. */
+ export interface FillDirectionOptions {
+ /**
+ * Maximum number of consecutive NaN/null values to fill.
+ * `null` means no limit (default).
+ * Leaving the option out is treated the same as `null`.
+ */
+ readonly limit?: number | null;
+ }
+
+ /**
+ * Options for {@link dataFrameFfill} and {@link dataFrameBfill}.
+ *
+ * Inherits `limit` from {@link FillDirectionOptions} and adds the fill axis.
+ */
+ export interface DataFrameFillOptions extends FillDirectionOptions {
+ /**
+ * - `0` or `"index"` (default): fill missing values down each **column**.
+ * - `1` or `"columns"`: fill missing values across each **row**.
+ */
+ readonly axis?: 0 | 1 | "index" | "columns";
+ }
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+ /** Report whether `v` counts as missing: `null`, `undefined`, or a numeric `NaN`. */
+ function isMissing(v: Scalar): boolean {
+   if (v === null || v === undefined) {
+     return true;
+   }
+   return typeof v === "number" && Number.isNaN(v);
+ }
+
+ /**
+  * Forward-fill a sequence of scalars.
+  *
+  * The input is copied first; the copy is filled and returned, so `vals`
+  * itself is never modified.
+  *
+  * @param vals  Values to fill.
+  * @param limit Maximum fills per run of missing values, or `null` for no cap.
+  * @returns A new array in which each fillable gap holds the preceding valid value.
+  */
+ function ffillArray(vals: readonly Scalar[], limit: number | null): Scalar[] {
+   const result: Scalar[] = Array.from(vals);
+   let carry: Scalar = null; // most recent non-missing value seen so far
+   let filledInRun = 0; // fills made since `carry` was last refreshed
+   for (const [pos, current] of result.entries()) {
+     if (!isMissing(current)) {
+       carry = current as Scalar;
+       filledInRun = 0;
+       continue;
+     }
+     const haveSource = !isMissing(carry);
+     const underLimit = limit === null || filledInRun < limit;
+     if (haveSource && underLimit) {
+       result[pos] = carry;
+       filledInRun += 1;
+     }
+   }
+   return result;
+ }
+
+ /**
+  * Backward-fill a sequence of scalars.
+  *
+  * Works on a copy of `vals`, scanning from the end towards the start so the
+  * next valid value is always known when a gap is reached.
+  *
+  * @param vals  Values to fill.
+  * @param limit Maximum fills per run of missing values, or `null` for no cap.
+  * @returns A new array in which each fillable gap holds the following valid value.
+  */
+ function bfillArray(vals: readonly Scalar[], limit: number | null): Scalar[] {
+   const result: Scalar[] = Array.from(vals);
+   let upcoming: Scalar = null; // nearest non-missing value to the right
+   let filledInRun = 0; // fills made since `upcoming` was last refreshed
+   for (let pos = result.length - 1; pos >= 0; pos -= 1) {
+     const current = result[pos];
+     if (!isMissing(current)) {
+       upcoming = current as Scalar;
+       filledInRun = 0;
+       continue;
+     }
+     const haveSource = !isMissing(upcoming);
+     const underLimit = limit === null || filledInRun < limit;
+     if (haveSource && underLimit) {
+       result[pos] = upcoming;
+       filledInRun += 1;
+     }
+   }
+   return result;
+ }
+
+// ─── isna / notna ─────────────────────────────────────────────────────────────
+
+/**
+ * Detect missing values in a scalar, Series, or DataFrame.
+ *
+ * - For a **scalar**: returns `true` if the value is `null`, `undefined`, or `NaN`.
+ * - For a **Series**: returns a `Series` of the same index.
+ * - For a **DataFrame**: returns a `DataFrame` of boolean columns.
+ *
+ * Mirrors `pandas.isna()` / `pandas.isnull()`.
+ *
+ * @example
+ * ```ts
+ * import { isna } from "tsb";
+ * isna(null); // true
+ * isna(42); // false
+ * isna(NaN); // true
+ *
+ * const s = new Series({ data: [1, null, NaN, 4] });
+ * isna(s); // Series([false, true, true, false])
+ * ```
+ */
+export function isna(value: Scalar): boolean;
+export function isna(value: Series): Series;
+export function isna(value: DataFrame): DataFrame;
+export function isna(
+ value: Scalar | Series | DataFrame,
+): boolean | Series | DataFrame {
+ if (value instanceof DataFrame) {
+ return value.isna();
+ }
+ if (value instanceof Series) {
+ return value.isna();
+ }
+ return isMissing(value as Scalar);
+}
+
+/**
+ * Detect non-missing values in a scalar, Series, or DataFrame.
+ *
+ * Mirrors `pandas.notna()` / `pandas.notnull()`.
+ *
+ * @example
+ * ```ts
+ * import { notna } from "tsb";
+ * notna(null); // false
+ * notna(42); // true
+ * ```
+ */
+export function notna(value: Scalar): boolean;
+export function notna(value: Series): Series;
+export function notna(value: DataFrame): DataFrame;
+export function notna(
+ value: Scalar | Series | DataFrame,
+): boolean | Series | DataFrame {
+ if (value instanceof DataFrame) {
+ return value.notna();
+ }
+ if (value instanceof Series) {
+ return value.notna();
+ }
+ return !isMissing(value as Scalar);
+}
+
+ /**
+ * Alias for {@link isna}. Mirrors `pandas.isnull()`.
+ *
+ * Bound to the same function object, so it accepts the same arguments.
+ */
+ export const isnull = isna;
+
+ /**
+ * Alias for {@link notna}. Mirrors `pandas.notnull()`.
+ *
+ * Bound to the same function object, so it accepts the same arguments.
+ */
+ export const notnull = notna;
+
+// ─── ffill ────────────────────────────────────────────────────────────────────
+
+/**
+ * Forward-fill missing values in a Series.
+ *
+ * Each `null`/`NaN` value is replaced with the last non-missing value
+ * that precedes it (if any). Values before the first non-missing value
+ * remain missing.
+ *
+ * Mirrors `pandas.Series.ffill()`.
+ *
+ * @param series - Input Series (unchanged).
+ * @param options - Optional `{ limit }` — max consecutive fills.
+ * @returns New Series with forward-filled values.
+ *
+ * @example
+ * ```ts
+ * import { ffillSeries } from "tsb";
+ * const s = new Series({ data: [1, null, null, 4] });
+ * ffillSeries(s); // Series([1, 1, 1, 4])
+ * ```
+ */
+export function ffillSeries(
+ series: Series,
+ options?: FillDirectionOptions,
+): Series {
+ const limit = options?.limit ?? null;
+ const filled = ffillArray(series.values as readonly Scalar[], limit) as T[];
+ return new Series({
+ data: filled,
+ index: series.index,
+ dtype: series.dtype,
+ name: series.name ?? null,
+ });
+}
+
+/**
+ * Backward-fill missing values in a Series.
+ *
+ * Each `null`/`NaN` value is replaced with the next non-missing value
+ * that follows it (if any). Values after the last non-missing value
+ * remain missing.
+ *
+ * Mirrors `pandas.Series.bfill()`.
+ *
+ * @example
+ * ```ts
+ * import { bfillSeries } from "tsb";
+ * const s = new Series({ data: [1, null, null, 4] });
+ * bfillSeries(s); // Series([1, 4, 4, 4])
+ * ```
+ */
+export function bfillSeries(
+ series: Series,
+ options?: FillDirectionOptions,
+): Series {
+ const limit = options?.limit ?? null;
+ const filled = bfillArray(series.values as readonly Scalar[], limit) as T[];
+ return new Series({
+ data: filled,
+ index: series.index,
+ dtype: series.dtype,
+ name: series.name ?? null,
+ });
+}
+
+// ─── DataFrame ffill / bfill ──────────────────────────────────────────────────
+
+/**
+ * Forward-fill missing values in a DataFrame.
+ *
+ * By default operates **column-wise** (axis=0): each column is independently
+ * forward-filled. With `axis=1` each row is forward-filled across columns.
+ *
+ * Mirrors `pandas.DataFrame.ffill()`.
+ *
+ * @example
+ * ```ts
+ * import { dataFrameFfill } from "tsb";
+ * const df = new DataFrame({ data: { a: [1, null, 3], b: [null, 2, null] } });
+ * dataFrameFfill(df);
+ * // a: [1, 1, 3]
+ * // b: [null, 2, 2]
+ * ```
+ */
+export function dataFrameFfill(df: DataFrame, options?: DataFrameFillOptions): DataFrame {
+ const limit = options?.limit ?? null;
+ const axis = options?.axis ?? 0;
+ const byRow = axis === 1 || axis === "columns";
+
+ if (!byRow) {
+ // column-wise: fill each column independently
+ const colMap = new Map>();
+ for (const name of df.columns.values) {
+ const col = df.col(name);
+ const filled = ffillArray(col.values, limit) as Scalar[];
+ colMap.set(name, new Series({ data: filled, index: col.index, dtype: col.dtype }));
+ }
+ return new DataFrame(colMap, df.index);
+ }
+
+ // row-wise: fill across columns for each row
+ const nRows = df.shape[0];
+ const cols = df.columns.values;
+ const columns = cols.map((name) => df.col(name));
+ const rowsFilled: Scalar[][] = columns.map((c) => Array.from(c.values));
+ for (let r = 0; r < nRows; r++) {
+ const rowVals: Scalar[] = columns.map((_, ci) => rowsFilled[ci]?.[r] ?? null);
+ const filled = ffillArray(rowVals, limit);
+ for (let ci = 0; ci < cols.length; ci++) {
+ const rowsFilledCI = rowsFilled[ci];
+ if (rowsFilledCI !== undefined) {
+ rowsFilledCI[r] = filled[ci] ?? null;
+ }
+ }
+ }
+ const colMap = new Map>();
+ for (let ci = 0; ci < cols.length; ci++) {
+ const name = cols[ci] as string;
+ const col = columns[ci] as Series;
+ colMap.set(
+ name,
+ new Series({
+ data: rowsFilled[ci] ?? [],
+ index: col.index,
+ dtype: col.dtype,
+ }),
+ );
+ }
+ return new DataFrame(colMap, df.index);
+}
+
+/**
+ * Backward-fill missing values in a DataFrame.
+ *
+ * By default operates **column-wise** (axis=0). With `axis=1` fills across rows.
+ *
+ * Mirrors `pandas.DataFrame.bfill()`.
+ */
+export function dataFrameBfill(df: DataFrame, options?: DataFrameFillOptions): DataFrame {
+ const limit = options?.limit ?? null;
+ const axis = options?.axis ?? 0;
+ const byRow = axis === 1 || axis === "columns";
+
+ if (!byRow) {
+ const colMap = new Map>();
+ for (const name of df.columns.values) {
+ const col = df.col(name);
+ const filled = bfillArray(col.values, limit) as Scalar[];
+ colMap.set(name, new Series({ data: filled, index: col.index, dtype: col.dtype }));
+ }
+ return new DataFrame(colMap, df.index);
+ }
+
+ const nRows = df.shape[0];
+ const cols = df.columns.values;
+ const columns = cols.map((name) => df.col(name));
+ const rowsFilled: Scalar[][] = columns.map((c) => Array.from(c.values));
+ for (let r = 0; r < nRows; r++) {
+ const rowVals: Scalar[] = columns.map((_, ci) => rowsFilled[ci]?.[r] ?? null);
+ const filled = bfillArray(rowVals, limit);
+ for (let ci = 0; ci < cols.length; ci++) {
+ const rowsFilledCI = rowsFilled[ci];
+ if (rowsFilledCI !== undefined) {
+ rowsFilledCI[r] = filled[ci] ?? null;
+ }
+ }
+ }
+ const colMap = new Map>();
+ for (let ci = 0; ci < cols.length; ci++) {
+ const name = cols[ci] as string;
+ const col = columns[ci] as Series;
+ colMap.set(
+ name,
+ new Series({
+ data: rowsFilled[ci] ?? [],
+ index: col.index,
+ dtype: col.dtype,
+ }),
+ );
+ }
+ return new DataFrame(colMap, df.index);
+}
diff --git a/src/stats/reduce_ops.ts b/src/stats/reduce_ops.ts
new file mode 100644
index 00000000..d25006b8
--- /dev/null
+++ b/src/stats/reduce_ops.ts
@@ -0,0 +1,321 @@
+/**
+ * reduce_ops — boolean and counting reduction operations for Series and DataFrame.
+ *
+ * Mirrors the following pandas methods:
+ * - `Series.nunique()` / `DataFrame.nunique()` — count distinct non-null values
+ * - `Series.any()` / `DataFrame.any()` — true if any element is truthy
+ * - `Series.all()` / `DataFrame.all()` — true if all elements are truthy
+ *
+ * All functions are **pure** (return new objects; inputs are unchanged).
+ *
+ * @module
+ */
+
+import type { DataFrame } from "../core/index.ts";
+import { Index } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+ /** Options for {@link nuniqueSeries} and {@link nunique}. */
+ export interface NuniqueOptions {
+ /**
+ * Whether to exclude missing values (null, undefined, NaN) from the count.
+ * Defaults to `true` — same as pandas.
+ * When `false`, missing values are counted like any other value.
+ */
+ readonly dropna?: boolean;
+ }
+
+ /** Options for {@link nunique} (DataFrame variant). */
+ export interface NuniqueDataFrameOptions extends NuniqueOptions {
+ /**
+ * - `0` or `"index"` (default): count unique values **per column**.
+ * - `1` or `"columns"`: count unique values **per row**.
+ *
+ * Per-column results are indexed by column name; per-row results reuse the
+ * DataFrame's row index.
+ */
+ readonly axis?: 0 | 1 | "index" | "columns";
+ }
+
+ /**
+ * Options for {@link anySeries}, {@link allSeries}, {@link anyDataFrame}, {@link allDataFrame}.
+ *
+ * The DataFrame variants take the extended {@link BoolReduceDataFrameOptions}.
+ */
+ export interface BoolReduceOptions {
+ /**
+ * Whether to skip missing values (null, undefined, NaN).
+ * Defaults to `true` — same as pandas.
+ */
+ readonly skipna?: boolean;
+ }
+
+ /** Options for {@link anyDataFrame} and {@link allDataFrame}. */
+ export interface BoolReduceDataFrameOptions extends BoolReduceOptions {
+ /**
+ * - `0` or `"index"` (default): reduce along **rows** → result per column.
+ * - `1` or `"columns"`: reduce along **columns** → result per row.
+ */
+ readonly axis?: 0 | 1 | "index" | "columns";
+ /**
+ * When `true`, only consider boolean columns.
+ * Defaults to `false` — same as pandas.
+ * A column counts as boolean when its `dtype.kind` is `"bool"`.
+ */
+ readonly boolOnly?: boolean;
+ }
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+ /** Report whether `v` is one of the missing markers: `null`, `undefined`, or `NaN`. */
+ function isMissing(v: Scalar): boolean {
+   const isNanNumber = typeof v === "number" && Number.isNaN(v);
+   return v === null || v === undefined || isNanNumber;
+ }
+
+ /**
+  * Normalise an axis option to its numeric form.
+  *
+  * @returns `1` for `1` or `"columns"` (row-wise); `0` (column-wise) for
+  *   everything else, including `undefined`.
+  */
+ function resolveAxis(axis: 0 | 1 | "index" | "columns" | undefined): 0 | 1 {
+   const rowWise = axis === 1 || axis === "columns";
+   return rowWise ? 1 : 0;
+ }
+
+ /**
+  * Count the distinct values in `vals`.
+  *
+  * Distinctness follows `Set` semantics.
+  *
+  * @param vals   Values to inspect.
+  * @param dropna When `true`, missing values are left out of the count.
+  */
+ function countUnique(vals: readonly Scalar[], dropna: boolean): number {
+   const considered = dropna ? vals.filter((v) => !isMissing(v)) : vals;
+   return new Set(considered).size;
+ }
+
+ /**
+  * True if any element in `vals` is truthy.
+  *
+  * @param vals   Values to reduce.
+  * @param skipna When `true`, missing values (null, undefined, NaN) are
+  *   ignored. When `false`, a missing value counts as truthy — the pandas
+  *   convention — even though `null`, `undefined` and `NaN` are falsy in
+  *   JavaScript.
+  * @returns `false` for an empty input or when every considered value is falsy.
+  */
+ function reduceAny(vals: readonly Scalar[], skipna: boolean): boolean {
+   for (const v of vals) {
+     const missing = isMissing(v);
+     if (missing && skipna) {
+       continue;
+     }
+     // Reaching here with `missing` set means skipna is off: NA counts as true.
+     if (missing || v) {
+       return true;
+     }
+   }
+   return false;
+ }
+
+ /**
+  * True if all elements in `vals` are truthy.
+  *
+  * @param vals   Values to reduce.
+  * @param skipna When `true`, missing values (null, undefined, NaN) are
+  *   ignored. When `false`, a missing value counts as truthy — the pandas
+  *   convention — even though `null`, `undefined` and `NaN` are falsy in
+  *   JavaScript.
+  * @returns `true` for an empty input or when no considered value is falsy.
+  */
+ function reduceAll(vals: readonly Scalar[], skipna: boolean): boolean {
+   for (const v of vals) {
+     const missing = isMissing(v);
+     if (missing && skipna) {
+       continue;
+     }
+     // Reaching here with `missing` set means skipna is off: NA counts as true.
+     const truthy = missing || Boolean(v);
+     if (!truthy) {
+       return false;
+     }
+   }
+   return true;
+ }
+
+// ─── nunique ──────────────────────────────────────────────────────────────────
+
+/**
+ * Count distinct values in a Series.
+ *
+ * Missing values (null, undefined, NaN) are excluded by default.
+ *
+ * @param series - Input Series.
+ * @param options - Control whether to exclude missing values.
+ * @returns Number of unique values.
+ *
+ * @example
+ * ```ts
+ * const s = new Series([1, 2, 2, null]);
+ * nuniqueSeries(s); // 2
+ * nuniqueSeries(s, { dropna: false }); // 3
+ * ```
+ */
+export function nuniqueSeries(series: Series, options: NuniqueOptions = {}): number {
+ const dropna = options.dropna ?? true;
+ return countUnique(series.values, dropna);
+}
+
+/**
+ * Count distinct values per column (or per row) in a DataFrame.
+ *
+ * @param df - Input DataFrame.
+ * @param options - Axis (0 = column-wise, 1 = row-wise) and dropna flag.
+ * @returns Series with count of unique values per column/row.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 2, 2], b: ["x", "y", "x"] });
+ * nunique(df); // Series { a: 2, b: 2 }
+ * ```
+ */
+export function nunique(df: DataFrame, options: NuniqueDataFrameOptions = {}): Series {
+ const axis = resolveAxis(options.axis);
+ const dropna = options.dropna ?? true;
+
+ if (axis === 0) {
+ return nuniqueByColumns(df, dropna);
+ }
+ return nuniqueByRows(df, dropna);
+}
+
+ /** Count unique values in every column; the result is indexed by column name. */
+ function nuniqueByColumns(df: DataFrame, dropna: boolean): Series {
+   const labels = [...df.columns.values];
+   const counts: Scalar[] = [];
+   for (const label of labels) {
+     counts.push(countUnique(df.col(label).values, dropna));
+   }
+   return new Series({ data: counts, index: new Index(labels) });
+ }
+
+ /**
+  * Count unique values per row → result is row-indexed Series.
+  *
+  * Cells that read back as `undefined` are treated as `null`.
+  */
+ function nuniqueByRows(df: DataFrame, dropna: boolean): Series {
+   // Look each column up once rather than once per cell.
+   const columnValues = [...df.columns.values].map((name) => df.col(name).values);
+   const nRows = df.index.size;
+   const counts: Scalar[] = [];
+   for (let r = 0; r < nRows; r++) {
+     const row: Scalar[] = columnValues.map((vals) => vals[r] ?? null);
+     counts.push(countUnique(row, dropna));
+   }
+   return new Series({ data: counts, index: df.index });
+ }
+
+// ─── any ─────────────────────────────────────────────────────────────────────
+
+/**
+ * Return `true` if any element in a Series is truthy.
+ *
+ * @param series - Input Series.
+ * @param options - Whether to skip missing values (default: `true`).
+ * @returns `true` when at least one truthy element exists.
+ *
+ * @example
+ * ```ts
+ * const s = new Series([0, 0, 1]);
+ * anySeries(s); // true
+ * ```
+ */
+export function anySeries(series: Series, options: BoolReduceOptions = {}): boolean {
+ const skipna = options.skipna ?? true;
+ return reduceAny(series.values, skipna);
+}
+
+/**
+ * Return a boolean Series indicating whether any element is truthy per column (or row).
+ *
+ * @param df - Input DataFrame.
+ * @param options - Axis, skipna, and boolOnly options.
+ * @returns Boolean Series with one value per column (axis=0) or row (axis=1).
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [0, 0, 1], b: [0, 0, 0] });
+ * anyDataFrame(df); // Series { a: true, b: false }
+ * ```
+ */
+export function anyDataFrame(
+ df: DataFrame,
+ options: BoolReduceDataFrameOptions = {},
+): Series {
+ const axis = resolveAxis(options.axis);
+ const skipna = options.skipna ?? true;
+ const boolOnly = options.boolOnly ?? false;
+
+ if (axis === 0) {
+ return anyByColumns(df, skipna, boolOnly);
+ }
+ return anyByRows(df, skipna, boolOnly);
+}
+
+ /** Evaluate `any` for each relevant column; the result is indexed by column name. */
+ function anyByColumns(df: DataFrame, skipna: boolean, boolOnly: boolean): Series {
+   const labels = getRelevantColumns(df, boolOnly);
+   const flags: Scalar[] = [];
+   for (const label of labels) {
+     flags.push(reduceAny(df.col(label).values, skipna));
+   }
+   return new Series({ data: flags, index: new Index(labels) });
+ }
+
+ /**
+  * `any` per row → result indexed by row index.
+  *
+  * Only the columns selected by `boolOnly` take part. Cells that read back
+  * as `undefined` are treated as `null`.
+  */
+ function anyByRows(df: DataFrame, skipna: boolean, boolOnly: boolean): Series {
+   // Look each column up once rather than once per cell.
+   const columnValues = getRelevantColumns(df, boolOnly).map((name) => df.col(name).values);
+   const nRows = df.index.size;
+   const result: Scalar[] = [];
+   for (let r = 0; r < nRows; r++) {
+     const row: Scalar[] = columnValues.map((vals) => vals[r] ?? null);
+     result.push(reduceAny(row, skipna));
+   }
+   return new Series({ data: result, index: df.index });
+ }
+
+// ─── all ─────────────────────────────────────────────────────────────────────
+
+/**
+ * Return `true` if all elements in a Series are truthy.
+ *
+ * @param series - Input Series.
+ * @param options - Whether to skip missing values (default: `true`).
+ * @returns `true` when every non-missing element is truthy.
+ *
+ * @example
+ * ```ts
+ * const s = new Series([1, 2, 3]);
+ * allSeries(s); // true
+ * ```
+ */
+export function allSeries(series: Series, options: BoolReduceOptions = {}): boolean {
+ const skipna = options.skipna ?? true;
+ return reduceAll(series.values, skipna);
+}
+
+/**
+ * Return a boolean Series indicating whether all elements are truthy per column (or row).
+ *
+ * @param df - Input DataFrame.
+ * @param options - Axis, skipna, and boolOnly options.
+ * @returns Boolean Series with one value per column (axis=0) or row (axis=1).
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [0, 1, 1] });
+ * allDataFrame(df); // Series { a: true, b: false }
+ * ```
+ */
+export function allDataFrame(
+ df: DataFrame,
+ options: BoolReduceDataFrameOptions = {},
+): Series {
+ const axis = resolveAxis(options.axis);
+ const skipna = options.skipna ?? true;
+ const boolOnly = options.boolOnly ?? false;
+
+ if (axis === 0) {
+ return allByColumns(df, skipna, boolOnly);
+ }
+ return allByRows(df, skipna, boolOnly);
+}
+
+ /** Evaluate `all` for each relevant column; the result is indexed by column name. */
+ function allByColumns(df: DataFrame, skipna: boolean, boolOnly: boolean): Series {
+   const labels = getRelevantColumns(df, boolOnly);
+   const flags: Scalar[] = [];
+   for (const label of labels) {
+     flags.push(reduceAll(df.col(label).values, skipna));
+   }
+   return new Series({ data: flags, index: new Index(labels) });
+ }
+
+ /**
+  * `all` per row → result indexed by row index.
+  *
+  * Only the columns selected by `boolOnly` take part. Cells that read back
+  * as `undefined` are treated as `null`.
+  */
+ function allByRows(df: DataFrame, skipna: boolean, boolOnly: boolean): Series {
+   // Look each column up once rather than once per cell.
+   const columnValues = getRelevantColumns(df, boolOnly).map((name) => df.col(name).values);
+   const nRows = df.index.size;
+   const result: Scalar[] = [];
+   for (let r = 0; r < nRows; r++) {
+     const row: Scalar[] = columnValues.map((vals) => vals[r] ?? null);
+     result.push(reduceAll(row, skipna));
+   }
+   return new Series({ data: result, index: df.index });
+ }
+
+// ─── shared helpers ───────────────────────────────────────────────────────────
+
+ /**
+  * List the column names a boolean reduction should visit.
+  *
+  * @returns A fresh array of all column names, or — when `boolOnly` is set —
+  *   only those whose `dtype.kind` is `"bool"`.
+  */
+ function getRelevantColumns(df: DataFrame, boolOnly: boolean): string[] {
+   const everyColumn = [...df.columns.values];
+   return boolOnly
+     ? everyColumn.filter((name) => df.col(name).dtype.kind === "bool")
+     : everyColumn;
+ }
diff --git a/src/stats/sample.ts b/src/stats/sample.ts
index 80704eed..f75659c3 100644
--- a/src/stats/sample.ts
+++ b/src/stats/sample.ts
@@ -89,10 +89,10 @@ export interface SampleDataFrameOptions {
readonly ignoreIndex?: boolean;
/**
* Axis to sample along.
- * - `0` (default): sample rows.
- * - `1`: sample columns.
+ * - `0` or `"index"` (default): sample rows.
+ * - `1` or `"columns"`: sample columns.
*/
- readonly axis?: 0 | 1;
+ readonly axis?: 0 | 1 | "index" | "columns";
}
// ─── pseudo-random number generator ──────────────────────────────────────────
@@ -102,8 +102,18 @@ class Rng {
private _state: number;
constructor(seed: number) {
- // Ensure non-zero starting state.
- this._state = seed >>> 0 || 0xdeadbeef;
+ // Mix the seed so that small sequential seeds (0, 1, 2, …) produce
+ // well-distributed starting states. Without mixing, XOR-shift produces
+ // near-zero outputs for seeds like 1, 2, 3 because the internal state
+ // stays small for the first few steps.
+ let s = seed >>> 0 || 0xdeadbeef;
+ // Wang hash to spread bits before the first step.
+ s = (s ^ 61 ^ (s >>> 16)) >>> 0;
+ s = (s + (s << 3)) >>> 0;
+ s = (s ^ (s >>> 4)) >>> 0;
+ s = Math.imul(s, 0x27d4eb2d) >>> 0;
+ s = (s ^ (s >>> 15)) >>> 0;
+ this._state = s || 0xdeadbeef;
}
/** Returns a float in [0, 1). */
@@ -384,7 +394,7 @@ export function sampleSeries(
export function sampleDataFrame(df: DataFrame, options: SampleDataFrameOptions = {}): DataFrame {
const { n, frac, replace = false, weights, randomState, ignoreIndex = false, axis = 0 } = options;
- if (axis === 1) {
+ if (axis === 1 || axis === "columns") {
return sampleColumns(df, n, frac, replace, weights, randomState);
}
return sampleRows(df, n, frac, replace, weights, randomState, ignoreIndex);
diff --git a/src/stats/to_timedelta.ts b/src/stats/to_timedelta.ts
new file mode 100644
index 00000000..b52b482a
--- /dev/null
+++ b/src/stats/to_timedelta.ts
@@ -0,0 +1,474 @@
+/**
+ * to_timedelta — convert scalars, arrays, or Series to Timedelta objects.
+ *
+ * Mirrors `pandas.to_timedelta()`:
+ * - `toTimedelta(value, options?)` — parse a single scalar to a `Timedelta | null`
+ * - `toTimedelta(values, options?)` — parse an array to `(Timedelta | null)[]`
+ * - `toTimedelta(series, options?)` — parse a Series to `Series`
+ *
+ * Supported input types:
+ * - `Timedelta` — returned as-is
+ * - `number` — treated as a duration in the given unit (default `"ns"`)
+ * - `string` — pandas-style "1 days 02:03:04.567", ISO-8601 "P1DT2H3M4S",
+ * clock "HH:MM:SS", or human form "1h 30m 20s 500ms"
+ * - `null` / `undefined` / `NaN` — treated as missing (returns `null`)
+ *
+ * @module
+ */
+
+import { Dtype, Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── top-level regex constants (biome: useTopLevelRegex) ──────────────────────
+
+/**
+ * Pandas-style clock string: "[-][N day[s][,]] HH:MM:SS[.fraction]".
+ * Capture groups: 1 = leading "-", 2 = day count, 3 = hours (1–2 digits),
+ * 4 = minutes, 5 = seconds, 6 = fractional-second digits.
+ * Only a leading "-" is accepted as a sign; a "+" anywhere in the string does not match.
+ */
+const RE_PANDAS = /^(-)?(?:(\d+)\s+days?,?\s*)?(\d{1,2}):(\d{2}):(\d{2})(?:\.(\d+))?$/i;
+
+/**
+ * ISO 8601 duration: P[nD][T[nH][nM][nS]], with an optional leading "-".
+ * Capture groups: 1 = sign, 2 = days, 3 = hours, 4 = minutes, 5 = seconds;
+ * each number may carry a decimal part.
+ * Every component is optional in the pattern, so a bare "P" or "PT" also matches.
+ */
+const RE_ISO =
+ /^(-)?P(?:(\d+(?:\.\d+)?)D)?(?:T(?:(\d+(?:\.\d+)?)H)?(?:(\d+(?:\.\d+)?)M)?(?:(\d+(?:\.\d+)?)S)?)?$/i;
+
+/**
+ * Human unit tokens for scanAll: "1h", "30 minutes", "2.5s" etc.
+ * Capture groups: 1 = quantity (integer or decimal), 2 = unit word.
+ *
+ * Alternatives are tried left to right, so within each family the longer
+ * spelling comes first and the micro/milli words come before the bare "m".
+ * The trailing `(?![a-z])` stops a short token from matching the start of a
+ * longer word (e.g. "m" inside "microseconds").
+ */
+const RE_HUMAN_UNIT =
+  /(\d+(?:\.\d+)?)\s*(weeks?|w|days?|d|hours?|h|microseconds?|micros?|us|milliseconds?|millis?|ms|minutes?|mins?|m|nanoseconds?|nanos?|ns|seconds?|secs?|s)(?![a-z])/gi;
+
+/** Pure integer string: an optional leading "-" followed by digits only (no decimal point, no "+"). */
+const RE_INT = /^-?\d+$/;
+
+// ─── Timedelta class ───────────────────────────────────────────────────────────
+
+/**
+ * Represents a fixed duration, stored as a signed count of milliseconds.
+ *
+ * Modelled on `pandas.Timedelta`. Component getters use a sign-magnitude
+ * convention: `days` carries the sign and is truncated toward zero, while
+ * `hours`, `minutes`, `seconds` and `ms` are derived from the absolute
+ * duration and are therefore never negative.
+ */
+export class Timedelta {
+  /** Total duration in milliseconds (may be negative or fractional). */
+  readonly totalMs: number;
+
+  /**
+   * @param ms - Duration in milliseconds; stored as given, without validation.
+   */
+  constructor(ms: number) {
+    this.totalMs = ms;
+  }
+
+  /** Sign: +1 for zero or positive durations, -1 for negative ones. */
+  get sign(): number {
+    return this.totalMs < 0 ? -1 : 1;
+  }
+
+  /** Absolute millisecond value. */
+  get absMs(): number {
+    return Math.abs(this.totalMs);
+  }
+
+  /** Whole days, truncated toward zero (negative for negative durations). */
+  get days(): number {
+    // `+ 0` turns the -0 that Math.trunc yields for e.g. -1000 ms into +0
+    // while leaving every other value (including NaN) untouched.
+    return Math.trunc(this.totalMs / 86_400_000) + 0;
+  }
+
+  /** Whole hours of the absolute duration within its last partial day (0–23). */
+  get hours(): number {
+    return Math.trunc((this.absMs % 86_400_000) / 3_600_000);
+  }
+
+  /** Whole minutes of the absolute duration within its last partial hour (0–59). */
+  get minutes(): number {
+    return Math.trunc((this.absMs % 3_600_000) / 60_000);
+  }
+
+  /** Whole seconds of the absolute duration within its last partial minute (0–59). */
+  get seconds(): number {
+    return Math.trunc((this.absMs % 60_000) / 1_000);
+  }
+
+  /** Whole milliseconds of the absolute duration within its last partial second (0–999). */
+  get ms(): number {
+    return Math.trunc(this.absMs % 1_000);
+  }
+
+  /** Return a new Timedelta with the absolute value. */
+  abs(): Timedelta {
+    return new Timedelta(this.absMs);
+  }
+
+  /** Add another Timedelta to this one. */
+  add(other: Timedelta): Timedelta {
+    return new Timedelta(this.totalMs + other.totalMs);
+  }
+
+  /** Subtract another Timedelta from this one. */
+  subtract(other: Timedelta): Timedelta {
+    return new Timedelta(this.totalMs - other.totalMs);
+  }
+
+  /** Multiply duration by a numeric scalar. */
+  scale(factor: number): Timedelta {
+    return new Timedelta(this.totalMs * factor);
+  }
+
+  /** Return true if this duration is less than other. */
+  lt(other: Timedelta): boolean {
+    return this.totalMs < other.totalMs;
+  }
+
+  /** Return true if this duration is greater than other. */
+  gt(other: Timedelta): boolean {
+    return this.totalMs > other.totalMs;
+  }
+
+  /** Return true if both durations have exactly the same millisecond count. */
+  eq(other: Timedelta): boolean {
+    return this.totalMs === other.totalMs;
+  }
+
+  /** Human-readable representation, produced by `formatTimedelta`. */
+  toString(): string {
+    return formatTimedelta(this);
+  }
+}
+
+// ─── public types ──────────────────────────────────────────────────────────────
+
+/** Time unit for numeric inputs. Mirrors pandas `unit` parameter. */
+export type TimedeltaUnit = "W" | "D" | "h" | "m" | "s" | "ms" | "us" | "ns";
+
+/** Error handling behaviour — mirrors pandas `errors` parameter. */
+export type TimedeltaErrors = "raise" | "coerce" | "ignore";
+
+/** Options for `toTimedelta`. */
+export interface ToTimedeltaOptions {
+ /**
+ * Unit for numeric inputs (default `"ns"`).
+ * Also applied to strings that consist solely of an integer (e.g. `"42"`),
+ * which are converted as numbers.
+ * - `"W"` — weeks
+ * - `"D"` — days
+ * - `"h"` — hours
+ * - `"m"` — minutes
+ * - `"s"` — seconds
+ * - `"ms"` — milliseconds
+ * - `"us"` — microseconds
+ * - `"ns"` — nanoseconds
+ */
+ readonly unit?: TimedeltaUnit;
+ /**
+ * Error handling (default `"raise"`).
+ * - `"raise"` — throw a `TypeError` on unparseable input
+ * - `"coerce"` — return `null` on unparseable input
+ * - `"ignore"` — return the original value unchanged (the result is then
+ * not a `Timedelta`, even though the declared return type says so)
+ */
+ readonly errors?: TimedeltaErrors;
+}
+
+// ─── overloads ─────────────────────────────────────────────────────────────────
+
+/**
+ * Scalar form: convert one value to a `Timedelta`.
+ *
+ * @param value - Scalar to convert; missing values yield `null`.
+ * @param options - Conversion options.
+ */
+export function toTimedelta(value: Scalar, options?: ToTimedeltaOptions): Timedelta | null;
+
+/**
+ * Array form: convert each element, preserving order and length.
+ *
+ * @param values - Array of scalars to convert.
+ * @param options - Conversion options.
+ */
+export function toTimedelta(
+  values: readonly Scalar[],
+  options?: ToTimedeltaOptions,
+): (Timedelta | null)[];
+
+/**
+ * Series form: convert the values element-wise into a new `Series`.
+ *
+ * @param series - Series whose values are converted element-wise.
+ * @param options - Conversion options.
+ */
+export function toTimedelta(
+  series: Series,
+  options?: ToTimedeltaOptions,
+): Series;
+
+// ─── implementation ────────────────────────────────────────────────────────────
+
+export function toTimedelta(
+  input: Scalar | readonly Scalar[] | Series,
+  options: ToTimedeltaOptions = {},
+): Timedelta | null | (Timedelta | null)[] | Series {
+  // Dispatch on the runtime shape of the input: Series, then array, then scalar.
+  if (input instanceof Series) {
+    return convertSeries(input, options);
+  }
+  if (!Array.isArray(input)) {
+    return convertOne(input as Scalar, options);
+  }
+  const items = input as readonly Scalar[];
+  return items.map((item) => convertOne(item, options));
+}
+
+// ─── series conversion ─────────────────────────────────────────────────────────
+
+/** Convert every value of `s` element-wise, keeping its index and name, as a timedelta-dtype Series. */
+function convertSeries(s: Series, options: ToTimedeltaOptions): Series {
+  const data = s.values.map((item) => convertOne(item, options)) as (Timedelta | null)[];
+  const { index, name } = s;
+  return new Series({ data, index, dtype: Dtype.timedelta, name });
+}
+
+// ─── scalar conversion ─────────────────────────────────────────────────────────
+
+/**
+ * Convert a single scalar.
+ *
+ * Missing values become `null`, an existing `Timedelta` is returned as-is,
+ * numbers and strings go to their dedicated converters, and every other type
+ * is handled according to the `errors` policy.
+ */
+function convertOne(value: Scalar, options: ToTimedeltaOptions): Timedelta | null {
+  if (isMissing(value)) {
+    return null;
+  }
+
+  // Widen to `unknown` so the instanceof check is not rejected by the Scalar union.
+  const candidate: unknown = value;
+  if (candidate instanceof Timedelta) {
+    return candidate;
+  }
+
+  switch (typeof value) {
+    case "number": {
+      return convertNumber(value, options);
+    }
+    case "string": {
+      return convertString(value, options);
+    }
+    default: {
+      return applyErrors(
+        options.errors ?? "raise",
+        value,
+        `Cannot convert ${typeof value} to Timedelta`,
+      );
+    }
+  }
+}
+
+/** Whether `v` counts as missing: `null`, `undefined`, or the number `NaN`. */
+function isMissing(v: Scalar): boolean {
+  if (typeof v === "number") {
+    return Number.isNaN(v);
+  }
+  return v === null || v === undefined;
+}
+
+// ─── numeric conversion ────────────────────────────────────────────────────────
+
+/**
+ * Turn a number into a Timedelta, interpreting it in `options.unit` (default `"ns"`).
+ * A non-finite millisecond result is handed to the `errors` policy.
+ */
+function convertNumber(value: number, options: ToTimedeltaOptions): Timedelta | null {
+  const ms = unitToMs(value, options.unit ?? "ns");
+  if (Number.isFinite(ms)) {
+    return new Timedelta(ms);
+  }
+  return applyErrors(options.errors ?? "raise", value, `Invalid numeric timedelta: ${value}`);
+}
+
+/** Convert `value`, expressed in `unit`, to milliseconds. */
+function unitToMs(value: number, unit: TimedeltaUnit): number {
+  switch (unit) {
+    case "W":
+      return value * 7 * 86_400_000;
+    case "D":
+      return value * 86_400_000;
+    case "h":
+      return value * 3_600_000;
+    case "m":
+      return value * 60_000;
+    case "s":
+      return value * 1_000;
+    case "ms":
+      return value;
+    case "us":
+      return value / 1_000;
+    default:
+      // "ns" — the only remaining unit.
+      return value / 1_000_000;
+  }
+}
+
+// ─── string conversion ─────────────────────────────────────────────────────────
+
+/**
+ * Convert a duration string.
+ *
+ * After trimming, an all-integer string is treated as a number in the
+ * configured unit; anything else goes through the string parsers. Input that
+ * no parser accepts is handed to the `errors` policy with the original text.
+ */
+function convertString(value: string, options: ToTimedeltaOptions): Timedelta | null {
+  const text = value.trim();
+  if (RE_INT.test(text)) {
+    return convertNumber(Number(text), options);
+  }
+
+  const parsed = tryParseString(text);
+  return parsed === null
+    ? applyErrors(options.errors ?? "raise", value, `Cannot parse "${value}" as Timedelta`)
+    : parsed;
+}
+
+/**
+ * Run the string parsers in priority order: pandas clock form, ISO 8601, then
+ * human unit tokens. The first pattern that matches decides the result, so an
+ * ISO match rejected by `parseIso` does not fall through to the human parser.
+ */
+function tryParseString(value: string): Timedelta | null {
+  const pandasMatch = RE_PANDAS.exec(value);
+  if (pandasMatch !== null) {
+    return parsePandas(pandasMatch);
+  }
+
+  const isoMatch = RE_ISO.exec(value);
+  return isoMatch === null ? parseHuman(value) : parseIso(isoMatch);
+}
+
+// ─── pandas-format parser ──────────────────────────────────────────────────────
+
+/**
+ * Build a Timedelta from a `RE_PANDAS` match.
+ * Examples of accepted input: "1 days 02:03:04", "0:05:00.500000", "-0:30:00".
+ *
+ * A leading "-" negates the whole duration (days and clock together). Strings
+ * with a "+" before the clock part, such as "-1 days +22:30:00", are not
+ * matched by `RE_PANDAS` and never reach this function.
+ */
+function parsePandas(m: RegExpExecArray): Timedelta | null {
+  const [, signText, dayText, hourText, minuteText, secondText, fracText] = m;
+
+  const magnitude =
+    (dayText === undefined ? 0 : Number(dayText)) * 86_400_000 +
+    Number(hourText) * 3_600_000 +
+    Number(minuteText) * 60_000 +
+    Number(secondText) * 1_000 +
+    (fracText === undefined ? 0 : parseFrac(fracText));
+
+  return new Timedelta(signText === "-" ? -magnitude : magnitude);
+}
+
+// ─── ISO 8601 parser ───────────────────────────────────────────────────────────
+
+/**
+ * Build a Timedelta from a `RE_ISO` match, e.g. "P1DT2H3M4.5S".
+ * Returns null when the match carries no day/hour/minute/second component.
+ */
+function parseIso(m: RegExpExecArray): Timedelta | null {
+  const components = [m[2], m[3], m[4], m[5]];
+  if (components.every((part) => part === undefined)) {
+    // e.g. a bare "P": syntactically matched but empty.
+    return null;
+  }
+
+  // Absent components count as zero.
+  const num = (text: string | undefined): number => (text === undefined ? 0 : Number(text));
+  const magnitude =
+    num(m[2]) * 86_400_000 + num(m[3]) * 3_600_000 + num(m[4]) * 60_000 + num(m[5]) * 1_000;
+
+  return new Timedelta(m[1] === "-" ? -magnitude : magnitude);
+}
+
+// ─── human-readable parser ─────────────────────────────────────────────────────
+
+/**
+ * Parse human-readable form: "1h 30m 20s 500ms".
+ *
+ * The whole string must consist of quantity+unit tokens, separated only by
+ * whitespace and/or commas, optionally preceded by a single "+" or "-" that
+ * applies to the total. Returns null when no token is found or when any other
+ * text is present, so that unrelated strings which merely contain something
+ * like "1h" are not silently accepted.
+ */
+function parseHuman(value: string): Timedelta | null {
+  let body = value.trim();
+  let negative = false;
+  const lead = body.charAt(0);
+  if (lead === "-" || lead === "+") {
+    negative = lead === "-";
+    body = body.slice(1);
+  }
+
+  /** True when `gap` holds nothing but whitespace and commas. */
+  const isSeparator = (gap: string): boolean => gap.split(",").join("").trim() === "";
+
+  let totalMs = 0;
+  let cursor = 0;
+  let matched = false;
+
+  for (const match of body.matchAll(RE_HUMAN_UNIT)) {
+    const start = match.index ?? 0;
+    // Text between the previous token and this one must be pure separator.
+    if (!isSeparator(body.slice(cursor, start))) {
+      return null;
+    }
+    matched = true;
+    const qty = Number(match[1]);
+    const unit = (match[2] ?? "").toLowerCase();
+    totalMs += humanUnitToMs(qty, unit);
+    cursor = start + match[0].length;
+  }
+
+  // Reject when nothing matched or when unparsed text trails the last token.
+  if (!matched || !isSeparator(body.slice(cursor))) {
+    return null;
+  }
+
+  // Avoid producing -0 for a negated zero duration.
+  return new Timedelta(negative && totalMs !== 0 ? -totalMs : totalMs);
+}
+
+/**
+ * Convert `qty` of a lower-cased human unit word to milliseconds.
+ * Units are recognised by exact short form or by word stem; anything not
+ * recognised (including "ms" / "milli…") is treated as milliseconds.
+ */
+function humanUnitToMs(qty: number, unit: string): number {
+  const is = (short: string, stem: string): boolean => unit === short || unit.startsWith(stem);
+
+  if (is("w", "week")) {
+    return qty * 7 * 86_400_000;
+  }
+  if (is("d", "day")) {
+    return qty * 86_400_000;
+  }
+  if (is("h", "hour")) {
+    return qty * 3_600_000;
+  }
+  if (is("m", "min")) {
+    return qty * 60_000;
+  }
+  if (is("s", "sec")) {
+    return qty * 1_000;
+  }
+  if (is("us", "micro")) {
+    return qty / 1_000;
+  }
+  if (is("ns", "nano")) {
+    return qty / 1_000_000;
+  }
+  // ms / milli
+  return qty;
+}
+
+// ─── helpers ───────────────────────────────────────────────────────────────────
+
+/**
+ * Convert the digits after the decimal point of a seconds value to milliseconds.
+ * The digits are right-padded with zeros (or cut) to exactly 9 places, read as
+ * nanoseconds, and divided by 1e6.
+ * "5" → 500 ms
+ * "500000" → 500 ms
+ * "123456789" → 123.456789 ms
+ */
+export function parseFrac(s: string): number {
+  // Appending nine zeros and keeping the first nine characters pads short
+  // inputs and truncates long ones in a single step.
+  const nanoDigits = `${s}000000000`.slice(0, 9);
+  return Number(nanoDigits) / 1_000_000;
+}
+
+/**
+ * Format a Timedelta as a pandas-style string.
+ * E.g. "0 days 01:30:00", "-1 days +22:30:00.500000"
+ *
+ * Negative durations are written as a negative whole-day count plus a
+ * non-negative clock offset, so -1.5 hours is "-1 days +22:30:00". The
+ * fractional part is printed with microsecond precision and omitted when it
+ * rounds to zero.
+ *
+ * @param td - Duration to format (only `totalMs` is read).
+ * @returns The formatted string.
+ */
+export function formatTimedelta(td: Timedelta): string {
+  const msPerDay = 86_400_000;
+
+  // Floor division: `days` may be negative, the remainder is in [0, 1 day).
+  let days = Math.floor(td.totalMs / msPerDay);
+  let remMs = td.totalMs - days * msPerDay;
+  if (remMs >= msPerDay) {
+    // Floating-point rounding for tiny negative inputs can push the remainder
+    // up to a full day; carry it into the day count.
+    days += 1;
+    remMs -= msPerDay;
+  }
+
+  const hours = Math.floor(remMs / 3_600_000);
+  const minutes = Math.floor((remMs % 3_600_000) / 60_000);
+  const seconds = Math.floor((remMs % 60_000) / 1_000);
+  // Clamp so rounding can never yield a seventh digit (1_000_000 µs).
+  const micros = Math.min(999_999, Math.round((remMs % 1_000) * 1000));
+
+  const hh = String(hours).padStart(2, "0");
+  const mm = String(minutes).padStart(2, "0");
+  const ss = String(seconds).padStart(2, "0");
+  const frac = micros > 0 ? `.${String(micros).padStart(6, "0")}` : "";
+  const clock = `${hh}:${mm}:${ss}${frac}`;
+
+  if (days < 0) {
+    // pandas: "-1 days +HH:MM:SS" style for negative durations
+    return `${days} days +${clock}`;
+  }
+
+  // NOTE(review): pandas itself prints "1 days"; the singular label is kept
+  // here for backward compatibility — confirm which form callers expect.
+  const dayLabel = days === 1 ? "1 day" : `${days} days`;
+  return `${dayLabel} ${clock}`;
+}
+
+// ─── error handler ─────────────────────────────────────────────────────────────
+
+/**
+ * Resolve a failed conversion according to the `errors` policy.
+ * - `"raise"` → throws a TypeError carrying `message`
+ * - `"coerce"` → returns null
+ * - `"ignore"` → returns `original` unchanged (cast to the declared return type)
+ */
+function applyErrors(errors: TimedeltaErrors, original: Scalar, message: string): Timedelta | null {
+  switch (errors) {
+    case "raise":
+      throw new TypeError(message);
+    case "coerce":
+      return null;
+    default:
+      // errors === "ignore"
+      return original as unknown as Timedelta;
+  }
+}
diff --git a/src/types.ts b/src/types.ts
index 34916738..4b858bf7 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -7,7 +7,12 @@
*/
/** Scalar value types — the atomic units of data in tsb. */
-export type Scalar = number | string | boolean | bigint | null | undefined | Date;
+export type Scalar = number | string | boolean | bigint | null | undefined | Date | TimedeltaLike;
+
+/** Timedelta-like object: any value representing a temporal duration (has totalMs). */
+export interface TimedeltaLike {
+ readonly totalMs: number;
+}
/** A label used to identify rows or columns (similar to pandas Index). */
export type Label = number | string | boolean | null;
diff --git a/tests/core/sample.test.ts b/tests/core/sample.test.ts
new file mode 100644
index 00000000..189fa050
--- /dev/null
+++ b/tests/core/sample.test.ts
@@ -0,0 +1,202 @@
+/**
+ * Tests for sampleSeries / sampleDataFrame (implemented in src/stats/sample.ts).
+ */
+
+import { describe, expect, test } from "bun:test";
+import fc from "fast-check";
+import { DataFrame, Series } from "../../src/index.ts";
+import { sampleDataFrame, sampleSeries } from "../../src/index.ts";
+
+// ─── sampleSeries ──────────────────────────────────────────────────────────────
+
+// sampleSeries: sample size (n / frac), replacement, seeding via randomState,
+// weights, error cases, and index-label preservation.
+describe("sampleSeries", () => {
+ test("returns correct number of items (n)", () => {
+ const s = new Series({ data: [10, 20, 30, 40, 50] });
+ const r = sampleSeries(s, { n: 3, randomState: 1 });
+ expect(r.values.length).toBe(3);
+ });
+
+ test("frac=0.4 on length-5 returns 2 items", () => {
+ const s = new Series({ data: [1, 2, 3, 4, 5] });
+ const r = sampleSeries(s, { frac: 0.4, randomState: 0 });
+ expect(r.values.length).toBe(2);
+ });
+
+ test("n=1 is default", () => {
+ const s = new Series({ data: [1, 2, 3] });
+ const r = sampleSeries(s, { randomState: 0 });
+ expect(r.values.length).toBe(1);
+ });
+
+ test("replace=false: no repeated items (small pool)", () => {
+ const s = new Series({ data: [10, 20, 30] });
+ const r = sampleSeries(s, { n: 3, replace: false, randomState: 42 });
+ const vals = [...r.values] as number[];
+ // Drawing the whole pool without replacement must yield a permutation of it.
+ expect(new Set(vals).size).toBe(3);
+ expect(vals.sort((a, b) => a - b)).toEqual([10, 20, 30]);
+ });
+
+ test("replace=true can repeat items", () => {
+ // By using a tiny pool and large n, repetitions are guaranteed
+ const s = new Series({ data: [7] });
+ const r = sampleSeries(s, { n: 5, replace: true, randomState: 0 });
+ expect(r.values.length).toBe(5);
+ expect(r.values.every((v) => v === 7)).toBe(true);
+ });
+
+ test("deterministic with randomState", () => {
+ const s = new Series({ data: [1, 2, 3, 4, 5] });
+ const r1 = sampleSeries(s, { n: 3, randomState: 99 });
+ const r2 = sampleSeries(s, { n: 3, randomState: 99 });
+ expect(r1.values).toEqual(r2.values);
+ });
+
+ test("different seeds give potentially different results", () => {
+ const s = new Series({ data: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] });
+ const r1 = sampleSeries(s, { n: 5, randomState: 1 });
+ const r2 = sampleSeries(s, { n: 5, randomState: 2 });
+ // Not guaranteed but overwhelmingly likely for 10-choose-5
+ expect(r1.values).not.toEqual(r2.values);
+ });
+
+ test("weighted sample: high-weight item is selected more often", () => {
+ const s = new Series({ data: [1, 2, 3] });
+ // Weight 3 heavily on index 2 (value=3)
+ // One draw per seed over 20 seeds; the assertion only requires a majority.
+ let countOf3 = 0;
+ for (let seed = 0; seed < 20; seed++) {
+ const r = sampleSeries(s, { n: 1, weights: [0.01, 0.01, 0.98], randomState: seed });
+ if (r.values[0] === 3) {
+ countOf3 += 1;
+ }
+ }
+ expect(countOf3).toBeGreaterThan(10);
+ });
+
+ test("throws when n > poolSize and replace=false", () => {
+ const s = new Series({ data: [1, 2, 3] });
+ expect(() => sampleSeries(s, { n: 5 })).toThrow();
+ });
+
+ test("throws when n and frac both specified", () => {
+ const s = new Series({ data: [1, 2, 3] });
+ expect(() => sampleSeries(s, { n: 1, frac: 0.5 })).toThrow();
+ });
+
+ test("n=0 returns empty Series", () => {
+ const s = new Series({ data: [1, 2, 3] });
+ const r = sampleSeries(s, { n: 0 });
+ expect(r.values.length).toBe(0);
+ });
+
+ test("sampled values are all from original Series", () => {
+ const s = new Series({ data: [10, 20, 30, 40, 50] });
+ const original = new Set(s.values as number[]);
+ const r = sampleSeries(s, { n: 4, randomState: 7 });
+ for (const v of r.values) {
+ expect(original.has(v as number)).toBe(true);
+ }
+ });
+
+ test("preserves correct index labels", () => {
+ const s = new Series({ data: [100, 200, 300], index: ["a", "b", "c"] });
+ const r = sampleSeries(s, { n: 2, randomState: 0 });
+ // Index labels should match the positions sampled
+ // Values are 100 × (position + 1), so the original position is recoverable from the value.
+ for (let i = 0; i < r.values.length; i++) {
+ const v = r.values[i] as number;
+ const label = r.index.at(i);
+ const origPos = (v - 100) / 100; // 0, 1, or 2
+ const expectedLabel: string | null = ["a", "b", "c"][origPos] ?? null;
+ expect(label).toBe(expectedLabel);
+ }
+ });
+
+ test("property: result length is always n", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.integer(), { minLength: 1, maxLength: 20 }),
+ fc.nat({ max: 5 }),
+ (arr, n) => {
+ const s = new Series({ data: arr });
+ // Clamp n to the pool size: without replacement, a larger n throws (see test above).
+ const safeN = Math.min(n, arr.length);
+ const r = sampleSeries(s, { n: safeN, randomState: 0 });
+ expect(r.values.length).toBe(safeN);
+ },
+ ),
+ );
+ });
+
+ test("property: without replacement, no repeated index positions", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.integer({ min: 0, max: 100 }), { minLength: 3, maxLength: 20 }),
+ fc.integer({ min: 1, max: 3 }),
+ (arr, n) => {
+ const s = new Series({ data: arr });
+ const r = sampleSeries(s, { n, replace: false, randomState: 42 });
+ // Check no index label is repeated (each row label unique since RangeIndex)
+ const labels = Array.from({ length: r.index.size }, (_, i) => r.index.at(i));
+ expect(new Set(labels).size).toBe(labels.length);
+ },
+ ),
+ );
+ });
+});
+
+// ─── sampleDataFrame ──────────────────────────────────────────────────────────
+
+// sampleDataFrame: rows are sampled by default; columns when axis is 1 or "columns".
+describe("sampleDataFrame", () => {
+ test("sample rows (axis=0)", () => {
+ const df = DataFrame.fromRecords([{ a: 1 }, { a: 2 }, { a: 3 }, { a: 4 }]);
+ const r = sampleDataFrame(df, { n: 2, randomState: 0 });
+ expect(r.shape[0]).toBe(2);
+ expect(r.shape[1]).toBe(1);
+ });
+
+ test("sample columns (axis=1)", () => {
+ const df = DataFrame.fromColumns({ x: [1, 2], y: [3, 4], z: [5, 6] });
+ const r = sampleDataFrame(df, { n: 2, axis: 1, randomState: 0 });
+ // Two of the three columns are kept; the row count is untouched.
+ expect(r.shape[1]).toBe(2);
+ expect(r.shape[0]).toBe(2);
+ });
+
+ test("frac sampling", () => {
+ const df = DataFrame.fromRecords([{ a: 1 }, { a: 2 }, { a: 3 }, { a: 4 }]);
+ const r = sampleDataFrame(df, { frac: 0.5, randomState: 0 });
+ expect(r.shape[0]).toBe(2);
+ });
+
+ test("replace=true allows row repetition", () => {
+ // A single-row frame sampled three times can only repeat that row.
+ const df = DataFrame.fromRecords([{ a: 99 }]);
+ const r = sampleDataFrame(df, { n: 3, replace: true, randomState: 0 });
+ expect(r.shape[0]).toBe(3);
+ expect(r.col("a").values.every((v) => v === 99)).toBe(true);
+ });
+
+ test("deterministic with randomState", () => {
+ const df = DataFrame.fromRecords([{ a: 1 }, { a: 2 }, { a: 3 }, { a: 4 }, { a: 5 }]);
+ const r1 = sampleDataFrame(df, { n: 3, randomState: 5 });
+ const r2 = sampleDataFrame(df, { n: 3, randomState: 5 });
+ expect(r1.col("a").values).toEqual(r2.col("a").values);
+ });
+
+ test("sampled rows contain values from original", () => {
+ const df = DataFrame.fromRecords([{ a: 10 }, { a: 20 }, { a: 30 }]);
+ const allowed = new Set([10, 20, 30]);
+ const r = sampleDataFrame(df, { n: 2, randomState: 0 });
+ for (const v of r.col("a").values) {
+ expect(allowed.has(v as number)).toBe(true);
+ }
+ });
+
+ test("all columns preserved when sampling rows", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+ const r = sampleDataFrame(df, { n: 2, randomState: 0 });
+ expect(r.columns.values).toEqual(["a", "b"]);
+ });
+
+ test("axis='columns' string form", () => {
+ const df = DataFrame.fromColumns({ x: [1, 2], y: [3, 4], z: [5, 6] });
+ const r = sampleDataFrame(df, { n: 1, axis: "columns", randomState: 0 });
+ expect(r.shape[1]).toBe(1);
+ });
+});
diff --git a/tests/reshape/explode.test.ts b/tests/reshape/explode.test.ts
new file mode 100644
index 00000000..0ffdfa96
--- /dev/null
+++ b/tests/reshape/explode.test.ts
@@ -0,0 +1,341 @@
+/**
+ * Tests for src/reshape/explode.ts — explode list-valued cells into rows.
+ */
+
+import { describe, expect, it } from "bun:test";
+import fc from "fast-check";
+import { DataFrame, type Scalar, Series } from "../../src/index.ts";
+import { explodeDataFrame, explodeSeries } from "../../src/index.ts";
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** Copy a Series' values into a plain array so `toEqual` can compare them. */
+function seriesValues(s: Series): Scalar[] {
+  return Array.from(s.values);
+}
+
+/** Copy a Series' index labels into a plain array. */
+function seriesLabels(s: Series): unknown[] {
+  return Array.from(s.index.values);
+}
+
+/** Copy the values of column `col` of `df` into a plain array. */
+function colValues(df: DataFrame, col: string): Scalar[] {
+  return Array.from(df.col(col).values);
+}
+
+/** Copy a DataFrame's row labels into a plain array. */
+function dfLabels(df: DataFrame): unknown[] {
+  return Array.from(df.index.values);
+}
+
+// ─── explodeSeries ─────────────────────────────────────────────────────────────
+
+// explodeSeries: list-valued cells become one row per element; scalars, nulls and
+// empty lists each yield exactly one row. List data is cast through `unknown`
+// because arrays are not part of the `Scalar` type.
+describe("explodeSeries", () => {
+ describe("basic list expansion", () => {
+ it("expands array-valued cells into individual rows", () => {
+ const s = new Series({
+ data: [
+ [1, 2, 3],
+ [4, 5],
+ ] as unknown as Scalar[],
+ name: "x",
+ });
+ expect(seriesValues(explodeSeries(s))).toEqual([1, 2, 3, 4, 5]);
+ });
+
+ it("passes scalar values through unchanged", () => {
+ const s = new Series({ data: [1, 2, 3] as Scalar[], name: "x" });
+ expect(seriesValues(explodeSeries(s))).toEqual([1, 2, 3]);
+ });
+
+ it("mixed arrays and scalars", () => {
+ const s = new Series({
+ data: [[10, 20], 30, [40]] as unknown as Scalar[],
+ name: "x",
+ });
+ expect(seriesValues(explodeSeries(s))).toEqual([10, 20, 30, 40]);
+ });
+
+ it("null value produces single null row", () => {
+ const s = new Series({ data: [null, 1] as Scalar[], name: "x" });
+ expect(seriesValues(explodeSeries(s))).toEqual([null, 1]);
+ });
+
+ it("empty array produces single null row", () => {
+ const s = new Series({
+ data: [[], [1, 2]] as unknown as Scalar[],
+ name: "x",
+ });
+ expect(seriesValues(explodeSeries(s))).toEqual([null, 1, 2]);
+ });
+
+ it("preserves series name", () => {
+ const s = new Series({ data: [[1, 2]] as unknown as Scalar[], name: "myname" });
+ expect(explodeSeries(s).name).toBe("myname");
+ });
+ });
+
+ describe("index handling", () => {
+ it("duplicates labels by default (ignoreIndex=false)", () => {
+ const s = new Series({
+ data: [[1, 2], [3]] as unknown as Scalar[],
+ index: ["a", "b"],
+ name: "x",
+ });
+ const result = explodeSeries(s);
+ // Each output row keeps the label of the row it was exploded from.
+ expect(seriesLabels(result)).toEqual(["a", "a", "b"]);
+ });
+
+ it("resets to RangeIndex when ignoreIndex=true", () => {
+ const s = new Series({
+ data: [[1, 2], [3]] as unknown as Scalar[],
+ index: ["a", "b"],
+ name: "x",
+ });
+ const result = explodeSeries(s, { ignoreIndex: true });
+ expect(seriesLabels(result)).toEqual([0, 1, 2]);
+ });
+
+ it("preserves numeric labels", () => {
+ const s = new Series({
+ data: [
+ [10, 20],
+ [30, 40],
+ ] as unknown as Scalar[],
+ index: [100, 200],
+ name: "x",
+ });
+ const result = explodeSeries(s);
+ expect(seriesLabels(result)).toEqual([100, 100, 200, 200]);
+ });
+ });
+
+ describe("edge cases", () => {
+ it("empty series returns empty series", () => {
+ const s = new Series({ data: [], name: "x" });
+ const result = explodeSeries(s);
+ expect(result.values.length).toBe(0);
+ });
+
+ it("single-element arrays expand correctly", () => {
+ const s = new Series({
+ data: [[42], [99]] as unknown as Scalar[],
+ name: "x",
+ });
+ expect(seriesValues(explodeSeries(s))).toEqual([42, 99]);
+ });
+
+ it("all null values", () => {
+ const s = new Series({ data: [null, null] as Scalar[], name: "x" });
+ expect(seriesValues(explodeSeries(s))).toEqual([null, null]);
+ });
+ });
+});
+
+// ─── explodeDataFrame ──────────────────────────────────────────────────────────
+
+// explodeDataFrame: the named column(s) are exploded row-wise while every other
+// column repeats its value for each generated row.
+describe("explodeDataFrame", () => {
+ describe("single column explosion", () => {
+ it("explodes one column, repeats other columns", () => {
+ const df = DataFrame.fromColumns({
+ a: [1, 2] as Scalar[],
+ b: [[10, 20], [30]] as unknown as Scalar[],
+ });
+ const result = explodeDataFrame(df, "b");
+ expect(result.shape[0]).toBe(3);
+ expect(colValues(result, "a")).toEqual([1, 1, 2]);
+ expect(colValues(result, "b")).toEqual([10, 20, 30]);
+ });
+
+ it("handles scalar values in explode column", () => {
+ const df = DataFrame.fromColumns({
+ a: [1, 2, 3] as Scalar[],
+ b: [10, 20, 30] as Scalar[],
+ });
+ const result = explodeDataFrame(df, "b");
+ expect(result.shape[0]).toBe(3);
+ expect(colValues(result, "b")).toEqual([10, 20, 30]);
+ });
+
+ it("null in explode column → null row", () => {
+ const df = DataFrame.fromColumns({
+ a: [1, 2] as Scalar[],
+ b: [null, [3, 4]] as unknown as Scalar[],
+ });
+ const result = explodeDataFrame(df, "b");
+ expect(result.shape[0]).toBe(3);
+ expect(colValues(result, "b")).toEqual([null, 3, 4]);
+ expect(colValues(result, "a")).toEqual([1, 2, 2]);
+ });
+
+ it("empty array in explode column → null row", () => {
+ const df = DataFrame.fromColumns({
+ a: [1, 2] as Scalar[],
+ b: [[], [5, 6]] as unknown as Scalar[],
+ });
+ const result = explodeDataFrame(df, "b");
+ expect(colValues(result, "b")).toEqual([null, 5, 6]);
+ expect(colValues(result, "a")).toEqual([1, 2, 2]);
+ });
+
+ it("preserves column order", () => {
+ const df = DataFrame.fromColumns({
+ x: [1] as Scalar[],
+ y: [[2, 3]] as unknown as Scalar[],
+ z: [4] as Scalar[],
+ });
+ const result = explodeDataFrame(df, "y");
+ expect(result.columns.values).toEqual(["x", "y", "z"]);
+ });
+ });
+
+ describe("multi-column explosion", () => {
+ it("explodes two columns together (same-length arrays)", () => {
+ const df = DataFrame.fromColumns({
+ a: [
+ [1, 2],
+ [3, 4],
+ ] as unknown as Scalar[],
+ b: [
+ ["x", "y"],
+ ["p", "q"],
+ ] as unknown as Scalar[],
+ c: [10, 20] as Scalar[],
+ });
+ const result = explodeDataFrame(df, ["a", "b"]);
+ // Elements are paired position by position within each source row.
+ expect(result.shape[0]).toBe(4);
+ expect(colValues(result, "a")).toEqual([1, 2, 3, 4]);
+ expect(colValues(result, "b")).toEqual(["x", "y", "p", "q"]);
+ expect(colValues(result, "c")).toEqual([10, 10, 20, 20]);
+ });
+
+ it("pads shorter column with null on mismatched lengths", () => {
+ const df = DataFrame.fromColumns({
+ a: [[1, 2, 3]] as unknown as Scalar[],
+ b: [["x", "y"]] as unknown as Scalar[],
+ });
+ const result = explodeDataFrame(df, ["a", "b"]);
+ expect(result.shape[0]).toBe(3);
+ expect(colValues(result, "a")).toEqual([1, 2, 3]);
+ expect(colValues(result, "b")).toEqual(["x", "y", null]);
+ });
+ });
+
+ describe("index handling", () => {
+ it("duplicates row labels by default", () => {
+ const df = DataFrame.fromColumns(
+ { a: [[1, 2], [3]] as unknown as Scalar[] },
+ { index: ["r0", "r1"] },
+ );
+ const result = explodeDataFrame(df, "a");
+ expect(dfLabels(result)).toEqual(["r0", "r0", "r1"]);
+ });
+
+ it("resets to RangeIndex when ignoreIndex=true", () => {
+ const df = DataFrame.fromColumns(
+ { a: [[1, 2], [3]] as unknown as Scalar[] },
+ { index: ["r0", "r1"] },
+ );
+ const result = explodeDataFrame(df, "a", { ignoreIndex: true });
+ expect(dfLabels(result)).toEqual([0, 1, 2]);
+ });
+ });
+
+ describe("error handling", () => {
+ it("throws when column does not exist", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2] as Scalar[] });
+ expect(() => explodeDataFrame(df, "missing")).toThrow("Column 'missing' not found");
+ });
+ });
+
+ describe("edge cases", () => {
+ it("empty DataFrame returns empty DataFrame", () => {
+ const df = DataFrame.fromColumns({ a: [] as Scalar[] });
+ const result = explodeDataFrame(df, "a");
+ expect(result.shape[0]).toBe(0);
+ });
+
+ it("single column DataFrame", () => {
+ const df = DataFrame.fromColumns({ x: [[1, 2, 3]] as unknown as Scalar[] });
+ const result = explodeDataFrame(df, "x");
+ expect(colValues(result, "x")).toEqual([1, 2, 3]);
+ });
+
+ it("multiple rows with varying list lengths", () => {
+ const df = DataFrame.fromColumns({
+ id: [1, 2, 3] as Scalar[],
+ vals: [[1], [2, 3], [4, 5, 6]] as unknown as Scalar[],
+ });
+ const result = explodeDataFrame(df, "vals");
+ expect(result.shape[0]).toBe(6);
+ expect(colValues(result, "id")).toEqual([1, 2, 2, 3, 3, 3]);
+ expect(colValues(result, "vals")).toEqual([1, 2, 3, 4, 5, 6]);
+ });
+ });
+});
+
+// ─── property-based tests ──────────────────────────────────────────────────────
+
+// Property tests over random mixes of integers and integer lists.
+describe("explodeSeries — property tests", () => {
+ it("total output length equals sum of list lengths (scalars count as 1)", () => {
+ fc.assert(
+ fc.property(
+ // Lists are generated with minLength 1: an empty list explodes to a single
+ // null row (see the "empty array" case), which would break the length sum.
+ fc.array(fc.oneof(fc.integer(), fc.array(fc.integer(), { minLength: 1, maxLength: 5 })), {
+ minLength: 1,
+ maxLength: 20,
+ }),
+ (items) => {
+ const data = items as unknown as Scalar[];
+ const s = new Series({ data, name: "test" });
+ const result = explodeSeries(s);
+ const expectedLen = items.reduce((sum: number, v) => {
+ if (Array.isArray(v)) return sum + (v as unknown[]).length;
+ return sum + 1;
+ }, 0);
+ return result.values.length === expectedLen;
+ },
+ ),
+ );
+ });
+
+ it("ignore_index produces RangeIndex 0..n-1", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.oneof(fc.integer(), fc.array(fc.integer(), { minLength: 1, maxLength: 3 })), {
+ minLength: 0,
+ maxLength: 10,
+ }),
+ (items) => {
+ const s = new Series({ data: items as unknown as Scalar[], name: "t" });
+ const result = explodeSeries(s, { ignoreIndex: true });
+ const labels = result.index.values as unknown[];
+ // Every label must equal its own position.
+ return labels.every((v, i) => v === i);
+ },
+ ),
+ );
+ });
+});
+
+// Property test: ids must repeat once per element of the list they sit next to.
+describe("explodeDataFrame — property tests", () => {
+  it("non-exploded columns repeat values correctly", () => {
+    fc.assert(
+      fc.property(
+        fc.array(
+          fc.record({
+            id: fc.integer({ min: 0, max: 100 }),
+            vals: fc.array(fc.integer(), { minLength: 1, maxLength: 4 }),
+          }),
+          { minLength: 1, maxLength: 10 },
+        ),
+        (rows) => {
+          const df = DataFrame.fromColumns({
+            id: rows.map((r) => r.id) as Scalar[],
+            vals: rows.map((r) => r.vals) as unknown as Scalar[],
+          });
+          const result = explodeDataFrame(df, "vals");
+          // Each id value should repeat as many times as the corresponding vals array length
+          const expectedIds: number[] = rows.flatMap((r) => r.vals.map(() => r.id));
+          const actualIds = colValues(result, "id") as number[];
+          // Compare lengths first: `every` over a shorter (or empty) `actualIds`
+          // would otherwise pass without checking the missing rows.
+          return (
+            actualIds.length === expectedIds.length &&
+            actualIds.every((v, i) => v === expectedIds[i])
+          );
+        },
+      ),
+    );
+  });
+});
diff --git a/tests/stats/date_range.test.ts b/tests/stats/date_range.test.ts
new file mode 100644
index 00000000..76490486
--- /dev/null
+++ b/tests/stats/date_range.test.ts
@@ -0,0 +1,465 @@
+/**
+ * Tests for stats/date_range — generate fixed-frequency Date sequences.
+ */
+
+import { describe, expect, it } from "bun:test";
+import fc from "fast-check";
+import { advanceDate, dateRange, parseFreq } from "../../src/index.ts";
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+function utc(y: number, mo: number, d: number, h = 0, min = 0, s = 0): Date {
+ return new Date(Date.UTC(y, mo - 1, d, h, min, s));
+}
+
+function ms(d: Date): number {
+ return d.getTime();
+}
+
+// ─── parseFreq ────────────────────────────────────────────────────────────────
+
+describe("parseFreq", () => {
+ it("parses plain 'D'", () => {
+ const pf = parseFreq("D");
+ expect(pf.n).toBe(1);
+ expect(pf.unit).toBe("D");
+ expect(pf.anchor).toBe(0);
+ });
+
+ it("parses '2D'", () => {
+ const pf = parseFreq("2D");
+ expect(pf.n).toBe(2);
+ expect(pf.unit).toBe("D");
+ });
+
+ it("normalises 'H' → 'h'", () => {
+ expect(parseFreq("H").unit).toBe("h");
+ });
+
+ it("normalises 'T' → 'min'", () => {
+ expect(parseFreq("T").unit).toBe("min");
+ });
+
+ it("normalises 'S' → 's'", () => {
+ expect(parseFreq("S").unit).toBe("s");
+ });
+
+ it("normalises 'L' → 'ms'", () => {
+ expect(parseFreq("L").unit).toBe("ms");
+ });
+
+ it("normalises 'A' → 'YE'", () => {
+ expect(parseFreq("A").unit).toBe("YE");
+ });
+
+ it("normalises 'AS' → 'YS'", () => {
+ expect(parseFreq("AS").unit).toBe("YS");
+ });
+
+ it("normalises 'Q' → 'QE'", () => {
+ expect(parseFreq("Q").unit).toBe("QE");
+ });
+
+ it("normalises 'M' → 'ME'", () => {
+ expect(parseFreq("M").unit).toBe("ME");
+ });
+
+ it("parses 'W' with default anchor = 0 (Sun)", () => {
+ const pf = parseFreq("W");
+ expect(pf.unit).toBe("W");
+ expect(pf.anchor).toBe(0);
+ });
+
+ it("parses 'W-MON' with anchor = 1", () => {
+ const pf = parseFreq("W-MON");
+ expect(pf.unit).toBe("W");
+ expect(pf.anchor).toBe(1);
+ });
+
+ it("parses 'W-FRI' with anchor = 5", () => {
+ const pf = parseFreq("W-FRI");
+ expect(pf.unit).toBe("W");
+ expect(pf.anchor).toBe(5);
+ });
+
+ it("parses '3MS'", () => {
+ const pf = parseFreq("3MS");
+ expect(pf.n).toBe(3);
+ expect(pf.unit).toBe("MS");
+ });
+
+ it("throws for unrecognised freq", () => {
+ expect(() => parseFreq("UNKNOWN")).toThrow(RangeError);
+ });
+});
+
+// ─── advanceDate ──────────────────────────────────────────────────────────────
+
+describe("advanceDate — D", () => {
+ it("advances one calendar day", () => {
+ const d = utc(2024, 1, 1);
+ expect(ms(advanceDate(d, parseFreq("D")))).toBe(ms(utc(2024, 1, 2)));
+ });
+
+ it("advances 2 days with '2D'", () => {
+ const d = utc(2024, 1, 1);
+ expect(ms(advanceDate(d, parseFreq("2D")))).toBe(ms(utc(2024, 1, 3)));
+ });
+});
+
+describe("advanceDate — h/min/s/ms", () => {
+ it("advances 1 hour", () => {
+ const d = utc(2024, 1, 1, 0, 0, 0);
+ expect(ms(advanceDate(d, parseFreq("h")))).toBe(ms(utc(2024, 1, 1, 1, 0, 0)));
+ });
+
+ it("advances 30 minutes with '30min'", () => {
+ const d = utc(2024, 1, 1, 0, 0, 0);
+ const r = advanceDate(d, parseFreq("30min"));
+ expect(ms(r)).toBe(ms(utc(2024, 1, 1, 0, 30, 0)));
+ });
+
+ it("advances 1 second", () => {
+ const d = utc(2024, 1, 1);
+ const r = advanceDate(d, parseFreq("s"));
+ expect(r.getTime() - d.getTime()).toBe(1_000);
+ });
+
+ it("advances 1 ms", () => {
+ const d = utc(2024, 1, 1);
+ const r = advanceDate(d, parseFreq("ms"));
+ expect(r.getTime() - d.getTime()).toBe(1);
+ });
+});
+
+describe("advanceDate — B (business day)", () => {
+ it("Mon → Tue", () => {
+ const d = utc(2024, 1, 1); // Monday
+ expect(ms(advanceDate(d, parseFreq("B")))).toBe(ms(utc(2024, 1, 2)));
+ });
+
+ it("Fri → Mon (skips weekend)", () => {
+ const d = utc(2024, 1, 5); // Friday
+ expect(ms(advanceDate(d, parseFreq("B")))).toBe(ms(utc(2024, 1, 8)));
+ });
+});
+
+describe("advanceDate — MS/ME", () => {
+ it("MS advances to next month-start", () => {
+ const d = utc(2024, 1, 1);
+ expect(ms(advanceDate(d, parseFreq("MS")))).toBe(ms(utc(2024, 2, 1)));
+ });
+
+ it("ME advances to next month-end", () => {
+ const d = utc(2024, 1, 31);
+ expect(ms(advanceDate(d, parseFreq("ME")))).toBe(ms(utc(2024, 2, 29))); // 2024 leap year
+ });
+});
+
+describe("advanceDate — QS/QE", () => {
+ it("QS from Jan goes to Apr 1", () => {
+ const d = utc(2024, 1, 1);
+ expect(ms(advanceDate(d, parseFreq("QS")))).toBe(ms(utc(2024, 4, 1)));
+ });
+
+ it("QE from Jan goes to Mar 31", () => {
+ const d = utc(2024, 1, 1);
+ expect(ms(advanceDate(d, parseFreq("QE")))).toBe(ms(utc(2024, 3, 31)));
+ });
+});
+
+describe("advanceDate — YS/YE", () => {
+ it("YS advances to Jan 1 next year", () => {
+ const d = utc(2024, 6, 15);
+ expect(ms(advanceDate(d, parseFreq("YS")))).toBe(ms(utc(2025, 1, 1)));
+ });
+
+ it("YE advances to Dec 31 next year", () => {
+ const d = utc(2024, 6, 15); // NOTE(review): pandas' YearEnd rolls 2024-06-15 forward to 2024-12-31 — confirm 2025 is intended
+ expect(ms(advanceDate(d, parseFreq("YE")))).toBe(ms(utc(2025, 12, 31)));
+ });
+});
+
+// ─── dateRange — basic daily ──────────────────────────────────────────────────
+
+describe("dateRange — daily", () => {
+ it("start + periods = 5 gives 5 dates", () => {
+ const r = dateRange({ start: "2024-01-01", periods: 5 });
+ expect(r).toHaveLength(5);
+ expect(ms(r[0] as Date)).toBe(ms(utc(2024, 1, 1)));
+ expect(ms(r[4] as Date)).toBe(ms(utc(2024, 1, 5)));
+ });
+
+ it("start + end covers the range", () => {
+ const r = dateRange({ start: "2024-01-01", end: "2024-01-05" });
+ expect(r).toHaveLength(5);
+ });
+
+ it("consecutive dates are 1 day apart", () => {
+ const r = dateRange({ start: "2024-03-28", periods: 5 });
+ for (let i = 1; i < r.length; i++) {
+ expect((r[i] as Date).getTime() - (r[i - 1] as Date).getTime()).toBe(86_400_000);
+ }
+ });
+
+ it("end + periods gives correct start", () => {
+ const r = dateRange({ end: "2024-01-05", periods: 5 });
+ expect(r).toHaveLength(5);
+ expect(ms(r[0] as Date)).toBe(ms(utc(2024, 1, 1)));
+ expect(ms(r[4] as Date)).toBe(ms(utc(2024, 1, 5)));
+ });
+});
+
+// ─── dateRange — inclusive ────────────────────────────────────────────────────
+
+describe("dateRange — inclusive", () => {
+ it("'both' includes start and end", () => {
+ const r = dateRange({ start: "2024-01-01", end: "2024-01-03", inclusive: "both" });
+ expect(r).toHaveLength(3);
+ });
+
+ it("'neither' excludes start and end", () => {
+ const r = dateRange({ start: "2024-01-01", end: "2024-01-05", inclusive: "neither" });
+ expect(r).toHaveLength(3);
+ expect(ms(r[0] as Date)).toBe(ms(utc(2024, 1, 2)));
+ expect(ms(r[2] as Date)).toBe(ms(utc(2024, 1, 4)));
+ });
+
+ it("'left' excludes end only", () => {
+ const r = dateRange({ start: "2024-01-01", end: "2024-01-04", inclusive: "left" });
+ expect(r).toHaveLength(3);
+ expect(ms(r[2] as Date)).toBe(ms(utc(2024, 1, 3)));
+ });
+
+ it("'right' excludes start only", () => {
+ const r = dateRange({ start: "2024-01-01", end: "2024-01-04", inclusive: "right" });
+ expect(r).toHaveLength(3);
+ expect(ms(r[0] as Date)).toBe(ms(utc(2024, 1, 2)));
+ });
+});
+
+// ─── dateRange — hourly ───────────────────────────────────────────────────────
+
+describe("dateRange — hourly", () => {
+ it("6 hours from midnight", () => {
+ const r = dateRange({
+ start: "2024-01-01T00:00:00Z",
+ end: "2024-01-01T06:00:00Z",
+ freq: "h",
+ });
+ expect(r).toHaveLength(7);
+ expect(ms(r[6] as Date)).toBe(ms(utc(2024, 1, 1, 6)));
+ });
+
+ it("2-hourly gives half as many steps", () => {
+ const r = dateRange({
+ start: "2024-01-01T00:00:00Z",
+ periods: 4,
+ freq: "2H",
+ });
+ expect(r).toHaveLength(4);
+ expect((r[3] as Date).getTime() - (r[0] as Date).getTime()).toBe(3 * 2 * 3_600_000);
+ });
+});
+
+// ─── dateRange — business days ────────────────────────────────────────────────
+
+describe("dateRange — business days", () => {
+ it("5 business days starting Monday", () => {
+ const r = dateRange({ start: "2024-01-01", periods: 5, freq: "B" }); // Mon 2024-01-01
+ expect(r).toHaveLength(5);
+ // Mon Tue Wed Thu Fri
+ expect(ms(r[4] as Date)).toBe(ms(utc(2024, 1, 5)));
+ });
+
+ it("skips weekend: Friday → Monday", () => {
+ const r = dateRange({ start: "2024-01-05", periods: 2, freq: "B" }); // Fri
+ expect(ms(r[1] as Date)).toBe(ms(utc(2024, 1, 8)));
+ });
+});
+
+// ─── dateRange — weekly ───────────────────────────────────────────────────────
+
+describe("dateRange — weekly", () => {
+ it("4 weekly dates land on Sundays", () => {
+ const r = dateRange({ start: "2024-01-01", periods: 4, freq: "W" }); // Mon start
+ for (const d of r) {
+ expect(d.getUTCDay()).toBe(0); // Sunday
+ }
+ expect(r).toHaveLength(4);
+ });
+
+ it("W-MON lands on Mondays", () => {
+ const r = dateRange({ start: "2024-01-01", periods: 3, freq: "W-MON" }); // Mon start
+ for (const d of r) {
+ expect(d.getUTCDay()).toBe(1);
+ }
+ });
+});
+
+// ─── dateRange — month start/end ─────────────────────────────────────────────
+
+describe("dateRange — MS", () => {
+ it("generates 6 month-starts", () => {
+ const r = dateRange({ start: "2024-01-01", periods: 6, freq: "MS" });
+ expect(r).toHaveLength(6);
+ for (const d of r) {
+ expect(d.getUTCDate()).toBe(1);
+ }
+ expect(ms(r[5] as Date)).toBe(ms(utc(2024, 6, 1)));
+ });
+
+ it("ME: generates month-ends", () => {
+ const r = dateRange({ start: "2024-01-31", periods: 3, freq: "ME" });
+ expect(r).toHaveLength(3);
+ const days = r.map((d) => d.getUTCDate());
+ expect(days[0]).toBe(31); // Jan
+ expect(days[1]).toBe(29); // Feb (leap)
+ expect(days[2]).toBe(31); // Mar
+ });
+});
+
+// ─── dateRange — quarter start/end ───────────────────────────────────────────
+
+describe("dateRange — QS", () => {
+ it("generates 4 quarter-starts", () => {
+ const r = dateRange({ start: "2024-01-01", periods: 4, freq: "QS" });
+ expect(r).toHaveLength(4);
+ const months = r.map((d) => d.getUTCMonth() + 1);
+ expect(months).toStrictEqual([1, 4, 7, 10]); // Jan Apr Jul Oct
+ });
+
+ it("QE: generates 4 quarter-ends", () => {
+ const r = dateRange({ start: "2024-01-01", periods: 4, freq: "QE" });
+ const months = r.map((d) => d.getUTCMonth() + 1);
+ expect(months).toStrictEqual([3, 6, 9, 12]);
+ expect(r[0]?.getUTCDate()).toBe(31); // Mar 31
+ expect(r[1]?.getUTCDate()).toBe(30); // Jun 30
+ });
+});
+
+// ─── dateRange — year start/end ───────────────────────────────────────────────
+
+describe("dateRange — YS/YE", () => {
+ it("YS generates Jan 1 for each year", () => {
+ const r = dateRange({ start: "2024-01-01", periods: 3, freq: "YS" });
+ expect(r).toHaveLength(3);
+ const years = r.map((d) => d.getUTCFullYear());
+ expect(years).toStrictEqual([2024, 2025, 2026]);
+ });
+
+ it("YE generates Dec 31 for each year", () => {
+ const r = dateRange({ start: "2024-06-15", periods: 3, freq: "YE" });
+ for (const d of r) {
+ expect(d.getUTCMonth()).toBe(11);
+ expect(d.getUTCDate()).toBe(31);
+ }
+ });
+
+ it("A is alias for YE", () => {
+ const r1 = dateRange({ start: "2024-06-15", periods: 2, freq: "YE" });
+ const r2 = dateRange({ start: "2024-06-15", periods: 2, freq: "A" });
+ expect(r1.map(ms)).toStrictEqual(r2.map(ms));
+ });
+});
+
+// ─── dateRange — normalize ────────────────────────────────────────────────────
+
+describe("dateRange — normalize", () => {
+ it("snaps start to midnight UTC", () => {
+ const r = dateRange({
+ start: new Date("2024-01-01T15:30:00Z"),
+ periods: 3,
+ freq: "D",
+ normalize: true,
+ });
+ expect(r[0]?.getUTCHours()).toBe(0);
+ expect(r[0]?.getUTCMinutes()).toBe(0);
+ });
+});
+
+// ─── dateRange — numeric / Date inputs ───────────────────────────────────────
+
+describe("dateRange — input types", () => {
+ it("accepts Date objects", () => {
+ const start = new Date("2024-01-01T00:00:00Z");
+ const r = dateRange({ start, periods: 3 });
+ expect(r).toHaveLength(3);
+ });
+
+ it("accepts numeric timestamps", () => {
+ const ts = Date.UTC(2024, 0, 1);
+ const r = dateRange({ start: ts, periods: 3 });
+ expect(r).toHaveLength(3);
+ expect(ms(r[0] as Date)).toBe(ts);
+ });
+});
+
+// ─── dateRange — error cases ─────────────────────────────────────────────────
+
+describe("dateRange — errors", () => {
+ it("throws if fewer than two of start/end/periods are given", () => {
+ expect(() => dateRange({ start: "2024-01-01" })).toThrow(RangeError);
+ });
+
+ it("throws for bad frequency", () => {
+ expect(() => dateRange({ start: "2024-01-01", periods: 3, freq: "BOGUS" })).toThrow(RangeError);
+ });
+
+ it("returns empty array when start > end", () => {
+ const r = dateRange({ start: "2024-01-10", end: "2024-01-05" });
+ expect(r).toHaveLength(0);
+ });
+});
+
+// ─── property tests ───────────────────────────────────────────────────────────
+
+describe("dateRange — property tests", () => {
+ it("length equals periods for start+periods", () => {
+ fc.assert(
+ fc.property(fc.integer({ min: 1, max: 50 }), (periods) => {
+ const r = dateRange({ start: "2024-01-01", periods });
+ return r.length === periods;
+ }),
+ );
+ });
+
+ it("all dates are strictly ascending", () => {
+ fc.assert(
+ fc.property(fc.integer({ min: 2, max: 20 }), (periods) => {
+ const r = dateRange({ start: "2024-01-01", periods, freq: "D" });
+ for (let i = 1; i < r.length; i++) {
+ if ((r[i] as Date).getTime() <= (r[i - 1] as Date).getTime()) {
+ return false;
+ }
+ }
+ return true;
+ }),
+ );
+ });
+
+ it("end + periods recovers start correctly (daily)", () => {
+ fc.assert(
+ fc.property(fc.integer({ min: 2, max: 30 }), (periods) => {
+ const end = "2024-06-30";
+ const r = dateRange({ end, periods, freq: "D" });
+ const expected = dateRange({ start: r[0] as Date, periods, freq: "D" });
+ return r.map(ms).join() === expected.map(ms).join();
+ }),
+ );
+ });
+
+ it("start+end over the span produced by start+periods yields the same length", () => {
+ fc.assert(
+ fc.property(fc.integer({ min: 1, max: 20 }), (periods) => {
+ const r = dateRange({ start: "2024-01-01", periods, freq: "D" }); // reference range built from a period count
+ const last = r.at(-1);
+ if (last === undefined) { // narrows `last`; not expected to trigger since periods >= 1
+ return true;
+ }
+ const r2 = dateRange({ start: "2024-01-01", end: last, freq: "D" }); // same span, expressed as start+end
+ return r2.length === r.length;
+ }),
+ );
+ });
+});
diff --git a/tests/stats/diff_shift.test.ts b/tests/stats/diff_shift.test.ts
new file mode 100644
index 00000000..c713c772
--- /dev/null
+++ b/tests/stats/diff_shift.test.ts
@@ -0,0 +1,325 @@
+/**
+ * Tests for stats/diff_shift.ts
+ *
+ * Covers:
+ * - diffSeries: default (periods=1), custom periods, negative periods, non-numeric passthrough
+ * - shiftSeries: forward, backward, custom fillValue
+ * - diffDataFrame: axis=0 (col-wise), axis=1 (row-wise)
+ * - shiftDataFrame: axis=0 (col-wise), axis=1 (row-wise)
+ * - Property-based tests with fast-check
+ */
+
+import { describe, expect, test } from "bun:test";
+import fc from "fast-check";
+import {
+ DataFrame,
+ Series,
+ diffDataFrame,
+ diffSeries,
+ shiftDataFrame,
+ shiftSeries,
+} from "../../src/index.ts";
+import type { Scalar } from "../../src/index.ts";
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+function makeSeries(data: Scalar[], name?: string): Series {
+ return new Series({ data, name: name ?? "s" });
+}
+
+// ─── diffSeries ───────────────────────────────────────────────────────────────
+
+describe("diffSeries", () => {
+ test("default periods=1", () => {
+ const s = makeSeries([1, 3, 6, 10, 15]);
+ const result = diffSeries(s);
+ expect(result.values).toEqual([null, 2, 3, 4, 5]);
+ });
+
+ test("periods=2", () => {
+ const s = makeSeries([1, 3, 6, 10, 15]);
+ const result = diffSeries(s, { periods: 2 });
+ expect(result.values).toEqual([null, null, 5, 7, 9]);
+ });
+
+ test("periods=-1 (backward)", () => {
+ const s = makeSeries([1, 3, 6, 10, 15]);
+ const result = diffSeries(s, { periods: -1 });
+ expect(result.values).toEqual([-2, -3, -4, -5, null]);
+ });
+
+ test("preserves index and name", () => {
+ const s = makeSeries([10, 20, 30], "myname");
+ const result = diffSeries(s);
+ expect(result.name).toBe("myname");
+ expect(result.index.size).toBe(3);
+ });
+
+ test("non-numeric values produce null", () => {
+ const s = makeSeries([1, null, 3, "x", 5]);
+ const result = diffSeries(s);
+ // Every position is null: [no predecessor, null-1, 3-null, "x"-3, 5-"x"] — each pair has a non-numeric operand.
+ expect(result.values[0]).toBe(null);
+ expect(result.values[1]).toBe(null);
+ expect(result.values[2]).toBe(null);
+ expect(result.values[3]).toBe(null);
+ expect(result.values[4]).toBe(null);
+ });
+
+ test("single element → [null]", () => {
+ const s = makeSeries([42]);
+ expect(diffSeries(s).values).toEqual([null]);
+ });
+
+ test("empty series", () => {
+ const s = makeSeries([]);
+ expect(diffSeries(s).values).toEqual([]);
+ });
+
+ test("periods larger than length → all null", () => {
+ const s = makeSeries([1, 2, 3]);
+ const result = diffSeries(s, { periods: 5 });
+ expect(result.values).toEqual([null, null, null]);
+ });
+
+ test("NaN values produce null", () => {
+ const s = makeSeries([1, Number.NaN, 3]);
+ const result = diffSeries(s);
+ expect(result.values[1]).toBe(null);
+ expect(result.values[2]).toBe(null);
+ });
+});
+
+// ─── shiftSeries ──────────────────────────────────────────────────────────────
+
+describe("shiftSeries", () => {
+ test("default periods=1, fills null", () => {
+ const s = makeSeries([1, 2, 3, 4, 5]);
+ expect(shiftSeries(s).values).toEqual([null, 1, 2, 3, 4]);
+ });
+
+ test("periods=2", () => {
+ const s = makeSeries([1, 2, 3, 4, 5]);
+ expect(shiftSeries(s, { periods: 2 }).values).toEqual([null, null, 1, 2, 3]);
+ });
+
+ test("periods=-1 (backward)", () => {
+ const s = makeSeries([1, 2, 3, 4, 5]);
+ expect(shiftSeries(s, { periods: -1 }).values).toEqual([2, 3, 4, 5, null]);
+ });
+
+ test("periods=-2", () => {
+ const s = makeSeries([1, 2, 3, 4, 5]);
+ expect(shiftSeries(s, { periods: -2 }).values).toEqual([3, 4, 5, null, null]);
+ });
+
+ test("custom fillValue", () => {
+ const s = makeSeries([1, 2, 3]);
+ expect(shiftSeries(s, { periods: 1, fillValue: 0 }).values).toEqual([0, 1, 2]);
+ });
+
+ test("periods=0 → same values", () => {
+ const s = makeSeries([10, 20, 30]);
+ expect(shiftSeries(s, { periods: 0 }).values).toEqual([10, 20, 30]);
+ });
+
+ test("preserves index and name", () => {
+ const s = makeSeries([1, 2, 3], "col");
+ const result = shiftSeries(s);
+ expect(result.name).toBe("col");
+ expect(result.index.size).toBe(3);
+ });
+
+ test("periods >= length → all fillValue", () => {
+ const s = makeSeries([1, 2, 3]);
+ expect(shiftSeries(s, { periods: 5, fillValue: -1 }).values).toEqual([-1, -1, -1]);
+ });
+
+ test("empty series", () => {
+ const s = makeSeries([]);
+ expect(shiftSeries(s).values).toEqual([]);
+ });
+});
+
+// ─── diffDataFrame (axis=0) ───────────────────────────────────────────────────
+
+describe("diffDataFrame axis=0 (column-wise)", () => {
+ test("default periods=1 each column independently", () => {
+ const df = DataFrame.fromColumns({ a: [1, 3, 6], b: [10, 20, 35] });
+ const result = diffDataFrame(df);
+ expect(result.col("a").values).toEqual([null, 2, 3]);
+ expect(result.col("b").values).toEqual([null, 10, 15]);
+ });
+
+ test("periods=2", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2, 4, 8] });
+ const result = diffDataFrame(df, { periods: 2 });
+ expect(result.col("a").values).toEqual([null, null, 3, 6]);
+ });
+
+ test("preserves index", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2, 3] });
+ const result = diffDataFrame(df);
+ expect(result.index.size).toBe(3);
+ });
+});
+
+// ─── diffDataFrame (axis=1) ───────────────────────────────────────────────────
+
+describe("diffDataFrame axis=1 (row-wise)", () => {
+ test("default periods=1 across columns", () => {
+ const df = DataFrame.fromColumns({ a: [1, 10], b: [4, 16], c: [9, 25] });
+ const result = diffDataFrame(df, { axis: 1 });
+ // col a: always null (no prior column)
+ expect(result.col("a").values).toEqual([null, null]);
+ // col b: b - a = [3, 6]
+ expect(result.col("b").values).toEqual([3, 6]);
+ // col c: c - b = [5, 9]
+ expect(result.col("c").values).toEqual([5, 9]);
+ });
+});
+
+// ─── shiftDataFrame (axis=0) ─────────────────────────────────────────────────
+
+describe("shiftDataFrame axis=0 (column-wise)", () => {
+ test("default periods=1", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+ const result = shiftDataFrame(df);
+ expect(result.col("a").values).toEqual([null, 1, 2]);
+ expect(result.col("b").values).toEqual([null, 4, 5]);
+ });
+
+ test("periods=-1", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2, 3] });
+ expect(shiftDataFrame(df, { periods: -1 }).col("a").values).toEqual([2, 3, null]);
+ });
+
+ test("custom fillValue", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2, 3] });
+ const result = shiftDataFrame(df, { periods: 2, fillValue: 0 });
+ expect(result.col("a").values).toEqual([0, 0, 1]);
+ });
+
+ test("preserves column structure", () => {
+ const df = DataFrame.fromColumns({ x: [1, 2], y: [3, 4] });
+ const result = shiftDataFrame(df);
+ expect(result.columns.values).toEqual(["x", "y"]);
+ });
+});
+
+// ─── shiftDataFrame (axis=1) ─────────────────────────────────────────────────
+
+describe("shiftDataFrame axis=1 (row-wise)", () => {
+ test("periods=1 shifts columns right", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4], c: [5, 6] });
+ const result = shiftDataFrame(df, { axis: 1, periods: 1, fillValue: 0 });
+ // col a gets fillValue (no prior col)
+ expect(result.col("a").values).toEqual([0, 0]);
+ // col b gets values from col a
+ expect(result.col("b").values).toEqual([1, 2]);
+ // col c gets values from col b
+ expect(result.col("c").values).toEqual([3, 4]);
+ });
+
+ test("periods=-1 shifts columns left", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4], c: [5, 6] });
+ const result = shiftDataFrame(df, { axis: 1, periods: -1, fillValue: 0 });
+ // col a gets values from col b
+ expect(result.col("a").values).toEqual([3, 4]);
+ // col b gets values from col c
+ expect(result.col("b").values).toEqual([5, 6]);
+ // col c gets fillValue
+ expect(result.col("c").values).toEqual([0, 0]);
+ });
+});
+
+// ─── property-based tests ─────────────────────────────────────────────────────
+
+describe("property-based: diffSeries", () => {
+ test("diff(periods=1) length equals input length", () => {
+ fc.assert(
+ fc.property(fc.array(fc.float({ noNaN: true }), { minLength: 0, maxLength: 50 }), (arr) => {
+ const s = makeSeries(arr);
+ const result = diffSeries(s);
+ expect(result.size).toBe(s.size);
+ }),
+ );
+ });
+
+ test("diff[0] is always null for periods >= 1", () => {
+ fc.assert(
+ fc.property(fc.array(fc.float({ noNaN: true }), { minLength: 1, maxLength: 30 }), (arr) => {
+ const s = makeSeries(arr);
+ const result = diffSeries(s, { periods: 1 });
+ expect(result.values[0]).toBe(null);
+ }),
+ );
+ });
+
+ test("shift+diff reconstructs original for numeric arrays (first element is null)", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.integer({ min: -1000, max: 1000 }), { minLength: 2, maxLength: 20 }),
+ (arr) => {
+ const data = arr as Scalar[];
+ const s = makeSeries(data);
+ const shifted = shiftSeries(s, { periods: 1, fillValue: 0 });
+ const d = diffSeries(s);
+ // Invariant checked for i >= 1: diff[i] + shifted[i] equals s[i] (diff[i] = s[i] - s[i-1], shifted[i] = s[i-1]).
+ // Index 0 is skipped because diff[0] is null (there is no preceding element).
+ for (let i = 1; i < arr.length; i++) {
+ const diffVal = d.values[i] as number;
+ const shiftedVal = shifted.values[i] as number;
+ expect(diffVal + shiftedVal).toBeCloseTo(arr[i] as number, 10);
+ }
+ },
+ ),
+ );
+ });
+});
+
+describe("property-based: shiftSeries", () => {
+ test("shift preserves length", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.integer(), { minLength: 0, maxLength: 50 }),
+ fc.integer({ min: -20, max: 20 }),
+ (arr, periods) => {
+ const s = makeSeries(arr as Scalar[]);
+ const result = shiftSeries(s, { periods });
+ expect(result.size).toBe(s.size);
+ },
+ ),
+ );
+ });
+
+ test("shift(0) is identity", () => {
+ fc.assert(
+ fc.property(fc.array(fc.integer(), { minLength: 0, maxLength: 30 }), (arr) => {
+ const s = makeSeries(arr as Scalar[]);
+ const result = shiftSeries(s, { periods: 0 });
+ for (let i = 0; i < arr.length; i++) {
+ expect(result.values[i]).toBe(arr[i]);
+ }
+ }),
+ );
+ });
+
+ test("shift(n) then shift(-n) recovers original in the middle region", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.integer({ min: -100, max: 100 }), { minLength: 4, maxLength: 30 }),
+ fc.integer({ min: 1, max: 5 }),
+ (arr, n) => {
+ const s = makeSeries(arr as Scalar[]);
+ const shifted = shiftSeries(s, { periods: n, fillValue: null });
+ const recovered = shiftSeries(shifted, { periods: -n, fillValue: null });
+ // middle region (indices n..len-n) should match original
+ for (let i = n; i < arr.length - n; i++) {
+ expect(recovered.values[i]).toBe(arr[i]);
+ }
+ },
+ ),
+ );
+ });
+});
diff --git a/tests/stats/explode.test.ts b/tests/stats/explode.test.ts
index cf954773..bec68021 100644
--- a/tests/stats/explode.test.ts
+++ b/tests/stats/explode.test.ts
@@ -211,7 +211,7 @@ describe("explodeDataFrame", () => {
test("throws on unknown column", () => {
const df = DataFrame.fromColumns({ a: [[1, 2]] as unknown as Scalar[] });
- expect(() => explodeDataFrame(df, "z")).toThrow(/Column "z" not found/);
+ expect(() => explodeDataFrame(df, "z")).toThrow(/Column 'z' not found/);
});
test("empty DataFrame returns empty DataFrame", () => {
diff --git a/tests/stats/interval.test.ts b/tests/stats/interval.test.ts
new file mode 100644
index 00000000..d6d51938
--- /dev/null
+++ b/tests/stats/interval.test.ts
@@ -0,0 +1,536 @@
+/**
+ * Tests for stats/interval.ts — Interval, IntervalIndex, intervalRange.
+ */
+
+import { describe, expect, test } from "bun:test";
+import fc from "fast-check";
+import { Interval, IntervalIndex, intervalRange } from "../../src/stats/interval.ts";
+import type { ClosedType } from "../../src/stats/interval.ts";
+
+// ─── Interval ─────────────────────────────────────────────────────────────────
+
+describe("Interval", () => {
+ describe("construction", () => {
+ test("creates right-closed interval by default", () => {
+ const iv = new Interval(0, 5);
+ expect(iv.left).toBe(0);
+ expect(iv.right).toBe(5);
+ expect(iv.closed).toBe("right");
+ });
+
+ test("creates left-closed interval", () => {
+ const iv = new Interval(0, 5, "left");
+ expect(iv.closed).toBe("left");
+ });
+
+ test("creates both-closed interval", () => {
+ const iv = new Interval(0, 5, "both");
+ expect(iv.closed).toBe("both");
+ });
+
+ test("creates neither-closed interval", () => {
+ const iv = new Interval(0, 5, "neither");
+ expect(iv.closed).toBe("neither");
+ });
+
+ test("allows left === right (degenerate interval)", () => {
+ const iv = new Interval(3, 3, "both");
+ expect(iv.left).toBe(3);
+ expect(iv.right).toBe(3);
+ });
+
+ test("throws when left > right", () => {
+ expect(() => new Interval(5, 0)).toThrow(RangeError);
+ });
+
+ test("allows negative endpoints", () => {
+ const iv = new Interval(-10, -1);
+ expect(iv.left).toBe(-10);
+ expect(iv.right).toBe(-1);
+ });
+
+ test("allows floating-point endpoints", () => {
+ const iv = new Interval(0.25, 0.75);
+ expect(iv.left).toBe(0.25);
+ expect(iv.right).toBe(0.75);
+ });
+ });
+
+ describe("derived properties", () => {
+ test("length", () => {
+ expect(new Interval(0, 5).length).toBe(5);
+ expect(new Interval(-2, 3).length).toBe(5);
+ expect(new Interval(1.5, 4.5).length).toBeCloseTo(3);
+ });
+
+ test("mid", () => {
+ expect(new Interval(0, 4).mid).toBe(2);
+ expect(new Interval(-1, 1).mid).toBe(0);
+ expect(new Interval(0, 1).mid).toBe(0.5);
+ });
+
+ test("closedLeft / closedRight", () => {
+ expect(new Interval(0, 1, "right").closedLeft).toBe(false);
+ expect(new Interval(0, 1, "right").closedRight).toBe(true);
+ expect(new Interval(0, 1, "left").closedLeft).toBe(true);
+ expect(new Interval(0, 1, "left").closedRight).toBe(false);
+ expect(new Interval(0, 1, "both").closedLeft).toBe(true);
+ expect(new Interval(0, 1, "both").closedRight).toBe(true);
+ expect(new Interval(0, 1, "neither").closedLeft).toBe(false);
+ expect(new Interval(0, 1, "neither").closedRight).toBe(false);
+ });
+
+ test("isOpen / isClosed", () => {
+ expect(new Interval(0, 1, "neither").isOpen).toBe(true);
+ expect(new Interval(0, 1, "both").isClosed).toBe(true);
+ expect(new Interval(0, 1, "right").isOpen).toBe(false);
+ expect(new Interval(0, 1, "right").isClosed).toBe(false);
+ });
+ });
+
+ describe("contains", () => {
+ test("right-closed: includes right endpoint, excludes left", () => {
+ const iv = new Interval(0, 5);
+ expect(iv.contains(5)).toBe(true);
+ expect(iv.contains(0)).toBe(false);
+ expect(iv.contains(2.5)).toBe(true);
+ });
+
+ test("left-closed: includes left endpoint, excludes right", () => {
+ const iv = new Interval(0, 5, "left");
+ expect(iv.contains(0)).toBe(true);
+ expect(iv.contains(5)).toBe(false);
+ expect(iv.contains(2.5)).toBe(true);
+ });
+
+ test("both: includes both endpoints", () => {
+ const iv = new Interval(0, 5, "both");
+ expect(iv.contains(0)).toBe(true);
+ expect(iv.contains(5)).toBe(true);
+ expect(iv.contains(-0.001)).toBe(false);
+ expect(iv.contains(5.001)).toBe(false);
+ });
+
+ test("neither: excludes both endpoints", () => {
+ const iv = new Interval(0, 5, "neither");
+ expect(iv.contains(0)).toBe(false);
+ expect(iv.contains(5)).toBe(false);
+ expect(iv.contains(2.5)).toBe(true);
+ });
+
+ test("outside range", () => {
+ const iv = new Interval(1, 3);
+ expect(iv.contains(0.999)).toBe(false);
+ expect(iv.contains(3.001)).toBe(false);
+ });
+ });
+
+ describe("overlaps", () => {
+ test("overlapping interiors", () => {
+ const a = new Interval(0, 3);
+ const b = new Interval(2, 5);
+ expect(a.overlaps(b)).toBe(true);
+ expect(b.overlaps(a)).toBe(true);
+ });
+
+ test("touching endpoints — both closed", () => {
+ const a = new Interval(0, 2, "both");
+ const b = new Interval(2, 4, "both");
+ expect(a.overlaps(b)).toBe(true);
+ });
+
+ test("touching endpoints — one open side", () => {
+ const a = new Interval(0, 2, "right");
+ const b = new Interval(2, 4, "left");
+ // a = (0, 2] and b = [2, 4) are each closed at the shared point 2 (their open sides are 0 and 4), so they overlap.
+ expect(a.overlaps(b)).toBe(true);
+ });
+
+ test("completely disjoint", () => {
+ const a = new Interval(0, 1);
+ const b = new Interval(2, 3);
+ expect(a.overlaps(b)).toBe(false);
+ });
+
+ test("one contains the other", () => {
+ const outer = new Interval(0, 10);
+ const inner = new Interval(2, 5);
+ expect(outer.overlaps(inner)).toBe(true);
+ expect(inner.overlaps(outer)).toBe(true);
+ });
+
+ test("identical intervals overlap", () => {
+ const a = new Interval(1, 4);
+ expect(a.overlaps(a)).toBe(true);
+ });
+ });
+
+ describe("equals", () => {
+ test("equal intervals", () => {
+ expect(new Interval(0, 1).equals(new Interval(0, 1))).toBe(true);
+ });
+
+ test("different endpoints", () => {
+ expect(new Interval(0, 1).equals(new Interval(0, 2))).toBe(false);
+ });
+
+ test("different closed", () => {
+ expect(new Interval(0, 1, "right").equals(new Interval(0, 1, "left"))).toBe(false);
+ });
+ });
+
+ describe("toString", () => {
+ test("right-closed (default)", () => {
+ expect(new Interval(0, 5).toString()).toBe("(0, 5]");
+ });
+
+ test("left-closed", () => {
+ expect(new Interval(0, 5, "left").toString()).toBe("[0, 5)");
+ });
+
+ test("both-closed", () => {
+ expect(new Interval(0, 5, "both").toString()).toBe("[0, 5]");
+ });
+
+ test("neither-closed", () => {
+ expect(new Interval(0, 5, "neither").toString()).toBe("(0, 5)");
+ });
+ });
+});
+
+// ─── IntervalIndex ────────────────────────────────────────────────────────────
+
+describe("IntervalIndex", () => {
+ describe("fromBreaks", () => {
+ test("basic 3-interval index", () => {
+ const idx = IntervalIndex.fromBreaks([0, 1, 2, 3]);
+ expect(idx.size).toBe(3);
+ expect(idx.get(0).toString()).toBe("(0, 1]");
+ expect(idx.get(1).toString()).toBe("(1, 2]");
+ expect(idx.get(2).toString()).toBe("(2, 3]");
+ });
+
+ test("left-closed", () => {
+ const idx = IntervalIndex.fromBreaks([0, 1, 2], { closed: "left" });
+ expect(idx.get(0).toString()).toBe("[0, 1)");
+ });
+
+ test("throws with fewer than 2 breaks", () => {
+ expect(() => IntervalIndex.fromBreaks([0])).toThrow(RangeError);
+ expect(() => IntervalIndex.fromBreaks([])).toThrow(RangeError);
+ });
+
+ test("preserves name", () => {
+ const idx = IntervalIndex.fromBreaks([0, 1, 2], { name: "score" });
+ expect(idx.name).toBe("score");
+ });
+ });
+
+ describe("fromArrays", () => {
+ test("basic index from left/right arrays", () => {
+ const idx = IntervalIndex.fromArrays([0, 2, 4], [2, 4, 6]);
+ expect(idx.size).toBe(3);
+ expect(idx.get(0).left).toBe(0);
+ expect(idx.get(0).right).toBe(2);
+ });
+
+ test("throws on mismatched lengths", () => {
+ expect(() => IntervalIndex.fromArrays([0, 1], [1, 2, 3])).toThrow(RangeError);
+ });
+ });
+
+ describe("fromIntervals", () => {
+ test("from array of Interval objects", () => {
+ const ivs = [new Interval(0, 1), new Interval(1, 2)];
+ const idx = IntervalIndex.fromIntervals(ivs, "test");
+ expect(idx.size).toBe(2);
+ expect(idx.name).toBe("test");
+ });
+ });
+
+ describe("properties", () => {
+ const idx = IntervalIndex.fromBreaks([0, 1, 2, 3]);
+
+ test("left", () => {
+ expect([...idx.left]).toEqual([0, 1, 2]);
+ });
+
+ test("right", () => {
+ expect([...idx.right]).toEqual([1, 2, 3]);
+ });
+
+ test("mid", () => {
+ expect([...idx.mid]).toEqual([0.5, 1.5, 2.5]);
+ });
+
+ test("length (interval widths)", () => {
+ expect([...idx.length]).toEqual([1, 1, 1]);
+ });
+
+ test("closed from first interval", () => {
+ expect(idx.closed).toBe("right");
+ });
+
+ test("values", () => {
+ expect(idx.values.length).toBe(3);
+ });
+ });
+
+ describe("isMonotonic", () => {
+ test("sorted non-overlapping intervals are monotonic", () => {
+ const idx = IntervalIndex.fromBreaks([0, 1, 2, 3]);
+ expect(idx.isMonotonic).toBe(true);
+ });
+
+ test("overlapping intervals are not monotonic", () => {
+ const idx = IntervalIndex.fromIntervals([new Interval(0, 2), new Interval(1, 3)]);
+ expect(idx.isMonotonic).toBe(false);
+ });
+
+ test("empty index is monotonic", () => {
+ const idx = IntervalIndex.fromIntervals([]);
+ expect(idx.isMonotonic).toBe(true);
+ });
+ });
+
+ describe("get", () => {
+ test("valid index returns interval", () => {
+ const idx = IntervalIndex.fromBreaks([0, 5, 10]);
+ expect(idx.get(0).right).toBe(5);
+ expect(idx.get(1).left).toBe(5);
+ });
+
+ test("out-of-range throws", () => {
+ const idx = IntervalIndex.fromBreaks([0, 1]);
+ expect(() => idx.get(5)).toThrow(RangeError);
+ });
+ });
+
+ describe("indexOf", () => {
+ test("finds value in correct interval", () => {
+ const idx = IntervalIndex.fromBreaks([0, 1, 2, 3]);
+ expect(idx.indexOf(0.5)).toBe(0);
+ expect(idx.indexOf(1.5)).toBe(1);
+ expect(idx.indexOf(2.5)).toBe(2);
+ });
+
+ test("right endpoint included in interval", () => {
+ const idx = IntervalIndex.fromBreaks([0, 1, 2]);
+ expect(idx.indexOf(1)).toBe(0); // (0,1] — 1 is in first interval
+ });
+
+ test("returns -1 for out-of-range", () => {
+ const idx = IntervalIndex.fromBreaks([0, 1, 2]);
+ expect(idx.indexOf(-1)).toBe(-1);
+ expect(idx.indexOf(3)).toBe(-1);
+ });
+ });
+
+ describe("overlapping", () => {
+ test("returns intervals that overlap query", () => {
+ const idx = IntervalIndex.fromBreaks([0, 1, 2, 3, 4]);
+ const query = new Interval(1.5, 2.5);
+ const result = idx.overlapping(query);
+ expect(result.size).toBe(2); // (1,2] and (2,3]
+ });
+
+ test("no overlapping — returns empty", () => {
+ const idx = IntervalIndex.fromBreaks([0, 1, 2]);
+ const query = new Interval(5, 10);
+ expect(idx.overlapping(query).size).toBe(0);
+ });
+ });
+
+ describe("append", () => {
+ test("concatenates two indexes", () => {
+ const a = IntervalIndex.fromBreaks([0, 1, 2]);
+ const b = IntervalIndex.fromBreaks([2, 3, 4]);
+ const combined = a.append(b);
+ expect(combined.size).toBe(4);
+ });
+ });
+
+ describe("toString", () => {
+ test("renders pandas-style string", () => {
+ const idx = IntervalIndex.fromBreaks([0, 1, 2]);
+ expect(idx.toString()).toContain("IntervalIndex");
+ expect(idx.toString()).toContain("(0, 1]");
+ });
+ });
+});
+
+// ─── intervalRange ────────────────────────────────────────────────────────────
+
+describe("intervalRange", () => {
+ test("periods — 4 equal-width intervals from 0 to 1", () => {
+ const idx = intervalRange(0, 1, { periods: 4 });
+ expect(idx.size).toBe(4);
+ expect(idx.get(0).left).toBeCloseTo(0);
+ expect(idx.get(0).right).toBeCloseTo(0.25);
+ expect(idx.get(3).right).toBeCloseTo(1);
+ });
+
+ test("freq — 2.5-wide intervals from 0 to 10", () => {
+ const idx = intervalRange(0, 10, { freq: 2.5 });
+ expect(idx.size).toBe(4);
+ expect(idx.get(0).right).toBeCloseTo(2.5);
+ expect(idx.get(3).right).toBeCloseTo(10);
+ });
+
+ test("freq — exact 3 intervals from 0 to 3", () => {
+ const idx = intervalRange(0, 3, { freq: 1 });
+ expect(idx.size).toBe(3);
+ });
+
+ test("respects closed option", () => {
+ const idx = intervalRange(0, 4, { periods: 2, closed: "left" });
+ expect(idx.closed).toBe("left");
+ expect(idx.get(0).toString()).toBe("[0, 2)");
+ });
+
+ test("respects name option", () => {
+ const idx = intervalRange(0, 10, { periods: 5, name: "bins" });
+ expect(idx.name).toBe("bins");
+ });
+
+ test("throws when end <= start", () => {
+ expect(() => intervalRange(5, 0, { periods: 3 })).toThrow(RangeError);
+ expect(() => intervalRange(3, 3, { periods: 3 })).toThrow(RangeError);
+ });
+
+ test("throws when both periods and freq are given", () => {
+ expect(() => intervalRange(0, 10, { periods: 5, freq: 2 })).toThrow(RangeError);
+ });
+
+ test("throws when neither periods nor freq are given", () => {
+ expect(() => intervalRange(0, 10, {} as never)).toThrow(RangeError);
+ });
+
+ test("throws when periods < 1", () => {
+ expect(() => intervalRange(0, 10, { periods: 0 })).toThrow(RangeError);
+ });
+
+ test("throws when freq <= 0", () => {
+ expect(() => intervalRange(0, 10, { freq: -1 })).toThrow(RangeError);
+ });
+});
+
+// ─── Property-based tests ─────────────────────────────────────────────────────
+
+describe("Interval properties (fast-check)", () => {
+ test("both-closed interval contains its midpoint", () => {
+ fc.assert(
+ fc.property(
+ fc.float({ min: -100, max: 100, noNaN: true }),
+ fc.float({ min: -100, max: 100, noNaN: true }),
+ (a, b) => {
+ if (a > b) {
+ return true; // skip invalid
+ }
+ const iv = new Interval(a, b, "both");
+ const mid = (a + b) / 2; // a <= b here, so mid lies between the endpoints
+ return iv.contains(mid);
+ },
+ ),
+ );
+ });
+
+ test("length is always non-negative", () => {
+ fc.assert(
+ fc.property(
+ fc.float({ min: -1000, max: 1000, noNaN: true }),
+ fc.float({ min: 0, max: 1000, noNaN: true }),
+ (left, delta) => {
+ const iv = new Interval(left, left + delta);
+ return iv.length >= 0;
+ },
+ ),
+ );
+ });
+
+ test("mid is within [left, right]", () => {
+ fc.assert(
+ fc.property(
+ fc.float({ min: -1000, max: 1000, noNaN: true }),
+ fc.float({ min: 0, max: 1000, noNaN: true }),
+ (left, delta) => {
+ const iv = new Interval(left, left + delta);
+ return iv.mid >= iv.left && iv.mid <= iv.right;
+ },
+ ),
+ );
+ });
+
+ test("fromBreaks produces size = breaks.length - 1", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.integer({ min: 0, max: 100 }), { minLength: 2, maxLength: 20 }),
+ (arr) => {
+ const sorted = [...new Set(arr)].sort((a, b) => a - b);
+ if (sorted.length < 2) {
+ return true;
+ }
+ const idx = IntervalIndex.fromBreaks(sorted);
+ return idx.size === sorted.length - 1;
+ },
+ ),
+ );
+ });
+
+ test("intervalRange with periods produces correct count", () => {
+ fc.assert(
+ fc.property(
+ fc.integer({ min: 1, max: 100 }),
+ fc.float({ min: 0, max: 100, noNaN: true }),
+ fc.float({ min: 1, max: 100, noNaN: true }),
+ (periods, start, span) => {
+ const idx = intervalRange(start, start + span, { periods });
+ return idx.size === periods;
+ },
+ ),
+ );
+ });
+
+ test("intervalRange left/right endpoints are monotonic", () => {
+ fc.assert(
+ fc.property(
+ fc.integer({ min: 1, max: 20 }),
+ fc.float({ min: 0, max: 50, noNaN: true }),
+ fc.float({ min: 1, max: 50, noNaN: true }),
+ (periods, start, span) => {
+ const idx = intervalRange(start, start + span, { periods });
+ const rights = idx.right;
+ for (let i = 1; i < rights.length; i++) {
+ if ((rights[i] as number) < (rights[i - 1] as number)) {
+ return false;
+ }
+ }
+ return true;
+ },
+ ),
+ );
+ });
+});
+
+// ─── closed types matrix ─────────────────────────────────────────────────────
+
+describe("Interval.contains — all closed types", () => {
+ const closedTypes: ClosedType[] = ["left", "right", "both", "neither"];
+
+ for (const closed of closedTypes) {
+ test(`${closed} — interior value always included`, () => {
+ const iv = new Interval(0, 10, closed);
+ expect(iv.contains(5)).toBe(true);
+ });
+
+ test(`${closed} — value far below is excluded`, () => {
+ const iv = new Interval(0, 10, closed);
+ expect(iv.contains(-1)).toBe(false);
+ });
+
+ test(`${closed} — value far above is excluded`, () => {
+ const iv = new Interval(0, 10, closed);
+ expect(iv.contains(11)).toBe(false);
+ });
+ }
+});
diff --git a/tests/stats/na_ops.test.ts b/tests/stats/na_ops.test.ts
new file mode 100644
index 00000000..340406ac
--- /dev/null
+++ b/tests/stats/na_ops.test.ts
@@ -0,0 +1,280 @@
+/**
+ * Tests for na_ops — missing-value utilities (isna, notna, ffill, bfill).
+ */
+
+import { describe, expect, it } from "bun:test";
+import fc from "fast-check";
+import {
+ DataFrame,
+ Series,
+ bfillSeries,
+ dataFrameBfill,
+ dataFrameFfill,
+ ffillSeries,
+ isna,
+ isnull,
+ notna,
+ notnull,
+} from "../../src/index.ts";
+
+// ─── isna / notna ─────────────────────────────────────────────────────────────
+
+describe("isna (scalar)", () => {
+ it("returns true for null", () => expect(isna(null)).toBe(true));
+ it("returns true for undefined", () => expect(isna(undefined)).toBe(true));
+ it("returns true for NaN", () => expect(isna(Number.NaN)).toBe(true));
+ it("returns false for 0", () => expect(isna(0)).toBe(false));
+ it("returns false for empty string", () => expect(isna("")).toBe(false));
+ it("returns false for false", () => expect(isna(false)).toBe(false));
+ it("returns false for a number", () => expect(isna(42)).toBe(false));
+});
+
+describe("notna (scalar)", () => {
+ it("returns false for null", () => expect(notna(null)).toBe(false));
+ it("returns false for NaN", () => expect(notna(Number.NaN)).toBe(false));
+ it("returns true for 42", () => expect(notna(42)).toBe(true));
+ it("returns true for a string", () => expect(notna("hello")).toBe(true));
+});
+
+describe("isnull / notnull aliases", () => {
+ it("isnull equals isna for scalar", () => {
+ expect(isnull(null)).toBe(isna(null));
+ expect(isnull(42)).toBe(isna(42));
+ });
+ it("notnull equals notna for scalar", () => {
+ expect(notnull(null)).toBe(notna(null));
+ expect(notnull(42)).toBe(notna(42));
+ });
+});
+
+describe("isna (Series)", () => {
+ it("returns boolean Series of correct length", () => {
+ const s = new Series({ data: [1, null, Number.NaN, 4] });
+ const result = isna(s);
+ expect(result).toBeInstanceOf(Series);
+ expect([...result.values]).toEqual([false, true, true, false]);
+ });
+
+ it("all present", () => {
+ const s = new Series({ data: [1, 2, 3] });
+ expect([...isna(s).values]).toEqual([false, false, false]);
+ });
+
+ it("all missing", () => {
+ const s = new Series({ data: [null, null, Number.NaN] });
+ expect([...isna(s).values]).toEqual([true, true, true]);
+ });
+});
+
+describe("notna (Series)", () => {
+ it("is the inverse of isna", () => {
+ const s = new Series({ data: [1, null, Number.NaN, 4] });
+ const na = isna(s).values;
+ const nna = notna(s).values;
+ for (let i = 0; i < na.length; i++) {
+ expect(nna[i]).toBe(!na[i]);
+ }
+ });
+});
+
+describe("isna (DataFrame)", () => {
+ it("returns DataFrame of booleans", () => {
+ const df = DataFrame.fromColumns({ a: [1, null], b: [Number.NaN, 2] });
+ const result = isna(df);
+ expect(result).toBeInstanceOf(DataFrame);
+ expect([...result.col("a").values]).toEqual([false, true]);
+ expect([...result.col("b").values]).toEqual([true, false]);
+ });
+});
+
+describe("notna (DataFrame)", () => {
+ it("returns inverse of isna DataFrame", () => {
+ const df = DataFrame.fromColumns({ a: [1, null], b: [Number.NaN, 2] });
+ expect([...notna(df).col("a").values]).toEqual([true, false]);
+ expect([...notna(df).col("b").values]).toEqual([false, true]);
+ });
+});
+
+// ─── ffillSeries ──────────────────────────────────────────────────────────────
+
+describe("ffillSeries", () => {
+ it("fills nulls with preceding value", () => {
+ const s = new Series({ data: [1, null, null, 4] });
+ expect([...ffillSeries(s).values]).toEqual([1, 1, 1, 4]);
+ });
+
+ it("leaves leading nulls untouched", () => {
+ const s = new Series({ data: [null, null, 3, null] });
+ expect([...ffillSeries(s).values]).toEqual([null, null, 3, 3]);
+ });
+
+ it("NaN is treated as missing", () => {
+ const s = new Series({ data: [2, Number.NaN, 5] });
+ const result = ffillSeries(s).values;
+ expect(result[0]).toBe(2);
+ expect(result[1]).toBe(2);
+ expect(result[2]).toBe(5);
+ });
+
+ it("respects limit option", () => {
+ const s = new Series({ data: [1, null, null, null, 5] });
+ expect([...ffillSeries(s, { limit: 1 }).values]).toEqual([1, 1, null, null, 5]);
+ });
+
+ it("preserves original Series", () => {
+ const s = new Series({ data: [1, null, 3] });
+ ffillSeries(s);
+ expect([...s.values]).toEqual([1, null, 3]);
+ });
+
+ it("empty Series returns empty", () => {
+ const s = new Series({ data: [] });
+ expect([...ffillSeries(s).values]).toEqual([]);
+ });
+
+ it("preserves name and index", () => {
+ const s = new Series({ data: [1, null], name: "x" });
+ const filled = ffillSeries(s);
+ expect(filled.name).toBe("x");
+ expect(filled.index.size).toBe(2);
+ });
+});
+
+// ─── bfillSeries ──────────────────────────────────────────────────────────────
+
+describe("bfillSeries", () => {
+ it("fills nulls with following value", () => {
+ const s = new Series({ data: [1, null, null, 4] });
+ expect([...bfillSeries(s).values]).toEqual([1, 4, 4, 4]);
+ });
+
+ it("leaves trailing nulls untouched", () => {
+ const s = new Series({ data: [null, 3, null, null] });
+ expect([...bfillSeries(s).values]).toEqual([3, 3, null, null]);
+ });
+
+ it("respects limit option", () => {
+ const s = new Series({ data: [1, null, null, null, 5] });
+ expect([...bfillSeries(s, { limit: 2 }).values]).toEqual([1, null, 5, 5, 5]);
+ });
+
+ it("empty Series returns empty", () => {
+ const s = new Series({ data: [] });
+ expect([...bfillSeries(s).values]).toEqual([]);
+ });
+});
+
+// ─── dataFrameFfill ───────────────────────────────────────────────────────────
+
+describe("dataFrameFfill (column-wise)", () => {
+ it("fills each column independently", () => {
+ const df = DataFrame.fromColumns({ a: [1, null, 3], b: [null, 2, null] });
+ const result = dataFrameFfill(df);
+ expect([...result.col("a").values]).toEqual([1, 1, 3]);
+ expect([...result.col("b").values]).toEqual([null, 2, 2]);
+ });
+
+ it("preserves index", () => {
+ const df = DataFrame.fromColumns({ x: [1, null] });
+ expect(dataFrameFfill(df).index.size).toBe(2);
+ });
+});
+
+describe("dataFrameFfill (row-wise)", () => {
+ it("fills across columns per row", () => {
+ const df = DataFrame.fromColumns({ a: [1, null], b: [null, null], c: [3, 4] });
+ const result = dataFrameFfill(df, { axis: 1 });
+ expect([...result.col("a").values]).toEqual([1, null]);
+ expect([...result.col("b").values]).toEqual([1, null]);
+ expect([...result.col("c").values]).toEqual([3, 4]);
+ });
+});
+
+// ─── dataFrameBfill ───────────────────────────────────────────────────────────
+
+describe("dataFrameBfill (column-wise)", () => {
+ it("fills each column backward", () => {
+ const df = DataFrame.fromColumns({ a: [null, null, 3], b: [1, null, null] });
+ const result = dataFrameBfill(df);
+ expect([...result.col("a").values]).toEqual([3, 3, 3]);
+ expect([...result.col("b").values]).toEqual([1, null, null]);
+ });
+});
+
+describe("dataFrameBfill (row-wise)", () => {
+ it("fills backward across columns per row", () => {
+ const df = DataFrame.fromColumns({ a: [null, 1], b: [null, null], c: [3, null] });
+ const result = dataFrameBfill(df, { axis: 1 });
+ expect([...result.col("a").values]).toEqual([3, 1]);
+ expect([...result.col("b").values]).toEqual([3, null]);
+ expect([...result.col("c").values]).toEqual([3, null]);
+ });
+});
+
+// ─── property-based tests ─────────────────────────────────────────────────────
+
+describe("property: ffill followed by bfill fills all if any non-null", () => {
+ it("all values filled when at least one is present", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.option(fc.integer({ min: 0, max: 100 }), { nil: null }), {
+ minLength: 1,
+ maxLength: 20,
+ }),
+ (raw) => {
+ const hasNonNull = raw.some((v) => v !== null);
+ if (!hasNonNull) {
+ return true;
+ }
+ const s = new Series({ data: raw });
+ const result = bfillSeries(ffillSeries(s));
+ return result.values.every((v) => v !== null);
+ },
+ ),
+ );
+ });
+});
+
+describe("property: ffill leaves no nulls after the first valid value", () => {
+ it("ffilled series has no nulls after first valid value", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.option(fc.integer({ min: -50, max: 50 }), { nil: null }), {
+ minLength: 0,
+ maxLength: 30,
+ }),
+ (raw) => {
+ const s = new Series({ data: raw });
+ const filled = ffillSeries(s).values;
+ let sawValid = false; // becomes true at the first non-null filled value
+ for (const v of filled) {
+ if (v !== null) {
+ sawValid = true;
+ }
+ if (sawValid && v === null) {
+ return false; // a gap survived after a valid value
+ }
+ }
+ return true;
+ },
+ ),
+ );
+ });
+});
+
+describe("property: isna is inverse of notna for scalars", () => {
+ it("isna(v) === !notna(v)", () => {
+ fc.assert(
+ fc.property(
+ fc.oneof(
+ fc.integer(),
+ fc.float({ noNaN: false }), // NaN is generated on purpose
+ fc.constant(null),
+ fc.string(),
+ fc.boolean(),
+ ),
+ (v) => isna(v as Parameters<typeof isna>[0]) === !notna(v as Parameters<typeof notna>[0]),
+ ),
+ );
+ });
+});
diff --git a/tests/stats/reduce_ops.test.ts b/tests/stats/reduce_ops.test.ts
new file mode 100644
index 00000000..3964bf9a
--- /dev/null
+++ b/tests/stats/reduce_ops.test.ts
@@ -0,0 +1,284 @@
+/**
+ * Tests for src/stats/reduce_ops.ts — nunique, any, all.
+ */
+import { describe, expect, it } from "bun:test";
+import fc from "fast-check";
+import {
+ DataFrame,
+ Series,
+ allDataFrame,
+ allSeries,
+ anyDataFrame,
+ anySeries,
+ nunique,
+ nuniqueSeries,
+} from "../../src/index.ts";
+import type { Scalar } from "../../src/index.ts";
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+function makeDf(): DataFrame {
+ return DataFrame.fromColumns({
+ a: [1, 2, 2, null],
+ b: [true, false, true, true],
+ c: ["x", "y", "x", "z"],
+ });
+}
+
+// ─── nuniqueSeries ────────────────────────────────────────────────────────────
+
+describe("nuniqueSeries", () => {
+ it("counts distinct non-null values by default", () => {
+ const s = new Series({ data: [1, 2, 2, null] });
+ expect(nuniqueSeries(s)).toBe(2);
+ });
+
+ it("includes null when dropna=false", () => {
+ const s = new Series({ data: [1, 2, 2, null] });
+ expect(nuniqueSeries(s, { dropna: false })).toBe(3);
+ });
+
+ it("returns 0 for empty series", () => {
+ const s = new Series({ data: [] });
+ expect(nuniqueSeries(s)).toBe(0);
+ });
+
+ it("counts 1 for all-same series", () => {
+ const s = new Series({ data: [3, 3, 3] });
+ expect(nuniqueSeries(s)).toBe(1);
+ });
+
+ it("treats NaN as missing by default", () => {
+ const s = new Series({ data: [1, Number.NaN, 1] });
+ expect(nuniqueSeries(s)).toBe(1);
+ });
+
+ it("includes NaN when dropna=false", () => {
+ const s = new Series({ data: [1, Number.NaN, 1] });
+ expect(nuniqueSeries(s, { dropna: false })).toBe(2);
+ });
+});
+
+// ─── nunique DataFrame ────────────────────────────────────────────────────────
+
+describe("nunique — DataFrame axis=0 (default)", () => {
+ it("counts distinct per column", () => {
+ const df = makeDf();
+ const result = nunique(df);
+ expect(result.values[0]).toBe(2); // a: 1,2 (null dropped)
+ expect(result.values[1]).toBe(2); // b: true, false
+ expect(result.values[2]).toBe(3); // c: x,y,z
+ });
+
+ it("result is indexed by column names", () => {
+ const df = makeDf();
+ const result = nunique(df);
+ expect([...result.index.values]).toEqual(["a", "b", "c"]);
+ });
+
+ it("includes null when dropna=false", () => {
+ const df = makeDf();
+ const result = nunique(df, { dropna: false });
+ expect(result.values[0]).toBe(3); // a: 1,2,null
+ });
+});
+
+describe("nunique — DataFrame axis=1", () => {
+ it("counts distinct per row", () => {
+ const df = DataFrame.fromColumns({ a: [1, 1], b: [1, 2] });
+ const result = nunique(df, { axis: 1 });
+ expect(result.values[0]).toBe(1); // row 0: 1,1 → 1 unique
+ expect(result.values[1]).toBe(2); // row 1: 1,2 → 2 unique
+ });
+
+ it("result is indexed by row index", () => {
+ const df = makeDf();
+ const result = nunique(df, { axis: "columns" });
+ expect(result.index.size).toBe(df.index.size);
+ });
+});
+
+// ─── anySeries ────────────────────────────────────────────────────────────────
+
+describe("anySeries", () => {
+ it("returns true when any element is truthy", () => {
+ const s = new Series({ data: [0, 0, 1] });
+ expect(anySeries(s)).toBe(true);
+ });
+
+ it("returns false when all elements are falsy", () => {
+ const s = new Series({ data: [0, 0, 0] });
+ expect(anySeries(s)).toBe(false);
+ });
+
+ it("returns false for empty series", () => {
+ const s = new Series({ data: [] });
+ expect(anySeries(s)).toBe(false);
+ });
+
+ it("skips null by default (skipna=true)", () => {
+ const s = new Series({ data: [null, null] });
+ expect(anySeries(s)).toBe(false);
+ });
+
+ it("treats null as falsy when skipna=false", () => {
+ const s = new Series({ data: [null] });
+ expect(anySeries(s, { skipna: false })).toBe(false); // NOTE(review): pandas documents NA as True when skipna=False — confirm this divergence is intended
+ });
+
+ it("returns true for boolean true", () => {
+ const s = new Series({ data: [false, true] });
+ expect(anySeries(s)).toBe(true);
+ });
+});
+
+// ─── anyDataFrame ─────────────────────────────────────────────────────────────
+
+describe("anyDataFrame — axis=0 (default)", () => {
+ it("returns true for columns with any truthy value", () => {
+ const df = DataFrame.fromColumns({ a: [0, 0, 1], b: [0, 0, 0] });
+ const result = anyDataFrame(df);
+ expect(result.values[0]).toBe(true);
+ expect(result.values[1]).toBe(false);
+ });
+
+ it("result indexed by column names", () => {
+ const df = makeDf();
+ const result = anyDataFrame(df);
+ expect([...result.index.values]).toEqual(["a", "b", "c"]);
+ });
+
+ it("boolOnly skips non-boolean columns", () => {
+ const df = makeDf();
+ const result = anyDataFrame(df, { boolOnly: true });
+ expect([...result.index.values]).toEqual(["b"]);
+ });
+});
+
+describe("anyDataFrame — axis=1", () => {
+ it("returns true for rows with any truthy value", () => {
+ const df = DataFrame.fromColumns({ a: [0, 1], b: [0, 0] });
+ const result = anyDataFrame(df, { axis: 1 });
+ expect(result.values[0]).toBe(false);
+ expect(result.values[1]).toBe(true);
+ });
+});
+
+// ─── allSeries ────────────────────────────────────────────────────────────────
+
+describe("allSeries", () => {
+ it("returns true when all elements are truthy", () => {
+ const s = new Series({ data: [1, 2, 3] });
+ expect(allSeries(s)).toBe(true);
+ });
+
+ it("returns false when any element is falsy", () => {
+ const s = new Series({ data: [1, 0, 3] });
+ expect(allSeries(s)).toBe(false);
+ });
+
+ it("returns true for empty series", () => {
+ const s = new Series({ data: [] });
+ expect(allSeries(s)).toBe(true);
+ });
+
+ it("skips null by default", () => {
+ const s = new Series({ data: [1, null, 2] });
+ expect(allSeries(s)).toBe(true);
+ });
+
+ it("treats null as falsy when skipna=false", () => {
+ const s = new Series({ data: [1, null, 2] });
+ expect(allSeries(s, { skipna: false })).toBe(false); // NOTE(review): pandas documents NA as True when skipna=False — confirm this divergence is intended
+ });
+});
+
+// ─── allDataFrame ─────────────────────────────────────────────────────────────
+
+describe("allDataFrame — axis=0 (default)", () => {
+ it("returns per-column all", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [0, 1, 1] });
+ const result = allDataFrame(df);
+ expect(result.values[0]).toBe(true);
+ expect(result.values[1]).toBe(false);
+ });
+
+ it("result indexed by column names", () => {
+ const df = makeDf();
+ const result = allDataFrame(df);
+ expect([...result.index.values]).toEqual(["a", "b", "c"]);
+ });
+
+ it("boolOnly skips non-boolean columns", () => {
+ const df = makeDf();
+ const result = allDataFrame(df, { boolOnly: true });
+ expect([...result.index.values]).toEqual(["b"]);
+ });
+});
+
+describe("allDataFrame — axis=1", () => {
+ it("returns per-row all", () => {
+ const df = DataFrame.fromColumns({ a: [1, 0], b: [1, 1] });
+ const result = allDataFrame(df, { axis: 1 });
+ expect(result.values[0]).toBe(true);
+ expect(result.values[1]).toBe(false);
+ });
+});
+
+// ─── property tests ───────────────────────────────────────────────────────────
+
+describe("nuniqueSeries — property tests", () => {
+ it("result is always between 0 and length", () => {
+ fc.assert(
+ fc.property(fc.array(fc.oneof(fc.integer(), fc.constant(null))), (arr) => {
+ const s = new Series({ data: arr as Scalar[] });
+ const u = nuniqueSeries(s);
+ return u >= 0 && u <= arr.length;
+ }),
+ );
+ });
+
+ it("nunique with dropna=false >= dropna=true", () => {
+ fc.assert(
+ fc.property(fc.array(fc.oneof(fc.integer(), fc.constant(null))), (arr) => {
+ const s = new Series({ data: arr as Scalar[] });
+ return nuniqueSeries(s, { dropna: false }) >= nuniqueSeries(s, { dropna: true });
+ }),
+ );
+ });
+});
+
+describe("anySeries — property tests", () => {
+ it("any(s) === !all(!s) for boolean arrays", () => {
+ fc.assert(
+ fc.property(fc.array(fc.boolean()), (arr) => {
+ const s = new Series({ data: arr as Scalar[] });
+ const negated = new Series({ data: arr.map((v) => !v) as Scalar[] });
+ return anySeries(s) === !allSeries(negated);
+ }),
+ );
+ });
+});
+
+describe("allSeries — property tests", () => {
+ it("all true array → allSeries returns true", () => {
+ fc.assert(
+ fc.property(fc.array(fc.constant(true)), (arr) => {
+ const s = new Series({ data: arr as Scalar[] });
+ return allSeries(s) === true;
+ }),
+ );
+ });
+
+ it("array with any false → allSeries returns false", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.boolean()).filter((arr) => arr.includes(false)),
+ (arr) => {
+ const s = new Series({ data: arr as Scalar[] });
+ return allSeries(s) === false;
+ },
+ ),
+ );
+ });
+});
diff --git a/tests/stats/shift_diff.test.ts b/tests/stats/shift_diff.test.ts
index 43b02541..4d498152 100644
--- a/tests/stats/shift_diff.test.ts
+++ b/tests/stats/shift_diff.test.ts
@@ -48,55 +48,55 @@ describe("shiftSeries", () => {
});
it("periods=-1 shifts values up by one", () => {
- const result = shiftSeries(s([1, 2, 3, 4]), -1);
+ const result = shiftSeries(s([1, 2, 3, 4]), { periods: -1 });
expect(arrEq(result.values, [2, 3, 4, null])).toBe(true);
});
it("periods=0 returns identical values", () => {
const data = [1, 2, 3, 4] as Scalar[];
- const result = shiftSeries(s(data), 0);
+ const result = shiftSeries(s(data), { periods: 0 });
expect(arrEq(result.values, data)).toBe(true);
});
it("periods larger than length → all null", () => {
- const result = shiftSeries(s([1, 2, 3]), 5);
+ const result = shiftSeries(s([1, 2, 3]), { periods: 5 });
expect(arrEq(result.values, [null, null, null])).toBe(true);
});
it("negative periods larger than length → all null", () => {
- const result = shiftSeries(s([1, 2, 3]), -5);
+ const result = shiftSeries(s([1, 2, 3]), { periods: -5 });
expect(arrEq(result.values, [null, null, null])).toBe(true);
});
it("preserves null/NaN values in the shifted region", () => {
- const result = shiftSeries(s([1, null, 3]), 1);
+ const result = shiftSeries(s([1, null, 3]), { periods: 1 });
expect(arrEq(result.values, [null, 1, null])).toBe(true);
});
it("works on string values", () => {
- const result = shiftSeries(s(["a", "b", "c"]), 1);
+ const result = shiftSeries(s(["a", "b", "c"]), { periods: 1 });
expect(arrEq(result.values, [null, "a", "b"])).toBe(true);
});
it("preserves index and name", () => {
const orig = s([10, 20, 30]);
- const result = shiftSeries(orig, 1);
+ const result = shiftSeries(orig, { periods: 1 });
expect(result.index.size).toBe(orig.index.size);
expect(result.length).toBe(orig.length);
});
it("empty series returns empty series", () => {
- const result = shiftSeries(s([]), 2);
+ const result = shiftSeries(s([]), { periods: 2 });
expect(result.length).toBe(0);
});
it("periods=2 shifts down by two positions", () => {
- const result = shiftSeries(s([10, 20, 30, 40]), 2);
+ const result = shiftSeries(s([10, 20, 30, 40]), { periods: 2 });
expect(arrEq(result.values, [null, null, 10, 20])).toBe(true);
});
it("periods=-2 shifts up by two positions", () => {
- const result = shiftSeries(s([10, 20, 30, 40]), -2);
+ const result = shiftSeries(s([10, 20, 30, 40]), { periods: -2 });
expect(arrEq(result.values, [30, 40, null, null])).toBe(true);
});
});
@@ -106,37 +106,34 @@ describe("shiftSeries", () => {
describe("diffSeries", () => {
it("periods=1 computes first differences", () => {
const result = diffSeries(s([1, 3, 6, 10]));
- expect(arrEq(result.values, [Number.NaN, 2, 3, 4])).toBe(true);
+ expect(arrEq(result.values, [null, 2, 3, 4])).toBe(true);
});
it("periods=2 computes lag-2 differences", () => {
- const result = diffSeries(s([1, 3, 6, 10]), 2);
- expect(arrEq(result.values, [Number.NaN, Number.NaN, 5, 7])).toBe(true);
+ const result = diffSeries(s([1, 3, 6, 10]), { periods: 2 });
+ expect(arrEq(result.values, [null, null, 5, 7])).toBe(true);
});
it("periods=-1 computes forward differences", () => {
- const result = diffSeries(s([1, 3, 6, 10]), -1);
- expect(arrEq(result.values, [-2, -3, -4, Number.NaN])).toBe(true);
+ const result = diffSeries(s([1, 3, 6, 10]), { periods: -1 });
+ expect(arrEq(result.values, [-2, -3, -4, null])).toBe(true);
});
- it("periods=0 yields all NaN (value minus itself is not useful)", () => {
- // diff with 0 lag: no valid pair since periods=0 means x[i] - x[i-0] = 0
- // pandas returns all zeros for periods=0; our impl does the same for numeric
- const result = diffSeries(s([1, 2, 3]), 0);
- // x[i] - x[i] = 0 for all i (all positions have a "previous" value with lag 0)
+ it("periods=0 yields all zeros (value minus itself)", () => {
+ // diff with 0 lag: x[i] - x[i] = 0 for all i
+ const result = diffSeries(s([1, 2, 3]), { periods: 0 });
expect(arrEq(result.values, [0, 0, 0])).toBe(true);
});
- it("null inputs produce NaN at missing positions", () => {
+ it("null inputs produce null at missing positions", () => {
const result = diffSeries(s([1, null, 3, 4]));
- // position 0: NaN (no prev); position 1: NaN (prev=1 but cur=null not finite)
- // position 2: NaN (prev=null not finite); position 3: 1 (4-3)
- expect(arrEq(result.values, [Number.NaN, Number.NaN, Number.NaN, 1])).toBe(true);
+ // position 0: null (no prev); position 1: null (cur=null); position 2: null (prev=null); position 3: 1 (4-3)
+ expect(arrEq(result.values, [null, null, null, 1])).toBe(true);
});
- it("NaN inputs produce NaN at those positions", () => {
+ it("NaN inputs produce null at those positions", () => {
const result = diffSeries(s([1, Number.NaN, 3]));
- expect(arrEq(result.values, [Number.NaN, Number.NaN, Number.NaN])).toBe(true);
+ expect(arrEq(result.values, [null, null, null])).toBe(true);
});
it("preserves index", () => {
@@ -146,12 +143,12 @@ describe("diffSeries", () => {
});
it("empty series returns empty", () => {
- expect(diffSeries(s([]), 1).length).toBe(0);
+ expect(diffSeries(s([]), { periods: 1 }).length).toBe(0);
});
- it("single element returns [NaN]", () => {
+ it("single element returns [null]", () => {
const result = diffSeries(s([42]));
- expect(Number.isNaN(result.values[0] as number)).toBe(true);
+ expect(result.values[0]).toBe(null);
});
});
@@ -164,7 +161,7 @@ describe("shiftSeries — property tests", () => {
fc.array(fc.float({ noNaN: true }), { minLength: 0, maxLength: 20 }),
fc.integer({ min: -10, max: 10 }),
(data, periods) => {
- const result = shiftSeries(new Series({ data }), periods);
+ const result = shiftSeries(new Series({ data }), { periods });
return result.length === data.length;
},
),
@@ -181,8 +178,8 @@ describe("shiftSeries — property tests", () => {
if (n >= data.length) {
return true;
}
- const shifted = shiftSeries(new Series({ data }), n);
- const back = shiftSeries(shifted, -n);
+ const shifted = shiftSeries(new Series({ data }), { periods: n });
+ const back = shiftSeries(shifted, { periods: -n });
// positions [0, length - n) should match the original
for (let i = 0; i < data.length - n; i++) {
if (back.values[i] !== data[i]) {
@@ -201,7 +198,7 @@ describe("shiftSeries — property tests", () => {
fc.array(fc.integer(), { minLength: 1, maxLength: 20 }),
fc.integer({ min: 1, max: 10 }),
(data: number[], n: number): boolean => {
- const result = shiftSeries(new Series({ data }), n);
+ const result = shiftSeries(new Series({ data }), { periods: n });
const cap = Math.min(n, data.length);
for (let i = 0; i < cap; i++) {
if (result.values[i] !== null) {
@@ -222,7 +219,7 @@ describe("diffSeries — property tests", () => {
fc.array(fc.float({ noNaN: true }), { minLength: 0, maxLength: 20 }),
fc.integer({ min: 1, max: 5 }),
(data, periods) => {
- const result = diffSeries(new Series({ data }), periods);
+ const result = diffSeries(new Series({ data }), { periods });
return result.length === data.length;
},
),
@@ -235,8 +232,8 @@ describe("diffSeries — property tests", () => {
fc.array(fc.integer({ min: -1000, max: 1000 }), { minLength: 2, maxLength: 20 }),
(data: number[]): boolean => {
const result = diffSeries(new Series({ data }));
- // position 0 must be NaN
- if (!Number.isNaN(result.values[0] as number)) {
+ // position 0 must be null
+ if (result.values[0] !== null) {
return false;
}
// remaining positions: result[i] = data[i] - data[i-1]
diff --git a/tests/stats/to_timedelta.test.ts b/tests/stats/to_timedelta.test.ts
new file mode 100644
index 00000000..87d63d98
--- /dev/null
+++ b/tests/stats/to_timedelta.test.ts
@@ -0,0 +1,412 @@
+/**
+ * Tests for stats/to_timedelta — convert scalars, arrays, and Series to Timedelta.
+ */
+
+import { describe, expect, it } from "bun:test";
+import fc from "fast-check";
+import { Series, Timedelta, formatTimedelta, parseFrac, toTimedelta } from "../../src/index.ts";
+import type { Scalar } from "../../src/index.ts";
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+function series(data: Scalar[]): Series { // test helper: builds a Series from raw scalars, passing only `data`
+ return new Series({ data });
+}
+
+// ─── Timedelta class ──────────────────────────────────────────────────────────
+
+describe("Timedelta", () => { // checks component accessors, arithmetic, comparisons and sign of a Timedelta built from a millisecond count
+ it("stores totalMs", () => {
+ expect(new Timedelta(5000).totalMs).toBe(5000);
+ });
+
+ it("days accessor", () => {
+ expect(new Timedelta(2 * 86_400_000 + 3 * 3_600_000).days).toBe(2); // input is 2 days + 3 hours, in ms
+ });
+
+ it("hours accessor", () => {
+ expect(new Timedelta(2 * 86_400_000 + 3 * 3_600_000).hours).toBe(3); // same input: hours is expected without the 2 whole days
+ });
+
+ it("minutes accessor", () => {
+ expect(new Timedelta(90 * 60_000).minutes).toBe(30); // 90 min = 1 h 30 min; only the 30-minute part is expected
+ });
+
+ it("seconds accessor", () => {
+ expect(new Timedelta(65_000).seconds).toBe(5); // 65 s = 1 min 5 s
+ });
+
+ it("ms accessor", () => {
+ expect(new Timedelta(1_500).ms).toBe(500); // 1.5 s; only the sub-second part is expected
+ });
+
+ it("abs()", () => {
+ expect(new Timedelta(-5000).abs().totalMs).toBe(5000);
+ });
+
+ it("add()", () => {
+ expect(new Timedelta(1000).add(new Timedelta(2000)).totalMs).toBe(3000);
+ });
+
+ it("subtract()", () => {
+ expect(new Timedelta(5000).subtract(new Timedelta(2000)).totalMs).toBe(3000);
+ });
+
+ it("scale()", () => {
+ expect(new Timedelta(1000).scale(3).totalMs).toBe(3000);
+ });
+
+ it("lt()", () => {
+ expect(new Timedelta(1000).lt(new Timedelta(2000))).toBe(true);
+ expect(new Timedelta(2000).lt(new Timedelta(1000))).toBe(false);
+ });
+
+ it("gt()", () => {
+ expect(new Timedelta(2000).gt(new Timedelta(1000))).toBe(true);
+ });
+
+ it("eq()", () => {
+ expect(new Timedelta(1000).eq(new Timedelta(1000))).toBe(true);
+ expect(new Timedelta(1000).eq(new Timedelta(999))).toBe(false);
+ });
+
+ it("sign positive", () => {
+ expect(new Timedelta(100).sign).toBe(1);
+ });
+
+ it("sign negative", () => {
+ expect(new Timedelta(-100).sign).toBe(-1);
+ });
+
+ it("sign zero", () => {
+ expect(new Timedelta(0).sign).toBe(1); // zero is expected to report sign 1, not 0
+ });
+});
+
+// ─── null / missing ───────────────────────────────────────────────────────────
+
+describe("toTimedelta — missing values", () => { // null, undefined and NaN inputs are each expected to map to null
+ it("returns null for null", () => {
+ expect(toTimedelta(null)).toBeNull();
+ });
+
+ it("returns null for undefined", () => {
+ expect(toTimedelta(undefined)).toBeNull();
+ });
+
+ it("returns null for NaN", () => {
+ expect(toTimedelta(Number.NaN)).toBeNull();
+ });
+});
+
+// ─── Timedelta passthrough ────────────────────────────────────────────────────
+
+describe("toTimedelta — Timedelta passthrough", () => { // an existing Timedelta given as input keeps its totalMs
+ it("returns same Timedelta unchanged", () => {
+ const td = new Timedelta(12345);
+ expect(toTimedelta(td as unknown as Scalar)?.totalMs).toBe(12345); // NOTE(review): compares totalMs only, not object identity; the cast suggests Timedelta is not part of Scalar — confirm
+ });
+});
+
+// ─── numeric inputs ───────────────────────────────────────────────────────────
+
+describe("toTimedelta — numeric", () => { // numeric input is scaled by the `unit` option; every case checks the resulting totalMs
+ it("default unit ns", () => {
+ expect(toTimedelta(1_000_000)?.totalMs).toBe(1); // 1e6 ns = 1 ms
+ });
+
+ it("unit ms", () => {
+ expect(toTimedelta(1000, { unit: "ms" })?.totalMs).toBe(1000);
+ });
+
+ it("unit s", () => {
+ expect(toTimedelta(2, { unit: "s" })?.totalMs).toBe(2000);
+ });
+
+ it("unit m", () => {
+ expect(toTimedelta(1, { unit: "m" })?.totalMs).toBe(60_000); // "m" is minutes here: 1 min = 60 000 ms
+ });
+
+ it("unit h", () => {
+ expect(toTimedelta(1, { unit: "h" })?.totalMs).toBe(3_600_000);
+ });
+
+ it("unit D", () => {
+ expect(toTimedelta(1, { unit: "D" })?.totalMs).toBe(86_400_000);
+ });
+
+ it("unit W", () => {
+ expect(toTimedelta(1, { unit: "W" })?.totalMs).toBe(7 * 86_400_000); // 1 week = 7 days
+ });
+
+ it("unit us", () => {
+ expect(toTimedelta(1000, { unit: "us" })?.totalMs).toBe(1); // 1000 microseconds = 1 ms
+ });
+
+ it("zero", () => {
+ expect(toTimedelta(0, { unit: "ms" })?.totalMs).toBe(0);
+ });
+});
+
+// ─── string — pandas format ───────────────────────────────────────────────────
+
+describe("toTimedelta — pandas-style strings", () => { // clock-style strings: optional "N day(s)" prefix followed by H:MM:SS with optional fraction
+ it("parses '0 days 00:00:01'", () => {
+ const td = toTimedelta("0 days 00:00:01") as Timedelta;
+ expect(td.totalMs).toBe(1000);
+ });
+
+ it("parses '1 days 02:03:04'", () => {
+ const td = toTimedelta("1 days 02:03:04") as Timedelta;
+ expect(td.totalMs).toBe(86_400_000 + 2 * 3_600_000 + 3 * 60_000 + 4_000); // 1 d + 2 h + 3 min + 4 s
+ });
+
+ it("parses '2 days 00:00:00.500000'", () => {
+ const td = toTimedelta("2 days 00:00:00.500000") as Timedelta;
+ expect(td.totalMs).toBe(2 * 86_400_000 + 500); // fraction .500000 s = 500 ms
+ });
+
+ it("parses no-day clock '01:30:00'", () => {
+ const td = toTimedelta("01:30:00") as Timedelta;
+ expect(td.totalMs).toBe(90 * 60_000); // 1 h 30 min
+ });
+
+ it("parses singular 'day'", () => {
+ const td = toTimedelta("1 day 00:00:00") as Timedelta;
+ expect(td.totalMs).toBe(86_400_000);
+ });
+
+ it("parses fractional '0:00:00.001'", () => {
+ const td = toTimedelta("0:00:00.001") as Timedelta; // single-digit hour field is accepted
+ expect(td.totalMs).toBe(1);
+ });
+});
+
+// ─── string — ISO 8601 ────────────────────────────────────────────────────────
+
+describe("toTimedelta — ISO 8601", () => { // duration strings of the form P[n]D and PT[n]H[n]M[n]S, alone and combined
+ it("parses 'P1D'", () => {
+ const td = toTimedelta("P1D") as Timedelta;
+ expect(td.totalMs).toBe(86_400_000);
+ });
+
+ it("parses 'PT1H'", () => {
+ const td = toTimedelta("PT1H") as Timedelta;
+ expect(td.totalMs).toBe(3_600_000);
+ });
+
+ it("parses 'PT30M'", () => {
+ const td = toTimedelta("PT30M") as Timedelta;
+ expect(td.totalMs).toBe(30 * 60_000); // M after the T designator is minutes
+ });
+
+ it("parses 'PT10S'", () => {
+ const td = toTimedelta("PT10S") as Timedelta;
+ expect(td.totalMs).toBe(10_000);
+ });
+
+ it("parses 'P1DT2H3M4S'", () => {
+ const td = toTimedelta("P1DT2H3M4S") as Timedelta;
+ expect(td.totalMs).toBe(86_400_000 + 2 * 3_600_000 + 3 * 60_000 + 4_000); // 1 d + 2 h + 3 min + 4 s
+ });
+});
+
+// ─── string — human-readable ──────────────────────────────────────────────────
+
+describe("toTimedelta — human-readable", () => { // number + unit tokens ("1h", "2 weeks"), alone or space-separated and summed
+ it("parses '1h'", () => {
+ expect((toTimedelta("1h") as Timedelta).totalMs).toBe(3_600_000);
+ });
+
+ it("parses '30m'", () => {
+ expect((toTimedelta("30m") as Timedelta).totalMs).toBe(30 * 60_000); // "m" is minutes
+ });
+
+ it("parses '1h 30m'", () => {
+ expect((toTimedelta("1h 30m") as Timedelta).totalMs).toBe(90 * 60_000);
+ });
+
+ it("parses '500ms'", () => {
+ expect((toTimedelta("500ms") as Timedelta).totalMs).toBe(500); // "ms" must not be read as minutes + seconds
+ });
+
+ it("parses '1 day'", () => {
+ expect((toTimedelta("1 day") as Timedelta).totalMs).toBe(86_400_000);
+ });
+
+ it("parses '2 weeks'", () => {
+ expect((toTimedelta("2 weeks") as Timedelta).totalMs).toBe(14 * 86_400_000); // 2 weeks = 14 days
+ });
+
+ it("parses '1h 30m 20s 500ms'", () => {
+ const expected = 3_600_000 + 30 * 60_000 + 20_000 + 500; // sum of the four components in ms
+ expect((toTimedelta("1h 30m 20s 500ms") as Timedelta).totalMs).toBe(expected);
+ });
+});
+
+// ─── string — integer string ──────────────────────────────────────────────────
+
+describe("toTimedelta — integer string", () => { // digit-only strings are expected to be scaled by `unit`, like numeric input
+ it("parses '1000' as ns by default", () => {
+ const td = toTimedelta("1000") as Timedelta;
+ expect(td.totalMs).toBeCloseTo(0.001, 5); // 1000 ns = 0.001 ms; compared to 5 decimal digits
+ });
+
+ it("parses '1000' with unit ms", () => {
+ const td = toTimedelta("1000", { unit: "ms" }) as Timedelta;
+ expect(td.totalMs).toBe(1000);
+ });
+});
+
+// ─── errors handling ──────────────────────────────────────────────────────────
+
+describe("toTimedelta — errors", () => { // `errors` option on an unparseable string: default throws, "coerce" gives null, "ignore" returns the input
+ it("raises by default for bad string", () => {
+ expect(() => toTimedelta("not-a-duration")).toThrow(TypeError); // no options given, so this pins the default behaviour
+ });
+
+ it("coerces to null", () => {
+ expect(toTimedelta("not-a-duration", { errors: "coerce" })).toBeNull();
+ });
+
+ it("ignores and returns original", () => {
+ const result = toTimedelta("not-a-duration", { errors: "ignore" });
+ expect(result).toBe("not-a-duration" as unknown as Timedelta); // cast is for the typed matcher only; the expected value is the original string
+ });
+});
+
+// ─── array overload ───────────────────────────────────────────────────────────
+
+describe("toTimedelta — array", () => { // array input is converted element by element; results are read by index
+ it("converts array of strings", () => {
+ const arr = toTimedelta(["1h", "30m", null] as Scalar[], { unit: "ms" }); // unit is passed, but the expectations below follow the unit suffix inside each string
+ expect(arr[0]?.totalMs).toBe(3_600_000);
+ expect(arr[1]?.totalMs).toBe(30 * 60_000);
+ expect(arr[2]).toBeNull(); // a null element stays null in the output
+ });
+
+ it("converts array of numbers", () => {
+ const arr = toTimedelta([1000, 2000] as Scalar[], { unit: "ms" });
+ expect(arr[0]?.totalMs).toBe(1000);
+ expect(arr[1]?.totalMs).toBe(2000);
+ });
+});
+
+// ─── Series overload ──────────────────────────────────────────────────────────
+
+describe("toTimedelta — Series", () => { // a Series input is expected to come back as a Series of Timedelta-or-null values
+ it("converts Series to Series", () => {
+ const s = series(["1h", "30m", null]);
+ const result = toTimedelta(s);
+ expect(result instanceof Series).toBe(true);
+ expect((result.values[0] as Timedelta | null)?.totalMs).toBe(3_600_000); // values are cast because the Series element type is not narrowed to Timedelta
+ expect((result.values[1] as Timedelta | null)?.totalMs).toBe(30 * 60_000);
+ expect(result.values[2]).toBeNull();
+ });
+
+ it("preserves Series name", () => {
+ const s = new Series({ data: ["1h"], name: "dur" });
+ const result = toTimedelta(s);
+ expect(result.name).toBe("dur");
+ });
+});
+
+// ─── parseFrac ────────────────────────────────────────────────────────────────
+
+describe("parseFrac", () => { // expectations read the digit string as the decimal fraction of one second and return milliseconds
+ it("parses '5' → 500 ms", () => {
+ expect(parseFrac("5")).toBe(500); // "5" is read as 0.5 s
+ });
+
+ it("parses '500000' → 500 ms", () => {
+ expect(parseFrac("500000")).toBe(500); // six digits (0.500000 s) give the same 500 ms
+ });
+
+ it("parses '001' → 1 ms", () => {
+ expect(parseFrac("001")).toBe(1); // leading zeros are significant: 0.001 s
+ });
+
+ it("parses '0' → 0 ms", () => {
+ expect(parseFrac("0")).toBe(0);
+ });
+});
+
+// ─── formatTimedelta ──────────────────────────────────────────────────────────
+
+describe("formatTimedelta", () => { // expected text shape: "<N> day(s) HH:MM:SS" with an optional six-digit fraction
+ it("formats zero", () => {
+ expect(formatTimedelta(new Timedelta(0))).toBe("0 days 00:00:00"); // zero uses the plural "days"
+ });
+
+ it("formats 1 day", () => {
+ expect(formatTimedelta(new Timedelta(86_400_000))).toBe("1 day 00:00:00"); // exactly one day uses the singular "day"
+ });
+
+ it("formats 2 days", () => {
+ expect(formatTimedelta(new Timedelta(2 * 86_400_000))).toBe("2 days 00:00:00");
+ });
+
+ it("formats hours/minutes/seconds", () => {
+ const td = new Timedelta(3_600_000 + 30 * 60_000 + 5_000); // 1 h + 30 min + 5 s
+ expect(formatTimedelta(td)).toBe("0 days 01:30:05");
+ });
+
+ it("formats fractional seconds", () => {
+ const td = new Timedelta(500);
+ expect(formatTimedelta(td)).toBe("0 days 00:00:00.500000"); // 500 ms is rendered with six fractional digits
+ });
+
+ it("negative: calls toString()", () => {
+ const td = new Timedelta(-86_400_000); // minus one day
+ const s = td.toString(); // NOTE(review): exercises Timedelta.toString(), not formatTimedelta directly
+ expect(s).toContain("days"); // loose check: only requires the word "days" somewhere in the text
+ });
+});
+
+// ─── property-based tests ─────────────────────────────────────────────────────
+
+describe("toTimedelta — property tests", () => { // fast-check properties over bounded integer millisecond inputs
+ it("numeric round-trip: toTimedelta(n, unit=ms).totalMs === n", () => {
+ fc.assert(
+ fc.property(fc.integer({ min: 0, max: 1_000_000 }), (n) => {
+ const td = toTimedelta(n, { unit: "ms" }) as Timedelta;
+ return td.totalMs === n; // with unit "ms" the value must pass through unscaled
+ }),
+ );
+ });
+
+ it("array length preserved", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.integer({ min: 0, max: 1_000_000 }), { minLength: 0, maxLength: 20 }), // includes the empty array
+ (arr) => {
+ const scalars = arr as unknown as Scalar[]; // hand the generated number[] over as Scalar[]
+ const result = toTimedelta(scalars, { unit: "ms" });
+ return result.length === arr.length;
+ },
+ ),
+ );
+ });
+
+ it("Timedelta.add is commutative", () => {
+ fc.assert(
+ fc.property(
+ fc.integer({ min: -1_000_000, max: 1_000_000 }),
+ fc.integer({ min: -1_000_000, max: 1_000_000 }),
+ (a, b) => {
+ const ta = new Timedelta(a);
+ const tb = new Timedelta(b);
+ return ta.add(tb).totalMs === tb.add(ta).totalMs; // a + b and b + a must give the same totalMs
+ },
+ ),
+ );
+ });
+
+ it("Timedelta.abs is always non-negative", () => {
+ fc.assert(
+ fc.property(fc.integer({ min: -1_000_000, max: 1_000_000 }), (n) => {
+ return new Timedelta(n).abs().totalMs >= 0; // range covers negative, zero and positive inputs
+ }),
+ );
+ });
+});