From b2e093364eea2a1d33e012a3058b5eb3d15411b0 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sun, 12 Apr 2026 12:04:27 +0000
Subject: [PATCH 1/8] Initial plan
From 74414650bd931081b9896b490bec62f2fc462e18 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sun, 12 Apr 2026 12:32:47 +0000
Subject: [PATCH 2/8] Merge unique features from PR #120 and PR #81 (iter136)
- Added 13 unique feature modules from PR #120: astype, read_excel,
clip_advanced, idxmin_idxmax, mode, nancumops, nunique, pct_change,
quantile, replace, sem_var, skew_kurt, to_datetime
- Added 11 unique feature modules from PR #81 (iter136): api_types,
attrs, insert_pop, to_from_dict, categorical_ops, format_ops,
numeric_extended, string_ops, string_ops_extended, window_extended,
rolling_apply
- Fixed TypeScript errors in new modules (mode, pct_change, to_datetime,
to_from_dict, string_ops_extended, window_extended tests)
- Fixed test expectations in rolling_apply (min window, count function)
- Updated barrel exports in core/index.ts, stats/index.ts, io/index.ts,
window/index.ts, and index.ts
- All 24 new feature modules pass their tests (598 + 399 = 997 new tests)
Co-authored-by: mrjf <180956+mrjf@users.noreply.github.com>
---
playground/api_types.html | 222 ++++++++
playground/astype.html | 438 ++++++++++++++++
playground/attrs.html | 183 +++++++
playground/categorical_ops.html | 338 +++++++++++++
playground/clip_advanced.html | 163 ++++++
playground/excel.html | 561 +++++++++++++++++++++
playground/format_ops.html | 262 ++++++++++
playground/idxmin_idxmax.html | 439 ++++++++++++++++
playground/index.html | 157 +++++-
playground/insert_pop.html | 172 +++++++
playground/mode.html | 125 +++++
playground/nancumops.html | 295 +++++++++++
playground/numeric_extended.html | 353 +++++++++++++
playground/nunique.html | 112 ++++
playground/pct_change.html | 448 ++++++++++++++++
playground/quantile.html | 182 +++++++
playground/replace.html | 408 +++++++++++++++
playground/rolling_apply.html | 225 +++++++++
playground/sem_var.html | 90 ++++
playground/skew_kurt.html | 137 +++++
playground/string_ops.html | 282 +++++++++++
playground/string_ops_extended.html | 413 +++++++++++++++
playground/to_datetime.html | 118 +++++
playground/to_from_dict.html | 122 +++++
playground/window_extended.html | 304 +++++++++++
src/core/api_types.ts | 629 +++++++++++++++++++++++
src/core/astype.ts | 245 +++++++++
src/core/attrs.ts | 291 +++++++++++
src/core/index.ts | 9 +
src/core/insert_pop.ts | 214 ++++++++
src/core/to_from_dict.ts | 284 +++++++++++
src/index.ts | 172 ++++++-
src/io/index.ts | 2 +
src/io/read_excel.ts | 645 ++++++++++++++++++++++++
src/stats/categorical_ops.ts | 483 ++++++++++++++++++
src/stats/clip_advanced.ts | 290 +++++++++++
src/stats/format_ops.ts | 442 ++++++++++++++++
src/stats/idxmin_idxmax.ts | 234 +++++++++
src/stats/index.ts | 167 ++++++
src/stats/mode.ts | 305 +++++++++++
src/stats/nancumops.ts | 272 ++++++++++
src/stats/numeric_extended.ts | 586 +++++++++++++++++++++
src/stats/nunique.ts | 291 +++++++++++
src/stats/pct_change.ts | 231 +++++++++
src/stats/quantile.ts | 361 +++++++++++++
src/stats/replace.ts | 237 +++++++++
src/stats/sem_var.ts | 283 +++++++++++
src/stats/skew_kurt.ts | 324 ++++++++++++
src/stats/string_ops.ts | 468 +++++++++++++++++
src/stats/string_ops_extended.ts | 429 ++++++++++++++++
src/stats/to_datetime.ts | 312 ++++++++++++
src/stats/window_extended.ts | 321 ++++++++++++
src/window/index.ts | 2 +
src/window/rolling_apply.ts | 323 ++++++++++++
tests/core/api_types.test.ts | 621 +++++++++++++++++++++++
tests/core/astype.test.ts | 292 +++++++++++
tests/core/attrs.test.ts | 542 ++++++++++++++++++++
tests/core/insert_pop.test.ts | 286 +++++++++++
tests/core/to_from_dict.test.ts | 278 ++++++++++
tests/io/read_excel.test.ts | 494 ++++++++++++++++++
tests/stats/categorical_ops.test.ts | 476 +++++++++++++++++
tests/stats/clip_advanced.test.ts | 215 ++++++++
tests/stats/format_ops.test.ts | 568 +++++++++++++++++++++
tests/stats/idxmin_idxmax.test.ts | 270 ++++++++++
tests/stats/mode.test.ts | 199 ++++++++
tests/stats/nancumops.test.ts | 264 ++++++++++
tests/stats/numeric_extended.test.ts | 509 +++++++++++++++++++
tests/stats/nunique.test.ts | 238 +++++++++
tests/stats/pct_change.test.ts | 246 +++++++++
tests/stats/quantile.test.ts | 364 +++++++++++++
tests/stats/replace.test.ts | 246 +++++++++
tests/stats/sem_var.test.ts | 212 ++++++++
tests/stats/skew_kurt.test.ts | 232 +++++++++
tests/stats/string_ops.test.ts | 459 +++++++++++++++++
tests/stats/string_ops_extended.test.ts | 437 ++++++++++++++++
tests/stats/to_datetime.test.ts | 304 +++++++++++
tests/stats/window_extended.test.ts | 365 ++++++++++++++
tests/window/rolling_apply.test.ts | 354 +++++++++++++
78 files changed, 23862 insertions(+), 10 deletions(-)
create mode 100644 playground/api_types.html
create mode 100644 playground/astype.html
create mode 100644 playground/attrs.html
create mode 100644 playground/categorical_ops.html
create mode 100644 playground/clip_advanced.html
create mode 100644 playground/excel.html
create mode 100644 playground/format_ops.html
create mode 100644 playground/idxmin_idxmax.html
create mode 100644 playground/insert_pop.html
create mode 100644 playground/mode.html
create mode 100644 playground/nancumops.html
create mode 100644 playground/numeric_extended.html
create mode 100644 playground/nunique.html
create mode 100644 playground/pct_change.html
create mode 100644 playground/quantile.html
create mode 100644 playground/replace.html
create mode 100644 playground/rolling_apply.html
create mode 100644 playground/sem_var.html
create mode 100644 playground/skew_kurt.html
create mode 100644 playground/string_ops.html
create mode 100644 playground/string_ops_extended.html
create mode 100644 playground/to_datetime.html
create mode 100644 playground/to_from_dict.html
create mode 100644 playground/window_extended.html
create mode 100644 src/core/api_types.ts
create mode 100644 src/core/astype.ts
create mode 100644 src/core/attrs.ts
create mode 100644 src/core/insert_pop.ts
create mode 100644 src/core/to_from_dict.ts
create mode 100644 src/io/read_excel.ts
create mode 100644 src/stats/categorical_ops.ts
create mode 100644 src/stats/clip_advanced.ts
create mode 100644 src/stats/format_ops.ts
create mode 100644 src/stats/idxmin_idxmax.ts
create mode 100644 src/stats/mode.ts
create mode 100644 src/stats/nancumops.ts
create mode 100644 src/stats/numeric_extended.ts
create mode 100644 src/stats/nunique.ts
create mode 100644 src/stats/pct_change.ts
create mode 100644 src/stats/quantile.ts
create mode 100644 src/stats/replace.ts
create mode 100644 src/stats/sem_var.ts
create mode 100644 src/stats/skew_kurt.ts
create mode 100644 src/stats/string_ops.ts
create mode 100644 src/stats/string_ops_extended.ts
create mode 100644 src/stats/to_datetime.ts
create mode 100644 src/stats/window_extended.ts
create mode 100644 src/window/rolling_apply.ts
create mode 100644 tests/core/api_types.test.ts
create mode 100644 tests/core/astype.test.ts
create mode 100644 tests/core/attrs.test.ts
create mode 100644 tests/core/insert_pop.test.ts
create mode 100644 tests/core/to_from_dict.test.ts
create mode 100644 tests/io/read_excel.test.ts
create mode 100644 tests/stats/categorical_ops.test.ts
create mode 100644 tests/stats/clip_advanced.test.ts
create mode 100644 tests/stats/format_ops.test.ts
create mode 100644 tests/stats/idxmin_idxmax.test.ts
create mode 100644 tests/stats/mode.test.ts
create mode 100644 tests/stats/nancumops.test.ts
create mode 100644 tests/stats/numeric_extended.test.ts
create mode 100644 tests/stats/nunique.test.ts
create mode 100644 tests/stats/pct_change.test.ts
create mode 100644 tests/stats/quantile.test.ts
create mode 100644 tests/stats/replace.test.ts
create mode 100644 tests/stats/sem_var.test.ts
create mode 100644 tests/stats/skew_kurt.test.ts
create mode 100644 tests/stats/string_ops.test.ts
create mode 100644 tests/stats/string_ops_extended.test.ts
create mode 100644 tests/stats/to_datetime.test.ts
create mode 100644 tests/stats/window_extended.test.ts
create mode 100644 tests/window/rolling_apply.test.ts
diff --git a/playground/api_types.html b/playground/api_types.html
new file mode 100644
index 00000000..05b8347c
--- /dev/null
+++ b/playground/api_types.html
@@ -0,0 +1,222 @@
+
+
+
+
+
+ tsb — api_types: Runtime type-checking predicates
+
+
+
+ 📦 api_types — Runtime type-checking predicates
+
+ Port of pandas.api.types .
+ Two groups of predicates:
+ value-level (work on arbitrary JS values) and
+ dtype-level (work on Dtype instances or dtype name strings).
+
+
+ Value-Level Predicates
+
+ isScalar(val)
+ Returns true for primitives and Date. Mirrors pd.api.types.is_scalar.
+ import { isScalar } from "tsb";
+
+isScalar(42); // true
+isScalar("hello"); // true
+isScalar(null); // true
+isScalar(new Date()); // true
+isScalar([1, 2]); // false
+isScalar({ a: 1 }); // false
+
+
+ isListLike(val)
+ Returns true for iterables (excluding strings) and objects with a numeric length.
+ isListLike([1, 2, 3]); // true
+isListLike(new Set([1])); // true
+isListLike("abc"); // false
+isListLike(42); // false
+
+
+ isArrayLike(val)
+ Returns true for values with a non-negative integer length (including strings).
+ isArrayLike([1, 2]); // true
+isArrayLike("hello"); // true
+isArrayLike(42); // false
+
+ isDictLike(val)
+ Returns true for plain objects and Map.
+ isDictLike({ a: 1 }); // true
+isDictLike(new Map()); // true
+isDictLike([]); // false
+
+ isNumber / isBool / isStringValue / isFloat / isInteger
+ isNumber(3.14); // true
+isNumber(NaN); // true (typeof NaN === "number")
+isBool(true); // true
+isStringValue("hi"); // true
+isFloat(3.14); // true
+isFloat(3.0); // false (integer value)
+isInteger(42); // true
+isInteger(3.14); // false
+
+
+ isMissing(val)
+ Returns true for null, undefined, or NaN.
+ isMissing(null); // true
+isMissing(undefined); // true
+isMissing(NaN); // true
+isMissing(0); // false
+
+ isHashable(val)
+ Returns true for values safe to use as object keys (primitives).
+ isHashable("key"); // true
+isHashable(42); // true
+isHashable({}); // false
+
+ Dtype-Level Predicates
+ All accept a Dtype instance or a dtype name string.
+
+ import { Dtype, isNumericDtype, isFloatDtype, isIntegerDtype,
+ isStringDtype, isDatetimeDtype, isCategoricalDtype } from "tsb";
+
+isNumericDtype(Dtype.float64); // true
+isNumericDtype("int32"); // true
+isNumericDtype("string"); // false
+
+isFloatDtype("float32"); // true
+isIntegerDtype("int64"); // true
+isUnsignedIntegerDtype("uint8"); // true
+isSignedIntegerDtype("int8"); // true
+isStringDtype("string"); // true
+isDatetimeDtype("datetime"); // true
+isCategoricalDtype("category"); // true
+isObjectDtype("object"); // true
+isExtensionArrayDtype("category"); // true
+isExtensionArrayDtype("int32"); // false
+
+
+ Complete Predicate Reference
+
+ Function Pandas equivalent Description
+ isScalar(val)is_scalarPrimitive or Date
+ isListLike(val)is_list_likeIterable (not string) or has length
+ isArrayLike(val)is_array_likeHas non-negative integer length
+ isDictLike(val)is_dict_likePlain object or Map
+ isIterator(val)is_iteratorHas callable next method
+ isNumber(val)is_numbertypeof === "number"
+ isBool(val)is_booltypeof === "boolean"
+ isStringValue(val)is_stringtypeof === "string"
+ isFloat(val)is_floatFinite number with fractional part
+ isInteger(val)is_integerInteger-valued number
+ isBigInt(val)— typeof === "bigint"
+ isRegExp(val)is_reRegExp instance
+ isReCompilable(val)is_re_compilableString or RegExp
+ isMissing(val)isnanull / undefined / NaN
+ isHashable(val)is_hashableSafe as object key (primitive)
+ isDate(val)— Date instance
+ isNumericDtype(d)is_numeric_dtypeInt, uint, or float
+ isIntegerDtype(d)is_integer_dtypeAny integer (signed or unsigned)
+ isSignedIntegerDtype(d)is_signed_integer_dtypeint8–int64
+ isUnsignedIntegerDtype(d)is_unsigned_integer_dtypeuint8–uint64
+ isFloatDtype(d)is_float_dtypefloat32 or float64
+ isBoolDtype(d)is_bool_dtypebool
+ isStringDtype(d)is_string_dtypestring dtype
+ isDatetimeDtype(d)is_datetime64_dtypedatetime
+ isTimedeltaDtype(d)is_timedelta64_dtypetimedelta
+ isCategoricalDtype(d)is_categorical_dtypecategory
+ isObjectDtype(d)is_object_dtypeobject
+ isComplexDtype(d)is_complex_dtypeAlways false (no complex in tsb)
+ isExtensionArrayDtype(d)is_extension_array_dtypestring/object/datetime/timedelta/category
+ isPeriodDtype(d)is_period_dtypeMaps to datetime
+ isIntervalDtype(d)is_interval_dtypeNumeric dtypes
+
+
+
+
+
diff --git a/playground/astype.html b/playground/astype.html
new file mode 100644
index 00000000..efd9e5ed
--- /dev/null
+++ b/playground/astype.html
@@ -0,0 +1,438 @@
+
+
+
+
+
+ tsb — astype
+
+
+
+
+
+
Loading tsb runtime…
+
+
+ ← tsb playground
+ astype — dtype coercion
+
+ Cast Series and DataFrame values to a different dtype.
+ Mirrors pandas.Series.astype and pandas.DataFrame.astype.
+
+
+
+
+
1 · Series — float to int64
+
+ Cast floating-point values to integers via truncation (same as
+ pandas.Series.astype("int64")).
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
2 · Series — numbers to string
+
Convert every value to its string representation. Null/undefined values
+ become null (not the string "null").
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
3 · Overflow clamping for bounded integer dtypes
+
+ Values that overflow the target integer dtype's range are clamped to
+ [min, max] — e.g. uint8 is clamped to
+ [0, 255].
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
4 · DataFrame — cast all columns
+
Pass a single dtype name to cast every column to the same type.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
5 · DataFrame — per-column dtype mapping
+
Pass a Record<string, DtypeName> to cast individual
+ columns. Columns not listed are carried over unchanged.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
6 · Casting to bool
+
Zero, empty string, and NaN become false;
+ everything else (including non-zero numbers and non-empty strings)
+ becomes true.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
API Reference
+
// Series cast
+astypeSeries(
+ series: Series,
+ dtype: DtypeName | Dtype,
+ options?: AstypeOptions,
+): Series
+
+// DataFrame cast (all columns or per-column mapping)
+astype(
+ df: DataFrame,
+ dtype: DtypeName | Dtype | Record<string, DtypeName | Dtype>,
+ options?: DataFrameAstypeOptions,
+): DataFrame
+
+// Low-level scalar cast
+castScalar(value: Scalar, dtype: Dtype): Scalar
+
+// Options
+interface AstypeOptions {
+ errors?: "raise" | "ignore"; // default "raise"
+}
+
+// Supported dtype names
+type DtypeName =
+ | "int8" | "int16" | "int32" | "int64"
+ | "uint8" | "uint16" | "uint32" | "uint64"
+ | "float32" | "float64"
+ | "bool" | "string" | "object"
+ | "datetime" | "timedelta" | "category"
+
+
+
+
+
+
diff --git a/playground/attrs.html b/playground/attrs.html
new file mode 100644
index 00000000..ae25d5f6
--- /dev/null
+++ b/playground/attrs.html
@@ -0,0 +1,183 @@
+
+
+
+
+
+ tsb — attrs: user-defined metadata
+
+
+
+ ← tsb playground
+
+ attrs — User-Defined Metadata
+
+ Attach arbitrary key→value metadata to any Series or DataFrame
+ — mirrors
+
+ pandas.DataFrame.attrs and
+
+ pandas.Series.attrs .
+
+
+
+ Design note: Because tsb objects are immutable (their data, index,
+ and dtype are frozen), attrs are stored in a WeakMap registry rather than as
+ instance properties. This means attrs are attached & detached without touching the object
+ itself, and garbage-collected automatically when the object is collected.
+
+
+ Basic usage
+
+ import {
+ getAttrs, setAttrs, updateAttrs, copyAttrs, withAttrs,
+ clearAttrs, hasAttrs, getAttr, setAttr, deleteAttr,
+ attrsCount, attrsKeys, mergeAttrs,
+} from "tsb";
+import { DataFrame, Series } from "tsb";
+
+// ─── annotate a DataFrame ─────────────────────────────────────────────────
+const df = DataFrame.fromColumns({
+ temperature: [22.1, 23.5, 21.8],
+ humidity: [55, 60, 58],
+});
+
+setAttrs(df, {
+ source: "weather_station_42",
+ unit: "Celsius",
+ notes: "Morning readings",
+});
+
+getAttrs(df);
+// → { source: "weather_station_42", unit: "Celsius", notes: "Morning readings" }
+
+getAttr(df, "unit"); // → "Celsius"
+getAttr(df, "missing"); // → undefined
+attrsCount(df); // → 3
+attrsKeys(df); // → ["source", "unit", "notes"]
+hasAttrs(df); // → true
+
+
+ Merging and updating
+
+ // updateAttrs merges new keys, preserves existing
+updateAttrs(df, { version: 2, notes: "Updated notes" });
+getAttrs(df);
+// → { source: "weather_station_42", unit: "Celsius", notes: "Updated notes", version: 2 }
+
+// setAttr / deleteAttr for single keys
+setAttr(df, "sensor_id", "WS-042");
+deleteAttr(df, "notes");
+getAttrs(df);
+// → { source: "weather_station_42", unit: "Celsius", version: 2, sensor_id: "WS-042" }
+
+
+ Propagating metadata to derived objects
+
+ // copyAttrs: copy all attrs from one object to another
+const s = new Series({ data: [22.1, 23.5, 21.8], name: "temperature" });
+setAttrs(s, { unit: "Celsius", source: "sensor_A" });
+
+const derived = new Series({ data: [71.8, 74.3, 71.2], name: "fahrenheit" });
+copyAttrs(s, derived);
+getAttrs(derived);
+// → { unit: "Celsius", source: "sensor_A" }
+
+// Then update the copy
+setAttr(derived, "unit", "Fahrenheit");
+getAttrs(derived); // → { unit: "Fahrenheit", source: "sensor_A" }
+getAttrs(s); // → { unit: "Celsius", source: "sensor_A" } ← unchanged
+
+
+ Fluent helper — withAttrs
+
+ // withAttrs sets attrs and returns the same object reference
+// Handy for inline annotation
+const annotated = withAttrs(
+ DataFrame.fromColumns({ x: [1, 2, 3] }),
+ { source: "lab_experiment", date: "2026-04-09" },
+);
+
+annotated === annotated; // true — same reference, not a copy
+getAttrs(annotated);
+// → { source: "lab_experiment", date: "2026-04-09" }
+
+
+ Merging from multiple sources
+
+ // mergeAttrs: combine attrs from multiple objects into a target
+const s1 = new Series({ data: [1, 2, 3], name: "a" });
+const s2 = new Series({ data: [4, 5, 6], name: "b" });
+setAttrs(s1, { source: "sensor_A", unit: "kg" });
+setAttrs(s2, { source: "sensor_B", scale: 2.5 });
+
+const combined = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+mergeAttrs([s1, s2], combined);
+// Later sources win on conflicts: source="sensor_B"
+getAttrs(combined);
+// → { source: "sensor_B", unit: "kg", scale: 2.5 }
+
+
+ Clearing metadata
+
+ setAttrs(df, { x: 1, y: 2 });
+hasAttrs(df); // → true
+attrsCount(df); // → 2
+
+clearAttrs(df);
+hasAttrs(df); // → false
+getAttrs(df); // → {}
+
+
+ API reference
+
+
+
+ Function Description
+
+
+ getAttrs(obj) Return a shallow copy of all stored attrs (empty {} if none)
+ setAttrs(obj, attrs) Overwrite attrs completely with the given record
+ updateAttrs(obj, updates) Merge updates into existing attrs (existing keys preserved)
+ withAttrs(obj, attrs) Fluent: set attrs and return the same object
+ copyAttrs(source, target) Copy all attrs from source to target
+ mergeAttrs(sources[], target) Merge attrs from multiple sources; later sources win
+ clearAttrs(obj) Remove all attrs from obj
+ hasAttrs(obj) Return true if any attrs are set
+ getAttr(obj, key) Get a single attr value (undefined if missing)
+ setAttr(obj, key, value) Set a single attr, preserving other keys
+ deleteAttr(obj, key) Delete a single attr key
+ attrsCount(obj) Number of stored attr keys
+ attrsKeys(obj) Array of stored attr key names
+
+
+
+ Comparison with pandas
+
+
+
+ pandas tsb
+
+
+ df.attrsgetAttrs(df)
+ df.attrs = {"k": "v"}setAttrs(df, { k: "v" })
+ df.attrs["k"] = "v"setAttr(df, "k", "v")
+ df.attrs["k"]getAttr(df, "k")
+ del df.attrs["k"]deleteAttr(df, "k")
+ df.attrs.update(d)updateAttrs(df, d)
+ df.attrs.clear()clearAttrs(df)
+
+
+
+
diff --git a/playground/categorical_ops.html b/playground/categorical_ops.html
new file mode 100644
index 00000000..c2d794ca
--- /dev/null
+++ b/playground/categorical_ops.html
@@ -0,0 +1,338 @@
+
+
+
+
+
+ tsb — Categorical Ops
+
+
+
+
+
+
Loading tsb runtime…
+
+
+← back to index
+🏷️ Categorical Ops
+
+ Standalone categorical utility functions that complement the Series.cat accessor.
+ Mirrors pd.Categorical.from_codes, set operations on categories, frequency helpers,
+ and cross-tabulation.
+
+
+
+
+
catFromCodes(codes, categories, opts?)
+
+ Construct a categorical Series from integer codes (0-based) and a categories array.
+ Code -1 maps to null (missing). Mirrors
+ pd.Categorical.from_codes.
+
+
+
▶ Run
+
+
+
+
+
+
Category set operations
+
+ catUnionCategories, catIntersectCategories,
+ catDiffCategories, and catEqualCategories let you
+ combine or compare the category sets of two Series.
+
+
+
▶ Run
+
+
+
+
+
+
catSortByFreq(series, opts?)
+
+ Reorder categories by their frequency in the data (most frequent first by default).
+ Mirrors s.cat.reorder_categories(s.value_counts().index).
+
+
+
▶ Run
+
+
+
+
+
+
catToOrdinal(series, order)
+
+ Create an ordered categorical from a Series using order to define both the
+ category set and their rank. Values not in order become null.
+
+
+
▶ Run
+
+
+
+
+
+
catFreqTable(series)
+
+ Return a plain Record<string, number> of counts per category.
+ Zero-frequency categories are included.
+
+
+
▶ Run
+
+
+
+
+
+
catCrossTab(a, b, opts?)
+
+ Cross-tabulation of two categorical Series. Rows = a's categories,
+ columns = b's categories, cells = co-occurrence counts.
+ Supports margins and normalization.
+
+
+
▶ Run
+
+
+
+
+
+
catRecode(series, mapping)
+
+ Rename categories via an object map or a transform function. Unmapped categories
+ are left unchanged.
+
+
+
▶ Run
+
+
+
+
+
+
+
diff --git a/playground/clip_advanced.html b/playground/clip_advanced.html
new file mode 100644
index 00000000..eb200294
--- /dev/null
+++ b/playground/clip_advanced.html
@@ -0,0 +1,163 @@
+
+
+
+
+
+ tsb — clip_advanced (per-element clipping)
+
+
+
+ tsb — clip_advanced (per-element clipping)
+
+ Clip Series and DataFrame values to per-element bounds.
+ Unlike the simple scalar clip, clipAdvancedSeries and
+ clipAdvancedDataFrame support array, Series, and DataFrame bounds —
+ enabling per-position or element-wise bound specification.
+
+
+ Core concept
+ // Scalar bounds (like pandas s.clip(lower=0, upper=5))
+clipAdvancedSeries(s, { lower: 0, upper: 5 })
+
+// Per-element array bounds
+clipAdvancedSeries(s, { lower: [1, 2, 3], upper: [4, 5, 6] })
+
+// Series bounds (positional alignment)
+clipAdvancedSeries(s, { lower: loSeries, upper: hiSeries })
+
+// DataFrame element-wise bounds
+clipAdvancedDataFrame(df, { lower: loDf, upper: hiDf })
+
+// Series broadcast on DataFrame (axis=0: one bound per column; axis=1: one per row)
+clipAdvancedDataFrame(df, { lower: loSeries, axis: 1 })
+
+
+ pandas equivalent:
+ s.clip(lower=lo_array, upper=hi_array)
+ df.clip(lower=lo_df, upper=hi_df)
+
+
+
+ Demo 1 — clipAdvancedSeries with scalar bounds
+
+
Code
+
const s = new Series({ data: [-3, 1, 5, 10] });
+clipAdvancedSeries(s, { lower: 0, upper: 6 }).values;
+// → [0, 1, 5, 6]
+
Run
+
+
+
+
+ Demo 2 — clipAdvancedSeries with per-element array bounds
+
+
Code
+
const s = new Series({ data: [-1, 0, 5, 12] });
+const lo = [2, -1, 4, 10];
+const hi = [5, 3, 8, 11];
+clipAdvancedSeries(s, { lower: lo, upper: hi }).values;
+// → [2, 0, 5, 11]
+
Run
+
+
+
+
+ Demo 3 — clipAdvancedSeries with Series bounds
+
+
Code
+
const s = new Series({ data: [0, 5, 10, 15] });
+const loBound = new Series({ data: [1, 3, 8, 12] });
+const hiBound = new Series({ data: [2, 7, 9, 20] });
+clipAdvancedSeries(s, { lower: loBound, upper: hiBound }).values;
+// → [1, 5, 9, 15]
+
Run
+
+
+
+
+ Demo 4 — clipAdvancedDataFrame with DataFrame bounds
+
+
Code
+
const df = DataFrame.fromColumns({ a: [1, 5, 9], b: [2, 6, 10] });
+const lo = DataFrame.fromColumns({ a: [2, 3, 4], b: [1, 4, 8] });
+const hi = DataFrame.fromColumns({ a: [3, 7, 8], b: [5, 9, 12] });
+const result = clipAdvancedDataFrame(df, { lower: lo, upper: hi });
+result.col("a").values; // → [2, 5, 8]
+result.col("b").values; // → [2, 6, 10]
+
Run
+
+
+
+
+ Demo 5 — clipAdvancedDataFrame with Series broadcast (axis=1)
+
+
Code
+
// axis=1: one lower bound per row
+const df = DataFrame.fromColumns({ a: [1, 5, 9], b: [2, 6, 10] });
+const loPerRow = new Series({ data: [0, 4, 10] });
+const result = clipAdvancedDataFrame(df, { lower: loPerRow, axis: 1 });
+result.col("a").values; // → [1, 5, 10]
+result.col("b").values; // → [2, 6, 10]
+
Run
+
+
+
+
+
+
diff --git a/playground/excel.html b/playground/excel.html
new file mode 100644
index 00000000..563736cf
--- /dev/null
+++ b/playground/excel.html
@@ -0,0 +1,561 @@
+
+
+
+
+
+ tsb — readExcel playground
+
+
+
+
+ 📊 readExcel — XLSX file reading
+
+ tsb can read Excel XLSX files natively — no dependencies. The
+ readExcel() function accepts a Uint8Array or
+ ArrayBuffer and returns a DataFrame.
+
+
+
+ Python equivalent:
+ pd.read_excel("data.xlsx")
+
+
+ Basic usage
+ import { readExcel, xlsxSheetNames } from "tsb";
+
+// Read first sheet (default)
+const df = readExcel(buffer);
+console.log(df.shape); // [rows, cols]
+console.log(df.columns.toArray()); // column names
+
+// List all sheet names
+const sheets = xlsxSheetNames(buffer);
+// → ["Sheet1", "Summary", "Data"]
+
+// Read a specific sheet by name
+const df2 = readExcel(buffer, { sheetName: "Summary" });
+
+// Read a specific sheet by index
+const df3 = readExcel(buffer, { sheetName: 1 });
+
+
+ Options
+
+
+
+ Option
+ Type
+ Default
+ Description
+
+
+
+
+ sheetName
+ string | number
+ 0
+ Sheet to read (name or 0-based index)
+
+
+ header
+ number | null
+ 0
+ Row index of the header, or null for no header
+
+
+ indexCol
+ string | number | null
+ null
+ Column to use as the row index
+
+
+ skipRows
+ number
+ 0
+ Data rows to skip after the header
+
+
+ nrows
+ number
+ unlimited
+ Maximum number of data rows to read
+
+
+ naValues
+ string[]
+ []
+ Additional strings to treat as NA
+
+
+
+
+ Interactive demo
+ Upload an .xlsx file to inspect it, or use the demo data below.
+
+
+
+
or
+
Load demo data
+
+
+
+ Sheet index:
+
+
+
+ No header (header: null)
+
+
+ Skip rows:
+
+ Max rows:
+
+ Parse
+
+
+ Upload a file or click "Load demo data" to start.
+
+ Advanced example
+ // Use a named column as the row index
+const df = readExcel(buffer, { indexCol: "ID" });
+
+// Skip 2 rows and read at most 100 rows
+const df2 = readExcel(buffer, { skipRows: 2, nrows: 100 });
+
+// Treat custom strings as missing
+const df3 = readExcel(buffer, { naValues: ["N/A", "MISSING", "-"] });
+
+// DataFrame operations work immediately
+df.describe();
+df.col("revenue").sum();
+df.groupby("region").mean();
+
+
+ Python equivalent
+ # pandas
+import pandas as pd
+
+df = pd.read_excel("data.xlsx", sheet_name=0)
+df = pd.read_excel("data.xlsx", sheet_name="Summary")
+df = pd.read_excel("data.xlsx", header=None)
+df = pd.read_excel("data.xlsx", index_col="ID")
+df = pd.read_excel("data.xlsx", skiprows=2, nrows=100)
+
+
+
+
+
diff --git a/playground/format_ops.html b/playground/format_ops.html
new file mode 100644
index 00000000..d72fd1ec
--- /dev/null
+++ b/playground/format_ops.html
@@ -0,0 +1,262 @@
+
+
+
+
+
+ tsb — format_ops: Number Formatting
+
+
+
+🔢 format_ops — Number Formatting
+
+ tsb provides a suite of number-formatting helpers that mirror pandas'
+ style.format() and Series.map() patterns.
+ Every function is zero-dependency and fully typed.
+
+← Back to index
+
+Scalar formatters
+
+
+ Function Example input Example output Notes
+
+ formatFloat(n, d)3.14159, 2"3.14"Fixed decimal places
+ formatPercent(n, d)0.1234, 1"12.3%"Multiplies by 100
+ formatScientific(n, d)12345.678, 3"1.235e+4"Exponential notation
+ formatEngineering(n, d)12345.678, 3"12.346e+3"Exponent multiple of 3
+ formatThousands(n, d, sep)1234567.89, 2"1,234,567.89"Thousands separator
+ formatCurrency(n, sym, d)1234.5, "$""$1,234.50"Currency prefix + thousands
+ formatCompact(n, d)1_234_567, 2"1.23M"K / M / B / T suffixes
+
+
+
+Interactive demo — scalar formatting
+
+
Number:
+
Format:
+
+ formatFloat(n, decimals)
+ formatPercent(n, decimals)
+ formatScientific(n, decimals)
+ formatEngineering(n, decimals)
+ formatThousands(n, decimals)
+ formatCurrency(n, "$", decimals)
+ formatCompact(n, decimals)
+
+
+
Decimals:
+
Format
+
+
+
+Formatter factories
+import {
+ makeFloatFormatter,
+ makePercentFormatter,
+ makeCurrencyFormatter,
+} from "tsb";
+
+const fmtFloat = makeFloatFormatter(3); // (v) => formatFloat(v, 3)
+const fmtPct = makePercentFormatter(1); // (v) => formatPercent(v, 1)
+const fmtDollar = makeCurrencyFormatter("$"); // (v) => formatCurrency(v, "$", 2)
+
+fmtFloat(3.14159); // "3.142"
+fmtPct(0.0825); // "8.3%"
+fmtDollar(9999.99); // "$9,999.99"
+
+
+Apply to a Series
+import { Series, applySeriesFormatter, makePercentFormatter } from "tsb";
+
+const returns = new Series({ data: [0.05, -0.02, 0.134, 0.007], name: "returns" });
+
+const formatted = applySeriesFormatter(returns, makePercentFormatter(1));
+// Series<string> ["5.0%", "-2.0%", "13.4%", "0.7%"]
+
+
+Apply to a DataFrame
+import { DataFrame, applyDataFrameFormatter, makeCurrencyFormatter, makePercentFormatter } from "tsb";
+
+const df = DataFrame.fromColumns({
+ price: [1_299.99, 899.50, 45.00],
+ change: [0.025, -0.031, 0.102],
+ volume: [15_000, 8_200, 230_000],
+});
+
+const formatted = applyDataFrameFormatter(df, {
+ price: makeCurrencyFormatter("$", 2),
+ change: makePercentFormatter(2),
+});
+
+// formatted = {
+// price: ["$1,299.99", "$899.50", "$45.00"],
+// change: ["2.50%", "-3.10%", "10.20%"],
+// volume: ["15000", "8200", "230000"], // no formatter → String(v)
+// }
+
+
+Interactive demo — DataFrame formatting
+
+
Run DataFrame example
+
+
+
+String rendering
+import { Series, DataFrame, seriesToString, dataFrameToString, makeFloatFormatter } from "tsb";
+
+const s = new Series({ data: [1.2, 3.4, 5.6], name: "value" });
+console.log(seriesToString(s, { formatter: makeFloatFormatter(1) }));
+// 0 1.2
+// 1 3.4
+// 2 5.6
+// Name: value, dtype: float64
+
+const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4.0, 5.0, 6.0] });
+console.log(dataFrameToString(df));
+// a b
+// 0 1 4.0
+// 1 2 5.0
+// 2 3 6.0
+
+
+Interactive demo — seriesToString / dataFrameToString
+
+
Run toString example
+
+
+
+
+
+
diff --git a/playground/idxmin_idxmax.html b/playground/idxmin_idxmax.html
new file mode 100644
index 00000000..b771dd36
--- /dev/null
+++ b/playground/idxmin_idxmax.html
@@ -0,0 +1,439 @@
+
+
+
+
+
+ tsb — idxmin / idxmax
+
+
+
+
+
+
Loading TypeScript compiler…
+
+
+ ← tsb playground
+ idxmin / idxmax
+
+ Return the index label of the minimum or maximum value in a
+ Series or each column of a DataFrame.
+ Mirrors pandas.Series.idxmin(), idxmax(),
+ pandas.DataFrame.idxmin(), and DataFrame.idxmax().
+
+
+
+
+
1 · Series.idxmin — label of the minimum value
+
Returns the index label at the position of the minimum value.
+ NaN / null values are skipped by default.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
2 · Series.idxmax — label of the maximum value
+
Returns the index label at the position of the maximum value.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
3 · NaN handling — skipna option
+
By default NaN / null values are skipped. Set skipna: false
+ to propagate NaN (returns null if any value is NaN).
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
4 · DataFrame.idxmin — row label of column minima
+
Returns a Series indexed by column names. Each value is the row label
+ where that column achieves its minimum.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
5 · DataFrame.idxmax — row label of column maxima
+
Returns a Series indexed by column names, where each entry is the row
+ label of that column's maximum value.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
6 · Edge cases — empty, all-NaN, all-equal
+
Behavior for empty series, series where every value is NaN, and series
+ where all values are equal.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
API Reference
+
// Series
+idxminSeries(series, { skipna?: boolean }): Label // default skipna=true
+idxmaxSeries(series, { skipna?: boolean }): Label
+
+// DataFrame (axis=0 — min/max per column)
+idxminDataFrame(df, { skipna?: boolean }): Series // indexed by column names
+idxmaxDataFrame(df, { skipna?: boolean }): Series
+
+
+
+
+
+
diff --git a/playground/index.html b/playground/index.html
index b5499765..15e4590c 100644
--- a/playground/index.html
+++ b/playground/index.html
@@ -194,6 +194,16 @@
+ Function Pandas equivalent Description
+
+
+ insertColumn(df, loc, col, values)
+ df.insert(loc, col, value)
+ Insert a new column at integer position loc
+
+
+ popColumn(df, col)
+ df.pop(col)
+ Remove a column; returns { series, df }
+
+
+ reorderColumns(df, order)
+ df[order]
+ Reorder (and optionally subset) columns
+
+
+ moveColumn(df, col, newLoc)
+ —
+ Move an existing column to a new integer position
+
+
+
+
+ Example 1 — insertColumn
+ import { DataFrame, insertColumn } from "tsb";
+
+const df = DataFrame.fromColumns({
+ name: ["Alice", "Bob", "Carol"],
+ age: [30, 25, 35],
+});
+// columns: ["name", "age"]
+
+// Insert "city" between "name" and "age"
+const df2 = insertColumn(df, 1, "city", ["NY", "LA", "SF"]);
+// df2.columns.values → ["name", "city", "age"]
+// df2.col("city").values → ["NY", "LA", "SF"]
+
+// Original is unchanged
+// df.columns.values → ["name", "age"]
+
+
+ Example 2 — Insert with a Series
+ import { DataFrame, Series, insertColumn } from "tsb";
+
+const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+const salary = new Series({ data: [100_000, 90_000, 120_000], name: "salary" });
+
+const df2 = insertColumn(df, 0, "salary", salary);
+// df2.columns.values → ["salary", "a", "b"]
+
+
+ Example 3 — popColumn
+ import { DataFrame, popColumn } from "tsb";
+
+const df = DataFrame.fromColumns({
+ id: [1, 2, 3],
+ name: ["Alice", "Bob", "Carol"],
+ age: [30, 25, 35],
+});
+
+// Remove "age" and keep the Series
+const { series: ageSeries, df: df2 } = popColumn(df, "age");
+// ageSeries.values → [30, 25, 35]
+// df2.columns.values → ["id", "name"]
+// df.columns.values → ["id", "name", "age"] ← original unchanged
+
+
+ Example 4 — reorderColumns
+ import { DataFrame, reorderColumns } from "tsb";
+
+const df = DataFrame.fromColumns({ a: [1], b: [2], c: [3], d: [4] });
+
+// Reverse the column order
+const df2 = reorderColumns(df, ["d", "c", "b", "a"]);
+// df2.columns.values → ["d", "c", "b", "a"]
+
+// Select a subset (drops columns not listed)
+const df3 = reorderColumns(df, ["a", "c"]);
+// df3.columns.values → ["a", "c"] (b and d are dropped)
+
+
+ Example 5 — moveColumn
+ import { DataFrame, moveColumn } from "tsb";
+
+const df = DataFrame.fromColumns({
+ year: [2020, 2021, 2022],
+ value: [10, 20, 30],
+ label: ["a", "b", "c"],
+});
+// columns: ["year", "value", "label"]
+
+// Move "label" to the front
+const df2 = moveColumn(df, "label", 0);
+// df2.columns.values → ["label", "year", "value"]
+
+
+ Error cases
+ // Duplicate column name (default: not allowed)
+insertColumn(df, 1, "a", [1, 2, 3]);
+// → RangeError: Column "a" already exists. Use allowDuplicates=true to permit...
+
+// Out-of-range loc
+insertColumn(df, 99, "x", [1, 2, 3]);
+// → RangeError: loc=99 is out of range [0, 2].
+
+// Wrong number of values
+insertColumn(df, 0, "x", [1]); // df has 3 rows
+// → RangeError: values length 1 does not match DataFrame row count 3.
+
+// Column not found
+popColumn(df, "missing");
+// → RangeError: Column "missing" not found in DataFrame.
+
+
+
+ Immutability: Like all tsb DataFrame operations, these functions never
+ mutate the original DataFrame. Always assign the return value to a new variable.
+
+
+ pandas equivalence table
+
+
+ pandas tsb
+
+
+ df.insert(1, "x", [1,2,3]) *(mutates)*insertColumn(df, 1, "x", [1,2,3])
+ series = df.pop("col") *(mutates)*const { series, df: df2 } = popColumn(df, "col")
+ df[["c","a","b"]]reorderColumns(df, ["c","a","b"])
+
+
+
+
diff --git a/playground/mode.html b/playground/mode.html
new file mode 100644
index 00000000..0a149227
--- /dev/null
+++ b/playground/mode.html
@@ -0,0 +1,125 @@
+
+
+
+
+
+ tsb — mode
+
+
+
+ ← tsb playground
+ 📊 mode
+
+ modeSeries / modeDataFrame —
+ return the most-frequent value(s), mirroring
+ Series.mode() and
+ DataFrame.mode() .
+
+ Equivalent Python: series.mode()
+
+ 1 · Single mode
+
+
const s = new Series({ data: [1, 2, 2, 3] });
+modeSeries(s).values;
+// → [2]
+
+
+
+ 2 · Tied modes — all returned sorted
+
+
const s = new Series({ data: [1, 1, 2, 2, 3] });
+modeSeries(s).values;
+// → [1, 2]
+
+
+
+ 3 · String values
+
+
const s = new Series({ data: ["cat", "dog", "dog", "bird"] });
+modeSeries(s).values;
+// → ["dog"]
+
+
+
+ 4 · Null values excluded (dropna=true default)
+
+
const s = new Series({ data: [null, 1, 1, null, null] });
+modeSeries(s).values;
+// → [1]
+
+
+
+ 5 · DataFrame column-wise (axis=0)
+
+
const df = DataFrame.fromColumns({ a: [1, 1, 2, 2], b: [5, 5, 5, 6] });
+modeDataFrame(df);
+// a: [1, 2], b: [5, null] (null-padded)
+
+
+
+ 6 · DataFrame row-wise (axis=1)
+
+
const df = DataFrame.fromColumns({ a: [1, 2], b: [1, 3], c: [2, 3] });
+modeDataFrame(df, { axis: 1 });
+// row 0: mode=1, row 1: mode=3
+
+
+
+
+
+
diff --git a/playground/nancumops.html b/playground/nancumops.html
new file mode 100644
index 00000000..d7014593
--- /dev/null
+++ b/playground/nancumops.html
@@ -0,0 +1,295 @@
+
+
+
+
+
+ tsb — NaN-Ignoring Aggregates (nancumops)
+
+
+
+
+🔢 NaN-Ignoring Aggregates
+
+ nansum, nanmean, nanmedian, nanstd, nanvar,
+ nanmin, nanmax, nanprod, nancount
+ — mirrors numpy.nan* functions in pandas workflows.
+
+
+
+
+
🧮 Live Calculator
+
Enter a comma-separated list of numbers (use NaN, null for missing).
+
Input values
+
+
ddof (for std/var)
+
+ 1 (sample — default)
+ 0 (population)
+ 2
+
+
Compute All
+
+
+
+
+
+
📖 Function Reference
+
+
+
+ Function
+ Description
+ Empty/all-NaN returns
+ pandas / numpy equivalent
+
+
+
+ nancount(input)Count of valid (non-NaN) numeric values 0np.count_nonzero(~np.isnan(a))
+ nansum(input)Sum, ignoring NaN/null 0np.nansum(a)
+ nanmean(input)Mean, ignoring NaN/null NaNnp.nanmean(a)
+ nanmedian(input)Median, ignoring NaN/null NaNnp.nanmedian(a)
+ nanvar(input, {ddof})Variance (ddof=1 default) NaNnp.nanvar(a, ddof=1)
+ nanstd(input, {ddof})Std deviation (ddof=1 default) NaNnp.nanstd(a, ddof=1)
+ nanmin(input)Minimum, ignoring NaN/null NaNnp.nanmin(a)
+ nanmax(input)Maximum, ignoring NaN/null NaNnp.nanmax(a)
+ nanprod(input)Product, ignoring NaN/null 1np.nanprod(a)
+
+
+
+
+
+
+
💡 Usage Examples
+
+
+ Basic array usage
+
+import { nansum, nanmean, nanmedian, nanstd } from "tsb";
+
+const data = [1, 2, NaN, null, 3, 5];
+
+nansum(data); // 11
+nanmean(data); // 2.75
+nanmedian(data); // 2.5
+nanstd(data); // 1.708...
+
+
+# Python / pandas equivalent
+import numpy as np
+
+data = [1, 2, np.nan, np.nan, 3, 5]
+
+np.nansum(data) # 11.0
+np.nanmean(data) # 2.75
+np.nanmedian(data) # 2.5
+np.nanstd(data, ddof=1) # 1.708...
+
+
+
+
+ Using with Series
+
+import { Series, nansum, nanmean, nancount } from "tsb";
+
+const s = new Series({ data: [10, null, 30, NaN, 50] });
+
+nancount(s); // 3
+nansum(s); // 90
+nanmean(s); // 30
+
+
+# Python / pandas equivalent
+import pandas as pd, numpy as np
+
+s = pd.Series([10, np.nan, 30, np.nan, 50])
+
+s.count() # 3
+s.sum() # 90.0
+s.mean() # 30.0
+
+
+
+
+ Variance and std with ddof
+
+import { nanvar, nanstd } from "tsb";
+
+const xs = [2, 4, 4, 4, 5, 5, 7, 9];
+
+// Sample (ddof=1, default)
+nanvar(xs); // ≈ 4.571
+nanstd(xs); // ≈ 2.138
+
+// Population (ddof=0)
+nanvar(xs, { ddof: 0 }); // 4.0
+nanstd(xs, { ddof: 0 }); // 2.0
+
+
+# Python / pandas equivalent
+import numpy as np
+
+xs = [2, 4, 4, 4, 5, 5, 7, 9]
+
+np.nanvar(xs, ddof=1) # 4.571...
+np.nanstd(xs, ddof=1) # 2.138...
+
+np.nanvar(xs, ddof=0) # 4.0
+np.nanstd(xs, ddof=0) # 2.0
+
+
+
+
+
+
+
⚡ NaN Impact Demo
+
See how NaN values affect results with and without nan-ignoring functions.
+
Run Comparison
+
+
+
+
+
+
+
diff --git a/playground/numeric_extended.html b/playground/numeric_extended.html
new file mode 100644
index 00000000..14cc4990
--- /dev/null
+++ b/playground/numeric_extended.html
@@ -0,0 +1,353 @@
+
+
+
+
+
+ tsb — Numeric Utilities (digitize, histogram, linspace, arange, zscore…)
+
+
+
+ 🔢 Numeric Utilities
+
+ ← back to index
+
+
+ tsb ships numpy/scipy-style numeric utility functions — all implemented
+ from scratch with no external dependencies:
+ digitize, histogram, linspace, arange,
+ percentileOfScore, zscore, minMaxNormalize,
+ coefficientOfVariation.
+
+
+
+
digitize — bin values
+
+ Map each value to the index of the bin it falls into. Mirrors numpy.digitize.
+ Indices are 0-based; values below the first edge return -1.
+
+
import { digitize, seriesDigitize, Series } from "tsb";
+
+// Find which [0,33), [33,66), [66,100] bucket each score belongs to
+const scores = [15, 45, 70, 33, 100];
+const edges = [33, 66, 100];
+
+const bins = digitize(scores, edges);
+// → [-1, 0, 1, 0, 2]
+// 15 < 33 → bin -1 (below first edge)
+// 45 ∈ [33,66) → bin 0
+// 70 ∈ [66,100)→ bin 1
+// 33 ∈ [33,66) → bin 0 (33 < 66, right=false default)
+// 100 = last → bin 2
+
+// Series version — preserves index
+const s = new Series({ data: [15, 45, 70], index: ["Alice","Bob","Carol"] });
+seriesDigitize(s, [33, 66, 100]);
+// Series: Alice→-1, Bob→0, Carol→1
+
Running…
+
+
+
+
histogram — frequency counts
+
Count how many values fall in each bin. Mirrors numpy.histogram.
+
import { histogram } from "tsb";
+
+const data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+
+// Default: 10 equal-width bins
+const { counts, binEdges } = histogram(data);
+
+// Custom: 5 bins, density normalised
+const { counts: d, binEdges: e } = histogram(data, { bins: 5, density: true });
+
+// Explicit edges
+histogram(data, { binEdges: [1, 4, 7, 10] });
+// counts: [ 3, 3, 4 ]
+
Running…
+
+
+
+
linspace & arange — number sequences
+
Generate evenly-spaced sequences, mirroring numpy.linspace and numpy.arange.
+
import { linspace, arange } from "tsb";
+
+// 5 values from 0 to 1 (inclusive)
+linspace(0, 1, 5);
+// → [0, 0.25, 0.5, 0.75, 1]
+
+// 0..4
+arange(5);
+// → [0, 1, 2, 3, 4]
+
+// From 2 to 10, step 2
+arange(2, 10, 2);
+// → [2, 4, 6, 8]
+
+// Descending
+arange(5, 0, -1);
+// → [5, 4, 3, 2, 1]
+
Running…
+
+
+
+
percentileOfScore — percentile rank
+
+ Compute what percentile a given score falls at within a dataset.
+ Mirrors scipy.stats.percentileofscore.
+
+
import { percentileOfScore } from "tsb";
+
+const grades = [55, 60, 70, 75, 80, 85, 90, 95];
+
+// What percentile is a score of 75?
+percentileOfScore(grades, 75); // 50 (rank — default)
+percentileOfScore(grades, 75, "weak"); // 50 (≤ 75: 4/8 = 50%)
+percentileOfScore(grades, 75, "strict"); // 37.5 (< 75: 3/8 = 37.5%)
+
Running…
+
+
+
+
zscore — standardisation
+
+ Transform values to zero mean and unit variance. Mirrors scipy.stats.zscore.
+ Missing values are propagated; zero-variance data returns all NaN.
+
+
import { zscore, Series } from "tsb";
+
+const s = new Series({ data: [2, 4, 4, 4, 5, 5, 7, 9], name: "values" });
+const z = zscore(s);
+
+// z.values ≈ [-1.5, -0.5, -0.5, -0.5, 0, 0, 1, 2]
+
+// With population std (ddof=0)
+const zPop = zscore(s, { ddof: 0 });
+
Running…
+
+
+
+
minMaxNormalize — scale to [0, 1]
+
+ Scale all values to the interval [0, 1] (or a custom range).
+ Mirrors sklearn MinMaxScaler.
+
+
import { minMaxNormalize, Series } from "tsb";
+
+const s = new Series({ data: [0, 25, 50, 75, 100] });
+minMaxNormalize(s).values;
+// → [0, 0.25, 0.5, 0.75, 1]
+
+// Scale to [-1, 1]
+minMaxNormalize(s, { featureRangeMin: -1, featureRangeMax: 1 }).values;
+// → [-1, -0.5, 0, 0.5, 1]
+
Running…
+
+
+
+
coefficientOfVariation — relative spread
+
+ Dimensionless measure of dispersion: std / |mean|.
+ Useful for comparing spread across datasets with different units.
+
+
import { coefficientOfVariation, Series } from "tsb";
+
+// Dataset A: [10, 20, 30] mean=20, std=10 → CV=0.5
+coefficientOfVariation(new Series({ data: [10, 20, 30] }));
+
+// Dataset B: [100, 200, 300] same shape, higher scale → CV=0.5
+coefficientOfVariation(new Series({ data: [100, 200, 300] }));
+
+// CV with population std
+coefficientOfVariation(new Series({ data: [1, 2, 3, 4, 5] }), { ddof: 0 });
+
Running…
+
+
+
+
+
diff --git a/playground/nunique.html b/playground/nunique.html
new file mode 100644
index 00000000..add4399d
--- /dev/null
+++ b/playground/nunique.html
@@ -0,0 +1,112 @@
+
+
+
+
+
+ tsb — nunique / any / all
+
+
+
+ ← tsb playground
+ 🔢 nunique / any / all
+
+ Count unique values and perform boolean reductions, mirroring
+ Series.nunique() ,
+ Series.any() , and
+ Series.all() .
+
+
+ 1 · nunique — count distinct values
+
+
import { Series, nuniqueSeries } from "tsb";
+
+const s = new Series({ data: [1, 2, 2, 3, 3, 3, null] });
+
+nuniqueSeries(s); // 3 (null excluded by default)
+nuniqueSeries(s, { dropna: false }); // 4 (null counted as a distinct value)
+
nuniqueSeries(s) → 3
+nuniqueSeries(s, {dropna:false}) → 4
+
+
+ 2 · any — is any element truthy?
+
+
import { anySeries } from "tsb";
+
+const allZero = new Series({ data: [0, 0, 0] });
+const hasOne = new Series({ data: [0, 0, 1] });
+
+anySeries(allZero); // false
+anySeries(hasOne); // true
+
+// With nulls (skipna=true by default)
+const withNull = new Series({ data: [null, 0, null] });
+anySeries(withNull); // false — null skipped, 0 is falsy
+
anySeries(allZero) → false
+anySeries(hasOne) → true
+anySeries(withNull) → false
+
+
+ 3 · all — are all elements truthy?
+
+
import { allSeries } from "tsb";
+
+const allTrue = new Series({ data: [1, 2, 3] });
+const hasFalsy = new Series({ data: [1, 0, 3] });
+
+allSeries(allTrue); // true
+allSeries(hasFalsy); // false
+
+// Empty or all-null series vacuously returns true
+allSeries(new Series({ data: [] })); // true
+allSeries(new Series({ data: [null, null] })); // true
+
allSeries(allTrue) → true
+allSeries(hasFalsy) → false
+allSeries([]) → true (vacuous)
+allSeries([null]) → true (vacuous)
+
+
+ 4 · DataFrame nunique
+
+
import { DataFrame, nuniqueDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+ category: ["A", "B", "A", "C"],
+ value: [1, 2, 1, 3 ],
+});
+
+nuniqueDataFrame(df); // per-column: category→3, value→3
+nuniqueDataFrame(df, { axis: 1 }); // per-row: how many distinct values in each row
+
nuniqueDataFrame(df) → category: 3, value: 3
+nuniqueDataFrame(df, {axis:1}) → row0: 2, row1: 2, row2: 2, row3: 2
+
+
+ 5 · DataFrame any / all
+
+
import { anyDataFrame, allDataFrame } from "tsb";
+
+const df2 = DataFrame.fromColumns({
+ a: [0, 0, 1],
+ b: [1, 1, 1],
+});
+
+anyDataFrame(df2); // a: true, b: true (each col has at least one truthy)
+allDataFrame(df2); // a: false, b: true (col a has a 0)
+
+// axis=1: reduce across columns per row
+anyDataFrame(df2, { axis: 1 }); // row0: true, row1: true, row2: true
+allDataFrame(df2, { axis: 1 }); // row0: false, row1: false, row2: true
+
anyDataFrame(df2) → a: true, b: true
+allDataFrame(df2) → a: false, b: true
+anyDataFrame(df2,{axis:1}) → [true, true, true]
+allDataFrame(df2,{axis:1}) → [false, false, true]
+
+
+
diff --git a/playground/pct_change.html b/playground/pct_change.html
new file mode 100644
index 00000000..3576797a
--- /dev/null
+++ b/playground/pct_change.html
@@ -0,0 +1,448 @@
+
+
+
+
+
+ tsb — pct_change
+
+
+
+
+
+
Initializing playground…
+
+ ← Back to roadmap
+ 📊 pct_change — Interactive Playground
+ Compute the fractional change between each element and a prior element.
+ Mirrors pandas.Series.pct_change() /
+ pandas.DataFrame.pct_change().
+ Edit any code block below and press ▶ Run
+ (or Ctrl+Enter) to execute it live in your browser.
+
+
+
+
+
1 · Basic pct_change on a Series
+
pctChangeSeries(series) returns the fractional (not percentage) change
+ from each previous element. The first element is always null.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
2 · Multi-period change
+
The periods option controls the lag. Use periods: 2 to
+ compare each value to the one two steps earlier — useful for month-over-month
+ comparisons in quarterly data.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
3 · Handling missing values
+
By default, pctChangeSeries forward-fills (fillMethod: "pad")
+ NaN/null values before computing the ratio — so gaps don't break the chain.
+ Set fillMethod: null to propagate NaN instead.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
4 · Limit consecutive fills
+
The limit option caps how many consecutive NaN values get forward-filled.
+ Useful when you want to tolerate short gaps but not bridge large ones.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
5 · DataFrame column-wise pct_change
+
pctChangeDataFrame(df) applies pctChangeSeries to every
+ column independently. Ideal for comparing multiple assets or metrics simultaneously.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
6 · Negative periods (look-forward change)
+
A negative periods value computes the forward change: how much will
+ this element change by the time we reach |periods| steps ahead.
+ Useful for computing returns on a "hold for N periods" strategy.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
API Reference
+
All functions return a new Series/DataFrame of the same shape — inputs are never mutated.
+
// Series
+pctChangeSeries(series, {
+ periods?: number, // default 1 (positive = look back, negative = look forward)
+ fillMethod?: "pad" | "bfill" | null, // default "pad"
+ limit?: number | null, // max consecutive fills; default unlimited
+}): Series
+
+// DataFrame
+pctChangeDataFrame(df, {
+ periods?: number,
+ fillMethod?: "pad" | "bfill" | null,
+ limit?: number | null,
+ axis?: 0 | 1 | "index" | "columns", // default 0 (column-wise)
+}): DataFrame
+
+
+
+
+
+
diff --git a/playground/quantile.html b/playground/quantile.html
new file mode 100644
index 00000000..fb019d88
--- /dev/null
+++ b/playground/quantile.html
@@ -0,0 +1,182 @@
+
+
+
+
+
+ tsb — quantile
+
+
+
+ ← tsb playground
+ 📐 quantile
+
+ quantileSeries / quantileDataFrame —
+ compute quantile(s) / percentile(s), mirroring
+ Series.quantile() and
+ DataFrame.quantile() .
+
+ Equivalent Python: series.quantile(q=0.5) / df.quantile(q=0.5)
+
+ 1 · Scalar quantile (median)
+
+
const s = new Series({ data: [1, 2, 3, 4, 5] });
+quantileSeries(s); // default q=0.5 → 3
+quantileSeries(s, { q: 0.25 }); // → 2
+quantileSeries(s, { q: 0.75 }); // → 4
+
+
+
+ 2 · Multiple quantile levels
+
+
const s = new Series({ data: [1, 2, 3, 4, 5] });
+const q = quantileSeries(s, { q: [0.25, 0.5, 0.75] });
+// Series indexed by q-values: { 0.25: 2, 0.5: 3, 0.75: 4 }
+
+
+
+ 3 · Interpolation methods
+
+
const s = new Series({ data: [0, 10] });
+// q=0.5 → position 0.5 between indices 0 and 1
+quantileSeries(s, { q: 0.5, interpolation: "linear" }); // 5
+quantileSeries(s, { q: 0.5, interpolation: "lower" }); // 0
+quantileSeries(s, { q: 0.5, interpolation: "higher" }); // 10
+quantileSeries(s, { q: 0.5, interpolation: "midpoint" }); // 5
+quantileSeries(s, { q: 0.5, interpolation: "nearest" }); // 0
+
+
+
+ 4 · NaN handling (skipna=true by default)
+
+
const s = new Series({ data: [1, null, 3, NaN, 5] });
+quantileSeries(s, { q: 0.5 }); // ignores null/NaN → 3
+quantileSeries(s, { q: 0.5, skipna: false }); // NaN propagates → NaN
+
+
+
+ 5 · DataFrame — axis=0 (per-column quantiles)
+
+
const df = DataFrame.fromColumns({ a: [1, 2, 3, 4], b: [10, 20, 30, 40] });
+quantileDataFrame(df, { q: 0.5 });
+// Series { a: 2.5, b: 25 }
+
+quantileDataFrame(df, { q: [0.25, 0.5, 0.75] });
+// DataFrame 3×2: rows=[0.25, 0.5, 0.75], cols=[a, b]
+
+
+
+ 6 · DataFrame — axis=1 (per-row quantiles)
+
+
const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [3, 4, 5], c: [5, 6, 7] });
+quantileDataFrame(df, { axis: 1, q: 0.5 });
+// Series — median of each row: [3, 4, 5]
+
+
+
+ 7 · Q=[0, 0.25, 0.5, 0.75, 1] summary table
+
+
const df = DataFrame.fromColumns({ score: [55, 70, 80, 88, 92, 95, 99] });
+quantileDataFrame(df, { q: [0, 0.25, 0.5, 0.75, 1] });
+// → summary statistics table
+
+
+
+
+
+
diff --git a/playground/replace.html b/playground/replace.html
new file mode 100644
index 00000000..19da518a
--- /dev/null
+++ b/playground/replace.html
@@ -0,0 +1,408 @@
+
+
+
+
+
+ tsb — replace (value substitution)
+
+
+
+
+
+
Loading tsb runtime…
+
+
+ ← Back to playground index
+
+ replace — value substitution
+
+ replaceSeries / replaceDataFrame substitute values
+ matching a pattern with a new value.
+ Supports scalar, array, and mapping (Record / Map) replacement specs.
+ Mirrors Series.replace() and DataFrame.replace() from pandas.
+
+
+
+
+
1 · Scalar → scalar replacement
+
+ Replace every occurrence of a single value with another value.
+ Works on numbers, strings, booleans, and null.
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
2 · Array replacement
+
+ Replace a list of values with a single target, or perform pair-wise
+ replacement using two equal-length arrays.
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
3 · Mapping (Record / Map) replacement
+
+ Pass a lookup table as either a plain object (Record<string, Scalar>)
+ or a JavaScript Map for full type flexibility.
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
4 · DataFrame replacement
+
+ replaceDataFrame applies the same spec to all columns by
+ default. Use the columns option to restrict which columns
+ are affected.
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
API Reference
+
// Replace values in a Series
+replaceSeries(
+ series: Series,
+ spec: ReplaceSpec,
+ options?: ReplaceOptions,
+): Series
+
+// Replace values in a DataFrame
+replaceDataFrame(
+ df: DataFrame,
+ spec: ReplaceSpec,
+ options?: DataFrameReplaceOptions,
+): DataFrame
+
+// Replacement spec variants
+type ReplaceSpec =
+ | { toReplace: Scalar; value: Scalar } // scalar → scalar
+ | { toReplace: Scalar[]; value: Scalar } // array → scalar
+ | { toReplace: Scalar[]; value: Scalar[] } // array → array (pair-wise)
+ | { toReplace: Record<string, Scalar> } // Record mapping
+ | { toReplace: Map<Scalar, Scalar> } // Map mapping
+
+// Options
+interface ReplaceOptions {
+ matchNaN?: boolean; // treat NaN===NaN for matching (default: true)
+}
+
+interface DataFrameReplaceOptions extends ReplaceOptions {
+ columns?: string[]; // only replace in these columns (default: all)
+}
+
+
+
+
+
+
diff --git a/playground/rolling_apply.html b/playground/rolling_apply.html
new file mode 100644
index 00000000..b307cdbd
--- /dev/null
+++ b/playground/rolling_apply.html
@@ -0,0 +1,225 @@
+
+
+
+
+
+ tsb — Rolling Apply & Multi-Aggregation
+
+
+
+ tsb — Rolling Apply & Multi-Aggregation
+
+ Standalone functions for applying custom aggregation logic over sliding
+ windows, mirroring
+
+ pandas.Series.rolling().apply()
+
+ and
+
+ Rolling.agg()
+ .
+
+
+ 1. rollingApply — Custom Function Per Window
+
+ Apply any aggregation function to each rolling window. The function
+ receives the valid (non-null, non-NaN) numeric values
+ in the window and must return a single number.
+
+ import { rollingApply } from "tsb";
+
+const prices = new Series({ data: [10, 12, 11, 15, 14, 16], name: "price" });
+
+// Custom: range (max - min) over each 3-day window
+const range = (w) => Math.max(...w) - Math.min(...w);
+
+rollingApply(prices, 3, range).toArray();
+// [null, null, 2, 4, 4, 2]
+// ↑↑ insufficient data (need 3 observations)
+
+
+
Options
+
+
+ Option Default Description
+
+
+ minPeriodswindowMinimum valid observations to compute (null otherwise)
+ centerfalseCentre the window (symmetric) instead of trailing
+ rawfalsePass full window including nulls (filtered to valid nums before fn call)
+
+
+
+
+ // minPeriods=1 → start computing from the very first position
+rollingApply(prices, 3, range, { minPeriods: 1 }).toArray();
+// [0, 2, 2, 4, 4, 2]
+
+// center=true → symmetric window around each point
+rollingApply(prices, 3, range, { center: true }).toArray();
+// [null, 2, 4, 4, 2, null]
+
+ 2. rollingAgg — Multiple Aggregations at Once
+
+ Apply several named aggregation functions in a single pass over a Series,
+ returning a DataFrame where each column holds one
+ aggregation result.
+
+ import { rollingAgg } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4, 5, 6, 7, 8] });
+
+const result = rollingAgg(s, 3, {
+ mean: (w) => w.reduce((a, b) => a + b, 0) / w.length,
+ max: (w) => Math.max(...w),
+ min: (w) => Math.min(...w),
+ range:(w) => Math.max(...w) - Math.min(...w),
+});
+
+// result is a DataFrame with columns: "mean", "max", "min", "range"
+// result.col("mean").toArray() → [null, null, 2, 3, 4, 5, 6, 7]
+// result.col("range").toArray() → [null, null, 2, 2, 2, 2, 2, 2]
+
+
+ Pandas equivalent:
+ s.rolling(3).agg({"mean": np.mean, "max": np.max, "min": np.min})
+
+
+ 3. dataFrameRollingApply — Apply Per Column
+
+ Apply a single custom function independently to each column of a
+ DataFrame, returning a new DataFrame of the same shape.
+
+ import { dataFrameRollingApply } from "tsb";
+
+const df = DataFrame.fromColumns({
+ open: [100, 102, 101, 105, 103],
+ close: [101, 103, 100, 106, 104],
+});
+
+// Pairwise range within each 2-step window per column
+const range = (w) => Math.max(...w) - Math.min(...w);
+
+dataFrameRollingApply(df, 2, range);
+// open close
+// 0 null null
+// 1 2 2
+// 2 1 3
+// 3 4 6
+// 4 2 2
+
+ 4. dataFrameRollingAgg — Multi-Agg Per Column
+
+ Apply multiple named aggregation functions to every column of a
+ DataFrame. The result has columns named
+ {originalColumn}_{aggName}.
+
+ import { dataFrameRollingAgg } from "tsb";
+
+const df = DataFrame.fromColumns({
+ A: [1, 2, 3, 4, 5],
+ B: [10, 20, 30, 40, 50],
+});
+
+const out = dataFrameRollingAgg(df, 3, {
+ sum: (w) => w.reduce((a, b) => a + b, 0),
+ mean: (w) => w.reduce((a, b) => a + b, 0) / w.length,
+});
+
+// Columns: "A_sum", "A_mean", "B_sum", "B_mean"
+// A_sum: [null, null, 6, 9, 12]
+// A_mean: [null, null, 2, 3, 4]
+// B_sum: [null, null, 60, 90, 120]
+// B_mean: [null, null, 20, 30, 40]
+
+ Comparison with pandas
+
+
+ tsb pandas
+
+
+
+ rollingApply(s, w, fn)
+ s.rolling(w).apply(fn, raw=True)
+
+
+ rollingApply(s, w, fn, {minPeriods:1})
+ s.rolling(w, min_periods=1).apply(fn)
+
+
+ rollingAgg(s, w, {f1, f2})
+ s.rolling(w).agg({"f1": f1, "f2": f2})
+
+
+ dataFrameRollingApply(df, w, fn)
+ df.rolling(w).apply(fn)
+
+
+ dataFrameRollingAgg(df, w, {f1, f2})
+ df.rolling(w).agg({"f1": f1, "f2": f2})
+
+
+
+
+ Use case: Bollinger Band width
+ import { rollingAgg } from "tsb";
+
+// Bollinger Band width = (upper - lower) / middle
+// where upper = mean + 2·std, lower = mean - 2·std
+const prices = new Series({
+ data: [20, 21, 22, 20, 19, 21, 23, 24, 22, 21],
+ name: "price",
+});
+
+const stats = rollingAgg(prices, 5, {
+ mean: (w) => w.reduce((a, b) => a + b, 0) / w.length,
+ std: (w) => {
+ const m = w.reduce((a, b) => a + b, 0) / w.length;
+ return Math.sqrt(w.reduce((a, b) => a + (b - m) ** 2, 0) / (w.length - 1));
+ },
+});
+
+// Bollinger Band width = 4 * std / mean
+const bw = stats.col("std").toArray().map((std, i) => {
+ const mean = stats.col("mean").toArray()[i];
+ if (std === null || mean === null || mean === 0) return null;
+ return (4 * (std as number)) / (mean as number);
+});
+
+
+ ← Back to tsb playground index
+
+
+
diff --git a/playground/sem_var.html b/playground/sem_var.html
new file mode 100644
index 00000000..a3114054
--- /dev/null
+++ b/playground/sem_var.html
@@ -0,0 +1,90 @@
+
+
+
+
+
+ tsb — sem_var
+
+
+
+ ← tsb playground
+ 📊 Variance & Standard Error (sem_var)
+
+ varSeries / semSeries /
+ varDataFrame / semDataFrame —
+ compute sample/population variance and standard error of the mean, mirroring
+ Series.var() and
+ Series.sem() .
+
+ Equivalent Python: series.var(ddof=1) / series.sem()
+
+ 1 · Sample variance (ddof=1)
+
+
import { Series, varSeries } from "tsb";
+
+const s = new Series({ data: [2, 4, 4, 4, 5, 5, 7, 9] });
+varSeries(s); // ≈4.5714 (sample variance, ddof=1)
+varSeries(s, { ddof: 0 }); // 4.0 (population variance, ddof=0)
+
varSeries(s) → 4.5714 (approx)
+varSeries(s, {ddof:0}) → 4.0
+
+
+ 2 · Standard error of the mean
+
+
import { semSeries } from "tsb";
+
+// SEM = sqrt(var / n)
+semSeries(s); // sqrt(4.5714 / 8) ≈ 0.7559
+
semSeries(s) ≈ 0.7559
+
+
+ 3 · Handling missing values
+
+
const s2 = new Series({ data: [1, 2, 3, null, 5] });
+
+varSeries(s2); // skipna=true (default): ignores null
+varSeries(s2, { skipna: false }); // propagates NaN when null present
+varSeries(s2, { minCount: 5 }); // NaN: need 5 valid values but only 4
+
varSeries(s2) → 2.9167 (approx)
+varSeries(s2, {skipna:false}) → NaN
+varSeries(s2, {minCount:5}) → NaN
+
+
+ 4 · DataFrame column-wise variance
+
+
import { DataFrame, varDataFrame, semDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+ a: [1, 2, 3],
+ b: [10, 20, 30],
+});
+
+varDataFrame(df); // Series { a: 1, b: 100 }
+semDataFrame(df); // Series { a: sqrt(1/3), b: sqrt(100/3) }
+varDataFrame(df, { axis: 1 }); // row-wise variance
+
varDataFrame(df) → a: 1.0, b: 100.0
+semDataFrame(df) → a: ≈0.577, b: ≈5.774
+varDataFrame(df, {axis:1}) → row0: 40.5, row1: 162.0, row2: 364.5
+
+
+ 5 · numericOnly — skip non-numeric columns
+
+
const df2 = DataFrame.fromColumns({
+ score: [10, 20, 30],
+ label: ["A", "B", "C"],
+});
+
+varDataFrame(df2, { numericOnly: true });
+// Only includes "score", excludes "label"
+
varDataFrame(df2, {numericOnly:true}) → score: 100.0
+
+
+
diff --git a/playground/skew_kurt.html b/playground/skew_kurt.html
new file mode 100644
index 00000000..bec28a8b
--- /dev/null
+++ b/playground/skew_kurt.html
@@ -0,0 +1,137 @@
+
+
+
+
+
+ tsb — skew & kurtosis
+
+
+
+ ← tsb playground
+ 📐 skewSeries / kurtSeries
+
+ skewSeries / kurtSeries —
+ compute the adjusted Fisher–Pearson skewness and excess kurtosis (bias-corrected), mirroring
+ Series.skew() and
+ Series.kurt() .
+
+ Equivalent Python: series.skew() / series.kurt()
+
+ 1 · Symmetric distribution — skew ≈ 0
+
+
const s = new Series({ data: [1, 2, 3, 4, 5] });
+skewSeries(s);
+// → 0
+
+
+
+ 2 · Right-skewed distribution — positive skew
+
+
const s = new Series({ data: [1, 2, 3, 4, 100] });
+skewSeries(s);
+// → large positive value
+
+
+
+ 3 · Kurtosis — uniform-like (platykurtic, negative excess)
+
+
const s = new Series({ data: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] });
+kurtSeries(s);
+// → negative (flatter than normal)
+
+
+
+ 4 · NaN propagation — too few values
+
+
skewSeries(new Series({ data: [1, 2] })); // NaN — need ≥ 3
+kurtSeries(new Series({ data: [1, 2, 3] })); // NaN — need ≥ 4
+
+
+
+ 5 · DataFrame column-wise skewness
+
+
const df = DataFrame.fromColumns({
+ symmetric: [1, 2, 3, 4, 5],
+ right_skew: [1, 2, 3, 4, 100],
+});
+skewDataFrame(df).values;
+
+
+
+ 6 · DataFrame row-wise kurtosis
+
+
const df = DataFrame.fromColumns({
+ a: [1, 10], b: [2, 10], c: [3, 10], d: [4, 10], e: [100, 10], f: [5, 10],
+});
+kurtDataFrame(df, { axis: 1 }).values;
+
+
+
+
+
+
diff --git a/playground/string_ops.html b/playground/string_ops.html
new file mode 100644
index 00000000..5d9fff07
--- /dev/null
+++ b/playground/string_ops.html
@@ -0,0 +1,282 @@
+
+
+
+
+
+ tsb — String Operations
+
+
+
+
+ tsb
+ string_ops
+ Standalone string operations for Series and arrays
+
+
+
+ string_ops provides module-level string functions that complement the
+ Series.str accessor. All functions accept a Series, a
+ string[], or a scalar string.
+
+
+
+
+
strNormalize — Unicode normalisation
+
Normalise every element to NFC, NFD, NFKC, or NFKD. Useful when mixing text
+ from different sources (e.g. macOS NFD vs Windows NFC).
+
+
+ Input strings (one per line)
+
+
+
+ Normalization form
+
+ NFC (compose)
+ NFD (decompose)
+ NFKC (compat compose)
+ NFKD (compat decompose)
+
+
+
+
Run
+
+
+
+
+
+
strGetDummies — one-hot encode by delimiter
+
Split each string by a delimiter and produce a binary indicator DataFrame —
+ one column per unique token. Equivalent to pandas.Series.str.get_dummies().
+
+
Run
+
+
+
+
+
+
strExtractAll — extract all regex matches
+
Find every non-overlapping regex match in each element. Returns a JSON-encoded
+ array of match arrays per element — parse with JSON.parse.
+
+
Run
+
+
+
+
+
+
strRemovePrefix / strRemoveSuffix
+
Strip a leading or trailing string from elements only when it is present.
+
+
Run
+
+
+
+
+
+
strTranslate — character-level substitution
+
Replace or delete individual characters using a lookup table.
+ Format: one mapping per line as from=to or from=
+ to delete.
+
+
+ Input strings (one per line)
+
+
+
+ Translation table (from=to, one per line)
+
+
+
+
Run
+
+
+
+
+
+
strCharWidth & strByteLength — display & byte widths
+
+ strCharWidth counts columns for terminal display (CJK chars count as 2).
+ strByteLength counts UTF-8 bytes (useful for byte-limited APIs).
+
+
Input strings (one per line)
+
+
Run
+
+
+
+
+
+
+
diff --git a/playground/string_ops_extended.html b/playground/string_ops_extended.html
new file mode 100644
index 00000000..81bdaddb
--- /dev/null
+++ b/playground/string_ops_extended.html
@@ -0,0 +1,413 @@
+
+
+
+
+
+ tsb — Extended String Operations
+
+
+
+
+ tsb
+ string_ops_extended
+ Advanced standalone string operations: split-expand, extract, partition, multi-replace, indent, dedent
+
+
+
+ string_ops_extended adds advanced string utilities that complement
+ string_ops and the Series.str accessor. All functions accept
+ a Series, an array, or a scalar string.
+
+
+
+
+
strSplitExpand — split and expand to DataFrame columns
+
+ Split each element by a delimiter and expand the parts into a DataFrame
+ with one column per position. Mirrors pandas.Series.str.split(expand=True).
+ Shorter rows are padded with null.
+
+
+
Run strSplitExpand
+
+
+
+
+
+
strExtractGroups — extract regex capture groups
+
+ Extract regex capture groups from each element into a DataFrame.
+ Named groups ((?<name>...)) become column names; unnamed groups
+ become 0, 1, … Non-matching rows produce null.
+
+
+
+ Input strings (one per line)
+
+
+
+
Pattern (supports named groups)
+
+
Use (?<name>...) for named capture groups.
+
+
+
Run strExtractGroups
+
+
+
+
+
+
strPartition / strRPartition — split into (before, sep, after)
+
+ strPartition splits at the first occurrence of the separator;
+ strRPartition splits at the last occurrence. When the separator is not
+ found, strPartition returns [s, "", ""] and
+ strRPartition returns ["", "", s].
+
+
+
+ Input strings (one per line)
+
+
+
+ Separator
+
+
+
+
Run strPartition & strRPartition
+
+
+
+
+
+
strMultiReplace — apply multiple replacements in sequence
+
+ Apply an ordered list of {pat, repl} pairs to each element.
+ Each replacement is applied to the result of the previous one.
+ Patterns can be string literals (replaced globally) or RegExp objects.
+
+
+
+ Input strings (one per line)
+
+
+
+ Replacements (JSON array of {pat, repl} pairs)
+
+
+
+
Run strMultiReplace
+
+
+
+
+
+
strIndent / strDedent — line-level indentation utilities
+
+ strIndent adds a prefix to every non-empty line (mirrors
+ textwrap.indent).
+ strDedent removes the common leading whitespace from all lines
+ (mirrors textwrap.dedent).
+
+
+
+ Input text (use literal \n for newlines)
+
+
+
+ Indent prefix (for strIndent)
+
+
+
+
Run strIndent & strDedent
+
+
+
+
+
+
+
+
diff --git a/playground/to_datetime.html b/playground/to_datetime.html
new file mode 100644
index 00000000..9ed06810
--- /dev/null
+++ b/playground/to_datetime.html
@@ -0,0 +1,118 @@
+
+
+
+
+
+ tsb — toDatetime
+
+
+
+ ← tsb playground
+ toDatetime stats
+
+ Convert scalars, arrays, or Series values to JavaScript
+ Date objects — mirroring
+ pandas.to_datetime() .
+
+
+ Supported input formats
+
+ Format Example Result
+ ISO 8601 date "2024-03-15"Mar 15 2024
+ ISO 8601 datetime "2024-03-15T12:00:00Z"Mar 15 2024 12:00 UTC
+ US format (MM/DD/YYYY) "01/15/2024"Jan 15 2024
+ European (DD-MM-YYYY) "15-03-2024"Mar 15 2024
+ Compact (YYYYMMDD) "20240315"Mar 15 2024
+ Unix ms (number) 1710460800000Mar 15 2024 00:00 UTC
+ Unix s (unit="s") 1710460800Mar 15 2024 00:00 UTC
+ Date object new Date(2024,2,15)unchanged
+ null / undefined / NaN nullnull
+
+
+ Error handling
+
+ errors= Behaviour
+ "raise" (default)Throws TypeError on unparseable input
+ "coerce"Returns null on unparseable input
+ "ignore"Returns the original value unchanged
+
+
+ Quick examples
+ import { toDatetime, Series } from "tsb";
+
+// Scalar
+toDatetime("2024-03-15"); // Date: Mar 15 2024
+toDatetime(1710460800000); // Date from Unix ms
+toDatetime(1710460800, { unit: "s" }); // Date from Unix seconds
+toDatetime(null); // null
+toDatetime("nope", { errors: "coerce" }); // null
+toDatetime("nope", { errors: "ignore" }); // "nope"
+
+// Array
+toDatetime(["2024-01-01", null, "2024-06-15"]);
+// => [Date, null, Date]
+
+// Series
+const s = new Series({ data: ["2024-01-01", "2024-06-15", null] });
+toDatetime(s);
+// => Series<Date | null> with dtype=datetime
+
+ Python / pandas equivalent
+
+
+ Live demo
+ Enter a date string or number and click Convert :
+
+ Convert
+ —
+
+
+
+
diff --git a/playground/to_from_dict.html b/playground/to_from_dict.html
new file mode 100644
index 00000000..a8ca3e88
--- /dev/null
+++ b/playground/to_from_dict.html
@@ -0,0 +1,122 @@
+
+
+
+
+
+ tsb — toDictOriented / fromDictOriented
+
+
+
+ ← tsb playground
+
+ toDictOriented / fromDictOriented
+
+ Convert a DataFrame to and from dictionary structures with flexible orientation — mirrors
+
+ pandas.DataFrame.to_dict(orient=...) and
+
+ pandas.DataFrame.from_dict(orient=...) .
+
+
+ Supported orientations — toDictOriented
+
+ Orient Return type Description
+
+ "dict" / "columns"Record<col, Record<rowLabel, value>>Nested column → row-label → value map
+ "list"Record<col, value[]>Column name → array of values
+ "series"Record<col, Series>Column name → Series object
+ "split"{ index, columns, data }Serialisable split structure
+ "tight"{ index, columns, data, index_names, column_names }Split plus axis-name metadata
+ "records"Record<col, value>[]Array of row objects
+ "index"Record<rowLabel, Record<col, value>>Row-label → column → value
+
+
+
+ Supported orientations — fromDictOriented
+
+ Orient Input shape
+
+ "columns" (default){ col: value[] }
+ "index"{ rowLabel: { col: value } }
+ "split"{ index?, columns, data }
+ "tight"Same as "split", extra fields ignored
+
+
+
+ Example — all orientations
+ import { DataFrame } from "tsb";
+import { toDictOriented, fromDictOriented } from "tsb";
+
+const df = DataFrame.fromColumns(
+ { name: ["Alice", "Bob"], score: [92, 85] },
+ { index: new Index(["r0", "r1"]) },
+);
+
+// "dict" / "columns"
+toDictOriented(df, "dict");
+// { name: { r0: "Alice", r1: "Bob" }, score: { r0: 92, r1: 85 } }
+
+// "list"
+toDictOriented(df, "list");
+// { name: ["Alice", "Bob"], score: [92, 85] }
+
+// "records"
+toDictOriented(df, "records");
+// [ { name: "Alice", score: 92 }, { name: "Bob", score: 85 } ]
+
+// "split"
+toDictOriented(df, "split");
+// { index: ["r0", "r1"], columns: ["name", "score"], data: [["Alice", 92], ["Bob", 85]] }
+
+// "index"
+toDictOriented(df, "index");
+// { r0: { name: "Alice", score: 92 }, r1: { name: "Bob", score: 85 } }
+
+// fromDictOriented — columns (default)
+fromDictOriented({ name: ["Alice", "Bob"], score: [92, 85] });
+
+// fromDictOriented — index
+fromDictOriented(
+ { r0: { name: "Alice", score: 92 }, r1: { name: "Bob", score: 85 } },
+ "index",
+);
+
+// fromDictOriented — split (round-trip)
+const split = toDictOriented(df, "split");
+const df2 = fromDictOriented(split, "split");
+// df2 is equivalent to df
+
+
+ Missing values
+
+ Missing values (null / undefined) are preserved as null
+ in all orientations. When using fromDictOriented with "index"
+ orientation, any column that is absent from a given row object is filled with null.
+
+
+ Type signatures
+ function toDictOriented(df: DataFrame, orient: "dict" | "columns"): Record<string, Record<string, Scalar>>;
+function toDictOriented(df: DataFrame, orient: "list"): Record<string, Scalar[]>;
+function toDictOriented(df: DataFrame, orient: "series"): Record<string, Series<Scalar>>;
+function toDictOriented(df: DataFrame, orient: "split"): DictSplit;
+function toDictOriented(df: DataFrame, orient: "tight"): DictTight;
+function toDictOriented(df: DataFrame, orient: "records"): Record<string, Scalar>[];
+function toDictOriented(df: DataFrame, orient: "index"): Record<string, Record<string, Scalar>>;
+
+function fromDictOriented(data: Record<string, readonly Scalar[]>, orient?: "columns"): DataFrame;
+function fromDictOriented(data: Record<string, Record<string, Scalar>>, orient: "index"): DataFrame;
+function fromDictOriented(data: SplitInput, orient: "split" | "tight"): DataFrame;
+
+
+
diff --git a/playground/window_extended.html b/playground/window_extended.html
new file mode 100644
index 00000000..4232fa5d
--- /dev/null
+++ b/playground/window_extended.html
@@ -0,0 +1,304 @@
+
+
+
+
+
+ tsb — Rolling Extended Stats: sem, skew, kurt, quantile
+
+
+
+ tsb — Rolling Extended Statistics
+
+ Higher-order rolling window statistics extending the core
+
+ pandas.Series.rolling()
+
+ API:
+ sem , skew , kurt , and
+ quantile .
+
+
+ 1. rollingSem — Standard Error of the Mean
+
+ The standard error of the mean measures how much the sample mean
+ would vary across repeated samples. For a window of n values:
+
+ sem = std(ddof=1) / √n
+ Requires at least 2 valid observations per window.
+
+ import { rollingSem, Series } from "tsb";
+
+const s = new Series({ data: [2, 4, 4, 4, 5, 5, 7, 9], name: "x" });
+const sem3 = rollingSem(s, 3);
+// [null, null, 0.667, 0, 0.577, 0.577, 1.155, 2.082]
+
+
+
+
Live demo — sem with window=3
+
Comma-separated numbers (nulls accepted):
+
+
Window:
+
minPeriods:
+
Run
+
+
+
+ 2. rollingSkew — Fisher-Pearson Skewness
+
+ Skewness measures asymmetry of the distribution in each window.
+ Positive = right tail heavier; negative = left tail heavier.
+ Uses the unbiased Fisher-Pearson formula (same as pandas):
+
+ skew = [n/((n-1)(n-2))] × Σ[(xᵢ−x̄)/s]³
+ Requires ≥ 3 valid observations.
+
+ import { rollingSkew, Series } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4, 5] });
+rollingSkew(s, 3);
+// [null, null, 0, 0, 0] ← symmetric windows → zero skew
+
+
+
+
Live demo — skewness with window=4
+
+
Window:
+
Run
+
+
+
+ 3. rollingKurt — Excess Kurtosis
+
+ Kurtosis measures how heavy the tails are relative to a normal distribution.
+ The excess kurtosis subtracts 3, so a normal distribution gives 0.
+ Uses the Fisher (1930) unbiased formula:
+
+ kurt = [n(n+1)/((n-1)(n-2)(n-3))] × Σ[(xᵢ−x̄)/s]⁴ − 3(n-1)²/((n-2)(n-3))
+ Requires ≥ 4 valid observations.
+
+ import { rollingKurt, Series } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4] });
+rollingKurt(s, 4);
+// [null, null, null, -1.2] ← uniform distribution has kurt = -1.2
+
+
+
+
Live demo — excess kurtosis with window=5
+
+
Window:
+
Run
+
+
+
+ 4. rollingQuantile — Rolling Quantile
+
+ Computes any quantile within each sliding window using configurable
+ interpolation. When q = 0.5 this is identical to
+ rolling.median().
+
+
+ import { rollingQuantile, Series } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4, 5] });
+
+rollingQuantile(s, 0.5, 3); // rolling median: [null, null, 2, 3, 4]
+rollingQuantile(s, 0.25, 3); // [null, null, 1.5, 2.5, 3.5]
+rollingQuantile(s, 0.75, 3); // [null, null, 2.5, 3.5, 4.5]
+
+
+ Interpolation methods
+
+ Method Behaviour when q falls between two values
+
+ linear (default)Linear interpolation — same as NumPy / pandas default
+ lowerTake the lower of the two surrounding values
+ higherTake the higher of the two surrounding values
+ midpointArithmetic mean of the two surrounding values
+ nearestWhichever surrounding value is closest
+
+
+
+
+
+ Common Options
+
+ Option Type Default Description
+
+ minPeriodsnumber= window Minimum valid obs required per window
+ centerbooleanfalseCentre the window around each position
+
+
+
+
+ Note: Functions are pure — they return new Series objects
+ without modifying the input. Missing values (null, NaN)
+ are excluded from each window calculation.
+
+
+
+
+
diff --git a/src/core/api_types.ts b/src/core/api_types.ts
new file mode 100644
index 00000000..860d2050
--- /dev/null
+++ b/src/core/api_types.ts
@@ -0,0 +1,629 @@
+/**
+ * api_types — runtime type-checking predicates, mirroring `pandas.api.types`.
+ *
+ * Two groups of functions are provided:
+ *
+ * **Value-level predicates** — operate on arbitrary JavaScript values, equivalent
+ * to `pandas.api.types.is_scalar`, `is_list_like`, `is_number`, etc.
+ *
+ * **Dtype-level predicates** — accept a `Dtype` instance or a `DtypeName` string
+ * and answer questions about the dtype's kind, equivalent to
+ * `pandas.api.types.is_numeric_dtype`, `is_float_dtype`, etc.
+ *
+ * @example
+ * ```ts
+ * import { isScalar, isNumericDtype, Dtype } from "tsb";
+ * isScalar(42); // true
+ * isScalar([1, 2, 3]); // false
+ * isListLike([1, 2, 3]); // true
+ * isNumericDtype(Dtype.float64); // true
+ * isStringDtype("string"); // true
+ * ```
+ *
+ * @module
+ */
+
+import { Dtype } from "./dtype.ts";
+import type { DtypeName } from "../types.ts";
+
+// ─── internal helper ──────────────────────────────────────────────────────────
+
+/** Resolve a Dtype | DtypeName to a Dtype instance. */
+function resolveDtype(dtype: Dtype | DtypeName): Dtype {
+ if (dtype instanceof Dtype) {
+ return dtype;
+ }
+ return Dtype.from(dtype);
+}
+
+// ═════════════════════════════════════════════════════════════════════════════
+// VALUE-LEVEL PREDICATES
+// ═════════════════════════════════════════════════════════════════════════════
+
+/**
+ * Return `true` if `val` is a scalar (not a collection).
+ *
+ * Scalars: `string`, `number`, `bigint`, `boolean`, `symbol`, `null`,
+ * `undefined`, and `Date` objects. Arrays, plain objects, `Map`, `Set`,
+ * iterables, and class instances other than `Date` are **not** scalars.
+ *
+ * Mirrors `pandas.api.types.is_scalar`.
+ *
+ * @example
+ * ```ts
+ * isScalar(42); // true
+ * isScalar("hello"); // true
+ * isScalar(null); // true
+ * isScalar([1, 2]); // false
+ * isScalar({ a: 1 }); // false
+ * ```
+ */
+export function isScalar(val: unknown): boolean {
+ if (val === null || val === undefined) {
+ return true;
+ }
+ const t = typeof val;
+ if (t === "string" || t === "number" || t === "bigint" || t === "boolean" || t === "symbol") {
+ return true;
+ }
+ if (val instanceof Date) {
+ return true;
+ }
+ return false;
+}
+
+/**
+ * Return `true` if `val` is "list-like" — i.e. iterable (but not a string)
+ * or has a non-negative integer `length` property.
+ *
+ * Mirrors `pandas.api.types.is_list_like`.
+ *
+ * @example
+ * ```ts
+ * isListLike([1, 2, 3]); // true
+ * isListLike(new Set([1])); // true
+ * isListLike("abc"); // false (strings excluded)
+ * isListLike(42); // false
+ * isListLike({ a: 1 }); // false
+ * ```
+ */
+export function isListLike(val: unknown): boolean {
+ if (val === null || val === undefined) {
+ return false;
+ }
+ if (typeof val === "string") {
+ return false;
+ }
+ // Has Symbol.iterator and is not a plain number/boolean/bigint/symbol
+ if (typeof val === "number" || typeof val === "boolean" || typeof val === "bigint" || typeof val === "symbol") {
+ return false;
+ }
+ if (typeof val === "object" || typeof val === "function") {
+ if (Symbol.iterator in (val as object)) {
+ return true;
+ }
+ const len = (val as Record)["length"];
+ if (typeof len === "number" && len >= 0 && Number.isInteger(len)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/**
+ * Return `true` if `val` is array-like — i.e. has a non-negative integer
+ * `length` property.
+ *
+ * Mirrors `pandas.api.types.is_array_like`.
+ *
+ * @example
+ * ```ts
+ * isArrayLike([1, 2]); // true
+ * isArrayLike("abc"); // true (strings have .length)
+ * isArrayLike(42); // false
+ * isArrayLike({}); // false
+ * ```
+ */
+export function isArrayLike(val: unknown): boolean {
+ if (val === null || val === undefined) {
+ return false;
+ }
+ if (typeof val === "string") {
+ return true;
+ }
+ if (typeof val !== "object" && typeof val !== "function") {
+ return false;
+ }
+ const len = (val as Record)["length"];
+ return typeof len === "number" && len >= 0 && Number.isInteger(len);
+}
+
+/**
+ * Return `true` if `val` is dict-like — a plain object (not an array, not a
+ * `Date`, not a class instance).
+ *
+ * Mirrors `pandas.api.types.is_dict_like`.
+ *
+ * @example
+ * ```ts
+ * isDictLike({ a: 1 }); // true
+ * isDictLike(new Map()); // true (has .get / .set)
+ * isDictLike([1, 2]); // false
+ * isDictLike("abc"); // false
+ * ```
+ */
+export function isDictLike(val: unknown): boolean {
+ if (val === null || val === undefined) {
+ return false;
+ }
+ if (typeof val !== "object") {
+ return false;
+ }
+ if (Array.isArray(val)) {
+ return false;
+ }
+ // Treat Map as dict-like (supports key lookup)
+ if (val instanceof Map) {
+ return true;
+ }
+ // Date is not dict-like
+ if (val instanceof Date) {
+ return false;
+ }
+ // Plain objects and other objects with properties
+ return true;
+}
+
+/**
+ * Return `true` if `val` is an iterator — i.e. has a callable `next` method.
+ *
+ * Mirrors `pandas.api.types.is_iterator`.
+ *
+ * @example
+ * ```ts
+ * isIterator([1, 2][Symbol.iterator]()); // true
+ * isIterator([1, 2]); // false
+ * ```
+ */
+export function isIterator(val: unknown): boolean {
+ if (val === null || val === undefined) {
+ return false;
+ }
+ if (typeof val !== "object" && typeof val !== "function") {
+ return false;
+ }
+ return typeof (val as Record)["next"] === "function";
+}
+
+/**
+ * Return `true` if `val` is a `number` (including `NaN` and `±Infinity`).
+ *
+ * Mirrors `pandas.api.types.is_number`.
+ *
+ * @example
+ * ```ts
+ * isNumber(3.14); // true
+ * isNumber(NaN); // true
+ * isNumber("3"); // false
+ * ```
+ */
+export function isNumber(val: unknown): val is number {
+ return typeof val === "number";
+}
+
+/**
+ * Return `true` if `val` is a `boolean`.
+ *
+ * Mirrors `pandas.api.types.is_bool`.
+ *
+ * @example
+ * ```ts
+ * isBool(true); // true
+ * isBool(1); // false
+ * ```
+ */
+export function isBool(val: unknown): val is boolean {
+ return typeof val === "boolean";
+}
+
+/**
+ * Return `true` if `val` is a `string`.
+ *
+ * Named `isStringValue` to distinguish from the dtype-level `isStringDtype`.
+ * Mirrors `pandas.api.types.is_string` (not to be confused with dtype checks).
+ *
+ * @example
+ * ```ts
+ * isStringValue("hello"); // true
+ * isStringValue(42); // false
+ * ```
+ */
+export function isStringValue(val: unknown): val is string {
+ return typeof val === "string";
+}
+
+/**
+ * Return `true` if `val` is a finite floating-point number (has a fractional
+ * component or is finite non-integer). `NaN`, `±Infinity` are **not** floats
+ * in the pandas sense.
+ *
+ * Mirrors `pandas.api.types.is_float`.
+ *
+ * @example
+ * ```ts
+ * isFloat(3.14); // true
+ * isFloat(3.0); // false (integer value)
+ * isFloat(NaN); // false
+ * isFloat(Infinity); // false
+ * ```
+ */
+export function isFloat(val: unknown): boolean {
+ if (typeof val !== "number") {
+ return false;
+ }
+ if (!Number.isFinite(val)) {
+ return false;
+ }
+ return val !== Math.trunc(val);
+}
+
+/**
+ * Return `true` if `val` is a finite integer-valued number.
+ *
+ * Mirrors `pandas.api.types.is_integer`.
+ *
+ * @example
+ * ```ts
+ * isInteger(3); // true
+ * isInteger(3.0); // true (integer value stored as float)
+ * isInteger(3.14); // false
+ * isInteger(NaN); // false
+ * ```
+ */
+export function isInteger(val: unknown): boolean {
+ return typeof val === "number" && Number.isInteger(val);
+}
+
+/**
+ * Return `true` if `val` is a `bigint`.
+ *
+ * @example
+ * ```ts
+ * isBigInt(42n); // true
+ * isBigInt(42); // false
+ * ```
+ */
+export function isBigInt(val: unknown): val is bigint {
+ return typeof val === "bigint";
+}
+
+/**
+ * Return `true` if `val` is a `RegExp`.
+ *
+ * Mirrors `pandas.api.types.is_re`.
+ *
+ * @example
+ * ```ts
+ * isRegExp(/abc/); // true
+ * isRegExp(new RegExp("x")); // true
+ * isRegExp("abc"); // false
+ * ```
+ */
+export function isRegExp(val: unknown): val is RegExp {
+ return val instanceof RegExp;
+}
+
+/**
+ * Return `true` if `val` can be compiled into a `RegExp` — i.e. it is either
+ * a `string` or already a `RegExp`.
+ *
+ * Mirrors `pandas.api.types.is_re_compilable`.
+ *
+ * @example
+ * ```ts
+ * isReCompilable("abc"); // true
+ * isReCompilable(/abc/); // true
+ * isReCompilable(42); // false
+ * ```
+ */
+export function isReCompilable(val: unknown): boolean {
+ return typeof val === "string" || val instanceof RegExp;
+}
+
+/**
+ * Return `true` if `val` is a "missing" value in the pandas sense: `null`,
+ * `undefined`, or `NaN`.
+ *
+ * @example
+ * ```ts
+ * isMissing(null); // true
+ * isMissing(undefined); // true
+ * isMissing(NaN); // true
+ * isMissing(0); // false
+ * isMissing(""); // false
+ * ```
+ */
+export function isMissing(val: unknown): boolean {
+ if (val === null || val === undefined) {
+ return true;
+ }
+ if (typeof val === "number" && Number.isNaN(val)) {
+ return true;
+ }
+ return false;
+}
+
+/**
+ * Return `true` if `val` is "hashable" — usable as an object-key in
+ * JavaScript. In practice this means it is a primitive (`string`, `number`,
+ * `bigint`, `boolean`, `symbol`, `null`, `undefined`).
+ *
+ * Mirrors the spirit of `pandas.api.types.is_hashable`.
+ *
+ * @example
+ * ```ts
+ * isHashable("key"); // true
+ * isHashable(42); // true
+ * isHashable({}); // false
+ * isHashable([]); // false
+ * ```
+ */
+export function isHashable(val: unknown): boolean {
+ if (val === null || val === undefined) {
+ return true;
+ }
+ const t = typeof val;
+ return t === "string" || t === "number" || t === "bigint" || t === "boolean" || t === "symbol";
+}
+
+/**
+ * Return `true` if `val` is a `Date` instance.
+ *
+ * @example
+ * ```ts
+ * isDate(new Date()); // true
+ * isDate("2024-01-01"); // false
+ * ```
+ */
+export function isDate(val: unknown): val is Date {
+ return val instanceof Date;
+}
+
+// ═════════════════════════════════════════════════════════════════════════════
+// DTYPE-LEVEL PREDICATES
+// ═════════════════════════════════════════════════════════════════════════════
+
+/**
+ * Return `true` if the dtype is numeric (integer, unsigned integer, or float).
+ *
+ * Mirrors `pandas.api.types.is_numeric_dtype`.
+ *
+ * @example
+ * ```ts
+ * isNumericDtype(Dtype.float64); // true
+ * isNumericDtype("int32"); // true
+ * isNumericDtype("string"); // false
+ * ```
+ */
+export function isNumericDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isNumeric;
+}
+
+/**
+ * Return `true` if the dtype is any integer kind (signed or unsigned).
+ *
+ * Mirrors `pandas.api.types.is_integer_dtype`.
+ *
+ * @example
+ * ```ts
+ * isIntegerDtype("int64"); // true
+ * isIntegerDtype("uint8"); // true
+ * isIntegerDtype("float32"); // false
+ * ```
+ */
+export function isIntegerDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isInteger;
+}
+
+/**
+ * Return `true` if the dtype is a signed integer (`int8`–`int64`).
+ *
+ * Mirrors `pandas.api.types.is_signed_integer_dtype`.
+ *
+ * @example
+ * ```ts
+ * isSignedIntegerDtype("int32"); // true
+ * isSignedIntegerDtype("uint32"); // false
+ * ```
+ */
+export function isSignedIntegerDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isSignedInteger;
+}
+
+/**
+ * Return `true` if the dtype is an unsigned integer (`uint8`–`uint64`).
+ *
+ * Mirrors `pandas.api.types.is_unsigned_integer_dtype`.
+ *
+ * @example
+ * ```ts
+ * isUnsignedIntegerDtype("uint64"); // true
+ * isUnsignedIntegerDtype("int64"); // false
+ * ```
+ */
+export function isUnsignedIntegerDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isUnsignedInteger;
+}
+
+/**
+ * Return `true` if the dtype is a floating-point type (`float32` or `float64`).
+ *
+ * Mirrors `pandas.api.types.is_float_dtype`.
+ *
+ * @example
+ * ```ts
+ * isFloatDtype("float64"); // true
+ * isFloatDtype("float32"); // true
+ * isFloatDtype("int32"); // false
+ * ```
+ */
+export function isFloatDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isFloat;
+}
+
+/**
+ * Return `true` if the dtype is boolean.
+ *
+ * Mirrors `pandas.api.types.is_bool_dtype`.
+ *
+ * @example
+ * ```ts
+ * isBoolDtype("bool"); // true
+ * isBoolDtype("int8"); // false
+ * ```
+ */
+export function isBoolDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isBool;
+}
+
+/**
+ * Return `true` if the dtype is the `string` dtype.
+ *
+ * Mirrors `pandas.api.types.is_string_dtype`.
+ *
+ * @example
+ * ```ts
+ * isStringDtype("string"); // true
+ * isStringDtype("object"); // false
+ * ```
+ */
+export function isStringDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isString;
+}
+
+/**
+ * Return `true` if the dtype is a datetime type.
+ *
+ * Mirrors `pandas.api.types.is_datetime64_dtype`.
+ *
+ * @example
+ * ```ts
+ * isDatetimeDtype("datetime"); // true
+ * isDatetimeDtype("string"); // false
+ * ```
+ */
+export function isDatetimeDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isDatetime;
+}
+
+/**
+ * Return `true` if the dtype is a timedelta type.
+ *
+ * Mirrors `pandas.api.types.is_timedelta64_dtype`.
+ *
+ * @example
+ * ```ts
+ * isTimedeltaDtype("timedelta"); // true
+ * isTimedeltaDtype("datetime"); // false
+ * ```
+ */
+export function isTimedeltaDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isTimedelta;
+}
+
+/**
+ * Return `true` if the dtype is the categorical dtype.
+ *
+ * Mirrors `pandas.api.types.is_categorical_dtype`.
+ *
+ * @example
+ * ```ts
+ * isCategoricalDtype("category"); // true
+ * isCategoricalDtype("string"); // false
+ * ```
+ */
+export function isCategoricalDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isCategory;
+}
+
+/**
+ * Return `true` if the dtype is the object dtype.
+ *
+ * Mirrors `pandas.api.types.is_object_dtype`.
+ *
+ * @example
+ * ```ts
+ * isObjectDtype("object"); // true
+ * isObjectDtype("string"); // false
+ * ```
+ */
+export function isObjectDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isObject;
+}
+
+/**
+ * Return `true` if the dtype represents complex numbers.
+ *
+ * JavaScript has no native complex number type, so this always returns `false`
+ * (no complex dtype exists in the `tsb` dtype system). Provided for API
+ * parity with `pandas.api.types.is_complex_dtype`.
+ *
+ * @example
+ * ```ts
+ * isComplexDtype("float64"); // false (no complex dtype)
+ * ```
+ */
+export function isComplexDtype(_dtype: Dtype | DtypeName): boolean {
+ return false;
+}
+
+/**
+ * Return `true` if the dtype is an "extension array" dtype — i.e. any dtype
+ * beyond the numeric primitives: `string`, `object`, `datetime`, `timedelta`,
+ * `category`.
+ *
+ * Mirrors `pandas.api.types.is_extension_array_dtype`.
+ *
+ * @example
+ * ```ts
+ * isExtensionArrayDtype("category"); // true
+ * isExtensionArrayDtype("datetime"); // true
+ * isExtensionArrayDtype("int64"); // false
+ * ```
+ */
+export function isExtensionArrayDtype(dtype: Dtype | DtypeName): boolean {
+ const d = resolveDtype(dtype);
+ return d.isString || d.isObject || d.isDatetime || d.isTimedelta || d.isCategory;
+}
+
+/**
+ * Return `true` if the dtype can hold period (date period) data.
+ * In the current `tsb` dtype system this maps to the `datetime` kind.
+ *
+ * Mirrors `pandas.api.types.is_period_dtype`.
+ *
+ * @example
+ * ```ts
+ * isPeriodDtype("datetime"); // true
+ * isPeriodDtype("float64"); // false
+ * ```
+ */
+export function isPeriodDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isDatetime;
+}
+
+/**
+ * Return `true` if the dtype is suitable for interval data — float or integer.
+ *
+ * Mirrors `pandas.api.types.is_interval_dtype`.
+ *
+ * @example
+ * ```ts
+ * isIntervalDtype("float64"); // true
+ * isIntervalDtype("int32"); // true
+ * isIntervalDtype("string"); // false
+ * ```
+ */
+export function isIntervalDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isNumeric;
+}
diff --git a/src/core/astype.ts b/src/core/astype.ts
new file mode 100644
index 00000000..6a9403be
--- /dev/null
+++ b/src/core/astype.ts
@@ -0,0 +1,245 @@
+/**
+ * astype — dtype coercion for Series and DataFrame.
+ *
+ * Mirrors `pandas.Series.astype` and `pandas.DataFrame.astype`:
+ * cast values to a target dtype, with null/NaN passthrough semantics
+ * matching pandas' default `errors="raise"` behaviour.
+ *
+ * @module
+ */
+
+import { DataFrame } from "./frame.ts";
+import { Series } from "./series.ts";
+import { Dtype } from "./dtype.ts";
+import type { DtypeName, Scalar } from "../types.ts";
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+function isNull(v: Scalar): v is null | undefined {
+ return v === null || v === undefined;
+}
+
+/** Integer clamp ranges for each integer dtype name. */
+const INT_RANGES: Readonly<
+ Record
+> = {
+ int8: { lo: -128, hi: 127, unsigned: false },
+ int16: { lo: -32768, hi: 32767, unsigned: false },
+ int32: { lo: -2147483648, hi: 2147483647, unsigned: false },
+ int64: { lo: Number.MIN_SAFE_INTEGER, hi: Number.MAX_SAFE_INTEGER, unsigned: false },
+ uint8: { lo: 0, hi: 255, unsigned: true },
+ uint16: { lo: 0, hi: 65535, unsigned: true },
+ uint32: { lo: 0, hi: 4294967295, unsigned: true },
+ uint64: { lo: 0, hi: Number.MAX_SAFE_INTEGER, unsigned: true },
+};
+
+/**
+ * Cast a single scalar value to the target dtype.
+ *
+ * Rules per dtype kind:
+ * - **int/uint**: `Math.trunc(Number(v))`, clamped to the dtype range. `null/undefined → null`.
+ * - **float32/float64**: `Number(v)`. `null/undefined → null`. Strings that
+ * are not parsable become `NaN` (same as pandas `errors="coerce"`-like
+ * number coercion).
+ * - **bool**: falsy values → `false`; truthy → `true`. `null/undefined → null`.
+ * - **string**: `String(v)`. `null/undefined → null`.
+ * - **datetime**: `new Date(Number(v))` for numbers; `new Date(String(v))` for
+ * strings; `null/undefined → null`.
+ * - **object/category/timedelta**: value is returned as-is (no transformation).
+ */
+export function castScalar(v: Scalar, dtype: Dtype): Scalar {
+ if (isNull(v)) {
+ return null;
+ }
+
+ const k = dtype.kind;
+
+ if (k === "int" || k === "uint") {
+ if (typeof v === "boolean") {
+ return v ? 1 : 0;
+ }
+ if (v instanceof Date) {
+ return Math.trunc(v.getTime());
+ }
+ const n = Number(v);
+ if (Number.isNaN(n)) {
+ return null;
+ }
+ const range = INT_RANGES[dtype.name];
+ if (range === undefined) {
+ return Math.trunc(n);
+ }
+ const t = Math.trunc(n);
+ return Math.max(range.lo, Math.min(range.hi, t));
+ }
+
+ if (k === "float") {
+ if (typeof v === "boolean") {
+ return v ? 1.0 : 0.0;
+ }
+ if (v instanceof Date) {
+ return v.getTime();
+ }
+ return Number(v);
+ }
+
+ if (k === "bool") {
+ if (typeof v === "number") {
+ return !Number.isNaN(v) && v !== 0;
+ }
+ if (v instanceof Date) {
+ return true;
+ }
+ return Boolean(v);
+ }
+
+ if (k === "string") {
+ if (v instanceof Date) {
+ return v.toISOString();
+ }
+ return String(v);
+ }
+
+ if (k === "datetime") {
+ if (v instanceof Date) {
+ return v;
+ }
+ if (typeof v === "number") {
+ return new Date(v);
+ }
+ const d = new Date(String(v));
+ return Number.isNaN(d.getTime()) ? null : d;
+ }
+
+ // object / category / timedelta — return unchanged
+ return v;
+}
+
+// ─── AstypeOptions ────────────────────────────────────────────────────────────
+
+/** Options accepted by {@link astypeSeries} and {@link astype}. */
+export interface AstypeOptions {
+ /**
+ * When `true`, values that cannot be cast are silently replaced with
+ * `null` instead of throwing.
+ *
+ * @default false
+ */
+ readonly errors?: "raise" | "ignore";
+}
+
+// ─── astypeSeries ─────────────────────────────────────────────────────────────
+
+/**
+ * Cast a Series to a different dtype.
+ *
+ * Returns a new Series whose values have been coerced to `dtype`. The index
+ * and name are preserved unchanged.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [1.9, 2.1, 3.7], name: "x" });
+ * const si = astypeSeries(s, "int64");
+ * si.values; // [1, 2, 3]
+ * si.dtype.name; // "int64"
+ * ```
+ */
+export function astypeSeries(
+ s: Series,
+ dtype: DtypeName | Dtype,
+ options: AstypeOptions = {},
+): Series {
+ const targetDtype = dtype instanceof Dtype ? dtype : Dtype.from(dtype as DtypeName);
+ const { errors = "raise" } = options;
+
+ const casted: Scalar[] = [];
+ for (const v of s.values) {
+ let out: Scalar;
+ try {
+ out = castScalar(v, targetDtype);
+ } catch (e) {
+ if (errors === "ignore") {
+ out = v;
+ } else {
+ throw e;
+ }
+ }
+ casted.push(out);
+ }
+
+ return new Series({
+ data: casted,
+ index: s.index,
+ dtype: targetDtype,
+ name: s.name,
+ });
+}
+
+// ─── DataFrame astype ─────────────────────────────────────────────────────────
+
+/**
+ * Options for {@link astype} (DataFrame variant).
+ */
+export interface DataFrameAstypeOptions extends AstypeOptions {
+ /**
+ * When `true`, only the columns listed in `dtype` (when `dtype` is a
+ * `Record`) are recast; other columns are carried over unchanged.
+ *
+ * When `false` (default) and `dtype` is a `Record`, columns not listed
+ * in the map are carried over unchanged (same behaviour).
+ *
+ * This option exists for pandas API compatibility.
+ */
+ readonly copy?: boolean;
+}
+
+/**
+ * Cast one or more columns in a DataFrame to the specified dtype(s).
+ *
+ * - Pass a single `DtypeName` or `Dtype` to cast **all** columns.
+ * - Pass a `Record` to cast individual columns.
+ * Columns not listed are returned unchanged.
+ *
+ * Returns a new DataFrame; the original is not modified.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1.5, 2.7], b: ["3", "4"] });
+ *
+ * // Cast all columns to float64
+ * astype(df, "float64");
+ *
+ * // Cast only column "b" to int64
+ * astype(df, { b: "int64" });
+ * ```
+ */
+export function astype(
+ df: DataFrame,
+ dtype:
+ | DtypeName
+ | Dtype
+ | Readonly>,
+ options: DataFrameAstypeOptions = {},
+): DataFrame {
+ const colMap = new Map>();
+
+ const isSingleDtype =
+ typeof dtype === "string" || dtype instanceof Dtype;
+
+ for (const name of df.columns.values) {
+ const col = df.col(name);
+ if (isSingleDtype) {
+ colMap.set(name, astypeSeries(col, dtype as DtypeName | Dtype, options));
+ } else {
+ const mapping = dtype as Readonly>;
+ const target = mapping[name];
+ if (target !== undefined) {
+ colMap.set(name, astypeSeries(col, target, options));
+ } else {
+ colMap.set(name, col);
+ }
+ }
+ }
+
+ return new DataFrame(colMap, df.index);
+}
diff --git a/src/core/attrs.ts b/src/core/attrs.ts
new file mode 100644
index 00000000..81c6be1c
--- /dev/null
+++ b/src/core/attrs.ts
@@ -0,0 +1,291 @@
+/**
+ * attrs — user-defined metadata dictionary for Series and DataFrame.
+ *
+ * Mirrors `pandas.DataFrame.attrs` / `pandas.Series.attrs`: an arbitrary
+ * key→value dictionary that travels with a data object and lets callers
+ * annotate it with provenance, units, descriptions, or any other metadata.
+ *
+ * Because the tsb Series and DataFrame classes are immutable by design, this
+ * module maintains a **WeakMap registry** that maps each object to its attrs
+ * record. The registry entries are garbage-collected automatically when the
+ * object itself is collected — there is no memory leak.
+ *
+ * ### Public surface
+ *
+ * ```ts
+ * import { getAttrs, setAttrs, updateAttrs, copyAttrs, withAttrs, clearAttrs,
+ * hasAttrs } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ x: [1, 2, 3] });
+ *
+ * // Annotate
+ * setAttrs(df, { source: "sensor_A", unit: "metres" });
+ * getAttrs(df); // { source: "sensor_A", unit: "metres" }
+ *
+ * // Merge additional keys
+ * updateAttrs(df, { version: 2 });
+ * getAttrs(df); // { source: "sensor_A", unit: "metres", version: 2 }
+ *
+ * // Fluent helper — sets attrs and returns the same object
+ * const annotated = withAttrs(df, { source: "sensor_B" });
+ * annotated === df; // true — same reference
+ *
+ * // Propagate to a derived object
+ * const df2 = DataFrame.fromColumns({ y: [4, 5, 6] });
+ * copyAttrs(df, df2);
+ * getAttrs(df2); // { source: "sensor_A", unit: "metres", version: 2 }
+ * ```
+ *
+ * @module
+ */
+
+// ─── types ────────────────────────────────────────────────────────────────────
+
+/**
+ * The attrs dictionary type. Keys are strings; values may be any JSON-safe
+ * primitive or nested structure. Mirrors the `dict` type of `pandas.attrs`.
+ */
+export type Attrs = Record;
+
+// ─── registry ─────────────────────────────────────────────────────────────────
+
+/** Internal WeakMap from any object to its attrs record. */
+const registry = new WeakMap();
+
+// ─── public API ───────────────────────────────────────────────────────────────
+
+/**
+ * Retrieve the attrs dictionary for `obj`.
+ *
+ * Returns a **shallow copy** so callers cannot mutate the stored record
+ * accidentally. If no attrs have been set, returns an empty object `{}`.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [1, 2, 3] });
+ * setAttrs(s, { unit: "kg" });
+ * getAttrs(s); // { unit: "kg" }
+ * ```
+ */
+export function getAttrs(obj: object): Attrs {
+ const stored = registry.get(obj);
+ return stored !== undefined ? { ...stored } : {};
+}
+
+/**
+ * **Overwrite** the attrs dictionary for `obj` with `attrs`.
+ *
+ * Any previously stored attrs are discarded. Stores a shallow copy so
+ * subsequent mutations to the passed-in object do not affect the stored value.
+ *
+ * @example
+ * ```ts
+ * setAttrs(df, { source: "sensor_A" });
+ * getAttrs(df); // { source: "sensor_A" }
+ * ```
+ */
+export function setAttrs(obj: object, attrs: Attrs): void {
+ registry.set(obj, { ...attrs });
+}
+
+/**
+ * **Merge** `updates` into the existing attrs for `obj`.
+ *
+ * Existing keys that are not present in `updates` are preserved. Keys that
+ * are present in both `updates` and the existing attrs are overwritten.
+ *
+ * @example
+ * ```ts
+ * setAttrs(df, { source: "A" });
+ * updateAttrs(df, { version: 2 });
+ * getAttrs(df); // { source: "A", version: 2 }
+ * ```
+ */
+export function updateAttrs(obj: object, updates: Attrs): void {
+ const existing = registry.get(obj) ?? {};
+ registry.set(obj, { ...existing, ...updates });
+}
+
+/**
+ * **Copy** the attrs from `source` to `target`, overwriting any existing attrs
+ * on `target`.
+ *
+ * Useful for propagating metadata from an input to a derived result.
+ *
+ * @example
+ * ```ts
+ * setAttrs(df1, { source: "sensor_A" });
+ * const df2 = df1.head(5);
+ * copyAttrs(df1, df2);
+ * getAttrs(df2); // { source: "sensor_A" }
+ * ```
+ */
+export function copyAttrs(source: object, target: object): void {
+ const stored = registry.get(source);
+ if (stored !== undefined) {
+ registry.set(target, { ...stored });
+ } else {
+ registry.delete(target);
+ }
+}
+
+/**
+ * **Fluent helper** — set attrs on `obj` and return the same object.
+ *
+ * This **replaces** any previously stored attrs (same semantics as
+ * {@link setAttrs}). The return type is `T` so callers do not lose the
+ * concrete type of their object.
+ *
+ * @example
+ * ```ts
+ * const annotated = withAttrs(df, { source: "sensor_A", unit: "metres" });
+ * annotated === df; // true — same reference
+ * getAttrs(annotated); // { source: "sensor_A", unit: "metres" }
+ * ```
+ */
+export function withAttrs(obj: T, attrs: Attrs): T {
+ registry.set(obj, { ...attrs });
+ return obj;
+}
+
+/**
+ * **Remove** all attrs from `obj`.
+ *
+ * After calling this, {@link getAttrs} returns `{}` and {@link hasAttrs}
+ * returns `false`.
+ *
+ * @example
+ * ```ts
+ * setAttrs(df, { source: "A" });
+ * clearAttrs(df);
+ * hasAttrs(df); // false
+ * getAttrs(df); // {}
+ * ```
+ */
+export function clearAttrs(obj: object): void {
+ registry.delete(obj);
+}
+
+/**
+ * Returns `true` if `obj` has any attrs set, `false` otherwise.
+ *
+ * @example
+ * ```ts
+ * hasAttrs(df); // false
+ * setAttrs(df, { x: 1 });
+ * hasAttrs(df); // true
+ * clearAttrs(df);
+ * hasAttrs(df); // false
+ * ```
+ */
+export function hasAttrs(obj: object): boolean {
+ return registry.has(obj);
+}
+
+/**
+ * Retrieve a **single** attrs value by key.
+ *
+ * Returns `undefined` if the key does not exist (or no attrs are set).
+ *
+ * @example
+ * ```ts
+ * setAttrs(df, { unit: "kg" });
+ * getAttr(df, "unit"); // "kg"
+ * getAttr(df, "missing"); // undefined
+ * ```
+ */
+export function getAttr(obj: object, key: string): unknown {
+ return registry.get(obj)?.[key];
+}
+
+/**
+ * Set a **single** attrs key on `obj`, preserving all other existing attrs.
+ *
+ * @example
+ * ```ts
+ * setAttr(df, "unit", "kg");
+ * setAttr(df, "source", "lab");
+ * getAttrs(df); // { unit: "kg", source: "lab" }
+ * ```
+ */
+export function setAttr(obj: object, key: string, value: unknown): void {
+ const existing = registry.get(obj) ?? {};
+ registry.set(obj, { ...existing, [key]: value });
+}
+
+/**
+ * Delete a **single** attrs key from `obj`, preserving all other keys.
+ *
+ * Does nothing if the key does not exist.
+ *
+ * @example
+ * ```ts
+ * setAttrs(df, { a: 1, b: 2 });
+ * deleteAttr(df, "a");
+ * getAttrs(df); // { b: 2 }
+ * ```
+ */
+export function deleteAttr(obj: object, key: string): void {
+ const existing = registry.get(obj);
+ if (existing === undefined) return;
+ const { [key]: _removed, ...rest } = existing;
+ if (Object.keys(rest).length === 0) {
+ registry.delete(obj);
+ } else {
+ registry.set(obj, rest);
+ }
+}
+
+/**
+ * Return the number of attrs keys stored on `obj`.
+ *
+ * @example
+ * ```ts
+ * attrsCount(df); // 0
+ * setAttrs(df, { a: 1, b: 2 });
+ * attrsCount(df); // 2
+ * ```
+ */
+export function attrsCount(obj: object): number {
+ return Object.keys(registry.get(obj) ?? {}).length;
+}
+
+/**
+ * Return the list of attrs keys stored on `obj`.
+ *
+ * @example
+ * ```ts
+ * setAttrs(df, { a: 1, b: 2 });
+ * attrsKeys(df); // ["a", "b"]
+ * ```
+ */
+export function attrsKeys(obj: object): string[] {
+ return Object.keys(registry.get(obj) ?? {});
+}
+
+/**
+ * Merge attrs from multiple source objects into a single target object.
+ *
+ * Sources are applied left-to-right; later sources overwrite earlier ones on
+ * key conflicts. Overwrites any existing attrs on `target`.
+ *
+ * @example
+ * ```ts
+ * setAttrs(s1, { source: "A", unit: "kg" });
+ * setAttrs(s2, { source: "B", scale: 2 });
+ * mergeAttrs([s1, s2], df);
+ * getAttrs(df); // { source: "B", unit: "kg", scale: 2 }
+ * ```
+ */
+export function mergeAttrs(sources: readonly object[], target: object): void {
+ const merged: Attrs = {};
+ for (const src of sources) {
+ const stored = registry.get(src);
+ if (stored !== undefined) {
+ Object.assign(merged, stored);
+ }
+ }
+ if (Object.keys(merged).length > 0) {
+ registry.set(target, merged);
+ }
+}
diff --git a/src/core/index.ts b/src/core/index.ts
index ea275952..cf1d78cb 100644
--- a/src/core/index.ts
+++ b/src/core/index.ts
@@ -53,3 +53,12 @@ export { reindexSeries, reindexDataFrame } from "./reindex.ts";
export type { ReindexMethod, ReindexSeriesOptions, ReindexDataFrameOptions } from "./reindex.ts";
export { alignSeries, alignDataFrame } from "./align.ts";
export type { AlignSeriesOptions, AlignDataFrameOptions } from "./align.ts";
+export { astypeSeries, astype, castScalar } from "./astype.ts";
+export type { AstypeOptions, DataFrameAstypeOptions } from "./astype.ts";
+export { isScalar, isListLike, isArrayLike, isDictLike, isIterator } from "./api_types.ts";
+export { getAttrs, setAttrs, updateAttrs, copyAttrs } from "./attrs.ts";
+export type { Attrs } from "./attrs.ts";
+export { insertColumn, popColumn, reorderColumns, moveColumn } from "./insert_pop.ts";
+export type { PopResult } from "./insert_pop.ts";
+export { toDictOriented, fromDictOriented } from "./to_from_dict.ts";
+export type { ToDictOrient, FromDictOrient, DictSplit, DictTight } from "./to_from_dict.ts";
diff --git a/src/core/insert_pop.ts b/src/core/insert_pop.ts
new file mode 100644
index 00000000..d56c42bc
--- /dev/null
+++ b/src/core/insert_pop.ts
@@ -0,0 +1,214 @@
+/**
+ * DataFrame.insert() and DataFrame.pop() — column insertion and removal.
+ *
+ * Mirrors `pandas.DataFrame.insert(loc, column, value)` and
+ * `pandas.DataFrame.pop(item)`.
+ *
+ * Since `DataFrame` in tsb is immutable, both operations return a new DataFrame.
+ * `popColumn` returns both the extracted `Series` and the resulting DataFrame.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, insertColumn, popColumn } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] });
+ *
+ * // Insert column "x" at position 1 (between "a" and "b")
+ * const df2 = insertColumn(df, 1, "x", [10, 20]);
+ * // df2.columns.values → ["a", "x", "b"]
+ *
+ * // Pop column "a" out of df2
+ * const { series, df: df3 } = popColumn(df2, "a");
+ * // series.values → [1, 2]
+ * // df3.columns.values → ["x", "b"]
+ * ```
+ *
+ * @packageDocumentation
+ */
+
+import type { Label, Scalar } from "../types.ts";
+import { Index } from "./base-index.ts";
+import { DataFrame } from "./frame.ts";
+import { Series } from "./series.ts";
+
+// ─── insertColumn ─────────────────────────────────────────────────────────────
+
+/**
+ * Insert a new column into `df` at integer column position `loc`.
+ *
+ * Mirrors `pandas.DataFrame.insert(loc, column, value, allow_duplicates=False)`.
+ * Raises a `RangeError` if:
+ * - `column` already exists in `df` (no duplicates by default)
+ * - `loc` is out of range (must be 0 ≤ loc ≤ df.shape[1])
+ * - `values` length does not match the number of rows
+ *
+ * @param df Source DataFrame (not mutated).
+ * @param loc Zero-based integer position at which to insert the column.
+ * @param column Name of the new column.
+ * @param values Column data as an array of scalars or a `Series`.
+ * @param allowDuplicates When `true`, silently allow duplicate column names. Default `false`.
+ * @returns A new DataFrame with the column inserted.
+ */
+export function insertColumn(
+ df: DataFrame,
+ loc: number,
+ column: string,
+ values: readonly Scalar[] | Series,
+ allowDuplicates = false,
+): DataFrame {
+ const nCols = df.shape[1];
+ const nRows = df.shape[0];
+
+ if (!allowDuplicates && df.has(column)) {
+ throw new RangeError(
+ `Column "${column}" already exists. Use allowDuplicates=true to permit duplicate names.`,
+ );
+ }
+
+ if (loc < 0 || loc > nCols) {
+ throw new RangeError(`loc=${loc} is out of range [0, ${nCols}].`);
+ }
+
+ // Resolve values to a Series aligned to df's row index.
+ const series: Series =
+ values instanceof Series
+ ? values
+ : new Series({ data: values, index: df.index, name: column });
+
+ if (series.size !== nRows) {
+ throw new RangeError(
+ `values length ${series.size} does not match DataFrame row count ${nRows}.`,
+ );
+ }
+
+ // Rebuild the column map, inserting the new column at position `loc`.
+ const colMap = new Map>();
+ let idx = 0;
+
+ for (const colName of df.columns.values) {
+ if (idx === loc) {
+ colMap.set(column, series);
+ }
+ colMap.set(colName, df.col(colName));
+ idx++;
+ }
+
+ // Handle insertion at the end (loc === nCols).
+ if (loc === nCols) {
+ colMap.set(column, series);
+ }
+
+ return new DataFrame(colMap, df.index);
+}
+
+// ─── popColumn ────────────────────────────────────────────────────────────────
+
+/** Return type of {@link popColumn}. */
+export interface PopResult {
+ /** The extracted column as a Series. */
+ readonly series: Series;
+ /** The DataFrame with the column removed. */
+ readonly df: DataFrame;
+}
+
+/**
+ * Remove a column from `df` and return both the extracted `Series` and the
+ * resulting DataFrame.
+ *
+ * Mirrors `pandas.DataFrame.pop(item)`, but because tsb DataFrames are
+ * immutable this function returns the removed Series *and* the new DataFrame
+ * (rather than mutating in place).
+ *
+ * Raises a `RangeError` if `col` does not exist in `df`.
+ *
+ * @param df Source DataFrame (not mutated).
+ * @param col Name of the column to remove.
+ * @returns `{ series, df }` — the extracted column and the remaining DataFrame.
+ *
+ * @example
+ * ```ts
+ * const { series, df: remaining } = popColumn(df, "age");
+ * // series contains the "age" column; remaining has all other columns
+ * ```
+ */
+export function popColumn(df: DataFrame, col: string): PopResult {
+ const series = df.get(col);
+ if (series === undefined) {
+ throw new RangeError(`Column "${col}" not found in DataFrame.`);
+ }
+
+ const colMap = new Map>();
+ for (const colName of df.columns.values) {
+ if (colName !== col) {
+ colMap.set(colName, df.col(colName));
+ }
+ }
+
+ return {
+ series,
+ df: new DataFrame(colMap, df.index),
+ };
+}
+
+// ─── reorderColumns ──────────────────────────────────────────────────────────
+
+/**
+ * Reorder the columns of `df` to match `order`.
+ *
+ * Mirrors `df[order]` in pandas. All names in `order` must be present in `df`;
+ * extra names in `df` not listed in `order` are dropped.
+ *
+ * @param df Source DataFrame.
+ * @param order New column order (subset of `df.columns.values`).
+ * @returns A new DataFrame with columns in the specified order.
+ */
+export function reorderColumns(df: DataFrame, order: readonly string[]): DataFrame {
+ const colMap = new Map>();
+ for (const name of order) {
+ const s = df.get(name);
+ if (s === undefined) {
+ throw new RangeError(`Column "${name}" not found in DataFrame.`);
+ }
+ colMap.set(name, s);
+ }
+ return new DataFrame(colMap, df.index);
+}
+
+// ─── moveColumn ──────────────────────────────────────────────────────────────
+
+/**
+ * Move an existing column to a new integer position.
+ *
+ * This is a convenience wrapper combining {@link popColumn} and
+ * {@link insertColumn}: it removes the column from its current position and
+ * re-inserts it at `newLoc` in the resulting DataFrame.
+ *
+ * @param df Source DataFrame.
+ * @param col Name of the column to move.
+ * @param newLoc Target position (0 ≤ newLoc ≤ df.shape[1] − 1).
+ * @returns A new DataFrame with the column at the new position.
+ */
+export function moveColumn(df: DataFrame, col: string, newLoc: number): DataFrame {
+ const { series, df: without } = popColumn(df, col);
+ return insertColumn(without, newLoc, col, series);
+}
+
+// ─── internal re-export helper (used by DataFrame constructor access) ─────────
+
+/**
+ * Build a new DataFrame from an ordered iterable of `[name, Series]` pairs and
+ * a row index. Exported for use by other tsb modules that need to construct
+ * DataFrames without going through the public factory methods.
+ *
+ * @internal
+ */
+export function dataFrameFromPairs(
+ pairs: Iterable]>,
+ index: Index,
+): DataFrame {
+ const colMap = new Map>();
+ for (const [name, series] of pairs) {
+ colMap.set(name, series);
+ }
+ return new DataFrame(colMap, index);
+}
diff --git a/src/core/to_from_dict.ts b/src/core/to_from_dict.ts
new file mode 100644
index 00000000..51794a62
--- /dev/null
+++ b/src/core/to_from_dict.ts
@@ -0,0 +1,284 @@
+/**
+ * to_from_dict — DataFrame ↔ dictionary conversions with orient support.
+ *
+ * Mirrors `pandas.DataFrame.to_dict(orient=...)` and
+ * `pandas.DataFrame.from_dict(data, orient=...)`.
+ *
+ * Supported `orient` values for {@link toDictOriented}:
+ * - `"dict"` / `"columns"` — `{col: {rowLabel: value}}`
+ * - `"list"` — `{col: [values]}`
+ * - `"series"` — `{col: Series}`
+ * - `"split"` — `{index, columns, data}`
+ * - `"tight"` — like `"split"` plus `index_names` and `column_names`
+ * - `"records"` — `[{col: value, ...}, ...]`
+ * - `"index"` — `{rowLabel: {col: value}}`
+ *
+ * Supported `orient` values for {@link fromDictOriented}:
+ * - `"columns"` — `{col: [values]}` (default)
+ * - `"index"` — `{rowLabel: {col: value}}`
+ * - `"split"` — `{index?, columns, data}`
+ * - `"tight"` — `{index?, columns, data, index_names?, column_names?}`
+ *
+ * @module
+ */
+
+import type { Label, Scalar } from "../types.ts";
+import { Index } from "./base-index.ts";
+import { DataFrame } from "./frame.ts";
+import { Series } from "./series.ts";
+
+// ─── public types ──────────────────────────────────────────────────────────────
+
+/** Orient values supported by {@link toDictOriented}. */
+export type ToDictOrient = "dict" | "columns" | "list" | "series" | "split" | "tight" | "records" | "index";
+
+/** Orient values supported by {@link fromDictOriented}. */
+export type FromDictOrient = "columns" | "index" | "split" | "tight";
+
+/** Result shape for `orient = "split"`. */
+export interface DictSplit {
+ readonly index: Label[];
+ readonly columns: string[];
+ readonly data: Scalar[][];
+}
+
+/** Result shape for `orient = "tight"`. */
+export interface DictTight extends DictSplit {
+ readonly index_names: (string | null)[];
+ readonly column_names: (string | null)[];
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** Convert a row label to a string key. */
+function labelKey(label: Label): string {
+ if (label === null || label === undefined) {
+ return "null";
+ }
+ return String(label);
+}
+
+/** True when an array of labels is the default 0…n-1 RangeIndex. */
+function isDefaultRange(labels: readonly Label[]): boolean {
+ for (let i = 0; i < labels.length; i++) {
+ if (labels[i] !== i) {
+ return false;
+ }
+ }
+ return true;
+}
+
+// ─── toDictOriented ───────────────────────────────────────────────────────────
+
+/**
+ * Convert a DataFrame to a dictionary using the given `orient`.
+ *
+ * Mirrors `pandas.DataFrame.to_dict(orient, ...)`.
+ *
+ * @param df Source DataFrame.
+ * @param orient Output structure. Defaults to `"dict"`.
+ */
+export function toDictOriented(df: DataFrame, orient: "dict" | "columns"): Record>;
+export function toDictOriented(df: DataFrame, orient: "list"): Record;
+export function toDictOriented(df: DataFrame, orient: "series"): Record>;
+export function toDictOriented(df: DataFrame, orient: "split"): DictSplit;
+export function toDictOriented(df: DataFrame, orient: "tight"): DictTight;
+export function toDictOriented(df: DataFrame, orient: "records"): Record[];
+export function toDictOriented(df: DataFrame, orient: "index"): Record>;
+export function toDictOriented(
+ df: DataFrame,
+ orient: ToDictOrient = "dict",
+): Record | unknown[] | DictSplit | DictTight {
+ const colNames = [...df.columns.values];
+ const rowLabels = [...(df.index.values as Label[])];
+ const nRows = df.index.size;
+
+ switch (orient) {
+ case "dict":
+ case "columns": {
+ const result: Record> = {};
+ for (const col of colNames) {
+ const series = df.col(col);
+ const colObj: Record = {};
+ for (let i = 0; i < nRows; i++) {
+ const lbl = rowLabels[i];
+ const key = labelKey(lbl !== undefined ? lbl : null);
+ colObj[key] = (series.values[i] ?? null) as Scalar;
+ }
+ result[col] = colObj;
+ }
+ return result;
+ }
+
+ case "list": {
+ const result: Record = {};
+ for (const col of colNames) {
+ result[col] = [...(df.col(col).values as readonly Scalar[])];
+ }
+ return result;
+ }
+
+ case "series": {
+ const result: Record> = {};
+ for (const col of colNames) {
+ result[col] = df.col(col);
+ }
+ return result;
+ }
+
+ case "split": {
+ const data: Scalar[][] = [];
+ for (let i = 0; i < nRows; i++) {
+ const row: Scalar[] = colNames.map((col) => (df.col(col).values[i] ?? null) as Scalar);
+ data.push(row);
+ }
+ return { index: rowLabels, columns: colNames, data } satisfies DictSplit;
+ }
+
+ case "tight": {
+ const data: Scalar[][] = [];
+ for (let i = 0; i < nRows; i++) {
+ const row: Scalar[] = colNames.map((col) => (df.col(col).values[i] ?? null) as Scalar);
+ data.push(row);
+ }
+ return {
+ index: rowLabels,
+ columns: colNames,
+ data,
+ index_names: [null],
+ column_names: [null],
+ } satisfies DictTight;
+ }
+
+ case "records": {
+ return df.toRecords();
+ }
+
+ case "index": {
+ const result: Record> = {};
+ for (let i = 0; i < nRows; i++) {
+ const lbl = rowLabels[i];
+ const key = labelKey(lbl !== undefined ? lbl : null);
+ const rowObj: Record = {};
+ for (const col of colNames) {
+ rowObj[col] = (df.col(col).values[i] ?? null) as Scalar;
+ }
+ result[key] = rowObj;
+ }
+ return result;
+ }
+
+ default: {
+ const exhaustive: never = orient;
+ throw new RangeError(`Unknown orient: ${String(exhaustive)}`);
+ }
+ }
+}
+
+// ─── fromDictOriented ─────────────────────────────────────────────────────────
+
+/** Input type for `orient = "split"` / `"tight"`. */
+export interface SplitInput {
+ readonly index?: readonly Label[];
+ readonly columns: readonly string[];
+ readonly data: readonly (readonly Scalar[])[];
+}
+
+/**
+ * Construct a DataFrame from a dictionary using the given `orient`.
+ *
+ * Mirrors `pandas.DataFrame.from_dict(data, orient=...)`.
+ *
+ * @param data Input dictionary (shape depends on `orient`).
+ * @param orient How `data` is structured. Defaults to `"columns"`.
+ */
+export function fromDictOriented(
+ data: Readonly>,
+ orient?: "columns",
+): DataFrame;
+export function fromDictOriented(
+ data: Readonly>>>,
+ orient: "index",
+): DataFrame;
+export function fromDictOriented(data: SplitInput, orient: "split" | "tight"): DataFrame;
+export function fromDictOriented(
+ data: unknown,
+ orient: FromDictOrient = "columns",
+): DataFrame {
+ switch (orient) {
+ case "columns": {
+ const colsData = data as Record;
+ return DataFrame.fromColumns(colsData as Record);
+ }
+
+ case "index": {
+ const indexData = data as Record>;
+ const rowLabels = Object.keys(indexData);
+ // Collect all column names in insertion order
+ const colSet = new Map();
+ for (const rowLabel of rowLabels) {
+ const rowObj = indexData[rowLabel];
+ if (rowObj !== undefined) {
+ for (const col of Object.keys(rowObj)) {
+ colSet.set(col, null);
+ }
+ }
+ }
+ const colNames = [...colSet.keys()];
+ const colArrays: Record = {};
+ for (const col of colNames) {
+ colArrays[col] = [];
+ }
+ for (const rowLabel of rowLabels) {
+ const rowObj = indexData[rowLabel] ?? {};
+ for (const col of colNames) {
+ const arr = colArrays[col];
+ if (arr !== undefined) {
+ arr.push(rowObj[col] ?? null);
+ }
+ }
+ }
+ const idx = new Index(rowLabels as Label[]);
+ return DataFrame.fromColumns(colArrays as Record, { index: idx });
+ }
+
+ case "split":
+ case "tight": {
+ return buildFromSplit(data as SplitInput);
+ }
+
+ default: {
+ const exhaustive: never = orient;
+ throw new RangeError(`Unknown orient: ${String(exhaustive)}`);
+ }
+ }
+}
+
+// ─── internal helpers ──────────────────────────────────────────────────────────
+
+/** Build a DataFrame from a split/tight structure. */
+function buildFromSplit(input: SplitInput): DataFrame {
+ const { columns, data } = input;
+ const colArrays: Record = {};
+ for (const col of columns) {
+ colArrays[col] = [];
+ }
+ for (const row of data) {
+ for (let j = 0; j < columns.length; j++) {
+ const col = columns[j];
+ if (col === undefined) continue;
+ const arr = colArrays[col];
+ if (arr !== undefined) {
+ arr.push(row[j] ?? null);
+ }
+ }
+ }
+
+ // Determine the row index
+ if (input.index !== undefined && !isDefaultRange(input.index)) {
+ const idx = new Index(input.index as Label[]);
+ return DataFrame.fromColumns(colArrays as Record, { index: idx });
+ }
+
+ return DataFrame.fromColumns(colArrays as Record);
+}
diff --git a/src/index.ts b/src/index.ts
index b6c3f93e..371a3968 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -288,4 +288,174 @@ export type { ExplodeOptions, ExplodeDataFrameOptions } from "./stats/index.ts";
export { isin, dataFrameIsin } from "./stats/index.ts";
export type { IsinValues, IsinDict, DataFrameIsinValues } from "./stats/index.ts";
-// Re-merged main (2026-04-09T17:22Z): resolved barrel-export conflicts, PR branch is superset of main
+
+export { readExcel, xlsxSheetNames } from "./io/index.ts";
+export type { ReadExcelOptions } from "./io/index.ts";
+export { astypeSeries, astype, castScalar } from "./core/index.ts";
+export type { AstypeOptions, DataFrameAstypeOptions } from "./core/index.ts";
+
+export { clipAdvancedSeries, clipAdvancedDataFrame } from "./stats/index.ts";
+export type {
+ SeriesBound,
+ DataFrameBound,
+ ClipAdvancedSeriesOptions,
+ ClipAdvancedDataFrameOptions,
+} from "./stats/index.ts";
+export { idxminSeries, idxmaxSeries, idxminDataFrame, idxmaxDataFrame } from "./stats/index.ts";
+export type { IdxOptions, IdxDataFrameOptions } from "./stats/index.ts";
+export { modeSeries, modeDataFrame } from "./stats/index.ts";
+export type { ModeSeriesOptions, ModeDataFrameOptions } from "./stats/index.ts";
+export {
+ nancount,
+ nansum,
+ nanmean,
+ nanmedian,
+ nanvar,
+ nanstd,
+ nanmin,
+ nanmax,
+ nanprod,
+} from "./stats/index.ts";
+export type { NanInput, NanAggOptions } from "./stats/index.ts";
+export {
+ nuniqueSeries,
+ nuniqueDataFrame,
+ anySeries,
+ allSeries,
+ anyDataFrame,
+ allDataFrame,
+} from "./stats/index.ts";
+export type {
+ NuniqueSeriesOptions,
+ NuniqueDataFrameOptions,
+ AnyAllSeriesOptions,
+ AnyAllDataFrameOptions,
+} from "./stats/index.ts";
+export { pctChangeSeries, pctChangeDataFrame } from "./stats/index.ts";
+export type {
+ PctChangeFillMethod,
+ PctChangeOptions,
+ DataFramePctChangeOptions,
+} from "./stats/index.ts";
+export { quantileSeries, quantileDataFrame } from "./stats/index.ts";
+export type {
+ QuantileInterpolation,
+ QuantileSeriesOptions,
+ QuantileDataFrameOptions,
+} from "./stats/index.ts";
+export { replaceSeries, replaceDataFrame } from "./stats/index.ts";
+export type {
+ ReplaceMapping,
+ ReplaceSpec,
+ ReplaceOptions,
+ DataFrameReplaceOptions,
+} from "./stats/index.ts";
+export { varSeries, semSeries, varDataFrame, semDataFrame } from "./stats/index.ts";
+export type { VarSemSeriesOptions, VarSemDataFrameOptions } from "./stats/index.ts";
+export { skewSeries, kurtSeries, skewDataFrame, kurtDataFrame } from "./stats/index.ts";
+export type {
+ SkewKurtSeriesOptions,
+ SkewKurtDataFrameOptions,
+} from "./stats/index.ts";
+export { toDatetime } from "./stats/index.ts";
+export type { DatetimeUnit, DatetimeErrors, ToDatetimeOptions } from "./stats/index.ts";
+
+// iter136 features — core
+export { isScalar, isListLike, isArrayLike, isDictLike, isIterator } from "./core/index.ts";
+export { getAttrs, setAttrs, updateAttrs, copyAttrs } from "./core/index.ts";
+export type { Attrs } from "./core/index.ts";
+export { insertColumn, popColumn, reorderColumns, moveColumn } from "./core/index.ts";
+export type { PopResult } from "./core/index.ts";
+export { toDictOriented, fromDictOriented } from "./core/index.ts";
+export type { ToDictOrient, FromDictOrient, DictSplit, DictTight } from "./core/index.ts";
+
+// iter136 features — stats
+export {
+ catFromCodes,
+ catUnionCategories,
+ catSortByFreq,
+ catCrossTab,
+} from "./stats/index.ts";
+export type {
+ CatFromCodesOptions,
+ CatSortByFreqOptions,
+ CatCrossTabOptions,
+} from "./stats/index.ts";
+export {
+ formatFloat,
+ formatPercent,
+ formatScientific,
+ formatEngineering,
+ formatThousands,
+ seriesToString,
+ dataFrameToString,
+} from "./stats/index.ts";
+export type {
+ Formatter,
+ SeriesToStringOptions,
+ DataFrameToStringOptions,
+} from "./stats/index.ts";
+export {
+ histogram,
+ zscore,
+ minMaxNormalize,
+ coefficientOfVariation,
+ digitize,
+ linspace,
+ arange,
+ percentileOfScore,
+ seriesDigitize,
+} from "./stats/index.ts";
+export type {
+ HistogramOptions,
+ HistogramResult,
+ ZscoreOptions,
+ MinMaxOptions,
+ CvOptions,
+} from "./stats/index.ts";
+export {
+ strNormalize,
+ strGetDummies,
+ strExtractAll,
+ strRemovePrefix,
+ strRemoveSuffix,
+ strTranslate,
+ strCharWidth,
+ strByteLength,
+} from "./stats/index.ts";
+export type {
+ NormalizeForm,
+ StrInput,
+ StrGetDummiesOptions,
+ ExtractAllOptions,
+} from "./stats/index.ts";
+export {
+ strSplitExpand,
+ strExtractGroups,
+ strPartition,
+ strRPartition,
+ strMultiReplace,
+ strIndent,
+ strDedent,
+} from "./stats/index.ts";
+export type {
+ SplitExpandOptions,
+ ExtractGroupsOptions,
+ PartitionResult,
+ ReplacePair,
+ IndentOptions,
+} from "./stats/index.ts";
+export {
+ rollingSem,
+ rollingSkew,
+ rollingKurt,
+ rollingQuantile,
+} from "./stats/index.ts";
+export type {
+ WindowExtOptions,
+ RollingQuantileOptions,
+} from "./stats/index.ts";
+
+// iter136 features — window
+export { rollingApply, rollingAgg } from "./window/index.ts";
+export type { RollingApplyOptions, RollingAggOptions, AggFunctions } from "./window/index.ts";
diff --git a/src/io/index.ts b/src/io/index.ts
index 0f894185..fb5d7e75 100644
--- a/src/io/index.ts
+++ b/src/io/index.ts
@@ -10,3 +10,5 @@ export { readJson, toJson } from "./json.ts";
export type { ReadJsonOptions, ToJsonOptions, JsonOrient } from "./json.ts";
export { jsonNormalize } from "./json_normalize.ts";
export type { JsonNormalizeOptions, JsonPath } from "./json_normalize.ts";
+export { readExcel, xlsxSheetNames } from "./read_excel.ts";
+export type { ReadExcelOptions } from "./read_excel.ts";
diff --git a/src/io/read_excel.ts b/src/io/read_excel.ts
new file mode 100644
index 00000000..97d06065
--- /dev/null
+++ b/src/io/read_excel.ts
@@ -0,0 +1,645 @@
+/**
+ * readExcel — XLSX file reading for DataFrame.
+ *
+ * Mirrors `pandas.read_excel()`:
+ * - `readExcel(data, options?)` — parse an XLSX binary buffer into a DataFrame.
+ * - `xlsxSheetNames(data)` — list sheet names without parsing cell data.
+ *
+ * Supports:
+ * - Shared string table (type `"s"`)
+ * - Inline strings (type `"inlineStr"`)
+ * - Numbers (type absent or `"n"`)
+ * - Booleans (type `"b"`)
+ * - Formula cached values (type `"str"`)
+ * - Error cells (type `"e"`) — returned as null
+ * - ZIP STORED (method 0) and DEFLATED (method 8) entries
+ *
+ * Limitations (deferred):
+ * - XLSX only — not XLS (legacy binary format)
+ * - No ZIP64 support (up to ~4 GB)
+ * - Date serial numbers are not converted (returned as numeric)
+ *
+ * @module
+ */
+
+// biome-ignore lint/correctness/noNodejsModules: raw DEFLATE decompression for ZIP/XLSX requires node:zlib
+import { inflateRawSync } from "node:zlib";
+import { DataFrame } from "../core/index.ts";
+import { Index } from "../core/index.ts";
+import { RangeIndex } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import { Dtype } from "../core/index.ts";
+import type { DtypeName, Label, Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/** Options for {@link readExcel}. */
+export interface ReadExcelOptions {
+  /**
+   * Which sheet to read.
+   * - `string`: exact sheet name
+   * - `number`: 0-based sheet index
+   * - Default: `0` (first sheet)
+   */
+  readonly sheetName?: string | number;
+  /**
+   * Row index of the header row, or `null` for no header (columns become
+   * `"0"`, `"1"`, `"2"`, …).
+   * Default: `0`.
+   */
+  readonly header?: number | null;
+  /**
+   * Column name or 0-based index of the column to use as the row index.
+   * Default: `null` (use a default `RangeIndex`).
+   */
+  readonly indexCol?: string | number | null;
+  /**
+   * Number of data rows to skip after the header row.
+   * Default: `0`.
+   */
+  readonly skipRows?: number;
+  /**
+   * Maximum number of data rows to read.
+   */
+  readonly nrows?: number;
+  /**
+   * Additional strings to treat as NA (beyond the built-in set:
+   * `""`, `"NA"`, `"N/A"`, `"null"`, `"NaN"`, `"nan"`, `"#N/A"`).
+   */
+  readonly naValues?: readonly string[];
+  /**
+   * Explicit dtype overrides per column name.
+   * NOTE(review): generic parameters were lost in transit; restored as
+   * `Readonly<Record<string, DtypeName>>` — confirm against the original.
+   */
+  readonly dtype?: Readonly<Record<string, DtypeName>>;
+}
+
+// ─── ZIP low-level helpers ────────────────────────────────────────────────────
+
+/** Read a little-endian uint16 from a buffer. */
+function readU16(buf: Uint8Array, off: number): number {
+  const lo = buf[off] ?? 0;
+  const hi = buf[off + 1] ?? 0;
+  return (lo | (hi << 8)) >>> 0;
+}
+
+/** Read a little-endian uint32 from a buffer. */
+function readU32(buf: Uint8Array, off: number): number {
+  const b0 = buf[off] ?? 0;
+  const b1 = buf[off + 1] ?? 0;
+  const b2 = buf[off + 2] ?? 0;
+  const b3 = buf[off + 3] ?? 0;
+  // `>>> 0` forces an unsigned result (bit-ops otherwise yield signed int32).
+  return (b0 | (b1 << 8) | (b2 << 16) | (b3 << 24)) >>> 0;
+}
+
+// ZIP magic numbers and compression method IDs (little-endian signatures).
+const ZIP_EOCD_SIG = 0x06054b50; // End-of-central-directory record
+const ZIP_CD_SIG = 0x02014b50; // Central-directory file header
+const ZIP_COMP_STORED = 0; // entry stored uncompressed
+const ZIP_COMP_DEFLATE = 8; // entry compressed with raw DEFLATE
+
+/** One file entry as parsed from the ZIP central directory. */
+interface ZipEntry {
+  readonly name: string;
+  readonly compressedSize: number;
+  readonly uncompressedSize: number;
+  readonly method: number;
+  // Absolute offset of the entry's (possibly compressed) payload bytes.
+  readonly dataOffset: number;
+}
+
+/**
+ * Search backwards for the End-of-Central-Directory record.
+ *
+ * The EOCD record is 22 fixed bytes optionally followed by a comment of up
+ * to 65535 bytes, so only the final 65558 (22 + 65535 + 1) bytes of the
+ * buffer need to be scanned.
+ */
+function findEocd(buf: Uint8Array): number {
+  const minOff = Math.max(0, buf.length - 65558);
+  for (let i = buf.length - 22; i >= minOff; i--) {
+    if (readU32(buf, i) === ZIP_EOCD_SIG) {
+      return i;
+    }
+  }
+  throw new Error("Not a valid XLSX file: no ZIP end-of-central-directory found");
+}
+
+/**
+ * Compute the actual data offset from the local file header.
+ *
+ * A local file header is 30 fixed bytes followed by the file name and an
+ * extra field; their lengths live at header offsets 26 and 28, and the
+ * payload begins immediately after both.
+ */
+function localDataOffset(buf: Uint8Array, localOff: number): number {
+  const nameLen = readU16(buf, localOff + 26);
+  const extraLen = readU16(buf, localOff + 28);
+  return localOff + 30 + nameLen + extraLen;
+}
+
+/** Parse the ZIP central directory and return a name→entry map. */
+function parseZipEntries(buf: Uint8Array): Map<string, ZipEntry> {
+  const eocd = findEocd(buf);
+  // EOCD offsets 16/12 hold the central directory's start offset and size.
+  const cdOffset = readU32(buf, eocd + 16);
+  const cdSize = readU32(buf, eocd + 12);
+  const dec = new TextDecoder("utf-8");
+  // NOTE(review): the `<string, ZipEntry>` generics on this function were
+  // lost in transit; restored here — confirm against the original source.
+  const entries = new Map<string, ZipEntry>();
+  let pos = cdOffset;
+  while (pos < cdOffset + cdSize && pos + 46 <= buf.length) {
+    if (readU32(buf, pos) !== ZIP_CD_SIG) {
+      break;
+    }
+    const method = readU16(buf, pos + 10);
+    const compressedSize = readU32(buf, pos + 20);
+    const uncompressedSize = readU32(buf, pos + 24);
+    const nameLen = readU16(buf, pos + 28);
+    const extraLen = readU16(buf, pos + 30);
+    const commentLen = readU16(buf, pos + 32);
+    const localOff = readU32(buf, pos + 42);
+    const name = dec.decode(buf.subarray(pos + 46, pos + 46 + nameLen));
+    // Sizes come from the central directory, but the payload position must
+    // be derived from the *local* header (its name/extra lengths may differ).
+    const dataOffset = localDataOffset(buf, localOff);
+    entries.set(name, { name, compressedSize, uncompressedSize, method, dataOffset });
+    pos += 46 + nameLen + extraLen + commentLen;
+  }
+  return entries;
+}
+
+/** Decompress a ZIP entry and decode it as a UTF-8 string. */
+function extractEntry(buf: Uint8Array, entry: ZipEntry): string {
+ const raw = buf.subarray(entry.dataOffset, entry.dataOffset + entry.compressedSize);
+ let bytes: Uint8Array;
+ if (entry.method === ZIP_COMP_STORED) {
+ bytes = raw;
+ } else if (entry.method === ZIP_COMP_DEFLATE) {
+ bytes = inflateRawSync(raw);
+ } else {
+ throw new Error(`Unsupported ZIP compression method: ${entry.method}`);
+ }
+ return new TextDecoder("utf-8").decode(bytes);
+}
+
+/** Extract a named entry or return null if absent. */
+function getZipEntry(buf: Uint8Array, entries: Map<string, ZipEntry>, name: string): string | null {
+  // NOTE(review): the `<string, ZipEntry>` generic on `entries` was lost in
+  // transit; restored here — confirm against the original source.
+  const entry = entries.get(name);
+  return entry === undefined ? null : extractEntry(buf, entry);
+}
+
+// ─── XML helpers ──────────────────────────────────────────────────────────────
+
+// Top-level regex constants (Biome useTopLevelRegex)
+// NOTE(review): the literal `<tag` fragments inside these patterns were
+// stripped by an HTML-unescaping step in transit. They are reconstructed from
+// how each constant is consumed below; the two single-match patterns use a
+// single capture group because their call sites read `match[1]` as the text.
+// Verify every pattern against the original source.
+const RE_XML_ENTITY = /&(?:amp|lt|gt|quot|apos);/g;
+const RE_SST_SI = /<si[^>]*>([\s\S]*?)<\/si>/g;
+// One group — parseSiText reads match[1] as the run text.
+const RE_SST_T = /<t[^>]*>([\s\S]*?)<\/t>/g;
+const RE_WB_SHEET = /<sheet\b([^>]*)>/g;
+const RE_REL = /<Relationship\b([^>]*)>/g;
+const RE_ROW = /<row\b([^>]*)>([\s\S]*?)<\/row>/g;
+const RE_CELL = /<c\b([^>]*)>([\s\S]*?)<\/c>/g;
+const RE_CELL_V = /<v>([\s\S]*?)<\/v>/;
+// One group — parseOneRow reads match[1] as the inline-string text.
+const RE_CELL_IS = /<is>[\s\S]*?<t[^>]*>([\s\S]*?)<\/t>/;
+const RE_COL_LETTERS = /^([A-Z]+)(\d+)$/;
+
+/** Replace XML character references with their literal characters. */
+function xmlUnescape(s: string): string {
+ return s.replace(RE_XML_ENTITY, (m) => {
+ if (m === "&") {
+ return "&";
+ }
+ if (m === "<") {
+ return "<";
+ }
+ if (m === ">") {
+ return ">";
+ }
+ if (m === """) {
+ return '"';
+ }
+ return "'";
+ });
+}
+
+/**
+ * Extract the value of a single named XML attribute from an attribute string.
+ * Uses `new RegExp` (not a literal) to support dynamic attribute names.
+ */
+function attrVal(attrStr: string, key: string): string {
+  const pattern = new RegExp(`\\b${key}="([^"]*)"`);
+  const match = pattern.exec(attrStr);
+  return match === null ? "" : (match[1] ?? "");
+}
+
+// ─── XLSX-specific XML parsing ────────────────────────────────────────────────
+
+/** Iterate all non-overlapping matches of a global regex against a string. */
+function* regexAll(re: RegExp, str: string): Generator<RegExpExecArray> {
+  // NOTE(review): the `<RegExpExecArray>` generic was lost in transit;
+  // restored (callers index into the yielded matches) — confirm.
+  // Reset lastIndex in case a shared top-level regex was used before.
+  re.lastIndex = 0;
+  let m = re.exec(str);
+  while (m !== null) {
+    yield m;
+    m = re.exec(str);
+  }
+}
+
+/** Concatenate the text of all text runs inside a single shared-string item. */
+function parseSiText(siContent: string): string {
+  let text = "";
+  for (const t of regexAll(RE_SST_T, siContent)) {
+    text += xmlUnescape(t[1] ?? "");
+  }
+  return text;
+}
+
+/** Parse the shared string table XML into an array of strings. */
+function parseSharedStrings(xml: string): string[] {
+  // Each shared-string item contributes one (possibly multi-run) string.
+  return [...regexAll(RE_SST_SI, xml)].map((si) => parseSiText(si[1] ?? ""));
+}
+
+/** A sheet's display name plus its workbook relationship id. */
+interface SheetInfo {
+  readonly name: string;
+  readonly rid: string;
+}
+
+/** Parse the workbook XML and return a list of sheet descriptors. */
+function parseWorkbookSheets(xml: string): SheetInfo[] {
+  const sheets: SheetInfo[] = [];
+  for (const m of regexAll(RE_WB_SHEET, xml)) {
+    const attrs = m[1] ?? "";
+    // Sheet names may contain XML entities; unescape them for display.
+    const name = xmlUnescape(attrVal(attrs, "name"));
+    const rid = attrVal(attrs, "r:id");
+    // Entries with an empty name are malformed and are dropped.
+    if (name !== "") {
+      sheets.push({ name, rid });
+    }
+  }
+  return sheets;
+}
+
+/** Parse the workbook relationships XML and return a rid→target map. */
+function parseRelationships(xml: string): Map<string, string> {
+  // NOTE(review): the `<string, string>` generics were lost in transit;
+  // restored (keys are relationship Ids, values are Target paths) — confirm.
+  const map = new Map<string, string>();
+  for (const m of regexAll(RE_REL, xml)) {
+    const attrs = m[1] ?? "";
+    const id = attrVal(attrs, "Id");
+    const target = attrVal(attrs, "Target");
+    if (id !== "") {
+      map.set(id, target);
+    }
+  }
+  return map;
+}
+
+// ─── Cell parsing ─────────────────────────────────────────────────────────────
+
+/** Convert a column letter string (e.g. "A", "AB") to a 0-based index. */
+function colLetterToIndex(col: string): number {
+  let acc = 0;
+  for (let i = 0; i < col.length; i++) {
+    // Bijective base-26: "A" (char code 65) is digit 1, "Z" is digit 26.
+    acc = acc * 26 + (col.charCodeAt(i) - 64);
+  }
+  return acc - 1;
+}
+
+/**
+ * Parse a cell reference (e.g. "A1") into [rowIndex, colIndex] (both 0-based).
+ *
+ * @throws Error when `ref` is not uppercase letters followed by digits.
+ */
+function parseCellRef(ref: string): readonly [number, number] {
+  const m = RE_COL_LETTERS.exec(ref);
+  if (m === null) {
+    throw new Error(`Invalid cell reference: ${ref}`);
+  }
+  const colLetters = m[1] ?? "";
+  // Spreadsheet rows are 1-based in the XML; convert to 0-based here.
+  const rowNum = Number.parseInt(m[2] ?? "1", 10);
+  return [rowNum - 1, colLetterToIndex(colLetters)];
+}
+
+/** Resolve a cell value given its type tag and raw text. */
+function resolveCellValue(
+ cellType: string,
+ vText: string,
+ isText: string,
+ sharedStrings: readonly string[],
+): Scalar {
+ if (cellType === "s") {
+ const idx = Number.parseInt(vText, 10);
+ return sharedStrings[idx] ?? null;
+ }
+ if (cellType === "b") {
+ return vText === "1";
+ }
+ if (cellType === "inlineStr") {
+ return xmlUnescape(isText);
+ }
+ if (cellType === "e") {
+ return null;
+ }
+ // "str" (formula string), "n" (number), or absent (number)
+ if (vText === "") {
+ return null;
+ }
+ const n = Number(vText);
+ return Number.isNaN(n) ? xmlUnescape(vText) : n;
+}
+
+/** One parsed worksheet row: sparse map of column index → cell value. */
+interface RawRow {
+  readonly rowIndex: number; // 0-based sheet row
+  readonly cells: ReadonlyMap<number, Scalar>; // 0-based column → value
+}
+
+/** Parse a single row element into a RawRow. */
+function parseOneRow(
+  rowAttrs: string,
+  rowContent: string,
+  sharedStrings: readonly string[],
+): RawRow {
+  // NOTE(review): the Map/ReadonlyMap generics here were lost in transit;
+  // restored as `<number, Scalar>` — confirm against the original source.
+  // The "r" attribute is the 1-based row number; fall back to 0 when absent.
+  const rowIdxStr = attrVal(rowAttrs, "r");
+  const rowIndex = rowIdxStr === "" ? 0 : Number.parseInt(rowIdxStr, 10) - 1;
+  const cells = new Map<number, Scalar>();
+  for (const cellMatch of regexAll(RE_CELL, rowContent)) {
+    const cellAttrs = cellMatch[1] ?? "";
+    const cellContent = cellMatch[2] ?? "";
+    const ref = attrVal(cellAttrs, "r");
+    if (ref === "") {
+      // Cells without a reference cannot be placed; skip them.
+      continue;
+    }
+    const cellType = attrVal(cellAttrs, "t");
+    const vMatch = RE_CELL_V.exec(cellContent);
+    const vText = vMatch !== null ? xmlUnescape(vMatch[1] ?? "") : "";
+    const isMatch = RE_CELL_IS.exec(cellContent);
+    const isText = isMatch?.[1] ?? "";
+    const [, colIdx] = parseCellRef(ref);
+    cells.set(colIdx, resolveCellValue(cellType, vText, isText, sharedStrings));
+  }
+  return { rowIndex, cells };
+}
+
+/** Parse all row elements from a worksheet XML string. */
+function parseWorksheetRows(xml: string, sharedStrings: readonly string[]): RawRow[] {
+  const rows: RawRow[] = [];
+  for (const rowMatch of regexAll(RE_ROW, xml)) {
+    rows.push(parseOneRow(rowMatch[1] ?? "", rowMatch[2] ?? "", sharedStrings));
+  }
+  return rows;
+}
+
+// ─── DataFrame construction ───────────────────────────────────────────────────
+
+// Strings always treated as NA (see the module doc for the full list).
+const BUILTIN_NA = new Set(["", "NA", "N/A", "null", "NaN", "nan", "#N/A"]);
+
+// NOTE(review): the `<string>` generics on the two helpers below were lost
+// in transit; restored as ReadonlySet<string> — confirm against original.
+
+/** True when a string value should be coerced to null. */
+function isNaStr(s: string, extraNa: ReadonlySet<string>): boolean {
+  return BUILTIN_NA.has(s) || extraNa.has(s);
+}
+
+/** Coerce a raw cell value to null when it matches an NA sentinel. */
+function coerceNa(val: Scalar, extraNa: ReadonlySet<string>): Scalar {
+  if (typeof val === "string" && isNaStr(val, extraNa)) {
+    return null;
+  }
+  return val;
+}
+
+/** Compute the maximum column index across all rows. */
+function maxColIndex(rows: readonly RawRow[]): number {
+  let best = 0;
+  for (const row of rows) {
+    for (const col of row.cells.keys()) {
+      best = Math.max(best, col);
+    }
+  }
+  return best;
+}
+
+/** Column-oriented sheet data: header labels plus one value array per column. */
+interface ColumnarData {
+  readonly columns: string[];
+  readonly data: Scalar[][];
+}
+
+/** Pad header labels array to `numCols` with numeric fallback names. */
+function padHeaderLabels(labels: string[], numCols: number): void {
+  // Mutates `labels` in place; fallback names are "0", "1", … by position.
+  while (labels.length < numCols) {
+    labels.push(String(labels.length));
+  }
+}
+
+/**
+ * Extract header labels from the header row.
+ *
+ * Null header cells fall back to the column's numeric position. When no row
+ * matches `headerRow`, an EMPTY array is returned — the caller is expected
+ * to pad it via padHeaderLabels.
+ */
+function extractHeaderLabels(
+  rows: readonly RawRow[],
+  headerRow: number,
+  numCols: number,
+): string[] {
+  const labels: string[] = [];
+  const hRow = rows.find((r) => r.rowIndex === headerRow);
+  if (hRow !== undefined) {
+    for (let c = 0; c < numCols; c++) {
+      const v = hRow.cells.get(c) ?? null;
+      labels.push(v !== null ? String(v) : String(c));
+    }
+  }
+  return labels;
+}
+
+/** Pivot sliced data rows into per-column arrays (missing cells → null). */
+function pivotToColumns(
+  sliced: readonly RawRow[],
+  numCols: number,
+  extraNa: ReadonlySet<string>,
+): Scalar[][] {
+  // NOTE(review): the `<string>` generic on `extraNa` was lost in transit;
+  // restored as ReadonlySet<string> — confirm against the original source.
+  const data: Scalar[][] = Array.from({ length: numCols }, (): Scalar[] => []);
+  for (const row of sliced) {
+    for (let c = 0; c < numCols; c++) {
+      const val = coerceNa(row.cells.get(c) ?? null, extraNa);
+      (data[c] as Scalar[]).push(val);
+    }
+  }
+  return data;
+}
+
+/** Separate header and data rows, then pivot to column-oriented arrays. */
+function buildColumnarData(
+  rows: readonly RawRow[],
+  headerRow: number | null,
+  skipRows: number,
+  nrows: number | undefined,
+  extraNa: ReadonlySet<string>,
+): ColumnarData {
+  // NOTE(review): the `<string>` generic on `extraNa` was lost in transit;
+  // restored as ReadonlySet<string> — confirm against the original source.
+  const numCols = rows.length === 0 ? 0 : maxColIndex(rows) + 1;
+  // Drop the header row from the data rows; with no header, keep every row.
+  const dataRows = rows.filter((r) => headerRow === null || r.rowIndex !== headerRow);
+  const headerLabels = headerRow !== null ? extractHeaderLabels(rows, headerRow, numCols) : [];
+  padHeaderLabels(headerLabels, numCols);
+  // skipRows applies after the header; nrows caps what remains.
+  const sliced = dataRows.slice(skipRows, nrows !== undefined ? skipRows + nrows : undefined);
+  const data = pivotToColumns(sliced, numCols, extraNa);
+  return { columns: headerLabels, data };
+}
+
+/**
+ * Infer a dtype from a column's scalar values.
+ *
+ * Nulls/undefined are ignored. Precedence when all non-null values qualify:
+ * bool > float64 > string, with "object" for mixed columns.
+ * NOTE(review): an empty or all-null column leaves every flag true and thus
+ * infers "bool" — confirm this is the intended default.
+ */
+function inferColDtype(values: readonly Scalar[], override: DtypeName | undefined): DtypeName {
+  if (override !== undefined) {
+    return override;
+  }
+  let allNum = true;
+  let allBool = true;
+  let allStr = true;
+  for (const v of values) {
+    if (v === null || v === undefined) {
+      continue;
+    }
+    if (typeof v !== "number") {
+      allNum = false;
+    }
+    if (typeof v !== "boolean") {
+      allBool = false;
+    }
+    if (typeof v !== "string") {
+      allStr = false;
+    }
+  }
+  if (allBool) {
+    return "bool";
+  }
+  if (allNum) {
+    return "float64";
+  }
+  if (allStr) {
+    return "string";
+  }
+  return "object";
+}
+
+/** Build a DataFrame from parsed rows and options. */
+function buildDataFrame(rows: readonly RawRow[], options: ReadExcelOptions): DataFrame {
+  // header: undefined → default 0; explicit null → "no header row".
+  const headerRow = options.header !== undefined ? (options.header ?? null) : 0;
+  const skipRows = options.skipRows ?? 0;
+  const extraNa = new Set(options.naValues ?? []);
+  // NOTE(review): the generic parameters in this function were lost in
+  // transit; restored as written below — confirm against the original.
+  const dtypeOvr: Readonly<Record<string, DtypeName>> = options.dtype ?? {};
+  const { columns, data } = buildColumnarData(rows, headerRow, skipRows, options.nrows, extraNa);
+  const indexColOpt = options.indexCol ?? null;
+  const indexColIdx = resolveIndexColIdx(columns, indexColOpt);
+  const rowCount = (data[0] ?? []).length;
+  const colMap = new Map<string, Series>();
+  for (let c = 0; c < columns.length; c++) {
+    if (c === indexColIdx) {
+      continue; // the index column is not materialized as a data column
+    }
+    const colName = columns[c] ?? String(c);
+    const colData = data[c] ?? [];
+    const dtypeName = inferColDtype(colData, dtypeOvr[colName]);
+    colMap.set(colName, new Series({ data: colData, dtype: Dtype.from(dtypeName), name: colName }));
+  }
+  // Index labels exclude undefined/bigint/Date, which Label does not admit.
+  const toLabel = (v: Scalar): Label =>
+    v === undefined || typeof v === "bigint" || v instanceof Date ? null : v;
+  const rowIndex =
+    indexColIdx >= 0
+      ? new Index((data[indexColIdx] ?? []).map(toLabel))
+      : new RangeIndex(rowCount);
+  return new DataFrame(colMap, rowIndex);
+}
+
+/** Resolve the numeric column index for the index column option. */
+function resolveIndexColIdx(columns: readonly string[], opt: string | number | null): number {
+  if (opt === null) {
+    return -1;
+  }
+  // String options are looked up by name; indexOf yields -1 when absent.
+  return typeof opt === "number" ? opt : columns.indexOf(opt);
+}
+
+// ─── sheet path resolution ────────────────────────────────────────────────────
+
+/**
+ * Resolve the XML path inside the ZIP for a given sheet.
+ *
+ * Prefers the workbook relationship target for the sheet's rid; falls back
+ * to the conventional "worksheets/sheetN.xml" name when the rel is missing.
+ * Absolute targets ("/xl/…") are used as-is; relative ones live under "xl/".
+ */
+function resolveSheetPath(
+  // NOTE(review): the `<string, string>` generic was lost in transit;
+  // restored to match parseRelationships' result — confirm against original.
+  rels: ReadonlyMap<string, string>,
+  sheetInfo: SheetInfo,
+  sheetIndex: number,
+): string {
+  const target = rels.get(sheetInfo.rid) ?? `worksheets/sheet${sheetIndex + 1}.xml`;
+  return target.startsWith("/") ? target.slice(1) : `xl/${target}`;
+}
+
+/** Select the SheetInfo for the requested sheetName option. */
+function selectSheet(sheets: readonly SheetInfo[], sheetName: string | number): SheetInfo {
+  const found =
+    typeof sheetName === "number"
+      ? sheets[sheetName]
+      : sheets.find((sh) => sh.name === sheetName);
+  if (found === undefined) {
+    throw new Error(
+      typeof sheetName === "number"
+        ? `Sheet index out of range: ${sheetName}`
+        : `Sheet not found: "${sheetName}"`,
+    );
+  }
+  return found;
+}
+
+// ─── public API ───────────────────────────────────────────────────────────────
+
+/**
+ * Parse an XLSX binary buffer into a `DataFrame`.
+ *
+ * Mirrors `pandas.read_excel()`.
+ *
+ * @param data - XLSX file contents as a `Uint8Array` or `ArrayBuffer`.
+ * @param options - Parsing options (sheet selection, header, index column, etc.).
+ * @returns A `DataFrame` containing the sheet data.
+ *
+ * @example
+ * ```ts
+ * import { readFileSync } from "node:fs";
+ * const buf = readFileSync("data.xlsx");
+ * const df = readExcel(new Uint8Array(buf));
+ * // df.shape → [100, 5]
+ * ```
+ */
+export function readExcel(
+  data: Uint8Array | ArrayBufferLike,
+  options?: ReadExcelOptions,
+): DataFrame {
+  const buf = data instanceof Uint8Array ? data : new Uint8Array(data);
+  const opts = options ?? {};
+  const entries = parseZipEntries(buf);
+
+  // Load shared strings (optional — may be absent for numeric-only sheets)
+  const sstXml = getZipEntry(buf, entries, "xl/sharedStrings.xml") ?? "";
+  const sharedStrings = sstXml === "" ? [] : parseSharedStrings(sstXml);
+
+  // Load workbook to find sheet names
+  const wbXml = getZipEntry(buf, entries, "xl/workbook.xml");
+  if (wbXml === null) {
+    throw new Error("Invalid XLSX: xl/workbook.xml not found");
+  }
+  const sheets = parseWorkbookSheets(wbXml);
+  if (sheets.length === 0) {
+    throw new Error("Invalid XLSX: no sheets found in workbook");
+  }
+
+  const sheetName = opts.sheetName ?? 0;
+  const sheetInfo = selectSheet(sheets, sheetName);
+  const sheetIndex = typeof sheetName === "number" ? sheetName : sheets.indexOf(sheetInfo);
+
+  // Resolve sheet XML path via workbook relationships.
+  // NOTE(review): the `<string, string>` generic on the empty-map fallback
+  // was lost in transit; restored so both ternary arms agree — confirm.
+  const relsXml = getZipEntry(buf, entries, "xl/_rels/workbook.xml.rels") ?? "";
+  const rels = relsXml === "" ? new Map<string, string>() : parseRelationships(relsXml);
+  const sheetPath = resolveSheetPath(rels, sheetInfo, sheetIndex);
+
+  const wsXml = getZipEntry(buf, entries, sheetPath);
+  if (wsXml === null) {
+    throw new Error(`Sheet XML not found at path: ${sheetPath}`);
+  }
+  const rows = parseWorksheetRows(wsXml, sharedStrings);
+  return buildDataFrame(rows, opts);
+}
+
+/**
+ * Return the sheet names in an XLSX file without parsing cell data.
+ *
+ * @param data - XLSX file contents as a `Uint8Array` or `ArrayBuffer`.
+ * @returns Array of sheet name strings in workbook order.
+ *
+ * @example
+ * ```ts
+ * xlsxSheetNames(buf); // ["Sheet1", "Sheet2"]
+ * ```
+ */
+export function xlsxSheetNames(data: Uint8Array | ArrayBufferLike): string[] {
+  const buf = data instanceof Uint8Array ? data : new Uint8Array(data);
+  const wbXml = getZipEntry(buf, parseZipEntries(buf), "xl/workbook.xml");
+  if (wbXml === null) {
+    return [];
+  }
+  const sheets = parseWorkbookSheets(wbXml);
+  return sheets.map((s) => s.name);
+}
diff --git a/src/stats/categorical_ops.ts b/src/stats/categorical_ops.ts
new file mode 100644
index 00000000..f9abbb0d
--- /dev/null
+++ b/src/stats/categorical_ops.ts
@@ -0,0 +1,483 @@
+/**
+ * categorical_ops — standalone categorical utility functions.
+ *
+ * Mirrors pandas' `pd.Categorical`, `pd.Categorical.from_codes`, and related
+ * top-level helpers that operate on categorical data without requiring a method
+ * call on an existing `CategoricalAccessor`.
+ *
+ * All functions return a `CatSeriesLike` (or plain data) and are **pure** —
+ * inputs are never mutated.
+ *
+ * ### Included functions
+ *
+ * | Function | Pandas equivalent |
+ * |----------|-------------------|
+ * | `catFromCodes` | `pd.Categorical.from_codes` |
+ * | `catUnionCategories` | `a.cat.set_categories(union(...))` pattern |
+ * | `catIntersectCategories` | `a.cat.set_categories(intersect(...))` |
+ * | `catDiffCategories` | `a.cat.remove_categories(b_cats)` pattern |
+ * | `catEqualCategories` | compare `.cat.categories` sets |
+ * | `catSortByFreq` | `a.cat.reorder_categories(sorted_by_freq)` |
+ * | `catToOrdinal` | `pd.Categorical(values, categories=order, ordered=True)` |
+ * | `catFreqTable` | `a.value_counts(sort=False)` on categorical |
+ * | `catCrossTab` | reduced `pd.crosstab` for two categorical Series |
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import type { CatSeriesLike } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── public option types ───────────────────────────────────────────────────────
+
+/** Options for {@link catFromCodes}. */
+export interface CatFromCodesOptions {
+ /** Whether the resulting categorical is ordered. Default `false`. */
+ ordered?: boolean;
+ /** Series name for the result. */
+ name?: string | null;
+}
+
+/** Options for {@link catSortByFreq}. */
+export interface CatSortByFreqOptions {
+ /** If `true`, least frequent categories come first. Default `false` (most frequent first). */
+ ascending?: boolean;
+}
+
+/** Options for {@link catCrossTab}. */
+export interface CatCrossTabOptions {
+ /** If `true`, include a row and column of totals. Default `false`. */
+ margins?: boolean;
+ /** Label used for the margins row/column. Default `"All"`. */
+ marginsName?: string;
+ /** If `true`, normalize counts (divide by total). Default `false`. */
+ normalize?: boolean;
+}
+
+// ─── internal helpers ─────────────────────────────────────────────────────────
+
+/** Return true when value should be treated as missing. */
+function isMissing(v: Scalar): boolean {
+ return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+}
+
+/** Build a sorted unique key list preserving encounter order (for cats). */
+function uniqueKeys(cats: readonly Scalar[]): Scalar[] {
+ const seen = new Set();
+ const result: Scalar[] = [];
+ for (const c of cats) {
+ const k = String(c);
+ if (!seen.has(k)) {
+ seen.add(k);
+ result.push(c);
+ }
+ }
+ return result;
+}
+
+// ─── catFromCodes ─────────────────────────────────────────────────────────────
+
+/**
+ * Construct a categorical `Series` from integer codes and a categories array.
+ *
+ * Mirrors `pandas.Categorical.from_codes(codes, categories, ordered=False)`.
+ *
+ * - Codes are **0-based** indices into `categories`.
+ * - A code of `-1` maps to `null` (missing value), matching pandas `NaN`.
+ * - Any code outside `[-1, categories.length)` throws a `RangeError`.
+ *
+ * @param codes Integer codes (one per element).
+ * @param categories Array of category labels; the order defines ordinal rank.
+ * @param opts Optional settings (ordered, name).
+ * @returns A `CatSeriesLike` with the specified categories.
+ *
+ * @example
+ * ```ts
+ * const s = catFromCodes([0, 2, 1, -1, 0], ["a", "b", "c"]);
+ * s.cat.categories.values; // ["a", "b", "c"]
+ * s.toArray(); // ["a", "c", "b", null, "a"]
+ * ```
+ */
+export function catFromCodes(
+ codes: readonly number[],
+ categories: readonly Scalar[],
+ opts: CatFromCodesOptions = {},
+): CatSeriesLike {
+ const { ordered = false, name = null } = opts;
+ const cats = uniqueKeys(categories);
+ const values: Scalar[] = codes.map((code) => {
+ if (code === -1) return null;
+ if (code < -1 || code >= cats.length) {
+ throw new RangeError(
+ `catFromCodes: code ${code} is out of range [0, ${cats.length - 1}]`,
+ );
+ }
+ return cats[code] as Scalar;
+ });
+ const base = new Series({ data: values, name });
+ return base.cat.setCategories(cats, ordered);
+}
+
+// ─── catUnionCategories ────────────────────────────────────────────────────────
+
+/**
+ * Return a new `CatSeriesLike` with the same values as `a` but whose categories
+ * are the **union** of `a`'s and `b`'s categories.
+ *
+ * Categories from `b` that are not already in `a` are appended (in the order
+ * they appear in `b`). The ordering flag is taken from `a`.
+ *
+ * @example
+ * ```ts
+ * const a = new Series({ data: ["x", "y"] }).cat.setCategories(["x", "y"]);
+ * const b = new Series({ data: ["y", "z"] }).cat.setCategories(["y", "z"]);
+ * catUnionCategories(a, b).cat.categories.values; // ["x", "y", "z"]
+ * ```
+ */
+export function catUnionCategories(a: CatSeriesLike, b: CatSeriesLike): CatSeriesLike {
+ const aCats = a.cat.categories.values as Scalar[];
+ const bCats = b.cat.categories.values as Scalar[];
+ const seen = new Set(aCats.map(String));
+ const merged = [...aCats];
+ for (const c of bCats) {
+ if (!seen.has(String(c))) {
+ seen.add(String(c));
+ merged.push(c);
+ }
+ }
+ return a.cat.setCategories(merged, a.cat.ordered);
+}
+
+// ─── catIntersectCategories ───────────────────────────────────────────────────
+
+/**
+ * Return a new `CatSeriesLike` with values from `a` whose categories are the
+ * **intersection** of `a`'s and `b`'s categories.
+ *
+ * Values whose category is not in the intersection are set to `null`.
+ *
+ * @example
+ * ```ts
+ * const a = new Series({ data: ["x", "y", "z"] }).cat.setCategories(["x", "y", "z"]);
+ * const b = new Series({ data: ["y", "z"] }).cat.setCategories(["y", "z"]);
+ * const r = catIntersectCategories(a, b);
+ * r.cat.categories.values; // ["y", "z"]
+ * r.toArray(); // [null, "y", "z"]
+ * ```
+ */
+export function catIntersectCategories(a: CatSeriesLike, b: CatSeriesLike): CatSeriesLike {
+ const bSet = new Set((b.cat.categories.values as Scalar[]).map(String));
+ const intersected = (a.cat.categories.values as Scalar[]).filter((c) =>
+ bSet.has(String(c)),
+ );
+ return a.cat.setCategories(intersected, a.cat.ordered);
+}
+
+// ─── catDiffCategories ────────────────────────────────────────────────────────
+
+/**
+ * Return a new `CatSeriesLike` with values from `a` whose categories are the
+ * **set difference** `a.categories − b.categories`.
+ *
+ * Values whose category is present in `b` are set to `null`.
+ *
+ * @example
+ * ```ts
+ * const a = new Series({ data: ["x", "y", "z"] }).cat.setCategories(["x", "y", "z"]);
+ * const b = new Series({ data: ["z"] }).cat.setCategories(["z"]);
+ * const r = catDiffCategories(a, b);
+ * r.cat.categories.values; // ["x", "y"]
+ * r.toArray(); // ["x", "y", null]
+ * ```
+ */
+export function catDiffCategories(a: CatSeriesLike, b: CatSeriesLike): CatSeriesLike {
+ const bSet = new Set((b.cat.categories.values as Scalar[]).map(String));
+ const remaining = (a.cat.categories.values as Scalar[]).filter(
+ (c) => !bSet.has(String(c)),
+ );
+ return a.cat.setCategories(remaining, a.cat.ordered);
+}
+
+// ─── catEqualCategories ───────────────────────────────────────────────────────
+
+/**
+ * Return `true` when `a` and `b` have exactly the same set of categories,
+ * ignoring order.
+ *
+ * @example
+ * ```ts
+ * const a = new Series({ data: ["x"] }).cat.setCategories(["x", "y"]);
+ * const b = new Series({ data: ["y"] }).cat.setCategories(["y", "x"]);
+ * catEqualCategories(a, b); // true
+ * ```
+ */
+export function catEqualCategories(a: CatSeriesLike, b: CatSeriesLike): boolean {
+ const aSet = new Set((a.cat.categories.values as Scalar[]).map(String));
+ const bSet = new Set((b.cat.categories.values as Scalar[]).map(String));
+ if (aSet.size !== bSet.size) return false;
+ for (const c of aSet) {
+ if (!bSet.has(c)) return false;
+ }
+ return true;
+}
+
+// ─── catSortByFreq ────────────────────────────────────────────────────────────
+
+/**
+ * Reorder the categories of a categorical Series by their **frequency** in the
+ * data (most frequent first by default).
+ *
+ * Mirrors `series.cat.reorder_categories(series.value_counts().index)`.
+ *
+ * @param series The source categorical Series.
+ * @param opts `{ ascending: false }` — set `true` for rarest-first.
+ * @returns A new `CatSeriesLike` with categories sorted by frequency.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: ["b", "a", "b", "c", "b", "a"] })
+ * .cat.setCategories(["a", "b", "c"]);
+ * catSortByFreq(s).cat.categories.values; // ["b", "a", "c"]
+ * ```
+ */
+export function catSortByFreq(
+ series: CatSeriesLike,
+ opts: CatSortByFreqOptions = {},
+): CatSeriesLike {
+ const { ascending = false } = opts;
+ const cats = series.cat.categories.values as Scalar[];
+  const freq = new Map<string, number>();
+ for (const c of cats) freq.set(String(c), 0);
+ for (const v of series.values) {
+ if (!isMissing(v)) {
+ const k = String(v);
+ const prev = freq.get(k);
+ if (prev !== undefined) freq.set(k, prev + 1);
+ }
+ }
+ const sorted = [...cats].sort((a, b) => {
+ const fa = freq.get(String(a)) ?? 0;
+ const fb = freq.get(String(b)) ?? 0;
+ return ascending ? fa - fb : fb - fa;
+ });
+ return series.cat.reorderCategories(sorted);
+}
+
+// ─── catToOrdinal ─────────────────────────────────────────────────────────────
+
+/**
+ * Create an **ordered** categorical Series from `series` using `order` to
+ * define both the category set and their rank.
+ *
+ * Mirrors `pd.Categorical(series, categories=order, ordered=True)`.
+ *
+ * Values not present in `order` are set to `null`. The number of categories
+ * in the result equals `order.length`.
+ *
+ * @param series Source Series (any values).
+ * @param order Ordered list of category labels (low to high).
+ * @returns A new `CatSeriesLike` with `.cat.ordered === true`.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: ["med", "low", "high", "med"] });
+ * const ord = catToOrdinal(s, ["low", "med", "high"]);
+ * ord.cat.ordered; // true
+ * ord.cat.categories.values; // ["low", "med", "high"]
+ * ```
+ */
+export function catToOrdinal(series: CatSeriesLike, order: readonly Scalar[]): CatSeriesLike {
+ return series.cat.setCategories(order, true);
+}
+
+// ─── catFreqTable ─────────────────────────────────────────────────────────────
+
+/**
+ * Return the frequency of each category as a plain `Record<string, number>`.
+ *
+ * All defined categories are present in the result, even those with zero
+ * occurrences, matching `series.cat.value_counts()` semantics.
+ *
+ * Missing values are excluded from the count.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: ["b", "a", "b", null] })
+ * .cat.setCategories(["a", "b", "c"]);
+ * catFreqTable(s); // { a: 1, b: 2, c: 0 }
+ * ```
+ */
+export function catFreqTable(series: CatSeriesLike): Record<string, number> {
+  const cats = series.cat.categories.values as Scalar[];
+  const freq: Record<string, number> = {};
+ for (const c of cats) freq[String(c)] = 0;
+ for (const v of series.values) {
+ if (!isMissing(v)) {
+ const k = String(v);
+ if (Object.prototype.hasOwnProperty.call(freq, k)) {
+ (freq[k] as number) += 1;
+ }
+ }
+ }
+ return freq;
+}
+
+// ─── catCrossTab ──────────────────────────────────────────────────────────────
+
+/**
+ * Compute a cross-tabulation of two categorical Series.
+ *
+ * Mirrors a simplified `pd.crosstab(a, b)` for categorical inputs:
+ * rows = `a`'s categories, columns = `b`'s categories, cells = co-occurrence
+ * counts. Only aligned positions (same integer index) are tallied; missing
+ * values in either Series skip the row.
+ *
+ * @param a First categorical Series (determines rows).
+ * @param b Second categorical Series (determines columns).
+ * @param opts `{ margins, marginsName, normalize }`.
+ * @returns A `DataFrame` of count (or proportion) values.
+ *
+ * @example
+ * ```ts
+ * const a = new Series({ data: ["x", "x", "y", "y"] }).cat.setCategories(["x", "y"]);
+ * const b = new Series({ data: ["p", "q", "p", "q"] }).cat.setCategories(["p", "q"]);
+ * const ct = catCrossTab(a, b);
+ * // DataFrame:
+ * // p q
+ * // x 1 1
+ * // y 1 1
+ * ```
+ */
+export function catCrossTab(
+ a: CatSeriesLike,
+ b: CatSeriesLike,
+ opts: CatCrossTabOptions = {},
+): DataFrame {
+ const { margins = false, marginsName = "All", normalize = false } = opts;
+
+ const rowCats = a.cat.categories.values as Label[];
+ const colCats = b.cat.categories.values as Label[];
+
+ // Build count matrix: rowCats × colCats
+  const counts = new Map<string, Map<string, number>>();
+ for (const r of rowCats) {
+    const row = new Map<string, number>();
+ for (const c of colCats) row.set(String(c), 0);
+ counts.set(String(r), row);
+ }
+
+ const aVals = a.values;
+ const bVals = b.values;
+ const n = Math.min(aVals.length, bVals.length);
+ for (let i = 0; i < n; i++) {
+ const av = aVals[i];
+ const bv = bVals[i];
+ if (isMissing(av) || isMissing(bv)) continue;
+ const row = counts.get(String(av));
+ if (row === undefined) continue;
+ const prev = row.get(String(bv));
+ if (prev !== undefined) row.set(String(bv), prev + 1);
+ }
+
+ // Compute total for normalization
+ let total = 0;
+ if (normalize) {
+ for (const row of counts.values()) {
+ for (const v of row.values()) total += v;
+ }
+ }
+
+ // Build data columns: each colCat is a column, each rowCat is a row value
+  const data: Record<string, Scalar[]> = {};
+ for (const c of colCats) {
+ const col: Scalar[] = [];
+ for (const r of rowCats) {
+ const v = counts.get(String(r))?.get(String(c)) ?? 0;
+ col.push(normalize && total > 0 ? v / total : v);
+ }
+ data[String(c)] = col;
+ }
+
+ // Add margin column (row totals)
+ if (margins) {
+ const rowTotals: Scalar[] = rowCats.map((r) => {
+ let sum = 0;
+ const row = counts.get(String(r));
+ if (row) for (const v of row.values()) sum += v;
+ return normalize && total > 0 ? sum / total : sum;
+ });
+ data[marginsName] = rowTotals;
+ }
+
+ // Build DataFrame with row index = rowCats
+ const rowLabels: Label[] = [...rowCats];
+
+ // Add margin row (column totals)
+ if (margins) {
+ const allCols = [...colCats.map(String), marginsName];
+ let marginRowTotal = 0;
+ for (const c of colCats) {
+ let colSum = 0;
+ for (const r of rowCats) {
+ colSum += counts.get(String(r))?.get(String(c)) ?? 0;
+ }
+ const val = normalize && total > 0 ? colSum / total : colSum;
+ (data[String(c)] as Scalar[]).push(val);
+ marginRowTotal += normalize && total > 0 ? colSum / total : colSum;
+ }
+ if (margins) {
+ (data[marginsName] as Scalar[]).push(
+ normalize && total > 0 ? marginRowTotal : marginRowTotal,
+ );
+ }
+ rowLabels.push(marginsName as Label);
+ // Ensure all column arrays have the same length
+ for (const col of allCols) {
+ const arr = data[col];
+ if (arr === undefined) data[col] = rowLabels.map(() => 0);
+ }
+ }
+
+ return DataFrame.fromColumns(data, { index: rowLabels });
+}
+
+// ─── catRecode ────────────────────────────────────────────────────────────────
+
+/**
+ * Rename categories of a categorical Series using a string→string map.
+ *
+ * Mirrors `series.cat.rename_categories(mapping)` but as a standalone function
+ * that also accepts a transform function.
+ *
+ * @param series The source categorical.
+ * @param mapping Either a `Record<string, string>` (rename specified keys) or
+ * a `(label: string) => string` transform applied to every category.
+ * @returns A new `CatSeriesLike` with renamed categories.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: ["a", "b"] }).cat.setCategories(["a", "b", "c"]);
+ * catRecode(s, { a: "A", b: "B" }).cat.categories.values; // ["A", "B", "c"]
+ * catRecode(s, (x) => x.toUpperCase()).cat.categories.values; // ["A", "B", "C"]
+ * ```
+ */
+export function catRecode(
+ series: CatSeriesLike,
+  mapping: Record<string, string> | ((label: string) => string),
+): CatSeriesLike {
+ if (typeof mapping === "function") {
+ return series.cat.renameCategories(
+ (series.cat.categories.values as Scalar[]).map((c) => mapping(String(c))),
+ );
+ }
+ const cats = series.cat.categories.values as Scalar[];
+ const newCats = cats.map((c): Scalar => {
+ const k = String(c);
+ return Object.prototype.hasOwnProperty.call(mapping, k)
+ ? (mapping[k] as string)
+ : c;
+ });
+ return series.cat.renameCategories(newCats);
+}
diff --git a/src/stats/clip_advanced.ts b/src/stats/clip_advanced.ts
new file mode 100644
index 00000000..032bce5b
--- /dev/null
+++ b/src/stats/clip_advanced.ts
@@ -0,0 +1,290 @@
+/**
+ * clip_advanced — per-element clipping for Series and DataFrame.
+ *
+ * Mirrors the following pandas methods with array/Series/DataFrame bounds:
+ * - `Series.clip(lower, upper)` — per-element bounds from scalar, array, or Series
+ * - `DataFrame.clip(lower, upper, axis?)` — per-element bounds with broadcast support
+ *
+ * Unlike the simple scalar `clip` in `elem_ops`, this module supports:
+ * - Per-position bounds (array or positionally-aligned Series)
+ * - DataFrame-shaped bounds for element-wise clipping
+ * - Axis-based broadcasting when bounds is a Series
+ *
+ * All functions are **pure** (return new objects; inputs are unchanged).
+ * Missing values (null / NaN) are propagated through every operation.
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Axis, Scalar } from "../types.ts";
+
+// ─── public types ──────────────────────────────────────────────────────────────
+
+/** Scalar or per-element bound accepted by {@link clipAdvancedSeries}. */
+export type SeriesBound = number | null | undefined | readonly number[] | Series;
+
+/** Scalar or per-element bound accepted by {@link clipAdvancedDataFrame}. */
+export type DataFrameBound =
+ | number
+ | null
+ | undefined
+ | readonly number[]
+ | Series
+ | DataFrame;
+
+/** Options for {@link clipAdvancedSeries}. */
+export interface ClipAdvancedSeriesOptions {
+ /**
+ * Lower bound — scalar, array, or positionally-aligned Series.
+ * `null` / `undefined` means no lower bound.
+ */
+ readonly lower?: SeriesBound;
+ /**
+ * Upper bound — scalar, array, or positionally-aligned Series.
+ * `null` / `undefined` means no upper bound.
+ */
+ readonly upper?: SeriesBound;
+}
+
+/** Options for {@link clipAdvancedDataFrame}. */
+export interface ClipAdvancedDataFrameOptions {
+ /**
+ * Lower bound — scalar, array, Series, or element-wise DataFrame.
+ * `null` / `undefined` means no lower bound.
+ */
+ readonly lower?: DataFrameBound;
+ /**
+ * Upper bound — scalar, array, Series, or element-wise DataFrame.
+ * `null` / `undefined` means no upper bound.
+ */
+ readonly upper?: DataFrameBound;
+ /**
+ * When `lower` or `upper` is a Series, this axis controls broadcasting.
+ * - `0` or `"index"` (default): broadcast Series along rows (one bound per column).
+ * - `1` or `"columns"`: broadcast Series along columns (one bound per row).
+ */
+ readonly axis?: Axis;
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** True when `v` is a finite number (not null / undefined / NaN). */
+function isFiniteNum(v: Scalar): v is number {
+ return typeof v === "number" && !Number.isNaN(v);
+}
+
+/** Clip a numeric value to [lo, hi], preserving missing values. */
+function clipValue(v: Scalar, lo: number, hi: number): Scalar {
+ if (!isFiniteNum(v)) {
+ return v;
+ }
+ if (v < lo) {
+ return lo;
+ }
+ if (v > hi) {
+ return hi;
+ }
+ return v;
+}
+
+/**
+ * Resolve a Series bound to a positional number for index `i`.
+ * Arrays are accessed by position; Series are accessed by position.
+ */
+function resolveSeriesBound(bound: SeriesBound, i: number): number {
+ if (bound === null || bound === undefined) {
+ return Number.NaN; // sentinel: no bound
+ }
+ if (typeof bound === "number") {
+ return bound;
+ }
+ if (Array.isArray(bound)) {
+ const v = (bound as readonly number[])[i];
+ return v !== undefined ? v : Number.NaN;
+ }
+ // Series — positional access
+ const s = bound as Series;
+ if (i >= s.size) {
+ return Number.NaN;
+ }
+ const sv = s.iat(i);
+ return isFiniteNum(sv) ? sv : Number.NaN;
+}
+
+// ─── clipAdvancedSeries ────────────────────────────────────────────────────────
+
+/**
+ * Clip each element of a Series to per-element [lower, upper] bounds.
+ *
+ * Bounds may be:
+ * - A scalar `number` — applies the same bound to every element
+ * - A `number[]` array — per-position bounds aligned by position
+ * - A `Series` — per-position bounds taken positionally (label order ignored)
+ * - `null` / `undefined` — no bound in that direction
+ *
+ * Non-numeric values (null, NaN, strings, …) pass through unchanged.
+ * Mirrors `pandas.Series.clip(lower, upper)` with array bounds.
+ *
+ * @example
+ * ```ts
+ * import { Series, clipAdvancedSeries } from "tsb";
+ * const s = new Series({ data: [-3, 1, 5, 10] });
+ * const lo = new Series({ data: [-1, 0, 2, 8] });
+ * clipAdvancedSeries(s, { lower: lo }).values; // [-1, 1, 5, 10]
+ * ```
+ */
+export function clipAdvancedSeries(
+ series: Series,
+ options: ClipAdvancedSeriesOptions = {},
+): Series {
+ const { lower, upper } = options;
+ const n = series.size;
+ const out: Scalar[] = new Array(n);
+
+ for (let i = 0; i < n; i++) {
+ const v = series.iat(i);
+ if (!isFiniteNum(v)) {
+ out[i] = v;
+ continue;
+ }
+
+ const lo = resolveSeriesBound(lower, i);
+ const hi = resolveSeriesBound(upper, i);
+
+ const effectiveLo = Number.isNaN(lo) ? Number.NEGATIVE_INFINITY : lo;
+ const effectiveHi = Number.isNaN(hi) ? Number.POSITIVE_INFINITY : hi;
+
+ out[i] = clipValue(v, effectiveLo, effectiveHi);
+ }
+
+ return new Series({ data: out, index: series.index, name: series.name });
+}
+
+// ─── DataFrame bound helpers ───────────────────────────────────────────────────
+
+/** Resolve bound for a DataFrame cell where the bound is a Series (axis-based). */
+function resolveSeriesBoundForDf(s: Series, r: number, c: number, axis: Axis): number {
+ const isRowAxis = axis === 0 || axis === "index";
+ if (isRowAxis) {
+ // broadcast along rows → one bound per column → use col index `c`
+ if (c >= s.size) {
+ return Number.NaN;
+ }
+ const sv = s.iat(c);
+ return isFiniteNum(sv) ? sv : Number.NaN;
+ }
+ // broadcast along columns → one bound per row → use row index `r`
+ if (r >= s.size) {
+ return Number.NaN;
+ }
+ const sv = s.iat(r);
+ return isFiniteNum(sv) ? sv : Number.NaN;
+}
+
+/** Resolve bound for a DataFrame cell where the bound is a DataFrame (element-wise). */
+function resolveDataFrameBoundFromDf(bound: DataFrame, r: number, colName: string): number {
+ let val: Scalar = null;
+ try {
+ val = bound.col(colName).iat(r);
+ } catch {
+ return Number.NaN;
+ }
+ return isFiniteNum(val) ? val : Number.NaN;
+}
+
+/**
+ * Resolve a DataFrame bound value for cell (row r, col c).
+ * Supports: scalar, row-array, Series (broadcast by axis), DataFrame (element-wise).
+ */
+function resolveDataFrameBound(
+ bound: DataFrameBound,
+ r: number,
+ c: number,
+ colName: string,
+ axis: Axis,
+): number {
+ if (bound === null || bound === undefined) {
+ return Number.NaN;
+ }
+ if (typeof bound === "number") {
+ return bound;
+ }
+ if (bound instanceof DataFrame) {
+ return resolveDataFrameBoundFromDf(bound, r, colName);
+ }
+ if (bound instanceof Series) {
+ return resolveSeriesBoundForDf(bound as Series, r, c, axis);
+ }
+ // plain array: treat as row-indexed (one bound per row)
+ if (Array.isArray(bound)) {
+ const v = (bound as readonly number[])[r];
+ return v !== undefined ? v : Number.NaN;
+ }
+ return Number.NaN;
+}
+
+// ─── clipAdvancedDataFrame ─────────────────────────────────────────────────────
+
+/**
+ * Clip each element of a DataFrame to per-element [lower, upper] bounds.
+ *
+ * Bounds may be:
+ * - A scalar `number` — same bound applied to every cell
+ * - A `number[]` array — per-row bounds (one per row, broadcast across columns)
+ * - A `Series` — broadcast by `axis`:
+ * - `axis=0` (default): one bound per **column** (series index = column position)
+ * - `axis=1`: one bound per **row** (series index = row position)
+ * - A `DataFrame` — element-wise bounds (same shape, same column names)
+ * - `null` / `undefined` — no bound in that direction
+ *
+ * Non-numeric values (null, NaN, strings, …) pass through unchanged.
+ * Mirrors `pandas.DataFrame.clip(lower, upper, axis=0)` with array/Series/DF bounds.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, clipAdvancedDataFrame } from "tsb";
+ * const df = DataFrame.fromColumns({ a: [1, 5, 9], b: [2, 6, 10] });
+ * const loBound = DataFrame.fromColumns({ a: [2, 3, 4], b: [1, 4, 8] });
+ * clipAdvancedDataFrame(df, { lower: loBound }).col("a").values; // [2, 5, 9]
+ * ```
+ */
+export function clipAdvancedDataFrame(
+ df: DataFrame,
+ options: ClipAdvancedDataFrameOptions = {},
+): DataFrame {
+ const { lower, upper } = options;
+ const axis: Axis = options.axis ?? 0;
+ const colNames = df.columns.values;
+  const colMap = new Map<string, Series>();
+
+ for (let c = 0; c < colNames.length; c++) {
+ const colName = colNames[c];
+ if (colName === undefined) {
+ continue;
+ }
+ const col = df.col(colName);
+ const out: Scalar[] = new Array(df.index.size);
+
+ for (let r = 0; r < df.index.size; r++) {
+ const v = col.iat(r);
+ if (!isFiniteNum(v)) {
+ out[r] = v;
+ continue;
+ }
+
+ const lo = resolveDataFrameBound(lower, r, c, colName, axis);
+ const hi = resolveDataFrameBound(upper, r, c, colName, axis);
+
+ const effectiveLo = Number.isNaN(lo) ? Number.NEGATIVE_INFINITY : lo;
+ const effectiveHi = Number.isNaN(hi) ? Number.POSITIVE_INFINITY : hi;
+
+ out[r] = clipValue(v, effectiveLo, effectiveHi);
+ }
+
+ colMap.set(colName, new Series({ data: out, index: df.index, name: colName }));
+ }
+
+ return new DataFrame(colMap, df.index);
+}
diff --git a/src/stats/format_ops.ts b/src/stats/format_ops.ts
new file mode 100644
index 00000000..148a85b1
--- /dev/null
+++ b/src/stats/format_ops.ts
@@ -0,0 +1,442 @@
+/**
+ * format_ops — number-formatting helpers for Series and DataFrame.
+ *
+ * Mirrors several pandas formatting utilities including `Series.map`,
+ * `DataFrame.style`, and the `format_` methods.
+ *
+ * Exported functions:
+ * - {@link formatFloat} — fixed decimal places
+ * - {@link formatPercent} — percentage string
+ * - {@link formatScientific} — scientific notation (e.g. `1.23e+4`)
+ * - {@link formatEngineering} — engineering notation (exponent multiple of 3)
+ * - {@link formatThousands} — thousands-separated string
+ * - {@link formatCurrency} — currency string
+ * - {@link formatCompact} — compact notation (K, M, B, T)
+ * - {@link makeFloatFormatter} — factory returning a float formatter
+ * - {@link makePercentFormatter} — factory returning a percent formatter
+ * - {@link makeCurrencyFormatter} — factory returning a currency formatter
+ * - {@link applySeriesFormatter} — apply a formatter to every value in a Series
+ * - {@link applyDataFrameFormatter} — apply per-column formatters to a DataFrame
+ * - {@link seriesToString} — render a Series as a human-readable string
+ * - {@link dataFrameToString} — render a DataFrame as a human-readable string
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── scalar formatting ────────────────────────────────────────────────────────
+
+/**
+ * Format a number with a fixed number of decimal places.
+ *
+ * Non-finite inputs (NaN, ±Infinity) are rendered via `String(value)`.
+ *
+ * @param value The number to format.
+ * @param decimals Number of decimal places. Default: `2`.
+ */
+export function formatFloat(value: number, decimals = 2): string {
+  return Number.isFinite(value) ? value.toFixed(decimals) : String(value);
+}
+
+/**
+ * Format a number as a percentage string.
+ *
+ * The value is multiplied by 100 before formatting.
+ * e.g. `formatPercent(0.1234, 1)` → `"12.3%"`.
+ *
+ * Non-finite inputs are rendered via `String(value)` with no `%` suffix.
+ *
+ * @param value The proportion to format (0 → `"0.00%"`, 1 → `"100.00%"`).
+ * @param decimals Number of decimal places. Default: `2`.
+ */
+export function formatPercent(value: number, decimals = 2): string {
+  return Number.isFinite(value) ? `${(100 * value).toFixed(decimals)}%` : String(value);
+}
+
+/**
+ * Format a number in scientific notation.
+ *
+ * e.g. `formatScientific(12345.678, 3)` → `"1.235e+4"`.
+ *
+ * Non-finite inputs are rendered via `String(value)`.
+ *
+ * @param value The number to format.
+ * @param decimals Significant figures after the decimal point. Default: `3`.
+ */
+export function formatScientific(value: number, decimals = 3): string {
+  return Number.isFinite(value) ? value.toExponential(decimals) : String(value);
+}
+
+/**
+ * Format a number in engineering notation (exponent always a multiple of 3).
+ *
+ * e.g. `formatEngineering(12345.678, 3)` → `"12.346e+3"`.
+ *
+ * @param value The number to format.
+ * @param decimals Decimal places in the mantissa. Default: `3`.
+ */
+export function formatEngineering(value: number, decimals = 3): string {
+ if (!Number.isFinite(value)) {
+ return String(value);
+ }
+ if (value === 0) {
+ return `0.${"0".repeat(decimals)}e+0`;
+ }
+ const sign = value < 0 ? "-" : "";
+ const abs = Math.abs(value);
+ const exp = Math.floor(Math.log10(abs));
+ const engExp = Math.floor(exp / 3) * 3;
+ const mantissa = abs / 10 ** engExp;
+ const expSign = engExp >= 0 ? "+" : "-";
+ return `${sign}${mantissa.toFixed(decimals)}e${expSign}${Math.abs(engExp)}`;
+}
+
+/**
+ * Format a number with a thousands separator.
+ *
+ * e.g. `formatThousands(1234567.89, 2)` → `"1,234,567.89"`.
+ *
+ * @param value The number to format (non-finite values render as their string).
+ * @param decimals Decimal places. Default: `2`.
+ * @param separator Thousands separator. Default: `","`.
+ */
+export function formatThousands(value: number, decimals = 2, separator = ","): string {
+  if (!Number.isFinite(value)) {
+    return String(value);
+  }
+  const [rawInt = "", fracPart] = value.toFixed(decimals).split(".");
+  const negative = rawInt.startsWith("-");
+  const digits = negative ? rawInt.slice(1) : rawInt;
+  // Group the integer digits in threes from the right.
+  const groups: string[] = [];
+  for (let end = digits.length; end > 0; end -= 3) {
+    groups.unshift(digits.slice(Math.max(0, end - 3), end));
+  }
+  const grouped = groups.join(separator);
+  const prefix = negative ? "-" : "";
+  return fracPart !== undefined ? `${prefix}${grouped}.${fracPart}` : `${prefix}${grouped}`;
+}
+
+/**
+ * Format a number as a currency string.
+ *
+ * e.g. `formatCurrency(1234.5, "$", 2)` → `"$1,234.50"`.
+ * Negative values carry the sign before the symbol: `"-$1,234.50"`.
+ *
+ * @param value The number to format.
+ * @param symbol Currency symbol. Default: `"$"`.
+ * @param decimals Decimal places. Default: `2`.
+ */
+export function formatCurrency(value: number, symbol = "$", decimals = 2): string {
+  if (!Number.isFinite(value)) {
+    return `${symbol}${String(value)}`;
+  }
+  const prefix = value < 0 ? "-" : "";
+  return `${prefix}${symbol}${formatThousands(Math.abs(value), decimals)}`;
+}
+
+/**
+ * Format a number in compact notation using SI-style suffixes.
+ *
+ * Thresholds: T ≥ 1e12, B ≥ 1e9, M ≥ 1e6, K ≥ 1e3.
+ * Values below 1000 are formatted with `decimals` decimal places.
+ *
+ * e.g. `formatCompact(1_234_567, 2)` → `"1.23M"`.
+ *
+ * @param value The number to format.
+ * @param decimals Decimal places in the mantissa. Default: `2`.
+ */
+export function formatCompact(value: number, decimals = 2): string {
+  if (!Number.isFinite(value)) {
+    return String(value);
+  }
+  const prefix = value < 0 ? "-" : "";
+  const abs = Math.abs(value);
+  // Largest-first threshold table; first match wins.
+  const steps: ReadonlyArray<readonly [number, string]> = [
+    [1e12, "T"],
+    [1e9, "B"],
+    [1e6, "M"],
+    [1e3, "K"],
+  ];
+  for (const [threshold, suffix] of steps) {
+    if (abs >= threshold) {
+      return `${prefix}${(abs / threshold).toFixed(decimals)}${suffix}`;
+    }
+  }
+  return `${prefix}${abs.toFixed(decimals)}`;
+}
+
+// ─── formatter factories ──────────────────────────────────────────────────────
+
+/** A function that converts a {@link Scalar} value to a string. */
+export type Formatter = (value: Scalar) => string;
+
+/**
+ * Wrap a number-only formatter so non-number scalars fall back to
+ * `String(value ?? "")` (null/undefined render as the empty string).
+ */
+function numericFormatter(format: (n: number) => string): Formatter {
+  return (value: Scalar): string =>
+    typeof value === "number" ? format(value) : String(value ?? "");
+}
+
+/**
+ * Create a float formatter with the given number of decimal places.
+ *
+ * @param decimals Number of decimal places. Default: `2`.
+ */
+export function makeFloatFormatter(decimals = 2): Formatter {
+  return numericFormatter((n) => formatFloat(n, decimals));
+}
+
+/**
+ * Create a percent formatter with the given number of decimal places.
+ *
+ * @param decimals Number of decimal places. Default: `2`.
+ */
+export function makePercentFormatter(decimals = 2): Formatter {
+  return numericFormatter((n) => formatPercent(n, decimals));
+}
+
+/**
+ * Create a currency formatter with the given symbol and decimal places.
+ *
+ * @param symbol Currency symbol. Default: `"$"`.
+ * @param decimals Decimal places. Default: `2`.
+ */
+export function makeCurrencyFormatter(symbol = "$", decimals = 2): Formatter {
+  return numericFormatter((n) => formatCurrency(n, symbol, decimals));
+}
+
+// ─── apply to Series / DataFrame ─────────────────────────────────────────────
+
+/**
+ * Apply a formatter to every element of a Series, returning a `Series`.
+ *
+ * Non-numeric formatters receive the raw {@link Scalar} value; numeric-only
+ * formatters (e.g. from {@link makeFloatFormatter}) receive the value unchanged
+ * and should guard against non-numeric types themselves.
+ *
+ * @param series The source Series.
+ * @param formatter A {@link Formatter} to apply to each value.
+ */
+export function applySeriesFormatter(series: Series, formatter: Formatter): Series {
+ const formatted: string[] = [];
+ for (let i = 0; i < series.size; i++) {
+ formatted.push(formatter(series.values[i] as Scalar));
+ }
+ return new Series({ data: formatted, index: series.index, name: series.name });
+}
+
+/**
+ * Apply per-column formatters to a DataFrame, returning a
+ * `Record` where each key is a column name and the value is
+ * the formatted column data.
+ *
+ * Columns without a matching formatter are rendered via `String(value)`.
+ *
+ * @param df The source DataFrame.
+ * @param formatters Map of column name → {@link Formatter}.
+ */
+export function applyDataFrameFormatter(
+ df: DataFrame,
+ formatters: Readonly>,
+): Record {
+ const result: Record = {};
+ for (const colName of df.columns.values) {
+ const fmt: Formatter = formatters[colName] ?? ((v: Scalar) => String(v ?? ""));
+ const col = df.col(colName);
+ const formatted: string[] = [];
+ for (let i = 0; i < col.size; i++) {
+ formatted.push(fmt(col.values[i] as Scalar));
+ }
+ result[colName] = formatted;
+ }
+ return result;
+}
+
+// ─── to-string rendering ──────────────────────────────────────────────────────
+
+/** Options for {@link seriesToString}. */
+export interface SeriesToStringOptions {
+  /**
+   * Maximum number of rows to display.
+   * Default: `60`.
+   */
+  readonly maxRows?: number;
+  /**
+   * Formatter to apply to each value.
+   * Default: `String`, with null/undefined rendered as `"NaN"`.
+   */
+  readonly formatter?: Formatter;
+  /**
+   * Series name to display in the header.
+   * Default: the series' own name. Pass `null` to suppress the name entry.
+   */
+  readonly name?: string | null;
+}
+
+/**
+ * Render a Series as a human-readable multi-line string.
+ *
+ * The output mirrors `repr(series)` in pandas:
+ * ```
+ * 0 1.00
+ * 1 2.00
+ * 2 3.00
+ * Name: x, dtype: float64
+ * ```
+ *
+ * @param series The Series to render.
+ * @param options Optional rendering options.
+ */
+export function seriesToString(series: Series, options: SeriesToStringOptions = {}): string {
+  const maxRows = options.maxRows ?? 60;
+  const fmt: Formatter = options.formatter ?? ((v: Scalar) => String(v ?? "NaN"));
+  // `name: null` explicitly suppresses the name; only `undefined` falls back
+  // to the series' own name.
+  const displayName = options.name !== undefined ? options.name : series.name;
+
+  const n = series.size;
+  // Truncate the display to at most maxRows entries; a "..." marker is
+  // appended below when anything was cut.
+  const truncated = n > maxRows;
+  const displayCount = truncated ? maxRows : n;
+
+  // Compute label column width
+  let labelWidth = 0;
+  for (let i = 0; i < displayCount; i++) {
+    const label = String(series.index.at(i) ?? "");
+    if (label.length > labelWidth) {
+      labelWidth = label.length;
+    }
+  }
+
+  const lines: string[] = [];
+  for (let i = 0; i < displayCount; i++) {
+    const label = String(series.index.at(i) ?? "").padEnd(labelWidth);
+    const val = fmt(series.values[i] as Scalar);
+    lines.push(`${label} ${val}`);
+  }
+
+  if (truncated) {
+    lines.push(`...`);
+  }
+
+  // pandas-style footer: "Name: <name>, dtype: <dtype>".
+  const footer: string[] = [];
+  if (displayName !== null && displayName !== undefined) {
+    footer.push(`Name: ${displayName}`);
+  }
+  footer.push(`dtype: ${series.dtype.name}`);
+
+  // dtype is always pushed, so the footer line is always emitted; the guard
+  // is kept for symmetry should the footer ever become fully optional.
+  if (footer.length > 0) {
+    lines.push(footer.join(", "));
+  }
+
+  return lines.join("\n");
+}
+
+/** Options for {@link dataFrameToString}. */
+export interface DataFrameToStringOptions {
+  /**
+   * Maximum number of rows to display.
+   * Default: `60`.
+   */
+  readonly maxRows?: number;
+  /**
+   * Maximum number of columns to display.
+   * Default: `20`.
+   */
+  readonly maxCols?: number;
+  /**
+   * Per-column formatters, keyed by column name.
+   * Default: `String(value ?? "")` for all columns.
+   * (Type arguments restored: `Readonly>` was a mangled
+   * `Readonly<Record<string, Formatter>>` and did not compile.)
+   */
+  readonly formatters?: Readonly<Record<string, Formatter>>;
+}
+
+/**
+ * Render a DataFrame as a human-readable multi-line string (like pandas `repr`).
+ *
+ * Layout: a header row of right-aligned column names, then one line per row —
+ * the left-aligned index label followed by right-aligned cell values.
+ *
+ * @param df The DataFrame to render.
+ * @param options Optional rendering options.
+ */
+export function dataFrameToString(df: DataFrame, options: DataFrameToStringOptions = {}): string {
+  const maxRows = options.maxRows ?? 60;
+  const maxCols = options.maxCols ?? 20;
+  const formatters = options.formatters ?? {};
+
+  const [nRows, nCols] = df.shape;
+  const truncRows = nRows > maxRows;
+  const truncCols = nCols > maxCols;
+  const displayRows = truncRows ? maxRows : nRows;
+
+  // Pick columns to display
+  const allCols = [...df.columns.values];
+  const displayCols = truncCols ? allCols.slice(0, maxCols) : allCols;
+
+  // Gather formatted cells — cells[ci][ri], column-major, in displayCols order.
+  const cells: string[][] = [];
+  for (const colName of displayCols) {
+    const fmt: Formatter = formatters[colName] ?? ((v: Scalar) => String(v ?? ""));
+    const col = df.col(colName);
+    const colCells: string[] = [];
+    for (let i = 0; i < displayRows; i++) {
+      colCells.push(fmt(col.values[i] as Scalar));
+    }
+    cells.push(colCells);
+  }
+
+  // Compute column widths (max of header or any cell)
+  const colWidths: number[] = displayCols.map((name, ci) => {
+    let w = name.length;
+    const colCells = cells[ci];
+    if (colCells !== undefined) {
+      for (const cell of colCells) {
+        if (cell.length > w) {
+          w = cell.length;
+        }
+      }
+    }
+    return w;
+  });
+
+  // Compute index label width
+  let idxWidth = 0;
+  for (let i = 0; i < displayRows; i++) {
+    const label = String(df.index.at(i) ?? "");
+    if (label.length > idxWidth) {
+      idxWidth = label.length;
+    }
+  }
+
+  // Build header row
+  const headerParts = displayCols.map((name, ci) => name.padStart(colWidths[ci] ?? name.length));
+  const header = `${"".padEnd(idxWidth)} ${headerParts.join(" ")}`;
+
+  const lines: string[] = [header];
+
+  for (let i = 0; i < displayRows; i++) {
+    const label = String(df.index.at(i) ?? "").padEnd(idxWidth);
+    const rowParts = displayCols.map((_, ci) => {
+      const cell = cells[ci]?.[i] ?? "";
+      return cell.padStart(colWidths[ci] ?? cell.length);
+    });
+    lines.push(`${label} ${rowParts.join(" ")}`);
+  }
+
+  if (truncRows) {
+    lines.push("...");
+  }
+  // NOTE(review): the "[n rows × m columns]" summary is only emitted when
+  // *columns* are truncated; pandas prints it for row truncation too —
+  // confirm this asymmetry is intended.
+  if (truncCols) {
+    lines.push(`[${nRows} rows × ${nCols} columns]`);
+  }
+
+  return lines.join("\n");
+}
diff --git a/src/stats/idxmin_idxmax.ts b/src/stats/idxmin_idxmax.ts
new file mode 100644
index 00000000..6ee745f9
--- /dev/null
+++ b/src/stats/idxmin_idxmax.ts
@@ -0,0 +1,234 @@
+/**
+ * idxmin / idxmax — return the index label of the minimum or maximum value.
+ *
+ * Mirrors `pandas.Series.idxmin()` / `pandas.Series.idxmax()` and
+ * `pandas.DataFrame.idxmin()` / `pandas.DataFrame.idxmax()`:
+ *
+ * - `idxminSeries(series)` — label of the minimum value (NaN/null excluded)
+ * - `idxmaxSeries(series)` — label of the maximum value (NaN/null excluded)
+ * - `idxminDataFrame(df)` — Series of row labels where each column achieves its min
+ * - `idxmaxDataFrame(df)` — Series of row labels where each column achieves its max
+ *
+ * When `skipna` is true (the default), NaN / null values are ignored.
+ * When `skipna` is false, any NaN / null causes the result to be `null`.
+ *
+ * @module
+ */
+
+import type { DataFrame } from "../core/index.ts";
+import { Dtype, Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/** Options for {@link idxminSeries}, {@link idxmaxSeries}. */
+export interface IdxOptions {
+  /**
+   * Whether to skip NaN / null values.
+   * When `false`, any missing value makes the result `null`.
+   * @defaultValue `true`
+   */
+  readonly skipna?: boolean;
+}
+
+/**
+ * Options for {@link idxminDataFrame}, {@link idxmaxDataFrame}.
+ * (Same shape as {@link IdxOptions}; kept separate so the two APIs can
+ * diverge independently.)
+ */
+export interface IdxDataFrameOptions {
+  /**
+   * Whether to skip NaN / null values.
+   * @defaultValue `true`
+   */
+  readonly skipna?: boolean;
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** True when a scalar should be treated as missing (null, undefined, or NaN). */
+function isMissing(v: Scalar): boolean {
+  if (v === null || v === undefined) {
+    return true;
+  }
+  return typeof v === "number" && Number.isNaN(v);
+}
+
+/**
+ * Find the positional index of the extreme value (min or max) among `values`.
+ *
+ * Returns `null` when all values are missing (with `skipna=true`) or as soon
+ * as any value is missing (with `skipna=false`). Ties keep the first winner.
+ */
+function findExtreme(
+  values: readonly Scalar[],
+  skipna: boolean,
+  isBetter: (a: Scalar, b: Scalar) => boolean,
+): number | null {
+  let winner: number | null = null;
+  let winnerValue: Scalar = null;
+
+  for (let pos = 0; pos < values.length; pos++) {
+    const candidate = values[pos] as Scalar;
+    if (isMissing(candidate)) {
+      if (!skipna) {
+        return null;
+      }
+      continue;
+    }
+    if (winner === null || isBetter(candidate, winnerValue)) {
+      winner = pos;
+      winnerValue = candidate;
+    }
+  }
+  return winner;
+}
+
+/** Compare scalars: returns true if `a` sorts strictly before `b`. */
+function isLess(a: Scalar, b: Scalar): boolean {
+  return b === null || b === undefined
+    ? false
+    : (a as number | string | boolean) < (b as number | string | boolean);
+}
+
+/** Compare scalars: returns true if `a` sorts strictly after `b`. */
+function isGreater(a: Scalar, b: Scalar): boolean {
+  return b === null || b === undefined
+    ? false
+    : (a as number | string | boolean) > (b as number | string | boolean);
+}
+
+// ─── public API — Series ──────────────────────────────────────────────────────
+
+/**
+ * Return the index label of the minimum value in `series`.
+ *
+ * NaN / null values are skipped when `skipna` is true (the default); with
+ * `skipna=false` any missing value makes the result `null`. Ties resolve to
+ * the first occurrence. Returns `null` for an empty or all-missing series.
+ *
+ * Mirrors `pandas.Series.idxmin()`.
+ *
+ * @param series - Input Series.
+ * @param options - Options (skipna).
+ * @returns The index label at the minimum value, or `null` if no valid value exists.
+ *
+ * @example
+ * ```ts
+ * import { Series, idxminSeries } from "tsb";
+ *
+ * const s = new Series({ data: [3, 1, 4, 1, 5], index: ["a", "b", "c", "d", "e"] });
+ * idxminSeries(s); // "b" (first occurrence of 1)
+ * ```
+ */
+export function idxminSeries(series: Series, options: IdxOptions = {}): Label {
+  const pos = findExtreme(series.values, options.skipna ?? true, isLess);
+  return pos === null ? null : series.index.at(pos);
+}
+
+/**
+ * Return the index label of the maximum value in `series`.
+ *
+ * NaN / null values are skipped when `skipna` is true (the default); with
+ * `skipna=false` any missing value makes the result `null`. Ties resolve to
+ * the first occurrence. Returns `null` for an empty or all-missing series.
+ *
+ * Mirrors `pandas.Series.idxmax()`.
+ *
+ * @param series - Input Series.
+ * @param options - Options (skipna).
+ * @returns The index label at the maximum value, or `null` if no valid value exists.
+ *
+ * @example
+ * ```ts
+ * import { Series, idxmaxSeries } from "tsb";
+ *
+ * const s = new Series({ data: [3, 1, 4, 1, 5], index: ["a", "b", "c", "d", "e"] });
+ * idxmaxSeries(s); // "e"
+ * ```
+ */
+export function idxmaxSeries(series: Series, options: IdxOptions = {}): Label {
+  const pos = findExtreme(series.values, options.skipna ?? true, isGreater);
+  return pos === null ? null : series.index.at(pos);
+}
+
+// ─── public API — DataFrame ───────────────────────────────────────────────────
+
+/**
+ * Return a Series containing the index label of the minimum value for each column.
+ *
+ * The result Series is indexed by column names.
+ * NaN / null values are excluded when `skipna` is true (the default).
+ * Columns where all values are NaN / null yield `null` in the result.
+ *
+ * Mirrors `pandas.DataFrame.idxmin()` (axis=0).
+ *
+ * @param df - Input DataFrame.
+ * @param options - Options (skipna).
+ * @returns A Series indexed by column names, containing the row label of each column's min.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, idxminDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [3, 1, 4], b: [10, 20, 5] }, { index: ["x", "y", "z"] });
+ * idxminDataFrame(df).values; // ["y", "z"]
+ * ```
+ */
+export function idxminDataFrame(df: DataFrame, options: IdxDataFrameOptions = {}): Series {
+ const skipna = options.skipna ?? true;
+ const colNames = df.columns.values;
+ const result: Label[] = colNames.map((colName) => {
+ const s = df.col(colName);
+ const idx = findExtreme(s.values, skipna, isLess);
+ if (idx === null) {
+ return null;
+ }
+ return df.index.at(idx);
+ });
+ return new Series({
+ data: result,
+ index: colNames as unknown as Label[],
+ name: null,
+ dtype: Dtype.from("object"),
+ });
+}
+
+/**
+ * Return a Series containing the index label of the maximum value for each column.
+ *
+ * The result Series is indexed by column names.
+ * NaN / null values are excluded when `skipna` is true (the default).
+ * Columns where all values are NaN / null yield `null` in the result.
+ *
+ * Mirrors `pandas.DataFrame.idxmax()` (axis=0).
+ *
+ * @param df - Input DataFrame.
+ * @param options - Options (skipna).
+ * @returns A Series indexed by column names, containing the row label of each column's max.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, idxmaxDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [3, 1, 4], b: [10, 20, 5] }, { index: ["x", "y", "z"] });
+ * idxmaxDataFrame(df).values; // ["z", "y"]
+ * ```
+ */
+export function idxmaxDataFrame(df: DataFrame, options: IdxDataFrameOptions = {}): Series {
+ const skipna = options.skipna ?? true;
+ const colNames = df.columns.values;
+ const result: Label[] = colNames.map((colName) => {
+ const s = df.col(colName);
+ const idx = findExtreme(s.values, skipna, isGreater);
+ if (idx === null) {
+ return null;
+ }
+ return df.index.at(idx);
+ });
+ return new Series({
+ data: result,
+ index: colNames as unknown as Label[],
+ name: null,
+ dtype: Dtype.from("object"),
+ });
+}
diff --git a/src/stats/index.ts b/src/stats/index.ts
index 4fb5e5b8..a5d9836c 100644
--- a/src/stats/index.ts
+++ b/src/stats/index.ts
@@ -188,3 +188,170 @@ export type { ExplodeOptions, ExplodeDataFrameOptions } from "./explode.ts";
export { isin, dataFrameIsin } from "./isin.ts";
export type { IsinValues, IsinDict, DataFrameIsinValues } from "./isin.ts";
+
+export { clipAdvancedSeries, clipAdvancedDataFrame } from "./clip_advanced.ts";
+export type {
+ SeriesBound,
+ DataFrameBound,
+ ClipAdvancedSeriesOptions,
+ ClipAdvancedDataFrameOptions,
+} from "./clip_advanced.ts";
+
+export { idxminSeries, idxmaxSeries, idxminDataFrame, idxmaxDataFrame } from "./idxmin_idxmax.ts";
+export type { IdxOptions, IdxDataFrameOptions } from "./idxmin_idxmax.ts";
+
+export { modeSeries, modeDataFrame } from "./mode.ts";
+export type { ModeSeriesOptions, ModeDataFrameOptions } from "./mode.ts";
+
+export {
+ nancount,
+ nansum,
+ nanmean,
+ nanmedian,
+ nanvar,
+ nanstd,
+ nanmin,
+ nanmax,
+ nanprod,
+} from "./nancumops.ts";
+export type { NanInput, NanAggOptions } from "./nancumops.ts";
+
+export {
+ nuniqueSeries,
+ nuniqueDataFrame,
+ anySeries,
+ allSeries,
+ anyDataFrame,
+ allDataFrame,
+} from "./nunique.ts";
+export type {
+ NuniqueSeriesOptions,
+ NuniqueDataFrameOptions,
+ AnyAllSeriesOptions,
+ AnyAllDataFrameOptions,
+} from "./nunique.ts";
+
+export { pctChangeSeries, pctChangeDataFrame } from "./pct_change.ts";
+export type {
+ PctChangeFillMethod,
+ PctChangeOptions,
+ DataFramePctChangeOptions,
+} from "./pct_change.ts";
+
+export { quantileSeries, quantileDataFrame } from "./quantile.ts";
+export type {
+ QuantileInterpolation,
+ QuantileSeriesOptions,
+ QuantileDataFrameOptions,
+} from "./quantile.ts";
+
+export { replaceSeries, replaceDataFrame } from "./replace.ts";
+export type {
+ ReplaceMapping,
+ ReplaceSpec,
+ ReplaceOptions,
+ DataFrameReplaceOptions,
+} from "./replace.ts";
+
+export { varSeries, semSeries, varDataFrame, semDataFrame } from "./sem_var.ts";
+export type { VarSemSeriesOptions, VarSemDataFrameOptions } from "./sem_var.ts";
+
+export { skewSeries, kurtSeries, skewDataFrame, kurtDataFrame } from "./skew_kurt.ts";
+export type {
+ SkewKurtSeriesOptions,
+ SkewKurtDataFrameOptions,
+} from "./skew_kurt.ts";
+
+export { toDatetime } from "./to_datetime.ts";
+export type { DatetimeUnit, DatetimeErrors, ToDatetimeOptions } from "./to_datetime.ts";
+
+export {
+ catFromCodes,
+ catUnionCategories,
+ catSortByFreq,
+ catCrossTab,
+} from "./categorical_ops.ts";
+export type {
+ CatFromCodesOptions,
+ CatSortByFreqOptions,
+ CatCrossTabOptions,
+} from "./categorical_ops.ts";
+
+export {
+ formatFloat,
+ formatPercent,
+ formatScientific,
+ formatEngineering,
+ formatThousands,
+ seriesToString,
+ dataFrameToString,
+} from "./format_ops.ts";
+export type {
+ Formatter,
+ SeriesToStringOptions,
+ DataFrameToStringOptions,
+} from "./format_ops.ts";
+
+export {
+ histogram,
+ zscore,
+ minMaxNormalize,
+ coefficientOfVariation,
+ digitize,
+ linspace,
+ arange,
+ percentileOfScore,
+ seriesDigitize,
+} from "./numeric_extended.ts";
+export type {
+ HistogramOptions,
+ HistogramResult,
+ ZscoreOptions,
+ MinMaxOptions,
+ CvOptions,
+} from "./numeric_extended.ts";
+
+export {
+ strNormalize,
+ strGetDummies,
+ strExtractAll,
+ strRemovePrefix,
+ strRemoveSuffix,
+ strTranslate,
+ strCharWidth,
+ strByteLength,
+} from "./string_ops.ts";
+export type {
+ NormalizeForm,
+ StrInput,
+ StrGetDummiesOptions,
+ ExtractAllOptions,
+} from "./string_ops.ts";
+
+export {
+ strSplitExpand,
+ strExtractGroups,
+ strPartition,
+ strRPartition,
+ strMultiReplace,
+ strIndent,
+ strDedent,
+} from "./string_ops_extended.ts";
+export type {
+ SplitExpandOptions,
+ ExtractGroupsOptions,
+ PartitionResult,
+ ReplacePair,
+ IndentOptions,
+} from "./string_ops_extended.ts";
+
+export {
+ rollingSem,
+ rollingSkew,
+ rollingKurt,
+ rollingQuantile,
+} from "./window_extended.ts";
+export type {
+ WindowExtOptions,
+ RollingQuantileOptions,
+} from "./window_extended.ts";
diff --git a/src/stats/mode.ts b/src/stats/mode.ts
new file mode 100644
index 00000000..dda30951
--- /dev/null
+++ b/src/stats/mode.ts
@@ -0,0 +1,305 @@
+/**
+ * mode — most-frequent value(s) in a Series or DataFrame.
+ *
+ * Mirrors:
+ * - `pandas.Series.mode(dropna?)`
+ * - `pandas.DataFrame.mode(axis?, numeric_only?, dropna?)`
+ *
+ * Returns a new Series/DataFrame containing all tied modal values sorted
+ * ascending. For Series (and DataFrames with `axis=0`) the result index is a
+ * 0-based integer index; with `axis=1` the original row index is kept and the
+ * result columns are 0-based integers. For `axis=0` (default, column-wise),
+ * each column's modes are null-padded to the length of the longest mode list.
+ *
+ * @module
+ */
+
+import { DataFrame, Dtype, Index, Series } from "../core/index.ts";
+import type { DtypeKind } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/** Options for {@link modeSeries}. */
+export interface ModeSeriesOptions {
+  /**
+   * If `true` (default), exclude null/NaN values from frequency counts.
+   * (`null`, `undefined`, and NaN all count as missing.)
+   */
+  readonly dropna?: boolean;
+}
+
+/** Options for {@link modeDataFrame}. */
+export interface ModeDataFrameOptions {
+  /**
+   * Axis along which to compute the mode.
+   * - `0` (default): compute per column.
+   * - `1`: compute per row.
+   */
+  readonly axis?: 0 | 1;
+  /**
+   * If `true`, only include numeric (int, uint, float) columns when `axis=0`.
+   * Has no effect for `axis=1`.
+   * @defaultValue `false`
+   */
+  readonly numericOnly?: boolean;
+  /**
+   * If `true` (default), exclude null/NaN values.
+   */
+  readonly dropna?: boolean;
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** True when a scalar value is missing (null, undefined, or NaN). */
+function isMissing(v: Scalar): boolean {
+  if (v === null || v === undefined) {
+    return true;
+  }
+  return typeof v === "number" && Number.isNaN(v);
+}
+
+/** True when a dtype kind is numeric (int, uint, or float). */
+function isNumericKind(kind: DtypeKind): boolean {
+  return kind === "int" || kind === "uint" || kind === "float";
+}
+
+/**
+ * Stable string key for a Scalar value used in frequency maps.
+ * null/undefined/Date keys carry a NUL prefix so they cannot collide with
+ * ordinary string values.
+ */
+function scalarKey(v: Scalar): string {
+  if (v === null) {
+    return "\0null";
+  }
+  if (v === undefined) {
+    return "\0undefined";
+  }
+  return v instanceof Date ? `\0date:${v.getTime().toString()}` : String(v);
+}
+
+/**
+ * Compare two Scalar values for ascending sort.
+ *
+ * Same-type pairs (number, bigint, Date, boolean, string) are compared
+ * natively; missing values (null/undefined/NaN) always sort last; any other
+ * mixed-type pair falls back to lexicographic comparison of canonical string
+ * keys (see {@link scalarKey}).
+ */
+function compareScalars(a: Scalar, b: Scalar): number {
+  // Both missing — equal
+  if (isMissing(a) && isMissing(b)) {
+    return 0;
+  }
+  // Missing values sort last
+  if (isMissing(a)) {
+    return 1;
+  }
+  if (isMissing(b)) {
+    return -1;
+  }
+  // Both numbers
+  if (typeof a === "number" && typeof b === "number") {
+    return a - b;
+  }
+  // Both bigints
+  if (typeof a === "bigint" && typeof b === "bigint") {
+    if (a < b) {
+      return -1;
+    }
+    if (a > b) {
+      return 1;
+    }
+    return 0;
+  }
+  // Both dates
+  if (a instanceof Date && b instanceof Date) {
+    return a.getTime() - b.getTime();
+  }
+  // Both booleans (false sorts before true)
+  if (typeof a === "boolean" && typeof b === "boolean") {
+    if (a === b) {
+      return 0;
+    }
+    return a ? 1 : -1;
+  }
+  // Both strings
+  if (typeof a === "string" && typeof b === "string") {
+    if (a < b) {
+      return -1;
+    }
+    if (a > b) {
+      return 1;
+    }
+    return 0;
+  }
+  // Mixed types — compare by canonical key
+  // NOTE(review): when two different-typed values share a key (e.g. 1 vs "1")
+  // this returns 1 for either argument order — an inconsistent comparator.
+  // Confirm mode inputs never mix such values, or handle key equality.
+  return scalarKey(a) < scalarKey(b) ? -1 : 1;
+}
+
+/**
+ * Compute the modal value(s) from an array of scalars.
+ * Returns all values tied for the highest frequency, sorted ascending.
+ *
+ * @param values - Raw scalar values to tally.
+ * @param dropna - When `true`, missing values (null/undefined/NaN) are not counted.
+ */
+function computeModes(values: readonly Scalar[], dropna: boolean): Scalar[] {
+  // Frequency table keyed by a canonical string key (see scalarKey). Explicit
+  // type arguments restored — the bare `new Map()` inferred `any` and lost
+  // checking on `entry.count` / `entry.value`.
+  const freq = new Map<string, { value: Scalar; count: number }>();
+  let maxCount = 0;
+
+  for (const v of values) {
+    if (dropna && isMissing(v)) {
+      continue;
+    }
+    const key = scalarKey(v);
+    const entry = freq.get(key);
+    if (entry === undefined) {
+      freq.set(key, { value: v, count: 1 });
+      // First counted value — the max count is at least 1.
+      if (maxCount === 0) {
+        maxCount = 1;
+      }
+    } else {
+      entry.count += 1;
+      if (entry.count > maxCount) {
+        maxCount = entry.count;
+      }
+    }
+  }
+
+  // maxCount === 0 ⇔ every value was dropped (or the input was empty).
+  if (maxCount === 0) {
+    return [];
+  }
+
+  const result: Scalar[] = [];
+  for (const { value, count } of freq.values()) {
+    if (count === maxCount) {
+      result.push(value);
+    }
+  }
+  result.sort(compareScalars);
+  return result;
+}
+
+/** Build an integer-index Series from a Scalar array, preserving the source dtype. */
+function buildModeSeries(modes: readonly Scalar[], name: Label, dtype: Dtype): Series {
+  const idx = new Index(modes.map((_, i) => i));
+  // Series names are string | null; coerce any other label type to a string.
+  const seriesName = typeof name === "string" ? name : name === null ? null : String(name);
+  return new Series({ data: modes.slice(), index: idx, dtype, name: seriesName });
+}
+
+// ─── public API ───────────────────────────────────────────────────────────────
+
+/**
+ * Return the most-frequent value(s) in a Series.
+ *
+ * When multiple values share the highest frequency they are all returned,
+ * sorted in ascending order. The result has a 0-based integer index.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [1, 2, 2, 3, 3] });
+ * modeSeries(s); // Series([2, 3])
+ * ```
+ */
+export function modeSeries(
+  series: Series,
+  options: ModeSeriesOptions = {},
+): Series {
+  const modes = computeModes(series.values as readonly Scalar[], options.dropna ?? true);
+  return buildModeSeries(modes, series.name, series.dtype);
+}
+
+/**
+ * Return the most-frequent value(s) per column (axis=0) or per row (axis=1).
+ *
+ * For `axis=0` (default): each column gets its own mode list. Columns with
+ * shorter mode lists are null-padded to match the column with the most modes.
+ * The result index is 0-based integers; the columns are the same as the input
+ * (or only numeric columns when `numericOnly=true`).
+ *
+ * For `axis=1`: each row is reduced to its modal values. The result has the
+ * same row index as the input; the columns are 0-based integers.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 1, 2], b: [3, 3, 3] });
+ * modeDataFrame(df); // {a: [1], b: [3]}
+ * ```
+ */
+export function modeDataFrame(df: DataFrame, options: ModeDataFrameOptions = {}): DataFrame {
+  const { axis = 0, numericOnly = false, dropna = true } = options;
+  return axis === 0 ? modeByColumn(df, numericOnly, dropna) : modeByRow(df, dropna);
+}
+
+/** Compute column-wise modes (axis=0). */
+function modeByColumn(df: DataFrame, numericOnly: boolean, dropna: boolean): DataFrame {
+  const colNames = df.columns.values as readonly string[];
+  // Optionally restrict to numeric columns (mirrors pandas' numeric_only).
+  const selectedCols = numericOnly
+    ? colNames.filter((c) => isNumericKind(df.col(c).dtype.kind))
+    : colNames;
+
+  // Compute modes per column; track the longest mode list for null-padding.
+  // Type arguments restored — the annotated `Map = new Map()` was a mangled
+  // `Map<string, Scalar[]>` and did not compile.
+  const columnModes = new Map<string, Scalar[]>();
+  let maxLen = 0;
+
+  for (const col of selectedCols) {
+    const modes = computeModes(df.col(col).values as readonly Scalar[], dropna);
+    columnModes.set(col, modes);
+    if (modes.length > maxLen) {
+      maxLen = modes.length;
+    }
+  }
+
+  // Null-pad shorter mode lists so every column has maxLen rows.
+  const record: Record<string, Scalar[]> = {};
+  for (const col of selectedCols) {
+    const modes = columnModes.get(col) ?? [];
+    const padded: Scalar[] = modes.slice();
+    while (padded.length < maxLen) {
+      padded.push(null);
+    }
+    record[col] = padded;
+  }
+
+  return DataFrame.fromColumns(record);
+}
+
+/** Compute row-wise modes (axis=1). */
+function modeByRow(df: DataFrame, dropna: boolean): DataFrame {
+  const colNames = df.columns.values as readonly string[];
+  const rowCount = df.index.size;
+  // Hoist column value arrays out of the row loop — the original re-resolved
+  // df.col(col) for every cell.
+  const columns = colNames.map((col) => df.col(col).values as readonly Scalar[]);
+  const rowModes: Scalar[][] = [];
+  let maxLen = 0;
+
+  for (let r = 0; r < rowCount; r++) {
+    const rowVals: Scalar[] = [];
+    for (const colVals of columns) {
+      rowVals.push(colVals[r] as Scalar);
+    }
+    const modes = computeModes(rowVals, dropna);
+    rowModes.push(modes);
+    if (modes.length > maxLen) {
+      maxLen = modes.length;
+    }
+  }
+
+  // Result columns are "0", "1", … maxLen-1; rows keep the original index.
+  // (Record type arguments restored — `Record = {}` did not compile.)
+  const resultCols: Record<string, Scalar[]> = {};
+  for (let c = 0; c < maxLen; c++) {
+    const colKey = String(c);
+    resultCols[colKey] = rowModes.map((modes) => (c < modes.length ? (modes[c] as Scalar) : null));
+  }
+
+  return DataFrame.fromColumns(resultCols, { index: df.index });
+}
diff --git a/src/stats/nancumops.ts b/src/stats/nancumops.ts
new file mode 100644
index 00000000..6bdbe769
--- /dev/null
+++ b/src/stats/nancumops.ts
@@ -0,0 +1,272 @@
+/**
+ * nancumops — nan-ignoring aggregate functions for arrays and Series.
+ *
+ * Mirrors the following numpy / pandas utilities:
+ * - `nansum(data, options?)` — sum, ignoring NaN / null
+ * - `nanmean(data, options?)` — mean, ignoring NaN / null
+ * - `nanmedian(data, options?)` — median, ignoring NaN / null
+ * - `nanstd(data, options?)` — standard deviation, ignoring NaN / null
+ * - `nanvar(data, options?)` — variance, ignoring NaN / null
+ * - `nanmin(data, options?)` — minimum, ignoring NaN / null
+ * - `nanmax(data, options?)` — maximum, ignoring NaN / null
+ * - `nanprod(data, options?)` — product, ignoring NaN / null
+ * - `nancount(data)` — count of non-NaN numeric values
+ *
+ * All functions accept `readonly Scalar[]` **or** a `Series` and
+ * return a `number`. Non-numeric scalars (strings, booleans, Dates) are
+ * treated as if they were NaN and excluded.
+ *
+ * @module
+ */
+
+import type { Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
/** Input accepted by every nan-aggregate function: a plain scalar array or a Series. */
export type NanInput = readonly Scalar[] | Series;

/** Options shared by most nan-aggregate functions. */
export interface NanAggOptions {
  /**
   * Degrees of freedom for std / var (default `1` — matches numpy and
   * pandas default for `ddof`).
   *
   * Only meaningful for {@link nanstd} and {@link nanvar}; other functions
   * ignore this option entirely.
   */
  readonly ddof?: number;
}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** Returns the underlying array from a Series or passes the array through. */
+function toValues(input: NanInput): readonly Scalar[] {
+ if (Array.isArray(input)) {
+ return input as readonly Scalar[];
+ }
+ // Series — read via .values
+ return (input as Series).values;
+}
+
+/** Returns only the finite numeric values from the input (NaN, null, undefined,
+ * non-numeric scalars are all dropped). */
+function numericValues(input: NanInput): number[] {
+ const raw = toValues(input);
+ const out: number[] = [];
+ for (const v of raw) {
+ if (typeof v === "number" && !Number.isNaN(v)) {
+ out.push(v);
+ }
+ }
+ return out;
+}
+
+/** Sorts an array of numbers in ascending order (returns a new array). */
+function sortedAsc(xs: number[]): number[] {
+ return xs.slice().sort((a, b) => a - b);
+}
+
+// ─── public functions ─────────────────────────────────────────────────────────
+
+/**
+ * Count of non-NaN numeric values in `input`.
+ *
+ * @example
+ * ```ts
+ * nancount([1, 2, NaN, null, 3]); // 3
+ * ```
+ */
+export function nancount(input: NanInput): number {
+ return numericValues(input).length;
+}
+
+/**
+ * Sum of numeric values in `input`, ignoring NaN / null.
+ *
+ * Returns `0` when there are no valid values (matches numpy behaviour).
+ *
+ * @example
+ * ```ts
+ * nansum([1, 2, NaN, null, 3]); // 6
+ * ```
+ */
+export function nansum(input: NanInput): number {
+ const xs = numericValues(input);
+ if (xs.length === 0) {
+ return 0;
+ }
+ let s = 0;
+ for (const x of xs) {
+ s += x;
+ }
+ return s;
+}
+
+/**
+ * Arithmetic mean of numeric values in `input`, ignoring NaN / null.
+ *
+ * Returns `Number.NaN` when there are no valid values.
+ *
+ * @example
+ * ```ts
+ * nanmean([1, 2, NaN, 3]); // 2
+ * ```
+ */
+export function nanmean(input: NanInput): number {
+ const xs = numericValues(input);
+ if (xs.length === 0) {
+ return Number.NaN;
+ }
+ let s = 0;
+ for (const x of xs) {
+ s += x;
+ }
+ return s / xs.length;
+}
+
+/**
+ * Median of numeric values in `input`, ignoring NaN / null.
+ *
+ * Returns `Number.NaN` when there are no valid values.
+ *
+ * @example
+ * ```ts
+ * nanmedian([1, 3, 2, NaN]); // 2
+ * ```
+ */
+export function nanmedian(input: NanInput): number {
+ const xs = sortedAsc(numericValues(input));
+ const n = xs.length;
+ if (n === 0) {
+ return Number.NaN;
+ }
+ const mid = Math.floor(n / 2);
+ if (n % 2 === 1) {
+ return xs[mid] as number;
+ }
+ return ((xs[mid - 1] as number) + (xs[mid] as number)) / 2;
+}
+
+/**
+ * Variance of numeric values in `input`, ignoring NaN / null.
+ *
+ * @param input - Array or Series of scalars.
+ * @param options - `ddof` (degrees of freedom, default `1`).
+ *
+ * Returns `Number.NaN` when there are fewer valid values than `ddof + 1`.
+ *
+ * @example
+ * ```ts
+ * nanvar([2, 4, 4, 4, 5, 5, 7, 9], { ddof: 1 }); // 4.571...
+ * ```
+ */
+export function nanvar(input: NanInput, options: NanAggOptions = {}): number {
+ const ddof = options.ddof ?? 1;
+ const xs = numericValues(input);
+ const n = xs.length;
+ if (n <= ddof) {
+ return Number.NaN;
+ }
+ let s = 0;
+ for (const x of xs) {
+ s += x;
+ }
+ const mean = s / n;
+ let ss = 0;
+ for (const x of xs) {
+ const diff = x - mean;
+ ss += diff * diff;
+ }
+ return ss / (n - ddof);
+}
+
+/**
+ * Standard deviation of numeric values in `input`, ignoring NaN / null.
+ *
+ * @param input - Array or Series of scalars.
+ * @param options - `ddof` (degrees of freedom, default `1`).
+ *
+ * Returns `Number.NaN` when there are fewer valid values than `ddof + 1`.
+ *
+ * @example
+ * ```ts
+ * nanstd([2, 4, 4, 4, 5, 5, 7, 9], { ddof: 1 }); // 2.138...
+ * ```
+ */
+export function nanstd(input: NanInput, options: NanAggOptions = {}): number {
+ return Math.sqrt(nanvar(input, options));
+}
+
+/**
+ * Minimum of numeric values in `input`, ignoring NaN / null.
+ *
+ * Returns `Number.NaN` when there are no valid values.
+ *
+ * @example
+ * ```ts
+ * nanmin([3, 1, NaN, 2]); // 1
+ * ```
+ */
+export function nanmin(input: NanInput): number {
+ const xs = numericValues(input);
+ if (xs.length === 0) {
+ return Number.NaN;
+ }
+ let m = xs[0] as number;
+ for (let i = 1; i < xs.length; i++) {
+ const v = xs[i] as number;
+ if (v < m) {
+ m = v;
+ }
+ }
+ return m;
+}
+
+/**
+ * Maximum of numeric values in `input`, ignoring NaN / null.
+ *
+ * Returns `Number.NaN` when there are no valid values.
+ *
+ * @example
+ * ```ts
+ * nanmax([3, 1, NaN, 2]); // 3
+ * ```
+ */
+export function nanmax(input: NanInput): number {
+ const xs = numericValues(input);
+ if (xs.length === 0) {
+ return Number.NaN;
+ }
+ let m = xs[0] as number;
+ for (let i = 1; i < xs.length; i++) {
+ const v = xs[i] as number;
+ if (v > m) {
+ m = v;
+ }
+ }
+ return m;
+}
+
+/**
+ * Product of numeric values in `input`, ignoring NaN / null.
+ *
+ * Returns `1` when there are no valid values (matches numpy behaviour for
+ * an empty product — identity element).
+ *
+ * @example
+ * ```ts
+ * nanprod([1, 2, NaN, 3]); // 6
+ * ```
+ */
+export function nanprod(input: NanInput): number {
+ const xs = numericValues(input);
+ if (xs.length === 0) {
+ return 1;
+ }
+ let p = 1;
+ for (const x of xs) {
+ p *= x;
+ }
+ return p;
+}
diff --git a/src/stats/numeric_extended.ts b/src/stats/numeric_extended.ts
new file mode 100644
index 00000000..c5534e71
--- /dev/null
+++ b/src/stats/numeric_extended.ts
@@ -0,0 +1,586 @@
+/**
+ * numeric_extended — additional numeric utility functions for arrays and Series.
+ *
+ * Mirrors frequently-used numpy / scipy / pandas functions not yet in tsb:
+ * - `digitize(values, bins, right?)` — find bin indices (numpy.digitize)
+ * - `histogram(values, options?)` — compute histogram counts and edges (numpy.histogram)
+ * - `linspace(start, stop, num?)` — evenly-spaced sequence (numpy.linspace)
+ * - `arange(start, stop?, step?)` — range with step (numpy.arange)
+ * - `percentileOfScore(arr, score, kind?)` — percentile rank of a score (scipy.stats.percentileofscore)
+ * - `zscore(series, options?)` — z-score standardisation (scipy.stats.zscore)
+ * - `minMaxNormalize(series, options?)` — min-max normalisation to [0, 1] or custom range
+ * - `coefficientOfVariation(series, options?)` — std / mean (dimensionless spread)
+ *
+ * All functions are **pure** (return new values; inputs are unchanged).
+ * Missing values (null / NaN) are handled consistently: ignored in aggregates
+ * and propagated in per-element outputs unless noted otherwise.
+ *
+ * @module
+ */
+
+import { Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── internal helpers ─────────────────────────────────────────────────────────
+
+/** True when `v` is a finite, non-null, non-NaN number. */
+function isNum(v: Scalar): v is number {
+ return typeof v === "number" && !Number.isNaN(v);
+}
+
+/** Extract finite numbers from scalar array. */
+function finiteNums(vals: readonly Scalar[]): number[] {
+ return vals.filter(isNum);
+}
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
/** Options for {@link histogram}. */
export interface HistogramOptions {
  /**
   * Number of equal-width bins to produce. Defaults to `10`.
   * Ignored when `binEdges` is provided.
   */
  readonly bins?: number;
  /**
   * Explicit bin edges. Must be strictly increasing and have length ≥ 2.
   * When provided, `bins` is ignored.
   */
  readonly binEdges?: readonly number[];
  /**
   * `[min, max]` range to consider. Values outside are ignored.
   * Defaults to `[min(values), max(values)]`.
   * Only used when `binEdges` is not provided.
   */
  readonly range?: readonly [number, number];
  /**
   * If `true`, the result is normalised as a probability density so that the
   * integral over the range is 1 (like `numpy.histogram(density=True)`).
   * Defaults to `false`.
   */
  readonly density?: boolean;
}

/** Result of {@link histogram}. */
export interface HistogramResult {
  /** Bin counts (or densities when `density: true`). */
  readonly counts: readonly number[];
  /** Bin edges — always has length `counts.length + 1`. */
  readonly binEdges: readonly number[];
}

/** Options for {@link zscore}. */
export interface ZscoreOptions {
  /**
   * Degrees-of-freedom correction for std.
   * - `1` (default, matches pandas `ddof=1`): sample std
   * - `0`: population std
   */
  readonly ddof?: 0 | 1;
}

/** Options for {@link minMaxNormalize}. */
export interface MinMaxOptions {
  /**
   * Lower bound of the output range. Defaults to `0`.
   * Must be strictly less than `featureRangeMax` (otherwise a RangeError is thrown).
   */
  readonly featureRangeMin?: number;
  /**
   * Upper bound of the output range. Defaults to `1`.
   * Must be strictly greater than `featureRangeMin`.
   */
  readonly featureRangeMax?: number;
}

/** Options for {@link coefficientOfVariation}. */
export interface CvOptions {
  /**
   * Degrees-of-freedom correction for std.
   * - `1` (default): sample std
   * - `0`: population std
   */
  readonly ddof?: 0 | 1;
}
+
+// ─── digitize ─────────────────────────────────────────────────────────────────
+
+/**
+ * Return the indices of the bins to which each value in `values` belongs.
+ *
+ * Mirrors `numpy.digitize(values, bins, right=False)`.
+ *
+ * Each value `v` is mapped to bin index `i` such that:
+ * - `right = false` (default): `bins[i-1] <= v < bins[i]`
+ * - `right = true`: `bins[i-1] < v <= bins[i]`
+ *
+ * Indices are 0-based (unlike numpy which uses 1-based).
+ * Values below `bins[0]` map to `-1`; values at/above `bins[last]` map to
+ * `bins.length - 1`.
+ *
+ * Missing / NaN values in `values` are mapped to `NaN`.
+ *
+ * @param values - array of numbers to bin (may contain null/NaN)
+ * @param bins - strictly increasing bin-edge array (length ≥ 1)
+ * @param right - if `true`, intervals are open on the left (pandas default is `false`)
+ * @returns array of integer bin indices (same length as `values`)
+ *
+ * @example
+ * ```ts
+ * digitize([0.5, 1.5, 2.5, 3.5], [1, 2, 3]);
+ * // → [-1, 0, 1, 2]
+ * ```
+ */
+export function digitize(
+ values: readonly (number | null)[],
+ bins: readonly number[],
+ right = false,
+): (number | typeof NaN)[] {
+ if (bins.length === 0) {
+ throw new RangeError("bins must have at least one element");
+ }
+ return values.map((v) => {
+ if (v === null || (typeof v === "number" && Number.isNaN(v))) {
+ return Number.NaN;
+ }
+ const n = bins.length;
+ if (right) {
+ // open left, closed right: bins[i-1] < v <= bins[i]
+ for (let i = 0; i < n; i++) {
+ if (v <= (bins[i] as number)) {
+ return i - 1; // below first edge → -1
+ }
+ }
+ return n - 1; // above last edge
+ } else {
+ // closed left, open right: bins[i-1] <= v < bins[i]
+ for (let i = 0; i < n; i++) {
+ if (v < (bins[i] as number)) {
+ return i - 1;
+ }
+ }
+ return n - 1; // at or above last edge
+ }
+ });
+}
+
+// ─── histogram ────────────────────────────────────────────────────────────────
+
+/**
+ * Compute a histogram of `values`.
+ *
+ * Mirrors `numpy.histogram(values, bins=10, range=None, density=False)`.
+ *
+ * NaN / null values are silently ignored.
+ *
+ * @param values - numeric values to bin
+ * @param options - {@link HistogramOptions}
+ * @returns {@link HistogramResult} with `counts` and `binEdges`
+ *
+ * @example
+ * ```ts
+ * histogram([1, 2, 3, 4, 5], { bins: 2 });
+ * // { counts: [2, 3], binEdges: [1, 3, 5] }
+ * ```
+ */
+export function histogram(
+ values: readonly (number | null | Scalar)[],
+ options?: HistogramOptions,
+): HistogramResult {
+ const nums = finiteNums(values as readonly Scalar[]);
+ if (nums.length === 0) {
+ // Return a zero-count histogram over [0, 1] when there is no data.
+ const nb = options?.bins ?? 10;
+ const edges: number[] = [];
+ for (let i = 0; i <= nb; i++) {
+ edges.push(i / nb);
+ }
+ const counts = new Array(nb).fill(0);
+ return { counts, binEdges: edges };
+ }
+
+ let edges: number[];
+
+ if (options?.binEdges !== undefined) {
+ const be = options.binEdges;
+ if (be.length < 2) {
+ throw new RangeError("binEdges must have at least 2 elements");
+ }
+ edges = [...be];
+ } else {
+ const nbins = options?.bins ?? 10;
+ if (nbins < 1) {
+ throw new RangeError("bins must be >= 1");
+ }
+ let lo: number;
+ let hi: number;
+ if (options?.range !== undefined) {
+ [lo, hi] = options.range;
+ } else {
+ lo = Math.min(...nums);
+ hi = Math.max(...nums);
+ }
+ if (lo === hi) {
+ // Degenerate range: widen by 0.5 on each side (mirrors numpy).
+ lo -= 0.5;
+ hi += 0.5;
+ }
+ edges = [];
+ for (let i = 0; i <= nbins; i++) {
+ edges.push(lo + (i / nbins) * (hi - lo));
+ }
+ }
+
+ const nbins = edges.length - 1;
+ const counts = new Array(nbins).fill(0);
+ const lo = edges[0] as number;
+ const hi = edges[nbins] as number;
+
+ for (const v of nums) {
+ if (v < lo || v > hi) {
+ continue; // out of range
+ }
+ if (v === hi) {
+ // Right-most value goes into the last bin.
+ (counts[nbins - 1] as number)++;
+ continue;
+ }
+ // Binary search for the bin.
+ let left = 0;
+ let right = nbins - 1;
+ while (left < right) {
+ const mid = (left + right) >> 1;
+ if (v < (edges[mid + 1] as number)) {
+ right = mid;
+ } else {
+ left = mid + 1;
+ }
+ }
+ (counts[left] as number)++;
+ }
+
+ if (options?.density === true) {
+ const total = nums.length;
+ const densityCounts = counts.map((c, i) => {
+ const width = (edges[i + 1] as number) - (edges[i] as number);
+ return c / (total * width);
+ });
+ return { counts: densityCounts, binEdges: edges };
+ }
+
+ return { counts, binEdges: edges };
+}
+
+// ─── linspace ─────────────────────────────────────────────────────────────────
+
+/**
+ * Return `num` evenly spaced numbers from `start` to `stop` (inclusive).
+ *
+ * Mirrors `numpy.linspace(start, stop, num=50, endpoint=True)`.
+ *
+ * @param start - first value
+ * @param stop - last value (included)
+ * @param num - number of values to generate (default `50`; must be ≥ 0)
+ * @returns array of `num` numbers
+ *
+ * @example
+ * ```ts
+ * linspace(0, 1, 5);
+ * // → [0, 0.25, 0.5, 0.75, 1]
+ * ```
+ */
+export function linspace(start: number, stop: number, num = 50): number[] {
+ if (num < 0) {
+ throw new RangeError("num must be >= 0");
+ }
+ if (num === 0) {
+ return [];
+ }
+ if (num === 1) {
+ return [start];
+ }
+ const step = (stop - start) / (num - 1);
+ const result: number[] = [];
+ for (let i = 0; i < num; i++) {
+ result.push(i === num - 1 ? stop : start + i * step);
+ }
+ return result;
+}
+
+// ─── arange ───────────────────────────────────────────────────────────────────
+
+/**
+ * Return evenly-spaced values within a given interval.
+ *
+ * Mirrors `numpy.arange([start,] stop[, step])`.
+ *
+ * Call signatures:
+ * - `arange(stop)` — values in `[0, stop)` with step `1`
+ * - `arange(start, stop)` — values in `[start, stop)` with step `1`
+ * - `arange(start, stop, step)` — values in `[start, stop)` with given step
+ *
+ * @example
+ * ```ts
+ * arange(5); // [0, 1, 2, 3, 4]
+ * arange(1, 5); // [1, 2, 3, 4]
+ * arange(0, 1, 0.25); // [0, 0.25, 0.5, 0.75]
+ * ```
+ */
+export function arange(stop: number): number[];
+export function arange(start: number, stop: number): number[];
+export function arange(start: number, stop: number, step: number): number[];
+export function arange(startOrStop: number, stop?: number, step?: number): number[] {
+ let start: number;
+ let s: number;
+ let st: number;
+
+ if (stop === undefined) {
+ start = 0;
+ s = startOrStop;
+ st = 1;
+ } else if (step === undefined) {
+ start = startOrStop;
+ s = stop;
+ st = 1;
+ } else {
+ start = startOrStop;
+ s = stop;
+ st = step;
+ }
+
+ if (st === 0) {
+ throw new RangeError("step must not be zero");
+ }
+
+ const result: number[] = [];
+ if (st > 0) {
+ for (let v = start; v < s; v = start + result.length * st) {
+ result.push(v);
+ }
+ } else {
+ for (let v = start; v > s; v = start + result.length * st) {
+ result.push(v);
+ }
+ }
+ return result;
+}
+
+// ─── percentileOfScore ────────────────────────────────────────────────────────
+
+/**
+ * Compute the percentile rank of `score` within `arr`.
+ *
+ * Mirrors `scipy.stats.percentileofscore(arr, score, kind)`.
+ *
+ * @param arr - numeric values (NaN/null are ignored)
+ * @param score - value whose rank to compute
+ * @param kind - ranking method:
+ * - `"rank"` (default): average of `weak` and `strict` percentiles
+ * - `"weak"`: proportion of values ≤ score
+ * - `"strict"`: proportion of values < score
+ * - `"mean"`: mean of `weak` and `strict` (same as `"rank"`)
+ * @returns percentile in `[0, 100]` (or `NaN` when `arr` is empty)
+ *
+ * @example
+ * ```ts
+ * percentileOfScore([1, 2, 3, 4, 5], 3); // 50
+ * percentileOfScore([1, 2, 3, 4, 5], 3, "weak"); // 60
+ * percentileOfScore([1, 2, 3, 4, 5], 3, "strict"); // 40
+ * ```
+ */
+export function percentileOfScore(
+ arr: readonly (number | null | Scalar)[],
+ score: number,
+ kind: "rank" | "weak" | "strict" | "mean" = "rank",
+): number {
+ const nums = finiteNums(arr as readonly Scalar[]);
+ const n = nums.length;
+ if (n === 0) {
+ return Number.NaN;
+ }
+ const weakCount = nums.filter((v) => v <= score).length;
+ const strictCount = nums.filter((v) => v < score).length;
+
+ switch (kind) {
+ case "weak":
+ return (weakCount / n) * 100;
+ case "strict":
+ return (strictCount / n) * 100;
+ case "rank":
+ case "mean":
+ return ((weakCount + strictCount) / 2 / n) * 100;
+ }
+}
+
+// ─── zscore ───────────────────────────────────────────────────────────────────
+
+/**
+ * Standardise a numeric Series to zero mean and unit variance (z-score).
+ *
+ * Mirrors `scipy.stats.zscore(a, ddof=1)`.
+ *
+ * Each value is transformed as: `z = (x − mean) / std`
+ *
+ * Missing values (null / NaN) are propagated unchanged in the output.
+ * If std is 0 (or fewer than 2 non-missing values), all outputs are `NaN`.
+ *
+ * @param series - input Series (must be numeric)
+ * @param options - {@link ZscoreOptions}
+ * @returns new Series of z-scores with same index
+ *
+ * @example
+ * ```ts
+ * zscore(new Series({ data: [2, 4, 4, 4, 5, 5, 7, 9] }));
+ * // approximately [−1.5, −0.5, −0.5, −0.5, 0, 0, 1, 2] (normalised)
+ * ```
+ */
+export function zscore(
+ series: Series,
+ options?: ZscoreOptions,
+): Series {
+ const ddof = options?.ddof ?? 1;
+ const vals = series.values as readonly Scalar[];
+ const nums = finiteNums(vals);
+ const n = nums.length;
+
+ if (n < 2) {
+ const nanVals = vals.map(() => Number.NaN as Scalar);
+ return series.withValues(nanVals) as Series;
+ }
+
+ const mean = nums.reduce((acc, v) => acc + v, 0) / n;
+ const variance = nums.reduce((acc, v) => acc + (v - mean) ** 2, 0) / (n - ddof);
+ const std = Math.sqrt(variance);
+
+ if (std === 0) {
+ const nanVals = vals.map((v) => (isNum(v) ? Number.NaN : v) as Scalar);
+ return series.withValues(nanVals) as Series;
+ }
+
+ const zVals = vals.map((v) => (isNum(v) ? ((v - mean) / std) as Scalar : v));
+ return series.withValues(zVals) as Series;
+}
+
+// ─── minMaxNormalize ──────────────────────────────────────────────────────────
+
+/**
+ * Scale a numeric Series to a fixed range using min-max normalisation.
+ *
+ * Mirrors `sklearn.preprocessing.MinMaxScaler` applied to a 1-D array.
+ *
+ * `x_scaled = (x − min) / (max − min) × (rangeMax − rangeMin) + rangeMin`
+ *
+ * Missing values (null / NaN) are propagated unchanged.
+ * If all values are equal, returns a Series of the midpoint of the target range.
+ *
+ * @param series - input Series (must be numeric)
+ * @param options - {@link MinMaxOptions}
+ * @returns new Series normalised to `[featureRangeMin, featureRangeMax]`
+ *
+ * @example
+ * ```ts
+ * minMaxNormalize(new Series({ data: [0, 5, 10] }));
+ * // → Series([0, 0.5, 1])
+ * ```
+ */
+export function minMaxNormalize(
+ series: Series,
+ options?: MinMaxOptions,
+): Series {
+ const rMin = options?.featureRangeMin ?? 0;
+ const rMax = options?.featureRangeMax ?? 1;
+ if (rMin >= rMax) {
+ throw new RangeError("featureRangeMin must be less than featureRangeMax");
+ }
+
+ const vals = series.values as readonly Scalar[];
+ const nums = finiteNums(vals);
+ if (nums.length === 0) {
+ return series.withValues(vals.map(() => Number.NaN as Scalar)) as Series;
+ }
+
+ const min = Math.min(...nums);
+ const max = Math.max(...nums);
+ const span = max - min;
+
+ if (span === 0) {
+ const mid = (rMin + rMax) / 2;
+ const midVals = vals.map((v) => (isNum(v) ? (mid as Scalar) : v));
+ return series.withValues(midVals) as Series;
+ }
+
+ const scaled = vals.map((v) =>
+ isNum(v) ? (((v - min) / span) * (rMax - rMin) + rMin) as Scalar : v,
+ );
+ return series.withValues(scaled) as Series;
+}
+
+// ─── coefficientOfVariation ───────────────────────────────────────────────────
+
+/**
+ * Compute the coefficient of variation (CV) — std / |mean| — as a unitless
+ * measure of relative dispersion.
+ *
+ * NaN / null values are ignored in aggregation.
+ * Returns `NaN` when mean is 0 or fewer than 2 valid values exist.
+ *
+ * @param series - numeric Series
+ * @param options - {@link CvOptions}
+ * @returns ratio std / |mean|
+ *
+ * @example
+ * ```ts
+ * coefficientOfVariation(new Series({ data: [10, 20, 30] }));
+ * // ≈ 0.5
+ * ```
+ */
+export function coefficientOfVariation(
+ series: Series,
+ options?: CvOptions,
+): number {
+ const ddof = options?.ddof ?? 1;
+ const vals = series.values as readonly Scalar[];
+ const nums = finiteNums(vals);
+ const n = nums.length;
+
+ if (n < 2) {
+ return Number.NaN;
+ }
+
+ const mean = nums.reduce((acc, v) => acc + v, 0) / n;
+ if (mean === 0) {
+ return Number.NaN;
+ }
+
+ const variance = nums.reduce((acc, v) => acc + (v - mean) ** 2, 0) / (n - ddof);
+ const std = Math.sqrt(variance);
+ return std / Math.abs(mean);
+}
+
+// ─── seriesDigitize ───────────────────────────────────────────────────────────
+
+/**
+ * Apply {@link digitize} to a Series, returning a new numeric Series of bin indices.
+ *
+ * @param series - Series of numeric values
+ * @param bins - strictly increasing bin-edge array
+ * @param right - if `true`, intervals are open on the left
+ * @returns new Series of bin indices (integer or NaN for missing values)
+ *
+ * @example
+ * ```ts
+ * seriesDigitize(new Series({ data: [0.5, 1.5, 2.5] }), [1, 2]);
+ * // → Series([-1, 0, 1])
+ * ```
+ */
+export function seriesDigitize(
+ series: Series,
+ bins: readonly number[],
+ right = false,
+): Series {
+ const vals = series.values as readonly (number | null)[];
+ const indices = digitize(vals, bins, right);
+ return new Series({
+ data: indices as number[],
+ index: series.index as import("../core/index.ts").Index,
+ name: series.name,
+ });
+}
diff --git a/src/stats/nunique.ts b/src/stats/nunique.ts
new file mode 100644
index 00000000..af6d1e11
--- /dev/null
+++ b/src/stats/nunique.ts
@@ -0,0 +1,291 @@
+/**
+ * nunique_any_all — count unique values and boolean reductions for Series and
+ * DataFrame.
+ *
+ * Mirrors:
+ * - `pandas.Series.nunique(dropna?)` — count distinct non-null values
+ * - `pandas.Series.any(skipna?)` — true if any element is truthy
+ * - `pandas.Series.all(skipna?)` — true if all elements are truthy
+ * - `pandas.DataFrame.nunique(axis?, dropna?)`
+ * - `pandas.DataFrame.any(axis?, skipna?, bool_only?)`
+ * - `pandas.DataFrame.all(axis?, skipna?, bool_only?)`
+ *
+ * @module
+ */
+
+import type { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
/** Options for {@link nuniqueSeries}. */
export interface NuniqueSeriesOptions {
  /**
   * If `true` (default), exclude null/NaN values from the unique count.
   */
  readonly dropna?: boolean;
}

/** Options for {@link nuniqueDataFrame}. */
export interface NuniqueDataFrameOptions extends NuniqueSeriesOptions {
  /**
   * Axis along which to count unique values.
   * - `0` (default): count per column.
   * - `1`: count per row.
   */
  readonly axis?: 0 | 1;
}

/** Options for {@link anySeries} and {@link allSeries}. */
export interface AnyAllSeriesOptions {
  /**
   * If `true` (default), skip null/NaN values (they do not contribute).
   * If `false`, null/NaN is treated as falsy.
   */
  readonly skipna?: boolean;
}

/** Options for {@link anyDataFrame} and {@link allDataFrame}. */
export interface AnyAllDataFrameOptions extends AnyAllSeriesOptions {
  /**
   * Axis along which to reduce.
   * - `0` (default): reduce along rows, one result per column.
   * - `1`: reduce along columns, one result per row.
   */
  readonly axis?: 0 | 1;
  /**
   * If `true`, only include boolean-typed columns when `axis=0`.
   * Has no effect when `axis=1` (row-wise reduction uses all columns).
   * @defaultValue `false`
   */
  readonly boolOnly?: boolean;
}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** True when a value is missing (null, undefined, or NaN). */
+function isMissing(v: Scalar): boolean {
+ return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+}
+
+/** True when a value is truthy (treating missing as falsy). */
+function isTruthy(v: Scalar): boolean {
+ if (isMissing(v)) {
+ return false;
+ }
+ return Boolean(v);
+}
+
+/** Return true if any value in `vals` is truthy, skipping missing when skipna=true. */
+function anyInSlice(vals: readonly Scalar[], skipna: boolean): boolean {
+ for (const v of vals) {
+ if (skipna && isMissing(v)) {
+ continue;
+ }
+ if (isTruthy(v)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/** Return true if all values in `vals` are truthy, skipping missing when skipna=true. */
+function allInSlice(vals: readonly Scalar[], skipna: boolean): boolean {
+ for (const v of vals) {
+ if (skipna && isMissing(v)) {
+ continue;
+ }
+ if (!isTruthy(v)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+// ─── nunique ──────────────────────────────────────────────────────────────────
+
+/**
+ * Count the number of unique values in a Series.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [1, 2, 2, 3, null] as Scalar[] });
+ * nuniqueSeries(s); // 3 (null excluded)
+ * nuniqueSeries(s, { dropna: false }); // 4
+ * ```
+ */
+export function nuniqueSeries(series: Series, options: NuniqueSeriesOptions = {}): number {
+ const dropna = options.dropna ?? true;
+ const seen = new Set();
+ for (const v of series.values as readonly Scalar[]) {
+ if (dropna && isMissing(v)) {
+ continue;
+ }
+ seen.add(v);
+ }
+ return seen.size;
+}
+
+/**
+ * Count unique values per column (`axis=0`) or per row (`axis=1`) of a
+ * DataFrame.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 2, 2], b: ["x", "x", "y"] });
+ * nuniqueDataFrame(df); // Series { a: 2, b: 2 }
+ * ```
+ */
+export function nuniqueDataFrame(
+ df: DataFrame,
+ options: NuniqueDataFrameOptions = {},
+): Series {
+ const axis = options.axis ?? 0;
+ const dropna = options.dropna ?? true;
+ const colNames = df.columns.values as readonly string[];
+
+ if (axis === 0) {
+ const labels: string[] = [];
+ const values: number[] = [];
+ for (const col of colNames) {
+ labels.push(col);
+ values.push(nuniqueSeries(df.col(col), { dropna }));
+ }
+ return new Series({ data: values, index: labels });
+ }
+
+ // axis === 1: count unique values across each row
+ const rowCount = df.index.size;
+ const values: number[] = [];
+ for (let r = 0; r < rowCount; r++) {
+ const seen = new Set();
+ for (const col of colNames) {
+ const v = df.col(col).values[r] as Scalar;
+ if (dropna && isMissing(v)) {
+ continue;
+ }
+ seen.add(v);
+ }
+ values.push(seen.size);
+ }
+ return new Series({ data: values, index: df.index });
+}
+
+// ─── any ──────────────────────────────────────────────────────────────────────
+
+/**
+ * Return `true` if any element in the Series is truthy.
+ *
+ * With `skipna=true` (default), null/NaN values are skipped.
+ * An empty (or all-null with skipna) series returns `false`.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [0, 0, 1] });
+ * anySeries(s); // true
+ * ```
+ */
+export function anySeries(series: Series, options: AnyAllSeriesOptions = {}): boolean {
+ return anyInSlice(series.values as readonly Scalar[], options.skipna ?? true);
+}
+
+/**
+ * Return `true` if all elements in the Series are truthy.
+ *
+ * With `skipna=true` (default), null/NaN values are skipped.
+ * An empty (or all-null with skipna) series returns `true` (vacuous truth).
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [1, 2, 3] });
+ * allSeries(s); // true
+ * ```
+ */
+export function allSeries(series: Series, options: AnyAllSeriesOptions = {}): boolean {
+ return allInSlice(series.values as readonly Scalar[], options.skipna ?? true);
+}
+
+// ─── DataFrame any/all ────────────────────────────────────────────────────────
+
+/** Get the column values for a single row `r` from df. */
+function rowValues(df: DataFrame, colNames: readonly string[], r: number): Scalar[] {
+ const row: Scalar[] = [];
+ for (const col of colNames) {
+ row.push(df.col(col).values[r] as Scalar);
+ }
+ return row;
+}
+
+/**
+ * Return whether any element is truthy along an axis of a DataFrame.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [0, 0], b: [0, 1] });
+ * anyDataFrame(df); // Series { a: false, b: true }
+ * ```
+ */
+export function anyDataFrame(df: DataFrame, options: AnyAllDataFrameOptions = {}): Series {
+ const axis = options.axis ?? 0;
+ const skipna = options.skipna ?? true;
+ const boolOnly = options.boolOnly ?? false;
+ const colNames = df.columns.values as readonly string[];
+
+ if (axis === 0) {
+ const labels: string[] = [];
+ const values: boolean[] = [];
+ for (const col of colNames) {
+ const s = df.col(col);
+ if (boolOnly && s.dtype.kind !== "bool") {
+ continue;
+ }
+ labels.push(col);
+ values.push(anySeries(s, { skipna }));
+ }
+ return new Series({ data: values, index: labels });
+ }
+
+ // axis === 1: any across columns for each row
+ const values: boolean[] = [];
+ for (let r = 0; r < df.index.size; r++) {
+ values.push(anyInSlice(rowValues(df, colNames, r), skipna));
+ }
+ return new Series({ data: values, index: df.index });
+}
+
+/**
+ * Return whether all elements are truthy along an axis of a DataFrame.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 1], b: [1, 0] });
+ * allDataFrame(df); // Series { a: true, b: false }
+ * ```
+ */
+export function allDataFrame(df: DataFrame, options: AnyAllDataFrameOptions = {}): Series {
+ const axis = options.axis ?? 0;
+ const skipna = options.skipna ?? true;
+ const boolOnly = options.boolOnly ?? false;
+ const colNames = df.columns.values as readonly string[];
+
+ if (axis === 0) {
+ const labels: string[] = [];
+ const values: boolean[] = [];
+ for (const col of colNames) {
+ const s = df.col(col);
+ if (boolOnly && s.dtype.kind !== "bool") {
+ continue;
+ }
+ labels.push(col);
+ values.push(allSeries(s, { skipna }));
+ }
+ return new Series({ data: values, index: labels });
+ }
+
+ // axis === 1: all across columns for each row
+ const values: boolean[] = [];
+ for (let r = 0; r < df.index.size; r++) {
+ values.push(allInSlice(rowValues(df, colNames, r), skipna));
+ }
+ return new Series({ data: values, index: df.index });
+}
diff --git a/src/stats/pct_change.ts b/src/stats/pct_change.ts
new file mode 100644
index 00000000..53864281
--- /dev/null
+++ b/src/stats/pct_change.ts
@@ -0,0 +1,231 @@
+/**
+ * pct_change — percentage change between current and prior element.
+ *
+ * Mirrors `pandas.Series.pct_change()` / `pandas.DataFrame.pct_change()`:
+ * - `pctChangeSeries(series, options)` — per-element % change
+ * - `pctChangeDataFrame(df, options)` — column-wise % change
+ *
+ * Formula (per element i, with shift=periods):
+ * `result[i] = (x[i] - x[i-periods]) / x[i-periods]`
+ *
+ * When `fillMethod` is set, NaN/null values in the source are filled *before*
+ * computing the ratio (matching pandas' default behaviour of `fill_method="pad"`).
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
// ─── public types ─────────────────────────────────────────────────────────────

/**
 * Fill method applied to NaN/null before computing pct_change:
 * `"pad"` forward-fills, `"bfill"` backward-fills.
 */
export type PctChangeFillMethod = "pad" | "bfill";

/** Options for {@link pctChangeSeries} and {@link pctChangeDataFrame}. */
export interface PctChangeOptions {
  /**
   * Number of periods (lags) to shift when computing the ratio.
   * Positive values look backward; negative values look forward.
   * Default `1`.
   */
  readonly periods?: number;
  /**
   * How to fill NaN/null values *before* computing the ratio.
   * - `"pad"` (default): forward-fill (last valid observation carries forward).
   * - `"bfill"`: backward-fill (next valid observation fills backward).
   * - `null`: no filling — NaN/null stays as-is.
   */
  readonly fillMethod?: PctChangeFillMethod | null;
  /**
   * Maximum number of consecutive NaN/null values to fill when `fillMethod`
   * is set. `undefined` / `null` means no limit.
   */
  readonly limit?: number | null;
}

/** Options for {@link pctChangeDataFrame} — adds an axis selector. */
export interface DataFramePctChangeOptions extends PctChangeOptions {
  /**
   * - `0` or `"index"` (default): apply operation **column-wise** (down rows).
   * - `1` or `"columns"`: apply operation **row-wise** (across columns).
   */
  readonly axis?: 0 | 1 | "index" | "columns";
}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** True when `v` is a valid number (not null, undefined, or NaN). */
+function isNum(v: Scalar): v is number {
+ return typeof v === "number" && !Number.isNaN(v) && v !== null;
+}
+
+/**
+ * Forward-fill an array of scalars in place, respecting an optional limit.
+ * Returns a NEW array.
+ */
+function padFill(vals: readonly Scalar[], limit: number | null | undefined): Scalar[] {
+ const out: Scalar[] = [...vals];
+ let run = 0;
+ let lastValid: Scalar = null;
+ for (let i = 0; i < out.length; i++) {
+ const v = out[i] as Scalar;
+ if (v !== null && v !== undefined && !(typeof v === "number" && Number.isNaN(v))) {
+ lastValid = v;
+ run = 0;
+ } else if (lastValid !== null && (limit == null || run < limit)) {
+ out[i] = lastValid;
+ run++;
+ }
+ }
+ return out;
+}
+
+/**
+ * Backward-fill an array of scalars, respecting an optional limit.
+ * Returns a NEW array.
+ */
+function bfillFill(vals: readonly Scalar[], limit: number | null | undefined): Scalar[] {
+ const tmp = padFill([...vals].reverse(), limit);
+ return tmp.reverse();
+}
+
+/** Fill NaN/null in `vals` using the requested method. */
+function applyFill(
+ vals: readonly Scalar[],
+ method: PctChangeFillMethod | null | undefined,
+ limit: number | null | undefined,
+): Scalar[] {
+ if (!method) return [...vals];
+ return method === "pad" ? padFill(vals, limit) : bfillFill(vals, limit);
+}
+
+/** Compute pct_change on a flat array of scalars. */
+function computePct(vals: readonly Scalar[], periods: number): Scalar[] {
+ const n = vals.length;
+ const out: Scalar[] = new Array(n).fill(null);
+ const shift = periods;
+ if (shift >= 0) {
+ for (let i = shift; i < n; i++) {
+ const curr = vals[i] as Scalar;
+ const prev = vals[i - shift] as Scalar;
+ if (isNum(curr) && isNum(prev) && prev !== 0) {
+ out[i] = curr / prev - 1;
+ } else if (isNum(curr) && isNum(prev) && prev === 0) {
+ // 0 denominator → Infinity (same as pandas)
+ out[i] = curr === 0 ? Number.NaN : curr > 0 ? Infinity : -Infinity;
+ } else {
+ out[i] = null;
+ }
+ }
+ } else {
+ // Negative periods: look forward
+ const absShift = -shift;
+ for (let i = 0; i < n - absShift; i++) {
+ const curr = vals[i] as Scalar;
+ const fwd = vals[i + absShift] as Scalar;
+ if (isNum(curr) && isNum(fwd) && curr !== 0) {
+ out[i] = fwd / curr - 1;
+ } else if (isNum(curr) && isNum(fwd) && curr === 0) {
+ out[i] = fwd === 0 ? Number.NaN : fwd > 0 ? Infinity : -Infinity;
+ } else {
+ out[i] = null;
+ }
+ }
+ }
+ return out;
+}
+
+// ─── public API ───────────────────────────────────────────────────────────────
+
+/**
+ * Compute the fractional change between a Series element and the element
+ * `periods` positions earlier (or later, for negative `periods`).
+ *
+ * Matches `pandas.Series.pct_change()`.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [100, 110, 99, 121] });
+ * pctChangeSeries(s); // [null, 0.1, -0.1, 0.2222…]
+ * ```
+ */
+export function pctChangeSeries(series: Series, options: PctChangeOptions = {}): Series {
+ const periods = options.periods ?? 1;
+ const fillMethod = options.fillMethod !== undefined ? options.fillMethod : "pad";
+ const limit = options.limit ?? null;
+
+ const filled = applyFill(series.values, fillMethod, limit);
+ const result = computePct(filled, periods);
+
+ return new Series({
+ data: result,
+ index: series.index,
+ name: series.name,
+ });
+}
+
+/**
+ * Compute percentage change for every column (or row) of a DataFrame.
+ *
+ * Matches `pandas.DataFrame.pct_change()`.
+ *
+ * @example
+ * ```ts
+ * const df = new DataFrame(new Map([
+ * ["a", new Series({ data: [100, 110, 121] })],
+ * ["b", new Series({ data: [200, 180, 198] })],
+ * ]));
+ * pctChangeDataFrame(df); // fractional change per column
+ * ```
+ */
+export function pctChangeDataFrame(
+ df: DataFrame,
+ options: DataFramePctChangeOptions = {},
+): DataFrame {
+ const axis = options.axis ?? 0;
+ const colWise = axis === 0 || axis === "index";
+
+ if (colWise) {
+ const colMap = new Map>();
+ for (const name of df.columns.values) {
+ colMap.set(name, pctChangeSeries(df.col(name), options));
+ }
+ return new DataFrame(colMap, df.index);
+ }
+
+ // Row-wise: each row across columns
+ const periods = options.periods ?? 1;
+ const fillMethod = options.fillMethod !== undefined ? options.fillMethod : "pad";
+ const limit = options.limit ?? null;
+ const nRows = df.index.size;
+ const cols = df.columns.values;
+ const nCols = cols.length;
+
+ const resultCols = new Map();
+ for (const name of cols) {
+ resultCols.set(name, new Array(nRows).fill(null));
+ }
+
+ for (let r = 0; r < nRows; r++) {
+ const row: Scalar[] = [];
+ for (const name of cols) {
+ row.push(df.col(name).values[r] as Scalar);
+ }
+ const filled = applyFill(row, fillMethod, limit);
+ const pct = computePct(filled, periods);
+ for (let c = 0; c < nCols; c++) {
+ (resultCols.get(cols[c] as string) as Scalar[])[r] = pct[c] as Scalar;
+ }
+ }
+
+ const colMap = new Map>();
+ for (const name of cols) {
+ colMap.set(
+ name,
+ new Series({ data: resultCols.get(name) as Scalar[], index: df.index, name }),
+ );
+ }
+ return new DataFrame(colMap, df.index);
+}
diff --git a/src/stats/quantile.ts b/src/stats/quantile.ts
new file mode 100644
index 00000000..60f6e0dc
--- /dev/null
+++ b/src/stats/quantile.ts
@@ -0,0 +1,361 @@
+/**
+ * quantile — quantile/percentile for Series and DataFrame.
+ *
+ * Mirrors:
+ * - `pandas.Series.quantile(q=0.5, interpolation='linear')`
+ * - `pandas.DataFrame.quantile(q=0.5, axis=0, numeric_only=True, interpolation='linear')`
+ *
+ * ### Interpolation methods (pandas-compatible)
+ * - `"linear"`: linear interpolation between adjacent values
+ * - `"lower"`: take the lower of the two surrounding values
+ * - `"higher"`: take the higher of the two surrounding values
+ * - `"midpoint"`: arithmetic mean of the two surrounding values
+ * - `"nearest"`: whichever of the two surrounding indices is closest
+ *
+ * @module
+ */
+
+import { DataFrame, Index, Series } from "../core/index.ts";
+import type { DtypeKind } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
// ─── public types ─────────────────────────────────────────────────────────────

/** Interpolation method for quantile estimation (see module doc for semantics). */
export type QuantileInterpolation = "linear" | "lower" | "higher" | "midpoint" | "nearest";

/** Options for {@link quantileSeries}. */
export interface QuantileSeriesOptions {
  /**
   * Quantile level(s) in [0, 1].
   * - A single number returns a `number`.
   * - An array returns a `Series` indexed by the q-values.
   * @defaultValue `0.5`
   */
  readonly q?: number | readonly number[];
  /**
   * Interpolation method when the desired quantile lies between two values.
   * @defaultValue `"linear"`
   */
  readonly interpolation?: QuantileInterpolation;
  /**
   * If `true` (default), ignore null/NaN values before computing.
   */
  readonly skipna?: boolean;
}

/** Options for {@link quantileDataFrame}. */
export interface QuantileDataFrameOptions {
  /**
   * Quantile level(s) in [0, 1].
   * - A single number returns a `Series`.
   * - An array returns a `DataFrame`.
   * @defaultValue `0.5`
   */
  readonly q?: number | readonly number[];
  /**
   * Axis along which to compute.
   * - `0` (default): across rows — one value per column.
   * - `1`: across columns — one value per row.
   */
  readonly axis?: 0 | 1;
  /**
   * If `true` (default), only include numeric columns.
   * If `false`, non-numeric columns produce `NaN`.
   */
  readonly numericOnly?: boolean;
  /**
   * Interpolation method when the desired quantile lies between two values.
   * @defaultValue `"linear"`
   */
  readonly interpolation?: QuantileInterpolation;
  /**
   * If `true` (default), ignore null/NaN values before computing.
   */
  readonly skipna?: boolean;
}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** True when a scalar value is missing. */
+function isMissing(v: Scalar): boolean {
+ return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+}
+
+/** True when a dtype kind is numeric. */
+function isNumericKind(kind: DtypeKind): boolean {
+ return kind === "int" || kind === "uint" || kind === "float";
+}
+
+/** Extract numeric (non-missing) values, respecting skipna. */
+function extractNumbers(values: readonly Scalar[], skipna: boolean): number[] {
+ const out: number[] = [];
+ for (const v of values) {
+ if (isMissing(v)) {
+ if (!skipna) {
+ return [];
+ }
+ continue;
+ }
+ if (typeof v === "number") {
+ out.push(v);
+ }
+ }
+ return out;
+}
+
+/** Sort numbers ascending (non-mutating). */
+function sortAsc(xs: number[]): number[] {
+ return xs.slice().sort((a, b) => a - b);
+}
+
+/**
+ * Compute a single quantile from a **sorted** array using the given method.
+ *
+ * Returns `NaN` when the array is empty.
+ */
+function computeOne(sorted: readonly number[], q: number, method: QuantileInterpolation): number {
+ const n = sorted.length;
+ if (n === 0) {
+ return Number.NaN;
+ }
+ if (n === 1) {
+ return sorted[0] as number;
+ }
+ const pos = q * (n - 1);
+ const lo = Math.floor(pos);
+ const hi = Math.ceil(pos);
+ const vlo = sorted[lo] as number;
+ const vhi = sorted[hi] as number;
+ if (lo === hi) {
+ return vlo;
+ }
+ switch (method) {
+ case "lower":
+ return vlo;
+ case "higher":
+ return vhi;
+ case "midpoint":
+ return (vlo + vhi) / 2;
+ case "nearest": {
+ const frac = pos - lo;
+ return frac <= 0.5 ? vlo : vhi;
+ }
+ default: {
+ const frac = pos - lo;
+ return vlo * (1 - frac) + vhi * frac;
+ }
+ }
+}
+
+/** Compute multiple quantile levels from a sorted array. */
+function computeMany(
+ sorted: readonly number[],
+ qLevels: readonly number[],
+ method: QuantileInterpolation,
+): number[] {
+ return qLevels.map((q) => computeOne(sorted, q, method));
+}
+
+// ─── Series API ───────────────────────────────────────────────────────────────
+
+/**
+ * Compute quantile(s) for a Series.
+ *
+ * When `q` is a single number, returns a scalar `number`.
+ * When `q` is an array, returns a `Series` indexed by the q-values.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [1, 2, 3, 4, 5] });
+ * quantileSeries(s); // 3
+ * quantileSeries(s, { q: 0.25 }); // 1.75
+ * quantileSeries(s, { q: [0.25, 0.5, 0.75] }); // Series { 0.25:1.75, 0.5:3, 0.75:4.25 }
+ * quantileSeries(s, { q: 0.5, interpolation: "lower" }); // 2
+ * ```
+ */
+export function quantileSeries(
+ series: Series,
+ options: QuantileSeriesOptions = {},
+): number | Series {
+ const method: QuantileInterpolation = options.interpolation ?? "linear";
+ const skipna = options.skipna ?? true;
+ const qInput = options.q ?? 0.5;
+
+ const sorted = sortAsc(extractNumbers(series.values as readonly Scalar[], skipna));
+
+ if (typeof qInput === "number") {
+ return computeOne(sorted, qInput, method);
+ }
+
+ const qArr = qInput as readonly number[];
+ const results = computeMany(sorted, qArr, method);
+ return new Series({ data: results, index: qArr as unknown as readonly Label[] });
+}
+
+// ─── DataFrame helpers ────────────────────────────────────────────────────────
+
+/** Build a column record (name → Scalar[]) for DataFrame.fromColumns(). */
+function buildRecord(
+ colMap: ReadonlyMap,
+): Record {
+ const obj: Record = {};
+ for (const [name, vals] of colMap) {
+ obj[name] = vals;
+ }
+ return obj;
+}
+
+/** Collect sorted numeric arrays for each selected column. */
+function collectCols(
+ df: DataFrame,
+ numericOnly: boolean,
+ skipna: boolean,
+): { names: string[]; sorted: number[][] } {
+ const colNames = df.columns.values as readonly string[];
+ const names: string[] = [];
+ const sorted: number[][] = [];
+ for (const col of colNames) {
+ const s = df.col(col);
+ if (numericOnly && !isNumericKind(s.dtype.kind)) {
+ continue;
+ }
+ names.push(col);
+ if (isNumericKind(s.dtype.kind)) {
+ sorted.push(sortAsc(extractNumbers(s.values as readonly Scalar[], skipna)));
+ } else {
+ sorted.push([]);
+ }
+ }
+ return { names, sorted };
+}
+
+// ─── axis=0 (reduce rows, one result per column) ─────────────────────────────
+
+function axis0SingleQ(
+ df: DataFrame,
+ q: number,
+ method: QuantileInterpolation,
+ skipna: boolean,
+ numericOnly: boolean,
+): Series {
+ const { names, sorted } = collectCols(df, numericOnly, skipna);
+ const vals = sorted.map((xs) => computeOne(xs, q, method));
+ return new Series({ data: vals, index: names });
+}
+
+function axis0MultiQ(
+ df: DataFrame,
+ qLevels: readonly number[],
+ method: QuantileInterpolation,
+ skipna: boolean,
+ numericOnly: boolean,
+): DataFrame {
+ const { names, sorted } = collectCols(df, numericOnly, skipna);
+ const rowIndex = new Index(qLevels as unknown as Label[]);
+ const colData = new Map();
+ for (let ci = 0; ci < names.length; ci++) {
+ const col = names[ci] as string;
+ const xs = sorted[ci] as number[];
+ colData.set(col, computeMany(xs, qLevels, method));
+ }
+ return DataFrame.fromColumns(buildRecord(colData), { index: rowIndex });
+}
+
+// ─── axis=1 (reduce columns, one result per row) ─────────────────────────────
+
+/** Extract numeric values for a given row across all columns. */
+function rowValues(df: DataFrame, colNames: readonly string[], rowIdx: number): Scalar[] {
+ const out: Scalar[] = [];
+ for (const col of colNames) {
+ out.push(df.col(col).values[rowIdx] as Scalar);
+ }
+ return out;
+}
+
+function axis1SingleQ(
+ df: DataFrame,
+ q: number,
+ method: QuantileInterpolation,
+ skipna: boolean,
+): Series {
+ const colNames = df.columns.values as readonly string[];
+ const rowCount = df.index.size;
+ const vals: number[] = [];
+ for (let r = 0; r < rowCount; r++) {
+ const xs = sortAsc(extractNumbers(rowValues(df, colNames, r), skipna));
+ vals.push(computeOne(xs, q, method));
+ }
+ return new Series({ data: vals, index: df.index });
+}
+
+function axis1MultiQ(
+ df: DataFrame,
+ qLevels: readonly number[],
+ method: QuantileInterpolation,
+ skipna: boolean,
+): DataFrame {
+ const colNames = df.columns.values as readonly string[];
+ const rowCount = df.index.size;
+ const qColData: number[][] = qLevels.map(() => []);
+ for (let r = 0; r < rowCount; r++) {
+ const xs = sortAsc(extractNumbers(rowValues(df, colNames, r), skipna));
+ for (let qi = 0; qi < qLevels.length; qi++) {
+ const arr = qColData[qi];
+ if (arr !== undefined) {
+ arr.push(computeOne(xs, qLevels[qi] as number, method));
+ }
+ }
+ }
+ const resultCols: Record = {};
+ for (let qi = 0; qi < qLevels.length; qi++) {
+ resultCols[String(qLevels[qi])] = (qColData[qi] ?? []) as Scalar[];
+ }
+ return DataFrame.fromColumns(resultCols, { index: df.index });
+}
+
+// ─── public DataFrame API ─────────────────────────────────────────────────────
+
+/**
+ * Compute quantile(s) for a DataFrame.
+ *
+ * When `q` is a single number:
+ * - `axis=0`: returns a `Series` (one value per column)
+ * - `axis=1`: returns a `Series` (one value per row)
+ *
+ * When `q` is an array:
+ * - `axis=0`: returns a `DataFrame` (q-values as rows, columns as columns)
+ * - `axis=1`: returns a `DataFrame` (rows as rows, q-values as columns)
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+ * quantileDataFrame(df); // Series { a: 2, b: 5 }
+ * quantileDataFrame(df, { q: [0.25, 0.75] }); // DataFrame 2×2
+ * quantileDataFrame(df, { axis: 1, q: 0.5 }); // Series (one value per row)
+ * ```
+ */
+export function quantileDataFrame(
+ df: DataFrame,
+ options: QuantileDataFrameOptions = {},
+): Series | DataFrame {
+ const method: QuantileInterpolation = options.interpolation ?? "linear";
+ const skipna = options.skipna ?? true;
+ const numericOnly = options.numericOnly ?? true;
+ const axis = options.axis ?? 0;
+ const qInput = options.q ?? 0.5;
+
+ const multiQ = Array.isArray(qInput);
+
+ if (axis === 0) {
+ if (multiQ) {
+ return axis0MultiQ(df, qInput as readonly number[], method, skipna, numericOnly);
+ }
+ return axis0SingleQ(df, qInput as number, method, skipna, numericOnly);
+ }
+
+ if (multiQ) {
+ return axis1MultiQ(df, qInput as readonly number[], method, skipna);
+ }
+ return axis1SingleQ(df, qInput as number, method, skipna);
+}
diff --git a/src/stats/replace.ts b/src/stats/replace.ts
new file mode 100644
index 00000000..54c2662e
--- /dev/null
+++ b/src/stats/replace.ts
@@ -0,0 +1,237 @@
+/**
+ * replace — value substitution for Series and DataFrame.
+ *
+ * Mirrors the following pandas methods:
+ * - `Series.replace(to_replace, value)` / `Series.replace(mapping)`
+ * - `DataFrame.replace(to_replace, value)` / `DataFrame.replace(mapping)`
+ *
+ * Supported replacement specs:
+ * - **Scalar → Scalar**: replace every occurrence of one value with another.
+ * - **Array → Scalar**: replace every value in the array with a single value.
+ * - **Array → Array**: pair-wise replacement (must be same length).
+ * - **Record / Map**: lookup-table replacement (`{ old: new, ... }`).
+ *
+ * All functions are **pure** (return new objects; inputs are unchanged).
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── types ────────────────────────────────────────────────────────────────────
+
+/** A lookup table mapping old values to new values. */
+export type ReplaceMapping = Readonly> | ReadonlyMap;
+
+/**
+ * Replacement specification accepted by {@link replaceSeries} /
+ * {@link replaceDataFrame}.
+ *
+ * Mirrors the first two positional args of `pandas.Series.replace`.
+ */
+export type ReplaceSpec =
+ | { readonly toReplace: Scalar; readonly value: Scalar }
+ | { readonly toReplace: readonly Scalar[]; readonly value: Scalar }
+ | { readonly toReplace: readonly Scalar[]; readonly value: readonly Scalar[] }
+ | { readonly toReplace: ReplaceMapping };
+
+/** Options shared by {@link replaceSeries} and {@link replaceDataFrame}. */
+export interface ReplaceOptions {
+ /**
+ * When `true`, treat `NaN` values as equal for matching purposes.
+ * Default `true`.
+ */
+ readonly matchNaN?: boolean;
+}
+
+/** Options for {@link replaceDataFrame}. */
+export interface DataFrameReplaceOptions extends ReplaceOptions {
+ /**
+ * If provided, only replace values in these column names.
+ * By default all columns are processed.
+ */
+ readonly columns?: readonly string[];
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** True when `a` and `b` are equal (with optional NaN=NaN equality). */
+function scalarEq(a: Scalar, b: Scalar, matchNaN: boolean): boolean {
+ if (
+ matchNaN &&
+ typeof a === "number" &&
+ typeof b === "number" &&
+ Number.isNaN(a) &&
+ Number.isNaN(b)
+ ) {
+ return true;
+ }
+ if (a instanceof Date && b instanceof Date) {
+ return a.getTime() === b.getTime();
+ }
+ return a === b;
+}
+
+/**
+ * Build a replacement function from a {@link ReplaceSpec}.
+ * Returns `(v) => new_value` or `v` unchanged if no match.
+ */
+function buildReplacer(spec: ReplaceSpec, matchNaN: boolean): (v: Scalar) => Scalar {
+ // Mapping variant
+ if (
+ "toReplace" in spec &&
+ !Array.isArray(spec.toReplace) &&
+ typeof spec.toReplace === "object" &&
+ spec.toReplace !== null &&
+ !(spec.toReplace instanceof Map) &&
+ !("value" in spec)
+ ) {
+ // Record
+ const rec = spec.toReplace as Readonly>;
+ return (v: Scalar): Scalar => {
+ const key = String(v);
+ return Object.prototype.hasOwnProperty.call(rec, key) ? (rec[key] as Scalar) : v;
+ };
+ }
+
+ if ("toReplace" in spec && spec.toReplace instanceof Map) {
+ const map = spec.toReplace as ReadonlyMap;
+ return (v: Scalar): Scalar => {
+ for (const [k, val] of map) {
+ if (scalarEq(v, k, matchNaN)) {
+ return val;
+ }
+ }
+ return v;
+ };
+ }
+
+ // Mapping passed via { toReplace: mapping } shape
+ if ("toReplace" in spec && !("value" in spec)) {
+ const mapping = spec.toReplace as ReplaceMapping;
+ if (mapping instanceof Map) {
+ const map = mapping as ReadonlyMap;
+ return (v: Scalar): Scalar => {
+ for (const [k, val] of map) {
+ if (scalarEq(v, k, matchNaN)) {
+ return val;
+ }
+ }
+ return v;
+ };
+ }
+ const rec = mapping as Readonly>;
+ return (v: Scalar): Scalar => {
+ const key = String(v);
+ return Object.prototype.hasOwnProperty.call(rec, key) ? (rec[key] as Scalar) : v;
+ };
+ }
+
+ const s = spec as { toReplace: Scalar | readonly Scalar[]; value: Scalar | readonly Scalar[] };
+
+ if (!Array.isArray(s.toReplace)) {
+ // Scalar → Scalar
+ const old = s.toReplace as Scalar;
+ const newVal = s.value as Scalar;
+ return (v: Scalar): Scalar => (scalarEq(v, old, matchNaN) ? newVal : v);
+ }
+
+ const oldArr = s.toReplace as readonly Scalar[];
+
+ if (!Array.isArray(s.value)) {
+ // Array → Scalar
+ const newVal = s.value as Scalar;
+ return (v: Scalar): Scalar => {
+ for (const old of oldArr) {
+ if (scalarEq(v, old, matchNaN)) {
+ return newVal;
+ }
+ }
+ return v;
+ };
+ }
+
+ // Array → Array (pair-wise)
+ const newArr = s.value as readonly Scalar[];
+ if (oldArr.length !== newArr.length) {
+ throw new RangeError(
+ `replace: toReplace and value arrays must have the same length (got ${oldArr.length} and ${newArr.length})`,
+ );
+ }
+ return (v: Scalar): Scalar => {
+ for (let i = 0; i < oldArr.length; i++) {
+ if (scalarEq(v, oldArr[i] as Scalar, matchNaN)) {
+ return newArr[i] as Scalar;
+ }
+ }
+ return v;
+ };
+}
+
+// ─── Series ───────────────────────────────────────────────────────────────────
+
+/**
+ * Replace values in a Series according to `spec`.
+ *
+ * @example
+ * ```ts
+ * import { Series } from "tsb";
+ * import { replaceSeries } from "tsb";
+ *
+ * const s = new Series({ data: [1, 2, 3, 2, 1] });
+ * const r = replaceSeries(s, { toReplace: 2, value: 99 });
+ * // r.values → [1, 99, 3, 99, 1]
+ * ```
+ */
+export function replaceSeries(
+ series: Series,
+ spec: ReplaceSpec,
+ options: ReplaceOptions = {},
+): Series {
+ const matchNaN = options.matchNaN ?? true;
+ const replacer = buildReplacer(spec, matchNaN);
+ const newData = Array.from({ length: series.size }, (_, i) =>
+ replacer(series.values[i] as Scalar),
+ );
+ return new Series({ data: newData, index: series.index, name: series.name });
+}
+
+// ─── DataFrame ────────────────────────────────────────────────────────────────
+
+/**
+ * Replace values in a DataFrame according to `spec`.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame } from "tsb";
+ * import { replaceDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [2, 2, 4] });
+ * const r = replaceDataFrame(df, { toReplace: 2, value: 0 });
+ * // r.col("a").values → [1, 0, 3]
+ * // r.col("b").values → [0, 0, 4]
+ * ```
+ */
+export function replaceDataFrame(
+ df: DataFrame,
+ spec: ReplaceSpec,
+ options: DataFrameReplaceOptions = {},
+): DataFrame {
+ const matchNaN = options.matchNaN ?? true;
+ const replacer = buildReplacer(spec, matchNaN);
+ const targetCols = new Set(options.columns ?? df.columns.values);
+
+ const colMap = new Map>();
+ for (const name of df.columns.values) {
+ const col = df.col(name) as Series;
+ if (targetCols.has(name)) {
+ const newData = Array.from({ length: col.size }, (_, i) => replacer(col.values[i] as Scalar));
+ colMap.set(name, new Series({ data: newData, index: col.index, name: col.name }));
+ } else {
+ colMap.set(name, col);
+ }
+ }
+ return new DataFrame(colMap, df.index);
+}
diff --git a/src/stats/sem_var.ts b/src/stats/sem_var.ts
new file mode 100644
index 00000000..9e1604dd
--- /dev/null
+++ b/src/stats/sem_var.ts
@@ -0,0 +1,283 @@
+/**
+ * sem_var — sample/population variance and standard error of the mean for
+ * Series and DataFrame.
+ *
+ * Mirrors:
+ * - `pandas.Series.var(ddof?, skipna?, min_count?)` — variance
+ * - `pandas.Series.sem(ddof?, skipna?, min_count?)` — standard error of mean
+ * - `pandas.DataFrame.var(axis?, ddof?, skipna?, numeric_only?)`
+ * - `pandas.DataFrame.sem(axis?, ddof?, skipna?, numeric_only?)`
+ *
+ * `ddof` (degrees of freedom delta):
+ * - `1` (default): sample variance — divides by `n - 1`
+ * - `0`: population variance — divides by `n`
+ *
+ * `skipna` (default `true`): ignore NaN/null values.
+ * `minCount` (default `1`): minimum number of valid observations required;
+ * returns `NaN` if fewer are present.
+ *
+ * SEM = sqrt(var / n) where var uses the given ddof.
+ *
+ * @module
+ */
+
+import type { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { DtypeKind } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
// ─── public types ─────────────────────────────────────────────────────────────

/** Options for {@link varSeries} and {@link semSeries}. */
export interface VarSemSeriesOptions {
  /**
   * Delta degrees of freedom. Divisor is `n - ddof`.
   * @defaultValue `1`
   */
  readonly ddof?: number;
  /**
   * If `true` (default), exclude null/NaN values.
   */
  readonly skipna?: boolean;
  /**
   * Minimum number of non-null observations required. Returns `NaN` when
   * fewer valid values are present.
   * @defaultValue `1`
   */
  readonly minCount?: number;
}

/** Options for {@link varDataFrame} and {@link semDataFrame}. */
export interface VarSemDataFrameOptions extends VarSemSeriesOptions {
  /**
   * Axis along which to compute.
   * - `0` (default): reduce along rows, one result per column.
   * - `1`: reduce along columns, one result per row.
   */
  readonly axis?: 0 | 1;
  /**
   * If `true`, only include numeric columns when `axis=0`.
   * @defaultValue `false`
   */
  readonly numericOnly?: boolean;
}

/** Internal callback type for variance/SEM reduction (xs already filtered to numbers). */
type StatFn = (xs: readonly number[], ddof: number, minCount: number) => number;
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** True when a scalar value is missing (null, undefined, or NaN). */
+function isMissing(v: Scalar): boolean {
+ return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+}
+
+/** True when a dtype kind is numeric. */
+function isNumericKind(kind: DtypeKind): boolean {
+ return kind === "int" || kind === "uint" || kind === "float";
+}
+
+/**
+ * Extract numeric values, respecting skipna and minCount.
+ * Returns an empty array when skipna=false and any missing value is present.
+ */
+function extractNumbers(values: readonly Scalar[], skipna: boolean): number[] {
+ const out: number[] = [];
+ for (const v of values) {
+ if (isMissing(v)) {
+ if (!skipna) {
+ return []; // NaN propagation when skipna=false
+ }
+ continue;
+ }
+ if (typeof v === "number") {
+ out.push(v);
+ }
+ }
+ return out;
+}
+
+/**
+ * Compute sample/population variance.
+ *
+ * Returns `NaN` when fewer than `minCount` values are present, or when
+ * `n - ddof <= 0`.
+ */
+function computeVar(xs: readonly number[], ddof: number, minCount: number): number {
+ const n = xs.length;
+ if (n < minCount) {
+ return Number.NaN;
+ }
+ const denom = n - ddof;
+ if (denom <= 0) {
+ return Number.NaN;
+ }
+ let sum = 0;
+ for (const x of xs) {
+ sum += x;
+ }
+ const mean = sum / n;
+ let ss = 0;
+ for (const x of xs) {
+ const d = x - mean;
+ ss += d * d;
+ }
+ return ss / denom;
+}
+
+/**
+ * Compute standard error of the mean: sqrt(var(ddof) / n).
+ *
+ * Returns `NaN` when variance is `NaN` or n = 0.
+ */
+function computeSem(xs: readonly number[], ddof: number, minCount: number): number {
+ const n = xs.length;
+ if (n < minCount || n === 0) {
+ return Number.NaN;
+ }
+ const v = computeVar(xs, ddof, minCount);
+ if (Number.isNaN(v)) {
+ return Number.NaN;
+ }
+ return Math.sqrt(v / n);
+}
+
+// ─── Series reduction ─────────────────────────────────────────────────────────
+
+function reduceSeriesImpl(
+ series: Series,
+ options: VarSemSeriesOptions,
+ statFn: StatFn,
+): number {
+ const ddof = options.ddof ?? 1;
+ const skipna = options.skipna ?? true;
+ const minCount = options.minCount ?? 1;
+ const xs = extractNumbers(series.values as readonly Scalar[], skipna);
+ return statFn(xs, ddof, minCount);
+}
+
+// ─── DataFrame reduction ──────────────────────────────────────────────────────
+
+/** Reduce each column of df to a scalar using statFn. */
+function reduceColumns(
+ df: DataFrame,
+ options: VarSemDataFrameOptions,
+ statFn: StatFn,
+): Series {
+ const ddof = options.ddof ?? 1;
+ const skipna = options.skipna ?? true;
+ const minCount = options.minCount ?? 1;
+ const numericOnly = options.numericOnly ?? false;
+
+ const colNames = df.columns.values as readonly string[];
+ const labels: string[] = [];
+ const values: number[] = [];
+
+ for (const col of colNames) {
+ const s = df.col(col);
+ if (numericOnly && !isNumericKind(s.dtype.kind)) {
+ continue;
+ }
+ labels.push(col);
+ if (!isNumericKind(s.dtype.kind)) {
+ values.push(Number.NaN);
+ continue;
+ }
+ const xs = extractNumbers(s.values as readonly Scalar[], skipna);
+ values.push(statFn(xs, ddof, minCount));
+ }
+
+ return new Series({ data: values, index: labels });
+}
+
+/** Reduce each row of df to a scalar using statFn. */
+function reduceRows(
+ df: DataFrame,
+ options: VarSemDataFrameOptions,
+ statFn: StatFn,
+): Series {
+ const ddof = options.ddof ?? 1;
+ const skipna = options.skipna ?? true;
+ const minCount = options.minCount ?? 1;
+
+ const colNames = df.columns.values as readonly string[];
+ const rowCount = df.index.size;
+ const values: number[] = [];
+
+ for (let r = 0; r < rowCount; r++) {
+ const rowVals: Scalar[] = [];
+ for (const col of colNames) {
+ const s = df.col(col);
+ if (isNumericKind(s.dtype.kind)) {
+ rowVals.push(s.values[r] as Scalar);
+ }
+ }
+ const xs = extractNumbers(rowVals, skipna);
+ values.push(statFn(xs, ddof, minCount));
+ }
+
+ return new Series({ data: values, index: df.index });
+}
+
+// ─── public API ───────────────────────────────────────────────────────────────
+
+/**
+ * Return the variance of a numeric Series.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [2, 4, 4, 4, 5, 5, 7, 9] });
+ * varSeries(s); // 4 (sample variance, ddof=1)
+ * varSeries(s, { ddof: 0 }); // 3.5 (population variance)
+ * ```
+ */
+export function varSeries(series: Series, options: VarSemSeriesOptions = {}): number {
+ return reduceSeriesImpl(series, options, computeVar);
+}
+
+/**
+ * Return the standard error of the mean (SEM) of a numeric Series.
+ *
+ * SEM = sqrt(var(ddof) / n) where n is the number of valid observations.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [2, 4, 4, 4, 5, 5, 7, 9] });
+ * semSeries(s); // sqrt(4 / 8) = 0.707...
+ * ```
+ */
+export function semSeries(series: Series, options: VarSemSeriesOptions = {}): number {
+ return reduceSeriesImpl(series, options, computeSem);
+}
+
+/**
+ * Return the variance of each column (`axis=0`, default) or each row
+ * (`axis=1`) of a DataFrame as a numeric Series.
+ *
+ * Non-numeric columns without `numericOnly` contribute `NaN` to the result.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+ * varDataFrame(df); // Series { a: 1, b: 1 }
+ * ```
+ */
+export function varDataFrame(df: DataFrame, options: VarSemDataFrameOptions = {}): Series {
+ const axis = options.axis ?? 0;
+ return axis === 0 ? reduceColumns(df, options, computeVar) : reduceRows(df, options, computeVar);
+}
+
+/**
+ * Return the standard error of the mean for each column (`axis=0`, default)
+ * or each row (`axis=1`) of a DataFrame as a numeric Series.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+ * semDataFrame(df); // Series { a: sqrt(1/3), b: sqrt(1/3) }
+ * ```
+ */
+export function semDataFrame(df: DataFrame, options: VarSemDataFrameOptions = {}): Series {
+ const axis = options.axis ?? 0;
+ return axis === 0 ? reduceColumns(df, options, computeSem) : reduceRows(df, options, computeSem);
+}
diff --git a/src/stats/skew_kurt.ts b/src/stats/skew_kurt.ts
new file mode 100644
index 00000000..bcfd614e
--- /dev/null
+++ b/src/stats/skew_kurt.ts
@@ -0,0 +1,324 @@
+/**
+ * skew_kurt — skewness and excess kurtosis for Series and DataFrame.
+ *
+ * Mirrors:
+ * - `pandas.Series.skew(skipna?)` — Fisher–Pearson coefficient
+ * - `pandas.Series.kurt(skipna?)` — Fisher's definition of excess kurtosis
+ * - `pandas.DataFrame.skew(axis?, skipna?, numeric_only?)`
+ * - `pandas.DataFrame.kurt(axis?, skipna?, numeric_only?)`
+ *
+ * Formulas follow pandas defaults:
+ * - Skewness: adjusted Fisher–Pearson (unbiased, n/(n-1)/(n-2) correction)
+ * - Kurtosis: excess kurtosis (subtract 3) with pandas' bias-correction factor
+ *
+ * @module
+ */
+
+import type { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { DtypeKind } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
/** Options for {@link skewSeries} and {@link kurtSeries}. */
export interface SkewKurtSeriesOptions {
  /**
   * If `true` (default), exclude null/NaN values before computing.
   * When `false`, the presence of any missing value makes the result `NaN`.
   * @defaultValue `true`
   */
  readonly skipna?: boolean;
}
+
/** Options for {@link skewDataFrame} and {@link kurtDataFrame}. */
export interface SkewKurtDataFrameOptions {
  /**
   * Axis along which to compute.
   * - `0` (default): reduce along rows, one result per column.
   * - `1`: reduce along columns, one result per row.
   */
  readonly axis?: 0 | 1;
  /**
   * If `true` (default), exclude null/NaN values.
   * @defaultValue `true`
   */
  readonly skipna?: boolean;
  /**
   * If `true`, only include numeric columns (when `axis=0`).
   *
   * NOTE(review): the column reducer in this module unconditionally skips
   * non-numeric columns, so this flag currently has no observable effect —
   * confirm intended semantics before relying on it.
   * @defaultValue `false`
   */
  readonly numericOnly?: boolean;
}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** True when a scalar value is missing (null, undefined, or NaN). */
+function isMissing(v: Scalar): boolean {
+ return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+}
+
+/** True when a dtype kind is numeric (int, uint, or float). */
+function isNumericKind(kind: DtypeKind): boolean {
+ if (kind === "int") {
+ return true;
+ }
+ if (kind === "uint") {
+ return true;
+ }
+ if (kind === "float") {
+ return true;
+ }
+ return false;
+}
+
+/**
+ * Extract finite numeric values from a Scalar array, optionally skipping
+ * missing values.
+ */
+function extractNumbers(values: readonly Scalar[], skipna: boolean): number[] {
+ const out: number[] = [];
+ for (const v of values) {
+ if (isMissing(v)) {
+ if (!skipna) {
+ return []; // presence of NaN propagates as NaN
+ }
+ continue;
+ }
+ if (typeof v === "number") {
+ out.push(v);
+ }
+ }
+ return out;
+}
+
+/**
+ * Compute the adjusted Fisher–Pearson skewness coefficient (unbiased).
+ *
+ * Formula (same as pandas):
+ * G1 = n / ((n-1)(n-2)) * sum((x - mean)^3) / std_sample^3
+ *
+ * Returns `NaN` when `n < 3` or std is 0.
+ */
+function computeSkewness(xs: readonly number[]): number {
+ const n = xs.length;
+ if (n < 3) {
+ return Number.NaN;
+ }
+
+ let sum = 0;
+ for (const x of xs) {
+ sum += x;
+ }
+ const mean = sum / n;
+
+ let m2 = 0;
+ let m3 = 0;
+ for (const x of xs) {
+ const d = x - mean;
+ m2 += d * d;
+ m3 += d * d * d;
+ }
+
+ const variance = m2 / (n - 1); // sample variance
+ const std = Math.sqrt(variance);
+
+ if (std === 0) {
+ return Number.NaN;
+ }
+
+ const skew = (n / ((n - 1) * (n - 2))) * (m3 / (std * std * std));
+ return skew;
+}
+
+/**
+ * Compute excess kurtosis with pandas' bias-correction factor.
+ *
+ * Formula (same as pandas):
+ * G2 = n(n+1)/((n-1)(n-2)(n-3)) * sum((x-mean)^4) / s_sample^4
+ * - 3(n-1)^2 / ((n-2)(n-3))
+ * where s_sample^2 = sum((x-mean)^2) / (n-1)
+ *
+ * Returns `NaN` when `n < 4` or sample variance is 0.
+ */
+function computeKurtosis(xs: readonly number[]): number {
+ const n = xs.length;
+ if (n < 4) {
+ return Number.NaN;
+ }
+
+ let sum = 0;
+ for (const x of xs) {
+ sum += x;
+ }
+ const mean = sum / n;
+
+ let m2 = 0;
+ let m4 = 0;
+ for (const x of xs) {
+ const d = x - mean;
+ const d2 = d * d;
+ m2 += d2;
+ m4 += d2 * d2;
+ }
+
+ // Sample variance (denominator n-1)
+ const sampleVar = m2 / (n - 1);
+
+ if (sampleVar === 0) {
+ return Number.NaN;
+ }
+
+ const a = (n * (n + 1)) / ((n - 1) * (n - 2) * (n - 3));
+ const b = m4 / (sampleVar * sampleVar);
+ const c = (3 * (n - 1) * (n - 1)) / ((n - 2) * (n - 3));
+
+ return a * b - c;
+}
+
+// ─── public API — Series ──────────────────────────────────────────────────────
+
/**
 * Return the adjusted Fisher–Pearson skewness of a numeric Series.
 *
 * Returns `NaN` when fewer than 3 non-null values are present or the
 * standard deviation is zero.
 *
 * @example
 * ```ts
 * const s = new Series({ data: [1, 2, 3, 4, 100] });
 * skewSeries(s); // approx 2.23
 * ```
 */
export function skewSeries(
  series: Series,
  options: SkewKurtSeriesOptions = {},
): number {
  const skipna = options.skipna ?? true;
  const xs = extractNumbers(series.values as readonly Scalar[], skipna);
  return computeSkewness(xs);
}
+
/**
 * Return the excess kurtosis (Fisher's definition, bias-corrected) of a
 * numeric Series.
 *
 * Returns `NaN` when fewer than 4 non-null values are present or the
 * standard deviation is zero.
 *
 * @example
 * ```ts
 * const s = new Series({ data: [1, 2, 2, 3, 3, 3, 4, 4, 5] });
 * kurtSeries(s); // approx -0.29 (exactly -2/7)
 * ```
 */
export function kurtSeries(
  series: Series,
  options: SkewKurtSeriesOptions = {},
): number {
  const skipna = options.skipna ?? true;
  const xs = extractNumbers(series.values as readonly Scalar[], skipna);
  return computeKurtosis(xs);
}
+
+// ─── public API — DataFrame ───────────────────────────────────────────────────
+
+/**
+ * Return the skewness of each column (axis=0) or each row (axis=1) of a
+ * DataFrame as a numeric Series.
+ *
+ * Non-numeric columns are omitted when `axis=0`. When `axis=1`, only
+ * numeric values in each row contribute.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 8, 16] });
+ * skewDataFrame(df); // Series with index ["a","b"]
+ * ```
+ */
+export function skewDataFrame(
+ df: DataFrame,
+ options: SkewKurtDataFrameOptions = {},
+): Series {
+ const axis = options.axis ?? 0;
+ const skipna = options.skipna ?? true;
+ const numericOnly = options.numericOnly ?? false;
+
+ if (axis === 0) {
+ return reduceColumns(df, numericOnly, skipna, computeSkewness);
+ }
+ return reduceRows(df, skipna, computeSkewness);
+}
+
+/**
+ * Return the excess kurtosis of each column (axis=0) or each row (axis=1)
+ * of a DataFrame as a numeric Series.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 2, 3, 4], b: [1, 1, 8, 8] });
+ * kurtDataFrame(df); // Series with index ["a","b"]
+ * ```
+ */
+export function kurtDataFrame(
+ df: DataFrame,
+ options: SkewKurtDataFrameOptions = {},
+): Series {
+ const axis = options.axis ?? 0;
+ const skipna = options.skipna ?? true;
+ const numericOnly = options.numericOnly ?? false;
+
+ if (axis === 0) {
+ return reduceColumns(df, numericOnly, skipna, computeKurtosis);
+ }
+ return reduceRows(df, skipna, computeKurtosis);
+}
+
+/** Reduce each numeric column to a single number using the given statistic. */
+function reduceColumns(
+ df: DataFrame,
+ numericOnly: boolean,
+ skipna: boolean,
+ statFn: (xs: readonly number[]) => number,
+): Series {
+ const colNames = df.columns.values as readonly string[];
+ const labels: string[] = [];
+ const values: number[] = [];
+
+ for (const col of colNames) {
+ const series = df.col(col);
+ if (numericOnly && !isNumericKind(series.dtype.kind)) {
+ continue;
+ }
+ if (!isNumericKind(series.dtype.kind)) {
+ continue;
+ }
+ labels.push(col);
+ const xs = extractNumbers(series.values as readonly Scalar[], skipna);
+ values.push(statFn(xs));
+ }
+
+ return new Series({ data: values, index: labels });
+}
+
+/** Reduce each row to a single number using the given statistic. */
+function reduceRows(
+ df: DataFrame,
+ skipna: boolean,
+ statFn: (xs: readonly number[]) => number,
+): Series {
+ const colNames = df.columns.values as readonly string[];
+ const rowCount = df.index.size;
+ const values: number[] = [];
+
+ for (let r = 0; r < rowCount; r++) {
+ const rowVals: Scalar[] = [];
+ for (const col of colNames) {
+ const series = df.col(col);
+ if (isNumericKind(series.dtype.kind)) {
+ rowVals.push(series.values[r] as Scalar);
+ }
+ }
+ const xs = extractNumbers(rowVals, skipna);
+ values.push(statFn(xs));
+ }
+
+ return new Series({ data: values, index: df.index });
+}
diff --git a/src/stats/string_ops.ts b/src/stats/string_ops.ts
new file mode 100644
index 00000000..4a6a8ba8
--- /dev/null
+++ b/src/stats/string_ops.ts
@@ -0,0 +1,468 @@
+/**
+ * string_ops — standalone string operation functions for Series and arrays.
+ *
+ * Provides string transformation utilities that work on `Series`,
+ * `string[]`, and scalar strings. These complement the `StringAccessor`
+ * class by offering module-level functions that do not require the `.str`
+ * accessor pattern.
+ *
+ * Functions mirror pandas `str` accessor methods that are either missing from
+ * the accessor or better expressed as pure standalone utilities:
+ *
+ * - `strNormalize` — Unicode normalization (NFC / NFD / NFKC / NFKD)
+ * - `strGetDummies` — split strings by delimiter → one-hot DataFrame
+ * - `strExtractAll` — extract ALL regex matches per element
+ * - `strRemovePrefix` — remove a leading prefix
+ * - `strRemoveSuffix` — remove a trailing suffix
+ * - `strTranslate` — character-level substitution via a mapping
+ * - `strCharWidth` — display width (accounts for CJK full-width characters)
+ * - `strByteLength` — UTF-8 encoded byte length
+ *
+ * @module
+ */
+
+import { DataFrame, Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
/** Unicode normalization form (as accepted by `String.prototype.normalize`). */
export type NormalizeForm = "NFC" | "NFD" | "NFKC" | "NFKD";

/**
 * Input accepted by all string-op functions.
 * A scalar string is treated as a single element.
 */
export type StrInput = Series | readonly string[] | string;

/** Options for {@link strGetDummies}. */
export interface StrGetDummiesOptions {
  /**
   * The delimiter used to split each element into tokens.
   * @default "|"
   */
  readonly sep?: string;

  /**
   * Prefix prepended to every column name in the output DataFrame.
   * @default ""
   */
  readonly prefix?: string;

  /**
   * Separator between the prefix and the token name.
   * Only used when `prefix` is non-empty.
   * @default "_"
   */
  readonly prefixSep?: string;
}

/** Options for {@link strExtractAll}. */
export interface ExtractAllOptions {
  /**
   * RegExp flags used when `pat` is supplied as a plain string.
   * The `g` flag is always added internally — you do not need to include it.
   * @default ""
   */
  readonly flags?: string;
}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** Extract a plain string from a Scalar value; returns `""` for non-strings. */
+function scalarToStr(v: Scalar): string {
+ if (typeof v === "string") return v;
+ if (v === null || v === undefined) return "";
+ return String(v);
+}
+
+/**
+ * Normalise the input to a `string[]`.
+ * Scalars are wrapped in a single-element array.
+ */
+function toStringArray(input: StrInput): string[] {
+ if (typeof input === "string") return [input];
+ if (input instanceof Series) {
+ return input.values.map(scalarToStr);
+ }
+ return input.map(scalarToStr);
+}
+
+/**
+ * Build an output `Series` whose index mirrors the input.
+ * - `Series` → copy the input index
+ * - `string[]` → default `RangeIndex`
+ * - `string` → default `RangeIndex` of length 1
+ */
+function buildSeries(data: Scalar[], input: StrInput): Series {
+ if (input instanceof Series) {
+ return new Series({ data, index: input.index });
+ }
+ return new Series({ data });
+}
+
+// ─── strNormalize ─────────────────────────────────────────────────────────────
+
+/**
+ * Apply Unicode normalization to every element.
+ *
+ * Mirrors `pandas.Series.str.normalize(form)`.
+ *
+ * @param input - Input data (Series, string array, or scalar string).
+ * @param form - One of `"NFC"` (default), `"NFD"`, `"NFKC"`, or `"NFKD"`.
+ * @returns A new `Series` (or scalar string) with normalised values.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: ["\u00e9", "caf\u0065\u0301"] });
+ * strNormalize(s, "NFC");
+ * // Series ["é", "café"] (both now NFC)
+ * ```
+ */
+export function strNormalize(input: string, form?: NormalizeForm): string;
+export function strNormalize(
+ input: readonly string[] | Series,
+ form?: NormalizeForm,
+): Series;
+export function strNormalize(
+ input: StrInput,
+ form: NormalizeForm = "NFC",
+): Series | string {
+ if (typeof input === "string") return input.normalize(form);
+ const strs = toStringArray(input);
+ const data: Scalar[] = strs.map((s) => s.normalize(form));
+ return buildSeries(data, input);
+}
+
+// ─── strGetDummies ────────────────────────────────────────────────────────────
+
+/**
+ * Encode each string element as a row in a one-hot DataFrame by splitting on a
+ * delimiter.
+ *
+ * Mirrors `pandas.Series.str.get_dummies(sep)`.
+ *
+ * @param input - Series or string array.
+ * @param options - `sep` (default `"|"`), `prefix` and `prefixSep` for column names.
+ * @returns A `DataFrame` of 0/1 integer values, one column per unique token.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: ["a|b", "b|c", "a"] });
+ * strGetDummies(s);
+ * // DataFrame
+ * // a b c
+ * // 0 1 1 0
+ * // 1 0 1 1
+ * // 2 1 0 0
+ * ```
+ */
+export function strGetDummies(
+ input: readonly string[] | Series,
+ options: StrGetDummiesOptions = {},
+): DataFrame {
+ const sep = options.sep ?? "|";
+ const prefix = options.prefix ?? "";
+ const prefixSep = options.prefixSep ?? "_";
+
+ const strs = toStringArray(input);
+
+ // 1. Collect all unique tokens in first-seen order.
+ const seen = new Set();
+ const tokenRows: string[][] = strs.map((s) => {
+ const tokens = s === "" ? [] : s.split(sep);
+ tokens.forEach((t) => seen.add(t));
+ return tokens;
+ });
+
+ const allTokens = [...seen].sort(); // stable alphabetical order
+
+ // 2. Build column name with optional prefix.
+ const colName = (token: string): string =>
+ prefix === "" ? token : `${prefix}${prefixSep}${token}`;
+
+ // 3. Build one Scalar[] per column.
+ const columns: Record = {};
+ for (const token of allTokens) {
+ const name = colName(token);
+ columns[name] = tokenRows.map((row) => (row.includes(token) ? 1 : 0));
+ }
+
+ // 4. Preserve the row index from a Series input.
+ if (input instanceof Series) {
+ const rowIndex = input.index;
+ return DataFrame.fromColumns(columns, { index: rowIndex });
+ }
+ return DataFrame.fromColumns(columns);
+}
+
+// ─── strExtractAll ────────────────────────────────────────────────────────────
+
+/**
+ * Extract ALL non-overlapping regex matches from every element.
+ *
+ * Each element maps to an array of match arrays (one inner array per match;
+ * each inner array contains the full match and any capture groups).
+ *
+ * Mirrors `pandas.Series.str.extractall(pat)`, but returns a
+ * `Series` rather than a multi-indexed DataFrame to avoid
+ * the overhead of MultiIndex construction.
+ *
+ * @param input - Series or string array.
+ * @param pat - Regular expression (string or `RegExp`).
+ * @param options - Optional flags when `pat` is a string.
+ * @returns A `Series` whose values are `string[][]` (an array of match arrays).
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: ["abc 123", "foo 456 bar 789"] });
+ * strExtractAll(s, /(\d+)/);
+ * // Series [
+ * // [["123", "123"]],
+ * // [["456", "456"], ["789", "789"]],
+ * // ]
+ * ```
+ */
+export function strExtractAll(
+ input: readonly string[] | Series,
+ pat: string | RegExp,
+ options: ExtractAllOptions = {},
+): Series {
+ const strs = toStringArray(input);
+ const flags =
+ pat instanceof RegExp
+ ? pat.flags.includes("g")
+ ? pat.flags
+ : `${pat.flags}g`
+ : `${options.flags ?? ""}g`;
+ const source = pat instanceof RegExp ? pat.source : pat;
+ const re = new RegExp(source, flags);
+
+ const data: Scalar[] = strs.map((s) => {
+ const matches: string[][] = [];
+ let m: RegExpExecArray | null;
+ re.lastIndex = 0;
+ while ((m = re.exec(s)) !== null) {
+ matches.push([...m]);
+ if (!re.global) break;
+ }
+ // Store as JSON string so it fits in Scalar; consumers can JSON.parse
+ return JSON.stringify(matches);
+ });
+
+ return buildSeries(data, input);
+}
+
+// ─── strRemovePrefix ──────────────────────────────────────────────────────────
+
+/**
+ * Remove a leading prefix from each element (only if the element starts with it).
+ *
+ * Mirrors Python 3.9+ `str.removeprefix()` and can be used as a pandas
+ * equivalent via `df["col"].str.removeprefix(prefix)`.
+ *
+ * @param input - Series, string array, or scalar string.
+ * @param prefix - Prefix to remove.
+ * @returns A new Series (or scalar string) with the prefix stripped where present.
+ *
+ * @example
+ * ```ts
+ * strRemovePrefix(["prefix_a", "prefix_b", "other"], "prefix_");
+ * // Series ["a", "b", "other"]
+ * ```
+ */
+export function strRemovePrefix(input: string, prefix: string): string;
+export function strRemovePrefix(
+ input: readonly string[] | Series,
+ prefix: string,
+): Series;
+export function strRemovePrefix(
+ input: StrInput,
+ prefix: string,
+): Series | string {
+ if (typeof input === "string") {
+ return input.startsWith(prefix) ? input.slice(prefix.length) : input;
+ }
+ const strs = toStringArray(input);
+ const data: Scalar[] = strs.map((s) =>
+ s.startsWith(prefix) ? s.slice(prefix.length) : s,
+ );
+ return buildSeries(data, input);
+}
+
+// ─── strRemoveSuffix ──────────────────────────────────────────────────────────
+
+/**
+ * Remove a trailing suffix from each element (only if the element ends with it).
+ *
+ * Mirrors Python 3.9+ `str.removesuffix()`.
+ *
+ * @param input - Series, string array, or scalar string.
+ * @param suffix - Suffix to remove.
+ * @returns A new Series (or scalar string) with the suffix stripped where present.
+ *
+ * @example
+ * ```ts
+ * strRemoveSuffix(["hello_suffix", "world_suffix", "test"], "_suffix");
+ * // Series ["hello", "world", "test"]
+ * ```
+ */
+export function strRemoveSuffix(input: string, suffix: string): string;
+export function strRemoveSuffix(
+ input: readonly string[] | Series,
+ suffix: string,
+): Series;
+export function strRemoveSuffix(
+ input: StrInput,
+ suffix: string,
+): Series | string {
+ if (typeof input === "string") {
+ return input.endsWith(suffix) ? input.slice(0, input.length - suffix.length) : input;
+ }
+ const strs = toStringArray(input);
+ const data: Scalar[] = strs.map((s) =>
+ s.endsWith(suffix) ? s.slice(0, s.length - suffix.length) : s,
+ );
+ return buildSeries(data, input);
+}
+
+// ─── strTranslate ─────────────────────────────────────────────────────────────
+
+/**
+ * Translate characters in each element according to a mapping.
+ *
+ * Works like Python's `str.translate(table)`, where a `Map`
+ * maps single characters to their replacements (`null` means delete).
+ *
+ * @param input - Series, string array, or scalar string.
+ * @param table - Map from single source characters to replacement strings or
+ * `null` (to delete the character).
+ * @returns A new Series (or scalar string) with characters replaced.
+ *
+ * @example
+ * ```ts
+ * const t = new Map([["a", "A"], ["e", null]]);
+ * strTranslate(["cafe", "bale"], t);
+ * // Series ["cAf", "bAl"]
+ * ```
+ */
+export function strTranslate(input: string, table: ReadonlyMap): string;
+export function strTranslate(
+ input: readonly string[] | Series,
+ table: ReadonlyMap,
+): Series;
+export function strTranslate(
+ input: StrInput,
+ table: ReadonlyMap,
+): Series | string {
+ const translate = (s: string): string => {
+ let result = "";
+ for (const ch of s) {
+ if (table.has(ch)) {
+ const repl = table.get(ch);
+ if (repl !== null && repl !== undefined) result += repl;
+ // null → delete: skip
+ } else {
+ result += ch;
+ }
+ }
+ return result;
+ };
+
+ if (typeof input === "string") return translate(input);
+ const strs = toStringArray(input);
+ const data: Scalar[] = strs.map(translate);
+ return buildSeries(data, input);
+}
+
+// ─── strCharWidth ─────────────────────────────────────────────────────────────
+
+/**
+ * Compute the *display width* of each element, counting CJK (Chinese/Japanese/
+ * Korean) and other full-width characters as 2 columns.
+ *
+ * This is useful when formatting text tables that mix ASCII and East-Asian
+ * scripts.
+ *
+ * @param input - Series, string array, or scalar string.
+ * @returns A new `Series` of numbers (or a number for scalar input).
+ *
+ * @example
+ * ```ts
+ * strCharWidth("hello"); // 5
+ * strCharWidth("こんにちは"); // 10
+ * ```
+ */
+export function strCharWidth(input: string): number;
+export function strCharWidth(
+ input: readonly string[] | Series,
+): Series;
+export function strCharWidth(
+ input: StrInput,
+): Series | number {
+ const width = (s: string): number => {
+ let w = 0;
+ for (const ch of s) {
+ const cp = ch.codePointAt(0) ?? 0;
+ // Full-width and CJK ranges (simplified but covers the common cases)
+ if (
+ (cp >= 0x1100 && cp <= 0x115f) || // Hangul Jamo
+ (cp >= 0x2e80 && cp <= 0x303e) || // CJK Radicals, Kangxi
+ (cp >= 0x3041 && cp <= 0x33ff) || // Hiragana, Katakana, CJK
+ (cp >= 0x3400 && cp <= 0x4dbf) || // CJK Extension A
+ (cp >= 0x4e00 && cp <= 0xa4c6) || // CJK Unified + Yi
+ (cp >= 0xa960 && cp <= 0xa97c) || // Hangul Jamo Extended-A
+ (cp >= 0xac00 && cp <= 0xd7a3) || // Hangul Syllables
+ (cp >= 0xf900 && cp <= 0xfaff) || // CJK Compatibility
+ (cp >= 0xfe10 && cp <= 0xfe19) || // Vertical forms
+ (cp >= 0xfe30 && cp <= 0xfe6b) || // CJK Compatibility Forms
+ (cp >= 0xff01 && cp <= 0xff60) || // Halfwidth/Fullwidth
+ (cp >= 0xffe0 && cp <= 0xffe6) || // Fullwidth Signs
+ (cp >= 0x1b000 && cp <= 0x1b001) || // Kana Supplement
+ (cp >= 0x1f004 && cp <= 0x1f004) || // Mahjong tile
+ (cp >= 0x1f0cf && cp <= 0x1f0cf) || // Playing card
+ (cp >= 0x1f200 && cp <= 0x1f251) || // Enclosed CJK
+ (cp >= 0x20000 && cp <= 0x2fffd) || // CJK Extension B–F
+ (cp >= 0x30000 && cp <= 0x3fffd) // CJK Extension G
+ ) {
+ w += 2;
+ } else {
+ w += 1;
+ }
+ }
+ return w;
+ };
+
+ if (typeof input === "string") return width(input);
+ const strs = toStringArray(input);
+ const data: Scalar[] = strs.map((s) => width(s));
+ return buildSeries(data, input);
+}
+
+// ─── strByteLength ────────────────────────────────────────────────────────────
+
+/**
+ * Compute the UTF-8 encoded byte length of each element.
+ *
+ * Useful when working with byte-limited APIs (HTTP headers, database columns)
+ * where the character count alone is insufficient.
+ *
+ * @param input - Series, string array, or scalar string.
+ * @returns A new `Series` of numbers (or a number for scalar input).
+ *
+ * @example
+ * ```ts
+ * strByteLength("hello"); // 5
+ * strByteLength("こんにちは"); // 15 (3 bytes per character)
+ * ```
+ */
+export function strByteLength(input: string): number;
+export function strByteLength(
+ input: readonly string[] | Series,
+): Series;
+export function strByteLength(
+ input: StrInput,
+): Series | number {
+ const byteLen = (s: string): number => new TextEncoder().encode(s).length;
+
+ if (typeof input === "string") return byteLen(input);
+ const strs = toStringArray(input);
+ const data: Scalar[] = strs.map((s) => byteLen(s));
+ return buildSeries(data, input);
+}
diff --git a/src/stats/string_ops_extended.ts b/src/stats/string_ops_extended.ts
new file mode 100644
index 00000000..ee8cb3b2
--- /dev/null
+++ b/src/stats/string_ops_extended.ts
@@ -0,0 +1,429 @@
+/**
+ * string_ops_extended — extended standalone string operations.
+ *
+ * Provides advanced string manipulation utilities that complement
+ * `string_ops.ts` and `StringAccessor`:
+ *
+ * - `strSplitExpand` — split strings by delimiter and expand each part into
+ * a DataFrame column (mirrors `str.split(expand=True)`)
+ * - `strExtractGroups` — extract regex capture groups into a DataFrame
+ * (mirrors `str.extract` with capture groups)
+ * - `strPartition` — split at first occurrence of sep → (before, sep, after)
+ * - `strRPartition` — split at last occurrence of sep → (before, sep, after)
+ * - `strMultiReplace` — apply multiple find/replace pairs in sequence
+ * - `strIndent` — prefix every (non-empty) line with a string
+ * - `strDedent` — remove common leading whitespace from all lines
+ *
+ * @module
+ */
+
+import { DataFrame, Index, RangeIndex, Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+import type { StrInput } from "./string_ops.ts";
+
+// ─── internal helpers ─────────────────────────────────────────────────────────
+
+function toStrOrNull(v: Scalar): string | null {
+ if (v === null || v === undefined || (typeof v === "number" && Number.isNaN(v))) {
+ return null;
+ }
+ return String(v);
+}
+
/** Unwrap a Series to its raw values; pass plain arrays through unchanged. */
function toValues(input: readonly Scalar[] | Series): readonly Scalar[] {
  return input instanceof Series ? input.values : input;
}
+
/** Row index for output frames: reuse a Series' index, else a fresh RangeIndex. */
function rowIndex(input: readonly Scalar[] | Series): Index {
  return input instanceof Series ? input.index : new RangeIndex(toValues(input).length);
}
+
/**
 * Escape regex metacharacters so a literal string can be embedded in a
 * RegExp pattern (same character set as MDN's recommended `escapeRegExp`).
 */
function escapeRegex(s: string): string {
  return s.replace(/[\\^$.*+?()[\]{}|]/g, "\\$&");
}
+
+// ─── strSplitExpand ───────────────────────────────────────────────────────────
+
/** Options for {@link strSplitExpand}. */
export interface SplitExpandOptions {
  /**
   * Maximum number of splits to perform, so each element produces at most
   * `n + 1` parts. `-1` means unlimited.
   * @default -1
   */
  readonly n?: number;
}
+
+/** Split a scalar string into an array of parts. */
+export function strSplitExpand(
+ input: string,
+ sep?: string | RegExp,
+ options?: SplitExpandOptions,
+): string[];
+/** Split each element and expand the parts into a DataFrame (one column per part). */
+export function strSplitExpand(
+ input: readonly Scalar[] | Series,
+ sep?: string | RegExp,
+ options?: SplitExpandOptions,
+): DataFrame;
+/** @internal */
+export function strSplitExpand(
+ input: string | readonly Scalar[] | Series,
+ sep: string | RegExp = " ",
+ options: SplitExpandOptions = {},
+): string[] | DataFrame {
+ const maxSplits = options.n ?? -1;
+
+ function splitOne(s: string | null): (string | null)[] {
+ if (s === null) return [null];
+ if (maxSplits < 0) {
+ // unlimited splits
+ const pat = sep instanceof RegExp ? sep : new RegExp(escapeRegex(sep));
+ return s.split(pat);
+ }
+ // limited splits: extract up to maxSplits separators
+ const parts: string[] = [];
+ let rest = s;
+ for (let i = 0; i < maxSplits; i++) {
+ let idx: number;
+ let sepLen: number;
+ if (typeof sep === "string") {
+ idx = rest.indexOf(sep);
+ sepLen = sep.length;
+ } else {
+ const m = rest.match(sep);
+ if (m === null || m.index === undefined) break;
+ idx = m.index;
+ sepLen = m[0]?.length ?? 0;
+ }
+ if (idx === -1) break;
+ parts.push(rest.slice(0, idx));
+ rest = rest.slice(idx + sepLen);
+ }
+ parts.push(rest);
+ return parts;
+ }
+
+ if (typeof input === "string") {
+ return splitOne(input) as string[];
+ }
+
+ const vals = toValues(input);
+ const rows: (string | null)[][] = vals.map((v) => splitOne(toStrOrNull(v)));
+
+ // determine column width (maximum number of parts in any row)
+ const width = rows.reduce((m, r) => Math.max(m, r.length), 0);
+
+ const columns: Record = {};
+ for (let c = 0; c < width; c++) {
+ const col: Scalar[] = rows.map((r) => {
+ const cell = r[c];
+ return cell !== undefined ? cell : null;
+ });
+ columns[String(c)] = col;
+ }
+
+ return DataFrame.fromColumns(columns, { index: rowIndex(input) });
+}
+
+// ─── strExtractGroups ─────────────────────────────────────────────────────────
+
/** Options for {@link strExtractGroups}. */
export interface ExtractGroupsOptions {
  /**
   * Additional regex flags (e.g. `"i"` for case-insensitive) merged with any
   * flags already on a RegExp pattern.
   */
  readonly flags?: string;
}
+
+/**
+ * Extract regex capture groups from each element into a DataFrame.
+ *
+ * One column is created per capture group. Named groups (`(?