diff --git a/playground/api_types.html b/playground/api_types.html
new file mode 100644
index 00000000..05b8347c
--- /dev/null
+++ b/playground/api_types.html
@@ -0,0 +1,222 @@
+
+
+
+
+
+ tsb — api_types: Runtime type-checking predicates
+
+
+
+ π¦ api_types β Runtime type-checking predicates
+
+ Port of pandas.api.types .
+ Two groups of predicates:
+ value-level (work on arbitrary JS values) and
+ dtype-level (work on Dtype instances or dtype name strings).
+
+
+ Value-Level Predicates
+
+ isScalar(val)
+ Returns true for primitives and Date. Mirrors pd.api.types.is_scalar.
+ import { isScalar } from "tsb";
+
+isScalar(42); // true
+isScalar("hello"); // true
+isScalar(null); // true
+isScalar(new Date()); // true
+isScalar([1, 2]); // false
+isScalar({ a: 1 }); // false
+
+
+ isListLike(val)
+ Returns true for iterables (excluding strings) and objects with a numeric length.
+ isListLike([1, 2, 3]); // true
+isListLike(new Set([1])); // true
+isListLike("abc"); // false
+isListLike(42); // false
+
+
+ isArrayLike(val)
+ Returns true for values with a non-negative integer length (including strings).
+ isArrayLike([1, 2]); // true
+isArrayLike("hello"); // true
+isArrayLike(42); // false
+
+ isDictLike(val)
+ Returns true for plain objects and Map.
+ isDictLike({ a: 1 }); // true
+isDictLike(new Map()); // true
+isDictLike([]); // false
+
+ isNumber / isBool / isStringValue / isFloat / isInteger
+ isNumber(3.14); // true
+isNumber(NaN); // true (typeof NaN === "number")
+isBool(true); // true
+isStringValue("hi"); // true
+isFloat(3.14); // true
+isFloat(3.0); // false (integer value)
+isInteger(42); // true
+isInteger(3.14); // false
+
+
+ isMissing(val)
+ Returns true for null, undefined, or NaN.
+ isMissing(null); // true
+isMissing(undefined); // true
+isMissing(NaN); // true
+isMissing(0); // false
+
+ isHashable(val)
+ Returns true for values safe to use as object keys (primitives).
+ isHashable("key"); // true
+isHashable(42); // true
+isHashable({}); // false
+
+ Dtype-Level Predicates
+ All accept a Dtype instance or a dtype name string.
+
+ import { Dtype, isNumericDtype, isFloatDtype, isIntegerDtype,
+ isStringDtype, isDatetimeDtype, isCategoricalDtype } from "tsb";
+
+isNumericDtype(Dtype.float64); // true
+isNumericDtype("int32"); // true
+isNumericDtype("string"); // false
+
+isFloatDtype("float32"); // true
+isIntegerDtype("int64"); // true
+isUnsignedIntegerDtype("uint8"); // true
+isSignedIntegerDtype("int8"); // true
+isStringDtype("string"); // true
+isDatetimeDtype("datetime"); // true
+isCategoricalDtype("category"); // true
+isObjectDtype("object"); // true
+isExtensionArrayDtype("category"); // true
+isExtensionArrayDtype("int32"); // false
+
+
+ Complete Predicate Reference
+
+ Function Pandas equivalent Description
+ isScalar(val)is_scalarPrimitive or Date
+ isListLike(val)is_list_likeIterable (not string) or has length
+ isArrayLike(val)is_array_likeHas non-negative integer length
+ isDictLike(val)is_dict_likePlain object or Map
+ isIterator(val)is_iteratorHas callable next method
+ isNumber(val)is_numbertypeof === "number"
+ isBool(val)is_booltypeof === "boolean"
+ isStringValue(val)is_stringtypeof === "string"
+ isFloat(val)is_floatFinite number with fractional part
+ isInteger(val)is_integerInteger-valued number
+ isBigInt(val)β typeof === "bigint"
+ isRegExp(val)is_reRegExp instance
+ isReCompilable(val)is_re_compilableString or RegExp
+ isMissing(val)isnanull / undefined / NaN
+ isHashable(val)is_hashableSafe as object key (primitive)
+ isDate(val)β Date instance
+ isNumericDtype(d)is_numeric_dtypeInt, uint, or float
+ isIntegerDtype(d)is_integer_dtypeAny integer (signed or unsigned)
+ isSignedIntegerDtype(d)is_signed_integer_dtypeint8–int64
+ isUnsignedIntegerDtype(d)is_unsigned_integer_dtypeuint8–uint64
+ isFloatDtype(d)is_float_dtypefloat32 or float64
+ isBoolDtype(d)is_bool_dtypebool
+ isStringDtype(d)is_string_dtypestring dtype
+ isDatetimeDtype(d)is_datetime64_dtypedatetime
+ isTimedeltaDtype(d)is_timedelta64_dtypetimedelta
+ isCategoricalDtype(d)is_categorical_dtypecategory
+ isObjectDtype(d)is_object_dtypeobject
+ isComplexDtype(d)is_complex_dtypeAlways false (no complex in tsb)
+ isExtensionArrayDtype(d)is_extension_array_dtypestring/object/datetime/timedelta/category
+ isPeriodDtype(d)is_period_dtypeMaps to datetime
+ isIntervalDtype(d)is_interval_dtypeNumeric dtypes
+
+
+
+
+
diff --git a/playground/attrs.html b/playground/attrs.html
new file mode 100644
index 00000000..ae25d5f6
--- /dev/null
+++ b/playground/attrs.html
@@ -0,0 +1,183 @@
+
+
+
+
+
+ tsb β attrs: user-defined metadata
+
+
+
+ β tsb playground
+
+ attrs β User-Defined Metadata
+
+ Attach arbitrary key–value metadata to any Series or DataFrame
+ — mirrors
+
+ pandas.DataFrame.attrs and
+
+ pandas.Series.attrs .
+
+
+
+ Design note: Because tsb objects are immutable (their data, index,
+ and dtype are frozen), attrs are stored in a WeakMap registry rather than as
+ instance properties. This means attrs are attached & detached without touching the object
+ itself, and garbage-collected automatically when the object is collected.
+
+
+ Basic usage
+
+ import {
+ getAttrs, setAttrs, updateAttrs, copyAttrs, withAttrs,
+ clearAttrs, hasAttrs, getAttr, setAttr, deleteAttr,
+ attrsCount, attrsKeys, mergeAttrs,
+} from "tsb";
+import { DataFrame, Series } from "tsb";
+
+// βββ annotate a DataFrame βββββββββββββββββββββββββββββββββββββββββββββββββ
+const df = DataFrame.fromColumns({
+ temperature: [22.1, 23.5, 21.8],
+ humidity: [55, 60, 58],
+});
+
+setAttrs(df, {
+ source: "weather_station_42",
+ unit: "Celsius",
+ notes: "Morning readings",
+});
+
+getAttrs(df);
+// β { source: "weather_station_42", unit: "Celsius", notes: "Morning readings" }
+
+getAttr(df, "unit"); // β "Celsius"
+getAttr(df, "missing"); // β undefined
+attrsCount(df); // β 3
+attrsKeys(df); // β ["source", "unit", "notes"]
+hasAttrs(df); // β true
+
+
+ Merging and updating
+
+ // updateAttrs merges new keys, preserves existing
+updateAttrs(df, { version: 2, notes: "Updated notes" });
+getAttrs(df);
+// β { source: "weather_station_42", unit: "Celsius", notes: "Updated notes", version: 2 }
+
+// setAttr / deleteAttr for single keys
+setAttr(df, "sensor_id", "WS-042");
+deleteAttr(df, "notes");
+getAttrs(df);
+// β { source: "weather_station_42", unit: "Celsius", version: 2, sensor_id: "WS-042" }
+
+
+ Propagating metadata to derived objects
+
+ // copyAttrs: copy all attrs from one object to another
+const s = new Series({ data: [22.1, 23.5, 21.8], name: "temperature" });
+setAttrs(s, { unit: "Celsius", source: "sensor_A" });
+
+const derived = new Series({ data: [71.8, 74.3, 71.2], name: "fahrenheit" });
+copyAttrs(s, derived);
+getAttrs(derived);
+// β { unit: "Celsius", source: "sensor_A" }
+
+// Then update the copy
+setAttr(derived, "unit", "Fahrenheit");
+getAttrs(derived); // β { unit: "Fahrenheit", source: "sensor_A" }
+getAttrs(s); // β { unit: "Celsius", source: "sensor_A" } β unchanged
+
+
+ Fluent helper β withAttrs
+
+ // withAttrs sets attrs and returns the same object reference
+// Handy for inline annotation
+const base = DataFrame.fromColumns({ x: [1, 2, 3] });
+const annotated = withAttrs(base, {
+  source: "lab_experiment",
+  date: "2026-04-09",
+});
+
+annotated === base; // true — same reference, not a copy
+getAttrs(annotated);
+// β { source: "lab_experiment", date: "2026-04-09" }
+
+
+ Merging from multiple sources
+
+ // mergeAttrs: combine attrs from multiple objects into a target
+const s1 = new Series({ data: [1, 2, 3], name: "a" });
+const s2 = new Series({ data: [4, 5, 6], name: "b" });
+setAttrs(s1, { source: "sensor_A", unit: "kg" });
+setAttrs(s2, { source: "sensor_B", scale: 2.5 });
+
+const combined = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+mergeAttrs([s1, s2], combined);
+// Later sources win on conflicts: source="sensor_B"
+getAttrs(combined);
+// β { source: "sensor_B", unit: "kg", scale: 2.5 }
+
+
+ Clearing metadata
+
+ setAttrs(df, { x: 1, y: 2 });
+hasAttrs(df); // β true
+attrsCount(df); // β 2
+
+clearAttrs(df);
+hasAttrs(df); // β false
+getAttrs(df); // β {}
+
+
+ API reference
+
+
+
+ Function Description
+
+
+ getAttrs(obj) Return a shallow copy of all stored attrs (empty {} if none)
+ setAttrs(obj, attrs) Overwrite attrs completely with the given record
+ updateAttrs(obj, updates) Merge updates into existing attrs (keys not named in updates are kept; conflicting keys take the new value)
+ withAttrs(obj, attrs) Fluent: set attrs and return the same object
+ copyAttrs(source, target) Copy all attrs from source to target
+ mergeAttrs(sources[], target) Merge attrs from multiple sources; later sources win
+ clearAttrs(obj) Remove all attrs from obj
+ hasAttrs(obj) Return true if any attrs are set
+ getAttr(obj, key) Get a single attr value (undefined if missing)
+ setAttr(obj, key, value) Set a single attr, preserving other keys
+ deleteAttr(obj, key) Delete a single attr key
+ attrsCount(obj) Number of stored attr keys
+ attrsKeys(obj) Array of stored attr key names
+
+
+
+ Comparison with pandas
+
+
+
+ pandas tsb
+
+
+ df.attrsgetAttrs(df)
+ df.attrs = {"k": "v"}setAttrs(df, { k: "v" })
+ df.attrs["k"] = "v"setAttr(df, "k", "v")
+ df.attrs["k"]getAttr(df, "k")
+ del df.attrs["k"]deleteAttr(df, "k")
+ df.attrs.update(d)updateAttrs(df, d)
+ df.attrs.clear()clearAttrs(df)
+
+
+
+
diff --git a/playground/categorical_ops.html b/playground/categorical_ops.html
new file mode 100644
index 00000000..c2d794ca
--- /dev/null
+++ b/playground/categorical_ops.html
@@ -0,0 +1,338 @@
+
+
+
+
+
+ tsb β Categorical Ops
+
+
+
+
+
+
Loading tsb runtimeβ¦
+
+
+β back to index
+π·οΈ Categorical Ops
+
+ Standalone categorical utility functions that complement the Series.cat accessor.
+ Mirrors pd.Categorical.from_codes, set operations on categories, frequency helpers,
+ and cross-tabulation.
+
+
+
+
+
catFromCodes(codes, categories, opts?)
+
+ Construct a categorical Series from integer codes (0-based) and a categories array.
+ Code -1 maps to null (missing). Mirrors
+ pd.Categorical.from_codes.
+
+
+
βΆ Run
+
+
+
+
+
+
Category set operations
+
+ catUnionCategories, catIntersectCategories,
+ catDiffCategories, and catEqualCategories let you
+ combine or compare the category sets of two Series.
+
+
+
βΆ Run
+
+
+
+
+
+
catSortByFreq(series, opts?)
+
+ Reorder categories by their frequency in the data (most frequent first by default).
+ Mirrors s.cat.reorder_categories(s.value_counts().index).
+
+
+
βΆ Run
+
+
+
+
+
+
catToOrdinal(series, order)
+
+ Create an ordered categorical from a Series using order to define both the
+ category set and their rank. Values not in order become null.
+
+
+
βΆ Run
+
+
+
+
+
+
catFreqTable(series)
+
+ Return a plain Record<string, number> of counts per category.
+ Zero-frequency categories are included.
+
+
+
βΆ Run
+
+
+
+
+
+
catCrossTab(a, b, opts?)
+
+ Cross-tabulation of two categorical Series. Rows = a's categories,
+ columns = b's categories, cells = co-occurrence counts.
+ Supports margins and normalization.
+
+
+
βΆ Run
+
+
+
+
+
+
catRecode(series, mapping)
+
+ Rename categories via an object map or a transform function. Unmapped categories
+ are left unchanged.
+
+
+
βΆ Run
+
+
+
+
+
+
+
diff --git a/playground/cut_qcut.html b/playground/cut_qcut.html
new file mode 100644
index 00000000..1d273a17
--- /dev/null
+++ b/playground/cut_qcut.html
@@ -0,0 +1,163 @@
+
+
+
+
+
+ tsb β cut / qcut: Binning Continuous Data
+
+
+
+ tsb β cut / qcut: Binning Continuous Data
+
+ cut and qcut partition continuous numeric values into
+ discrete intervals β the TypeScript equivalents of
+ pandas.cut
+ and
+ pandas.qcut .
+
+
+ 1. cut β Fixed-Width Binning
+
+ Bin values into equal-width (or user-specified) intervals.
+ Pass an integer for automatic bins, or an explicit edge array.
+
+
+ Integer bins
+ import { cut } from "tsb";
+
+const ages = [5, 18, 25, 35, 50, 70];
+const { codes, labels, bins } = cut(ages, 3);
+
+// labels: ["(5.0, 26.7]", "(26.7, 48.3]", "(48.3, 70.0]"]
+// bins: [4.935, 26.667, 48.333, 70]
+// codes: [0, 0, 0, 1, 2, 2]
+console.table(ages.map((a, i) => ({ age: a, bin: labels[codes[i]!] })));
+
+
+ Explicit bin edges
+ const scores = [55, 65, 72, 80, 91, 98];
+const { codes, labels } = cut(scores, [0, 60, 70, 80, 90, 100], {
+ labels: ["F", "D", "C", "B", "A"],
+ include_lowest: true,
+});
+// codes: [0, 1, 2, 2, 4, 4]
+// labels[codes[0]] β "F"
+// labels[codes[5]] β "A"
+
+
+ Options
+
+ Option Default Description
+
+ righttrueIntervals closed on right: (a, b]. Set false for [a, b).
+ include_lowestfalseMake lowest interval left-closed: [a, b].
+ labelsauto Custom string labels, or false for integer codes.
+ precision3Decimal places in auto-generated labels.
+ duplicates"raise""drop" to silently remove duplicate bin edges.
+
+
+
+ 2. qcut β Quantile-Based Binning
+
+ Divide values into bins of (approximately) equal population using quantiles.
+ Useful for creating percentile buckets or roughly equal-sized groups.
+
+
+ Quartile split
+ import { qcut } from "tsb";
+
+const values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+const { codes, labels, bins } = qcut(values, 4);
+
+// labels: ["[1, 3.25]", "(3.25, 5.5]", "(5.5, 7.75]", "(7.75, 10]"]
+// Every bin has ~2-3 elements
+
+
+ Custom quantile probabilities
+ const { labels } = qcut(values, [0, 0.1, 0.5, 0.9, 1], {
+ labels: ["bottom 10%", "lower middle", "upper middle", "top 10%"],
+});
+
+
+ Decile labels
+ const { codes } = qcut(data, 10, { labels: false });
+// codes[i] is 0..9 β the decile bucket index
+
+
+ 3. Return Value: BinResult
+ interface BinResult {
+ codes: ReadonlyArray<number | null>; // bin index per value; null for NaN
+ labels: readonly string[]; // ordered label per bin
+ bins: readonly number[]; // bin edge array (labels.length + 1)
+}
+
+
+
+ Missing values : NaN and Infinity are
+ assigned null in the codes array and are never placed
+ in a bin.
+
+
+ 4. cut vs qcut
+
+ cutqcut
+
+ Bin width Equal (uniform edges) Varies (equal population)
+ Bin count Determined by bins Determined by q
+ Best for Meaningful thresholds (age groups, grade bands) Percentile buckets, rank-based analysis
+ Left edge of first bin Open ( unless include_lowest Always closed [
+
+
+
+ 5. pandas Compatibility
+ # Python pandas
+pd.cut([1, 2, 3, 4, 5], 2)
+# Interval(0.996, 3.0, closed='right') ...
+
+# tsb equivalent
+cut([1, 2, 3, 4, 5], 2)
+// codes: [0, 0, 0, 1, 1]
+// labels: ["(0.996, 3.0]", "(3.0, 5.0]"]
+
+
+
+ Both cut and qcut follow pandas semantics exactly:
+ right-closed by default, linear interpolation for quantiles, and duplicate-edge
+ handling via duplicates.
+
+
+ β Back to tsb feature index
+
+
diff --git a/playground/format_ops.html b/playground/format_ops.html
new file mode 100644
index 00000000..d72fd1ec
--- /dev/null
+++ b/playground/format_ops.html
@@ -0,0 +1,262 @@
+
+
+
+
+
+ tsb β format_ops: Number Formatting
+
+
+
+π’ format_ops β Number Formatting
+
+ tsb provides a suite of number-formatting helpers that mirror pandas'
+ style.format() and Series.map() patterns.
+ Every function is zero-dependency and fully typed.
+
+β Back to index
+
+Scalar formatters
+
+
+ Function Example input Example output Notes
+
+ formatFloat(n, d)3.14159, 2"3.14"Fixed decimal places
+ formatPercent(n, d)0.1234, 1"12.3%"Multiplies by 100
+ formatScientific(n, d)12345.678, 3"1.235e+4"Exponential notation
+ formatEngineering(n, d)12345.678, 3"12.346e+3"Exponent multiple of 3
+ formatThousands(n, d, sep)1234567.89, 2"1,234,567.89"Thousands separator
+ formatCurrency(n, sym, d)1234.5, "$""$1,234.50"Currency prefix + thousands
+ formatCompact(n, d)1_234_567, 2"1.23M"K / M / B / T suffixes
+
+
+
+Interactive demo β scalar formatting
+
+
Number:
+
Format:
+
+ formatFloat(n, decimals)
+ formatPercent(n, decimals)
+ formatScientific(n, decimals)
+ formatEngineering(n, decimals)
+ formatThousands(n, decimals)
+ formatCurrency(n, "$", decimals)
+ formatCompact(n, decimals)
+
+
+
Decimals:
+
Format
+
+
+
+Formatter factories
+import {
+ makeFloatFormatter,
+ makePercentFormatter,
+ makeCurrencyFormatter,
+} from "tsb";
+
+const fmtFloat = makeFloatFormatter(3); // (v) => formatFloat(v, 3)
+const fmtPct = makePercentFormatter(1); // (v) => formatPercent(v, 1)
+const fmtDollar = makeCurrencyFormatter("$"); // (v) => formatCurrency(v, "$", 2)
+
+fmtFloat(3.14159); // "3.142"
+fmtPct(0.0825); // "8.3%"
+fmtDollar(9999.99); // "$9,999.99"
+
+
+Apply to a Series
+import { Series, applySeriesFormatter, makePercentFormatter } from "tsb";
+
+const returns = new Series({ data: [0.05, -0.02, 0.134, 0.007], name: "returns" });
+
+const formatted = applySeriesFormatter(returns, makePercentFormatter(1));
+// Series<string> ["5.0%", "-2.0%", "13.4%", "0.7%"]
+
+
+Apply to a DataFrame
+import { DataFrame, applyDataFrameFormatter, makeCurrencyFormatter, makePercentFormatter } from "tsb";
+
+const df = DataFrame.fromColumns({
+ price: [1_299.99, 899.50, 45.00],
+ change: [0.025, -0.031, 0.102],
+ volume: [15_000, 8_200, 230_000],
+});
+
+const formatted = applyDataFrameFormatter(df, {
+ price: makeCurrencyFormatter("$", 2),
+ change: makePercentFormatter(2),
+});
+
+// formatted = {
+// price: ["$1,299.99", "$899.50", "$45.00"],
+// change: ["2.50%", "-3.10%", "10.20%"],
+// volume: ["15000", "8200", "230000"], // no formatter β String(v)
+// }
+
+
+Interactive demo β DataFrame formatting
+
+
Run DataFrame example
+
+
+
+String rendering
+import { Series, DataFrame, seriesToString, dataFrameToString, makeFloatFormatter } from "tsb";
+
+const s = new Series({ data: [1.2, 3.4, 5.6], name: "value" });
+console.log(seriesToString(s, { formatter: makeFloatFormatter(1) }));
+// 0 1.2
+// 1 3.4
+// 2 5.6
+// Name: value, dtype: float64
+
+const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4.0, 5.0, 6.0] });
+console.log(dataFrameToString(df));
+// a b
+// 0 1 4.0
+// 1 2 5.0
+// 2 3 6.0
+
+
+Interactive demo β seriesToString / dataFrameToString
+
+
Run toString example
+
+
+
+
+
+
diff --git a/playground/index.html b/playground/index.html
index 48bfbcb9..4f78bf7a 100644
--- a/playground/index.html
+++ b/playground/index.html
@@ -264,6 +264,75 @@
+ Function Pandas equivalent Description
+
+
+ insertColumn(df, loc, col, values)
+ df.insert(loc, col, value)
+ Insert a new column at integer position loc
+
+
+ popColumn(df, col)
+ df.pop(col)
+ Remove a column; returns { series, df }
+
+
+ reorderColumns(df, order)
+ df[order]
+ Reorder (and optionally subset) columns
+
+
+ moveColumn(df, col, newLoc)
+ β
+ Move an existing column to a new integer position
+
+
+
+
+ Example 1 β insertColumn
+ import { DataFrame, insertColumn } from "tsb";
+
+const df = DataFrame.fromColumns({
+ name: ["Alice", "Bob", "Carol"],
+ age: [30, 25, 35],
+});
+// columns: ["name", "age"]
+
+// Insert "city" between "name" and "age"
+const df2 = insertColumn(df, 1, "city", ["NY", "LA", "SF"]);
+// df2.columns.values β ["name", "city", "age"]
+// df2.col("city").values β ["NY", "LA", "SF"]
+
+// Original is unchanged
+// df.columns.values β ["name", "age"]
+
+
+ Example 2 β Insert with a Series
+ import { DataFrame, Series, insertColumn } from "tsb";
+
+const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+const salary = new Series({ data: [100_000, 90_000, 120_000], name: "salary" });
+
+const df2 = insertColumn(df, 0, "salary", salary);
+// df2.columns.values β ["salary", "a", "b"]
+
+
+ Example 3 β popColumn
+ import { DataFrame, popColumn } from "tsb";
+
+const df = DataFrame.fromColumns({
+ id: [1, 2, 3],
+ name: ["Alice", "Bob", "Carol"],
+ age: [30, 25, 35],
+});
+
+// Remove "age" and keep the Series
+const { series: ageSeries, df: df2 } = popColumn(df, "age");
+// ageSeries.values β [30, 25, 35]
+// df2.columns.values β ["id", "name"]
+// df.columns.values β ["id", "name", "age"] β original unchanged
+
+
+ Example 4 β reorderColumns
+ import { DataFrame, reorderColumns } from "tsb";
+
+const df = DataFrame.fromColumns({ a: [1], b: [2], c: [3], d: [4] });
+
+// Reverse the column order
+const df2 = reorderColumns(df, ["d", "c", "b", "a"]);
+// df2.columns.values β ["d", "c", "b", "a"]
+
+// Select a subset (drops columns not listed)
+const df3 = reorderColumns(df, ["a", "c"]);
+// df3.columns.values β ["a", "c"] (b and d are dropped)
+
+
+ Example 5 β moveColumn
+ import { DataFrame, moveColumn } from "tsb";
+
+const df = DataFrame.fromColumns({
+ year: [2020, 2021, 2022],
+ value: [10, 20, 30],
+ label: ["a", "b", "c"],
+});
+// columns: ["year", "value", "label"]
+
+// Move "label" to the front
+const df2 = moveColumn(df, "label", 0);
+// df2.columns.values β ["label", "year", "value"]
+
+
+ Error cases
+ // Duplicate column name (default: not allowed)
+insertColumn(df, 1, "a", [1, 2, 3]);
+// β RangeError: Column "a" already exists. Use allowDuplicates=true to permit...
+
+// Out-of-range loc
+insertColumn(df, 99, "x", [1, 2, 3]);
+// β RangeError: loc=99 is out of range [0, 2].
+
+// Wrong number of values
+insertColumn(df, 0, "x", [1]); // df has 3 rows
+// β RangeError: values length 1 does not match DataFrame row count 3.
+
+// Column not found
+popColumn(df, "missing");
+// β RangeError: Column "missing" not found in DataFrame.
+
+
+
+ Immutability: Like all tsb DataFrame operations, these functions never
+ mutate the original DataFrame. Always assign the return value to a new variable.
+
+
+ pandas equivalence table
+
+
+ pandas tsb
+
+
+ df.insert(1, "x", [1,2,3]) *(mutates)*insertColumn(df, 1, "x", [1,2,3])
+ series = df.pop("col") *(mutates)*const { series, df: df2 } = popColumn(df, "col")
+ df[["c","a","b"]]reorderColumns(df, ["c","a","b"])
+
+
+
+
diff --git a/playground/notna_isna.html b/playground/notna_isna.html
new file mode 100644
index 00000000..8002a5d9
--- /dev/null
+++ b/playground/notna_isna.html
@@ -0,0 +1,242 @@
+
+
+
+
+
+ tsb Β· isna / notna β Missing Value Detection
+
+
+
+β Back to tsb playground
+
+isna / notna
+Module-level missing-value detection β mirrors pd.isna(), pd.notna(), pd.isnull(), pd.notnull() from pandas.
+
+What is "missing"?
+In tsb, the following values are considered missing:
+
+ null
+ undefined
+ NaN (IEEE 754 not-a-number)
+
+Everything else — 0, false, "", new Date(...) — is not missing.
+
+API Overview
+
+ Function Input Output Pandas equivalent
+ isna(v) Scalar boolean pd.isna(v)
+ isna(arr) Scalar[] boolean[] pd.isna(arr)
+ isna(series) Series Series<boolean> pd.isna(series)
+ isna(df) DataFrame DataFrame pd.isna(df)
+ notna(v) any of above same shape, inverted pd.notna(v)
+ isnull / notnull any of above same as isna/notna aliases
+ fillna(obj, {value}) Scalar/array/Series/DataFrame same type, no missing pd.Series.fillna()
+ dropna(obj, opts?) array/Series/DataFrame missing entries removed pd.Series.dropna()
+ countna(obj) array or Series number series.isna().sum()
+ countValid(obj) array or Series number series.count()
+
+
+π¬ Try it: isna on scalars
+
+
Test values (comma-separated, use "null", "NaN", "undefined")
+
+
Run isna
+
Click "Run isna" to see results.
+
+
+π¬ Try it: isna on arrays
+
+
Array values (JSON array, use null for missing, "NaN" string for NaN)
+
+
Run isna
+
Click "Run isna" to see results.
+
+
+π¬ Try it: fillna on arrays
+
+
Array (JSON, use null for missing)
+
+
Fill value
+
+
Run fillna
+
Click "Run fillna" to see results.
+
+
+π¬ Try it: dropna on arrays
+
+
Array (JSON, use null for missing)
+
+
Run dropna
+
Click "Run dropna" to see results.
+
+
+π Code examples
+
+
+import { isna, notna, isnull, notnull, fillna, dropna, countna, countValid } from "tsb";
+import { Series, DataFrame } from "tsb";
+
+// ββ scalar ββββββββββββββββββββββββββββββββββββββββββββββββββ
+isna(null); // true
+isna(undefined); // true
+isna(NaN); // true
+isna(0); // false β zero is not missing
+isna(false); // false β false is not missing
+isna(""); // false β empty string is not missing
+
+// ββ array βββββββββββββββββββββββββββββββββββββββββββββββββββ
+isna([1, null, NaN, 3]); // [false, true, true, false]
+notna([1, null, NaN, 3]); // [true, false, false, true]
+
+// ββ Series ββββββββββββββββββββββββββββββββββββββββββββββββββ
+const s = new Series({ data: [1, null, NaN, 4] });
+isna(s).values; // [false, true, true, false]
+notna(s).values; // [true, false, false, true]
+
+// ββ DataFrame βββββββββββββββββββββββββββββββββββββββββββββββ
+const df = new DataFrame(new Map([
+ ["a", new Series({ data: [1, null, 3] })],
+ ["b", new Series({ data: [NaN, 5, 6] })],
+]));
+isna(df).col("a").values; // [false, true, false]
+isna(df).col("b").values; // [true, false, false]
+
+// ββ aliases βββββββββββββββββββββββββββββββββββββββββββββββββ
+isnull(null); // true (same as isna)
+notnull(42); // true (same as notna)
+
+// ββ fillna ββββββββββββββββββββββββββββββββββββββββββββββββββ
+fillna([1, null, NaN, 4], { value: 0 }); // [1, 0, 0, 4]
+fillna(s, { value: -1 }).values; // [1, -1, -1, 4]
+fillna(df, { value: 0 }).col("b").values; // [0, 5, 6]
+
+// ββ dropna ββββββββββββββββββββββββββββββββββββββββββββββββββ
+dropna([1, null, NaN, 3]); // [1, 3]
+dropna(s).values; // [1, 4]
+dropna(df).shape; // [1, 2] (row 0 dropped because b[0]=NaN, row 1 dropped because a[1]=null)
+dropna(df, { how: "all" }).shape; // drops only rows where ALL values are missing
+dropna(df, { axis: 1 }).columns.values; // drops columns that contain any missing value
+
+// ββ countna / countValid βββββββββββββββββββββββββββββββββββββ
+countna([1, null, NaN, 3]); // 2
+countValid([1, null, NaN, 3]); // 2
+
+
+
+
+
+
diff --git a/playground/numeric_extended.html b/playground/numeric_extended.html
new file mode 100644
index 00000000..14cc4990
--- /dev/null
+++ b/playground/numeric_extended.html
@@ -0,0 +1,353 @@
+
+
+
+
+
+ tsb β Numeric Utilities (digitize, histogram, linspace, arange, zscoreβ¦)
+
+
+
+ π’ Numeric Utilities
+
+ β back to index
+
+
+ tsb ships numpy/scipy-style numeric utility functions β all implemented
+ from scratch with no external dependencies:
+ digitize, histogram, linspace, arange,
+ percentileOfScore, zscore, minMaxNormalize,
+ coefficientOfVariation.
+
+
+
+
digitize β bin values
+
+ Map each value to the index of the bin it falls into. Mirrors numpy.digitize.
+ Indices are 0-based; values below the first edge return -1.
+
+
import { digitize, seriesDigitize, Series } from "tsb";
+
+// Find which [0,33), [33,66), [66,100] bucket each score belongs to
+const scores = [15, 45, 70, 33, 100];
+const edges = [33, 66, 100];
+
+const bins = digitize(scores, edges);
+// → [-1, 1, 2, 0, 2]
+// 15 < 33 → bin -1 (below first edge)
+// 45 ∈ [33,66) → bin 1
+// 70 ∈ [66,100) → bin 2
+// 33 ∈ [33,66) → bin 0 (33 < 66, right=false default)
+// 100 = last → bin 2
+
+// Series version β preserves index
+const s = new Series({ data: [15, 45, 70], index: ["Alice","Bob","Carol"] });
+seriesDigitize(s, [33, 66, 100]);
+// Series: Aliceβ-1, Bobβ1, Carolβ2
+
Runningβ¦
+
+
+
+
histogram β frequency counts
+
Count how many values fall in each bin. Mirrors numpy.histogram.
+
import { histogram } from "tsb";
+
+const data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+
+// Default: 10 equal-width bins
+const { counts, binEdges } = histogram(data);
+
+// Custom: 5 bins, density normalised
+const { counts: d, binEdges: e } = histogram(data, { bins: 5, density: true });
+
+// Explicit edges
+histogram(data, { binEdges: [1, 4, 7, 10] });
+// counts: [ 3, 3, 4 ]
+
Runningβ¦
+
+
+
+
linspace & arange β number sequences
+
Generate evenly-spaced sequences, mirroring numpy.linspace and numpy.arange.
+
import { linspace, arange } from "tsb";
+
+// 5 values from 0 to 1 (inclusive)
+linspace(0, 1, 5);
+// β [0, 0.25, 0.5, 0.75, 1]
+
+// 0..4
+arange(5);
+// β [0, 1, 2, 3, 4]
+
+// From 2 to 10, step 2
+arange(2, 10, 2);
+// β [2, 4, 6, 8]
+
+// Descending
+arange(5, 0, -1);
+// β [5, 4, 3, 2, 1]
+
Runningβ¦
+
+
+
+
percentileOfScore β percentile rank
+
+ Compute what percentile a given score falls at within a dataset.
+ Mirrors scipy.stats.percentileofscore.
+
+
import { percentileOfScore } from "tsb";
+
+const grades = [55, 60, 70, 75, 80, 85, 90, 95];
+
+// What percentile is a score of 75?
+percentileOfScore(grades, 75); // 50 (rank — default)
+percentileOfScore(grades, 75, "weak"); // 50 (≤ 75: 4/8 = 50%)
+percentileOfScore(grades, 75, "strict"); // 37.5 (< 75: 3/8 = 37.5%)
+
Runningβ¦
+
+
+
+
zscore β standardisation
+
+ Transform values to zero mean and unit variance. Mirrors scipy.stats.zscore.
+ Missing values are propagated; zero-variance data returns all NaN.
+
+
import { zscore, Series } from "tsb";
+
+const s = new Series({ data: [2, 4, 4, 4, 5, 5, 7, 9], name: "values" });
+const z = zscore(s);
+
+// z.values β [-1.5, -0.5, -0.5, -0.5, 0, 0, 1, 2]
+
+// With sample std (ddof=1)
+const zSample = zscore(s, { ddof: 1 });
+
Runningβ¦
+
+
+
+
minMaxNormalize β scale to [0, 1]
+
+ Scale all values to the interval [0, 1] (or a custom range).
+ Mirrors sklearn MinMaxScaler.
+
+
import { minMaxNormalize, Series } from "tsb";
+
+const s = new Series({ data: [0, 25, 50, 75, 100] });
+minMaxNormalize(s).values;
+// β [0, 0.25, 0.5, 0.75, 1]
+
+// Scale to [-1, 1]
+minMaxNormalize(s, { featureRangeMin: -1, featureRangeMax: 1 }).values;
+// β [-1, -0.5, 0, 0.5, 1]
+
Runningβ¦
+
+
+
+
coefficientOfVariation β relative spread
+
+ Dimensionless measure of dispersion: std / |mean|.
+ Useful for comparing spread across datasets with different units.
+
+
import { coefficientOfVariation, Series } from "tsb";
+
+// Dataset A: [10, 20, 30] mean=20, std=10 β CV=0.5
+coefficientOfVariation(new Series({ data: [10, 20, 30] }));
+
+// Dataset B: [100, 200, 300] same shape, higher scale β CV=0.5
+coefficientOfVariation(new Series({ data: [100, 200, 300] }));
+
+// CV with population std
+coefficientOfVariation(new Series({ data: [1, 2, 3, 4, 5] }), { ddof: 0 });
+
Runningβ¦
+
+
+
+
+
diff --git a/playground/pipe_apply.html b/playground/pipe_apply.html
new file mode 100644
index 00000000..25f10a21
--- /dev/null
+++ b/playground/pipe_apply.html
@@ -0,0 +1,276 @@
+
+
+
+
+
+ tsb β pipe_apply: functional pipeline & apply utilities
+
+
+
+ β tsb playground
+
+ pipe_apply β Functional Pipeline & Apply Utilities
+
+ Standalone equivalents of the pandas
+ DataFrame.pipe()
+ /
+ Series.pipe()
+ chaining pattern plus various
+ apply()
+ /
+ applymap()
+ operations β usable without method-call syntax.
+
+
+
+ Why standalone? pandas chains operations via methods:
+ df.pipe(fn1).pipe(fn2). tsb provides a module-level
+ pipe(value, fn1, fn2, β¦) that works on any value,
+ not just DataFrames. All functions are pure β inputs are never mutated.
+
+
+ API Summary
+
+
+ Function Pandas equivalent Description
+
+
+
+ pipe(value, fn1, fn2, β¦)
+ df.pipe(fn).pipe(fn2)
+ Variadic type-safe pipeline β passes value through fns left-to-right
+
+
+ seriesApply(s, fn)
+ s.apply(fn)
+ Element-wise; fn receives (value, label, position)
+
+
+ seriesTransform(s, fn)
+ s.transform(fn)
+ Element-wise scalarβscalar; simpler than seriesApply
+
+
+ dataFrameApply(df, fn, axis?)
+ df.apply(fn, axis=0|1)
+ Apply fn to each column (axis=0) or row (axis=1) β Series of results
+
+
+ dataFrameApplyMap(df, fn)
+ df.applymap(fn) / df.map(fn)
+ Apply fn to every cell; fn receives (value, rowLabel, colName)
+
+
+ dataFrameTransform(df, fn)
+ df.transform(fn)
+ Replace each column with fn(col) β must return same-length Series
+
+
+ dataFrameTransformRows(df, fn)
+ df.apply(fn, axis=1, result_type='expand')
+ Replace each row with fn(rowRecord) β partial updates allowed
+
+
+
+
+ pipe β functional pipeline
+
+ import { pipe } from "tsb";
+import { DataFrame } from "tsb";
+
+// Type-safe pipeline with up to 8 steps (return type inferred at each step)
+const result = pipe(
+ rawData,
+ (df) => df.dropna(), // DataFrame β DataFrame
+ (df) => df.assign({ z: df.col("x").add(df.col("y")).values }), // DataFrame β DataFrame
+ (df) => df.head(10), // DataFrame β DataFrame
+ (df) => df.sum(), // DataFrame β Series
+);
+
+// Works on any value β not just DataFrames
+const n = pipe(
+ 3,
+ (x) => x + 1, // 4
+ (x) => x * x, // 16
+ (x) => x - 1, // 15
+);
+// n === 15
+
+ seriesApply β element-wise apply
+
+ import { seriesApply, seriesTransform } from "tsb";
+import { Series } from "tsb";
+
+const temps = new Series({ data: [22.1, 23.5, null, 21.8], name: "temp_C" });
+
+// Element-wise with (value, label, position) context
+const fahrenheit = seriesApply(temps, (v) => v === null ? null : (v as number) * 9/5 + 32);
+// [71.78, 74.3, null, 71.24]
+
+// Simple scalar transform (no label/position needed)
+const rounded = seriesTransform(temps, (v) => v === null ? null : Math.round(v as number));
+// [22, 24, null, 22]
+
+// Using position to build cumulative logic
+const withPos = seriesApply(
+ new Series({ data: [10, 20, 30] }),
+ (v, _label, pos) => (v as number) + pos * 100,
+);
+// [10, 120, 230]
+
+ dataFrameApply β column/row aggregation
+
+ import { dataFrameApply } from "tsb";
+import { DataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+ score: [85, 92, 78, 95],
+ weight: [1.0, 1.2, 0.8, 1.5],
+});
+
+// axis=0 (default): apply fn to each column β Series indexed by column names
+const colMax = dataFrameApply(df, (col) => col.max() ?? null);
+// colMax.at("score") === 95
+// colMax.at("weight") === 1.5
+
+// axis=1: apply fn to each row β Series indexed by row labels
+const weightedScore = dataFrameApply(
+ df,
+ (row) => (row.at("score") as number) * (row.at("weight") as number),
+ 1,
+);
+// [85, 110.4, 62.4, 142.5]
+
+ dataFrameApplyMap β element-wise cell transform
+
+ import { dataFrameApplyMap } from "tsb";
+import { DataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+ a: [1, -2, 3],
+ b: [-4, 5, -6],
+});
+
+// Zero out all negative values (like pandas df.applymap(lambda x: max(x, 0)))
+const clipped = dataFrameApplyMap(df, (v) => {
+ return typeof v === "number" && v < 0 ? 0 : v;
+});
+// a: [1, 0, 3]
+// b: [0, 5, 0]
+
+// fn receives full context: (value, rowLabel, colName)
+const tagged = dataFrameApplyMap(df, (v, row, col) => `${col}[${row}]=${v}`);
+// a: ["a[0]=1", "a[1]=-2", "a[2]=3"]
+// b: ["b[0]=-4", "b[1]=5", "b[2]=-6"]
+
+ dataFrameTransform β column-wise transform
+
+ import { dataFrameTransform, seriesTransform } from "tsb";
+import { DataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+ x: [1, 2, 3, 4, 5],
+ y: [10, 20, 30, 40, 50],
+});
+
+// Z-score normalize each column
+const normalized = dataFrameTransform(df, (col) => {
+ const mu = col.mean();
+ const sd = col.std();
+ return seriesTransform(col, (v) =>
+ typeof v === "number" && sd > 0 ? (v - mu) / sd : v
+ );
+});
+
+// Bin each column into quartiles
+const binned = dataFrameTransform(df, (col) => {
+ const q1 = col.quantile(0.25);
+ const q2 = col.quantile(0.5);
+ const q3 = col.quantile(0.75);
+ return seriesTransform(col, (v) => {
+ const n = v as number;
+ if (n <= q1) return "Q1";
+ if (n <= q2) return "Q2";
+ if (n <= q3) return "Q3";
+ return "Q4";
+ });
+});
+
+ dataFrameTransformRows β row-wise transform
+
+ import { dataFrameTransformRows } from "tsb";
+import { DataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+ first: ["alice", "bob", "carol"],
+ last: ["smith", "jones", "white"],
+ score: [88, 75, 92],
+});
+
+// Normalise scores relative to the row's position (illustrative)
+const updated = dataFrameTransformRows(df, (row, _label, pos) => ({
+ // Only return keys you want to change β others are preserved as-is
+ score: (row["score"] as number) + pos,
+}));
+// scores become [88, 76, 94]
+// first and last columns are unchanged
+
+// Full row transformation (compute full name)
+const withFull = dataFrameTransformRows(df, (row) => ({
+ first: row["first"],
+ last: row["last"],
+ score: row["score"],
+ full: `${row["first"]} ${row["last"]}`,
+}));
+
+ Combining pipe + apply
+
+ import { pipe, dataFrameApplyMap, dataFrameTransform, seriesTransform } from "tsb";
+import { DataFrame } from "tsb";
+
+const raw = DataFrame.fromColumns({
+ price: [9.99, -1, 24.5, null, 49.0],
+ quantity: [3, 5, null, 2, 1],
+});
+
+// Clean β impute β normalise in one readable pipeline
+const clean = pipe(
+ raw,
+ // 1. zero out invalid prices/quantities
+ (df) => dataFrameApplyMap(df, (v) =>
+ v === null || (typeof v === "number" && v < 0) ? 0 : v
+ ),
+ // 2. add derived revenue column
+ (df) => df.assign({
+ revenue: df.col("price").mul(df.col("quantity")).values,
+ }),
+ // 3. round everything to 2 dp
+ (df) => dataFrameTransform(df, (col) =>
+ seriesTransform(col, (v) =>
+ typeof v === "number" ? Math.round(v * 100) / 100 : v
+ )
+ ),
+);
+
+
+
+ pandas DataFrame.pipe docs
+ Β·
+ pandas DataFrame.apply docs
+ Β·
+ tsb on GitHub
+
+
+
diff --git a/playground/rolling_apply.html b/playground/rolling_apply.html
new file mode 100644
index 00000000..b307cdbd
--- /dev/null
+++ b/playground/rolling_apply.html
@@ -0,0 +1,225 @@
+
+
+
+
+
+ tsb β Rolling Apply & Multi-Aggregation
+
+
+
+ tsb β Rolling Apply & Multi-Aggregation
+
+ Standalone functions for applying custom aggregation logic over sliding
+ windows, mirroring
+
+ pandas.Series.rolling().apply()
+
+ and
+
+ Rolling.agg()
+ .
+
+
+ 1. rollingApply β Custom Function Per Window
+
+ Apply any aggregation function to each rolling window. The function
+ receives the valid (non-null, non-NaN) numeric values
+ in the window and must return a single number.
+
+ import { rollingApply } from "tsb";
+
+const prices = new Series({ data: [10, 12, 11, 15, 14, 16], name: "price" });
+
+// Custom: range (max - min) over each 3-day window
+const range = (w) => Math.max(...w) - Math.min(...w);
+
+rollingApply(prices, 3, range).toArray();
+// [null, null, 2, 4, 4, 2]
+// ↑↑ insufficient data (need 3 observations)
+
+
+
Options
+
+
+ Option Default Description
+
+
+ minPeriodswindowMinimum valid observations to compute (null otherwise)
+ centerfalseCentre the window (symmetric) instead of trailing
+ rawfalsePass full window including nulls (filtered to valid nums before fn call)
+
+
+
+
+ // minPeriods=1 β start computing from the very first position
+rollingApply(prices, 3, range, { minPeriods: 1 }).toArray();
+// [0, 2, 2, 4, 4, 2]
+
+// center=true β symmetric window around each point
+rollingApply(prices, 3, range, { center: true }).toArray();
+// [null, 2, 4, 4, 2, null]
+
+ 2. rollingAgg β Multiple Aggregations at Once
+
+ Apply several named aggregation functions in a single pass over a Series,
+ returning a DataFrame where each column holds one
+ aggregation result.
+
+ import { rollingAgg } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4, 5, 6, 7, 8] });
+
+const result = rollingAgg(s, 3, {
+ mean: (w) => w.reduce((a, b) => a + b, 0) / w.length,
+ max: (w) => Math.max(...w),
+ min: (w) => Math.min(...w),
+ range:(w) => Math.max(...w) - Math.min(...w),
+});
+
+// result is a DataFrame with columns: "mean", "max", "min", "range"
+// result.col("mean").toArray() β [null, null, 2, 3, 4, 5, 6, 7]
+// result.col("range").toArray() β [null, null, 2, 2, 2, 2, 2, 2]
+
+
+ Pandas equivalent:
+ s.rolling(3).agg({"mean": np.mean, "max": np.max, "min": np.min})
+
+
+ 3. dataFrameRollingApply β Apply Per Column
+
+ Apply a single custom function independently to each column of a
+ DataFrame, returning a new DataFrame of the same shape.
+
+ import { dataFrameRollingApply } from "tsb";
+
+const df = DataFrame.fromColumns({
+ open: [100, 102, 101, 105, 103],
+ close: [101, 103, 100, 106, 104],
+});
+
+// Pairwise range within each 2-step window per column
+const range = (w) => Math.max(...w) - Math.min(...w);
+
+dataFrameRollingApply(df, 2, range);
+// open close
+// 0 null null
+// 1 2 2
+// 2 1 3
+// 3 4 6
+// 4 2 2
+
+ 4. dataFrameRollingAgg β Multi-Agg Per Column
+
+ Apply multiple named aggregation functions to every column of a
+ DataFrame. The result has columns named
+ {originalColumn}_{aggName}.
+
+ import { dataFrameRollingAgg } from "tsb";
+
+const df = DataFrame.fromColumns({
+ A: [1, 2, 3, 4, 5],
+ B: [10, 20, 30, 40, 50],
+});
+
+const out = dataFrameRollingAgg(df, 3, {
+ sum: (w) => w.reduce((a, b) => a + b, 0),
+ mean: (w) => w.reduce((a, b) => a + b, 0) / w.length,
+});
+
+// Columns: "A_sum", "A_mean", "B_sum", "B_mean"
+// A_sum: [null, null, 6, 9, 12]
+// A_mean: [null, null, 2, 3, 4]
+// B_sum: [null, null, 60, 90, 120]
+// B_mean: [null, null, 20, 30, 40]
+
+ Comparison with pandas
+
+
+ tsb pandas
+
+
+
+ rollingApply(s, w, fn)
+ s.rolling(w).apply(fn, raw=True)
+
+
+ rollingApply(s, w, fn, {minPeriods:1})
+ s.rolling(w, min_periods=1).apply(fn)
+
+
+ rollingAgg(s, w, {f1, f2})
+ s.rolling(w).agg({"f1": f1, "f2": f2})
+
+
+ dataFrameRollingApply(df, w, fn)
+ df.rolling(w).apply(fn)
+
+
+ dataFrameRollingAgg(df, w, {f1, f2})
+ df.rolling(w).agg({"f1": f1, "f2": f2})
+
+
+
+
+ Use case: Bollinger Band width
+ import { rollingAgg } from "tsb";
+
+// Bollinger Band width = (upper - lower) / middle
+// where upper = mean + 2Β·std, lower = mean - 2Β·std
+const prices = new Series({
+ data: [20, 21, 22, 20, 19, 21, 23, 24, 22, 21],
+ name: "price",
+});
+
+const stats = rollingAgg(prices, 5, {
+ mean: (w) => w.reduce((a, b) => a + b, 0) / w.length,
+ std: (w) => {
+ const m = w.reduce((a, b) => a + b, 0) / w.length;
+ return Math.sqrt(w.reduce((a, b) => a + (b - m) ** 2, 0) / (w.length - 1));
+ },
+});
+
+// Bollinger Band width = 4 * std / mean
+const bw = stats.col("std").toArray().map((std, i) => {
+ const mean = stats.col("mean").toArray()[i];
+ if (std === null || mean === null || mean === 0) return null;
+ return (4 * (std as number)) / (mean as number);
+});
+
+
+ β Back to tsb playground index
+
+
+
diff --git a/playground/string_ops.html b/playground/string_ops.html
new file mode 100644
index 00000000..5d9fff07
--- /dev/null
+++ b/playground/string_ops.html
@@ -0,0 +1,282 @@
+
+
+
+
+
+ tsb β String Operations
+
+
+
+
+ tsb
+ string_ops
+ Standalone string operations for Series and arrays
+
+
+
+ string_ops provides module-level string functions that complement the
+ Series.str accessor. All functions accept a Series, a
+ string[], or a scalar string.
+
+
+
+
+
strNormalize β Unicode normalisation
+
Normalise every element to NFC, NFD, NFKC, or NFKD. Useful when mixing text
+ from different sources (e.g. macOS NFD vs Windows NFC).
+
+
+ Input strings (one per line)
+
+
+
+ Normalization form
+
+ NFC (compose)
+ NFD (decompose)
+ NFKC (compat compose)
+ NFKD (compat decompose)
+
+
+
+
Run
+
+
+
+
+
+
strGetDummies β one-hot encode by delimiter
+
Split each string by a delimiter and produce a binary indicator DataFrame β
+ one column per unique token. Equivalent to pandas.Series.str.get_dummies().
+
+
Run
+
+
+
+
+
+
strExtractAll β extract all regex matches
+
Find every non-overlapping regex match in each element. Returns a JSON-encoded
+ array of match arrays per element β parse with JSON.parse.
+
+
Run
+
+
+
+
+
+
strRemovePrefix / strRemoveSuffix
+
Strip a leading or trailing string from elements only when it is present.
+
+
Run
+
+
+
+
+
+
strTranslate β character-level substitution
+
Replace or delete individual characters using a lookup table.
+ Format: one mapping per line as from=to or from=
+ to delete.
+
+
+ Input strings (one per line)
+
+
+
+ Translation table (from=to, one per line)
+
+
+
+
Run
+
+
+
+
+
+
strCharWidth & strByteLength β display & byte widths
+
+ strCharWidth counts columns for terminal display (CJK chars count as 2).
+ strByteLength counts UTF-8 bytes (useful for byte-limited APIs).
+
+
Input strings (one per line)
+
+
Run
+
+
+
+
+
+
+
diff --git a/playground/string_ops_extended.html b/playground/string_ops_extended.html
new file mode 100644
index 00000000..81bdaddb
--- /dev/null
+++ b/playground/string_ops_extended.html
@@ -0,0 +1,413 @@
+
+
+
+
+
+ tsb β Extended String Operations
+
+
+
+
+ tsb
+ string_ops_extended
+ Advanced standalone string operations: split-expand, extract, partition, multi-replace, indent, dedent
+
+
+
+ string_ops_extended adds advanced string utilities that complement
+ string_ops and the Series.str accessor. All functions accept
+ a Series, an array, or a scalar string.
+
+
+
+
+
strSplitExpand β split and expand to DataFrame columns
+
+ Split each element by a delimiter and expand the parts into a DataFrame
+ with one column per position. Mirrors pandas.Series.str.split(expand=True).
+ Shorter rows are padded with null.
+
+
+
Run strSplitExpand
+
+
+
+
+
+
strExtractGroups β extract regex capture groups
+
+ Extract regex capture groups from each element into a DataFrame.
+ Named groups ((?<name>...)) become column names; unnamed groups
+ become 0, 1, β¦ Non-matching rows produce null.
+
+
+
+ Input strings (one per line)
+
+
+
+
Pattern (supports named groups)
+
+
Use (?<name>...) for named capture groups.
+
+
+
Run strExtractGroups
+
+
+
+
+
+
strPartition / strRPartition β split into (before, sep, after)
+
+ strPartition splits at the first occurrence of the separator;
+ strRPartition splits at the last . When the separator is not
+ found, strPartition returns [s, "", ""] and
+ strRPartition returns ["", "", s].
+
+
+
+ Input strings (one per line)
+
+
+
+ Separator
+
+
+
+
Run strPartition & strRPartition
+
+
+
+
+
+
strMultiReplace β apply multiple replacements in sequence
+
+ Apply an ordered list of {pat, repl} pairs to each element.
+ Each replacement is applied to the result of the previous one.
+ Patterns can be string literals (replaced globally) or RegExp objects.
+
+
+
+ Input strings (one per line)
+
+
+
+ Replacements (JSON array of {pat, repl} pairs)
+
+
+
+
Run strMultiReplace
+
+
+
+
+
+
strIndent / strDedent β line-level indentation utilities
+
+ strIndent adds a prefix to every non-empty line (mirrors
+ textwrap.indent).
+ strDedent removes the common leading whitespace from all lines
+ (mirrors textwrap.dedent).
+
+
+
+ Input text (use literal \n for newlines)
+
+
+
+ Indent prefix (for strIndent)
+
+
+
+
Run strIndent & strDedent
+
+
+
+
+
+
+
+
diff --git a/playground/to_from_dict.html b/playground/to_from_dict.html
new file mode 100644
index 00000000..a8ca3e88
--- /dev/null
+++ b/playground/to_from_dict.html
@@ -0,0 +1,122 @@
+
+
+
+
+
+ tsb β toDictOriented / fromDictOriented
+
+
+
+ β tsb playground
+
+ toDictOriented / fromDictOriented
+
+ Convert a DataFrame to and from dictionary structures with flexible orientation β mirrors
+
+ pandas.DataFrame.to_dict(orient=...) and
+
+ pandas.DataFrame.from_dict(orient=...) .
+
+
+ Supported orientations β toDictOriented
+
+ Orient Return type Description
+
+ "dict" / "columns"Record<col, Record<rowLabel, value>>Nested column β row-label β value map
+ "list"Record<col, value[]>Column name β array of values
+ "series"Record<col, Series>Column name β Series object
+ "split"{ index, columns, data }Serialisable split structure
+ "tight"{ index, columns, data, index_names, column_names }Split plus axis-name metadata
+ "records"Record<col, value>[]Array of row objects
+ "index"Record<rowLabel, Record<col, value>>Row-label β column β value
+
+
+
+ Supported orientations β fromDictOriented
+
+ Orient Input shape
+
+ "columns" (default){ col: value[] }
+ "index"{ rowLabel: { col: value } }
+ "split"{ index?, columns, data }
+ "tight"Same as "split", extra fields ignored
+
+
+
+ Example β all orientations
+ import { DataFrame } from "tsb";
+import { toDictOriented, fromDictOriented } from "tsb";
+
+const df = DataFrame.fromColumns(
+ { name: ["Alice", "Bob"], score: [92, 85] },
+ { index: new Index(["r0", "r1"]) },
+);
+
+// "dict" / "columns"
+toDictOriented(df, "dict");
+// { name: { r0: "Alice", r1: "Bob" }, score: { r0: 92, r1: 85 } }
+
+// "list"
+toDictOriented(df, "list");
+// { name: ["Alice", "Bob"], score: [92, 85] }
+
+// "records"
+toDictOriented(df, "records");
+// [ { name: "Alice", score: 92 }, { name: "Bob", score: 85 } ]
+
+// "split"
+toDictOriented(df, "split");
+// { index: ["r0", "r1"], columns: ["name", "score"], data: [["Alice", 92], ["Bob", 85]] }
+
+// "index"
+toDictOriented(df, "index");
+// { r0: { name: "Alice", score: 92 }, r1: { name: "Bob", score: 85 } }
+
+// fromDictOriented β columns (default)
+fromDictOriented({ name: ["Alice", "Bob"], score: [92, 85] });
+
+// fromDictOriented β index
+fromDictOriented(
+ { r0: { name: "Alice", score: 92 }, r1: { name: "Bob", score: 85 } },
+ "index",
+);
+
+// fromDictOriented β split (round-trip)
+const split = toDictOriented(df, "split");
+const df2 = fromDictOriented(split, "split");
+// df2 is equivalent to df
+
+
+ Missing values
+
+ Missing values (null / undefined) are preserved as null
+ in all orientations. When using fromDictOriented with "index"
+ orientation, any column that is absent from a given row object is filled with null.
+
+
+ Type signatures
+ function toDictOriented(df: DataFrame, orient: "dict" | "columns"): Record<string, Record<string, Scalar>>;
+function toDictOriented(df: DataFrame, orient: "list"): Record<string, Scalar[]>;
+function toDictOriented(df: DataFrame, orient: "series"): Record<string, Series<Scalar>>;
+function toDictOriented(df: DataFrame, orient: "split"): DictSplit;
+function toDictOriented(df: DataFrame, orient: "tight"): DictTight;
+function toDictOriented(df: DataFrame, orient: "records"): Record<string, Scalar>[];
+function toDictOriented(df: DataFrame, orient: "index"): Record<string, Record<string, Scalar>>;
+
+function fromDictOriented(data: Record<string, readonly Scalar[]>, orient?: "columns"): DataFrame;
+function fromDictOriented(data: Record<string, Record<string, Scalar>>, orient: "index"): DataFrame;
+function fromDictOriented(data: SplitInput, orient: "split" | "tight"): DataFrame;
+
+
+
diff --git a/playground/where_mask.html b/playground/where_mask.html
new file mode 100644
index 00000000..89a50a05
--- /dev/null
+++ b/playground/where_mask.html
@@ -0,0 +1,220 @@
+
+
+
+
+
+ tsb β where / mask: Conditional Selection
+
+
+
+ tsb β where / mask: Conditional Selection
+
+ seriesWhere / seriesMask and their DataFrame equivalents
+ allow element-wise conditional replacement β the TypeScript equivalents of
+ pandas.Series.where
+ and
+ pandas.Series.mask .
+
+
+
+ Quick rule:
+ where(cond) β keep where cond is true , replace elsewhere.
+ mask(cond) β keep where cond is false , replace elsewhere.
+ They are exact inverses of each other.
+
+
+ 1. seriesWhere β Boolean Array Condition
+
+ Pass a boolean[] to keep values at true positions, replace
+ the rest with null (or a custom other value).
+
+ import { Series, seriesWhere } from "tsb";
+
+const scores = new Series({ data: [42, 91, 67, 55, 88] });
+const highScores = seriesWhere(scores, [false, true, false, false, true]);
+// Series [null, 91, null, null, 88]
+
+// Custom replacement value
+const clamped = seriesWhere(scores, [false, true, false, false, true], { other: 0 });
+// Series [0, 91, 0, 0, 88]
+
+ 2. seriesWhere β Callable Condition
+
+ Pass a function that receives the Series and returns a boolean[] or
+ Series<boolean>. This avoids computing the condition array manually.
+
+ import { Series, seriesWhere } from "tsb";
+
+const temps = new Series({ data: [-5, 12, 23, -3, 8] });
+
+// Keep only values above freezing
+const aboveFreezing = seriesWhere(
+ temps,
+ (s) => s.values.map((v) => (v as number) > 0),
+);
+// Series [null, 12, 23, null, 8]
+
+// Replace with 0 instead of null
+const noFreeze = seriesWhere(
+ temps,
+ (s) => s.values.map((v) => (v as number) > 0),
+ { other: 0 },
+);
+// Series [0, 12, 23, 0, 8]
+
+ 3. seriesMask β The Inverse
+
+ mask replaces positions where the condition is true
+ (the opposite of where). Use it to "blank out" outliers or invalid values.
+
+ import { Series, seriesMask } from "tsb";
+
+const data = new Series({ data: [1, 2, 3, 4, 5] });
+
+// Mask out values greater than 3
+const masked = seriesMask(
+ data,
+ (s) => s.values.map((v) => (v as number) > 3),
+ { other: null },
+);
+// Series [1, 2, 3, null, null]
+
+ 4. dataFrameWhere β Element-Wise on DataFrames
+
+ Pass a boolean DataFrame or a callable that returns one.
+ Columns and row labels are aligned by name.
+
+ import { DataFrame, dataFrameWhere } from "tsb";
+
+const df = DataFrame.fromColumns({
+ temp_c: [22, -3, 18, -7, 30],
+ humidity: [55, 80, 62, 75, 45],
+});
+
+// Keep only valid summer readings (temp > 0)
+const condDf = DataFrame.fromColumns({
+ temp_c: [true, false, true, false, true],
+ humidity: [true, false, true, false, true],
+});
+
+const summer = dataFrameWhere(df, condDf);
+// DataFrame:
+// temp_c [22, null, 18, null, 30 ]
+// humidity [55, null, 62, null, 45 ]
+
+ 5. dataFrameWhere β Callable Condition
+ import { DataFrame, dataFrameWhere } from "tsb";
+
+const df = DataFrame.fromColumns({
+ a: [1, 2, 3, 4, 5],
+ b: [10, 20, 30, 40, 50],
+});
+
+// Keep only values > 2 (column-wise threshold)
+const result = dataFrameWhere(df, (d) => {
+ const condCols: Record<string, boolean[]> = {};
+ for (const col of d.columns) {
+ condCols[col as string] = d.col(col as string).values.map(
+ (v) => (v as number) > 2
+ );
+ }
+ return DataFrame.fromColumns(condCols);
+});
+// DataFrame:
+// a: [null, null, 3, 4, 5]
+// b: [10, 20, 30, 40, 50]
+
+ 6. dataFrameMask β DataFrame Mask
+ import { DataFrame, dataFrameMask } from "tsb";
+
+const df = DataFrame.fromColumns({
+ sales: [100, 200, 50, 300, 80],
+ profit: [10, 40, -5, 60, -2],
+});
+
+// Mask out (replace) rows with negative profit
+const cleaned = dataFrameMask(
+ df,
+ (d) => {
+ const condCols: Record<string, boolean[]> = {};
+ for (const col of d.columns) {
+ condCols[col as string] = d.col(col as string).values.map(
+ (v) => (v as number) < 0
+ );
+ }
+ return DataFrame.fromColumns(condCols);
+ },
+ { other: 0 },
+);
+// DataFrame:
+// sales: [100, 200, 50, 300, 80]
+// profit: [10, 40, 0, 60, 0 ]
+
+ Label-Aligned Series Condition
+
+ When you pass a Series<boolean> as the condition, values are aligned
+ by label , not position. Labels absent from the condition series are treated
+ as false.
+
+ import { Series, seriesWhere } from "tsb";
+
+const prices = new Series({ data: [10, 20, 30], index: ["a", "b", "c"] });
+const valid = new Series<boolean>({ data: [false, true], index: ["a", "b"] });
+
+// Only "b" is in the condition with value=true; "a"=false, "c" missingβfalse
+const result = seriesWhere(prices, valid, { other: -1 });
+// Series { a: -1, b: 20, c: -1 }
+
+ API Reference
+
+ Function Keeps when cond is⦠Replaces with
+ seriesWhere(s, cond, {other})trueother (default null)
+ seriesMask(s, cond, {other})falseother (default null)
+ dataFrameWhere(df, cond, {other})trueother (default null)
+ dataFrameMask(df, cond, {other})falseother (default null)
+
+
+ Condition types
+
+ Type Series ops DataFrame ops
+ Boolean array ✓ positional ✗
+ Series<boolean> ✓ label-aligned ✗
+ DataFrame (boolean) ✗ ✓ label-aligned
+ Callable ✓ receives Series ✓ receives DataFrame
+
+
+ β Back to tsb playground index
+
+
diff --git a/playground/wide_to_long.html b/playground/wide_to_long.html
new file mode 100644
index 00000000..b30980cd
--- /dev/null
+++ b/playground/wide_to_long.html
@@ -0,0 +1,113 @@
+
+
+
+
+
+ tsb β wideToLong
+
+
+
+ β tsb playground
+
+ wideToLong
+
+ Reshape a wide-format DataFrame to long format by collapsing stub-prefixed column
+ groups into rows β mirrors
+
+ pandas.wide_to_long() .
+
+
+ Concept
+
+ Given a wide DataFrame where repeated measurements are spread across columns with a
+ common stub prefix and a numeric (or other) suffix β e.g. score_2021,
+ score_2022 β wideToLong pivots those column groups into rows.
+ One row per original row per unique suffix is produced.
+
+
+ Example β numeric suffixes
+ import { DataFrame } from "tsb";
+import { wideToLong } from "tsb";
+
+const df = DataFrame.fromColumns({
+ id: ["x", "y"],
+ A1: [1, 2],
+ A2: [3, 4],
+ B1: [5, 6],
+ B2: [7, 8],
+});
+
+const long = wideToLong(df, ["A", "B"], "id", "num");
+
+// long.columns.values β ["id", "num", "A", "B"]
+// long.shape β [4, 4]
+//
+// id num A B
+// x 1 1 5
+// y 1 2 6
+// x 2 3 7
+// y 2 4 8
+
+
+ Example β separator and custom suffix
+ const df = DataFrame.fromColumns({
+ country: ["US", "UK"],
+ gdp_2020: [21e12, 2.7e12],
+ gdp_2021: [23e12, 3.1e12],
+ pop_2020: [331e6, 67e6],
+ pop_2021: [332e6, 68e6],
+});
+
+const long = wideToLong(df, ["gdp", "pop"], "country", "year", { sep: "_" });
+// long.shape β [4, 4] β 2 countries Γ 2 years
+// Columns: ["country", "year", "gdp", "pop"]
+
+
+ API reference
+ function wideToLong(
+ df: DataFrame,
+ stubnames: string | string[],
+ i: string | string[],
+ j: string,
+ options?: WideToLongOptions,
+): DataFrame;
+
+interface WideToLongOptions {
+ sep?: string; // separator between stub and suffix, default ""
+ suffix?: string; // regex string matching suffix, default "\\d+"
+}
+
+
+ Parameters
+
+ Parameter Type Description
+
+ dfDataFrameSource DataFrame (not mutated)
+ stubnamesstring | string[]Prefix(es) shared by the wide column groups
+ istring | string[]Column(s) to keep as id variables (repeated per suffix)
+ jstringName of the new column holding the suffix values
+ options.sepstringSeparator between stub and suffix (default: "")
+ options.suffixstringRegex string matching the suffix (default: "\\d+")
+
+
+
+ Output layout
+
+ Output columns are always ordered: id cols , j , stub cols
+ (in the same order the stubs were passed). Suffixes are sorted numerically when they are all
+ integers, otherwise lexicographically. Wide columns that are absent from the DataFrame are
+ filled with null.
+
+
+
diff --git a/playground/window_extended.html b/playground/window_extended.html
new file mode 100644
index 00000000..4232fa5d
--- /dev/null
+++ b/playground/window_extended.html
@@ -0,0 +1,304 @@
+
+
+
+
+
+ tsb β Rolling Extended Stats: sem, skew, kurt, quantile
+
+
+
+ tsb β Rolling Extended Statistics
+
+ Higher-order rolling window statistics extending the core
+
+ pandas.Series.rolling()
+
+ API:
+ sem , skew , kurt , and
+ quantile .
+
+
+ 1. rollingSem β Standard Error of the Mean
+
+ The standard error of the mean measures how much the sample mean
+ would vary across repeated samples. For a window of n values:
+
+ sem = std(ddof=1) / βn
+ Requires at least 2 valid observations per window.
+
+ import { rollingSem, Series } from "tsb";
+
+const s = new Series({ data: [2, 4, 4, 4, 5, 5, 7, 9], name: "x" });
+const sem3 = rollingSem(s, 3);
+// [null, null, 0.667, 0, 0.333, 0.333, 0.667, 1.155]
+
+
+
+
Live demo β sem with window=3
+
Comma-separated numbers (nulls accepted):
+
+
Window:
+
minPeriods:
+
Run
+
+
+
+ 2. rollingSkew β Fisher-Pearson Skewness
+
+ Skewness measures asymmetry of the distribution in each window.
+ Positive = right tail heavier; negative = left tail heavier.
+ Uses the unbiased Fisher-Pearson formula (same as pandas):
+
+ skew = [n/((n-1)(n-2))] Γ Ξ£[(xα΅’βxΜ)/s]Β³
+ Requires β₯ 3 valid observations.
+
+ import { rollingSkew, Series } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4, 5] });
+rollingSkew(s, 3);
+// [null, null, 0, 0, 0] β symmetric windows β zero skew
+
+
+
+
Live demo β skewness with window=4
+
+
Window:
+
Run
+
+
+
+ 3. rollingKurt β Excess Kurtosis
+
+ Kurtosis measures how heavy the tails are relative to a normal distribution.
+ The excess kurtosis subtracts 3, so a normal distribution gives 0.
+ Uses the Fisher (1930) unbiased formula:
+
+ kurt = [n(n+1)/((n-1)(n-2)(n-3))] Γ Ξ£[(xα΅’βxΜ)/s]β΄ β 3(n-1)Β²/((n-2)(n-3))
+ Requires β₯ 4 valid observations.
+
+ import { rollingKurt, Series } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4] });
+rollingKurt(s, 4);
+// [null, null, null, -1.2] β uniform distribution has kurt = -1.2
+
+
+
+
Live demo β excess kurtosis with window=5
+
+
Window:
+
Run
+
+
+
+ 4. rollingQuantile β Rolling Quantile
+
+ Computes any quantile within each sliding window using configurable
+ interpolation. When q = 0.5 this is identical to
+ rolling.median().
+
+
+ import { rollingQuantile, Series } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4, 5] });
+
+rollingQuantile(s, 0.5, 3); // rolling median: [null, null, 2, 3, 4]
+rollingQuantile(s, 0.25, 3); // [null, null, 1.5, 2.5, 3.5]
+rollingQuantile(s, 0.75, 3); // [null, null, 2.5, 3.5, 4.5]
+
+
+ Interpolation methods
+
+ Method Behaviour when q falls between two values
+
+ linear (default)Linear interpolation β same as NumPy / pandas default
+ lowerTake the lower of the two surrounding values
+ higherTake the higher of the two surrounding values
+ midpointArithmetic mean of the two surrounding values
+ nearestWhichever surrounding value is closest
+
+
+
+
+
+ Common Options
+
+ Option Type Default Description
+
+ minPeriodsnumber= window Minimum valid obs required per window
+ centerbooleanfalseCentre the window around each position
+
+
+
+
+ Note: Functions are pure β they return new Series objects
+ without modifying the input. Missing values (null, NaN)
+ are excluded from each window calculation.
+
+
+
+
+
diff --git a/src/core/api_types.ts b/src/core/api_types.ts
new file mode 100644
index 00000000..860d2050
--- /dev/null
+++ b/src/core/api_types.ts
@@ -0,0 +1,629 @@
+/**
+ * api_types β runtime type-checking predicates, mirroring `pandas.api.types`.
+ *
+ * Two groups of functions are provided:
+ *
+ * **Value-level predicates** β operate on arbitrary JavaScript values, equivalent
+ * to `pandas.api.types.is_scalar`, `is_list_like`, `is_number`, etc.
+ *
+ * **Dtype-level predicates** β accept a `Dtype` instance or a `DtypeName` string
+ * and answer questions about the dtype's kind, equivalent to
+ * `pandas.api.types.is_numeric_dtype`, `is_float_dtype`, etc.
+ *
+ * @example
+ * ```ts
+ * import { isScalar, isNumericDtype, Dtype } from "tsb";
+ * isScalar(42); // true
+ * isScalar([1, 2, 3]); // false
+ * isListLike([1, 2, 3]); // true
+ * isNumericDtype(Dtype.float64); // true
+ * isStringDtype("string"); // true
+ * ```
+ *
+ * @module
+ */
+
+import { Dtype } from "./dtype.ts";
+import type { DtypeName } from "../types.ts";
+
+// βββ internal helper ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/** Resolve a Dtype | DtypeName to a Dtype instance. */
+function resolveDtype(dtype: Dtype | DtypeName): Dtype {
+ if (dtype instanceof Dtype) {
+ return dtype;
+ }
+ return Dtype.from(dtype);
+}
+
+// βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+// VALUE-LEVEL PREDICATES
+// βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Return `true` if `val` is a scalar (not a collection).
+ *
+ * Scalars: `string`, `number`, `bigint`, `boolean`, `symbol`, `null`,
+ * `undefined`, and `Date` objects. Arrays, plain objects, `Map`, `Set`,
+ * iterables, and class instances other than `Date` are **not** scalars.
+ *
+ * Mirrors `pandas.api.types.is_scalar`.
+ *
+ * @example
+ * ```ts
+ * isScalar(42); // true
+ * isScalar("hello"); // true
+ * isScalar(null); // true
+ * isScalar([1, 2]); // false
+ * isScalar({ a: 1 }); // false
+ * ```
+ */
+export function isScalar(val: unknown): boolean {
+ if (val === null || val === undefined) {
+ return true;
+ }
+ const t = typeof val;
+ if (t === "string" || t === "number" || t === "bigint" || t === "boolean" || t === "symbol") {
+ return true;
+ }
+ if (val instanceof Date) {
+ return true;
+ }
+ return false;
+}
+
+/**
+ * Return `true` if `val` is "list-like" β i.e. iterable (but not a string)
+ * or has a non-negative integer `length` property.
+ *
+ * Mirrors `pandas.api.types.is_list_like`.
+ *
+ * @example
+ * ```ts
+ * isListLike([1, 2, 3]); // true
+ * isListLike(new Set([1])); // true
+ * isListLike("abc"); // false (strings excluded)
+ * isListLike(42); // false
+ * isListLike({ a: 1 }); // false
+ * ```
+ */
+export function isListLike(val: unknown): boolean {
+ if (val === null || val === undefined) {
+ return false;
+ }
+ if (typeof val === "string") {
+ return false;
+ }
+ // Has Symbol.iterator and is not a plain number/boolean/bigint/symbol
+ if (typeof val === "number" || typeof val === "boolean" || typeof val === "bigint" || typeof val === "symbol") {
+ return false;
+ }
+ if (typeof val === "object" || typeof val === "function") {
+ if (Symbol.iterator in (val as object)) {
+ return true;
+ }
+ const len = (val as Record)["length"];
+ if (typeof len === "number" && len >= 0 && Number.isInteger(len)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/**
+ * Return `true` if `val` is array-like β i.e. has a non-negative integer
+ * `length` property.
+ *
+ * Mirrors `pandas.api.types.is_array_like`.
+ *
+ * @example
+ * ```ts
+ * isArrayLike([1, 2]); // true
+ * isArrayLike("abc"); // true (strings have .length)
+ * isArrayLike(42); // false
+ * isArrayLike({}); // false
+ * ```
+ */
+export function isArrayLike(val: unknown): boolean {
+ if (val === null || val === undefined) {
+ return false;
+ }
+ if (typeof val === "string") {
+ return true;
+ }
+ if (typeof val !== "object" && typeof val !== "function") {
+ return false;
+ }
+ const len = (val as Record)["length"];
+ return typeof len === "number" && len >= 0 && Number.isInteger(len);
+}
+
+/**
+ * Return `true` if `val` is dict-like β a plain object (not an array, not a
+ * `Date`, not a class instance).
+ *
+ * Mirrors `pandas.api.types.is_dict_like`.
+ *
+ * @example
+ * ```ts
+ * isDictLike({ a: 1 }); // true
+ * isDictLike(new Map()); // true (has .get / .set)
+ * isDictLike([1, 2]); // false
+ * isDictLike("abc"); // false
+ * ```
+ */
+export function isDictLike(val: unknown): boolean {
+ if (val === null || val === undefined) {
+ return false;
+ }
+ if (typeof val !== "object") {
+ return false;
+ }
+ if (Array.isArray(val)) {
+ return false;
+ }
+ // Treat Map as dict-like (supports key lookup)
+ if (val instanceof Map) {
+ return true;
+ }
+ // Date is not dict-like
+ if (val instanceof Date) {
+ return false;
+ }
+ // Plain objects and other objects with properties
+ return true;
+}
+
+/**
+ * Return `true` if `val` is an iterator β i.e. has a callable `next` method.
+ *
+ * Mirrors `pandas.api.types.is_iterator`.
+ *
+ * @example
+ * ```ts
+ * isIterator([1, 2][Symbol.iterator]()); // true
+ * isIterator([1, 2]); // false
+ * ```
+ */
+export function isIterator(val: unknown): boolean {
+ if (val === null || val === undefined) {
+ return false;
+ }
+ if (typeof val !== "object" && typeof val !== "function") {
+ return false;
+ }
+ return typeof (val as Record)["next"] === "function";
+}
+
+/**
+ * Return `true` if `val` is a `number` (including `NaN` and `Β±Infinity`).
+ *
+ * Mirrors `pandas.api.types.is_number`.
+ *
+ * @example
+ * ```ts
+ * isNumber(3.14); // true
+ * isNumber(NaN); // true
+ * isNumber("3"); // false
+ * ```
+ */
+export function isNumber(val: unknown): val is number {
+ return typeof val === "number";
+}
+
+/**
+ * Return `true` if `val` is a `boolean`.
+ *
+ * Mirrors `pandas.api.types.is_bool`.
+ *
+ * @example
+ * ```ts
+ * isBool(true); // true
+ * isBool(1); // false
+ * ```
+ */
+export function isBool(val: unknown): val is boolean {
+ return typeof val === "boolean";
+}
+
+/**
+ * Return `true` if `val` is a `string`.
+ *
+ * Named `isStringValue` to distinguish from the dtype-level `isStringDtype`.
+ * Mirrors `pandas.api.types.is_string` (not to be confused with dtype checks).
+ *
+ * @example
+ * ```ts
+ * isStringValue("hello"); // true
+ * isStringValue(42); // false
+ * ```
+ */
+export function isStringValue(val: unknown): val is string {
+ return typeof val === "string";
+}
+
+/**
+ * Return `true` if `val` is a finite floating-point number (has a fractional
+ * component or is finite non-integer). `NaN`, `Β±Infinity` are **not** floats
+ * in the pandas sense.
+ *
+ * Mirrors `pandas.api.types.is_float`.
+ *
+ * @example
+ * ```ts
+ * isFloat(3.14); // true
+ * isFloat(3.0); // false (integer value)
+ * isFloat(NaN); // false
+ * isFloat(Infinity); // false
+ * ```
+ */
+export function isFloat(val: unknown): boolean {
+ if (typeof val !== "number") {
+ return false;
+ }
+ if (!Number.isFinite(val)) {
+ return false;
+ }
+ return val !== Math.trunc(val);
+}
+
+/**
+ * Return `true` if `val` is a finite integer-valued number.
+ *
+ * Mirrors `pandas.api.types.is_integer`.
+ *
+ * @example
+ * ```ts
+ * isInteger(3); // true
+ * isInteger(3.0); // true (integer value stored as float)
+ * isInteger(3.14); // false
+ * isInteger(NaN); // false
+ * ```
+ */
+export function isInteger(val: unknown): boolean {
+ return typeof val === "number" && Number.isInteger(val);
+}
+
+/**
+ * Return `true` if `val` is a `bigint`.
+ *
+ * @example
+ * ```ts
+ * isBigInt(42n); // true
+ * isBigInt(42); // false
+ * ```
+ */
+export function isBigInt(val: unknown): val is bigint {
+ return typeof val === "bigint";
+}
+
+/**
+ * Return `true` if `val` is a `RegExp`.
+ *
+ * Mirrors `pandas.api.types.is_re`.
+ *
+ * @example
+ * ```ts
+ * isRegExp(/abc/); // true
+ * isRegExp(new RegExp("x")); // true
+ * isRegExp("abc"); // false
+ * ```
+ */
+export function isRegExp(val: unknown): val is RegExp {
+ return val instanceof RegExp;
+}
+
+/**
+ * Return `true` if `val` can be compiled into a `RegExp` β i.e. it is either
+ * a `string` or already a `RegExp`.
+ *
+ * Mirrors `pandas.api.types.is_re_compilable`.
+ *
+ * @example
+ * ```ts
+ * isReCompilable("abc"); // true
+ * isReCompilable(/abc/); // true
+ * isReCompilable(42); // false
+ * ```
+ */
+export function isReCompilable(val: unknown): boolean {
+ return typeof val === "string" || val instanceof RegExp;
+}
+
+/**
+ * Return `true` if `val` is a "missing" value in the pandas sense: `null`,
+ * `undefined`, or `NaN`.
+ *
+ * @example
+ * ```ts
+ * isMissing(null); // true
+ * isMissing(undefined); // true
+ * isMissing(NaN); // true
+ * isMissing(0); // false
+ * isMissing(""); // false
+ * ```
+ */
+export function isMissing(val: unknown): boolean {
+ if (val === null || val === undefined) {
+ return true;
+ }
+ if (typeof val === "number" && Number.isNaN(val)) {
+ return true;
+ }
+ return false;
+}
+
+/**
+ * Return `true` if `val` is "hashable" β usable as an object-key in
+ * JavaScript. In practice this means it is a primitive (`string`, `number`,
+ * `bigint`, `boolean`, `symbol`, `null`, `undefined`).
+ *
+ * Mirrors the spirit of `pandas.api.types.is_hashable`.
+ *
+ * @example
+ * ```ts
+ * isHashable("key"); // true
+ * isHashable(42); // true
+ * isHashable({}); // false
+ * isHashable([]); // false
+ * ```
+ */
+export function isHashable(val: unknown): boolean {
+ if (val === null || val === undefined) {
+ return true;
+ }
+ const t = typeof val;
+ return t === "string" || t === "number" || t === "bigint" || t === "boolean" || t === "symbol";
+}
+
+/**
+ * Return `true` if `val` is a `Date` instance.
+ *
+ * @example
+ * ```ts
+ * isDate(new Date()); // true
+ * isDate("2024-01-01"); // false
+ * ```
+ */
+export function isDate(val: unknown): val is Date {
+ return val instanceof Date;
+}
+
+// βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+// DTYPE-LEVEL PREDICATES
+// βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Return `true` if the dtype is numeric (integer, unsigned integer, or float).
+ *
+ * Mirrors `pandas.api.types.is_numeric_dtype`.
+ *
+ * @example
+ * ```ts
+ * isNumericDtype(Dtype.float64); // true
+ * isNumericDtype("int32"); // true
+ * isNumericDtype("string"); // false
+ * ```
+ */
+export function isNumericDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isNumeric;
+}
+
+/**
+ * Return `true` if the dtype is any integer kind (signed or unsigned).
+ *
+ * Mirrors `pandas.api.types.is_integer_dtype`.
+ *
+ * @example
+ * ```ts
+ * isIntegerDtype("int64"); // true
+ * isIntegerDtype("uint8"); // true
+ * isIntegerDtype("float32"); // false
+ * ```
+ */
+export function isIntegerDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isInteger;
+}
+
+/**
+ * Return `true` if the dtype is a signed integer (`int8`β`int64`).
+ *
+ * Mirrors `pandas.api.types.is_signed_integer_dtype`.
+ *
+ * @example
+ * ```ts
+ * isSignedIntegerDtype("int32"); // true
+ * isSignedIntegerDtype("uint32"); // false
+ * ```
+ */
+export function isSignedIntegerDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isSignedInteger;
+}
+
+/**
+ * Return `true` if the dtype is an unsigned integer (`uint8`β`uint64`).
+ *
+ * Mirrors `pandas.api.types.is_unsigned_integer_dtype`.
+ *
+ * @example
+ * ```ts
+ * isUnsignedIntegerDtype("uint64"); // true
+ * isUnsignedIntegerDtype("int64"); // false
+ * ```
+ */
+export function isUnsignedIntegerDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isUnsignedInteger;
+}
+
+/**
+ * Return `true` if the dtype is a floating-point type (`float32` or `float64`).
+ *
+ * Mirrors `pandas.api.types.is_float_dtype`.
+ *
+ * @example
+ * ```ts
+ * isFloatDtype("float64"); // true
+ * isFloatDtype("float32"); // true
+ * isFloatDtype("int32"); // false
+ * ```
+ */
+export function isFloatDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isFloat;
+}
+
+/**
+ * Return `true` if the dtype is boolean.
+ *
+ * Mirrors `pandas.api.types.is_bool_dtype`.
+ *
+ * @example
+ * ```ts
+ * isBoolDtype("bool"); // true
+ * isBoolDtype("int8"); // false
+ * ```
+ */
+export function isBoolDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isBool;
+}
+
+/**
+ * Return `true` if the dtype is the `string` dtype.
+ *
+ * Mirrors `pandas.api.types.is_string_dtype`.
+ *
+ * @example
+ * ```ts
+ * isStringDtype("string"); // true
+ * isStringDtype("object"); // false
+ * ```
+ */
+export function isStringDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isString;
+}
+
+/**
+ * Return `true` if the dtype is a datetime type.
+ *
+ * Mirrors `pandas.api.types.is_datetime64_dtype`.
+ *
+ * @example
+ * ```ts
+ * isDatetimeDtype("datetime"); // true
+ * isDatetimeDtype("string"); // false
+ * ```
+ */
+export function isDatetimeDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isDatetime;
+}
+
+/**
+ * Return `true` if the dtype is a timedelta type.
+ *
+ * Mirrors `pandas.api.types.is_timedelta64_dtype`.
+ *
+ * @example
+ * ```ts
+ * isTimedeltaDtype("timedelta"); // true
+ * isTimedeltaDtype("datetime"); // false
+ * ```
+ */
+export function isTimedeltaDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isTimedelta;
+}
+
+/**
+ * Return `true` if the dtype is the categorical dtype.
+ *
+ * Mirrors `pandas.api.types.is_categorical_dtype`.
+ *
+ * @example
+ * ```ts
+ * isCategoricalDtype("category"); // true
+ * isCategoricalDtype("string"); // false
+ * ```
+ */
+export function isCategoricalDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isCategory;
+}
+
+/**
+ * Return `true` if the dtype is the object dtype.
+ *
+ * Mirrors `pandas.api.types.is_object_dtype`.
+ *
+ * @example
+ * ```ts
+ * isObjectDtype("object"); // true
+ * isObjectDtype("string"); // false
+ * ```
+ */
+export function isObjectDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isObject;
+}
+
+/**
+ * Return `true` if the dtype represents complex numbers.
+ *
+ * JavaScript has no native complex number type, so this always returns `false`
+ * (no complex dtype exists in the `tsb` dtype system). Provided for API
+ * parity with `pandas.api.types.is_complex_dtype`.
+ *
+ * @example
+ * ```ts
+ * isComplexDtype("float64"); // false (no complex dtype)
+ * ```
+ */
+export function isComplexDtype(_dtype: Dtype | DtypeName): boolean {
+ return false;
+}
+
+/**
+ * Return `true` if the dtype is an "extension array" dtype β i.e. any dtype
+ * beyond the numeric primitives: `string`, `object`, `datetime`, `timedelta`,
+ * `category`.
+ *
+ * Mirrors `pandas.api.types.is_extension_array_dtype`.
+ *
+ * @example
+ * ```ts
+ * isExtensionArrayDtype("category"); // true
+ * isExtensionArrayDtype("datetime"); // true
+ * isExtensionArrayDtype("int64"); // false
+ * ```
+ */
+export function isExtensionArrayDtype(dtype: Dtype | DtypeName): boolean {
+ const d = resolveDtype(dtype);
+ return d.isString || d.isObject || d.isDatetime || d.isTimedelta || d.isCategory;
+}
+
+/**
+ * Return `true` if the dtype can hold period (date period) data.
+ * In the current `tsb` dtype system this maps to the `datetime` kind.
+ *
+ * Mirrors `pandas.api.types.is_period_dtype`.
+ *
+ * @example
+ * ```ts
+ * isPeriodDtype("datetime"); // true
+ * isPeriodDtype("float64"); // false
+ * ```
+ */
+export function isPeriodDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isDatetime;
+}
+
+/**
+ * Return `true` if the dtype is suitable for interval data β float or integer.
+ *
+ * Mirrors `pandas.api.types.is_interval_dtype`.
+ *
+ * @example
+ * ```ts
+ * isIntervalDtype("float64"); // true
+ * isIntervalDtype("int32"); // true
+ * isIntervalDtype("string"); // false
+ * ```
+ */
+export function isIntervalDtype(dtype: Dtype | DtypeName): boolean {
+ return resolveDtype(dtype).isNumeric;
+}
diff --git a/src/core/attrs.ts b/src/core/attrs.ts
new file mode 100644
index 00000000..81c6be1c
--- /dev/null
+++ b/src/core/attrs.ts
@@ -0,0 +1,291 @@
+/**
+ * attrs β user-defined metadata dictionary for Series and DataFrame.
+ *
+ * Mirrors `pandas.DataFrame.attrs` / `pandas.Series.attrs`: an arbitrary
+ * keyβvalue dictionary that travels with a data object and lets callers
+ * annotate it with provenance, units, descriptions, or any other metadata.
+ *
+ * Because the tsb Series and DataFrame classes are immutable by design, this
+ * module maintains a **WeakMap registry** that maps each object to its attrs
+ * record. The registry entries are garbage-collected automatically when the
+ * object itself is collected β there is no memory leak.
+ *
+ * ### Public surface
+ *
+ * ```ts
+ * import { getAttrs, setAttrs, updateAttrs, copyAttrs, withAttrs, clearAttrs,
+ * hasAttrs } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ x: [1, 2, 3] });
+ *
+ * // Annotate
+ * setAttrs(df, { source: "sensor_A", unit: "metres" });
+ * getAttrs(df); // { source: "sensor_A", unit: "metres" }
+ *
+ * // Merge additional keys
+ * updateAttrs(df, { version: 2 });
+ * getAttrs(df); // { source: "sensor_A", unit: "metres", version: 2 }
+ *
+ * // Fluent helper β sets attrs and returns the same object
+ * const annotated = withAttrs(df, { source: "sensor_B" });
+ * annotated === df; // true β same reference
+ *
+ * // Propagate to a derived object
+ * const df2 = DataFrame.fromColumns({ y: [4, 5, 6] });
+ * copyAttrs(df, df2);
+ * getAttrs(df2); // { source: "sensor_A", unit: "metres", version: 2 }
+ * ```
+ *
+ * @module
+ */
+
+// βββ types ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * The attrs dictionary type. Keys are strings; values may be any JSON-safe
+ * primitive or nested structure. Mirrors the `dict` type of `pandas.attrs`.
+ */
+export type Attrs = Record;
+
+// βββ registry βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/** Internal WeakMap from any object to its attrs record. */
+const registry = new WeakMap();
+
+// βββ public API βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Retrieve the attrs dictionary for `obj`.
+ *
+ * Returns a **shallow copy** so callers cannot mutate the stored record
+ * accidentally. If no attrs have been set, returns an empty object `{}`.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [1, 2, 3] });
+ * setAttrs(s, { unit: "kg" });
+ * getAttrs(s); // { unit: "kg" }
+ * ```
+ */
+export function getAttrs(obj: object): Attrs {
+ const stored = registry.get(obj);
+ return stored !== undefined ? { ...stored } : {};
+}
+
+/**
+ * **Overwrite** the attrs dictionary for `obj` with `attrs`.
+ *
+ * Any previously stored attrs are discarded. Stores a shallow copy so
+ * subsequent mutations to the passed-in object do not affect the stored value.
+ *
+ * @example
+ * ```ts
+ * setAttrs(df, { source: "sensor_A" });
+ * getAttrs(df); // { source: "sensor_A" }
+ * ```
+ */
+export function setAttrs(obj: object, attrs: Attrs): void {
+ registry.set(obj, { ...attrs });
+}
+
+/**
+ * **Merge** `updates` into the existing attrs for `obj`.
+ *
+ * Existing keys that are not present in `updates` are preserved. Keys that
+ * are present in both `updates` and the existing attrs are overwritten.
+ *
+ * @example
+ * ```ts
+ * setAttrs(df, { source: "A" });
+ * updateAttrs(df, { version: 2 });
+ * getAttrs(df); // { source: "A", version: 2 }
+ * ```
+ */
+export function updateAttrs(obj: object, updates: Attrs): void {
+ const existing = registry.get(obj) ?? {};
+ registry.set(obj, { ...existing, ...updates });
+}
+
+/**
+ * **Copy** the attrs from `source` to `target`, overwriting any existing attrs
+ * on `target`.
+ *
+ * Useful for propagating metadata from an input to a derived result.
+ *
+ * @example
+ * ```ts
+ * setAttrs(df1, { source: "sensor_A" });
+ * const df2 = df1.head(5);
+ * copyAttrs(df1, df2);
+ * getAttrs(df2); // { source: "sensor_A" }
+ * ```
+ */
+export function copyAttrs(source: object, target: object): void {
+ const stored = registry.get(source);
+ if (stored !== undefined) {
+ registry.set(target, { ...stored });
+ } else {
+ registry.delete(target);
+ }
+}
+
+/**
+ * **Fluent helper** β set attrs on `obj` and return the same object.
+ *
+ * This **replaces** any previously stored attrs (same semantics as
+ * {@link setAttrs}). The return type is `T` so callers do not lose the
+ * concrete type of their object.
+ *
+ * @example
+ * ```ts
+ * const annotated = withAttrs(df, { source: "sensor_A", unit: "metres" });
+ * annotated === df; // true β same reference
+ * getAttrs(annotated); // { source: "sensor_A", unit: "metres" }
+ * ```
+ */
+export function withAttrs(obj: T, attrs: Attrs): T {
+ registry.set(obj, { ...attrs });
+ return obj;
+}
+
+/**
+ * **Remove** all attrs from `obj`.
+ *
+ * After calling this, {@link getAttrs} returns `{}` and {@link hasAttrs}
+ * returns `false`.
+ *
+ * @example
+ * ```ts
+ * setAttrs(df, { source: "A" });
+ * clearAttrs(df);
+ * hasAttrs(df); // false
+ * getAttrs(df); // {}
+ * ```
+ */
+export function clearAttrs(obj: object): void {
+ registry.delete(obj);
+}
+
+/**
+ * Returns `true` if `obj` has any attrs set, `false` otherwise.
+ *
+ * @example
+ * ```ts
+ * hasAttrs(df); // false
+ * setAttrs(df, { x: 1 });
+ * hasAttrs(df); // true
+ * clearAttrs(df);
+ * hasAttrs(df); // false
+ * ```
+ */
+export function hasAttrs(obj: object): boolean {
+ return registry.has(obj);
+}
+
+/**
+ * Retrieve a **single** attrs value by key.
+ *
+ * Returns `undefined` if the key does not exist (or no attrs are set).
+ *
+ * @example
+ * ```ts
+ * setAttrs(df, { unit: "kg" });
+ * getAttr(df, "unit"); // "kg"
+ * getAttr(df, "missing"); // undefined
+ * ```
+ */
+export function getAttr(obj: object, key: string): unknown {
+ return registry.get(obj)?.[key];
+}
+
+/**
+ * Set a **single** attrs key on `obj`, preserving all other existing attrs.
+ *
+ * @example
+ * ```ts
+ * setAttr(df, "unit", "kg");
+ * setAttr(df, "source", "lab");
+ * getAttrs(df); // { unit: "kg", source: "lab" }
+ * ```
+ */
+export function setAttr(obj: object, key: string, value: unknown): void {
+ const existing = registry.get(obj) ?? {};
+ registry.set(obj, { ...existing, [key]: value });
+}
+
+/**
+ * Delete a **single** attrs key from `obj`, preserving all other keys.
+ *
+ * Does nothing if the key does not exist.
+ *
+ * @example
+ * ```ts
+ * setAttrs(df, { a: 1, b: 2 });
+ * deleteAttr(df, "a");
+ * getAttrs(df); // { b: 2 }
+ * ```
+ */
+export function deleteAttr(obj: object, key: string): void {
+ const existing = registry.get(obj);
+ if (existing === undefined) return;
+ const { [key]: _removed, ...rest } = existing;
+ if (Object.keys(rest).length === 0) {
+ registry.delete(obj);
+ } else {
+ registry.set(obj, rest);
+ }
+}
+
+/**
+ * Return the number of attrs keys stored on `obj`.
+ *
+ * @example
+ * ```ts
+ * attrsCount(df); // 0
+ * setAttrs(df, { a: 1, b: 2 });
+ * attrsCount(df); // 2
+ * ```
+ */
+export function attrsCount(obj: object): number {
+ return Object.keys(registry.get(obj) ?? {}).length;
+}
+
+/**
+ * Return the list of attrs keys stored on `obj`.
+ *
+ * @example
+ * ```ts
+ * setAttrs(df, { a: 1, b: 2 });
+ * attrsKeys(df); // ["a", "b"]
+ * ```
+ */
+export function attrsKeys(obj: object): string[] {
+ return Object.keys(registry.get(obj) ?? {});
+}
+
+/**
+ * Merge attrs from multiple source objects into a single target object.
+ *
+ * Sources are applied left-to-right; later sources overwrite earlier ones on
+ * key conflicts. Overwrites any existing attrs on `target`.
+ *
+ * @example
+ * ```ts
+ * setAttrs(s1, { source: "A", unit: "kg" });
+ * setAttrs(s2, { source: "B", scale: 2 });
+ * mergeAttrs([s1, s2], df);
+ * getAttrs(df); // { source: "B", unit: "kg", scale: 2 }
+ * ```
+ */
+export function mergeAttrs(sources: readonly object[], target: object): void {
+ const merged: Attrs = {};
+ for (const src of sources) {
+ const stored = registry.get(src);
+ if (stored !== undefined) {
+ Object.assign(merged, stored);
+ }
+ }
+ if (Object.keys(merged).length > 0) {
+ registry.set(target, merged);
+ }
+}
diff --git a/src/core/index.ts b/src/core/index.ts
index ada43b65..08713cae 100644
--- a/src/core/index.ts
+++ b/src/core/index.ts
@@ -15,3 +15,71 @@ export { CategoricalAccessor } from "./cat_accessor.ts";
export type { CatSeriesLike } from "./cat_accessor.ts";
export { MultiIndex } from "./multi_index.ts";
export type { MultiIndexOptions } from "./multi_index.ts";
+export { insertColumn, popColumn, reorderColumns, moveColumn, dataFrameFromPairs } from "./insert_pop.ts";
+export type { PopResult } from "./insert_pop.ts";
+export { toDictOriented, fromDictOriented } from "./to_from_dict.ts";
+export type {
+ ToDictOrient,
+ FromDictOrient,
+ DictSplit,
+ DictTight,
+ SplitInput,
+} from "./to_from_dict.ts";
+export {
+ getAttrs,
+ setAttrs,
+ updateAttrs,
+ copyAttrs,
+ withAttrs,
+ clearAttrs,
+ hasAttrs,
+ getAttr,
+ setAttr,
+ deleteAttr,
+ attrsCount,
+ attrsKeys,
+ mergeAttrs,
+} from "./attrs.ts";
+export type { Attrs } from "./attrs.ts";
+export {
+ pipe,
+ seriesApply,
+ seriesTransform,
+ dataFrameApply,
+ dataFrameApplyMap,
+ dataFrameTransform,
+ dataFrameTransformRows,
+} from "./pipe_apply.ts";
+export {
+ isScalar,
+ isListLike,
+ isArrayLike,
+ isDictLike,
+ isIterator,
+ isNumber,
+ isBool,
+ isStringValue,
+ isFloat,
+ isInteger,
+ isBigInt,
+ isRegExp,
+ isReCompilable,
+ isMissing,
+ isHashable,
+ isDate,
+ isNumericDtype,
+ isIntegerDtype,
+ isSignedIntegerDtype,
+ isUnsignedIntegerDtype,
+ isFloatDtype,
+ isBoolDtype,
+ isStringDtype,
+ isDatetimeDtype,
+ isTimedeltaDtype,
+ isCategoricalDtype,
+ isObjectDtype,
+ isComplexDtype,
+ isExtensionArrayDtype,
+ isPeriodDtype,
+ isIntervalDtype,
+} from "./api_types.ts";
diff --git a/src/core/insert_pop.ts b/src/core/insert_pop.ts
new file mode 100644
index 00000000..d56c42bc
--- /dev/null
+++ b/src/core/insert_pop.ts
@@ -0,0 +1,214 @@
+/**
+ * DataFrame.insert() and DataFrame.pop() β column insertion and removal.
+ *
+ * Mirrors `pandas.DataFrame.insert(loc, column, value)` and
+ * `pandas.DataFrame.pop(item)`.
+ *
+ * Since `DataFrame` in tsb is immutable, both operations return a new DataFrame.
+ * `popColumn` returns both the extracted `Series` and the resulting DataFrame.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, insertColumn, popColumn } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] });
+ *
+ * // Insert column "x" at position 1 (between "a" and "b")
+ * const df2 = insertColumn(df, 1, "x", [10, 20]);
+ * // df2.columns.values β ["a", "x", "b"]
+ *
+ * // Pop column "a" out of df2
+ * const { series, df: df3 } = popColumn(df2, "a");
+ * // series.values β [1, 2]
+ * // df3.columns.values β ["x", "b"]
+ * ```
+ *
+ * @packageDocumentation
+ */
+
+import type { Label, Scalar } from "../types.ts";
+import { Index } from "./base-index.ts";
+import { DataFrame } from "./frame.ts";
+import { Series } from "./series.ts";
+
+// βββ insertColumn βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Insert a new column into `df` at integer column position `loc`.
+ *
+ * Mirrors `pandas.DataFrame.insert(loc, column, value, allow_duplicates=False)`.
+ * Raises a `RangeError` if:
+ * - `column` already exists in `df` (no duplicates by default)
+ * - `loc` is out of range (must be 0 β€ loc β€ df.shape[1])
+ * - `values` length does not match the number of rows
+ *
+ * @param df Source DataFrame (not mutated).
+ * @param loc Zero-based integer position at which to insert the column.
+ * @param column Name of the new column.
+ * @param values Column data as an array of scalars or a `Series`.
+ * @param allowDuplicates When `true`, silently allow duplicate column names. Default `false`.
+ * @returns A new DataFrame with the column inserted.
+ */
+export function insertColumn(
+ df: DataFrame,
+ loc: number,
+ column: string,
+ values: readonly Scalar[] | Series,
+ allowDuplicates = false,
+): DataFrame {
+ const nCols = df.shape[1];
+ const nRows = df.shape[0];
+
+ if (!allowDuplicates && df.has(column)) {
+ throw new RangeError(
+ `Column "${column}" already exists. Use allowDuplicates=true to permit duplicate names.`,
+ );
+ }
+
+ if (loc < 0 || loc > nCols) {
+ throw new RangeError(`loc=${loc} is out of range [0, ${nCols}].`);
+ }
+
+ // Resolve values to a Series aligned to df's row index.
+ const series: Series =
+ values instanceof Series
+ ? values
+ : new Series({ data: values, index: df.index, name: column });
+
+ if (series.size !== nRows) {
+ throw new RangeError(
+ `values length ${series.size} does not match DataFrame row count ${nRows}.`,
+ );
+ }
+
+ // Rebuild the column map, inserting the new column at position `loc`.
+ const colMap = new Map>();
+ let idx = 0;
+
+ for (const colName of df.columns.values) {
+ if (idx === loc) {
+ colMap.set(column, series);
+ }
+ colMap.set(colName, df.col(colName));
+ idx++;
+ }
+
+ // Handle insertion at the end (loc === nCols).
+ if (loc === nCols) {
+ colMap.set(column, series);
+ }
+
+ return new DataFrame(colMap, df.index);
+}
+
+// βββ popColumn ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/** Return type of {@link popColumn}. */
+export interface PopResult {
+ /** The extracted column as a Series. */
+ readonly series: Series;
+ /** The DataFrame with the column removed. */
+ readonly df: DataFrame;
+}
+
+/**
+ * Remove a column from `df` and return both the extracted `Series` and the
+ * resulting DataFrame.
+ *
+ * Mirrors `pandas.DataFrame.pop(item)`, but because tsb DataFrames are
+ * immutable this function returns the removed Series *and* the new DataFrame
+ * (rather than mutating in place).
+ *
+ * Raises a `RangeError` if `col` does not exist in `df`.
+ *
+ * @param df Source DataFrame (not mutated).
+ * @param col Name of the column to remove.
+ * @returns `{ series, df }` β the extracted column and the remaining DataFrame.
+ *
+ * @example
+ * ```ts
+ * const { series, df: remaining } = popColumn(df, "age");
+ * // series contains the "age" column; remaining has all other columns
+ * ```
+ */
+export function popColumn(df: DataFrame, col: string): PopResult {
+ const series = df.get(col);
+ if (series === undefined) {
+ throw new RangeError(`Column "${col}" not found in DataFrame.`);
+ }
+
+ const colMap = new Map>();
+ for (const colName of df.columns.values) {
+ if (colName !== col) {
+ colMap.set(colName, df.col(colName));
+ }
+ }
+
+ return {
+ series,
+ df: new DataFrame(colMap, df.index),
+ };
+}
+
+// βββ reorderColumns ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Reorder the columns of `df` to match `order`.
+ *
+ * Mirrors `df[order]` in pandas. All names in `order` must be present in `df`;
+ * extra names in `df` not listed in `order` are dropped.
+ *
+ * @param df Source DataFrame.
+ * @param order New column order (subset of `df.columns.values`).
+ * @returns A new DataFrame with columns in the specified order.
+ */
+export function reorderColumns(df: DataFrame, order: readonly string[]): DataFrame {
+ const colMap = new Map>();
+ for (const name of order) {
+ const s = df.get(name);
+ if (s === undefined) {
+ throw new RangeError(`Column "${name}" not found in DataFrame.`);
+ }
+ colMap.set(name, s);
+ }
+ return new DataFrame(colMap, df.index);
+}
+
+// βββ moveColumn ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Move an existing column to a new integer position.
+ *
+ * This is a convenience wrapper combining {@link popColumn} and
+ * {@link insertColumn}: it removes the column from its current position and
+ * re-inserts it at `newLoc` in the resulting DataFrame.
+ *
+ * @param df Source DataFrame.
+ * @param col Name of the column to move.
+ * @param newLoc Target position (0 β€ newLoc β€ df.shape[1] β 1).
+ * @returns A new DataFrame with the column at the new position.
+ */
+export function moveColumn(df: DataFrame, col: string, newLoc: number): DataFrame {
+ const { series, df: without } = popColumn(df, col);
+ return insertColumn(without, newLoc, col, series);
+}
+
+// βββ internal re-export helper (used by DataFrame constructor access) βββββββββ
+
+/**
+ * Build a new DataFrame from an ordered iterable of `[name, Series]` pairs and
+ * a row index. Exported for use by other tsb modules that need to construct
+ * DataFrames without going through the public factory methods.
+ *
+ * @internal
+ */
+export function dataFrameFromPairs(
+ pairs: Iterable]>,
+ index: Index,
+): DataFrame {
+ const colMap = new Map>();
+ for (const [name, series] of pairs) {
+ colMap.set(name, series);
+ }
+ return new DataFrame(colMap, index);
+}
diff --git a/src/core/pipe_apply.ts b/src/core/pipe_apply.ts
new file mode 100644
index 00000000..2f0b0180
--- /dev/null
+++ b/src/core/pipe_apply.ts
@@ -0,0 +1,303 @@
+/**
+ * pipe_apply β functional pipeline and apply utilities for Series and DataFrame.
+ *
+ * Provides standalone equivalents of the pandas `.pipe()` chaining pattern and
+ * various `.apply()` / `applymap()` operations, usable without method-call syntax.
+ *
+ * | Function | Pandas equivalent |
+ * |----------------------|-------------------------------------------|
+ * | `pipe` | `df.pipe(fn)` / `s.pipe(fn)` chained |
+ * | `seriesApply` | `s.apply(fn)` |
+ * | `seriesTransform` | `s.transform(fn)` (scalarβscalar variant) |
+ * | `dataFrameApply` | `df.apply(fn, axis=0\|1)` |
+ * | `dataFrameApplyMap` | `df.applymap(fn)` / `df.map(fn)` (β₯2.1) |
+ * | `dataFrameTransform` | `df.transform(fn)` |
+ *
+ * All functions are **pure** β inputs are never mutated.
+ *
+ * @module
+ */
+
+import { DataFrame, Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// βββ pipe βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Pass `value` through a sequence of unary functions left-to-right.
+ *
+ * Each function receives the output of the previous one. The overloads
+ * preserve precise return types at each step up to 8 functions deep; beyond
+ * that the return type widens to `unknown`.
+ *
+ * ```ts
+ * const result = pipe(
+ *   df,
+ *   (d) => d.dropna(),
+ *   (d) => d.assign({ z: d.col("x").add(d.col("y")).values }),
+ *   (d) => d.head(10),
+ * );
+ * ```
+ *
+ * Mirrors `pandas.DataFrame.pipe(fn)` / `pandas.Series.pipe(fn)` chaining,
+ * but works on **any** value β not just DataFrames and Series.
+ */
+export function pipe<A>(value: A): A;
+export function pipe<A, B>(value: A, fn1: (a: A) => B): B;
+export function pipe<A, B, C>(value: A, fn1: (a: A) => B, fn2: (b: B) => C): C;
+export function pipe<A, B, C, D>(
+  value: A,
+  fn1: (a: A) => B,
+  fn2: (b: B) => C,
+  fn3: (c: C) => D,
+): D;
+export function pipe<A, B, C, D, E>(
+  value: A,
+  fn1: (a: A) => B,
+  fn2: (b: B) => C,
+  fn3: (c: C) => D,
+  fn4: (d: D) => E,
+): E;
+export function pipe<A, B, C, D, E, F>(
+  value: A,
+  fn1: (a: A) => B,
+  fn2: (b: B) => C,
+  fn3: (c: C) => D,
+  fn4: (d: D) => E,
+  fn5: (e: E) => F,
+): F;
+export function pipe<A, B, C, D, E, F, G>(
+  value: A,
+  fn1: (a: A) => B,
+  fn2: (b: B) => C,
+  fn3: (c: C) => D,
+  fn4: (d: D) => E,
+  fn5: (e: E) => F,
+  fn6: (f: F) => G,
+): G;
+export function pipe<A, B, C, D, E, F, G, H>(
+  value: A,
+  fn1: (a: A) => B,
+  fn2: (b: B) => C,
+  fn3: (c: C) => D,
+  fn4: (d: D) => E,
+  fn5: (e: E) => F,
+  fn6: (f: F) => G,
+  fn7: (g: G) => H,
+): H;
+// 8th overload added so the doc's "up to 8 functions deep" claim holds.
+export function pipe<A, B, C, D, E, F, G, H, I>(
+  value: A,
+  fn1: (a: A) => B,
+  fn2: (b: B) => C,
+  fn3: (c: C) => D,
+  fn4: (d: D) => E,
+  fn5: (e: E) => F,
+  fn6: (f: F) => G,
+  fn7: (g: G) => H,
+  fn8: (h: H) => I,
+): I;
+// Implementation (untyped fallback for arbitrarily long pipelines)
+export function pipe(value: unknown, ...fns: ReadonlyArray<(x: unknown) => unknown>): unknown {
+  let acc = value;
+  for (const fn of fns) {
+    acc = fn(acc);
+  }
+  return acc;
+}
+
+// βββ Series apply βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Map `fn` over every element of `series`, producing a new `Series`.
+ *
+ * The callback is invoked as `fn(value, label, position)` where:
+ * - `value` is the raw element,
+ * - `label` is the index label at that position,
+ * - `position` is the zero-based integer offset.
+ *
+ * Mirrors `pandas.Series.apply(func)`.
+ *
+ * ```ts
+ * const doubled = seriesApply(s, (v) => typeof v === "number" ? v * 2 : v);
+ * ```
+ */
+export function seriesApply(
+  series: Series,
+  fn: (value: Scalar, label: Label, position: number) => Scalar,
+): Series {
+  const results: Scalar[] = [];
+  for (let pos = 0; pos < series.size; pos++) {
+    results.push(fn(series.iat(pos), series.index.at(pos), pos));
+  }
+  // Carry the source name over only when one is set.
+  if (series.name !== null) {
+    return new Series({ data: results, index: series.index, name: series.name });
+  }
+  return new Series({ data: results, index: series.index });
+}
+
+/**
+ * Apply a scalar-to-scalar `fn` to every element and return a new Series.
+ *
+ * Unlike {@link seriesApply}, the callback receives only the value β no
+ * label or position β matching the common pandas `s.apply(lambda x: β¦)` usage.
+ *
+ * ```ts
+ * const capped = seriesTransform(s, (v) => typeof v === "number" ? Math.min(v, 100) : v);
+ * ```
+ */
+export function seriesTransform(
+  series: Series,
+  fn: (value: Scalar) => Scalar,
+): Series {
+  const results: Scalar[] = [];
+  for (let pos = 0; pos < series.size; pos++) {
+    results.push(fn(series.iat(pos)));
+  }
+  // Carry the source name over only when one is set.
+  if (series.name !== null) {
+    return new Series({ data: results, index: series.index, name: series.name });
+  }
+  return new Series({ data: results, index: series.index });
+}
+
+// βββ DataFrame apply ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Apply `fn` to each column or row of `df`, aggregating to a scalar per column/row.
+ *
+ * - **axis = 0** (default): `fn` receives each column `Series`; results are
+ *   indexed by column names, mirroring `df.apply(fn, axis=0)`.
+ * - **axis = 1**: `fn` receives each row as a `Series` indexed by column names;
+ *   results are indexed by the DataFrame's row labels, mirroring `df.apply(fn, axis=1)`.
+ *
+ * ```ts
+ * // max value in each column
+ * const colMax = dataFrameApply(df, (s) => s.max() ?? null);
+ *
+ * // sum across each row
+ * const rowSums = dataFrameApply(df, (s) => s.sum(), 1);
+ * ```
+ */
+export function dataFrameApply(
+  df: DataFrame,
+  fn: (s: Series, label: string | Label) => Scalar,
+  axis: 0 | 1 = 0,
+): Series {
+  const colNames = df.columns.values as readonly string[];
+  if (axis === 0) {
+    const out: Scalar[] = colNames.map((name) => fn(df.col(name), name));
+    return new Series({ data: out, index: df.columns });
+  }
+  // axis === 1: look each column Series up once, not once per row
+  // (previously an O(rows Β· cols) df.col lookup pattern).
+  const cols = colNames.map((name) => df.col(name));
+  const n = df.index.size;
+  const out: Scalar[] = new Array(n);
+  for (let i = 0; i < n; i++) {
+    const rowData: Scalar[] = cols.map((c) => c.iat(i));
+    const rowSeries = new Series({ data: rowData, index: [...colNames] });
+    out[i] = fn(rowSeries, df.index.at(i));
+  }
+  return new Series({ data: out, index: df.index });
+}
+
+/**
+ * Apply `fn` to every element of `df`, returning a new DataFrame with the same
+ * shape (same index, same columns).
+ *
+ * `fn` receives `(value, rowLabel, columnName)`:
+ * - `value` β the scalar at `(row, col)`
+ * - `rowLabel` β the row index label
+ * - `columnName` β the column name
+ *
+ * Mirrors `pandas.DataFrame.applymap(fn)` (renamed `DataFrame.map` in pandas 2.1).
+ *
+ * ```ts
+ * // zero-out negatives
+ * const clamped = dataFrameApplyMap(df, (v) => typeof v === "number" && v < 0 ? 0 : v);
+ * ```
+ */
+export function dataFrameApplyMap(
+  df: DataFrame,
+  fn: (value: Scalar, rowLabel: Label, colName: string) => Scalar,
+): DataFrame {
+  const colNames = df.columns.values as readonly string[];
+  const rowLabels = df.index.values as readonly Label[];
+  const n = rowLabels.length;
+  // NOTE(review): Record type arguments restored after markup-stripping.
+  const newData: Record<string, Scalar[]> = {};
+  for (const colName of colNames) {
+    const col = df.col(colName);
+    const out: Scalar[] = new Array(n);
+    for (let i = 0; i < n; i++) {
+      out[i] = fn(col.iat(i), rowLabels[i] as Label, colName);
+    }
+    newData[colName] = out;
+  }
+  return DataFrame.fromColumns(newData, { index: df.index });
+}
+
+/**
+ * Apply `fn` to each column of `df`, replacing each column with the transformed
+ * Series. Returns a new DataFrame with the same index and column names.
+ *
+ * `fn` receives a column `Series` and must return a `Series` of
+ * the **same length**.
+ *
+ * Mirrors `pandas.DataFrame.transform(fn)` (column-wise variant).
+ *
+ * @throws {RangeError} when a transformed column's length differs from the row count.
+ *
+ * ```ts
+ * // z-score normalise every column
+ * const normed = dataFrameTransform(df, (col) => {
+ *   const mu = col.mean();
+ *   const sd = col.std();
+ *   return seriesTransform(col, (v) => typeof v === "number" ? (v - mu) / sd : v);
+ * });
+ * ```
+ */
+export function dataFrameTransform(
+  df: DataFrame,
+  fn: (col: Series, colName: string) => Series,
+): DataFrame {
+  const colNames = df.columns.values as readonly string[];
+  // NOTE(review): Record type arguments restored after markup-stripping;
+  // values come from `Series.values`, hence readonly Scalar[].
+  const newData: Record<string, readonly Scalar[]> = {};
+  for (const colName of colNames) {
+    const transformed = fn(df.col(colName), colName);
+    if (transformed.size !== df.index.size) {
+      throw new RangeError(
+        `dataFrameTransform: column "${colName}" β transform returned ${transformed.size} rows, expected ${df.index.size}`,
+      );
+    }
+    newData[colName] = transformed.values;
+  }
+  return DataFrame.fromColumns(newData, { index: df.index });
+}
+
+/**
+ * Apply `fn` to each row of `df`, replacing each row with the transformed
+ * record. Returns a new DataFrame with the same index and column names.
+ *
+ * `fn` receives an object `{ colName: value, β¦ }` for the row and must return
+ * a partial or full record of the same columns. Missing keys keep their
+ * original value; extra keys are ignored.
+ *
+ * ```ts
+ * // negate every value in each row
+ * const neg = dataFrameTransformRows(df, (row) =>
+ *   Object.fromEntries(Object.entries(row).map(([k, v]) => [k, typeof v === "number" ? -v : v]))
+ * );
+ * ```
+ */
+export function dataFrameTransformRows(
+  df: DataFrame,
+  fn: (row: Readonly<Record<string, Scalar>>, rowLabel: Label, position: number) => Readonly<Record<string, Scalar>>,
+): DataFrame {
+  const colNames = df.columns.values as readonly string[];
+  const rowLabels = df.index.values as readonly Label[];
+  const n = rowLabels.length;
+  // Build one output array per column, pre-sized to the row count.
+  const colArrays = new Map<string, Scalar[]>();
+  for (const c of colNames) {
+    colArrays.set(c, new Array(n));
+  }
+  // Resolve each column Series once, instead of per row.
+  const cols: ReadonlyArray<readonly [string, Series]> = colNames.map((c) => [c, df.col(c)] as const);
+  for (let i = 0; i < n; i++) {
+    const rowIn: Record<string, Scalar> = {};
+    for (const [name, s] of cols) {
+      rowIn[name] = s.iat(i);
+    }
+    const rowOut = fn(rowIn, rowLabels[i] as Label, i);
+    for (const c of colNames) {
+      const colArr = colArrays.get(c);
+      if (colArr === undefined) continue;
+      // Use the transformed value if present, else keep the original.
+      colArr[i] = c in rowOut ? (rowOut[c] as Scalar) : (rowIn[c] as Scalar);
+    }
+  }
+  const newData: Record<string, Scalar[]> = {};
+  for (const c of colNames) {
+    newData[c] = colArrays.get(c) as Scalar[];
+  }
+  return DataFrame.fromColumns(newData, { index: df.index });
+}
diff --git a/src/core/to_from_dict.ts b/src/core/to_from_dict.ts
new file mode 100644
index 00000000..975a7fc5
--- /dev/null
+++ b/src/core/to_from_dict.ts
@@ -0,0 +1,283 @@
+/**
+ * to_from_dict β DataFrame β dictionary conversions with orient support.
+ *
+ * Mirrors `pandas.DataFrame.to_dict(orient=...)` and
+ * `pandas.DataFrame.from_dict(data, orient=...)`.
+ *
+ * Supported `orient` values for {@link toDictOriented}:
+ * - `"dict"` / `"columns"` β `{col: {rowLabel: value}}`
+ * - `"list"` β `{col: [values]}`
+ * - `"series"` β `{col: Series}`
+ * - `"split"` β `{index, columns, data}`
+ * - `"tight"` β like `"split"` plus `index_names` and `column_names`
+ * - `"records"` β `[{col: value, ...}, ...]`
+ * - `"index"` β `{rowLabel: {col: value}}`
+ *
+ * Supported `orient` values for {@link fromDictOriented}:
+ * - `"columns"` β `{col: [values]}` (default)
+ * - `"index"` β `{rowLabel: {col: value}}`
+ * - `"split"` β `{index?, columns, data}`
+ * - `"tight"` β `{index?, columns, data, index_names?, column_names?}`
+ *
+ * @module
+ */
+
+import type { Label, Scalar } from "../types.ts";
+import { Index } from "./base-index.ts";
+import { DataFrame } from "./frame.ts";
+import { Series } from "./series.ts";
+
+// βββ public types ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/** Orient values supported by {@link toDictOriented}. */
+export type ToDictOrient = "dict" | "columns" | "list" | "series" | "split" | "tight" | "records" | "index";
+
+/** Orient values supported by {@link fromDictOriented}. */
+export type FromDictOrient = "columns" | "index" | "split" | "tight";
+
+/** Result shape for `orient = "split"`. */
+export interface DictSplit {
+ /** Row labels, in row order. */
+ readonly index: Label[];
+ /** Column names, in column order. */
+ readonly columns: string[];
+ /** Row-major values: `data[i][j]` is the value at row `i`, column `j`. */
+ readonly data: Scalar[][];
+}
+
+/** Result shape for `orient = "tight"`. */
+export interface DictTight extends DictSplit {
+ /** Index level name(s); `[null]` when the index is unnamed. */
+ readonly index_names: (string | null)[];
+ /** Column level name(s); `[null]` when the columns are unnamed. */
+ readonly column_names: (string | null)[];
+}
+
+// βββ helpers ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/** Stringify a row label for use as an object key; null and undefined both map to "null". */
+function labelKey(label: Label): string {
+  return label == null ? "null" : String(label);
+}
+
+/** True when `labels` is exactly the default 0β¦n-1 RangeIndex. */
+function isDefaultRange(labels: readonly Label[]): boolean {
+  return labels.every((lbl, i) => lbl === i);
+}
+
+// βββ toDictOriented βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Convert a DataFrame to a dictionary using the given `orient`.
+ *
+ * Mirrors `pandas.DataFrame.to_dict(orient, ...)`.
+ *
+ * @param df Source DataFrame.
+ * @param orient Output structure. Defaults to `"dict"`.
+ */
+export function toDictOriented(df: DataFrame, orient: "dict" | "columns"): Record<string, Record<string, Scalar>>;
+export function toDictOriented(df: DataFrame, orient: "list"): Record<string, Scalar[]>;
+export function toDictOriented(df: DataFrame, orient: "series"): Record<string, Series>;
+export function toDictOriented(df: DataFrame, orient: "split"): DictSplit;
+export function toDictOriented(df: DataFrame, orient: "tight"): DictTight;
+export function toDictOriented(df: DataFrame, orient: "records"): Record<string, Scalar>[];
+export function toDictOriented(df: DataFrame, orient: "index"): Record<string, Record<string, Scalar>>;
+export function toDictOriented(
+  df: DataFrame,
+  orient: ToDictOrient = "dict",
+): Record<string, unknown> | unknown[] {
+  const colNames = [...(df.columns.values as readonly string[])];
+  const rowLabels = [...(df.index.values as Label[])];
+  const nRows = df.index.size;
+
+  // Shared row-major builder for "split" and "tight" (previously duplicated).
+  const buildRows = (): Scalar[][] => {
+    const colValues = colNames.map((col) => df.col(col).values as readonly Scalar[]);
+    const rows: Scalar[][] = [];
+    for (let i = 0; i < nRows; i++) {
+      rows.push(colValues.map((vals) => (vals[i] ?? null) as Scalar));
+    }
+    return rows;
+  };
+
+  switch (orient) {
+    case "dict":
+    case "columns": {
+      const result: Record<string, Record<string, Scalar>> = {};
+      for (const col of colNames) {
+        const series = df.col(col);
+        const colObj: Record<string, Scalar> = {};
+        for (let i = 0; i < nRows; i++) {
+          const lbl = rowLabels[i];
+          const key = labelKey(lbl !== undefined ? lbl : null);
+          colObj[key] = (series.values[i] ?? null) as Scalar;
+        }
+        result[col] = colObj;
+      }
+      return result;
+    }
+
+    case "list": {
+      const result: Record<string, Scalar[]> = {};
+      for (const col of colNames) {
+        result[col] = [...(df.col(col).values as readonly Scalar[])];
+      }
+      return result;
+    }
+
+    case "series": {
+      const result: Record<string, Series> = {};
+      for (const col of colNames) {
+        result[col] = df.col(col);
+      }
+      return result;
+    }
+
+    case "split": {
+      return { index: rowLabels, columns: colNames, data: buildRows() } satisfies DictSplit;
+    }
+
+    case "tight": {
+      return {
+        index: rowLabels,
+        columns: colNames,
+        data: buildRows(),
+        // tsb indexes/columns carry no level names, so both are [null].
+        index_names: [null],
+        column_names: [null],
+      } satisfies DictTight;
+    }
+
+    case "records": {
+      return df.toRecords();
+    }
+
+    case "index": {
+      const result: Record<string, Record<string, Scalar>> = {};
+      for (let i = 0; i < nRows; i++) {
+        const lbl = rowLabels[i];
+        const key = labelKey(lbl !== undefined ? lbl : null);
+        const rowObj: Record<string, Scalar> = {};
+        for (const col of colNames) {
+          rowObj[col] = (df.col(col).values[i] ?? null) as Scalar;
+        }
+        result[key] = rowObj;
+      }
+      return result;
+    }
+
+    default: {
+      const exhaustive: never = orient;
+      throw new RangeError(`Unknown orient: ${String(exhaustive)}`);
+    }
+  }
+}
+
+// βββ fromDictOriented βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/** Input type for `orient = "split"` / `"tight"`. */
+export interface SplitInput {
+ /** Optional row labels; omitted (or a default 0β¦n-1 range) β a default index is used. */
+ readonly index?: readonly Label[];
+ /** Column names, in order; `data` rows pair positionally with these. */
+ readonly columns: readonly string[];
+ /** Row-major data: `data[i][j]` is the value at row `i`, column `columns[j]`. */
+ readonly data: readonly (readonly Scalar[])[];
+}
+
+/**
+ * Construct a DataFrame from a dictionary using the given `orient`.
+ *
+ * Mirrors `pandas.DataFrame.from_dict(data, orient=...)`.
+ *
+ * @param data Input dictionary (shape depends on `orient`).
+ * @param orient How `data` is structured. Defaults to `"columns"`.
+ */
+export function fromDictOriented(
+  data: Readonly<Record<string, readonly Scalar[]>>,
+  orient?: "columns",
+): DataFrame;
+export function fromDictOriented(
+  data: Readonly<Record<string, Readonly<Record<string, Scalar>>>>,
+  orient: "index",
+): DataFrame;
+export function fromDictOriented(data: SplitInput, orient: "split" | "tight"): DataFrame;
+export function fromDictOriented(
+  data: unknown,
+  orient: FromDictOrient = "columns",
+): DataFrame {
+  switch (orient) {
+    case "columns": {
+      const colsData = data as Record<string, readonly Scalar[]>;
+      return DataFrame.fromColumns(colsData);
+    }
+
+    case "index": {
+      const indexData = data as Record<string, Record<string, Scalar>>;
+      const rowLabels = Object.keys(indexData);
+      // Collect all column names in first-seen order across rows.
+      // (A Set preserves insertion order; the original Map's values were unused.)
+      const colSet = new Set<string>();
+      for (const rowLabel of rowLabels) {
+        const rowObj = indexData[rowLabel];
+        if (rowObj !== undefined) {
+          for (const col of Object.keys(rowObj)) {
+            colSet.add(col);
+          }
+        }
+      }
+      const colNames = [...colSet];
+      const colArrays: Record<string, Scalar[]> = {};
+      for (const col of colNames) {
+        colArrays[col] = [];
+      }
+      for (const rowLabel of rowLabels) {
+        const rowObj = indexData[rowLabel] ?? {};
+        for (const col of colNames) {
+          const arr = colArrays[col];
+          if (arr !== undefined) {
+            // Missing cells become null, mirroring pandas' NaN fill.
+            arr.push(rowObj[col] ?? null);
+          }
+        }
+      }
+      const idx = new Index(rowLabels as Label[]);
+      return DataFrame.fromColumns(colArrays, { index: idx });
+    }
+
+    case "split":
+    case "tight": {
+      return buildFromSplit(data as SplitInput);
+    }
+
+    default: {
+      const exhaustive: never = orient;
+      throw new RangeError(`Unknown orient: ${String(exhaustive)}`);
+    }
+  }
+}
+
+// βββ internal helpers ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/** Build a DataFrame from a split/tight structure. */
+function buildFromSplit(input: SplitInput): DataFrame {
+  const { columns, data } = input;
+  const colArrays: Record<string, Scalar[]> = {};
+  for (const col of columns) {
+    colArrays[col] = [];
+  }
+  for (const row of data) {
+    for (let j = 0; j < columns.length; j++) {
+      // Guard the index read before using it as a key.
+      const col = columns[j];
+      if (col === undefined) continue;
+      const arr = colArrays[col];
+      if (arr !== undefined) {
+        // Ragged/short rows are padded with null.
+        arr.push(row[j] ?? null);
+      }
+    }
+  }
+
+  // Use the explicit row index only when it is not a trivial 0β¦n-1 range.
+  if (input.index !== undefined && !isDefaultRange(input.index)) {
+    const idx = new Index(input.index as Label[]);
+    return DataFrame.fromColumns(colArrays, { index: idx });
+  }
+
+  return DataFrame.fromColumns(colArrays);
+}
diff --git a/src/index.ts b/src/index.ts
index 1dd0aa57..ab2dbcdc 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -61,6 +61,13 @@ export type { ExpandingOptions, ExpandingSeriesLike } from "./window/index.ts";
export { DataFrameExpanding } from "./core/index.ts";
export { EWM } from "./window/index.ts";
export type { EwmOptions, EwmSeriesLike } from "./window/index.ts";
+export {
+ rollingApply,
+ rollingAgg,
+ dataFrameRollingApply,
+ dataFrameRollingAgg,
+} from "./window/index.ts";
+export type { RollingApplyOptions, RollingAggOptions, AggFunctions } from "./window/index.ts";
export { DataFrameEwm } from "./core/index.ts";
export { CategoricalAccessor } from "./core/index.ts";
export type { CatSeriesLike } from "./core/index.ts";
@@ -107,3 +114,165 @@ export {
export type { ClipOptions, RoundOptions, DataFrameElemOptions } from "./stats/index.ts";
export { valueCounts, dataFrameValueCounts } from "./stats/index.ts";
export type { ValueCountsOptions, DataFrameValueCountsOptions } from "./stats/index.ts";
+
+export { insertColumn, popColumn, reorderColumns, moveColumn, dataFrameFromPairs } from "./core/index.ts";
+export type { PopResult } from "./core/index.ts";
+export { toDictOriented, fromDictOriented } from "./core/index.ts";
+export type { ToDictOrient, FromDictOrient, DictSplit, DictTight, SplitInput } from "./core/index.ts";
+export { wideToLong } from "./reshape/index.ts";
+export type { WideToLongOptions } from "./reshape/index.ts";
+export { cut, qcut } from "./stats/index.ts";
+export type { BinResult, CutOptions, QCutOptions } from "./stats/index.ts";
+export { rollingSem, rollingSkew, rollingKurt, rollingQuantile } from "./stats/index.ts";
+export type { WindowExtOptions, RollingQuantileOptions } from "./stats/index.ts";
+export { seriesWhere, seriesMask, dataFrameWhere, dataFrameMask } from "./stats/index.ts";
+export type {
+ SeriesCond,
+ DataFrameCond,
+ SeriesWhereOptions,
+ DataFrameWhereOptions,
+} from "./stats/index.ts";
+export { isna, notna, isnull, notnull, fillna, dropna, countna, countValid } from "./stats/index.ts";
+export type { IsnaInput, FillnaOptions, DropnaOptions } from "./stats/index.ts";
+export {
+ getAttrs,
+ setAttrs,
+ updateAttrs,
+ copyAttrs,
+ withAttrs,
+ clearAttrs,
+ hasAttrs,
+ getAttr,
+ setAttr,
+ deleteAttr,
+ attrsCount,
+ attrsKeys,
+ mergeAttrs,
+} from "./core/index.ts";
+export type { Attrs } from "./core/index.ts";
+export {
+ pipe,
+ seriesApply,
+ seriesTransform,
+ dataFrameApply,
+ dataFrameApplyMap,
+ dataFrameTransform,
+ dataFrameTransformRows,
+} from "./core/index.ts";
+export {
+ isScalar,
+ isListLike,
+ isArrayLike,
+ isDictLike,
+ isIterator,
+ isNumber,
+ isBool,
+ isStringValue,
+ isFloat,
+ isInteger,
+ isBigInt,
+ isRegExp,
+ isReCompilable,
+ isMissing,
+ isHashable,
+ isDate,
+ isNumericDtype,
+ isIntegerDtype,
+ isSignedIntegerDtype,
+ isUnsignedIntegerDtype,
+ isFloatDtype,
+ isBoolDtype,
+ isStringDtype,
+ isDatetimeDtype,
+ isTimedeltaDtype,
+ isCategoricalDtype,
+ isObjectDtype,
+ isComplexDtype,
+ isExtensionArrayDtype,
+ isPeriodDtype,
+ isIntervalDtype,
+} from "./core/index.ts";
+export {
+ strNormalize,
+ strGetDummies,
+ strExtractAll,
+ strRemovePrefix,
+ strRemoveSuffix,
+ strTranslate,
+ strCharWidth,
+ strByteLength,
+ strSplitExpand,
+ strExtractGroups,
+ strPartition,
+ strRPartition,
+ strMultiReplace,
+ strIndent,
+ strDedent,
+} from "./stats/index.ts";
+export type {
+ NormalizeForm,
+ StrInput,
+ GetDummiesOptions,
+ ExtractAllOptions,
+ SplitExpandOptions,
+ ExtractGroupsOptions,
+ PartitionResult,
+ ReplacePair,
+ IndentOptions,
+} from "./stats/index.ts";
+export {
+ digitize,
+ histogram,
+ linspace,
+ arange,
+ percentileOfScore,
+ zscore,
+ minMaxNormalize,
+ coefficientOfVariation,
+ seriesDigitize,
+} from "./stats/index.ts";
+export type {
+ HistogramOptions,
+ HistogramResult,
+ ZscoreOptions,
+ MinMaxOptions,
+ CvOptions,
+} from "./stats/index.ts";
+export {
+ catFromCodes,
+ catUnionCategories,
+ catIntersectCategories,
+ catDiffCategories,
+ catEqualCategories,
+ catSortByFreq,
+ catToOrdinal,
+ catFreqTable,
+ catCrossTab,
+ catRecode,
+} from "./stats/index.ts";
+export type {
+ CatFromCodesOptions,
+ CatSortByFreqOptions,
+ CatCrossTabOptions,
+} from "./stats/index.ts";
+export {
+ formatFloat,
+ formatPercent,
+ formatScientific,
+ formatEngineering,
+ formatThousands,
+ formatCurrency,
+ formatCompact,
+ makeFloatFormatter,
+ makePercentFormatter,
+ makeCurrencyFormatter,
+ applySeriesFormatter,
+ applyDataFrameFormatter,
+ seriesToString,
+ dataFrameToString,
+} from "./stats/index.ts";
+export type {
+ Formatter,
+ SeriesToStringOptions,
+ DataFrameToStringOptions,
+} from "./stats/index.ts";
diff --git a/src/reshape/index.ts b/src/reshape/index.ts
index f15320ca..849c435d 100644
--- a/src/reshape/index.ts
+++ b/src/reshape/index.ts
@@ -10,3 +10,5 @@ export { pivot, pivotTable } from "./pivot.ts";
export type { PivotOptions, PivotTableOptions, AggFuncName } from "./pivot.ts";
export { stack, unstack, STACK_DEFAULT_SEP } from "./stack_unstack.ts";
export type { StackOptions, UnstackOptions } from "./stack_unstack.ts";
+export { wideToLong } from "./wide_to_long.ts";
+export type { WideToLongOptions } from "./wide_to_long.ts";
diff --git a/src/reshape/wide_to_long.ts b/src/reshape/wide_to_long.ts
new file mode 100644
index 00000000..7ac62ba8
--- /dev/null
+++ b/src/reshape/wide_to_long.ts
@@ -0,0 +1,217 @@
+/**
+ * wide_to_long β reshape a wide DataFrame to a long format by collapsing
+ * stub-prefixed column groups into rows.
+ *
+ * Mirrors `pandas.wide_to_long(df, stubnames, i, j, sep='', suffix='\\d+')`.
+ *
+ * Given a DataFrame whose columns include groups like
+ * `"A1"`, `"A2"`, `"B1"`, `"B2"` (stubs `["A","B"]`, separator `""`, suffix `\\d+`),
+ * this function pivots those groups into long format where each unique suffix
+ * value becomes a new row:
+ *
+ * ```
+ * id num A B
+ * x 1 1 5
+ * x 2 3 7
+ * y 1 2 6
+ * y 2 4 8
+ * ```
+ *
+ * @example
+ * ```ts
+ * import { DataFrame } from "tsb";
+ * import { wideToLong } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({
+ * id: ["x", "y"],
+ * A1: [1, 2],
+ * A2: [3, 4],
+ * B1: [5, 6],
+ * B2: [7, 8],
+ * });
+ *
+ * const long = wideToLong(df, ["A", "B"], "id", "num");
+ * // long.columns.values β ["id", "num", "A", "B"]
+ * // long.shape β [4, 4]
+ * ```
+ *
+ * @module
+ */
+
+import type { Label, Scalar } from "../types.ts";
+import { Index } from "../core/base-index.ts";
+import { DataFrame } from "../core/frame.ts";
+import { RangeIndex } from "../core/range-index.ts";
+
+// βββ public types ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/** Options for {@link wideToLong}. */
+export interface WideToLongOptions {
+ /**
+ * Separator between stub name and suffix in column names.
+ * Defaults to `""` (no separator).
+ * @example `sep: "_"` matches columns like `"value_2021"`, `"value_2022"`.
+ */
+ readonly sep?: string;
+ /**
+ * Regular expression (as a string) that the suffix must match.
+ * The pattern is anchored as `^(?:pattern)$`, so it must match the
+ * entire suffix, not just a prefix of it.
+ * Defaults to `"\\d+"` (one or more digits).
+ * @example `suffix: "[a-z]+"` matches alphabetic suffixes.
+ */
+ readonly suffix?: string;
+}
+
+// βββ helpers ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/** Coerce a single string or a list of strings into a fresh `string[]`. */
+function toStringArray(x: readonly string[] | string): string[] {
+  if (typeof x === "string") {
+    return [x];
+  }
+  return Array.from(x);
+}
+
+/**
+ * Collect the unique suffix values that appear in the DataFrame column names
+ * for the given stubs, separator, and suffix regex.
+ *
+ * Returned suffixes are sorted: numerically when both compared suffixes parse
+ * as numbers, otherwise lexicographically. (The previous doc claimed
+ * first-seen order, which contradicted the sort performed below.)
+ */
+function collectSuffixes(
+  colNames: readonly string[],
+  stubs: readonly string[],
+  sep: string,
+  suffixRe: RegExp,
+): string[] {
+  // A Set suffices for dedupe; the old Map's first-seen positions were never used.
+  const seen = new Set<string>();
+  for (const col of colNames) {
+    for (const stub of stubs) {
+      const prefix = stub + sep;
+      if (!col.startsWith(prefix)) continue;
+      const rest = col.slice(prefix.length);
+      const m = rest.match(suffixRe);
+      // The whole remainder must match the suffix pattern.
+      if (m !== null && m[0] === rest) {
+        seen.add(rest);
+      }
+    }
+  }
+  return [...seen].sort((a, b) => {
+    const na = Number(a);
+    const nb = Number(b);
+    if (!Number.isNaN(na) && !Number.isNaN(nb)) {
+      return na - nb;
+    }
+    return a < b ? -1 : a > b ? 1 : 0;
+  });
+}
+
+// βββ wideToLong βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Reshape a wide-format DataFrame to long format by collapsing stub-prefixed
+ * column groups into rows.
+ *
+ * Mirrors `pandas.wide_to_long(df, stubnames, i, j, sep='', suffix='\\d+')`.
+ *
+ * @param df Source DataFrame (not mutated).
+ * @param stubnames Stub name(s) that prefix the wide columns (e.g. `["A", "B"]`).
+ * @param i Column name(s) to use as id variables (kept for every row).
+ * @param j Name of the new column that will hold the suffix value.
+ * @param options Optional `sep` and `suffix` overrides.
+ * @returns A new long-format DataFrame.
+ *
+ * @throws {RangeError} if any `i` column does not exist in `df`.
+ * @throws {RangeError} if `j` conflicts with an existing non-stub column name.
+ */
+export function wideToLong(
+  df: DataFrame,
+  stubnames: readonly string[] | string,
+  i: readonly string[] | string,
+  j: string,
+  options: WideToLongOptions = {},
+): DataFrame {
+  const stubs = toStringArray(stubnames);
+  const idCols = toStringArray(i);
+  const sep = options.sep ?? "";
+  const suffixPattern = options.suffix ?? "\\d+";
+  // Anchored so the pattern must match the entire suffix.
+  const suffixRe = new RegExp(`^(?:${suffixPattern})$`);
+
+  // Validate id columns exist.
+  for (const col of idCols) {
+    if (!df.has(col)) {
+      throw new RangeError(`id column "${col}" does not exist in DataFrame.`);
+    }
+  }
+
+  // j must not conflict with a non-stub, non-id column.
+  const colNames = [...df.columns.values];
+  const stubSet = new Set(stubs);
+  for (const col of colNames) {
+    if (col === j && !stubSet.has(col) && !idCols.includes(col)) {
+      throw new RangeError(`Column name "${j}" conflicts with existing column.`);
+    }
+  }
+
+  // Collect ordered suffix values.
+  const suffixes = collectSuffixes(colNames, stubs, sep, suffixRe);
+
+  const nRows = df.index.size;
+
+  // Resolve id-column value arrays once (previously a df.col lookup per cell).
+  const idColValues: Record<string, readonly Scalar[]> = {};
+  for (const col of idCols) {
+    idColValues[col] = df.col(col).values as readonly Scalar[];
+  }
+
+  // Build output column arrays.
+  const idArrays: Record<string, Scalar[]> = {};
+  for (const col of idCols) {
+    idArrays[col] = [];
+  }
+  const jArray: Scalar[] = [];
+  const stubArrays: Record<string, Scalar[]> = {};
+  for (const stub of stubs) {
+    stubArrays[stub] = [];
+  }
+
+  // Coerce suffix to number if possible (for the j-column values).
+  function coerceSuffix(s: string): Scalar {
+    const n = Number(s);
+    return Number.isNaN(n) ? s : n;
+  }
+
+  for (const suffix of suffixes) {
+    // Resolve each stub's wide column once per suffix rather than once per row.
+    const wideValues: Record<string, readonly Scalar[] | undefined> = {};
+    for (const stub of stubs) {
+      wideValues[stub] = df.get(stub + sep + suffix)?.values as readonly Scalar[] | undefined;
+    }
+    for (let row = 0; row < nRows; row++) {
+      // Append id column values.
+      for (const col of idCols) {
+        idArrays[col]?.push((idColValues[col]?.[row] ?? null) as Scalar);
+      }
+      // Append j value.
+      jArray.push(coerceSuffix(suffix));
+      // Append stub values; a missing wide column yields null.
+      for (const stub of stubs) {
+        const vals = wideValues[stub];
+        stubArrays[stub]?.push(vals !== undefined ? ((vals[row] ?? null) as Scalar) : null);
+      }
+    }
+  }
+
+  // Assemble output DataFrame column map.
+  const outData: Record<string, Scalar[]> = {};
+  for (const col of idCols) {
+    outData[col] = idArrays[col] ?? [];
+  }
+  outData[j] = jArray;
+  for (const stub of stubs) {
+    outData[stub] = stubArrays[stub] ?? [];
+  }
+
+  const totalRows = nRows * suffixes.length;
+  const rowIndex = new RangeIndex(totalRows) as unknown as Index;
+
+  return DataFrame.fromColumns(outData, { index: rowIndex });
+}
diff --git a/src/stats/categorical_ops.ts b/src/stats/categorical_ops.ts
new file mode 100644
index 00000000..f9abbb0d
--- /dev/null
+++ b/src/stats/categorical_ops.ts
@@ -0,0 +1,483 @@
+/**
+ * categorical_ops β standalone categorical utility functions.
+ *
+ * Mirrors pandas' `pd.Categorical`, `pd.Categorical.from_codes`, and related
+ * top-level helpers that operate on categorical data without requiring a method
+ * call on an existing `CategoricalAccessor`.
+ *
+ * All functions return a `CatSeriesLike` (or plain data) and are **pure** β
+ * inputs are never mutated.
+ *
+ * ### Included functions
+ *
+ * | Function | Pandas equivalent |
+ * |----------|-------------------|
+ * | `catFromCodes` | `pd.Categorical.from_codes` |
+ * | `catUnionCategories` | `a.cat.set_categories(union(...))` pattern |
+ * | `catIntersectCategories` | `a.cat.set_categories(intersect(...))` |
+ * | `catDiffCategories` | `a.cat.remove_categories(b_cats)` pattern |
+ * | `catEqualCategories` | compare `.cat.categories` sets |
+ * | `catSortByFreq` | `a.cat.reorder_categories(sorted_by_freq)` |
+ * | `catToOrdinal` | `pd.Categorical(values, categories=order, ordered=True)` |
+ * | `catFreqTable` | `a.value_counts(sort=False)` on categorical |
 * | `catCrossTab` | reduced `pd.crosstab` for two categorical Series |
 * | `catRecode` | `a.cat.rename_categories(mapping)` |
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import type { CatSeriesLike } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// βββ public option types βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
/** Options for {@link catFromCodes}. */
export interface CatFromCodesOptions {
  /** Whether the resulting categorical is ordered. Default `false`. */
  ordered?: boolean;
  /** Series name for the result. Default `null` (unnamed). */
  name?: string | null;
}
+
/** Options for {@link catSortByFreq}. */
export interface CatSortByFreqOptions {
  /**
   * If `true`, least frequent categories come first. Default `false` (most frequent first).
   * Categories tied on frequency keep their existing relative order (stable sort).
   */
  ascending?: boolean;
}
+
/** Options for {@link catCrossTab}. */
export interface CatCrossTabOptions {
  /** If `true`, include a row and column of totals. Default `false`. */
  margins?: boolean;
  /** Label used for the margins row/column. Default `"All"`. */
  marginsName?: string;
  /**
   * If `true`, normalize counts (divide by total). Default `false`.
   * Every cell — including margin cells when `margins` is enabled — is divided
   * by the grand total of all counts.
   */
  normalize?: boolean;
}
+
+// βββ internal helpers βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/** Return true when value should be treated as missing. */
+function isMissing(v: Scalar): boolean {
+ return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+}
+
+/** Build a sorted unique key list preserving encounter order (for cats). */
+function uniqueKeys(cats: readonly Scalar[]): Scalar[] {
+ const seen = new Set();
+ const result: Scalar[] = [];
+ for (const c of cats) {
+ const k = String(c);
+ if (!seen.has(k)) {
+ seen.add(k);
+ result.push(c);
+ }
+ }
+ return result;
+}
+
+// βββ catFromCodes βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Construct a categorical `Series` from integer codes and a categories array.
+ *
+ * Mirrors `pandas.Categorical.from_codes(codes, categories, ordered=False)`.
+ *
+ * - Codes are **0-based** indices into `categories`.
+ * - A code of `-1` maps to `null` (missing value), matching pandas `NaN`.
+ * - Any code outside `[-1, categories.length)` throws a `RangeError`.
+ *
+ * @param codes Integer codes (one per element).
+ * @param categories Array of category labels; the order defines ordinal rank.
+ * @param opts Optional settings (ordered, name).
+ * @returns A `CatSeriesLike` with the specified categories.
+ *
+ * @example
+ * ```ts
+ * const s = catFromCodes([0, 2, 1, -1, 0], ["a", "b", "c"]);
+ * s.cat.categories.values; // ["a", "b", "c"]
+ * s.toArray(); // ["a", "c", "b", null, "a"]
+ * ```
+ */
+export function catFromCodes(
+ codes: readonly number[],
+ categories: readonly Scalar[],
+ opts: CatFromCodesOptions = {},
+): CatSeriesLike {
+ const { ordered = false, name = null } = opts;
+ const cats = uniqueKeys(categories);
+ const values: Scalar[] = codes.map((code) => {
+ if (code === -1) return null;
+ if (code < -1 || code >= cats.length) {
+ throw new RangeError(
+ `catFromCodes: code ${code} is out of range [0, ${cats.length - 1}]`,
+ );
+ }
+ return cats[code] as Scalar;
+ });
+ const base = new Series({ data: values, name });
+ return base.cat.setCategories(cats, ordered);
+}
+
+// βββ catUnionCategories ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Return a new `CatSeriesLike` with the same values as `a` but whose categories
+ * are the **union** of `a`'s and `b`'s categories.
+ *
+ * Categories from `b` that are not already in `a` are appended (in the order
+ * they appear in `b`). The ordering flag is taken from `a`.
+ *
+ * @example
+ * ```ts
+ * const a = new Series({ data: ["x", "y"] }).cat.setCategories(["x", "y"]);
+ * const b = new Series({ data: ["y", "z"] }).cat.setCategories(["y", "z"]);
+ * catUnionCategories(a, b).cat.categories.values; // ["x", "y", "z"]
+ * ```
+ */
+export function catUnionCategories(a: CatSeriesLike, b: CatSeriesLike): CatSeriesLike {
+ const aCats = a.cat.categories.values as Scalar[];
+ const bCats = b.cat.categories.values as Scalar[];
+ const seen = new Set(aCats.map(String));
+ const merged = [...aCats];
+ for (const c of bCats) {
+ if (!seen.has(String(c))) {
+ seen.add(String(c));
+ merged.push(c);
+ }
+ }
+ return a.cat.setCategories(merged, a.cat.ordered);
+}
+
+// βββ catIntersectCategories βββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Return a new `CatSeriesLike` with values from `a` whose categories are the
+ * **intersection** of `a`'s and `b`'s categories.
+ *
+ * Values whose category is not in the intersection are set to `null`.
+ *
+ * @example
+ * ```ts
+ * const a = new Series({ data: ["x", "y", "z"] }).cat.setCategories(["x", "y", "z"]);
+ * const b = new Series({ data: ["y", "z"] }).cat.setCategories(["y", "z"]);
+ * const r = catIntersectCategories(a, b);
+ * r.cat.categories.values; // ["y", "z"]
+ * r.toArray(); // [null, "y", "z"]
+ * ```
+ */
+export function catIntersectCategories(a: CatSeriesLike, b: CatSeriesLike): CatSeriesLike {
+ const bSet = new Set((b.cat.categories.values as Scalar[]).map(String));
+ const intersected = (a.cat.categories.values as Scalar[]).filter((c) =>
+ bSet.has(String(c)),
+ );
+ return a.cat.setCategories(intersected, a.cat.ordered);
+}
+
+// βββ catDiffCategories ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Return a new `CatSeriesLike` with values from `a` whose categories are the
+ * **set difference** `a.categories β b.categories`.
+ *
+ * Values whose category is present in `b` are set to `null`.
+ *
+ * @example
+ * ```ts
+ * const a = new Series({ data: ["x", "y", "z"] }).cat.setCategories(["x", "y", "z"]);
+ * const b = new Series({ data: ["z"] }).cat.setCategories(["z"]);
+ * const r = catDiffCategories(a, b);
+ * r.cat.categories.values; // ["x", "y"]
+ * r.toArray(); // ["x", "y", null]
+ * ```
+ */
+export function catDiffCategories(a: CatSeriesLike, b: CatSeriesLike): CatSeriesLike {
+ const bSet = new Set((b.cat.categories.values as Scalar[]).map(String));
+ const remaining = (a.cat.categories.values as Scalar[]).filter(
+ (c) => !bSet.has(String(c)),
+ );
+ return a.cat.setCategories(remaining, a.cat.ordered);
+}
+
+// βββ catEqualCategories βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Return `true` when `a` and `b` have exactly the same set of categories,
+ * ignoring order.
+ *
+ * @example
+ * ```ts
+ * const a = new Series({ data: ["x"] }).cat.setCategories(["x", "y"]);
+ * const b = new Series({ data: ["y"] }).cat.setCategories(["y", "x"]);
+ * catEqualCategories(a, b); // true
+ * ```
+ */
+export function catEqualCategories(a: CatSeriesLike, b: CatSeriesLike): boolean {
+ const aSet = new Set((a.cat.categories.values as Scalar[]).map(String));
+ const bSet = new Set((b.cat.categories.values as Scalar[]).map(String));
+ if (aSet.size !== bSet.size) return false;
+ for (const c of aSet) {
+ if (!bSet.has(c)) return false;
+ }
+ return true;
+}
+
+// βββ catSortByFreq ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Reorder the categories of a categorical Series by their **frequency** in the
+ * data (most frequent first by default).
+ *
+ * Mirrors `series.cat.reorder_categories(series.value_counts().index)`.
+ *
+ * @param series The source categorical Series.
+ * @param opts `{ ascending: false }` β set `true` for rarest-first.
+ * @returns A new `CatSeriesLike` with categories sorted by frequency.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: ["b", "a", "b", "c", "b", "a"] })
+ * .cat.setCategories(["a", "b", "c"]);
+ * catSortByFreq(s).cat.categories.values; // ["b", "a", "c"]
+ * ```
+ */
+export function catSortByFreq(
+ series: CatSeriesLike,
+ opts: CatSortByFreqOptions = {},
+): CatSeriesLike {
+ const { ascending = false } = opts;
+ const cats = series.cat.categories.values as Scalar[];
+ const freq = new Map();
+ for (const c of cats) freq.set(String(c), 0);
+ for (const v of series.values) {
+ if (!isMissing(v)) {
+ const k = String(v);
+ const prev = freq.get(k);
+ if (prev !== undefined) freq.set(k, prev + 1);
+ }
+ }
+ const sorted = [...cats].sort((a, b) => {
+ const fa = freq.get(String(a)) ?? 0;
+ const fb = freq.get(String(b)) ?? 0;
+ return ascending ? fa - fb : fb - fa;
+ });
+ return series.cat.reorderCategories(sorted);
+}
+
+// βββ catToOrdinal βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Create an **ordered** categorical Series from `series` using `order` to
+ * define both the category set and their rank.
+ *
+ * Mirrors `pd.Categorical(series, categories=order, ordered=True)`.
+ *
+ * Values not present in `order` are set to `null`. The number of categories
+ * in the result equals `order.length`.
+ *
+ * @param series Source Series (any values).
+ * @param order Ordered list of category labels (low to high).
+ * @returns A new `CatSeriesLike` with `.cat.ordered === true`.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: ["med", "low", "high", "med"] });
+ * const ord = catToOrdinal(s, ["low", "med", "high"]);
+ * ord.cat.ordered; // true
+ * ord.cat.categories.values; // ["low", "med", "high"]
+ * ```
+ */
+export function catToOrdinal(series: CatSeriesLike, order: readonly Scalar[]): CatSeriesLike {
+ return series.cat.setCategories(order, true);
+}
+
+// βββ catFreqTable βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Return the frequency of each category as a plain `Record`.
+ *
+ * All defined categories are present in the result, even those with zero
+ * occurrences, matching `series.cat.value_counts()` semantics.
+ *
+ * Missing values are excluded from the count.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: ["b", "a", "b", null] })
+ * .cat.setCategories(["a", "b", "c"]);
+ * catFreqTable(s); // { a: 1, b: 2, c: 0 }
+ * ```
+ */
+export function catFreqTable(series: CatSeriesLike): Record {
+ const cats = series.cat.categories.values as Scalar[];
+ const freq: Record = {};
+ for (const c of cats) freq[String(c)] = 0;
+ for (const v of series.values) {
+ if (!isMissing(v)) {
+ const k = String(v);
+ if (Object.prototype.hasOwnProperty.call(freq, k)) {
+ (freq[k] as number) += 1;
+ }
+ }
+ }
+ return freq;
+}
+
+// βββ catCrossTab ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Compute a cross-tabulation of two categorical Series.
+ *
+ * Mirrors a simplified `pd.crosstab(a, b)` for categorical inputs:
+ * rows = `a`'s categories, columns = `b`'s categories, cells = co-occurrence
+ * counts. Only aligned positions (same integer index) are tallied; missing
+ * values in either Series skip the row.
+ *
+ * @param a First categorical Series (determines rows).
+ * @param b Second categorical Series (determines columns).
+ * @param opts `{ margins, marginsName, normalize }`.
+ * @returns A `DataFrame` of count (or proportion) values.
+ *
+ * @example
+ * ```ts
+ * const a = new Series({ data: ["x", "x", "y", "y"] }).cat.setCategories(["x", "y"]);
+ * const b = new Series({ data: ["p", "q", "p", "q"] }).cat.setCategories(["p", "q"]);
+ * const ct = catCrossTab(a, b);
+ * // DataFrame:
+ * // p q
+ * // x 1 1
+ * // y 1 1
+ * ```
+ */
+export function catCrossTab(
+ a: CatSeriesLike,
+ b: CatSeriesLike,
+ opts: CatCrossTabOptions = {},
+): DataFrame {
+ const { margins = false, marginsName = "All", normalize = false } = opts;
+
+ const rowCats = a.cat.categories.values as Label[];
+ const colCats = b.cat.categories.values as Label[];
+
+ // Build count matrix: rowCats Γ colCats
+ const counts = new Map>();
+ for (const r of rowCats) {
+ const row = new Map();
+ for (const c of colCats) row.set(String(c), 0);
+ counts.set(String(r), row);
+ }
+
+ const aVals = a.values;
+ const bVals = b.values;
+ const n = Math.min(aVals.length, bVals.length);
+ for (let i = 0; i < n; i++) {
+ const av = aVals[i];
+ const bv = bVals[i];
+ if (isMissing(av) || isMissing(bv)) continue;
+ const row = counts.get(String(av));
+ if (row === undefined) continue;
+ const prev = row.get(String(bv));
+ if (prev !== undefined) row.set(String(bv), prev + 1);
+ }
+
+ // Compute total for normalization
+ let total = 0;
+ if (normalize) {
+ for (const row of counts.values()) {
+ for (const v of row.values()) total += v;
+ }
+ }
+
+ // Build data columns: each colCat is a column, each rowCat is a row value
+ const data: Record = {};
+ for (const c of colCats) {
+ const col: Scalar[] = [];
+ for (const r of rowCats) {
+ const v = counts.get(String(r))?.get(String(c)) ?? 0;
+ col.push(normalize && total > 0 ? v / total : v);
+ }
+ data[String(c)] = col;
+ }
+
+ // Add margin column (row totals)
+ if (margins) {
+ const rowTotals: Scalar[] = rowCats.map((r) => {
+ let sum = 0;
+ const row = counts.get(String(r));
+ if (row) for (const v of row.values()) sum += v;
+ return normalize && total > 0 ? sum / total : sum;
+ });
+ data[marginsName] = rowTotals;
+ }
+
+ // Build DataFrame with row index = rowCats
+ const rowLabels: Label[] = [...rowCats];
+
+ // Add margin row (column totals)
+ if (margins) {
+ const allCols = [...colCats.map(String), marginsName];
+ let marginRowTotal = 0;
+ for (const c of colCats) {
+ let colSum = 0;
+ for (const r of rowCats) {
+ colSum += counts.get(String(r))?.get(String(c)) ?? 0;
+ }
+ const val = normalize && total > 0 ? colSum / total : colSum;
+ (data[String(c)] as Scalar[]).push(val);
+ marginRowTotal += normalize && total > 0 ? colSum / total : colSum;
+ }
+ if (margins) {
+ (data[marginsName] as Scalar[]).push(
+ normalize && total > 0 ? marginRowTotal : marginRowTotal,
+ );
+ }
+ rowLabels.push(marginsName as Label);
+ // Ensure all column arrays have the same length
+ for (const col of allCols) {
+ const arr = data[col];
+ if (arr === undefined) data[col] = rowLabels.map(() => 0);
+ }
+ }
+
+ return DataFrame.fromColumns(data, { index: rowLabels });
+}
+
+// βββ catRecode ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Rename categories of a categorical Series using a stringβstring map.
+ *
+ * Mirrors `series.cat.rename_categories(mapping)` but as a standalone function
+ * that also accepts a transform function.
+ *
+ * @param series The source categorical.
+ * @param mapping Either a `Record` (rename specified keys) or
+ * a `(label: string) => string` transform applied to every category.
+ * @returns A new `CatSeriesLike` with renamed categories.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: ["a", "b"] }).cat.setCategories(["a", "b", "c"]);
+ * catRecode(s, { a: "A", b: "B" }).cat.categories.values; // ["A", "B", "c"]
+ * catRecode(s, (x) => x.toUpperCase()).cat.categories.values; // ["A", "B", "C"]
+ * ```
+ */
+export function catRecode(
+ series: CatSeriesLike,
+ mapping: Record | ((label: string) => string),
+): CatSeriesLike {
+ if (typeof mapping === "function") {
+ return series.cat.renameCategories(
+ (series.cat.categories.values as Scalar[]).map((c) => mapping(String(c))),
+ );
+ }
+ const cats = series.cat.categories.values as Scalar[];
+ const newCats = cats.map((c): Scalar => {
+ const k = String(c);
+ return Object.prototype.hasOwnProperty.call(mapping, k)
+ ? (mapping[k] as string)
+ : c;
+ });
+ return series.cat.renameCategories(newCats);
+}
diff --git a/src/stats/cut_qcut.ts b/src/stats/cut_qcut.ts
new file mode 100644
index 00000000..d24b3dda
--- /dev/null
+++ b/src/stats/cut_qcut.ts
@@ -0,0 +1,383 @@
+/**
+ * cut / qcut β bin continuous data into discrete intervals.
+ *
+ * Mirrors `pandas.cut` and `pandas.qcut`:
+ *
+ * - {@link cut} β bin values into fixed-width or user-supplied bins.
+ * - {@link qcut} β bin values into quantile-based bins of equal population.
+ *
+ * Both functions return a {@link BinResult} describing the assigned bin for
+ * each input value, the ordered bin labels, the numeric bin edges, and (for
+ * `qcut`) the actual quantile edges used.
+ *
+ * @example
+ * ```ts
+ * import { cut, qcut } from "tsb";
+ *
 * const result = cut([1, 2, 3, 4, 5], 2);
 * result.codes;  // [0, 0, 0, 1, 1]
 * result.labels; // ["(0.998, 3]", "(3, 5]"] (lower edge extended 0.1% so the min is included)
 * result.bins;   // [0.998, 3, 5]
 *
 * const qr = qcut([1, 2, 3, 4, 5], 2);
 * qr.codes; // [0, 0, 0, 1, 1] (median split; 3 falls in the closed lower bin)
+ * ```
+ *
+ * @module
+ */
+
+// βββ public types βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Result of {@link cut} or {@link qcut}.
+ *
+ * - `codes` β integer bin index for each input value (`null` for NaN / missing).
+ * - `labels` β ordered array of label strings (one per bin).
+ * - `bins` β numeric bin edge array (length = `labels.length + 1`).
+ */
+export interface BinResult {
+ /** Bin index for each input value. `null` when the value is NaN or out of range. */
+ readonly codes: ReadonlyArray;
+ /** Ordered bin labels. */
+ readonly labels: readonly string[];
+ /** Bin edge array: `bins[i]` to `bins[i+1]` is the i-th bin. */
+ readonly bins: readonly number[];
+}
+
/** Options for {@link cut}. */
export interface CutOptions {
  /**
   * Custom labels for the resulting bins.
   * - Array of strings: one label per bin (length must equal number of bins).
   * - `false`: return integer codes directly (labels will be `["0","1",...]`).
   * - Omitted: auto-generate interval strings like `"(0.5, 1.5]"`.
   */
  readonly labels?: readonly string[] | false;
  /**
   * Whether intervals are closed on the right (default `true`).
   * - `true` — `(a, b]`
   * - `false` — `[a, b)`
   */
  readonly right?: boolean;
  /**
   * When `true`, the leftmost interval is closed on the left as well
   * (default `false`). Only meaningful when `right` is `true`.
   */
  readonly include_lowest?: boolean;
  /**
   * Number of decimal places in auto-generated interval labels (default `3`).
   * Trailing zeros are stripped from rendered edges, so `3.0` renders as `"3"`.
   */
  readonly precision?: number;
  /**
   * How to handle duplicate bin edges generated from data (default `"raise"`).
   * - `"raise"` — throw if duplicate edges are detected.
   * - `"drop"` — silently remove duplicate edges.
   *
   * Only relevant when `bins` is an integer.
   */
  readonly duplicates?: "raise" | "drop";
}
+
/** Options for {@link qcut}. */
export interface QCutOptions {
  /**
   * Custom labels (same semantics as {@link CutOptions.labels}).
   */
  readonly labels?: readonly string[] | false;
  /**
   * Number of decimal places in auto-generated interval labels (default `3`).
   */
  readonly precision?: number;
  /**
   * How to handle duplicate quantile edges (default `"raise"`).
   * - `"raise"` — throw when two quantiles resolve to the same edge value.
   * - `"drop"` — merge duplicate edges, producing fewer bins than requested.
   */
  readonly duplicates?: "raise" | "drop";
}
+
+// βββ helpers ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/** Format a numeric edge to at most `precision` decimal places. */
+function fmt(v: number, precision: number): string {
+ return v.toFixed(precision).replace(/\.?0+$/, "").replace(/^-0$/, "0");
+}
+
+/** Build interval label string from two edges. */
+function intervalLabel(lo: number, hi: number, right: boolean, precision: number): string {
+ const left = right ? "(" : "[";
+ const right_bracket = right ? "]" : ")";
+ return `${left}${fmt(lo, precision)}, ${fmt(hi, precision)}${right_bracket}`;
+}
+
+/** Compute the k-th quantile (0β1) of a sorted (non-NaN) array using linear interpolation. */
+function quantileOfSorted(sorted: readonly number[], q: number): number {
+ if (sorted.length === 0) return Number.NaN;
+ if (q <= 0) return sorted[0] as number;
+ if (q >= 1) return sorted[sorted.length - 1] as number;
+ const idx = q * (sorted.length - 1);
+ const lo = Math.floor(idx);
+ const hi = Math.ceil(idx);
+ if (lo === hi) return sorted[lo] as number;
+ const frac = idx - lo;
+ return (sorted[lo] as number) * (1 - frac) + (sorted[hi] as number) * frac;
+}
+
+/** Deduplicate a sorted numeric array, optionally raising on duplicates. */
+function deduplicateEdges(edges: number[], duplicates: "raise" | "drop"): number[] {
+ const deduped: number[] = [edges[0] as number];
+ for (let i = 1; i < edges.length; i++) {
+ if ((edges[i] as number) === deduped[deduped.length - 1]) {
+ if (duplicates === "raise") {
+ throw new Error(
+ `Duplicate bin edge ${edges[i]}. Pass duplicates="drop" to silently remove duplicates.`,
+ );
+ }
+ // drop duplicate β skip
+ } else {
+ deduped.push(edges[i] as number);
+ }
+ }
+ return deduped;
+}
+
+/** Assign each value to a bin index given sorted bin edges. */
+function assignBins(
+ values: readonly number[],
+ edges: readonly number[],
+ right: boolean,
+ include_lowest: boolean,
+): Array {
+ const n = edges.length - 1; // number of bins
+ return values.map((v) => {
+ if (!Number.isFinite(v) || Number.isNaN(v)) return null;
+ // Binary search for the bin
+ let lo = 0;
+ let hi = n - 1;
+ while (lo < hi) {
+ const mid = (lo + hi) >> 1;
+ const binHi = edges[mid + 1] as number;
+ const binLo = edges[mid] as number;
+ if (right) {
+ // (binLo, binHi]
+ if (v <= binHi) hi = mid;
+ else lo = mid + 1;
+ } else {
+ // [binLo, binHi)
+ if (v < binHi) hi = mid;
+ else lo = mid + 1;
+ }
+ }
+ // Validate the found bin
+ const binLo = edges[lo] as number;
+ const binHi = edges[lo + 1] as number;
+ if (right) {
+ // (binLo, binHi] β but first bin may be [binLo, binHi] if include_lowest
+ if (v > binHi) return null;
+ if (lo === 0 && include_lowest) {
+ if (v < binLo) return null;
+ } else {
+ if (v <= binLo) return null;
+ }
+ } else {
+ // [binLo, binHi)
+ if (v < binLo || v >= binHi) {
+ // Last bin includes the right edge
+ if (lo === n - 1 && v === binHi) return lo;
+ return null;
+ }
+ }
+ return lo;
+ });
+}
+
// ─── cut ──────────────────────────────────────────────────────────────────────

/**
 * Bin values into discrete intervals.
 *
 * When `bins` is an integer, the bin edges are computed from the data range
 * and the lowest edge is extended by 0.1% of a bin width so the minimum value
 * falls inside the first bin (mirroring pandas' range extension).
 *
 * @param x Array of numeric values to bin.
 * @param bins Number of equal-width bins **or** an explicit array of
 *   monotonically increasing bin edges.
 * @param options See {@link CutOptions}.
 * @returns A {@link BinResult}.
 * @throws {Error} When `bins` is not a positive integer / has fewer than 2
 *   edges, when the data is empty/all-NaN or constant, or when duplicate
 *   edges arise with `duplicates: "raise"`.
 *
 * @example
 * ```ts
 * const { codes, labels } = cut([1, 2, 3, 4, 5], 2);
 * // codes  → [0, 0, 0, 1, 1]
 * // labels → ["(0.998, 3]", "(3, 5]"]   (lower edge extended so 1 is included)
 * ```
 */
export function cut(
  x: readonly number[],
  bins: number | readonly number[],
  options: CutOptions = {},
): BinResult {
  const {
    labels: labelsOpt,
    right = true,
    include_lowest = false,
    precision = 3,
    duplicates = "raise",
  } = options;

  // ── build bin edges ─────────────────────────────────────────────────────────
  let edges: number[];
  if (typeof bins === "number") {
    if (bins < 1 || !Number.isInteger(bins)) {
      throw new Error("`bins` must be a positive integer when passed as a number.");
    }
    // Edges are derived from the finite values only; NaN/±Infinity are ignored
    // here and later map to null codes.
    const finite = x.filter((v) => Number.isFinite(v));
    if (finite.length === 0) {
      throw new Error("Cannot cut empty or all-NaN array.");
    }
    const mn = Math.min(...finite);
    const mx = Math.max(...finite);
    if (mn === mx) {
      // Zero-width range would make every bin degenerate.
      throw new Error("Cannot cut constant array (all values identical).");
    }
    const step = (mx - mn) / bins;
    edges = Array.from({ length: bins + 1 }, (_, i) => mn + i * step);
    // Slightly extend the lower edge so the minimum value is included
    // (note: the reported `bins[0]` is therefore slightly below the data min).
    edges[0] = mn - step * 0.001;
    edges = deduplicateEdges(edges, duplicates);
  } else {
    if (bins.length < 2) {
      throw new Error("At least 2 bin edges must be supplied.");
    }
    edges = [...bins];
    // Validate monotone
    for (let i = 1; i < edges.length; i++) {
      if ((edges[i] as number) <= (edges[i - 1] as number)) {
        throw new Error("Bin edges must be monotonically increasing.");
      }
    }
  }

  const numBins = edges.length - 1;

  // ── build labels ────────────────────────────────────────────────────────────
  let resolvedLabels: string[];
  if (labelsOpt === false) {
    // labels:false — callers want raw codes; labels become "0", "1", ...
    resolvedLabels = Array.from({ length: numBins }, (_, i) => String(i));
  } else if (Array.isArray(labelsOpt)) {
    if (labelsOpt.length !== numBins) {
      throw new Error(
        `Length of labels (${labelsOpt.length}) must equal number of bins (${numBins}).`,
      );
    }
    resolvedLabels = [...labelsOpt];
  } else {
    resolvedLabels = Array.from({ length: numBins }, (_, i) => {
      const lo = edges[i] as number;
      const hi = edges[i + 1] as number;
      if (i === 0 && include_lowest && right) {
        // Show the leftmost bin as [lo, hi]
        return `[${fmt(lo, precision)}, ${fmt(hi, precision)}]`;
      }
      return intervalLabel(lo, hi, right, precision);
    });
  }

  // ── assign bins ─────────────────────────────────────────────────────────────
  const codes = assignBins(x, edges, right, include_lowest);

  return { codes, labels: resolvedLabels, bins: edges };
}
+
// ─── qcut ─────────────────────────────────────────────────────────────────────

/**
 * Quantile-based discretization.
 *
 * Bins values such that each bin contains (approximately) equal numbers of
 * observations, using linearly-interpolated quantiles. Unlike {@link cut},
 * the first bin is always closed on the left (`[lo, hi]`), matching pandas.
 *
 * @param x Array of numeric values to bin.
 * @param q Number of quantile bins (integer ≥ 2) **or** an explicit array
 *   of quantile probabilities in [0, 1] (monotonically increasing).
 *   Common shorthand: `4` → quartiles, `10` → deciles.
 * @param options See {@link QCutOptions}.
 * @returns A {@link BinResult}.
 * @throws {Error} When `q` is invalid, the data is empty/all-NaN, or duplicate
 *   quantile edges arise with `duplicates: "raise"`.
 *
 * @example
 * ```ts
 * const { codes, labels } = qcut([1, 2, 3, 4, 5], 4);
 * // Quantile edges at 0%, 25%, 50%, 75%, 100% of [1..5]
 * ```
 */
export function qcut(
  x: readonly number[],
  q: number | readonly number[],
  options: QCutOptions = {},
): BinResult {
  const { labels: labelsOpt, precision = 3, duplicates = "raise" } = options;

  // ── build quantile probabilities ─────────────────────────────────────────
  let quantiles: number[];
  if (typeof q === "number") {
    if (q < 2 || !Number.isInteger(q)) {
      throw new Error("`q` must be an integer ≥ 2 when passed as a number.");
    }
    // q bins need q+1 evenly spaced probabilities: 0, 1/q, ..., 1.
    quantiles = Array.from({ length: q + 1 }, (_, i) => i / q);
  } else {
    quantiles = [...q];
    if (quantiles.length < 2) {
      throw new Error("At least 2 quantile probabilities must be supplied.");
    }
    for (let i = 1; i < quantiles.length; i++) {
      if ((quantiles[i] as number) <= (quantiles[i - 1] as number)) {
        throw new Error("Quantile probabilities must be monotonically increasing.");
      }
    }
    if ((quantiles[0] as number) < 0 || (quantiles[quantiles.length - 1] as number) > 1) {
      throw new Error("Quantile probabilities must be in [0, 1].");
    }
  }

  // ── compute edges from sorted data ───────────────────────────────────────
  const finite = x.filter((v) => Number.isFinite(v) && !Number.isNaN(v));
  if (finite.length === 0) {
    throw new Error("Cannot qcut empty or all-NaN array.");
  }
  const sorted = [...finite].sort((a, b) => a - b);

  let edges: number[] = quantiles.map((p) => quantileOfSorted(sorted, p));

  // Deduplicate — heavily tied data can collapse neighboring quantile edges.
  edges = deduplicateEdges(edges, duplicates);

  const numBins = edges.length - 1;
  if (numBins < 1) {
    throw new Error(
      "Not enough unique quantile edges. Try passing duplicates=\"drop\" or reducing `q`.",
    );
  }

  // ── build labels ────────────────────────────────────────────────────────────
  let resolvedLabels: string[];
  if (labelsOpt === false) {
    resolvedLabels = Array.from({ length: numBins }, (_, i) => String(i));
  } else if (Array.isArray(labelsOpt)) {
    if (labelsOpt.length !== numBins) {
      throw new Error(
        `Length of labels (${labelsOpt.length}) must equal number of bins (${numBins}).`,
      );
    }
    resolvedLabels = [...labelsOpt];
  } else {
    resolvedLabels = Array.from({ length: numBins }, (_, i) => {
      const lo = edges[i] as number;
      const hi = edges[i + 1] as number;
      if (i === 0) {
        // First bin is always left-closed in qcut (pandas semantics)
        return `[${fmt(lo, precision)}, ${fmt(hi, precision)}]`;
      }
      return `(${fmt(lo, precision)}, ${fmt(hi, precision)}]`;
    });
  }

  // ── assign bins (qcut always uses right-closed, include_lowest) ──────────
  const codes = assignBins(x, edges, true, true);

  return { codes, labels: resolvedLabels, bins: edges };
}
diff --git a/src/stats/format_ops.ts b/src/stats/format_ops.ts
new file mode 100644
index 00000000..148a85b1
--- /dev/null
+++ b/src/stats/format_ops.ts
@@ -0,0 +1,442 @@
/**
 * format_ops — number-formatting helpers for Series and DataFrame.
 *
 * Mirrors several pandas formatting utilities including `Series.map`,
 * `DataFrame.style`, and the `format_` methods.
 *
 * Exported functions:
 * - {@link formatFloat} — fixed decimal places
 * - {@link formatPercent} — percentage string
 * - {@link formatScientific} — scientific notation (e.g. `1.23e+4`)
 * - {@link formatEngineering} — engineering notation (exponent multiple of 3)
 * - {@link formatThousands} — thousands-separated string
 * - {@link formatCurrency} — currency string
 * - {@link formatCompact} — compact notation (K, M, B, T)
 * - {@link makeFloatFormatter} — factory returning a float formatter
 * - {@link makePercentFormatter} — factory returning a percent formatter
 * - {@link makeCurrencyFormatter} — factory returning a currency formatter
 * - {@link applySeriesFormatter} — apply a formatter to every value in a Series
 * - {@link applyDataFrameFormatter} — apply per-column formatters to a DataFrame
 * - {@link seriesToString} — render a Series as a human-readable string
 * - {@link dataFrameToString} — render a DataFrame as a human-readable string
 *
 * @module
 */
+
+import { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// βββ scalar formatting ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Format a number with a fixed number of decimal places.
+ *
+ * @param value The number to format (non-finite values render as their string).
+ * @param decimals Number of decimal places. Default: `2`.
+ */
+export function formatFloat(value: number, decimals = 2): string {
+ if (!Number.isFinite(value)) {
+ return String(value);
+ }
+ return value.toFixed(decimals);
+}
+
+/**
+ * Format a number as a percentage string.
+ *
+ * The value is multiplied by 100 before formatting.
+ * e.g. `formatPercent(0.1234, 1)` β `"12.3%"`.
+ *
+ * @param value The proportion to format (0 β `"0.00%"`, 1 β `"100.00%"`).
+ * @param decimals Number of decimal places. Default: `2`.
+ */
+export function formatPercent(value: number, decimals = 2): string {
+ if (!Number.isFinite(value)) {
+ return String(value);
+ }
+ return `${(value * 100).toFixed(decimals)}%`;
+}
+
+/**
+ * Format a number in scientific notation.
+ *
+ * e.g. `formatScientific(12345.678, 3)` β `"1.235e+4"`.
+ *
+ * @param value The number to format.
+ * @param decimals Significant figures after the decimal point. Default: `3`.
+ */
+export function formatScientific(value: number, decimals = 3): string {
+ if (!Number.isFinite(value)) {
+ return String(value);
+ }
+ return value.toExponential(decimals);
+}
+
+/**
+ * Format a number in engineering notation (exponent always a multiple of 3).
+ *
+ * e.g. `formatEngineering(12345.678, 3)` β `"12.346e+3"`.
+ *
+ * @param value The number to format.
+ * @param decimals Decimal places in the mantissa. Default: `3`.
+ */
+export function formatEngineering(value: number, decimals = 3): string {
+ if (!Number.isFinite(value)) {
+ return String(value);
+ }
+ if (value === 0) {
+ return `0.${"0".repeat(decimals)}e+0`;
+ }
+ const sign = value < 0 ? "-" : "";
+ const abs = Math.abs(value);
+ const exp = Math.floor(Math.log10(abs));
+ const engExp = Math.floor(exp / 3) * 3;
+ const mantissa = abs / 10 ** engExp;
+ const expSign = engExp >= 0 ? "+" : "-";
+ return `${sign}${mantissa.toFixed(decimals)}e${expSign}${Math.abs(engExp)}`;
+}
+
+/**
+ * Format a number with a thousands separator.
+ *
+ * e.g. `formatThousands(1234567.89, 2)` β `"1,234,567.89"`.
+ *
+ * @param value The number to format.
+ * @param decimals Decimal places. Default: `2`.
+ * @param separator Thousands separator. Default: `","`.
+ */
+export function formatThousands(value: number, decimals = 2, separator = ","): string {
+ if (!Number.isFinite(value)) {
+ return String(value);
+ }
+ const fixed = value.toFixed(decimals);
+ const [intPart, fracPart] = fixed.split(".");
+ const intStr = intPart ?? "";
+ const isNeg = intStr.startsWith("-");
+ const digits = isNeg ? intStr.slice(1) : intStr;
+ const withSep = digits.replace(/\B(?=(\d{3})+(?!\d))/g, separator);
+ const sign = isNeg ? "-" : "";
+ return fracPart !== undefined ? `${sign}${withSep}.${fracPart}` : `${sign}${withSep}`;
+}
+
+/**
+ * Format a number as a currency string.
+ *
+ * e.g. `formatCurrency(1234.5, "$", 2)` β `"$1,234.50"`.
+ *
+ * @param value The number to format.
+ * @param symbol Currency symbol. Default: `"$"`.
+ * @param decimals Decimal places. Default: `2`.
+ */
+export function formatCurrency(value: number, symbol = "$", decimals = 2): string {
+ if (!Number.isFinite(value)) {
+ return `${symbol}${String(value)}`;
+ }
+ const abs = Math.abs(value);
+ const sign = value < 0 ? "-" : "";
+ return `${sign}${symbol}${formatThousands(abs, decimals)}`;
+}
+
+/**
+ * Format a number in compact notation using SI-style suffixes.
+ *
+ * Thresholds: T β₯ 1e12, B β₯ 1e9, M β₯ 1e6, K β₯ 1e3.
+ * Values below 1000 are formatted with `decimals` decimal places.
+ *
+ * e.g. `formatCompact(1_234_567, 2)` β `"1.23M"`.
+ *
+ * @param value The number to format.
+ * @param decimals Decimal places in the mantissa. Default: `2`.
+ */
+export function formatCompact(value: number, decimals = 2): string {
+ if (!Number.isFinite(value)) {
+ return String(value);
+ }
+ const sign = value < 0 ? "-" : "";
+ const abs = Math.abs(value);
+ if (abs >= 1e12) {
+ return `${sign}${(abs / 1e12).toFixed(decimals)}T`;
+ }
+ if (abs >= 1e9) {
+ return `${sign}${(abs / 1e9).toFixed(decimals)}B`;
+ }
+ if (abs >= 1e6) {
+ return `${sign}${(abs / 1e6).toFixed(decimals)}M`;
+ }
+ if (abs >= 1e3) {
+ return `${sign}${(abs / 1e3).toFixed(decimals)}K`;
+ }
+ return `${sign}${abs.toFixed(decimals)}`;
+}
+
+// βββ formatter factories ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/** A function that converts a {@link Scalar} value to a string. */
+export type Formatter = (value: Scalar) => string;
+
+/**
+ * Create a float formatter with the given number of decimal places.
+ *
+ * @param decimals Number of decimal places. Default: `2`.
+ */
+export function makeFloatFormatter(decimals = 2): Formatter {
+ return (value: Scalar): string => {
+ if (typeof value !== "number") {
+ return String(value ?? "");
+ }
+ return formatFloat(value, decimals);
+ };
+}
+
+/**
+ * Create a percent formatter with the given number of decimal places.
+ *
+ * @param decimals Number of decimal places. Default: `2`.
+ */
+export function makePercentFormatter(decimals = 2): Formatter {
+ return (value: Scalar): string => {
+ if (typeof value !== "number") {
+ return String(value ?? "");
+ }
+ return formatPercent(value, decimals);
+ };
+}
+
+/**
+ * Create a currency formatter with the given symbol and decimal places.
+ *
+ * @param symbol Currency symbol. Default: `"$"`.
+ * @param decimals Decimal places. Default: `2`.
+ */
+export function makeCurrencyFormatter(symbol = "$", decimals = 2): Formatter {
+ return (value: Scalar): string => {
+ if (typeof value !== "number") {
+ return String(value ?? "");
+ }
+ return formatCurrency(value, symbol, decimals);
+ };
+}
+
+// βββ apply to Series / DataFrame βββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Apply a formatter to every element of a Series, returning a `Series`.
+ *
+ * Non-numeric formatters receive the raw {@link Scalar} value; numeric-only
+ * formatters (e.g. from {@link makeFloatFormatter}) receive the value unchanged
+ * and should guard against non-numeric types themselves.
+ *
+ * @param series The source Series.
+ * @param formatter A {@link Formatter} to apply to each value.
+ */
+export function applySeriesFormatter(series: Series, formatter: Formatter): Series {
+ const formatted: string[] = [];
+ for (let i = 0; i < series.size; i++) {
+ formatted.push(formatter(series.values[i] as Scalar));
+ }
+ return new Series({ data: formatted, index: series.index, name: series.name });
+}
+
+/**
+ * Apply per-column formatters to a DataFrame, returning a
+ * `Record` where each key is a column name and the value is
+ * the formatted column data.
+ *
+ * Columns without a matching formatter are rendered via `String(value)`.
+ *
+ * @param df The source DataFrame.
+ * @param formatters Map of column name β {@link Formatter}.
+ */
+export function applyDataFrameFormatter(
+ df: DataFrame,
+ formatters: Readonly>,
+): Record {
+ const result: Record = {};
+ for (const colName of df.columns.values) {
+ const fmt: Formatter = formatters[colName] ?? ((v: Scalar) => String(v ?? ""));
+ const col = df.col(colName);
+ const formatted: string[] = [];
+ for (let i = 0; i < col.size; i++) {
+ formatted.push(fmt(col.values[i] as Scalar));
+ }
+ result[colName] = formatted;
+ }
+ return result;
+}
+
+// βββ to-string rendering ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/** Options for {@link seriesToString}. */
+export interface SeriesToStringOptions {
+ /**
+ * Maximum number of rows to display.
+ * Default: `60`.
+ */
+ readonly maxRows?: number;
+ /**
+ * Formatter to apply to each value.
+ * Default: `String`.
+ */
+ readonly formatter?: Formatter;
+ /**
+ * Series name to display in the header.
+ * Default: the series' own name, or `null` for no header.
+ */
+ readonly name?: string | null;
+}
+
+/**
+ * Render a Series as a human-readable multi-line string.
+ *
+ * The output mirrors `repr(series)` in pandas:
+ * ```
+ * 0 1.00
+ * 1 2.00
+ * 2 3.00
+ * Name: x, dtype: float64
+ * ```
+ *
+ * @param series The Series to render.
+ * @param options Optional rendering options.
+ */
+export function seriesToString(series: Series, options: SeriesToStringOptions = {}): string {
+ const maxRows = options.maxRows ?? 60;
+ const fmt: Formatter = options.formatter ?? ((v: Scalar) => String(v ?? "NaN"));
+ const displayName = options.name !== undefined ? options.name : series.name;
+
+ const n = series.size;
+ const truncated = n > maxRows;
+ const displayCount = truncated ? maxRows : n;
+
+ // Compute label column width
+ let labelWidth = 0;
+ for (let i = 0; i < displayCount; i++) {
+ const label = String(series.index.at(i) ?? "");
+ if (label.length > labelWidth) {
+ labelWidth = label.length;
+ }
+ }
+
+ const lines: string[] = [];
+ for (let i = 0; i < displayCount; i++) {
+ const label = String(series.index.at(i) ?? "").padEnd(labelWidth);
+ const val = fmt(series.values[i] as Scalar);
+ lines.push(`${label} ${val}`);
+ }
+
+ if (truncated) {
+ lines.push(`...`);
+ }
+
+ const footer: string[] = [];
+ if (displayName !== null && displayName !== undefined) {
+ footer.push(`Name: ${displayName}`);
+ }
+ footer.push(`dtype: ${series.dtype.name}`);
+
+ if (footer.length > 0) {
+ lines.push(footer.join(", "));
+ }
+
+ return lines.join("\n");
+}
+
+/** Options for {@link dataFrameToString}. */
+export interface DataFrameToStringOptions {
+ /**
+ * Maximum number of rows to display.
+ * Default: `60`.
+ */
+ readonly maxRows?: number;
+ /**
+ * Maximum number of columns to display.
+ * Default: `20`.
+ */
+ readonly maxCols?: number;
+ /**
+ * Per-column formatters.
+ * Default: `String` for all columns.
+ */
+ readonly formatters?: Readonly>;
+}
+
+/**
+ * Render a DataFrame as a human-readable multi-line string (like pandas `repr`).
+ *
+ * @param df The DataFrame to render.
+ * @param options Optional rendering options.
+ */
+export function dataFrameToString(df: DataFrame, options: DataFrameToStringOptions = {}): string {
+ const maxRows = options.maxRows ?? 60;
+ const maxCols = options.maxCols ?? 20;
+ const formatters = options.formatters ?? {};
+
+ const [nRows, nCols] = df.shape;
+ const truncRows = nRows > maxRows;
+ const truncCols = nCols > maxCols;
+ const displayRows = truncRows ? maxRows : nRows;
+
+ // Pick columns to display
+ const allCols = [...df.columns.values];
+ const displayCols = truncCols ? allCols.slice(0, maxCols) : allCols;
+
+ // Gather formatted cells
+ const cells: string[][] = [];
+ for (const colName of displayCols) {
+ const fmt: Formatter = formatters[colName] ?? ((v: Scalar) => String(v ?? ""));
+ const col = df.col(colName);
+ const colCells: string[] = [];
+ for (let i = 0; i < displayRows; i++) {
+ colCells.push(fmt(col.values[i] as Scalar));
+ }
+ cells.push(colCells);
+ }
+
+ // Compute column widths (max of header or any cell)
+ const colWidths: number[] = displayCols.map((name, ci) => {
+ let w = name.length;
+ const colCells = cells[ci];
+ if (colCells !== undefined) {
+ for (const cell of colCells) {
+ if (cell.length > w) {
+ w = cell.length;
+ }
+ }
+ }
+ return w;
+ });
+
+ // Compute index label width
+ let idxWidth = 0;
+ for (let i = 0; i < displayRows; i++) {
+ const label = String(df.index.at(i) ?? "");
+ if (label.length > idxWidth) {
+ idxWidth = label.length;
+ }
+ }
+
+ // Build header row
+ const headerParts = displayCols.map((name, ci) => name.padStart(colWidths[ci] ?? name.length));
+ const header = `${"".padEnd(idxWidth)} ${headerParts.join(" ")}`;
+
+ const lines: string[] = [header];
+
+ for (let i = 0; i < displayRows; i++) {
+ const label = String(df.index.at(i) ?? "").padEnd(idxWidth);
+ const rowParts = displayCols.map((_, ci) => {
+ const cell = cells[ci]?.[i] ?? "";
+ return cell.padStart(colWidths[ci] ?? cell.length);
+ });
+ lines.push(`${label} ${rowParts.join(" ")}`);
+ }
+
+ if (truncRows) {
+ lines.push("...");
+ }
+ if (truncCols) {
+ lines.push(`[${nRows} rows Γ ${nCols} columns]`);
+ }
+
+ return lines.join("\n");
+}
diff --git a/src/stats/index.ts b/src/stats/index.ts
index b1de48eb..103e87fe 100644
--- a/src/stats/index.ts
+++ b/src/stats/index.ts
@@ -39,3 +39,113 @@ export {
nsmallestDataFrame,
} from "./nlargest.ts";
export type { NKeep, NTopOptions, NTopDataFrameOptions } from "./nlargest.ts";
+export { cut, qcut } from "./cut_qcut.ts";
+export type { BinResult, CutOptions, QCutOptions } from "./cut_qcut.ts";
+export { rollingSem, rollingSkew, rollingKurt, rollingQuantile } from "./window_extended.ts";
+export type { WindowExtOptions, RollingQuantileOptions } from "./window_extended.ts";
+export { seriesWhere, seriesMask, dataFrameWhere, dataFrameMask } from "./where_mask.ts";
+export type {
+ SeriesCond,
+ DataFrameCond,
+ SeriesWhereOptions,
+ DataFrameWhereOptions,
+} from "./where_mask.ts";
+export {
+ isna,
+ notna,
+ isnull,
+ notnull,
+ fillna,
+ dropna,
+ countna,
+ countValid,
+} from "./notna_isna.ts";
+export type { IsnaInput, FillnaOptions, DropnaOptions } from "./notna_isna.ts";
+export {
+ strNormalize,
+ strGetDummies,
+ strExtractAll,
+ strRemovePrefix,
+ strRemoveSuffix,
+ strTranslate,
+ strCharWidth,
+ strByteLength,
+} from "./string_ops.ts";
+export type {
+ NormalizeForm,
+ StrInput,
+ GetDummiesOptions,
+ ExtractAllOptions,
+} from "./string_ops.ts";
+export {
+ strSplitExpand,
+ strExtractGroups,
+ strPartition,
+ strRPartition,
+ strMultiReplace,
+ strIndent,
+ strDedent,
+} from "./string_ops_extended.ts";
+export type {
+ SplitExpandOptions,
+ ExtractGroupsOptions,
+ PartitionResult,
+ ReplacePair,
+ IndentOptions,
+} from "./string_ops_extended.ts";
+export {
+ digitize,
+ histogram,
+ linspace,
+ arange,
+ percentileOfScore,
+ zscore,
+ minMaxNormalize,
+ coefficientOfVariation,
+ seriesDigitize,
+} from "./numeric_extended.ts";
+export type {
+ HistogramOptions,
+ HistogramResult,
+ ZscoreOptions,
+ MinMaxOptions,
+ CvOptions,
+} from "./numeric_extended.ts";
+export {
+ catFromCodes,
+ catUnionCategories,
+ catIntersectCategories,
+ catDiffCategories,
+ catEqualCategories,
+ catSortByFreq,
+ catToOrdinal,
+ catFreqTable,
+ catCrossTab,
+ catRecode,
+} from "./categorical_ops.ts";
+export type {
+ CatFromCodesOptions,
+ CatSortByFreqOptions,
+ CatCrossTabOptions,
+} from "./categorical_ops.ts";
+export {
+ formatFloat,
+ formatPercent,
+ formatScientific,
+ formatEngineering,
+ formatThousands,
+ formatCurrency,
+ formatCompact,
+ makeFloatFormatter,
+ makePercentFormatter,
+ makeCurrencyFormatter,
+ applySeriesFormatter,
+ applyDataFrameFormatter,
+ seriesToString,
+ dataFrameToString,
+} from "./format_ops.ts";
+export type {
+ Formatter,
+ SeriesToStringOptions,
+ DataFrameToStringOptions,
+} from "./format_ops.ts";
diff --git a/src/stats/notna_isna.ts b/src/stats/notna_isna.ts
new file mode 100644
index 00000000..bd685c1c
--- /dev/null
+++ b/src/stats/notna_isna.ts
@@ -0,0 +1,369 @@
+/**
+ * notna_isna β module-level missing-value utilities.
+ *
+ * Mirrors the pandas top-level functions:
+ * - `pd.isna(obj)` / `pd.isnull(obj)` β detect missing values
+ * - `pd.notna(obj)` / `pd.notnull(obj)` β detect non-missing values
+ * - `pd.array_isna(arr)` β convenience wrapper for arrays
+ *
+ * Plus standalone `fillna` and `dropna` that operate on scalars, arrays,
+ * `Series`, and `DataFrame` without requiring a method call.
+ *
+ * ### What counts as "missing"?
+ * - `null`
+ * - `undefined`
+ * - `NaN` (IEEE 754 `number`)
+ *
+ * Everything else β including `0`, `false`, `""`, `0n`, `new Date(NaN)` β is
+ * treated as **not** missing. (`Date(NaN)` has NaN time but is a valid object,
+ * matching pandas semantics where `NaT` is only produced by explicit
+ * datetime constructors.)
+ *
+ * ### Overloads
+ *
+ * | Input type | Return type |
+ * |--------------------|---------------------|
+ * | `Scalar` | `boolean` |
+ * | `readonly Scalar[]`| `boolean[]` |
+ * | `Series` | `Series` |
+ * | `DataFrame` | `DataFrame` |
+ *
+ * All functions are **pure** β inputs are never mutated.
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Index } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// βββ primitive helper βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/** True when `v` is null, undefined, or NaN. */
+function scalarIsna(v: Scalar): boolean {
+ return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+}
+
+// βββ public types βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/** Union of all input types accepted by `isna` / `notna`. */
+export type IsnaInput = Scalar | readonly Scalar[] | Series | DataFrame;
+
+/** Options for standalone `fillna`. */
+export interface FillnaOptions {
+ /**
+ * The value used to replace missing entries.
+ * Must be compatible with the element type.
+ */
+ value: Scalar;
+}
+
+/** Options for standalone `dropna`. */
+export interface DropnaOptions {
+ /**
+ * Only used for DataFrame inputs.
+ * - `"any"` (default) β drop a row if **any** column is missing
+ * - `"all"` β drop a row only if **all** columns are missing
+ */
+ how?: "any" | "all";
+ /**
+ * `axis=0` (default) β drop rows that contain missing values.
+ * `axis=1` β drop columns that contain missing values.
+ */
+ axis?: 0 | 1;
+}
+
+// βββ isna overloads βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Detect missing values in a scalar, array, Series, or DataFrame.
+ *
+ * Returns `true` for `null`, `undefined`, and `NaN`; `false` for everything
+ * else.
+ *
+ * @example
+ * ```ts
+ * import { isna } from "tsb";
+ *
+ * isna(null); // true
+ * isna(0); // false
+ * isna([1, null, NaN]); // [false, true, true]
+ * ```
+ */
+export function isna(obj: Scalar): boolean;
+/** @overload */
+export function isna(obj: readonly Scalar[]): boolean[];
+/** @overload */
+export function isna(obj: Series): Series;
+/** @overload */
+export function isna(obj: DataFrame): DataFrame;
+export function isna(
+ obj: Scalar | readonly Scalar[] | Series | DataFrame,
+): boolean | boolean[] | Series | DataFrame {
+ if (obj instanceof DataFrame) {
+ return obj.isna();
+ }
+ if (obj instanceof Series) {
+ return obj.isna();
+ }
+ if (Array.isArray(obj)) {
+ return (obj as readonly Scalar[]).map(scalarIsna);
+ }
+ return scalarIsna(obj as Scalar);
+}
+
+// βββ notna overloads ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Detect non-missing values β the inverse of {@link isna}.
+ *
+ * Returns `false` for `null`, `undefined`, and `NaN`; `true` for everything
+ * else.
+ *
+ * @example
+ * ```ts
+ * import { notna } from "tsb";
+ *
+ * notna(null); // false
+ * notna(42); // true
+ * notna([1, null, NaN]); // [true, false, false]
+ * ```
+ */
+export function notna(obj: Scalar): boolean;
+/** @overload */
+export function notna(obj: readonly Scalar[]): boolean[];
+/** @overload */
+export function notna(obj: Series): Series;
+/** @overload */
+export function notna(obj: DataFrame): DataFrame;
+export function notna(
+ obj: Scalar | readonly Scalar[] | Series | DataFrame,
+): boolean | boolean[] | Series | DataFrame {
+ if (obj instanceof DataFrame) {
+ return obj.notna();
+ }
+ if (obj instanceof Series) {
+ return obj.notna();
+ }
+ if (Array.isArray(obj)) {
+ return (obj as readonly Scalar[]).map((v) => !scalarIsna(v));
+ }
+ return !scalarIsna(obj as Scalar);
+}
+
+// βββ pandas-compatible aliases ββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Alias for {@link isna}. Mirrors `pandas.isnull`.
+ * @see isna
+ */
+export function isnull(obj: Scalar): boolean;
+/** @overload */
+export function isnull(obj: readonly Scalar[]): boolean[];
+/** @overload */
+export function isnull(obj: Series): Series;
+/** @overload */
+export function isnull(obj: DataFrame): DataFrame;
+export function isnull(
+ obj: Scalar | readonly Scalar[] | Series | DataFrame,
+): boolean | boolean[] | Series | DataFrame {
+ return isna(obj as Parameters[0]);
+}
+
+/**
+ * Alias for {@link notna}. Mirrors `pandas.notnull`.
+ * @see notna
+ */
+export function notnull(obj: Scalar): boolean;
+/** @overload */
+export function notnull(obj: readonly Scalar[]): boolean[];
+/** @overload */
+export function notnull(obj: Series): Series;
+/** @overload */
+export function notnull(obj: DataFrame): DataFrame;
+export function notnull(
+ obj: Scalar | readonly Scalar[] | Series | DataFrame,
+): boolean | boolean[] | Series | DataFrame {
+ return notna(obj as Parameters[0]);
+}
+
+// βββ standalone fillna ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Replace missing values with a fill value.
+ *
+ * Standalone equivalent of `Series.fillna()` / `DataFrame.fillna()`.
+ * Also handles bare arrays and scalars for convenience.
+ *
+ * @example
+ * ```ts
+ * import { fillna } from "tsb";
+ *
+ * fillna([1, null, NaN, 3], { value: 0 }); // [1, 0, 0, 3]
+ * ```
+ */
+export function fillna(obj: Scalar, opts: FillnaOptions): Scalar;
+/** @overload */
+export function fillna(obj: readonly Scalar[], opts: FillnaOptions): Scalar[];
+/** @overload */
+export function fillna(obj: Series, opts: FillnaOptions): Series;
+/** @overload */
+export function fillna(obj: DataFrame, opts: FillnaOptions): DataFrame;
+export function fillna(
+ obj: Scalar | readonly Scalar[] | Series | DataFrame,
+ opts: FillnaOptions,
+): Scalar | Scalar[] | Series | DataFrame {
+ const { value } = opts;
+ if (obj instanceof DataFrame) {
+ return obj.fillna(value);
+ }
+ if (obj instanceof Series) {
+ return obj.fillna(value);
+ }
+ if (Array.isArray(obj)) {
+ return (obj as readonly Scalar[]).map((v) => (scalarIsna(v) ? value : v));
+ }
+ const s = obj as Scalar;
+ return scalarIsna(s) ? value : s;
+}
+
+// βββ standalone dropna ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Remove missing values from an array, Series, or DataFrame.
+ *
+ * Standalone equivalent of `Series.dropna()` / `DataFrame.dropna()`.
+ *
+ * For DataFrames, the `how` and `axis` options control which rows/columns are
+ * dropped (defaults: `how="any"`, `axis=0`).
+ *
+ * @example
+ * ```ts
+ * import { dropna } from "tsb";
+ *
+ * dropna([1, null, NaN, 3]); // [1, 3]
+ * ```
+ */
+export function dropna(obj: readonly Scalar[], opts?: DropnaOptions): Scalar[];
+/** @overload */
+export function dropna(obj: Series, opts?: DropnaOptions): Series;
+/** @overload */
+export function dropna(obj: DataFrame, opts?: DropnaOptions): DataFrame;
+export function dropna(
+ obj: readonly Scalar[] | Series | DataFrame,
+ opts: DropnaOptions = {},
+): Scalar[] | Series | DataFrame {
+ const how: "any" | "all" = opts.how ?? "any";
+ const axis: 0 | 1 = opts.axis ?? 0;
+
+ if (obj instanceof DataFrame) {
+ return _dataFrameDropna(obj, how, axis);
+ }
+ if (obj instanceof Series) {
+ return obj.dropna();
+ }
+ // plain array
+ return (obj as readonly Scalar[]).filter((v) => !scalarIsna(v));
+}
+
+// βββ DataFrame dropna helpers βββββββββββββββββββββββββββββββββββββββββββββββββ
+
+function _dataFrameDropna(df: DataFrame, how: "any" | "all", axis: 0 | 1): DataFrame {
+ if (axis === 1) {
+ return _dropnaColumns(df, how);
+ }
+ return _dropnaRows(df, how);
+}
+
+function _dropnaRows(df: DataFrame, how: "any" | "all"): DataFrame {
+ const nRows = df.index.size;
+ const colNames = df.columns.values as string[];
+ const keep: number[] = [];
+
+ for (let i = 0; i < nRows; i++) {
+ const rowMissing: boolean[] = colNames.map((col) => scalarIsna(df.col(col).iat(i)));
+
+ const shouldDrop =
+ how === "any" ? rowMissing.some(Boolean) : rowMissing.every(Boolean);
+
+ if (!shouldDrop) {
+ keep.push(i);
+ }
+ }
+
+ // Rebuild DataFrame with kept rows
+ const colMap = new Map>();
+ const keptLabels: Label[] = keep.map((i) => df.index.at(i));
+ const newIndex = new Index(keptLabels);
+ for (const name of colNames) {
+ const series = df.col(name);
+ const keptValues: Scalar[] = keep.map((i) => series.iat(i));
+ colMap.set(
+ name,
+ new Series({
+ data: keptValues,
+ index: newIndex,
+ dtype: series.dtype,
+ name,
+ }),
+ );
+ }
+ return new DataFrame(colMap, newIndex);
+}
+
+function _dropnaColumns(df: DataFrame, how: "any" | "all"): DataFrame {
+ const colNames = df.columns.values as string[];
+ const colMap = new Map>();
+
+ for (const name of colNames) {
+ const series = df.col(name);
+ const vals = series.values;
+ const missingFlags = vals.map(scalarIsna);
+
+ const shouldDrop =
+ how === "any" ? missingFlags.some(Boolean) : missingFlags.every(Boolean);
+
+ if (!shouldDrop) {
+ colMap.set(name, series);
+ }
+ }
+ return new DataFrame(colMap, df.index);
+}
+
+// βββ countna / countValid helpers βββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Count missing values in an array or Series.
+ *
+ * Mirrors `Series.isna().sum()` but without constructing an intermediate
+ * boolean Series.
+ *
+ * @example
+ * ```ts
+ * import { countna } from "tsb";
+ *
+ * countna([1, null, NaN, 3]); // 2
+ * ```
+ */
+export function countna(obj: readonly Scalar[] | Series): number {
+ const vals: readonly Scalar[] = obj instanceof Series ? obj.values : obj;
+ return vals.reduce((acc, v) => acc + (scalarIsna(v) ? 1 : 0), 0);
+}
+
+/**
+ * Count non-missing values in an array or Series.
+ *
+ * Mirrors `Series.count()`.
+ *
+ * @example
+ * ```ts
+ * import { countValid } from "tsb";
+ *
+ * countValid([1, null, NaN, 3]); // 2
+ * ```
+ */
+export function countValid(obj: readonly Scalar[] | Series): number {
+ const vals: readonly Scalar[] = obj instanceof Series ? obj.values : obj;
+ return vals.reduce((acc, v) => acc + (scalarIsna(v) ? 0 : 1), 0);
+}
diff --git a/src/stats/numeric_extended.ts b/src/stats/numeric_extended.ts
new file mode 100644
index 00000000..c5534e71
--- /dev/null
+++ b/src/stats/numeric_extended.ts
@@ -0,0 +1,586 @@
+/**
+ * numeric_extended β additional numeric utility functions for arrays and Series.
+ *
+ * Mirrors frequently-used numpy / scipy / pandas functions not yet in tsb:
+ * - `digitize(values, bins, right?)` β find bin indices (numpy.digitize)
+ * - `histogram(values, options?)` β compute histogram counts and edges (numpy.histogram)
+ * - `linspace(start, stop, num?)` β evenly-spaced sequence (numpy.linspace)
+ * - `arange(start, stop?, step?)` β range with step (numpy.arange)
+ * - `percentileOfScore(arr, score, kind?)` β percentile rank of a score (scipy.stats.percentileofscore)
+ * - `zscore(series, options?)` β z-score standardisation (scipy.stats.zscore)
+ * - `minMaxNormalize(series, options?)` β min-max normalisation to [0, 1] or custom range
+ * - `coefficientOfVariation(series, options?)` β std / mean (dimensionless spread)
+ *
+ * All functions are **pure** (return new values; inputs are unchanged).
+ * Missing values (null / NaN) are handled consistently: ignored in aggregates
+ * and propagated in per-element outputs unless noted otherwise.
+ *
+ * @module
+ */
+
+import { Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// βββ internal helpers βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/** True when `v` is a finite, non-null, non-NaN number. */
+function isNum(v: Scalar): v is number {
+ return typeof v === "number" && !Number.isNaN(v);
+}
+
/**
 * Extract the numeric entries from a scalar array, dropping anything that
 * fails {@link isNum} (null, undefined, NaN, non-numbers).
 * Returns a new array; the input is not modified.
 */
function finiteNums(vals: readonly Scalar[]): number[] {
  return vals.filter(isNum);
}
+
+// βββ public types βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
/** Options for {@link histogram}. */
export interface HistogramOptions {
  /**
   * Number of equal-width bins to produce. Defaults to `10`; must be >= 1.
   * Ignored when `binEdges` is provided.
   */
  readonly bins?: number;
  /**
   * Explicit bin edges. Must be strictly increasing and have length >= 2.
   * When provided, `bins` is ignored.
   */
  readonly binEdges?: readonly number[];
  /**
   * `[min, max]` range to consider. Values outside are ignored (not counted).
   * Defaults to `[min(values), max(values)]` over the finite inputs.
   * Only used when `binEdges` is not provided.
   */
  readonly range?: readonly [number, number];
  /**
   * If `true`, the result is normalised as a probability density so that the
   * integral over the range is 1 (like `numpy.histogram(density=True)`).
   * Defaults to `false`.
   */
  readonly density?: boolean;
}
+
/** Result of {@link histogram}. */
export interface HistogramResult {
  /** Bin counts (or densities when `density: true`). */
  readonly counts: readonly number[];
  /** Bin edges, ascending — always has length `counts.length + 1`. */
  readonly binEdges: readonly number[];
}
+
/** Options for {@link zscore}. */
export interface ZscoreOptions {
  /**
   * Degrees-of-freedom correction used in the standard deviation
   * (the divisor is `n - ddof`).
   * - `1` (default, matches pandas `ddof=1`): sample std
   * - `0`: population std
   */
  readonly ddof?: 0 | 1;
}
+
/** Options for {@link minMaxNormalize}. */
export interface MinMaxOptions {
  /**
   * Lower bound of the output range. Defaults to `0`.
   * Must be strictly less than `featureRangeMax`.
   */
  readonly featureRangeMin?: number;
  /**
   * Upper bound of the output range. Defaults to `1`.
   */
  readonly featureRangeMax?: number;
}
+
/** Options for {@link coefficientOfVariation}. */
export interface CvOptions {
  /**
   * Degrees-of-freedom correction used in the standard deviation
   * (the divisor is `n - ddof`).
   * - `1` (default): sample std
   * - `0`: population std
   */
  readonly ddof?: 0 | 1;
}
+
+// βββ digitize βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Return the indices of the bins to which each value in `values` belongs.
+ *
+ * Mirrors `numpy.digitize(values, bins, right=False)`.
+ *
+ * Each value `v` is mapped to bin index `i` such that:
+ * - `right = false` (default): `bins[i-1] <= v < bins[i]`
+ * - `right = true`: `bins[i-1] < v <= bins[i]`
+ *
+ * Indices are 0-based (unlike numpy which uses 1-based).
+ * Values below `bins[0]` map to `-1`; values at/above `bins[last]` map to
+ * `bins.length - 1`.
+ *
+ * Missing / NaN values in `values` are mapped to `NaN`.
+ *
+ * @param values - array of numbers to bin (may contain null/NaN)
+ * @param bins - strictly increasing bin-edge array (length β₯ 1)
+ * @param right - if `true`, intervals are open on the left (pandas default is `false`)
+ * @returns array of integer bin indices (same length as `values`)
+ *
+ * @example
+ * ```ts
+ * digitize([0.5, 1.5, 2.5, 3.5], [1, 2, 3]);
+ * // β [-1, 0, 1, 2]
+ * ```
+ */
+export function digitize(
+ values: readonly (number | null)[],
+ bins: readonly number[],
+ right = false,
+): (number | typeof NaN)[] {
+ if (bins.length === 0) {
+ throw new RangeError("bins must have at least one element");
+ }
+ return values.map((v) => {
+ if (v === null || (typeof v === "number" && Number.isNaN(v))) {
+ return Number.NaN;
+ }
+ const n = bins.length;
+ if (right) {
+ // open left, closed right: bins[i-1] < v <= bins[i]
+ for (let i = 0; i < n; i++) {
+ if (v <= (bins[i] as number)) {
+ return i - 1; // below first edge β -1
+ }
+ }
+ return n - 1; // above last edge
+ } else {
+ // closed left, open right: bins[i-1] <= v < bins[i]
+ for (let i = 0; i < n; i++) {
+ if (v < (bins[i] as number)) {
+ return i - 1;
+ }
+ }
+ return n - 1; // at or above last edge
+ }
+ });
+}
+
+// βββ histogram ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Compute a histogram of `values`.
+ *
+ * Mirrors `numpy.histogram(values, bins=10, range=None, density=False)`.
+ *
+ * NaN / null values are silently ignored.
+ *
+ * @param values - numeric values to bin
+ * @param options - {@link HistogramOptions}
+ * @returns {@link HistogramResult} with `counts` and `binEdges`
+ *
+ * @example
+ * ```ts
+ * histogram([1, 2, 3, 4, 5], { bins: 2 });
+ * // { counts: [2, 3], binEdges: [1, 3, 5] }
+ * ```
+ */
+export function histogram(
+ values: readonly (number | null | Scalar)[],
+ options?: HistogramOptions,
+): HistogramResult {
+ const nums = finiteNums(values as readonly Scalar[]);
+ if (nums.length === 0) {
+ // Return a zero-count histogram over [0, 1] when there is no data.
+ const nb = options?.bins ?? 10;
+ const edges: number[] = [];
+ for (let i = 0; i <= nb; i++) {
+ edges.push(i / nb);
+ }
+ const counts = new Array(nb).fill(0);
+ return { counts, binEdges: edges };
+ }
+
+ let edges: number[];
+
+ if (options?.binEdges !== undefined) {
+ const be = options.binEdges;
+ if (be.length < 2) {
+ throw new RangeError("binEdges must have at least 2 elements");
+ }
+ edges = [...be];
+ } else {
+ const nbins = options?.bins ?? 10;
+ if (nbins < 1) {
+ throw new RangeError("bins must be >= 1");
+ }
+ let lo: number;
+ let hi: number;
+ if (options?.range !== undefined) {
+ [lo, hi] = options.range;
+ } else {
+ lo = Math.min(...nums);
+ hi = Math.max(...nums);
+ }
+ if (lo === hi) {
+ // Degenerate range: widen by 0.5 on each side (mirrors numpy).
+ lo -= 0.5;
+ hi += 0.5;
+ }
+ edges = [];
+ for (let i = 0; i <= nbins; i++) {
+ edges.push(lo + (i / nbins) * (hi - lo));
+ }
+ }
+
+ const nbins = edges.length - 1;
+ const counts = new Array(nbins).fill(0);
+ const lo = edges[0] as number;
+ const hi = edges[nbins] as number;
+
+ for (const v of nums) {
+ if (v < lo || v > hi) {
+ continue; // out of range
+ }
+ if (v === hi) {
+ // Right-most value goes into the last bin.
+ (counts[nbins - 1] as number)++;
+ continue;
+ }
+ // Binary search for the bin.
+ let left = 0;
+ let right = nbins - 1;
+ while (left < right) {
+ const mid = (left + right) >> 1;
+ if (v < (edges[mid + 1] as number)) {
+ right = mid;
+ } else {
+ left = mid + 1;
+ }
+ }
+ (counts[left] as number)++;
+ }
+
+ if (options?.density === true) {
+ const total = nums.length;
+ const densityCounts = counts.map((c, i) => {
+ const width = (edges[i + 1] as number) - (edges[i] as number);
+ return c / (total * width);
+ });
+ return { counts: densityCounts, binEdges: edges };
+ }
+
+ return { counts, binEdges: edges };
+}
+
+// βββ linspace βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Return `num` evenly spaced numbers from `start` to `stop` (inclusive).
+ *
+ * Mirrors `numpy.linspace(start, stop, num=50, endpoint=True)`.
+ *
+ * @param start - first value
+ * @param stop - last value (included)
+ * @param num - number of values to generate (default `50`; must be β₯ 0)
+ * @returns array of `num` numbers
+ *
+ * @example
+ * ```ts
+ * linspace(0, 1, 5);
+ * // β [0, 0.25, 0.5, 0.75, 1]
+ * ```
+ */
+export function linspace(start: number, stop: number, num = 50): number[] {
+ if (num < 0) {
+ throw new RangeError("num must be >= 0");
+ }
+ if (num === 0) {
+ return [];
+ }
+ if (num === 1) {
+ return [start];
+ }
+ const step = (stop - start) / (num - 1);
+ const result: number[] = [];
+ for (let i = 0; i < num; i++) {
+ result.push(i === num - 1 ? stop : start + i * step);
+ }
+ return result;
+}
+
+// βββ arange βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Return evenly-spaced values within a given interval.
+ *
+ * Mirrors `numpy.arange([start,] stop[, step])`.
+ *
+ * Call signatures:
+ * - `arange(stop)` β values in `[0, stop)` with step `1`
+ * - `arange(start, stop)` β values in `[start, stop)` with step `1`
+ * - `arange(start, stop, step)` β values in `[start, stop)` with given step
+ *
+ * @example
+ * ```ts
+ * arange(5); // [0, 1, 2, 3, 4]
+ * arange(1, 5); // [1, 2, 3, 4]
+ * arange(0, 1, 0.25); // [0, 0.25, 0.5, 0.75]
+ * ```
+ */
+export function arange(stop: number): number[];
+export function arange(start: number, stop: number): number[];
+export function arange(start: number, stop: number, step: number): number[];
+export function arange(startOrStop: number, stop?: number, step?: number): number[] {
+ let start: number;
+ let s: number;
+ let st: number;
+
+ if (stop === undefined) {
+ start = 0;
+ s = startOrStop;
+ st = 1;
+ } else if (step === undefined) {
+ start = startOrStop;
+ s = stop;
+ st = 1;
+ } else {
+ start = startOrStop;
+ s = stop;
+ st = step;
+ }
+
+ if (st === 0) {
+ throw new RangeError("step must not be zero");
+ }
+
+ const result: number[] = [];
+ if (st > 0) {
+ for (let v = start; v < s; v = start + result.length * st) {
+ result.push(v);
+ }
+ } else {
+ for (let v = start; v > s; v = start + result.length * st) {
+ result.push(v);
+ }
+ }
+ return result;
+}
+
+// βββ percentileOfScore ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Compute the percentile rank of `score` within `arr`.
+ *
+ * Mirrors `scipy.stats.percentileofscore(arr, score, kind)`.
+ *
+ * @param arr - numeric values (NaN/null are ignored)
+ * @param score - value whose rank to compute
+ * @param kind - ranking method:
+ * - `"rank"` (default): average of `weak` and `strict` percentiles
+ * - `"weak"`: proportion of values β€ score
+ * - `"strict"`: proportion of values < score
+ * - `"mean"`: mean of `weak` and `strict` (same as `"rank"`)
+ * @returns percentile in `[0, 100]` (or `NaN` when `arr` is empty)
+ *
+ * @example
+ * ```ts
+ * percentileOfScore([1, 2, 3, 4, 5], 3); // 50
+ * percentileOfScore([1, 2, 3, 4, 5], 3, "weak"); // 60
+ * percentileOfScore([1, 2, 3, 4, 5], 3, "strict"); // 40
+ * ```
+ */
+export function percentileOfScore(
+ arr: readonly (number | null | Scalar)[],
+ score: number,
+ kind: "rank" | "weak" | "strict" | "mean" = "rank",
+): number {
+ const nums = finiteNums(arr as readonly Scalar[]);
+ const n = nums.length;
+ if (n === 0) {
+ return Number.NaN;
+ }
+ const weakCount = nums.filter((v) => v <= score).length;
+ const strictCount = nums.filter((v) => v < score).length;
+
+ switch (kind) {
+ case "weak":
+ return (weakCount / n) * 100;
+ case "strict":
+ return (strictCount / n) * 100;
+ case "rank":
+ case "mean":
+ return ((weakCount + strictCount) / 2 / n) * 100;
+ }
+}
+
+// βββ zscore βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Standardise a numeric Series to zero mean and unit variance (z-score).
+ *
+ * Mirrors `scipy.stats.zscore(a, ddof=1)`.
+ *
+ * Each value is transformed as: `z = (x β mean) / std`
+ *
+ * Missing values (null / NaN) are propagated unchanged in the output.
+ * If std is 0 (or fewer than 2 non-missing values), all outputs are `NaN`.
+ *
+ * @param series - input Series (must be numeric)
+ * @param options - {@link ZscoreOptions}
+ * @returns new Series of z-scores with same index
+ *
+ * @example
+ * ```ts
+ * zscore(new Series({ data: [2, 4, 4, 4, 5, 5, 7, 9] }));
+ * // approximately [β1.5, β0.5, β0.5, β0.5, 0, 0, 1, 2] (normalised)
+ * ```
+ */
+export function zscore(
+ series: Series,
+ options?: ZscoreOptions,
+): Series {
+ const ddof = options?.ddof ?? 1;
+ const vals = series.values as readonly Scalar[];
+ const nums = finiteNums(vals);
+ const n = nums.length;
+
+ if (n < 2) {
+ const nanVals = vals.map(() => Number.NaN as Scalar);
+ return series.withValues(nanVals) as Series;
+ }
+
+ const mean = nums.reduce((acc, v) => acc + v, 0) / n;
+ const variance = nums.reduce((acc, v) => acc + (v - mean) ** 2, 0) / (n - ddof);
+ const std = Math.sqrt(variance);
+
+ if (std === 0) {
+ const nanVals = vals.map((v) => (isNum(v) ? Number.NaN : v) as Scalar);
+ return series.withValues(nanVals) as Series;
+ }
+
+ const zVals = vals.map((v) => (isNum(v) ? ((v - mean) / std) as Scalar : v));
+ return series.withValues(zVals) as Series;
+}
+
+// βββ minMaxNormalize ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Scale a numeric Series to a fixed range using min-max normalisation.
+ *
+ * Mirrors `sklearn.preprocessing.MinMaxScaler` applied to a 1-D array.
+ *
+ * `x_scaled = (x β min) / (max β min) Γ (rangeMax β rangeMin) + rangeMin`
+ *
+ * Missing values (null / NaN) are propagated unchanged.
+ * If all values are equal, returns a Series of the midpoint of the target range.
+ *
+ * @param series - input Series (must be numeric)
+ * @param options - {@link MinMaxOptions}
+ * @returns new Series normalised to `[featureRangeMin, featureRangeMax]`
+ *
+ * @example
+ * ```ts
+ * minMaxNormalize(new Series({ data: [0, 5, 10] }));
+ * // β Series([0, 0.5, 1])
+ * ```
+ */
+export function minMaxNormalize(
+ series: Series,
+ options?: MinMaxOptions,
+): Series {
+ const rMin = options?.featureRangeMin ?? 0;
+ const rMax = options?.featureRangeMax ?? 1;
+ if (rMin >= rMax) {
+ throw new RangeError("featureRangeMin must be less than featureRangeMax");
+ }
+
+ const vals = series.values as readonly Scalar[];
+ const nums = finiteNums(vals);
+ if (nums.length === 0) {
+ return series.withValues(vals.map(() => Number.NaN as Scalar)) as Series;
+ }
+
+ const min = Math.min(...nums);
+ const max = Math.max(...nums);
+ const span = max - min;
+
+ if (span === 0) {
+ const mid = (rMin + rMax) / 2;
+ const midVals = vals.map((v) => (isNum(v) ? (mid as Scalar) : v));
+ return series.withValues(midVals) as Series;
+ }
+
+ const scaled = vals.map((v) =>
+ isNum(v) ? (((v - min) / span) * (rMax - rMin) + rMin) as Scalar : v,
+ );
+ return series.withValues(scaled) as Series;
+}
+
+// βββ coefficientOfVariation βββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Compute the coefficient of variation (CV) β std / |mean| β as a unitless
+ * measure of relative dispersion.
+ *
+ * NaN / null values are ignored in aggregation.
+ * Returns `NaN` when mean is 0 or fewer than 2 valid values exist.
+ *
+ * @param series - numeric Series
+ * @param options - {@link CvOptions}
+ * @returns ratio std / |mean|
+ *
+ * @example
+ * ```ts
+ * coefficientOfVariation(new Series({ data: [10, 20, 30] }));
+ * // β 0.5
+ * ```
+ */
+export function coefficientOfVariation(
+ series: Series,
+ options?: CvOptions,
+): number {
+ const ddof = options?.ddof ?? 1;
+ const vals = series.values as readonly Scalar[];
+ const nums = finiteNums(vals);
+ const n = nums.length;
+
+ if (n < 2) {
+ return Number.NaN;
+ }
+
+ const mean = nums.reduce((acc, v) => acc + v, 0) / n;
+ if (mean === 0) {
+ return Number.NaN;
+ }
+
+ const variance = nums.reduce((acc, v) => acc + (v - mean) ** 2, 0) / (n - ddof);
+ const std = Math.sqrt(variance);
+ return std / Math.abs(mean);
+}
+
+// βββ seriesDigitize βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Apply {@link digitize} to a Series, returning a new numeric Series of bin indices.
+ *
+ * @param series - Series of numeric values
+ * @param bins - strictly increasing bin-edge array
+ * @param right - if `true`, intervals are open on the left
+ * @returns new Series of bin indices (integer or NaN for missing values)
+ *
+ * @example
+ * ```ts
+ * seriesDigitize(new Series({ data: [0.5, 1.5, 2.5] }), [1, 2]);
+ * // β Series([-1, 0, 1])
+ * ```
+ */
+export function seriesDigitize(
+ series: Series,
+ bins: readonly number[],
+ right = false,
+): Series {
+ const vals = series.values as readonly (number | null)[];
+ const indices = digitize(vals, bins, right);
+ return new Series({
+ data: indices as number[],
+ index: series.index as import("../core/index.ts").Index,
+ name: series.name,
+ });
+}
diff --git a/src/stats/string_ops.ts b/src/stats/string_ops.ts
new file mode 100644
index 00000000..19d69e13
--- /dev/null
+++ b/src/stats/string_ops.ts
@@ -0,0 +1,468 @@
+/**
+ * string_ops β standalone string operation functions for Series and arrays.
+ *
+ * Provides string transformation utilities that work on `Series`,
+ * `string[]`, and scalar strings. These complement the `StringAccessor`
+ * class by offering module-level functions that do not require the `.str`
+ * accessor pattern.
+ *
+ * Functions mirror pandas `str` accessor methods that are either missing from
+ * the accessor or better expressed as pure standalone utilities:
+ *
+ * - `strNormalize` β Unicode normalization (NFC / NFD / NFKC / NFKD)
+ * - `strGetDummies` β split strings by delimiter β one-hot DataFrame
+ * - `strExtractAll` β extract ALL regex matches per element
+ * - `strRemovePrefix` β remove a leading prefix
+ * - `strRemoveSuffix` β remove a trailing suffix
+ * - `strTranslate` β character-level substitution via a mapping
+ * - `strCharWidth` β display width (accounts for CJK full-width characters)
+ * - `strByteLength` β UTF-8 encoded byte length
+ *
+ * @module
+ */
+
+import { DataFrame, Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// βββ public types βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
/** Unicode normalization form accepted by {@link strNormalize}. */
export type NormalizeForm = "NFC" | "NFD" | "NFKC" | "NFKD";

/** Input accepted by all string-op functions: a Series, an array, or one scalar string. */
export type StrInput = Series | readonly string[] | string;
+
/** Options for {@link strGetDummies}. */
export interface GetDummiesOptions {
  /**
   * The delimiter used to split each element into tokens.
   * @default "|"
   */
  readonly sep?: string;

  /**
   * Prefix prepended to every column name in the output DataFrame.
   * When empty (the default) column names are the bare tokens.
   * @default ""
   */
  readonly prefix?: string;

  /**
   * Separator between the prefix and the token name.
   * Only used when `prefix` is non-empty.
   * @default "_"
   */
  readonly prefixSep?: string;
}
+
/** Options for {@link strExtractAll}. */
export interface ExtractAllOptions {
  /**
   * RegExp flags used when `pat` is supplied as a plain string.
   * The `g` flag is always added internally — you do not need to include it.
   * @default ""
   */
  readonly flags?: string;
}
+
+// βββ helpers ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
/**
 * Coerce a Scalar to a plain string.
 * `null` / `undefined` become `""`; every other non-string value is
 * stringified with `String(v)` — numbers, booleans, etc.
 * (The previous comment claimed non-strings become `""`; only null-ish do.)
 */
function scalarToStr(v: Scalar): string {
  if (typeof v === "string") return v;
  if (v === null || v === undefined) return "";
  return String(v);
}
+
/**
 * Normalise the input to a `string[]`.
 * - scalar string → single-element array
 * - Series → its values coerced element-wise via {@link scalarToStr}
 * - string array → coerced element-wise (defensive; handles non-string Scalars)
 */
function toStringArray(input: StrInput): string[] {
  if (typeof input === "string") return [input];
  if (input instanceof Series) {
    return input.values.map(scalarToStr);
  }
  return input.map(scalarToStr);
}
+
/**
 * Build an output `Series` whose index mirrors the input.
 * - `Series` → copy the input index (labels preserved)
 * - `string[]` → default `RangeIndex`
 * - `string` → default `RangeIndex` of length 1 (callers normally return a
 *   plain scalar before reaching here, so this path is rarely taken)
 */
function buildSeries(data: Scalar[], input: StrInput): Series {
  if (input instanceof Series) {
    return new Series({ data, index: input.index });
  }
  return new Series({ data });
}
+
+// βββ strNormalize βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Apply Unicode normalization to every element.
+ *
+ * Mirrors `pandas.Series.str.normalize(form)`.
+ *
+ * @param input - Input data (Series, string array, or scalar string).
+ * @param form - One of `"NFC"` (default), `"NFD"`, `"NFKC"`, or `"NFKD"`.
+ * @returns A new `Series` (or scalar string) with normalised values.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: ["\u00e9", "caf\u0065\u0301"] });
+ * strNormalize(s, "NFC");
+ * // Series ["Γ©", "cafΓ©"] (both now NFC)
+ * ```
+ */
+export function strNormalize(input: string, form?: NormalizeForm): string;
+export function strNormalize(
+ input: readonly string[] | Series,
+ form?: NormalizeForm,
+): Series;
+export function strNormalize(
+ input: StrInput,
+ form: NormalizeForm = "NFC",
+): Series | string {
+ if (typeof input === "string") return input.normalize(form);
+ const strs = toStringArray(input);
+ const data: Scalar[] = strs.map((s) => s.normalize(form));
+ return buildSeries(data, input);
+}
+
+// βββ strGetDummies ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Encode each string element as a row in a one-hot DataFrame by splitting on a
+ * delimiter.
+ *
+ * Mirrors `pandas.Series.str.get_dummies(sep)`.
+ *
+ * @param input - Series or string array.
+ * @param options - `sep` (default `"|"`), `prefix` and `prefixSep` for column names.
+ * @returns A `DataFrame` of 0/1 integer values, one column per unique token.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: ["a|b", "b|c", "a"] });
+ * strGetDummies(s);
+ * // DataFrame
+ * // a b c
+ * // 0 1 1 0
+ * // 1 0 1 1
+ * // 2 1 0 0
+ * ```
+ */
+export function strGetDummies(
+ input: readonly string[] | Series,
+ options: GetDummiesOptions = {},
+): DataFrame {
+ const sep = options.sep ?? "|";
+ const prefix = options.prefix ?? "";
+ const prefixSep = options.prefixSep ?? "_";
+
+ const strs = toStringArray(input);
+
+ // 1. Collect all unique tokens in first-seen order.
+ const seen = new Set();
+ const tokenRows: string[][] = strs.map((s) => {
+ const tokens = s === "" ? [] : s.split(sep);
+ tokens.forEach((t) => seen.add(t));
+ return tokens;
+ });
+
+ const allTokens = [...seen].sort(); // stable alphabetical order
+
+ // 2. Build column name with optional prefix.
+ const colName = (token: string): string =>
+ prefix === "" ? token : `${prefix}${prefixSep}${token}`;
+
+ // 3. Build one Scalar[] per column.
+ const columns: Record = {};
+ for (const token of allTokens) {
+ const name = colName(token);
+ columns[name] = tokenRows.map((row) => (row.includes(token) ? 1 : 0));
+ }
+
+ // 4. Preserve the row index from a Series input.
+ if (input instanceof Series) {
+ const rowIndex = input.index;
+ return DataFrame.fromColumns(columns, { index: rowIndex });
+ }
+ return DataFrame.fromColumns(columns);
+}
+
+// βββ strExtractAll ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Extract ALL non-overlapping regex matches from every element.
+ *
+ * Each element maps to an array of match arrays (one inner array per match;
+ * each inner array contains the full match and any capture groups).
+ *
+ * Mirrors `pandas.Series.str.extractall(pat)`, but returns a
+ * `Series` rather than a multi-indexed DataFrame to avoid
+ * the overhead of MultiIndex construction.
+ *
+ * @param input - Series or string array.
+ * @param pat - Regular expression (string or `RegExp`).
+ * @param options - Optional flags when `pat` is a string.
+ * @returns A `Series` whose values are `string[][]` (an array of match arrays).
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: ["abc 123", "foo 456 bar 789"] });
+ * strExtractAll(s, /(\d+)/);
+ * // Series [
+ * // [["123", "123"]],
+ * // [["456", "456"], ["789", "789"]],
+ * // ]
+ * ```
+ */
+export function strExtractAll(
+ input: readonly string[] | Series,
+ pat: string | RegExp,
+ options: ExtractAllOptions = {},
+): Series {
+ const strs = toStringArray(input);
+ const flags =
+ pat instanceof RegExp
+ ? pat.flags.includes("g")
+ ? pat.flags
+ : `${pat.flags}g`
+ : `${options.flags ?? ""}g`;
+ const source = pat instanceof RegExp ? pat.source : pat;
+ const re = new RegExp(source, flags);
+
+ const data: Scalar[] = strs.map((s) => {
+ const matches: string[][] = [];
+ let m: RegExpExecArray | null;
+ re.lastIndex = 0;
+ while ((m = re.exec(s)) !== null) {
+ matches.push([...m]);
+ if (!re.global) break;
+ }
+ // Store as JSON string so it fits in Scalar; consumers can JSON.parse
+ return JSON.stringify(matches);
+ });
+
+ return buildSeries(data, input);
+}
+
+// βββ strRemovePrefix ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Remove a leading prefix from each element (only if the element starts with it).
+ *
+ * Mirrors Python 3.9+ `str.removeprefix()` and can be used as a pandas
+ * equivalent via `df["col"].str.removeprefix(prefix)`.
+ *
+ * @param input - Series, string array, or scalar string.
+ * @param prefix - Prefix to remove.
+ * @returns A new Series (or scalar string) with the prefix stripped where present.
+ *
+ * @example
+ * ```ts
+ * strRemovePrefix(["prefix_a", "prefix_b", "other"], "prefix_");
+ * // Series ["a", "b", "other"]
+ * ```
+ */
+export function strRemovePrefix(input: string, prefix: string): string;
+export function strRemovePrefix(
+ input: readonly string[] | Series,
+ prefix: string,
+): Series;
+export function strRemovePrefix(
+ input: StrInput,
+ prefix: string,
+): Series | string {
+ if (typeof input === "string") {
+ return input.startsWith(prefix) ? input.slice(prefix.length) : input;
+ }
+ const strs = toStringArray(input);
+ const data: Scalar[] = strs.map((s) =>
+ s.startsWith(prefix) ? s.slice(prefix.length) : s,
+ );
+ return buildSeries(data, input);
+}
+
+// βββ strRemoveSuffix ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Remove a trailing suffix from each element (only if the element ends with it).
+ *
+ * Mirrors Python 3.9+ `str.removesuffix()`.
+ *
+ * @param input - Series, string array, or scalar string.
+ * @param suffix - Suffix to remove.
+ * @returns A new Series (or scalar string) with the suffix stripped where present.
+ *
+ * @example
+ * ```ts
+ * strRemoveSuffix(["hello_suffix", "world_suffix", "test"], "_suffix");
+ * // Series ["hello", "world", "test"]
+ * ```
+ */
+export function strRemoveSuffix(input: string, suffix: string): string;
+export function strRemoveSuffix(
+ input: readonly string[] | Series,
+ suffix: string,
+): Series;
+export function strRemoveSuffix(
+ input: StrInput,
+ suffix: string,
+): Series | string {
+ if (typeof input === "string") {
+ return input.endsWith(suffix) ? input.slice(0, input.length - suffix.length) : input;
+ }
+ const strs = toStringArray(input);
+ const data: Scalar[] = strs.map((s) =>
+ s.endsWith(suffix) ? s.slice(0, s.length - suffix.length) : s,
+ );
+ return buildSeries(data, input);
+}
+
+// βββ strTranslate βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Translate characters in each element according to a mapping.
+ *
+ * Works like Python's `str.translate(table)`, where a `Map`
+ * maps single characters to their replacements (`null` means delete).
+ *
+ * @param input - Series, string array, or scalar string.
+ * @param table - Map from single source characters to replacement strings or
+ * `null` (to delete the character).
+ * @returns A new Series (or scalar string) with characters replaced.
+ *
+ * @example
+ * ```ts
+ * const t = new Map([["a", "A"], ["e", null]]);
+ * strTranslate(["cafe", "bale"], t);
+ * // Series ["cAf", "bAl"]
+ * ```
+ */
+export function strTranslate(input: string, table: ReadonlyMap): string;
+export function strTranslate(
+ input: readonly string[] | Series,
+ table: ReadonlyMap,
+): Series;
+export function strTranslate(
+ input: StrInput,
+ table: ReadonlyMap,
+): Series | string {
+ const translate = (s: string): string => {
+ let result = "";
+ for (const ch of s) {
+ if (table.has(ch)) {
+ const repl = table.get(ch);
+ if (repl !== null && repl !== undefined) result += repl;
+ // null β delete: skip
+ } else {
+ result += ch;
+ }
+ }
+ return result;
+ };
+
+ if (typeof input === "string") return translate(input);
+ const strs = toStringArray(input);
+ const data: Scalar[] = strs.map(translate);
+ return buildSeries(data, input);
+}
+
+// βββ strCharWidth βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Compute the *display width* of each element, counting CJK (Chinese/Japanese/
+ * Korean) and other full-width characters as 2 columns.
+ *
+ * This is useful when formatting text tables that mix ASCII and East-Asian
+ * scripts.
+ *
+ * @param input - Series, string array, or scalar string.
+ * @returns A new `Series` of numbers (or a number for scalar input).
+ *
+ * @example
+ * ```ts
+ * strCharWidth("hello"); // 5
+ * strCharWidth("γγγ«γ‘γ―"); // 10
+ * ```
+ */
+export function strCharWidth(input: string): number;
+export function strCharWidth(
+ input: readonly string[] | Series,
+): Series;
+export function strCharWidth(
+ input: StrInput,
+): Series | number {
+ const width = (s: string): number => {
+ let w = 0;
+ for (const ch of s) {
+ const cp = ch.codePointAt(0) ?? 0;
+ // Full-width and CJK ranges (simplified but covers the common cases)
+ if (
+ (cp >= 0x1100 && cp <= 0x115f) || // Hangul Jamo
+ (cp >= 0x2e80 && cp <= 0x303e) || // CJK Radicals, Kangxi
+ (cp >= 0x3041 && cp <= 0x33ff) || // Hiragana, Katakana, CJK
+ (cp >= 0x3400 && cp <= 0x4dbf) || // CJK Extension A
+ (cp >= 0x4e00 && cp <= 0xa4c6) || // CJK Unified + Yi
+ (cp >= 0xa960 && cp <= 0xa97c) || // Hangul Jamo Extended-A
+ (cp >= 0xac00 && cp <= 0xd7a3) || // Hangul Syllables
+ (cp >= 0xf900 && cp <= 0xfaff) || // CJK Compatibility
+ (cp >= 0xfe10 && cp <= 0xfe19) || // Vertical forms
+ (cp >= 0xfe30 && cp <= 0xfe6b) || // CJK Compatibility Forms
+ (cp >= 0xff01 && cp <= 0xff60) || // Halfwidth/Fullwidth
+ (cp >= 0xffe0 && cp <= 0xffe6) || // Fullwidth Signs
+ (cp >= 0x1b000 && cp <= 0x1b001) || // Kana Supplement
+ (cp >= 0x1f004 && cp <= 0x1f004) || // Mahjong tile
+ (cp >= 0x1f0cf && cp <= 0x1f0cf) || // Playing card
+ (cp >= 0x1f200 && cp <= 0x1f251) || // Enclosed CJK
+ (cp >= 0x20000 && cp <= 0x2fffd) || // CJK Extension BβF
+ (cp >= 0x30000 && cp <= 0x3fffd) // CJK Extension G
+ ) {
+ w += 2;
+ } else {
+ w += 1;
+ }
+ }
+ return w;
+ };
+
+ if (typeof input === "string") return width(input);
+ const strs = toStringArray(input);
+ const data: Scalar[] = strs.map((s) => width(s));
+ return buildSeries(data, input);
+}
+
+// βββ strByteLength ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Compute the UTF-8 encoded byte length of each element.
+ *
+ * Useful when working with byte-limited APIs (HTTP headers, database columns)
+ * where the character count alone is insufficient.
+ *
+ * @param input - Series, string array, or scalar string.
+ * @returns A new `Series` of numbers (or a number for scalar input).
+ *
+ * @example
+ * ```ts
+ * strByteLength("hello"); // 5
+ * strByteLength("γγγ«γ‘γ―"); // 15 (3 bytes per character)
+ * ```
+ */
+export function strByteLength(input: string): number;
+export function strByteLength(
+ input: readonly string[] | Series,
+): Series;
+export function strByteLength(
+ input: StrInput,
+): Series | number {
+ const byteLen = (s: string): number => new TextEncoder().encode(s).length;
+
+ if (typeof input === "string") return byteLen(input);
+ const strs = toStringArray(input);
+ const data: Scalar[] = strs.map((s) => byteLen(s));
+ return buildSeries(data, input);
+}
diff --git a/src/stats/string_ops_extended.ts b/src/stats/string_ops_extended.ts
new file mode 100644
index 00000000..ed6e2a42
--- /dev/null
+++ b/src/stats/string_ops_extended.ts
@@ -0,0 +1,429 @@
+/**
+ * string_ops_extended β extended standalone string operations.
+ *
+ * Provides advanced string manipulation utilities that complement
+ * `string_ops.ts` and `StringAccessor`:
+ *
+ * - `strSplitExpand` β split strings by delimiter and expand each part into
+ * a DataFrame column (mirrors `str.split(expand=True)`)
+ * - `strExtractGroups` β extract regex capture groups into a DataFrame
+ * (mirrors `str.extract` with capture groups)
+ * - `strPartition` β split at first occurrence of sep β (before, sep, after)
+ * - `strRPartition` β split at last occurrence of sep β (before, sep, after)
+ * - `strMultiReplace` β apply multiple find/replace pairs in sequence
+ * - `strIndent` β prefix every (non-empty) line with a string
+ * - `strDedent` β remove common leading whitespace from all lines
+ *
+ * @module
+ */
+
+import { DataFrame, Index, RangeIndex, Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+import type { StrInput } from "./string_ops.ts";
+
+// βββ internal helpers βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+function toStrOrNull(v: Scalar): string | null {
+ if (v === null || v === undefined || (typeof v === "number" && Number.isNaN(v))) {
+ return null;
+ }
+ return String(v);
+}
+
+function toValues(input: readonly Scalar[] | Series): readonly Scalar[] {
+ return input instanceof Series ? input.values : input;
+}
+
+function rowIndex(input: readonly Scalar[] | Series): Index {
+ return input instanceof Series ? input.index : new RangeIndex(toValues(input).length);
+}
+
+function escapeRegex(s: string): string {
+ return s.replace(/[\\^$.*+?()[\]{}|]/g, "\\$&");
+}
+
+// βββ strSplitExpand βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/** Options for {@link strSplitExpand}. */
+export interface SplitExpandOptions {
+ /**
+ * Maximum number of splits to perform. `-1` means unlimited.
+ * @default -1
+ */
+ readonly n?: number;
+}
+
+/** Split a scalar string into an array of parts. */
+export function strSplitExpand(
+ input: string,
+ sep?: string | RegExp,
+ options?: SplitExpandOptions,
+): string[];
+/** Split each element and expand the parts into a DataFrame (one column per part). */
+export function strSplitExpand(
+ input: readonly Scalar[] | Series,
+ sep?: string | RegExp,
+ options?: SplitExpandOptions,
+): DataFrame;
+/** @internal */
+export function strSplitExpand(
+ input: StrInput,
+ sep: string | RegExp = " ",
+ options: SplitExpandOptions = {},
+): string[] | DataFrame {
+ const maxSplits = options.n ?? -1;
+
+ function splitOne(s: string | null): (string | null)[] {
+ if (s === null) return [null];
+ if (maxSplits < 0) {
+ // unlimited splits
+ const pat = sep instanceof RegExp ? sep : new RegExp(escapeRegex(sep));
+ return s.split(pat);
+ }
+ // limited splits: extract up to maxSplits separators
+ const parts: string[] = [];
+ let rest = s;
+ for (let i = 0; i < maxSplits; i++) {
+ let idx: number;
+ let sepLen: number;
+ if (typeof sep === "string") {
+ idx = rest.indexOf(sep);
+ sepLen = sep.length;
+ } else {
+ const m = rest.match(sep);
+ if (m === null || m.index === undefined) break;
+ idx = m.index;
+ sepLen = m[0]?.length ?? 0;
+ }
+ if (idx === -1) break;
+ parts.push(rest.slice(0, idx));
+ rest = rest.slice(idx + sepLen);
+ }
+ parts.push(rest);
+ return parts;
+ }
+
+ if (typeof input === "string") {
+ return splitOne(input) as string[];
+ }
+
+ const vals = toValues(input);
+ const rows: (string | null)[][] = vals.map((v) => splitOne(toStrOrNull(v)));
+
+ // determine column width (maximum number of parts in any row)
+ const width = rows.reduce((m, r) => Math.max(m, r.length), 0);
+
+ const columns: Record = {};
+ for (let c = 0; c < width; c++) {
+ const col: Scalar[] = rows.map((r) => {
+ const cell = r[c];
+ return cell !== undefined ? cell : null;
+ });
+ columns[String(c)] = col;
+ }
+
+ return DataFrame.fromColumns(columns, { index: rowIndex(input) });
+}
+
+// βββ strExtractGroups βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/** Options for {@link strExtractGroups}. */
+export interface ExtractGroupsOptions {
+ /** Additional regex flags to merge with any flags already on a RegExp pattern. */
+ readonly flags?: string;
+}
+
+/**
+ * Extract regex capture groups from each element into a DataFrame.
+ *
+ * One column is created per capture group. Named groups (`(?...)`)
+ * produce named columns; unnamed groups produce `"0"`, `"1"`, β¦ columns.
+ *
+ * Non-matching elements produce a row of `null` values.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: ["2024-01-15", "2025-12-31"] });
+ * const df = strExtractGroups(s, /(?\d{4})-(?\d{2})-(?\d{2})/);
+ * // DataFrame with columns: year, month, day
+ * ```
+ */
+export function strExtractGroups(
+ input: readonly Scalar[] | Series,
+ pat: string | RegExp,
+ options?: ExtractGroupsOptions,
+): DataFrame {
+ const flags = options?.flags ?? "";
+ const re =
+ pat instanceof RegExp
+ ? flags === ""
+ ? pat
+ : new RegExp(pat.source, pat.flags + flags)
+ : new RegExp(pat, flags);
+
+ const groupNames = extractGroupNames(re);
+ const vals = toValues(input);
+
+ const rows: (string | null)[][] = vals.map((v) => {
+ const s = toStrOrNull(v);
+ if (s === null) return [];
+ const m = re.exec(s);
+ if (m === null) return [];
+ return Array.from({ length: m.length - 1 }, (_, i) => {
+ const captured = m[i + 1];
+ return captured !== undefined ? captured : null;
+ });
+ });
+
+ const width = rows.reduce((w, r) => Math.max(w, r.length), 0);
+
+ // Use named groups if available and count matches; otherwise use 0-indexed strings.
+ const colNames: string[] =
+ groupNames.length === width && width > 0
+ ? groupNames
+ : Array.from({ length: width }, (_, i) => String(i));
+
+ const columns: Record = {};
+ for (let c = 0; c < width; c++) {
+ const name = colNames[c] ?? String(c);
+ const col: Scalar[] = rows.map((r) => {
+ const cell = r[c];
+ return cell !== undefined ? cell : null;
+ });
+ columns[name] = col;
+ }
+
+ return DataFrame.fromColumns(columns, { index: rowIndex(input) });
+}
+
+/** Parse named capture group names from a regex source string. */
+function extractGroupNames(re: RegExp): string[] {
+ const namedGroupPattern = /\(\?<([^>]+)>/g;
+ const names: string[] = [];
+ let m: RegExpExecArray | null;
+ while ((m = namedGroupPattern.exec(re.source)) !== null) {
+ const name = m[1];
+ if (name !== undefined) names.push(name);
+ }
+ return names;
+}
+
+// βββ strPartition βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Result of {@link strPartition} / {@link strRPartition} on a scalar input:
+ * a 3-tuple `[before, separator, after]`.
+ */
+export type PartitionResult = [string, string, string];
+
+/** Partition a scalar string at the first occurrence of `sep`. */
+export function strPartition(input: string, sep: string): PartitionResult;
+/** Partition each element and expand to a DataFrame with columns `"0"`, `"1"`, `"2"`. */
+export function strPartition(
+ input: readonly Scalar[] | Series,
+ sep: string,
+): DataFrame;
+/** @internal */
+export function strPartition(
+ input: StrInput,
+ sep: string,
+): PartitionResult | DataFrame {
+ function partitionOne(s: string | null): [string | null, string | null, string | null] {
+ if (s === null) return [null, null, null];
+ const idx = s.indexOf(sep);
+ if (idx === -1) return [s, "", ""];
+ return [s.slice(0, idx), sep, s.slice(idx + sep.length)];
+ }
+
+ if (typeof input === "string") {
+ return partitionOne(input) as PartitionResult;
+ }
+
+ const vals = toValues(input);
+ const rows = vals.map((v) => partitionOne(toStrOrNull(v)));
+
+ return DataFrame.fromColumns(
+ {
+ "0": rows.map((r) => r[0]),
+ "1": rows.map((r) => r[1]),
+ "2": rows.map((r) => r[2]),
+ },
+ { index: rowIndex(input) },
+ );
+}
+
+// βββ strRPartition ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/** Partition a scalar string at the LAST occurrence of `sep`. */
+export function strRPartition(input: string, sep: string): PartitionResult;
+/** Partition each element at the last occurrence and expand to a DataFrame. */
+export function strRPartition(
+ input: readonly Scalar[] | Series,
+ sep: string,
+): DataFrame;
+/** @internal */
+export function strRPartition(
+ input: StrInput,
+ sep: string,
+): PartitionResult | DataFrame {
+ function rpartitionOne(s: string | null): [string | null, string | null, string | null] {
+ if (s === null) return [null, null, null];
+ const idx = s.lastIndexOf(sep);
+ if (idx === -1) return ["", "", s];
+ return [s.slice(0, idx), sep, s.slice(idx + sep.length)];
+ }
+
+ if (typeof input === "string") {
+ return rpartitionOne(input) as PartitionResult;
+ }
+
+ const vals = toValues(input);
+ const rows = vals.map((v) => rpartitionOne(toStrOrNull(v)));
+
+ return DataFrame.fromColumns(
+ {
+ "0": rows.map((r) => r[0]),
+ "1": rows.map((r) => r[1]),
+ "2": rows.map((r) => r[2]),
+ },
+ { index: rowIndex(input) },
+ );
+}
+
+// βββ strMultiReplace ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/** A single find/replace pair for {@link strMultiReplace}. */
+export interface ReplacePair {
+ /** Pattern to search for (string literal or regular expression). */
+ readonly pat: string | RegExp;
+ /** Replacement string (may use `$1`, `$2`, β¦ back-references for RegExp patterns). */
+ readonly repl: string;
+}
+
+/** Apply a sequence of find/replace pairs to a scalar string. */
+export function strMultiReplace(input: string, replacements: readonly ReplacePair[]): string;
+/** Apply a sequence of find/replace pairs to each element of a Series or array. */
+export function strMultiReplace(
+ input: readonly Scalar[] | Series,
+ replacements: readonly ReplacePair[],
+): Series;
+/** @internal */
+export function strMultiReplace(
+ input: StrInput,
+ replacements: readonly ReplacePair[],
+): string | Series {
+ function applyAll(s: string | null): string | null {
+ if (s === null) return null;
+ let result = s;
+ for (const { pat, repl } of replacements) {
+ result = result.replace(pat instanceof RegExp ? pat : new RegExp(escapeRegex(pat), "g"), repl);
+ }
+ return result;
+ }
+
+ if (typeof input === "string") {
+ return applyAll(input) ?? "";
+ }
+
+ const vals = toValues(input);
+ const out: Scalar[] = vals.map((v) => applyAll(toStrOrNull(v)));
+ if (input instanceof Series) {
+ return input.withValues(out);
+ }
+ return new Series({ data: out });
+}
+
+// βββ strIndent ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/** Options for {@link strIndent}. */
+export interface IndentOptions {
+ /**
+ * Predicate to decide which lines get the prefix.
+ * Defaults to all non-empty lines (lines that are not solely whitespace).
+ */
+ readonly predicate?: (line: string) => boolean;
+}
+
+/** Prefix every (non-empty) line in a scalar string. */
+export function strIndent(input: string, prefix: string, options?: IndentOptions): string;
+/** Prefix every (non-empty) line in each element of a Series or array. */
+export function strIndent(
+ input: readonly Scalar[] | Series,
+ prefix: string,
+ options?: IndentOptions,
+): Series;
+/** @internal */
+export function strIndent(
+ input: StrInput,
+ prefix: string,
+ options: IndentOptions = {},
+): string | Series {
+ const predicate = options.predicate ?? ((line: string) => line.trim().length > 0);
+
+ function indentOne(s: string | null): string | null {
+ if (s === null) return null;
+ return s
+ .split("\n")
+ .map((line) => (predicate(line) ? prefix + line : line))
+ .join("\n");
+ }
+
+ if (typeof input === "string") {
+ return indentOne(input) ?? "";
+ }
+
+ const vals = toValues(input);
+ const out: Scalar[] = vals.map((v) => indentOne(toStrOrNull(v)));
+ if (input instanceof Series) {
+ return input.withValues(out);
+ }
+ return new Series({ data: out });
+}
+
+// βββ strDedent ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Remove the common leading whitespace from every line of a string.
+ *
+ * Mirrors `textwrap.dedent` from Python's standard library.
+ * Lines that are entirely whitespace are not used to compute the common
+ * prefix but are still included in the output (trimmed to empty).
+ *
+ * @example
+ * ```ts
+ * strDedent(" hello\n world") // "hello\nworld"
+ * strDedent(" a\n b") // "a\n b"
+ * ```
+ */
+export function strDedent(input: string): string;
+/** Remove common leading whitespace from each element of a Series or array. */
+export function strDedent(input: readonly Scalar[] | Series): Series;
+/** @internal */
+export function strDedent(input: StrInput): string | Series {
+ function dedentOne(s: string | null): string | null {
+ if (s === null) return null;
+ const lines = s.split("\n");
+ // find the minimum leading-whitespace length among non-whitespace-only lines
+ let minIndent = Infinity;
+ for (const line of lines) {
+ if (line.trim().length === 0) continue;
+ const leading = line.length - line.trimStart().length;
+ if (leading < minIndent) minIndent = leading;
+ }
+ if (minIndent === Infinity || minIndent === 0) return s;
+ return lines
+ .map((line) => (line.trim().length === 0 ? "" : line.slice(minIndent)))
+ .join("\n");
+ }
+
+ if (typeof input === "string") {
+ return dedentOne(input) ?? "";
+ }
+
+ const vals = toValues(input);
+ const out: Scalar[] = vals.map((v) => dedentOne(toStrOrNull(v)));
+ if (input instanceof Series) {
+ return input.withValues(out);
+ }
+ return new Series({ data: out });
+}
diff --git a/src/stats/where_mask.ts b/src/stats/where_mask.ts
new file mode 100644
index 00000000..d6921cd9
--- /dev/null
+++ b/src/stats/where_mask.ts
@@ -0,0 +1,289 @@
+/**
+ * where_mask β element-wise conditional selection for Series and DataFrame.
+ *
+ * Mirrors the following pandas methods:
+ * - `Series.where(cond, other)` β keep values where `cond` is truthy, replace with `other` elsewhere
+ * - `Series.mask(cond, other)` β inverse of `where`; keep where `cond` is falsy
+ * - `DataFrame.where(cond, other)` β element-wise `where` for DataFrames
+ * - `DataFrame.mask(cond, other)` β element-wise `mask` for DataFrames
+ *
+ * The `cond` parameter accepts:
+ * - A boolean array (aligned positionally to the series/column values)
+ * - A boolean `Series` (aligned by label to the target series)
+ * - A callable `(s: Series) => boolean[] | Series` for Series ops
+ * - A boolean `DataFrame` (aligned by label) for DataFrame ops
+ * - A callable `(df: DataFrame) => DataFrame` returning a boolean DataFrame for DataFrame ops
+ *
+ * All functions are **pure** β inputs are never mutated.
+ * Missing values in `cond` are treated as `false` (i.e. the position is replaced).
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// βββ public types βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * A boolean condition for a Series operation.
+ *
+ * - `readonly boolean[]` β positional mask (must match series length)
+ * - `Series` β label-aligned boolean series
+ * - `(s: Series) => readonly boolean[] | Series` β callable
+ */
+export type SeriesCond =
+ | readonly boolean[]
+ | Series
+ | ((s: Series) => readonly boolean[] | Series);
+
+/**
+ * A boolean condition for a DataFrame operation.
+ *
+ * - `DataFrame` β label-aligned boolean DataFrame
+ * - `(df: DataFrame) => DataFrame` β callable returning a boolean DataFrame
+ */
+export type DataFrameCond = DataFrame | ((df: DataFrame) => DataFrame);
+
+/** Options for {@link seriesWhere} and {@link seriesMask}. */
+export interface SeriesWhereOptions {
+ /**
+ * Replacement value for positions where the condition is not satisfied.
+ * Defaults to `null` (pandas uses `NaN` for numeric; we use `null` as the
+ * universal missing sentinel in tsb).
+ */
+ readonly other?: Scalar;
+}
+
+/** Options for {@link dataFrameWhere} and {@link dataFrameMask}. */
+export interface DataFrameWhereOptions {
+ /**
+ * Replacement value for positions where the condition is not satisfied.
+ * Defaults to `null`.
+ */
+ readonly other?: Scalar;
+}
+
+// βββ helpers ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Resolve a {@link SeriesCond} to a positional boolean array aligned to
+ * `series`.
+ *
+ * For a label-aligned `Series`, labels that are absent in the target
+ * series are treated as `false`.
+ */
+function resolveSeriesCond(
+ series: Series,
+ cond: SeriesCond,
+): readonly boolean[] {
+ if (typeof cond === "function") {
+ const resolved = cond(series);
+ return resolveSeriesCond(series, resolved);
+ }
+
+ if (Array.isArray(cond)) {
+ return cond as readonly boolean[];
+ }
+
+ // Series β align by label
+ const boolSeries = cond as Series;
+ const labels = series.index.values as readonly Label[];
+ return labels.map((label) => {
+ const pos = boolSeries.index.values.indexOf(label);
+ if (pos === -1) return false;
+ const v = boolSeries.values[pos];
+ return v === true;
+ });
+}
+
+/**
+ * Apply a positional boolean mask to `series`.
+ *
+ * @param series - source series
+ * @param mask - `true` β keep original, `false` β use `other`
+ * @param other - replacement value (default `null`)
+ */
+function applyMaskToSeries(
+ series: Series,
+ mask: readonly boolean[],
+ other: Scalar,
+): Series {
+ const result: Scalar[] = series.values.map((v, i) => (mask[i] === true ? v : other));
+ return new Series({ data: result, index: series.index, name: series.name });
+}
+
+// βββ Series operations ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Return a new Series keeping values where `cond` is truthy, replacing all
+ * other positions with `other` (default `null`).
+ *
+ * Mirrors `pandas.Series.where(cond, other)`.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [1, 2, 3, 4, 5] });
+ * seriesWhere(s, [true, false, true, false, true]);
+ * // Series [1, null, 3, null, 5]
+ *
+ * seriesWhere(s, (x) => x.values.map((v) => (v as number) > 2), { other: 0 });
+ * // Series [0, 0, 3, 4, 5]
+ * ```
+ */
+export function seriesWhere(
+ series: Series,
+ cond: SeriesCond,
+ options: SeriesWhereOptions = {},
+): Series {
+ const other: Scalar = options.other !== undefined ? options.other : null;
+ const mask = resolveSeriesCond(series, cond);
+ return applyMaskToSeries(series, mask, other);
+}
+
+/**
+ * Return a new Series keeping values where `cond` is **falsy**, replacing all
+ * other positions with `other` (default `null`).
+ *
+ * Mirrors `pandas.Series.mask(cond, other)`.
+ *
+ * `mask` is the exact inverse of `where`:
+ * `seriesMask(s, cond) === seriesWhere(s, inverted_cond)`
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [1, 2, 3, 4, 5] });
+ * seriesMask(s, [true, false, true, false, true]);
+ * // Series [null, 2, null, 4, null]
+ *
+ * seriesMask(s, (x) => x.values.map((v) => (v as number) > 2), { other: -1 });
+ * // Series [1, 2, -1, -1, -1]
+ * ```
+ */
+export function seriesMask(
+ series: Series,
+ cond: SeriesCond,
+ options: SeriesWhereOptions = {},
+): Series {
+ const other: Scalar = options.other !== undefined ? options.other : null;
+ const mask = resolveSeriesCond(series, cond);
+ // Invert: keep where cond is FALSE
+ const inverted = mask.map((b) => !b);
+ return applyMaskToSeries(series, inverted, other);
+}
+
+// βββ DataFrame operations βββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Resolve a {@link DataFrameCond} to a per-column positional boolean array map.
+ *
+ * For a label-aligned boolean `DataFrame`, missing column/row labels are treated
+ * as `false`.
+ */
+function resolveDataFrameCond(
+ df: DataFrame,
+ cond: DataFrameCond,
+): Map {
+ const condDf: DataFrame = typeof cond === "function" ? cond(df) : cond;
+
+ const result = new Map();
+ const rowLabels = df.index.values as readonly Label[];
+
+ for (const colName of df.columns.values) {
+ const condColIdx = condDf.columns.indexOf(colName);
+ if (condColIdx === -1) {
+ // Column absent from condition β treat entire column as false
+ result.set(colName, rowLabels.map(() => false));
+ continue;
+ }
+
+ const condCol = condDf.col(colName);
+ const rowMask: boolean[] = rowLabels.map((label) => {
+ const rowPos = condDf.index.values.indexOf(label);
+ if (rowPos === -1) return false;
+ return condCol.values[rowPos] === true;
+ });
+ result.set(colName, rowMask);
+ }
+ return result;
+}
+
+/**
+ * Return a new DataFrame keeping values where the element-wise `cond` is
+ * truthy, replacing all other positions with `other` (default `null`).
+ *
+ * Mirrors `pandas.DataFrame.where(cond, other)`.
+ *
+ * `cond` may be:
+ * - A `DataFrame` of booleans (label-aligned)
+ * - A callable `(df: DataFrame) => DataFrame` that returns a boolean DataFrame
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+ * const mask = DataFrame.fromColumns({ a: [true, false, true], b: [false, true, false] });
+ * dataFrameWhere(df, mask);
+ * // DataFrame { a: [1, null, 3], b: [null, 5, null] }
+ *
+ * dataFrameWhere(df, (d) =>
+ * DataFrame.fromColumns(
+ * Object.fromEntries(d.columns.map((c) => [c, d.col(c as string).values.map((v) => (v as number) > 2)]))
+ * )
+ * );
+ * // DataFrame { a: [null, null, 3], b: [4, 5, 6] }
+ * ```
+ */
+export function dataFrameWhere(
+ df: DataFrame,
+ cond: DataFrameCond,
+ options: DataFrameWhereOptions = {},
+): DataFrame {
+ const other: Scalar = options.other !== undefined ? options.other : null;
+ const condMap = resolveDataFrameCond(df, cond);
+
+ const resultCols: Record = {};
+ for (const colName of df.columns.values) {
+ const srcCol = df.col(colName);
+ const mask = condMap.get(colName) ?? srcCol.values.map(() => false);
+ resultCols[colName] = srcCol.values.map((v, i) => (mask[i] === true ? v : other));
+ }
+
+ return DataFrame.fromColumns(resultCols, { index: df.index });
+}
+
+/**
+ * Return a new DataFrame keeping values where the element-wise `cond` is
+ * **falsy**, replacing all other positions with `other` (default `null`).
+ *
+ * Mirrors `pandas.DataFrame.mask(cond, other)`.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+ * dataFrameMask(df, (d) =>
+ * DataFrame.fromColumns(
+ * Object.fromEntries(d.columns.values.map((c) => [c, d.col(c).values.map((v) => (v as number) > 2)]))
+ * )
+ * );
+ * // DataFrame { a: [1, 2, null], b: [null, null, null] }
+ * ```
+ */
+export function dataFrameMask(
+ df: DataFrame,
+ cond: DataFrameCond,
+ options: DataFrameWhereOptions = {},
+): DataFrame {
+ const other: Scalar = options.other !== undefined ? options.other : null;
+ const condMap = resolveDataFrameCond(df, cond);
+
+ const resultCols: Record = {};
+ for (const colName of df.columns.values) {
+ const srcCol = df.col(colName);
+ const mask = condMap.get(colName) ?? srcCol.values.map(() => false);
+ // Invert: keep where cond is FALSE
+ resultCols[colName] = srcCol.values.map((v, i) => (mask[i] !== true ? v : other));
+ }
+
+ return DataFrame.fromColumns(resultCols, { index: df.index });
+}
diff --git a/src/stats/window_extended.ts b/src/stats/window_extended.ts
new file mode 100644
index 00000000..3811122d
--- /dev/null
+++ b/src/stats/window_extended.ts
@@ -0,0 +1,321 @@
+/**
+ * window_extended β additional rolling-window aggregations for Series.
+ *
+ * Extends the core `Rolling` aggregations with higher-order statistics that
+ * mirror pandas methods:
+ * - `Series.rolling(w).sem()` β {@link rollingSem}
+ * - `Series.rolling(w).skew()` β {@link rollingSkew}
+ * - `Series.rolling(w).kurt()` β {@link rollingKurt}
+ * - `Series.rolling(w).quantile(q)` β {@link rollingQuantile}
+ *
+ * All functions are **pure** (return new Series; inputs are unchanged).
+ * Missing values (null / NaN) are excluded from each window calculation.
+ * A `null` result is produced whenever the window has fewer than `minPeriods`
+ * valid observations (or fewer than the function's minimum required count).
+ *
+ * @module
+ */
+
+import type { Index } from "../core/base-index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// βββ public types βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/** Options shared by all rolling-window functions in this module. */
+export interface WindowExtOptions {
+ /**
+ * Minimum number of valid (non-null / non-NaN) observations required in the
+ * window to produce a non-null result.
+ *
+ * Defaults to the `window` size (matching pandas behaviour).
+ */
+ readonly minPeriods?: number;
+ /**
+ * Whether to centre the window around each position.
+ * When `false` (default) the window is trailing (right-aligned).
+ */
+ readonly center?: boolean;
+}
+
+/** Options for {@link rollingQuantile}. */
+export interface RollingQuantileOptions extends WindowExtOptions {
+ /**
+ * Interpolation method when the desired quantile falls between two values.
+ * - `"linear"` (default): pandas default β linear interpolation.
+ * - `"lower"`: take the lower of the two surrounding values.
+ * - `"higher"`: take the higher of the two surrounding values.
+ * - `"midpoint"`: arithmetic mean of the two surrounding values.
+ * - `"nearest"`: whichever value is closest (lower on tie).
+ */
+ readonly interpolation?: "linear" | "lower" | "higher" | "midpoint" | "nearest";
+}
+
+// βββ minimal Series interface (mirrors RollingSeriesLike) βββββββββββββββββββββ
+
+/** Minimal interface the real `Series` class satisfies. */
+interface SeriesLike {
+ readonly values: readonly Scalar[];
+ readonly index: Index;
+ readonly name: string | null;
+ withValues(data: readonly Scalar[], name?: string | null): SeriesLike;
+}
+
+// βββ helpers ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+function isMissing(v: Scalar): boolean {
+ return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+}
+
+function validNums(slice: readonly Scalar[]): number[] {
+ const out: number[] = [];
+ for (const v of slice) {
+ if (!isMissing(v) && typeof v === "number") {
+ out.push(v);
+ }
+ }
+ return out;
+}
+
+function trailingBounds(i: number, window: number, n: number): [number, number] {
+ return [Math.max(0, i - window + 1), Math.min(n, i + 1)];
+}
+
+function centeredBounds(i: number, window: number, n: number): [number, number] {
+ const half = Math.floor((window - 1) / 2);
+ return [Math.max(0, i - half), Math.min(n, i + (window - half))];
+}
+
+function windowBounds(i: number, window: number, n: number, center: boolean): [number, number] {
+ return center ? centeredBounds(i, window, n) : trailingBounds(i, window, n);
+}
+
+function numMean(nums: readonly number[]): number {
+ return nums.reduce((s, v) => s + v, 0) / nums.length;
+}
+
+function numVar(nums: readonly number[], ddof: number): number {
+ if (nums.length - ddof <= 0) {
+ return Number.NaN;
+ }
+ const m = numMean(nums);
+ return nums.reduce((s, v) => s + (v - m) ** 2, 0) / (nums.length - ddof);
+}
+
+function numStd(nums: readonly number[], ddof: number): number {
+ return Math.sqrt(numVar(nums, ddof));
+}
+
+/** Apply an aggregation over each window, returning a new Series. */
+function applyWindow(
+ series: SeriesLike,
+ window: number,
+ opts: WindowExtOptions,
+ minN: number,
+ agg: (nums: number[], n: number) => Scalar,
+): SeriesLike {
+ const { values, index, name } = series;
+ const n = values.length;
+ const minPeriods = opts.minPeriods ?? window;
+ const effectiveMin = Math.max(minN, minPeriods);
+ const center = opts.center ?? false;
+ const out: Scalar[] = new Array(n).fill(null);
+
+ for (let i = 0; i < n; i++) {
+ const [lo, hi] = windowBounds(i, window, n, center);
+ const nums = validNums(values.slice(lo, hi));
+ if (nums.length < effectiveMin) {
+ continue;
+ }
+ out[i] = agg(nums, nums.length);
+ }
+
+ return series.withValues(out, name);
+}
+
+// βββ Rolling SEM ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Rolling standard error of the mean.
+ *
+ * `sem = std(ddof=1) / sqrt(n)` where `n` is the number of valid observations
+ * in the window. Requires at least 2 valid values (else `null`).
+ *
+ * Mirrors `pandas.Series.rolling(window).sem()`.
+ *
+ * @param series - Input Series.
+ * @param window - Size of the sliding window (number of observations).
+ * @param opts - Window options.
+ * @returns A new Series with rolling SEM values.
+ *
+ * @example
+ * ```ts
+ * const s = Series.from([1, 2, 3, 4, 5]);
+ * rollingSem(s, 3); // [null, null, ~0.577, ~0.577, ~0.577]
+ * ```
+ */
+export function rollingSem(series: SeriesLike, window: number, opts: WindowExtOptions = {}): SeriesLike {
+ return applyWindow(series, window, opts, 2, (nums) => {
+ const s = numStd(nums, 1);
+ return s / Math.sqrt(nums.length);
+ });
+}
+
+// βββ Rolling Skewness βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Rolling Fisher-Pearson skewness (unbiased, 3rd standardised moment).
+ *
+ * Uses the standard adjustment formula:
+ * ```
+ * skew = [n / ((n-1)(n-2))] * Ξ£[(xα΅’ - xΜ) / s]Β³
+ * ```
+ * where `s` is the sample standard deviation (`ddof=1`).
+ * Requires at least 3 valid observations (else `null`).
+ *
+ * Mirrors `pandas.Series.rolling(window).skew()`.
+ *
+ * @param series - Input Series.
+ * @param window - Size of the sliding window.
+ * @param opts - Window options.
+ * @returns A new Series with rolling skewness values.
+ *
+ * @example
+ * ```ts
+ * const s = Series.from([1, 2, 3, 4, 5]);
+ * rollingSkew(s, 3); // [null, null, 0, 0, 0] (symmetric windows)
+ * ```
+ */
+export function rollingSkew(series: SeriesLike, window: number, opts: WindowExtOptions = {}): SeriesLike {
+ return applyWindow(series, window, opts, 3, (nums, n) => {
+ const m = numMean(nums);
+ const s = numStd(nums, 1);
+ if (s === 0 || Number.isNaN(s)) {
+ return 0;
+ }
+ const sum3 = nums.reduce((acc, v) => acc + ((v - m) / s) ** 3, 0);
+ return (n / ((n - 1) * (n - 2))) * sum3;
+ });
+}
+
+// βββ Rolling Kurtosis βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Rolling excess kurtosis (Fisher's definition, unbiased, 4th standardised moment).
+ *
+ * Uses the standard adjustment (Fisher, 1930):
+ * ```
+ * kurt = [n(n+1) / ((n-1)(n-2)(n-3))] * Ξ£[(xα΅’ - xΜ) / s]β΄
+ * β 3(n-1)Β² / ((n-2)(n-3))
+ * ```
+ * where `s` is the sample standard deviation (`ddof=1`).
+ * Requires at least 4 valid observations (else `null`).
+ *
+ * Mirrors `pandas.Series.rolling(window).kurt()`.
+ *
+ * @param series - Input Series.
+ * @param window - Size of the sliding window.
+ * @param opts - Window options.
+ * @returns A new Series with rolling excess kurtosis values.
+ *
+ * @example
+ * ```ts
+ * const s = Series.from([1, 2, 3, 4]);
+ * rollingKurt(s, 4); // [null, null, null, -1.2] (uniform distribution)
+ * ```
+ */
+export function rollingKurt(series: SeriesLike, window: number, opts: WindowExtOptions = {}): SeriesLike {
+ return applyWindow(series, window, opts, 4, (nums, n) => {
+ const m = numMean(nums);
+ const s = numStd(nums, 1);
+ if (s === 0 || Number.isNaN(s)) {
+ return 0;
+ }
+ const sum4 = nums.reduce((acc, v) => acc + ((v - m) / s) ** 4, 0);
+ const term1 = (n * (n + 1)) / ((n - 1) * (n - 2) * (n - 3)) * sum4;
+ const term2 = (3 * (n - 1) ** 2) / ((n - 2) * (n - 3));
+ return term1 - term2;
+ });
+}
+
+// βββ Rolling Quantile βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Compute a quantile value for each rolling window.
+ *
+ * @param sorted - A sorted array of valid numbers in the window.
+ * @param q - Quantile in [0, 1].
+ * @param method - Interpolation method.
+ */
+function computeQuantile(
+ sorted: readonly number[],
+ q: number,
+ method: RollingQuantileOptions["interpolation"],
+): number {
+ const n = sorted.length;
+ if (n === 0) {
+ return Number.NaN;
+ }
+ if (n === 1) {
+ return sorted[0] as number;
+ }
+ const virtual = q * (n - 1);
+ const lo = Math.floor(virtual);
+ const hi = Math.ceil(virtual);
+ const loVal = sorted[lo] as number;
+ const hiVal = sorted[hi] as number;
+ switch (method ?? "linear") {
+ case "lower":
+ return loVal;
+ case "higher":
+ return hiVal;
+ case "midpoint":
+ return (loVal + hiVal) / 2;
+ case "nearest": {
+ const fracLo = virtual - lo;
+ return fracLo < 0.5 ? loVal : hiVal;
+ }
+ case "linear":
+ default: {
+ const frac = virtual - lo;
+ return loVal + frac * (hiVal - loVal);
+ }
+ }
+}
+
+/**
+ * Rolling quantile.
+ *
+ * Computes the `q`-th quantile for each sliding window, using linear
+ * interpolation by default.
+ *
+ * Mirrors `pandas.Series.rolling(window).quantile(q, interpolation)`.
+ *
+ * @param series - Input Series.
+ * @param q - Quantile to compute (0 β€ q β€ 1).
+ * @param window - Size of the sliding window.
+ * @param opts - Window options including interpolation method.
+ * @returns A new Series with rolling quantile values.
+ *
+ * @throws {RangeError} If `q` is outside `[0, 1]`.
+ *
+ * @example
+ * ```ts
+ * const s = Series.from([1, 2, 3, 4, 5]);
+ * rollingQuantile(s, 0.5, 3); // [null, null, 2, 3, 4] (rolling median)
+ * ```
+ */
+export function rollingQuantile(
+ series: SeriesLike,
+ q: number,
+ window: number,
+ opts: RollingQuantileOptions = {},
+): SeriesLike {
+ if (q < 0 || q > 1) {
+ throw new RangeError(`rollingQuantile: q must be in [0, 1], got ${q}`);
+ }
+ const { interpolation } = opts;
+ return applyWindow(series, window, opts, 1, (nums) => {
+ const sorted = [...nums].sort((a, b) => a - b);
+ return computeQuantile(sorted, q, interpolation);
+ });
+}
diff --git a/src/window/index.ts b/src/window/index.ts
index 90f8c0dd..378222e2 100644
--- a/src/window/index.ts
+++ b/src/window/index.ts
@@ -10,3 +10,10 @@ export { Expanding } from "./expanding.ts";
export type { ExpandingOptions, ExpandingSeriesLike } from "./expanding.ts";
export { EWM } from "./ewm.ts";
export type { EwmOptions, EwmSeriesLike } from "./ewm.ts";
+export {
+ rollingApply,
+ rollingAgg,
+ dataFrameRollingApply,
+ dataFrameRollingAgg,
+} from "./rolling_apply.ts";
+export type { RollingApplyOptions, RollingAggOptions, AggFunctions } from "./rolling_apply.ts";
diff --git a/src/window/rolling_apply.ts b/src/window/rolling_apply.ts
new file mode 100644
index 00000000..18d09c93
--- /dev/null
+++ b/src/window/rolling_apply.ts
@@ -0,0 +1,323 @@
+/**
+ * rolling_apply β standalone rolling-window apply and multi-aggregation.
+ *
+ * Mirrors the flexibility of `pandas.core.window.Rolling.apply()` with
+ * additional utilities not available on the Rolling class:
+ *
+ * - {@link rollingApply} β apply a custom function over each window of a
+ * Series, with `raw` mode support (pass all window values including null/NaN
+ * vs. only valid numbers).
+ * - {@link rollingAgg} β apply multiple named aggregation functions in a
+ * single pass, returning a DataFrame keyed by function name.
+ * - {@link dataFrameRollingApply} β apply a custom function per-column across
+ * a DataFrame.
+ * - {@link dataFrameRollingAgg} β apply multiple named aggregation functions
+ * per-column across a DataFrame.
+ *
+ * ### raw vs. filtered mode
+ *
+ * By default (`raw: false`) the aggregation function receives only the **valid
+ * (non-null, non-NaN) numeric values** in the current window β matching the
+ * default `raw=True` behaviour of `pandas.Rolling.apply` with NaN values
+ * already stripped. With `raw: true` the function receives the **full window
+ * slice** including `null`/`undefined`/`NaN` entries (as `null`), giving the
+ * aggregation full control over missing-value handling.
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Index } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// βββ public option types ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/** Options for {@link rollingApply} and {@link dataFrameRollingApply}. */
+export interface RollingApplyOptions {
+ /**
+ * Minimum number of valid (non-null/NaN) observations required to produce a
+ * non-null result.
+ *
+ * Defaults to `window` (same as `pandas.Rolling` behaviour).
+ */
+ readonly minPeriods?: number;
+ /**
+ * Whether to centre the window. When `true` the window is symmetric around
+ * each index position; when `false` (default) the window is trailing.
+ */
+ readonly center?: boolean;
+ /**
+ * When `true`, the aggregation function receives the **full** window slice
+ * including `null`/`NaN` values (represented as `null`). When `false`
+ * (default), only the valid numeric values are passed.
+ */
+ readonly raw?: boolean;
+}
+
+/** Options for {@link rollingAgg} and {@link dataFrameRollingAgg}. */
+export type RollingAggOptions = Omit<RollingApplyOptions, "raw">;
+
+/** A named map of aggregation functions for {@link rollingAgg}. */
+export type AggFunctions = Record<string, (values: readonly number[]) => number>;
+
+// βββ helpers ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/** True when a Scalar is missing. */
+function isMissing(v: Scalar): boolean {
+ return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+}
+
+/** Extract the numeric values from a window slice, excluding missing entries. */
+function validNums(slice: readonly Scalar[]): number[] {
+ const out: number[] = [];
+ for (const v of slice) {
+ if (!isMissing(v) && typeof v === "number") {
+ out.push(v);
+ }
+ }
+ return out;
+}
+
+/** Convert a raw window slice to `null`-substituted numeric array. */
+function rawWindow(slice: readonly Scalar[]): (number | null)[] {
+ return slice.map((v): number | null => {
+ if (isMissing(v)) return null;
+ if (typeof v === "number") return v;
+ return null;
+ });
+}
+
+/** Trailing-window [start, end) indices for position `i`. */
+function trailingBounds(i: number, window: number, n: number): [number, number] {
+ return [Math.max(0, i - window + 1), Math.min(n, i + 1)];
+}
+
+/** Centred-window [start, end) indices for position `i`. */
+function centeredBounds(i: number, window: number, n: number): [number, number] {
+ const half = Math.floor((window - 1) / 2);
+ return [Math.max(0, i - half), Math.min(n, i + (window - half))];
+}
+
+/** Select trailing or centred window bounds. */
+function bounds(i: number, window: number, n: number, center: boolean): [number, number] {
+ return center ? centeredBounds(i, window, n) : trailingBounds(i, window, n);
+}
+
+// βββ core engine ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Iterate over each position in `vals`, yielding the window's valid numeric
+ * values (or, when `useRaw`, the raw slice with nulls). Returns whether the
+ * window met `minPeriods` and the processed window array.
+ */
+function* windowIterator(
+ vals: readonly Scalar[],
+ window: number,
+ minPeriods: number,
+ center: boolean,
+ useRaw: boolean,
+): Generator<{ met: boolean; nums: readonly number[]; raw: readonly (number | null)[] }> {
+ const n = vals.length;
+ for (let i = 0; i < n; i++) {
+ const [start, end] = bounds(i, window, n, center);
+ const slice = vals.slice(start, end);
+ const nums = validNums(slice);
+ const met = nums.length >= minPeriods;
+ yield { met, nums, raw: useRaw ? rawWindow(slice) : [] };
+ }
+}
+
+// βββ public API βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+/**
+ * Apply a custom aggregation function over a rolling window of a Series.
+ *
+ * This is the standalone counterpart to `series.rolling(w).apply(fn)`. It
+ * adds `raw` mode support and returns a `Series` with the
+ * original index and name preserved.
+ *
+ * @param series - Input Series (numeric values only; non-numeric treated as missing).
+ * @param window - Window size (positive integer).
+ * @param fn - Aggregation function. In default (`raw: false`) mode
+ * receives only valid numeric values; in `raw: true` mode
+ * receives the full window with nulls.
+ * @param options - {@link RollingApplyOptions}.
+ * @returns A new `Series`.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [1, 2, 3, 4, 5] });
+ * rollingApply(s, 3, (w) => w.reduce((a, b) => a + b, 0) / w.length);
+ * // Series([null, null, 2, 3, 4])
+ * ```
+ */
+export function rollingApply(
+ series: Series,
+ window: number,
+ fn: (values: readonly number[]) => number,
+ options?: RollingApplyOptions,
+): Series {
+ if (!Number.isInteger(window) || window < 1) {
+ throw new RangeError(`window must be a positive integer, got ${window}`);
+ }
+ const minPeriods = options?.minPeriods ?? window;
+ const center = options?.center ?? false;
+ const useRaw = options?.raw ?? false;
+
+ const vals = series.values;
+ const result: (number | null)[] = [];
+
+ for (const { met, nums, raw } of windowIterator(vals, window, minPeriods, center, useRaw)) {
+ if (!met) {
+ result.push(null);
+ } else if (useRaw) {
+ const validOnly = (raw as readonly (number | null)[]).filter(
+ (v): v is number => v !== null,
+ );
+ result.push(fn(validOnly));
+ } else {
+ result.push(fn(nums));
+ }
+ }
+
+ return new Series({
+ data: result,
+ index: series.index as Index,
+ name: series.name,
+ });
+}
+
+/**
+ * Apply multiple named aggregation functions over a rolling window of a
+ * Series, returning a DataFrame where each column corresponds to one
+ * aggregation function.
+ *
+ * Mirrors `pandas.Series.rolling(w).agg({"mean": np.mean, "std": np.std})`.
+ *
+ * @param series - Input Series.
+ * @param window - Window size (positive integer).
+ * @param fns - Named map of aggregation functions (each receives valid
+ * numeric values in the window).
+ * @param options - {@link RollingAggOptions}.
+ * @returns A `DataFrame` with one column per function in `fns`.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [1, 2, 3, 4, 5] });
+ * rollingAgg(s, 3, {
+ * mean: (w) => w.reduce((a, b) => a + b, 0) / w.length,
+ * max: (w) => Math.max(...w),
+ * });
+ * // DataFrame with columns "mean" and "max"
+ * ```
+ */
+export function rollingAgg(
+ series: Series,
+ window: number,
+ fns: AggFunctions,
+ options?: RollingAggOptions,
+): DataFrame {
+ if (!Number.isInteger(window) || window < 1) {
+ throw new RangeError(`window must be a positive integer, got ${window}`);
+ }
+ const minPeriods = options?.minPeriods ?? window;
+ const center = options?.center ?? false;
+
+ const fnEntries = Object.entries(fns);
+  const cols: Map<string, (number | null)[]> = new Map(fnEntries.map(([k]) => [k, []]));
+ const vals = series.values;
+
+ for (const { met, nums } of windowIterator(vals, window, minPeriods, center, false)) {
+ for (const [name, fn] of fnEntries) {
+ const col = cols.get(name) as (number | null)[];
+ col.push(met ? fn(nums) : null);
+ }
+ }
+
+  const colMap = new Map<string, Series<number | null>>();
+ for (const [name, data] of cols) {
+ colMap.set(
+ name,
+ new Series({
+ data,
+ index: series.index as Index,
+ name,
+ }),
+ );
+ }
+ return new DataFrame(colMap, series.index as Index);
+}
+
+/**
+ * Apply a custom aggregation function over a rolling window for each column of
+ * a DataFrame.
+ *
+ * @param df - Input DataFrame.
+ * @param window - Window size (positive integer).
+ * @param fn - Aggregation function receiving valid numeric values.
+ * @param options - {@link RollingApplyOptions}.
+ * @returns A new `DataFrame` with the same shape as `df`.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+ * dataFrameRollingApply(df, 2, (w) => w[w.length - 1] - w[0]);
+ * // DataFrame with pairwise diff per column
+ * ```
+ */
+export function dataFrameRollingApply(
+ df: DataFrame,
+ window: number,
+ fn: (values: readonly number[]) => number,
+ options?: RollingApplyOptions,
+): DataFrame {
+  const colMap = new Map<string, Series<number | null>>();
+ for (const colName of df.columns.values) {
+ const col = df.col(colName);
+ const result = rollingApply(col, window, fn, options);
+    colMap.set(colName, result as Series<number | null>);
+ }
+ return new DataFrame(colMap, df.index);
+}
+
+/**
+ * Apply multiple named aggregation functions over a rolling window for each
+ * column of a DataFrame.
+ *
+ * Each column produces a sub-DataFrame of results. All sub-DataFrames are
+ * concatenated horizontally, with column names formatted as `{col}_{aggName}`.
+ *
+ * @param df - Input DataFrame.
+ * @param window - Window size (positive integer).
+ * @param fns - Named map of aggregation functions.
+ * @param options - {@link RollingAggOptions}.
+ * @returns A `DataFrame` with columns `{col}_{aggName}` for every combination.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ x: [1, 2, 3, 4], y: [5, 6, 7, 8] });
+ * dataFrameRollingAgg(df, 2, { mean: avg, sum: s });
+ * // columns: "x_mean", "x_sum", "y_mean", "y_sum"
+ * ```
+ */
+export function dataFrameRollingAgg(
+ df: DataFrame,
+ window: number,
+ fns: AggFunctions,
+ options?: RollingAggOptions,
+): DataFrame {
+  const colMap = new Map<string, Series<number | null>>();
+ const fnEntries = Object.entries(fns);
+
+ for (const colName of df.columns.values) {
+ const col = df.col(colName);
+ const aggDf = rollingAgg(col, window, fns, options);
+
+ for (const [aggName] of fnEntries) {
+ const key = `${colName}_${aggName}`;
+ colMap.set(key, aggDf.col(aggName));
+ }
+ }
+ return new DataFrame(colMap, df.index);
+}
diff --git a/tests/core/api_types.test.ts b/tests/core/api_types.test.ts
new file mode 100644
index 00000000..17064ceb
--- /dev/null
+++ b/tests/core/api_types.test.ts
@@ -0,0 +1,621 @@
+/**
+ * Tests for src/core/api_types.ts β runtime type-checking predicates.
+ */
+import { describe, expect, it } from "bun:test";
+import fc from "fast-check";
+import { Dtype } from "../../src/index.ts";
+import {
+ isArrayLike,
+ isBigInt,
+ isBool,
+ isBoolDtype,
+ isCategoricalDtype,
+ isComplexDtype,
+ isDate,
+ isDatetimeDtype,
+ isDictLike,
+ isExtensionArrayDtype,
+ isFloat,
+ isFloatDtype,
+ isHashable,
+ isInteger,
+ isIntegerDtype,
+ isIntervalDtype,
+ isIterator,
+ isListLike,
+ isMissing,
+ isNumber,
+ isNumericDtype,
+ isObjectDtype,
+ isPeriodDtype,
+ isRegExp,
+ isReCompilable,
+ isScalar,
+ isSignedIntegerDtype,
+ isStringDtype,
+ isStringValue,
+ isTimedeltaDtype,
+ isUnsignedIntegerDtype,
+} from "../../src/core/api_types.ts";
+
+// βββ isScalar βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+describe("isScalar", () => {
+ it("returns true for primitives", () => {
+ expect(isScalar(42)).toBe(true);
+ expect(isScalar(3.14)).toBe(true);
+ expect(isScalar("hello")).toBe(true);
+ expect(isScalar(true)).toBe(true);
+ expect(isScalar(false)).toBe(true);
+ expect(isScalar(null)).toBe(true);
+ expect(isScalar(undefined)).toBe(true);
+ expect(isScalar(BigInt(7))).toBe(true);
+ expect(isScalar(Symbol("x"))).toBe(true);
+ });
+
+ it("returns true for Date", () => {
+ expect(isScalar(new Date())).toBe(true);
+ });
+
+ it("returns false for arrays", () => {
+ expect(isScalar([])).toBe(false);
+ expect(isScalar([1, 2])).toBe(false);
+ });
+
+ it("returns false for plain objects", () => {
+ expect(isScalar({})).toBe(false);
+ expect(isScalar({ a: 1 })).toBe(false);
+ });
+
+ it("returns false for Map/Set", () => {
+ expect(isScalar(new Map())).toBe(false);
+ expect(isScalar(new Set())).toBe(false);
+ });
+
+ it("property: all numbers are scalars", () => {
+ fc.assert(
+ fc.property(fc.float({ noNaN: true }), (n) => isScalar(n) === true),
+ );
+ });
+});
+
+// βββ isListLike βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+describe("isListLike", () => {
+ it("returns true for arrays", () => {
+ expect(isListLike([])).toBe(true);
+ expect(isListLike([1, 2, 3])).toBe(true);
+ });
+
+ it("returns true for Set/Map", () => {
+ expect(isListLike(new Set([1, 2]))).toBe(true);
+ expect(isListLike(new Map())).toBe(true);
+ });
+
+ it("returns false for strings (excluded)", () => {
+ expect(isListLike("abc")).toBe(false);
+ expect(isListLike("")).toBe(false);
+ });
+
+ it("returns false for numbers and booleans", () => {
+ expect(isListLike(42)).toBe(false);
+ expect(isListLike(true)).toBe(false);
+ });
+
+ it("returns false for null/undefined", () => {
+ expect(isListLike(null)).toBe(false);
+ expect(isListLike(undefined)).toBe(false);
+ });
+
+ it("returns true for array-like objects with length", () => {
+ expect(isListLike({ length: 3, 0: "a", 1: "b", 2: "c" })).toBe(true);
+ });
+});
+
+// βββ isArrayLike ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+describe("isArrayLike", () => {
+ it("returns true for arrays", () => {
+ expect(isArrayLike([])).toBe(true);
+ expect(isArrayLike([1, 2])).toBe(true);
+ });
+
+ it("returns true for strings (have .length)", () => {
+ expect(isArrayLike("hello")).toBe(true);
+ expect(isArrayLike("")).toBe(true);
+ });
+
+ it("returns false for numbers", () => {
+ expect(isArrayLike(42)).toBe(false);
+ expect(isArrayLike(NaN)).toBe(false);
+ });
+
+ it("returns false for null/undefined", () => {
+ expect(isArrayLike(null)).toBe(false);
+ expect(isArrayLike(undefined)).toBe(false);
+ });
+
+ it("returns true for typed arrays", () => {
+ expect(isArrayLike(new Uint8Array(3))).toBe(true);
+ expect(isArrayLike(new Float64Array(0))).toBe(true);
+ });
+
+ it("returns true for object with non-negative integer length", () => {
+ expect(isArrayLike({ length: 0 })).toBe(true);
+ expect(isArrayLike({ length: 5 })).toBe(true);
+ });
+});
+
+// βββ isDictLike βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+describe("isDictLike", () => {
+ it("returns true for plain objects", () => {
+ expect(isDictLike({})).toBe(true);
+ expect(isDictLike({ a: 1 })).toBe(true);
+ });
+
+ it("returns true for Map", () => {
+ expect(isDictLike(new Map())).toBe(true);
+ });
+
+ it("returns false for arrays", () => {
+ expect(isDictLike([])).toBe(false);
+ expect(isDictLike([1, 2])).toBe(false);
+ });
+
+ it("returns false for Date", () => {
+ expect(isDictLike(new Date())).toBe(false);
+ });
+
+ it("returns false for null/undefined/primitives", () => {
+ expect(isDictLike(null)).toBe(false);
+ expect(isDictLike(undefined)).toBe(false);
+ expect(isDictLike(42)).toBe(false);
+ expect(isDictLike("abc")).toBe(false);
+ });
+});
+
+// βββ isIterator βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+describe("isIterator", () => {
+ it("returns true for array iterator", () => {
+ const iter = [1, 2, 3][Symbol.iterator]();
+ expect(isIterator(iter)).toBe(true);
+ });
+
+ it("returns true for generator", () => {
+    function* gen(): Generator<number> {
+ yield 1;
+ }
+ expect(isIterator(gen())).toBe(true);
+ });
+
+ it("returns false for array (not iterator)", () => {
+ expect(isIterator([1, 2, 3])).toBe(false);
+ });
+
+ it("returns false for null/undefined", () => {
+ expect(isIterator(null)).toBe(false);
+ expect(isIterator(undefined)).toBe(false);
+ });
+});
+
+// βββ isNumber / isBool / isStringValue βββββββββββββββββββββββββββββββββββββββ
+
+describe("isNumber", () => {
+ it("true for numbers including NaN and Infinity", () => {
+ expect(isNumber(3.14)).toBe(true);
+ expect(isNumber(0)).toBe(true);
+ expect(isNumber(NaN)).toBe(true);
+ expect(isNumber(Infinity)).toBe(true);
+ expect(isNumber(-Infinity)).toBe(true);
+ });
+
+ it("false for non-numbers", () => {
+ expect(isNumber("3")).toBe(false);
+ expect(isNumber(true)).toBe(false);
+ expect(isNumber(null)).toBe(false);
+ });
+});
+
+describe("isBool", () => {
+ it("true for booleans only", () => {
+ expect(isBool(true)).toBe(true);
+ expect(isBool(false)).toBe(true);
+ expect(isBool(1)).toBe(false);
+ expect(isBool(0)).toBe(false);
+ expect(isBool("true")).toBe(false);
+ });
+});
+
+describe("isStringValue", () => {
+ it("true for strings", () => {
+ expect(isStringValue("")).toBe(true);
+ expect(isStringValue("hello")).toBe(true);
+ });
+
+ it("false for non-strings", () => {
+ expect(isStringValue(42)).toBe(false);
+ expect(isStringValue(null)).toBe(false);
+ });
+});
+
+// βββ isFloat / isInteger ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+describe("isFloat", () => {
+ it("true for numbers with fractional part", () => {
+ expect(isFloat(3.14)).toBe(true);
+ expect(isFloat(-0.5)).toBe(true);
+ expect(isFloat(0.001)).toBe(true);
+ });
+
+ it("false for integer-valued numbers", () => {
+ expect(isFloat(3.0)).toBe(false);
+ expect(isFloat(0)).toBe(false);
+ expect(isFloat(-4)).toBe(false);
+ });
+
+ it("false for NaN and Infinity", () => {
+ expect(isFloat(NaN)).toBe(false);
+ expect(isFloat(Infinity)).toBe(false);
+ expect(isFloat(-Infinity)).toBe(false);
+ });
+
+ it("false for non-numbers", () => {
+ expect(isFloat("3.14")).toBe(false);
+ });
+});
+
+describe("isInteger", () => {
+ it("true for integer-valued numbers", () => {
+ expect(isInteger(0)).toBe(true);
+ expect(isInteger(42)).toBe(true);
+ expect(isInteger(-7)).toBe(true);
+ expect(isInteger(3.0)).toBe(true);
+ });
+
+ it("false for fractional numbers", () => {
+ expect(isInteger(3.14)).toBe(false);
+ });
+
+ it("false for NaN and Infinity", () => {
+ expect(isInteger(NaN)).toBe(false);
+ expect(isInteger(Infinity)).toBe(false);
+ });
+
+ it("false for non-numbers", () => {
+ expect(isInteger("3")).toBe(false);
+ });
+});
+
+// βββ isBigInt / isRegExp / isReCompilable βββββββββββββββββββββββββββββββββββββ
+
+describe("isBigInt", () => {
+ it("true for bigint", () => {
+ expect(isBigInt(BigInt(42))).toBe(true);
+ expect(isBigInt(0n)).toBe(true);
+ });
+
+ it("false for regular numbers", () => {
+ expect(isBigInt(42)).toBe(false);
+ expect(isBigInt("42")).toBe(false);
+ });
+});
+
+describe("isRegExp", () => {
+ it("true for RegExp instances", () => {
+ expect(isRegExp(/abc/)).toBe(true);
+ expect(isRegExp(new RegExp("xyz"))).toBe(true);
+ });
+
+ it("false for strings and other values", () => {
+ expect(isRegExp("abc")).toBe(false);
+ expect(isRegExp(null)).toBe(false);
+ });
+});
+
+describe("isReCompilable", () => {
+ it("true for strings and RegExp", () => {
+ expect(isReCompilable("abc")).toBe(true);
+ expect(isReCompilable(/abc/)).toBe(true);
+ });
+
+ it("false for numbers and objects", () => {
+ expect(isReCompilable(42)).toBe(false);
+ expect(isReCompilable({})).toBe(false);
+ });
+});
+
+// βββ isMissing ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+describe("isMissing", () => {
+ it("true for null, undefined, NaN", () => {
+ expect(isMissing(null)).toBe(true);
+ expect(isMissing(undefined)).toBe(true);
+ expect(isMissing(NaN)).toBe(true);
+ });
+
+ it("false for valid values", () => {
+ expect(isMissing(0)).toBe(false);
+ expect(isMissing("")).toBe(false);
+ expect(isMissing(false)).toBe(false);
+ expect(isMissing(Infinity)).toBe(false);
+ });
+
+ it("property: no finite number is missing", () => {
+ fc.assert(
+ fc.property(fc.float({ noNaN: true }), (n) => {
+ if (!Number.isFinite(n)) return true;
+ return !isMissing(n);
+ }),
+ );
+ });
+});
+
+// βββ isHashable / isDate ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+describe("isHashable", () => {
+ it("true for primitives", () => {
+ expect(isHashable("key")).toBe(true);
+ expect(isHashable(42)).toBe(true);
+ expect(isHashable(true)).toBe(true);
+ expect(isHashable(null)).toBe(true);
+ expect(isHashable(undefined)).toBe(true);
+ expect(isHashable(Symbol("x"))).toBe(true);
+ });
+
+ it("false for objects and arrays", () => {
+ expect(isHashable({})).toBe(false);
+ expect(isHashable([])).toBe(false);
+ expect(isHashable(new Date())).toBe(false);
+ });
+});
+
+describe("isDate", () => {
+ it("true for Date instances", () => {
+ expect(isDate(new Date())).toBe(true);
+ expect(isDate(new Date("2024-01-01"))).toBe(true);
+ });
+
+ it("false for strings and timestamps", () => {
+ expect(isDate("2024-01-01")).toBe(false);
+ expect(isDate(1704067200000)).toBe(false);
+ expect(isDate(null)).toBe(false);
+ });
+});
+
+// βββ Dtype-level predicates βββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+describe("isNumericDtype", () => {
+ it("true for all numeric dtypes", () => {
+ for (const name of ["int8", "int16", "int32", "int64", "uint8", "uint16", "uint32", "uint64", "float32", "float64"] as const) {
+ expect(isNumericDtype(name)).toBe(true);
+ expect(isNumericDtype(Dtype.from(name))).toBe(true);
+ }
+ });
+
+ it("false for non-numeric dtypes", () => {
+ expect(isNumericDtype("string")).toBe(false);
+ expect(isNumericDtype("bool")).toBe(false);
+ expect(isNumericDtype("datetime")).toBe(false);
+ expect(isNumericDtype("category")).toBe(false);
+ });
+});
+
+describe("isIntegerDtype", () => {
+ it("true for signed and unsigned integers", () => {
+ expect(isIntegerDtype("int32")).toBe(true);
+ expect(isIntegerDtype("uint64")).toBe(true);
+ });
+
+ it("false for floats and others", () => {
+ expect(isIntegerDtype("float64")).toBe(false);
+ expect(isIntegerDtype("bool")).toBe(false);
+ });
+});
+
+describe("isSignedIntegerDtype", () => {
+ it("true for int8/16/32/64", () => {
+ expect(isSignedIntegerDtype("int8")).toBe(true);
+ expect(isSignedIntegerDtype("int64")).toBe(true);
+ });
+
+ it("false for uint", () => {
+ expect(isSignedIntegerDtype("uint8")).toBe(false);
+ expect(isSignedIntegerDtype("uint64")).toBe(false);
+ });
+});
+
+describe("isUnsignedIntegerDtype", () => {
+ it("true for uint8/16/32/64", () => {
+ expect(isUnsignedIntegerDtype("uint8")).toBe(true);
+ expect(isUnsignedIntegerDtype("uint64")).toBe(true);
+ });
+
+ it("false for int", () => {
+ expect(isUnsignedIntegerDtype("int8")).toBe(false);
+ expect(isUnsignedIntegerDtype("int64")).toBe(false);
+ });
+});
+
+describe("isFloatDtype", () => {
+ it("true for float32 and float64", () => {
+ expect(isFloatDtype("float32")).toBe(true);
+ expect(isFloatDtype("float64")).toBe(true);
+ expect(isFloatDtype(Dtype.float64)).toBe(true);
+ });
+
+ it("false for integers and others", () => {
+ expect(isFloatDtype("int32")).toBe(false);
+ expect(isFloatDtype("string")).toBe(false);
+ });
+});
+
+describe("isBoolDtype", () => {
+ it("true for bool", () => {
+ expect(isBoolDtype("bool")).toBe(true);
+ expect(isBoolDtype(Dtype.bool)).toBe(true);
+ });
+
+ it("false for others", () => {
+ expect(isBoolDtype("int8")).toBe(false);
+ expect(isBoolDtype("string")).toBe(false);
+ });
+});
+
+describe("isStringDtype", () => {
+ it("true for string dtype", () => {
+ expect(isStringDtype("string")).toBe(true);
+ expect(isStringDtype(Dtype.string)).toBe(true);
+ });
+
+ it("false for object and others", () => {
+ expect(isStringDtype("object")).toBe(false);
+ expect(isStringDtype("int32")).toBe(false);
+ });
+});
+
+describe("isDatetimeDtype", () => {
+ it("true for datetime", () => {
+ expect(isDatetimeDtype("datetime")).toBe(true);
+ expect(isDatetimeDtype(Dtype.datetime)).toBe(true);
+ });
+
+ it("false for timedelta and others", () => {
+ expect(isDatetimeDtype("timedelta")).toBe(false);
+ expect(isDatetimeDtype("string")).toBe(false);
+ });
+});
+
+describe("isTimedeltaDtype", () => {
+ it("true for timedelta", () => {
+ expect(isTimedeltaDtype("timedelta")).toBe(true);
+ expect(isTimedeltaDtype(Dtype.timedelta)).toBe(true);
+ });
+
+ it("false for datetime and others", () => {
+ expect(isTimedeltaDtype("datetime")).toBe(false);
+ expect(isTimedeltaDtype("float64")).toBe(false);
+ });
+});
+
+describe("isCategoricalDtype", () => {
+ it("true for category", () => {
+ expect(isCategoricalDtype("category")).toBe(true);
+ expect(isCategoricalDtype(Dtype.category)).toBe(true);
+ });
+
+ it("false for others", () => {
+ expect(isCategoricalDtype("string")).toBe(false);
+ expect(isCategoricalDtype("int32")).toBe(false);
+ });
+});
+
+describe("isObjectDtype", () => {
+ it("true for object dtype", () => {
+ expect(isObjectDtype("object")).toBe(true);
+ expect(isObjectDtype(Dtype.object)).toBe(true);
+ });
+
+ it("false for string and others", () => {
+ expect(isObjectDtype("string")).toBe(false);
+ expect(isObjectDtype("int32")).toBe(false);
+ });
+});
+
+describe("isComplexDtype", () => {
+ it("always returns false (no complex type in tsb)", () => {
+ expect(isComplexDtype("float64")).toBe(false);
+ expect(isComplexDtype("int32")).toBe(false);
+ expect(isComplexDtype(Dtype.float64)).toBe(false);
+ });
+});
+
+describe("isExtensionArrayDtype", () => {
+ it("true for string/object/datetime/timedelta/category", () => {
+ expect(isExtensionArrayDtype("string")).toBe(true);
+ expect(isExtensionArrayDtype("object")).toBe(true);
+ expect(isExtensionArrayDtype("datetime")).toBe(true);
+ expect(isExtensionArrayDtype("timedelta")).toBe(true);
+ expect(isExtensionArrayDtype("category")).toBe(true);
+ });
+
+ it("false for numeric dtypes", () => {
+ expect(isExtensionArrayDtype("int32")).toBe(false);
+ expect(isExtensionArrayDtype("float64")).toBe(false);
+ expect(isExtensionArrayDtype("bool")).toBe(false);
+ });
+});
+
+describe("isPeriodDtype", () => {
+ it("true for datetime (maps to period)", () => {
+ expect(isPeriodDtype("datetime")).toBe(true);
+ });
+
+ it("false for others", () => {
+ expect(isPeriodDtype("float64")).toBe(false);
+ expect(isPeriodDtype("string")).toBe(false);
+ });
+});
+
+describe("isIntervalDtype", () => {
+ it("true for numeric dtypes (interval uses numeric bounds)", () => {
+ expect(isIntervalDtype("float64")).toBe(true);
+ expect(isIntervalDtype("int32")).toBe(true);
+ expect(isIntervalDtype("uint8")).toBe(true);
+ });
+
+ it("false for string/category/bool", () => {
+ expect(isIntervalDtype("string")).toBe(false);
+ expect(isIntervalDtype("category")).toBe(false);
+ expect(isIntervalDtype("bool")).toBe(false);
+ });
+});
+
+// βββ property-based cross-checks βββββββββββββββββββββββββββββββββββββββββββββ
+
+describe("dtype predicate cross-checks", () => {
+ const numericNames = ["int8", "int16", "int32", "int64", "uint8", "uint16", "uint32", "uint64", "float32", "float64"] as const;
+ const nonNumericNames = ["bool", "string", "object", "datetime", "timedelta", "category"] as const;
+
+ it("isNumericDtype and isIntegerDtype are consistent", () => {
+ for (const n of numericNames) {
+ if (isIntegerDtype(n)) {
+ expect(isNumericDtype(n)).toBe(true);
+ }
+ }
+ });
+
+ it("no numeric dtype is extension array", () => {
+ for (const n of numericNames) {
+ if (!isBoolDtype(n)) {
+ expect(isExtensionArrayDtype(n)).toBe(false);
+ }
+ }
+ });
+
+ it("signed and unsigned integers are disjoint", () => {
+ for (const n of numericNames) {
+ if (isSignedIntegerDtype(n)) {
+ expect(isUnsignedIntegerDtype(n)).toBe(false);
+ }
+ if (isUnsignedIntegerDtype(n)) {
+ expect(isSignedIntegerDtype(n)).toBe(false);
+ }
+ }
+ });
+
+ it("float dtypes are not integer dtypes", () => {
+ for (const n of numericNames) {
+ if (isFloatDtype(n)) {
+ expect(isIntegerDtype(n)).toBe(false);
+ }
+ }
+ });
+
+ it("non-numeric dtypes fail isNumericDtype", () => {
+ for (const n of nonNumericNames) {
+ expect(isNumericDtype(n)).toBe(false);
+ }
+ });
+});
diff --git a/tests/core/attrs.test.ts b/tests/core/attrs.test.ts
new file mode 100644
index 00000000..8a22786b
--- /dev/null
+++ b/tests/core/attrs.test.ts
@@ -0,0 +1,542 @@
+/**
+ * Tests for src/core/attrs.ts
+ *
+ * Covers:
+ * - getAttrs: returns empty {} when no attrs set
+ * - getAttrs: returns shallow copy (caller mutations don't affect registry)
+ * - setAttrs: sets attrs; subsequent getAttrs reflects the new values
+ * - setAttrs: overwrites previous attrs completely
+ * - updateAttrs: merges new keys, preserves existing keys
+ * - updateAttrs: overwrites existing keys on conflict
+ * - updateAttrs: works on an object with no prior attrs
+ * - copyAttrs: copies attrs from source to target
+ * - copyAttrs: overwrites target's existing attrs
+ * - copyAttrs: when source has no attrs, clears target's attrs
+ * - withAttrs: returns the same object reference
+ * - withAttrs: sets the attrs (replaces existing)
+ * - clearAttrs: removes all attrs
+ * - clearAttrs: is a no-op if no attrs exist
+ * - hasAttrs: false when no attrs set
+ * - hasAttrs: true after setAttrs
+ * - hasAttrs: false after clearAttrs
+ * - getAttr: returns undefined for missing key
+ * - getAttr: returns the correct value
+ * - setAttr: sets a single key, preserves other keys
+ * - deleteAttr: removes a key, preserves remaining
+ * - deleteAttr: clears registry when last key removed
+ * - deleteAttr: no-op if key doesn't exist
+ * - attrsCount: 0 when no attrs; n when n keys set
+ * - attrsKeys: [] when no attrs; list of keys otherwise
+ * - mergeAttrs: merges attrs from multiple sources
+ * - mergeAttrs: later sources win on key conflicts
+ * - Independence: separate objects have independent attrs
+ * - Works with Series and DataFrame objects
+ * - Property: setAttrs/getAttrs round-trip
+ * - Property: updateAttrs is a superset of previous attrs
+ * - Property: copyAttrs makes target equal to source
+ */
+
+import { describe, expect, test } from "bun:test";
+import * as fc from "fast-check";
+import { DataFrame, Series } from "../../src/index.ts";
+import {
+ attrsCount,
+ attrsKeys,
+ clearAttrs,
+ copyAttrs,
+ deleteAttr,
+ getAttr,
+ getAttrs,
+ hasAttrs,
+ mergeAttrs,
+ setAttr,
+ setAttrs,
+ updateAttrs,
+ withAttrs,
+} from "../../src/core/attrs.ts";
+
+// βββ helpers ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+// Test fixtures: a bare host object, a small named Series, and a 3x2 DataFrame.
+const freshObj = (): object => ({});
+
+const makeSeries = (): Series => new Series({ data: [1, 2, 3], name: "x" });
+
+const makeDF = (): DataFrame => DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+
+// βββ getAttrs βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+describe("getAttrs", () => {
+  test("returns {} when no attrs set", () => {
+    expect(getAttrs(freshObj())).toEqual({});
+  });
+
+  test("returns the stored attrs", () => {
+    const subject = freshObj();
+    setAttrs(subject, { x: 1, y: "hello" });
+    expect(getAttrs(subject)).toEqual({ x: 1, y: "hello" });
+  });
+
+  test("returns a shallow copy β caller mutations don't leak into registry", () => {
+    const subject = freshObj();
+    setAttrs(subject, { a: 1 });
+    const snapshot = getAttrs(subject);
+    snapshot["a"] = 999;
+    // The registry must hold its own copy, so the mutation above is invisible here.
+    expect(getAttrs(subject)).toEqual({ a: 1 });
+  });
+});
+
+// βββ setAttrs βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+describe("setAttrs", () => {
+  test("sets attrs from scratch", () => {
+    const obj = freshObj();
+    setAttrs(obj, { source: "lab", version: 3 });
+    expect(getAttrs(obj)).toEqual({ source: "lab", version: 3 });
+  });
+
+  test("overwrites previous attrs completely", () => {
+    const obj = freshObj();
+    setAttrs(obj, { a: 1, b: 2 });
+    setAttrs(obj, { c: 3 });
+    expect(getAttrs(obj)).toEqual({ c: 3 });
+  });
+
+  test("shallow-copies the input β later mutation doesn't change stored attrs", () => {
+    const obj = freshObj();
+    // FIX: bare `Record` takes no type arguments and does not compile; the
+    // key/value parameters were lost. Restore them explicitly.
+    const input: Record<string, number> = { x: 10 };
+    setAttrs(obj, input);
+    input["x"] = 999;
+    expect(getAttrs(obj)).toEqual({ x: 10 });
+  });
+});
+
+// βββ updateAttrs ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+describe("updateAttrs", () => {
+  test("adds new keys without removing existing ones", () => {
+    const target = freshObj();
+    setAttrs(target, { a: 1 });
+    updateAttrs(target, { b: 2 });
+    expect(getAttrs(target)).toEqual({ a: 1, b: 2 });
+  });
+
+  test("overwrites existing keys on conflict", () => {
+    const target = freshObj();
+    setAttrs(target, { a: 1, b: 2 });
+    updateAttrs(target, { a: 99 });
+    expect(getAttrs(target)).toEqual({ a: 99, b: 2 });
+  });
+
+  test("works when no prior attrs exist", () => {
+    const target = freshObj();
+    updateAttrs(target, { fresh: true });
+    expect(getAttrs(target)).toEqual({ fresh: true });
+  });
+
+  test("merges multiple key types: number, string, boolean, null", () => {
+    const target = freshObj();
+    setAttrs(target, { n: 1 });
+    updateAttrs(target, { s: "hi", flag: false, nothing: null });
+    expect(getAttrs(target)).toEqual({ n: 1, s: "hi", flag: false, nothing: null });
+  });
+});
+
+// βββ copyAttrs ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+describe("copyAttrs", () => {
+  test("copies source attrs to target", () => {
+    const source = freshObj();
+    const target = freshObj();
+    setAttrs(source, { unit: "kg", version: 1 });
+    copyAttrs(source, target);
+    expect(getAttrs(target)).toEqual({ unit: "kg", version: 1 });
+  });
+
+  test("overwrites target's existing attrs", () => {
+    const source = freshObj();
+    const target = freshObj();
+    setAttrs(source, { new: true });
+    setAttrs(target, { old: true });
+    copyAttrs(source, target);
+    expect(getAttrs(target)).toEqual({ new: true });
+  });
+
+  test("when source has no attrs, clears target attrs", () => {
+    const source = freshObj();
+    const target = freshObj();
+    setAttrs(target, { old: 1 });
+    copyAttrs(source, target);
+    expect(getAttrs(target)).toEqual({});
+    expect(hasAttrs(target)).toBe(false);
+  });
+
+  test("copy is shallow β subsequent changes to source don't affect target", () => {
+    const source = freshObj();
+    const target = freshObj();
+    setAttrs(source, { x: 1 });
+    copyAttrs(source, target);
+    updateAttrs(source, { x: 999 });
+    expect(getAttrs(target)).toEqual({ x: 1 });
+  });
+});
+
+// βββ withAttrs ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+describe("withAttrs", () => {
+  test("returns the same object reference", () => {
+    const host = freshObj();
+    expect(withAttrs(host, { k: 1 })).toBe(host);
+  });
+
+  test("sets attrs on the object", () => {
+    const host = freshObj();
+    withAttrs(host, { source: "sensor_A" });
+    expect(getAttrs(host)).toEqual({ source: "sensor_A" });
+  });
+
+  test("replaces any previous attrs", () => {
+    const host = freshObj();
+    setAttrs(host, { old: true });
+    withAttrs(host, { new: true });
+    expect(getAttrs(host)).toEqual({ new: true });
+  });
+
+  test("type is preserved β works with Series", () => {
+    const s = makeSeries();
+    const decorated = withAttrs(s, { unit: "m" });
+    // The fluent helper must hand back the very same Series instance.
+    expect(decorated).toBe(s);
+    expect(decorated instanceof Series).toBe(true);
+    expect(getAttrs(decorated)).toEqual({ unit: "m" });
+  });
+});
+
+// βββ clearAttrs βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+describe("clearAttrs", () => {
+  test("removes all attrs", () => {
+    const host = freshObj();
+    setAttrs(host, { a: 1, b: 2 });
+    clearAttrs(host);
+    expect(getAttrs(host)).toEqual({});
+    expect(hasAttrs(host)).toBe(false);
+  });
+
+  test("no-op when no attrs exist", () => {
+    const host = freshObj();
+    expect(() => clearAttrs(host)).not.toThrow();
+    expect(getAttrs(host)).toEqual({});
+  });
+});
+
+// βββ hasAttrs βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+describe("hasAttrs", () => {
+  test("returns false when no attrs set", () => {
+    expect(hasAttrs(freshObj())).toBe(false);
+  });
+
+  test("returns true after setAttrs", () => {
+    const host = freshObj();
+    setAttrs(host, { x: 1 });
+    expect(hasAttrs(host)).toBe(true);
+  });
+
+  test("returns false after clearAttrs", () => {
+    const host = freshObj();
+    setAttrs(host, { x: 1 });
+    clearAttrs(host);
+    expect(hasAttrs(host)).toBe(false);
+  });
+});
+
+// βββ getAttr / setAttr ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+describe("getAttr", () => {
+  test("returns undefined for missing key", () => {
+    expect(getAttr(freshObj(), "missing")).toBeUndefined();
+  });
+
+  test("returns undefined for key missing after setAttrs", () => {
+    const host = freshObj();
+    setAttrs(host, { a: 1 });
+    expect(getAttr(host, "b")).toBeUndefined();
+  });
+
+  test("returns the correct value", () => {
+    const host = freshObj();
+    setAttrs(host, { unit: "kg", scale: 2 });
+    expect(getAttr(host, "unit")).toBe("kg");
+    expect(getAttr(host, "scale")).toBe(2);
+  });
+});
+
+describe("setAttr", () => {
+  test("sets a single key from scratch", () => {
+    const host = freshObj();
+    setAttr(host, "unit", "kg");
+    expect(getAttrs(host)).toEqual({ unit: "kg" });
+  });
+
+  test("adds a key without removing existing ones", () => {
+    const host = freshObj();
+    setAttrs(host, { a: 1 });
+    setAttr(host, "b", 2);
+    expect(getAttrs(host)).toEqual({ a: 1, b: 2 });
+  });
+
+  test("overwrites a single key", () => {
+    const host = freshObj();
+    setAttrs(host, { a: 1, b: 2 });
+    setAttr(host, "a", 99);
+    expect(getAttrs(host)).toEqual({ a: 99, b: 2 });
+  });
+});
+
+// βββ deleteAttr βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+describe("deleteAttr", () => {
+  test("removes the specified key", () => {
+    const host = freshObj();
+    setAttrs(host, { a: 1, b: 2 });
+    deleteAttr(host, "a");
+    expect(getAttrs(host)).toEqual({ b: 2 });
+  });
+
+  test("clears registry entry when last key is removed", () => {
+    const host = freshObj();
+    setAttrs(host, { only: true });
+    deleteAttr(host, "only");
+    expect(hasAttrs(host)).toBe(false);
+    expect(getAttrs(host)).toEqual({});
+  });
+
+  test("no-op if key doesn't exist", () => {
+    const host = freshObj();
+    setAttrs(host, { a: 1 });
+    deleteAttr(host, "missing");
+    expect(getAttrs(host)).toEqual({ a: 1 });
+  });
+
+  test("no-op if no attrs at all", () => {
+    const host = freshObj();
+    expect(() => deleteAttr(host, "x")).not.toThrow();
+    expect(getAttrs(host)).toEqual({});
+  });
+});
+
+// βββ attrsCount / attrsKeys βββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+describe("attrsCount", () => {
+  test("returns 0 when no attrs", () => {
+    expect(attrsCount(freshObj())).toBe(0);
+  });
+
+  test("returns correct count after setAttrs", () => {
+    const host = freshObj();
+    setAttrs(host, { a: 1, b: 2, c: 3 });
+    expect(attrsCount(host)).toBe(3);
+  });
+
+  test("updates after deleteAttr", () => {
+    const host = freshObj();
+    setAttrs(host, { a: 1, b: 2 });
+    deleteAttr(host, "a");
+    expect(attrsCount(host)).toBe(1);
+  });
+});
+
+describe("attrsKeys", () => {
+  test("returns [] when no attrs", () => {
+    expect(attrsKeys(freshObj())).toEqual([]);
+  });
+
+  test("returns key list", () => {
+    const host = freshObj();
+    setAttrs(host, { x: 1, y: 2 });
+    // Sort a copy before comparing: key order is an implementation detail.
+    expect([...attrsKeys(host)].sort()).toEqual(["x", "y"]);
+  });
+});
+
+// βββ mergeAttrs βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+describe("mergeAttrs", () => {
+  test("merges attrs from multiple sources", () => {
+    const s1 = freshObj();
+    const s2 = freshObj();
+    const tgt = freshObj();
+    setAttrs(s1, { a: 1, b: 2 });
+    setAttrs(s2, { c: 3 });
+    mergeAttrs([s1, s2], tgt);
+    expect(getAttrs(tgt)).toEqual({ a: 1, b: 2, c: 3 });
+  });
+
+  test("later sources win on key conflicts", () => {
+    const s1 = freshObj();
+    const s2 = freshObj();
+    const tgt = freshObj();
+    setAttrs(s1, { source: "A", unit: "kg" });
+    setAttrs(s2, { source: "B", scale: 2 });
+    mergeAttrs([s1, s2], tgt);
+    expect(getAttrs(tgt)).toEqual({ source: "B", unit: "kg", scale: 2 });
+  });
+
+  test("skips sources with no attrs", () => {
+    const s1 = freshObj();
+    const s2 = freshObj();
+    const tgt = freshObj();
+    setAttrs(s1, { a: 1 });
+    // s2 has no attrs
+    mergeAttrs([s1, s2], tgt);
+    expect(getAttrs(tgt)).toEqual({ a: 1 });
+  });
+
+  // FIX: this test was titled "merging from empty sources leaves target
+  // without attrs", yet its assertion (correctly) requires the target's prior
+  // attrs to SURVIVE, and its comments contradicted each other. Title and
+  // comments now describe the behavior actually asserted: mergeAttrs is
+  // additive and never clears the target.
+  test("merging from attr-less sources preserves target's existing attrs", () => {
+    const s1 = freshObj();
+    const tgt = freshObj();
+    setAttrs(tgt, { old: 1 });
+    mergeAttrs([s1], tgt);
+    // No source contributed anything, so nothing is written and the target's
+    // previous attrs remain untouched.
+    expect(getAttrs(tgt)).toEqual({ old: 1 });
+  });
+});
+
+// βββ independence βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+describe("independence", () => {
+  test("separate objects have independent attrs", () => {
+    const left = freshObj();
+    const right = freshObj();
+    setAttrs(left, { id: "A" });
+    setAttrs(right, { id: "B" });
+    expect(getAttrs(left)).toEqual({ id: "A" });
+    expect(getAttrs(right)).toEqual({ id: "B" });
+  });
+
+  test("clearing one object does not affect another", () => {
+    const left = freshObj();
+    const right = freshObj();
+    setAttrs(left, { x: 1 });
+    setAttrs(right, { y: 2 });
+    clearAttrs(left);
+    expect(hasAttrs(left)).toBe(false);
+    expect(getAttrs(right)).toEqual({ y: 2 });
+  });
+});
+
+// βββ integration: Series and DataFrame βββββββββββββββββββββββββββββββββββββββ
+
+describe("integration with Series and DataFrame", () => {
+  test("can attach attrs to a Series", () => {
+    const series = makeSeries();
+    setAttrs(series, { unit: "metres", source: "GPS" });
+    expect(getAttrs(series)).toEqual({ unit: "metres", source: "GPS" });
+    expect(hasAttrs(series)).toBe(true);
+  });
+
+  test("can attach attrs to a DataFrame", () => {
+    const frame = makeDF();
+    setAttrs(frame, { description: "sensor readings", rows: 3 });
+    expect(getAttr(frame, "description")).toBe("sensor readings");
+    expect(attrsCount(frame)).toBe(2);
+  });
+
+  test("withAttrs fluent helper on DataFrame", () => {
+    const frame = makeDF();
+    const chained = withAttrs(frame, { version: 5 });
+    expect(chained).toBe(frame);
+    expect(getAttrs(chained)).toEqual({ version: 5 });
+  });
+
+  test("copyAttrs from Series to DataFrame", () => {
+    const series = makeSeries();
+    const frame = makeDF();
+    setAttrs(series, { lineage: "processed" });
+    copyAttrs(series, frame);
+    expect(getAttrs(frame)).toEqual({ lineage: "processed" });
+  });
+
+  test("mergeAttrs from two Series into a DataFrame", () => {
+    const first = makeSeries();
+    const second = makeSeries();
+    const frame = makeDF();
+    setAttrs(first, { source: "A", unit: "kg" });
+    setAttrs(second, { source: "B", version: 1 });
+    mergeAttrs([first, second], frame);
+    expect(getAttrs(frame)).toEqual({ source: "B", unit: "kg", version: 1 });
+  });
+});
+
+// βββ property-based tests βββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+// Round-trip law: whatever record is stored via setAttrs comes back intact
+// from getAttrs.
+// NOTE(review): fc.dictionary can generate hostile keys such as "__proto__"
+// or "constructor" depending on the fast-check version; presumably the attrs
+// registry stores plain records so these are safe β confirm against
+// src/core/attrs.ts.
+describe("property: setAttrs/getAttrs round-trip", () => {
+ test("any record can be stored and retrieved intact", () => {
+ fc.assert(
+ fc.property(
+ fc.dictionary(fc.string({ minLength: 1, maxLength: 10 }), fc.oneof(fc.integer(), fc.string(), fc.boolean())),
+ (attrs) => {
+ const obj = freshObj();
+ setAttrs(obj, attrs);
+ const result = getAttrs(obj);
+ expect(result).toEqual(attrs);
+ },
+ ),
+ );
+ });
+});
+
+// Merge law: updateAttrs(obj, updates) produces the union of the two records,
+// with `updates` winning on overlapping keys.
+describe("property: updateAttrs is a superset of previous", () => {
+ test("all keys from the original are still present after update", () => {
+ fc.assert(
+ fc.property(
+ fc.dictionary(fc.string({ minLength: 1 }), fc.integer()),
+ fc.dictionary(fc.string({ minLength: 1 }), fc.integer()),
+ (original, updates) => {
+ const obj = freshObj();
+ setAttrs(obj, original);
+ updateAttrs(obj, updates);
+ const result = getAttrs(obj);
+ // every key from original that is NOT in updates must still be present
+ for (const [k, v] of Object.entries(original)) {
+ if (!(k in updates)) {
+ expect(result[k]).toBe(v);
+ }
+ }
+ // every key from updates must be present with the update value
+ for (const [k, v] of Object.entries(updates)) {
+ expect(result[k]).toBe(v);
+ }
+ },
+ ),
+ );
+ });
+});
+
+// Copy law: after copyAttrs, source and target report deep-equal attrs
+// (including the empty-source case, where both are {}).
+describe("property: copyAttrs makes target equal to source", () => {
+ test("after copyAttrs, getAttrs(target) deep-equals getAttrs(source)", () => {
+ fc.assert(
+ fc.property(
+ fc.dictionary(fc.string({ minLength: 1 }), fc.oneof(fc.integer(), fc.string())),
+ (sourceAttrs) => {
+ const src = freshObj();
+ const tgt = freshObj();
+ setAttrs(src, sourceAttrs);
+ copyAttrs(src, tgt);
+ expect(getAttrs(tgt)).toEqual(getAttrs(src));
+ },
+ ),
+ );
+ });
+});
diff --git a/tests/core/insert_pop.test.ts b/tests/core/insert_pop.test.ts
new file mode 100644
index 00000000..daab7705
--- /dev/null
+++ b/tests/core/insert_pop.test.ts
@@ -0,0 +1,286 @@
+/**
+ * Tests for src/core/insert_pop.ts β insertColumn(), popColumn(), reorderColumns(), moveColumn().
+ *
+ * Covers:
+ * - insertColumn: basic insertion at various positions
+ * - insertColumn: insertion at start (loc=0) and end (loc=nCols)
+ * - insertColumn: insertion with a Series value
+ * - insertColumn: error on duplicate column name (allowDuplicates=false)
+ * - insertColumn: allowDuplicates=true bypasses duplicate check
+ * - insertColumn: error on out-of-range loc
+ * - insertColumn: error on wrong-length values
+ * - popColumn: removes column and returns Series + new DataFrame
+ * - popColumn: error on missing column
+ * - reorderColumns: reorders to specified order
+ * - reorderColumns: error on missing column in order
+ * - moveColumn: moves column to new position
+ * - Property-based: insertColumn then popColumn round-trips shape
+ * - Property-based: column order after insert is correct
+ */
+
+import { describe, expect, test } from "bun:test";
+import * as fc from "fast-check";
+import { DataFrame, Series } from "../../src/index.ts";
+import { insertColumn, moveColumn, popColumn, reorderColumns } from "../../src/core/insert_pop.ts";
+
+// βββ helpers ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+// Fixture: 3-row DataFrame with columns a, b, c.
+const makeDF = (): DataFrame => DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6], c: [7, 8, 9] });
+
+// βββ insertColumn βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+describe("insertColumn", () => {
+  test("inserts at position 0 (start)", () => {
+    const result = insertColumn(makeDF(), 0, "x", [10, 20, 30]);
+    expect(result.columns.values).toEqual(["x", "a", "b", "c"]);
+    expect(result.col("x").values).toEqual([10, 20, 30]);
+  });
+
+  test("inserts at position 1 (middle)", () => {
+    const result = insertColumn(makeDF(), 1, "x", [10, 20, 30]);
+    expect(result.columns.values).toEqual(["a", "x", "b", "c"]);
+    expect(result.col("x").values).toEqual([10, 20, 30]);
+  });
+
+  test("inserts at end (loc = nCols)", () => {
+    const result = insertColumn(makeDF(), 3, "x", [10, 20, 30]);
+    expect(result.columns.values).toEqual(["a", "b", "c", "x"]);
+  });
+
+  test("inserts using a Series value", () => {
+    const payload = new Series({ data: [100, 200, 300], name: "s" });
+    const result = insertColumn(makeDF(), 2, "s", payload);
+    expect(result.columns.values).toEqual(["a", "b", "s", "c"]);
+    expect(result.col("s").values).toEqual([100, 200, 300]);
+  });
+
+  test("preserves original DataFrame (immutable)", () => {
+    const base = makeDF();
+    insertColumn(base, 1, "x", [10, 20, 30]);
+    expect(base.columns.values).toEqual(["a", "b", "c"]);
+  });
+
+  test("preserves row index", () => {
+    const base = makeDF();
+    const result = insertColumn(base, 0, "z", [0, 0, 0]);
+    expect(result.shape[0]).toBe(3);
+    expect(result.index.values).toEqual(base.index.values);
+  });
+
+  test("throws on duplicate column (allowDuplicates=false)", () => {
+    expect(() => insertColumn(makeDF(), 1, "a", [1, 2, 3])).toThrow(RangeError);
+  });
+
+  test("allows duplicate column when allowDuplicates=true", () => {
+    const result = insertColumn(makeDF(), 1, "a", [99, 99, 99], true);
+    // Two "a" columns now exist: the original at index 0, the inserted at 1.
+    expect(result.shape[1]).toBe(4);
+  });
+
+  test("throws on loc < 0", () => {
+    expect(() => insertColumn(makeDF(), -1, "x", [1, 2, 3])).toThrow(RangeError);
+  });
+
+  test("throws on loc > nCols", () => {
+    expect(() => insertColumn(makeDF(), 10, "x", [1, 2, 3])).toThrow(RangeError);
+  });
+
+  test("throws on wrong-length values array", () => {
+    expect(() => insertColumn(makeDF(), 1, "x", [1, 2])).toThrow(RangeError);
+  });
+
+  test("inserts into empty DataFrame (0 rows) at pos 0", () => {
+    const empty = DataFrame.fromColumns({});
+    const result = insertColumn(empty, 0, "a", []);
+    expect(result.columns.values).toEqual(["a"]);
+    expect(result.shape[0]).toBe(0);
+  });
+
+  test("shape[1] increases by 1", () => {
+    const base = makeDF();
+    const result = insertColumn(base, 2, "new", [1, 2, 3]);
+    expect(result.shape[1]).toBe(base.shape[1] + 1);
+  });
+});
+
+// βββ popColumn ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+describe("popColumn", () => {
+  test("removes column and returns it as Series", () => {
+    const { series: removed, df: remaining } = popColumn(makeDF(), "b");
+    expect(removed.values).toEqual([4, 5, 6]);
+    expect(remaining.columns.values).toEqual(["a", "c"]);
+  });
+
+  test("popping first column", () => {
+    const { series: removed, df: remaining } = popColumn(makeDF(), "a");
+    expect(removed.values).toEqual([1, 2, 3]);
+    expect(remaining.columns.values).toEqual(["b", "c"]);
+  });
+
+  test("popping last column", () => {
+    const { series: removed, df: remaining } = popColumn(makeDF(), "c");
+    expect(removed.values).toEqual([7, 8, 9]);
+    expect(remaining.columns.values).toEqual(["a", "b"]);
+  });
+
+  test("preserves original DataFrame (immutable)", () => {
+    const base = makeDF();
+    popColumn(base, "b");
+    expect(base.columns.values).toEqual(["a", "b", "c"]);
+  });
+
+  test("shape[1] decreases by 1", () => {
+    const base = makeDF();
+    const { df: remaining } = popColumn(base, "a");
+    expect(remaining.shape[1]).toBe(base.shape[1] - 1);
+  });
+
+  test("throws on missing column", () => {
+    expect(() => popColumn(makeDF(), "z")).toThrow(RangeError);
+  });
+
+  test("popping all columns leaves empty-column DataFrame", () => {
+    const single = DataFrame.fromColumns({ x: [1, 2] });
+    const { df: remaining } = popColumn(single, "x");
+    expect(remaining.shape[1]).toBe(0);
+    expect(remaining.shape[0]).toBe(2);
+  });
+});
+
+// βββ reorderColumns ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+describe("reorderColumns", () => {
+  test("reorders columns to new order", () => {
+    const result = reorderColumns(makeDF(), ["c", "a", "b"]);
+    expect(result.columns.values).toEqual(["c", "a", "b"]);
+  });
+
+  test("values are preserved after reorder", () => {
+    const result = reorderColumns(makeDF(), ["c", "b", "a"]);
+    expect(result.col("a").values).toEqual([1, 2, 3]);
+    expect(result.col("b").values).toEqual([4, 5, 6]);
+    expect(result.col("c").values).toEqual([7, 8, 9]);
+  });
+
+  test("can select subset of columns (acts like df[subset])", () => {
+    const result = reorderColumns(makeDF(), ["a", "c"]);
+    expect(result.columns.values).toEqual(["a", "c"]);
+    expect(result.shape[1]).toBe(2);
+  });
+
+  test("throws on column not in DataFrame", () => {
+    expect(() => reorderColumns(makeDF(), ["a", "z"])).toThrow(RangeError);
+  });
+});
+
+// βββ moveColumn ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+describe("moveColumn", () => {
+  test("moves last column to position 0", () => {
+    const result = moveColumn(makeDF(), "c", 0);
+    expect(result.columns.values).toEqual(["c", "a", "b"]);
+  });
+
+  test("moves first column to end", () => {
+    const result = moveColumn(makeDF(), "a", 2);
+    expect(result.columns.values).toEqual(["b", "c", "a"]);
+  });
+
+  test("values are preserved", () => {
+    const result = moveColumn(makeDF(), "b", 0);
+    expect(result.col("b").values).toEqual([4, 5, 6]);
+    expect(result.col("a").values).toEqual([1, 2, 3]);
+  });
+
+  test("shape is unchanged", () => {
+    const base = makeDF();
+    const moved = moveColumn(base, "b", 2);
+    expect(moved.shape).toEqual(base.shape);
+  });
+});
+
+// βββ property-based tests ββββββββββββββββββββββββββββββββββββββββββββββββββββ
+
+describe("insertColumn + popColumn property tests", () => {
+ test("insert then pop round-trips shape", () => {
+ fc.assert(
+ fc.property(
+ fc.integer({ min: 1, max: 5 }),
+ fc.integer({ min: 1, max: 5 }),
+ (nCols, nRows) => {
+ // Build a DataFrame with nCols columns
+ const colData: Record