+ queryDataFrame and evalDataFrame let you filter rows or evaluate
+ expressions using a Python-pandas-style expression string. This mirrors
+ pandas.DataFrame.query()
+ and
+ pandas.DataFrame.eval().
+
+
+
Import
+
import { queryDataFrame, evalDataFrame, DataFrame } from "tsb";
+
+
queryDataFrame(df, expr)
+
Returns a new DataFrame containing only the rows for which expr evaluates to a truthy value.
strFindall/strFindallCount/strFindFirst/strFindallExpand — regex match extraction per element (mirrors pandas str.findall). toJsonDenormalize/toJsonRecords/toJsonSplit/toJsonIndex — serialize DataFrames to nested or flat JSON.
+ Two new features in tsb:
+ strFindall / strFindallCount / strFindFirst / strFindallExpand
+ (mirrors pandas.Series.str.findall)
+ and
+ toJsonDenormalize / toJsonRecords / toJsonSplit / toJsonIndex
+ (the inverse of jsonNormalize).
+
Mirrors pandas.Series.str.findall(pat). Returns a Series where each value is a JSON-encoded array of all non-overlapping matches.
+
// pandas equivalent:
+// s.str.findall(r'\d+')
+
+import { Series } from 'tsb';
+import { strFindall, strFindallCount, strFindFirst } from 'tsb';
+
+const prices = new Series({ data: ['$10.99 and $5.00', 'free!', '$3.50'] });
+
+const allPrices = strFindall(prices, /\$[\d.]+/);
+// Series [
+// '["$10.99","$5.00"]', ← JSON string
+// '[]',
+// '["$3.50"]'
+// ]
+
+// Parse the JSON to get actual arrays:
+JSON.parse(allPrices.values[0]); // ["$10.99", "$5.00"]
+JSON.parse(allPrices.values[1]); // []
+
✅ Each element contains a JSON.stringify(string[]) result.
+
+
With capture groups
+
// When the pattern has a capture group, returns the captured value
+const s = new Series({ data: ['name: Alice', 'name: Bob', 'unknown'] });
+const names = strFindall(s, /name: (\w+)/);
+// Series ['["Alice"]', '["Bob"]', '[]']
+
+// Only the first capture group is extracted; this matches pandas for single-group patterns.
+
+
Null / NaN handling
+
const s = new Series({ data: ['hello', null, NaN, 'world'] });
+const result = strFindall(s, /\w+/);
+// Series ['["hello"]', null, null, '["world"]']
+// Null/NaN elements return null (not []) — matches pandas
+
+
+
+
+
2. strFindallCount — count matches per element
+
import { strFindallCount } from 'tsb';
+
+const words = new Series({ data: ['one two three', 'four', 'five six'] });
+const counts = strFindallCount(words, /\b\w+\b/);
+// Series [3, 1, 2]
+
+// Count vowels per word
+const vowels = new Series({ data: ['beautiful', 'rhythm', 'aeiou'] });
+strFindallCount(vowels, /[aeiou]/i);
+// Series [5, 0, 5]
+
💡 More efficient than strFindall when you only need the count, not the matches themselves.
+
+
+
+
+
3. strFindFirst — first match per element
+
import { strFindFirst } from 'tsb';
+
+const logs = new Series({ data: [
+ '2024-01-15: ERROR occurred',
+ '2024-02-20: INFO ok',
+ 'no date here',
+] });
+
+const dates = strFindFirst(logs, /\d{4}-\d{2}-\d{2}/);
+// Series ['2024-01-15', '2024-02-20', null]
+
+// Extract just the year (first capture group)
+const years = strFindFirst(logs, /(\d{4})-\d{2}-\d{2}/);
+// Series ['2024', '2024', null]
+
+
+
+
+
4. strFindallExpand — expand capture groups into a DataFrame
+
+
diff --git a/src/index.ts b/src/index.ts
index b246fd76..1471f7e9 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -56,6 +56,8 @@ export { readJson, toJson } from "./io/index.ts";
export type { ReadJsonOptions, ToJsonOptions, JsonOrient } from "./io/index.ts";
export { jsonNormalize } from "./io/index.ts";
export type { JsonNormalizeOptions, JsonPath } from "./io/index.ts";
+export { toJsonDenormalize, toJsonRecords, toJsonSplit, toJsonIndex } from "./io/index.ts";
+export type { JsonDenormalizeOptions, JsonSplitOptions, JsonSplitResult } from "./io/index.ts";
export { pearsonCorr, dataFrameCorr, dataFrameCov } from "./stats/index.ts";
export type { CorrMethod, CorrOptions, CovOptions } from "./stats/index.ts";
export { Rolling } from "./window/index.ts";
@@ -556,3 +558,4 @@ export { intervalRange } from "./stats/index.ts";
export type { ClosedType } from "./stats/index.ts";
export { nunique } from "./stats/index.ts";
export { queryDataFrame, evalDataFrame } from "./stats/index.ts";
+export { strFindall, strFindallCount, strFindFirst, strFindallExpand } from "./stats/index.ts";
diff --git a/src/io/index.ts b/src/io/index.ts
index e868c4c8..afb4ac57 100644
--- a/src/io/index.ts
+++ b/src/io/index.ts
@@ -10,6 +10,17 @@ export { readJson, toJson } from "./json.ts";
export type { ReadJsonOptions, ToJsonOptions, JsonOrient } from "./json.ts";
export { jsonNormalize } from "./json_normalize.ts";
export type { JsonPath, JsonNormalizeOptions } from "./json_normalize.ts";
+export {
+ toJsonDenormalize,
+ toJsonRecords,
+ toJsonSplit,
+ toJsonIndex,
+} from "./to_json_normalize.ts";
+export type {
+ JsonDenormalizeOptions,
+ JsonSplitOptions,
+ JsonSplitResult,
+} from "./to_json_normalize.ts";
// readExcel / xlsxSheetNames use node:zlib and cannot be bundled for the
// browser. Import them directly from "tsb/io/read_excel" when running in
// Node / Bun.
diff --git a/src/io/to_json_normalize.ts b/src/io/to_json_normalize.ts
new file mode 100644
index 00000000..1911c654
--- /dev/null
+++ b/src/io/to_json_normalize.ts
@@ -0,0 +1,275 @@
+/**
+ * toJsonDenormalize — convert a flat DataFrame back to nested JSON records.
+ *
+ * This is the inverse operation of `jsonNormalize`: given a flat DataFrame
+ * whose column names use a separator (e.g. `"."`) to encode nesting depth,
+ * reconstruct an array of nested JSON objects.
+ *
+ * For example, a DataFrame with columns `["name", "address.city", "address.zip"]`
+ * produces records like `{ name: "Alice", address: { city: "NY", zip: "10001" } }`.
+ *
+ * Additional utilities:
+ *
+ * - `toJsonDenormalize` — main function; mirrors inverting `pandas.json_normalize`
+ * - `toJsonRecords` — simple orient="records" serialisation (no nesting)
+ * - `toJsonSplit` — orient="split" (columns + data + index)
+ * - `toJsonIndex` — orient="index" (keyed by index label)
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── JSON value types (no `any`) ──────────────────────────────────────────────
+
+/** A JSON primitive (leaf value): a string, a number, a boolean, or `null`. */
+type JsonPrimitive = string | number | boolean | null;
+
+/** Any valid JSON value: a primitive, an array of JSON values, or a nested object. */
+type JsonValue = JsonPrimitive | JsonValue[] | JsonRecord;
+
+/** A JSON object (dict) whose string keys map to arbitrary JSON values. */
+interface JsonRecord {
+ [key: string]: JsonValue;
+}
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/** Options for {@link toJsonDenormalize}. */
+export interface JsonDenormalizeOptions {
+ /**
+ * Separator used in column names to encode nesting depth.
+ * Must match the separator used when `jsonNormalize` was called.
+ * Every occurrence of the separator in a column name starts a new nesting level.
+ * @default "."
+ */
+ readonly sep?: string;
+
+ /**
+ * When `true`, omit keys whose value is `null`.
+ * The check is applied to the JSON-converted cell, so `NaN` and infinite
+ * numbers (which are emitted as `null`) are omitted as well.
+ * @default false
+ */
+ readonly dropNull?: boolean;
+}
+
+/** Options for {@link toJsonSplit}. */
+export interface JsonSplitOptions {
+ /**
+ * When `true`, include the DataFrame index in the output.
+ * When `false`, the result has no `index` key at all.
+ * @default true
+ */
+ readonly includeIndex?: boolean;
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/**
+ * Map a DataFrame cell onto a JSON leaf value.
+ *
+ * `null`/`undefined` and non-finite numbers (NaN, ±Infinity) become `null`;
+ * finite numbers and booleans pass through; anything else is stringified
+ * with `String()`.
+ */
+function scalarToJson(v: Scalar): JsonPrimitive {
+  switch (typeof v) {
+    case "undefined":
+      return null;
+    case "number":
+      // Number.isFinite rejects NaN as well as ±Infinity.
+      return Number.isFinite(v) ? v : null;
+    case "boolean":
+      return v;
+    default:
+      return v === null ? null : String(v);
+  }
+}
+
+/**
+ * Set a value in a nested object using a pre-split key path.
+ * Intermediate objects are created as needed; an intermediate slot that
+ * holds a primitive, `null`, or an array is replaced by a fresh object.
+ *
+ * Only the object's OWN properties are consulted and written. A path segment
+ * such as `"__proto__"` therefore becomes an ordinary own key (as it does in
+ * `JSON.parse`) instead of walking into — and mutating — `Object.prototype`.
+ *
+ * @param obj - Record to mutate in place.
+ * @param keys - Path segments, outermost first.
+ * @param value - Leaf value stored under the last segment.
+ */
+function setNested(obj: JsonRecord, keys: readonly string[], value: JsonPrimitive): void {
+  let current: JsonRecord = obj;
+  for (let i = 0; i < keys.length - 1; i++) {
+    const k = keys[i] as string;
+    // Inherited members (e.g. `__proto__`, `constructor`) must never be descended into.
+    const existing = Object.prototype.hasOwnProperty.call(current, k) ? current[k] : undefined;
+    if (typeof existing === "object" && existing !== null && !Array.isArray(existing)) {
+      current = existing;
+      continue;
+    }
+    const child: JsonRecord = {};
+    // defineProperty creates an own data property even for `__proto__`,
+    // where plain assignment would invoke the prototype setter instead.
+    Object.defineProperty(current, k, {
+      value: child,
+      writable: true,
+      enumerable: true,
+      configurable: true,
+    });
+    current = child;
+  }
+  const lastKey = keys[keys.length - 1] as string;
+  Object.defineProperty(current, lastKey, {
+    value,
+    writable: true,
+    enumerable: true,
+    configurable: true,
+  });
+}
+
+// ─── toJsonDenormalize ────────────────────────────────────────────────────────
+
+/**
+ * Convert a flat DataFrame to an array of nested JSON objects.
+ *
+ * Reverses the flattening performed by `jsonNormalize`: column names
+ * containing the separator (default `"."`) are split into nested keys.
+ * Cell values are converted with the same rules as the other serialisers in
+ * this module (missing and non-finite numbers become `null`).
+ *
+ * @param df - Input DataFrame.
+ * @param options - Configuration options.
+ * @returns An array of nested `JsonRecord` objects, one per row.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({
+ *   name: ["Alice", "Bob"],
+ *   "address.city": ["NY", "LA"],
+ *   "address.zip": ["10001", "90001"],
+ * });
+ * toJsonDenormalize(df);
+ * // [
+ * //   { name: "Alice", address: { city: "NY", zip: "10001" } },
+ * //   { name: "Bob", address: { city: "LA", zip: "90001" } },
+ * // ]
+ * ```
+ */
+export function toJsonDenormalize(
+  df: DataFrame,
+  options: JsonDenormalizeOptions = {},
+): JsonRecord[] {
+  const sep = options.sep ?? ".";
+  const dropNull = options.dropNull ?? false;
+
+  const columns = df.columns.values;
+  const nRows = df.index.size;
+
+  // Resolve every column's key path and backing values once, up front,
+  // instead of repeating the column lookup for each individual cell.
+  const fields = columns.map((name) => ({
+    keys: name.split(sep),
+    values: df.col(name).values,
+  }));
+
+  const result: JsonRecord[] = [];
+  for (let r = 0; r < nRows; r++) {
+    const record: JsonRecord = {};
+    for (const { keys, values } of fields) {
+      const value = scalarToJson(values[r] as Scalar);
+      // With dropNull the key is simply never created for this row.
+      if (dropNull && value === null) continue;
+      setNested(record, keys, value);
+    }
+    result.push(record);
+  }
+
+  return result;
+}
+
+// ─── toJsonRecords ────────────────────────────────────────────────────────────
+
+/**
+ * Serialize a DataFrame as an array of flat record objects.
+ *
+ * This is equivalent to `df.to_json(orient="records")` in pandas.
+ * Column names are NOT split on any separator — the output is always flat.
+ *
+ * @param df - Input DataFrame.
+ * @returns An array of `JsonRecord` objects, one per row.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 2], b: ["x", "y"] });
+ * toJsonRecords(df);
+ * // [{ a: 1, b: "x" }, { a: 2, b: "y" }]
+ * ```
+ */
+export function toJsonRecords(df: DataFrame): JsonRecord[] {
+  const nRows = df.index.size;
+  // Look each column up once rather than once per cell.
+  const fields = df.columns.values.map((name) => ({
+    name,
+    values: df.col(name).values,
+  }));
+
+  const result: JsonRecord[] = [];
+  for (let r = 0; r < nRows; r++) {
+    const record: JsonRecord = {};
+    for (const { name, values } of fields) {
+      record[name] = scalarToJson(values[r] as Scalar);
+    }
+    result.push(record);
+  }
+
+  return result;
+}
+
+// ─── toJsonSplit ──────────────────────────────────────────────────────────────
+
+/** Result shape produced by {@link toJsonSplit}. */
+export interface JsonSplitResult {
+  /** Column names, in DataFrame column order. */
+  columns: string[];
+  /** Index labels, one per row; absent when `includeIndex` is `false`. */
+  index?: JsonPrimitive[];
+  /** Row-major cell values: `data[r][c]` is row `r` of column `columns[c]`. */
+  data: JsonPrimitive[][];
+}
+
+/**
+ * Serialize a DataFrame in "split" orientation.
+ *
+ * Mirrors `df.to_json(orient="split")` in pandas.
+ *
+ * @param df - Input DataFrame.
+ * @param options - Configuration options.
+ * @returns An object with `{ columns, index?, data }` keys.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 2], b: ["x", "y"] });
+ * toJsonSplit(df);
+ * // {
+ * //   columns: ["a", "b"],
+ * //   index: [0, 1],
+ * //   data: [[1, "x"], [2, "y"]],
+ * // }
+ * ```
+ */
+export function toJsonSplit(df: DataFrame, options: JsonSplitOptions = {}): JsonSplitResult {
+  const includeIndex = options.includeIndex ?? true;
+  const columns = df.columns.values;
+  const nRows = df.index.size;
+
+  // Look each column up once rather than once per cell.
+  const columnValues = columns.map((name) => df.col(name).values);
+
+  const data: JsonPrimitive[][] = [];
+  for (let r = 0; r < nRows; r++) {
+    data.push(columnValues.map((values) => scalarToJson(values[r] as Scalar)));
+  }
+
+  // Copy the column names so callers cannot mutate the DataFrame's own array.
+  const result: JsonSplitResult = { columns: [...columns], data };
+  if (includeIndex) {
+    result.index = df.index.toArray().map(scalarToJson);
+  }
+  return result;
+}
+
+// ─── toJsonIndex ──────────────────────────────────────────────────────────────
+
+/**
+ * Serialize a DataFrame in "index" orientation.
+ *
+ * Mirrors `df.to_json(orient="index")` in pandas.
+ * Rows are keyed by their index label (converted to string). When two rows
+ * share the same stringified label, the later row replaces the earlier one.
+ *
+ * @param df - Input DataFrame.
+ * @returns An object mapping index label → flat record.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 2], b: ["x", "y"] });
+ * toJsonIndex(df);
+ * // { "0": { a: 1, b: "x" }, "1": { a: 2, b: "y" } }
+ * ```
+ */
+export function toJsonIndex(df: DataFrame): JsonRecord {
+  const indexLabels = df.index.toArray();
+  const nRows = indexLabels.length;
+  // Look each column up once rather than once per cell.
+  const fields = df.columns.values.map((name) => ({
+    name,
+    values: df.col(name).values,
+  }));
+
+  const result: JsonRecord = {};
+  for (let r = 0; r < nRows; r++) {
+    const label = String(indexLabels[r]);
+    const record: JsonRecord = {};
+    for (const { name, values } of fields) {
+      record[name] = scalarToJson(values[r] as Scalar);
+    }
+    result[label] = record;
+  }
+
+  return result;
+}
diff --git a/src/stats/index.ts b/src/stats/index.ts
index 2091e791..4d726aa0 100644
--- a/src/stats/index.ts
+++ b/src/stats/index.ts
@@ -382,3 +382,4 @@ export { intervalRange } from "./interval.ts";
export type { ClosedType } from "./interval.ts";
export { nunique } from "./reduce_ops.ts";
export { queryDataFrame, evalDataFrame } from "./eval_query.ts";
+export { strFindall, strFindallCount, strFindFirst, strFindallExpand } from "./str_findall.ts";
diff --git a/src/stats/str_findall.ts b/src/stats/str_findall.ts
new file mode 100644
index 00000000..c8a70f15
--- /dev/null
+++ b/src/stats/str_findall.ts
@@ -0,0 +1,339 @@
+/**
+ * str_findall — findall, findFirst, and findallCount for Series strings.
+ *
+ * Mirrors `pandas.Series.str.findall(pat)` and related helpers:
+ *
+ * - `strFindall` — all non-overlapping regex matches per element
+ * - `strFindallCount` — count of matches per element
+ * - `strFindFirst` — first match per element (or null if none)
+ * - `strFindallExpand`— expand first N capture groups into a DataFrame
+ *
+ * @module
+ */
+
+import { DataFrame, Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+import type { StrInput } from "./string_ops.ts";
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** Stringify a scalar, mapping missing values (`null`, `undefined`, NaN) to `null`. */
+function toStr(v: Scalar): string | null {
+  const isMissing =
+    v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+  return isMissing ? null : String(v);
+}
+
+/**
+ * Normalise any supported input into a plain array of strings.
+ * A scalar string becomes a one-element array; missing elements of a
+ * Series or array are represented by the empty string.
+ */
+function toInputStrings(input: StrInput): string[] {
+  if (typeof input === "string") {
+    return [input];
+  }
+  const rawValues: readonly Scalar[] =
+    input instanceof Series ? input.values : (input as readonly Scalar[]);
+  return rawValues.map((v) => toStr(v) ?? "");
+}
+
+/** Wrap `data` in a Series, reusing the input's index when the input was itself a Series. */
+function buildResult(data: Scalar[], input: StrInput): Series {
+  return input instanceof Series
+    ? new Series({ data, index: input.index })
+    : new Series({ data });
+}
+
+/**
+ * Build a fresh global RegExp from a pattern.
+ *
+ * @param pat - Pattern source or an existing RegExp (never mutated; a new
+ *   RegExp is always returned, so `lastIndex` state is not shared).
+ * @param flags - Flags applied when `pat` is a string; ignored for a RegExp,
+ *   whose own flags are kept.
+ * @returns A RegExp with the requested flags plus `g`.
+ */
+function makeGlobal(pat: string | RegExp, flags?: string): RegExp {
+  const source = pat instanceof RegExp ? pat.source : pat;
+  const requested = pat instanceof RegExp ? pat.flags : (flags ?? "");
+  // De-duplicate per character: the caller may already have passed "g"
+  // anywhere in the string (e.g. "gi"), and a repeated flag makes the
+  // RegExp constructor throw.
+  const merged = [...new Set(`${requested}g`)].join("");
+  return new RegExp(source, merged);
+}
+
+// ─── strFindall ───────────────────────────────────────────────────────────────
+
+/**
+ * Find all non-overlapping regex matches in each element.
+ *
+ * Mirrors `pandas.Series.str.findall(pat, flags=0)`.
+ *
+ * Each element in the returned Series encodes a `string[]` of matches:
+ * the full match when the pattern has no capture groups, otherwise the
+ * value of the FIRST capture group (an empty string when that group did not
+ * participate in the match). Additional capture groups are ignored, unlike
+ * pandas, which returns tuples for multi-group patterns.
+ * Null/NaN elements produce `null`.
+ *
+ * The `string[]` value is stored as a JSON-serialized string for compatibility
+ * with `Scalar`. Use `JSON.parse` to recover the array.
+ *
+ * @param input - Series, array, or scalar string.
+ * @param pat - Regular expression pattern (string or RegExp).
+ * @param flags - Regex flags (only used when `pat` is a string).
+ * @returns A `Series` where each value is a JSON string of `string[]`.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: ["one two three", "four five"] });
+ * const result = strFindall(s, /\w+/);
+ * // Series [
+ * //   '["one","two","three"]',
+ * //   '["four","five"]',
+ * // ]
+ * JSON.parse(result.values[0] as string); // ["one", "two", "three"]
+ * ```
+ */
+export function strFindall(
+  input: StrInput,
+  pat: string | RegExp,
+  flags?: string,
+): Series {
+  const strs = toInputStrings(input);
+  const re = makeGlobal(pat, flags);
+
+  const data: Scalar[] = strs.map((s, i) => {
+    // `strs` has already replaced missing values by "", so missingness must
+    // be decided from the original element.
+    const orig =
+      input instanceof Series
+        ? input.values[i]
+        : typeof input === "string"
+          ? input
+          : (input as readonly Scalar[])[i];
+    if (orig === null || orig === undefined || (typeof orig === "number" && Number.isNaN(orig))) {
+      return null;
+    }
+
+    const matches: string[] = [];
+    // matchAll works on a private copy of the regex and steps past
+    // zero-length matches, so patterns such as /\d*/ terminate.
+    for (const m of s.matchAll(re)) {
+      // If there are capture groups, use the first group.
+      matches.push(m.length > 1 ? (m[1] ?? "") : (m[0] ?? ""));
+    }
+    return JSON.stringify(matches);
+  });
+
+  return buildResult(data, input);
+}
+
+// ─── strFindallCount ──────────────────────────────────────────────────────────
+
+/**
+ * Count all non-overlapping regex matches in each element.
+ *
+ * This is equivalent to `strFindall(s, pat).map(x => JSON.parse(x).length)`
+ * but more efficient since it avoids collecting and serialising the matches.
+ * Zero-length matches are counted once per position.
+ *
+ * @param input - Series, array, or scalar string.
+ * @param pat - Regular expression pattern.
+ * @param flags - Regex flags (only when `pat` is a string).
+ * @returns A `Series` of integer counts. Null elements return `null`.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: ["aaa", "bb", "c"] });
+ * strFindallCount(s, /a+/);
+ * // Series [1, 0, 0]
+ * ```
+ */
+export function strFindallCount(
+  input: StrInput,
+  pat: string | RegExp,
+  flags?: string,
+): Series {
+  const strs = toInputStrings(input);
+  const re = makeGlobal(pat, flags);
+
+  const data: Scalar[] = strs.map((s, i) => {
+    // `strs` has already replaced missing values by "", so missingness must
+    // be decided from the original element.
+    const orig =
+      input instanceof Series
+        ? input.values[i]
+        : typeof input === "string"
+          ? input
+          : (input as readonly Scalar[])[i];
+    if (orig === null || orig === undefined || (typeof orig === "number" && Number.isNaN(orig))) {
+      return null;
+    }
+
+    let count = 0;
+    // matchAll steps past zero-length matches, so patterns such as /x*/
+    // terminate instead of matching the same position forever.
+    for (const _match of s.matchAll(re)) {
+      count++;
+    }
+    return count;
+  });
+
+  return buildResult(data, input);
+}
+
+// ─── strFindFirst ─────────────────────────────────────────────────────────────
+
+/**
+ * Return the first regex match in each element, or `null` if there is none.
+ *
+ * If the pattern has capture groups, returns the first capture group's value
+ * (mirrors pandas behaviour for single-group patterns); the result is `null`
+ * when that group did not participate in the match.
+ *
+ * @param input - Series, array, or scalar string.
+ * @param pat - Regular expression pattern.
+ * @param flags - Regex flags (only when `pat` is a string).
+ * @returns A `Series` of strings (first match) or `null`.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: ["price: $10.99", "no price", "cost: $5.00"] });
+ * strFindFirst(s, /\$[\d.]+/);
+ * // Series ["$10.99", null, "$5.00"]
+ * ```
+ */
+export function strFindFirst(
+  input: StrInput,
+  pat: string | RegExp,
+  flags?: string,
+): Series {
+  const strs = toInputStrings(input);
+  const source = pat instanceof RegExp ? pat.source : pat;
+  const baseFlags = pat instanceof RegExp ? pat.flags.replace("g", "") : (flags ?? "");
+  const re = new RegExp(source, baseFlags);
+
+  const data: Scalar[] = strs.map((s, i) => {
+    // `strs` has already replaced missing values by "", so missingness must
+    // be decided from the original element.
+    const orig =
+      input instanceof Series
+        ? input.values[i]
+        : typeof input === "string"
+          ? input
+          : (input as readonly Scalar[])[i];
+    if (orig === null || orig === undefined || (typeof orig === "number" && Number.isNaN(orig))) {
+      return null;
+    }
+
+    // A sticky regex (or string flags containing "g") makes exec() resume
+    // from where the previous element's match ended; always start at 0.
+    re.lastIndex = 0;
+    const m = re.exec(s);
+    if (m === null) return null;
+    return m.length > 1 ? (m[1] ?? null) : (m[0] ?? null);
+  });
+
+  return buildResult(data, input);
+}
+
+// ─── strFindallExpand ─────────────────────────────────────────────────────────
+
+/**
+ * Extract capture groups from the **first** match of each element into a
+ * DataFrame, one column per capture group.
+ *
+ * This is a simplified variant of `str.extract(pat, expand=True)` limited
+ * to named or positional capture groups in the pattern.
+ *
+ * Column names are taken from named capture groups (`(?<name>...)`) where
+ * present; otherwise numbered as `"0"`, `"1"`, etc. When a pattern mixes
+ * named and unnamed groups, only the named groups produce columns. A pattern
+ * without any capture group yields a single all-null column `"0"`.
+ *
+ * @param input - Series or string array.
+ * @param pat - Regular expression with capture groups.
+ * @param flags - Regex flags (only when `pat` is a string).
+ * @returns A `DataFrame` with one row per input element and one column per
+ *   capture group. Missing and non-matching elements produce `null` in all
+ *   columns.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: ["John 30", "Jane 25", "unknown"] });
+ * strFindallExpand(s, /(?<name>\w+)\s+(?<age>\d+)/);
+ * // DataFrame
+ * //      name   age
+ * // 0    John    30
+ * // 1    Jane    25
+ * // 2    null  null
+ * ```
+ */
+export function strFindallExpand(
+  input: readonly string[] | Series,
+  pat: string | RegExp,
+  flags?: string,
+): DataFrame {
+  const source = pat instanceof RegExp ? pat.source : pat;
+  const baseFlags = pat instanceof RegExp ? pat.flags.replace("g", "") : (flags ?? "");
+  const re = new RegExp(source, baseFlags);
+
+  const strs = toInputStrings(input);
+
+  // Let the regex engine describe the pattern's groups: `source|` adds an
+  // empty alternative, so it always matches "" and the match array has one
+  // slot per capture group (and `groups` has one key per named group).
+  // This replaces scanning the source text for parentheses, which cannot
+  // cope with escapes, character classes, or `(?<name>` syntax.
+  const probe = new RegExp(`${source}|`, baseFlags).exec("");
+  const groupCount = probe === null ? 0 : probe.length - 1;
+  const probeGroups = probe?.groups;
+  const namedKeys =
+    probeGroups !== null && probeGroups !== undefined ? Object.keys(probeGroups) : [];
+
+  const colCount = namedKeys.length > 0 ? namedKeys.length : Math.max(groupCount, 1);
+  const colNames: string[] =
+    namedKeys.length > 0 ? namedKeys : Array.from({ length: colCount }, (_, k) => String(k));
+
+  const columns: Record<string, Scalar[]> = {};
+  for (const col of colNames) {
+    columns[col] = [];
+  }
+
+  const pushNullRow = (): void => {
+    for (const col of colNames) {
+      (columns[col] as Scalar[]).push(null);
+    }
+  };
+
+  for (let i = 0; i < strs.length; i++) {
+    // `strs` has already replaced missing values by "", so missingness must
+    // be decided from the original element.
+    const raw: unknown = input instanceof Series ? input.values[i] : input[i];
+    const isNull =
+      raw === null || raw === undefined || (typeof raw === "number" && Number.isNaN(raw));
+    if (isNull) {
+      pushNullRow();
+      continue;
+    }
+
+    // A sticky regex (or string flags containing "g") would otherwise resume
+    // from where the previous element's match ended.
+    re.lastIndex = 0;
+    const m = re.exec(strs[i] ?? "");
+    if (m === null) {
+      pushNullRow();
+    } else if (namedKeys.length > 0 && m.groups !== null && m.groups !== undefined) {
+      for (const col of namedKeys) {
+        (columns[col] as Scalar[]).push(m.groups[col] ?? null);
+      }
+    } else {
+      for (let k = 0; k < colCount; k++) {
+        (columns[colNames[k] as string] as Scalar[]).push(m[k + 1] ?? null);
+      }
+    }
+  }
+
+  if (input instanceof Series) {
+    return DataFrame.fromColumns(columns, { index: input.index });
+  }
+  return DataFrame.fromColumns(columns);
+}
diff --git a/tests/io/to_json_normalize.test.ts b/tests/io/to_json_normalize.test.ts
new file mode 100644
index 00000000..0829a463
--- /dev/null
+++ b/tests/io/to_json_normalize.test.ts
@@ -0,0 +1,253 @@
+/**
+ * Tests for to_json_normalize — toJsonDenormalize, toJsonRecords, toJsonSplit, toJsonIndex
+ */
+
+import { describe, expect, test } from "bun:test";
+import * as fc from "fast-check";
+import { DataFrame } from "../../src/index.ts";
+import {
+ toJsonDenormalize,
+ toJsonRecords,
+ toJsonSplit,
+ toJsonIndex,
+} from "../../src/io/to_json_normalize.ts";
+
+// ─── toJsonDenormalize ────────────────────────────────────────────────────────
+
+describe("toJsonDenormalize", () => {
+  test("flat columns unchanged", () => {
+    const frame = DataFrame.fromColumns({ name: ["Alice", "Bob"], age: [30, 25] });
+    expect(toJsonDenormalize(frame)).toEqual([
+      { name: "Alice", age: 30 },
+      { name: "Bob", age: 25 },
+    ]);
+  });
+
+  test("nested columns reconstructed", () => {
+    const frame = DataFrame.fromColumns({
+      name: ["Alice", "Bob"],
+      "address.city": ["NY", "LA"],
+      "address.zip": ["10001", "90001"],
+    });
+    expect(toJsonDenormalize(frame)).toEqual([
+      { name: "Alice", address: { city: "NY", zip: "10001" } },
+      { name: "Bob", address: { city: "LA", zip: "90001" } },
+    ]);
+  });
+
+  test("deeply nested columns", () => {
+    const frame = DataFrame.fromColumns({
+      "a.b.c": [1, 2],
+      "a.b.d": [3, 4],
+      "a.e": [5, 6],
+    });
+    const [first, second] = toJsonDenormalize(frame);
+    expect(first).toEqual({ a: { b: { c: 1, d: 3 }, e: 5 } });
+    expect(second).toEqual({ a: { b: { c: 2, d: 4 }, e: 6 } });
+  });
+
+  test("custom separator", () => {
+    const frame = DataFrame.fromColumns({
+      "x__y": [1, 2],
+      "x__z": [3, 4],
+    });
+    const [first] = toJsonDenormalize(frame, { sep: "__" });
+    expect(first).toEqual({ x: { y: 1, z: 3 } });
+  });
+
+  test("null values preserved", () => {
+    const frame = DataFrame.fromColumns({ a: [1, null], b: [null, 2] });
+    const [first, second] = toJsonDenormalize(frame);
+    expect(first).toEqual({ a: 1, b: null });
+    expect(second).toEqual({ a: null, b: 2 });
+  });
+
+  test("dropNull omits null fields", () => {
+    const frame = DataFrame.fromColumns({ a: [1, null], b: [null, 2] });
+    const [first, second] = toJsonDenormalize(frame, { dropNull: true });
+    const firstKeys = Object.keys(first as object);
+    const secondKeys = Object.keys(second as object);
+    expect(firstKeys).toContain("a");
+    expect(firstKeys).not.toContain("b");
+    expect(secondKeys).not.toContain("a");
+    expect(secondKeys).toContain("b");
+  });
+
+  test("empty DataFrame returns empty array", () => {
+    const frame = DataFrame.fromColumns({ a: [] as number[] });
+    expect(toJsonDenormalize(frame)).toEqual([]);
+  });
+
+  test("NaN values map to null", () => {
+    const frame = DataFrame.fromColumns({ a: [Number.NaN, 1] });
+    const [first, second] = toJsonDenormalize(frame);
+    expect(first).toEqual({ a: null });
+    expect(second).toEqual({ a: 1 });
+  });
+
+  // property: every cell of a flat numeric DataFrame survives toJsonDenormalize
+  test("property: round-trip for flat numeric DataFrames", () => {
+    const smallInts = fc.array(fc.integer({ min: -100, max: 100 }), {
+      minLength: 1,
+      maxLength: 5,
+    });
+    // Both columns must have the same length to form a valid DataFrame.
+    const equalLengthColumns = fc
+      .record({ x: smallInts, y: smallInts })
+      .filter((cols) => cols.x.length === cols.y.length);
+
+    fc.assert(
+      fc.property(equalLengthColumns, ({ x, y }) => {
+        const records = toJsonDenormalize(DataFrame.fromColumns({ x, y }));
+        expect(records.length).toBe(x.length);
+        x.forEach((expectedX, i) => {
+          const row = records[i] as { x: number; y: number };
+          expect(row.x).toBe(expectedX);
+          expect(row.y).toBe(y[i]);
+        });
+      }),
+    );
+  });
+});
+
+// ─── toJsonRecords ────────────────────────────────────────────────────────────
+
+describe("toJsonRecords", () => {
+  test("basic records", () => {
+    const frame = DataFrame.fromColumns({ a: [1, 2], b: ["x", "y"] });
+    expect(toJsonRecords(frame)).toEqual([
+      { a: 1, b: "x" },
+      { a: 2, b: "y" },
+    ]);
+  });
+
+  test("empty DataFrame", () => {
+    const frame = DataFrame.fromColumns({ a: [] as number[] });
+    expect(toJsonRecords(frame)).toEqual([]);
+  });
+
+  test("column names with dots are NOT split", () => {
+    const frame = DataFrame.fromColumns({ "a.b": [1, 2] });
+    const [first] = toJsonRecords(frame);
+    expect(first).toHaveProperty("a.b", 1);
+  });
+
+  test("null values preserved", () => {
+    const frame = DataFrame.fromColumns({ x: [null, 1] });
+    const [first] = toJsonRecords(frame);
+    expect(first).toEqual({ x: null });
+  });
+
+  // property: every record exposes exactly the DataFrame's columns
+  test("property: all records have same keys as DataFrame columns", () => {
+    const values = fc.array(fc.integer({ min: 0, max: 100 }), { minLength: 1, maxLength: 5 });
+    fc.assert(
+      fc.property(values, (nums) => {
+        const records = toJsonRecords(DataFrame.fromColumns({ val: nums }));
+        records.forEach((record) => {
+          expect(Object.keys(record)).toEqual(["val"]);
+        });
+      }),
+    );
+  });
+});
+
+// ─── toJsonSplit ──────────────────────────────────────────────────────────────
+
+describe("toJsonSplit", () => {
+  test("basic split structure", () => {
+    const frame = DataFrame.fromColumns({ a: [1, 2], b: ["x", "y"] });
+    const { columns, data, index } = toJsonSplit(frame);
+    expect(columns).toEqual(["a", "b"]);
+    expect(data).toEqual([[1, "x"], [2, "y"]]);
+    expect(index).toEqual([0, 1]);
+  });
+
+  test("index excluded when includeIndex=false", () => {
+    const frame = DataFrame.fromColumns({ a: [1, 2] });
+    const { index } = toJsonSplit(frame, { includeIndex: false });
+    expect(index).toBeUndefined();
+  });
+
+  test("custom index preserved", () => {
+    const frame = DataFrame.fromColumns({ a: [1, 2] }, { index: ["r1", "r2"] });
+    const { index } = toJsonSplit(frame);
+    expect(index).toEqual(["r1", "r2"]);
+  });
+
+  test("empty DataFrame", () => {
+    const frame = DataFrame.fromColumns({ a: [] as number[] });
+    const { columns, data } = toJsonSplit(frame);
+    expect(columns).toEqual(["a"]);
+    expect(data).toEqual([]);
+  });
+
+  test("NaN maps to null", () => {
+    const frame = DataFrame.fromColumns({ a: [Number.NaN, 1] });
+    const { data } = toJsonSplit(frame);
+    expect(data[0]).toEqual([null]);
+    expect(data[1]).toEqual([1]);
+  });
+
+  // property: one data row is emitted per DataFrame row
+  test("property: data length equals row count", () => {
+    const values = fc.array(fc.integer(), { minLength: 0, maxLength: 10 });
+    fc.assert(
+      fc.property(values, (nums) => {
+        const { data } = toJsonSplit(DataFrame.fromColumns({ n: nums }));
+        expect(data.length).toBe(nums.length);
+      }),
+    );
+  });
+});
+
+// ─── toJsonIndex ──────────────────────────────────────────────────────────────
+
+describe("toJsonIndex", () => {
+  test("basic index structure", () => {
+    const frame = DataFrame.fromColumns({ a: [1, 2], b: ["x", "y"] });
+    expect(toJsonIndex(frame)).toEqual({
+      "0": { a: 1, b: "x" },
+      "1": { a: 2, b: "y" },
+    });
+  });
+
+  test("custom string index", () => {
+    const frame = DataFrame.fromColumns({ v: [10, 20] }, { index: ["foo", "bar"] });
+    const byLabel = toJsonIndex(frame);
+    expect(byLabel).toHaveProperty("foo");
+    expect(byLabel).toHaveProperty("bar");
+    const fooRow = byLabel["foo"] as { v: number };
+    expect(fooRow.v).toBe(10);
+  });
+
+  test("empty DataFrame", () => {
+    const frame = DataFrame.fromColumns({ a: [] as number[] });
+    expect(toJsonIndex(frame)).toEqual({});
+  });
+
+  test("null values", () => {
+    const frame = DataFrame.fromColumns({ x: [null, 5] });
+    const byLabel = toJsonIndex(frame);
+    const firstRow = byLabel["0"] as { x: null };
+    const secondRow = byLabel["1"] as { x: number };
+    expect(firstRow.x).toBeNull();
+    expect(secondRow.x).toBe(5);
+  });
+
+  // property: one key is emitted per DataFrame row
+  test("property: key count equals rows", () => {
+    const values = fc.array(fc.integer(), { minLength: 0, maxLength: 10 });
+    fc.assert(
+      fc.property(values, (nums) => {
+        const byLabel = toJsonIndex(DataFrame.fromColumns({ n: nums }));
+        expect(Object.keys(byLabel).length).toBe(nums.length);
+      }),
+    );
+  });
+});
diff --git a/tests/stats/str_findall.test.ts b/tests/stats/str_findall.test.ts
new file mode 100644
index 00000000..f66886c3
--- /dev/null
+++ b/tests/stats/str_findall.test.ts
@@ -0,0 +1,282 @@
+/**
+ * Tests for str_findall — strFindall, strFindallCount, strFindFirst, strFindallExpand
+ */
+
+import { describe, expect, test } from "bun:test";
+import * as fc from "fast-check";
+import { DataFrame, Series } from "../../src/index.ts";
+import {
+ strFindall,
+ strFindallCount,
+ strFindFirst,
+ strFindallExpand,
+} from "../../src/stats/str_findall.ts";
+
+// ─── strFindall ───────────────────────────────────────────────────────────────
+
+describe("strFindall", () => {
+ test("basic word matching", () => {
+ const s = new Series({ data: ["one two three", "four five"] });
+ const result = strFindall(s, /\w+/);
+ expect(JSON.parse(result.values[0] as string)).toEqual(["one", "two", "three"]);
+ expect(JSON.parse(result.values[1] as string)).toEqual(["four", "five"]);
+ });
+
+ test("no matches returns empty array", () => {
+ const s = new Series({ data: ["hello", "world"] });
+ const result = strFindall(s, /\d+/);
+ expect(JSON.parse(result.values[0] as string)).toEqual([]);
+ expect(JSON.parse(result.values[1] as string)).toEqual([]);
+ });
+
+ test("null/NaN elements return null", () => {
+ const s = new Series({ data: ["hello", null, Number.NaN, "world"] });
+ const result = strFindall(s, /\w+/);
+ expect(result.values[0]).not.toBeNull();
+ expect(result.values[1]).toBeNull();
+ expect(result.values[2]).toBeNull();
+ expect(result.values[3]).not.toBeNull();
+ });
+
+ test("with capture group returns first group", () => {
+ const s = new Series({ data: ["key=val", "a=1 b=2"] });
+ const result = strFindall(s, /(\w+)=\w+/);
+ expect(JSON.parse(result.values[0] as string)).toEqual(["key"]);
+ expect(JSON.parse(result.values[1] as string)).toEqual(["a", "b"]);
+ });
+
+ test("string pattern with flags", () => {
+ const s = new Series({ data: ["AAA bbb", "ccc DDD"] });
+ const result = strFindall(s, "[a-z]+", "i");
+ expect((JSON.parse(result.values[0] as string) as string[]).length).toBe(2);
+ });
+
+ test("preserves index", () => {
+ const s = new Series({ data: ["a b", "c d"], index: ["x", "y"] });
+ const result = strFindall(s, /\w/);
+ expect(result.index.toArray()).toEqual(["x", "y"]);
+ });
+
+ test("array input", () => {
+ const result = strFindall(["hello world", "foo bar"], /\w+/);
+ expect(JSON.parse(result.values[0] as string)).toEqual(["hello", "world"]);
+ });
+
+ test("scalar input treated as single element", () => {
+ const result = strFindall("hello world", /\w+/);
+ expect(result.values.length).toBe(1);
+ expect(JSON.parse(result.values[0] as string)).toEqual(["hello", "world"]);
+ });
+
+ test("consecutive matches", () => {
+ const s = new Series({ data: ["aababc"] });
+ const result = strFindall(s, /a+/); // a+ is greedy: the leading "aa" is ONE match
+ expect(JSON.parse(result.values[0] as string)).toEqual(["aa", "a"]);
+ });
+
+ // property: count of findall matches equals strFindallCount
+ test("property: findall length matches findallCount", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.string({ minLength: 0, maxLength: 20 }), { minLength: 1, maxLength: 5 }),
+ (strs) => {
+ const s = new Series({ data: strs });
+ const all = strFindall(s, /\w+/);
+ const cnt = strFindallCount(s, /\w+/);
+ for (let i = 0; i < strs.length; i++) {
+ const matches = JSON.parse(all.values[i] as string) as string[];
+ expect(cnt.values[i]).toBe(matches.length);
+ }
+ },
+ ),
+ );
+ });
+});
+
+// ─── strFindallCount ──────────────────────────────────────────────────────────
+
+describe("strFindallCount", () => {
+ test("counts matches correctly", () => {
+ const s = new Series({ data: ["aaa", "bbb", "ccc"] });
+ const result = strFindallCount(s, /a/);
+ expect(result.values).toEqual([3, 0, 0]);
+ });
+
+ test("zero for no match", () => {
+ const s = new Series({ data: ["xyz", "abc"] });
+ const result = strFindallCount(s, /\d/);
+ expect(result.values).toEqual([0, 0]);
+ });
+
+ test("null for null input", () => {
+ const s = new Series({ data: [null, "abc"] });
+ const result = strFindallCount(s, /\w/);
+ expect(result.values[0]).toBeNull();
+ expect(result.values[1]).toBe(3);
+ });
+
+ test("overlapping-looking pattern counts non-overlapping", () => {
+ const s = new Series({ data: ["aaaa"] });
+ // /aa/ matches at index 0 and 2 → 2 matches
+ const result = strFindallCount(s, /aa/);
+ expect(result.values[0]).toBe(2);
+ });
+
+ test("string pattern", () => {
+ const s = new Series({ data: ["Hello World", "FOO FOO"] });
+ const result = strFindallCount(s, "[A-Z]+", "g");
+ // /[A-Z]+/g: "H", "W" → 2; "FOO", "FOO" → 2
+ expect(result.values[0]).toBe(2);
+ expect(result.values[1]).toBe(2);
+ });
+
+ // property: count is always non-negative integer for non-null inputs
+ test("property: count >= 0 for non-null", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.string({ minLength: 0, maxLength: 30 }), { minLength: 1, maxLength: 10 }),
+ (strs) => {
+ const s = new Series({ data: strs });
+ const cnt = strFindallCount(s, /\w/);
+ for (const v of cnt.values) {
+ expect(typeof v === "number" && v >= 0).toBe(true);
+ }
+ },
+ ),
+ );
+ });
+});
+
+// ─── strFindFirst ─────────────────────────────────────────────────────────────
+
+describe("strFindFirst", () => {
+ test("returns first match", () => {
+ const s = new Series({ data: ["price: $10.99", "no price", "cost: $5.00"] });
+ const result = strFindFirst(s, /\$[\d.]+/);
+ expect(result.values).toEqual(["$10.99", null, "$5.00"]);
+ });
+
+ test("null for null input", () => {
+ const s = new Series({ data: [null, "abc123"] });
+ const result = strFindFirst(s, /\d+/);
+ expect(result.values[0]).toBeNull();
+ expect(result.values[1]).toBe("123");
+ });
+
+ test("null for no match", () => {
+ const s = new Series({ data: ["hello", "world"] });
+ const result = strFindFirst(s, /\d+/);
+ expect(result.values).toEqual([null, null]);
+ });
+
+ test("returns first capture group when group present", () => {
+ const s = new Series({ data: ["2024-01-15", "2023-12-31"] });
+ const result = strFindFirst(s, /(\d{4})-\d{2}-\d{2}/);
+ expect(result.values).toEqual(["2024", "2023"]);
+ });
+
+ test("does not consume multiple matches (only first)", () => {
+ const s = new Series({ data: ["aaa"] });
+ const result = strFindFirst(s, /a/);
+ expect(result.values).toEqual(["a"]);
+ });
+
+ test("preserves index", () => {
+ const s = new Series({ data: ["foo1", "bar2"], index: ["p", "q"] });
+ const result = strFindFirst(s, /\d/);
+ expect(result.index.toArray()).toEqual(["p", "q"]);
+ expect(result.values).toEqual(["1", "2"]);
+ });
+
+ test("array input", () => {
+ const result = strFindFirst(["hello123", "world456"], /\d+/);
+ expect(result.values).toEqual(["123", "456"]);
+ });
+
+ // property: strFindFirst result matches first element of strFindall
+ test("property: findFirst equals first element of findall", () => {
+ fc.assert(
+ fc.property(
+ fc.array(
+ fc.string({ minLength: 0, maxLength: 15 }).filter((s) => !s.includes("\0")),
+ { minLength: 1, maxLength: 6 },
+ ),
+ (strs) => {
+ const s = new Series({ data: strs });
+ const first = strFindFirst(s, /[a-z]+/);
+ const all = strFindall(s, /[a-z]+/);
+ for (let i = 0; i < strs.length; i++) {
+ const allMatches = JSON.parse(all.values[i] as string) as string[];
+ if (allMatches.length === 0) {
+ expect(first.values[i]).toBeNull();
+ } else {
+ expect(first.values[i]).toBe(allMatches[0]);
+ }
+ }
+ },
+ ),
+ );
+ });
+});
+
+// ─── strFindallExpand ─────────────────────────────────────────────────────────
+
+describe("strFindallExpand", () => {
+ test("named capture groups become columns", () => {
+ const s = new Series({ data: ["John 30", "Jane 25", "unknown"] });
+ const df = strFindallExpand(s, /(?<name>\w+)\s+(?<age>\d+)/);
+ expect(df.columns).toEqual(["name", "age"]);
+ expect(df.col("name").values).toEqual(["John", "Jane", null]);
+ expect(df.col("age").values).toEqual(["30", "25", null]);
+ });
+
+ test("unnamed groups numbered as 0, 1, ...", () => {
+ const s = new Series({ data: ["abc 123", "def 456"] });
+ const df = strFindallExpand(s, /(\w+)\s+(\d+)/);
+ expect(df.columns).toContain("0");
+ expect(df.columns).toContain("1");
+ expect(df.col("0").values).toEqual(["abc", "def"]);
+ expect(df.col("1").values).toEqual(["123", "456"]);
+ });
+
+ test("null input produces null row", () => {
+ const s = new Series({ data: ["hello 5", null] });
+ const df = strFindallExpand(s, /(?<word>\w+)\s+(?<num>\d+)/);
+ expect(df.col("word").values[1]).toBeNull();
+ expect(df.col("num").values[1]).toBeNull();
+ });
+
+ test("no match produces null row", () => {
+ const s = new Series({ data: ["hello", "world 42"] });
+ const df = strFindallExpand(s, /(?<word>\w+)\s+(?<num>\d+)/);
+ expect(df.col("word").values[0]).toBeNull();
+ expect(df.col("num").values[0]).toBeNull();
+ expect(df.col("word").values[1]).toBe("world");
+ expect(df.col("num").values[1]).toBe("42");
+ });
+
+ test("preserves row index from Series", () => {
+ const s = new Series({ data: ["a 1", "b 2"], index: ["r1", "r2"] });
+ const df = strFindallExpand(s, /(?<c>\w)\s+(?<n>\d)/);
+ expect(df.index.toArray()).toEqual(["r1", "r2"]);
+ });
+
+ test("array input works", () => {
+ const df = strFindallExpand(["x 10", "y 20"], /(?<c>\w)\s+(?<n>\d+)/);
+ expect(df.col("c").values).toEqual(["x", "y"]);
+ expect(df.col("n").values).toEqual(["10", "20"]);
+ });
+
+ // property: output has same number of rows as input (matching or not)
+ test("property: output rows match input length", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.string({ minLength: 0, maxLength: 20 }), { minLength: 1, maxLength: 10 }),
+ (strs) => {
+ const df = strFindallExpand(strs, /(?<w>\w+)/);
+ expect(df.index.size).toBe(strs.length);
+ },
+ ),
+ );
+ });
+});
From e1cf834a9045587dac33f09f20f8bc2ed17f8160 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Tue, 21 Apr 2026 22:40:00 +0000
Subject: [PATCH 07/30] fix(lint): add missing return types and fix import
restrictions
- Add explicit return types to arrow functions in rolling_apply.test.ts
(nursery/useExplicitType)
- Import Rolling, RollingSeriesLike, Scalar from src/index.ts instead
of internal module paths in rolling.test.ts
(nursery/useImportRestrictions)
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
tests/window/rolling.test.ts | 6 ++----
tests/window/rolling_apply.test.ts | 6 +++---
2 files changed, 5 insertions(+), 7 deletions(-)
diff --git a/tests/window/rolling.test.ts b/tests/window/rolling.test.ts
index ca6a6a6b..6ab8576c 100644
--- a/tests/window/rolling.test.ts
+++ b/tests/window/rolling.test.ts
@@ -4,10 +4,8 @@
import { describe, expect, it } from "bun:test";
import fc from "fast-check";
-import { DataFrame, DataFrameRolling, Series } from "../../src/index.ts";
-import type { Scalar } from "../../src/types.ts";
-import { Rolling } from "../../src/window/index.ts";
-import type { RollingSeriesLike } from "../../src/window/index.ts";
+import { DataFrame, DataFrameRolling, Rolling, Series } from "../../src/index.ts";
+import type { RollingSeriesLike, Scalar } from "../../src/index.ts";
// ─── helpers ──────────────────────────────────────────────────────────────────
diff --git a/tests/window/rolling_apply.test.ts b/tests/window/rolling_apply.test.ts
index 912329cc..4b6ed1d4 100644
--- a/tests/window/rolling_apply.test.ts
+++ b/tests/window/rolling_apply.test.ts
@@ -121,7 +121,7 @@ describe("rollingApply", () => {
test("pairwise diff function", () => {
// last - first in window
- const diff = (nums: readonly number[]) => (nums.at(-1) ?? 0) - (nums[0] ?? 0);
+ const diff = (nums: readonly number[]): number => (nums.at(-1) ?? 0) - (nums[0] ?? 0);
const out = rollingApply(s(1, 3, 6, 10, 15), 3, diff);
expect(out.toArray()).toEqual([null, null, 5, 7, 9]);
});
@@ -151,7 +151,7 @@ describe("rollingApply", () => {
});
test("range function over window", () => {
- const range = (nums: readonly number[]) => Math.max(...nums) - Math.min(...nums);
+ const range = (nums: readonly number[]): number => Math.max(...nums) - Math.min(...nums);
const out = rollingApply(s(1, 5, 2, 8, 3), 3, range);
expect(out.toArray()).toEqual([null, null, 4, 6, 6]);
});
@@ -244,7 +244,7 @@ describe("dataFrameRollingApply", () => {
});
test("custom function applied independently per column", () => {
- const diff = (nums: readonly number[]) => (nums.at(-1) ?? 0) - (nums[0] ?? 0);
+ const diff = (nums: readonly number[]): number => (nums.at(-1) ?? 0) - (nums[0] ?? 0);
const df = DataFrame.fromColumns({ a: [1, 3, 6], b: [10, 15, 21] });
const out = dataFrameRollingApply(df, 2, diff);
expect(out.col("a").toArray()).toEqual([null, 2, 3]);
From fed12addf5546ad0303b3744af57f2afde927b32 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 22 Apr 2026 00:44:41 +0000
Subject: [PATCH 08/30] Iteration 237: Add cutBinsToFrame + xs cross-section
selection
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
- src/stats/cut_bins_to_frame.ts: cutBinsToFrame (bin summary DataFrame),
cutBinCounts (label→count dict), binEdges (edges-only DataFrame)
- src/stats/xs.ts: xsDataFrame / xsSeries — pandas .xs() cross-section
selection, flat and MultiIndex, axis=0/1, level targeting, dropLevel control
- Full test suites with property-based tests via fast-check
- Playground pages for both features
Metric: 113 (+2)
Run: https://github.com/githubnext/tsessebe/actions/runs/24753646544
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
playground/cut_bins_to_frame.html | 92 +++++++++
playground/index.html | 10 +
playground/xs.html | 109 +++++++++++
src/index.ts | 4 +
src/stats/cut_bins_to_frame.ts | 161 ++++++++++++++++
src/stats/index.ts | 8 +
src/stats/xs.ts | 258 +++++++++++++++++++++++++
tests/stats/cut_bins_to_frame.test.ts | 219 +++++++++++++++++++++
tests/stats/xs.test.ts | 264 ++++++++++++++++++++++++++
9 files changed, 1125 insertions(+)
create mode 100644 playground/cut_bins_to_frame.html
create mode 100644 playground/xs.html
create mode 100644 src/stats/cut_bins_to_frame.ts
create mode 100644 src/stats/xs.ts
create mode 100644 tests/stats/cut_bins_to_frame.test.ts
create mode 100644 tests/stats/xs.test.ts
diff --git a/playground/cut_bins_to_frame.html b/playground/cut_bins_to_frame.html
new file mode 100644
index 00000000..43815fcd
--- /dev/null
+++ b/playground/cut_bins_to_frame.html
@@ -0,0 +1,92 @@
+
+
+
+
+
+ cutBinsToFrame — tsb playground
+
+
+
+
cutBinsToFrame
+
+ cutBinsToFrame(result, { data }) converts the output of
+ cut() or qcut() into a summary DataFrame with
+ one row per bin, showing the bin label, edges, count, and frequency.
+
+
+
Interactive Demo
+
+
+
+
+
+
Click "Run" to see the result.
+
+
What it does
+
import { cut, cutBinsToFrame, cutBinCounts, binEdges } from "tsb";
+
+// Bin 20 random values into 4 equal-width bins
+const data = Array.from({ length: 20 }, () => Math.random() * 100);
+const result = cut(data, 4);
+
+// Summary DataFrame: bin | left | right | count | frequency
+const df = cutBinsToFrame(result, { data });
+
+// Just the count dictionary
+const counts = cutBinCounts(result);
+// { "(0.0, 25.0]": 5, "(25.0, 50.0]": 6, ... }
+
+// Just edges indexed by label
+const edges = binEdges(result);
+
+
+
Related Functions
+
+
cut(data, bins) — bin values into equal-width bins
+
qcut(data, bins) — bin values into quantile-based bins
+
cutBinsToFrame(result, { data }) — summary DataFrame
+
cutBinCounts(result) — label → count dictionary
+
binEdges(result) — edges DataFrame indexed by label
Convert cut/qcut BinResult into a tidy summary DataFrame. cutBinsToFrame returns bin labels, edges, counts, and frequencies. cutBinCounts returns a label→count dict. binEdges returns an edges-only DataFrame.
import { Series, seriesUpdate } from "tsb";
+
+const s = new Series({ data: [1, 2, 3], index: [0, 1, 2] });
+// other only has label 1 — other labels unchanged
+const other = new Series({ data: [99], index: [1] });
+seriesUpdate(s, other).values;
+// [1, 99, 3]
+
+
+
diff --git a/src/index.ts b/src/index.ts
index e8eb1b65..028e1079 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -572,3 +572,9 @@ export {
export type { SwapLevelDataFrameOptions, ReorderLevelsDataFrameOptions } from "./stats/swaplevel.ts";
export { truncateSeries, truncateDataFrame } from "./stats/truncate.ts";
export type { TruncateOptions } from "./stats/truncate.ts";
+export { seriesBetween } from "./stats/index.ts";
+export type { BetweenInclusive, BetweenOptions } from "./stats/index.ts";
+export { seriesUpdate, dataFrameUpdate } from "./stats/index.ts";
+export type { UpdateOptions } from "./stats/index.ts";
+export { filterDataFrame, filterSeries } from "./stats/index.ts";
+export type { FilterLabelsOptions } from "./stats/index.ts";
diff --git a/src/stats/between.ts b/src/stats/between.ts
new file mode 100644
index 00000000..014de5d4
--- /dev/null
+++ b/src/stats/between.ts
@@ -0,0 +1,125 @@
+/**
+ * between — element-wise range check for Series values.
+ *
+ * Mirrors `pandas.Series.between(left, right, inclusive='both')`.
+ *
+ * Returns a boolean Series indicating whether each element falls within the
+ * interval `[left, right]` (by default inclusive on both ends).
+ *
+ * - {@link seriesBetween} — element-wise range check
+ *
+ * @example
+ * ```ts
+ * import { Series, seriesBetween } from "tsb";
+ *
+ * const s = new Series({ data: [1, 2, 3, 4, 5] });
+ * seriesBetween(s, 2, 4).values; // [false, true, true, true, false]
+ *
+ * seriesBetween(s, 2, 4, { inclusive: "left" }).values;
+ * // [false, true, true, false, false]
+ * ```
+ *
+ * @module
+ */
+
+import { Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/**
+ * Controls which endpoints of the interval are included.
+ * - `"both"` (default): left ≤ x ≤ right
+ * - `"left"`: left ≤ x < right
+ * - `"right"`: left < x ≤ right
+ * - `"neither"`: left < x < right
+ */
+export type BetweenInclusive = "both" | "left" | "right" | "neither";
+
+/** Options for {@link seriesBetween}. */
+export interface BetweenOptions {
+ /**
+ * Which endpoints to include.
+ * @default "both"
+ */
+ readonly inclusive?: BetweenInclusive;
+}
+
+// ─── internal helpers ─────────────────────────────────────────────────────────
+
+/** Return `true` when `v` is a missing value (null, undefined, NaN). */
+function isMissing(v: unknown): boolean {
+ if (v === null || v === undefined) {
+ return true;
+ }
+ if (typeof v === "number" && Number.isNaN(v)) {
+ return true;
+ }
+ return false;
+}
+
+/** Compare two scalar values as numbers or strings. */
+function scalarLt(a: Scalar, b: Scalar): boolean {
+ return (a as unknown as number) < (b as unknown as number);
+}
+
+function scalarLte(a: Scalar, b: Scalar): boolean {
+ return (a as unknown as number) <= (b as unknown as number);
+}
+
+/**
+ * Check whether a single scalar `v` falls inside [left, right] according to
+ * the `inclusive` setting. Returns `false` for any missing value.
+ */
+function inRange(v: Scalar, left: Scalar, right: Scalar, inclusive: BetweenInclusive): boolean {
+ if (isMissing(v) || isMissing(left) || isMissing(right)) {
+ return false;
+ }
+ const leftOk = inclusive === "both" || inclusive === "left" ? scalarLte(left, v) : scalarLt(left, v);
+ const rightOk = inclusive === "both" || inclusive === "right" ? scalarLte(v, right) : scalarLt(v, right);
+ return leftOk && rightOk;
+}
+
+// ─── seriesBetween ─────────────────────────────────────────────────────────────
+
+/**
+ * Return a boolean Series indicating whether each element of `s` lies within
+ * the range `[left, right]`.
+ *
+ * Missing values in `s` produce `false` (matching pandas behaviour).
+ *
+ * @param s - Source Series.
+ * @param left - Left bound of the interval.
+ * @param right - Right bound of the interval.
+ * @param options - See {@link BetweenOptions}.
+ * @returns Boolean Series with the same index as `s`.
+ *
+ * @example
+ * ```ts
+ * import { Series, seriesBetween } from "tsb";
+ *
+ * const s = new Series({ data: [1, 2, 3, 4, 5] });
+ * seriesBetween(s, 2, 4).values;
+ * // [false, true, true, true, false]
+ *
+ * seriesBetween(s, 2, 4, { inclusive: "neither" }).values;
+ * // [false, false, true, false, false]
+ * ```
+ */
+export function seriesBetween(
+ s: Series,
+ left: Scalar,
+ right: Scalar,
+ options: BetweenOptions = {},
+): Series {
+ const inclusive: BetweenInclusive = options.inclusive ?? "both";
+ const data: boolean[] = [];
+ for (let i = 0; i < s.size; i++) {
+ data.push(inRange(s.values[i] as Scalar, left, right, inclusive));
+ }
+ return new Series({
+ data,
+ index: s.index,
+ name: s.name,
+ });
+}
diff --git a/src/stats/filter_labels.ts b/src/stats/filter_labels.ts
new file mode 100644
index 00000000..c8378fbc
--- /dev/null
+++ b/src/stats/filter_labels.ts
@@ -0,0 +1,201 @@
+/**
+ * filter_labels — filter a Series or DataFrame by row/column labels.
+ *
+ * Mirrors `pandas.DataFrame.filter(items, like, regex, axis)`.
+ *
+ * Exactly one of `items`, `like`, or `regex` must be specified.
+ *
+ * - {@link filterDataFrame} — filter DataFrame rows or columns by label
+ * - {@link filterSeries} — filter Series index labels
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, filterDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4], c_x: [5, 6] });
+ *
+ * // Keep only columns whose name is in the list
+ * filterDataFrame(df, { items: ["a", "c_x"] }).columns.values;
+ * // ["a", "c_x"]
+ *
+ * // Keep columns whose name contains "_x"
+ * filterDataFrame(df, { like: "_x" }).columns.values;
+ * // ["c_x"]
+ *
+ * // Keep columns matching regex "^[ab]$"
+ * filterDataFrame(df, { regex: "^[ab]$" }).columns.values;
+ * // ["a", "b"]
+ * ```
+ *
+ * @module
+ */
+
+import { DataFrame, Index, Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/** Options for {@link filterDataFrame} and {@link filterSeries}. */
+export interface FilterLabelsOptions {
+ /**
+ * Keep labels whose string representation appears in this list.
+ * Mutually exclusive with `like` and `regex`.
+ */
+ readonly items?: readonly Label[];
+
+ /**
+ * Keep labels whose string representation **contains** this substring.
+ * Mutually exclusive with `items` and `regex`.
+ */
+ readonly like?: string;
+
+ /**
+ * Keep labels whose string representation matches this regular expression.
+ * Mutually exclusive with `items` and `like`.
+ */
+ readonly regex?: string;
+
+ /**
+ * Axis to filter along (DataFrame only).
+ * - `0` or `"index"`: filter rows.
+ * - `1` or `"columns"`: filter columns (default).
+ * @default 1 (columns, matching pandas default for DataFrame.filter)
+ */
+ readonly axis?: 0 | 1 | "index" | "columns";
+}
+
+// ─── internal helpers ─────────────────────────────────────────────────────────
+
+/**
+ * Build a predicate for a label given the filter options.
+ * Exactly one of `items`, `like`, or `regex` is expected to be set.
+ */
+function buildPredicate(options: FilterLabelsOptions): (label: Label) => boolean {
+ const { items, like, regex } = options;
+ const setCount = (items !== undefined ? 1 : 0) + (like !== undefined ? 1 : 0) + (regex !== undefined ? 1 : 0);
+ if (setCount === 0) {
+ throw new TypeError("filterDataFrame: exactly one of items, like, or regex must be specified");
+ }
+ if (setCount > 1) {
+ throw new TypeError("filterDataFrame: only one of items, like, or regex may be specified");
+ }
+
+ if (items !== undefined) {
+ const set = new Set(items.map(String));
+ return (label: Label): boolean => set.has(String(label));
+ }
+ if (like !== undefined) {
+ return (label: Label): boolean => String(label).includes(like);
+ }
+ if (regex !== undefined) {
+ const re = new RegExp(regex);
+ return (label: Label): boolean => re.test(String(label));
+ }
+ // unreachable — setCount === 1 guarantees one branch was taken
+ throw new TypeError("filterDataFrame: internal error");
+}
+
+// ─── filterDataFrame ──────────────────────────────────────────────────────────
+
+/**
+ * Filter rows or columns of a DataFrame by label.
+ *
+ * Pass exactly one of `items`, `like`, or `regex` in `options`.
+ * The `axis` option controls whether rows (`0`/`"index"`) or columns
+ * (`1`/`"columns"`) are filtered; defaults to `1` (columns), matching the
+ * pandas default.
+ *
+ * @param df - Source DataFrame.
+ * @param options - See {@link FilterLabelsOptions}.
+ * @returns New DataFrame with only the matching rows or columns.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, filterDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns(
+ * { a: [1, 2, 3], b: [4, 5, 6], c: [7, 8, 9] },
+ * { index: [10, 20, 30] },
+ * );
+ *
+ * // Columns
+ * filterDataFrame(df, { items: ["a", "c"] }).columns.values; // ["a", "c"]
+ * filterDataFrame(df, { like: "b" }).columns.values; // ["b"]
+ * filterDataFrame(df, { regex: "[ac]" }).columns.values; // ["a", "c"]
+ *
+ * // Rows
+ * filterDataFrame(df, { items: [10, 30], axis: 0 }).index.values; // [10, 30]
+ * ```
+ */
+export function filterDataFrame(df: DataFrame, options: FilterLabelsOptions): DataFrame {
+ const axisSpec = options.axis ?? 1; // columns unless the caller asks for rows
+ const filterRows = axisSpec === 0 || axisSpec === "index";
+ const predicate = buildPredicate(options); // throws unless exactly one selector is set
+
+ if (filterRows) {
+ const positions: number[] = []; // row positions whose index label matches
+ for (let i = 0; i < df.index.size; i++) {
+ if (predicate(df.index.at(i))) {
+ positions.push(i);
+ }
+ }
+ const newIndexLabels = positions.map((i) => df.index.at(i));
+ const newIndex = new Index(newIndexLabels);
+ const colNames = df.columns.values as readonly string[];
+ const cols = new Map<string, Series<Scalar>>();
+ for (const name of colNames) {
+ const col = df.col(name);
+ const data = positions.map((i) => col.values[i] as Scalar);
+ cols.set(name, new Series({ data, index: newIndex, dtype: col.dtype }));
+ }
+ return new DataFrame(cols, newIndex);
+ } else {
+ const colNames = df.columns.values as readonly string[];
+ const kept = colNames.filter((name) => predicate(name));
+ const cols = new Map<string, Series<Scalar>>();
+ for (const name of kept) {
+ const col = df.col(name);
+ cols.set(name, new Series({ data: col.values as Scalar[], index: df.index, dtype: col.dtype }));
+ }
+ return new DataFrame(cols, df.index);
+ }
+}
+
+// ─── filterSeries ─────────────────────────────────────────────────────────────
+
+/**
+ * Filter a Series by its index labels.
+ *
+ * Pass exactly one of `items`, `like`, or `regex` in `options`.
+ * (The `axis` option is ignored for Series — only the index is filtered.)
+ *
+ * @param s - Source Series.
+ * @param options - See {@link FilterLabelsOptions}.
+ * @returns New Series with only the matching index positions.
+ *
+ * @example
+ * ```ts
+ * import { Series, filterSeries } from "tsb";
+ *
+ * const s = new Series({ data: [1, 2, 3], index: ["alpha", "beta", "gamma"] });
+ * filterSeries(s, { like: "a" }).index.values; // ["alpha", "gamma"]
+ * filterSeries(s, { items: ["beta"] }).values; // [2]
+ * ```
+ */
+export function filterSeries(s: Series, options: FilterLabelsOptions): Series {
+ const predicate = buildPredicate(options);
+ const positions: number[] = [];
+ for (let i = 0; i < s.size; i++) {
+ if (predicate(s.index.at(i))) {
+ positions.push(i);
+ }
+ }
+ const data = positions.map((i) => s.values[i] as Scalar);
+ const labels = positions.map((i) => s.index.at(i));
+ return new Series({
+ data,
+ index: new Index(labels),
+ dtype: s.dtype,
+ name: s.name,
+ });
+}
diff --git a/src/stats/index.ts b/src/stats/index.ts
index b625a51c..7a9cc4a5 100644
--- a/src/stats/index.ts
+++ b/src/stats/index.ts
@@ -400,3 +400,9 @@ export {
export type { SwapLevelDataFrameOptions, ReorderLevelsDataFrameOptions } from "./swaplevel.ts";
export { truncateSeries, truncateDataFrame } from "./truncate.ts";
export type { TruncateOptions } from "./truncate.ts";
+export { seriesBetween } from "./between.ts";
+export type { BetweenInclusive, BetweenOptions } from "./between.ts";
+export { seriesUpdate, dataFrameUpdate } from "./update.ts";
+export type { UpdateOptions } from "./update.ts";
+export { filterDataFrame, filterSeries } from "./filter_labels.ts";
+export type { FilterLabelsOptions } from "./filter_labels.ts";
diff --git a/src/stats/update.ts b/src/stats/update.ts
new file mode 100644
index 00000000..cca982c5
--- /dev/null
+++ b/src/stats/update.ts
@@ -0,0 +1,188 @@
+/**
+ * update — non-mutating update of a Series or DataFrame using non-NA values
+ * from another object (a new object is returned; the inputs are not modified).
+ *
+ * Mirrors `pandas.DataFrame.update` and `pandas.Series.update`.
+ *
+ * - {@link seriesUpdate} — update a Series from another Series
+ * - {@link dataFrameUpdate} — update a DataFrame from another DataFrame
+ *
+ * @example
+ * ```ts
+ * import { Series, seriesUpdate } from "tsb";
+ *
+ * const s = new Series({ data: [1, null, 3], index: [0, 1, 2] });
+ * const other = new Series({ data: [null, 20, 30], index: [0, 1, 2] });
+ * seriesUpdate(s, other).values; // [1, 20, 30]
+ * ```
+ *
+ * @module
+ */
+
+import { DataFrame, Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/** Options for {@link seriesUpdate} and {@link dataFrameUpdate}. */
+export interface UpdateOptions {
+ /**
+ * When `true` (default), matching values from `other` overwrite existing
+ * non-NA values in `self`. When `false`, only positions that are NA in
+ * `self` are updated.
+ */
+ readonly overwrite?: boolean;
+
+ /**
+ * - `"ignore"` (default): silently skip overlapping non-NA values when
+ * `overwrite` is `false`.
+ * - `"raise"`: throw a `RangeError` when `other` has a non-NA value at a
+ * position where `self` also has a non-NA value and `overwrite` is `false`.
+ */
+ readonly errors?: "ignore" | "raise";
+}
+
+// ─── internal helpers ─────────────────────────────────────────────────────────
+
+/** Return `true` when `v` is a missing value (null, undefined, NaN). */
+function isMissing(v: unknown): boolean {
+ if (v === null || v === undefined) {
+ return true;
+ }
+ if (typeof v === "number" && Number.isNaN(v)) {
+ return true;
+ }
+ return false;
+}
+
+// ─── seriesUpdate ─────────────────────────────────────────────────────────────
+
+/**
+ * Return a new Series whose values are updated from `other` using label
+ * alignment.
+ *
+ * For each label in `self`:
+ * - If `other` has a non-NA value at that label, use it (unless
+ * `overwrite=false` and `self` is already non-NA — in which case `errors`
+ * controls whether to raise or ignore).
+ * - Otherwise keep the existing value from `self`.
+ *
+ * Labels present in `other` but not in `self` are ignored.
+ *
+ * @param self - Source Series.
+ * @param other - Series to update from.
+ * @param options - See {@link UpdateOptions}.
+ * @returns New Series with updated values.
+ *
+ * @example
+ * ```ts
+ * import { Series, seriesUpdate } from "tsb";
+ *
+ * const s = new Series({ data: [1, null, 3], index: ["a", "b", "c"] });
+ * const o = new Series({ data: [10, 20, null], index: ["a", "b", "c"] });
+ * seriesUpdate(s, o).values; // [10, 20, 3]
+ * ```
+ */
+export function seriesUpdate(
+ self: Series,
+ other: Series,
+ options: UpdateOptions = {},
+): Series {
+ const overwrite = options.overwrite ?? true;
+ const errors = options.errors ?? "ignore";
+
+ // Build a label → value map from other (a repeated label keeps its last value)
+ const otherMap = new Map<Label, Scalar>();
+ for (let i = 0; i < other.size; i++) {
+ otherMap.set(other.index.at(i), other.values[i] as Scalar);
+ }
+
+ const newData: Scalar[] = [];
+ for (let i = 0; i < self.size; i++) {
+ const label = self.index.at(i);
+ const selfVal = self.values[i] as Scalar;
+
+ if (otherMap.has(label)) {
+ const otherVal = otherMap.get(label) as Scalar;
+ if (!isMissing(otherVal)) {
+ if (!overwrite && !isMissing(selfVal)) {
+ if (errors === "raise") {
+ throw new RangeError(
+ `update: non-NA value overlap at label "${String(label)}" and overwrite=false`,
+ );
+ }
+ newData.push(selfVal);
+ } else {
+ newData.push(otherVal);
+ }
+ } else {
+ newData.push(selfVal);
+ }
+ } else {
+ newData.push(selfVal);
+ }
+ }
+
+ return new Series({
+ data: newData,
+ index: self.index,
+ dtype: self.dtype,
+ name: self.name,
+ });
+}
+
+// ─── dataFrameUpdate ──────────────────────────────────────────────────────────
+
+/**
+ * Return a new DataFrame whose values are updated from `other` using label
+ * alignment on both row index and column names.
+ *
+ * For each column present in both `self` and `other`, non-NA values in `other`
+ * overwrite the corresponding cells in `self` (subject to `overwrite` and
+ * `errors` options). Columns in `other` that are absent from `self` are
+ * ignored.
+ *
+ * @param self - Source DataFrame.
+ * @param other - DataFrame to update from.
+ * @param options - See {@link UpdateOptions}.
+ * @returns New DataFrame with updated values.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, dataFrameUpdate } from "tsb";
+ *
+ * const df = DataFrame.fromColumns(
+ * { a: [1, null, 3], b: [10, 20, 30] },
+ * { index: [0, 1, 2] },
+ * );
+ * const other = DataFrame.fromColumns(
+ * { a: [null, 99, null], b: [null, null, 300] },
+ * { index: [0, 1, 2] },
+ * );
+ * const result = dataFrameUpdate(df, other);
+ * // a: [1, 99, 3], b: [10, 20, 300]
+ * ```
+ */
+export function dataFrameUpdate(
+ self: DataFrame,
+ other: DataFrame,
+ options: UpdateOptions = {},
+): DataFrame {
+ const colNames = self.columns.values as readonly string[];
+ const otherColNames = new Set(other.columns.values as readonly string[]);
+
+ const cols = new Map<string, Series<Scalar>>();
+ for (const name of colNames) {
+ if (otherColNames.has(name)) {
+ const selfCol = self.col(name);
+ const otherCol = other.col(name);
+ const updated = seriesUpdate(selfCol, otherCol, options);
+ cols.set(name, updated);
+ } else {
+ const col = self.col(name); // column absent from other: carried over unchanged
+ cols.set(name, new Series({ data: col.values as Scalar[], index: self.index, dtype: col.dtype }));
+ }
+ }
+
+ return new DataFrame(cols, self.index);
+}
diff --git a/tests/stats/between.test.ts b/tests/stats/between.test.ts
new file mode 100644
index 00000000..ca767d46
--- /dev/null
+++ b/tests/stats/between.test.ts
@@ -0,0 +1,195 @@
+/**
+ * Tests for between — seriesBetween.
+ *
+ * Covers:
+ * - inclusive="both" (default): left ≤ x ≤ right
+ * - inclusive="left": left ≤ x < right
+ * - inclusive="right": left < x ≤ right
+ * - inclusive="neither": left < x < right
+ * - Missing values produce false
+ * - String values (lexicographic comparison)
+ * - Property-based: results match naive range check
+ */
+
+import { describe, expect, test } from "bun:test";
+import * as fc from "fast-check";
+import { Series, seriesBetween } from "../../src/index.ts";
+import type { Scalar } from "../../src/index.ts";
+
+describe("seriesBetween", () => {
+  // Default mode: both bounds belong to the interval.
+  describe('inclusive="both" (default)', () => {
+    test("basic numeric range", () => {
+      const series = new Series({ data: [1, 2, 3, 4, 5] });
+      const mask = seriesBetween(series, 2, 4);
+      expect(mask.values).toEqual([false, true, true, true, false]);
+    });
+
+    test("boundary values included", () => {
+      const series = new Series({ data: [1, 2, 3, 4, 5] });
+      const mask = seriesBetween(series, 1, 5);
+      expect(mask.values).toEqual([true, true, true, true, true]);
+    });
+
+    test("single value range", () => {
+      const series = new Series({ data: [1, 2, 3, 4, 5] });
+      const mask = seriesBetween(series, 3, 3);
+      expect(mask.values).toEqual([false, false, true, false, false]);
+    });
+
+    test("all outside range", () => {
+      const series = new Series({ data: [1, 2, 3, 4, 5] });
+      const mask = seriesBetween(series, 10, 20);
+      expect(mask.values).toEqual([false, false, false, false, false]);
+    });
+
+    test("negative numbers", () => {
+      const series = new Series({ data: [-3, -1, 0, 1, 3] });
+      const mask = seriesBetween(series, -1, 1);
+      expect(mask.values).toEqual([false, true, true, true, false]);
+    });
+
+    test("float values", () => {
+      const series = new Series({ data: [0.5, 1.0, 1.5, 2.0, 2.5] });
+      const mask = seriesBetween(series, 1.0, 2.0);
+      expect(mask.values).toEqual([false, true, true, true, false]);
+    });
+
+    test("empty Series", () => {
+      const series = new Series({ data: [] });
+      const mask = seriesBetween(series, 0, 10);
+      expect(mask.values).toEqual([]);
+    });
+
+    test("preserves index", () => {
+      const series = new Series({ data: [1, 2, 3], index: ["a", "b", "c"] });
+      const mask = seriesBetween(series, 1, 2);
+      expect(mask.index.values).toEqual(["a", "b", "c"]);
+      expect(mask.values).toEqual([true, true, false]);
+    });
+
+    test("missing values (null/undefined/NaN) → false", () => {
+      const series = new Series({ data: [1, null, undefined, Number.NaN, 3] });
+      const mask = seriesBetween(series, 0, 5);
+      expect(mask.values).toEqual([true, false, false, false, true]);
+    });
+  });
+
+  // Half-open interval [left, right).
+  describe('inclusive="left"', () => {
+    test("left boundary included, right excluded", () => {
+      const series = new Series({ data: [1, 2, 3, 4, 5] });
+      const mask = seriesBetween(series, 2, 4, { inclusive: "left" });
+      expect(mask.values).toEqual([false, true, true, false, false]);
+    });
+
+    test("left boundary at exact match", () => {
+      const series = new Series({ data: [2, 3, 4] });
+      const mask = seriesBetween(series, 2, 4, { inclusive: "left" });
+      expect(mask.values).toEqual([true, true, false]);
+    });
+  });
+
+  // Half-open interval (left, right].
+  describe('inclusive="right"', () => {
+    test("right boundary included, left excluded", () => {
+      const series = new Series({ data: [1, 2, 3, 4, 5] });
+      const mask = seriesBetween(series, 2, 4, { inclusive: "right" });
+      expect(mask.values).toEqual([false, false, true, true, false]);
+    });
+
+    test("right boundary at exact match", () => {
+      const series = new Series({ data: [2, 3, 4] });
+      const mask = seriesBetween(series, 2, 4, { inclusive: "right" });
+      expect(mask.values).toEqual([false, true, true]);
+    });
+  });
+
+  // Open interval (left, right).
+  describe('inclusive="neither"', () => {
+    test("both boundaries excluded", () => {
+      const series = new Series({ data: [1, 2, 3, 4, 5] });
+      const mask = seriesBetween(series, 2, 4, { inclusive: "neither" });
+      expect(mask.values).toEqual([false, false, true, false, false]);
+    });
+
+    test("only the exact middle value qualifies", () => {
+      const series = new Series({ data: [2, 3, 4] });
+      const mask = seriesBetween(series, 2, 4, { inclusive: "neither" });
+      expect(mask.values).toEqual([false, true, false]);
+    });
+
+    test("empty interval → all false", () => {
+      const series = new Series({ data: [1, 2, 3] });
+      // With left === right nothing can lie strictly between the bounds.
+      const mask = seriesBetween(series, 2, 2, { inclusive: "neither" });
+      expect(mask.values).toEqual([false, false, false]);
+    });
+  });
+
+  // String values are compared lexicographically.
+  describe("string values", () => {
+    test("string range", () => {
+      const series = new Series({ data: ["apple", "banana", "cherry", "date"] });
+      // "banana" <= x <= "cherry"
+      const mask = seriesBetween(series, "banana", "cherry");
+      expect(mask.values).toEqual([false, true, true, false]);
+    });
+  });
+
+  // Randomised checks against a straightforward reference implementation.
+  describe("property-based", () => {
+    test("results match naive range check", () => {
+      fc.assert(
+        fc.property(
+          fc.array(fc.float({ noNaN: true }), { minLength: 0, maxLength: 20 }),
+          fc.float({ noNaN: true }),
+          fc.float({ noNaN: true }),
+          (arr, a, b) => {
+            const left = Math.min(a, b);
+            const right = Math.max(a, b);
+            const series = new Series({ data: arr });
+            const mask = seriesBetween(series, left, right).values as boolean[];
+            return arr.every((v, i) => mask[i] === (v >= left && v <= right));
+          },
+        ),
+      );
+    });
+
+    test("seriesBetween left returns subset of both", () => {
+      const finiteFloat = () => fc.float({ noNaN: true, noDefaultInfinity: true });
+      fc.assert(
+        fc.property(
+          fc.array(finiteFloat(), { minLength: 1, maxLength: 20 }),
+          finiteFloat(),
+          finiteFloat(),
+          (arr, a, b) => {
+            const left = Math.min(a, b);
+            const right = Math.max(a, b);
+            const series = new Series({ data: arr });
+            const both = seriesBetween(series, left, right, { inclusive: "both" })
+              .values as boolean[];
+            const leftOnly = seriesBetween(series, left, right, { inclusive: "left" })
+              .values as boolean[];
+            // Anything accepted by "left" must also be accepted by "both".
+            return arr.every((_, i) => !(leftOnly[i] && !both[i]));
+          },
+        ),
+      );
+    });
+  });
+});
diff --git a/tests/stats/filter_labels.test.ts b/tests/stats/filter_labels.test.ts
new file mode 100644
index 00000000..422e7709
--- /dev/null
+++ b/tests/stats/filter_labels.test.ts
@@ -0,0 +1,209 @@
+/**
+ * Tests for filter_labels — filterDataFrame and filterSeries.
+ *
+ * Covers:
+ * - filterDataFrame: items, like, regex on columns (default axis)
+ * - filterDataFrame: items, like, regex on rows (axis=0)
+ * - filterSeries: items, like, regex
+ * - Error cases: no filter / multiple filters
+ * - Property-based: filtered columns are subset of original
+ */
+
+import { describe, expect, test } from "bun:test";
+import * as fc from "fast-check";
+import { DataFrame, Series, filterDataFrame, filterSeries } from "../../src/index.ts";
+import type { Scalar } from "../../src/index.ts";
+
+describe("filterDataFrame", () => {
+  // Column filtering is the default (axis=1).
+  describe("columns (default)", () => {
+    const df = DataFrame.fromColumns({
+      apple: [1, 2],
+      banana: [3, 4],
+      cherry: [5, 6],
+      apricot: [7, 8],
+    });
+
+    test("items: keeps specified columns in original order", () => {
+      const filtered = filterDataFrame(df, { items: ["apple", "cherry"] });
+      expect(filtered.columns.values).toEqual(["apple", "cherry"]);
+      expect(filtered.col("apple").values).toEqual([1, 2]);
+    });
+
+    test("items: missing item names are silently skipped", () => {
+      const filtered = filterDataFrame(df, { items: ["apple", "missing"] });
+      expect(filtered.columns.values).toEqual(["apple"]);
+    });
+
+    test("items: empty items returns empty DataFrame", () => {
+      const filtered = filterDataFrame(df, { items: [] });
+      expect(filtered.columns.values).toEqual([]);
+    });
+
+    test("like: keeps columns containing substring", () => {
+      const filtered = filterDataFrame(df, { like: "ap" });
+      expect(filtered.columns.values).toEqual(["apple", "apricot"]);
+    });
+
+    test("like: no match returns empty DataFrame", () => {
+      const filtered = filterDataFrame(df, { like: "zzz" });
+      expect(filtered.columns.values).toEqual([]);
+    });
+
+    test("like: empty string matches all", () => {
+      const filtered = filterDataFrame(df, { like: "" });
+      expect(filtered.columns.values).toEqual(["apple", "banana", "cherry", "apricot"]);
+    });
+
+    test("regex: keeps columns matching pattern", () => {
+      const filtered = filterDataFrame(df, { regex: "^a" });
+      expect(filtered.columns.values).toEqual(["apple", "apricot"]);
+    });
+
+    test("regex: pattern with groups", () => {
+      const filtered = filterDataFrame(df, { regex: "(cherry|banana)" });
+      expect(filtered.columns.values).toEqual(["banana", "cherry"]);
+    });
+
+    test("explicit axis=1 same as default", () => {
+      const explicitAxis = filterDataFrame(df, { like: "an", axis: 1 });
+      const defaultAxis = filterDataFrame(df, { like: "an" });
+      expect(explicitAxis.columns.values).toEqual(defaultAxis.columns.values);
+    });
+
+    test("preserves row data", () => {
+      const filtered = filterDataFrame(df, { items: ["banana"] });
+      expect(filtered.col("banana").values).toEqual([3, 4]);
+    });
+  });
+
+  // Row filtering (axis=0) matches against index labels.
+  describe("rows (axis=0)", () => {
+    const df = DataFrame.fromColumns(
+      { a: [10, 20, 30, 40], b: [1, 2, 3, 4] },
+      { index: ["foo", "bar", "baz", "qux"] },
+    );
+
+    test("items: keeps specified row labels", () => {
+      const filtered = filterDataFrame(df, { items: ["foo", "baz"], axis: 0 });
+      expect(filtered.index.values).toEqual(["foo", "baz"]);
+      expect(filtered.col("a").values).toEqual([10, 30]);
+    });
+
+    test("like: keeps rows containing substring", () => {
+      const filtered = filterDataFrame(df, { like: "ba", axis: 0 });
+      expect(filtered.index.values).toEqual(["bar", "baz"]);
+    });
+
+    test("regex: row labels matching pattern", () => {
+      const filtered = filterDataFrame(df, { regex: "^b", axis: 0 });
+      expect(filtered.index.values).toEqual(["bar", "baz"]);
+    });
+
+    test("axis='index' is alias for axis=0", () => {
+      const filtered = filterDataFrame(df, { like: "oo", axis: "index" });
+      expect(filtered.index.values).toEqual(["foo"]);
+    });
+
+    test("axis='columns' is alias for axis=1", () => {
+      const filtered = filterDataFrame(df, { like: "a", axis: "columns" });
+      expect(filtered.columns.values).toEqual(["a"]);
+    });
+
+    test("numeric index with items", () => {
+      const numericDf = DataFrame.fromColumns({ x: [1, 2, 3] }, { index: [10, 20, 30] });
+      const filtered = filterDataFrame(numericDf, { items: [10, 30], axis: 0 });
+      expect(filtered.index.values).toEqual([10, 30]);
+      expect(filtered.col("x").values).toEqual([1, 3]);
+    });
+  });
+
+  // Exactly one of items / like / regex must be supplied.
+  describe("error cases", () => {
+    const df = DataFrame.fromColumns({ a: [1], b: [2] });
+
+    test("no filter specified throws TypeError", () => {
+      expect(() => filterDataFrame(df, {})).toThrow(TypeError);
+    });
+
+    test("multiple filters specified throws TypeError", () => {
+      expect(() => filterDataFrame(df, { items: ["a"], like: "a" })).toThrow(TypeError);
+    });
+  });
+
+  // Randomised check: filtering can never invent a column.
+  describe("property-based", () => {
+    test("filtered columns are always a subset of original", () => {
+      fc.assert(
+        fc.property(
+          fc
+            .array(fc.string({ minLength: 1, maxLength: 6 }), { minLength: 0, maxLength: 8 })
+            .map((names) => [...new Set(names)]),
+          fc.string({ minLength: 0, maxLength: 3 }),
+          (colNames, pattern) => {
+            // Every generated column holds the same two-row payload.
+            const cols: Record<string, number[]> = {};
+            for (const name of colNames) cols[name] = [1, 2];
+            const generated = DataFrame.fromColumns(cols);
+            const filtered = filterDataFrame(generated, { like: pattern });
+            const original = new Set(colNames);
+            return (filtered.columns.values as string[]).every((c) => original.has(c));
+          },
+        ),
+      );
+    });
+  });
+});
+
+describe("filterSeries", () => {
+  // Shared fixture: four values keyed by Greek/Hebrew letter names.
+  const s = new Series({
+    data: [10, 20, 30, 40],
+    index: ["alpha", "beta", "gamma", "aleph"],
+  });
+
+  test("items: keeps specified labels", () => {
+    const kept = filterSeries(s, { items: ["alpha", "gamma"] });
+    expect(kept.index.values).toEqual(["alpha", "gamma"]);
+    expect(kept.values).toEqual([10, 30]);
+  });
+
+  test("like: keeps labels containing substring", () => {
+    const kept = filterSeries(s, { like: "al" });
+    expect(kept.index.values).toEqual(["alpha", "aleph"]);
+    expect(kept.values).toEqual([10, 40]);
+  });
+
+  test("regex: keeps labels matching pattern", () => {
+    const kept = filterSeries(s, { regex: "^b" });
+    expect(kept.index.values).toEqual(["beta"]);
+    expect(kept.values).toEqual([20]);
+  });
+
+  test("no match: returns empty series", () => {
+    const kept = filterSeries(s, { like: "zzz" });
+    expect(kept.size).toBe(0);
+  });
+
+  test("preserves name", () => {
+    const named = new Series({ data: [1, 2], index: ["a", "b"], name: "test" });
+    const kept = filterSeries(named, { items: ["a"] });
+    expect(kept.name).toBe("test");
+  });
+
+  test("error: no filter", () => {
+    const callWithoutFilter = () => filterSeries(s, {});
+    expect(callWithoutFilter).toThrow(TypeError);
+  });
+
+  test("error: multiple filters", () => {
+    const callWithTwoFilters = () => filterSeries(s, { items: ["alpha"], regex: "^a" });
+    expect(callWithTwoFilters).toThrow(TypeError);
+  });
+
+  test("numeric index items", () => {
+    const numeric = new Series({ data: [1, 2, 3], index: [100, 200, 300] });
+    const kept = filterSeries(numeric, { items: [100, 300] });
+    expect(kept.index.values).toEqual([100, 300]);
+    expect(kept.values).toEqual([1, 3]);
+  });
+});
diff --git a/tests/stats/update.test.ts b/tests/stats/update.test.ts
new file mode 100644
index 00000000..cef25ce6
--- /dev/null
+++ b/tests/stats/update.test.ts
@@ -0,0 +1,171 @@
+/**
+ * Tests for update — seriesUpdate and dataFrameUpdate.
+ *
+ * Covers:
+ * - seriesUpdate: basic overwrite, NA handling, overwrite=false, errors="raise"
+ * - dataFrameUpdate: column subset update, NA skipping
+ * - Label alignment (only matching labels updated)
+ * - Property-based: non-NA values from other always appear in result when overwrite=true
+ */
+
+import { describe, expect, test } from "bun:test";
+import * as fc from "fast-check";
+import { DataFrame, Series, dataFrameUpdate, seriesUpdate } from "../../src/index.ts";
+import type { Scalar } from "../../src/index.ts";
+
+describe("seriesUpdate", () => {
+  test("basic overwrite: non-NA values from other replace self", () => {
+    const base = new Series({ data: [1, 2, 3], index: [0, 1, 2] });
+    const patch = new Series({ data: [10, 20, 30], index: [0, 1, 2] });
+    const updated = seriesUpdate(base, patch);
+    expect(updated.values).toEqual([10, 20, 30]);
+  });
+
+  test("NA in other does not overwrite self", () => {
+    const base = new Series({ data: [1, 2, 3], index: [0, 1, 2] });
+    const patch = new Series({ data: [10, null, 30], index: [0, 1, 2] });
+    const updated = seriesUpdate(base, patch);
+    expect(updated.values).toEqual([10, 2, 30]);
+  });
+
+  test("NA in self gets updated by non-NA from other", () => {
+    const base = new Series({ data: [1, null, 3], index: [0, 1, 2] });
+    const patch = new Series({ data: [null, 20, null], index: [0, 1, 2] });
+    const updated = seriesUpdate(base, patch);
+    expect(updated.values).toEqual([1, 20, 3]);
+  });
+
+  test("overwrite=false: does not replace non-NA self with non-NA other", () => {
+    const base = new Series({ data: [1, 2, 3], index: [0, 1, 2] });
+    const patch = new Series({ data: [10, 20, 30], index: [0, 1, 2] });
+    const updated = seriesUpdate(base, patch, { overwrite: false });
+    expect(updated.values).toEqual([1, 2, 3]);
+  });
+
+  test("overwrite=false: fills NA in self with non-NA from other", () => {
+    const base = new Series({ data: [1, null, 3], index: [0, 1, 2] });
+    const patch = new Series({ data: [10, 20, 30], index: [0, 1, 2] });
+    const updated = seriesUpdate(base, patch, { overwrite: false });
+    expect(updated.values).toEqual([1, 20, 3]);
+  });
+
+  test("overwrite=false + errors='raise': throws on non-NA overlap", () => {
+    const base = new Series({ data: [1, 2, 3], index: [0, 1, 2] });
+    const patch = new Series({ data: [10, 20, 30], index: [0, 1, 2] });
+    const attempt = () => seriesUpdate(base, patch, { overwrite: false, errors: "raise" });
+    expect(attempt).toThrow(RangeError);
+  });
+
+  test("label alignment: only matching labels are updated", () => {
+    const base = new Series({ data: [1, 2, 3], index: [0, 1, 2] });
+    const patch = new Series({ data: [99], index: [1] });
+    const updated = seriesUpdate(base, patch);
+    expect(updated.values).toEqual([1, 99, 3]);
+  });
+
+  test("extra labels in other are ignored", () => {
+    const base = new Series({ data: [1, 2, 3], index: [0, 1, 2] });
+    const patch = new Series({ data: [10, 20, 30, 40], index: [0, 1, 2, 3] });
+    const updated = seriesUpdate(base, patch);
+    expect(updated.values).toEqual([10, 20, 30]);
+  });
+
+  test("empty other produces unchanged self", () => {
+    const base = new Series({ data: [1, 2, 3], index: [0, 1, 2] });
+    const patch = new Series({ data: [], index: [] });
+    const updated = seriesUpdate(base, patch);
+    expect(updated.values).toEqual([1, 2, 3]);
+  });
+
+  test("preserves index and name", () => {
+    const base = new Series({ data: [1, 2], index: ["a", "b"], name: "x" });
+    const patch = new Series({ data: [10], index: ["a"] });
+    const updated = seriesUpdate(base, patch);
+    expect(updated.index.values).toEqual(["a", "b"]);
+    expect(updated.name).toBe("x");
+  });
+
+  test("NaN in other does not overwrite", () => {
+    const base = new Series({ data: [5, 6, 7], index: [0, 1, 2] });
+    const patch = new Series({ data: [Number.NaN, 60, 70], index: [0, 1, 2] });
+    const updated = seriesUpdate(base, patch);
+    expect(updated.values).toEqual([5, 60, 70]);
+  });
+
+  // Randomised check of the default (overwrite=true) contract.
+  test("property: when overwrite=true, non-NA from other always wins", () => {
+    const nullableFloats = () =>
+      fc.array(fc.oneof(fc.float({ noNaN: true }), fc.constant(null)), {
+        minLength: 1,
+        maxLength: 20,
+      });
+    fc.assert(
+      fc.property(nullableFloats(), nullableFloats(), (arr1, arr2) => {
+        // Truncate both inputs to a common length so they share one index.
+        const n = Math.min(arr1.length, arr2.length);
+        const idx = Array.from({ length: n }, (_, i) => i);
+        const base = new Series({ data: arr1.slice(0, n) as Scalar[], index: idx });
+        const patch = new Series({ data: arr2.slice(0, n) as Scalar[], index: idx });
+        const updated = seriesUpdate(base, patch);
+        return idx.every((i) => {
+          const fromOther = arr2[i];
+          const fromSelf = arr1[i];
+          const actual = updated.values[i];
+          if (fromOther !== null && fromOther !== undefined) {
+            return actual === fromOther;
+          }
+          if (fromSelf !== null) {
+            return actual === fromSelf;
+          }
+          return true;
+        });
+      }),
+    );
+  });
+});
+
+describe("dataFrameUpdate", () => {
+  test("basic update: non-NA values from other overwrite self", () => {
+    const base = DataFrame.fromColumns({ a: [1, 2, 3], b: [10, 20, 30] });
+    const patch = DataFrame.fromColumns({ a: [100, 200, null] });
+    const updated = dataFrameUpdate(base, patch);
+    expect(updated.col("a").values).toEqual([100, 200, 3]);
+    expect(updated.col("b").values).toEqual([10, 20, 30]);
+  });
+
+  test("columns in other not in self are ignored", () => {
+    const base = DataFrame.fromColumns({ a: [1, 2, 3] });
+    const patch = DataFrame.fromColumns({ a: [10, null, 30], z: [99, 99, 99] });
+    const updated = dataFrameUpdate(base, patch);
+    expect(updated.columns.values).toEqual(["a"]);
+    expect(updated.col("a").values).toEqual([10, 2, 30]);
+  });
+
+  test("overwrite=false: fills NA in self", () => {
+    const base = DataFrame.fromColumns({ a: [1, null, 3] });
+    const patch = DataFrame.fromColumns({ a: [10, 20, 30] });
+    const updated = dataFrameUpdate(base, patch, { overwrite: false });
+    expect(updated.col("a").values).toEqual([1, 20, 3]);
+  });
+
+  test("empty other returns copy of self", () => {
+    const base = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+    const patch = DataFrame.fromColumns({});
+    const updated = dataFrameUpdate(base, patch);
+    expect(updated.col("a").values).toEqual([1, 2, 3]);
+    expect(updated.col("b").values).toEqual([4, 5, 6]);
+  });
+
+  test("all NA in other preserves self", () => {
+    const base = DataFrame.fromColumns({ a: [1, 2, 3] });
+    const patch = DataFrame.fromColumns({ a: [null, null, null] });
+    const updated = dataFrameUpdate(base, patch);
+    expect(updated.col("a").values).toEqual([1, 2, 3]);
+  });
+
+  test("preserves row index", () => {
+    const rowLabels = [10, 20, 30];
+    const base = DataFrame.fromColumns({ a: [1, 2, 3] }, { index: rowLabels });
+    const patch = DataFrame.fromColumns({ a: [9, null, 7] }, { index: rowLabels });
+    const updated = dataFrameUpdate(base, patch);
+    expect(updated.index.values).toEqual([10, 20, 30]);
+    expect(updated.col("a").values).toEqual([9, 2, 7]);
+  });
+
+  test("multiple columns updated correctly", () => {
+    const base = DataFrame.fromColumns({ a: [1, null, 3], b: [null, 20, 30] });
+    const patch = DataFrame.fromColumns({ a: [10, 20, null], b: [100, null, null] });
+    const updated = dataFrameUpdate(base, patch);
+    expect(updated.col("a").values).toEqual([10, 20, 3]);
+    expect(updated.col("b").values).toEqual([100, 20, 30]);
+  });
+});
From 3f99b9352aa1c619969ca03a68b898b88dc64c72 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 22 Apr 2026 07:00:53 +0000
Subject: [PATCH 11/30] Iteration 241: Add combineSeries/combineDataFrame +
keepTrue/keepFalse/filterBy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
- src/stats/combine.ts: combineSeries, combineDataFrame (pandas Series/DataFrame.combine)
- src/stats/notna_boolean.ts: keepTrue, keepFalse, filterBy boolean-mask helpers
- Comprehensive tests with property-based coverage for both modules
- Playground pages: combine.html, notna_boolean.html
- Metric: 118 → 120 (+2)
Run: https://github.com/githubnext/tsessebe/actions/runs/24764149736
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
playground/combine.html | 109 +++++++++++++
playground/index.html | 10 ++
playground/notna_boolean.html | 104 ++++++++++++
src/index.ts | 3 +
src/stats/combine.ts | 255 ++++++++++++++++++++++++++++++
src/stats/index.ts | 4 +
src/stats/notna_boolean.ts | 158 ++++++++++++++++++
tests/stats/combine.test.ts | 253 +++++++++++++++++++++++++++++
tests/stats/notna_boolean.test.ts | 207 ++++++++++++++++++++++++
9 files changed, 1103 insertions(+)
create mode 100644 playground/combine.html
create mode 100644 playground/notna_boolean.html
create mode 100644 src/stats/combine.ts
create mode 100644 src/stats/notna_boolean.ts
create mode 100644 tests/stats/combine.test.ts
create mode 100644 tests/stats/notna_boolean.test.ts
diff --git a/playground/combine.html b/playground/combine.html
new file mode 100644
index 00000000..7ef22b7c
--- /dev/null
+++ b/playground/combine.html
@@ -0,0 +1,109 @@
+
+
+
+
+
+ combine — Element-wise Combination — tsb playground
+
+
+
+
combine — Element-wise Combination
+
+ combineSeries(a, b, func) and combineDataFrame(a, b, func)
+ combine two objects element-wise using a caller-supplied binary function.
+ The result index is the union of both indices; a
+ fillValue (default null) is used when only one
+ side has a value for a given label.
+
combineSeries / combineDataFrame — combine two objects element-wise with a caller-supplied binary function. Result index is the union of both indices. Mirrors pandas Series.combine() / DataFrame.combine().
+ Compute pairwise Pearson correlations between a DataFrame and a Series or
+ another DataFrame, and compute the lag-N autocorrelation of a numeric Series.
+ Mirrors pandas.DataFrame.corrwith() and
+ pandas.Series.autocorr().
+
+
+
autoCorr — lag-N autocorrelation
+
Python pandas equivalent:
+
import pandas as pd
+
+s = pd.Series([1, 2, 3, 4, 5])
+print(s.autocorr(lag=1)) # 1.0 (perfectly linear → perfect self-correlation)
+print(s.autocorr(lag=2)) # 1.0
+
+# Alternating sign series → -1 autocorrelation at lag 1
+s2 = pd.Series([1, -1, 1, -1, 1, -1])
+print(s2.autocorr(lag=1)) # -1.0
+
+# NaN when constant (zero variance)
+s3 = pd.Series([5, 5, 5, 5])
+print(s3.autocorr(lag=1)) # NaN
+
corrWith — DataFrame correlated with another DataFrame
+
Python pandas equivalent:
+
import pandas as pd
+import numpy as np
+
+df1 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
+df2 = pd.DataFrame({"A": [1, 2, 3], "B": [6, 5, 4]})
+
+print(df1.corrwith(df2))
+# A 1.0
+# B -1.0
+
+# Columns in only one DataFrame get NaN (drop=False default)
+df3 = pd.DataFrame({"A": [1, 2, 3], "C": [7, 8, 9]})
+print(df1.corrwith(df3))
+# A 1.0
+# B NaN
+# C NaN
+
+# drop=True keeps only common columns
+print(df1.corrwith(df3, drop=True))
+# A 1.0
+
corrWith / autoCorr — compute pairwise Pearson correlations between a DataFrame and a Series or DataFrame, and compute lag-N autocorrelation for a Series. Mirrors pandas DataFrame.corrwith() and Series.autocorr().
+ Utilities to extract scalar values from Series and DataFrames.
+ Mirrors pandas.Series.squeeze(), Series.item(),
+ Series.bool(), Series.first_valid_index(),
+ Series.last_valid_index(), and their DataFrame equivalents.
+
+
+
squeezeSeries — extract scalar from a single-element Series
+
+
+
+
diff --git a/src/index.ts b/src/index.ts
index 26d1d472..e62534ac 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -581,3 +581,17 @@ export type { FilterLabelsOptions } from "./stats/index.ts";
export { combineSeries, combineDataFrame } from "./stats/index.ts";
export type { CombineDataFrameOptions } from "./stats/index.ts";
export { keepTrue, keepFalse, filterBy } from "./stats/index.ts";
+export {
+ squeezeSeries,
+ squeezeDataFrame,
+ itemSeries,
+ boolSeries,
+ boolDataFrame,
+ firstValidIndex,
+ lastValidIndex,
+ dataFrameFirstValidIndex,
+ dataFrameLastValidIndex,
+} from "./stats/index.ts";
+export type { SqueezeResult } from "./stats/index.ts";
+export { autoCorr, corrWith } from "./stats/index.ts";
+export type { CorrWithOptions } from "./stats/index.ts";
diff --git a/src/stats/corrwith.ts b/src/stats/corrwith.ts
new file mode 100644
index 00000000..3910a763
--- /dev/null
+++ b/src/stats/corrwith.ts
@@ -0,0 +1,272 @@
+/**
+ * corrwith — pairwise correlation of a DataFrame with a Series or another DataFrame.
+ * autocorr — lag-N autocorrelation for a numeric Series.
+ *
+ * Mirrors:
+ * - `pandas.Series.autocorr(lag=1)` — Pearson correlation of the Series with
+ * itself shifted by `lag` positions (positional shift, not label-aligned).
+ * - `pandas.DataFrame.corrwith(other, axis=0, drop=False, method="pearson")` —
+ * compute the pairwise column-wise (or row-wise) Pearson correlation between
+ * a DataFrame and a Series or another DataFrame.
+ *
+ * ### autoCorr
+ *
+ * The autocorrelation at lag `k` is `pearsonCorr(s, s.shift(k))`. The shift
+ * is positional — i.e. the first `k` elements of the shifted copy become `null`
+ * (dropped from the correlation computation). This matches pandas' behaviour.
+ *
+ * ### corrWith
+ *
+ * When `other` is a **Series** (axis=0):
+ * - Each *column* of `df` is correlated with `other` using label alignment.
+ * - The result is a Series indexed by the column names of `df`.
+ *
+ * When `other` is a **DataFrame** (axis=0):
+ * - Columns present in both DataFrames are correlated pairwise.
+ * - If `drop=false` (default), columns present in only one DataFrame receive
+ * `NaN` in the result. If `drop=true`, those columns are omitted.
+ * - The result is a Series indexed by the union (or intersection) of column
+ * names.
+ *
+ * When `axis=1` the same logic applies along rows instead of columns.
+ *
+ * @module
+ */
+
+import { DataFrame, Index, Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+import { pearsonCorr } from "./corr.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/**
+ * Options for {@link corrWith}. Every field is optional; omitted fields fall
+ * back to the defaults noted below.
+ */
+export interface CorrWithOptions {
+ /**
+ * Axis along which to align and correlate.
+ * - `0` / `"index"` (default): correlate columns
+ * - `1` / `"columns"`: correlate rows (the inputs are transposed first)
+ *
+ * Any value other than `1` or `"columns"` is treated as `0`.
+ */
+ readonly axis?: 0 | 1 | "index" | "columns";
+ /**
+ * When `true`, drop columns/rows that appear in only one of the two objects.
+ * When `false` (default), those labels receive `NaN`.
+ *
+ * Only consulted when `other` is a DataFrame; it has no effect when `other`
+ * is a Series.
+ */
+ readonly drop?: boolean;
+ /**
+ * Minimum number of non-NaN observation pairs required to compute a valid
+ * correlation. Defaults to `1`. The value is forwarded unchanged to
+ * `pearsonCorr`.
+ */
+ readonly minPeriods?: number;
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** Report whether `v` counts as missing: `null`, `undefined`, or `NaN`. */
+function isMissing(v: Scalar): boolean {
+  if (v === null || v === undefined) {
+    return true;
+  }
+  return typeof v === "number" && Number.isNaN(v);
+}
+
+/**
+ * Transpose a DataFrame — rows become columns, columns become rows.
+ *
+ * The result has one column per row label of `df`, named `String(label)`,
+ * and is indexed by the original column labels. Cells that read back as
+ * `undefined` are stored as `null`.
+ *
+ * NOTE(review): because column names are the stringified row labels, two row
+ * labels with the same string form (duplicates, or e.g. `1` and `"1"`) share
+ * one output column and receive more than one value per source column. Rows
+ * whose label is `null`/`undefined` get a column that is never filled. Both
+ * cases produce columns whose length differs from the index length.
+ *
+ * @param df - DataFrame to transpose; it is not modified.
+ * @returns A new DataFrame built with `DataFrame.fromColumns`.
+ */
+function transpose(df: DataFrame): DataFrame {
+  const rowLabels = df.index.toArray();
+  const colLabels = df.columns.toArray();
+
+  // One (initially empty) output column per input row.
+  const newCols: Record<string, Scalar[]> = {};
+  for (const label of rowLabels) {
+    newCols[String(label)] = [];
+  }
+
+  // Walk the source column by column, appending each cell to the output
+  // column that belongs to its row.
+  for (const colName of colLabels) {
+    const colValues = df.col(colName).values;
+    rowLabels.forEach((label, pos) => {
+      if (label === null || label === undefined) {
+        return;
+      }
+      newCols[String(label)]?.push(colValues[pos] ?? null);
+    });
+  }
+  return DataFrame.fromColumns(newCols, { index: colLabels });
+}
+
+// ─── autoCorr ─────────────────────────────────────────────────────────────────
+
+/**
+ * Compute the lag-N autocorrelation of a numeric Series.
+ *
+ * The value is the Pearson correlation coefficient between the Series and a
+ * copy of itself shifted by `lag` positions. Pairing is positional: element
+ * `i` is paired with element `i - lag`, so the first `lag` elements have no
+ * partner. A pair is skipped when either side is missing (`null`,
+ * `undefined`, `NaN`) or is not a number.
+ *
+ * Returns `NaN` when:
+ * - `lag` is greater than or equal to the Series length.
+ * - There are fewer than 2 valid observation pairs.
+ * - All valid values on either side are identical (zero variance).
+ *
+ * These rules apply to every lag, including `lag = 0`: a Series whose valid
+ * values have non-zero variance has autocorrelation exactly `1` at lag 0,
+ * while a constant, single-element, or all-missing Series yields `NaN`.
+ *
+ * Mirrors `pandas.Series.autocorr(lag=1)`.
+ *
+ * @param s - Input numeric Series.
+ * @param lag - Shift amount (default `1`). Must be a non-negative integer.
+ * @returns The correlation coefficient, or `NaN` as described above.
+ * @throws {RangeError} If `lag` is negative or not an integer.
+ *
+ * @example
+ * ```ts
+ * import { Series, autoCorr } from "tsb";
+ *
+ * const s = new Series({ data: [1, 2, 3, 4, 5] });
+ * autoCorr(s); // lag=1 → 1.0 (perfectly correlated with itself)
+ * autoCorr(s, 0); // lag=0 → 1.0
+ * autoCorr(s, 2); // lag=2 → 1.0
+ * ```
+ */
+export function autoCorr(s: Series, lag = 1): number {
+  if (lag < 0 || !Number.isInteger(lag)) {
+    throw new RangeError(`autoCorr: lag must be a non-negative integer, got ${lag}`);
+  }
+
+  const vals = s.values;
+  const n = vals.length;
+  if (lag >= n) {
+    // No element has a partner (this also covers the empty Series at lag 0).
+    return Number.NaN;
+  }
+
+  // Collect positional pairs (vals[i], vals[i - lag]); a pair is dropped when
+  // either side is missing or non-numeric.
+  const xs: number[] = [];
+  const ys: number[] = [];
+  for (let i = lag; i < n; i++) {
+    const a: Scalar = vals[i] ?? null;
+    const b: Scalar = vals[i - lag] ?? null;
+    if (isMissing(a) || isMissing(b)) {
+      continue;
+    }
+    if (typeof a !== "number" || typeof b !== "number") {
+      continue;
+    }
+    xs.push(a);
+    ys.push(b);
+  }
+
+  if (xs.length < 2) {
+    return Number.NaN;
+  }
+
+  const meanX = xs.reduce((acc, v) => acc + v, 0) / xs.length;
+  const meanY = ys.reduce((acc, v) => acc + v, 0) / ys.length;
+  let num = 0;
+  let varX = 0;
+  let varY = 0;
+  for (let i = 0; i < xs.length; i++) {
+    const dx = (xs[i] as number) - meanX;
+    const dy = (ys[i] as number) - meanY;
+    num += dx * dy;
+    varX += dx * dx;
+    varY += dy * dy;
+  }
+
+  if (lag === 0) {
+    // Every value is paired with itself, so the coefficient is exactly 1
+    // whenever the variance is non-zero; returning it directly avoids any
+    // rounding noise from the division below.
+    return varX > 0 ? 1 : Number.NaN;
+  }
+
+  const denom = Math.sqrt(varX * varY);
+  return denom === 0 ? Number.NaN : num / denom;
+}
+
+// ─── corrWith ─────────────────────────────────────────────────────────────────
+
+/**
+ * Pearson correlation of `df` against a Series or another DataFrame, yielding
+ * one coefficient per label.
+ *
+ * Mirrors `pandas.DataFrame.corrwith(other, axis=0, drop=False, method="pearson")`.
+ *
+ * **Axis.** With the default axis (`0` / `"index"`) the *columns* of `df` are
+ * the units being correlated. With `1` / `"columns"`, `df` — and `other` when
+ * it is a DataFrame — is transposed first, so the *rows* become the units. A
+ * Series `other` is never transposed.
+ *
+ * **`other` is a Series.** Each unit of `df` is handed to `pearsonCorr`
+ * together with `other`; the result has one entry per unit. `drop` is not
+ * consulted in this case.
+ *
+ * **`other` is a DataFrame.** Units are matched by label. Labels present in
+ * both inputs are correlated pairwise; labels present in only one receive
+ * `NaN`, unless `drop` is `true`, in which case they are left out.
+ *
+ * NOTE(review): for `axis=1` the result labels come from the transpose step,
+ * which names its columns with `String(rowLabel)` — so they are the
+ * stringified row labels rather than the original label values.
+ *
+ * @param df - DataFrame whose columns (or rows) are correlated.
+ * @param other - Series or DataFrame to correlate against.
+ * @param options - See {@link CorrWithOptions}.
+ * @returns Series of correlation coefficients indexed by unit label.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, Series, corrWith } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({
+ *   A: [1, 2, 3, 4, 5],
+ *   B: [5, 4, 3, 2, 1],
+ * });
+ * const s = new Series({ data: [1, 2, 3, 4, 5] });
+ * corrWith(df, s).values;
+ * // A → 1.0, B → -1.0
+ * ```
+ */
+export function corrWith(
+  df: DataFrame,
+  other: DataFrame | Series,
+  options: CorrWithOptions = {},
+): Series {
+  const rowWise = options.axis === 1 || options.axis === "columns";
+  const minPeriods = options.minPeriods ?? 1;
+
+  // Row-wise correlation is column-wise correlation of the transposed frame.
+  const left = rowWise ? transpose(df) : df;
+
+  if (!(other instanceof Series)) {
+    const right = rowWise ? transpose(other) : other;
+    return _corrWithDataFrame(left, right, options.drop ?? false, minPeriods);
+  }
+
+  return _corrWithSeries(left, other, minPeriods);
+}
+
+/**
+ * Correlate every column of `df` with one Series.
+ *
+ * @returns Series holding one `pearsonCorr` result per column, indexed by
+ *   the column labels of `df` in their original order.
+ */
+function _corrWithSeries(df: DataFrame, other: Series, minPeriods: number): Series {
+  const labels = df.columns.toArray();
+  const coefficients: Scalar[] = [];
+  for (const label of labels) {
+    coefficients.push(pearsonCorr(df.col(label), other, { minPeriods }));
+  }
+  return new Series({ data: coefficients, index: new Index(labels) });
+}
+
+/**
+ * Correlate the columns of `df` with the same-named columns of `other`.
+ *
+ * With `drop` set, only labels found in both frames are reported (in `df`
+ * order). Otherwise the result covers every label of `df` followed by the
+ * labels that exist only in `other`, and unmatched labels get `NaN`.
+ */
+function _corrWithDataFrame(
+  df: DataFrame,
+  other: DataFrame,
+  drop: boolean,
+  minPeriods: number,
+): Series {
+  const left = new Set(df.columns.toArray());
+  const right = new Set(other.columns.toArray());
+
+  let labels = [...left];
+  if (drop) {
+    labels = labels.filter((label) => right.has(label));
+  } else {
+    for (const label of right) {
+      if (!left.has(label)) {
+        labels.push(label);
+      }
+    }
+  }
+
+  const coefficients: Scalar[] = [];
+  for (const label of labels) {
+    const inBoth = left.has(label) && right.has(label);
+    coefficients.push(
+      inBoth ? pearsonCorr(df.col(label), other.col(label), { minPeriods }) : Number.NaN,
+    );
+  }
+
+  return new Series({ data: coefficients, index: new Index(labels) });
+}
diff --git a/src/stats/index.ts b/src/stats/index.ts
index 0ebd74c0..ac11dfc3 100644
--- a/src/stats/index.ts
+++ b/src/stats/index.ts
@@ -410,3 +410,17 @@ export type { FilterLabelsOptions } from "./filter_labels.ts";
export { combineSeries, combineDataFrame } from "./combine.ts";
export type { CombineDataFrameOptions } from "./combine.ts";
export { keepTrue, keepFalse, filterBy } from "./notna_boolean.ts";
+export {
+ squeezeSeries,
+ squeezeDataFrame,
+ itemSeries,
+ boolSeries,
+ boolDataFrame,
+ firstValidIndex,
+ lastValidIndex,
+ dataFrameFirstValidIndex,
+ dataFrameLastValidIndex,
+} from "./scalar_extract.ts";
+export type { SqueezeResult } from "./scalar_extract.ts";
+export { autoCorr, corrWith } from "./corrwith.ts";
+export type { CorrWithOptions } from "./corrwith.ts";
diff --git a/src/stats/scalar_extract.ts b/src/stats/scalar_extract.ts
new file mode 100644
index 00000000..a2de5458
--- /dev/null
+++ b/src/stats/scalar_extract.ts
@@ -0,0 +1,331 @@
+/**
+ * scalar_extract — extract scalar / Series values from Series and DataFrame.
+ *
+ * Mirrors several pandas scalar-extraction utilities:
+ * - `Series.squeeze()` — return scalar if length == 1, else self
+ * - `DataFrame.squeeze(axis?)` — squeeze 1-D axis objects into scalars/Series
+ * - `Series.item()` — return the single element (throws if size != 1)
+ * - `Series.bool()` — return bool of single-element Series
+ * - `DataFrame.bool()` — return bool of single-element DataFrame
+ * - `Series.first_valid_index()` — index label of first non-NA value or null
+ * - `Series.last_valid_index()` — index label of last non-NA value or null
+ * - `DataFrame.first_valid_index()` — label of first row with any non-NA value
+ * - `DataFrame.last_valid_index()` — label of last row with any non-NA value
+ *
+ * @module
+ */
+
+import { DataFrame, Index, Series } from "../core/index.ts";
+import type { Axis, Label, Scalar } from "../types.ts";
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** True when `v` is `null`, `undefined`, or the number `NaN`. */
+function isMissing(v: Scalar): boolean {
+  if (v === null || v === undefined) {
+    return true;
+  }
+  return typeof v === "number" && Number.isNaN(v);
+}
+
+/**
+ * Read `arr[i]`, reporting an absent or `undefined` slot as `null`.
+ *
+ * Exists so callers compiled with `noUncheckedIndexedAccess` get a `T | null`
+ * instead of `T | undefined`.
+ *
+ * @typeParam T - Element type of the array.
+ * @param arr - Array to read from.
+ * @param i - Position to read.
+ * @returns The element at `i`, or `null` when that slot is `undefined`.
+ */
+function atVal<T>(arr: readonly T[], i: number): T | null {
+  const v = arr[i];
+  return v !== undefined ? v : null;
+}
+
+// ─── squeeze ──────────────────────────────────────────────────────────────────
+
+/**
+ * Collapse a one-element Series to its only value.
+ *
+ * When `s.size` is exactly 1 the element is returned as a scalar (an
+ * `undefined` slot is reported as `null`). For any other size, including an
+ * empty Series, the input Series itself is returned.
+ *
+ * Mirrors `pandas.Series.squeeze()`.
+ *
+ * @param s - Series to squeeze.
+ * @returns The single element, or `s` itself when its size is not 1.
+ *
+ * @example
+ * ```ts
+ * import { Series, squeezeSeries } from "tsb";
+ *
+ * squeezeSeries(new Series({ data: [42] })); // 42
+ * squeezeSeries(new Series({ data: [1, 2] })); // Series([1, 2])
+ * ```
+ */
+export function squeezeSeries(s: Series): Scalar | Series {
+  return s.size === 1 ? atVal(s.values, 0) : s;
+}
+
+/**
+ * Union of everything {@link squeezeDataFrame} can return.
+ *
+ * - `Scalar` — the single cell, when the DataFrame is 1×1 and `axis` is omitted
+ * - `Series` — the single row or single column, when the squeezed axis has size 1
+ * - `DataFrame` — the input itself, when nothing could be squeezed
+ */
+export type SqueezeResult = Scalar | Series | DataFrame;
+
+/**
+ * Reduce a DataFrame along any size-1 axis, yielding a scalar or a Series.
+ *
+ * Mirrors `pandas.DataFrame.squeeze(axis?)`:
+ * - `axis` omitted: squeeze every axis that has size 1.
+ *   - 1 row and 1 column → the single cell as a scalar
+ *   - 1 row only → that row as a Series (indexed by column names)
+ *   - 1 column only → that column as a Series (indexed by row labels)
+ *   - otherwise → the DataFrame itself
+ * - `axis=0` / `"index"`: only rows are squeezed. 1 row → Series; else DataFrame.
+ * - `axis=1` / `"columns"`: only columns are squeezed. 1 column → Series; else DataFrame.
+ *
+ * @param df - DataFrame to squeeze.
+ * @param axis - Optional axis restriction (`0`, `1`, `"index"` or `"columns"`).
+ * @returns A scalar, a Series, or `df` itself, per the rules above.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, squeezeDataFrame } from "tsb";
+ *
+ * const df1x1 = DataFrame.fromColumns({ A: [10] });
+ * squeezeDataFrame(df1x1); // 10
+ *
+ * const df1xN = DataFrame.fromColumns({ A: [1], B: [2] });
+ * squeezeDataFrame(df1xN); // Series([1, 2], index=["A", "B"])
+ *
+ * const dfNx1 = DataFrame.fromColumns({ A: [1, 2, 3] });
+ * squeezeDataFrame(dfNx1); // Series([1, 2, 3])
+ * ```
+ */
+export function squeezeDataFrame(df: DataFrame, axis?: Axis): SqueezeResult {
+  const [rowCount, colCount] = df.shape;
+  const soleColumn = () => df.col(df.columns.at(0));
+
+  // Fold the string aliases onto their numeric axis.
+  let target = axis;
+  if (axis === "index") {
+    target = 0;
+  } else if (axis === "columns") {
+    target = 1;
+  }
+
+  switch (target) {
+    case 0:
+      return rowCount === 1 ? _rowSeries(df, 0) : df;
+    case 1:
+      return colCount === 1 ? soleColumn() : df;
+    default:
+      break;
+  }
+
+  // No axis given: squeeze as many dimensions as possible.
+  if (rowCount === 1) {
+    return colCount === 1 ? atVal(soleColumn().values, 0) : _rowSeries(df, 0);
+  }
+  return colCount === 1 ? soleColumn() : df;
+}
+
+/**
+ * Build a Series from the cells of one row.
+ *
+ * @param df - Source DataFrame.
+ * @param row - Zero-based row position to read from each column.
+ * @returns The row's values, indexed by the column names of `df`.
+ */
+function _rowSeries(df: DataFrame, row: number): Series {
+  const labels = df.columns.toArray();
+  const cells: Scalar[] = [];
+  for (const label of labels) {
+    cells.push(atVal(df.col(label).values, row));
+  }
+  return new Series({ data: cells, index: new Index(labels) });
+}
+
+// ─── item ─────────────────────────────────────────────────────────────────────
+
+/**
+ * Extract the only element of a Series.
+ *
+ * Mirrors `pandas.Series.item()`.
+ *
+ * @param s - Series expected to hold exactly one element.
+ * @returns That element (an `undefined` slot is reported as `null`).
+ * @throws RangeError when `s.size` is anything other than 1.
+ *
+ * @example
+ * ```ts
+ * import { Series, itemSeries } from "tsb";
+ *
+ * itemSeries(new Series({ data: [7] })); // 7
+ * ```
+ */
+export function itemSeries(s: Series): Scalar {
+  if (s.size === 1) {
+    return atVal(s.values, 0);
+  }
+  throw new RangeError(
+    `itemSeries: Series must have exactly 1 element, got ${s.size}`,
+  );
+}
+
+// ─── bool ─────────────────────────────────────────────────────────────────────
+
+/**
+ * Convert a one-element Series to a boolean.
+ *
+ * The element is converted with JavaScript truthiness, so `0`, `""`, `false`
+ * and `NaN` give `false`.
+ *
+ * Mirrors `pandas.Series.bool()`.
+ *
+ * @param s - Series expected to hold exactly one element.
+ * @returns The truthiness of that element.
+ * @throws RangeError when `s.size` is not 1.
+ * @throws TypeError when the element is null/undefined.
+ *
+ * @example
+ * ```ts
+ * import { Series, boolSeries } from "tsb";
+ *
+ * boolSeries(new Series({ data: [1] })); // true
+ * boolSeries(new Series({ data: [0] })); // false
+ * boolSeries(new Series({ data: [true] })); // true
+ * ```
+ */
+export function boolSeries(s: Series): boolean {
+  const hasSingleElement = s.size === 1;
+  if (!hasSingleElement) {
+    throw new RangeError(
+      `boolSeries: only a single-element Series can be converted to a scalar boolean, got size ${s.size}`,
+    );
+  }
+  const only = atVal(s.values, 0);
+  if (only === undefined || only === null) {
+    throw new TypeError("boolSeries: element is null/undefined — cannot convert to bool");
+  }
+  return !!only;
+}
+
+/**
+ * Convert a 1×1 DataFrame to a boolean.
+ *
+ * The single cell is converted with JavaScript truthiness.
+ *
+ * Mirrors `pandas.DataFrame.bool()`.
+ *
+ * @param df - DataFrame expected to have exactly one row and one column.
+ * @returns The truthiness of the single cell.
+ * @throws RangeError when the shape is not exactly 1×1.
+ * @throws TypeError when the single cell is null/undefined.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, boolDataFrame } from "tsb";
+ *
+ * boolDataFrame(DataFrame.fromColumns({ A: [1] })); // true
+ * boolDataFrame(DataFrame.fromColumns({ A: [false] })); // false
+ * ```
+ */
+export function boolDataFrame(df: DataFrame): boolean {
+  const [nRows, nCols] = df.shape;
+  const isSingleCell = nRows === 1 && nCols === 1;
+  if (!isSingleCell) {
+    throw new RangeError(
+      `boolDataFrame: only a 1×1 DataFrame can be converted to a scalar boolean, got shape [${nRows}, ${nCols}]`,
+    );
+  }
+  const onlyColumn = df.col(df.columns.at(0));
+  const cell = atVal(onlyColumn.values, 0);
+  if (cell === undefined || cell === null) {
+    throw new TypeError("boolDataFrame: element is null/undefined — cannot convert to bool");
+  }
+  return !!cell;
+}
+
+// ─── first/last valid index ───────────────────────────────────────────────────
+
+/**
+ * Find the index label of the earliest non-NA value in a Series.
+ *
+ * A value counts as NA when it is null, undefined, or NaN.
+ *
+ * Mirrors `pandas.Series.first_valid_index()`.
+ *
+ * @param s - Series to scan from the front.
+ * @returns The label at the first non-NA position, or `null` when every
+ *   value is NA (which includes the empty Series).
+ *
+ * @example
+ * ```ts
+ * import { Series, firstValidIndex } from "tsb";
+ *
+ * firstValidIndex(new Series({ data: [null, NaN, 3, 4], index: ["a","b","c","d"] }));
+ * // "c"
+ * firstValidIndex(new Series({ data: [null, null] }));
+ * // null
+ * ```
+ */
+export function firstValidIndex(s: Series): Label | null {
+  let pos = 0;
+  while (pos < s.size) {
+    if (!isMissing(atVal(s.values, pos))) {
+      return s.index.at(pos);
+    }
+    pos += 1;
+  }
+  return null;
+}
+
+/**
+ * Find the index label of the latest non-NA value in a Series.
+ *
+ * A value counts as NA when it is null, undefined, or NaN.
+ *
+ * Mirrors `pandas.Series.last_valid_index()`.
+ *
+ * @param s - Series to scan from the back.
+ * @returns The label at the last non-NA position, or `null` when every
+ *   value is NA.
+ *
+ * @example
+ * ```ts
+ * import { Series, lastValidIndex } from "tsb";
+ *
+ * lastValidIndex(new Series({ data: [1, 2, null, null], index: ["a","b","c","d"] }));
+ * // "b"
+ * ```
+ */
+export function lastValidIndex(s: Series): Label | null {
+  let pos = s.size - 1;
+  while (pos >= 0) {
+    if (!isMissing(atVal(s.values, pos))) {
+      return s.index.at(pos);
+    }
+    pos -= 1;
+  }
+  return null;
+}
+
+/**
+ * Return the row index label of the first row that contains at least one
+ * non-NA value across all columns.
+ *
+ * A value counts as NA when it is null, undefined, or NaN.
+ *
+ * Mirrors `pandas.DataFrame.first_valid_index()`.
+ *
+ * @param df - DataFrame to scan from the first row downwards.
+ * @returns The label of the first row with any non-NA cell, or `null` when
+ *   every value in the DataFrame is NA (or there are no rows).
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, dataFrameFirstValidIndex } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({
+ *   A: [null, null, 1],
+ *   B: [null, 2, 3],
+ * });
+ * dataFrameFirstValidIndex(df); // 1 (row 1 has B=2)
+ * ```
+ */
+export function dataFrameFirstValidIndex(df: DataFrame): Label | null {
+  const [nRows] = df.shape;
+  // Resolve each column's value array once, instead of once per cell.
+  const columnValues = df.columns.toArray().map((name) => df.col(name).values);
+  for (let i = 0; i < nRows; i++) {
+    if (columnValues.some((values) => !isMissing(atVal(values, i)))) {
+      return df.index.at(i);
+    }
+  }
+  return null;
+}
+
+/**
+ * Return the row index label of the last row that contains at least one
+ * non-NA value across all columns.
+ *
+ * A value counts as NA when it is null, undefined, or NaN.
+ *
+ * Mirrors `pandas.DataFrame.last_valid_index()`.
+ *
+ * @param df - DataFrame to scan from the last row upwards.
+ * @returns The label of the last row with any non-NA cell, or `null` when
+ *   every value in the DataFrame is NA (or there are no rows).
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, dataFrameLastValidIndex } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({
+ *   A: [1, null, null],
+ *   B: [2, 3, null],
+ * });
+ * dataFrameLastValidIndex(df); // 1 (row 1 has B=3)
+ * ```
+ */
+export function dataFrameLastValidIndex(df: DataFrame): Label | null {
+  const [nRows] = df.shape;
+  // Resolve each column's value array once, instead of once per cell.
+  const columnValues = df.columns.toArray().map((name) => df.col(name).values);
+  for (let i = nRows - 1; i >= 0; i--) {
+    if (columnValues.some((values) => !isMissing(atVal(values, i)))) {
+      return df.index.at(i);
+    }
+  }
+  return null;
+}
diff --git a/tests/stats/corrwith.test.ts b/tests/stats/corrwith.test.ts
new file mode 100644
index 00000000..ee75820d
--- /dev/null
+++ b/tests/stats/corrwith.test.ts
@@ -0,0 +1,256 @@
+/**
+ * Tests for corrwith — autoCorr and corrWith.
+ */
+
+import { describe, expect, it } from "bun:test";
+import * as fc from "fast-check";
+import {
+ DataFrame,
+ Series,
+ autoCorr,
+ corrWith,
+ pearsonCorr,
+} from "../../src/index.ts";
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/**
+ * Round `v` to `d` decimal places (8 by default) by scaling up, applying
+ * `Math.round`, and scaling back down.
+ */
+function round(v: number, d = 8): number {
+  const scale = 10 ** d;
+  const scaled = Math.round(v * scale);
+  return scaled / scale;
+}
+
+// ─── autoCorr ─────────────────────────────────────────────────────────────────
+
+describe("autoCorr", () => {
+ it("lag=0 returns 1 for a non-constant Series", () => {
+ const s = new Series({ data: [1, 2, 3, 4, 5] });
+ expect(autoCorr(s, 0)).toBe(1);
+ });
+
+ it("lag=1 returns 1 for a perfectly linearly increasing Series", () => {
+ const s = new Series({ data: [1, 2, 3, 4, 5] });
+ expect(round(autoCorr(s, 1))).toBe(1);
+ });
+
+ it("lag=1 for alternating Series returns -1", () => {
+ const s = new Series({ data: [1, -1, 1, -1, 1, -1, 1, -1] });
+ expect(round(autoCorr(s, 1))).toBe(-1);
+ });
+
+ it("lag >= length returns NaN", () => {
+ const s = new Series({ data: [1, 2, 3] });
+ expect(Number.isNaN(autoCorr(s, 3))).toBe(true);
+ expect(Number.isNaN(autoCorr(s, 10))).toBe(true);
+ });
+
+ it("default lag is 1 (uses default parameter)", () => {
+ const s = new Series({ data: [10, 20, 30] });
+ // `lag` is omitted, so the default lag=1 applies; linearly increasing → 1
+ expect(round(autoCorr(s))).toBe(1);
+ });
+
+ it("returns NaN for constant Series (zero variance)", () => {
+ const s = new Series({ data: [5, 5, 5, 5, 5] });
+ expect(Number.isNaN(autoCorr(s, 1))).toBe(true);
+ });
+
+ it("ignores NA values in both positions", () => {
+ // [1, null, 3, null, 5] shifted by 1: [null, 1, null, 3, null]
+ // valid pairs (positional): (3,1), (5,3) — both positive → positive corr
+ const s = new Series({ data: [1, null, 3, null, 5] });
+ const r = autoCorr(s, 1);
+ expect(Number.isNaN(r) || r > 0).toBe(true);
+ });
+
+ it("throws RangeError for negative lag", () => {
+ expect(() => autoCorr(new Series({ data: [1, 2, 3] }), -1)).toThrow(RangeError);
+ });
+
+ it("throws RangeError for non-integer lag", () => {
+ expect(() => autoCorr(new Series({ data: [1, 2, 3] }), 1.5)).toThrow(RangeError);
+ });
+
+ it("returns NaN for all-NA Series", () => {
+ const s = new Series({ data: [null, null, null] });
+ expect(Number.isNaN(autoCorr(s, 1))).toBe(true);
+ });
+
+ it("property: lag=0 autocorr is always 1 for non-empty numeric Series", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.float({ noNaN: true, noDefaultInfinity: true }), {
+ minLength: 1,
+ maxLength: 10,
+ }),
+ (arr) => {
+ const s = new Series({ data: arr });
+ const r = autoCorr(s, 0);
+ return r === 1;
+ },
+ ),
+ );
+ });
+
+ it("property: autocorr result is in [-1, 1] or NaN", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.float({ noNaN: true, noDefaultInfinity: true }), {
+ minLength: 3,
+ maxLength: 20,
+ }),
+ fc.integer({ min: 0, max: 5 }),
+ (arr, lag) => {
+ const s = new Series({ data: arr });
+ const r = autoCorr(s, lag);
+ return Number.isNaN(r) || (r >= -1 - 1e-9 && r <= 1 + 1e-9);
+ },
+ ),
+ );
+ });
+});
+
+// ─── corrWith (Series) ────────────────────────────────────────────────────────
+
+describe("corrWith – with Series", () => {
+ it("each column is correlated with the Series", () => {
+ const df = DataFrame.fromColumns({
+ A: [1, 2, 3, 4, 5],
+ B: [5, 4, 3, 2, 1],
+ });
+ const s = new Series({ data: [1, 2, 3, 4, 5] });
+ const result = corrWith(df, s);
+
+ expect(result).toBeInstanceOf(Series);
+ expect(result.index.toArray()).toEqual(["A", "B"]);
+ expect(round(result.values[0] as number)).toBe(1); // A vs [1..5] → 1
+ expect(round(result.values[1] as number)).toBe(-1); // B vs [1..5] → -1
+ });
+
+ it("result is indexed by df column names", () => {
+ const df = DataFrame.fromColumns({ X: [1, 2], Y: [3, 4], Z: [5, 6] });
+ const s = new Series({ data: [1, 2] });
+ const r = corrWith(df, s);
+ expect(r.index.toArray()).toEqual(["X", "Y", "Z"]);
+ });
+
+ it("constant (zero-variance) column returns NaN", () => {
+ // Column of constants vs non-constant Series → NaN (zero variance)
+ const df = DataFrame.fromColumns({ A: [1, 1, 1, 1] });
+ const s = new Series({ data: [1, 2, 3, 4] });
+ const r = corrWith(df, s);
+ expect(Number.isNaN(r.values[0] as number)).toBe(true);
+ });
+
+ it("matches pearsonCorr called individually for each column", () => {
+ const df = DataFrame.fromColumns({
+ A: [2, 4, 6, 8],
+ B: [8, 6, 4, 2],
+ });
+ const s = new Series({ data: [1, 2, 3, 4] });
+ const r = corrWith(df, s);
+ const expectedA = pearsonCorr(df.col("A"), s);
+ const expectedB = pearsonCorr(df.col("B"), s);
+ expect(round(r.values[0] as number)).toBe(round(expectedA));
+ expect(round(r.values[1] as number)).toBe(round(expectedB));
+ });
+});
+
+// ─── corrWith (DataFrame) ─────────────────────────────────────────────────────
+
+describe("corrWith – with DataFrame", () => {
+ it("correlates common columns pairwise", () => {
+ const df1 = DataFrame.fromColumns({ A: [1, 2, 3], B: [4, 5, 6] });
+ const df2 = DataFrame.fromColumns({ A: [1, 2, 3], B: [6, 5, 4] });
+ const r = corrWith(df1, df2);
+
+ expect(r.index.toArray()).toEqual(["A", "B"]);
+ expect(round(r.values[0] as number)).toBe(1); // A vs A → 1
+ expect(round(r.values[1] as number)).toBe(-1); // B vs B (inverted) → -1
+ });
+
+ it("columns in only one DataFrame get NaN by default (drop=false)", () => {
+ const df1 = DataFrame.fromColumns({ A: [1, 2, 3], B: [4, 5, 6] });
+ const df2 = DataFrame.fromColumns({ A: [1, 2, 3], C: [7, 8, 9] });
+ const r = corrWith(df1, df2, { drop: false });
+
+ const idx = r.index.toArray();
+ // Union should have A, B, C
+ expect(idx).toContain("A");
+ expect(idx).toContain("B");
+ expect(idx).toContain("C");
+
+ const bVal = r.values[idx.indexOf("B")] as number;
+ const cVal = r.values[idx.indexOf("C")] as number;
+ expect(Number.isNaN(bVal)).toBe(true); // B not in df2
+ expect(Number.isNaN(cVal)).toBe(true); // C not in df1
+ });
+
+ it("drop=true keeps only common columns", () => {
+ const df1 = DataFrame.fromColumns({ A: [1, 2, 3], B: [4, 5, 6] });
+ const df2 = DataFrame.fromColumns({ A: [1, 2, 3], C: [7, 8, 9] });
+ const r = corrWith(df1, df2, { drop: true });
+
+ expect(r.index.toArray()).toEqual(["A"]);
+ expect(round(r.values[0] as number)).toBe(1);
+ });
+
+ it("empty intersection with drop=true returns empty Series", () => {
+ const df1 = DataFrame.fromColumns({ A: [1, 2] });
+ const df2 = DataFrame.fromColumns({ B: [3, 4] });
+ const r = corrWith(df1, df2, { drop: true });
+ expect(r.size).toBe(0);
+ });
+
+ it("property: correlating a DataFrame with itself on all columns returns 1 or NaN", () => {
+ fc.assert(
+ fc.property(
+ fc.array(
+ fc.array(fc.float({ noNaN: true, noDefaultInfinity: true }), {
+ minLength: 3,
+ maxLength: 5,
+ }),
+ { minLength: 1, maxLength: 3 },
+ ),
+ (cols) => {
+ // Name the generated columns c0, c1, … so each gets a distinct label.
+ const data: Record<string, number[]> = {};
+ for (let i = 0; i < cols.length; i++) {
+ const c = cols[i];
+ if (c !== undefined) {
+ data[`c${i}`] = c;
+ }
+ }
+ const df = DataFrame.fromColumns(data);
+ const r = corrWith(df, df, { drop: true });
+ // Every coefficient must be NaN or 1 within tolerance.
+ for (const v of r.values) {
+ const n = v as number;
+ if (!Number.isNaN(n) && !(Math.abs(n - 1) < 1e-9)) {
+ return false;
+ }
+ }
+ return true;
+ },
+ ),
+ );
+ });
+});
+
+// ─── corrWith axis=1 ─────────────────────────────────────────────────────────
+
+describe("corrWith – axis=1", () => {
+ it("correlates rows (axis=1) with a Series", () => {
+ // Transpose perspective: rows become "columns" for correlation
+ const df = DataFrame.fromColumns({
+ A: [1, 2],
+ B: [2, 4],
+ C: [3, 6],
+ });
+ const s = new Series({ data: [1, 2, 3] });
+ const r = corrWith(df, s, { axis: 1 });
+ // Row 0: [1, 2, 3] vs [1, 2, 3] → 1
+ // Row 1: [2, 4, 6] vs [1, 2, 3] → 1
+ expect(r.size).toBe(2);
+ expect(round(r.values[0] as number)).toBe(1);
+ expect(round(r.values[1] as number)).toBe(1);
+ });
+});
diff --git a/tests/stats/scalar_extract.test.ts b/tests/stats/scalar_extract.test.ts
new file mode 100644
index 00000000..fd65dad9
--- /dev/null
+++ b/tests/stats/scalar_extract.test.ts
@@ -0,0 +1,366 @@
+/**
+ * Tests for scalar_extract — squeeze, item, bool, firstValidIndex, lastValidIndex.
+ */
+
+import { describe, expect, it } from "bun:test";
+import * as fc from "fast-check";
+import {
+ DataFrame,
+ Series,
+ boolDataFrame,
+ boolSeries,
+ dataFrameFirstValidIndex,
+ dataFrameLastValidIndex,
+ firstValidIndex,
+ itemSeries,
+ lastValidIndex,
+ squeezeSeries,
+ squeezeDataFrame,
+} from "../../src/index.ts";
+import type { Scalar } from "../../src/index.ts";
+
+// ─── squeezeSeries ────────────────────────────────────────────────────────────
+
+describe("squeezeSeries", () => {
+ it("returns scalar when Series has 1 element", () => {
+ expect(squeezeSeries(new Series({ data: [42] }))).toBe(42);
+ expect(squeezeSeries(new Series({ data: ["hello"] }))).toBe("hello");
+ expect(squeezeSeries(new Series({ data: [null] }))).toBe(null);
+ });
+
+ it("returns the Series unchanged when length > 1", () => {
+ const s = new Series({ data: [1, 2, 3] });
+ const result = squeezeSeries(s);
+ expect(result).toBeInstanceOf(Series);
+ expect((result as Series).values).toEqual([1, 2, 3]);
+ });
+
+ it("returns null for single-element Series with undefined value", () => {
+ // Underlying array has undefined coerced to null
+ const s = new Series({ data: [undefined] });
+ expect(squeezeSeries(s)).toBeNull();
+ });
+
+ it("property: single-element Series always returns a scalar", () => {
+ fc.assert(
+ fc.property(fc.float({ noNaN: true }), (v) => {
+ const result = squeezeSeries(new Series({ data: [v] }));
+ return typeof result === "number";
+ }),
+ );
+ });
+
+ it("property: multi-element Series always returns a Series", () => {
+ fc.assert(
+ fc.property(fc.array(fc.float({ noNaN: true }), { minLength: 2, maxLength: 10 }), (arr) => {
+ const result = squeezeSeries(new Series({ data: arr }));
+ return result instanceof Series;
+ }),
+ );
+ });
+});
+
+// ─── squeezeDataFrame ─────────────────────────────────────────────────────────
+
+describe("squeezeDataFrame", () => {
+ it("1×1 DataFrame with no axis → scalar", () => {
+ const df = DataFrame.fromColumns({ A: [99] });
+ expect(squeezeDataFrame(df)).toBe(99);
+ });
+
+ it("1×N DataFrame with no axis → Series indexed by column names", () => {
+ const df = DataFrame.fromColumns({ A: [1], B: [2], C: [3] });
+ const result = squeezeDataFrame(df);
+ expect(result).toBeInstanceOf(Series);
+ const s = result as Series;
+ expect([...s.values]).toEqual([1, 2, 3]);
+ expect(s.index.toArray()).toEqual(["A", "B", "C"]);
+ });
+
+ it("N×1 DataFrame with no axis → Series indexed by row labels", () => {
+ const df = DataFrame.fromColumns({ A: [10, 20, 30] });
+ const result = squeezeDataFrame(df);
+ expect(result).toBeInstanceOf(Series);
+ expect([...(result as Series).values]).toEqual([10, 20, 30]);
+ });
+
+ it("N×M DataFrame with no axis → DataFrame unchanged", () => {
+ const df = DataFrame.fromColumns({ A: [1, 2], B: [3, 4] });
+ expect(squeezeDataFrame(df)).toBeInstanceOf(DataFrame);
+ });
+
+ it("axis=0: 1-row DataFrame → Series", () => {
+ const df = DataFrame.fromColumns({ X: [5], Y: [6] });
+ const result = squeezeDataFrame(df, 0);
+ expect(result).toBeInstanceOf(Series);
+ expect([...(result as Series).values]).toEqual([5, 6]);
+ });
+
+ it("axis=0: multi-row DataFrame → DataFrame unchanged", () => {
+ const df = DataFrame.fromColumns({ A: [1, 2] });
+ expect(squeezeDataFrame(df, 0)).toBeInstanceOf(DataFrame);
+ });
+
+ it("axis=1: 1-col DataFrame → Series", () => {
+ const df = DataFrame.fromColumns({ A: [7, 8, 9] });
+ const result = squeezeDataFrame(df, 1);
+ expect(result).toBeInstanceOf(Series);
+ expect([...(result as Series).values]).toEqual([7, 8, 9]);
+ });
+
+ it("axis=1: multi-col DataFrame → DataFrame unchanged", () => {
+ const df = DataFrame.fromColumns({ A: [1, 2], B: [3, 4] });
+ expect(squeezeDataFrame(df, 1)).toBeInstanceOf(DataFrame);
+ });
+
+ it("axis='index' behaves like axis=0", () => {
+ const df = DataFrame.fromColumns({ X: [1], Y: [2] });
+ const r0 = squeezeDataFrame(df, 0);
+ const rStr = squeezeDataFrame(df, "index");
+ expect((r0 as Series).values).toEqual((rStr as Series).values);
+ });
+
+ it("axis='columns' behaves like axis=1", () => {
+ const df = DataFrame.fromColumns({ A: [10, 20] });
+ const r1 = squeezeDataFrame(df, 1);
+ const rStr = squeezeDataFrame(df, "columns");
+ expect((r1 as Series).values).toEqual((rStr as Series).values);
+ });
+});
+
+// ─── itemSeries ───────────────────────────────────────────────────────────────
+
+describe("itemSeries", () => {
+ it("returns single element as scalar", () => {
+ expect(itemSeries(new Series({ data: [7] }))).toBe(7);
+ expect(itemSeries(new Series({ data: ["x"] }))).toBe("x");
+ expect(itemSeries(new Series({ data: [true] }))).toBe(true);
+ expect(itemSeries(new Series({ data: [null] }))).toBeNull();
+ });
+
+ it("throws RangeError for empty Series", () => {
+ expect(() => itemSeries(new Series({ data: [] }))).toThrow(RangeError);
+ });
+
+ it("throws RangeError for multi-element Series", () => {
+ expect(() => itemSeries(new Series({ data: [1, 2] }))).toThrow(RangeError);
+ expect(() => itemSeries(new Series({ data: [1, 2, 3] }))).toThrow(RangeError);
+ });
+
+ it("property: itemSeries always equals squeezeSeries for single-element Series", () => {
+ fc.assert(
+ fc.property(fc.float({ noNaN: true }), (v) => {
+ const s = new Series({ data: [v] });
+ return itemSeries(s) === squeezeSeries(s);
+ }),
+ );
+ });
+});
+
+// ─── boolSeries ───────────────────────────────────────────────────────────────
+
+describe("boolSeries", () => {
+ it("truthy values → true", () => {
+ expect(boolSeries(new Series({ data: [1] }))).toBe(true);
+ expect(boolSeries(new Series({ data: [2] }))).toBe(true);
+ expect(boolSeries(new Series({ data: [true] }))).toBe(true);
+ expect(boolSeries(new Series({ data: ["hello"] }))).toBe(true);
+ });
+
+ it("falsy values → false", () => {
+ expect(boolSeries(new Series({ data: [0] }))).toBe(false);
+ expect(boolSeries(new Series({ data: [false] }))).toBe(false);
+ expect(boolSeries(new Series({ data: [""] }))).toBe(false);
+ });
+
+ it("throws TypeError for null element", () => {
+ expect(() => boolSeries(new Series({ data: [null] }))).toThrow(TypeError);
+ });
+
+ it("throws RangeError for multi-element Series", () => {
+ expect(() => boolSeries(new Series({ data: [1, 2] }))).toThrow(RangeError);
+ });
+
+ it("throws RangeError for empty Series", () => {
+ expect(() => boolSeries(new Series({ data: [] }))).toThrow(RangeError);
+ });
+});
+
+// ─── boolDataFrame ────────────────────────────────────────────────────────────
+
+describe("boolDataFrame", () => {
+ it("returns bool of single-element 1×1 DataFrame", () => {
+ expect(boolDataFrame(DataFrame.fromColumns({ A: [1] }))).toBe(true);
+ expect(boolDataFrame(DataFrame.fromColumns({ A: [0] }))).toBe(false);
+ expect(boolDataFrame(DataFrame.fromColumns({ A: [true] }))).toBe(true);
+ expect(boolDataFrame(DataFrame.fromColumns({ A: [false] }))).toBe(false);
+ });
+
+ it("throws RangeError for 1×2 DataFrame", () => {
+ expect(() => boolDataFrame(DataFrame.fromColumns({ A: [1], B: [2] }))).toThrow(RangeError);
+ });
+
+ it("throws RangeError for 2×1 DataFrame", () => {
+ expect(() => boolDataFrame(DataFrame.fromColumns({ A: [1, 2] }))).toThrow(RangeError);
+ });
+
+ it("throws TypeError for null element in 1×1 DataFrame", () => {
+ expect(() => boolDataFrame(DataFrame.fromColumns({ A: [null] }))).toThrow(TypeError);
+ });
+});
+
+// ─── firstValidIndex ──────────────────────────────────────────────────────────
+
+describe("firstValidIndex", () => {
+ it("returns first non-NA label", () => {
+ const s = new Series({ data: [null, Number.NaN, 3, 4], index: ["a", "b", "c", "d"] });
+ expect(firstValidIndex(s)).toBe("c");
+ });
+
+ it("returns first element label if first is valid", () => {
+ const s = new Series({ data: [10, null, null], index: [0, 1, 2] });
+ expect(firstValidIndex(s)).toBe(0);
+ });
+
+ it("returns null when all values are NA", () => {
+ const s = new Series({ data: [null, null, null] });
+ expect(firstValidIndex(s)).toBeNull();
+ });
+
+ it("returns null for empty Series", () => {
+ expect(firstValidIndex(new Series({ data: [] }))).toBeNull();
+ });
+
+ it("works with numeric index", () => {
+ const s = new Series({ data: [null, 99], index: [10, 20] });
+ expect(firstValidIndex(s)).toBe(20);
+ });
+
+ it("property: first valid index is consistent with manual scan", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.oneof(fc.constant(null), fc.float({ noNaN: true })), {
+ minLength: 1,
+ maxLength: 8,
+ }),
+ (arr) => {
+ const s = new Series({ data: arr });
+ const fvi = firstValidIndex(s);
+ const idx = arr.findIndex((v) => v !== null);
+ if (idx === -1) {
+ return fvi === null;
+ }
+ return fvi === idx;
+ },
+ ),
+ );
+ });
+});
+
+// ─── lastValidIndex ───────────────────────────────────────────────────────────
+
+describe("lastValidIndex", () => {
+ it("returns last non-NA label", () => {
+ const s = new Series({ data: [1, 2, null, null], index: ["a", "b", "c", "d"] });
+ expect(lastValidIndex(s)).toBe("b");
+ });
+
+ it("returns null when all NA", () => {
+ expect(lastValidIndex(new Series({ data: [null, null] }))).toBeNull();
+ });
+
+ it("returns last label when last element is valid", () => {
+ const s = new Series({ data: [null, null, 5], index: [0, 1, 2] });
+ expect(lastValidIndex(s)).toBe(2);
+ });
+
+ it("property: last valid index is consistent with manual scan", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.oneof(fc.constant(null), fc.float({ noNaN: true })), {
+ minLength: 1,
+ maxLength: 8,
+ }),
+ (arr) => {
+ const s = new Series({ data: arr });
+ const lvi = lastValidIndex(s);
+ let last = -1;
+ for (let i = 0; i < arr.length; i++) {
+ if (arr[i] !== null) {
+ last = i;
+ }
+ }
+ if (last === -1) {
+ return lvi === null;
+ }
+ return lvi === last;
+ },
+ ),
+ );
+ });
+});
+
+// ─── dataFrameFirstValidIndex ─────────────────────────────────────────────────
+
+describe("dataFrameFirstValidIndex", () => {
+ it("returns first row label with any non-NA value", () => {
+ const df = DataFrame.fromColumns({
+ A: [null, null, 1],
+ B: [null, 2, 3],
+ });
+ expect(dataFrameFirstValidIndex(df)).toBe(1);
+ });
+
+ it("returns 0-based index when default RangeIndex and first row has a value", () => {
+ const df = DataFrame.fromColumns({ X: [10, 20], Y: [30, 40] });
+ expect(dataFrameFirstValidIndex(df)).toBe(0);
+ });
+
+ it("returns null when entire DataFrame is NA", () => {
+ const df = DataFrame.fromColumns({ A: [null, null], B: [null, null] });
+ expect(dataFrameFirstValidIndex(df)).toBeNull();
+ });
+
+ it("returns null for empty DataFrame", () => {
+ const df = DataFrame.fromColumns({ A: [] });
+ expect(dataFrameFirstValidIndex(df)).toBeNull();
+ });
+
+ it("uses the row index label (not position) when custom index is set", () => {
+ const df = DataFrame.fromColumns(
+ { A: [null, null, 5] },
+ { index: ["r0", "r1", "r2"] },
+ );
+ expect(dataFrameFirstValidIndex(df)).toBe("r2");
+ });
+});
+
+// ─── dataFrameLastValidIndex ──────────────────────────────────────────────────
+
+describe("dataFrameLastValidIndex", () => {
+ it("returns last row label with any non-NA value", () => {
+ const df = DataFrame.fromColumns({
+ A: [1, null, null],
+ B: [2, 3, null],
+ });
+ expect(dataFrameLastValidIndex(df)).toBe(1);
+ });
+
+ it("returns last row when last has a value", () => {
+ const df = DataFrame.fromColumns({ A: [1, 2, 3] });
+ expect(dataFrameLastValidIndex(df)).toBe(2);
+ });
+
+ it("returns null when all NA", () => {
+ const df = DataFrame.fromColumns({ A: [null], B: [null] });
+ expect(dataFrameLastValidIndex(df)).toBeNull();
+ });
+
+ it("uses the row index label", () => {
+ const df = DataFrame.fromColumns(
+ { A: [5, null, null] },
+ { index: ["x", "y", "z"] },
+ );
+ expect(dataFrameLastValidIndex(df)).toBe("x");
+ });
+});
From dedd1321d3515851bcba8cc05d67a391a6217b95 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 22 Apr 2026 09:56:06 +0000
Subject: [PATCH 13/30] Iteration 243: +rename_ops
(renameSeriesIndex/DataFrame, addPrefix/Suffix, setAxis, seriesToFrame)
+math_ops (absSeries/DataFrame, roundSeries/DataFrame)
Run: https://github.com/githubnext/tsessebe/actions/runs/24771121921
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
playground/math_ops.html | 100 ++++++++++
playground/rename_ops.html | 144 ++++++++++++++
src/index.ts | 14 ++
src/stats/index.ts | 14 ++
src/stats/math_ops.ts | 162 ++++++++++++++++
src/stats/rename_ops.ts | 343 +++++++++++++++++++++++++++++++++
tests/stats/math_ops.test.ts | 232 ++++++++++++++++++++++
tests/stats/rename_ops.test.ts | 310 +++++++++++++++++++++++++++++
8 files changed, 1319 insertions(+)
create mode 100644 playground/math_ops.html
create mode 100644 playground/rename_ops.html
create mode 100644 src/stats/math_ops.ts
create mode 100644 src/stats/rename_ops.ts
create mode 100644 tests/stats/math_ops.test.ts
create mode 100644 tests/stats/rename_ops.test.ts
diff --git a/playground/math_ops.html b/playground/math_ops.html
new file mode 100644
index 00000000..0d138cda
--- /dev/null
+++ b/playground/math_ops.html
@@ -0,0 +1,100 @@
+
+
+
+
+
+ math_ops — abs, round — tsb playground
+
+
+
+
math_ops — abs, round
+
+ Element-wise mathematical transformations for Series and DataFrame.
+ Mirrors pandas.Series.abs(), pandas.DataFrame.abs(),
+ pandas.Series.round(), and pandas.DataFrame.round().
+ Missing values (null, NaN) are preserved as-is.
+
+ Functions for renaming labels, adding prefix/suffix to column or index labels,
+ replacing an axis entirely (set_axis), and converting a Series to a
+ single-column DataFrame (to_frame). Mirrors the corresponding
+ pandas methods.
+
+
+
+
+
diff --git a/src/index.ts b/src/index.ts
index e62534ac..c2cf21dc 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -595,3 +595,17 @@ export {
export type { SqueezeResult } from "./stats/index.ts";
export { autoCorr, corrWith } from "./stats/index.ts";
export type { CorrWithOptions } from "./stats/index.ts";
+export {
+ renameSeriesIndex,
+ renameDataFrame,
+ addPrefixDataFrame,
+ addSuffixDataFrame,
+ addPrefixSeries,
+ addSuffixSeries,
+ setAxisSeries,
+ setAxisDataFrame,
+ seriesToFrame,
+} from "./stats/index.ts";
+export type { LabelMapper, RenameDataFrameOptions } from "./stats/index.ts";
+export { absSeries, absDataFrame, roundSeries, roundDataFrame } from "./stats/index.ts";
+export type { RoundDataFrameSpec } from "./stats/index.ts";
diff --git a/src/stats/index.ts b/src/stats/index.ts
index ac11dfc3..f5313e89 100644
--- a/src/stats/index.ts
+++ b/src/stats/index.ts
@@ -424,3 +424,17 @@ export {
export type { SqueezeResult } from "./scalar_extract.ts";
export { autoCorr, corrWith } from "./corrwith.ts";
export type { CorrWithOptions } from "./corrwith.ts";
+export {
+ renameSeriesIndex,
+ renameDataFrame,
+ addPrefixDataFrame,
+ addSuffixDataFrame,
+ addPrefixSeries,
+ addSuffixSeries,
+ setAxisSeries,
+ setAxisDataFrame,
+ seriesToFrame,
+} from "./rename_ops.ts";
+export type { LabelMapper, RenameDataFrameOptions } from "./rename_ops.ts";
+export { absSeries, absDataFrame, roundSeries, roundDataFrame } from "./math_ops.ts";
+export type { RoundDataFrameSpec } from "./math_ops.ts";
diff --git a/src/stats/math_ops.ts b/src/stats/math_ops.ts
new file mode 100644
index 00000000..fd762987
--- /dev/null
+++ b/src/stats/math_ops.ts
@@ -0,0 +1,162 @@
+/**
+ * math_ops — element-wise mathematical transformations for Series and DataFrame.
+ *
+ * Mirrors the following pandas methods:
+ * - `Series.abs()` — absolute value of each element
+ * - `DataFrame.abs()` — element-wise absolute value
+ * - `Series.round(decimals?)` — round each element to N decimal places
+ * - `DataFrame.round(decimals?)` — round each column to N decimal places
+ * (or per-column decimals via a Record)
+ *
+ * All functions are **pure** — inputs are never mutated.
+ * `null` / `undefined` / `NaN` values propagate unchanged.
+ *
+ * @module
+ */
+
+import { DataFrame, Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+function isMissing(v: Scalar): boolean {
+  return v === null || v === undefined || Number.isNaN(v);
+}
+
+/**
+ * Round a number to `decimals` decimal places, sending exact halves away from
+ * zero (2.5 → 3, -2.5 → -3); pandas/NumPy instead round halves to even.
+ */
+function roundNum(v: number, decimals: number): number {
+  const factor = 10 ** decimals;
+  // Math.round alone pushes halves toward +Infinity (-2.5 → -2), so round the
+  // magnitude and re-apply the sign to keep the result symmetric around zero.
+  const magnitude = Math.round(Math.abs(v) * factor) / factor;
+  return Math.sign(v) * magnitude;
+}
+
+// ─── absSeries ────────────────────────────────────────────────────────────────
+
+/**
+ * Return a new Series with the absolute value of each numeric element.
+ *
+ * Mirrors `pandas.Series.abs()`.
+ *
+ * Non-numeric and missing values (`null`, `undefined`, `NaN`) are preserved
+ * as-is.
+ *
+ * @example
+ * ```ts
+ * import { Series, absSeries } from "tsb";
+ *
+ * absSeries(new Series({ data: [-1, 2, -3, null] })).values;
+ * // [1, 2, 3, null]
+ * ```
+ */
+export function absSeries(s: Series): Series {
+  // Only non-missing numbers are transformed; null, undefined, NaN and
+  // non-numeric values (string, boolean) are passed through as-is.
+  const data: Scalar[] = s.values.map((v) =>
+    typeof v === "number" && !isMissing(v) ? Math.abs(v) : v,
+  );
+  return new Series({ data, index: s.index, name: s.name ?? undefined });
+}
+
+// ─── absDataFrame ─────────────────────────────────────────────────────────────
+
+/**
+ * Return a new DataFrame where every numeric cell has been replaced by its
+ * absolute value.
+ *
+ * Mirrors `pandas.DataFrame.abs()`.
+ *
+ * Non-numeric and missing values are preserved as-is.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, absDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [-1, 2], b: [3, -4] });
+ * absDataFrame(df).col("a").values; // [1, 2]
+ * absDataFrame(df).col("b").values; // [3, 4]
+ * ```
+ */
+export function absDataFrame(df: DataFrame): DataFrame {
+  const colNames = df.columns.values as readonly string[];
+  const newColMap = new Map<string, Series<Scalar>>();
+  for (const name of colNames) {
+    newColMap.set(name, absSeries(df.col(name)));
+  }
+  return new DataFrame(newColMap, df.index, [...colNames]);
+}
+
+// ─── roundSeries ─────────────────────────────────────────────────────────────
+
+/**
+ * Return a new Series with each numeric element rounded to `decimals` decimal
+ * places.
+ *
+ * Mirrors `pandas.Series.round(decimals=0)`.
+ *
+ * Missing values (`null`, `undefined`, `NaN`) are preserved as-is.
+ *
+ * @param decimals - Number of decimal places (default `0`). Negative values
+ * round to tens, hundreds, etc. (e.g. `-1` rounds to the nearest 10).
+ *
+ * @example
+ * ```ts
+ * import { Series, roundSeries } from "tsb";
+ *
+ * roundSeries(new Series({ data: [1.234, 5.678] }), 2).values;
+ * // [1.23, 5.68]
+ * ```
+ */
+export function roundSeries(s: Series, decimals = 0): Series {
+  // Only non-missing numbers are rounded; null, undefined, NaN and
+  // non-numeric values are passed through as-is.
+  const data: Scalar[] = s.values.map((v) =>
+    typeof v === "number" && !isMissing(v) ? roundNum(v, decimals) : v,
+  );
+  return new Series({ data, index: s.index, name: s.name ?? undefined });
+}
+
+// ─── roundDataFrame ───────────────────────────────────────────────────────────
+
+/**
+ * Options for {@link roundDataFrame}.
+ *
+ * Either a single `decimals` number (applied to all columns) or a per-column
+ * `Record<string, number>` (unspecified columns default to `0`).
+ */
+export type RoundDataFrameSpec = number | Readonly<Record<string, number>>;
+
+/**
+ * Return a new DataFrame with each numeric cell rounded to the specified
+ * number of decimal places.
+ *
+ * Mirrors `pandas.DataFrame.round(decimals)`:
+ * - Pass a single number to apply the same precision to all columns.
+ * - Pass a `Record<string, number>` to use per-column precision. Columns not
+ *   listed are rounded to `0` decimals (pandas leaves such columns unchanged).
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, roundDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [1.111, 2.222], b: [3.333, 4.444] });
+ * roundDataFrame(df, 2).col("a").values; // [1.11, 2.22]
+ * roundDataFrame(df, { a: 1, b: 2 }).col("b").values; // [3.33, 4.44]
+ * ```
+ */
+export function roundDataFrame(df: DataFrame, decimals: RoundDataFrameSpec = 0): DataFrame {
+  const colNames = df.columns.values as readonly string[];
+  const newColMap = new Map<string, Series<Scalar>>();
+  for (const name of colNames) {
+    // Own keys only: a column named "constructor" must not read Object.prototype.
+    let d = 0;
+    if (typeof decimals === "number") d = decimals;
+    else if (Object.prototype.hasOwnProperty.call(decimals, name)) d = decimals[name] ?? 0;
+    newColMap.set(name, roundSeries(df.col(name), d));
+  }
+  return new DataFrame(newColMap, df.index, [...colNames]);
+}
diff --git a/src/stats/rename_ops.ts b/src/stats/rename_ops.ts
new file mode 100644
index 00000000..f162a8b6
--- /dev/null
+++ b/src/stats/rename_ops.ts
@@ -0,0 +1,343 @@
+/**
+ * rename_ops — rename labels, add prefix/suffix, set axis, and convert Series
+ * to DataFrame.
+ *
+ * Mirrors the following pandas methods:
+ * - `Series.rename(index)` — rename index labels via mapping or function
+ * - `DataFrame.rename(columns?, index?)` — rename columns and/or index labels
+ * - `DataFrame.add_prefix(prefix)` — prefix all column labels
+ * - `DataFrame.add_suffix(suffix)` — suffix all column labels
+ * - `Series.add_prefix(prefix)` — prefix index labels
+ * - `Series.add_suffix(suffix)` — suffix index labels
+ * - `Series.set_axis(labels)` — replace the index of a Series
+ * - `DataFrame.set_axis(labels, axis)` — replace the column or row axis
+ * - `Series.to_frame(name?)` — convert a Series to a single-column DataFrame
+ *
+ * All functions are **pure** — inputs are never mutated.
+ *
+ * @module
+ */
+
+import { DataFrame, Index, Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── types ────────────────────────────────────────────────────────────────────
+
+/** A mapper: either a `Record<string, Label>` mapping or a `(label:Label)=>Label` function. */
+export type LabelMapper = Readonly<Record<string, Label>> | ((label: Label) => Label);
+
+/** Options for {@link renameDataFrame}. */
+export interface RenameDataFrameOptions {
+ /**
+ * Rename column labels.
+ * Pass a `Record<string, Label>` or a `(name: Label) => Label` function.
+ */
+ readonly columns?: LabelMapper;
+ /**
+ * Rename row-index labels.
+ * Pass a `Record<string, Label>` or a `(label: Label) => Label` function.
+ */
+ readonly index?: LabelMapper;
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** Apply a LabelMapper to one label; labels without an own mapping entry are kept. */
+function applyMapper(mapper: LabelMapper, label: Label): Label {
+  if (typeof mapper === "function") {
+    return mapper(label);
+  }
+  const key = String(label);
+  // Own keys only: "constructor"/"toString" must not hit Object.prototype.
+  const mapped = Object.prototype.hasOwnProperty.call(mapper, key) ? mapper[key] : undefined;
+  return mapped !== undefined ? mapped : label;
+}
+
+/** Apply a LabelMapper to every element of an array of labels. */
+function mapLabels(mapper: LabelMapper, labels: readonly Label[]): Label[] {
+ return labels.map((l) => applyMapper(mapper, l));
+}
+
+// ─── renameSeriesIndex ────────────────────────────────────────────────────────
+
+/**
+ * Return a new Series with renamed index labels.
+ *
+ * Mirrors `pandas.Series.rename(index=...)`.
+ *
+ * The `mapper` argument may be:
+ * - A `Record` — each matching label is replaced; others
+ * are kept as-is.
+ * - A `(label: Label) => Label` function — called for every index label.
+ *
+ * @example
+ * ```ts
+ * import { Series, renameSeriesIndex } from "tsb";
+ *
+ * const s = new Series({ data: [1, 2, 3], index: ["a", "b", "c"] });
+ * renameSeriesIndex(s, { a: "x", c: "z" }).index.values;
+ * // ["x", "b", "z"]
+ * ```
+ */
+export function renameSeriesIndex(
+ s: Series,
+ mapper: LabelMapper,
+): Series {
+ const newLabels = mapLabels(mapper, s.index.values as readonly Label[]);
+ return new Series({
+ data: s.values,
+ index: new Index(newLabels),
+ name: s.name ?? undefined,
+ dtype: s.dtype,
+ });
+}
+
+// ─── renameDataFrame ──────────────────────────────────────────────────────────
+
+/**
+ * Return a new DataFrame with renamed column and/or row-index labels.
+ *
+ * Mirrors `pandas.DataFrame.rename(columns=..., index=...)`.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, renameDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] });
+ * renameDataFrame(df, { columns: { a: "x", b: "y" } }).columns.values;
+ * // ["x", "y"]
+ * ```
+ */
+export function renameDataFrame(df: DataFrame, options: RenameDataFrameOptions): DataFrame {
+  const { columns: colMapper, index: idxMapper } = options;
+
+  // Column labels: mapped results are coerced to strings before being used as keys.
+  const colNames = df.columns.values as readonly string[];
+  const newColNames: string[] = colMapper
+    ? mapLabels(colMapper, colNames as readonly Label[]).map(String)
+    : [...colNames];
+
+  // Row labels: one shared Index instance is reused by every rebuilt column.
+  const rowLabels = df.index.values as readonly Label[];
+  const newRowLabels: Label[] = idxMapper ? mapLabels(idxMapper, rowLabels) : [...rowLabels];
+  const newRowIndex = new Index(newRowLabels);
+
+  // Re-key each column under its new name; the cell data is reused untouched.
+  const newColMap = new Map<string, Series<Scalar>>();
+  for (let i = 0; i < colNames.length; i++) {
+    const oldName = colNames[i];
+    const newName = newColNames[i];
+    if (oldName === undefined || newName === undefined) continue;
+    const col = df.col(oldName);
+    const newCol = new Series({
+      data: col.values,
+      index: newRowIndex,
+      dtype: col.dtype, // keep the column's dtype, as renameSeriesIndex does
+    });
+    newColMap.set(newName, newCol);
+  }
+
+  return new DataFrame(newColMap, newRowIndex, newColNames);
+}
+
+// ─── addPrefix / addSuffix ────────────────────────────────────────────────────
+
+/**
+ * Return a new DataFrame with `prefix` prepended to every column label.
+ *
+ * Mirrors `pandas.DataFrame.add_prefix(prefix)`.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, addPrefixDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [1], b: [2] });
+ * addPrefixDataFrame(df, "col_").columns.values;
+ * // ["col_a", "col_b"]
+ * ```
+ */
+export function addPrefixDataFrame(df: DataFrame, prefix: string): DataFrame {
+ return renameDataFrame(df, { columns: (label) => `${prefix}${String(label)}` });
+}
+
+/**
+ * Return a new DataFrame with `suffix` appended to every column label.
+ *
+ * Mirrors `pandas.DataFrame.add_suffix(suffix)`.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, addSuffixDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [1], b: [2] });
+ * addSuffixDataFrame(df, "_v1").columns.values;
+ * // ["a_v1", "b_v1"]
+ * ```
+ */
+export function addSuffixDataFrame(df: DataFrame, suffix: string): DataFrame {
+ return renameDataFrame(df, { columns: (label) => `${String(label)}${suffix}` });
+}
+
+/**
+ * Return a new Series with `prefix` prepended to every index label.
+ *
+ * Mirrors `pandas.Series.add_prefix(prefix)`.
+ *
+ * @example
+ * ```ts
+ * import { Series, addPrefixSeries } from "tsb";
+ *
+ * const s = new Series({ data: [1, 2], index: ["a", "b"] });
+ * addPrefixSeries(s, "x_").index.values;
+ * // ["x_a", "x_b"]
+ * ```
+ */
+export function addPrefixSeries(s: Series, prefix: string): Series {
+ return renameSeriesIndex(s, (label) => `${prefix}${String(label)}`);
+}
+
+/**
+ * Return a new Series with `suffix` appended to every index label.
+ *
+ * Mirrors `pandas.Series.add_suffix(suffix)`.
+ *
+ * @example
+ * ```ts
+ * import { Series, addSuffixSeries } from "tsb";
+ *
+ * const s = new Series({ data: [1, 2], index: ["a", "b"] });
+ * addSuffixSeries(s, "_end").index.values;
+ * // ["a_end", "b_end"]
+ * ```
+ */
+export function addSuffixSeries(s: Series, suffix: string): Series {
+ return renameSeriesIndex(s, (label) => `${String(label)}${suffix}`);
+}
+
+// ─── setAxisSeries ────────────────────────────────────────────────────────────
+
+/**
+ * Return a new Series with the given labels as its index.
+ *
+ * Mirrors `pandas.Series.set_axis(labels)`.
+ *
+ * @throws {RangeError} if `labels` length does not match the Series size.
+ *
+ * @example
+ * ```ts
+ * import { Series, setAxisSeries } from "tsb";
+ *
+ * const s = new Series({ data: [10, 20, 30] });
+ * setAxisSeries(s, ["x", "y", "z"]).index.values;
+ * // ["x", "y", "z"]
+ * ```
+ */
+export function setAxisSeries(
+ s: Series,
+ labels: readonly Label[],
+): Series {
+ if (labels.length !== s.size) {
+ throw new RangeError(
+ `set_axis: labels length ${labels.length} does not match Series size ${s.size}`,
+ );
+ }
+ return new Series({
+ data: s.values,
+ index: new Index(labels),
+ name: s.name ?? undefined,
+ dtype: s.dtype,
+ });
+}
+
+/**
+ * Return a new DataFrame with the given labels replacing the specified axis.
+ *
+ * Mirrors `pandas.DataFrame.set_axis(labels, axis=0|1)`:
+ * - `axis = 0` / `"index"` (default) — replace row index labels.
+ * - `axis = 1` / `"columns"` — replace column labels.
+ *
+ * @throws {RangeError} if `labels` length does not match the relevant axis size.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, setAxisDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] });
+ * setAxisDataFrame(df, ["r0", "r1"], 0).index.values; // ["r0", "r1"]
+ * setAxisDataFrame(df, ["x", "y"], 1).columns.values; // ["x", "y"]
+ * ```
+ */
+export function setAxisDataFrame(
+  df: DataFrame,
+  labels: readonly Label[],
+  axis: 0 | 1 | "index" | "columns" = 0,
+): DataFrame {
+  const colNames = df.columns.values as readonly string[];
+
+  if (axis === 1 || axis === "columns") {
+    if (labels.length !== colNames.length) {
+      throw new RangeError(
+        `set_axis: labels length ${labels.length} does not match columns count ${colNames.length}`,
+      );
+    }
+    // Pair every existing column name with its positional replacement once,
+    // rather than rescanning colNames with indexOf for each column.
+    const replacement = new Map<string, Label>();
+    colNames.forEach((name, i) => {
+      const next = labels[i];
+      if (next !== undefined && !replacement.has(name)) replacement.set(name, next);
+    });
+    return renameDataFrame(df, {
+      columns: (label) => {
+        const next = replacement.get(String(label));
+        return next !== undefined ? next : label;
+      },
+    });
+  }
+
+  // axis = 0: replace row index
+  if (labels.length !== df.index.size) {
+    throw new RangeError(
+      `set_axis: labels length ${labels.length} does not match row count ${df.index.size}`,
+    );
+  }
+  const newRowIndex = new Index(labels);
+  const newColMap = new Map<string, Series<Scalar>>();
+  for (const name of colNames) {
+    const col = df.col(name);
+    newColMap.set(name, new Series({ data: col.values, index: newRowIndex, dtype: col.dtype }));
+  }
+  return new DataFrame(newColMap, newRowIndex, [...colNames]);
+}
+
+// ─── seriesToFrame ────────────────────────────────────────────────────────────
+
+/**
+ * Convert a Series to a single-column DataFrame.
+ *
+ * Mirrors `pandas.Series.to_frame(name?)`:
+ * - The resulting DataFrame has one column whose name is `name` (if given)
+ * or the Series name, falling back to `0`.
+ * - The row index is the same as the Series index.
+ *
+ * @example
+ * ```ts
+ * import { Series, seriesToFrame } from "tsb";
+ *
+ * const s = new Series({ data: [1, 2, 3], name: "score" });
+ * seriesToFrame(s).columns.values; // ["score"]
+ * seriesToFrame(s, "points").columns.values; // ["points"]
+ * ```
+ */
+export function seriesToFrame(
+  s: Series,
+  name?: string | null,
+): DataFrame {
+  // Column name precedence: explicit `name`, then the Series name, then "0".
+  const colName = name ?? s.name ?? "0";
+  // The column is rebuilt on the Series' own index, which is also passed as
+  // the row index of the resulting frame.
+  const column = new Series({ data: s.values as readonly Scalar[], index: s.index });
+  const colMap = new Map<string, Series<Scalar>>([[colName, column]]);
+  return new DataFrame(colMap, s.index, [colName]);
+}
diff --git a/tests/stats/math_ops.test.ts b/tests/stats/math_ops.test.ts
new file mode 100644
index 00000000..3b79a8ba
--- /dev/null
+++ b/tests/stats/math_ops.test.ts
@@ -0,0 +1,232 @@
+/**
+ * Tests for math_ops — absSeries, absDataFrame, roundSeries, roundDataFrame.
+ *
+ * Covers:
+ * - absSeries: positive, negative, zero, null/NaN passthrough, non-numeric passthrough
+ * - absDataFrame: multi-column element-wise abs
+ * - roundSeries: 0/2/negative decimals, missing values
+ * - roundDataFrame: uniform decimals, per-column Record, missing columns default to 0
+ * - Property-based: abs is idempotent (abs(abs(x)) == abs(x))
+ * - Property-based: round preserves values within tolerance
+ */
+
+import { describe, expect, test } from "bun:test";
+import * as fc from "fast-check";
+import { DataFrame, Series, absDataFrame, absSeries, roundDataFrame, roundSeries } from "../../src/index.ts";
+import type { Scalar } from "../../src/index.ts";
+
+// ─── absSeries ────────────────────────────────────────────────────────────────
+
+describe("absSeries", () => {
+ test("positive values unchanged", () => {
+ const s = new Series({ data: [1, 2, 3] });
+ expect([...absSeries(s).values]).toEqual([1, 2, 3]);
+ });
+
+ test("negative values become positive", () => {
+ const s = new Series({ data: [-1, -5, -100] });
+ expect([...absSeries(s).values]).toEqual([1, 5, 100]);
+ });
+
+ test("zero stays zero", () => {
+ const s = new Series({ data: [0, -0] });
+ const r = absSeries(s).values;
+ expect(r[0]).toBe(0);
+ expect(r[1]).toBe(0);
+ });
+
+ test("null values pass through unchanged", () => {
+ const s = new Series({ data: [-1, null, -3] });
+ expect([...absSeries(s).values]).toEqual([1, null, 3]);
+ });
+
+ test("NaN passes through unchanged", () => {
+ const s = new Series({ data: [Number.NaN, -1] });
+ const r = absSeries(s).values;
+ expect(Number.isNaN(r[0] as number)).toBe(true);
+ expect(r[1]).toBe(1);
+ });
+
+ test("string values pass through unchanged", () => {
+ const s = new Series({ data: ["hello", "world"] });
+ expect([...absSeries(s).values]).toEqual(["hello", "world"]);
+ });
+
+ test("preserves index", () => {
+ const s = new Series({ data: [-1, -2], index: ["a", "b"] });
+ expect([...absSeries(s).index.values]).toEqual(["a", "b"]);
+ });
+
+ test("preserves name", () => {
+ const s = new Series({ data: [-1], name: "val" });
+ expect(absSeries(s).name).toBe("val");
+ });
+
+ test("does not mutate original", () => {
+ const s = new Series({ data: [-1, -2] });
+ absSeries(s);
+ expect([...s.values]).toEqual([-1, -2]);
+ });
+});
+
+// ─── absDataFrame ─────────────────────────────────────────────────────────────
+
+describe("absDataFrame", () => {
+ test("element-wise abs on all columns", () => {
+ const df = DataFrame.fromColumns({ a: [-1, 2], b: [3, -4] });
+ const r = absDataFrame(df);
+ expect([...r.col("a").values]).toEqual([1, 2]);
+ expect([...r.col("b").values]).toEqual([3, 4]);
+ });
+
+ test("null values preserved", () => {
+ const df = DataFrame.fromColumns({ a: [-1, null, -3] });
+ expect([...absDataFrame(df).col("a").values]).toEqual([1, null, 3]);
+ });
+
+ test("column names preserved", () => {
+ const df = DataFrame.fromColumns({ x: [-1], y: [-2] });
+ expect([...absDataFrame(df).columns.values]).toEqual(["x", "y"]);
+ });
+
+ test("index preserved", () => {
+ const df = DataFrame.fromColumns({ a: [-1, -2] }, { index: ["r0", "r1"] });
+ expect([...absDataFrame(df).index.values]).toEqual(["r0", "r1"]);
+ });
+});
+
+// ─── roundSeries ─────────────────────────────────────────────────────────────
+
+describe("roundSeries", () => {
+ test("round to 0 decimals (default)", () => {
+ const s = new Series({ data: [1.4, 1.5, 2.7] });
+ expect([...roundSeries(s).values]).toEqual([1, 2, 3]);
+ });
+
+ test("round to 2 decimals", () => {
+ const s = new Series({ data: [1.234, 5.678] });
+ expect([...roundSeries(s, 2).values]).toEqual([1.23, 5.68]);
+ });
+
+ test("round to negative decimals (nearest 10)", () => {
+ const s = new Series({ data: [14, 15, 26] });
+ expect([...roundSeries(s, -1).values]).toEqual([10, 20, 30]);
+ });
+
+ test("null values pass through", () => {
+ const s = new Series({ data: [1.5, null, 2.5] });
+ const r = roundSeries(s, 0).values;
+ expect(r[0]).toBe(2);
+ expect(r[1]).toBeNull();
+ expect(r[2]).toBe(3);
+ });
+
+ test("NaN passes through", () => {
+ const s = new Series({ data: [Number.NaN] });
+ const r = roundSeries(s, 2).values;
+ expect(Number.isNaN(r[0] as number)).toBe(true);
+ });
+
+ test("string values pass through", () => {
+ const s = new Series({ data: ["abc"] });
+ expect([...roundSeries(s, 2).values]).toEqual(["abc"]);
+ });
+
+ test("preserves index and name", () => {
+ const s = new Series({ data: [1.1, 2.2], index: ["a", "b"], name: "v" });
+ const r = roundSeries(s, 1);
+ expect([...r.index.values]).toEqual(["a", "b"]);
+ expect(r.name).toBe("v");
+ });
+});
+
+// ─── roundDataFrame ───────────────────────────────────────────────────────────
+
+describe("roundDataFrame", () => {
+ test("uniform decimals applied to all columns", () => {
+ const df = DataFrame.fromColumns({ a: [1.111, 2.222], b: [3.333, 4.444] });
+ const r = roundDataFrame(df, 2);
+ expect([...r.col("a").values]).toEqual([1.11, 2.22]);
+ expect([...r.col("b").values]).toEqual([3.33, 4.44]);
+ });
+
+ test("per-column Record", () => {
+ const df = DataFrame.fromColumns({ a: [1.5, 2.5], b: [3.33, 4.44] });
+ const r = roundDataFrame(df, { a: 0, b: 1 });
+ expect([...r.col("a").values]).toEqual([2, 3]);
+ expect([...r.col("b").values]).toEqual([3.3, 4.4]);
+ });
+
+ test("columns not in Record default to 0 decimals", () => {
+ const df = DataFrame.fromColumns({ a: [1.7], b: [2.3] });
+ const r = roundDataFrame(df, { a: 1 }); // b not specified
+ expect([...r.col("a").values]).toEqual([1.7]);
+ expect([...r.col("b").values]).toEqual([2]);
+ });
+
+ test("default (no arg) rounds to 0 decimals", () => {
+ const df = DataFrame.fromColumns({ x: [1.9] });
+ expect([...roundDataFrame(df).col("x").values]).toEqual([2]);
+ });
+
+ test("column names preserved", () => {
+ const df = DataFrame.fromColumns({ a: [1.1], b: [2.2] });
+ expect([...roundDataFrame(df, 1).columns.values]).toEqual(["a", "b"]);
+ });
+
+ test("index preserved", () => {
+ const df = DataFrame.fromColumns({ a: [1.1] }, { index: ["r0"] });
+ expect([...roundDataFrame(df, 1).index.values]).toEqual(["r0"]);
+ });
+
+ test("null values preserved", () => {
+ const df = DataFrame.fromColumns({ a: [1.5, null] });
+ const r = roundDataFrame(df, 0).col("a").values;
+ expect(r[0]).toBe(2);
+ expect(r[1]).toBeNull();
+ });
+});
+
+// ─── Property-based ───────────────────────────────────────────────────────────
+
+describe("math_ops property tests", () => {
+ test("abs is idempotent: abs(abs(x)) == abs(x)", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.float({ noNaN: true, noDefaultInfinity: true }), { minLength: 1, maxLength: 20 }),
+ (data) => {
+ const s = new Series({ data });
+ const once = absSeries(s);
+ const twice = absSeries(once);
+ expect([...twice.values]).toEqual([...once.values]);
+ },
+ ),
+ );
+ });
+
+ test("abs values are all >= 0", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.float({ noNaN: true, noDefaultInfinity: true }), { minLength: 1, maxLength: 20 }),
+ (data) => {
+ const s = new Series({ data });
+ for (const v of absSeries(s).values) {
+ expect((v as number) >= 0).toBe(true);
+ }
+ },
+ ),
+ );
+ });
+
+ test("round preserves integer values (0 decimals)", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.integer({ min: -1000, max: 1000 }), { minLength: 1, maxLength: 20 }),
+ (data) => {
+ const s = new Series({ data });
+ expect([...roundSeries(s, 0).values]).toEqual([...s.values]);
+ },
+ ),
+ );
+ });
+});
diff --git a/tests/stats/rename_ops.test.ts b/tests/stats/rename_ops.test.ts
new file mode 100644
index 00000000..18abb963
--- /dev/null
+++ b/tests/stats/rename_ops.test.ts
@@ -0,0 +1,310 @@
+/**
+ * Tests for rename_ops — renameSeriesIndex, renameDataFrame, addPrefix/addSuffix,
+ * setAxisSeries, setAxisDataFrame, seriesToFrame.
+ *
+ * Covers:
+ * - renameSeriesIndex: mapping, function, partial mapping
+ * - renameDataFrame: columns mapping, index mapping, both combined
+ * - addPrefixDataFrame / addSuffixDataFrame
+ * - addPrefixSeries / addSuffixSeries
+ * - setAxisSeries / setAxisDataFrame (axis 0 and 1)
+ * - seriesToFrame: default name, explicit name, fallback to "0"
+ * - Error paths for mismatched label counts
+ * - Property-based: rename/set_axis preserve values
+ */
+
+import { describe, expect, test } from "bun:test";
+import * as fc from "fast-check";
+import {
+ DataFrame,
+ Series,
+ addPrefixDataFrame,
+ addPrefixSeries,
+ addSuffixDataFrame,
+ addSuffixSeries,
+ renameDataFrame,
+ renameSeriesIndex,
+ seriesToFrame,
+ setAxisDataFrame,
+ setAxisSeries,
+} from "../../src/index.ts";
+import type { Scalar } from "../../src/index.ts";
+
+// ─── renameSeriesIndex ────────────────────────────────────────────────────────
+
+describe("renameSeriesIndex", () => { // index relabelling via Record or function mapper
+ test("rename with Record mapping — partial", () => {
+ const s = new Series({ data: [1, 2, 3], index: ["a", "b", "c"] });
+ const result = renameSeriesIndex(s, { a: "x", c: "z" }); // "b" is deliberately left unmapped
+ expect([...result.index.values]).toEqual(["x", "b", "z"]);
+ expect([...result.values]).toEqual([1, 2, 3]);
+ });
+
+ test("rename with function mapper", () => {
+ const s = new Series({ data: [10, 20], index: ["foo", "bar"] });
+ const result = renameSeriesIndex(s, (l) => String(l).toUpperCase());
+ expect([...result.index.values]).toEqual(["FOO", "BAR"]);
+ });
+
+ test("rename does not mutate original", () => {
+ const s = new Series({ data: [1], index: ["a"] });
+ renameSeriesIndex(s, { a: "z" }); // result discarded; only the input Series is inspected
+ expect(s.index.at(0)).toBe("a");
+ });
+
+ test("rename preserves name", () => {
+ const s = new Series({ data: [1, 2], index: ["a", "b"], name: "col" });
+ const r = renameSeriesIndex(s, { a: "x" });
+ expect(r.name).toBe("col");
+ });
+
+ test("rename with empty mapping returns same labels", () => {
+ const s = new Series({ data: [1, 2, 3], index: ["p", "q", "r"] });
+ const r = renameSeriesIndex(s, {}); // no entries, so every label should pass through
+ expect([...r.index.values]).toEqual(["p", "q", "r"]);
+ });
+});
+
+// ─── renameDataFrame ──────────────────────────────────────────────────────────
+
+describe("renameDataFrame", () => { // relabels columns and/or row index; cell values must be untouched
+ test("rename columns via Record", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] });
+ const result = renameDataFrame(df, { columns: { a: "x", b: "y" } });
+ expect([...result.columns.values]).toEqual(["x", "y"]);
+ expect([...result.col("x").values]).toEqual([1, 2]); // data is reachable under the new names
+ expect([...result.col("y").values]).toEqual([3, 4]);
+ });
+
+ test("rename columns via function", () => {
+ const df = DataFrame.fromColumns({ alpha: [1], beta: [2] });
+ const result = renameDataFrame(df, { columns: (c) => String(c).slice(0, 1) }); // keep only the first character of each label
+ expect([...result.columns.values]).toEqual(["a", "b"]);
+ });
+
+ test("rename index labels via Record", () => {
+ const df = DataFrame.fromColumns({ v: [10, 20] }, { index: ["r0", "r1"] });
+ const result = renameDataFrame(df, { index: { r0: "row0", r1: "row1" } });
+ expect([...result.index.values]).toEqual(["row0", "row1"]);
+ });
+
+ test("rename both columns and index simultaneously", () => {
+ const df = DataFrame.fromColumns({ a: [1] }, { index: ["x"] });
+ const result = renameDataFrame(df, { columns: { a: "A" }, index: { x: "X" } }); // both axes in one call
+ expect([...result.columns.values]).toEqual(["A"]);
+ expect(result.index.at(0)).toBe("X");
+ });
+
+ test("partial rename leaves other columns unchanged", () => {
+ const df = DataFrame.fromColumns({ a: [1], b: [2], c: [3] });
+ const result = renameDataFrame(df, { columns: { b: "B" } }); // only "b" is mapped
+ expect([...result.columns.values]).toEqual(["a", "B", "c"]);
+ });
+
+ test("values are preserved after rename", () => {
+ const df = DataFrame.fromColumns({ x: [7, 8, 9] });
+ const result = renameDataFrame(df, { columns: { x: "y" } });
+ expect([...result.col("y").values]).toEqual([7, 8, 9]);
+ });
+});
+
+// ─── addPrefixDataFrame / addSuffixDataFrame ───────────────────────────────────
+
+describe("addPrefixDataFrame", () => { // prefix is applied to column labels
+ test("adds prefix to all columns", () => {
+ const df = DataFrame.fromColumns({ a: [1], b: [2] });
+ expect([...addPrefixDataFrame(df, "col_").columns.values]).toEqual(["col_a", "col_b"]);
+ });
+
+ test("empty prefix leaves columns unchanged", () => {
+ const df = DataFrame.fromColumns({ a: [1], b: [2] });
+ expect([...addPrefixDataFrame(df, "").columns.values]).toEqual(["a", "b"]);
+ });
+
+ test("values preserved", () => {
+ const df = DataFrame.fromColumns({ x: [99] });
+ expect([...addPrefixDataFrame(df, "p_").col("p_x").values]).toEqual([99]); // looked up under the prefixed name
+ });
+});
+
+describe("addSuffixDataFrame", () => { // suffix is applied to column labels
+ test("adds suffix to all columns", () => {
+ const df = DataFrame.fromColumns({ a: [1], b: [2] });
+ expect([...addSuffixDataFrame(df, "_v1").columns.values]).toEqual(["a_v1", "b_v1"]);
+ });
+
+ test("empty suffix leaves columns unchanged", () => {
+ const df = DataFrame.fromColumns({ a: [1] });
+ expect([...addSuffixDataFrame(df, "").columns.values]).toEqual(["a"]);
+ });
+});
+
+// ─── addPrefixSeries / addSuffixSeries ────────────────────────────────────────
+
+describe("addPrefixSeries", () => { // for a Series the prefix targets the index labels
+ test("adds prefix to all index labels", () => {
+ const s = new Series({ data: [1, 2], index: ["a", "b"] });
+ expect([...addPrefixSeries(s, "x_").index.values]).toEqual(["x_a", "x_b"]);
+ });
+
+ test("values preserved", () => {
+ const s = new Series({ data: [10, 20], index: ["a", "b"] });
+ expect([...addPrefixSeries(s, "p_").values]).toEqual([10, 20]); // only labels change, not data
+ });
+});
+
+describe("addSuffixSeries", () => { // for a Series the suffix targets the index labels
+ test("adds suffix to all index labels", () => {
+ const s = new Series({ data: [1, 2], index: ["a", "b"] });
+ expect([...addSuffixSeries(s, "_end").index.values]).toEqual(["a_end", "b_end"]);
+ });
+});
+
+// ─── setAxisSeries ────────────────────────────────────────────────────────────
+
+describe("setAxisSeries", () => { // wholesale replacement of a Series index
+ test("replaces index with new labels", () => {
+ const s = new Series({ data: [10, 20, 30] }); // no explicit index supplied
+ const r = setAxisSeries(s, ["x", "y", "z"]);
+ expect([...r.index.values]).toEqual(["x", "y", "z"]);
+ expect([...r.values]).toEqual([10, 20, 30]);
+ });
+
+ test("preserves name", () => {
+ const s = new Series({ data: [1], name: "col" });
+ expect(setAxisSeries(s, ["a"]).name).toBe("col");
+ });
+
+ test("throws on length mismatch", () => {
+ const s = new Series({ data: [1, 2, 3] });
+ expect(() => setAxisSeries(s, ["a", "b"])).toThrow(RangeError); // 2 labels for 3 values
+ });
+
+ test("does not mutate original", () => {
+ const s = new Series({ data: [1, 2], index: ["a", "b"] });
+ setAxisSeries(s, ["x", "y"]); // result discarded; only the input Series is inspected
+ expect(s.index.at(0)).toBe("a");
+ });
+});
+
+// ─── setAxisDataFrame ─────────────────────────────────────────────────────────
+
+describe("setAxisDataFrame", () => { // axis 0 / "index" = row labels, axis 1 / "columns" = column labels
+ test("axis=0 replaces row index", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2] });
+ const r = setAxisDataFrame(df, ["r0", "r1"], 0);
+ expect([...r.index.values]).toEqual(["r0", "r1"]);
+ expect([...r.col("a").values]).toEqual([1, 2]);
+ });
+
+ test("axis=1 replaces column labels", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] });
+ const r = setAxisDataFrame(df, ["x", "y"], 1);
+ expect([...r.columns.values]).toEqual(["x", "y"]);
+ expect([...r.col("x").values]).toEqual([1, 2]); // former column "a" is now reachable as "x"
+ });
+
+ test(`axis="index" same as axis=0`, () => {
+ const df = DataFrame.fromColumns({ a: [1, 2] });
+ const r = setAxisDataFrame(df, ["p", "q"], "index");
+ expect([...r.index.values]).toEqual(["p", "q"]);
+ });
+
+ test(`axis="columns" same as axis=1`, () => {
+ const df = DataFrame.fromColumns({ a: [1] });
+ const r = setAxisDataFrame(df, ["z"], "columns");
+ expect([...r.columns.values]).toEqual(["z"]);
+ });
+
+ test("throws when row label count mismatches", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2] });
+ expect(() => setAxisDataFrame(df, ["only_one"], 0)).toThrow(RangeError); // 1 label for 2 rows
+ });
+
+ test("throws when column label count mismatches", () => {
+ const df = DataFrame.fromColumns({ a: [1], b: [2] });
+ expect(() => setAxisDataFrame(df, ["only_one"], 1)).toThrow(RangeError); // 1 label for 2 columns
+ });
+});
+
+// ─── seriesToFrame ────────────────────────────────────────────────────────────
+
+describe("seriesToFrame", () => { // wraps a Series as a one-column DataFrame
+ test("uses Series name as column", () => {
+ const s = new Series({ data: [1, 2, 3], name: "score" });
+ const df = seriesToFrame(s);
+ expect([...df.columns.values]).toEqual(["score"]);
+ expect([...df.col("score").values]).toEqual([1, 2, 3]);
+ });
+
+ test("explicit name overrides Series name", () => {
+ const s = new Series({ data: [1, 2], name: "old" });
+ const df = seriesToFrame(s, "new"); // second argument wins over s.name
+ expect([...df.columns.values]).toEqual(["new"]);
+ });
+
+ test("falls back to '0' when Series has no name", () => {
+ const s = new Series({ data: [5, 6] }); // constructed without a name
+ const df = seriesToFrame(s);
+ expect([...df.columns.values]).toEqual(["0"]); // the fallback label is the string "0"
+ expect([...df.col("0").values]).toEqual([5, 6]);
+ });
+
+ test("preserves index", () => {
+ const s = new Series({ data: [1, 2], index: ["a", "b"] });
+ const df = seriesToFrame(s);
+ expect([...df.index.values]).toEqual(["a", "b"]);
+ });
+
+ test("null name falls back to '0'", () => {
+ const s = new Series({ data: [1] });
+ const df = seriesToFrame(s, null); // explicit null is expected to behave like an omitted name
+ expect([...df.columns.values]).toEqual(["0"]);
+ });
+});
+
+// ─── Property-based ───────────────────────────────────────────────────────────
+
+describe("rename_ops property tests", () => { // randomized checks that relabelling never alters data
+ test("renameSeriesIndex preserves values and size", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.integer({ min: -100, max: 100 }), { minLength: 1, maxLength: 10 }),
+ (data) => {
+ const s = new Series({ data }); // no explicit index supplied
+ const r = renameSeriesIndex(s, (l) => `r_${String(l)}`); // String() so the mapper works for any label type
+ expect(r.size).toBe(s.size);
+ expect([...r.values]).toEqual([...s.values]);
+ },
+ ),
+ );
+ });
+
+ test("setAxisSeries sets exactly the supplied labels", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.integer({ min: 0, max: 99 }), { minLength: 1, maxLength: 8 }),
+ (data) => {
+ const labels = data.map((_, i) => `lbl${i}`); // one label per element, so lengths always match
+ const s = new Series({ data });
+ const r = setAxisSeries(s, labels);
+ expect([...r.index.values]).toEqual(labels);
+ expect([...r.values]).toEqual([...s.values]);
+ },
+ ),
+ );
+ });
+
+ test("seriesToFrame single column contains original values", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.oneof(fc.integer(), fc.constant(null)), { minLength: 0, maxLength: 10 }), // may be empty and may contain nulls
+ (data) => {
+ const s = new Series({ data, name: "v" });
+ const df = seriesToFrame(s);
+ expect([...df.col("v").values]).toEqual([...s.values]);
+ },
+ ),
+ );
+ });
+});
From 61ed13e5cb093918fba30338e0f1bc09c3e90b8d Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 22 Apr 2026 10:48:40 +0000
Subject: [PATCH 14/30] Iteration 244: +dot_matmul
(seriesDotSeries/DataFrame/dataFrameDotSeries/DataFrame) +transform_agg
(seriesTransform/dataFrameTransform)
Run: https://github.com/githubnext/tsessebe/actions/runs/24773518651
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
playground/dot_matmul.html | 121 ++++++++
playground/transform_agg.html | 147 +++++++++
src/index.ts | 8 +
src/stats/dot_matmul.ts | 285 +++++++++++++++++
src/stats/index.ts | 8 +
src/stats/transform_agg.ts | 497 ++++++++++++++++++++++++++++++
tests/stats/dot_matmul.test.ts | 240 +++++++++++++++
tests/stats/transform_agg.test.ts | 404 ++++++++++++++++++++++++
8 files changed, 1710 insertions(+)
create mode 100644 playground/dot_matmul.html
create mode 100644 playground/transform_agg.html
create mode 100644 src/stats/dot_matmul.ts
create mode 100644 src/stats/transform_agg.ts
create mode 100644 tests/stats/dot_matmul.test.ts
create mode 100644 tests/stats/transform_agg.test.ts
diff --git a/playground/dot_matmul.html b/playground/dot_matmul.html
new file mode 100644
index 00000000..5054f8bb
--- /dev/null
+++ b/playground/dot_matmul.html
@@ -0,0 +1,121 @@
+
+
+
+
+
+ dot_matmul — dot product & matrix multiply — tsb playground
+
+
+
+
dot_matmul — dot product & matrix multiplication
+
+ Dot product and matrix multiplication for Series and DataFrame.
+ Mirrors pandas.Series.dot() and pandas.DataFrame.dot().
+ Index alignment is performed automatically (inner join on shared labels).
+
+
+
API
+
+import { seriesDotSeries, seriesDotDataFrame, dataFrameDotSeries, dataFrameDotDataFrame } from "tsb";
+
+// Series · Series → scalar
+seriesDotSeries(a, b);
+
+// Series · DataFrame → Series
+seriesDotDataFrame(s, df);
+
+// DataFrame · Series → Series
+dataFrameDotSeries(df, s);
+
+// DataFrame · DataFrame → DataFrame
+dataFrameDotDataFrame(A, B);
+
+
+
Interactive Demo
+
+
+
+
+
Click a button above to run an example.
+
+
Examples
+
+// Series dot product
+const a = new Series({ data: [1, 2, 3], index: ["x","y","z"] });
+const b = new Series({ data: [4, 5, 6], index: ["x","y","z"] });
+seriesDotSeries(a, b); // 1*4 + 2*5 + 3*6 = 32
+
+// DataFrame · vector
+const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] });
+const v = new Series({ data: [1, 1], index: ["a", "b"] });
+dataFrameDotSeries(df, v).values; // [4, 6] (row sums)
+
+// Matrix multiply
+const A = DataFrame.fromColumns({ k: [1, 2] }); // 2×1
+const B = DataFrame.fromColumns({ r: [3] }, { index: ["k"] }); // 1×1, row index ["k"]
+dataFrameDotDataFrame(A, B).col("r").values; // [3, 6]
+
+ Apply one or more functions to a Series or DataFrame and return a result with the
+ same index as the input; functions that return a scalar (e.g. "sum") are broadcast to full length.
+ Mirrors pandas.Series.transform() and pandas.DataFrame.transform().
+
+
+
API
+
+import { seriesTransform, dataFrameTransform } from "tsb";
+
+// single function or built-in name → Series
+seriesTransform(s, "cumsum");
+seriesTransform(s, (x) => x);
+
+// array → DataFrame (one column per function)
+seriesTransform(s, ["sum", "cumsum", "mean"]);
+
+// Record → DataFrame with named columns
+seriesTransform(s, { total: "sum", running: "cumsum" });
+
+// DataFrame transform (column-wise by default)
+dataFrameTransform(df, "cumsum");
+dataFrameTransform(df, { a: "sum", b: "cummin" }); // per-column
+dataFrameTransform(df, "cumsum", { axis: 1 }); // row-wise
+