From b2e093364eea2a1d33e012a3058b5eb3d15411b0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 12 Apr 2026 12:04:27 +0000 Subject: [PATCH 1/8] Initial plan From 74414650bd931081b9896b490bec62f2fc462e18 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 12 Apr 2026 12:32:47 +0000 Subject: [PATCH 2/8] Merge unique features from PR #120 and PR #81 (iter136) - Added 13 unique feature modules from PR #120: astype, read_excel, clip_advanced, idxmin_idxmax, mode, nancumops, nunique, pct_change, quantile, replace, sem_var, skew_kurt, to_datetime - Added 11 unique feature modules from PR #81 (iter136): api_types, attrs, insert_pop, to_from_dict, categorical_ops, format_ops, numeric_extended, string_ops, string_ops_extended, window_extended, rolling_apply - Fixed TypeScript errors in new modules (mode, pct_change, to_datetime, to_from_dict, string_ops_extended, window_extended tests) - Fixed test expectations in rolling_apply (min window, count function) - Updated barrel exports in core/index.ts, stats/index.ts, io/index.ts, window/index.ts, and index.ts - All 24 new feature modules pass their tests (598 + 399 = 997 new tests) Co-authored-by: mrjf <180956+mrjf@users.noreply.github.com> --- playground/api_types.html | 222 ++++++++ playground/astype.html | 438 ++++++++++++++++ playground/attrs.html | 183 +++++++ playground/categorical_ops.html | 338 +++++++++++++ playground/clip_advanced.html | 163 ++++++ playground/excel.html | 561 +++++++++++++++++++++ playground/format_ops.html | 262 ++++++++++ playground/idxmin_idxmax.html | 439 ++++++++++++++++ playground/index.html | 157 +++++- playground/insert_pop.html | 172 +++++++ playground/mode.html | 125 +++++ playground/nancumops.html | 295 +++++++++++ playground/numeric_extended.html | 353 +++++++++++++ playground/nunique.html | 112 ++++ playground/pct_change.html | 448 ++++++++++++++++ 
playground/quantile.html | 182 +++++++ playground/replace.html | 408 +++++++++++++++ playground/rolling_apply.html | 225 +++++++++ playground/sem_var.html | 90 ++++ playground/skew_kurt.html | 137 +++++ playground/string_ops.html | 282 +++++++++++ playground/string_ops_extended.html | 413 +++++++++++++++ playground/to_datetime.html | 118 +++++ playground/to_from_dict.html | 122 +++++ playground/window_extended.html | 304 +++++++++++ src/core/api_types.ts | 629 +++++++++++++++++++++++ src/core/astype.ts | 245 +++++++++ src/core/attrs.ts | 291 +++++++++++ src/core/index.ts | 9 + src/core/insert_pop.ts | 214 ++++++++ src/core/to_from_dict.ts | 284 +++++++++++ src/index.ts | 172 ++++++- src/io/index.ts | 2 + src/io/read_excel.ts | 645 ++++++++++++++++++++++++ src/stats/categorical_ops.ts | 483 ++++++++++++++++++ src/stats/clip_advanced.ts | 290 +++++++++++ src/stats/format_ops.ts | 442 ++++++++++++++++ src/stats/idxmin_idxmax.ts | 234 +++++++++ src/stats/index.ts | 167 ++++++ src/stats/mode.ts | 305 +++++++++++ src/stats/nancumops.ts | 272 ++++++++++ src/stats/numeric_extended.ts | 586 +++++++++++++++++++++ src/stats/nunique.ts | 291 +++++++++++ src/stats/pct_change.ts | 231 +++++++++ src/stats/quantile.ts | 361 +++++++++++++ src/stats/replace.ts | 237 +++++++++ src/stats/sem_var.ts | 283 +++++++++++ src/stats/skew_kurt.ts | 324 ++++++++++++ src/stats/string_ops.ts | 468 +++++++++++++++++ src/stats/string_ops_extended.ts | 429 ++++++++++++++++ src/stats/to_datetime.ts | 312 ++++++++++++ src/stats/window_extended.ts | 321 ++++++++++++ src/window/index.ts | 2 + src/window/rolling_apply.ts | 323 ++++++++++++ tests/core/api_types.test.ts | 621 +++++++++++++++++++++++ tests/core/astype.test.ts | 292 +++++++++++ tests/core/attrs.test.ts | 542 ++++++++++++++++++++ tests/core/insert_pop.test.ts | 286 +++++++++++ tests/core/to_from_dict.test.ts | 278 ++++++++++ tests/io/read_excel.test.ts | 494 ++++++++++++++++++ tests/stats/categorical_ops.test.ts | 476 +++++++++++++++++ 
tests/stats/clip_advanced.test.ts | 215 ++++++++ tests/stats/format_ops.test.ts | 568 +++++++++++++++++++++ tests/stats/idxmin_idxmax.test.ts | 270 ++++++++++ tests/stats/mode.test.ts | 199 ++++++++ tests/stats/nancumops.test.ts | 264 ++++++++++ tests/stats/numeric_extended.test.ts | 509 +++++++++++++++++++ tests/stats/nunique.test.ts | 238 +++++++++ tests/stats/pct_change.test.ts | 246 +++++++++ tests/stats/quantile.test.ts | 364 +++++++++++++ tests/stats/replace.test.ts | 246 +++++++++ tests/stats/sem_var.test.ts | 212 ++++++++ tests/stats/skew_kurt.test.ts | 232 +++++++++ tests/stats/string_ops.test.ts | 459 +++++++++++++++++ tests/stats/string_ops_extended.test.ts | 437 ++++++++++++++++ tests/stats/to_datetime.test.ts | 304 +++++++++++ tests/stats/window_extended.test.ts | 365 ++++++++++++++ tests/window/rolling_apply.test.ts | 354 +++++++++++++ 78 files changed, 23862 insertions(+), 10 deletions(-) create mode 100644 playground/api_types.html create mode 100644 playground/astype.html create mode 100644 playground/attrs.html create mode 100644 playground/categorical_ops.html create mode 100644 playground/clip_advanced.html create mode 100644 playground/excel.html create mode 100644 playground/format_ops.html create mode 100644 playground/idxmin_idxmax.html create mode 100644 playground/insert_pop.html create mode 100644 playground/mode.html create mode 100644 playground/nancumops.html create mode 100644 playground/numeric_extended.html create mode 100644 playground/nunique.html create mode 100644 playground/pct_change.html create mode 100644 playground/quantile.html create mode 100644 playground/replace.html create mode 100644 playground/rolling_apply.html create mode 100644 playground/sem_var.html create mode 100644 playground/skew_kurt.html create mode 100644 playground/string_ops.html create mode 100644 playground/string_ops_extended.html create mode 100644 playground/to_datetime.html create mode 100644 playground/to_from_dict.html create mode 100644 
playground/window_extended.html create mode 100644 src/core/api_types.ts create mode 100644 src/core/astype.ts create mode 100644 src/core/attrs.ts create mode 100644 src/core/insert_pop.ts create mode 100644 src/core/to_from_dict.ts create mode 100644 src/io/read_excel.ts create mode 100644 src/stats/categorical_ops.ts create mode 100644 src/stats/clip_advanced.ts create mode 100644 src/stats/format_ops.ts create mode 100644 src/stats/idxmin_idxmax.ts create mode 100644 src/stats/mode.ts create mode 100644 src/stats/nancumops.ts create mode 100644 src/stats/numeric_extended.ts create mode 100644 src/stats/nunique.ts create mode 100644 src/stats/pct_change.ts create mode 100644 src/stats/quantile.ts create mode 100644 src/stats/replace.ts create mode 100644 src/stats/sem_var.ts create mode 100644 src/stats/skew_kurt.ts create mode 100644 src/stats/string_ops.ts create mode 100644 src/stats/string_ops_extended.ts create mode 100644 src/stats/to_datetime.ts create mode 100644 src/stats/window_extended.ts create mode 100644 src/window/rolling_apply.ts create mode 100644 tests/core/api_types.test.ts create mode 100644 tests/core/astype.test.ts create mode 100644 tests/core/attrs.test.ts create mode 100644 tests/core/insert_pop.test.ts create mode 100644 tests/core/to_from_dict.test.ts create mode 100644 tests/io/read_excel.test.ts create mode 100644 tests/stats/categorical_ops.test.ts create mode 100644 tests/stats/clip_advanced.test.ts create mode 100644 tests/stats/format_ops.test.ts create mode 100644 tests/stats/idxmin_idxmax.test.ts create mode 100644 tests/stats/mode.test.ts create mode 100644 tests/stats/nancumops.test.ts create mode 100644 tests/stats/numeric_extended.test.ts create mode 100644 tests/stats/nunique.test.ts create mode 100644 tests/stats/pct_change.test.ts create mode 100644 tests/stats/quantile.test.ts create mode 100644 tests/stats/replace.test.ts create mode 100644 tests/stats/sem_var.test.ts create mode 100644 tests/stats/skew_kurt.test.ts 
create mode 100644 tests/stats/string_ops.test.ts create mode 100644 tests/stats/string_ops_extended.test.ts create mode 100644 tests/stats/to_datetime.test.ts create mode 100644 tests/stats/window_extended.test.ts create mode 100644 tests/window/rolling_apply.test.ts diff --git a/playground/api_types.html b/playground/api_types.html new file mode 100644 index 00000000..05b8347c --- /dev/null +++ b/playground/api_types.html @@ -0,0 +1,222 @@ + + + + + + tsb — api_types: Runtime type-checking predicates + + + +

📦 api_types — Runtime type-checking predicates

+

+ Port of pandas.api.types. + Two groups of predicates: + value-level (work on arbitrary JS values) and + dtype-level (work on Dtype instances or dtype name strings). +

+ +

Value-Level Predicates

+ +

isScalar(val)

+

Returns true for primitives and Date. Mirrors pd.api.types.is_scalar.

+
import { isScalar } from "tsb";
+
+isScalar(42);            // true
+isScalar("hello");       // true
+isScalar(null);          // true
+isScalar(new Date());    // true
+isScalar([1, 2]);        // false
+isScalar({ a: 1 });      // false
+
+ +

isListLike(val)

+

Returns true for iterables (excluding strings) and objects with a numeric length.

+
isListLike([1, 2, 3]);      // true
+isListLike(new Set([1]));   // true
+isListLike("abc");          // false
+isListLike(42);             // false
+
+ +

isArrayLike(val)

+

Returns true for values with a non-negative integer length (including strings).

+
isArrayLike([1, 2]);      // true
+isArrayLike("hello");     // true
+isArrayLike(42);          // false
+ +

isDictLike(val)

+

Returns true for plain objects and Map.

+
isDictLike({ a: 1 });   // true
+isDictLike(new Map());  // true
+isDictLike([]);         // false
+ +

isNumber / isBool / isStringValue / isFloat / isInteger

+
isNumber(3.14);      // true
+isNumber(NaN);       // true  (typeof NaN === "number")
+isBool(true);        // true
+isStringValue("hi"); // true
+isFloat(3.14);       // true
+isFloat(3.0);        // false  (integer value)
+isInteger(42);       // true
+isInteger(3.14);     // false
+
+ +

isMissing(val)

+

Returns true for null, undefined, or NaN.

+
isMissing(null);       // true
+isMissing(undefined);  // true
+isMissing(NaN);        // true
+isMissing(0);          // false
+ +

isHashable(val)

+

Returns true for values safe to use as object keys (primitives).

+
isHashable("key");  // true
+isHashable(42);     // true
+isHashable({});     // false
+ +

Dtype-Level Predicates

+

All accept a Dtype instance or a dtype name string.

+ +
import { Dtype, isNumericDtype, isFloatDtype, isIntegerDtype,
+         isSignedIntegerDtype, isUnsignedIntegerDtype,
+         isStringDtype, isDatetimeDtype, isCategoricalDtype,
+         isObjectDtype, isExtensionArrayDtype } from "tsb";
+
+isNumericDtype(Dtype.float64);    // true
+isNumericDtype("int32");          // true
+isNumericDtype("string");         // false
+
+isFloatDtype("float32");          // true
+isIntegerDtype("int64");          // true
+isUnsignedIntegerDtype("uint8");  // true
+isSignedIntegerDtype("int8");     // true
+isStringDtype("string");          // true
+isDatetimeDtype("datetime");      // true
+isCategoricalDtype("category");   // true
+isObjectDtype("object");          // true
+isExtensionArrayDtype("category"); // true
+isExtensionArrayDtype("int32");    // false
+
+ +

Complete Predicate Reference

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FunctionPandas equivalentDescription
isScalar(val)is_scalarPrimitive or Date
isListLike(val)is_list_likeIterable (not string) or has length
isArrayLike(val)is_array_likeHas non-negative integer length
isDictLike(val)is_dict_likePlain object or Map
isIterator(val)is_iteratorHas callable next method
isNumber(val)is_numbertypeof === "number"
isBool(val)is_booltypeof === "boolean"
isStringValue(val)is_stringtypeof === "string"
isFloat(val)is_floatFinite number with fractional part
isInteger(val)is_integerInteger-valued number
isBigInt(val)typeof === "bigint"
isRegExp(val)is_reRegExp instance
isReCompilable(val)is_re_compilableString or RegExp
isMissing(val)isnanull / undefined / NaN
isHashable(val)is_hashableSafe as object key (primitive)
isDate(val)Date instance
isNumericDtype(d)is_numeric_dtypeInt, uint, or float
isIntegerDtype(d)is_integer_dtypeAny integer (signed or unsigned)
isSignedIntegerDtype(d)is_signed_integer_dtypeint8–int64
isUnsignedIntegerDtype(d)is_unsigned_integer_dtypeuint8–uint64
isFloatDtype(d)is_float_dtypefloat32 or float64
isBoolDtype(d)is_bool_dtypebool
isStringDtype(d)is_string_dtypestring dtype
isDatetimeDtype(d)is_datetime64_dtypedatetime
isTimedeltaDtype(d)is_timedelta64_dtypetimedelta
isCategoricalDtype(d)is_categorical_dtypecategory
isObjectDtype(d)is_object_dtypeobject
isComplexDtype(d)is_complex_dtypeAlways false (no complex in tsb)
isExtensionArrayDtype(d)is_extension_array_dtypestring/object/datetime/timedelta/category
isPeriodDtype(d)is_period_dtypeMaps to datetime
isIntervalDtype(d)is_interval_dtypeNumeric dtypes
+ + + + diff --git a/playground/astype.html b/playground/astype.html new file mode 100644 index 00000000..efd9e5ed --- /dev/null +++ b/playground/astype.html @@ -0,0 +1,438 @@ + + + + + + tsb — astype + + + +
+
+

Loading tsb runtime…

+
+ + ← tsb playground +

astype — dtype coercion

+

+ Cast Series and DataFrame values to a different dtype. + Mirrors pandas.Series.astype and pandas.DataFrame.astype. +

+ + +
+

1 · Series — float to int64

+

+ Cast floating-point values to integers via truncation (same as + pandas.Series.astype("int64")). +

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

2 · Series — numbers to string

+

Convert every value to its string representation. Null/undefined values + become null (not the string "null").

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

3 · Overflow clamping for bounded integer dtypes

+

+ Values that overflow the target integer dtype's range are clamped to + [min, max] — e.g. uint8 is clamped to + [0, 255]. +

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

4 · DataFrame — cast all columns

+

Pass a single dtype name to cast every column to the same type.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

5 · DataFrame — per-column dtype mapping

+

Pass a Record<string, DtypeName> to cast individual + columns. Columns not listed are carried over unchanged.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

6 · Casting to bool

+

Zero, empty string, and NaN become false; + everything else (including non-zero numbers and non-empty strings) + becomes true.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

API Reference

+
// Series cast
+astypeSeries(
+  series: Series,
+  dtype: DtypeName | Dtype,
+  options?: AstypeOptions,
+): Series
+
+// DataFrame cast (all columns or per-column mapping)
+astype(
+  df: DataFrame,
+  dtype: DtypeName | Dtype | Record<string, DtypeName | Dtype>,
+  options?: DataFrameAstypeOptions,
+): DataFrame
+
+// Low-level scalar cast
+castScalar(value: Scalar, dtype: Dtype): Scalar
+
+// Options
+interface AstypeOptions {
+  errors?: "raise" | "ignore";  // default "raise"
+}
+
+// Supported dtype names
+type DtypeName =
+  | "int8" | "int16" | "int32" | "int64"
+  | "uint8" | "uint16" | "uint32" | "uint64"
+  | "float32" | "float64"
+  | "bool" | "string" | "object"
+  | "datetime" | "timedelta" | "category"
+
+ + + + + diff --git a/playground/attrs.html b/playground/attrs.html new file mode 100644 index 00000000..ae25d5f6 --- /dev/null +++ b/playground/attrs.html @@ -0,0 +1,183 @@ + + + + + + tsb — attrs: user-defined metadata + + + +

← tsb playground

+ +

attrs — User-Defined Metadata

+

+ Attach arbitrary key→value metadata to any Series or DataFrame + — mirrors + + pandas.DataFrame.attrs and + + pandas.Series.attrs. +

+ +
+ Design note: Because tsb objects are immutable (their data, index, + and dtype are frozen), attrs are stored in a WeakMap registry rather than as + instance properties. This means attrs are attached & detached without touching the object + itself, and garbage-collected automatically when the object is collected. +
+ +

Basic usage

+ +
import {
+  getAttrs, setAttrs, updateAttrs, copyAttrs, withAttrs,
+  clearAttrs, hasAttrs, getAttr, setAttr, deleteAttr,
+  attrsCount, attrsKeys, mergeAttrs,
+} from "tsb";
+import { DataFrame, Series } from "tsb";
+
+// ─── annotate a DataFrame ─────────────────────────────────────────────────
+const df = DataFrame.fromColumns({
+  temperature: [22.1, 23.5, 21.8],
+  humidity:    [55, 60, 58],
+});
+
+setAttrs(df, {
+  source: "weather_station_42",
+  unit:   "Celsius",
+  notes: "Morning readings",
+});
+
+getAttrs(df);
+// → { source: "weather_station_42", unit: "Celsius", notes: "Morning readings" }
+
+getAttr(df, "unit");     // → "Celsius"
+getAttr(df, "missing");  // → undefined
+attrsCount(df);          // → 3
+attrsKeys(df);           // → ["source", "unit", "notes"]
+hasAttrs(df);            // → true
+
+ +

Merging and updating

+ +
// updateAttrs merges: keys present in the update overwrite, all other existing keys are kept
+updateAttrs(df, { version: 2, notes: "Updated notes" });
+getAttrs(df);
+// → { source: "weather_station_42", unit: "Celsius", notes: "Updated notes", version: 2 }
+
+// setAttr / deleteAttr for single keys
+setAttr(df, "sensor_id", "WS-042");
+deleteAttr(df, "notes");
+getAttrs(df);
+// → { source: "weather_station_42", unit: "Celsius", version: 2, sensor_id: "WS-042" }
+
+ +

Propagating metadata to derived objects

+ +
// copyAttrs: copy all attrs from one object to another
+const s = new Series({ data: [22.1, 23.5, 21.8], name: "temperature" });
+setAttrs(s, { unit: "Celsius", source: "sensor_A" });
+
+const derived = new Series({ data: [71.8, 74.3, 71.2], name: "fahrenheit" });
+copyAttrs(s, derived);
+getAttrs(derived);
+// → { unit: "Celsius", source: "sensor_A" }
+
+// Then update the copy
+setAttr(derived, "unit", "Fahrenheit");
+getAttrs(derived);  // → { unit: "Fahrenheit", source: "sensor_A" }
+getAttrs(s);        // → { unit: "Celsius", source: "sensor_A" }  ← unchanged
+
+ +

Fluent helper — withAttrs

+ +
// withAttrs sets attrs and returns the same object reference
+// Handy for inline annotation
const base = DataFrame.fromColumns({ x: [1, 2, 3] });
+const annotated = withAttrs(
+  base,
+  { source: "lab_experiment", date: "2026-04-09" },
+);
+
+annotated === base;  // true — same reference, not a copy
+getAttrs(annotated);
+// → { source: "lab_experiment", date: "2026-04-09" }
+
+ +

Merging from multiple sources

+ +
// mergeAttrs: combine attrs from multiple objects into a target
+const s1 = new Series({ data: [1, 2, 3], name: "a" });
+const s2 = new Series({ data: [4, 5, 6], name: "b" });
+setAttrs(s1, { source: "sensor_A", unit: "kg" });
+setAttrs(s2, { source: "sensor_B", scale: 2.5 });
+
+const combined = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+mergeAttrs([s1, s2], combined);
+// Later sources win on conflicts: source="sensor_B"
+getAttrs(combined);
+// → { source: "sensor_B", unit: "kg", scale: 2.5 }
+
+ +

Clearing metadata

+ +
setAttrs(df, { x: 1, y: 2 });
+hasAttrs(df);   // → true
+attrsCount(df); // → 2
+
+clearAttrs(df);
+hasAttrs(df);   // → false
+getAttrs(df);   // → {}
+
+ +

API reference

+ + + + + + + + + + + + + + + + + + + + +
FunctionDescription
getAttrs(obj)Return a shallow copy of all stored attrs (empty {} if none)
setAttrs(obj, attrs)Overwrite attrs completely with the given record
updateAttrs(obj, updates)Merge updates into existing attrs (keys in updates overwrite; other keys kept)
withAttrs(obj, attrs)Fluent: set attrs and return the same object
copyAttrs(source, target)Copy all attrs from source to target
mergeAttrs(sources[], target)Merge attrs from multiple sources; later sources win
clearAttrs(obj)Remove all attrs from obj
hasAttrs(obj)Return true if any attrs are set
getAttr(obj, key)Get a single attr value (undefined if missing)
setAttr(obj, key, value)Set a single attr, preserving other keys
deleteAttr(obj, key)Delete a single attr key
attrsCount(obj)Number of stored attr keys
attrsKeys(obj)Array of stored attr key names
+ +

Comparison with pandas

+ + + + + + + + + + + + + + +
pandastsb
df.attrsgetAttrs(df)
df.attrs = {"k": "v"}setAttrs(df, { k: "v" })
df.attrs["k"] = "v"setAttr(df, "k", "v")
df.attrs["k"]getAttr(df, "k")
del df.attrs["k"]deleteAttr(df, "k")
df.attrs.update(d)updateAttrs(df, d)
df.attrs.clear()clearAttrs(df)
+ + diff --git a/playground/categorical_ops.html b/playground/categorical_ops.html new file mode 100644 index 00000000..c2d794ca --- /dev/null +++ b/playground/categorical_ops.html @@ -0,0 +1,338 @@ + + + + + + tsb — Categorical Ops + + + +
+
+ Loading tsb runtime… +
+ +← back to index +

🏷️ Categorical Ops

+

+ Standalone categorical utility functions that complement the Series.cat accessor. + Mirrors pd.Categorical.from_codes, set operations on categories, frequency helpers, + and cross-tabulation. +

+ + +
+

catFromCodes(codes, categories, opts?)

+

+ Construct a categorical Series from integer codes (0-based) and a categories array. + Code -1 maps to null (missing). Mirrors + pd.Categorical.from_codes. +

+ + +
+
+ + +
+

Category set operations

+

+ catUnionCategories, catIntersectCategories, + catDiffCategories, and catEqualCategories let you + combine or compare the category sets of two Series. +

+ + +
+
+ + +
+

catSortByFreq(series, opts?)

+

+ Reorder categories by their frequency in the data (most frequent first by default). + Mirrors s.cat.reorder_categories(s.value_counts().index). +

+ + +
+
+ + +
+

catToOrdinal(series, order)

+

+ Create an ordered categorical from a Series using order to define both the + category set and their rank. Values not in order become null. +

+ + +
+
+ + +
+

catFreqTable(series)

+

+ Return a plain Record<string, number> of counts per category. + Zero-frequency categories are included. +

+ + +
+
+ + +
+

catCrossTab(a, b, opts?)

+

+ Cross-tabulation of two categorical Series. Rows = a's categories, + columns = b's categories, cells = co-occurrence counts. + Supports margins and normalization. +

+ + +
+
+ + +
+

catRecode(series, mapping)

+

+ Rename categories via an object map or a transform function. Unmapped categories + are left unchanged. +

+ + +
+
+ + + + + diff --git a/playground/clip_advanced.html b/playground/clip_advanced.html new file mode 100644 index 00000000..eb200294 --- /dev/null +++ b/playground/clip_advanced.html @@ -0,0 +1,163 @@ + + + + + + tsb — clip_advanced (per-element clipping) + + + +

tsb — clip_advanced (per-element clipping)

+

+ Clip Series and DataFrame values to per-element bounds. + Unlike the simple scalar clip, clipAdvancedSeries and + clipAdvancedDataFrame support array, Series, and DataFrame bounds — + enabling per-position or element-wise bound specification. +

+ +

Core concept

+
// Scalar bounds (like pandas s.clip(lower=0, upper=5))
+clipAdvancedSeries(s, { lower: 0, upper: 5 })
+
+// Per-element array bounds
+clipAdvancedSeries(s, { lower: [1, 2, 3], upper: [4, 5, 6] })
+
+// Series bounds (positional alignment)
+clipAdvancedSeries(s, { lower: loSeries, upper: hiSeries })
+
+// DataFrame element-wise bounds
+clipAdvancedDataFrame(df, { lower: loDf, upper: hiDf })
+
+// Series broadcast on DataFrame (axis=0: one bound per column; axis=1: one per row)
+clipAdvancedDataFrame(df, { lower: loSeries, axis: 1 })
+ +
+ pandas equivalent:
+ s.clip(lower=lo_array, upper=hi_array)
+ df.clip(lower=lo_df, upper=hi_df) +
+ + +

Demo 1 — clipAdvancedSeries with scalar bounds

+
+
Code
+
const s = new Series({ data: [-3, 1, 5, 10] });
+clipAdvancedSeries(s, { lower: 0, upper: 6 }).values;
+// → [0, 1, 5, 6]
+ + +
+ + +

Demo 2 — clipAdvancedSeries with per-element array bounds

+
+
Code
+
const s = new Series({ data: [-1, 0, 5, 12] });
+const lo = [2, -1, 4, 10];
+const hi = [5,  3, 8, 11];
+clipAdvancedSeries(s, { lower: lo, upper: hi }).values;
+// → [2, 0, 5, 11]
+ + +
+ + +

Demo 3 — clipAdvancedSeries with Series bounds

+
+
Code
+
const s = new Series({ data: [0, 5, 10, 15] });
+const loBound = new Series({ data: [1, 3, 8, 12] });
+const hiBound = new Series({ data: [2, 7, 9, 20] });
+clipAdvancedSeries(s, { lower: loBound, upper: hiBound }).values;
+// → [1, 5, 9, 15]
+ + +
+ + +

Demo 4 — clipAdvancedDataFrame with DataFrame bounds

+
+
Code
+
const df = DataFrame.fromColumns({ a: [1, 5, 9], b: [2, 6, 10] });
+const lo = DataFrame.fromColumns({ a: [2, 3, 4], b: [1, 4, 8] });
+const hi = DataFrame.fromColumns({ a: [3, 7, 8], b: [5, 9, 12] });
+const result = clipAdvancedDataFrame(df, { lower: lo, upper: hi });
+result.col("a").values; // → [2, 5, 8]
+result.col("b").values; // → [2, 6, 10]
+ + +
+ + +

Demo 5 — clipAdvancedDataFrame with Series broadcast (axis=1)

+
+
Code
+
// axis=1: one lower bound per row
+const df = DataFrame.fromColumns({ a: [1, 5, 9], b: [2, 6, 10] });
+const loPerRow = new Series({ data: [0, 4, 10] });
+const result = clipAdvancedDataFrame(df, { lower: loPerRow, axis: 1 });
+result.col("a").values; // → [1, 5, 10]
+result.col("b").values; // → [2, 6, 10]
+ + +
+ + + + diff --git a/playground/excel.html b/playground/excel.html new file mode 100644 index 00000000..563736cf --- /dev/null +++ b/playground/excel.html @@ -0,0 +1,561 @@ + + + + + + tsb — readExcel playground + + + + +

📊 readExcel — XLSX file reading

+

+ tsb can read Excel XLSX files natively — no dependencies. The + readExcel() function accepts a Uint8Array or + ArrayBuffer and returns a DataFrame. +

+ +
+ Python equivalent: + pd.read_excel("data.xlsx") +
+ +

Basic usage

+
import { readExcel, xlsxSheetNames } from "tsb";
+
+// Read first sheet (default)
+const df = readExcel(buffer);
+console.log(df.shape);         // [rows, cols]
+console.log(df.columns.toArray()); // column names
+
+// List all sheet names
+const sheets = xlsxSheetNames(buffer);
+// → ["Sheet1", "Summary", "Data"]
+
+// Read a specific sheet by name
+const df2 = readExcel(buffer, { sheetName: "Summary" });
+
+// Read a specific sheet by index
+const df3 = readExcel(buffer, { sheetName: 1 });
+
+ +

Options

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
OptionTypeDefaultDescription
sheetNamestring | number0Sheet to read (name or 0-based index)
headernumber | null0Row index of the header, or null for no header
indexColstring | number | nullnullColumn to use as the row index
skipRowsnumber0Data rows to skip after the header
nrowsnumberunlimitedMaximum number of data rows to read
naValuesstring[][]Additional strings to treat as NA
+ +

Interactive demo

+

Upload an .xlsx file to inspect it, or use the demo data below.

+ +
+ +

or

+ +
+ +
+ +   + +   + +   + +

+ +
+ +
Upload a file or click "Load demo data" to start.
+ +

Advanced example

+
// Use a named column as the row index
+const df = readExcel(buffer, { indexCol: "ID" });
+
+// Skip 2 rows and read at most 100 rows
+const df2 = readExcel(buffer, { skipRows: 2, nrows: 100 });
+
+// Treat custom strings as missing
+const df3 = readExcel(buffer, { naValues: ["N/A", "MISSING", "-"] });
+
+// DataFrame operations work immediately
+df.describe();
+df.col("revenue").sum();
+df.groupby("region").mean();
+
+ +

Python equivalent

+
# pandas
+import pandas as pd
+
+df = pd.read_excel("data.xlsx", sheet_name=0)
+df = pd.read_excel("data.xlsx", sheet_name="Summary")
+df = pd.read_excel("data.xlsx", header=None)
+df = pd.read_excel("data.xlsx", index_col="ID")
+df = pd.read_excel("data.xlsx", skiprows=2, nrows=100)
+
+ + + + diff --git a/playground/format_ops.html b/playground/format_ops.html new file mode 100644 index 00000000..d72fd1ec --- /dev/null +++ b/playground/format_ops.html @@ -0,0 +1,262 @@ + + + + + + tsb — format_ops: Number Formatting + + + +

🔢 format_ops — Number Formatting

+

+ tsb provides a suite of number-formatting helpers that mirror pandas' + style.format() and Series.map() patterns. + Every function is zero-dependency and fully typed. +

+

← Back to index

+ +

Scalar formatters

+ + + + + + + + + + + + +
FunctionExample inputExample outputNotes
formatFloat(n, d)3.14159, 2"3.14"Fixed decimal places
formatPercent(n, d)0.1234, 1"12.3%"Multiplies by 100
formatScientific(n, d)12345.678, 3"1.235e+4"Exponential notation
formatEngineering(n, d)12345.678, 3"12.346e+3"Exponent multiple of 3
formatThousands(n, d, sep)1234567.89, 2"1,234,567.89"Thousands separator
formatCurrency(n, sym, d)1234.5, "$""$1,234.50"Currency prefix + thousands
formatCompact(n, d)1_234_567, 2"1.23M"K / M / B / T suffixes
+ +

Interactive demo — scalar formatting

+
+ + + + +
+
+ +

Formatter factories

+
import {
+  makeFloatFormatter,
+  makePercentFormatter,
+  makeCurrencyFormatter,
+} from "tsb";
+
+const fmtFloat   = makeFloatFormatter(3);      // (v) => formatFloat(v, 3)
+const fmtPct     = makePercentFormatter(1);     // (v) => formatPercent(v, 1)
+const fmtDollar  = makeCurrencyFormatter("$");  // (v) => formatCurrency(v, "$", 2)
+
+fmtFloat(3.14159);   // "3.142"
+fmtPct(0.0825);      // "8.3%"
+fmtDollar(9999.99);  // "$9,999.99"
+
+ +

Apply to a Series

+
import { Series, applySeriesFormatter, makePercentFormatter } from "tsb";
+
+const returns = new Series({ data: [0.05, -0.02, 0.134, 0.007], name: "returns" });
+
+const formatted = applySeriesFormatter(returns, makePercentFormatter(1));
+// Series<string> ["5.0%", "-2.0%", "13.4%", "0.7%"]
+
+ +

Apply to a DataFrame

+
import { DataFrame, applyDataFrameFormatter, makeCurrencyFormatter, makePercentFormatter } from "tsb";
+
+const df = DataFrame.fromColumns({
+  price:   [1_299.99, 899.50, 45.00],
+  change:  [0.025, -0.031, 0.102],
+  volume:  [15_000, 8_200, 230_000],
+});
+
+const formatted = applyDataFrameFormatter(df, {
+  price:  makeCurrencyFormatter("$", 2),
+  change: makePercentFormatter(2),
+});
+
+// formatted = {
+//   price:  ["$1,299.99", "$899.50", "$45.00"],
+//   change: ["2.50%", "-3.10%", "10.20%"],
+//   volume: ["15000", "8200", "230000"],      // no formatter → String(v)
+// }
+
+ +

Interactive demo — DataFrame formatting

+
+ +
+
+ +

String rendering

+
import { Series, DataFrame, seriesToString, dataFrameToString, makeFloatFormatter } from "tsb";
+
+const s = new Series({ data: [1.2, 3.4, 5.6], name: "value" });
+console.log(seriesToString(s, { formatter: makeFloatFormatter(1) }));
+// 0    1.2
+// 1    3.4
+// 2    5.6
+// Name: value, dtype: float64
+
+const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4.0, 5.0, 6.0] });
+console.log(dataFrameToString(df));
+//    a    b
+// 0  1  4.0
+// 1  2  5.0
+// 2  3  6.0
+
+ +

Interactive demo — seriesToString / dataFrameToString

+
+ +
+
+ + + + diff --git a/playground/idxmin_idxmax.html b/playground/idxmin_idxmax.html new file mode 100644 index 00000000..b771dd36 --- /dev/null +++ b/playground/idxmin_idxmax.html @@ -0,0 +1,439 @@ + + + + + + tsb — idxmin / idxmax + + + +
+
+
Loading TypeScript compiler…
+
+ + ← tsb playground +

idxmin / idxmax

+

+ Return the index label of the minimum or maximum value in a + Series or each column of a DataFrame. + Mirrors pandas.Series.idxmin(), idxmax(), + pandas.DataFrame.idxmin(), and DataFrame.idxmax(). +

+ + +
+

1 · Series.idxmin — label of the minimum value

+

Returns the index label at the position of the minimum value. + NaN / null values are skipped by default.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

2 · Series.idxmax — label of the maximum value

+

Returns the index label at the position of the maximum value.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

3 · NaN handling — skipna option

+

By default NaN / null values are skipped. Set skipna: false + to propagate NaN (returns null if any value is NaN).

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

4 · DataFrame.idxmin — row label of column minima

+

Returns a Series indexed by column names. Each value is the row label + where that column achieves its minimum.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

5 · DataFrame.idxmax — row label of column maxima

+

Returns a Series indexed by column names, where each entry is the row + label of that column's maximum value.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

6 · Edge cases — empty, all-NaN, all-equal

+

Behavior for empty series, series where every value is NaN, and series + where all values are equal.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

API Reference

+
// Series
+idxminSeries(series, { skipna?: boolean }): Label   // default skipna=true
+idxmaxSeries(series, { skipna?: boolean }): Label
+
+// DataFrame (axis=0 — min/max per column)
+idxminDataFrame(df, { skipna?: boolean }): Series   // indexed by column names
+idxmaxDataFrame(df, { skipna?: boolean }): Series
+
+ + + + + diff --git a/playground/index.html b/playground/index.html index b5499765..15e4590c 100644 --- a/playground/index.html +++ b/playground/index.html @@ -194,6 +194,16 @@

JSON I/O. readJson / toJson with five orient formats: records, split, index, columns, values.

✅ Complete
+
+

📗 Excel I/O

+

XLSX file reading. readExcel() parses Excel files from a Uint8Array/ArrayBuffer — ZIP+XML parsing from scratch, shared strings, number/string/boolean cells, sheet selection, header, indexCol, skipRows, nrows.

+
✅ Complete
+
+
+

🔀 json_normalize

+

Flatten semi-structured JSON into a tabular DataFrame. jsonNormalize() supports nested object flattening, recordPath to unpack nested arrays, meta columns, prefixes, custom sep, and maxLevel depth limit.

+
✅ Complete
+

📈 corr & cov

Pearson correlation & covariance. Series.corr(), DataFrame.corr(), DataFrame.cov(), dataFrameCorr(), dataFrameCov() with index alignment, null handling, and configurable ddof/minPeriods.

@@ -229,6 +239,11 @@

Reshape with aggregation. pivot() for unique reshaping; pivotTable() for aggregation (mean/sum/count/min/max/first/last) with fill_value and dropna support.

✅ Complete

+
+

📊 pivotTableFull (margins)

+

Enhanced pivot table with row/column margin totals. Supports all aggfuncs, custom margins_name, sort order, fill_value, dropna, and multiple index/column columns.

+
✅ Complete
+

📐 stack & unstack

Pivot column labels to/from row index. stack() rotates columns into a compound-index Series; unstack() recovers the DataFrame. Custom sep, dropna, and fill_value support.

@@ -254,6 +269,11 @@

Element-wise transformations. clip(), seriesAbs(), seriesRound() for Series and DataFrame with min/max bounds, decimal precision, and axis support.

✅ Complete

+
+

🔍 missing-value ops

+

Detect and fill missing values. isna(), notna(), isnull(), notnull() for scalars/Series/DataFrame. ffillSeries(), bfillSeries(), dataFrameFfill(), dataFrameBfill() with optional limit and axis support.

+
✅ Complete
+

🔢 value_counts

Count unique values. valueCounts() for Series and dataFrameValueCounts() for DataFrame with normalize, sort, ascending, and dropna options.

@@ -264,16 +284,135 @@

✅ Complete

- - - -
-

Performance

-
-

⚡ Benchmarks

-

Side-by-side performance comparison of tsb (TypeScript/Bun) vs pandas (Python). Timing metrics for each function.

-
🏗️ In Progress
+

📊 pct_change

+

Fractional change between elements. pctChangeSeries() and pctChangeDataFrame() with periods, fillMethod (pad/bfill), limit, and axis options.

+
✅ Complete
+
+
+

🔎 idxmin / idxmax

+

Return the index label of the minimum or maximum value. idxminSeries(), idxmaxSeries(), idxminDataFrame(), idxmaxDataFrame() with skipna support.

+
✅ Complete
+
+
+

🔄 astype

+

Cast Series and DataFrame values to a different dtype. astypeSeries(), astype() with per-column mapping support and integer clamping.

+
✅ Complete
+
+
+

🔁 replace

+

Substitute values in Series and DataFrame. Supports scalar, array (many→one, pair-wise), Record, and Map replacement specs.

+
✅ Complete
+
+
+

🔀 where / mask

+

Conditional value selection. where keeps values where the condition is true; mask replaces them. Supports boolean arrays, Series, DataFrame, and callable conditions.

+
✅ Complete
+
+
+

📈 diff / shift

+

Discrete difference and value shifting for Series and DataFrame. diff computes element-wise differences; shift lags or leads values by a number of periods. Essential for time-series analysis.

+
✅ Complete
+
+
+

🔍 duplicated / drop_duplicates

+

Detect and remove duplicate values or rows. Supports keep="first", keep="last", and keep=false (mark all occurrences). DataFrame supports a subset of columns.

+
✅ Complete
+
+
+

🎲 sample

+

Random sampling from Series and DataFrame. Supports fixed count, fractional sampling, with/without replacement, weighted sampling, and seeded deterministic results via randomState.

+
✅ Complete
+
+
+

✂️ clip_advanced

+

Per-element clipping with scalar, array, Series, or DataFrame bounds. Supports axis-based Series broadcasting for DataFrames — mirrors pandas.Series.clip(lower, upper) with array bounds.

+
✅ Complete
+
+
+

🔧 apply / map

+

Function application and value mapping. applySeries, mapSeries (function/dict lookup), applyDataFrame (reduce per col/row), applyExpandDataFrame (transform per col/row), mapDataFrame (element-wise).

+
✅ Complete
+
+
+

🪣 cut / qcut

+

Bin continuous data into discrete intervals. cut for equal-width or user-defined bins; qcut for equal-frequency quantile bins. Custom labels, retbins, cutCodes, and cutCategories.

+
✅ Complete
+
+
+

📐 Interval / IntervalIndex

+

Bounded interval objects and an ordered index of intervals. Interval supports all four closed types; IntervalIndex supports lookup, overlap queries, and intervalRange for equal-length ranges.

+
✅ Complete
+
+
+

🎲 getDummies / fromDummies

+

One-hot encode categorical Series or DataFrame columns into binary indicator columns. getDummies supports custom prefix, separator, dropFirst, and dummyNa. fromDummies reverses the encoding.

+
✅ Complete
+
+
+

📊 crosstab

+

Cross-tabulation frequency tables for two categorical factors. Supports custom aggfunc (count, sum, mean, min, max), row/column margins, normalize (all / index / columns), and dropna.

+
✅ Complete
+
+
+

🔀 explode

+

Transform list-valued cells into multiple rows — one row per element. Supports explodeSeries, explodeDataFrame, multi-column explosion, and ignore_index.

+
✅ Complete
+
+
+

🔢 factorize

+

Encode an array of values as integer codes. Maps each unique non-null value to a 0-based code (first-seen or sorted). Missing values receive a configurable sentinel (default -1). Mirrors pandas.factorize().

+
✅ Complete
+
+
+

↔️ wide_to_long

+

Reshape a wide-format DataFrame to long format by gathering stub-prefixed columns into rows. Supports multiple stubs, custom separator, custom suffix patterns, and multiple id columns. Mirrors pandas.wide_to_long().

+
✅ Complete
+
+
+

📈 interpolate

+

Fill missing values using interpolation. Supports linear, pad/ffill, backfill/bfill, and nearest methods with limit, limitDirection, and limitArea options. Mirrors Series.interpolate().

+
✅ Complete
+
+
+

🔍 selectDtypes

+

Filter DataFrame columns by dtype. Accepts exact dtype names or generic aliases: number, integer, floating, bool, string, datetime, category. Supports include and exclude. Mirrors DataFrame.select_dtypes().

+
✅ Complete
+
+
+

📊 mode

+

Most-frequent value(s) in a Series or DataFrame. Returns all tied modes sorted ascending. Supports dropna, axis, and numericOnly. Mirrors Series.mode() / DataFrame.mode().

+
✅ Complete
+
+
+

📐 skew / kurt

+

Adjusted Fisher–Pearson skewness and bias-corrected excess kurtosis for Series and DataFrame. Supports skipna, axis, and numericOnly. Mirrors Series.skew() / Series.kurt().

+
✅ Complete
+
+
+

📊 var / sem

+

Sample/population variance (varSeries) and standard error of the mean (semSeries) for Series and DataFrame. Configurable ddof, skipna, minCount, and axis. Mirrors Series.var() / Series.sem().

+
✅ Complete
+
+
+

🔢 nunique / any / all

+

Count unique values (nuniqueSeries, nuniqueDataFrame) and boolean reductions (anySeries, allSeries, anyDataFrame, allDataFrame). Supports dropna, skipna, axis, and boolOnly.

+
✅ Complete
+
+
+

🔢 NaN-Ignoring Aggregates

+

Top-level nan-ignoring aggregate functions: nansum, nanmean, nanmedian, nanstd, nanvar, nanmin, nanmax, nanprod, nancount. Mirrors numpy.nan* functions. Works on arrays and Series.

+
✅ Complete
+
+
+

⏱️ toTimedelta

+

Convert scalars, arrays, or Series to Timedelta objects. Accepts pandas-style strings ("1 days 02:03:04"), ISO 8601 ("P1DT2H"), human-readable ("1h 30m"), numeric (ns/us/ms/s/m/h/D/W). Timedelta class with arithmetic: add/subtract/scale/abs/lt/gt/eq.

+
✅ Complete
+
+
+

📅 dateRange

+

Generate fixed-frequency sequences of Date objects. Mirrors pandas.date_range(). Supports D/B/h/min/s/ms/W/MS/ME/QS/QE/YS/YE frequencies with multiplier prefixes. Inclusive endpoint control: both/left/right/neither.

+
✅ Complete
diff --git a/playground/insert_pop.html b/playground/insert_pop.html new file mode 100644 index 00000000..8b724566 --- /dev/null +++ b/playground/insert_pop.html @@ -0,0 +1,172 @@ + + + + + + tsb — insertColumn / popColumn + + + +

← tsb playground

+ +

insertColumn / popColumn

+

+ Column insertion and removal for DataFrames — mirrors + + pandas.DataFrame.insert() and + + pandas.DataFrame.pop(). +

+

+ Because tsb DataFrames are immutable, both functions return a new DataFrame + rather than mutating the original. popColumn returns both the extracted + Series and the resulting DataFrame. +

+ +

API summary

+ + + + + + + + + + + + + + + + + + + + + + + + +
FunctionPandas equivalentDescription
insertColumn(df, loc, col, values)df.insert(loc, col, value)Insert a new column at integer position loc
popColumn(df, col)df.pop(col)Remove a column; returns { series, df }
reorderColumns(df, order)df[order]Reorder (and optionally subset) columns
moveColumn(df, col, newLoc)Move an existing column to a new integer position
+ +

Example 1 — insertColumn

+
import { DataFrame, insertColumn } from "tsb";
+
+const df = DataFrame.fromColumns({
+  name: ["Alice", "Bob", "Carol"],
+  age:  [30, 25, 35],
+});
+// columns: ["name", "age"]
+
+// Insert "city" between "name" and "age"
+const df2 = insertColumn(df, 1, "city", ["NY", "LA", "SF"]);
+// df2.columns.values → ["name", "city", "age"]
+// df2.col("city").values → ["NY", "LA", "SF"]
+
+// Original is unchanged
+// df.columns.values → ["name", "age"]
+
+ +

Example 2 — Insert with a Series

+
import { DataFrame, Series, insertColumn } from "tsb";
+
+const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+const salary = new Series({ data: [100_000, 90_000, 120_000], name: "salary" });
+
+const df2 = insertColumn(df, 0, "salary", salary);
+// df2.columns.values → ["salary", "a", "b"]
+
+ +

Example 3 — popColumn

+
import { DataFrame, popColumn } from "tsb";
+
+const df = DataFrame.fromColumns({
+  id:   [1, 2, 3],
+  name: ["Alice", "Bob", "Carol"],
+  age:  [30, 25, 35],
+});
+
+// Remove "age" and keep the Series
+const { series: ageSeries, df: df2 } = popColumn(df, "age");
+// ageSeries.values       → [30, 25, 35]
+// df2.columns.values     → ["id", "name"]
+// df.columns.values      → ["id", "name", "age"]  ← original unchanged
+
+ +

Example 4 — reorderColumns

+
import { DataFrame, reorderColumns } from "tsb";
+
+const df = DataFrame.fromColumns({ a: [1], b: [2], c: [3], d: [4] });
+
+// Reverse the column order
+const df2 = reorderColumns(df, ["d", "c", "b", "a"]);
+// df2.columns.values → ["d", "c", "b", "a"]
+
+// Select a subset (drops columns not listed)
+const df3 = reorderColumns(df, ["a", "c"]);
+// df3.columns.values → ["a", "c"]   (b and d are dropped)
+
+ +

Example 5 — moveColumn

+
import { DataFrame, moveColumn } from "tsb";
+
+const df = DataFrame.fromColumns({
+  year:  [2020, 2021, 2022],
+  value: [10, 20, 30],
+  label: ["a", "b", "c"],
+});
+// columns: ["year", "value", "label"]
+
+// Move "label" to the front
+const df2 = moveColumn(df, "label", 0);
+// df2.columns.values → ["label", "year", "value"]
+
+ +

Error cases

+
// Duplicate column name (default: not allowed)
+insertColumn(df, 1, "a", [1, 2, 3]);
+// → RangeError: Column "a" already exists. Use allowDuplicates=true to permit...
+
+// Out-of-range loc
+insertColumn(df, 99, "x", [1, 2, 3]);
+// → RangeError: loc=99 is out of range [0, 2].
+
+// Wrong number of values
+insertColumn(df, 0, "x", [1]);  // df has 3 rows
+// → RangeError: values length 1 does not match DataFrame row count 3.
+
+// Column not found
+popColumn(df, "missing");
+// → RangeError: Column "missing" not found in DataFrame.
+
+ +
+ Immutability: Like all tsb DataFrame operations, these functions never + mutate the original DataFrame. Always assign the return value to a new variable. +
+ +

pandas equivalence table

+ + + + + + + + + +
pandastsb
df.insert(1, "x", [1,2,3]) *(mutates)*insertColumn(df, 1, "x", [1,2,3])
series = df.pop("col") *(mutates)*const { series, df: df2 } = popColumn(df, "col")
df[["c","a","b"]]reorderColumns(df, ["c","a","b"])
+ + diff --git a/playground/mode.html b/playground/mode.html new file mode 100644 index 00000000..0a149227 --- /dev/null +++ b/playground/mode.html @@ -0,0 +1,125 @@ + + + + + + tsb — mode + + + +

← tsb playground

+

📊 mode

+

+ modeSeries / modeDataFrame — + return the most-frequent value(s), mirroring + Series.mode() and + DataFrame.mode(). +

+

Equivalent Python: series.mode()

+ +

1 · Single mode

+
+
const s = new Series({ data: [1, 2, 2, 3] });
+modeSeries(s).values;
+// → [2]
+
+
+ +

2 · Tied modes — all returned sorted

+
+
const s = new Series({ data: [1, 1, 2, 2, 3] });
+modeSeries(s).values;
+// → [1, 2]
+
+
+ +

3 · String values

+
+
const s = new Series({ data: ["cat", "dog", "dog", "bird"] });
+modeSeries(s).values;
+// → ["dog"]
+
+
+ +

4 · Null values excluded (dropna=true default)

+
+
const s = new Series({ data: [null, 1, 1, null, null] });
+modeSeries(s).values;
+// → [1]
+
+
+ +

5 · DataFrame column-wise (axis=0)

+
+
const df = DataFrame.fromColumns({ a: [1, 1, 2, 2], b: [5, 5, 5, 6] });
+modeDataFrame(df);
+// a: [1, 2], b: [5, null]  (null-padded)
+
+
+ +

6 · DataFrame row-wise (axis=1)

+
+
const df = DataFrame.fromColumns({ a: [1, 2], b: [1, 3], c: [2, 3] });
+modeDataFrame(df, { axis: 1 });
+// row 0: mode=1, row 1: mode=3
+
+
+ + + + diff --git a/playground/nancumops.html b/playground/nancumops.html new file mode 100644 index 00000000..d7014593 --- /dev/null +++ b/playground/nancumops.html @@ -0,0 +1,295 @@ + + + + + + tsb — NaN-Ignoring Aggregates (nancumops) + + + + +

🔢 NaN-Ignoring Aggregates

+

+ nansum, nanmean, nanmedian, nanstd, nanvar, + nanmin, nanmax, nanprod, nancount + — mirrors numpy.nan* functions in pandas workflows. +

+ + +
+

🧮 Live Calculator

+

Enter a comma-separated list of numbers (use NaN, null for missing).

+ + + + + +
+
+ + +
+

📖 Function Reference

+ + + + + + + + + + + + + + + + + + + + +
FunctionDescriptionEmpty/all-NaN returnspandas / numpy equivalent
nancount(input)Count of valid (non-NaN) numeric values0np.count_nonzero(~np.isnan(a))
nansum(input)Sum, ignoring NaN/null0np.nansum(a)
nanmean(input)Mean, ignoring NaN/nullNaNnp.nanmean(a)
nanmedian(input)Median, ignoring NaN/nullNaNnp.nanmedian(a)
nanvar(input, {ddof})Variance (ddof=1 default)NaNnp.nanvar(a, ddof=1)
nanstd(input, {ddof})Std deviation (ddof=1 default)NaNnp.nanstd(a, ddof=1)
nanmin(input)Minimum, ignoring NaN/nullNaNnp.nanmin(a)
nanmax(input)Maximum, ignoring NaN/nullNaNnp.nanmax(a)
nanprod(input)Product, ignoring NaN/null1np.nanprod(a)
+
+ + +
+

💡 Usage Examples

+ +
+ Basic array usage +
+import { nansum, nanmean, nanmedian, nanstd } from "tsb";
+
+const data = [1, 2, NaN, null, 3, 5];
+
+nansum(data);     // 11
+nanmean(data);    // 2.75
+nanmedian(data);  // 2.5
+nanstd(data);     // 1.708...
+
+
+# Python / pandas equivalent +import numpy as np + +data = [1, 2, np.nan, np.nan, 3, 5] + +np.nansum(data) # 11.0 +np.nanmean(data) # 2.75 +np.nanmedian(data) # 2.5 +np.nanstd(data, ddof=1) # 1.708... +
+
+ +
+ Using with Series +
+import { Series, nansum, nanmean, nancount } from "tsb";
+
+const s = new Series({ data: [10, null, 30, NaN, 50] });
+
+nancount(s);  // 3
+nansum(s);    // 90
+nanmean(s);   // 30
+
+
+# Python / pandas equivalent +import pandas as pd, numpy as np + +s = pd.Series([10, np.nan, 30, np.nan, 50]) + +s.count() # 3 +s.sum() # 90.0 +s.mean() # 30.0 +
+
+ +
+ Variance and std with ddof +
+import { nanvar, nanstd } from "tsb";
+
+const xs = [2, 4, 4, 4, 5, 5, 7, 9];
+
+// Sample (ddof=1, default)
+nanvar(xs);           // ≈ 4.571
+nanstd(xs);           // ≈ 2.138
+
+// Population (ddof=0)
+nanvar(xs, { ddof: 0 });  // 4.0
+nanstd(xs, { ddof: 0 });  // 2.0
+
+
+# Python / pandas equivalent +import numpy as np + +xs = [2, 4, 4, 4, 5, 5, 7, 9] + +np.nanvar(xs, ddof=1) # 4.571... +np.nanstd(xs, ddof=1) # 2.138... + +np.nanvar(xs, ddof=0) # 4.0 +np.nanstd(xs, ddof=0) # 2.0 +
+
+
+ + +
+

⚡ NaN Impact Demo

+

See how NaN values affect results with and without nan-ignoring functions.

+ +
+
+ + + + + diff --git a/playground/numeric_extended.html b/playground/numeric_extended.html new file mode 100644 index 00000000..14cc4990 --- /dev/null +++ b/playground/numeric_extended.html @@ -0,0 +1,353 @@ + + + + + + tsb — Numeric Utilities (digitize, histogram, linspace, arange, zscore…) + + + +

🔢 Numeric Utilities

+

+ ← back to index +

+

+ tsb ships numpy/scipy-style numeric utility functions — all implemented + from scratch with no external dependencies: + digitize, histogram, linspace, arange, + percentileOfScore, zscore, minMaxNormalize, + coefficientOfVariation. +

+ +
+

digitize — bin values

+

+ Map each value to the index of the bin it falls into. Mirrors numpy.digitize. + Indices are 0-based; values below the first edge return -1. +

+
import { digitize, seriesDigitize, Series } from "tsb";
+
+// Find which [0,33), [33,66), [66,100] bucket each score belongs to
+const scores = [15, 45, 70, 33, 100];
+const edges  = [33, 66, 100];
+
+const bins = digitize(scores, edges);
+// → [-1, 0, 1, 0, 2]
+// 15 < 33      → bin -1 (below first edge)
+// 45 ∈ [33,66) → bin  0
+// 70 ∈ [66,100)→ bin  1
+// 33 ∈ [33,66) → bin  0 (33 < 66, right=false default)
+// 100 = last   → bin  2
+
+// Series version — preserves index
+const s = new Series({ data: [15, 45, 70], index: ["Alice","Bob","Carol"] });
+seriesDigitize(s, [33, 66, 100]);
+// Series: Alice→-1, Bob→0, Carol→1
+
Running…
+
+ +
+

histogram — frequency counts

+

Count how many values fall in each bin. Mirrors numpy.histogram.

+
import { histogram } from "tsb";
+
+const data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+
+// Default: 10 equal-width bins
+const { counts, binEdges } = histogram(data);
+
+// Custom: 5 bins, density normalised
+const { counts: d, binEdges: e } = histogram(data, { bins: 5, density: true });
+
+// Explicit edges
+histogram(data, { binEdges: [1, 4, 7, 10] });
+// counts: [ 3, 3, 4 ]
+
Running…
+
+ +
+

linspace & arange — number sequences

+

Generate evenly-spaced sequences, mirroring numpy.linspace and numpy.arange.

+
import { linspace, arange } from "tsb";
+
+// 5 values from 0 to 1 (inclusive)
+linspace(0, 1, 5);
+// → [0, 0.25, 0.5, 0.75, 1]
+
+// 0..4
+arange(5);
+// → [0, 1, 2, 3, 4]
+
+// From 2 to 10, step 2
+arange(2, 10, 2);
+// → [2, 4, 6, 8]
+
+// Descending
+arange(5, 0, -1);
+// → [5, 4, 3, 2, 1]
+
Running…
+
+ +
+

percentileOfScore — percentile rank

+

+ Compute what percentile a given score falls at within a dataset. + Mirrors scipy.stats.percentileofscore. +

+
import { percentileOfScore } from "tsb";
+
+const grades = [55, 60, 70, 75, 80, 85, 90, 95];
+
+// What percentile is a score of 75?
+percentileOfScore(grades, 75);            // 50 (rank — default)
+percentileOfScore(grades, 75, "weak");    // 50 (≤ 75: 4/8 = 50%)
+percentileOfScore(grades, 75, "strict");  // 37.5 (< 75: 3/8 = 37.5%)
+
Running…
+
+ +
+

zscore — standardisation

+

+ Transform values to zero mean and unit variance. Mirrors scipy.stats.zscore. + Missing values are propagated; zero-variance data returns all NaN. +

+
import { zscore, Series } from "tsb";
+
+const s = new Series({ data: [2, 4, 4, 4, 5, 5, 7, 9], name: "values" });
+const z = zscore(s);
+
+// z.values ≈ [-1.5, -0.5, -0.5, -0.5, 0, 0, 1, 2]
+
+// With population std (ddof=0)
+const zPop = zscore(s, { ddof: 0 });
+
Running…
+
+ +
+

minMaxNormalize — scale to [0, 1]

+

+ Scale all values to the interval [0, 1] (or a custom range). + Mirrors sklearn MinMaxScaler. +

+
import { minMaxNormalize, Series } from "tsb";
+
+const s = new Series({ data: [0, 25, 50, 75, 100] });
+minMaxNormalize(s).values;
+// → [0, 0.25, 0.5, 0.75, 1]
+
+// Scale to [-1, 1]
+minMaxNormalize(s, { featureRangeMin: -1, featureRangeMax: 1 }).values;
+// → [-1, -0.5, 0, 0.5, 1]
+
Running…
+
+ +
+

coefficientOfVariation — relative spread

+

+ Dimensionless measure of dispersion: std / |mean|. + Useful for comparing spread across datasets with different units. +

+
import { coefficientOfVariation, Series } from "tsb";
+
+// Dataset A: [10, 20, 30]  mean=20, std=10  → CV=0.5
+coefficientOfVariation(new Series({ data: [10, 20, 30] }));
+
+// Dataset B: [100, 200, 300]  same shape, higher scale  → CV=0.5
+coefficientOfVariation(new Series({ data: [100, 200, 300] }));
+
+// CV with population std
+coefficientOfVariation(new Series({ data: [1, 2, 3, 4, 5] }), { ddof: 0 });
+
Running…
+
+ + + + diff --git a/playground/nunique.html b/playground/nunique.html new file mode 100644 index 00000000..add4399d --- /dev/null +++ b/playground/nunique.html @@ -0,0 +1,112 @@ + + + + + + tsb — nunique / any / all + + + +

← tsb playground

+

🔢 nunique / any / all

+

+ Count unique values and perform boolean reductions, mirroring + Series.nunique(), + Series.any(), and + Series.all(). +

+ +

1 · nunique — count distinct values

+
+
import { Series, nuniqueSeries } from "tsb";
+
+const s = new Series({ data: [1, 2, 2, 3, 3, 3, null] });
+
+nuniqueSeries(s);                 // 3 (null excluded by default)
+nuniqueSeries(s, { dropna: false }); // 4 (null counted as a distinct value)
+
nuniqueSeries(s) → 3 +nuniqueSeries(s, {dropna:false}) → 4
+
+ +

2 · any — is any element truthy?

+
+
import { anySeries } from "tsb";
+
+const allZero = new Series({ data: [0, 0, 0] });
+const hasOne  = new Series({ data: [0, 0, 1] });
+
+anySeries(allZero); // false
+anySeries(hasOne);  // true
+
+// With nulls (skipna=true by default)
+const withNull = new Series({ data: [null, 0, null] });
+anySeries(withNull); // false — null skipped, 0 is falsy
+
anySeries(allZero) → false +anySeries(hasOne) → true +anySeries(withNull) → false
+
+ +

3 · all — are all elements truthy?

+
+
import { allSeries } from "tsb";
+
+const allTrue = new Series({ data: [1, 2, 3] });
+const hasFalsy = new Series({ data: [1, 0, 3] });
+
+allSeries(allTrue);  // true
+allSeries(hasFalsy); // false
+
+// Empty or all-null series vacuously returns true
+allSeries(new Series({ data: [] }));              // true
+allSeries(new Series({ data: [null, null] }));    // true
+
allSeries(allTrue) → true +allSeries(hasFalsy) → false +allSeries([]) → true (vacuous) +allSeries([null]) → true (vacuous)
+
+ +

4 · DataFrame nunique

+
+
import { DataFrame, nuniqueDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+  category: ["A", "B", "A", "C"],
+  value:    [1,   2,   1,   3  ],
+});
+
+nuniqueDataFrame(df);          // per-column: category→3, value→3
+nuniqueDataFrame(df, { axis: 1 }); // per-row: how many distinct values in each row
+
nuniqueDataFrame(df) → category: 3, value: 3 +nuniqueDataFrame(df, {axis:1}) → row0: 2, row1: 2, row2: 2, row3: 2
+
+ +

5 · DataFrame any / all

+
+
import { anyDataFrame, allDataFrame } from "tsb";
+
+const df2 = DataFrame.fromColumns({
+  a: [0, 0, 1],
+  b: [1, 1, 1],
+});
+
+anyDataFrame(df2); // a: true, b: true  (each col has at least one truthy)
+allDataFrame(df2); // a: false, b: true (col a has a 0)
+
+// axis=1: reduce across columns per row
+anyDataFrame(df2, { axis: 1 }); // row0: true, row1: true, row2: true
+allDataFrame(df2, { axis: 1 }); // row0: false, row1: false, row2: true
+
anyDataFrame(df2) → a: true, b: true +allDataFrame(df2) → a: false, b: true +anyDataFrame(df2,{axis:1}) → [true, true, true] +allDataFrame(df2,{axis:1}) → [false, false, true]
+
+ + diff --git a/playground/pct_change.html b/playground/pct_change.html new file mode 100644 index 00000000..3576797a --- /dev/null +++ b/playground/pct_change.html @@ -0,0 +1,448 @@ + + + + + + tsb — pct_change + + + +
+
+
Initializing playground…
+
+ ← Back to roadmap +

📊 pct_change — Interactive Playground

+

Compute the fractional change between each element and a prior element. + Mirrors pandas.Series.pct_change() / + pandas.DataFrame.pct_change().
+ Edit any code block below and press ▶ Run + (or Ctrl+Enter) to execute it live in your browser. +

+ + +
+

1 · Basic pct_change on a Series

+

pctChangeSeries(series) returns the fractional (not percentage) change + from each previous element. The first element is always null.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

2 · Multi-period change

+

The periods option controls the lag. Use periods: 2 to + compare each value to the one two steps earlier — useful for month-over-month + comparisons in quarterly data.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

3 · Handling missing values

+

By default, pctChangeSeries forward-fills (fillMethod: "pad") + NaN/null values before computing the ratio — so gaps don't break the chain. + Set fillMethod: null to propagate NaN instead.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

4 · Limit consecutive fills

+

The limit option caps how many consecutive NaN values get forward-filled. + Useful when you want to tolerate short gaps but not bridge large ones.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

5 · DataFrame column-wise pct_change

+

pctChangeDataFrame(df) applies pctChangeSeries to every + column independently. Ideal for comparing multiple assets or metrics simultaneously.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

6 · Negative periods (look-forward change)

+

A negative periods value computes the forward change: how much will + this element change by the time we reach |periods| steps ahead. + Useful for computing returns on a "hold for N periods" strategy.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

API Reference

+

All functions return a new Series/DataFrame of the same shape — inputs are never mutated.

+
// Series
+pctChangeSeries(series, {
+  periods?: number,           // default 1 (positive = look back, negative = look forward)
+  fillMethod?: "pad" | "bfill" | null,  // default "pad"
+  limit?: number | null,      // max consecutive fills; default unlimited
+}): Series
+
+// DataFrame
+pctChangeDataFrame(df, {
+  periods?: number,
+  fillMethod?: "pad" | "bfill" | null,
+  limit?: number | null,
+  axis?: 0 | 1 | "index" | "columns",  // default 0 (column-wise)
+}): DataFrame
+
+ + + + + diff --git a/playground/quantile.html b/playground/quantile.html new file mode 100644 index 00000000..fb019d88 --- /dev/null +++ b/playground/quantile.html @@ -0,0 +1,182 @@ + + + + + + tsb — quantile + + + +

← tsb playground

+

📐 quantile

+

+ quantileSeries / quantileDataFrame — + compute quantile(s) / percentile(s), mirroring + Series.quantile() and + DataFrame.quantile(). +

+

Equivalent Python: series.quantile(q=0.5) / df.quantile(q=0.5)

+ +

1 · Scalar quantile (median)

+
+
const s = new Series({ data: [1, 2, 3, 4, 5] });
+quantileSeries(s);          // default q=0.5 → 3
+quantileSeries(s, { q: 0.25 }); // → 2
+quantileSeries(s, { q: 0.75 }); // → 4
+
+
+ +

2 · Multiple quantile levels

+
+
const s = new Series({ data: [1, 2, 3, 4, 5] });
+const q = quantileSeries(s, { q: [0.25, 0.5, 0.75] });
+// Series indexed by q-values: { 0.25: 2, 0.5: 3, 0.75: 4 }
+
+
+ +

3 · Interpolation methods

+
+
const s = new Series({ data: [0, 10] });
+// q=0.5 → position 0.5 between indices 0 and 1
+quantileSeries(s, { q: 0.5, interpolation: "linear" });   // 5
+quantileSeries(s, { q: 0.5, interpolation: "lower" });    // 0
+quantileSeries(s, { q: 0.5, interpolation: "higher" });   // 10
+quantileSeries(s, { q: 0.5, interpolation: "midpoint" }); // 5
+quantileSeries(s, { q: 0.5, interpolation: "nearest" });  // 0
+
+
+ +

4 · NaN handling (skipna=true by default)

+
+
const s = new Series({ data: [1, null, 3, NaN, 5] });
+quantileSeries(s, { q: 0.5 });             // ignores null/NaN → 3
+quantileSeries(s, { q: 0.5, skipna: false }); // NaN propagates → NaN
+
+
+ +

5 · DataFrame — axis=0 (per-column quantiles)

+
+
const df = DataFrame.fromColumns({ a: [1, 2, 3, 4], b: [10, 20, 30, 40] });
+quantileDataFrame(df, { q: 0.5 });
+// Series { a: 2.5, b: 25 }
+
+quantileDataFrame(df, { q: [0.25, 0.5, 0.75] });
+// DataFrame 3×2: rows=[0.25, 0.5, 0.75], cols=[a, b]
+
+
+ +

6 · DataFrame — axis=1 (per-row quantiles)

+
+
const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [3, 4, 5], c: [5, 6, 7] });
+quantileDataFrame(df, { axis: 1, q: 0.5 });
+// Series — median of each row: [3, 4, 5]
+
+
+ +

7 · Q=[0, 0.25, 0.5, 0.75, 1] summary table

+
+
const df = DataFrame.fromColumns({ score: [55, 70, 80, 88, 92, 95, 99] });
+quantileDataFrame(df, { q: [0, 0.25, 0.5, 0.75, 1] });
+// → summary statistics table
+
+
+ + + + diff --git a/playground/replace.html b/playground/replace.html new file mode 100644 index 00000000..19da518a --- /dev/null +++ b/playground/replace.html @@ -0,0 +1,408 @@ + + + + + + tsb — replace (value substitution) + + + +
+
+
Loading tsb runtime…
+
+ + ← Back to playground index + +

replace — value substitution

+

+ replaceSeries / replaceDataFrame substitute values + matching a pattern with a new value.
+ Supports scalar, array, and mapping (Record / Map) replacement specs.
+ Mirrors Series.replace() and DataFrame.replace() from pandas. +

+ + +
+

1 · Scalar → scalar replacement

+

+ Replace every occurrence of a single value with another value. + Works on numbers, strings, booleans, and null. +

+
+
+
+ + +
+
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

2 · Array replacement

+

+ Replace a list of values with a single target, or perform pair-wise + replacement using two equal-length arrays. +

+
+
+
+ + +
+
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

3 · Mapping (Record / Map) replacement

+

+ Pass a lookup table as either a plain object (Record<string, Scalar>) + or a JavaScript Map for full type flexibility. +

+
+
+
+ + +
+
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

4 · DataFrame replacement

+

+ replaceDataFrame applies the same spec to all columns by + default. Use the columns option to restrict which columns + are affected. +

+
+
+
+ + +
+
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

API Reference

+
// Replace values in a Series
+replaceSeries(
+  series: Series,
+  spec: ReplaceSpec,
+  options?: ReplaceOptions,
+): Series
+
+// Replace values in a DataFrame
+replaceDataFrame(
+  df: DataFrame,
+  spec: ReplaceSpec,
+  options?: DataFrameReplaceOptions,
+): DataFrame
+
+// Replacement spec variants
+type ReplaceSpec =
+  | { toReplace: Scalar;              value: Scalar }               // scalar → scalar
+  | { toReplace: Scalar[];            value: Scalar }               // array  → scalar
+  | { toReplace: Scalar[];            value: Scalar[] }             // array  → array (pair-wise)
+  | { toReplace: Record<string, Scalar> }                          // Record mapping
+  | { toReplace: Map<Scalar, Scalar> }                             // Map mapping
+
+// Options
+interface ReplaceOptions {
+  matchNaN?: boolean;  // treat NaN===NaN for matching (default: true)
+}
+
+interface DataFrameReplaceOptions extends ReplaceOptions {
+  columns?: string[];  // only replace in these columns (default: all)
+}
+
+ + + + + diff --git a/playground/rolling_apply.html b/playground/rolling_apply.html new file mode 100644 index 00000000..b307cdbd --- /dev/null +++ b/playground/rolling_apply.html @@ -0,0 +1,225 @@ + + + + + + tsb — Rolling Apply & Multi-Aggregation + + + +

tsb — Rolling Apply & Multi-Aggregation

+

+ Standalone functions for applying custom aggregation logic over sliding + windows, mirroring + + pandas.Series.rolling().apply() + + and + + Rolling.agg() + . +

+ +

1. rollingApply — Custom Function Per Window

+

+ Apply any aggregation function to each rolling window. The function + receives the valid (non-null, non-NaN) numeric values + in the window and must return a single number. +

+
import { rollingApply } from "tsb";
+
+const prices = new Series({ data: [10, 12, 11, 15, 14, 16], name: "price" });
+
+// Custom: range (max - min) over each 3-day window
+const range = (w) => Math.max(...w) - Math.min(...w);
+
+rollingApply(prices, 3, range).toArray();
+// [null, null, 2, 4, 4, 2]
+//  ↑↑ insufficient data (need 3 observations)
+ +
+

Options

+ + + + + + + + + +
OptionDefaultDescription
minPeriodswindowMinimum valid observations to compute (null otherwise)
centerfalseCentre the window (symmetric) instead of trailing
rawfalsePass full window including nulls (filtered to valid nums before fn call)
+
+ +
// minPeriods=1 → start computing from the very first position
+rollingApply(prices, 3, range, { minPeriods: 1 }).toArray();
+// [0, 2, 2, 4, 4, 2]
+
+// center=true → symmetric window around each point
+rollingApply(prices, 3, range, { center: true }).toArray();
+// [null, 2, 4, 4, 2, null]
+ +

2. rollingAgg — Multiple Aggregations at Once

+

+ Apply several named aggregation functions in a single pass over a Series, + returning a DataFrame where each column holds one + aggregation result. +

+
import { rollingAgg } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4, 5, 6, 7, 8] });
+
+const result = rollingAgg(s, 3, {
+  mean: (w) => w.reduce((a, b) => a + b, 0) / w.length,
+  max:  (w) => Math.max(...w),
+  min:  (w) => Math.min(...w),
+  range:(w) => Math.max(...w) - Math.min(...w),
+});
+
+// result is a DataFrame with columns: "mean", "max", "min", "range"
+// result.col("mean").toArray() → [null, null, 2, 3, 4, 5, 6, 7]
+// result.col("range").toArray() → [null, null, 2, 2, 2, 2, 2, 2]
+ +
+ Pandas equivalent:
+ s.rolling(3).agg({"mean": np.mean, "max": np.max, "min": np.min}) +
+ +

3. dataFrameRollingApply — Apply Per Column

+

+ Apply a single custom function independently to each column of a + DataFrame, returning a new DataFrame of the same shape. +

+
import { dataFrameRollingApply } from "tsb";
+
+const df = DataFrame.fromColumns({
+  open:  [100, 102, 101, 105, 103],
+  close: [101, 103, 100, 106, 104],
+});
+
+// Pairwise range within each 2-step window per column
+const range = (w) => Math.max(...w) - Math.min(...w);
+
+dataFrameRollingApply(df, 2, range);
+//   open  close
+// 0 null  null
+// 1    2     2
+// 2    1     3
+// 3    4     6
+// 4    2     2
+ +

4. dataFrameRollingAgg — Multi-Agg Per Column

+

+ Apply multiple named aggregation functions to every column of a + DataFrame. The result has columns named + {originalColumn}_{aggName}. +

+
import { dataFrameRollingAgg } from "tsb";
+
+const df = DataFrame.fromColumns({
+  A: [1, 2, 3, 4, 5],
+  B: [10, 20, 30, 40, 50],
+});
+
+const out = dataFrameRollingAgg(df, 3, {
+  sum:  (w) => w.reduce((a, b) => a + b, 0),
+  mean: (w) => w.reduce((a, b) => a + b, 0) / w.length,
+});
+
+// Columns: "A_sum", "A_mean", "B_sum", "B_mean"
+// A_sum:  [null, null, 6, 9, 12]
+// A_mean: [null, null, 2, 3,  4]
+// B_sum:  [null, null, 60, 90, 120]
+// B_mean: [null, null, 20, 30,  40]
+ +

Comparison with pandas

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
tsbpandas
rollingApply(s, w, fn)s.rolling(w).apply(fn, raw=True)
rollingApply(s, w, fn, {minPeriods:1})s.rolling(w, min_periods=1).apply(fn)
rollingAgg(s, w, {f1, f2})s.rolling(w).agg({"f1": f1, "f2": f2})
dataFrameRollingApply(df, w, fn)df.rolling(w).apply(fn)
dataFrameRollingAgg(df, w, {f1, f2})df.rolling(w).agg({"f1": f1, "f2": f2})
+ +

Use case: Bollinger Band width

+
import { rollingAgg } from "tsb";
+
+// Bollinger Band width = (upper - lower) / middle
+// where upper = mean + 2·std, lower = mean - 2·std
+const prices = new Series({
+  data: [20, 21, 22, 20, 19, 21, 23, 24, 22, 21],
+  name: "price",
+});
+
+const stats = rollingAgg(prices, 5, {
+  mean: (w) => w.reduce((a, b) => a + b, 0) / w.length,
+  std: (w) => {
+    const m = w.reduce((a, b) => a + b, 0) / w.length;
+    return Math.sqrt(w.reduce((a, b) => a + (b - m) ** 2, 0) / (w.length - 1));
+  },
+});
+
+// Bollinger Band width = 4 * std / mean
+const bw = stats.col("std").toArray().map((std, i) => {
+  const mean = stats.col("mean").toArray()[i];
+  if (std === null || mean === null || mean === 0) return null;
+  return (4 * (std as number)) / (mean as number);
+});
+ +

+ ← Back to tsb playground index +

+ + diff --git a/playground/sem_var.html b/playground/sem_var.html new file mode 100644 index 00000000..a3114054 --- /dev/null +++ b/playground/sem_var.html @@ -0,0 +1,90 @@ + + + + + + tsb — sem_var + + + +

← tsb playground

+

📊 Variance & Standard Error (sem_var)

+

+ varSeries / semSeries / + varDataFrame / semDataFrame — + compute sample/population variance and standard error of the mean, mirroring + Series.var() and + Series.sem(). +

+

Equivalent Python: series.var(ddof=1) / series.sem()

+ +

1 · Sample variance (ddof=1)

+
+
import { Series, varSeries } from "tsb";
+
+const s = new Series({ data: [2, 4, 4, 4, 5, 5, 7, 9] });
+varSeries(s);           // ≈ 4.5714  (sample variance, ddof=1: 32/7)
+varSeries(s, { ddof: 0 }); // 4.0  (population variance, ddof=0: 32/8)
+
varSeries(s) → ≈4.5714 +varSeries(s, {ddof:0}) → 4.0
+
+ +

2 · Standard error of the mean

+
+
import { semSeries } from "tsb";
+
+// SEM = sqrt(var / n)
+semSeries(s); // sqrt(4.5714 / 8) ≈ 0.7559
+
semSeries(s) ≈ 0.7559
+
+ +

3 · Handling missing values

+
+
const s2 = new Series({ data: [1, 2, 3, null, 5] });
+
+varSeries(s2);                 // skipna=true (default): ignores null
+varSeries(s2, { skipna: false }); // propagates NaN when null present
+varSeries(s2, { minCount: 5 });   // NaN: need 5 valid values but only 4
+
varSeries(s2) → 2.9167 (approx) +varSeries(s2, {skipna:false}) → NaN +varSeries(s2, {minCount:5}) → NaN
+
+ +

4 · DataFrame column-wise variance

+
+
import { DataFrame, varDataFrame, semDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+  a: [1, 2, 3],
+  b: [10, 20, 30],
+});
+
+varDataFrame(df);          // Series { a: 1, b: 100 }
+semDataFrame(df);          // Series { a: sqrt(1/3), b: sqrt(100/3) }
+varDataFrame(df, { axis: 1 }); // row-wise variance
+
varDataFrame(df) → a: 1.0, b: 100.0 +semDataFrame(df) → a: ≈0.577, b: ≈5.774 +varDataFrame(df, {axis:1}) → row0: 40.5, row1: 162.0, row2: 364.5
+
+ +

5 · numericOnly — skip non-numeric columns

+
+
const df2 = DataFrame.fromColumns({
+  score: [10, 20, 30],
+  label: ["A", "B", "C"],
+});
+
+varDataFrame(df2, { numericOnly: true });
+// Only includes "score", excludes "label"
+
varDataFrame(df2, {numericOnly:true}) → score: 100.0
+
+ + diff --git a/playground/skew_kurt.html b/playground/skew_kurt.html new file mode 100644 index 00000000..bec28a8b --- /dev/null +++ b/playground/skew_kurt.html @@ -0,0 +1,137 @@ + + + + + + tsb — skew & kurtosis + + + +

← tsb playground

+

📐 skewSeries / kurtSeries

+

+ skewSeries / kurtSeries — + compute the adjusted Fisher–Pearson skewness and excess kurtosis (bias-corrected), mirroring + Series.skew() and + Series.kurt(). +

+

Equivalent Python: series.skew() / series.kurt()

+ +

1 · Symmetric distribution — skew ≈ 0

+
+
const s = new Series({ data: [1, 2, 3, 4, 5] });
+skewSeries(s);
+// → 0
+
+
+ +

2 · Right-skewed distribution — positive skew

+
+
const s = new Series({ data: [1, 2, 3, 4, 100] });
+skewSeries(s);
+// → large positive value
+
+
+ +

3 · Kurtosis — uniform-like (platykurtic, negative excess)

+
+
const s = new Series({ data: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] });
+kurtSeries(s);
+// → negative (flatter than normal)
+
+
+ +

4 · NaN propagation — too few values

+
+
skewSeries(new Series({ data: [1, 2] }));  // NaN — need ≥ 3
+kurtSeries(new Series({ data: [1, 2, 3] })); // NaN — need ≥ 4
+
+
+ +

5 · DataFrame column-wise skewness

+
+
const df = DataFrame.fromColumns({
+  symmetric: [1, 2, 3, 4, 5],
+  right_skew: [1, 2, 3, 4, 100],
+});
+skewDataFrame(df).values;
+
+
+ +

6 · DataFrame row-wise kurtosis

+
+
const df = DataFrame.fromColumns({
+  a: [1, 10], b: [2, 10], c: [3, 10], d: [4, 10], e: [100, 10], f: [5, 10],
+});
+kurtDataFrame(df, { axis: 1 }).values;
+
+
+ + + + diff --git a/playground/string_ops.html b/playground/string_ops.html new file mode 100644 index 00000000..5d9fff07 --- /dev/null +++ b/playground/string_ops.html @@ -0,0 +1,282 @@ + + + + + + tsb — String Operations + + + +
+

tsb

+ string_ops + Standalone string operations for Series and arrays +
+
+

+ string_ops provides module-level string functions that complement the + Series.str accessor. All functions accept a Series, a + string[], or a scalar string. +

+ + +
+

strNormalize — Unicode normalisation

+

Normalise every element to NFC, NFD, NFKC, or NFKD. Useful when mixing text + from different sources (e.g. macOS NFD vs Windows NFC).

+
+
+ + +
+
+ + +
+
+ +

+  
+ + +
+

strGetDummies — one-hot encode by delimiter

+

Split each string by a delimiter and produce a binary indicator DataFrame — + one column per unique token. Equivalent to pandas.Series.str.get_dummies().

+
+
+ + +
+
+ + + + +
+
+ +

+  
+ + +
+

strExtractAll — extract all regex matches

+

Find every non-overlapping regex match in each element. Returns a JSON-encoded + array of match arrays per element — parse with JSON.parse.

+
+
+ + +
+
+ + + + +
+
+ +

+  
+ + +
+

strRemovePrefix / strRemoveSuffix

+

Strip a leading or trailing string from elements only when it is present.

+
+
+ + +
+
+ + + + +
+
+ +

+  
+ + +
+

strTranslate — character-level substitution

+

Replace or delete individual characters using a lookup table. + Format: one mapping per line as from=to or from= + to delete.

+
+
+ + +
+
+ + +
+
+ +

+  
+ + +
+

strCharWidth & strByteLength — display & byte widths

+

+ strCharWidth counts columns for terminal display (CJK chars count as 2).
+ strByteLength counts UTF-8 bytes (useful for byte-limited APIs). +

+ + + +

+  
+
+ + + + diff --git a/playground/string_ops_extended.html b/playground/string_ops_extended.html new file mode 100644 index 00000000..81bdaddb --- /dev/null +++ b/playground/string_ops_extended.html @@ -0,0 +1,413 @@ + + + + + + tsb — Extended String Operations + + + +
+

tsb

+ string_ops_extended + Advanced standalone string operations: split-expand, extract, partition, multi-replace, indent, dedent +
+
+

+ string_ops_extended adds advanced string utilities that complement + string_ops and the Series.str accessor. All functions accept + a Series, an array, or a scalar string. +

+ + +
+

strSplitExpand — split and expand to DataFrame columns

+

+ Split each element by a delimiter and expand the parts into a DataFrame + with one column per position. Mirrors pandas.Series.str.split(expand=True). + Shorter rows are padded with null. +

+
+
+ + +
+
+ + + + +
+
+ +

+  
+ + +
+

strExtractGroups — extract regex capture groups

+

+ Extract regex capture groups from each element into a DataFrame. + Named groups ((?<name>...)) become column names; unnamed groups + become 0, 1, … Non-matching rows produce null. +

+
+
+ + +
+
+ + +
Use (?<name>...) for named capture groups.
+
+
+ +

+  
+ + +
+

strPartition / strRPartition — split into (before, sep, after)

+

+ strPartition splits at the first occurrence of the separator; + strRPartition splits at the last. When the separator is not + found, strPartition returns [s, "", ""] and + strRPartition returns ["", "", s]. +

+
+
+ + +
+
+ + +
+
+ +

+  
+ + +
+

strMultiReplace — apply multiple replacements in sequence

+

+ Apply an ordered list of {pat, repl} pairs to each element. + Each replacement is applied to the result of the previous one. + Patterns can be string literals (replaced globally) or RegExp objects. +

+
+
+ + +
+
+ + +
+
+ +

+  
+ + +
+

strIndent / strDedent — line-level indentation utilities

+

+ strIndent adds a prefix to every non-empty line (mirrors + textwrap.indent). + strDedent removes the common leading whitespace from all lines + (mirrors textwrap.dedent). +

+
+
+ + +
+
+ + +
+
+ +

+  
+ +
+ + + + diff --git a/playground/to_datetime.html b/playground/to_datetime.html new file mode 100644 index 00000000..9ed06810 --- /dev/null +++ b/playground/to_datetime.html @@ -0,0 +1,118 @@ + + + + + + tsb — toDatetime + + + +

← tsb playground

+

toDatetime stats

+

+ Convert scalars, arrays, or Series values to JavaScript + Date objects — mirroring + pandas.to_datetime(). +

+ +

Supported input formats

+ + + + + + + + + + + +
FormatExampleResult
ISO 8601 date"2024-03-15"Mar 15 2024
ISO 8601 datetime"2024-03-15T12:00:00Z"Mar 15 2024 12:00 UTC
US format (MM/DD/YYYY)"01/15/2024"Jan 15 2024
European (DD-MM-YYYY)"15-03-2024"Mar 15 2024
Compact (YYYYMMDD)"20240315"Mar 15 2024
Unix ms (number)1710460800000Mar 15 2024 00:00 UTC
Unix s (unit="s")1710460800Mar 15 2024 00:00 UTC
Date objectnew Date(2024,2,15)unchanged
null / undefined / NaNnullnull
+ +

Error handling

+ + + + + +
errors=Behaviour
"raise" (default)Throws TypeError on unparseable input
"coerce"Returns null on unparseable input
"ignore"Returns the original value unchanged
+ +

Quick examples

+
import { toDatetime, Series } from "tsb";
+
+// Scalar
+toDatetime("2024-03-15");         // Date: Mar 15 2024
+toDatetime(1710460800000);        // Date from Unix ms
+toDatetime(1710460800, { unit: "s" }); // Date from Unix seconds
+toDatetime(null);                 // null
+toDatetime("nope", { errors: "coerce" }); // null
+toDatetime("nope", { errors: "ignore" }); // "nope"
+
+// Array
+toDatetime(["2024-01-01", null, "2024-06-15"]);
+// => [Date, null, Date]
+
+// Series
+const s = new Series({ data: ["2024-01-01", "2024-06-15", null] });
+toDatetime(s);
+// => Series<Date | null> with dtype=datetime
+ +

Python / pandas equivalent

+ + +

Live demo

+

Enter a date string or number and click Convert:

+ + +
+ + + + diff --git a/playground/to_from_dict.html b/playground/to_from_dict.html new file mode 100644 index 00000000..a8ca3e88 --- /dev/null +++ b/playground/to_from_dict.html @@ -0,0 +1,122 @@ + + + + + + tsb — toDictOriented / fromDictOriented + + + +

← tsb playground

+ +

toDictOriented / fromDictOriented

+

+ Convert a DataFrame to and from dictionary structures with flexible orientation — mirrors + + pandas.DataFrame.to_dict(orient=...) and + + pandas.DataFrame.from_dict(orient=...). +

+ +

Supported orientations — toDictOriented

+ + + + + + + + + + + +
OrientReturn typeDescription
"dict" / "columns"Record<col, Record<rowLabel, value>>Nested column → row-label → value map
"list"Record<col, value[]>Column name → array of values
"series"Record<col, Series>Column name → Series object
"split"{ index, columns, data }Serialisable split structure
"tight"{ index, columns, data, index_names, column_names }Split plus axis-name metadata
"records"Record<col, value>[]Array of row objects
"index"Record<rowLabel, Record<col, value>>Row-label → column → value
+ +

Supported orientations — fromDictOriented

+ + + + + + + + +
OrientInput shape
"columns" (default){ col: value[] }
"index"{ rowLabel: { col: value } }
"split"{ index?, columns, data }
"tight"Same as "split", extra fields ignored
+ +

Example — all orientations

+
import { DataFrame } from "tsb";
+import { toDictOriented, fromDictOriented } from "tsb";
+
+const df = DataFrame.fromColumns(
+  { name: ["Alice", "Bob"], score: [92, 85] },
+  { index: new Index(["r0", "r1"]) },
+);
+
+// "dict" / "columns"
+toDictOriented(df, "dict");
+// { name: { r0: "Alice", r1: "Bob" }, score: { r0: 92, r1: 85 } }
+
+// "list"
+toDictOriented(df, "list");
+// { name: ["Alice", "Bob"], score: [92, 85] }
+
+// "records"
+toDictOriented(df, "records");
+// [ { name: "Alice", score: 92 }, { name: "Bob", score: 85 } ]
+
+// "split"
+toDictOriented(df, "split");
+// { index: ["r0", "r1"], columns: ["name", "score"], data: [["Alice", 92], ["Bob", 85]] }
+
+// "index"
+toDictOriented(df, "index");
+// { r0: { name: "Alice", score: 92 }, r1: { name: "Bob", score: 85 } }
+
+// fromDictOriented — columns (default)
+fromDictOriented({ name: ["Alice", "Bob"], score: [92, 85] });
+
+// fromDictOriented — index
+fromDictOriented(
+  { r0: { name: "Alice", score: 92 }, r1: { name: "Bob", score: 85 } },
+  "index",
+);
+
+// fromDictOriented — split (round-trip)
+const split = toDictOriented(df, "split");
+const df2 = fromDictOriented(split, "split");
+// df2 is equivalent to df
+
+ +

Missing values

+
+ Missing values (null / undefined) are preserved as null + in all orientations. When using fromDictOriented with "index" + orientation, any column that is absent from a given row object is filled with null. +
+ +

Type signatures

+
function toDictOriented(df: DataFrame, orient: "dict" | "columns"): Record<string, Record<string, Scalar>>;
+function toDictOriented(df: DataFrame, orient: "list"): Record<string, Scalar[]>;
+function toDictOriented(df: DataFrame, orient: "series"): Record<string, Series<Scalar>>;
+function toDictOriented(df: DataFrame, orient: "split"): DictSplit;
+function toDictOriented(df: DataFrame, orient: "tight"): DictTight;
+function toDictOriented(df: DataFrame, orient: "records"): Record<string, Scalar>[];
+function toDictOriented(df: DataFrame, orient: "index"): Record<string, Record<string, Scalar>>;
+
+function fromDictOriented(data: Record<string, readonly Scalar[]>, orient?: "columns"): DataFrame;
+function fromDictOriented(data: Record<string, Record<string, Scalar>>, orient: "index"): DataFrame;
+function fromDictOriented(data: SplitInput, orient: "split" | "tight"): DataFrame;
+
+ + diff --git a/playground/window_extended.html b/playground/window_extended.html new file mode 100644 index 00000000..4232fa5d --- /dev/null +++ b/playground/window_extended.html @@ -0,0 +1,304 @@ + + + + + + tsb — Rolling Extended Stats: sem, skew, kurt, quantile + + + +

tsb — Rolling Extended Statistics

+

+ Higher-order rolling window statistics extending the core + + pandas.Series.rolling() + + API: + sem, skew, kurt, and + quantile. +

+ +

1. rollingSem — Standard Error of the Mean

+

+ The standard error of the mean measures how much the sample mean + would vary across repeated samples. For a window of n values: +

+
sem = std(ddof=1) / √n
+

Requires at least 2 valid observations per window.

+ +
import { rollingSem, Series } from "tsb";
+
+const s = new Series({ data: [2, 4, 4, 4, 5, 5, 7, 9], name: "x" });
+const sem3 = rollingSem(s, 3);
+// [null, null, 0.667, 0, 0.333, 0.333, 0.667, 1.155]
+
+ +
+

Live demo — sem with window=3

+

Comma-separated numbers (nulls accepted):

+ + + + +

+    
+ +

2. rollingSkew — Fisher-Pearson Skewness

+

+ Skewness measures asymmetry of the distribution in each window. + Positive = right tail heavier; negative = left tail heavier. + Uses the unbiased Fisher-Pearson formula (same as pandas): +

+
skew = [n/((n-1)(n-2))] × Σ[(xᵢ−x̄)/s]³
+

Requires ≥ 3 valid observations.

+ +
import { rollingSkew, Series } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4, 5] });
+rollingSkew(s, 3);
+// [null, null, 0, 0, 0]   ← symmetric windows → zero skew
+
+ +
+

Live demo — skewness with window=4

+ + + +

+    
+ +

3. rollingKurt — Excess Kurtosis

+

+ Kurtosis measures how heavy the tails are relative to a normal distribution. + The excess kurtosis subtracts 3, so a normal distribution gives 0. + Uses the Fisher (1930) unbiased formula: +

+
kurt = [n(n+1)/((n-1)(n-2)(n-3))] × Σ[(xᵢ−x̄)/s]⁴ − 3(n-1)²/((n-2)(n-3))
+

Requires ≥ 4 valid observations.

+ +
import { rollingKurt, Series } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4] });
+rollingKurt(s, 4);
+// [null, null, null, -1.2]   ← uniform distribution has kurt = -1.2
+
+ +
+

Live demo — excess kurtosis with window=5

+ + + +

+    
+ +

4. rollingQuantile — Rolling Quantile

+

+ Computes any quantile within each sliding window using configurable + interpolation. When q = 0.5 this is identical to + rolling.median(). +

+ +
import { rollingQuantile, Series } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4, 5] });
+
+rollingQuantile(s, 0.5, 3);  // rolling median: [null, null, 2, 3, 4]
+rollingQuantile(s, 0.25, 3); // [null, null, 1.5, 2.5, 3.5]
+rollingQuantile(s, 0.75, 3); // [null, null, 2.5, 3.5, 4.5]
+
+ +

Interpolation methods

+ + + + + + + + + +
MethodBehaviour when q falls between two values
linear (default)Linear interpolation — same as NumPy / pandas default
lowerTake the lower of the two surrounding values
higherTake the higher of the two surrounding values
midpointArithmetic mean of the two surrounding values
nearestWhichever surrounding value is closest
+ +
+

Live demo — rolling quantile

+ + + + + +

+    
+ +

Common Options

+ + + + + + +
OptionTypeDefaultDescription
minPeriodsnumber= windowMinimum valid obs required per window
centerbooleanfalseCentre the window around each position
+ +
+ Note: Functions are pure — they return new Series objects + without modifying the input. Missing values (null, NaN) + are excluded from each window calculation. +
+ + + + diff --git a/src/core/api_types.ts b/src/core/api_types.ts new file mode 100644 index 00000000..860d2050 --- /dev/null +++ b/src/core/api_types.ts @@ -0,0 +1,629 @@ +/** + * api_types — runtime type-checking predicates, mirroring `pandas.api.types`. + * + * Two groups of functions are provided: + * + * **Value-level predicates** — operate on arbitrary JavaScript values, equivalent + * to `pandas.api.types.is_scalar`, `is_list_like`, `is_number`, etc. + * + * **Dtype-level predicates** — accept a `Dtype` instance or a `DtypeName` string + * and answer questions about the dtype's kind, equivalent to + * `pandas.api.types.is_numeric_dtype`, `is_float_dtype`, etc. + * + * @example + * ```ts + * import { isScalar, isNumericDtype, Dtype } from "tsb"; + * isScalar(42); // true + * isScalar([1, 2, 3]); // false + * isListLike([1, 2, 3]); // true + * isNumericDtype(Dtype.float64); // true + * isStringDtype("string"); // true + * ``` + * + * @module + */ + +import { Dtype } from "./dtype.ts"; +import type { DtypeName } from "../types.ts"; + +// ─── internal helper ────────────────────────────────────────────────────────── + +/** Resolve a Dtype | DtypeName to a Dtype instance. */ +function resolveDtype(dtype: Dtype | DtypeName): Dtype { + if (dtype instanceof Dtype) { + return dtype; + } + return Dtype.from(dtype); +} + +// ═════════════════════════════════════════════════════════════════════════════ +// VALUE-LEVEL PREDICATES +// ═════════════════════════════════════════════════════════════════════════════ + +/** + * Return `true` if `val` is a scalar (not a collection). + * + * Scalars: `string`, `number`, `bigint`, `boolean`, `symbol`, `null`, + * `undefined`, and `Date` objects. Arrays, plain objects, `Map`, `Set`, + * iterables, and class instances other than `Date` are **not** scalars. + * + * Mirrors `pandas.api.types.is_scalar`. 
+ * + * @example + * ```ts + * isScalar(42); // true + * isScalar("hello"); // true + * isScalar(null); // true + * isScalar([1, 2]); // false + * isScalar({ a: 1 }); // false + * ``` + */ +export function isScalar(val: unknown): boolean { + if (val === null || val === undefined) { + return true; + } + const t = typeof val; + if (t === "string" || t === "number" || t === "bigint" || t === "boolean" || t === "symbol") { + return true; + } + if (val instanceof Date) { + return true; + } + return false; +} + +/** + * Return `true` if `val` is "list-like" — i.e. iterable (but not a string) + * or has a non-negative integer `length` property. + * + * Mirrors `pandas.api.types.is_list_like`. + * + * @example + * ```ts + * isListLike([1, 2, 3]); // true + * isListLike(new Set([1])); // true + * isListLike("abc"); // false (strings excluded) + * isListLike(42); // false + * isListLike({ a: 1 }); // false + * ``` + */ +export function isListLike(val: unknown): boolean { + if (val === null || val === undefined) { + return false; + } + if (typeof val === "string") { + return false; + } + // Has Symbol.iterator and is not a plain number/boolean/bigint/symbol + if (typeof val === "number" || typeof val === "boolean" || typeof val === "bigint" || typeof val === "symbol") { + return false; + } + if (typeof val === "object" || typeof val === "function") { + if (Symbol.iterator in (val as object)) { + return true; + } + const len = (val as Record)["length"]; + if (typeof len === "number" && len >= 0 && Number.isInteger(len)) { + return true; + } + } + return false; +} + +/** + * Return `true` if `val` is array-like — i.e. has a non-negative integer + * `length` property. + * + * Mirrors `pandas.api.types.is_array_like`. 
+ * + * @example + * ```ts + * isArrayLike([1, 2]); // true + * isArrayLike("abc"); // true (strings have .length) + * isArrayLike(42); // false + * isArrayLike({}); // false + * ``` + */ +export function isArrayLike(val: unknown): boolean { + if (val === null || val === undefined) { + return false; + } + if (typeof val === "string") { + return true; + } + if (typeof val !== "object" && typeof val !== "function") { + return false; + } + const len = (val as Record)["length"]; + return typeof len === "number" && len >= 0 && Number.isInteger(len); +} + +/** + * Return `true` if `val` is dict-like — a plain object (not an array, not a + * `Date`, not a class instance). + * + * Mirrors `pandas.api.types.is_dict_like`. + * + * @example + * ```ts + * isDictLike({ a: 1 }); // true + * isDictLike(new Map()); // true (has .get / .set) + * isDictLike([1, 2]); // false + * isDictLike("abc"); // false + * ``` + */ +export function isDictLike(val: unknown): boolean { + if (val === null || val === undefined) { + return false; + } + if (typeof val !== "object") { + return false; + } + if (Array.isArray(val)) { + return false; + } + // Treat Map as dict-like (supports key lookup) + if (val instanceof Map) { + return true; + } + // Date is not dict-like + if (val instanceof Date) { + return false; + } + // Plain objects and other objects with properties + return true; +} + +/** + * Return `true` if `val` is an iterator — i.e. has a callable `next` method. + * + * Mirrors `pandas.api.types.is_iterator`. + * + * @example + * ```ts + * isIterator([1, 2][Symbol.iterator]()); // true + * isIterator([1, 2]); // false + * ``` + */ +export function isIterator(val: unknown): boolean { + if (val === null || val === undefined) { + return false; + } + if (typeof val !== "object" && typeof val !== "function") { + return false; + } + return typeof (val as Record)["next"] === "function"; +} + +/** + * Return `true` if `val` is a `number` (including `NaN` and `±Infinity`). 
+ * + * Mirrors `pandas.api.types.is_number`. + * + * @example + * ```ts + * isNumber(3.14); // true + * isNumber(NaN); // true + * isNumber("3"); // false + * ``` + */ +export function isNumber(val: unknown): val is number { + return typeof val === "number"; +} + +/** + * Return `true` if `val` is a `boolean`. + * + * Mirrors `pandas.api.types.is_bool`. + * + * @example + * ```ts + * isBool(true); // true + * isBool(1); // false + * ``` + */ +export function isBool(val: unknown): val is boolean { + return typeof val === "boolean"; +} + +/** + * Return `true` if `val` is a `string`. + * + * Named `isStringValue` to distinguish from the dtype-level `isStringDtype`. + * Mirrors `pandas.api.types.is_string` (not to be confused with dtype checks). + * + * @example + * ```ts + * isStringValue("hello"); // true + * isStringValue(42); // false + * ``` + */ +export function isStringValue(val: unknown): val is string { + return typeof val === "string"; +} + +/** + * Return `true` if `val` is a finite floating-point number (has a fractional + * component or is finite non-integer). `NaN`, `±Infinity` are **not** floats + * in the pandas sense. + * + * Mirrors `pandas.api.types.is_float`. + * + * @example + * ```ts + * isFloat(3.14); // true + * isFloat(3.0); // false (integer value) + * isFloat(NaN); // false + * isFloat(Infinity); // false + * ``` + */ +export function isFloat(val: unknown): boolean { + if (typeof val !== "number") { + return false; + } + if (!Number.isFinite(val)) { + return false; + } + return val !== Math.trunc(val); +} + +/** + * Return `true` if `val` is a finite integer-valued number. + * + * Mirrors `pandas.api.types.is_integer`. 
+ * + * @example + * ```ts + * isInteger(3); // true + * isInteger(3.0); // true (integer value stored as float) + * isInteger(3.14); // false + * isInteger(NaN); // false + * ``` + */ +export function isInteger(val: unknown): boolean { + return typeof val === "number" && Number.isInteger(val); +} + +/** + * Return `true` if `val` is a `bigint`. + * + * @example + * ```ts + * isBigInt(42n); // true + * isBigInt(42); // false + * ``` + */ +export function isBigInt(val: unknown): val is bigint { + return typeof val === "bigint"; +} + +/** + * Return `true` if `val` is a `RegExp`. + * + * Mirrors `pandas.api.types.is_re`. + * + * @example + * ```ts + * isRegExp(/abc/); // true + * isRegExp(new RegExp("x")); // true + * isRegExp("abc"); // false + * ``` + */ +export function isRegExp(val: unknown): val is RegExp { + return val instanceof RegExp; +} + +/** + * Return `true` if `val` can be compiled into a `RegExp` — i.e. it is either + * a `string` or already a `RegExp`. + * + * Mirrors `pandas.api.types.is_re_compilable`. + * + * @example + * ```ts + * isReCompilable("abc"); // true + * isReCompilable(/abc/); // true + * isReCompilable(42); // false + * ``` + */ +export function isReCompilable(val: unknown): boolean { + return typeof val === "string" || val instanceof RegExp; +} + +/** + * Return `true` if `val` is a "missing" value in the pandas sense: `null`, + * `undefined`, or `NaN`. + * + * @example + * ```ts + * isMissing(null); // true + * isMissing(undefined); // true + * isMissing(NaN); // true + * isMissing(0); // false + * isMissing(""); // false + * ``` + */ +export function isMissing(val: unknown): boolean { + if (val === null || val === undefined) { + return true; + } + if (typeof val === "number" && Number.isNaN(val)) { + return true; + } + return false; +} + +/** + * Return `true` if `val` is "hashable" — usable as an object-key in + * JavaScript. 
In practice this means it is a primitive (`string`, `number`, + * `bigint`, `boolean`, `symbol`, `null`, `undefined`). + * + * Mirrors the spirit of `pandas.api.types.is_hashable`. + * + * @example + * ```ts + * isHashable("key"); // true + * isHashable(42); // true + * isHashable({}); // false + * isHashable([]); // false + * ``` + */ +export function isHashable(val: unknown): boolean { + if (val === null || val === undefined) { + return true; + } + const t = typeof val; + return t === "string" || t === "number" || t === "bigint" || t === "boolean" || t === "symbol"; +} + +/** + * Return `true` if `val` is a `Date` instance. + * + * @example + * ```ts + * isDate(new Date()); // true + * isDate("2024-01-01"); // false + * ``` + */ +export function isDate(val: unknown): val is Date { + return val instanceof Date; +} + +// ═════════════════════════════════════════════════════════════════════════════ +// DTYPE-LEVEL PREDICATES +// ═════════════════════════════════════════════════════════════════════════════ + +/** + * Return `true` if the dtype is numeric (integer, unsigned integer, or float). + * + * Mirrors `pandas.api.types.is_numeric_dtype`. + * + * @example + * ```ts + * isNumericDtype(Dtype.float64); // true + * isNumericDtype("int32"); // true + * isNumericDtype("string"); // false + * ``` + */ +export function isNumericDtype(dtype: Dtype | DtypeName): boolean { + return resolveDtype(dtype).isNumeric; +} + +/** + * Return `true` if the dtype is any integer kind (signed or unsigned). + * + * Mirrors `pandas.api.types.is_integer_dtype`. + * + * @example + * ```ts + * isIntegerDtype("int64"); // true + * isIntegerDtype("uint8"); // true + * isIntegerDtype("float32"); // false + * ``` + */ +export function isIntegerDtype(dtype: Dtype | DtypeName): boolean { + return resolveDtype(dtype).isInteger; +} + +/** + * Return `true` if the dtype is a signed integer (`int8`–`int64`). + * + * Mirrors `pandas.api.types.is_signed_integer_dtype`. 
+ * + * @example + * ```ts + * isSignedIntegerDtype("int32"); // true + * isSignedIntegerDtype("uint32"); // false + * ``` + */ +export function isSignedIntegerDtype(dtype: Dtype | DtypeName): boolean { + return resolveDtype(dtype).isSignedInteger; +} + +/** + * Return `true` if the dtype is an unsigned integer (`uint8`–`uint64`). + * + * Mirrors `pandas.api.types.is_unsigned_integer_dtype`. + * + * @example + * ```ts + * isUnsignedIntegerDtype("uint64"); // true + * isUnsignedIntegerDtype("int64"); // false + * ``` + */ +export function isUnsignedIntegerDtype(dtype: Dtype | DtypeName): boolean { + return resolveDtype(dtype).isUnsignedInteger; +} + +/** + * Return `true` if the dtype is a floating-point type (`float32` or `float64`). + * + * Mirrors `pandas.api.types.is_float_dtype`. + * + * @example + * ```ts + * isFloatDtype("float64"); // true + * isFloatDtype("float32"); // true + * isFloatDtype("int32"); // false + * ``` + */ +export function isFloatDtype(dtype: Dtype | DtypeName): boolean { + return resolveDtype(dtype).isFloat; +} + +/** + * Return `true` if the dtype is boolean. + * + * Mirrors `pandas.api.types.is_bool_dtype`. + * + * @example + * ```ts + * isBoolDtype("bool"); // true + * isBoolDtype("int8"); // false + * ``` + */ +export function isBoolDtype(dtype: Dtype | DtypeName): boolean { + return resolveDtype(dtype).isBool; +} + +/** + * Return `true` if the dtype is the `string` dtype. + * + * Mirrors `pandas.api.types.is_string_dtype`. + * + * @example + * ```ts + * isStringDtype("string"); // true + * isStringDtype("object"); // false + * ``` + */ +export function isStringDtype(dtype: Dtype | DtypeName): boolean { + return resolveDtype(dtype).isString; +} + +/** + * Return `true` if the dtype is a datetime type. + * + * Mirrors `pandas.api.types.is_datetime64_dtype`. 
+ * + * @example + * ```ts + * isDatetimeDtype("datetime"); // true + * isDatetimeDtype("string"); // false + * ``` + */ +export function isDatetimeDtype(dtype: Dtype | DtypeName): boolean { + return resolveDtype(dtype).isDatetime; +} + +/** + * Return `true` if the dtype is a timedelta type. + * + * Mirrors `pandas.api.types.is_timedelta64_dtype`. + * + * @example + * ```ts + * isTimedeltaDtype("timedelta"); // true + * isTimedeltaDtype("datetime"); // false + * ``` + */ +export function isTimedeltaDtype(dtype: Dtype | DtypeName): boolean { + return resolveDtype(dtype).isTimedelta; +} + +/** + * Return `true` if the dtype is the categorical dtype. + * + * Mirrors `pandas.api.types.is_categorical_dtype`. + * + * @example + * ```ts + * isCategoricalDtype("category"); // true + * isCategoricalDtype("string"); // false + * ``` + */ +export function isCategoricalDtype(dtype: Dtype | DtypeName): boolean { + return resolveDtype(dtype).isCategory; +} + +/** + * Return `true` if the dtype is the object dtype. + * + * Mirrors `pandas.api.types.is_object_dtype`. + * + * @example + * ```ts + * isObjectDtype("object"); // true + * isObjectDtype("string"); // false + * ``` + */ +export function isObjectDtype(dtype: Dtype | DtypeName): boolean { + return resolveDtype(dtype).isObject; +} + +/** + * Return `true` if the dtype represents complex numbers. + * + * JavaScript has no native complex number type, so this always returns `false` + * (no complex dtype exists in the `tsb` dtype system). Provided for API + * parity with `pandas.api.types.is_complex_dtype`. + * + * @example + * ```ts + * isComplexDtype("float64"); // false (no complex dtype) + * ``` + */ +export function isComplexDtype(_dtype: Dtype | DtypeName): boolean { + return false; +} + +/** + * Return `true` if the dtype is an "extension array" dtype — i.e. any dtype + * beyond the numeric primitives: `string`, `object`, `datetime`, `timedelta`, + * `category`. 
+ * + * Mirrors `pandas.api.types.is_extension_array_dtype`. + * + * @example + * ```ts + * isExtensionArrayDtype("category"); // true + * isExtensionArrayDtype("datetime"); // true + * isExtensionArrayDtype("int64"); // false + * ``` + */ +export function isExtensionArrayDtype(dtype: Dtype | DtypeName): boolean { + const d = resolveDtype(dtype); + return d.isString || d.isObject || d.isDatetime || d.isTimedelta || d.isCategory; +} + +/** + * Return `true` if the dtype can hold period (date period) data. + * In the current `tsb` dtype system this maps to the `datetime` kind. + * + * Mirrors `pandas.api.types.is_period_dtype`. + * + * @example + * ```ts + * isPeriodDtype("datetime"); // true + * isPeriodDtype("float64"); // false + * ``` + */ +export function isPeriodDtype(dtype: Dtype | DtypeName): boolean { + return resolveDtype(dtype).isDatetime; +} + +/** + * Return `true` if the dtype is suitable for interval data — float or integer. + * + * Mirrors `pandas.api.types.is_interval_dtype`. + * + * @example + * ```ts + * isIntervalDtype("float64"); // true + * isIntervalDtype("int32"); // true + * isIntervalDtype("string"); // false + * ``` + */ +export function isIntervalDtype(dtype: Dtype | DtypeName): boolean { + return resolveDtype(dtype).isNumeric; +} diff --git a/src/core/astype.ts b/src/core/astype.ts new file mode 100644 index 00000000..6a9403be --- /dev/null +++ b/src/core/astype.ts @@ -0,0 +1,245 @@ +/** + * astype — dtype coercion for Series and DataFrame. + * + * Mirrors `pandas.Series.astype` and `pandas.DataFrame.astype`: + * cast values to a target dtype, with null/NaN passthrough semantics + * matching pandas' default `errors="raise"` behaviour. 
+ * + * @module + */ + +import { DataFrame } from "./frame.ts"; +import { Series } from "./series.ts"; +import { Dtype } from "./dtype.ts"; +import type { DtypeName, Scalar } from "../types.ts"; + +// ─── helpers ────────────────────────────────────────────────────────────────── + +function isNull(v: Scalar): v is null | undefined { + return v === null || v === undefined; +} + +/** Integer clamp ranges for each integer dtype name. */ +const INT_RANGES: Readonly< + Record +> = { + int8: { lo: -128, hi: 127, unsigned: false }, + int16: { lo: -32768, hi: 32767, unsigned: false }, + int32: { lo: -2147483648, hi: 2147483647, unsigned: false }, + int64: { lo: Number.MIN_SAFE_INTEGER, hi: Number.MAX_SAFE_INTEGER, unsigned: false }, + uint8: { lo: 0, hi: 255, unsigned: true }, + uint16: { lo: 0, hi: 65535, unsigned: true }, + uint32: { lo: 0, hi: 4294967295, unsigned: true }, + uint64: { lo: 0, hi: Number.MAX_SAFE_INTEGER, unsigned: true }, +}; + +/** + * Cast a single scalar value to the target dtype. + * + * Rules per dtype kind: + * - **int/uint**: `Math.trunc(Number(v))`, clamped to the dtype range. `null/undefined → null`. + * - **float32/float64**: `Number(v)`. `null/undefined → null`. Strings that + * are not parsable become `NaN` (same as pandas `errors="coerce"`-like + * number coercion). + * - **bool**: falsy values → `false`; truthy → `true`. `null/undefined → null`. + * - **string**: `String(v)`. `null/undefined → null`. + * - **datetime**: `new Date(Number(v))` for numbers; `new Date(String(v))` for + * strings; `null/undefined → null`. + * - **object/category/timedelta**: value is returned as-is (no transformation). + */ +export function castScalar(v: Scalar, dtype: Dtype): Scalar { + if (isNull(v)) { + return null; + } + + const k = dtype.kind; + + if (k === "int" || k === "uint") { + if (typeof v === "boolean") { + return v ? 
1 : 0; + } + if (v instanceof Date) { + return Math.trunc(v.getTime()); + } + const n = Number(v); + if (Number.isNaN(n)) { + return null; + } + const range = INT_RANGES[dtype.name]; + if (range === undefined) { + return Math.trunc(n); + } + const t = Math.trunc(n); + return Math.max(range.lo, Math.min(range.hi, t)); + } + + if (k === "float") { + if (typeof v === "boolean") { + return v ? 1.0 : 0.0; + } + if (v instanceof Date) { + return v.getTime(); + } + return Number(v); + } + + if (k === "bool") { + if (typeof v === "number") { + return !Number.isNaN(v) && v !== 0; + } + if (v instanceof Date) { + return true; + } + return Boolean(v); + } + + if (k === "string") { + if (v instanceof Date) { + return v.toISOString(); + } + return String(v); + } + + if (k === "datetime") { + if (v instanceof Date) { + return v; + } + if (typeof v === "number") { + return new Date(v); + } + const d = new Date(String(v)); + return Number.isNaN(d.getTime()) ? null : d; + } + + // object / category / timedelta — return unchanged + return v; +} + +// ─── AstypeOptions ──────────────────────────────────────────────────────────── + +/** Options accepted by {@link astypeSeries} and {@link astype}. */ +export interface AstypeOptions { + /** + * When `true`, values that cannot be cast are silently replaced with + * `null` instead of throwing. + * + * @default false + */ + readonly errors?: "raise" | "ignore"; +} + +// ─── astypeSeries ───────────────────────────────────────────────────────────── + +/** + * Cast a Series to a different dtype. + * + * Returns a new Series whose values have been coerced to `dtype`. The index + * and name are preserved unchanged. 
+ * + * @example + * ```ts + * const s = new Series({ data: [1.9, 2.1, 3.7], name: "x" }); + * const si = astypeSeries(s, "int64"); + * si.values; // [1, 2, 3] + * si.dtype.name; // "int64" + * ``` + */ +export function astypeSeries( + s: Series, + dtype: DtypeName | Dtype, + options: AstypeOptions = {}, +): Series { + const targetDtype = dtype instanceof Dtype ? dtype : Dtype.from(dtype as DtypeName); + const { errors = "raise" } = options; + + const casted: Scalar[] = []; + for (const v of s.values) { + let out: Scalar; + try { + out = castScalar(v, targetDtype); + } catch (e) { + if (errors === "ignore") { + out = v; + } else { + throw e; + } + } + casted.push(out); + } + + return new Series({ + data: casted, + index: s.index, + dtype: targetDtype, + name: s.name, + }); +} + +// ─── DataFrame astype ───────────────────────────────────────────────────────── + +/** + * Options for {@link astype} (DataFrame variant). + */ +export interface DataFrameAstypeOptions extends AstypeOptions { + /** + * When `true`, only the columns listed in `dtype` (when `dtype` is a + * `Record`) are recast; other columns are carried over unchanged. + * + * When `false` (default) and `dtype` is a `Record`, columns not listed + * in the map are carried over unchanged (same behaviour). + * + * This option exists for pandas API compatibility. + */ + readonly copy?: boolean; +} + +/** + * Cast one or more columns in a DataFrame to the specified dtype(s). + * + * - Pass a single `DtypeName` or `Dtype` to cast **all** columns. + * - Pass a `Record` to cast individual columns. + * Columns not listed are returned unchanged. + * + * Returns a new DataFrame; the original is not modified. 
+ * + * @example + * ```ts + * const df = DataFrame.fromColumns({ a: [1.5, 2.7], b: ["3", "4"] }); + * + * // Cast all columns to float64 + * astype(df, "float64"); + * + * // Cast only column "b" to int64 + * astype(df, { b: "int64" }); + * ``` + */ +export function astype( + df: DataFrame, + dtype: + | DtypeName + | Dtype + | Readonly>, + options: DataFrameAstypeOptions = {}, +): DataFrame { + const colMap = new Map>(); + + const isSingleDtype = + typeof dtype === "string" || dtype instanceof Dtype; + + for (const name of df.columns.values) { + const col = df.col(name); + if (isSingleDtype) { + colMap.set(name, astypeSeries(col, dtype as DtypeName | Dtype, options)); + } else { + const mapping = dtype as Readonly>; + const target = mapping[name]; + if (target !== undefined) { + colMap.set(name, astypeSeries(col, target, options)); + } else { + colMap.set(name, col); + } + } + } + + return new DataFrame(colMap, df.index); +} diff --git a/src/core/attrs.ts b/src/core/attrs.ts new file mode 100644 index 00000000..81c6be1c --- /dev/null +++ b/src/core/attrs.ts @@ -0,0 +1,291 @@ +/** + * attrs — user-defined metadata dictionary for Series and DataFrame. + * + * Mirrors `pandas.DataFrame.attrs` / `pandas.Series.attrs`: an arbitrary + * key→value dictionary that travels with a data object and lets callers + * annotate it with provenance, units, descriptions, or any other metadata. + * + * Because the tsb Series and DataFrame classes are immutable by design, this + * module maintains a **WeakMap registry** that maps each object to its attrs + * record. The registry entries are garbage-collected automatically when the + * object itself is collected — there is no memory leak. 
+ * + * ### Public surface + * + * ```ts + * import { getAttrs, setAttrs, updateAttrs, copyAttrs, withAttrs, clearAttrs, + * hasAttrs } from "tsb"; + * + * const df = DataFrame.fromColumns({ x: [1, 2, 3] }); + * + * // Annotate + * setAttrs(df, { source: "sensor_A", unit: "metres" }); + * getAttrs(df); // { source: "sensor_A", unit: "metres" } + * + * // Merge additional keys + * updateAttrs(df, { version: 2 }); + * getAttrs(df); // { source: "sensor_A", unit: "metres", version: 2 } + * + * // Fluent helper — sets attrs and returns the same object + * const annotated = withAttrs(df, { source: "sensor_B" }); + * annotated === df; // true — same reference + * + * // Propagate to a derived object + * const df2 = DataFrame.fromColumns({ y: [4, 5, 6] }); + * copyAttrs(df, df2); + * getAttrs(df2); // { source: "sensor_A", unit: "metres", version: 2 } + * ``` + * + * @module + */ + +// ─── types ──────────────────────────────────────────────────────────────────── + +/** + * The attrs dictionary type. Keys are strings; values may be any JSON-safe + * primitive or nested structure. Mirrors the `dict` type of `pandas.attrs`. + */ +export type Attrs = Record; + +// ─── registry ───────────────────────────────────────────────────────────────── + +/** Internal WeakMap from any object to its attrs record. */ +const registry = new WeakMap(); + +// ─── public API ─────────────────────────────────────────────────────────────── + +/** + * Retrieve the attrs dictionary for `obj`. + * + * Returns a **shallow copy** so callers cannot mutate the stored record + * accidentally. If no attrs have been set, returns an empty object `{}`. + * + * @example + * ```ts + * const s = new Series({ data: [1, 2, 3] }); + * setAttrs(s, { unit: "kg" }); + * getAttrs(s); // { unit: "kg" } + * ``` + */ +export function getAttrs(obj: object): Attrs { + const stored = registry.get(obj); + return stored !== undefined ? 
{ ...stored } : {}; +} + +/** + * **Overwrite** the attrs dictionary for `obj` with `attrs`. + * + * Any previously stored attrs are discarded. Stores a shallow copy so + * subsequent mutations to the passed-in object do not affect the stored value. + * + * @example + * ```ts + * setAttrs(df, { source: "sensor_A" }); + * getAttrs(df); // { source: "sensor_A" } + * ``` + */ +export function setAttrs(obj: object, attrs: Attrs): void { + registry.set(obj, { ...attrs }); +} + +/** + * **Merge** `updates` into the existing attrs for `obj`. + * + * Existing keys that are not present in `updates` are preserved. Keys that + * are present in both `updates` and the existing attrs are overwritten. + * + * @example + * ```ts + * setAttrs(df, { source: "A" }); + * updateAttrs(df, { version: 2 }); + * getAttrs(df); // { source: "A", version: 2 } + * ``` + */ +export function updateAttrs(obj: object, updates: Attrs): void { + const existing = registry.get(obj) ?? {}; + registry.set(obj, { ...existing, ...updates }); +} + +/** + * **Copy** the attrs from `source` to `target`, overwriting any existing attrs + * on `target`. + * + * Useful for propagating metadata from an input to a derived result. + * + * @example + * ```ts + * setAttrs(df1, { source: "sensor_A" }); + * const df2 = df1.head(5); + * copyAttrs(df1, df2); + * getAttrs(df2); // { source: "sensor_A" } + * ``` + */ +export function copyAttrs(source: object, target: object): void { + const stored = registry.get(source); + if (stored !== undefined) { + registry.set(target, { ...stored }); + } else { + registry.delete(target); + } +} + +/** + * **Fluent helper** — set attrs on `obj` and return the same object. + * + * This **replaces** any previously stored attrs (same semantics as + * {@link setAttrs}). The return type is `T` so callers do not lose the + * concrete type of their object. 
+ * + * @example + * ```ts + * const annotated = withAttrs(df, { source: "sensor_A", unit: "metres" }); + * annotated === df; // true — same reference + * getAttrs(annotated); // { source: "sensor_A", unit: "metres" } + * ``` + */ +export function withAttrs(obj: T, attrs: Attrs): T { + registry.set(obj, { ...attrs }); + return obj; +} + +/** + * **Remove** all attrs from `obj`. + * + * After calling this, {@link getAttrs} returns `{}` and {@link hasAttrs} + * returns `false`. + * + * @example + * ```ts + * setAttrs(df, { source: "A" }); + * clearAttrs(df); + * hasAttrs(df); // false + * getAttrs(df); // {} + * ``` + */ +export function clearAttrs(obj: object): void { + registry.delete(obj); +} + +/** + * Returns `true` if `obj` has any attrs set, `false` otherwise. + * + * @example + * ```ts + * hasAttrs(df); // false + * setAttrs(df, { x: 1 }); + * hasAttrs(df); // true + * clearAttrs(df); + * hasAttrs(df); // false + * ``` + */ +export function hasAttrs(obj: object): boolean { + return registry.has(obj); +} + +/** + * Retrieve a **single** attrs value by key. + * + * Returns `undefined` if the key does not exist (or no attrs are set). + * + * @example + * ```ts + * setAttrs(df, { unit: "kg" }); + * getAttr(df, "unit"); // "kg" + * getAttr(df, "missing"); // undefined + * ``` + */ +export function getAttr(obj: object, key: string): unknown { + return registry.get(obj)?.[key]; +} + +/** + * Set a **single** attrs key on `obj`, preserving all other existing attrs. + * + * @example + * ```ts + * setAttr(df, "unit", "kg"); + * setAttr(df, "source", "lab"); + * getAttrs(df); // { unit: "kg", source: "lab" } + * ``` + */ +export function setAttr(obj: object, key: string, value: unknown): void { + const existing = registry.get(obj) ?? {}; + registry.set(obj, { ...existing, [key]: value }); +} + +/** + * Delete a **single** attrs key from `obj`, preserving all other keys. + * + * Does nothing if the key does not exist. 
+ * + * @example + * ```ts + * setAttrs(df, { a: 1, b: 2 }); + * deleteAttr(df, "a"); + * getAttrs(df); // { b: 2 } + * ``` + */ +export function deleteAttr(obj: object, key: string): void { + const existing = registry.get(obj); + if (existing === undefined) return; + const { [key]: _removed, ...rest } = existing; + if (Object.keys(rest).length === 0) { + registry.delete(obj); + } else { + registry.set(obj, rest); + } +} + +/** + * Return the number of attrs keys stored on `obj`. + * + * @example + * ```ts + * attrsCount(df); // 0 + * setAttrs(df, { a: 1, b: 2 }); + * attrsCount(df); // 2 + * ``` + */ +export function attrsCount(obj: object): number { + return Object.keys(registry.get(obj) ?? {}).length; +} + +/** + * Return the list of attrs keys stored on `obj`. + * + * @example + * ```ts + * setAttrs(df, { a: 1, b: 2 }); + * attrsKeys(df); // ["a", "b"] + * ``` + */ +export function attrsKeys(obj: object): string[] { + return Object.keys(registry.get(obj) ?? {}); +} + +/** + * Merge attrs from multiple source objects into a single target object. + * + * Sources are applied left-to-right; later sources overwrite earlier ones on + * key conflicts. Overwrites any existing attrs on `target`. 
+ * + * @example + * ```ts + * setAttrs(s1, { source: "A", unit: "kg" }); + * setAttrs(s2, { source: "B", scale: 2 }); + * mergeAttrs([s1, s2], df); + * getAttrs(df); // { source: "B", unit: "kg", scale: 2 } + * ``` + */ +export function mergeAttrs(sources: readonly object[], target: object): void { + const merged: Attrs = {}; + for (const src of sources) { + const stored = registry.get(src); + if (stored !== undefined) { + Object.assign(merged, stored); + } + } + if (Object.keys(merged).length > 0) { + registry.set(target, merged); + } +} diff --git a/src/core/index.ts b/src/core/index.ts index ea275952..cf1d78cb 100644 --- a/src/core/index.ts +++ b/src/core/index.ts @@ -53,3 +53,12 @@ export { reindexSeries, reindexDataFrame } from "./reindex.ts"; export type { ReindexMethod, ReindexSeriesOptions, ReindexDataFrameOptions } from "./reindex.ts"; export { alignSeries, alignDataFrame } from "./align.ts"; export type { AlignSeriesOptions, AlignDataFrameOptions } from "./align.ts"; +export { astypeSeries, astype, castScalar } from "./astype.ts"; +export type { AstypeOptions, DataFrameAstypeOptions } from "./astype.ts"; +export { isScalar, isListLike, isArrayLike, isDictLike, isIterator } from "./api_types.ts"; +export { getAttrs, setAttrs, updateAttrs, copyAttrs } from "./attrs.ts"; +export type { Attrs } from "./attrs.ts"; +export { insertColumn, popColumn, reorderColumns, moveColumn } from "./insert_pop.ts"; +export type { PopResult } from "./insert_pop.ts"; +export { toDictOriented, fromDictOriented } from "./to_from_dict.ts"; +export type { ToDictOrient, FromDictOrient, DictSplit, DictTight } from "./to_from_dict.ts"; diff --git a/src/core/insert_pop.ts b/src/core/insert_pop.ts new file mode 100644 index 00000000..d56c42bc --- /dev/null +++ b/src/core/insert_pop.ts @@ -0,0 +1,214 @@ +/** + * DataFrame.insert() and DataFrame.pop() — column insertion and removal. + * + * Mirrors `pandas.DataFrame.insert(loc, column, value)` and + * `pandas.DataFrame.pop(item)`. 
+ * + * Since `DataFrame` in tsb is immutable, both operations return a new DataFrame. + * `popColumn` returns both the extracted `Series` and the resulting DataFrame. + * + * @example + * ```ts + * import { DataFrame, insertColumn, popColumn } from "tsb"; + * + * const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] }); + * + * // Insert column "x" at position 1 (between "a" and "b") + * const df2 = insertColumn(df, 1, "x", [10, 20]); + * // df2.columns.values → ["a", "x", "b"] + * + * // Pop column "a" out of df2 + * const { series, df: df3 } = popColumn(df2, "a"); + * // series.values → [1, 2] + * // df3.columns.values → ["x", "b"] + * ``` + * + * @packageDocumentation + */ + +import type { Label, Scalar } from "../types.ts"; +import { Index } from "./base-index.ts"; +import { DataFrame } from "./frame.ts"; +import { Series } from "./series.ts"; + +// ─── insertColumn ───────────────────────────────────────────────────────────── + +/** + * Insert a new column into `df` at integer column position `loc`. + * + * Mirrors `pandas.DataFrame.insert(loc, column, value, allow_duplicates=False)`. + * Raises a `RangeError` if: + * - `column` already exists in `df` (no duplicates by default) + * - `loc` is out of range (must be 0 ≤ loc ≤ df.shape[1]) + * - `values` length does not match the number of rows + * + * @param df Source DataFrame (not mutated). + * @param loc Zero-based integer position at which to insert the column. + * @param column Name of the new column. + * @param values Column data as an array of scalars or a `Series`. + * @param allowDuplicates When `true`, silently allow duplicate column names. Default `false`. + * @returns A new DataFrame with the column inserted. 
+ */
+export function insertColumn(
+  df: DataFrame,
+  loc: number,
+  column: string,
+  values: readonly Scalar[] | Series,
+  allowDuplicates = false,
+): DataFrame {
+  const nCols = df.shape[1];
+  const nRows = df.shape[0];
+
+  if (!allowDuplicates && df.has(column)) {
+    throw new RangeError(
+      `Column "${column}" already exists. Use allowDuplicates=true to permit duplicate names.`,
+    );
+  }
+
+  if (loc < 0 || loc > nCols) {
+    throw new RangeError(`loc=${loc} is out of range [0, ${nCols}].`);
+  }
+
+  // Resolve values to a Series aligned to df's row index.
+  const series: Series =
+    values instanceof Series
+      ? values
+      : new Series({ data: values, index: df.index, name: column });
+
+  if (series.size !== nRows) {
+    throw new RangeError(
+      `values length ${series.size} does not match DataFrame row count ${nRows}.`,
+    );
+  }
+
+  // Rebuild the column map, inserting the new column at position `loc`.
+  const colMap = new Map<string, Series>();
+  let idx = 0;
+
+  for (const colName of df.columns.values) {
+    if (idx === loc) {
+      colMap.set(column, series);
+    }
+    colMap.set(colName, df.col(colName));
+    idx++;
+  }
+
+  // Handle insertion at the end (loc === nCols).
+  if (loc === nCols) {
+    colMap.set(column, series);
+  }
+
+  return new DataFrame(colMap, df.index);
+}
+
+// ─── popColumn ──────────────────────────────────────────────────────────────
+
+/** Return type of {@link popColumn}. */
+export interface PopResult {
+  /** The extracted column as a Series. */
+  readonly series: Series;
+  /** The DataFrame with the column removed. */
+  readonly df: DataFrame;
+}
+
+/**
+ * Remove a column from `df` and return both the extracted `Series` and the
+ * resulting DataFrame.
+ *
+ * Mirrors `pandas.DataFrame.pop(item)`, but because tsb DataFrames are
+ * immutable this function returns the removed Series *and* the new DataFrame
+ * (rather than mutating in place).
+ *
+ * Raises a `RangeError` if `col` does not exist in `df`.
+ *
+ * @param df Source DataFrame (not mutated).
+ * @param col Name of the column to remove.
+ * @returns `{ series, df }` — the extracted column and the remaining DataFrame.
+ *
+ * @example
+ * ```ts
+ * const { series, df: remaining } = popColumn(df, "age");
+ * // series contains the "age" column; remaining has all other columns
+ * ```
+ */
+export function popColumn(df: DataFrame, col: string): PopResult {
+  const series = df.get(col);
+  if (series === undefined) {
+    throw new RangeError(`Column "${col}" not found in DataFrame.`);
+  }
+
+  const colMap = new Map<string, Series>();
+  for (const colName of df.columns.values) {
+    if (colName !== col) {
+      colMap.set(colName, df.col(colName));
+    }
+  }
+
+  return {
+    series,
+    df: new DataFrame(colMap, df.index),
+  };
+}
+
+// ─── reorderColumns ──────────────────────────────────────────────────────────
+
+/**
+ * Reorder the columns of `df` to match `order`.
+ *
+ * Mirrors `df[order]` in pandas. All names in `order` must be present in `df`;
+ * extra names in `df` not listed in `order` are dropped.
+ *
+ * @param df Source DataFrame.
+ * @param order New column order (subset of `df.columns.values`).
+ * @returns A new DataFrame with columns in the specified order.
+ */
+export function reorderColumns(df: DataFrame, order: readonly string[]): DataFrame {
+  const colMap = new Map<string, Series>();
+  for (const name of order) {
+    const s = df.get(name);
+    if (s === undefined) {
+      throw new RangeError(`Column "${name}" not found in DataFrame.`);
+    }
+    colMap.set(name, s);
+  }
+  return new DataFrame(colMap, df.index);
+}
+
+// ─── moveColumn ──────────────────────────────────────────────────────────────
+
+/**
+ * Move an existing column to a new integer position.
+ *
+ * This is a convenience wrapper combining {@link popColumn} and
+ * {@link insertColumn}: it removes the column from its current position and
+ * re-inserts it at `newLoc` in the resulting DataFrame.
+ *
+ * @param df Source DataFrame.
+ * @param col Name of the column to move.
+ * @param newLoc Target position (0 ≤ newLoc ≤ df.shape[1] − 1).
+ * @returns A new DataFrame with the column at the new position.
+ */
+export function moveColumn(df: DataFrame, col: string, newLoc: number): DataFrame {
+  const { series, df: without } = popColumn(df, col);
+  return insertColumn(without, newLoc, col, series);
+}
+
+// ─── internal re-export helper (used by DataFrame constructor access) ─────────
+
+/**
+ * Build a new DataFrame from an ordered iterable of `[name, Series]` pairs and
+ * a row index. Exported for use by other tsb modules that need to construct
+ * DataFrames without going through the public factory methods.
+ *
+ * @internal
+ */
+export function dataFrameFromPairs(
+  pairs: Iterable<readonly [string, Series]>,
+  index: Index