diff --git a/playground/api_types.html b/playground/api_types.html new file mode 100644 index 00000000..05b8347c --- /dev/null +++ b/playground/api_types.html @@ -0,0 +1,222 @@ + + + + + + tsb β€” api_types: Runtime type-checking predicates + + + +

πŸ“¦ api_types β€” Runtime type-checking predicates

+

+ Port of pandas.api.types. + Two groups of predicates: + value-level (work on arbitrary JS values) and + dtype-level (work on Dtype instances or dtype name strings). +

+ +

Value-Level Predicates

+ +

isScalar(val)

+

Returns true for primitives and Date. Mirrors pd.api.types.is_scalar.

+
import { isScalar } from "tsb";
+
+isScalar(42);            // true
+isScalar("hello");       // true
+isScalar(null);          // true
+isScalar(new Date());    // true
+isScalar([1, 2]);        // false
+isScalar({ a: 1 });      // false
+
+ +

isListLike(val)

+

Returns true for iterables (excluding strings) and objects with a numeric length.

+
isListLike([1, 2, 3]);      // true
+isListLike(new Set([1]));   // true
+isListLike("abc");          // false
+isListLike(42);             // false
+
+ +

isArrayLike(val)

+

Returns true for values with a non-negative integer length (including strings).

+
isArrayLike([1, 2]);      // true
+isArrayLike("hello");     // true
+isArrayLike(42);          // false
+ +

isDictLike(val)

+

Returns true for plain objects and Map.

+
isDictLike({ a: 1 });   // true
+isDictLike(new Map());  // true
+isDictLike([]);         // false
+ +

isNumber / isBool / isStringValue / isFloat / isInteger

+
isNumber(3.14);      // true
+isNumber(NaN);       // true  (typeof NaN === "number")
+isBool(true);        // true
+isStringValue("hi"); // true
+isFloat(3.14);       // true
+isFloat(3.0);        // false  (integer value)
+isInteger(42);       // true
+isInteger(3.14);     // false
+
+ +

isMissing(val)

+

Returns true for null, undefined, or NaN.

+
isMissing(null);       // true
+isMissing(undefined);  // true
+isMissing(NaN);        // true
+isMissing(0);          // false
+ +

isHashable(val)

+

Returns true for values safe to use as object keys (primitives).

+
isHashable("key");  // true
+isHashable(42);     // true
+isHashable({});     // false
+ +

Dtype-Level Predicates

+

All accept a Dtype instance or a dtype name string.

+ +
import { Dtype, isNumericDtype, isFloatDtype, isIntegerDtype,
+         isStringDtype, isDatetimeDtype, isCategoricalDtype } from "tsb";
+
+isNumericDtype(Dtype.float64);    // true
+isNumericDtype("int32");          // true
+isNumericDtype("string");         // false
+
+isFloatDtype("float32");          // true
+isIntegerDtype("int64");          // true
+isUnsignedIntegerDtype("uint8");  // true
+isSignedIntegerDtype("int8");     // true
+isStringDtype("string");          // true
+isDatetimeDtype("datetime");      // true
+isCategoricalDtype("category");   // true
+isObjectDtype("object");          // true
+isExtensionArrayDtype("category"); // true
+isExtensionArrayDtype("int32");    // false
+
+ +

Complete Predicate Reference

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FunctionPandas equivalentDescription
isScalar(val)is_scalarPrimitive or Date
isListLike(val)is_list_likeIterable (not string) or has length
isArrayLike(val)is_array_likeHas non-negative integer length
isDictLike(val)is_dict_likePlain object or Map
isIterator(val)is_iteratorHas callable next method
isNumber(val)is_numbertypeof === "number"
isBool(val)is_booltypeof === "boolean"
isStringValue(val)is_stringtypeof === "string"
isFloat(val)is_floatFinite number with fractional part
isInteger(val)is_integerInteger-valued number
isBigInt(val)β€”typeof === "bigint"
isRegExp(val)is_reRegExp instance
isReCompilable(val)is_re_compilableString or RegExp
isMissing(val)isnanull / undefined / NaN
isHashable(val)is_hashableSafe as object key (primitive)
isDate(val)β€”Date instance
isNumericDtype(d)is_numeric_dtypeInt, uint, or float
isIntegerDtype(d)is_integer_dtypeAny integer (signed or unsigned)
isSignedIntegerDtype(d)is_signed_integer_dtypeint8–int64
isUnsignedIntegerDtype(d)is_unsigned_integer_dtypeuint8–uint64
isFloatDtype(d)is_float_dtypefloat32 or float64
isBoolDtype(d)is_bool_dtypebool
isStringDtype(d)is_string_dtypestring dtype
isDatetimeDtype(d)is_datetime64_dtypedatetime
isTimedeltaDtype(d)is_timedelta64_dtypetimedelta
isCategoricalDtype(d)is_categorical_dtypecategory
isObjectDtype(d)is_object_dtypeobject
isComplexDtype(d)is_complex_dtypeAlways false (no complex in tsb)
isExtensionArrayDtype(d)is_extension_array_dtypestring/object/datetime/timedelta/category
isPeriodDtype(d)is_period_dtypeMaps to datetime
isIntervalDtype(d)is_interval_dtypeNumeric dtypes
+ + + + diff --git a/playground/attrs.html b/playground/attrs.html new file mode 100644 index 00000000..ae25d5f6 --- /dev/null +++ b/playground/attrs.html @@ -0,0 +1,183 @@ + + + + + + tsb β€” attrs: user-defined metadata + + + +

← tsb playground

+ +

attrs β€” User-Defined Metadata

+

+ Attach arbitrary key→value metadata to any Series or DataFrame + — mirrors + + pandas.DataFrame.attrs and + + pandas.Series.attrs. +

+ +
+ Design note: Because tsb objects are immutable (their data, index, + and dtype are frozen), attrs are stored in a WeakMap registry rather than as + instance properties. This means attrs are attached & detached without touching the object + itself, and garbage-collected automatically when the object is collected. +
+ +

Basic usage

+ +
import {
+  getAttrs, setAttrs, updateAttrs, copyAttrs, withAttrs,
+  clearAttrs, hasAttrs, getAttr, setAttr, deleteAttr,
+  attrsCount, attrsKeys, mergeAttrs,
+} from "tsb";
+import { DataFrame, Series } from "tsb";
+
+// ─── annotate a DataFrame ─────────────────────────────────────────────────
+const df = DataFrame.fromColumns({
+  temperature: [22.1, 23.5, 21.8],
+  humidity:    [55, 60, 58],
+});
+
+setAttrs(df, {
+  source: "weather_station_42",
+  unit:   "Celsius",
+  notes: "Morning readings",
+});
+
+getAttrs(df);
+// β†’ { source: "weather_station_42", unit: "Celsius", notes: "Morning readings" }
+
+getAttr(df, "unit");     // β†’ "Celsius"
+getAttr(df, "missing");  // β†’ undefined
+attrsCount(df);          // β†’ 3
+attrsKeys(df);           // β†’ ["source", "unit", "notes"]
+hasAttrs(df);            // β†’ true
+
+ +

Merging and updating

+ +
// updateAttrs merges new keys into existing attrs; updates win on conflict
+updateAttrs(df, { version: 2, notes: "Updated notes" });
+getAttrs(df);
+// β†’ { source: "weather_station_42", unit: "Celsius", notes: "Updated notes", version: 2 }
+
+// setAttr / deleteAttr for single keys
+setAttr(df, "sensor_id", "WS-042");
+deleteAttr(df, "notes");
+getAttrs(df);
+// β†’ { source: "weather_station_42", unit: "Celsius", version: 2, sensor_id: "WS-042" }
+
+ +

Propagating metadata to derived objects

+ +
// copyAttrs: copy all attrs from one object to another
+const s = new Series({ data: [22.1, 23.5, 21.8], name: "temperature" });
+setAttrs(s, { unit: "Celsius", source: "sensor_A" });
+
+const derived = new Series({ data: [71.8, 74.3, 71.2], name: "fahrenheit" });
+copyAttrs(s, derived);
+getAttrs(derived);
+// β†’ { unit: "Celsius", source: "sensor_A" }
+
+// Then update the copy
+setAttr(derived, "unit", "Fahrenheit");
+getAttrs(derived);  // β†’ { unit: "Fahrenheit", source: "sensor_A" }
+getAttrs(s);        // β†’ { unit: "Celsius", source: "sensor_A" }  ← unchanged
+
+ +

Fluent helper β€” withAttrs

+ +
// withAttrs sets attrs and returns the same object reference
+// Handy for inline annotation
const base = DataFrame.fromColumns({ x: [1, 2, 3] });
+const annotated = withAttrs(base, { source: "lab_experiment", date: "2026-04-09" });
+
+annotated === base;  // true β€” same reference, not a copy
+getAttrs(annotated);
+// β†’ { source: "lab_experiment", date: "2026-04-09" }
+
+ +

Merging from multiple sources

+ +
// mergeAttrs: combine attrs from multiple objects into a target
+const s1 = new Series({ data: [1, 2, 3], name: "a" });
+const s2 = new Series({ data: [4, 5, 6], name: "b" });
+setAttrs(s1, { source: "sensor_A", unit: "kg" });
+setAttrs(s2, { source: "sensor_B", scale: 2.5 });
+
+const combined = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+mergeAttrs([s1, s2], combined);
+// Later sources win on conflicts: source="sensor_B"
+getAttrs(combined);
+// β†’ { source: "sensor_B", unit: "kg", scale: 2.5 }
+
+ +

Clearing metadata

+ +
setAttrs(df, { x: 1, y: 2 });
+hasAttrs(df);   // β†’ true
+attrsCount(df); // β†’ 2
+
+clearAttrs(df);
+hasAttrs(df);   // β†’ false
+getAttrs(df);   // β†’ {}
+
+ +

API reference

+ + + + + + + + + + + + + + + + + + + + +
FunctionDescription
getAttrs(obj)Return a shallow copy of all stored attrs (empty {} if none)
setAttrs(obj, attrs)Overwrite attrs completely with the given record
updateAttrs(obj, updates)Merge updates into existing attrs (updates overwrite existing keys on conflict)
withAttrs(obj, attrs)Fluent: set attrs and return the same object
copyAttrs(source, target)Copy all attrs from source to target
mergeAttrs(sources[], target)Merge attrs from multiple sources; later sources win
clearAttrs(obj)Remove all attrs from obj
hasAttrs(obj)Return true if any attrs are set
getAttr(obj, key)Get a single attr value (undefined if missing)
setAttr(obj, key, value)Set a single attr, preserving other keys
deleteAttr(obj, key)Delete a single attr key
attrsCount(obj)Number of stored attr keys
attrsKeys(obj)Array of stored attr key names
+ +

Comparison with pandas

+ + + + + + + + + + + + + + +
pandastsb
df.attrsgetAttrs(df)
df.attrs = {"k": "v"}setAttrs(df, { k: "v" })
df.attrs["k"] = "v"setAttr(df, "k", "v")
df.attrs["k"]getAttr(df, "k")
del df.attrs["k"]deleteAttr(df, "k")
df.attrs.update(d)updateAttrs(df, d)
df.attrs.clear()clearAttrs(df)
+ + diff --git a/playground/categorical_ops.html b/playground/categorical_ops.html new file mode 100644 index 00000000..c2d794ca --- /dev/null +++ b/playground/categorical_ops.html @@ -0,0 +1,338 @@ + + + + + + tsb β€” Categorical Ops + + + +
+
+ Loading tsb runtime… +
+ +← back to index +

🏷️ Categorical Ops

+

+ Standalone categorical utility functions that complement the Series.cat accessor. + Mirrors pd.Categorical.from_codes, set operations on categories, frequency helpers, + and cross-tabulation. +

+ + +
+

catFromCodes(codes, categories, opts?)

+

+ Construct a categorical Series from integer codes (0-based) and a categories array. + Code -1 maps to null (missing). Mirrors + pd.Categorical.from_codes. +

+ + +
+
+ + +
+

Category set operations

+

+ catUnionCategories, catIntersectCategories, + catDiffCategories, and catEqualCategories let you + combine or compare the category sets of two Series. +

+ + +
+
+ + +
+

catSortByFreq(series, opts?)

+

+ Reorder categories by their frequency in the data (most frequent first by default). + Mirrors s.cat.reorder_categories(s.value_counts().index). +

+ + +
+
+ + +
+

catToOrdinal(series, order)

+

+ Create an ordered categorical from a Series using order to define both the + category set and their rank. Values not in order become null. +

+ + +
+
+ + +
+

catFreqTable(series)

+

+ Return a plain Record<string, number> of counts per category. + Zero-frequency categories are included. +

+ + +
+
+ + +
+

catCrossTab(a, b, opts?)

+

+ Cross-tabulation of two categorical Series. Rows = a's categories, + columns = b's categories, cells = co-occurrence counts. + Supports margins and normalization. +

+ + +
+
+ + +
+

catRecode(series, mapping)

+

+ Rename categories via an object map or a transform function. Unmapped categories + are left unchanged. +

+ + +
+
+ + + + + diff --git a/playground/cut_qcut.html b/playground/cut_qcut.html new file mode 100644 index 00000000..1d273a17 --- /dev/null +++ b/playground/cut_qcut.html @@ -0,0 +1,163 @@ + + + + + + tsb β€” cut / qcut: Binning Continuous Data + + + +

tsb β€” cut / qcut: Binning Continuous Data

+

+ cut and qcut partition continuous numeric values into + discrete intervals β€” the TypeScript equivalents of + pandas.cut + and + pandas.qcut. +

+ +

1. cut β€” Fixed-Width Binning

+

+ Bin values into equal-width (or user-specified) intervals. + Pass an integer for automatic bins, or an explicit edge array. +

+ +

Integer bins

+
import { cut } from "tsb";
+
+const ages = [5, 18, 25, 35, 50, 70];
+const { codes, labels, bins } = cut(ages, 3);
+
+// labels: ["(5.0, 26.7]", "(26.7, 48.3]", "(48.3, 70.0]"]
+// bins:   [4.935, 26.667, 48.333, 70]
+// codes:  [0, 0, 0, 1, 2, 2]
+console.table(ages.map((a, i) => ({ age: a, bin: labels[codes[i]!] })));
+
+ +

Explicit bin edges

+
const scores = [55, 65, 72, 80, 91, 98];
+const { codes, labels } = cut(scores, [0, 60, 70, 80, 90, 100], {
+  labels: ["F", "D", "C", "B", "A"],
+  include_lowest: true,
+});
+// codes:  [0, 1, 2, 2, 4, 4]
+// labels[codes[0]] β†’ "F"
+// labels[codes[5]] β†’ "A"
+
+ +

Options

+ + + + + + + + + +
OptionDefaultDescription
righttrueIntervals closed on right: (a, b]. Set false for [a, b).
include_lowestfalseMake lowest interval left-closed: [a, b].
labelsautoCustom string labels, or false for integer codes.
precision3Decimal places in auto-generated labels.
duplicates"raise""drop" to silently remove duplicate bin edges.
+ +

2. qcut β€” Quantile-Based Binning

+

+ Divide values into bins of (approximately) equal population using quantiles. + Useful for creating percentile buckets or roughly equal-sized groups. +

+ +

Quartile split

+
import { qcut } from "tsb";
+
+const values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+const { codes, labels, bins } = qcut(values, 4);
+
+// labels: ["[1, 3.25]", "(3.25, 5.5]", "(5.5, 7.75]", "(7.75, 10]"]
+// Every bin has ~2-3 elements
+
+ +

Custom quantile probabilities

+
const { labels } = qcut(values, [0, 0.1, 0.5, 0.9, 1], {
+  labels: ["bottom 10%", "lower middle", "upper middle", "top 10%"],
+});
+
+ +

Decile labels

+
const { codes } = qcut(data, 10, { labels: false });
+// codes[i] is 0..9 β€” the decile bucket index
+
+ +

3. Return Value: BinResult

+
interface BinResult {
+  codes:  ReadonlyArray<number | null>; // bin index per value; null for NaN
+  labels: readonly string[];            // ordered label per bin
+  bins:   readonly number[];            // bin edge array (labels.length + 1)
+}
+
+ +
+ Missing values: NaN and Infinity are + assigned null in the codes array and are never placed + in a bin. +
+ +

4. cut vs qcut

+ + + + + + + + +
cutqcut
Bin widthEqual (uniform edges)Varies (equal population)
Bin countDetermined by binsDetermined by q
Best forMeaningful thresholds (age groups, grade bands)Percentile buckets, rank-based analysis
Left edge of first binOpen ( unless include_lowestAlways closed [
+ +

5. pandas Compatibility

+
# Python pandas
+pd.cut([1, 2, 3, 4, 5], 2)
+# Interval(0.996, 3.0, closed='right')  ...
+
+# tsb equivalent
+cut([1, 2, 3, 4, 5], 2)
+// codes: [0, 0, 0, 1, 1]
+// labels: ["(0.996, 3.0]", "(3.0, 5.0]"]
+
+ +

+ Both cut and qcut follow pandas semantics exactly: + right-closed by default, linear interpolation for quantiles, and duplicate-edge + handling via duplicates. +

+ +

← Back to tsb feature index

+ + diff --git a/playground/format_ops.html b/playground/format_ops.html new file mode 100644 index 00000000..d72fd1ec --- /dev/null +++ b/playground/format_ops.html @@ -0,0 +1,262 @@ + + + + + + tsb β€” format_ops: Number Formatting + + + +

πŸ”’ format_ops β€” Number Formatting

+

+ tsb provides a suite of number-formatting helpers that mirror pandas' + style.format() and Series.map() patterns. + Every function is zero-dependency and fully typed. +

+

← Back to index

+ +

Scalar formatters

+ + + + + + + + + + + + +
FunctionExample inputExample outputNotes
formatFloat(n, d)3.14159, 2"3.14"Fixed decimal places
formatPercent(n, d)0.1234, 1"12.3%"Multiplies by 100
formatScientific(n, d)12345.678, 3"1.235e+4"Exponential notation
formatEngineering(n, d)12345.678, 3"12.346e+3"Exponent multiple of 3
formatThousands(n, d, sep)1234567.89, 2"1,234,567.89"Thousands separator
formatCurrency(n, sym, d)1234.5, "$""$1,234.50"Currency prefix + thousands
formatCompact(n, d)1_234_567, 2"1.23M"K / M / B / T suffixes
+ +

Interactive demo β€” scalar formatting

+
+ + + + +
+
+ +

Formatter factories

+
import {
+  makeFloatFormatter,
+  makePercentFormatter,
+  makeCurrencyFormatter,
+} from "tsb";
+
+const fmtFloat   = makeFloatFormatter(3);      // (v) => formatFloat(v, 3)
+const fmtPct     = makePercentFormatter(1);     // (v) => formatPercent(v, 1)
+const fmtDollar  = makeCurrencyFormatter("$");  // (v) => formatCurrency(v, "$", 2)
+
+fmtFloat(3.14159);   // "3.142"
+fmtPct(0.0825);      // "8.3%"
+fmtDollar(9999.99);  // "$9,999.99"
+
+ +

Apply to a Series

+
import { Series, applySeriesFormatter, makePercentFormatter } from "tsb";
+
+const returns = new Series({ data: [0.05, -0.02, 0.134, 0.007], name: "returns" });
+
+const formatted = applySeriesFormatter(returns, makePercentFormatter(1));
+// Series<string> ["5.0%", "-2.0%", "13.4%", "0.7%"]
+
+ +

Apply to a DataFrame

+
import { DataFrame, applyDataFrameFormatter, makeCurrencyFormatter, makePercentFormatter } from "tsb";
+
+const df = DataFrame.fromColumns({
+  price:   [1_299.99, 899.50, 45.00],
+  change:  [0.025, -0.031, 0.102],
+  volume:  [15_000, 8_200, 230_000],
+});
+
+const formatted = applyDataFrameFormatter(df, {
+  price:  makeCurrencyFormatter("$", 2),
+  change: makePercentFormatter(2),
+});
+
+// formatted = {
+//   price:  ["$1,299.99", "$899.50", "$45.00"],
+//   change: ["2.50%", "-3.10%", "10.20%"],
+//   volume: ["15000", "8200", "230000"],      // no formatter β†’ String(v)
+// }
+
+ +

Interactive demo β€” DataFrame formatting

+
+ +
+
+ +

String rendering

+
import { Series, DataFrame, seriesToString, dataFrameToString, makeFloatFormatter } from "tsb";
+
+const s = new Series({ data: [1.2, 3.4, 5.6], name: "value" });
+console.log(seriesToString(s, { formatter: makeFloatFormatter(1) }));
+// 0    1.2
+// 1    3.4
+// 2    5.6
+// Name: value, dtype: float64
+
+const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4.0, 5.0, 6.0] });
+console.log(dataFrameToString(df));
+//    a    b
+// 0  1  4.0
+// 1  2  5.0
+// 2  3  6.0
+
+ +

Interactive demo β€” seriesToString / dataFrameToString

+
+ +
+
+ + + + diff --git a/playground/index.html b/playground/index.html index 48bfbcb9..4f78bf7a 100644 --- a/playground/index.html +++ b/playground/index.html @@ -264,6 +264,75 @@

βœ… Complete +
+

πŸ“₯ insertColumn / popColumn

+

Insert and remove DataFrame columns at precise positions. insertColumn(df, loc, col, values) inserts at integer position, popColumn(df, col) returns { series, df }. Also includes reorderColumns and moveColumn. Mirrors pandas.DataFrame.insert() and .pop().

+
βœ… Complete
+
+
+

βœ‚οΈ cut / qcut

+

Bin continuous numeric data into discrete intervals. cut() uses fixed-width or explicit bin edges; qcut() uses quantile-based bins of equal population. Both return codes, labels, and bin edges. Mirrors pandas.cut and pandas.qcut.

+
βœ… Complete
+
+
+

πŸ“Š Rolling Extended Stats

+

Higher-order rolling window statistics: rollingSem (standard error of mean), rollingSkew (Fisher-Pearson skewness), rollingKurt (excess kurtosis), and rollingQuantile (arbitrary percentile with 5 interpolation methods). Mirrors pandas.Series.rolling().sem/skew/kurt/quantile().

+
βœ… Complete
+
+
+

πŸ”§ Rolling Apply & Multi-Agg

+

Standalone custom rolling-window functions: rollingApply (custom fn per window), rollingAgg (multiple named aggregations β†’ DataFrame), dataFrameRollingApply, dataFrameRollingAgg. Supports minPeriods, center, and raw mode. Mirrors pandas.Rolling.apply() and Rolling.agg().

+
βœ… Complete
+
+
+

🎭 where / mask

+

Element-wise conditional selection: seriesWhere / seriesMask and dataFrameWhere / dataFrameMask. Accepts boolean arrays, label-aligned boolean Series/DataFrame, or callables. Mirrors pandas.Series.where, pandas.DataFrame.where, and their .mask() inverses.

+
βœ… Complete
+
+
+

πŸ” isna / notna

+

Module-level missing-value detection: isna, notna, isnull, notnull work on scalars, arrays, Series, and DataFrames. Plus standalone fillna, dropna, countna, and countValid. Mirrors pandas.isna, pandas.notna, pandas.isnull, pandas.notnull.

+
βœ… Complete
+
+
+

🏷️ attrs β€” User Metadata

+

Attach arbitrary key→value metadata to any Series or DataFrame via a WeakMap registry. Provides getAttrs, setAttrs, updateAttrs, copyAttrs, withAttrs, mergeAttrs, clearAttrs, getAttr, setAttr, deleteAttr, attrsCount, attrsKeys. Mirrors pandas.DataFrame.attrs / pandas.Series.attrs.

+
βœ… Complete
+
+
+

πŸ”€ string_ops β€” Standalone String Ops

+

Module-level string utilities: strNormalize (Unicode NFC/NFD/NFKC/NFKD), strGetDummies (one-hot DataFrame), strExtractAll (all regex matches), strRemovePrefix, strRemoveSuffix, strTranslate (char-level substitution), strCharWidth (CJK-aware display width), strByteLength. Works on Series, arrays, or scalars.

+
βœ… Complete
+
+
+

πŸ”€ string_ops_extended β€” Extended String Ops

+

Advanced string utilities: strSplitExpand (split β†’ DataFrame columns), strExtractGroups (regex capture groups β†’ DataFrame), strPartition / strRPartition (split into before/sep/after), strMultiReplace (batch replacements), strIndent / strDedent (line-level indentation). Works on Series, arrays, or scalars.

+
βœ… Complete
+
+
+

πŸ”— pipe_apply β€” Pipeline & Apply Utilities

+

Standalone equivalents of pandas' pipe() / apply() / applymap(): pipe (variadic type-safe pipeline), seriesApply (element-wise with label/pos context), seriesTransform, dataFrameApply (axis 0/1), dataFrameApplyMap (cell-wise), dataFrameTransform (column-wise), dataFrameTransformRows (row-wise).

+
βœ… Complete
+
+
+

πŸ”’ numeric_extended β€” Numeric Utilities

+

numpy/scipy-style numeric utilities: digitize (bin values), histogram (frequency counts with density option), linspace / arange (number sequences), percentileOfScore (percentile rank of a score), zscore (z-score standardisation), minMaxNormalize (scale to [0,1] or custom range), coefficientOfVariation (std/mean). Series-aware variants included.

+
βœ… Complete
+
+ +
+
+

🏷️ categorical_ops β€” Categorical Utilities

+

Standalone categorical helpers: catFromCodes (from integer codes), set operations (catUnionCategories, catIntersectCategories, catDiffCategories, catEqualCategories), catSortByFreq, catToOrdinal, catFreqTable, catCrossTab, catRecode.

+
βœ… Complete
+
+
+
+
+

πŸ”’ format_ops β€” Number Formatting

+

Number-formatting helpers for Series and DataFrame. Scalar formatters: formatFloat, formatPercent, formatScientific, formatEngineering, formatThousands, formatCurrency, formatCompact. Formatter factories: makeFloatFormatter, makePercentFormatter, makeCurrencyFormatter. Apply to collections: applySeriesFormatter, applyDataFrameFormatter. Render to string: seriesToString, dataFrameToString.

+
βœ… Complete
+
diff --git a/playground/insert_pop.html b/playground/insert_pop.html new file mode 100644 index 00000000..8b724566 --- /dev/null +++ b/playground/insert_pop.html @@ -0,0 +1,172 @@ + + + + + + tsb β€” insertColumn / popColumn + + + +

← tsb playground

+ +

insertColumn / popColumn

+

+ Column insertion and removal for DataFrames β€” mirrors + + pandas.DataFrame.insert() and + + pandas.DataFrame.pop(). +

+

+ Because tsb DataFrames are immutable, both functions return a new DataFrame + rather than mutating the original. popColumn returns both the extracted + Series and the resulting DataFrame. +

+ +

API summary

+ + + + + + + + + + + + + + + + + + + + + + + + +
FunctionPandas equivalentDescription
insertColumn(df, loc, col, values)df.insert(loc, col, value)Insert a new column at integer position loc
popColumn(df, col)df.pop(col)Remove a column; returns { series, df }
reorderColumns(df, order)df[order]Reorder (and optionally subset) columns
moveColumn(df, col, newLoc)β€”Move an existing column to a new integer position
+ +

Example 1 β€” insertColumn

+
import { DataFrame, insertColumn } from "tsb";
+
+const df = DataFrame.fromColumns({
+  name: ["Alice", "Bob", "Carol"],
+  age:  [30, 25, 35],
+});
+// columns: ["name", "age"]
+
+// Insert "city" between "name" and "age"
+const df2 = insertColumn(df, 1, "city", ["NY", "LA", "SF"]);
+// df2.columns.values β†’ ["name", "city", "age"]
+// df2.col("city").values β†’ ["NY", "LA", "SF"]
+
+// Original is unchanged
+// df.columns.values β†’ ["name", "age"]
+
+ +

Example 2 β€” Insert with a Series

+
import { DataFrame, Series, insertColumn } from "tsb";
+
+const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+const salary = new Series({ data: [100_000, 90_000, 120_000], name: "salary" });
+
+const df2 = insertColumn(df, 0, "salary", salary);
+// df2.columns.values β†’ ["salary", "a", "b"]
+
+ +

Example 3 β€” popColumn

+
import { DataFrame, popColumn } from "tsb";
+
+const df = DataFrame.fromColumns({
+  id:   [1, 2, 3],
+  name: ["Alice", "Bob", "Carol"],
+  age:  [30, 25, 35],
+});
+
+// Remove "age" and keep the Series
+const { series: ageSeries, df: df2 } = popColumn(df, "age");
+// ageSeries.values       β†’ [30, 25, 35]
+// df2.columns.values     β†’ ["id", "name"]
+// df.columns.values      β†’ ["id", "name", "age"]  ← original unchanged
+
+ +

Example 4 β€” reorderColumns

+
import { DataFrame, reorderColumns } from "tsb";
+
+const df = DataFrame.fromColumns({ a: [1], b: [2], c: [3], d: [4] });
+
+// Reverse the column order
+const df2 = reorderColumns(df, ["d", "c", "b", "a"]);
+// df2.columns.values β†’ ["d", "c", "b", "a"]
+
+// Select a subset (drops columns not listed)
+const df3 = reorderColumns(df, ["a", "c"]);
+// df3.columns.values β†’ ["a", "c"]   (b and d are dropped)
+
+ +

Example 5 β€” moveColumn

+
import { DataFrame, moveColumn } from "tsb";
+
+const df = DataFrame.fromColumns({
+  year:  [2020, 2021, 2022],
+  value: [10, 20, 30],
+  label: ["a", "b", "c"],
+});
+// columns: ["year", "value", "label"]
+
+// Move "label" to the front
+const df2 = moveColumn(df, "label", 0);
+// df2.columns.values β†’ ["label", "year", "value"]
+
+ +

Error cases

+
// Duplicate column name (default: not allowed)
+insertColumn(df, 1, "a", [1, 2, 3]);
+// β†’ RangeError: Column "a" already exists. Use allowDuplicates=true to permit...
+
+// Out-of-range loc
+insertColumn(df, 99, "x", [1, 2, 3]);
+// β†’ RangeError: loc=99 is out of range [0, 2].
+
+// Wrong number of values
+insertColumn(df, 0, "x", [1]);  // df has 3 rows
+// β†’ RangeError: values length 1 does not match DataFrame row count 3.
+
+// Column not found
+popColumn(df, "missing");
+// β†’ RangeError: Column "missing" not found in DataFrame.
+
+ +
+ Immutability: Like all tsb DataFrame operations, these functions never + mutate the original DataFrame. Always assign the return value to a new variable. +
+ +

pandas equivalence table

+ + + + + + + + + +
pandastsb
df.insert(1, "x", [1,2,3]) *(mutates)*insertColumn(df, 1, "x", [1,2,3])
series = df.pop("col") *(mutates)*const { series, df: df2 } = popColumn(df, "col")
df[["c","a","b"]]reorderColumns(df, ["c","a","b"])
+ + diff --git a/playground/notna_isna.html b/playground/notna_isna.html new file mode 100644 index 00000000..8002a5d9 --- /dev/null +++ b/playground/notna_isna.html @@ -0,0 +1,242 @@ + + + + + + tsb Β· isna / notna β€” Missing Value Detection + + + + + +

isna / notna

+

Module-level missing-value detection β€” mirrors pd.isna(), pd.notna(), pd.isnull(), pd.notnull() from pandas.

+ +

What is "missing"?

+

In tsb, the following values are considered missing:

+ +

Everything else β€” 0, false, "", new Date(...) β€” is not missing.

+ +

API Overview

+ + + + + + + + + + + + +
FunctionInputOutputPandas equivalent
isna(v)Scalarbooleanpd.isna(v)
isna(arr)Scalar[]boolean[]pd.isna(arr)
isna(series)SeriesSeries<boolean>pd.isna(series)
isna(df)DataFrameDataFramepd.isna(df)
notna(v)any of abovesame shape, invertedpd.notna(v)
isnull / notnullany of abovesame as isna/notnaaliases
fillna(obj, {value})Scalar/array/Series/DataFramesame type, no missingpd.Series.fillna()
dropna(obj, opts?)array/Series/DataFramemissing entries removedpd.Series.dropna()
countna(obj)array or Seriesnumberseries.isna().sum()
countValid(obj)array or Seriesnumberseries.count()
+ +

πŸ”¬ Try it: isna on scalars

+
+ + + +
Click "Run isna" to see results.
+
+ +

πŸ”¬ Try it: isna on arrays

+
+ + + +
Click "Run isna" to see results.
+
+ +

πŸ”¬ Try it: fillna on arrays

+
+ + + + + +
Click "Run fillna" to see results.
+
+ +

πŸ”¬ Try it: dropna on arrays

+
+ + + +
Click "Run dropna" to see results.
+
+ +

πŸ“ Code examples

+
+
+import { isna, notna, isnull, notnull, fillna, dropna, countna, countValid } from "tsb";
+import { Series, DataFrame } from "tsb";
+
+// ── scalar ──────────────────────────────────────────────────
+isna(null);          // true
+isna(undefined);     // true
+isna(NaN);           // true
+isna(0);             // false  β€” zero is not missing
+isna(false);         // false  β€” false is not missing
+isna("");            // false  β€” empty string is not missing
+
+// ── array ───────────────────────────────────────────────────
+isna([1, null, NaN, 3]);     // [false, true, true, false]
+notna([1, null, NaN, 3]);    // [true, false, false, true]
+
+// ── Series ──────────────────────────────────────────────────
+const s = new Series({ data: [1, null, NaN, 4] });
+isna(s).values;   // [false, true, true, false]
+notna(s).values;  // [true, false, false, true]
+
+// ── DataFrame ───────────────────────────────────────────────
+const df = new DataFrame(new Map([
+  ["a", new Series({ data: [1, null, 3] })],
+  ["b", new Series({ data: [NaN, 5, 6] })],
+]));
+isna(df).col("a").values;  // [false, true, false]
+isna(df).col("b").values;  // [true, false, false]
+
+// ── aliases ─────────────────────────────────────────────────
+isnull(null);   // true  (same as isna)
+notnull(42);    // true  (same as notna)
+
+// ── fillna ──────────────────────────────────────────────────
+fillna([1, null, NaN, 4], { value: 0 });   // [1, 0, 0, 4]
+fillna(s, { value: -1 }).values;           // [1, -1, -1, 4]
+fillna(df, { value: 0 }).col("b").values;  // [0, 5, 6]
+
+// ── dropna ──────────────────────────────────────────────────
+dropna([1, null, NaN, 3]);   // [1, 3]
+dropna(s).values;            // [1, 4]
+dropna(df).shape;            // [1, 2]  (row 0 dropped because b[0]=NaN, row 1 dropped because a[1]=null)
+dropna(df, { how: "all" }).shape;         // drops only rows where ALL values are missing
+dropna(df, { axis: 1 }).columns.values;  // drops columns that contain any missing value
+
+// ── countna / countValid ─────────────────────────────────────
+countna([1, null, NaN, 3]);    // 2
+countValid([1, null, NaN, 3]); // 2
+
+
+ + + + diff --git a/playground/numeric_extended.html b/playground/numeric_extended.html new file mode 100644 index 00000000..14cc4990 --- /dev/null +++ b/playground/numeric_extended.html @@ -0,0 +1,353 @@ + + + + + + tsb β€” Numeric Utilities (digitize, histogram, linspace, arange, zscore…) + + + +

πŸ”’ Numeric Utilities

+

+ ← back to index +

+

+ tsb ships numpy/scipy-style numeric utility functions β€” all implemented + from scratch with no external dependencies: + digitize, histogram, linspace, arange, + percentileOfScore, zscore, minMaxNormalize, + coefficientOfVariation. +

+ +
+

digitize β€” bin values

+

+ Map each value to the index of the bin it falls into. Mirrors numpy.digitize. + Indices are 0-based; values below the first edge return -1. +

+
import { digitize, seriesDigitize, Series } from "tsb";
+
+// Find which [0,33), [33,66), [66,100] bucket each score belongs to
+const scores = [15, 45, 70, 33, 100];
+const edges  = [33, 66, 100];
+
+const bins = digitize(scores, edges);
+// β†’ [-1, 0, 1, 0, 2]
+// 15 < 33      β†’ bin -1 (below first edge)
+// 45 ∈ [33,66) β†’ bin  0
+// 70 ∈ [66,100)β†’ bin  1
+// 33 ∈ [33,66) β†’ bin  0 (33 β‰₯ 33 and 33 < 66, right=false default)
+// 100 = last   β†’ bin  2
+
+// Series version β€” preserves index
+const s = new Series({ data: [15, 45, 70], index: ["Alice","Bob","Carol"] });
+seriesDigitize(s, [33, 66, 100]);
+// Series: Alice→-1, Bob→0, Carol→1
+
Running…
+
+ +
+

histogram β€” frequency counts

+

Count how many values fall in each bin. Mirrors numpy.histogram.

+
import { histogram } from "tsb";
+
+const data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+
+// Default: 10 equal-width bins
+const { counts, binEdges } = histogram(data);
+
+// Custom: 5 bins, density normalised
+const { counts: d, binEdges: e } = histogram(data, { bins: 5, density: true });
+
+// Explicit edges
+histogram(data, { binEdges: [1, 4, 7, 10] });
+// counts: [ 3, 3, 4 ]
+
Running…
+
+ +
+

linspace & arange β€” number sequences

+

Generate evenly-spaced sequences, mirroring numpy.linspace and numpy.arange.

+
import { linspace, arange } from "tsb";
+
+// 5 values from 0 to 1 (inclusive)
+linspace(0, 1, 5);
+// β†’ [0, 0.25, 0.5, 0.75, 1]
+
+// 0..4
+arange(5);
+// β†’ [0, 1, 2, 3, 4]
+
+// From 2 to 10, step 2
+arange(2, 10, 2);
+// β†’ [2, 4, 6, 8]
+
+// Descending
+arange(5, 0, -1);
+// β†’ [5, 4, 3, 2, 1]
+
Running…
+
+ +
+

percentileOfScore β€” percentile rank

+

+ Compute what percentile a given score falls at within a dataset. + Mirrors scipy.stats.percentileofscore. +

+
import { percentileOfScore } from "tsb";
+
+const grades = [55, 60, 70, 75, 80, 85, 90, 95];
+
+// What percentile is a score of 75?
+percentileOfScore(grades, 75);            // 50 (rank β€” default)
+percentileOfScore(grades, 75, "weak");    // 50 (≀ 75: 4/8 = 50%)
+percentileOfScore(grades, 75, "strict");  // 37.5 (< 75: 3/8 = 37.5%)
+
Running…
+
+ +
+

zscore β€” standardisation

+

+ Transform values to zero mean and unit variance. Mirrors scipy.stats.zscore. + Missing values are propagated; zero-variance data returns all NaN. +

+
import { zscore, Series } from "tsb";
+
+const s = new Series({ data: [2, 4, 4, 4, 5, 5, 7, 9], name: "values" });
+const z = zscore(s);
+
+// z.values β‰ˆ [-1.40, -0.47, -0.47, -0.47, 0, 0, 0.94, 1.87]  (sample std, ddof=1 default)
+
+// With population std (ddof=0)
+const zPop = zscore(s, { ddof: 0 });
+
Running…
+
+ +
+

minMaxNormalize β€” scale to [0, 1]

+

+ Scale all values to the interval [0, 1] (or a custom range). + Mirrors sklearn MinMaxScaler. +

+
import { minMaxNormalize, Series } from "tsb";
+
+const s = new Series({ data: [0, 25, 50, 75, 100] });
+minMaxNormalize(s).values;
+// β†’ [0, 0.25, 0.5, 0.75, 1]
+
+// Scale to [-1, 1]
+minMaxNormalize(s, { featureRangeMin: -1, featureRangeMax: 1 }).values;
+// β†’ [-1, -0.5, 0, 0.5, 1]
+
Running…
+
+ +
+

coefficientOfVariation β€” relative spread

+

+ Dimensionless measure of dispersion: std / |mean|. + Useful for comparing spread across datasets with different units. +

+
import { coefficientOfVariation, Series } from "tsb";
+
+// Dataset A: [10, 20, 30]  mean=20, std=10  β†’ CV=0.5
+coefficientOfVariation(new Series({ data: [10, 20, 30] }));
+
+// Dataset B: [100, 200, 300]  same shape, higher scale  β†’ CV=0.5
+coefficientOfVariation(new Series({ data: [100, 200, 300] }));
+
+// CV with population std
+coefficientOfVariation(new Series({ data: [1, 2, 3, 4, 5] }), { ddof: 0 });
+
Running…
+
+ + + + diff --git a/playground/pipe_apply.html b/playground/pipe_apply.html new file mode 100644 index 00000000..25f10a21 --- /dev/null +++ b/playground/pipe_apply.html @@ -0,0 +1,276 @@ + + + + + + tsb β€” pipe_apply: functional pipeline & apply utilities + + + +

← tsb playground

+ +

pipe_apply β€” Functional Pipeline & Apply Utilities

+

+ Standalone equivalents of the pandas + DataFrame.pipe() + / + Series.pipe() + chaining pattern plus various + apply() + / + applymap() + operations β€” usable without method-call syntax. +

+ +
+ Why standalone? pandas chains operations via methods: + df.pipe(fn1).pipe(fn2). tsb provides a module-level + pipe(value, fn1, fn2, …) that works on any value, + not just DataFrames. All functions are pure β€” inputs are never mutated. +
+ +

API Summary

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FunctionPandas equivalentDescription
pipe(value, fn1, fn2, …)df.pipe(fn).pipe(fn2)Variadic type-safe pipeline β€” passes value through fns left-to-right
seriesApply(s, fn)s.apply(fn)Element-wise; fn receives (value, label, position)
seriesTransform(s, fn)s.transform(fn)Element-wise scalar→scalar; simpler than seriesApply
dataFrameApply(df, fn, axis?)df.apply(fn, axis=0|1)Apply fn to each column (axis=0) or row (axis=1) β†’ Series of results
dataFrameApplyMap(df, fn)df.applymap(fn) / df.map(fn)Apply fn to every cell; fn receives (value, rowLabel, colName)
dataFrameTransform(df, fn)df.transform(fn)Replace each column with fn(col) β€” must return same-length Series
dataFrameTransformRows(df, fn)df.apply(fn, axis=1, result_type='expand')Replace each row with fn(rowRecord) β€” partial updates allowed
+ +

pipe β€” functional pipeline

+ +
import { pipe } from "tsb";
+import { DataFrame } from "tsb";
+
+// Type-safe pipeline with up to 8 steps (return type inferred at each step)
+const result = pipe(
+  rawData,
+  (df) => df.dropna(),                                  // DataFrame β†’ DataFrame
+  (df) => df.assign({ z: df.col("x").add(df.col("y")).values }), // DataFrame β†’ DataFrame
+  (df) => df.head(10),                                  // DataFrame β†’ DataFrame
+  (df) => df.sum(),                                     // DataFrame β†’ Series
+);
+
+// Works on any value β€” not just DataFrames
+const n = pipe(
+  3,
+  (x) => x + 1,   // 4
+  (x) => x * x,   // 16
+  (x) => x - 1,   // 15
+);
+// n === 15
+ +

seriesApply β€” element-wise apply

+ +
import { seriesApply, seriesTransform } from "tsb";
+import { Series } from "tsb";
+
+const temps = new Series({ data: [22.1, 23.5, null, 21.8], name: "temp_C" });
+
+// Element-wise with (value, label, position) context
+const fahrenheit = seriesApply(temps, (v) => v === null ? null : (v as number) * 9/5 + 32);
+// [71.78, 74.3, null, 71.24]
+
+// Simple scalar transform (no label/position needed)
+const rounded = seriesTransform(temps, (v) => v === null ? null : Math.round(v as number));
+// [22, 24, null, 22]
+
+// Using position to build cumulative logic
+const withPos = seriesApply(
+  new Series({ data: [10, 20, 30] }),
+  (v, _label, pos) => (v as number) + pos * 100,
+);
+// [10, 120, 230]
+ +

dataFrameApply β€” column/row aggregation

+ +
import { dataFrameApply } from "tsb";
+import { DataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+  score: [85, 92, 78, 95],
+  weight: [1.0, 1.2, 0.8, 1.5],
+});
+
+// axis=0 (default): apply fn to each column β†’ Series indexed by column names
+const colMax = dataFrameApply(df, (col) => col.max() ?? null);
+// colMax.at("score")  === 95
+// colMax.at("weight") === 1.5
+
+// axis=1: apply fn to each row β†’ Series indexed by row labels
+const weightedScore = dataFrameApply(
+  df,
+  (row) => (row.at("score") as number) * (row.at("weight") as number),
+  1,
+);
+// [85, 110.4, 62.4, 142.5]
+ +

dataFrameApplyMap β€” element-wise cell transform

+ +
import { dataFrameApplyMap } from "tsb";
+import { DataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+  a: [1, -2, 3],
+  b: [-4, 5, -6],
+});
+
+// Zero out all negative values (like pandas df.applymap(lambda x: max(x, 0)))
+const clipped = dataFrameApplyMap(df, (v) => {
+  return typeof v === "number" && v < 0 ? 0 : v;
+});
+// a: [1, 0, 3]
+// b: [0, 5, 0]
+
+// fn receives full context: (value, rowLabel, colName)
+const tagged = dataFrameApplyMap(df, (v, row, col) => `${col}[${row}]=${v}`);
+// a: ["a[0]=1", "a[1]=-2", "a[2]=3"]
+// b: ["b[0]=-4", "b[1]=5", "b[2]=-6"]
+ +

dataFrameTransform β€” column-wise transform

+ +
import { dataFrameTransform, seriesTransform } from "tsb";
+import { DataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+  x: [1, 2, 3, 4, 5],
+  y: [10, 20, 30, 40, 50],
+});
+
+// Z-score normalize each column
+const normalized = dataFrameTransform(df, (col) => {
+  const mu = col.mean();
+  const sd = col.std();
+  return seriesTransform(col, (v) =>
+    typeof v === "number" && sd > 0 ? (v - mu) / sd : v
+  );
+});
+
+// Bin each column into quartiles
+const binned = dataFrameTransform(df, (col) => {
+  const q1 = col.quantile(0.25);
+  const q2 = col.quantile(0.5);
+  const q3 = col.quantile(0.75);
+  return seriesTransform(col, (v) => {
+    const n = v as number;
+    if (n <= q1) return "Q1";
+    if (n <= q2) return "Q2";
+    if (n <= q3) return "Q3";
+    return "Q4";
+  });
+});
+ +

dataFrameTransformRows β€” row-wise transform

+ +
import { dataFrameTransformRows } from "tsb";
+import { DataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+  first: ["alice", "bob", "carol"],
+  last:  ["smith", "jones", "white"],
+  score: [88, 75, 92],
+});
+
+// Normalise scores relative to the row's position (illustrative)
+const updated = dataFrameTransformRows(df, (row, _label, pos) => ({
+  // Only return keys you want to change β€” others are preserved as-is
+  score: (row["score"] as number) + pos,
+}));
+// scores become [88, 76, 94]
+// first and last columns are unchanged
+
+// Full row transformation (compute full name)
+const withFull = dataFrameTransformRows(df, (row) => ({
+  first: row["first"],
+  last:  row["last"],
+  score: row["score"],
+  full:  `${row["first"]} ${row["last"]}`,
+}));
+ +

Combining pipe + apply

+ +
import { pipe, dataFrameApplyMap, dataFrameTransform, seriesTransform } from "tsb";
+import { DataFrame } from "tsb";
+
+const raw = DataFrame.fromColumns({
+  price:    [9.99, -1, 24.5, null, 49.0],
+  quantity: [3, 5, null, 2, 1],
+});
+
+// Clean β†’ impute β†’ normalise in one readable pipeline
+const clean = pipe(
+  raw,
+  // 1. zero out invalid prices/quantities
+  (df) => dataFrameApplyMap(df, (v) =>
+    v === null || (typeof v === "number" && v < 0) ? 0 : v
+  ),
+  // 2. add derived revenue column
+  (df) => df.assign({
+    revenue: df.col("price").mul(df.col("quantity")).values,
+  }),
+  // 3. round everything to 2 dp
+  (df) => dataFrameTransform(df, (col) =>
+    seriesTransform(col, (v) =>
+      typeof v === "number" ? Math.round(v * 100) / 100 : v
+    )
+  ),
+);
+ +
+

+ pandas DataFrame.pipe docs + Β· + pandas DataFrame.apply docs + Β· + tsb on GitHub +

+ + diff --git a/playground/rolling_apply.html b/playground/rolling_apply.html new file mode 100644 index 00000000..b307cdbd --- /dev/null +++ b/playground/rolling_apply.html @@ -0,0 +1,225 @@ + + + + + + tsb β€” Rolling Apply & Multi-Aggregation + + + +

tsb β€” Rolling Apply & Multi-Aggregation

+

+ Standalone functions for applying custom aggregation logic over sliding + windows, mirroring + + pandas.Series.rolling().apply() + + and + + Rolling.agg() + . +

+ +

1. rollingApply β€” Custom Function Per Window

+

+ Apply any aggregation function to each rolling window. The function + receives the valid (non-null, non-NaN) numeric values + in the window and must return a single number. +

+
import { rollingApply } from "tsb";
+
+const prices = new Series({ data: [10, 12, 11, 15, 14, 16], name: "price" });
+
+// Custom: range (max - min) over each 3-day window
+const range = (w) => Math.max(...w) - Math.min(...w);
+
+rollingApply(prices, 3, range).toArray();
+// [null, null, 2, 4, 4, 2]
+//  ↑↑ insufficient data (need 3 observations)
+ +
+

Options

+ + + + + + + + + +
OptionDefaultDescription
minPeriodswindowMinimum valid observations to compute (null otherwise)
centerfalseCentre the window (symmetric) instead of trailing
rawfalsePass full window including nulls (filtered to valid nums before fn call)
+
+ +
// minPeriods=1 β†’ start computing from the very first position
+rollingApply(prices, 3, range, { minPeriods: 1 }).toArray();
+// [0, 2, 2, 4, 4, 2]
+
+// center=true β†’ symmetric window around each point
+rollingApply(prices, 3, range, { center: true }).toArray();
+// [null, 2, 4, 4, 2, null]
+ +

2. rollingAgg β€” Multiple Aggregations at Once

+

+ Apply several named aggregation functions in a single pass over a Series, + returning a DataFrame where each column holds one + aggregation result. +

+
import { rollingAgg } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4, 5, 6, 7, 8] });
+
+const result = rollingAgg(s, 3, {
+  mean: (w) => w.reduce((a, b) => a + b, 0) / w.length,
+  max:  (w) => Math.max(...w),
+  min:  (w) => Math.min(...w),
+  range:(w) => Math.max(...w) - Math.min(...w),
+});
+
+// result is a DataFrame with columns: "mean", "max", "min", "range"
+// result.col("mean").toArray() β†’ [null, null, 2, 3, 4, 5, 6, 7]
+// result.col("range").toArray() β†’ [null, null, 2, 2, 2, 2, 2, 2]
+ +
+ Pandas equivalent:
+ s.rolling(3).agg({"mean": np.mean, "max": np.max, "min": np.min}) +
+ +

3. dataFrameRollingApply β€” Apply Per Column

+

+ Apply a single custom function independently to each column of a + DataFrame, returning a new DataFrame of the same shape. +

+
import { dataFrameRollingApply } from "tsb";
+
+const df = DataFrame.fromColumns({
+  open:  [100, 102, 101, 105, 103],
+  close: [101, 103, 100, 106, 104],
+});
+
+// Pairwise range within each 2-step window per column
+const range = (w) => Math.max(...w) - Math.min(...w);
+
+dataFrameRollingApply(df, 2, range);
+//   open  close
+// 0 null  null
+// 1    2     2
+// 2    1     3
+// 3    4     6
+// 4    2     2
+ +

4. dataFrameRollingAgg β€” Multi-Agg Per Column

+

+ Apply multiple named aggregation functions to every column of a + DataFrame. The result has columns named + {originalColumn}_{aggName}. +

+
import { dataFrameRollingAgg } from "tsb";
+
+const df = DataFrame.fromColumns({
+  A: [1, 2, 3, 4, 5],
+  B: [10, 20, 30, 40, 50],
+});
+
+const out = dataFrameRollingAgg(df, 3, {
+  sum:  (w) => w.reduce((a, b) => a + b, 0),
+  mean: (w) => w.reduce((a, b) => a + b, 0) / w.length,
+});
+
+// Columns: "A_sum", "A_mean", "B_sum", "B_mean"
+// A_sum:  [null, null, 6, 9, 12]
+// A_mean: [null, null, 2, 3,  4]
+// B_sum:  [null, null, 60, 90, 120]
+// B_mean: [null, null, 20, 30,  40]
+ +

Comparison with pandas

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
tsbpandas
rollingApply(s, w, fn)s.rolling(w).apply(fn, raw=True)
rollingApply(s, w, fn, {minPeriods:1})s.rolling(w, min_periods=1).apply(fn)
rollingAgg(s, w, {f1, f2})s.rolling(w).agg({"f1": f1, "f2": f2})
dataFrameRollingApply(df, w, fn)df.rolling(w).apply(fn)
dataFrameRollingAgg(df, w, {f1, f2})df.rolling(w).agg({"f1": f1, "f2": f2})
+ +

Use case: Bollinger Band width

+
import { rollingAgg } from "tsb";
+
+// Bollinger Band width = (upper - lower) / middle
+// where upper = mean + 2Β·std, lower = mean - 2Β·std
+const prices = new Series({
+  data: [20, 21, 22, 20, 19, 21, 23, 24, 22, 21],
+  name: "price",
+});
+
+const stats = rollingAgg(prices, 5, {
+  mean: (w) => w.reduce((a, b) => a + b, 0) / w.length,
+  std: (w) => {
+    const m = w.reduce((a, b) => a + b, 0) / w.length;
+    return Math.sqrt(w.reduce((a, b) => a + (b - m) ** 2, 0) / (w.length - 1));
+  },
+});
+
+// Bollinger Band width = 4 * std / mean
+const bw = stats.col("std").toArray().map((std, i) => {
+  const mean = stats.col("mean").toArray()[i];
+  if (std === null || mean === null || mean === 0) return null;
+  return (4 * (std as number)) / (mean as number);
+});
+ +

+ ← Back to tsb playground index +

+ + diff --git a/playground/string_ops.html b/playground/string_ops.html new file mode 100644 index 00000000..5d9fff07 --- /dev/null +++ b/playground/string_ops.html @@ -0,0 +1,282 @@ + + + + + + tsb β€” String Operations + + + +
+

tsb

+ string_ops + Standalone string operations for Series and arrays +
+
+

+ string_ops provides module-level string functions that complement the + Series.str accessor. All functions accept a Series, a + string[], or a scalar string. +

+ + +
+

strNormalize β€” Unicode normalisation

+

Normalise every element to NFC, NFD, NFKC, or NFKD. Useful when mixing text + from different sources (e.g. macOS NFD vs Windows NFC).

+
+
+ + +
+
+ + +
+
+ +

+  
+ + +
+

strGetDummies β€” one-hot encode by delimiter

+

Split each string by a delimiter and produce a binary indicator DataFrame β€” + one column per unique token. Equivalent to pandas.Series.str.get_dummies().

+
+
+ + +
+
+ + + + +
+
+ +

+  
+ + +
+

strExtractAll β€” extract all regex matches

+

Find every non-overlapping regex match in each element. Returns a JSON-encoded + array of match arrays per element β€” parse with JSON.parse.

+
+
+ + +
+
+ + + + +
+
+ +

+  
+ + +
+

strRemovePrefix / strRemoveSuffix

+

Strip a leading or trailing string from elements only when it is present.

+
+
+ + +
+
+ + + + +
+
+ +

+  
+ + +
+

strTranslate β€” character-level substitution

+

Replace or delete individual characters using a lookup table. + Format: one mapping per line as from=to or from= + to delete.

+
+
+ + +
+
+ + +
+
+ +

+  
+ + +
+

strCharWidth & strByteLength β€” display & byte widths

+

+ strCharWidth counts columns for terminal display (CJK chars count as 2).
+ strByteLength counts UTF-8 bytes (useful for byte-limited APIs). +

+ + + +

+  
+
+ + + + diff --git a/playground/string_ops_extended.html b/playground/string_ops_extended.html new file mode 100644 index 00000000..81bdaddb --- /dev/null +++ b/playground/string_ops_extended.html @@ -0,0 +1,413 @@ + + + + + + tsb β€” Extended String Operations + + + +
+

tsb

+ string_ops_extended + Advanced standalone string operations: split-expand, extract, partition, multi-replace, indent, dedent +
+
+

+ string_ops_extended adds advanced string utilities that complement + string_ops and the Series.str accessor. All functions accept + a Series, an array, or a scalar string. +

+ + +
+

strSplitExpand β€” split and expand to DataFrame columns

+

+ Split each element by a delimiter and expand the parts into a DataFrame + with one column per position. Mirrors pandas.Series.str.split(expand=True). + Shorter rows are padded with null. +

+
+
+ + +
+
+ + + + +
+
+ +

+  
+ + +
+

strExtractGroups β€” extract regex capture groups

+

+ Extract regex capture groups from each element into a DataFrame. + Named groups ((?<name>...)) become column names; unnamed groups + become 0, 1, … Non-matching rows produce null. +

+
+
+ + +
+
+ + +
Use (?<name>...) for named capture groups.
+
+
+ +

+  
+ + +
+

strPartition / strRPartition β€” split into (before, sep, after)

+

+ strPartition splits at the first occurrence of the separator; + strRPartition splits at the last. When the separator is not + found, strPartition returns [s, "", ""] and + strRPartition returns ["", "", s]. +

+
+
+ + +
+
+ + +
+
+ +

+  
+ + +
+

strMultiReplace β€” apply multiple replacements in sequence

+

+ Apply an ordered list of {pat, repl} pairs to each element. + Each replacement is applied to the result of the previous one. + Patterns can be string literals (replaced globally) or RegExp objects. +

+
+
+ + +
+
+ + +
+
+ +

+  
+ + +
+

strIndent / strDedent β€” line-level indentation utilities

+

+ strIndent adds a prefix to every non-empty line (mirrors + textwrap.indent). + strDedent removes the common leading whitespace from all lines + (mirrors textwrap.dedent). +

+
+
+ + +
+
+ + +
+
+ +

+  
+ +
+ + + + diff --git a/playground/to_from_dict.html b/playground/to_from_dict.html new file mode 100644 index 00000000..a8ca3e88 --- /dev/null +++ b/playground/to_from_dict.html @@ -0,0 +1,122 @@ + + + + + + tsb β€” toDictOriented / fromDictOriented + + + +

← tsb playground

+ +

toDictOriented / fromDictOriented

+

+ Convert a DataFrame to and from dictionary structures with flexible orientation β€” mirrors + + pandas.DataFrame.to_dict(orient=...) and + + pandas.DataFrame.from_dict(orient=...). +

+ +

Supported orientations β€” toDictOriented

+ + + + + + + + + + + +
OrientReturn typeDescription
"dict" / "columns"Record<col, Record<rowLabel, value>>Nested column β†’ row-label β†’ value map
"list"Record<col, value[]>Column name β†’ array of values
"series"Record<col, Series>Column name β†’ Series object
"split"{ index, columns, data }Serialisable split structure
"tight"{ index, columns, data, index_names, column_names }Split plus axis-name metadata
"records"Record<col, value>[]Array of row objects
"index"Record<rowLabel, Record<col, value>>Row-label β†’ column β†’ value
+ +

Supported orientations β€” fromDictOriented

+ + + + + + + + +
OrientInput shape
"columns" (default){ col: value[] }
"index"{ rowLabel: { col: value } }
"split"{ index?, columns, data }
"tight"Same as "split", extra fields ignored
+ +

Example β€” all orientations

+
import { DataFrame } from "tsb";
+import { toDictOriented, fromDictOriented } from "tsb";
+
+const df = DataFrame.fromColumns(
+  { name: ["Alice", "Bob"], score: [92, 85] },
+  { index: new Index(["r0", "r1"]) },
+);
+
+// "dict" / "columns"
+toDictOriented(df, "dict");
+// { name: { r0: "Alice", r1: "Bob" }, score: { r0: 92, r1: 85 } }
+
+// "list"
+toDictOriented(df, "list");
+// { name: ["Alice", "Bob"], score: [92, 85] }
+
+// "records"
+toDictOriented(df, "records");
+// [ { name: "Alice", score: 92 }, { name: "Bob", score: 85 } ]
+
+// "split"
+toDictOriented(df, "split");
+// { index: ["r0", "r1"], columns: ["name", "score"], data: [["Alice", 92], ["Bob", 85]] }
+
+// "index"
+toDictOriented(df, "index");
+// { r0: { name: "Alice", score: 92 }, r1: { name: "Bob", score: 85 } }
+
+// fromDictOriented β€” columns (default)
+fromDictOriented({ name: ["Alice", "Bob"], score: [92, 85] });
+
+// fromDictOriented β€” index
+fromDictOriented(
+  { r0: { name: "Alice", score: 92 }, r1: { name: "Bob", score: 85 } },
+  "index",
+);
+
+// fromDictOriented β€” split (round-trip)
+const split = toDictOriented(df, "split");
+const df2 = fromDictOriented(split, "split");
+// df2 is equivalent to df
+
+ +

Missing values

+
+ Missing values (null / undefined) are preserved as null + in all orientations. When using fromDictOriented with "index" + orientation, any column that is absent from a given row object is filled with null. +
+ +

Type signatures

+
function toDictOriented(df: DataFrame, orient: "dict" | "columns"): Record<string, Record<string, Scalar>>;
+function toDictOriented(df: DataFrame, orient: "list"): Record<string, Scalar[]>;
+function toDictOriented(df: DataFrame, orient: "series"): Record<string, Series<Scalar>>;
+function toDictOriented(df: DataFrame, orient: "split"): DictSplit;
+function toDictOriented(df: DataFrame, orient: "tight"): DictTight;
+function toDictOriented(df: DataFrame, orient: "records"): Record<string, Scalar>[];
+function toDictOriented(df: DataFrame, orient: "index"): Record<string, Record<string, Scalar>>;
+
+function fromDictOriented(data: Record<string, readonly Scalar[]>, orient?: "columns"): DataFrame;
+function fromDictOriented(data: Record<string, Record<string, Scalar>>, orient: "index"): DataFrame;
+function fromDictOriented(data: SplitInput, orient: "split" | "tight"): DataFrame;
+
+ + diff --git a/playground/where_mask.html b/playground/where_mask.html new file mode 100644 index 00000000..89a50a05 --- /dev/null +++ b/playground/where_mask.html @@ -0,0 +1,220 @@ + + + + + + tsb β€” where / mask: Conditional Selection + + + +

tsb β€” where / mask: Conditional Selection

+

+ seriesWhere / seriesMask and their DataFrame equivalents + allow element-wise conditional replacement β€” the TypeScript equivalents of + pandas.Series.where + and + pandas.Series.mask. +

+ +
+ Quick rule:
+ where(cond) β€” keep where cond is true, replace elsewhere.
+ mask(cond) β€” keep where cond is false, replace elsewhere.
+ They are exact inverses of each other. +
+ +

1. seriesWhere β€” Boolean Array Condition

+

+ Pass a boolean[] to keep values at true positions, replace + the rest with null (or a custom other value). +

+
import { Series, seriesWhere } from "tsb";
+
+const scores = new Series({ data: [42, 91, 67, 55, 88] });
+const highScores = seriesWhere(scores, [false, true, false, false, true]);
+// Series [null, 91, null, null, 88]
+
+// Custom replacement value
+const clamped = seriesWhere(scores, [false, true, false, false, true], { other: 0 });
+// Series [0, 91, 0, 0, 88]
+ +

2. seriesWhere β€” Callable Condition

+

+ Pass a function that receives the Series and returns a boolean[] or + Series<boolean>. This avoids computing the condition array manually. +

+
import { Series, seriesWhere } from "tsb";
+
+const temps = new Series({ data: [-5, 12, 23, -3, 8] });
+
+// Keep only values above freezing
+const aboveFreezing = seriesWhere(
+  temps,
+  (s) => s.values.map((v) => (v as number) > 0),
+);
+// Series [null, 12, 23, null, 8]
+
+// Replace with 0 instead of null
+const noFreeze = seriesWhere(
+  temps,
+  (s) => s.values.map((v) => (v as number) > 0),
+  { other: 0 },
+);
+// Series [0, 12, 23, 0, 8]
+ +

3. seriesMask β€” The Inverse

+

+ mask replaces positions where the condition is true + (the opposite of where). Use it to "blank out" outliers or invalid values. +

+
import { Series, seriesMask } from "tsb";
+
+const data = new Series({ data: [1, 2, 3, 4, 5] });
+
+// Mask out values greater than 3
+const masked = seriesMask(
+  data,
+  (s) => s.values.map((v) => (v as number) > 3),
+  { other: null },
+);
+// Series [1, 2, 3, null, null]
+ +

4. dataFrameWhere β€” Element-Wise on DataFrames

+

+ Pass a boolean DataFrame or a callable that returns one. + Columns and row labels are aligned by name. +

+
import { DataFrame, dataFrameWhere } from "tsb";
+
+const df = DataFrame.fromColumns({
+  temp_c:   [22, -3, 18, -7, 30],
+  humidity: [55, 80, 62, 75, 45],
+});
+
+// Keep only valid summer readings (temp > 0)
+const condDf = DataFrame.fromColumns({
+  temp_c:   [true, false, true, false, true],
+  humidity: [true, false, true, false, true],
+});
+
+const summer = dataFrameWhere(df, condDf);
+// DataFrame:
+//   temp_c   [22,   null, 18,   null, 30  ]
+//   humidity [55,   null, 62,   null, 45  ]
+ +

5. dataFrameWhere β€” Callable Condition

+
import { DataFrame, dataFrameWhere } from "tsb";
+
+const df = DataFrame.fromColumns({
+  a: [1, 2, 3, 4, 5],
+  b: [10, 20, 30, 40, 50],
+});
+
+// Keep only values > 2 (column-wise threshold)
+const result = dataFrameWhere(df, (d) => {
+  const condCols: Record<string, boolean[]> = {};
+  for (const col of d.columns) {
+    condCols[col as string] = d.col(col as string).values.map(
+      (v) => (v as number) > 2
+    );
+  }
+  return DataFrame.fromColumns(condCols);
+});
+// DataFrame:
+//   a: [null, null, 3, 4, 5]
+//   b: [10,   20,   30, 40, 50]
+ +

6. dataFrameMask β€” DataFrame Mask

+
import { DataFrame, dataFrameMask } from "tsb";
+
+const df = DataFrame.fromColumns({
+  sales:  [100, 200, 50,  300, 80],
+  profit: [10,  40,  -5,  60,  -2],
+});
+
+// Mask out (replace) rows with negative profit
+const cleaned = dataFrameMask(
+  df,
+  (d) => {
+    const condCols: Record<string, boolean[]> = {};
+    for (const col of d.columns) {
+      condCols[col as string] = d.col(col as string).values.map(
+        (v) => (v as number) < 0
+      );
+    }
+    return DataFrame.fromColumns(condCols);
+  },
+  { other: 0 },
+);
+// DataFrame:
+//   sales:  [100, 200, 50,  300, 80]
+//   profit: [10,  40,  0,   60,  0 ]
+ +

Label-Aligned Series Condition

+

+ When you pass a Series<boolean> as the condition, values are aligned + by label, not position. Labels absent from the condition series are treated + as false. +

+
import { Series, seriesWhere } from "tsb";
+
+const prices = new Series({ data: [10, 20, 30], index: ["a", "b", "c"] });
+const valid  = new Series<boolean>({ data: [false, true], index: ["a", "b"] });
+
+// Only "b" is in the condition with value=true; "a"=false, "c" missing→false
+const result = seriesWhere(prices, valid, { other: -1 });
+// Series { a: -1, b: 20, c: -1 }
+ +

API Reference

+ + + + + + +
FunctionKeeps when cond is…Replaces with
seriesWhere(s, cond, {other})trueother (default null)
seriesMask(s, cond, {other})falseother (default null)
dataFrameWhere(df, cond, {other})trueother (default null)
dataFrameMask(df, cond, {other})falseother (default null)
+ +

Condition types

+ + + + + + +
TypeSeries opsDataFrame ops
Boolean arrayβœ… positionalβ€”
Series<boolean>βœ… label-alignedβ€”
DataFrame (boolean)β€”βœ… label-aligned
Callableβœ… receives Seriesβœ… receives DataFrame
+ +

← Back to tsb playground index

+ + diff --git a/playground/wide_to_long.html b/playground/wide_to_long.html new file mode 100644 index 00000000..b30980cd --- /dev/null +++ b/playground/wide_to_long.html @@ -0,0 +1,113 @@ + + + + + + tsb β€” wideToLong + + + +

← tsb playground

+ +

wideToLong

+

+ Reshape a wide-format DataFrame to long format by collapsing stub-prefixed column + groups into rows β€” mirrors + + pandas.wide_to_long(). +

+ +

Concept

+

+ Given a wide DataFrame where repeated measurements are spread across columns with a + common stub prefix and a numeric (or other) suffix β€” e.g. score_2021, + score_2022 β€” wideToLong pivots those column groups into rows. + One row per original row per unique suffix is produced. +

+ +

Example β€” numeric suffixes

+
import { DataFrame } from "tsb";
+import { wideToLong } from "tsb";
+
+const df = DataFrame.fromColumns({
+  id:  ["x", "y"],
+  A1:  [1, 2],
+  A2:  [3, 4],
+  B1:  [5, 6],
+  B2:  [7, 8],
+});
+
+const long = wideToLong(df, ["A", "B"], "id", "num");
+
+// long.columns.values β†’ ["id", "num", "A", "B"]
+// long.shape          β†’ [4, 4]
+//
+// id  num   A   B
+//  x    1   1   5
+//  y    1   2   6
+//  x    2   3   7
+//  y    2   4   8
+
+ +

Example β€” separator and custom suffix

+
const df = DataFrame.fromColumns({
+  country: ["US", "UK"],
+  gdp_2020: [21e12, 2.7e12],
+  gdp_2021: [23e12, 3.1e12],
+  pop_2020: [331e6, 67e6],
+  pop_2021: [332e6, 68e6],
+});
+
+const long = wideToLong(df, ["gdp", "pop"], "country", "year", { sep: "_" });
+// long.shape β†’ [4, 4]  β€” 2 countries Γ— 2 years
+// Columns: ["country", "year", "gdp", "pop"]
+
+ +

API reference

+
function wideToLong(
+  df: DataFrame,
+  stubnames: string | string[],
+  i: string | string[],
+  j: string,
+  options?: WideToLongOptions,
+): DataFrame;
+
+interface WideToLongOptions {
+  sep?: string;      // separator between stub and suffix, default ""
+  suffix?: string;   // regex string matching suffix, default "\\d+"
+}
+
+ +

Parameters

+ + + + + + + + + + +
ParameterTypeDescription
dfDataFrameSource DataFrame (not mutated)
stubnamesstring | string[]Prefix(es) shared by the wide column groups
istring | string[]Column(s) to keep as id variables (repeated per suffix)
jstringName of the new column holding the suffix values
options.sepstringSeparator between stub and suffix (default: "")
options.suffixstringRegex string matching the suffix (default: "\\d+")
+ +

Output layout

+
+ Output columns are always ordered: id cols, j, stub cols + (in the same order the stubs were passed). Suffixes are sorted numerically when they are all + integers, otherwise lexicographically. Wide columns that are absent from the DataFrame are + filled with null. +
+ + diff --git a/playground/window_extended.html b/playground/window_extended.html new file mode 100644 index 00000000..4232fa5d --- /dev/null +++ b/playground/window_extended.html @@ -0,0 +1,304 @@ + + + + + + tsb β€” Rolling Extended Stats: sem, skew, kurt, quantile + + + +

tsb β€” Rolling Extended Statistics

+

+ Higher-order rolling window statistics extending the core + + pandas.Series.rolling() + + API: + sem, skew, kurt, and + quantile. +

+ +

1. rollingSem β€” Standard Error of the Mean

+

+ The standard error of the mean measures how much the sample mean + would vary across repeated samples. For a window of n values: +

+
sem = std(ddof=1) / √n
+

Requires at least 2 valid observations per window.

+ +
import { rollingSem, Series } from "tsb";
+
+const s = new Series({ data: [2, 4, 4, 4, 5, 5, 7, 9], name: "x" });
+const sem3 = rollingSem(s, 3);
+// [null, null, 0.667, 0, 0.333, 0.333, 0.667, 1.155]
+
+ +
+

Live demo β€” sem with window=3

+

Comma-separated numbers (nulls accepted):

+ + + + +

+    
+ +

2. rollingSkew β€” Fisher-Pearson Skewness

+

+ Skewness measures asymmetry of the distribution in each window. + Positive = right tail heavier; negative = left tail heavier. + Uses the unbiased Fisher-Pearson formula (same as pandas): +

+
skew = [n/((n-1)(n-2))] Γ— Ξ£[(xα΅’βˆ’xΜ„)/s]Β³
+

Requires β‰₯ 3 valid observations.

+ +
import { rollingSkew, Series } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4, 5] });
+rollingSkew(s, 3);
+// [null, null, 0, 0, 0]   ← symmetric windows β†’ zero skew
+
+ +
+

Live demo β€” skewness with window=4

+ + + +

+    
+ +

3. rollingKurt β€” Excess Kurtosis

+

+ Kurtosis measures how heavy the tails are relative to a normal distribution. + The excess kurtosis subtracts 3, so a normal distribution gives 0. + Uses the Fisher (1930) unbiased formula: +

+
kurt = [n(n+1)/((n-1)(n-2)(n-3))] Γ— Ξ£[(xα΅’βˆ’xΜ„)/s]⁴ βˆ’ 3(n-1)Β²/((n-2)(n-3))
+

Requires β‰₯ 4 valid observations.

+ +
import { rollingKurt, Series } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4] });
+rollingKurt(s, 4);
+// [null, null, null, -1.2]   ← uniform distribution has kurt = -1.2
+
+ +
+

Live demo β€” excess kurtosis with window=5

+ + + +

+    
+ +

4. rollingQuantile β€” Rolling Quantile

+

+ Computes any quantile within each sliding window using configurable + interpolation. When q = 0.5 this is identical to + rolling.median(). +

+ +
import { rollingQuantile, Series } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4, 5] });
+
+rollingQuantile(s, 0.5, 3);  // rolling median: [null, null, 2, 3, 4]
+rollingQuantile(s, 0.25, 3); // [null, null, 1.5, 2.5, 3.5]
+rollingQuantile(s, 0.75, 3); // [null, null, 2.5, 3.5, 4.5]
+
+ +

Interpolation methods

+ + + + + + + + + +
MethodBehaviour when q falls between two values
linear (default)Linear interpolation β€” same as NumPy / pandas default
lowerTake the lower of the two surrounding values
higherTake the higher of the two surrounding values
midpointArithmetic mean of the two surrounding values
nearestWhichever surrounding value is closest
+ +
+

Live demo β€” rolling quantile

+ + + + + +

+    
+ +

Common Options

+ + + + + + +
OptionTypeDefaultDescription
minPeriodsnumber= windowMinimum valid obs required per window
centerbooleanfalseCentre the window around each position
+ +
+ Note: Functions are pure β€” they return new Series objects + without modifying the input. Missing values (null, NaN) + are excluded from each window calculation. +
+ + + + diff --git a/src/core/api_types.ts b/src/core/api_types.ts new file mode 100644 index 00000000..860d2050 --- /dev/null +++ b/src/core/api_types.ts @@ -0,0 +1,629 @@ +/** + * api_types β€” runtime type-checking predicates, mirroring `pandas.api.types`. + * + * Two groups of functions are provided: + * + * **Value-level predicates** β€” operate on arbitrary JavaScript values, equivalent + * to `pandas.api.types.is_scalar`, `is_list_like`, `is_number`, etc. + * + * **Dtype-level predicates** β€” accept a `Dtype` instance or a `DtypeName` string + * and answer questions about the dtype's kind, equivalent to + * `pandas.api.types.is_numeric_dtype`, `is_float_dtype`, etc. + * + * @example + * ```ts + * import { isScalar, isNumericDtype, Dtype } from "tsb"; + * isScalar(42); // true + * isScalar([1, 2, 3]); // false + * isListLike([1, 2, 3]); // true + * isNumericDtype(Dtype.float64); // true + * isStringDtype("string"); // true + * ``` + * + * @module + */ + +import { Dtype } from "./dtype.ts"; +import type { DtypeName } from "../types.ts"; + +// ─── internal helper ────────────────────────────────────────────────────────── + +/** Resolve a Dtype | DtypeName to a Dtype instance. */ +function resolveDtype(dtype: Dtype | DtypeName): Dtype { + if (dtype instanceof Dtype) { + return dtype; + } + return Dtype.from(dtype); +} + +// ═════════════════════════════════════════════════════════════════════════════ +// VALUE-LEVEL PREDICATES +// ═════════════════════════════════════════════════════════════════════════════ + +/** + * Return `true` if `val` is a scalar (not a collection). + * + * Scalars: `string`, `number`, `bigint`, `boolean`, `symbol`, `null`, + * `undefined`, and `Date` objects. Arrays, plain objects, `Map`, `Set`, + * iterables, and class instances other than `Date` are **not** scalars. + * + * Mirrors `pandas.api.types.is_scalar`. 
+ * + * @example + * ```ts + * isScalar(42); // true + * isScalar("hello"); // true + * isScalar(null); // true + * isScalar([1, 2]); // false + * isScalar({ a: 1 }); // false + * ``` + */ +export function isScalar(val: unknown): boolean { + if (val === null || val === undefined) { + return true; + } + const t = typeof val; + if (t === "string" || t === "number" || t === "bigint" || t === "boolean" || t === "symbol") { + return true; + } + if (val instanceof Date) { + return true; + } + return false; +} + +/** + * Return `true` if `val` is "list-like" β€” i.e. iterable (but not a string) + * or has a non-negative integer `length` property. + * + * Mirrors `pandas.api.types.is_list_like`. + * + * @example + * ```ts + * isListLike([1, 2, 3]); // true + * isListLike(new Set([1])); // true + * isListLike("abc"); // false (strings excluded) + * isListLike(42); // false + * isListLike({ a: 1 }); // false + * ``` + */ +export function isListLike(val: unknown): boolean { + if (val === null || val === undefined) { + return false; + } + if (typeof val === "string") { + return false; + } + // Has Symbol.iterator and is not a plain number/boolean/bigint/symbol + if (typeof val === "number" || typeof val === "boolean" || typeof val === "bigint" || typeof val === "symbol") { + return false; + } + if (typeof val === "object" || typeof val === "function") { + if (Symbol.iterator in (val as object)) { + return true; + } + const len = (val as Record)["length"]; + if (typeof len === "number" && len >= 0 && Number.isInteger(len)) { + return true; + } + } + return false; +} + +/** + * Return `true` if `val` is array-like β€” i.e. has a non-negative integer + * `length` property. + * + * Mirrors `pandas.api.types.is_array_like`. 
+ * + * @example + * ```ts + * isArrayLike([1, 2]); // true + * isArrayLike("abc"); // true (strings have .length) + * isArrayLike(42); // false + * isArrayLike({}); // false + * ``` + */ +export function isArrayLike(val: unknown): boolean { + if (val === null || val === undefined) { + return false; + } + if (typeof val === "string") { + return true; + } + if (typeof val !== "object" && typeof val !== "function") { + return false; + } + const len = (val as Record)["length"]; + return typeof len === "number" && len >= 0 && Number.isInteger(len); +} + +/** + * Return `true` if `val` is dict-like β€” a plain object (not an array, not a + * `Date`, not a class instance). + * + * Mirrors `pandas.api.types.is_dict_like`. + * + * @example + * ```ts + * isDictLike({ a: 1 }); // true + * isDictLike(new Map()); // true (has .get / .set) + * isDictLike([1, 2]); // false + * isDictLike("abc"); // false + * ``` + */ +export function isDictLike(val: unknown): boolean { + if (val === null || val === undefined) { + return false; + } + if (typeof val !== "object") { + return false; + } + if (Array.isArray(val)) { + return false; + } + // Treat Map as dict-like (supports key lookup) + if (val instanceof Map) { + return true; + } + // Date is not dict-like + if (val instanceof Date) { + return false; + } + // Plain objects and other objects with properties + return true; +} + +/** + * Return `true` if `val` is an iterator β€” i.e. has a callable `next` method. + * + * Mirrors `pandas.api.types.is_iterator`. + * + * @example + * ```ts + * isIterator([1, 2][Symbol.iterator]()); // true + * isIterator([1, 2]); // false + * ``` + */ +export function isIterator(val: unknown): boolean { + if (val === null || val === undefined) { + return false; + } + if (typeof val !== "object" && typeof val !== "function") { + return false; + } + return typeof (val as Record)["next"] === "function"; +} + +/** + * Return `true` if `val` is a `number` (including `NaN` and `Β±Infinity`). 
+ * + * Mirrors `pandas.api.types.is_number`. + * + * @example + * ```ts + * isNumber(3.14); // true + * isNumber(NaN); // true + * isNumber("3"); // false + * ``` + */ +export function isNumber(val: unknown): val is number { + return typeof val === "number"; +} + +/** + * Return `true` if `val` is a `boolean`. + * + * Mirrors `pandas.api.types.is_bool`. + * + * @example + * ```ts + * isBool(true); // true + * isBool(1); // false + * ``` + */ +export function isBool(val: unknown): val is boolean { + return typeof val === "boolean"; +} + +/** + * Return `true` if `val` is a `string`. + * + * Named `isStringValue` to distinguish from the dtype-level `isStringDtype`. + * Mirrors `pandas.api.types.is_string` (not to be confused with dtype checks). + * + * @example + * ```ts + * isStringValue("hello"); // true + * isStringValue(42); // false + * ``` + */ +export function isStringValue(val: unknown): val is string { + return typeof val === "string"; +} + +/** + * Return `true` if `val` is a finite floating-point number (has a fractional + * component or is finite non-integer). `NaN`, `Β±Infinity` are **not** floats + * in the pandas sense. + * + * Mirrors `pandas.api.types.is_float`. + * + * @example + * ```ts + * isFloat(3.14); // true + * isFloat(3.0); // false (integer value) + * isFloat(NaN); // false + * isFloat(Infinity); // false + * ``` + */ +export function isFloat(val: unknown): boolean { + if (typeof val !== "number") { + return false; + } + if (!Number.isFinite(val)) { + return false; + } + return val !== Math.trunc(val); +} + +/** + * Return `true` if `val` is a finite integer-valued number. + * + * Mirrors `pandas.api.types.is_integer`. 
+ * + * @example + * ```ts + * isInteger(3); // true + * isInteger(3.0); // true (integer value stored as float) + * isInteger(3.14); // false + * isInteger(NaN); // false + * ``` + */ +export function isInteger(val: unknown): boolean { + return typeof val === "number" && Number.isInteger(val); +} + +/** + * Return `true` if `val` is a `bigint`. + * + * @example + * ```ts + * isBigInt(42n); // true + * isBigInt(42); // false + * ``` + */ +export function isBigInt(val: unknown): val is bigint { + return typeof val === "bigint"; +} + +/** + * Return `true` if `val` is a `RegExp`. + * + * Mirrors `pandas.api.types.is_re`. + * + * @example + * ```ts + * isRegExp(/abc/); // true + * isRegExp(new RegExp("x")); // true + * isRegExp("abc"); // false + * ``` + */ +export function isRegExp(val: unknown): val is RegExp { + return val instanceof RegExp; +} + +/** + * Return `true` if `val` can be compiled into a `RegExp` β€” i.e. it is either + * a `string` or already a `RegExp`. + * + * Mirrors `pandas.api.types.is_re_compilable`. + * + * @example + * ```ts + * isReCompilable("abc"); // true + * isReCompilable(/abc/); // true + * isReCompilable(42); // false + * ``` + */ +export function isReCompilable(val: unknown): boolean { + return typeof val === "string" || val instanceof RegExp; +} + +/** + * Return `true` if `val` is a "missing" value in the pandas sense: `null`, + * `undefined`, or `NaN`. + * + * @example + * ```ts + * isMissing(null); // true + * isMissing(undefined); // true + * isMissing(NaN); // true + * isMissing(0); // false + * isMissing(""); // false + * ``` + */ +export function isMissing(val: unknown): boolean { + if (val === null || val === undefined) { + return true; + } + if (typeof val === "number" && Number.isNaN(val)) { + return true; + } + return false; +} + +/** + * Return `true` if `val` is "hashable" β€” usable as an object-key in + * JavaScript. 
In practice this means it is a primitive (`string`, `number`, + * `bigint`, `boolean`, `symbol`, `null`, `undefined`). + * + * Mirrors the spirit of `pandas.api.types.is_hashable`. + * + * @example + * ```ts + * isHashable("key"); // true + * isHashable(42); // true + * isHashable({}); // false + * isHashable([]); // false + * ``` + */ +export function isHashable(val: unknown): boolean { + if (val === null || val === undefined) { + return true; + } + const t = typeof val; + return t === "string" || t === "number" || t === "bigint" || t === "boolean" || t === "symbol"; +} + +/** + * Return `true` if `val` is a `Date` instance. + * + * @example + * ```ts + * isDate(new Date()); // true + * isDate("2024-01-01"); // false + * ``` + */ +export function isDate(val: unknown): val is Date { + return val instanceof Date; +} + +// ═════════════════════════════════════════════════════════════════════════════ +// DTYPE-LEVEL PREDICATES +// ═════════════════════════════════════════════════════════════════════════════ + +/** + * Return `true` if the dtype is numeric (integer, unsigned integer, or float). + * + * Mirrors `pandas.api.types.is_numeric_dtype`. + * + * @example + * ```ts + * isNumericDtype(Dtype.float64); // true + * isNumericDtype("int32"); // true + * isNumericDtype("string"); // false + * ``` + */ +export function isNumericDtype(dtype: Dtype | DtypeName): boolean { + return resolveDtype(dtype).isNumeric; +} + +/** + * Return `true` if the dtype is any integer kind (signed or unsigned). + * + * Mirrors `pandas.api.types.is_integer_dtype`. + * + * @example + * ```ts + * isIntegerDtype("int64"); // true + * isIntegerDtype("uint8"); // true + * isIntegerDtype("float32"); // false + * ``` + */ +export function isIntegerDtype(dtype: Dtype | DtypeName): boolean { + return resolveDtype(dtype).isInteger; +} + +/** + * Return `true` if the dtype is a signed integer (`int8`–`int64`). + * + * Mirrors `pandas.api.types.is_signed_integer_dtype`. 
+ * + * @example + * ```ts + * isSignedIntegerDtype("int32"); // true + * isSignedIntegerDtype("uint32"); // false + * ``` + */ +export function isSignedIntegerDtype(dtype: Dtype | DtypeName): boolean { + return resolveDtype(dtype).isSignedInteger; +} + +/** + * Return `true` if the dtype is an unsigned integer (`uint8`–`uint64`). + * + * Mirrors `pandas.api.types.is_unsigned_integer_dtype`. + * + * @example + * ```ts + * isUnsignedIntegerDtype("uint64"); // true + * isUnsignedIntegerDtype("int64"); // false + * ``` + */ +export function isUnsignedIntegerDtype(dtype: Dtype | DtypeName): boolean { + return resolveDtype(dtype).isUnsignedInteger; +} + +/** + * Return `true` if the dtype is a floating-point type (`float32` or `float64`). + * + * Mirrors `pandas.api.types.is_float_dtype`. + * + * @example + * ```ts + * isFloatDtype("float64"); // true + * isFloatDtype("float32"); // true + * isFloatDtype("int32"); // false + * ``` + */ +export function isFloatDtype(dtype: Dtype | DtypeName): boolean { + return resolveDtype(dtype).isFloat; +} + +/** + * Return `true` if the dtype is boolean. + * + * Mirrors `pandas.api.types.is_bool_dtype`. + * + * @example + * ```ts + * isBoolDtype("bool"); // true + * isBoolDtype("int8"); // false + * ``` + */ +export function isBoolDtype(dtype: Dtype | DtypeName): boolean { + return resolveDtype(dtype).isBool; +} + +/** + * Return `true` if the dtype is the `string` dtype. + * + * Mirrors `pandas.api.types.is_string_dtype`. + * + * @example + * ```ts + * isStringDtype("string"); // true + * isStringDtype("object"); // false + * ``` + */ +export function isStringDtype(dtype: Dtype | DtypeName): boolean { + return resolveDtype(dtype).isString; +} + +/** + * Return `true` if the dtype is a datetime type. + * + * Mirrors `pandas.api.types.is_datetime64_dtype`. 
+ * + * @example + * ```ts + * isDatetimeDtype("datetime"); // true + * isDatetimeDtype("string"); // false + * ``` + */ +export function isDatetimeDtype(dtype: Dtype | DtypeName): boolean { + return resolveDtype(dtype).isDatetime; +} + +/** + * Return `true` if the dtype is a timedelta type. + * + * Mirrors `pandas.api.types.is_timedelta64_dtype`. + * + * @example + * ```ts + * isTimedeltaDtype("timedelta"); // true + * isTimedeltaDtype("datetime"); // false + * ``` + */ +export function isTimedeltaDtype(dtype: Dtype | DtypeName): boolean { + return resolveDtype(dtype).isTimedelta; +} + +/** + * Return `true` if the dtype is the categorical dtype. + * + * Mirrors `pandas.api.types.is_categorical_dtype`. + * + * @example + * ```ts + * isCategoricalDtype("category"); // true + * isCategoricalDtype("string"); // false + * ``` + */ +export function isCategoricalDtype(dtype: Dtype | DtypeName): boolean { + return resolveDtype(dtype).isCategory; +} + +/** + * Return `true` if the dtype is the object dtype. + * + * Mirrors `pandas.api.types.is_object_dtype`. + * + * @example + * ```ts + * isObjectDtype("object"); // true + * isObjectDtype("string"); // false + * ``` + */ +export function isObjectDtype(dtype: Dtype | DtypeName): boolean { + return resolveDtype(dtype).isObject; +} + +/** + * Return `true` if the dtype represents complex numbers. + * + * JavaScript has no native complex number type, so this always returns `false` + * (no complex dtype exists in the `tsb` dtype system). Provided for API + * parity with `pandas.api.types.is_complex_dtype`. + * + * @example + * ```ts + * isComplexDtype("float64"); // false (no complex dtype) + * ``` + */ +export function isComplexDtype(_dtype: Dtype | DtypeName): boolean { + return false; +} + +/** + * Return `true` if the dtype is an "extension array" dtype β€” i.e. any dtype + * beyond the numeric primitives: `string`, `object`, `datetime`, `timedelta`, + * `category`. 
+ * + * Mirrors `pandas.api.types.is_extension_array_dtype`. + * + * @example + * ```ts + * isExtensionArrayDtype("category"); // true + * isExtensionArrayDtype("datetime"); // true + * isExtensionArrayDtype("int64"); // false + * ``` + */ +export function isExtensionArrayDtype(dtype: Dtype | DtypeName): boolean { + const d = resolveDtype(dtype); + return d.isString || d.isObject || d.isDatetime || d.isTimedelta || d.isCategory; +} + +/** + * Return `true` if the dtype can hold period (date period) data. + * In the current `tsb` dtype system this maps to the `datetime` kind. + * + * Mirrors `pandas.api.types.is_period_dtype`. + * + * @example + * ```ts + * isPeriodDtype("datetime"); // true + * isPeriodDtype("float64"); // false + * ``` + */ +export function isPeriodDtype(dtype: Dtype | DtypeName): boolean { + return resolveDtype(dtype).isDatetime; +} + +/** + * Return `true` if the dtype is suitable for interval data β€” float or integer. + * + * Mirrors `pandas.api.types.is_interval_dtype`. + * + * @example + * ```ts + * isIntervalDtype("float64"); // true + * isIntervalDtype("int32"); // true + * isIntervalDtype("string"); // false + * ``` + */ +export function isIntervalDtype(dtype: Dtype | DtypeName): boolean { + return resolveDtype(dtype).isNumeric; +} diff --git a/src/core/attrs.ts b/src/core/attrs.ts new file mode 100644 index 00000000..81c6be1c --- /dev/null +++ b/src/core/attrs.ts @@ -0,0 +1,291 @@ +/** + * attrs β€” user-defined metadata dictionary for Series and DataFrame. + * + * Mirrors `pandas.DataFrame.attrs` / `pandas.Series.attrs`: an arbitrary + * keyβ†’value dictionary that travels with a data object and lets callers + * annotate it with provenance, units, descriptions, or any other metadata. + * + * Because the tsb Series and DataFrame classes are immutable by design, this + * module maintains a **WeakMap registry** that maps each object to its attrs + * record. 
The registry entries are garbage-collected automatically when the + * object itself is collected β€” there is no memory leak. + * + * ### Public surface + * + * ```ts + * import { getAttrs, setAttrs, updateAttrs, copyAttrs, withAttrs, clearAttrs, + * hasAttrs } from "tsb"; + * + * const df = DataFrame.fromColumns({ x: [1, 2, 3] }); + * + * // Annotate + * setAttrs(df, { source: "sensor_A", unit: "metres" }); + * getAttrs(df); // { source: "sensor_A", unit: "metres" } + * + * // Merge additional keys + * updateAttrs(df, { version: 2 }); + * getAttrs(df); // { source: "sensor_A", unit: "metres", version: 2 } + * + * // Fluent helper β€” sets attrs and returns the same object + * const annotated = withAttrs(df, { source: "sensor_B" }); + * annotated === df; // true β€” same reference + * + * // Propagate to a derived object + * const df2 = DataFrame.fromColumns({ y: [4, 5, 6] }); + * copyAttrs(df, df2); + * getAttrs(df2); // { source: "sensor_A", unit: "metres", version: 2 } + * ``` + * + * @module + */ + +// ─── types ──────────────────────────────────────────────────────────────────── + +/** + * The attrs dictionary type. Keys are strings; values may be any JSON-safe + * primitive or nested structure. Mirrors the `dict` type of `pandas.attrs`. + */ +export type Attrs = Record; + +// ─── registry ───────────────────────────────────────────────────────────────── + +/** Internal WeakMap from any object to its attrs record. */ +const registry = new WeakMap(); + +// ─── public API ─────────────────────────────────────────────────────────────── + +/** + * Retrieve the attrs dictionary for `obj`. + * + * Returns a **shallow copy** so callers cannot mutate the stored record + * accidentally. If no attrs have been set, returns an empty object `{}`. 
+ * + * @example + * ```ts + * const s = new Series({ data: [1, 2, 3] }); + * setAttrs(s, { unit: "kg" }); + * getAttrs(s); // { unit: "kg" } + * ``` + */ +export function getAttrs(obj: object): Attrs { + const stored = registry.get(obj); + return stored !== undefined ? { ...stored } : {}; +} + +/** + * **Overwrite** the attrs dictionary for `obj` with `attrs`. + * + * Any previously stored attrs are discarded. Stores a shallow copy so + * subsequent mutations to the passed-in object do not affect the stored value. + * + * @example + * ```ts + * setAttrs(df, { source: "sensor_A" }); + * getAttrs(df); // { source: "sensor_A" } + * ``` + */ +export function setAttrs(obj: object, attrs: Attrs): void { + registry.set(obj, { ...attrs }); +} + +/** + * **Merge** `updates` into the existing attrs for `obj`. + * + * Existing keys that are not present in `updates` are preserved. Keys that + * are present in both `updates` and the existing attrs are overwritten. + * + * @example + * ```ts + * setAttrs(df, { source: "A" }); + * updateAttrs(df, { version: 2 }); + * getAttrs(df); // { source: "A", version: 2 } + * ``` + */ +export function updateAttrs(obj: object, updates: Attrs): void { + const existing = registry.get(obj) ?? {}; + registry.set(obj, { ...existing, ...updates }); +} + +/** + * **Copy** the attrs from `source` to `target`, overwriting any existing attrs + * on `target`. + * + * Useful for propagating metadata from an input to a derived result. + * + * @example + * ```ts + * setAttrs(df1, { source: "sensor_A" }); + * const df2 = df1.head(5); + * copyAttrs(df1, df2); + * getAttrs(df2); // { source: "sensor_A" } + * ``` + */ +export function copyAttrs(source: object, target: object): void { + const stored = registry.get(source); + if (stored !== undefined) { + registry.set(target, { ...stored }); + } else { + registry.delete(target); + } +} + +/** + * **Fluent helper** β€” set attrs on `obj` and return the same object. 
+ * + * This **replaces** any previously stored attrs (same semantics as + * {@link setAttrs}). The return type is `T` so callers do not lose the + * concrete type of their object. + * + * @example + * ```ts + * const annotated = withAttrs(df, { source: "sensor_A", unit: "metres" }); + * annotated === df; // true β€” same reference + * getAttrs(annotated); // { source: "sensor_A", unit: "metres" } + * ``` + */ +export function withAttrs(obj: T, attrs: Attrs): T { + registry.set(obj, { ...attrs }); + return obj; +} + +/** + * **Remove** all attrs from `obj`. + * + * After calling this, {@link getAttrs} returns `{}` and {@link hasAttrs} + * returns `false`. + * + * @example + * ```ts + * setAttrs(df, { source: "A" }); + * clearAttrs(df); + * hasAttrs(df); // false + * getAttrs(df); // {} + * ``` + */ +export function clearAttrs(obj: object): void { + registry.delete(obj); +} + +/** + * Returns `true` if `obj` has any attrs set, `false` otherwise. + * + * @example + * ```ts + * hasAttrs(df); // false + * setAttrs(df, { x: 1 }); + * hasAttrs(df); // true + * clearAttrs(df); + * hasAttrs(df); // false + * ``` + */ +export function hasAttrs(obj: object): boolean { + return registry.has(obj); +} + +/** + * Retrieve a **single** attrs value by key. + * + * Returns `undefined` if the key does not exist (or no attrs are set). + * + * @example + * ```ts + * setAttrs(df, { unit: "kg" }); + * getAttr(df, "unit"); // "kg" + * getAttr(df, "missing"); // undefined + * ``` + */ +export function getAttr(obj: object, key: string): unknown { + return registry.get(obj)?.[key]; +} + +/** + * Set a **single** attrs key on `obj`, preserving all other existing attrs. + * + * @example + * ```ts + * setAttr(df, "unit", "kg"); + * setAttr(df, "source", "lab"); + * getAttrs(df); // { unit: "kg", source: "lab" } + * ``` + */ +export function setAttr(obj: object, key: string, value: unknown): void { + const existing = registry.get(obj) ?? 
{}; + registry.set(obj, { ...existing, [key]: value }); +} + +/** + * Delete a **single** attrs key from `obj`, preserving all other keys. + * + * Does nothing if the key does not exist. + * + * @example + * ```ts + * setAttrs(df, { a: 1, b: 2 }); + * deleteAttr(df, "a"); + * getAttrs(df); // { b: 2 } + * ``` + */ +export function deleteAttr(obj: object, key: string): void { + const existing = registry.get(obj); + if (existing === undefined) return; + const { [key]: _removed, ...rest } = existing; + if (Object.keys(rest).length === 0) { + registry.delete(obj); + } else { + registry.set(obj, rest); + } +} + +/** + * Return the number of attrs keys stored on `obj`. + * + * @example + * ```ts + * attrsCount(df); // 0 + * setAttrs(df, { a: 1, b: 2 }); + * attrsCount(df); // 2 + * ``` + */ +export function attrsCount(obj: object): number { + return Object.keys(registry.get(obj) ?? {}).length; +} + +/** + * Return the list of attrs keys stored on `obj`. + * + * @example + * ```ts + * setAttrs(df, { a: 1, b: 2 }); + * attrsKeys(df); // ["a", "b"] + * ``` + */ +export function attrsKeys(obj: object): string[] { + return Object.keys(registry.get(obj) ?? {}); +} + +/** + * Merge attrs from multiple source objects into a single target object. + * + * Sources are applied left-to-right; later sources overwrite earlier ones on + * key conflicts. Overwrites any existing attrs on `target`. 
/**
 * Merge attrs from multiple source objects into a single target object.
 *
 * Sources are applied left-to-right; later sources overwrite earlier ones on
 * key conflicts. Overwrites any existing attrs on `target` β€” including the
 * case where no source carries attrs, which clears `target` (fix: the empty
 * case previously left `target`'s stale attrs in place, contradicting the
 * documented overwrite semantics).
 *
 * @example
 * ```ts
 * setAttrs(s1, { source: "A", unit: "kg" });
 * setAttrs(s2, { source: "B", scale: 2 });
 * mergeAttrs([s1, s2], df);
 * getAttrs(df); // { source: "B", unit: "kg", scale: 2 }
 * ```
 */
export function mergeAttrs(sources: readonly object[], target: object): void {
  const merged: Attrs = {};
  for (const src of sources) {
    const stored = registry.get(src);
    if (stored !== undefined) {
      Object.assign(merged, stored);
    }
  }
  if (Object.keys(merged).length > 0) {
    registry.set(target, merged);
  } else {
    // Honor the overwrite contract even when nothing was collected.
    registry.delete(target);
  }
}
diff --git a/src/core/index.ts b/src/core/index.ts
index ada43b65..08713cae 100644
--- a/src/core/index.ts
+++ b/src/core/index.ts
@@ -15,3 +15,72 @@ export { CategoricalAccessor } from "./cat_accessor.ts";
 export type { CatSeriesLike } from "./cat_accessor.ts";
 export { MultiIndex } from "./multi_index.ts";
 export type { MultiIndexOptions } from "./multi_index.ts";
+export { insertColumn, popColumn, reorderColumns, moveColumn, dataFrameFromPairs } from "./insert_pop.ts";
+export type { PopResult } from "./insert_pop.ts";
+export { toDictOriented, fromDictOriented } from "./to_from_dict.ts";
+export type {
+  ToDictOrient,
+  FromDictOrient,
+  DictSplit,
+  DictTight,
+  SplitInput,
+} from "./to_from_dict.ts";
+export {
+  getAttrs,
+  setAttrs,
+  updateAttrs,
+  copyAttrs,
+  withAttrs,
+  clearAttrs,
+  hasAttrs,
+  getAttr,
+  setAttr,
+  deleteAttr,
+  attrsCount,
+  attrsKeys,
+  mergeAttrs,
+} from "./attrs.ts";
+export type { Attrs } from "./attrs.ts";
+export {
+  pipe,
+  seriesApply,
+  seriesTransform,
+  dataFrameApply,
+  dataFrameApplyMap,
+  dataFrameTransform,
+  dataFrameTransformRows,
+} from "./pipe_apply.ts";
+export {
+  isScalar,
+  isListLike,
+  isArrayLike,
+  isDictLike,
+  isIterator,
+  isNumber,
+  isBool,
+  isStringValue,
+  isFloat,
+  isInteger,
+  isBigInt,
+  isRegExp,
+  isReCompilable,
+  isMissing,
+  isHashable,
+  isDate,
+  isNumericDtype,
+  isIntegerDtype,
+  isSignedIntegerDtype,
+  isUnsignedIntegerDtype,
+  isFloatDtype,
isBoolDtype, + isStringDtype, + isDatetimeDtype, + isTimedeltaDtype, + isCategoricalDtype, + isObjectDtype, + isComplexDtype, + isExtensionArrayDtype, + isPeriodDtype, + isIntervalDtype, +} from "./api_types.ts"; diff --git a/src/core/insert_pop.ts b/src/core/insert_pop.ts new file mode 100644 index 00000000..d56c42bc --- /dev/null +++ b/src/core/insert_pop.ts @@ -0,0 +1,214 @@ +/** + * DataFrame.insert() and DataFrame.pop() β€” column insertion and removal. + * + * Mirrors `pandas.DataFrame.insert(loc, column, value)` and + * `pandas.DataFrame.pop(item)`. + * + * Since `DataFrame` in tsb is immutable, both operations return a new DataFrame. + * `popColumn` returns both the extracted `Series` and the resulting DataFrame. + * + * @example + * ```ts + * import { DataFrame, insertColumn, popColumn } from "tsb"; + * + * const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] }); + * + * // Insert column "x" at position 1 (between "a" and "b") + * const df2 = insertColumn(df, 1, "x", [10, 20]); + * // df2.columns.values β†’ ["a", "x", "b"] + * + * // Pop column "a" out of df2 + * const { series, df: df3 } = popColumn(df2, "a"); + * // series.values β†’ [1, 2] + * // df3.columns.values β†’ ["x", "b"] + * ``` + * + * @packageDocumentation + */ + +import type { Label, Scalar } from "../types.ts"; +import { Index } from "./base-index.ts"; +import { DataFrame } from "./frame.ts"; +import { Series } from "./series.ts"; + +// ─── insertColumn ───────────────────────────────────────────────────────────── + +/** + * Insert a new column into `df` at integer column position `loc`. + * + * Mirrors `pandas.DataFrame.insert(loc, column, value, allow_duplicates=False)`. + * Raises a `RangeError` if: + * - `column` already exists in `df` (no duplicates by default) + * - `loc` is out of range (must be 0 ≀ loc ≀ df.shape[1]) + * - `values` length does not match the number of rows + * + * @param df Source DataFrame (not mutated). 
+ * @param loc Zero-based integer position at which to insert the column. + * @param column Name of the new column. + * @param values Column data as an array of scalars or a `Series`. + * @param allowDuplicates When `true`, silently allow duplicate column names. Default `false`. + * @returns A new DataFrame with the column inserted. + */ +export function insertColumn( + df: DataFrame, + loc: number, + column: string, + values: readonly Scalar[] | Series, + allowDuplicates = false, +): DataFrame { + const nCols = df.shape[1]; + const nRows = df.shape[0]; + + if (!allowDuplicates && df.has(column)) { + throw new RangeError( + `Column "${column}" already exists. Use allowDuplicates=true to permit duplicate names.`, + ); + } + + if (loc < 0 || loc > nCols) { + throw new RangeError(`loc=${loc} is out of range [0, ${nCols}].`); + } + + // Resolve values to a Series aligned to df's row index. + const series: Series = + values instanceof Series + ? values + : new Series({ data: values, index: df.index, name: column }); + + if (series.size !== nRows) { + throw new RangeError( + `values length ${series.size} does not match DataFrame row count ${nRows}.`, + ); + } + + // Rebuild the column map, inserting the new column at position `loc`. + const colMap = new Map>(); + let idx = 0; + + for (const colName of df.columns.values) { + if (idx === loc) { + colMap.set(column, series); + } + colMap.set(colName, df.col(colName)); + idx++; + } + + // Handle insertion at the end (loc === nCols). + if (loc === nCols) { + colMap.set(column, series); + } + + return new DataFrame(colMap, df.index); +} + +// ─── popColumn ──────────────────────────────────────────────────────────────── + +/** Return type of {@link popColumn}. */ +export interface PopResult { + /** The extracted column as a Series. */ + readonly series: Series; + /** The DataFrame with the column removed. 
*/ + readonly df: DataFrame; +} + +/** + * Remove a column from `df` and return both the extracted `Series` and the + * resulting DataFrame. + * + * Mirrors `pandas.DataFrame.pop(item)`, but because tsb DataFrames are + * immutable this function returns the removed Series *and* the new DataFrame + * (rather than mutating in place). + * + * Raises a `RangeError` if `col` does not exist in `df`. + * + * @param df Source DataFrame (not mutated). + * @param col Name of the column to remove. + * @returns `{ series, df }` β€” the extracted column and the remaining DataFrame. + * + * @example + * ```ts + * const { series, df: remaining } = popColumn(df, "age"); + * // series contains the "age" column; remaining has all other columns + * ``` + */ +export function popColumn(df: DataFrame, col: string): PopResult { + const series = df.get(col); + if (series === undefined) { + throw new RangeError(`Column "${col}" not found in DataFrame.`); + } + + const colMap = new Map>(); + for (const colName of df.columns.values) { + if (colName !== col) { + colMap.set(colName, df.col(colName)); + } + } + + return { + series, + df: new DataFrame(colMap, df.index), + }; +} + +// ─── reorderColumns ────────────────────────────────────────────────────────── + +/** + * Reorder the columns of `df` to match `order`. + * + * Mirrors `df[order]` in pandas. All names in `order` must be present in `df`; + * extra names in `df` not listed in `order` are dropped. + * + * @param df Source DataFrame. + * @param order New column order (subset of `df.columns.values`). + * @returns A new DataFrame with columns in the specified order. 
+ */ +export function reorderColumns(df: DataFrame, order: readonly string[]): DataFrame { + const colMap = new Map>(); + for (const name of order) { + const s = df.get(name); + if (s === undefined) { + throw new RangeError(`Column "${name}" not found in DataFrame.`); + } + colMap.set(name, s); + } + return new DataFrame(colMap, df.index); +} + +// ─── moveColumn ────────────────────────────────────────────────────────────── + +/** + * Move an existing column to a new integer position. + * + * This is a convenience wrapper combining {@link popColumn} and + * {@link insertColumn}: it removes the column from its current position and + * re-inserts it at `newLoc` in the resulting DataFrame. + * + * @param df Source DataFrame. + * @param col Name of the column to move. + * @param newLoc Target position (0 ≀ newLoc ≀ df.shape[1] βˆ’ 1). + * @returns A new DataFrame with the column at the new position. + */ +export function moveColumn(df: DataFrame, col: string, newLoc: number): DataFrame { + const { series, df: without } = popColumn(df, col); + return insertColumn(without, newLoc, col, series); +} + +// ─── internal re-export helper (used by DataFrame constructor access) ───────── + +/** + * Build a new DataFrame from an ordered iterable of `[name, Series]` pairs and + * a row index. Exported for use by other tsb modules that need to construct + * DataFrames without going through the public factory methods. + * + * @internal + */ +export function dataFrameFromPairs( + pairs: Iterable]>, + index: Index