, left: boolean, right: boolean): string {
+ let start = 0;
+ let end = s.length;
+ if (left) {
+ while (start < end && set.has(s[start] ?? "")) {
+ start++;
+ }
+ }
+ if (right) {
+ while (end > start && set.has(s[end - 1] ?? "")) {
+ end--;
+ }
+ }
+ return s.slice(start, end);
+}
+
+/**
+ * Pad `s` with `fillchar` until it is `width` characters long.
+ *
+ * A string that is already at least `width` long is returned unchanged; it is
+ * never truncated.
+ *
+ * @param s - The string to pad.
+ * @param width - Minimum length of the result.
+ * @param side - `"left"` prepends the fill, `"right"` appends it, and `"both"`
+ *   splits it across the two ends; when the fill count is odd the extra copy
+ *   goes on the right.
+ * @param fillchar - Text repeated once per missing character.
+ * @returns The padded string.
+ */
+function padString(
+  s: string,
+  width: number,
+  side: "left" | "right" | "both",
+  fillchar: string,
+): string {
+  const missing = width - s.length;
+  if (missing <= 0) {
+    return s;
+  }
+  switch (side) {
+    case "left":
+      return `${fillchar.repeat(missing)}${s}`;
+    case "right":
+      return `${s}${fillchar.repeat(missing)}`;
+    default: {
+      const before = Math.floor(missing / 2);
+      return `${fillchar.repeat(before)}${s}${fillchar.repeat(missing - before)}`;
+    }
+  }
+}
+
+/**
+ * Escape special regex characters.
+ *
+ * Every match of `RE_ESCAPE_CHARS` in `s` is replaced by a backslash followed by
+ * the matched text (`$&` is the whole-match placeholder of `String.prototype.replace`).
+ * NOTE(review): `RE_ESCAPE_CHARS` is defined outside this excerpt; presumably it is
+ * a global character class of regex metacharacters. Confirm that it carries the `g`
+ * flag, otherwise only the first metacharacter is escaped.
+ *
+ * @param s - Text to be used literally inside a regular expression.
+ * @returns `s` with a backslash inserted before each match.
+ */
+function escapeRegex(s: string): string {
+ return s.replace(RE_ESCAPE_CHARS, "\\$&");
+}
+
+/**
+ * Replace every occurrence of `pat` in `s` with `repl`.
+ *
+ * - `pat` is a `RegExp`: it is used as given, with the `g` flag added when missing.
+ * - `pat` is a string and `regex` is false: `pat` is matched literally and `repl`
+ *   is inserted verbatim (no `$&` / `$1` expansion).
+ * - `pat` is a string and `regex` is true: `pat` is compiled with `flags` plus `g`.
+ *
+ * @param s - Input string.
+ * @param pat - Literal text, regex source, or a compiled `RegExp`.
+ * @param repl - Replacement text; `$`-patterns are expanded only in the regex modes.
+ * @param regex - Whether a string `pat` is a regex source rather than literal text.
+ * @param flags - Extra regex flags for a string `pat`; ignored otherwise.
+ * @returns `s` with all occurrences replaced.
+ */
+function replaceAll(
+  s: string,
+  pat: string | RegExp,
+  repl: string,
+  regex: boolean,
+  flags: string,
+): string {
+  if (pat instanceof RegExp) {
+    const re = pat.global ? pat : new RegExp(pat.source, `${pat.flags}g`);
+    return s.replace(re, repl);
+  }
+  if (!regex) {
+    if (pat === "") {
+      // An empty needle matches before every character and at the end, as in
+      // Python's "abc".replace("", "-") == "-a-b-c-". split("")/join would
+      // miss both ends.
+      return repl + Array.from(s, (ch) => ch + repl).join("");
+    }
+    return s.split(pat).join(repl);
+  }
+  // Add `g` only when the caller has not supplied it: a duplicated flag makes
+  // the RegExp constructor throw a SyntaxError.
+  const allFlags = flags.includes("g") ? flags : `g${flags}`;
+  return s.replace(new RegExp(pat, allFlags), repl);
+}
+
+/**
+ * Replace at most `n` occurrences of `pat` in `s`, scanning left to right (n >= 1).
+ *
+ * Matches are located in the ORIGINAL string, so text inserted by an earlier
+ * replacement is never scanned again (`"ab"`, a→ba, n=2 gives `"bab"`).
+ * A `RegExp` carrying the `g` flag still honours `n`; the `g` and `y` flags of
+ * the pattern are ignored here.
+ *
+ * @param s - Input string.
+ * @param pat - Literal text, regex source, or a compiled `RegExp`.
+ * @param repl - Replacement text. In regex mode the usual `$&`, `$1`, `$<name>`
+ *   patterns are expanded; in literal mode it is inserted verbatim.
+ * @param n - Maximum number of replacements.
+ * @param regex - Whether a string `pat` is a regex source rather than literal text.
+ * @param flags - Regex flags for a string `pat`; ignored otherwise.
+ * @returns `s` with up to `n` occurrences replaced.
+ */
+function replaceN(
+  s: string,
+  pat: string | RegExp,
+  repl: string,
+  n: number,
+  regex: boolean,
+  flags: string,
+): string {
+  const re = buildReplaceRegex(pat, regex, flags);
+  if (re === null) {
+    // A null regex means `pat` is a plain string to be matched literally.
+    return replaceLiteralN(s, pat as string, repl, n);
+  }
+  const baseFlags = Array.from(re.flags)
+    .filter((flag) => flag !== "g" && flag !== "y")
+    .join("");
+  // `finder` walks the original string match by match. `single` is a sticky
+  // twin used to let the engine expand `repl` for exactly one match, with the
+  // whole string as context (anchors and lookarounds behave as in `finder`).
+  const finder = new RegExp(re.source, `${baseFlags}g`);
+  const single = new RegExp(re.source, `${baseFlags}y`);
+  let out = "";
+  let cursor = 0;
+  for (let count = 0; count < n; count++) {
+    const match = finder.exec(s);
+    if (match === null) {
+      break;
+    }
+    const matched = match[0] ?? "";
+    const matchEnd = match.index + matched.length;
+    single.lastIndex = match.index;
+    const whole = s.replace(single, repl);
+    // `whole` is prefix + replacement + suffix; cut the replacement back out.
+    const replacement = whole.slice(match.index, whole.length - (s.length - matchEnd));
+    out += s.slice(cursor, match.index) + replacement;
+    cursor = matchEnd;
+    if (matched.length === 0) {
+      // Step past an empty match so the scan cannot stall.
+      finder.lastIndex++;
+    }
+  }
+  return out + s.slice(cursor);
+}
+
+/** Replace the first `n` literal occurrences of `needle`, inserting `repl` verbatim. */
+function replaceLiteralN(s: string, needle: string, repl: string, n: number): string {
+  let out = "";
+  let cursor = 0;
+  let searchFrom = 0;
+  for (let count = 0; count < n && searchFrom <= s.length; count++) {
+    const idx = s.indexOf(needle, searchFrom);
+    if (idx === -1) {
+      break;
+    }
+    out += s.slice(cursor, idx) + repl;
+    cursor = idx + needle.length;
+    // An empty needle matches at every position; advance by one to terminate.
+    searchFrom = needle.length === 0 ? idx + 1 : cursor;
+  }
+  return out + s.slice(cursor);
+}
+
+/**
+ * Build the regex used by `replaceN`, or null for a literal string replace.
+ *
+ * @param pat - A `RegExp` (returned as is) or a string.
+ * @param regex - Whether a string `pat` should be compiled as a regex.
+ * @param flags - Flags used when compiling a string `pat`.
+ * @returns The regex to use, or null when `pat` is literal text.
+ */
+function buildReplaceRegex(pat: string | RegExp, regex: boolean, flags: string): RegExp | null {
+  if (pat instanceof RegExp) {
+    return pat;
+  }
+  return regex ? new RegExp(pat, flags) : null;
+}
+
+/**
+ * Replace occurrences of `pat` in `s`.
+ *
+ * Dispatches to `replaceAll` when no limit applies and to `replaceN` otherwise.
+ *
+ * @param s - Input string.
+ * @param pat - Literal text, regex source, or a compiled `RegExp`.
+ * @param repl - Replacement text.
+ * @param n - Maximum number of replacements; any negative value (or a missing
+ *   value) means "replace all", matching the `count` convention of Python's
+ *   `str.replace`.
+ * @param regex - Whether a string `pat` is a regex source rather than literal text.
+ * @param flags - Regex flags for a string `pat`.
+ * @returns The string after replacement.
+ */
+function doReplace(
+  s: string,
+  pat: string | RegExp,
+  repl: string,
+  n: number,
+  regex: boolean,
+  flags: string,
+): string {
+  // Previously only -1 selected "all"; other negative counts fell through to
+  // replaceN and silently replaced nothing.
+  if (n === undefined || n < 0) {
+    return replaceAll(s, pat, repl, regex, flags);
+  }
+  return replaceN(s, pat, repl, n, regex, flags);
+}
+
+/**
+ * Split `s` on `sep` from the left, performing at most `maxsplit` splits.
+ *
+ * @param s - String to split.
+ * @param sep - Separator. An empty separator splits between characters, as
+ *   `String.prototype.split("")` does.
+ * @param maxsplit - Maximum number of splits. Omitted or negative means no
+ *   limit; `0` returns `[s]`.
+ * @returns The pieces in order; the last piece holds the unsplit remainder.
+ */
+function splitString(s: string, sep: string, maxsplit?: number): string[] {
+  if (maxsplit === undefined || maxsplit < 0) {
+    return s.split(sep);
+  }
+  const parts: string[] = [];
+  let cursor = 0;
+  while (parts.length < maxsplit) {
+    let idx: number;
+    if (sep === "") {
+      // indexOf("") always reports the current position, which would emit
+      // empty pieces forever; cut after one character instead.
+      idx = cursor + 1 < s.length ? cursor + 1 : -1;
+    } else {
+      idx = s.indexOf(sep, cursor);
+    }
+    if (idx === -1) {
+      break;
+    }
+    parts.push(s.slice(cursor, idx));
+    cursor = idx + sep.length;
+  }
+  parts.push(s.slice(cursor));
+  return parts;
+}
+
+/**
+ * Split `s` on `sep` from the right, performing at most `maxsplit` splits.
+ *
+ * @param s - String to split.
+ * @param sep - Separator. An empty separator splits between characters.
+ * @param maxsplit - Maximum number of splits. Omitted or negative means no
+ *   limit; `0` returns `[s]`.
+ * @returns The pieces in left-to-right order; the first piece holds the
+ *   unsplit remainder.
+ */
+function rsplitString(s: string, sep: string, maxsplit?: number): string[] {
+  if (maxsplit === undefined || maxsplit < 0) {
+    return s.split(sep);
+  }
+  // Pieces are collected right to left and reversed once at the end, which
+  // avoids a quadratic series of unshift() calls.
+  const reversed: string[] = [];
+  let end = s.length;
+  while (reversed.length < maxsplit) {
+    let idx = -1;
+    if (sep === "") {
+      // lastIndexOf("") always reports the end; cut before the last character.
+      idx = end - 1 > 0 ? end - 1 : -1;
+    } else if (end - sep.length >= 0) {
+      // The separator must lie entirely inside s[0, end).
+      idx = s.lastIndexOf(sep, end - sep.length);
+    }
+    if (idx === -1) {
+      break;
+    }
+    reversed.push(s.slice(idx + sep.length, end));
+    end = idx;
+  }
+  reversed.push(s.slice(0, end));
+  return reversed.reverse();
+}
+
+/**
+ * Python-style slice of a string: `s[start:stop:step]`.
+ *
+ * Negative `start` / `stop` count from the end and out-of-range values are
+ * clamped. A negative `step` walks the string backwards, so
+ * `sliceString("abc", undefined, undefined, -1)` is `"cba"`.
+ *
+ * @param s - Input string (indexed by UTF-16 code unit).
+ * @param start - First index; defaults to the start (or the end when stepping backwards).
+ * @param stop - Exclusive end index; defaults to the end (or before the start when stepping backwards).
+ * @param step - Stride; defaults to 1.
+ * @returns The selected characters joined into a string.
+ * @throws RangeError when `step` is 0 (previously this looped forever).
+ */
+function sliceString(s: string, start?: number, stop?: number, step?: number): string {
+  const st = step ?? 1;
+  if (st === 0) {
+    throw new RangeError("slice step cannot be zero");
+  }
+  if (st === 1) {
+    return s.slice(start, stop);
+  }
+  const chars = s.split("");
+  const len = chars.length;
+  const out: string[] = [];
+  if (st > 0) {
+    const from = normaliseIndex(start ?? 0, len);
+    const to = stop === undefined ? len : normaliseIndex(stop, len);
+    for (let i = from; i < to; i += st) {
+      out.push(chars[i] ?? "");
+    }
+  } else {
+    // Walking backwards: the defaults flip and the loop runs while i > stop.
+    const from = start === undefined ? len - 1 : normaliseReverseIndex(start, len);
+    const to = stop === undefined ? -1 : normaliseReverseIndex(stop, len);
+    for (let i = from; i > to; i += st) {
+      out.push(chars[i] ?? "");
+    }
+  }
+  return out.join("");
+}
+
+/** Normalise a possibly-negative index for a forward slice, clamped to `[0, len]`. */
+function normaliseIndex(i: number, len: number): number {
+  const idx = i < 0 ? len + i : i;
+  return Math.max(0, Math.min(idx, len));
+}
+
+/** Normalise a possibly-negative index for a backward slice, clamped to `[-1, len - 1]`. */
+function normaliseReverseIndex(i: number, len: number): number {
+  const idx = i < 0 ? len + i : i;
+  return Math.max(-1, Math.min(idx, len - 1));
+}
+
+/**
+ * Greedy word-wrap of `s` at `width` characters.
+ *
+ * The input is split on single spaces and words are packed onto a line for as
+ * long as the line, including the joining space, stays within `width`. A word
+ * longer than `width` is kept whole on its own line.
+ *
+ * @param s - Text to wrap.
+ * @param width - Maximum line length in characters.
+ * @returns The wrapped lines joined with `"\n"`.
+ */
+function wrapString(s: string, width: number): string {
+  const lines: string[] = [];
+  let line = "";
+  for (const word of s.split(" ")) {
+    if (line === "") {
+      line = word;
+      continue;
+    }
+    const candidate = `${line} ${word}`;
+    if (candidate.length <= width) {
+      line = candidate;
+      continue;
+    }
+    lines.push(line);
+    line = word;
+  }
+  if (line !== "") {
+    lines.push(line);
+  }
+  return lines.join("\n");
+}
diff --git a/tests/core/string_accessor.test.ts b/tests/core/string_accessor.test.ts
new file mode 100644
index 00000000..cd0de0a1
--- /dev/null
+++ b/tests/core/string_accessor.test.ts
@@ -0,0 +1,413 @@
+/**
+ * Tests for StringAccessor (Series.str).
+ *
+ * Covers all public methods with unit tests and property-based tests.
+ */
+
+import { describe, expect, it } from "bun:test";
+import fc from "fast-check";
+import { Series } from "../../src/index.ts";
+import type { StringSeriesLike } from "../../src/index.ts";
+
+// ─── top-level regex constants (required by biome useTopLevelRegex) ───────────
+const RE_DIGITS = /\d/; // a single digit; passed to str.count() in the "count with regex" test
+const RE_WHITESPACE_GLOBAL = /\s+/; // NOTE(review): named GLOBAL but carries no `g` flag — confirm this is intended
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+function strSeries(data: (string | null)[], name?: string): Series { // test helper: wrap string/null data in a Series
+ return new Series({ data, name: name ?? null }); // an omitted name is passed on as null
+}
+
+/**
+ * Extract string values from a StringSeriesLike.
+ *
+ * Returns `s.toArray()` cast to `(string | null)[]`. The cast is not checked at
+ * runtime, so this helper is only meaningful for accessor methods that yield
+ * strings or null.
+ */
+function vals(s: StringSeriesLike): (string | null)[] {
+ return s.toArray() as (string | null)[];
+}
+
+// ─── case ─────────────────────────────────────────────────────────────────────
+
+describe("StringAccessor — case", () => {
+ it("lower()", () => {
+ const s = strSeries(["Hello", "WORLD", null]);
+ expect(vals(s.str.lower())).toEqual(["hello", "world", null]); // null is expected to pass through
+ });
+
+ it("upper()", () => {
+ const s = strSeries(["hello", "World", null]);
+ expect(vals(s.str.upper())).toEqual(["HELLO", "WORLD", null]);
+ });
+
+ it("title()", () => {
+ const s = strSeries(["hello world", "foo bar"]);
+ expect(vals(s.str.title())).toEqual(["Hello World", "Foo Bar"]);
+ });
+
+ it("capitalize()", () => {
+ const s = strSeries(["hello WORLD", "fOO"]);
+ expect(vals(s.str.capitalize())).toEqual(["Hello world", "Foo"]); // first letter upper, the rest lower
+ });
+
+ it("swapcase()", () => {
+ const s = strSeries(["Hello", "WORLD"]);
+ expect(vals(s.str.swapcase())).toEqual(["hELLO", "world"]);
+ });
+
+ it("round-trip lower→upper", () => {
+ fc.assert(
+ fc.property(fc.string(), (s) => { // property: lower() then upper() equals a direct toUpperCase()
+ const series = strSeries([s]);
+ const result = vals(series.str.lower().str.upper())[0];
+ expect(result).toBe(s.toUpperCase());
+ }),
+ );
+ });
+});
+
+// ─── length ───────────────────────────────────────────────────────────────────
+
+describe("StringAccessor — len", () => {
+ it("returns string length", () => {
+ const s = strSeries(["hello", "", "ab", null]);
+ expect(s.str.len().toArray()).toEqual([5, 0, 2, null]); // empty string gives 0, null stays null
+ });
+
+ it("len property test", () => {
+ fc.assert(
+ fc.property(fc.string(), (str) => { // property: len() agrees with the JS `length` of the input
+ const s = strSeries([str]);
+ expect(s.str.len().toArray()[0]).toBe(str.length);
+ }),
+ );
+ });
+});
+
+// ─── strip ────────────────────────────────────────────────────────────────────
+
+describe("StringAccessor — strip", () => {
+ it("strip() removes leading and trailing whitespace", () => {
+ const s = strSeries([" hello ", "\tfoo\n", null]);
+ expect(vals(s.str.strip())).toEqual(["hello", "foo", null]);
+ });
+
+ it("lstrip() removes only leading whitespace", () => {
+ const s = strSeries([" hello "]);
+ expect(vals(s.str.lstrip())).toEqual(["hello "]); // trailing whitespace must survive
+ });
+
+ it("rstrip() removes only trailing whitespace", () => {
+ const s = strSeries([" hello "]);
+ expect(vals(s.str.rstrip())).toEqual([" hello"]); // leading whitespace must survive
+ });
+
+ it("strip(chars) removes specific chars", () => {
+ const s = strSeries(["***hello***", "***"]);
+ expect(vals(s.str.strip("*"))).toEqual(["hello", ""]); // a string made only of strip chars becomes empty
+ });
+
+ it("strip property: result has no leading/trailing whitespace", () => {
+ fc.assert(
+ fc.property(fc.string(), (str) => {
+ const s = strSeries([str]);
+ const r = (vals(s.str.strip())[0] ?? "") as string;
+ expect(r.trim()).toBe(r); // trimming an already stripped value must be a no-op
+ }),
+ );
+ });
+});
+
+// ─── pad ──────────────────────────────────────────────────────────────────────
+
+// Expected values below spell out the full padding run. The widths under test
+// (10, 5, 6) require several fill characters, not the single space that the
+// whitespace-collapsed copy of this file showed.
+// NOTE(review): assumes the default fill character is a single space — confirm.
+describe("StringAccessor — pad", () => {
+  it("pad left (rjust equivalent)", () => {
+    const s = strSeries(["hi", "hello"]);
+    // width 10: 8 spaces before "hi", 5 spaces before "hello"
+    expect(vals(s.str.pad(10, "left"))).toEqual(["        hi", "     hello"]);
+  });
+
+  it("pad right (ljust equivalent)", () => {
+    const s = strSeries(["hi"]);
+    // width 5: 3 spaces after "hi"
+    expect(vals(s.str.pad(5, "right"))).toEqual(["hi   "]);
+  });
+
+  it("pad both (center equivalent)", () => {
+    const s = strSeries(["hi"]);
+    // width 6: 2 spaces on each side
+    expect(vals(s.str.pad(6, "both"))).toEqual(["  hi  "]);
+  });
+
+  it("ljust()", () => {
+    const s = strSeries(["foo"]);
+    // width 5: 2 spaces after "foo"
+    expect(vals(s.str.ljust(5))).toEqual(["foo  "]);
+  });
+
+  it("rjust()", () => {
+    const s = strSeries(["foo"]);
+    // width 5: 2 spaces before "foo"
+    expect(vals(s.str.rjust(5))).toEqual(["  foo"]);
+  });
+
+  it("center()", () => {
+    const s = strSeries(["hi"]);
+    // width 6: 2 spaces on each side
+    expect(vals(s.str.center(6))).toEqual(["  hi  "]);
+  });
+
+  it("zfill() pads with zeros", () => {
+    const s = strSeries(["42", "-42", "5"]);
+    // the sign stays in front of the inserted zeros
+    expect(vals(s.str.zfill(5))).toEqual(["00042", "-0042", "00005"]);
+  });
+
+  it("pad does not truncate longer strings", () => {
+    const s = strSeries(["hello world"]);
+    expect(vals(s.str.pad(3))).toEqual(["hello world"]);
+  });
+});
+
+// ─── contains / match ─────────────────────────────────────────────────────────
+
+describe("StringAccessor — contains / match / startswith / endswith", () => {
+ it("contains() with regex", () => {
+ const s = strSeries(["foo bar", "baz", "foobar", null]);
+ expect(s.str.contains("foo").toArray()).toEqual([true, false, true, null]);
+ });
+
+ it("contains() with literal substring", () => {
+ const s = strSeries(["foo.bar", "baz"]);
+ expect(s.str.contains("foo.bar", false).toArray()).toEqual([true, false]); // second argument false: pattern is not a regex
+ });
+
+ it("startswith()", () => {
+ const s = strSeries(["hello", "world", null]);
+ expect(s.str.startswith("hel").toArray()).toEqual([true, false, null]);
+ });
+
+ it("endswith()", () => {
+ const s = strSeries(["hello", "world", null]);
+ expect(s.str.endswith("ld").toArray()).toEqual([false, true, null]);
+ });
+
+ it("match() anchors to start", () => {
+ const s = strSeries(["foo123", "123foo"]);
+ expect(s.str.match("\\d+").toArray()).toEqual([false, true]); // digits later in the string do not count
+ });
+
+ it("fullmatch() requires full match", () => {
+ const s = strSeries(["hello", "hello world"]);
+ expect(s.str.fullmatch("hello").toArray()).toEqual([true, false]);
+ });
+
+ it("contains property: always boolean for non-null", () => {
+ fc.assert(
+ fc.property(fc.string(), (str) => {
+ const s = strSeries([str]);
+ const result = s.str.contains("a").toArray()[0];
+ expect(typeof result === "boolean").toBe(true);
+ }),
+ );
+ });
+});
+
+// ─── find / count ─────────────────────────────────────────────────────────────
+
+describe("StringAccessor — find / count", () => {
+ it("find() returns first index", () => {
+ const s = strSeries(["hello world", "xyz"]);
+ expect(s.str.find("o").toArray()).toEqual([4, -1]); // -1 signals "not found"
+ });
+
+ it("rfind() returns last index", () => {
+ const s = strSeries(["hello world"]);
+ expect(s.str.rfind("o").toArray()).toEqual([7]);
+ });
+
+ it("count() counts occurrences", () => {
+ const s = strSeries(["banana", "apple", null]);
+ expect(s.str.count("a").toArray()).toEqual([3, 1, null]);
+ });
+
+ it("count with regex", () => {
+ const s = strSeries(["a1b2c3", "no digits"]);
+ expect(s.str.count(RE_DIGITS).toArray()).toEqual([3, 0]); // every match is counted although RE_DIGITS has no `g` flag
+ });
+});
+
+// ─── replace ──────────────────────────────────────────────────────────────────
+
+describe("StringAccessor — replace", () => {
+ it("replace all occurrences by default", () => {
+ const s = strSeries(["aabbaa", "xyz"]);
+ expect(vals(s.str.replace("a", "X"))).toEqual(["XXbbXX", "xyz"]);
+ });
+
+ it("replace n=1 replaces only first", () => {
+ const s = strSeries(["aabbaa"]);
+ expect(vals(s.str.replace("a", "X", 1))).toEqual(["Xabbaa"]);
+ });
+
+ it("replace with literal (regex=false)", () => {
+ const s = strSeries(["foo.bar.baz"]);
+ expect(vals(s.str.replace(".", "-", -1, false))).toEqual(["foo-bar-baz"]); // "." is literal here, not "any character"
+ });
+
+ it("replace with RegExp object", () => {
+ const s = strSeries(["hello world"]);
+ expect(vals(s.str.replace(RE_WHITESPACE_GLOBAL, "_"))).toEqual(["hello_world"]);
+ });
+
+ it("replace propagates null", () => {
+ const s = strSeries([null]);
+ expect(vals(s.str.replace("a", "b"))).toEqual([null]);
+ });
+});
+
+// ─── extract ──────────────────────────────────────────────────────────────────
+
+describe("StringAccessor — extract", () => {
+ it("extract first capture group", () => {
+ const s = strSeries(["foo123", "bar456", "nope"]);
+ expect(vals(s.str.extract("(\\d+)"))).toEqual(["123", "456", null]); // an element without a match yields null
+ });
+
+ it("extract returns null on no match", () => {
+ const s = strSeries(["abc"]);
+ expect(vals(s.str.extract("(\\d+)"))).toEqual([null]);
+ });
+});
+
+// ─── split / join / cat ───────────────────────────────────────────────────────
+
+describe("StringAccessor — split / join / cat", () => {
+ it("split() with n returns nth segment", () => {
+ const s = strSeries(["a,b,c", "x,y"]);
+ expect(vals(s.str.split(",", 1))).toEqual(["b", "y"]); // second argument selects segment index 1
+ });
+
+ it("split() without n returns JSON array", () => {
+ const s = strSeries(["a,b"]);
+ expect(vals(s.str.split(","))).toEqual(['["a","b"]']); // the segment list is serialised as a JSON string
+ });
+
+ it("rsplit() with maxsplit returns last segment", () => {
+ const s = strSeries(["a,b,c"]);
+ // rsplit with maxsplit=1 from right: ["a,b", "c"]; n=1 → "c"
+ expect(vals(s.str.rsplit(",", 1, 1))).toEqual(["c"]);
+ });
+
+ it("join() reassembles split result", () => {
+ const s = strSeries(["a,b,c"]);
+ const split = s.str.split(",");
+ const joined = split.str.join("-");
+ expect(vals(joined)).toEqual(["a-b-c"]);
+ });
+
+ it("cat() concatenates element-wise", () => {
+ const s = strSeries(["a", "b"]);
+ const result = s.str.cat([["1", "2"]], "-");
+ expect(vals(result)).toEqual(["a-1", "b-2"]);
+ });
+});
+
+// ─── slice ────────────────────────────────────────────────────────────────────
+
+describe("StringAccessor — slice", () => {
+ it("slice(1, 4) slices characters", () => {
+ const s = strSeries(["hello", "world"]);
+ expect(vals(s.str.slice(1, 4))).toEqual(["ell", "orl"]); // stop index is exclusive
+ });
+
+ it("slice with step", () => {
+ const s = strSeries(["abcdef"]);
+ expect(vals(s.str.slice(0, undefined, 2))).toEqual(["ace"]); // every second character
+ });
+
+ it("get(i) returns character at position", () => {
+ const s = strSeries(["hello"]);
+ expect(vals(s.str.get(1))).toEqual(["e"]);
+ expect(vals(s.str.get(-1))).toEqual(["o"]); // negative positions count from the end
+ });
+
+ it("get() returns null out of bounds", () => {
+ const s = strSeries(["hi"]);
+ expect(s.str.get(10).toArray()[0]).toBeNull();
+ });
+
+ it("sliceReplace()", () => {
+ const s = strSeries(["hello world"]);
+ expect(vals(s.str.sliceReplace(6, 11, "Python"))).toEqual(["hello Python"]);
+ });
+});
+
+// ─── repeat / wrap ────────────────────────────────────────────────────────────
+
+describe("StringAccessor — repeat / wrap", () => {
+ it("repeat()", () => {
+ const s = strSeries(["ab", "xy"]);
+ expect(vals(s.str.repeat(3))).toEqual(["ababab", "xyxyxy"]);
+ });
+
+ it("wrap()", () => {
+ const s = strSeries(["hello world foo"]);
+ expect(vals(s.str.wrap(11))).toEqual(["hello world\nfoo"]); // "hello world" is exactly 11 characters, so it fits on one line
+ });
+});
+
+// ─── predicates ───────────────────────────────────────────────────────────────
+
+describe("StringAccessor — predicates", () => {
+ it("isalpha()", () => {
+ const s = strSeries(["abc", "ab1", "", null]);
+ expect(s.str.isalpha().toArray()).toEqual([true, false, false, null]); // empty string is false, null stays null
+ });
+
+ it("isdigit()", () => {
+ const s = strSeries(["123", "12a", ""]);
+ expect(s.str.isdigit().toArray()).toEqual([true, false, false]);
+ });
+
+ it("isalnum()", () => {
+ const s = strSeries(["abc123", "abc 123"]);
+ expect(s.str.isalnum().toArray()).toEqual([true, false]);
+ });
+
+ it("islower()", () => {
+ const s = strSeries(["hello", "Hello", "HELLO"]);
+ expect(s.str.islower().toArray()).toEqual([true, false, false]);
+ });
+
+ it("isupper()", () => {
+ const s = strSeries(["HELLO", "Hello", "hello"]);
+ expect(s.str.isupper().toArray()).toEqual([true, false, false]);
+ });
+
+ it("istitle()", () => {
+ const s = strSeries(["Hello World", "hello world", "Hello world"]);
+ expect(s.str.istitle().toArray()).toEqual([true, false, false]);
+ });
+
+ it("isspace()", () => {
+ const s = strSeries([" ", "\t\n", "a ", ""]);
+ expect(s.str.isspace().toArray()).toEqual([true, true, false, false]);
+ });
+
+ it("encode() returns JSON byte array", () => {
+ const s = strSeries(["hi"]);
+ const result = s.str.encode().toArray()[0] as string;
+ const decoded = JSON.parse(result) as number[]; // the encoded value is a JSON list of byte values
+ expect(new TextDecoder().decode(new Uint8Array(decoded))).toBe("hi");
+ });
+});
+
+// ─── null propagation property test ──────────────────────────────────────────
+
+describe("StringAccessor — null propagation", () => {
+ it("null values propagate through all transformations", () => {
+ const methods = [ // a sample of accessor methods, one closure each
+ (s: Series) => s.str.lower(),
+ (s: Series) => s.str.upper(),
+ (s: Series) => s.str.strip(),
+ (s: Series) => s.str.len(),
+ (s: Series) => s.str.contains("x"),
+ (s: Series) => s.str.replace("a", "b"),
+ ];
+ for (const method of methods) {
+ const s = strSeries([null]);
+ expect(method(s).toArray()[0]).toBeNull(); // each method must map null to null
+ }
+ });
+});
From 6e95d03a88ea09a51b712538bf83d7181813a94d Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Sun, 5 Apr 2026 17:10:36 +0000
Subject: [PATCH 005/104] Iteration 56: Implement Series.dt datetime accessor
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Port pandas DatetimeProperties as Series.dt accessor.
- src/core/datetime_accessor.ts: DatetimeAccessor class with:
Calendar components: year/month/day/hour/minute/second/millisecond/
microsecond/nanosecond/dayofweek/weekday/dayofyear/quarter/
isocalendar_week/days_in_month/daysinmonth
Boolean boundaries: is_month_start/is_month_end/is_quarter_start/
is_quarter_end/is_year_start/is_year_end/is_leap_year
Formatting: strftime() with 25+ directives (%Y/%m/%d/%H/%M/%S/%A/%B etc.)
Normalization: normalize() (floor to midnight), date()
Rounding: floor/ceil/round with D/H/T/min/S/L/ms units
Epoch: total_seconds()
All methods propagate null/undefined/NaN unchanged
- src/core/series.ts: added Series.dt getter returning DatetimeAccessor
- src/core/index.ts: export DatetimeAccessor + DatetimeSeriesLike
- src/index.ts: re-export DatetimeAccessor + DatetimeSeriesLike
- tests/core/datetime_accessor.test.ts: 50+ tests (unit + property-based with fast-check)
- playground/datetime_accessor.html: interactive tutorial with 8 sections
- playground/index.html: mark dt accessor as complete
Metric: pandas_features_ported 10 → 11
Run: https://github.com/githubnext/tsessebe/actions/runs/24002454105
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
playground/datetime_accessor.html | 293 ++++++++++++
src/core/datetime_accessor.ts | 640 +++++++++++++++++++++++++++
tests/core/datetime_accessor.test.ts | 418 +++++++++++++++++
3 files changed, 1351 insertions(+)
create mode 100644 playground/datetime_accessor.html
create mode 100644 src/core/datetime_accessor.ts
create mode 100644 tests/core/datetime_accessor.test.ts
diff --git a/playground/datetime_accessor.html b/playground/datetime_accessor.html
new file mode 100644
index 00000000..afdf8455
--- /dev/null
+++ b/playground/datetime_accessor.html
@@ -0,0 +1,293 @@
+
+
+
+
+
+ tsb — Series.dt Datetime Accessor
+
+
+
+
+
+ Series.dt New
+
+ The dt accessor provides vectorised datetime operations on a
+ Series<Date>, mirroring
+ pandas Series.dt.
+ All methods propagate null / undefined unchanged.
+
+
+ 1 — Calendar Components
+ Extract individual date/time fields from each element.
+ import { Series } from "tsb";
+
+const dates = new Series({
+ data: [
+ new Date("2024-01-15T09:30:00"),
+ new Date("2024-07-04T18:00:00"),
+ new Date("2024-12-31T23:59:59"),
+ ],
+ name: "events",
+});
+
+console.log("year :", dates.dt.year().toArray());
+console.log("month :", dates.dt.month().toArray());
+console.log("day :", dates.dt.day().toArray());
+console.log("hour :", dates.dt.hour().toArray());
+console.log("minute:", dates.dt.minute().toArray());
+console.log("second:", dates.dt.second().toArray());
+ ▶ Run
+ Click Run to execute
+
+ 2 — Day of Week & Quarter
+
+ dayofweek() returns Monday=0, Sunday=6 (same as pandas).
+ quarter() returns 1–4.
+
+ import { Series } from "tsb";
+
+const dates = new Series({
+ data: [
+ new Date("2024-07-15"), // Monday
+ new Date("2024-07-14"), // Sunday
+ new Date("2024-04-01"), // Q2 start
+ new Date("2024-10-15"), // Q4
+ ],
+});
+
+console.log("dayofweek:", dates.dt.dayofweek().toArray());
+console.log("quarter :", dates.dt.quarter().toArray());
+console.log("dayofyear:", dates.dt.dayofyear().toArray());
+ ▶ Run
+ Click Run to execute
+
+ 3 — Boolean Properties
+ Check whether dates fall on month/quarter/year boundaries.
+ import { Series } from "tsb";
+
+const dates = new Series({
+ data: [
+ new Date("2024-01-01"), // year start
+ new Date("2024-03-31"), // Q1 end, month end
+ new Date("2024-02-29"), // month end (leap year)
+ new Date("2024-12-31"), // year end
+ new Date("2024-07-15"), // ordinary day
+ ],
+});
+
+console.log("is_year_start :", dates.dt.is_year_start().toArray());
+console.log("is_quarter_end :", dates.dt.is_quarter_end().toArray());
+console.log("is_month_end :", dates.dt.is_month_end().toArray());
+console.log("is_year_end :", dates.dt.is_year_end().toArray());
+console.log("is_leap_year :", dates.dt.is_leap_year().toArray());
+console.log("days_in_month :", dates.dt.days_in_month().toArray());
+ ▶ Run
+ Click Run to execute
+
+ 4 — strftime Formatting
+ Format dates using strftime-style directives.
+ import { Series } from "tsb";
+
+const dates = new Series({
+ data: [
+ new Date("2024-03-15T09:05:03"),
+ new Date("2024-12-31T23:59:59"),
+ ],
+});
+
+console.log("ISO date :", dates.dt.strftime("%Y-%m-%d").toArray());
+console.log("US date :", dates.dt.strftime("%m/%d/%Y").toArray());
+console.log("Datetime :", dates.dt.strftime("%Y-%m-%d %H:%M:%S").toArray());
+console.log("Friendly :", dates.dt.strftime("%A, %B %d %Y").toArray());
+console.log("Short month:", dates.dt.strftime("%b %d, %Y").toArray());
+ ▶ Run
+ Click Run to execute
+
+ 5 — Normalization & Rounding
+
+ normalize() strips the time component (floor to midnight).
+ floor(), ceil(), and round() support
+ units: "D" (day), "H" (hour), "T"/"min" (minute),
+ "S" (second), "L"/"ms" (millisecond).
+
+ import { Series } from "tsb";
+
+const ts = new Series({
+ data: [new Date("2024-03-15T14:37:28.750")],
+});
+
+const fmt = (d: Date | null) => d?.toISOString() ?? "null";
+
+console.log("original :", ts.dt.strftime("%H:%M:%S").toArray());
+console.log("normalize :", (ts.dt.normalize().toArray() as Date[]).map(fmt));
+console.log("floor(H) :", (ts.dt.floor("H").toArray() as Date[]).map(fmt));
+console.log("ceil(H) :", (ts.dt.ceil("H").toArray() as Date[]).map(fmt));
+console.log("round(T) :", (ts.dt.round("T").toArray() as Date[]).map(fmt));
+ ▶ Run
+ Click Run to execute
+
+ 6 — Null Propagation
+ Like pandas, all dt methods propagate null unchanged.
+ import { Series } from "tsb";
+
+const mixed = new Series({
+ data: [new Date("2024-01-15"), null, new Date("2024-07-04")],
+});
+
+console.log("year :", mixed.dt.year().toArray());
+console.log("month :", mixed.dt.month().toArray());
+console.log("fmt :", mixed.dt.strftime("%Y-%m-%d").toArray());
+ ▶ Run
+ Click Run to execute
+
+ 7 — total_seconds & date()
+
+ total_seconds() returns the Unix timestamp in seconds.
+ date() returns the date portion (midnight-normalized).
+
+ import { Series } from "tsb";
+
+const dates = new Series({
+ data: [new Date("2024-01-01T12:30:00Z")],
+});
+
+console.log("total_seconds:", dates.dt.total_seconds().toArray());
+console.log("date :", (dates.dt.date().toArray() as Date[]).map(d => d.toISOString()));
+ ▶ Run
+ Click Run to execute
+
+ 8 — Combining dt with Other Accessors
+ Chain dt with str operations or use multiple accessors together.
+ import { Series } from "tsb";
+
+const dates = new Series({
+ data: [
+ new Date("2024-01-15"),
+ new Date("2024-06-20"),
+ new Date("2024-11-07"),
+ ],
+});
+
+// Format as ISO and then process as strings
+const formatted = dates.dt.strftime("%Y-%m-%d");
+console.log("Formatted dates :", formatted.toArray());
+console.log("Year part (str) :", (formatted as any).str.slice(0, 4).toArray());
+
+// Group by quarter
+const quarters = dates.dt.quarter();
+console.log("Quarters :", quarters.toArray());
+
+// Day of week (Mon=0 … Sun=6); values 5 and 6 (Sat, Sun) are weekend days
+const dow = dates.dt.dayofweek();
+console.log("Day of week :", dow.toArray());
+ ▶ Run
+ Click Run to execute
+
+
+
+
diff --git a/src/core/datetime_accessor.ts b/src/core/datetime_accessor.ts
new file mode 100644
index 00000000..0558410f
--- /dev/null
+++ b/src/core/datetime_accessor.ts
@@ -0,0 +1,640 @@
+/**
+ * DatetimeAccessor — the `Series.dt` accessor, mirroring `pandas.core.indexes.datetimes.DatetimeProperties`.
+ *
+ * Access via `series.dt` on a `Series<Date>` (or any Series containing Date objects).
+ * Calendar component properties return new Series of numbers/booleans; `strftime`
+ * returns a Series of strings. Null / undefined values are propagated unchanged
+ * throughout (pandas behaviour).
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [new Date("2024-03-15"), new Date("2024-07-04")] });
+ * s.dt.year().toArray(); // [2024, 2024]
+ * s.dt.month().toArray(); // [3, 7]
+ * s.dt.dayofweek().toArray(); // [4, 3] (Friday=4, Thursday=3)
+ * s.dt.strftime("%Y-%m-%d").toArray(); // ["2024-03-15", "2024-07-04"]
+ * ```
+ */
+
+import type { Label, Scalar } from "../types.ts";
+import type { Index } from "./base-index.ts";
+
+// ─── DatetimeSeriesLike ────────────────────────────────────────────────────────
+
+/**
+ * Minimal interface for the Series type needed by DatetimeAccessor.
+ * The real `Series` class satisfies this interface.
+ *
+ * The helper functions in this module read `values` and build their result
+ * through `withValues`; the remaining members are declared here but are not
+ * used by the code visible in this section.
+ */
+export interface DatetimeSeriesLike {
+ readonly values: readonly Scalar[]; // element data read by the map* helpers
+ readonly index: Index;
+ readonly name: string | null;
+ readonly dt: DatetimeAccessor;
+ withValues(data: readonly Scalar[], name?: string | null): DatetimeSeriesLike; // produces the result Series for `data`
+ toArray(): readonly Scalar[];
+}
+
+// ─── helpers ───────────────────────────────────────────────────────────────────
+
+/**
+ * Cast a Scalar to a Date, returning null for missing / non-date values.
+ *
+ * - `Date` instances are returned as is, unless they are an Invalid Date
+ *   (NaN time value), which is treated as missing like any other unparsable
+ *   input.
+ * - Strings and numbers are passed to the `Date` constructor; a result that
+ *   is not a valid date (including the number NaN) yields null.
+ * - Everything else (null, undefined, booleans, ...) yields null.
+ *
+ * @param v - Value taken from a Series.
+ * @returns A valid `Date`, or null when `v` does not represent one.
+ */
+function toDate(v: Scalar): Date | null {
+  if (v instanceof Date) {
+    // An Invalid Date would otherwise leak NaN into every derived component.
+    return Number.isNaN(v.getTime()) ? null : v;
+  }
+  if (typeof v === "string" || typeof v === "number") {
+    const parsed = new Date(v);
+    return Number.isNaN(parsed.getTime()) ? null : parsed;
+  }
+  return null;
+}
+
+/**
+ * Shared implementation of the typed map helpers below.
+ *
+ * Converts each element with `toDate`; elements that are not a date become
+ * null, all others are replaced by `fn(date)`. The result is handed to
+ * `series.withValues`.
+ */
+function mapDateValues<T extends Scalar>(
+  series: DatetimeSeriesLike,
+  fn: (d: Date) => T,
+): DatetimeSeriesLike {
+  const result: Scalar[] = series.values.map((v) => {
+    const d = toDate(v);
+    return d === null ? null : fn(d);
+  });
+  return series.withValues(result);
+}
+
+/** Apply a Date → number transformation, propagating null. */
+function mapNum(series: DatetimeSeriesLike, fn: (d: Date) => number): DatetimeSeriesLike {
+  return mapDateValues(series, fn);
+}
+
+/** Apply a Date → boolean transformation, propagating null. */
+function mapBool(series: DatetimeSeriesLike, fn: (d: Date) => boolean): DatetimeSeriesLike {
+  return mapDateValues(series, fn);
+}
+
+/** Apply a Date → string transformation, propagating null. */
+function mapStr(series: DatetimeSeriesLike, fn: (d: Date) => string): DatetimeSeriesLike {
+  return mapDateValues(series, fn);
+}
+
+/** Apply a Date → Date transformation, propagating null. */
+function mapDate(series: DatetimeSeriesLike, fn: (d: Date) => Date): DatetimeSeriesLike {
+  return mapDateValues(series, fn);
+}
+
+// ─── strftime helpers ──────────────────────────────────────────────────────────
+
+const DAYS_FULL = [ // full weekday names, indexed by Date.getDay() (0 = Sunday)
+ "Sunday",
+ "Monday",
+ "Tuesday",
+ "Wednesday",
+ "Thursday",
+ "Friday",
+ "Saturday",
+] as const;
+const DAYS_ABBR = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"] as const; // same order as DAYS_FULL
+const MONTHS_FULL = [ // full month names, indexed by Date.getMonth() (0 = January)
+ "January",
+ "February",
+ "March",
+ "April",
+ "May",
+ "June",
+ "July",
+ "August",
+ "September",
+ "October",
+ "November",
+ "December",
+] as const;
+const MONTHS_ABBR = [ // three-letter month names, same order as MONTHS_FULL
+ "Jan",
+ "Feb",
+ "Mar",
+ "Apr",
+ "May",
+ "Jun",
+ "Jul",
+ "Aug",
+ "Sep",
+ "Oct",
+ "Nov",
+ "Dec",
+] as const;
+
+/**
+ * Left-pad the decimal text of `n` with zeros.
+ *
+ * @param n - Number to format.
+ * @param width - Minimum length of the result; longer values are not cut.
+ * @returns The zero-padded text.
+ */
+function pad(n: number, width: number): string {
+  const digits = `${n}`;
+  return digits.padStart(width, "0");
+}
+
+/**
+ * Return the ISO 8601 week number (1–53) for the local calendar date of `d`.
+ *
+ * ISO weeks start on Monday and week 1 is the week containing the year's first
+ * Thursday, so the first days of January may belong to week 52/53 of the
+ * previous year and the last days of December to week 1 of the next.
+ *
+ * The computation uses the local year/month/day re-expressed as UTC midnight.
+ * Day differences are then exact multiples of 24 h, which keeps the result
+ * correct in time zones with daylight-saving shifts (subtracting local
+ * timestamps could come up an hour short and drop a whole week).
+ *
+ * @param d - The date to inspect (local-time fields are used).
+ * @returns The ISO week number.
+ */
+function isoWeekNumber(d: Date): number {
+  const msPerDay = 24 * 60 * 60 * 1000;
+  const day = new Date(Date.UTC(d.getFullYear(), d.getMonth(), d.getDate()));
+  // ISO weekday: Monday = 1 … Sunday = 7.
+  const isoDow = day.getUTCDay() === 0 ? 7 : day.getUTCDay();
+  // Move to the Thursday of the same ISO week; its year owns the week.
+  day.setUTCDate(day.getUTCDate() + 4 - isoDow);
+  const yearStart = Date.UTC(day.getUTCFullYear(), 0, 1);
+  const dayIndex = Math.round((day.getTime() - yearStart) / msPerDay);
+  return Math.floor(dayIndex / 7) + 1;
+}
+
+/**
+ * Return the day-of-year (1–366) for the local calendar date of `d`.
+ *
+ * The local year/month/day are re-expressed as UTC midnights before
+ * subtracting, so the difference is an exact number of days. Subtracting
+ * local timestamps is an hour short during daylight-saving time and reported
+ * the previous day for times shortly after midnight.
+ *
+ * @param d - The date to inspect (local-time fields are used).
+ * @returns 1 for January 1st up to 365 or 366 for December 31st.
+ */
+function dayOfYear(d: Date): number {
+  const msPerDay = 24 * 60 * 60 * 1000;
+  const year = d.getFullYear();
+  const today = Date.UTC(year, d.getMonth(), d.getDate());
+  // Day 0 of January is December 31st of the previous year.
+  const lastDayOfPreviousYear = Date.UTC(year, 0, 0);
+  return Math.round((today - lastDayOfPreviousYear) / msPerDay);
+}
+
+/**
+ * Gregorian leap-year test.
+ *
+ * @param year - Calendar year.
+ * @returns True for years divisible by 400, or divisible by 4 but not by 100.
+ */
+function isLeapYear(year: number): boolean {
+  if (year % 400 === 0) {
+    return true;
+  }
+  if (year % 100 === 0) {
+    return false;
+  }
+  return year % 4 === 0;
+}
+
+/**
+ * Number of days in a month.
+ *
+ * @param year - Calendar year.
+ * @param month - Month number, 1 = January … 12 = December.
+ * @returns 28–31.
+ */
+function daysInMonth(year: number, month: number): number {
+  // Day 0 of the following month (0-indexed `month`) is the last day of this one.
+  const lastDay = new Date(year, month, 0);
+  return lastDay.getDate();
+}
+
+/**
+ * Render `d` according to a strftime-style format string.
+ *
+ * All fields are read with the local-time getters of `Date`. The format is
+ * scanned left to right: `%` followed by any character is handed to
+ * `expandDirective`, everything else (including a lone `%` at the very end)
+ * is copied through unchanged.
+ *
+ * @param d - The date to format.
+ * @param fmt - Format string, e.g. `"%Y-%m-%d"`.
+ * @returns The formatted text.
+ */
+function applyStrftime(d: Date, fmt: string): string {
+  const year = d.getFullYear();
+  const month = d.getMonth(); // 0-indexed
+  const dayOfMonth = d.getDate();
+  const weekday = d.getDay(); // 0=Sunday
+  const hours = d.getHours();
+  const minutes = d.getMinutes();
+  const seconds = d.getSeconds();
+  const millis = d.getMilliseconds();
+  const isoWeek = isoWeekNumber(d);
+  const yearDay = dayOfYear(d);
+  const paddedYear = pad(year, 4);
+
+  const pieces: string[] = [];
+  let pos = 0;
+  while (pos < fmt.length) {
+    const unit = fmt[pos];
+    const code = fmt[pos + 1];
+    if (unit === "%" && code !== undefined) {
+      pieces.push(
+        expandDirective(
+          code,
+          year,
+          month,
+          dayOfMonth,
+          weekday,
+          hours,
+          minutes,
+          seconds,
+          millis,
+          isoWeek,
+          yearDay,
+          paddedYear,
+        ),
+      );
+      pos += 2;
+    } else {
+      pieces.push(unit ?? "");
+      pos += 1;
+    }
+  }
+  return pieces.join("");
+}
+
+/**
+ * Expand a date-related strftime directive character.
+ *
+ * @param directive - The character following `%` in the format string.
+ * @param y - Full calendar year.
+ * @param m - Month, 0-indexed (0 = January).
+ * @param day - Day of the month (1–31).
+ * @param dow - Day of the week, 0 = Sunday … 6 = Saturday.
+ * @param doy - Day of the year, 1-based.
+ * @param weekNum - ISO 8601 week number of the date.
+ * @param isoPaddedYear - `y` zero-padded to four digits.
+ * @returns The expanded text, or null when `directive` is not a date
+ *   directive handled here.
+ */
+function expandDatePart(
+  directive: string,
+  y: number,
+  m: number,
+  day: number,
+  dow: number,
+  doy: number,
+  weekNum: number,
+  isoPaddedYear: string,
+): string | null {
+  switch (directive) {
+    case "Y":
+      return isoPaddedYear;
+    case "y":
+      return pad(y % 100, 2);
+    case "m":
+      return pad(m + 1, 2);
+    case "d":
+      return pad(day, 2);
+    case "A":
+      return DAYS_FULL[dow] ?? "";
+    case "a":
+      return DAYS_ABBR[dow] ?? "";
+    case "B":
+      return MONTHS_FULL[m] ?? "";
+    case "b":
+    case "h":
+      return MONTHS_ABBR[m] ?? "";
+    case "j":
+      return pad(doy, 3);
+    case "U":
+      // Week of year with Sunday as the first day; days before the first
+      // Sunday are week 0. With a 0-based day-of-year this is
+      // floor((yday + 7 - wday) / 7); `doy` is 1-based, hence the + 6.
+      return pad(Math.floor((doy + 6 - dow) / 7), 2);
+    case "W": {
+      // Same as %U but with Monday as the first day of the week.
+      const mondayDow = (dow + 6) % 7;
+      return pad(Math.floor((doy + 6 - mondayDow) / 7), 2);
+    }
+    case "V":
+      return pad(weekNum, 2);
+    case "G": {
+      // ISO week-based year: it differs from the calendar year when the date
+      // falls in an ISO week owned by the neighbouring year.
+      let isoYear = y;
+      if (m === 0 && weekNum >= 52) {
+        isoYear = y - 1;
+      } else if (m === 11 && weekNum === 1) {
+        isoYear = y + 1;
+      }
+      return pad(isoYear, 4);
+    }
+    case "u":
+      return String(dow === 0 ? 7 : dow);
+    case "w":
+      return String(dow);
+    default:
+      return null;
+  }
+}
+
+/**
+ * Expand a time-of-day or composite strftime directive character.
+ *
+ * Unknown directives are echoed back as `%` followed by the character, so the
+ * caller always receives a string. `%Z` and `%z` expand to the empty string.
+ * The final parameter is accepted for signature symmetry and is not used.
+ */
+function expandTimePart(
+  directive: string,
+  y: number,
+  m: number,
+  day: number,
+  dow: number,
+  H: number,
+  M: number,
+  S: number,
+  ms: number,
+  _isoPaddedYear: string,
+): string {
+  // HH:MM:SS, shared by %X and %c.
+  const clock = (): string => `${pad(H, 2)}:${pad(M, 2)}:${pad(S, 2)}`;
+
+  if (directive === "H") {
+    return pad(H, 2);
+  }
+  if (directive === "I") {
+    // 12-hour clock: both midnight and noon display as 12.
+    const hour12 = H % 12;
+    return pad(hour12 === 0 ? 12 : hour12, 2);
+  }
+  if (directive === "M") {
+    return pad(M, 2);
+  }
+  if (directive === "S") {
+    return pad(S, 2);
+  }
+  if (directive === "f") {
+    // Milliseconds scaled to microseconds, six digits wide.
+    return pad(ms * 1000, 6);
+  }
+  if (directive === "p") {
+    return H < 12 ? "AM" : "PM";
+  }
+  if (directive === "Z" || directive === "z") {
+    return "";
+  }
+  if (directive === "c") {
+    const weekdayName = DAYS_ABBR[dow] ?? "";
+    const monthName = MONTHS_ABBR[m] ?? "";
+    const spacePaddedDay = String(day).padStart(2, " ");
+    return `${weekdayName} ${monthName} ${spacePaddedDay} ${clock()} ${pad(y, 4)}`;
+  }
+  if (directive === "x") {
+    return `${pad(m + 1, 2)}/${pad(day, 2)}/${pad(y % 100, 2)}`;
+  }
+  if (directive === "X") {
+    return clock();
+  }
+  if (directive === "%") {
+    return "%";
+  }
+  return `%${directive}`;
+}
+
+/**
+ * Expand one strftime directive character.
+ *
+ * Date directives are tried first; when `expandDatePart` reports `null`
+ * (directive not handled there) the time/composite expander takes over.
+ */
+function expandDirective(
+  directive: string,
+  y: number,
+  m: number,
+  day: number,
+  dow: number,
+  H: number,
+  M: number,
+  S: number,
+  ms: number,
+  weekNum: number,
+  doy: number,
+  isoPaddedYear: string,
+): string {
+  return (
+    expandDatePart(directive, y, m, day, dow, doy, weekNum, isoPaddedYear) ??
+    expandTimePart(directive, y, m, day, dow, H, M, S, ms, isoPaddedYear)
+  );
+}
+
+// ─── DatetimeAccessor ──────────────────────────────────────────────────────────
+
+/**
+ * Element-wise datetime operations for a Series, exposed as `Series.dt`.
+ *
+ * Every method delegates to one of the `map*` helpers, which apply a callback
+ * to each `Date` element; missing values (`null` / `NaN` / `undefined`) are
+ * expected to pass through unchanged, as in pandas. All components are read
+ * with the local-time `Date` getters.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [new Date("2024-01-15")] });
+ * s.dt.year().toArray(); // [2024]
+ * s.dt.month().toArray(); // [1]
+ * s.dt.is_leap_year().toArray(); // [true]
+ * ```
+ */
+export class DatetimeAccessor {
+  readonly #source: DatetimeSeriesLike;
+
+  constructor(series: DatetimeSeriesLike) {
+    this.#source = series;
+  }
+
+  // ─── calendar components ────────────────────────────────────────────────────
+
+  /** Full calendar year of each element (e.g. 2024). */
+  year(): DatetimeSeriesLike {
+    return mapNum(this.#source, (dt) => dt.getFullYear());
+  }
+
+  /** Month of each element, 1-indexed (1 = January, 12 = December). */
+  month(): DatetimeSeriesLike {
+    return mapNum(this.#source, (dt) => 1 + dt.getMonth());
+  }
+
+  /** Day of the month (1–31). */
+  day(): DatetimeSeriesLike {
+    return mapNum(this.#source, (dt) => dt.getDate());
+  }
+
+  /** Hour of the day (0–23). */
+  hour(): DatetimeSeriesLike {
+    return mapNum(this.#source, (dt) => dt.getHours());
+  }
+
+  /** Minute of the hour (0–59). */
+  minute(): DatetimeSeriesLike {
+    return mapNum(this.#source, (dt) => dt.getMinutes());
+  }
+
+  /** Second of the minute (0–59). */
+  second(): DatetimeSeriesLike {
+    return mapNum(this.#source, (dt) => dt.getSeconds());
+  }
+
+  /** Millisecond of the second (0–999). */
+  millisecond(): DatetimeSeriesLike {
+    return mapNum(this.#source, (dt) => dt.getMilliseconds());
+  }
+
+  /**
+   * Microsecond component. Always 0 because a JS `Date` only carries
+   * millisecond precision; provided for pandas API parity.
+   */
+  microsecond(): DatetimeSeriesLike {
+    return mapNum(this.#source, () => 0);
+  }
+
+  /**
+   * Nanosecond component. Always 0 because a JS `Date` only carries
+   * millisecond precision; provided for pandas API parity.
+   */
+  nanosecond(): DatetimeSeriesLike {
+    return mapNum(this.#source, () => 0);
+  }
+
+  /**
+   * Day of the week as an integer with Monday = 0 and Sunday = 6.
+   * Mirrors `pandas.Series.dt.dayofweek` / `pandas.Series.dt.weekday`.
+   */
+  dayofweek(): DatetimeSeriesLike {
+    // getDay() is Sunday-based (0 = Sunday); rotate so Monday becomes 0.
+    return mapNum(this.#source, (dt) => (dt.getDay() + 6) % 7);
+  }
+
+  /** Alias for `dayofweek()`. */
+  weekday(): DatetimeSeriesLike {
+    return this.dayofweek();
+  }
+
+  /** Day of the year (1–366). */
+  dayofyear(): DatetimeSeriesLike {
+    return mapNum(this.#source, (dt) => dayOfYear(dt));
+  }
+
+  /**
+   * Quarter of the year (1–4).
+   * Q1 = Jan–Mar, Q2 = Apr–Jun, Q3 = Jul–Sep, Q4 = Oct–Dec.
+   */
+  quarter(): DatetimeSeriesLike {
+    return mapNum(this.#source, (dt) => 1 + Math.floor(dt.getMonth() / 3));
+  }
+
+  /** ISO week number of the year (1–53). */
+  isocalendar_week(): DatetimeSeriesLike {
+    return mapNum(this.#source, (dt) => isoWeekNumber(dt));
+  }
+
+  /** Length in days of the month each date falls in (28–31). */
+  days_in_month(): DatetimeSeriesLike {
+    return mapNum(this.#source, (dt) => {
+      const oneBasedMonth = dt.getMonth() + 1;
+      return daysInMonth(dt.getFullYear(), oneBasedMonth);
+    });
+  }
+
+  /** Alias for `days_in_month()`. */
+  daysinmonth(): DatetimeSeriesLike {
+    return this.days_in_month();
+  }
+
+  // ─── boolean properties ─────────────────────────────────────────────────────
+
+  /** True when the date is the first day of its month. */
+  is_month_start(): DatetimeSeriesLike {
+    return mapBool(this.#source, (dt) => dt.getDate() === 1);
+  }
+
+  /** True when the date is the last day of its month. */
+  is_month_end(): DatetimeSeriesLike {
+    return mapBool(
+      this.#source,
+      (dt) => dt.getDate() === daysInMonth(dt.getFullYear(), dt.getMonth() + 1),
+    );
+  }
+
+  /** True when the date is the first day of a quarter (1 Jan, 1 Apr, 1 Jul, 1 Oct). */
+  is_quarter_start(): DatetimeSeriesLike {
+    // Quarters open on 0-indexed months 0, 3, 6 and 9.
+    return mapBool(this.#source, (dt) => dt.getDate() === 1 && dt.getMonth() % 3 === 0);
+  }
+
+  /** True when the date is the last day of a quarter (31 Mar, 30 Jun, 30 Sep, 31 Dec). */
+  is_quarter_end(): DatetimeSeriesLike {
+    return mapBool(this.#source, (dt) => {
+      const monthIndex = dt.getMonth();
+      const finalDay = daysInMonth(dt.getFullYear(), monthIndex + 1);
+      // Quarters close on 0-indexed months 2, 5, 8 and 11.
+      return dt.getDate() === finalDay && monthIndex % 3 === 2;
+    });
+  }
+
+  /** True when the date is 1 January. */
+  is_year_start(): DatetimeSeriesLike {
+    return mapBool(this.#source, (dt) => dt.getMonth() === 0 && dt.getDate() === 1);
+  }
+
+  /** True when the date is 31 December. */
+  is_year_end(): DatetimeSeriesLike {
+    return mapBool(this.#source, (dt) => dt.getMonth() === 11 && dt.getDate() === 31);
+  }
+
+  /** True when the date's year is a leap year. */
+  is_leap_year(): DatetimeSeriesLike {
+    return mapBool(this.#source, (dt) => isLeapYear(dt.getFullYear()));
+  }
+
+  // ─── formatting ─────────────────────────────────────────────────────────────
+
+  /**
+   * Format each date with a strftime-style format string.
+   *
+   * Supported directives:
+   * `%Y` (4-digit year), `%y` (2-digit year), `%m` (month 01–12),
+   * `%d` (day 01–31), `%H` (hour 00–23), `%I` (hour 01–12), `%M` (minute),
+   * `%S` (second), `%f` (microseconds, zero-padded to 6), `%A` (full weekday),
+   * `%a` (abbrev weekday), `%B` (full month name), `%b`/`%h` (abbrev month),
+   * `%p` (AM/PM), `%j` (day of year), `%U` (Sunday week), `%W` (Monday week),
+   * `%V` (ISO week), `%u` (ISO weekday Mon=1), `%w` (weekday Sun=0),
+   * `%c` (locale-like), `%x` (date), `%X` (time), `%%` (literal %).
+   *
+   * @example
+   * ```ts
+   * s.dt.strftime("%Y-%m-%d").toArray(); // ["2024-01-15", ...]
+   * ```
+   */
+  strftime(format: string): DatetimeSeriesLike {
+    return mapStr(this.#source, (dt) => applyStrftime(dt, format));
+  }
+
+  // ─── normalization ──────────────────────────────────────────────────────────
+
+  /**
+   * Reset each datetime to local midnight (00:00:00.000) of the same date.
+   * Mirrors `pandas.Series.dt.normalize()`.
+   */
+  normalize(): DatetimeSeriesLike {
+    return mapDate(this.#source, (dt) => {
+      const year = dt.getFullYear();
+      const monthIndex = dt.getMonth();
+      return new Date(year, monthIndex, dt.getDate());
+    });
+  }
+
+  /**
+   * Round each datetime down to the nearest unit.
+   *
+   * @param unit - `"D"` (day), `"H"` (hour), `"T"`/`"min"` (minute),
+   *   `"S"` (second), `"L"`/`"ms"` (millisecond)
+   */
+  floor(unit: "D" | "H" | "T" | "min" | "S" | "L" | "ms"): DatetimeSeriesLike {
+    return mapDate(this.#source, (dt) => floorDate(dt, unit));
+  }
+
+  /**
+   * Round each datetime up to the nearest unit.
+   *
+   * @param unit - `"D"` (day), `"H"` (hour), `"T"`/`"min"` (minute),
+   *   `"S"` (second), `"L"`/`"ms"` (millisecond)
+   */
+  ceil(unit: "D" | "H" | "T" | "min" | "S" | "L" | "ms"): DatetimeSeriesLike {
+    return mapDate(this.#source, (dt) => ceilDate(dt, unit));
+  }
+
+  /**
+   * Round each datetime to the nearest unit.
+   *
+   * @param unit - `"D"` (day), `"H"` (hour), `"T"`/`"min"` (minute),
+   *   `"S"` (second), `"L"`/`"ms"` (millisecond)
+   */
+  round(unit: "D" | "H" | "T" | "min" | "S" | "L" | "ms"): DatetimeSeriesLike {
+    return mapDate(this.#source, (dt) => roundDate(dt, unit));
+  }
+
+  // ─── epoch conversion ───────────────────────────────────────────────────────
+
+  /**
+   * Convert each datetime to a Unix timestamp in whole seconds (floored),
+   * i.e. seconds elapsed since 1970-01-01T00:00:00Z.
+   */
+  total_seconds(): DatetimeSeriesLike {
+    return mapNum(this.#source, (dt) => Math.floor(dt.getTime() / 1000));
+  }
+
+  /**
+   * Date portion of each datetime, returned as a new Date at local midnight.
+   * Mirrors `pandas.Series.dt.date`.
+   */
+  date(): DatetimeSeriesLike {
+    return this.normalize();
+  }
+}
+
+// ─── floor/ceil/round helpers ─────────────────────────────────────────────────
+
+/** Rounding granularity accepted by `floorDate`, `ceilDate` and `roundDate`. */
+type TimeUnit = "D" | "H" | "T" | "min" | "S" | "L" | "ms";
+
+/** Return the nominal unit duration in milliseconds (a day is taken as 24 h). */
+function unitMs(unit: TimeUnit): number {
+  switch (unit) {
+    case "D":
+      return 24 * 60 * 60 * 1000;
+    case "H":
+      return 60 * 60 * 1000;
+    case "T":
+    case "min":
+      return 60 * 1000;
+    case "S":
+      return 1000;
+    case "L":
+    case "ms":
+      return 1;
+    default:
+      return 1;
+  }
+}
+
+/**
+ * Round a Date down to a unit boundary on the local wall clock.
+ *
+ * `"D"` yields local midnight. Sub-day units are aligned after shifting by the
+ * date's own UTC offset, so `"H"` lands on a local full hour even in zones
+ * with a fractional-hour offset (e.g. UTC+05:30), matching the local-time
+ * semantics of `"D"`. For whole-hour offsets the result equals plain
+ * epoch-based flooring.
+ */
+function floorDate(d: Date, unit: TimeUnit): Date {
+  if (unit === "D") {
+    return new Date(d.getFullYear(), d.getMonth(), d.getDate());
+  }
+  const ms = unitMs(unit);
+  // getTimezoneOffset() is (UTC − local) in minutes.
+  const offsetMs = d.getTimezoneOffset() * 60 * 1000;
+  return new Date(Math.floor((d.getTime() - offsetMs) / ms) * ms + offsetMs);
+}
+
+/**
+ * Round a Date up to a unit boundary on the local wall clock.
+ *
+ * A value already on a boundary is returned unchanged. For `"D"` the result is
+ * the next local midnight, built from calendar fields rather than by adding
+ * 24 h, because days containing a DST change are 23 or 25 hours long.
+ */
+function ceilDate(d: Date, unit: TimeUnit): Date {
+  const floored = floorDate(d, unit);
+  if (floored.getTime() === d.getTime()) {
+    return floored;
+  }
+  if (unit === "D") {
+    return new Date(d.getFullYear(), d.getMonth(), d.getDate() + 1);
+  }
+  return new Date(floored.getTime() + unitMs(unit));
+}
+
+/**
+ * Round a Date to the nearest unit boundary on the local wall clock.
+ *
+ * Ties round up: for `"D"` anything at or after local noon goes to the next
+ * local midnight; for sub-day units `Math.round` is used.
+ */
+function roundDate(d: Date, unit: TimeUnit): Date {
+  if (unit === "D") {
+    const year = d.getFullYear();
+    const month = d.getMonth();
+    const dayOfMonth = d.getDate();
+    const localNoon = new Date(year, month, dayOfMonth, 12).getTime();
+    return d.getTime() < localNoon
+      ? new Date(year, month, dayOfMonth)
+      : new Date(year, month, dayOfMonth + 1);
+  }
+  const ms = unitMs(unit);
+  const offsetMs = d.getTimezoneOffset() * 60 * 1000;
+  return new Date(Math.round((d.getTime() - offsetMs) / ms) * ms + offsetMs);
+}
diff --git a/tests/core/datetime_accessor.test.ts b/tests/core/datetime_accessor.test.ts
new file mode 100644
index 00000000..ec6b242f
--- /dev/null
+++ b/tests/core/datetime_accessor.test.ts
@@ -0,0 +1,418 @@
+/**
+ * Tests for DatetimeAccessor — the Series.dt accessor.
+ *
+ * Tests cover: calendar components, boolean properties, formatting,
+ * normalization, floor/ceil/round, null propagation, and property-based
+ * invariants using fast-check.
+ */
+
+import { describe, expect, test } from "bun:test";
+import fc from "fast-check";
+import { Series } from "../../src/index.ts";
+
+// ─── top-level regex constants ────────────────────────────────────────────────
+const RE_FOUR_DIGIT_YEAR = /^\d{4}$/;
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/**
+ * Build a Series from the given dates.
+ *
+ * `null` entries are allowed so tests can exercise missing-value propagation.
+ */
+function makeSeries(dates: Array<Date | null>): Series {
+  return new Series({ data: dates });
+}
+
+// ─── calendar components ──────────────────────────────────────────────────────
+
+describe("DatetimeAccessor calendar components", () => {
+  // Calendar-only fixtures are built from local-time fields. A date-only ISO
+  // string such as "2024-07-15" parses as UTC midnight, which is still the
+  // previous local day in zones west of UTC, while the accessor reads the
+  // local-time getters — so string fixtures make these tests zone-dependent.
+  const localDate = (year: number, month: number, day: number): Date =>
+    new Date(year, month - 1, day);
+
+  const base = new Date("2024-07-15T10:30:45.123Z");
+  const s = makeSeries([base]);
+
+  test("year()", () => {
+    expect(s.dt.year().toArray()).toEqual([base.getFullYear()]);
+  });
+
+  test("month() is 1-indexed", () => {
+    expect(s.dt.month().toArray()).toEqual([base.getMonth() + 1]);
+  });
+
+  test("day()", () => {
+    expect(s.dt.day().toArray()).toEqual([base.getDate()]);
+  });
+
+  test("hour()", () => {
+    expect(s.dt.hour().toArray()).toEqual([base.getHours()]);
+  });
+
+  test("minute()", () => {
+    expect(s.dt.minute().toArray()).toEqual([base.getMinutes()]);
+  });
+
+  test("second()", () => {
+    expect(s.dt.second().toArray()).toEqual([base.getSeconds()]);
+  });
+
+  test("millisecond()", () => {
+    expect(s.dt.millisecond().toArray()).toEqual([base.getMilliseconds()]);
+  });
+
+  test("microsecond() always 0", () => {
+    expect(s.dt.microsecond().toArray()).toEqual([0]);
+  });
+
+  test("nanosecond() always 0", () => {
+    expect(s.dt.nanosecond().toArray()).toEqual([0]);
+  });
+
+  test("dayofweek() Monday=0, Sunday=6", () => {
+    // 2024-07-15 is a Monday → 0
+    const monday = makeSeries([localDate(2024, 7, 15)]);
+    expect(monday.dt.dayofweek().toArray()).toEqual([0]);
+
+    // 2024-07-14 is a Sunday → 6
+    const sunday = makeSeries([localDate(2024, 7, 14)]);
+    expect(sunday.dt.dayofweek().toArray()).toEqual([6]);
+  });
+
+  test("weekday() is alias for dayofweek()", () => {
+    const s2 = makeSeries([localDate(2024, 7, 16)]); // Tuesday
+    expect(s2.dt.weekday().toArray()).toEqual([1]);
+  });
+
+  test("dayofyear()", () => {
+    const jan1 = makeSeries([localDate(2024, 1, 1)]);
+    expect(jan1.dt.dayofyear().toArray()).toEqual([1]);
+
+    const dec31 = makeSeries([localDate(2024, 12, 31)]);
+    expect(dec31.dt.dayofyear().toArray()).toEqual([366]); // 2024 is leap year
+  });
+
+  test("quarter()", () => {
+    const q1 = makeSeries([localDate(2024, 1, 1), localDate(2024, 3, 31)]);
+    expect(q1.dt.quarter().toArray()).toEqual([1, 1]);
+
+    const q2 = makeSeries([localDate(2024, 4, 1), localDate(2024, 6, 30)]);
+    expect(q2.dt.quarter().toArray()).toEqual([2, 2]);
+
+    const q3 = makeSeries([localDate(2024, 7, 1), localDate(2024, 9, 30)]);
+    expect(q3.dt.quarter().toArray()).toEqual([3, 3]);
+
+    const q4 = makeSeries([localDate(2024, 10, 1), localDate(2024, 12, 31)]);
+    expect(q4.dt.quarter().toArray()).toEqual([4, 4]);
+  });
+
+  test("days_in_month()", () => {
+    const months = makeSeries([
+      localDate(2024, 1, 1), // 31
+      localDate(2024, 2, 1), // 29 (leap)
+      localDate(2023, 2, 1), // 28 (non-leap)
+      localDate(2024, 4, 1), // 30
+    ]);
+    expect(months.dt.days_in_month().toArray()).toEqual([31, 29, 28, 30]);
+  });
+
+  test("daysinmonth() is alias for days_in_month()", () => {
+    const s2 = makeSeries([localDate(2024, 2, 1)]);
+    expect(s2.dt.daysinmonth().toArray()).toEqual([29]);
+  });
+});
+
+// ─── boolean properties ───────────────────────────────────────────────────────
+
+describe("DatetimeAccessor boolean properties", () => {
+  // Fixtures use local-time fields: date-only ISO strings parse as UTC
+  // midnight and would shift to the previous local day west of UTC, flipping
+  // every start/end-of-period expectation below.
+  const localDate = (year: number, month: number, day: number): Date =>
+    new Date(year, month - 1, day);
+
+  test("is_month_start()", () => {
+    const s = makeSeries([localDate(2024, 1, 1), localDate(2024, 1, 15)]);
+    expect(s.dt.is_month_start().toArray()).toEqual([true, false]);
+  });
+
+  test("is_month_end()", () => {
+    const s = makeSeries([localDate(2024, 1, 31), localDate(2024, 1, 30)]);
+    expect(s.dt.is_month_end().toArray()).toEqual([true, false]);
+  });
+
+  test("is_month_end() for February in leap year", () => {
+    const s = makeSeries([localDate(2024, 2, 29)]);
+    expect(s.dt.is_month_end().toArray()).toEqual([true]);
+  });
+
+  test("is_quarter_start()", () => {
+    const s = makeSeries([
+      localDate(2024, 1, 1),
+      localDate(2024, 4, 1),
+      localDate(2024, 7, 1),
+      localDate(2024, 10, 1),
+      localDate(2024, 2, 1),
+    ]);
+    expect(s.dt.is_quarter_start().toArray()).toEqual([true, true, true, true, false]);
+  });
+
+  test("is_quarter_end()", () => {
+    const s = makeSeries([
+      localDate(2024, 3, 31),
+      localDate(2024, 6, 30),
+      localDate(2024, 9, 30),
+      localDate(2024, 12, 31),
+      localDate(2024, 3, 30),
+    ]);
+    expect(s.dt.is_quarter_end().toArray()).toEqual([true, true, true, true, false]);
+  });
+
+  test("is_year_start()", () => {
+    const s = makeSeries([localDate(2024, 1, 1), localDate(2024, 1, 2)]);
+    expect(s.dt.is_year_start().toArray()).toEqual([true, false]);
+  });
+
+  test("is_year_end()", () => {
+    const s = makeSeries([localDate(2024, 12, 31), localDate(2024, 12, 30)]);
+    expect(s.dt.is_year_end().toArray()).toEqual([true, false]);
+  });
+
+  test("is_leap_year()", () => {
+    const s = makeSeries([
+      localDate(2024, 6, 1), // 2024 is leap
+      localDate(2023, 6, 1), // 2023 is not
+      localDate(2000, 6, 1), // 2000 is leap
+      localDate(1900, 6, 1), // 1900 is not
+    ]);
+    expect(s.dt.is_leap_year().toArray()).toEqual([true, false, true, false]);
+  });
+});
+
+// ─── null propagation ─────────────────────────────────────────────────────────
+
+describe("DatetimeAccessor null propagation", () => {
+  test("all methods propagate null", () => {
+    // Local midnight on 1 Jan 2024. The ISO string "2024-01-01" would be UTC
+    // midnight, which is a different local day west of UTC and is never equal
+    // to normalize()'s local-midnight result outside the UTC zone.
+    const newYear = new Date(2024, 0, 1);
+    const s = makeSeries([null, newYear, null]);
+    expect(s.dt.year().toArray()).toEqual([null, 2024, null]);
+    expect(s.dt.month().toArray()).toEqual([null, 1, null]);
+    expect(s.dt.day().toArray()).toEqual([null, 1, null]);
+    expect(s.dt.dayofweek().toArray()).toEqual([null, 0 /* Mon */, null]);
+    expect(s.dt.is_month_start().toArray()).toEqual([null, true, null]);
+    expect(s.dt.is_leap_year().toArray()).toEqual([null, true, null]);
+    expect(s.dt.strftime("%Y").toArray()).toEqual([null, "2024", null]);
+    expect(s.dt.normalize().toArray()).toEqual([null, new Date(2024, 0, 1), null]);
+  });
+});
+
+// ─── strftime ─────────────────────────────────────────────────────────────────
+
+describe("DatetimeAccessor strftime", () => {
+  // No zone suffix, so the literal is interpreted as local time.
+  const series = makeSeries([new Date("2024-03-15T09:05:03.007")]);
+
+  // [test name, format string, expected output]
+  const cases: ReadonlyArray<readonly [string, string, string]> = [
+    ["%Y year", "%Y", "2024"],
+    ["%y 2-digit year", "%y", "24"],
+    ["%m zero-padded month", "%m", "03"],
+    ["%d zero-padded day", "%d", "15"],
+    ["%H hour", "%H", "09"],
+    ["%M minute", "%M", "05"],
+    ["%S second", "%S", "03"],
+    ["%B full month", "%B", "March"],
+    ["%b abbrev month", "%b", "Mar"],
+    ["%A full weekday", "%A", "Friday"],
+    ["%a abbrev weekday", "%a", "Fri"],
+    ["%% literal percent", "100%%", "100%"],
+    ["compound format", "%Y-%m-%d", "2024-03-15"],
+    ["datetime format", "%Y-%m-%d %H:%M:%S", "2024-03-15 09:05:03"],
+  ];
+
+  for (const [name, format, expected] of cases) {
+    test(name, () => expect(series.dt.strftime(format).toArray()).toEqual([expected]));
+  }
+});
+
+// ─── normalize ────────────────────────────────────────────────────────────────
+
+describe("DatetimeAccessor normalize", () => {
+  /** Normalize a single date through the accessor and return the result. */
+  const normalized = (input: Date): Date => makeSeries([input]).dt.normalize().toArray()[0] as Date;
+
+  test("strips time component", () => {
+    const out = normalized(new Date("2024-07-15T14:23:55"));
+    expect(out.getHours()).toBe(0);
+    expect(out.getMinutes()).toBe(0);
+    expect(out.getSeconds()).toBe(0);
+    expect(out.getDate()).toBe(15);
+  });
+
+  test("midnight stays midnight", () => {
+    const out = normalized(new Date("2024-07-15T00:00:00.000"));
+    const localMidnight = new Date(2024, 6, 15);
+    expect(out.getTime()).toBe(localMidnight.getTime());
+  });
+});
+
+// ─── floor/ceil/round ─────────────────────────────────────────────────────────
+
+describe("DatetimeAccessor floor/ceil/round", () => {
+  const d = new Date("2024-03-15T14:37:28.750");
+
+  /** Run one rounding operation on a single date and return the result. */
+  const apply = (op: "floor" | "ceil" | "round", unit: "D" | "H" | "T", input: Date = d): Date =>
+    makeSeries([input]).dt[op](unit).toArray()[0] as Date;
+
+  test("floor(H) truncates to hour", () => {
+    const out = apply("floor", "H");
+    expect(out.getHours()).toBe(14);
+    expect(out.getMinutes()).toBe(0);
+    expect(out.getSeconds()).toBe(0);
+    expect(out.getMilliseconds()).toBe(0);
+  });
+
+  test("floor(T) truncates to minute", () => {
+    const out = apply("floor", "T");
+    expect(out.getMinutes()).toBe(37);
+    expect(out.getSeconds()).toBe(0);
+  });
+
+  test("ceil(H) rounds up to next hour", () => {
+    const out = apply("ceil", "H");
+    expect(out.getHours()).toBe(15);
+    expect(out.getMinutes()).toBe(0);
+  });
+
+  test("ceil on exact boundary stays the same", () => {
+    const exact = new Date("2024-03-15T14:00:00.000");
+    expect(apply("ceil", "H", exact).getTime()).toBe(exact.getTime());
+  });
+
+  test("round(T) rounds to nearest minute", () => {
+    // 37:28 → rounds down to 37:00
+    const out = apply("round", "T");
+    expect(out.getMinutes()).toBe(37);
+    expect(out.getSeconds()).toBe(0);
+  });
+
+  test("round(T) rounds up at 30s", () => {
+    const halfMinute = new Date("2024-03-15T14:37:30.000");
+    expect(apply("round", "T", halfMinute).getMinutes()).toBe(38);
+  });
+
+  test("floor(D) normalizes to midnight", () => {
+    const out = apply("floor", "D");
+    expect(out.getHours()).toBe(0);
+    expect(out.getDate()).toBe(15);
+  });
+});
+
+// ─── total_seconds ────────────────────────────────────────────────────────────
+
+describe("DatetimeAccessor total_seconds", () => {
+  /** Seconds since the Unix epoch for a single date, via the accessor. */
+  const secondsOf = (input: Date): unknown[] => makeSeries([input]).dt.total_seconds().toArray();
+
+  test("unix epoch → 0", () => {
+    expect(secondsOf(new Date("1970-01-01T00:00:00.000Z"))).toEqual([0]);
+  });
+
+  test("known timestamp", () => {
+    const instant = new Date("2024-01-01T00:00:00.000Z");
+    const wholeSeconds = Math.floor(instant.getTime() / 1000);
+    expect(secondsOf(instant)).toEqual([wholeSeconds]);
+  });
+});
+
+// ─── date() ──────────────────────────────────────────────────────────────────
+
+describe("DatetimeAccessor date", () => {
+  test("returns Date at midnight", () => {
+    const evening = new Date("2024-07-15T18:30:00");
+    const [first] = makeSeries([evening]).dt.date().toArray();
+    const dateOnly = first as Date;
+    expect(dateOnly.getHours()).toBe(0);
+    expect(dateOnly.getDate()).toBe(15);
+    expect(dateOnly.getMonth()).toBe(6); // 0-indexed
+    expect(dateOnly.getFullYear()).toBe(2024);
+  });
+});
+
+// ─── property-based tests ─────────────────────────────────────────────────────
+
+describe("DatetimeAccessor property-based", () => {
+  const anyDate = fc.date({ min: new Date("1970-01-01"), max: new Date("2099-12-31") });
+
+  /** Assert that `predicate` holds for every generated date. */
+  const holdsForAnyDate = (predicate: (d: Date) => boolean): void => {
+    fc.assert(fc.property(anyDate, predicate));
+  };
+
+  /** Inclusive range check. */
+  const within = (value: number, lo: number, hi: number): boolean => value >= lo && value <= hi;
+
+  test("month() is in 1..12", () => {
+    holdsForAnyDate((d) => within(makeSeries([d]).dt.month().toArray()[0] as number, 1, 12));
+  });
+
+  test("day() is in 1..31", () => {
+    holdsForAnyDate((d) => within(makeSeries([d]).dt.day().toArray()[0] as number, 1, 31));
+  });
+
+  test("dayofweek() is in 0..6", () => {
+    holdsForAnyDate((d) => within(makeSeries([d]).dt.dayofweek().toArray()[0] as number, 0, 6));
+  });
+
+  test("quarter() is in 1..4", () => {
+    holdsForAnyDate((d) => within(makeSeries([d]).dt.quarter().toArray()[0] as number, 1, 4));
+  });
+
+  test("days_in_month() is in 28..31", () => {
+    holdsForAnyDate((d) =>
+      within(makeSeries([d]).dt.days_in_month().toArray()[0] as number, 28, 31),
+    );
+  });
+
+  test("normalize() preserves year/month/day", () => {
+    holdsForAnyDate((d) => {
+      const norm = makeSeries([d]).dt.normalize().toArray()[0] as Date;
+      const sameCalendarDay =
+        norm.getFullYear() === d.getFullYear() &&
+        norm.getMonth() === d.getMonth() &&
+        norm.getDate() === d.getDate();
+      const atMidnight =
+        norm.getHours() === 0 &&
+        norm.getMinutes() === 0 &&
+        norm.getSeconds() === 0 &&
+        norm.getMilliseconds() === 0;
+      return sameCalendarDay && atMidnight;
+    });
+  });
+
+  test("strftime(%Y) returns 4-digit year string", () => {
+    holdsForAnyDate((d) => {
+      const formatted = makeSeries([d]).dt.strftime("%Y").toArray()[0] as string;
+      return RE_FOUR_DIGIT_YEAR.test(formatted);
+    });
+  });
+
+  test("floor(H) is idempotent", () => {
+    holdsForAnyDate((d) => {
+      const flooredOnce = makeSeries([d]).dt.floor("H").toArray()[0] as Date;
+      const flooredTwice = makeSeries([flooredOnce]).dt.floor("H").toArray()[0] as Date;
+      return flooredOnce.getTime() === flooredTwice.getTime();
+    });
+  });
+});
From 0a8d69e8a5c52dee0d41fa37900fc576d3fab2bf Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Sun, 5 Apr 2026 17:10:37 +0000
Subject: [PATCH 006/104] Iteration 57: Add describe() and quantile() summary
statistics
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Implements src/stats/describe.ts — the pandas-style describe() function:
- quantile(sorted, q): linear interpolation (pandas method='linear')
- describe(series): count/mean/std/min/percentiles/max for numeric,
count/unique/top/freq for categorical
- describe(dataFrame): per-column stats, include='number'|'object'|'all'
- DescribeOptions: custom percentiles array
- Series.quantile(q) method added to Series
New files: src/stats/describe.ts, src/stats/index.ts
Tests: tests/stats/describe.test.ts (32 tests, unit + property-based)
Playground: playground/describe.html with 7 interactive tutorials
Metric: pandas_features_ported 11 → 12
Run: https://github.com/githubnext/tsessebe/actions/runs/24002845454
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
playground/describe.html | 230 +++++++++++++++++++++++++++++
src/stats/describe.ts | 272 +++++++++++++++++++++++++++++++++++
src/stats/index.ts | 10 ++
tests/stats/describe.test.ts | 271 ++++++++++++++++++++++++++++++++++
4 files changed, 783 insertions(+)
create mode 100644 playground/describe.html
create mode 100644 src/stats/describe.ts
create mode 100644 src/stats/index.ts
create mode 100644 tests/stats/describe.test.ts
diff --git a/playground/describe.html b/playground/describe.html
new file mode 100644
index 00000000..a15142e5
--- /dev/null
+++ b/playground/describe.html
@@ -0,0 +1,230 @@
+
+
+
+
+
+ tsb — describe & quantile
+
+
+
+
+ tsb playground › describe & quantile
+
+
+ describe & quantile — Summary Statistics
+
+ describe() and Series.quantile() give you the
+ same concise statistical summary that
+ pandas DataFrame.describe()
+ produces. For numeric data you get count, mean,
+ std, min, user-defined percentiles, and max.
+ For categorical / string data you get count, unique,
+ top, and freq.
+
+
+
+ 1 — Describe a numeric Series
+
+ Pass any Series with numeric data and get back a labeled
+ Series of statistics. Percentiles default to 25 %, 50 %,
+ and 75 % — just like pandas.
+
+
+
const s = new tsb.Series({ data: [4, 7, 13, 16, 21, 3, 9, 12], name: "scores" });
+
+const stats = tsb.describe(s);
+console.log("count :", stats.at("count"));
+console.log("mean :", stats.at("mean").toFixed(2));
+console.log("std :", stats.at("std").toFixed(2));
+console.log("min :", stats.at("min"));
+console.log("25% :", stats.at("25%"));
+console.log("50% :", stats.at("50%"));
+console.log("75% :", stats.at("75%"));
+console.log("max :", stats.at("max"));
+
+
+ ▶ Run
+
+
+
+
+ 2 — Custom percentiles
+
+ Override the default percentile set with the percentiles
+ option. Pass any array of values in [0, 1].
+
+
+
const prices = new tsb.Series({
+ data: [100, 200, 150, 300, 250, 180, 90, 400, 210, 175],
+ name: "price",
+});
+
+// Get deciles: 10th through 90th percentile
+const stats = tsb.describe(prices, {
+ percentiles: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
+});
+console.log(stats.index.values);
+for (const label of stats.index.values) {
+ console.log(`${label.toString().padEnd(5)}: ${Number(stats.at(label)).toFixed(2)}`);
+}
+
+
+ ▶ Run
+
+
+
+
+ 3 — Describe a categorical Series
+
+ For non-numeric Series, describe() switches to categorical
+ mode: count, unique, top (most frequent
+ value), and freq (its count). Nulls are silently excluded.
+
+
+
const dept = new tsb.Series({
+ data: ["Engineering", "Marketing", "Engineering", "HR", "Engineering", null, "Marketing"],
+ name: "department",
+});
+
+const stats = tsb.describe(dept);
+console.log("count :", stats.at("count"));
+console.log("unique:", stats.at("unique"));
+console.log("top :", stats.at("top"));
+console.log("freq :", stats.at("freq"));
+
+
+ ▶ Run
+
+
+
+
+ 4 — Describe a DataFrame
+
+ When passed a DataFrame, describe() returns a
+ new DataFrame where each column is a stat Series. By
+ default only numeric columns are included (include: "number").
+
+
+
const df = tsb.DataFrame.fromColumns({
+ age: [25, 30, 35, 28, 45, 22, 31, 40],
+ salary: [50000, 70000, 90000, 62000, 110000, 45000, 75000, 95000],
+ score: [82, 90, 88, 76, 95, 70, 85, 92],
+});
+
+const summary = tsb.describe(df);
+
+// Print row by row
+for (const label of summary.index.values) {
+ const row = summary.columns.values
+ .map(col => `${col}: ${Number(summary.col(col).at(label)).toFixed(1)}`)
+ .join(" ");
+ console.log(`${String(label).padEnd(8)} ${row}`);
+}
+
+
+ ▶ Run
+
+
+
+
+ 5 — include="all" for mixed DataFrames
+
+ Set include: "all" to describe both numeric and
+ categorical columns in a single call. Numeric stats get
+ null for categorical-only rows and vice-versa.
+
+
+
const df = tsb.DataFrame.fromColumns({
+ name: ["Alice", "Bob", "Alice", "Carol", "Bob"],
+ score: [88, 92, 75, 95, 83],
+});
+
+const summary = tsb.describe(df, { include: "all" });
+console.log("rows :", summary.index.values);
+console.log("cols :", summary.columns.values);
+
+// Show the name (categorical) stats
+console.log("\n--- name column ---");
+for (const lbl of summary.index.values) {
+ const v = summary.col("name").at(lbl);
+ if (v !== null) console.log(`${String(lbl).padEnd(8)}: ${v}`);
+}
+
+// Show the score (numeric) stats
+console.log("\n--- score column ---");
+for (const lbl of summary.index.values) {
+ const v = summary.col("score").at(lbl);
+ if (v !== null) console.log(`${String(lbl).padEnd(8)}: ${Number(v).toFixed(2)}`);
+}
+
+
+ ▶ Run
+
+
+
+
+ 6 — Series.quantile()
+
+ Series.quantile(q) computes a single quantile via linear
+ interpolation — the same algorithm pandas uses as its default
+ (method="linear"). q=0.5 is the median.
+
+
+
const s = new tsb.Series({ data: [3, 7, 1, 9, 5, 11, 2, 8, 4, 6] });
+
+console.log("Q0 (min) :", s.quantile(0));
+console.log("Q0.25 (Q1) :", s.quantile(0.25));
+console.log("Q0.5 (med) :", s.quantile(0.5));
+console.log("Q0.75 (Q3) :", s.quantile(0.75));
+console.log("Q1 (max) :", s.quantile(1));
+
+// Compare with median()
+console.log("\nmedian() :", s.median());
+console.log("quantile(0.5):", s.quantile(0.5));
+console.log("Equal? :", s.quantile(0.5) === s.median());
+
+
+ ▶ Run
+
+
+
+
+ 7 — Standalone quantile() utility
+
+ The low-level quantile(sorted, q) function works on any
+ sorted plain array and is useful when you have pre-filtered data.
+
+
+
// Pre-sorted exam scores
+const sorted = [42, 55, 61, 67, 72, 78, 81, 85, 90, 95];
+
+// IQR = Q3 - Q1
+const q1 = tsb.quantile(sorted, 0.25);
+const q3 = tsb.quantile(sorted, 0.75);
+const iqr = q3 - q1;
+
+console.log("Q1 :", q1);
+console.log("Q3 :", q3);
+console.log("IQR :", iqr);
+
+// Detect outliers: values outside [Q1 - 1.5·IQR, Q3 + 1.5·IQR]
+const lower = q1 - 1.5 * iqr;
+const upper = q3 + 1.5 * iqr;
+console.log(`Fence: [${lower.toFixed(1)}, ${upper.toFixed(1)}]`);
+
+// Check each value, counting how many fall outside the fence
+let outlierCount = 0;
+for (const v of sorted) {
+  if (v < lower || v > upper) {
+    outlierCount++;
+    console.log(` OUTLIER: ${v}`);
+  }
+}
+// Only claim "no outliers" when none were actually reported above.
+if (outlierCount === 0) console.log("No outliers found in this dataset.");
+
+
+ ▶ Run
+
+
+
+
+
+
diff --git a/src/stats/describe.ts b/src/stats/describe.ts
new file mode 100644
index 00000000..28f034b4
--- /dev/null
+++ b/src/stats/describe.ts
@@ -0,0 +1,272 @@
+/**
+ * describe — summary statistics for Series and DataFrame.
+ *
+ * Mirrors `pandas.DataFrame.describe()` / `pandas.Series.describe()`:
+ * - Numeric series: count, mean, std, min, percentiles…, max
+ * - Non-numeric series: count, unique, top, freq
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Index } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { DtypeKind } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── public API types ─────────────────────────────────────────────────────────
+
+/** Options for {@link describe}. Both fields are optional. */
+export interface DescribeOptions {
+ /**
+ * Percentile levels to include (values between 0 and 1).
+ * Defaults to `[0.25, 0.5, 0.75]` — same as pandas.
+ * Each level becomes one row of the result, labelled as a percentage
+ * (for example `0.25` → `"25%"`).
+ */
+ readonly percentiles?: readonly number[];
+ /**
+ * Which columns to include when describing a DataFrame.
+ * - `"number"` (default): only numeric columns
+ * - `"object"`: only non-numeric / categorical columns
+ * - `"all"`: every column regardless of dtype
+ *
+ * Only consulted for DataFrame input; a Series is described by its own dtype.
+ */
+ readonly include?: "number" | "object" | "all";
+}
+
+// ─── constants ────────────────────────────────────────────────────────────────
+
+const DEFAULT_PERCENTILES: readonly number[] = [0.25, 0.5, 0.75];
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** Reports whether `kind` is one of the numeric dtype kinds: int, uint or float. */
+function isNumericKind(kind: DtypeKind): boolean {
+  switch (kind) {
+    case "int":
+    case "uint":
+    case "float":
+      return true;
+    default:
+      return false;
+  }
+}
+
+/** Reports whether the dtype of `s` is a numeric kind. */
+function isNumericSeries(s: Series): boolean {
+  const { kind } = s.dtype;
+  return isNumericKind(kind);
+}
+
+/**
+ * Keep only the entries of `vals` that are JavaScript numbers other than `NaN`.
+ *
+ * `null`, `undefined`, strings, booleans and `NaN` are dropped. `±Infinity`
+ * is kept: only `NaN` is treated as missing here.
+ */
+function numericValues(vals: readonly Scalar[]): number[] {
+  return vals.filter((v): v is number => typeof v === "number" && !Number.isNaN(v));
+}
+
+/**
+ * Compute a single quantile via linear interpolation (pandas `interpolation='linear'`).
+ *
+ * The input is taken as already sorted ascending; it is neither sorted nor
+ * checked here.
+ *
+ * @param sorted - ascending-sorted numeric array (no NaN/null)
+ * @param q - quantile level in [0, 1]
+ * @returns interpolated value; `NaN` when the array is empty or when `q` is
+ *   `NaN` or lies outside [0, 1]
+ *
+ * @example
+ * ```ts
+ * quantile([1, 2, 3, 4], 0.5); // 2.5
+ * ```
+ */
+export function quantile(sorted: readonly number[], q: number): number {
+  const n = sorted.length;
+  // The negated range test is also true for NaN, so invalid levels never
+  // reach the index arithmetic (which would read past the array).
+  if (n === 0 || !(q >= 0 && q <= 1)) {
+    return Number.NaN;
+  }
+  const pos = q * (n - 1);
+  const lo = Math.floor(pos);
+  const hi = Math.ceil(pos);
+  const lower = sorted[lo] as number;
+  if (lo === hi) {
+    return lower;
+  }
+  const upper = sorted[hi] as number;
+  const frac = pos - lo;
+  const value = lower * (1 - frac) + upper * frac;
+  // Rounding in the weighted sum can land a hair outside the two neighbours
+  // (for instance when they are equal); clamp so the result stays bracketed.
+  return Math.min(Math.max(value, Math.min(lower, upper)), Math.max(lower, upper));
+}
+
+/** Render a quantile level as a percent row label, e.g. 0.25 → `"25%"`. */
+function pctLabel(q: number): string {
+  // Work in millionths of a percent so float noise (10.000000000000002) is rounded away.
+  const millionths = Math.round(q * 100 * 1e6);
+  return `${millionths / 1e6}%`;
+}
+
+/**
+ * Row labels of a numeric describe, in output order:
+ * count, mean, std, min, one label per percentile, max.
+ */
+function numericRowLabels(percentiles: readonly number[]): Label[] {
+  const labels: Label[] = ["count", "mean", "std", "min"];
+  for (const q of percentiles) {
+    labels.push(pctLabel(q));
+  }
+  labels.push("max");
+  return labels;
+}
+
+// ─── single-column statistics ─────────────────────────────────────────────────
+
+/**
+ * Numeric summary of `vals`, laid out as [count, mean, std, min, ...percentiles, max].
+ *
+ * Non-number and NaN entries are ignored. With no usable values the count is 0
+ * and every other slot is NaN; `std` is the sample standard deviation (n - 1
+ * denominator) and is NaN for a single value.
+ */
+function numericStats(vals: readonly Scalar[], percentiles: readonly number[]): Scalar[] {
+  const nums = numericValues(vals);
+  const n = nums.length;
+  if (n === 0) {
+    const blanks = Array.from(percentiles, () => Number.NaN as Scalar);
+    return [0, Number.NaN, Number.NaN, Number.NaN, ...blanks, Number.NaN];
+  }
+
+  let total = 0;
+  for (const v of nums) {
+    total += v;
+  }
+  const mean = total / n;
+
+  let std = Number.NaN;
+  if (n >= 2) {
+    let squaredDeviations = 0;
+    for (const v of nums) {
+      squaredDeviations += (v - mean) ** 2;
+    }
+    std = Math.sqrt(squaredDeviations / (n - 1));
+  }
+
+  // Sort a copy so the caller's values are left in their original order.
+  const ascending = nums.slice().sort((a, b) => a - b);
+  const row: Scalar[] = [n, mean, std, ascending[0] as number];
+  for (const q of percentiles) {
+    row.push(quantile(ascending, q));
+  }
+  row.push(ascending[n - 1] as number);
+  return row;
+}
+
+/**
+ * Categorical summary of `vals`, laid out as [count, unique, top, freq].
+ *
+ * `null`, `undefined` and `NaN` are treated as missing and excluded from all
+ * four statistics. `top` is the most frequent value; on a tie the value that
+ * was seen first wins. With no usable values the result is [0, 0, null, null].
+ */
+function categoricalStats(vals: readonly Scalar[]): Scalar[] {
+  const nonNull = vals.filter(
+    (v) => v !== null && v !== undefined && !(typeof v === "number" && Number.isNaN(v)),
+  );
+  const n = nonNull.length;
+  if (n === 0) {
+    return [0, 0, null, null];
+  }
+  // Explicit type arguments keep the counter from degrading to Map<any, any>.
+  const freq = new Map<Scalar, number>();
+  for (const v of nonNull) {
+    freq.set(v, (freq.get(v) ?? 0) + 1);
+  }
+  let topVal: Scalar = null;
+  let topFreq = 0;
+  // Map iteration follows insertion order, so the strict `>` keeps the
+  // first-seen value when several share the highest count.
+  for (const [v, f] of freq) {
+    if (f > topFreq) {
+      topFreq = f;
+      topVal = v;
+    }
+  }
+  return [n, freq.size, topVal, topFreq];
+}
+
+// ─── Series describe ──────────────────────────────────────────────────────────
+
+/**
+ * Summarise a numeric Series as count / mean / std / min / percentiles / max.
+ * The result is indexed by those stat labels and keeps the name of `s`.
+ */
+function describeNumericSeries(s: Series, percentiles: readonly number[]): Series {
+  const rowLabels = numericRowLabels(percentiles);
+  const data = numericStats(s.values as readonly Scalar[], percentiles);
+  const index = new Index(rowLabels);
+  return new Series({ data, index, name: s.name });
+}
+
+/**
+ * Summarise a non-numeric Series as count / unique / top / freq.
+ * The result is indexed by those stat labels and keeps the name of `s`.
+ */
+function describeCategoricalSeries(s: Series): Series {
+  const rowLabels: Label[] = ["count", "unique", "top", "freq"];
+  const data = categoricalStats(s.values as readonly Scalar[]);
+  const index = new Index(rowLabels);
+  return new Series({ data, index, name: s.name });
+}
+
+// ─── DataFrame describe ───────────────────────────────────────────────────────
+
+/**
+ * Names of the columns of `df` that pass the `include` filter, in column order.
+ * Any value other than `"number"` or `"object"` keeps every column.
+ */
+function selectColumns(df: DataFrame, include: DescribeOptions["include"]): string[] {
+  return df.columns.values.filter((name) => {
+    const numeric = isNumericSeries(df.col(name));
+    switch (include) {
+      case "number":
+        return numeric;
+      case "object":
+        return !numeric;
+      default:
+        return true; // "all"
+    }
+  });
+}
+
+/**
+ * Row labels for `include === "all"`.
+ *
+ * Mixed column types get the numeric labels followed by unique / top / freq;
+ * numeric-only columns get the numeric labels; otherwise (including no
+ * columns at all) the categorical labels are used.
+ */
+function rowLabelsForAll(cols: readonly string[], df: DataFrame, pcts: readonly number[]): Label[] {
+  const numericFlags = cols.map((c) => isNumericSeries(df.col(c)));
+  const hasNum = numericFlags.includes(true);
+  const hasCat = numericFlags.includes(false);
+  if (!hasNum) {
+    return ["count", "unique", "top", "freq"];
+  }
+  const numericLabels = numericRowLabels(pcts);
+  if (!hasCat) {
+    return numericLabels;
+  }
+  return [...numericLabels, "unique", "top", "freq"];
+}
+
+/**
+ * Choose the row labels of a describe DataFrame.
+ *
+ * @param include - resolved column filter
+ * @param cols - names of the columns that will be described
+ * @param df - source DataFrame (used to inspect column dtypes for `"all"`)
+ * @param pcts - percentile levels that become percentage rows
+ * @returns numeric labels for `"number"`, categorical labels for `"object"`,
+ *   and a dtype-dependent union for `"all"`
+ */
+function chooseRowLabels(
+  include: Required<DescribeOptions>["include"],
+  cols: readonly string[],
+  df: DataFrame,
+  pcts: readonly number[],
+): Label[] {
+  switch (include) {
+    case "number":
+      return numericRowLabels(pcts);
+    case "object":
+      return ["count", "unique", "top", "freq"];
+    default:
+      return rowLabelsForAll(cols, df, pcts);
+  }
+}
+
+/**
+ * Build the stat-Series for one column, aligned to the shared `rowLabels`.
+ *
+ * The column is summarised with numeric or categorical statistics according
+ * to its dtype; every row label it has no statistic for is filled with `null`.
+ *
+ * @param s - column to summarise
+ * @param opts - fully resolved describe options (only `percentiles` is read)
+ * @param rowLabels - row labels of the result DataFrame, in output order
+ * @returns a Series indexed by `rowLabels` and named after `s`
+ */
+function buildColStat(
+  s: Series,
+  opts: Required<DescribeOptions>,
+  rowLabels: readonly Label[],
+): Series {
+  const isNum = isNumericSeries(s);
+  const rawLabels: Label[] = isNum
+    ? numericRowLabels(opts.percentiles)
+    : ["count", "unique", "top", "freq"];
+  const rawStats: Scalar[] = isNum
+    ? numericStats(s.values as readonly Scalar[], opts.percentiles)
+    : categoricalStats(s.values as readonly Scalar[]);
+  // Look statistics up by label so rows absent for this column type become null.
+  const statMap = new Map<Label, Scalar>(
+    rawLabels.map((lbl, i): [Label, Scalar] => [lbl, rawStats[i] ?? null]),
+  );
+  const data = rowLabels.map((lbl) => statMap.get(lbl) ?? null);
+  return new Series({ data, index: new Index([...rowLabels]), name: s.name });
+}
+
+/**
+ * Describe a DataFrame: one stat-Series per selected column, assembled into a
+ * result DataFrame whose index holds the stat labels.
+ *
+ * @param df - DataFrame to summarise
+ * @param opts - fully resolved describe options
+ * @returns the summary DataFrame; empty (no columns, no rows) when no column
+ *   passes the `include` filter
+ */
+function describeDataFrame(df: DataFrame, opts: Required<DescribeOptions>): DataFrame {
+  const cols = selectColumns(df, opts.include);
+  if (cols.length === 0) {
+    return new DataFrame(new Map(), new Index([]));
+  }
+  const rowLabels = chooseRowLabels(opts.include, cols, df, opts.percentiles);
+  const colMap = new Map<string, Series<Scalar>>();
+  for (const name of cols) {
+    colMap.set(name, buildColStat(df.col(name), opts, rowLabels));
+  }
+  return new DataFrame(colMap, new Index(rowLabels));
+}
+
+// ─── public API ───────────────────────────────────────────────────────────────
+
+/**
+ * Compute summary statistics for a `Series` or `DataFrame`.
+ *
+ * Mirrors `pandas.DataFrame.describe()` / `pandas.Series.describe()`.
+ *
+ * - **Numeric** series/columns: `count`, `mean`, `std`, `min`,
+ *   percentile rows (default 25 %, 50 %, 75 %), `max`.
+ * - **Non-numeric** series/columns: `count`, `unique`, `top`, `freq`.
+ *
+ * @param obj - A `Series` or `DataFrame` to summarize.
+ * @param options - Optional configuration. `include` is only used for
+ *   DataFrame input; a Series is always described according to its own dtype.
+ * @returns A `Series` (from a Series input) or a `DataFrame`.
+ *
+ * @example
+ * ```ts
+ * import { Series, describe } from "tsb";
+ *
+ * const s = new Series({ data: [1, 2, 3, 4, 5] });
+ * describe(s);
+ * // Series { count: 5, mean: 3, std: 1.58…, min: 1, "25%": 2, "50%": 3, "75%": 4, max: 5 }
+ *
+ * const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+ * describe(df);
+ * // DataFrame rows: count / mean / std / min / 25% / 50% / 75% / max
+ * ```
+ */
+export function describe(
+  obj: Series | DataFrame,
+  options?: DescribeOptions,
+): Series | DataFrame {
+  // Resolve defaults once so the helpers can rely on a fully populated object.
+  const opts: Required<DescribeOptions> = {
+    percentiles: options?.percentiles ?? DEFAULT_PERCENTILES,
+    include: options?.include ?? "number",
+  };
+  if (obj instanceof Series) {
+    if (isNumericSeries(obj)) {
+      return describeNumericSeries(obj, opts.percentiles);
+    }
+    return describeCategoricalSeries(obj);
+  }
+  return describeDataFrame(obj, opts);
+}
diff --git a/src/stats/index.ts b/src/stats/index.ts
new file mode 100644
index 00000000..160de5b7
--- /dev/null
+++ b/src/stats/index.ts
@@ -0,0 +1,10 @@
+/**
+ * tsb/stats — statistical functions.
+ *
+ * @module
+ */
+
+export { describe, quantile } from "./describe.ts";
+export type { DescribeOptions } from "./describe.ts";
+export { pearsonCorr, dataFrameCorr, dataFrameCov } from "./corr.ts";
+export type { CorrMethod, CorrOptions, CovOptions } from "./corr.ts";
diff --git a/tests/stats/describe.test.ts b/tests/stats/describe.test.ts
new file mode 100644
index 00000000..f82537bb
--- /dev/null
+++ b/tests/stats/describe.test.ts
@@ -0,0 +1,271 @@
+/**
+ * Tests for src/stats/describe.ts — describe() and quantile().
+ */
+import { describe as bDescribe, expect, it } from "bun:test";
+import fc from "fast-check";
+import { DataFrame, Series, describe, quantile } from "../../src/index.ts";
+import type { Scalar } from "../../src/index.ts";
+
+// ─── quantile ─────────────────────────────────────────────────────────────────
+
+bDescribe("quantile", () => {
+ it("returns NaN for empty array", () => {
+ expect(Number.isNaN(quantile([], 0.5))).toBe(true);
+ });
+
+ it("returns the single element at any q for a one-element array", () => {
+ expect(quantile([42], 0)).toBe(42);
+ expect(quantile([42], 0.5)).toBe(42);
+ expect(quantile([42], 1)).toBe(42);
+ });
+
+ it("min at q=0, max at q=1", () => {
+ const sorted = [1, 2, 3, 4, 5];
+ expect(quantile(sorted, 0)).toBe(1);
+ expect(quantile(sorted, 1)).toBe(5);
+ });
+
+ it("median at q=0.5 for even-length array", () => {
+ expect(quantile([1, 2, 3, 4], 0.5)).toBeCloseTo(2.5);
+ });
+
+ it("median at q=0.5 for odd-length array", () => {
+ expect(quantile([1, 2, 3], 0.5)).toBe(2);
+ });
+
+ it("Q1 and Q3 for [1,2,3,4,5]", () => {
+ const sorted = [1, 2, 3, 4, 5];
+ expect(quantile(sorted, 0.25)).toBe(2);
+ expect(quantile(sorted, 0.75)).toBe(4);
+ });
+
+ it("linear interpolation between neighbours", () => {
+ expect(quantile([0, 10], 0.3)).toBeCloseTo(3);
+ });
+
+ it("property: result is within [min, max]", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.float({ noNaN: true, noDefaultInfinity: true }), {
+ minLength: 1,
+ maxLength: 50,
+ }),
+ fc.float({ min: 0, max: 1, noNaN: true }),
+ (arr, q) => {
+ const sorted = [...arr].sort((a, b) => a - b);
+ const result = quantile(sorted, q);
+ return result >= (sorted[0] as number) && result <= (sorted.at(-1) as number);
+ },
+ ),
+ );
+ });
+});
+
+// ─── Series.quantile ─────────────────────────────────────────────────────────
+
+bDescribe("Series.quantile", () => {
+ it("matches standalone quantile function", () => {
+ const s = new Series({ data: [3, 1, 4, 1, 5, 9, 2, 6] });
+ const sorted = [1, 1, 2, 3, 4, 5, 6, 9];
+ expect(s.quantile(0.5)).toBeCloseTo(quantile(sorted, 0.5));
+ expect(s.quantile(0.25)).toBeCloseTo(quantile(sorted, 0.25));
+ expect(s.quantile(0.75)).toBeCloseTo(quantile(sorted, 0.75));
+ });
+
+ it("returns NaN for empty Series", () => {
+ const s = new Series({ data: [] });
+ expect(Number.isNaN(s.quantile(0.5))).toBe(true);
+ });
+
+ it("ignores null values", () => {
+ const s = new Series({ data: [1, null, 3, null, 5] });
+ const sorted = [1, 3, 5];
+ expect(s.quantile(0.5)).toBeCloseTo(quantile(sorted, 0.5));
+ });
+});
+
+// ─── describe(Series) ─────────────────────────────────────────────────────────
+
+bDescribe("describe(Series)", () => {
+ it("numeric Series has correct stat labels", () => {
+ const s = new Series({ data: [1, 2, 3, 4, 5] });
+ const result = describe(s) as Series;
+ expect(result.index.values).toEqual([
+ "count",
+ "mean",
+ "std",
+ "min",
+ "25%",
+ "50%",
+ "75%",
+ "max",
+ ]);
+ });
+
+ it("numeric Series counts correctly", () => {
+ const s = new Series({ data: [1, 2, 3, 4, 5] });
+ const result = describe(s) as Series;
+ expect(result.at("count")).toBe(5);
+ });
+
+ it("numeric Series mean is correct", () => {
+ const s = new Series({ data: [1, 2, 3, 4, 5] });
+ const result = describe(s) as Series;
+ expect(result.at("mean")).toBeCloseTo(3);
+ });
+
+ it("numeric Series std is correct (ddof=1)", () => {
+ // [1,2,3,4,5]: variance = 2.5, std = sqrt(2.5) ≈ 1.5811
+ const s = new Series({ data: [1, 2, 3, 4, 5] });
+ const result = describe(s) as Series;
+ expect(result.at("std")).toBeCloseTo(Math.sqrt(2.5), 5);
+ });
+
+ it("numeric Series min and max", () => {
+ const s = new Series({ data: [3, 1, 4, 1, 5, 9] });
+ const result = describe(s) as Series;
+ expect(result.at("min")).toBe(1);
+ expect(result.at("max")).toBe(9);
+ });
+
+ it("numeric Series 50% matches median", () => {
+ const s = new Series({ data: [1, 2, 3, 4, 5] });
+ const result = describe(s) as Series;
+ expect(result.at("50%")).toBeCloseTo(3);
+ });
+
+ it("custom percentiles appear correctly", () => {
+ const s = new Series({ data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] });
+ const result = describe(s, { percentiles: [0.1, 0.9] }) as Series;
+ expect(result.index.values).toContain("10%");
+ expect(result.index.values).toContain("90%");
+ expect(result.index.values).not.toContain("25%");
+ });
+
+ it("handles single-element numeric Series", () => {
+ const s = new Series({ data: [42] });
+ const result = describe(s) as Series;
+ expect(result.at("count")).toBe(1);
+ expect(result.at("mean")).toBeCloseTo(42);
+ expect(result.at("min")).toBe(42);
+ expect(result.at("max")).toBe(42);
+ expect(Number.isNaN(result.at("std") as number)).toBe(true);
+ });
+
+ it("handles all-null numeric Series (empty numerics)", async () => {
+ const { Dtype } = await import("../../src/index.ts");
+ const s = new Series({ data: [null, null], dtype: Dtype.float64 });
+ const result = describe(s) as Series;
+ expect(result.at("count")).toBe(0);
+ expect(Number.isNaN(result.at("mean") as number)).toBe(true);
+ });
+
+ it("categorical Series has correct stat labels", () => {
+ const s = new Series({ data: ["a", "b", "a", "c"] });
+ const result = describe(s) as Series;
+ expect(result.index.values).toEqual(["count", "unique", "top", "freq"]);
+ });
+
+ it("categorical Series count", () => {
+ const s = new Series({ data: ["a", "b", "a", "c"] });
+ const result = describe(s) as Series;
+ expect(result.at("count")).toBe(4);
+ });
+
+ it("categorical Series unique", () => {
+ const s = new Series({ data: ["a", "b", "a", "c"] });
+ const result = describe(s) as Series;
+ expect(result.at("unique")).toBe(3);
+ });
+
+ it("categorical Series top and freq", () => {
+ const s = new Series({ data: ["a", "b", "a", "c"] });
+ const result = describe(s) as Series;
+ expect(result.at("top")).toBe("a");
+ expect(result.at("freq")).toBe(2);
+ });
+
+ it("series name is preserved in result", () => {
+ const s = new Series({ data: [1, 2, 3], name: "score" });
+ const result = describe(s) as Series;
+ expect(result.name).toBe("score");
+ });
+});
+
+// ─── describe(DataFrame) ─────────────────────────────────────────────────────
+
+bDescribe("describe(DataFrame)", () => {
+ it("numeric-only DataFrame has expected columns and rows", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+ const result = describe(df) as DataFrame;
+ expect(result.columns.values).toEqual(["a", "b"]);
+ expect(result.index.values).toEqual([
+ "count",
+ "mean",
+ "std",
+ "min",
+ "25%",
+ "50%",
+ "75%",
+ "max",
+ ]);
+ });
+
+ it("count row equals number of non-null values", () => {
+ const df = DataFrame.fromColumns({ x: [1, 2, null, 4] });
+ const result = describe(df) as DataFrame;
+ expect(result.col("x").at("count")).toBe(3);
+ });
+
+ it("mean is correct per column", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2, 3] });
+ const result = describe(df) as DataFrame;
+ expect(result.col("a").at("mean")).toBeCloseTo(2);
+ });
+
+ it("include='object' shows only string columns", () => {
+ const df = DataFrame.fromColumns({ name: ["Alice", "Bob", "Alice"], age: [25, 30, 25] });
+ const result = describe(df, { include: "object" }) as DataFrame;
+ expect(result.columns.values).toEqual(["name"]);
+ expect(result.index.values).toEqual(["count", "unique", "top", "freq"]);
+ });
+
+ it("include='all' shows both numeric and categorical columns", () => {
+ const df = DataFrame.fromColumns({ name: ["A", "B", "A"], score: [10, 20, 30] });
+ const result = describe(df, { include: "all" }) as DataFrame;
+ expect(result.columns.values).toContain("name");
+ expect(result.columns.values).toContain("score");
+ // numeric rows present
+ expect(result.index.values).toContain("mean");
+ // categorical rows present
+ expect(result.index.values).toContain("unique");
+ });
+
+ it("returns empty DataFrame when no columns match include filter", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2, 3] });
+ const result = describe(df, { include: "object" }) as DataFrame;
+ expect(result.columns.values).toEqual([]);
+ });
+
+ it("property: count row never exceeds column length for numeric data", () => {
+ fc.assert(
+ fc.property(
+ fc.array(
+ fc.oneof(fc.float({ noNaN: true, noDefaultInfinity: true }), fc.constant(null)),
+ { minLength: 1, maxLength: 20 },
+ ),
+ (arr) => {
+ // ensure at least one non-null number so column is treated as numeric
+ const data = [1, ...arr];
+ const df = DataFrame.fromColumns({ x: data });
+ const result = describe(df) as DataFrame;
+ if (!result.columns.values.includes("x")) {
+ return true; // filtered out
+ }
+ const cnt = result.col("x").at("count") as number;
+ return cnt >= 0 && cnt <= data.length;
+ },
+ ),
+ );
+ });
+});
From 106c75cca1a498ef81c7fe6e1a46b2c05f706b43 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Sun, 5 Apr 2026 17:10:38 +0000
Subject: [PATCH 007/104] Iteration 58: Implement readCsv / toCsv CSV I/O
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add src/io/csv.ts — pandas-style CSV read/write:
- readCsv(text, options?): parse CSV string → DataFrame with automatic
dtype inference (bool/int64/float64/string/object), NA handling,
quoted fields (RFC 4180), custom separators, indexCol, skipRows, nRows
- toCsv(df, options?): serialize DataFrame → CSV string with header,
index, custom sep, lineterminator, naRep
- Comprehensive tests: 35+ unit + property-based (fast-check) tests
- Interactive playground: playground/csv.html with 7 tutorial sections
Metric: 12 → 13 (+1)
Run: https://github.com/githubnext/tsessebe/actions/runs/24003267099
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
playground/csv.html | 206 ++++++++++++++++++++
src/io/csv.ts | 453 +++++++++++++++++++++++++++++++++++++++++++
src/io/index.ts | 10 +
tests/io/csv.test.ts | 342 ++++++++++++++++++++++++++++++++
4 files changed, 1011 insertions(+)
create mode 100644 playground/csv.html
create mode 100644 src/io/csv.ts
create mode 100644 src/io/index.ts
create mode 100644 tests/io/csv.test.ts
diff --git a/playground/csv.html b/playground/csv.html
new file mode 100644
index 00000000..2d4bbc14
--- /dev/null
+++ b/playground/csv.html
@@ -0,0 +1,206 @@
+
+
+
+
+
+ tsb — readCsv & toCsv
+
+
+
+
+ tsb playground › readCsv & toCsv
+
+
+ readCsv & toCsv — CSV I/O
+
+ readCsv() and toCsv() mirror
+ pandas read_csv()
+ and
+ pandas DataFrame.to_csv() .
+ Parse CSV text into a DataFrame with automatic dtype inference,
+ and serialize any DataFrame back to CSV with full formatting control.
+
+
+
+ 1 — Parse a CSV string
+
+ The simplest call is readCsv(text). The first row is the header,
+ subsequent rows are data. Column dtypes are inferred automatically.
+
+
+
const csv = `name,age,score
+alice,30,88.5
+bob,25,92.0
+carol,35,75.3`;
+
+const df = tsb.readCsv(csv);
+console.log("shape:", df.shape);
+console.log("columns:", [...df.columns.values]);
+console.log("name dtype :", df.col("name").dtype.name);
+console.log("age dtype :", df.col("age").dtype.name);
+console.log("score dtype:", df.col("score").dtype.name);
+console.log("names:", [...df.col("name").values]);
+console.log("ages :", [...df.col("age").values]);
+
+
+ ▶ Run
+
+
+
+
+ 2 — Missing values (NA)
+
+ Empty fields, NA, NaN, null, None,
+ and several other sentinel strings are automatically converted to null.
+ Pass extra strings via naValues.
+
+
+
const csv = `x,y
+1,
+2,NA
+3,?
+4,5`;
+
+// "?" added as an extra NA value
+const df = tsb.readCsv(csv, { naValues: ["?"] });
+console.log("x:", [...df.col("x").values]);
+console.log("y:", [...df.col("y").values]);
+
+
+ ▶ Run
+
+
+
+
+ 3 — Quoted fields & custom separator
+
+ Fields containing the separator, quotes, or newlines can be wrapped in double-quotes.
+ Use sep to change the delimiter (tab, semicolon, pipe, etc.).
+
+
+
const csv = `name;note
+"Smith, Jr.";Senior developer
+"O'Brien";"Joined ""the team"" in 2020"
+Jones;Junior`;
+
+const df = tsb.readCsv(csv, { sep: ";" });
+console.log("name[0]:", df.col("name").values[0]);
+console.log("note[1]:", df.col("note").values[1]);
+console.log("name[2]:", df.col("name").values[2]);
+
+
+ ▶ Run
+
+
+
+
+ 4 — Index column
+
+ Set indexCol to a column name or position to use that column
+ as the row index instead of the default RangeIndex.
+
+
+
const csv = `id,city,pop
+A,London,9000000
+B,Paris,2200000
+C,Berlin,3700000`;
+
+const df = tsb.readCsv(csv, { indexCol: "id" });
+console.log("index:", [...df.index.values]);
+console.log("columns:", [...df.columns.values]);
+console.log("city at A:", df.col("city").at("A"));
+
+
+ ▶ Run
+
+
+
+
+ 5 — Limiting rows
+
+ Use nRows to read only the first N data rows, and
+ skipRows to skip that many data rows after the header.
+
+
+
const csv = `val
+10
+20
+30
+40
+50`;
+
+const first3 = tsb.readCsv(csv, { nRows: 3 });
+console.log("nRows=3:", [...first3.col("val").values]);
+
+const skip2 = tsb.readCsv(csv, { skipRows: 2 });
+console.log("skipRows=2:", [...skip2.col("val").values]);
+
+
+ ▶ Run
+
+
+
+
+ 6 — Serialize with toCsv
+
+ toCsv(df) converts a DataFrame back to a CSV string.
+ Control index inclusion, header, separator, and NA representation.
+
+
+
const df = tsb.DataFrame.fromColumns({
+ name: ["alice", "bob", "carol"],
+ score: [88, 92, 75],
+});
+
+// Default: includes index and header
+console.log("--- with index ---");
+console.log(tsb.toCsv(df));
+
+// Without index
+console.log("--- no index ---");
+console.log(tsb.toCsv(df, { index: false }));
+
+// Semicolon separator, custom NA rep
+const df2 = tsb.DataFrame.fromColumns({ x: [1, null, 3] });
+console.log("--- semicolons + NA rep ---");
+console.log(tsb.toCsv(df2, { sep: ";", index: false, naRep: "MISSING" }));
+
+
+ ▶ Run
+
+
+
+
+ 7 — Round-trip
+
+ A DataFrame serialized with toCsv can be
+ reconstructed with readCsv without data loss.
+
+
+
const original = tsb.DataFrame.fromColumns({
+ x: [1, 2, 3, 4, 5],
+ y: [10.1, 20.2, 30.3, 40.4, 50.5],
+ label: ["a", "b", "c", "d", "e"],
+});
+
+const csv = tsb.toCsv(original, { index: false });
+console.log("CSV:\n" + csv);
+
+const restored = tsb.readCsv(csv);
+console.log("restored x:", [...restored.col("x").values]);
+console.log("restored y:", [...restored.col("y").values]);
+console.log("restored label:", [...restored.col("label").values]);
+console.log("shapes match:", JSON.stringify(original.shape) === JSON.stringify(restored.shape));
+
+
+ ▶ Run
+
+
+
+
+
+
diff --git a/src/io/csv.ts b/src/io/csv.ts
new file mode 100644
index 00000000..673a68fe
--- /dev/null
+++ b/src/io/csv.ts
@@ -0,0 +1,453 @@
+/**
+ * readCsv / toCsv — CSV I/O for DataFrame.
+ *
+ * Mirrors `pandas.read_csv()` and `pandas.DataFrame.to_csv()`:
+ * - `readCsv(text, options?)` — parse a CSV string into a DataFrame
+ * - `toCsv(df, options?)` — serialize a DataFrame to a CSV string
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Index } from "../core/index.ts";
+import { RangeIndex } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import { Dtype } from "../core/index.ts";
+import type { DtypeName, Label, Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/** Options for {@link readCsv}. Every field is optional. */
+export interface ReadCsvOptions {
+  /** Column separator. Default: `","`. */
+  readonly sep?: string;
+  /**
+   * Row index of the header row, or `null` for no header.
+   * Default: `0` (first row is the header).
+   */
+  readonly header?: number | null;
+  /**
+   * Column name or index of the column to use as the row index.
+   * Default: `null` (use a default `RangeIndex`).
+   */
+  readonly indexCol?: string | number | null;
+  /**
+   * Map of column name → dtype name to force a specific type for that column.
+   * Columns not listed here have their dtype inferred.
+   */
+  readonly dtype?: Readonly<Record<string, DtypeName>>;
+  /**
+   * Additional strings to treat as missing/NA (in addition to built-in defaults).
+   */
+  readonly naValues?: readonly string[];
+  /**
+   * Number of data rows to skip at the beginning (after the header).
+   * Default: `0`.
+   */
+  readonly skipRows?: number;
+  /**
+   * Maximum number of data rows to read.
+   * Default: unlimited.
+   */
+  readonly nRows?: number;
+}
+
+/** Options for {@link toCsv}. */
+export interface ToCsvOptions {
+ /** Column separator. Default: `","`. */
+ readonly sep?: string;
+ /** Whether to include the header row. Default: `true`. */
+ readonly header?: boolean;
+ /** Whether to include the index column. Default: `true`. */
+ readonly index?: boolean;
+ /** Line terminator. Default: `"\n"`. */
+ readonly lineterminator?: string;
+ /** String representation for missing/NaN values. Default: `""`. */
+ readonly naRep?: string;
+}
+
+// ─── constants ────────────────────────────────────────────────────────────────
+
+/**
+ * Raw field values that are read as missing (`null`) by default.
+ * Matching is exact and case-sensitive; the empty string covers empty fields.
+ */
+const DEFAULT_NA_STRINGS: ReadonlySet<string> = new Set<string>([
+  "",
+  "null",
+  "NULL",
+  "NaN",
+  "NA",
+  "N/A",
+  "n/a",
+  "#N/A",
+  "none",
+  "None",
+  "#NA",
+]);
+
+// Top-level regex literals (Biome `useTopLevelRegex` rule).
+const RE_LINE_SPLIT = /\r\n|\n|\r/;
+const RE_INT = /^-?\d+$/;
+const RE_FLOAT = /^-?(\d+\.?\d*|\.\d+)([eE][+-]?\d+)?$/;
+const RE_BOOL_TRUE = /^(true|True|TRUE|1)$/;
+const RE_BOOL_FALSE = /^(false|False|FALSE|0)$/;
+const RE_DOUBLE_QUOTE = /"/g;
+
+// ─── CSV line parser ──────────────────────────────────────────────────────────
+
+/**
+ * Split CSV text into non-empty records.
+ *
+ * Records end at `\r\n`, `\n` or `\r`, except while inside a double-quoted
+ * field: a line break between quotes is part of the field and is left in the
+ * record. Empty records (blank lines) are dropped.
+ */
+function splitLines(text: string): string[] {
+  // Fast path: without any quote character every line break ends a record.
+  if (!text.includes('"')) {
+    return text.split(RE_LINE_SPLIT).filter((l) => l.length > 0);
+  }
+  const records: string[] = [];
+  let start = 0;
+  let inQuote = false;
+  let i = 0;
+  while (i < text.length) {
+    const ch = text.charAt(i);
+    if (ch === '"') {
+      // An escaped quote (`""`) toggles twice and so leaves the state unchanged.
+      inQuote = !inQuote;
+      i += 1;
+    } else if (!inQuote && (ch === "\n" || ch === "\r")) {
+      if (i > start) {
+        records.push(text.slice(start, i));
+      }
+      // Consume "\r\n" as a single terminator.
+      i += ch === "\r" && text.charAt(i + 1) === "\n" ? 2 : 1;
+      start = i;
+    } else {
+      i += 1;
+    }
+  }
+  if (start < text.length) {
+    records.push(text.slice(start));
+  }
+  return records;
+}
+
+/**
+ * Parse one CSV line into raw string fields.
+ * Handles double-quoted fields — `""` inside quotes represents a literal `"`.
+ *
+ * @param line - a single CSV record
+ * @param sep - field separator; may be longer than one character
+ * @returns the raw field values, always at least one (possibly empty) field
+ * @throws RangeError when `sep` is the empty string
+ */
+function parseLine(line: string, sep: string): string[] {
+  if (sep.length === 0) {
+    // An empty separator matches at every position without consuming input,
+    // so the scan below would never advance.
+    throw new RangeError("CSV separator must be a non-empty string");
+  }
+  const fields: string[] = [];
+  let current = "";
+  let inQuote = false;
+  let i = 0;
+  while (i < line.length) {
+    const ch = line.charAt(i);
+    if (inQuote) {
+      if (ch === '"' && line.charAt(i + 1) === '"') {
+        // Doubled quote inside a quoted field: emit one literal quote.
+        current += '"';
+        i += 2;
+      } else if (ch === '"') {
+        inQuote = false;
+        i += 1;
+      } else {
+        current += ch;
+        i += 1;
+      }
+    } else if (ch === '"') {
+      inQuote = true;
+      i += 1;
+    } else if (line.startsWith(sep, i)) {
+      fields.push(current);
+      current = "";
+      i += sep.length;
+    } else {
+      current += ch;
+      i += 1;
+    }
+  }
+  // The last field has no trailing separator.
+  fields.push(current);
+  return fields;
+}
+
+// ─── dtype inference ──────────────────────────────────────────────────────────
+
+/** True when the raw field value is one of the strings in `naSet` (exact match). */
+function isNaRaw(raw: string, naSet: ReadonlySet<string>): boolean {
+  return naSet.has(raw);
+}
+
+/**
+ * Infer the most specific dtype for a column from its raw string values.
+ *
+ * NA values are ignored. The remaining values are tested in the order
+ * bool → int64 → float64, and the first dtype every value satisfies wins;
+ * otherwise the column is `"string"`. A column with no non-NA value is `"object"`.
+ */
+function inferColumnDtype(raws: readonly string[], naSet: ReadonlySet<string>): DtypeName {
+  const nonNa = raws.filter((r) => !isNaRaw(r, naSet));
+  if (nonNa.length === 0) {
+    return "object";
+  }
+  // NOTE(review): "1" and "0" match the bool patterns and bool is tested
+  // first, so a column holding only 0/1 is inferred as "bool" rather than
+  // "int64" — confirm this is intended.
+  const allBool = nonNa.every((r) => RE_BOOL_TRUE.test(r) || RE_BOOL_FALSE.test(r));
+  if (allBool) {
+    return "bool";
+  }
+  const allInt = nonNa.every((r) => RE_INT.test(r));
+  if (allInt) {
+    return "int64";
+  }
+  const allFloat = nonNa.every((r) => RE_FLOAT.test(r));
+  if (allFloat) {
+    return "float64";
+  }
+  return "string";
+}
+
+/**
+ * Convert one raw field to a Scalar according to an inferred column dtype.
+ *
+ * NA strings become `null`; `"bool"`, `"int64"` and `"float64"` are parsed to
+ * boolean / number; any other dtype returns the raw string unchanged.
+ */
+function parseInferred(raw: string, dtype: DtypeName, naSet: ReadonlySet<string>): Scalar {
+  if (isNaRaw(raw, naSet)) {
+    return null;
+  }
+  switch (dtype) {
+    case "bool":
+      // Anything that is not a "true" token is false here; inference has
+      // already checked that every value is a true or false token.
+      return RE_BOOL_TRUE.test(raw);
+    case "int64":
+      return Number.parseInt(raw, 10);
+    case "float64":
+      return Number.parseFloat(raw);
+    default:
+      return raw;
+  }
+}
+
+/**
+ * Parse a raw field for a forced int/uint dtype (split out to keep cyclomatic
+ * complexity low).
+ *
+ * The value is converted with `Number` and truncated toward zero. Blank or
+ * whitespace-only input, unparseable text and non-finite values (`Infinity`)
+ * yield `null` instead of a bogus integer.
+ */
+function parseForcedInt(raw: string): Scalar {
+  // Number("   ") is 0, so blank input must be rejected before converting.
+  if (raw.trim() === "") {
+    return null;
+  }
+  const n = Number(raw);
+  return Number.isFinite(n) ? Math.trunc(n) : null;
+}
+
+/**
+ * Parse a raw field for a forced float dtype (split out to keep cyclomatic
+ * complexity low).
+ *
+ * The value is converted with `Number`. Blank or whitespace-only input and
+ * unparseable text yield `null`; `Infinity` / `-Infinity` are kept.
+ */
+function parseForcedFloat(raw: string): Scalar {
+  // Number("   ") is 0, so blank input must be rejected before converting.
+  if (raw.trim() === "") {
+    return null;
+  }
+  const n = Number(raw);
+  return Number.isNaN(n) ? null : n;
+}
+
+/**
+ * Parse a raw field for a forced bool dtype (split out to keep cyclomatic
+ * complexity low). Recognised true/false tokens map to booleans; anything
+ * else is `null`.
+ */
+function parseForcedBool(raw: string): Scalar {
+  const isTrue = RE_BOOL_TRUE.test(raw);
+  if (isTrue || RE_BOOL_FALSE.test(raw)) {
+    return isTrue;
+  }
+  return null;
+}
+
+/**
+ * Convert one raw field to a Scalar when the caller forced a dtype.
+ *
+ * NA strings become `null`. The dtype name selects the parser by prefix
+ * (`int*` / `uint*`, `float*`) or exact match (`bool`); every other dtype
+ * returns the raw string unchanged.
+ */
+function parseForcedDtype(raw: string, dtypeName: DtypeName, naSet: ReadonlySet<string>): Scalar {
+  if (isNaRaw(raw, naSet)) {
+    return null;
+  }
+  if (dtypeName.startsWith("int") || dtypeName.startsWith("uint")) {
+    return parseForcedInt(raw);
+  }
+  if (dtypeName.startsWith("float")) {
+    return parseForcedFloat(raw);
+  }
+  if (dtypeName === "bool") {
+    return parseForcedBool(raw);
+  }
+  return raw;
+}
+
+/**
+ * Build a `Series` from a column's raw strings using the resolved dtype.
+ *
+ * @param name - column name, used as the Series name
+ * @param raws - raw field values of the column, one per data row
+ * @param dtypeName - dtype to parse to and to tag the Series with
+ * @param naSet - strings treated as missing
+ * @param forced - `true` when the caller specified the dtype (lenient
+ *   parsing, unparseable values become `null`); `false` when it was inferred
+ */
+function buildColumnSeries(
+  name: string,
+  raws: readonly string[],
+  dtypeName: DtypeName,
+  naSet: ReadonlySet<string>,
+  forced: boolean,
+): Series {
+  const data: Scalar[] = raws.map((r) =>
+    forced ? parseForcedDtype(r, dtypeName, naSet) : parseInferred(r, dtypeName, naSet),
+  );
+  return new Series({ data, name, dtype: Dtype.from(dtypeName) });
+}
+
+// ─── column extraction ────────────────────────────────────────────────────────
+
+/**
+ * Transpose parsed rows into one raw-string array per column.
+ *
+ * @param rows - Parsed rows; a row may have fewer or more cells than `numCols`.
+ * @param numCols - Number of columns to produce.
+ * @returns `numCols` arrays, each holding one entry per row. Missing cells become `""`;
+ *   cells beyond `numCols` are ignored.
+ */
+function extractRawColumns(rows: readonly (readonly string[])[], numCols: number): string[][] {
+  return Array.from({ length: numCols }, (_, col): string[] =>
+    rows.map((cells) => cells[col] ?? ""),
+  );
+}
+
+/**
+ * Decide whether a column is the one selected as the row index.
+ *
+ * @param name - Column name.
+ * @param ci - Zero-based column position.
+ * @param indexCol - Index selector: a column name, a column position, or `null` for none.
+ * @returns `true` when `indexCol` names this column or equals its position.
+ */
+function isIndexColumn(name: string, ci: number, indexCol: string | number | null): boolean {
+  switch (typeof indexCol) {
+    case "string":
+      return name === indexCol;
+    case "number":
+      return ci === indexCol;
+    default:
+      // `null` (no index column requested).
+      return false;
+  }
+}
+
+/**
+ * Build a DataFrame that has the given columns but no rows.
+ *
+ * @param colNames - Column names, in output order.
+ * @returns A DataFrame whose columns are empty Series and whose index is empty.
+ */
+function emptyDataFrame(colNames: readonly string[]): DataFrame {
+  const colMap = new Map<string, Series<Scalar>>();
+  for (const name of colNames) {
+    colMap.set(name, new Series({ data: [], name }));
+  }
+  return new DataFrame(colMap, new Index([]));
+}
+
+// ─── public: readCsv ──────────────────────────────────────────────────────────
+
+/**
+ * Parse a CSV string into a `DataFrame`.
+ *
+ * Mirrors `pandas.read_csv()`.
+ *
+ * Behaviour worth knowing:
+ * - `header: null` (or a negative header row) means "no header line"; columns are then
+ *   named `"0"`, `"1"`, … after the width of the first data row.
+ * - `skipRows` data lines are dropped after the header line, then at most `nRows` are kept.
+ *   Negative values for either option are treated as 0.
+ * - A column listed in `options.dtype` is parsed with that dtype; every other column has
+ *   its dtype inferred.
+ * - The column selected by `indexCol` becomes the row index and is not kept as a column.
+ *
+ * @param text - Raw CSV string (supports `\n`, `\r\n`, `\r` line endings).
+ * @param options - Optional parsing configuration.
+ * @returns A new `DataFrame`.
+ *
+ * @example
+ * ```ts
+ * import { readCsv } from "tsb";
+ *
+ * const df = readCsv("a,b,c\n1,2,3\n4,5,6");
+ * // DataFrame: a=[1,4], b=[2,5], c=[3,6]
+ * ```
+ */
+export function readCsv(text: string, options?: ReadCsvOptions): DataFrame {
+  const sep = options?.sep ?? ",";
+  // `??` would turn an explicit `header: null` into 0, so only default on `undefined`.
+  const headerRow = options?.header !== undefined ? options.header : 0;
+  const indexCol = options?.indexCol ?? null;
+  const dtypeMap: Readonly<Record<string, DtypeName>> = options?.dtype ?? {};
+  // A negative count would make `slice` count from the end of the array; clamp to 0.
+  const skipRows = Math.max(0, options?.skipRows ?? 0);
+  const nRows = options?.nRows ?? null;
+
+  const naSet = new Set<string>(DEFAULT_NA_STRINGS);
+  for (const v of options?.naValues ?? []) {
+    naSet.add(v);
+  }
+
+  const lines = splitLines(text);
+
+  let colNames: string[];
+  let dataStart: number;
+  if (headerRow === null || headerRow < 0) {
+    colNames = [];
+    dataStart = 0;
+  } else {
+    if (headerRow >= lines.length) {
+      return new DataFrame(new Map(), new Index([]));
+    }
+    colNames = parseLine(lines[headerRow] as string, sep);
+    dataStart = headerRow + 1;
+  }
+
+  let dataLines = lines.slice(dataStart + skipRows);
+  if (nRows !== null) {
+    dataLines = dataLines.slice(0, Math.max(0, nRows));
+  }
+
+  if (dataLines.length === 0) {
+    if (colNames.length === 0) {
+      return new DataFrame(new Map(), new Index([]));
+    }
+    // Keep the empty result consistent with the populated one: the index column
+    // is never reported as a data column.
+    return emptyDataFrame(colNames.filter((name, ci) => !isIndexColumn(name, ci, indexCol)));
+  }
+
+  const rows = dataLines.map((l) => parseLine(l, sep));
+
+  if (colNames.length === 0) {
+    const firstRowWidth = rows[0]?.length ?? 0;
+    colNames = Array.from({ length: firstRowWidth }, (_, i) => String(i));
+  }
+
+  const numCols = colNames.length;
+  const rawCols = extractRawColumns(rows, numCols);
+  const colMap = new Map<string, Series<Scalar>>();
+  let indexSeries: Series | null = null;
+
+  for (let ci = 0; ci < numCols; ci++) {
+    const name = colNames[ci] as string;
+    const raws = rawCols[ci] as readonly string[];
+    // Own-property lookup only: a column called "toString" or "constructor" must not
+    // pick up an Object.prototype member as its "dtype".
+    const forcedName: DtypeName | undefined = Object.prototype.hasOwnProperty.call(dtypeMap, name)
+      ? dtypeMap[name]
+      : undefined;
+    const forced = forcedName !== undefined;
+    const dtypeName: DtypeName = forcedName ?? inferColumnDtype(raws, naSet);
+    const series = buildColumnSeries(name, raws, dtypeName, naSet, forced);
+
+    if (isIndexColumn(name, ci, indexCol)) {
+      indexSeries = series;
+    } else {
+      colMap.set(name, series);
+    }
+  }
+
+  const rowIndex: Index =
+    indexSeries !== null
+      ? new Index(indexSeries.values as readonly Label[])
+      : (new RangeIndex(rows.length) as unknown as Index);
+
+  return new DataFrame(colMap, rowIndex);
+}
+
+// ─── public: toCsv ────────────────────────────────────────────────────────────
+
+/**
+ * Quote a CSV field when writing it bare would change how the line is parsed.
+ *
+ * A field is quoted when it contains the separator, a double quote, or a line break
+ * (`\n` or `\r`). Embedded double quotes are doubled.
+ *
+ * @param val - Field text.
+ * @param sep - Field separator in use.
+ * @returns `val` unchanged when no quoting is needed, otherwise the quoted field.
+ */
+function quoteCsvField(val: string, sep: string): string {
+  // A bare "\r" must be quoted too: readCsv accepts it as a line ending.
+  const needsQuoting =
+    val.includes(sep) || val.includes('"') || val.includes("\n") || val.includes("\r");
+  if (!needsQuoting) {
+    return val;
+  }
+  return `"${val.replace(RE_DOUBLE_QUOTE, '""')}"`;
+}
+
+/**
+ * Convert a scalar to its CSV text.
+ *
+ * @param v - Value to convert.
+ * @param naRep - Text to emit for missing values.
+ * @returns `naRep` for `null`, `undefined`, `NaN`, and invalid `Date` values; the ISO-8601
+ *   string for valid dates; `String(v)` for everything else.
+ */
+function scalarToStr(v: Scalar, naRep: string): string {
+  if (v === null || v === undefined) {
+    return naRep;
+  }
+  if (typeof v === "number" && Number.isNaN(v)) {
+    return naRep;
+  }
+  if (v instanceof Date) {
+    // toISOString() throws a RangeError on an invalid Date; treat it as missing instead.
+    return Number.isNaN(v.getTime()) ? naRep : v.toISOString();
+  }
+  return String(v);
+}
+
+/**
+ * Serialize a `DataFrame` to a CSV string.
+ *
+ * Mirrors `pandas.DataFrame.to_csv()`.
+ *
+ * Every emitted line, including the last, is followed by the line terminator. When the
+ * index is written, its header cell is left empty. A frame that produces no lines at all
+ * (no header requested and no rows) serializes to `""`.
+ *
+ * @param df - The DataFrame to serialize.
+ * @param options - Optional formatting options.
+ * @returns A CSV string.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, toCsv } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] });
+ * toCsv(df, { index: false });
+ * // "a,b\n1,3\n2,4\n"
+ * ```
+ */
+export function toCsv(df: DataFrame, options?: ToCsvOptions): string {
+  const sep = options?.sep ?? ",";
+  const includeHeader = options?.header ?? true;
+  const includeIndex = options?.index ?? true;
+  const linesep = options?.lineterminator ?? "\n";
+  const naRep = options?.naRep ?? "";
+
+  const colNames = df.columns.values;
+  const nRows = df.shape[0];
+  const indexVals = df.index.values;
+  const lines: string[] = [];
+
+  // Resolve each column once up front instead of once per cell.
+  const columns = [...colNames].map((name) => df.col(name));
+
+  if (includeHeader) {
+    const headerFields: string[] = [];
+    if (includeIndex) {
+      headerFields.push("");
+    }
+    for (const name of colNames) {
+      headerFields.push(quoteCsvField(String(name), sep));
+    }
+    lines.push(headerFields.join(sep));
+  }
+
+  for (let ri = 0; ri < nRows; ri++) {
+    const fields: string[] = [];
+    if (includeIndex) {
+      const idxVal = indexVals[ri] ?? null;
+      const idxStr = idxVal !== null ? String(idxVal) : naRep;
+      fields.push(quoteCsvField(idxStr, sep));
+    }
+    for (const column of columns) {
+      fields.push(quoteCsvField(scalarToStr(column.iloc(ri), naRep), sep));
+    }
+    lines.push(fields.join(sep));
+  }
+
+  if (lines.length === 0) {
+    return "";
+  }
+  return lines.join(linesep) + linesep;
+}
diff --git a/src/io/index.ts b/src/io/index.ts
new file mode 100644
index 00000000..1788a87b
--- /dev/null
+++ b/src/io/index.ts
@@ -0,0 +1,10 @@
+/**
+ * tsb/io — I/O utilities.
+ *
+ * @module
+ */
+
+export { readCsv, toCsv } from "./csv.ts";
+export type { ReadCsvOptions, ToCsvOptions } from "./csv.ts";
+export { readJson, toJson } from "./json.ts";
+export type { ReadJsonOptions, ToJsonOptions, JsonOrient } from "./json.ts";
diff --git a/tests/io/csv.test.ts b/tests/io/csv.test.ts
new file mode 100644
index 00000000..f33cc4a9
--- /dev/null
+++ b/tests/io/csv.test.ts
@@ -0,0 +1,342 @@
+/**
+ * Tests for src/io/csv.ts — readCsv() and toCsv().
+ */
+import { describe, expect, it } from "bun:test";
+import fc from "fast-check";
+import { DataFrame, readCsv, toCsv } from "../../src/index.ts";
+
+// ─── readCsv: basic parsing ───────────────────────────────────────────────────
+
+// Header handling, dtype inference, line endings, and degenerate inputs.
+describe("readCsv — basic parsing", () => {
+  it("parses a simple 3-column CSV", () => {
+    const df = readCsv("a,b,c\n1,2,3\n4,5,6");
+    expect(df.shape).toEqual([2, 3]);
+    expect([...df.columns.values]).toEqual(["a", "b", "c"]);
+    expect([...df.col("a").values]).toEqual([1, 4]);
+    expect([...df.col("b").values]).toEqual([2, 5]);
+    expect([...df.col("c").values]).toEqual([3, 6]);
+  });
+
+  it("infers integer dtype", () => {
+    const df = readCsv("x\n10\n20\n30");
+    expect(df.col("x").dtype.name).toBe("int64");
+    expect([...df.col("x").values]).toEqual([10, 20, 30]);
+  });
+
+  it("infers float dtype", () => {
+    const df = readCsv("x\n1.5\n2.5");
+    expect(df.col("x").dtype.name).toBe("float64");
+    expect([...df.col("x").values]).toEqual([1.5, 2.5]);
+  });
+
+  it("infers float dtype for scientific notation", () => {
+    const df = readCsv("x\n1e3\n2.5e-1");
+    expect(df.col("x").dtype.name).toBe("float64");
+    expect([...df.col("x").values]).toEqual([1000, 0.25]);
+  });
+
+  it("infers boolean dtype", () => {
+    const df = readCsv("flag\ntrue\nfalse\nTrue");
+    expect(df.col("flag").dtype.name).toBe("bool");
+    expect([...df.col("flag").values]).toEqual([true, false, true]);
+  });
+
+  // The input is plain text only, so the title says so rather than "mixed content".
+  it("infers string dtype for non-numeric text", () => {
+    const df = readCsv("name\nalice\nbob");
+    expect(df.col("name").dtype.name).toBe("string");
+    expect([...df.col("name").values]).toEqual(["alice", "bob"]);
+  });
+
+  it("handles CRLF line endings", () => {
+    const df = readCsv("a,b\r\n1,2\r\n3,4");
+    expect(df.shape).toEqual([2, 2]);
+    expect([...df.col("a").values]).toEqual([1, 3]);
+  });
+
+  it("handles CR-only line endings", () => {
+    const df = readCsv("a,b\r1,2\r3,4");
+    expect(df.shape).toEqual([2, 2]);
+    expect([...df.col("a").values]).toEqual([1, 3]);
+  });
+
+  // The input is the empty string, not whitespace.
+  it("returns empty DataFrame for empty string", () => {
+    const df = readCsv("");
+    expect(df.shape).toEqual([0, 0]);
+  });
+
+  it("returns empty-row DataFrame when only header present", () => {
+    const df = readCsv("a,b,c");
+    expect(df.shape).toEqual([0, 3]);
+    expect([...df.columns.values]).toEqual(["a", "b", "c"]);
+  });
+
+  it("handles single-column CSV", () => {
+    const df = readCsv("v\n10\n20");
+    expect(df.shape).toEqual([2, 1]);
+    expect([...df.col("v").values]).toEqual([10, 20]);
+  });
+
+  it("handles negative integers", () => {
+    const df = readCsv("x\n-1\n-2\n3");
+    expect(df.col("x").dtype.name).toBe("int64");
+    expect([...df.col("x").values]).toEqual([-1, -2, 3]);
+  });
+});
+
+// ─── readCsv: NA handling ─────────────────────────────────────────────────────
+
+// Cells that are empty, match a built-in NA spelling, or match a caller-supplied
+// `naValues` entry must be read back as null.
+describe("readCsv — NA handling", () => {
+ it("treats empty fields as null", () => {
+ const df = readCsv("a,b\n1,\n,3");
+ expect(df.col("a").values[1]).toBeNull();
+ expect(df.col("b").values[0]).toBeNull();
+ });
+
+ it("treats 'NA' as null", () => {
+ const df = readCsv("x\n1\nNA\n3");
+ expect(df.col("x").values[1]).toBeNull();
+ });
+
+ it("treats 'NaN' as null", () => {
+ const df = readCsv("x\n1.0\nNaN\n3.0");
+ expect(df.col("x").values[1]).toBeNull();
+ });
+
+ it("treats 'null' and 'None' as null", () => {
+ const df = readCsv("x\nnull\nNone");
+ expect(df.col("x").values[0]).toBeNull();
+ expect(df.col("x").values[1]).toBeNull();
+ });
+
+ it("treats custom naValues as null", () => {
+ const df = readCsv("x\n1\nMISSING\n3", { naValues: ["MISSING"] });
+ expect(df.col("x").values[1]).toBeNull();
+ });
+
+ // A column with no non-NA cells has nothing to infer from.
+ it("all-NA column gets object dtype", () => {
+ const df = readCsv("x\nNA\nNA");
+ expect(df.col("x").dtype.name).toBe("object");
+ });
+});
+
+// ─── readCsv: quoted fields ───────────────────────────────────────────────────
+
+// Double-quoted fields: embedded separators, doubled quotes ("" → "), and spaces.
+describe("readCsv — quoted fields", () => {
+ it("handles a field containing the separator inside quotes", () => {
+ const df = readCsv('name,note\n"Smith, Jr.",hello');
+ expect(df.col("name").values[0]).toBe("Smith, Jr.");
+ expect(df.col("note").values[0]).toBe("hello");
+ });
+
+ it("handles escaped double quotes inside quoted fields", () => {
+ const df = readCsv('q\n"say ""hi"""');
+ expect(df.col("q").values[0]).toBe('say "hi"');
+ });
+
+ it("handles quoted strings with spaces", () => {
+ const df = readCsv('a,b\n"hello world",2');
+ expect(df.col("a").values[0]).toBe("hello world");
+ expect(df.col("b").values[0]).toBe(2);
+ });
+});
+
+// ─── readCsv: options ─────────────────────────────────────────────────────────
+
+// One test per ReadCsvOptions field: sep, header, dtype, nRows, skipRows, indexCol.
+describe("readCsv — options", () => {
+ it("respects custom sep (semicolon)", () => {
+ const df = readCsv("a;b;c\n1;2;3", { sep: ";" });
+ expect([...df.columns.values]).toEqual(["a", "b", "c"]);
+ expect(df.col("a").values[0]).toBe(1);
+ });
+
+ it("respects tab separator", () => {
+ const df = readCsv("a\tb\n1\t2", { sep: "\t" });
+ expect([...df.columns.values]).toEqual(["a", "b"]);
+ expect(df.col("a").values[0]).toBe(1);
+ });
+
+ // With no header line, the first line is data and columns are named by position.
+ it("generates numeric column names when header=null", () => {
+ const df = readCsv("1,2,3\n4,5,6", { header: null });
+ expect([...df.columns.values]).toEqual(["0", "1", "2"]);
+ expect(df.shape).toEqual([2, 3]);
+ expect([...df.col("0").values]).toEqual([1, 4]);
+ });
+
+ it("forces dtype when dtype option provided", () => {
+ const df = readCsv("x\n1\n2\n3", { dtype: { x: "float64" } });
+ expect(df.col("x").dtype.name).toBe("float64");
+ expect(df.col("x").values[0]).toBe(1.0);
+ });
+
+ it("respects nRows limit", () => {
+ const df = readCsv("a\n1\n2\n3\n4\n5", { nRows: 3 });
+ expect(df.shape).toEqual([3, 1]);
+ expect([...df.col("a").values]).toEqual([1, 2, 3]);
+ });
+
+ // skipRows drops data rows that follow the header line; the header itself is kept.
+ it("respects skipRows", () => {
+ const df = readCsv("a\n1\n2\n3", { skipRows: 1 });
+ expect(df.shape).toEqual([2, 1]);
+ expect([...df.col("a").values]).toEqual([2, 3]);
+ });
+
+ it("uses indexCol by name to set row index", () => {
+ const df = readCsv("id,val\na,10\nb,20", { indexCol: "id" });
+ expect([...df.columns.values]).toEqual(["val"]);
+ expect([...df.index.values]).toEqual(["a", "b"]);
+ });
+
+ it("uses indexCol by position to set row index", () => {
+ const df = readCsv("id,val\na,10\nb,20", { indexCol: 0 });
+ expect([...df.columns.values]).toEqual(["val"]);
+ expect([...df.index.values]).toEqual(["a", "b"]);
+ });
+});
+
+// ─── toCsv: basic serialization ───────────────────────────────────────────────
+
+// Header/index toggles, separators, NA representation, quoting, and line terminators.
+describe("toCsv — basic serialization", () => {
+  it("serializes a simple DataFrame with header and index", () => {
+    const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] });
+    const csv = toCsv(df);
+    const lines = csv.split("\n").filter((l) => l.length > 0);
+    expect(lines[0]).toBe(",a,b");
+    expect(lines[1]).toBe("0,1,3");
+    expect(lines[2]).toBe("1,2,4");
+  });
+
+  it("omits index when index=false", () => {
+    const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] });
+    const csv = toCsv(df, { index: false });
+    const lines = csv.split("\n").filter((l) => l.length > 0);
+    expect(lines[0]).toBe("a,b");
+    expect(lines[1]).toBe("1,3");
+    expect(lines[2]).toBe("2,4");
+  });
+
+  it("omits header when header=false", () => {
+    const df = DataFrame.fromColumns({ a: [1, 2] });
+    const csv = toCsv(df, { header: false, index: false });
+    const lines = csv.split("\n").filter((l) => l.length > 0);
+    expect(lines[0]).toBe("1");
+    expect(lines[1]).toBe("2");
+  });
+
+  it("uses custom separator", () => {
+    const df = DataFrame.fromColumns({ a: [1], b: [2] });
+    const csv = toCsv(df, { sep: ";", index: false });
+    const lines = csv.trim().split("\n");
+    expect(lines[0]).toBe("a;b");
+    expect(lines[1]).toBe("1;2");
+  });
+
+  it("uses naRep for null values", () => {
+    const df = DataFrame.fromColumns({ x: [1, null, 3] });
+    const csv = toCsv(df, { index: false, naRep: "NA" });
+    const lines = csv.split("\n").filter((l) => l.length > 0);
+    expect(lines[1]).toBe("NA");
+  });
+
+  it("quotes fields that contain the separator", () => {
+    const df = DataFrame.fromColumns({ name: ["Smith, Jr."] });
+    const csv = toCsv(df, { index: false });
+    expect(csv).toContain('"Smith, Jr."');
+  });
+
+  // Pin the whole output so a stray "\n" or a missing trailing terminator is caught.
+  it("uses custom lineterminator", () => {
+    const df = DataFrame.fromColumns({ a: [1, 2] });
+    const csv = toCsv(df, { index: false, lineterminator: "\r\n" });
+    expect(csv).toBe("a\r\n1\r\n2\r\n");
+  });
+
+  it("returns empty string for no-column DataFrame with no header", () => {
+    const df = DataFrame.fromColumns({});
+    const csv = toCsv(df, { index: false, header: false });
+    expect(csv).toBe("");
+  });
+
+  it("CSV ends with a newline", () => {
+    const df = DataFrame.fromColumns({ a: [1, 2] });
+    const csv = toCsv(df, { index: false });
+    expect(csv.endsWith("\n")).toBe(true);
+  });
+});
+
+// ─── round-trip ───────────────────────────────────────────────────────────────
+
+// toCsv followed by readCsv must reproduce the column values for each basic dtype.
+// The index is omitted on write so that it does not come back as an extra column.
+describe("readCsv / toCsv — round-trip", () => {
+ it("round-trips a numeric DataFrame", () => {
+ const original = DataFrame.fromColumns({ x: [1, 2, 3], y: [4, 5, 6] });
+ const csv = toCsv(original, { index: false });
+ const restored = readCsv(csv);
+ expect([...restored.col("x").values]).toEqual([1, 2, 3]);
+ expect([...restored.col("y").values]).toEqual([4, 5, 6]);
+ });
+
+ it("round-trips a float DataFrame", () => {
+ const original = DataFrame.fromColumns({ x: [1.5, 2.5, 3.5] });
+ const csv = toCsv(original, { index: false });
+ const restored = readCsv(csv);
+ expect([...restored.col("x").values]).toEqual([1.5, 2.5, 3.5]);
+ });
+
+ it("round-trips a string DataFrame", () => {
+ const original = DataFrame.fromColumns({ name: ["alice", "bob"] });
+ const csv = toCsv(original, { index: false });
+ const restored = readCsv(csv);
+ expect([...restored.col("name").values]).toEqual(["alice", "bob"]);
+ });
+
+ it("round-trips a boolean DataFrame", () => {
+ const original = DataFrame.fromColumns({ flag: [true, false, true] });
+ const csv = toCsv(original, { index: false });
+ const restored = readCsv(csv);
+ expect([...restored.col("flag").values]).toEqual([true, false, true]);
+ });
+});
+
+// ─── property tests ───────────────────────────────────────────────────────────
+
+// Property-based checks (fast-check) over generated inputs.
+describe("readCsv — property tests", () => {
+ // Robustness: any string at all must parse without throwing.
+ it("never throws on arbitrary text input", () => {
+ fc.assert(
+ fc.property(fc.string(), (text) => {
+ readCsv(text);
+ return true;
+ }),
+ );
+ });
+
+ // Builds a two-column CSV from 1–20 integer pairs and checks the row count.
+ it("shape rows matches number of data lines", () => {
+ fc.assert(
+ fc.property(
+ fc.array(
+ fc.tuple(
+ fc.integer({ min: 0, max: 999 }),
+ fc.integer({ min: 0, max: 999 }),
+ ),
+ { minLength: 1, maxLength: 20 },
+ ),
+ (dataRows) => {
+ const lines = dataRows.map(([a, b]) => `${a},${b}`).join("\n");
+ const csv = `col1,col2\n${lines}`;
+ const df = readCsv(csv);
+ return df.shape[0] === dataRows.length;
+ },
+ ),
+ );
+ });
+
+ it("toCsv always ends with newline for non-empty DataFrames", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.integer({ min: 0, max: 100 }), { minLength: 1, maxLength: 20 }),
+ (nums) => {
+ const df = DataFrame.fromColumns({ v: nums });
+ const csv = toCsv(df, { index: false });
+ return csv.endsWith("\n");
+ },
+ ),
+ );
+ });
+});
From 0a712721cb60c06679328f8e0e28db56daca1f81 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Sun, 5 Apr 2026 17:10:39 +0000
Subject: [PATCH 008/104] Iteration 59: Implement readJson / toJson JSON I/O
Five orient formats (records, split, index, columns, values) mirroring
pandas read_json() and DataFrame.to_json(). Full null propagation,
dtype override support, and JSON auto-detection. 31 unit + property-based
tests. Interactive playground: playground/json.html.
Run: https://github.com/githubnext/tsessebe/actions/runs/24003815679
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
playground/json.html | 199 ++++++++++++++++++
src/io/json.ts | 473 ++++++++++++++++++++++++++++++++++++++++++
tests/io/json.test.ts | 309 +++++++++++++++++++++++++++
3 files changed, 981 insertions(+)
create mode 100644 playground/json.html
create mode 100644 src/io/json.ts
create mode 100644 tests/io/json.test.ts
diff --git a/playground/json.html b/playground/json.html
new file mode 100644
index 00000000..3ac6d4a9
--- /dev/null
+++ b/playground/json.html
@@ -0,0 +1,199 @@
+
+
+
+
+
+ tsb — readJson & toJson
+
+
+
+
+ tsb playground › readJson & toJson
+
+
+ readJson & toJson — JSON I/O
+
+ readJson() and toJson() mirror
+ pandas read_json()
+ and
+ pandas DataFrame.to_json().
+ Parse JSON text into a DataFrame and serialize any
+ DataFrame back to JSON, supporting five orient formats.
+
+
+
+ 1 — Parse records JSON (default)
+
+ The "records" orient is an array of row objects — the most
+ natural JSON format for tabular data. It is also the auto-detected
+ default when the input is a JSON array.
+
+
+
const json = `[
+ {"name": "alice", "age": 30, "score": 88.5},
+ {"name": "bob", "age": 25, "score": 92.0},
+ {"name": "carol", "age": 35, "score": 75.3}
+]`;
+
+const df = tsb.readJson(json);
+console.log("shape:", df.shape);
+console.log("columns:", [...df.columns.values]);
+console.log("names:", [...df.col("name").values]);
+console.log("ages :", [...df.col("age").values]);
+
+
+ ▶ Run
+
+
+
+
+ 2 — Split orient
+
+ The "split" orient stores columns, index, and data
+ separately — compact and lossless. It is auto-detected when the root
+ object contains "columns" and "data" keys.
+
+
+
const json = JSON.stringify({
+ columns: ["x", "y"],
+ index: [10, 20, 30],
+ data: [[1, 4], [2, 5], [3, 6]],
+});
+
+const df = tsb.readJson(json);
+console.log("orient auto-detected:", "split");
+console.log("shape:", df.shape);
+console.log("index:", [...df.index.values]);
+console.log("x:", [...df.col("x").values]);
+console.log("y:", [...df.col("y").values]);
+
+
+ ▶ Run
+
+
+
+
+ 3 — Index orient
+
+ The "index" orient uses row-index labels as keys, each
+ mapping to a record of column values.
+
+
+
const json = JSON.stringify({
+ "r0": { a: 1, b: "foo" },
+ "r1": { a: 2, b: "bar" },
+ "r2": { a: 3, b: "baz" },
+});
+
+const df = tsb.readJson(json, { orient: "index" });
+console.log("shape:", df.shape);
+console.log("index:", [...df.index.values]);
+console.log("a:", [...df.col("a").values]);
+console.log("b:", [...df.col("b").values]);
+
+
+ ▶ Run
+
+
+
+
+ 4 — Columns orient
+
+ The "columns" orient uses column names as keys, each
+ mapping to an object of index-label → value pairs. Useful for
+ column-major storage.
+
+
+
const json = JSON.stringify({
+ x: { "0": 10, "1": 20, "2": 30 },
+ y: { "0": 1.1, "1": 2.2, "2": 3.3 },
+});
+
+const df = tsb.readJson(json, { orient: "columns" });
+console.log("shape:", df.shape);
+console.log("index:", [...df.index.values]);
+console.log("x:", [...df.col("x").values]);
+console.log("y:", [...df.col("y").values]);
+
+
+ ▶ Run
+
+
+
+
+ 5 — Values orient
+
+ The "values" orient is a plain 2-D array — no index or
+ column labels. Columns are auto-named "0",
+ "1", etc.
+
+
+
const json = "[[1, 2, 3], [4, 5, 6], [7, 8, 9]]";
+
+const df = tsb.readJson(json, { orient: "values" });
+console.log("shape:", df.shape);
+console.log("columns:", [...df.columns.values]);
+console.log("col 0:", [...df.col("0").values]);
+
+
+ ▶ Run
+
+
+
+
+ 6 — Serialize with toJson()
+
+ toJson(df, options) serializes a DataFrame
+ to a JSON string. Choose any orient and optionally pretty-print with
+ indent.
+
+
+
const df = tsb.DataFrame.fromColumns({
+ city: ["New York", "London", "Tokyo"],
+ pop: [8_336_817, 8_982_000, 13_960_000],
+ gdp: [1.6, 0.6, 1.0],
+});
+
+console.log("--- records (compact) ---");
+console.log(tsb.toJson(df));
+
+console.log("--- split (indented) ---");
+console.log(tsb.toJson(df, { orient: "split", indent: 2 }));
+
+
+ ▶ Run
+
+
+
+
+ 7 — Round-trip
+
+ A DataFrame serialized with toJson can be
+ reconstructed with readJson without data loss.
+
+
+
const original = tsb.DataFrame.fromColumns({
+ x: [1, 2, 3, 4, 5],
+ label: ["a", "b", "c", "d", "e"],
+});
+
+const json = tsb.toJson(original, { orient: "split" });
+const restored = tsb.readJson(json);
+
+console.log("JSON:", json);
+console.log("x restored:", [...restored.col("x").values]);
+console.log("label restored:", [...restored.col("label").values]);
+console.log("shapes match:", JSON.stringify(original.shape) === JSON.stringify(restored.shape));
+
+
+ ▶ Run
+
+
+
+
+
+
diff --git a/src/io/json.ts b/src/io/json.ts
new file mode 100644
index 00000000..093152f3
--- /dev/null
+++ b/src/io/json.ts
@@ -0,0 +1,473 @@
+/**
+ * readJson / toJson — JSON I/O for DataFrame.
+ *
+ * Mirrors `pandas.read_json()` and `pandas.DataFrame.to_json()`:
+ * - `readJson(text, options?)` — parse a JSON string into a DataFrame
+ * - `toJson(df, options?)` — serialize a DataFrame to a JSON string
+ *
+ * Supported orient values:
+ * - `"records"` — array of row objects: `[{col: val, ...}, ...]`
+ * - `"split"` — `{"columns": [...], "index": [...], "data": [[...], ...]}`
+ * - `"index"` — object keyed by index label: `{"idx": {col: val, ...}, ...}`
+ * - `"columns"` — object keyed by column: `{"col": {idx: val, ...}, ...}`
+ * - `"values"` — 2-D array (no index/column labels): `[[val, ...], ...]`
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Index } from "../core/index.ts";
+import { RangeIndex } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import { Dtype } from "../core/index.ts";
+import type { DtypeName, Label, Scalar } from "../types.ts";
+
+// ─── JSON value types (no `any`) ─────────────────────────────────────────────
+
+/** A JSON primitive (leaf value). */
+type JsonPrimitive = string | number | boolean | null;
+
+/** Any valid JSON value. */
+type JsonValue = JsonPrimitive | JsonValue[] | JsonObject;
+
+/** A JSON object. */
+interface JsonObject {
+ [key: string]: JsonValue;
+}
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/**
+ * Orientation of the JSON representation.
+ *
+ * Mirrors pandas' `orient` parameter for `read_json` / `to_json`.
+ */
+export type JsonOrient = "records" | "split" | "index" | "columns" | "values";
+
+/** Options for {@link readJson}. */
+export interface ReadJsonOptions {
+  /**
+   * Format of the JSON string. When omitted the function auto-detects from
+   * the root structure:
+   * - Array → `"records"` or `"values"`
+   * - Object with `"columns"` + `"data"` → `"split"`
+   * - Other object → `"index"` or `"columns"` heuristic
+   */
+  readonly orient?: JsonOrient;
+  /**
+   * Override dtype for specific columns (column name → dtype name).
+   * Applied after parsing; the column values are re-cast.
+   */
+  readonly dtype?: Readonly<Record<string, DtypeName>>;
+}
+
+/** Options for {@link toJson}. */
+export interface ToJsonOptions {
+ /**
+ * Format of the output JSON. Default: `"records"`.
+ */
+ readonly orient?: JsonOrient;
+ /**
+ * JSON indentation (spaces). `0` → compact. Default: `0`.
+ */
+ readonly indent?: number;
+}
+
+// ─── helpers — JSON type guards ───────────────────────────────────────────────
+
+/** Type guard: true when `v` is a JSON object, i.e. neither `null`, an array, nor a primitive. */
+function isJsonObject(v: JsonValue): v is JsonObject {
+  if (v === null || Array.isArray(v)) {
+    return false;
+  }
+  return typeof v === "object";
+}
+
+/** Type guard: true when `v` is a JSON array (delegates to `Array.isArray`). */
+function isJsonArray(v: JsonValue): v is JsonValue[] {
+ return Array.isArray(v);
+}
+
+/**
+ * Safe property access on a JsonObject (resolves noPropertyAccessFromIndexSignature).
+ *
+ * Only the object's own properties are consulted, so keys such as `"toString"` or
+ * `"constructor"` do not resolve to members inherited from `Object.prototype`.
+ *
+ * @param obj - Object to read from.
+ * @param key - Property name.
+ * @returns The stored value, or `null` when the key is absent.
+ */
+function getProp(obj: JsonObject, key: string): JsonValue {
+  if (!Object.prototype.hasOwnProperty.call(obj, key)) {
+    return null;
+  }
+  return obj[key] ?? null;
+}
+
+// ─── helpers — scalar conversion ──────────────────────────────────────────────
+
+/**
+ * Turn a parsed JSON value into a {@link Scalar} for use as Series data.
+ *
+ * @param v - Parsed JSON value.
+ * @returns Strings, numbers, booleans, and `null` unchanged; nested arrays and objects
+ *   as their JSON text.
+ */
+function toScalar(v: JsonValue): Scalar {
+  switch (typeof v) {
+    case "string":
+    case "number":
+    case "boolean":
+      return v;
+    default:
+      // Remaining cases are null, arrays, and objects; containers are stringified.
+      return v === null ? null : JSON.stringify(v);
+  }
+}
+
+/**
+ * Convert a parsed JSON value to a {@link Label} for use as an index entry.
+ *
+ * @param v - Parsed JSON value.
+ * @returns Strings, numbers, booleans, and `null` unchanged; nested arrays and objects
+ *   as their JSON text, so that distinct containers yield distinct labels.
+ */
+function toLabel(v: JsonValue): Label {
+  if (v === null || typeof v === "string" || typeof v === "number" || typeof v === "boolean") {
+    return v;
+  }
+  // String(v) would give "[object Object]" for every object; use the JSON text instead,
+  // matching how toScalar represents nested values.
+  return JSON.stringify(v);
+}
+
+// ─── helpers — Series building ────────────────────────────────────────────────
+
+/**
+ * Build a Series from JSON values.
+ *
+ * @param name - Series name.
+ * @param data - JSON values, one per row; each is converted with `toScalar`.
+ * @param index - Row index to attach.
+ * @param dtypeOverride - Dtype to pass to the Series; when `undefined` no dtype is passed.
+ * @returns The constructed Series.
+ */
+function buildSeries(
+  name: string,
+  data: readonly JsonValue[],
+  index: Index,
+  dtypeOverride: DtypeName | undefined,
+): Series {
+  const scalars: Scalar[] = data.map((value) => toScalar(value));
+  if (dtypeOverride === undefined) {
+    return new Series({ data: scalars, name, index });
+  }
+  const dtype = Dtype.from(dtypeOverride);
+  return new Series({ data: scalars, name, index, dtype });
+}
+
+// ─── helpers — orient detection ───────────────────────────────────────────────
+
+/**
+ * Guess the orient of a parsed JSON document from its root structure.
+ *
+ * - Array: `"records"` when it is empty or its first element is an object, else `"values"`.
+ * - Object with both `"columns"` and `"data"` keys: `"split"`.
+ * - Other object: `"index"` when its first value is an object, else `"columns"`.
+ * - Anything else (a primitive root): `"records"`.
+ *
+ * @param root - Parsed JSON document.
+ * @returns The detected orient.
+ */
+function detectOrient(root: JsonValue): JsonOrient {
+  if (isJsonArray(root)) {
+    const head = root[0];
+    const looksLikeRows = head === undefined || isJsonObject(head);
+    return looksLikeRows ? "records" : "values";
+  }
+  if (!isJsonObject(root)) {
+    return "records";
+  }
+  if ("columns" in root && "data" in root) {
+    return "split";
+  }
+  // Only the first value is inspected to tell "index" from "columns".
+  const head = Object.values(root)[0];
+  return head !== undefined && isJsonObject(head) ? "index" : "columns";
+}
+
+// ─── orient parsers ───────────────────────────────────────────────────────────
+
+/** Parse `"records"` orient: `[{col: val, ...}, ...]`. */
+function parseRecords(root: JsonValue, dtypes: Readonly>): DataFrame {
+ if (!isJsonArray(root)) {
+ throw new TypeError(`readJson("records"): expected array, got ${typeof root}`);
+ }
+ if (root.length === 0) {
+ return new DataFrame(new Map(), new RangeIndex(0) as unknown as Index);
+ }
+ // Collect all column names in insertion order from all rows
+ const colSet = new Set();
+ for (const row of root) {
+ if (isJsonObject(row)) {
+ for (const k of Object.keys(row)) {
+ colSet.add(k);
+ }
+ }
+ }
+ const colNames = [...colSet];
+ const rowIndex: Index = new RangeIndex(root.length) as unknown as Index;
+ const colMap = new Map