From 8dff3f1c6bb46e57dd90dcd3c802bd7982d56616 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 25 Apr 2026 14:59:03 +0000 Subject: [PATCH 1/3] Initial plan From eec09812be59db225043a99b86b8cd1840940a25 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 25 Apr 2026 15:09:34 +0000 Subject: [PATCH 2/3] Iteration 281: extract strGetDummies into dedicated module Agent-Logs-Url: https://github.com/githubnext/tsessebe/sessions/615193c1-c4f0-473c-bd90-e155a60846a7 Co-authored-by: mrjf <180956+mrjf@users.noreply.github.com> --- playground/str_get_dummies.html | 109 ++++++++++ src/index.ts | 3 +- src/stats/index.ts | 3 +- src/stats/str_get_dummies.ts | 129 ++++++++++++ src/stats/string_ops.ts | 88 +------- tests/stats/str_get_dummies.test.ts | 304 ++++++++++++++++++++++++++++ tests/stats/string_ops.test.ts | 65 +----- 7 files changed, 548 insertions(+), 153 deletions(-) create mode 100644 playground/str_get_dummies.html create mode 100644 src/stats/str_get_dummies.ts create mode 100644 tests/stats/str_get_dummies.test.ts diff --git a/playground/str_get_dummies.html b/playground/str_get_dummies.html new file mode 100644 index 00000000..467403eb --- /dev/null +++ b/playground/str_get_dummies.html @@ -0,0 +1,109 @@ + + + + + + tsb — str.get_dummies: multi-label string encoding + + + +

str.get_dummies — multi-label string encoding

+

+ Port of pandas.Series.str.get_dummies(sep). Splits each + string by a separator (default "|") and returns a + DataFrame of binary indicator columns — one per unique token, + sorted lexicographically. null / undefined / + NaN values produce a row of all zeros. +

+

← back to index

+ +
+

Example 1 — basic split on |

+ + +
(click Run)
+
+ +
+

Example 2 — custom separator

+ + +
(click Run)
+
+ +
+

Example 3 — null / undefined / NaN → all-zero rows

+ + +
(click Run)
+
+ +
+

Example 4 — preserved Series index

+ + +
(click Run)
+
+ + + + diff --git a/src/index.ts b/src/index.ts index 411cb787..27a15d16 100644 --- a/src/index.ts +++ b/src/index.ts @@ -386,7 +386,6 @@ export { } from "./core/index.ts"; export { strNormalize, - strGetDummies, strExtractAll, strRemovePrefix, strRemoveSuffix, @@ -401,6 +400,8 @@ export { strIndent, strDedent, } from "./stats/index.ts"; +export { strGetDummies } from "./stats/index.ts"; +export type { StrGetDummiesOptions } from "./stats/index.ts"; export type { NormalizeForm, StrInput, diff --git a/src/stats/index.ts b/src/stats/index.ts index cd2c46fe..454582aa 100644 --- a/src/stats/index.ts +++ b/src/stats/index.ts @@ -203,7 +203,6 @@ export { fillna, countna, countValid } from "./notna_isna.ts"; export type { IsnaInput, FillnaOptions, DropnaOptions } from "./notna_isna.ts"; export { strNormalize, - strGetDummies, strExtractAll, strRemovePrefix, strRemoveSuffix, @@ -212,6 +211,8 @@ export { strByteLength, } from "./string_ops.ts"; export type { NormalizeForm, StrInput, ExtractAllOptions } from "./string_ops.ts"; +export { strGetDummies } from "./str_get_dummies.ts"; +export type { StrGetDummiesOptions } from "./str_get_dummies.ts"; export { strSplitExpand, strExtractGroups, diff --git a/src/stats/str_get_dummies.ts b/src/stats/str_get_dummies.ts new file mode 100644 index 00000000..cdda92ec --- /dev/null +++ b/src/stats/str_get_dummies.ts @@ -0,0 +1,129 @@ +/** + * str_get_dummies — split string Series by separator and return a DataFrame of + * binary dummy/indicator variables. + * + * Mirrors `pandas.Series.str.get_dummies(sep='|')`. + * + * Each element is split by `sep`; the unique tokens across all elements become + * columns. A cell is **1** if the token appeared in that row, **0** otherwise. + * Missing values (`null` / `undefined` / `NaN`) contribute no tokens and + * produce a row of all zeros. Columns are sorted lexicographically and the + * original Series index is preserved on the returned DataFrame. 
+ *
+ * @example
+ * ```ts
+ * import { Series, strGetDummies } from "tsb";
+ *
+ * const s = new Series({ data: ["a|b", "b|c", "a"], name: "flags" });
+ * const df = strGetDummies(s);
+ * // DataFrame:
+ * //    a  b  c
+ * // 0  1  1  0
+ * // 1  0  1  1
+ * // 2  1  0  0
+ * ```
+ *
+ * @module
+ */
+
+import { DataFrame, Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── Options ─────────────────────────────────────────────────────────────────
+
+/** Options for {@link strGetDummies}. */
+export interface StrGetDummiesOptions {
+  /**
+   * Separator string used to split each element.
+   * @default "|"
+   */
+  readonly sep?: string;
+
+  /**
+   * Optional prefix prepended to every column name.
+   * @default ""
+   */
+  readonly prefix?: string;
+
+  /**
+   * Separator between the prefix and the token name.
+   * @default "_"
+   */
+  readonly prefixSep?: string;
+}
+
+// ─── Implementation ───────────────────────────────────────────────────────────
+
+/**
+ * Split each string in `series` by `sep` and return a DataFrame of binary
+ * dummy/indicator variables — one column per unique non-empty token.
+ *
+ * Mirrors `pandas.Series.str.get_dummies(sep)`, including dropping empty tokens.
+ *
+ * @param series  A Series whose values are strings (or null/undefined/NaN).
+ * @param options Separator (default `"|"`) and optional column-name prefix settings.
+ * @returns       A DataFrame with the same index as `series` and integer
+ *                (`0`/`1`) columns — one per unique token, sorted
+ *                lexicographically.
+ *
+ * @example
+ * ```ts
+ * import { Series, strGetDummies } from "tsb";
+ *
+ * const s = new Series({ data: ["a|b", "b|c", null], name: "tags" });
+ * const df = strGetDummies(s, { sep: "|" });
+ * //    a  b  c
+ * // 0  1  1  0
+ * // 1  0  1  1
+ * // 2  0  0  0
+ * ```
+ */
+export function strGetDummies(
+  series: Series<Scalar>,
+  options: StrGetDummiesOptions = {},
+): DataFrame {
+  const sep = options.sep ?? "|";
+  const prefix = options.prefix ?? "";
+  const prefixSep = options.prefixSep ?? "_";
+  const colName = (token: string): string =>
+    prefix === "" ? token : `${prefix}${prefixSep}${token}`;
+  const vals = series.values;
+  const n = vals.length;
+
+  // Collect unique non-empty tokens and per-row token sets (pandas drops "" tokens).
+  const tokenSet = new Set<string>();
+  const rowTokens: Set<string>[] = new Array<Set<string>>(n);
+
+  for (let i = 0; i < n; i++) {
+    const v = vals[i];
+    const tokens = new Set<string>();
+    if (v !== null && v !== undefined && !(typeof v === "number" && Number.isNaN(v))) {
+      const str = typeof v === "string" ? v : String(v);
+      for (const tok of str.split(sep)) {
+        if (tok !== "") {
+          tokens.add(tok);
+          tokenSet.add(tok);
+        }
+      }
+    }
+    rowTokens[i] = tokens;
+  }
+
+  // Sort tokens lexicographically (pandas sorts columns for get_dummies).
+  const columns = [...tokenSet].sort();
+
+  // Build one Series per column. Use a Map (rather than a plain object)
+  // so that lexicographic order is preserved even for integer-like token
+  // names (plain object keys re-order numeric strings).
+  const idx = series.index;
+  const colMap = new Map<string, Series<Scalar>>();
+  for (const col of columns) {
+    const arr: Scalar[] = new Array<Scalar>(n);
+    for (let i = 0; i < n; i++) {
+      arr[i] = rowTokens[i]?.has(col) === true ? 1 : 0;
+    }
+    colMap.set(colName(col), new Series<Scalar>({ data: arr, index: idx }));
+  }
+
+  return new DataFrame(colMap, idx);
+}
diff --git a/src/stats/string_ops.ts b/src/stats/string_ops.ts index b5b486fb..5a444c50 100644 --- a/src/stats/string_ops.ts +++ b/src/stats/string_ops.ts @@ -10,7 +10,6 @@ * the accessor or better expressed as pure standalone utilities: * * - `strNormalize` — Unicode normalization (NFC / NFD / NFKC / NFKD) - * - `strGetDummies` — split strings by delimiter → one-hot DataFrame * - `strExtractAll` — extract ALL regex matches per element * - `strRemovePrefix` — remove a leading prefix * - `strRemoveSuffix` — remove a trailing suffix @@ -21,7 +20,7 @@ * @module */ -import { DataFrame, Series } from "../core/index.ts"; +import { Series } from "../core/index.ts"; import type { Scalar } from "../types.ts"; // ─── public types ───────────────────────────────────────────────────────────── @@ -32,27 +31,6 @@ export type NormalizeForm = "NFC" | "NFD" | "NFKC" | "NFKD"; /** Input accepted by all string-op functions. */ export type StrInput = Series | readonly Scalar[] | readonly string[] | string; -/** Options for {@link strGetDummies}. */ -export interface StrGetDummiesOptions { - /** - * The delimiter used to split each element into tokens. - * @default "|" - */ - readonly sep?: string; - - /** - * Prefix prepended to every column name in the output DataFrame. - * @default "" - */ - readonly prefix?: string; - - /** - * Separator between the prefix and the token name. - * @default "_" - */ - readonly prefixSep?: string; -} - /** Options for {@link strExtractAll}. */ export interface ExtractAllOptions { /** @@ -138,70 +116,6 @@ export function strNormalize( return buildSeries(data, input); } -// ─── strGetDummies ──────────────────────────────────────────────────────────── - -/** - * Encode each string element as a row in a one-hot DataFrame by splitting on a - * delimiter. - * - * Mirrors `pandas.Series.str.get_dummies(sep)`. 
- * - * @param input - Series or string array. - * @param options - `sep` (default `"|"`), `prefix` and `prefixSep` for column names. - * @returns A `DataFrame` of 0/1 integer values, one column per unique token. - * - * @example - * ```ts - * const s = new Series({ data: ["a|b", "b|c", "a"] }); - * strGetDummies(s); - * // DataFrame - * // a b c - * // 0 1 1 0 - * // 1 0 1 1 - * // 2 1 0 0 - * ``` - */ -export function strGetDummies( - input: readonly string[] | Series, - options: StrGetDummiesOptions = {}, -): DataFrame { - const sep = options.sep ?? "|"; - const prefix = options.prefix ?? ""; - const prefixSep = options.prefixSep ?? "_"; - - const strs = toStringArray(input); - - // 1. Collect all unique tokens in first-seen order. - const seen = new Set(); - const tokenRows: string[][] = strs.map((s) => { - const tokens = s === "" ? [] : s.split(sep); - for (const t of tokens) { - seen.add(t); - } - return tokens; - }); - - const allTokens = [...seen].sort(); // stable alphabetical order - - // 2. Build column name with optional prefix. - const colName = (token: string): string => - prefix === "" ? token : `${prefix}${prefixSep}${token}`; - - // 3. Build one Scalar[] per column. - const columns: Record = {}; - for (const token of allTokens) { - const name = colName(token); - columns[name] = tokenRows.map((row) => (row.includes(token) ? 1 : 0)); - } - - // 4. Preserve the row index from a Series input. - if (input instanceof Series) { - const rowIndex = input.index; - return DataFrame.fromColumns(columns, { index: rowIndex }); - } - return DataFrame.fromColumns(columns); -} - // ─── strExtractAll ──────────────────────────────────────────────────────────── /** diff --git a/tests/stats/str_get_dummies.test.ts b/tests/stats/str_get_dummies.test.ts new file mode 100644 index 00000000..30f39718 --- /dev/null +++ b/tests/stats/str_get_dummies.test.ts @@ -0,0 +1,304 @@ +/** + * Tests for strGetDummies — pandas.Series.str.get_dummies(sep) port. 
+ */ + +import { describe, expect, test } from "bun:test"; +import * as fc from "fast-check"; +import { DataFrame, Series, strGetDummies } from "../../src/index.ts"; +import type { Scalar } from "../../src/index.ts"; + +function s(data: readonly Scalar[]): Series { + return new Series({ data: [...data] }); +} + +// ─── Basic functionality ────────────────────────────────────────────────────── + +describe("strGetDummies — basic", () => { + test("splits by default pipe separator", () => { + const df = strGetDummies(s(["a|b", "b|c", "a"])); + expect(df).toBeInstanceOf(DataFrame); + expect([...df.columns.values]).toEqual(["a", "b", "c"]); + expect(df.shape).toEqual([3, 3]); + }); + + test("correct indicator values", () => { + const df = strGetDummies(s(["a|b", "b|c", "a"])); + expect([...df.col("a").values]).toEqual([1, 0, 1]); + expect([...df.col("b").values]).toEqual([1, 1, 0]); + expect([...df.col("c").values]).toEqual([0, 1, 0]); + }); + + test("custom separator", () => { + const df = strGetDummies(s(["x,y", "y,z", "x"]), { sep: "," }); + expect([...df.columns.values]).toEqual(["x", "y", "z"]); + expect([...df.col("x").values]).toEqual([1, 0, 1]); + }); + + test("single-value elements", () => { + const df = strGetDummies(s(["a", "b", "c"])); + expect([...df.columns.values]).toEqual(["a", "b", "c"]); + expect([...df.col("a").values]).toEqual([1, 0, 0]); + }); + + test("columns are sorted lexicographically", () => { + const df = strGetDummies(s(["z|a|m", "a|z"])); + expect([...df.columns.values]).toEqual(["a", "m", "z"]); + }); + + test("null values produce all-zero rows", () => { + const df = strGetDummies(s(["a|b", null, "b"])); + expect([...df.col("a").values]).toEqual([1, 0, 0]); + expect([...df.col("b").values]).toEqual([1, 0, 1]); + }); + + test("undefined values produce all-zero rows", () => { + const df = strGetDummies(s(["a", undefined as unknown as Scalar, "b"])); + expect([...df.col("a").values]).toEqual([1, 0, 0]); + 
expect([...df.col("b").values]).toEqual([0, 0, 1]); + }); + + test("NaN values produce all-zero rows", () => { + const df = strGetDummies(s(["a|b", Number.NaN, "b"])); + expect([...df.col("a").values]).toEqual([1, 0, 0]); + expect([...df.col("b").values]).toEqual([1, 0, 1]); + }); + + test("empty-string elements produce all-zero rows", () => { + const df = strGetDummies(s(["a|b", ""])); + expect([...df.col("a").values]).toEqual([1, 0]); + expect([...df.col("b").values]).toEqual([1, 0]); + }); + + test("all nulls returns empty-column DataFrame with preserved row count", () => { + const df = strGetDummies(s([null, null])); + expect([...df.columns.values]).toEqual([]); + expect(df.shape).toEqual([2, 0]); + }); + + test("empty series returns empty DataFrame", () => { + const df = strGetDummies(s([])); + expect(df.shape).toEqual([0, 0]); + }); + + test("preserves original index", () => { + const ser = new Series({ data: ["a|b", "c"], index: ["r1", "r2"] }); + const df = strGetDummies(ser); + expect([...df.index.values]).toEqual(["r1", "r2"]); + }); + + test("duplicate tokens in same element are counted once", () => { + const df = strGetDummies(s(["a|a|b"])); + expect([...df.col("a").values]).toEqual([1]); + expect([...df.col("b").values]).toEqual([1]); + }); + + test("whitespace tokens are preserved as-is (not stripped)", () => { + const df = strGetDummies(s([" a | b "])); + expect([...df.columns.values]).toEqual([" a ", " b "]); + }); + + test("prefix and prefixSep option", () => { + const df = strGetDummies(s(["x|y"]), { prefix: "tag", prefixSep: "-" }); + expect([...df.columns.values]).toEqual(["tag-x", "tag-y"]); + }); +}); + +// ─── Result shape and types ─────────────────────────────────────────────────── + +describe("strGetDummies — result shape", () => { + test("row count matches input series length", () => { + const df = strGetDummies(s(["a|b", "c", "d|e|f"])); + expect(df.shape[0]).toBe(3); + }); + + test("column count equals unique token count", () => { + 
const df = strGetDummies(s(["a|b|c", "b|d"])); + expect(df.shape[1]).toBe(4); + }); + + test("all values are 0 or 1", () => { + const df = strGetDummies(s(["x|y", "y|z", "x|z"])); + for (const col of df.columns.values as readonly string[]) { + for (const v of df.col(col).values) { + expect(v === 0 || v === 1).toBe(true); + } + } + }); + + test("sum of row values equals unique-token count per row", () => { + const df = strGetDummies(s(["a|b|c", "a", "b|c"])); + const cols = df.columns.values as readonly string[]; + const rowSums = [0, 1, 2].map((i) => + cols.reduce((acc, col) => { + const v = df.col(col).values[i]; + return acc + (typeof v === "number" ? v : 0); + }, 0), + ); + expect(rowSums).toEqual([3, 1, 2]); + }); +}); + +// ─── Pandas parity examples ─────────────────────────────────────────────────── + +describe("strGetDummies — pandas parity", () => { + test("pandas example: a|b, b|c, a", () => { + // >>> pd.Series(['a|b', 'b|c', 'a']).str.get_dummies() + // a b c + // 0 1 1 0 + // 1 0 1 1 + // 2 1 0 0 + const df = strGetDummies(s(["a|b", "b|c", "a"])); + expect(df.toRecords()).toEqual([ + { a: 1, b: 1, c: 0 }, + { a: 0, b: 1, c: 1 }, + { a: 1, b: 0, c: 0 }, + ]); + }); + + test("pandas example: custom sep comma", () => { + const df = strGetDummies(s(["a,b", "b,c", "a"]), { sep: "," }); + expect(df.toRecords()).toEqual([ + { a: 1, b: 1, c: 0 }, + { a: 0, b: 1, c: 1 }, + { a: 1, b: 0, c: 0 }, + ]); + }); + + test("multi-label tags scenario", () => { + const df = strGetDummies( + s(["python|pandas", "python|numpy", "pandas|numpy|scipy"]), + ); + expect([...df.columns.values]).toEqual(["numpy", "pandas", "python", "scipy"]); + expect([...df.col("python").values]).toEqual([1, 1, 0]); + expect([...df.col("scipy").values]).toEqual([0, 0, 1]); + }); +}); + +// ─── Property-based tests ───────────────────────────────────────────────────── + +describe("strGetDummies — property-based", () => { + test("row count always equals series length", () => { + fc.assert( + 
fc.property( + fc.array(fc.option(fc.string({ maxLength: 10 }), { nil: null }), { + minLength: 1, + maxLength: 20, + }), + (arr) => { + const df = strGetDummies(s(arr as Scalar[])); + return df.shape[0] === arr.length; + }, + ), + ); + }); + + test("all cell values are 0 or 1", () => { + fc.assert( + fc.property( + fc.array( + fc.option( + fc + .array(fc.string({ minLength: 1, maxLength: 5 }), { + minLength: 1, + maxLength: 4, + }) + .map((parts) => parts.join("|")), + { nil: null }, + ), + { minLength: 1, maxLength: 15 }, + ), + (arr) => { + const df = strGetDummies(s(arr as Scalar[])); + for (const col of df.columns.values as readonly string[]) { + for (const v of df.col(col).values) { + if (v !== 0 && v !== 1) { + return false; + } + } + } + return true; + }, + ), + ); + }); + + test("index is preserved", () => { + fc.assert( + fc.property( + fc.array(fc.string({ maxLength: 10 }), { minLength: 1, maxLength: 15 }), + (arr) => { + const ser = s(arr as Scalar[]); + const df = strGetDummies(ser); + const origIdx = [...ser.index.values]; + const dfIdx = [...df.index.values]; + return ( + origIdx.length === dfIdx.length && origIdx.every((v, i) => v === dfIdx[i]) + ); + }, + ), + ); + }); + + test("columns are always sorted lexicographically", () => { + fc.assert( + fc.property( + fc.array( + fc + .array(fc.string({ minLength: 1, maxLength: 5 }), { + minLength: 1, + maxLength: 4, + }) + .map((parts) => parts.join("|")), + { minLength: 1, maxLength: 10 }, + ), + (arr) => { + const df = strGetDummies(s(arr as Scalar[])); + const cols = (df.columns.values as readonly string[]).map(String); + const sorted = [...cols].sort(); + return cols.every((c, i) => c === sorted[i]); + }, + ), + ); + }); + + test("null/undefined/NaN rows always have row-sum 0", () => { + fc.assert( + fc.property( + fc.array( + fc.oneof( + fc.constant(null), + fc.constant(undefined), + fc.constant(Number.NaN), + fc + .array(fc.string({ minLength: 1, maxLength: 4 }), { + minLength: 1, + maxLength: 3, + 
}) + .map((parts) => parts.join("|")), + ), + { minLength: 1, maxLength: 10 }, + ), + (arr) => { + const df = strGetDummies(s(arr as Scalar[])); + const cols = df.columns.values as readonly string[]; + for (let i = 0; i < arr.length; i++) { + const v = arr[i]; + const isMissing = + v === null || + v === undefined || + (typeof v === "number" && Number.isNaN(v)); + if (!isMissing) { + continue; + } + for (const col of cols) { + if (df.col(col).values[i] !== 0) { + return false; + } + } + } + return true; + }, + ), + ); + }); +}); diff --git a/tests/stats/string_ops.test.ts b/tests/stats/string_ops.test.ts index 0f5b84cc..435af513 100644 --- a/tests/stats/string_ops.test.ts +++ b/tests/stats/string_ops.test.ts @@ -1,6 +1,6 @@ /** * Tests for src/stats/string_ops.ts - * — strNormalize, strGetDummies, strExtractAll, strRemovePrefix, + * — strNormalize, strExtractAll, strRemovePrefix, * strRemoveSuffix, strTranslate, strCharWidth, strByteLength */ import { describe, expect, it } from "bun:test"; @@ -11,7 +11,6 @@ import { strByteLength, strCharWidth, strExtractAll, - strGetDummies, strNormalize, strRemovePrefix, strRemoveSuffix, @@ -78,68 +77,6 @@ describe("strNormalize", () => { }); }); -// ─── strGetDummies ──────────────────────────────────────────────────────────── - -describe("strGetDummies", () => { - it("basic | separator", () => { - const df = strGetDummies(s(["a|b", "b|c", "a"])); - expect(df.shape[0]).toBe(3); - expect([...df.columns.values].sort()).toEqual(["a", "b", "c"]); - expect(df.col("a").values[0]).toBe(1); - expect(df.col("a").values[1]).toBe(0); - expect(df.col("a").values[2]).toBe(1); - expect(df.col("b").values[0]).toBe(1); - expect(df.col("b").values[1]).toBe(1); - expect(df.col("b").values[2]).toBe(0); - expect(df.col("c").values[0]).toBe(0); - expect(df.col("c").values[1]).toBe(1); - expect(df.col("c").values[2]).toBe(0); - }); - - it("custom separator", () => { - const df = strGetDummies(s(["a,b", "b,c"]), { sep: "," }); - 
expect([...df.columns.values].sort()).toEqual(["a", "b", "c"]); - }); - - it("prefix option", () => { - const df = strGetDummies(s(["x|y"]), { prefix: "tag", prefixSep: "-" }); - expect([...df.columns.values].sort()).toEqual(["tag-x", "tag-y"]); - }); - - it("empty string element maps to no tokens", () => { - const df = strGetDummies(s(["a|b", ""])); - expect(df.col("a").values[1]).toBe(0); - expect(df.col("b").values[1]).toBe(0); - }); - - it("single-token element", () => { - const df = strGetDummies(s(["a", "b", "a"])); - expect(df.shape[0]).toBe(3); - expect(df.col("a").values[0]).toBe(1); - expect(df.col("a").values[1]).toBe(0); - expect(df.col("a").values[2]).toBe(1); - }); - - it("all same token → single column of ones", () => { - const df = strGetDummies(s(["x", "x", "x"])); - expect(df.shape[1]).toBe(1); - expect([...df.col("x").values]).toEqual([1, 1, 1]); - }); - - it("preserves Series index in output rows", () => { - const ser = new Series({ data: ["a|b", "b"] as Scalar[], index: [10, 20] }); - const df = strGetDummies(ser); - expect(df.index.values[0]).toBe(10); - expect(df.index.values[1]).toBe(20); - }); - - it("array input (not Series)", () => { - const df = strGetDummies(["a|b", "c"]); - expect(df.shape[0]).toBe(2); - expect([...df.columns.values].sort()).toEqual(["a", "b", "c"]); - }); -}); - // ─── strExtractAll ──────────────────────────────────────────────────────────── describe("strExtractAll", () => { From 917c7e69a485cd11d6c89b09fa2f59ad59d600a9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 25 Apr 2026 19:14:18 +0000 Subject: [PATCH 3/3] fix: apply biome formatting to str_get_dummies test Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests/stats/str_get_dummies.test.ts | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/tests/stats/str_get_dummies.test.ts b/tests/stats/str_get_dummies.test.ts index 30f39718..924f2c1e 
100644 --- a/tests/stats/str_get_dummies.test.ts +++ b/tests/stats/str_get_dummies.test.ts @@ -165,9 +165,7 @@ describe("strGetDummies — pandas parity", () => { }); test("multi-label tags scenario", () => { - const df = strGetDummies( - s(["python|pandas", "python|numpy", "pandas|numpy|scipy"]), - ); + const df = strGetDummies(s(["python|pandas", "python|numpy", "pandas|numpy|scipy"])); expect([...df.columns.values]).toEqual(["numpy", "pandas", "python", "scipy"]); expect([...df.col("python").values]).toEqual([1, 1, 0]); expect([...df.col("scipy").values]).toEqual([0, 0, 1]); @@ -231,9 +229,7 @@ describe("strGetDummies — property-based", () => { const df = strGetDummies(ser); const origIdx = [...ser.index.values]; const dfIdx = [...df.index.values]; - return ( - origIdx.length === dfIdx.length && origIdx.every((v, i) => v === dfIdx[i]) - ); + return origIdx.length === dfIdx.length && origIdx.every((v, i) => v === dfIdx[i]); }, ), ); @@ -284,9 +280,7 @@ describe("strGetDummies — property-based", () => { for (let i = 0; i < arr.length; i++) { const v = arr[i]; const isMissing = - v === null || - v === undefined || - (typeof v === "number" && Number.isNaN(v)); + v === null || v === undefined || (typeof v === "number" && Number.isNaN(v)); if (!isMissing) { continue; }