From a902b460fad38730855358fa471f4567ac862e73 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Fri, 29 Mar 2024 11:48:41 -0400 Subject: [PATCH 01/16] GH-40891: [JS] Store Dates as TimestampMillisecond --- js/src/factories.ts | 4 ++-- js/src/type.ts | 2 +- js/src/visitor/get.ts | 6 +++++- js/test/generate-test-data.ts | 5 ++++- js/test/unit/vector/date-vector-tests.ts | 19 ++++++++++++++----- 5 files changed, 26 insertions(+), 10 deletions(-) diff --git a/js/src/factories.ts b/js/src/factories.ts index aa54498c50b..657ae1b95ab 100644 --- a/js/src/factories.ts +++ b/js/src/factories.ts @@ -65,7 +65,7 @@ export function makeBuilder(option export function vectorFromArray(values: readonly (null | undefined)[], type?: dtypes.Null): Vector; export function vectorFromArray(values: readonly (null | undefined | boolean)[], type?: dtypes.Bool): Vector; export function vectorFromArray = dtypes.Dictionary>(values: readonly (null | undefined | string)[], type?: T): Vector; -export function vectorFromArray(values: readonly (null | undefined | Date)[], type?: T): Vector; +export function vectorFromArray(values: readonly (null | undefined | Date)[], type?: T): Vector; export function vectorFromArray(values: readonly (null | undefined | number)[], type: T): Vector; export function vectorFromArray(values: readonly (null | undefined | bigint)[], type?: T): Vector; export function vectorFromArray(values: readonly (null | undefined | number)[], type?: T): Vector; @@ -145,7 +145,7 @@ function inferType(value: readonly unknown[]): dtypes.DataType { } else if (booleansCount + nullsCount === value.length) { return new dtypes.Bool; } else if (datesCount + nullsCount === value.length) { - return new dtypes.DateMillisecond; + return new dtypes.TimestampMillisecond; } else if (arraysCount + nullsCount === value.length) { const array = value as Array[]; const childType = inferType(array[array.findIndex((ary) => ary != null)]); diff --git a/js/src/type.ts b/js/src/type.ts index 
ae3aefa0259..2bb330c164b 100644 --- a/js/src/type.ts +++ b/js/src/type.ts @@ -406,7 +406,7 @@ type Timestamps = Type.Timestamp | Type.TimestampSecond | Type.TimestampMillisec /** @ignore */ interface Timestamp_ extends DataType { TArray: Int32Array; - TValue: number; + TValue: number | Date; ArrayType: TypedArrayConstructor; } diff --git a/js/src/visitor/get.ts b/js/src/visitor/get.ts index 3ab3bcb68c3..425accd1938 100644 --- a/js/src/visitor/get.ts +++ b/js/src/visitor/get.ts @@ -180,7 +180,11 @@ const getDate = (data: Data, index: number): T['TValue'] => /** @ignore */ const getTimestampSecond = ({ values }: Data, index: number): T['TValue'] => 1000 * epochMillisecondsLongToMs(values, index * 2); /** @ignore */ -const getTimestampMillisecond = ({ values }: Data, index: number): T['TValue'] => epochMillisecondsLongToMs(values, index * 2); +const getTimestampMillisecond = ({ values, type }: Data, index: number): T['TValue'] => { + const value = epochMillisecondsLongToMs(values, index * 2); + // js dates are timezone agnostic so we only convert to date if there is no timezome + return type.timezone ? value : epochMillisecondsToDate(value); +}; /** @ignore */ const getTimestampMicrosecond = ({ values }: Data, index: number): T['TValue'] => epochMicrosecondsLongToMs(values, index * 2); /** @ignore */ diff --git a/js/test/generate-test-data.ts b/js/test/generate-test-data.ts index 8e6e47de836..559283680ad 100644 --- a/js/test/generate-test-data.ts +++ b/js/test/generate-test-data.ts @@ -415,7 +415,10 @@ function generateTimestamp(this: TestDataVectorGenerator, t type.unit === TimeUnit.MICROSECOND ? 1000000 : type.unit === TimeUnit.MILLISECOND ? 1000 : 1; const data = createTimestamp(length, nullBitmap, multiple, values); - return { values: () => values, vector: new Vector([makeData({ type, length, nullCount, nullBitmap, data })]) }; + return { + values: () => type.unit === TimeUnit.MILLISECOND && !type.timezone ? values.map((x) => x == null ? 
null : new Date(x)) : values, + vector: new Vector([makeData({ type, length, nullCount, nullBitmap, data })]) + }; } function generateTime(this: TestDataVectorGenerator, type: T, length = 100, nullCount = Math.trunc(length * 0.2)): GeneratedVector { diff --git a/js/test/unit/vector/date-vector-tests.ts b/js/test/unit/vector/date-vector-tests.ts index f8b4c1c7976..61d50d48715 100644 --- a/js/test/unit/vector/date-vector-tests.ts +++ b/js/test/unit/vector/date-vector-tests.ts @@ -15,10 +15,19 @@ // specific language governing permissions and limitations // under the License. -import { DateDay, DateMillisecond, RecordBatchReader, Table, vectorFromArray } from 'apache-arrow'; +import { DateDay, DateMillisecond, TimestampMillisecond, RecordBatchReader, Table, vectorFromArray } from 'apache-arrow'; + +describe(`TimeStampVector`, () => { + test(`Dates are stored in TimestampMillisecond`, () => { + const date = new Date('2023-02-01T12:34:56Z'); + const vec = vectorFromArray([date]); + expect(vec.type).toBeInstanceOf(TimestampMillisecond); + expect(vec.get(0)).toBeInstanceOf(Date); + }); +}); describe(`DateVector`, () => { - it('returns days since the epoch as correct JS Dates', () => { + test(`returns days since the epoch as correct JS Dates`, () => { const table = new Table(RecordBatchReader.from(test_data)); const expectedMillis = expectedMillis32(); const date32 = table.getChildAt(0)!; @@ -28,7 +37,7 @@ describe(`DateVector`, () => { } }); - it('returns millisecond longs since the epoch as correct JS Dates', () => { + test(`returns millisecond longs since the epoch as correct JS Dates`, () => { const table = new Table(RecordBatchReader.from(test_data)); const expectedMillis = expectedMillis64(); const date64 = table.getChildAt(1)!; @@ -38,9 +47,9 @@ describe(`DateVector`, () => { } }); - it('returns the same date that was in the vector', () => { + test(`returns the same date that was in the vector`, () => { const dates = [new Date(1950, 1, 0)]; - const vec = 
vectorFromArray(dates); + const vec = vectorFromArray(dates, new DateMillisecond()); for (const date of vec) { expect(date).toEqual(dates.shift()); } From 66fb984edf3d1b1ecba3f76eecbce8b48f719c27 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Fri, 29 Mar 2024 11:59:33 -0400 Subject: [PATCH 02/16] Fix type --- js/src/visitor/set.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/js/src/visitor/set.ts b/js/src/visitor/set.ts index 5dc42283c36..f828b4d3685 100644 --- a/js/src/visitor/set.ts +++ b/js/src/visitor/set.ts @@ -178,13 +178,13 @@ export const setDate = (data: Data, index: number, value: T[ }; /** @ignore */ -export const setTimestampSecond = ({ values }: Data, index: number, value: T['TValue']): void => setEpochMsToMillisecondsLong(values, index * 2, value / 1000); +export const setTimestampSecond = ({ values }: Data, index: number, value: T['TValue']): void => setEpochMsToMillisecondsLong(values, index * 2, (value as number) / 1000); /** @ignore */ -export const setTimestampMillisecond = ({ values }: Data, index: number, value: T['TValue']): void => setEpochMsToMillisecondsLong(values, index * 2, value); +export const setTimestampMillisecond = ({ values }: Data, index: number, value: T['TValue']): void => setEpochMsToMillisecondsLong(values, index * 2, value instanceof Date ? 
value.getTime() : value); /** @ignore */ -export const setTimestampMicrosecond = ({ values }: Data, index: number, value: T['TValue']): void => setEpochMsToMicrosecondsLong(values, index * 2, value); +export const setTimestampMicrosecond = ({ values }: Data, index: number, value: T['TValue']): void => setEpochMsToMicrosecondsLong(values, index * 2, value as number); /** @ignore */ -export const setTimestampNanosecond = ({ values }: Data, index: number, value: T['TValue']): void => setEpochMsToNanosecondsLong(values, index * 2, value); +export const setTimestampNanosecond = ({ values }: Data, index: number, value: T['TValue']): void => setEpochMsToNanosecondsLong(values, index * 2, value as number); /* istanbul ignore next */ /** @ignore */ export const setTimestamp = (data: Data, index: number, value: T['TValue']): void => { From 45a8a87037c5dd9fd4d9d307e8c9091be11775e7 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Mon, 1 Apr 2024 20:14:35 -0400 Subject: [PATCH 03/16] Don't convert timestamps to dates --- js/src/type.ts | 2 +- js/src/visitor/get.ts | 6 +----- js/src/visitor/set.ts | 8 ++++---- js/test/generate-test-data.ts | 5 +---- js/test/unit/vector/date-vector-tests.ts | 4 ++-- 5 files changed, 9 insertions(+), 16 deletions(-) diff --git a/js/src/type.ts b/js/src/type.ts index 2bb330c164b..ae3aefa0259 100644 --- a/js/src/type.ts +++ b/js/src/type.ts @@ -406,7 +406,7 @@ type Timestamps = Type.Timestamp | Type.TimestampSecond | Type.TimestampMillisec /** @ignore */ interface Timestamp_ extends DataType { TArray: Int32Array; - TValue: number | Date; + TValue: number; ArrayType: TypedArrayConstructor; } diff --git a/js/src/visitor/get.ts b/js/src/visitor/get.ts index 425accd1938..3ab3bcb68c3 100644 --- a/js/src/visitor/get.ts +++ b/js/src/visitor/get.ts @@ -180,11 +180,7 @@ const getDate = (data: Data, index: number): T['TValue'] => /** @ignore */ const getTimestampSecond = ({ values }: Data, index: number): T['TValue'] => 1000 * 
epochMillisecondsLongToMs(values, index * 2); /** @ignore */ -const getTimestampMillisecond = ({ values, type }: Data, index: number): T['TValue'] => { - const value = epochMillisecondsLongToMs(values, index * 2); - // js dates are timezone agnostic so we only convert to date if there is no timezome - return type.timezone ? value : epochMillisecondsToDate(value); -}; +const getTimestampMillisecond = ({ values }: Data, index: number): T['TValue'] => epochMillisecondsLongToMs(values, index * 2); /** @ignore */ const getTimestampMicrosecond = ({ values }: Data, index: number): T['TValue'] => epochMicrosecondsLongToMs(values, index * 2); /** @ignore */ diff --git a/js/src/visitor/set.ts b/js/src/visitor/set.ts index f828b4d3685..5dc42283c36 100644 --- a/js/src/visitor/set.ts +++ b/js/src/visitor/set.ts @@ -178,13 +178,13 @@ export const setDate = (data: Data, index: number, value: T[ }; /** @ignore */ -export const setTimestampSecond = ({ values }: Data, index: number, value: T['TValue']): void => setEpochMsToMillisecondsLong(values, index * 2, (value as number) / 1000); +export const setTimestampSecond = ({ values }: Data, index: number, value: T['TValue']): void => setEpochMsToMillisecondsLong(values, index * 2, value / 1000); /** @ignore */ -export const setTimestampMillisecond = ({ values }: Data, index: number, value: T['TValue']): void => setEpochMsToMillisecondsLong(values, index * 2, value instanceof Date ? 
value.getTime() : value); +export const setTimestampMillisecond = ({ values }: Data, index: number, value: T['TValue']): void => setEpochMsToMillisecondsLong(values, index * 2, value); /** @ignore */ -export const setTimestampMicrosecond = ({ values }: Data, index: number, value: T['TValue']): void => setEpochMsToMicrosecondsLong(values, index * 2, value as number); +export const setTimestampMicrosecond = ({ values }: Data, index: number, value: T['TValue']): void => setEpochMsToMicrosecondsLong(values, index * 2, value); /** @ignore */ -export const setTimestampNanosecond = ({ values }: Data, index: number, value: T['TValue']): void => setEpochMsToNanosecondsLong(values, index * 2, value as number); +export const setTimestampNanosecond = ({ values }: Data, index: number, value: T['TValue']): void => setEpochMsToNanosecondsLong(values, index * 2, value); /* istanbul ignore next */ /** @ignore */ export const setTimestamp = (data: Data, index: number, value: T['TValue']): void => { diff --git a/js/test/generate-test-data.ts b/js/test/generate-test-data.ts index 559283680ad..8e6e47de836 100644 --- a/js/test/generate-test-data.ts +++ b/js/test/generate-test-data.ts @@ -415,10 +415,7 @@ function generateTimestamp(this: TestDataVectorGenerator, t type.unit === TimeUnit.MICROSECOND ? 1000000 : type.unit === TimeUnit.MILLISECOND ? 1000 : 1; const data = createTimestamp(length, nullBitmap, multiple, values); - return { - values: () => type.unit === TimeUnit.MILLISECOND && !type.timezone ? values.map((x) => x == null ? 
null : new Date(x)) : values, - vector: new Vector([makeData({ type, length, nullCount, nullBitmap, data })]) - }; + return { values: () => values, vector: new Vector([makeData({ type, length, nullCount, nullBitmap, data })]) }; } function generateTime(this: TestDataVectorGenerator, type: T, length = 100, nullCount = Math.trunc(length * 0.2)): GeneratedVector { diff --git a/js/test/unit/vector/date-vector-tests.ts b/js/test/unit/vector/date-vector-tests.ts index 61d50d48715..e5cd49933ea 100644 --- a/js/test/unit/vector/date-vector-tests.ts +++ b/js/test/unit/vector/date-vector-tests.ts @@ -17,12 +17,12 @@ import { DateDay, DateMillisecond, TimestampMillisecond, RecordBatchReader, Table, vectorFromArray } from 'apache-arrow'; -describe(`TimeStampVector`, () => { +describe(`TimestampVector`, () => { test(`Dates are stored in TimestampMillisecond`, () => { const date = new Date('2023-02-01T12:34:56Z'); const vec = vectorFromArray([date]); expect(vec.type).toBeInstanceOf(TimestampMillisecond); - expect(vec.get(0)).toBeInstanceOf(Date); + expect(vec.get(0)).toBe(date.valueOf()); }); }); From 55ef9ff516c182587c9e4a25418a3e2eb40cda95 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Tue, 2 Apr 2024 19:48:14 -0400 Subject: [PATCH 04/16] Describe why DateMillisecond is bad --- js/src/type.ts | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/js/src/type.ts b/js/src/type.ts index ae3aefa0259..a42552d65ad 100644 --- a/js/src/type.ts +++ b/js/src/type.ts @@ -349,7 +349,19 @@ export class Date_ extends DataType { /** @ignore */ export class DateDay extends Date_ { constructor() { super(DateUnit.DAY); } } -/** @ignore */ +/** + * A signed 64-bit date representing the elapsed time since UNIX epoch (1970-01-01) in milliseconds. + * According to the specification, this should be treated as the number of days, in milliseconds, since the UNIX epoch. 
+ * Therefore, values must be evenly divisible by `86_400_000` (the number of milliseconds in a standard day). + * + * Practically, validation that values of this type are evenly divisible by `86_400_000` is not enforced by this library + * for performance and usability reasons. + * + * Users should prefer to use {@link DateDay} to cleanly represent the number of days. For JS dates, + * {@link TimestampMillisecond} is the preferred type. + * + * @ignore + */ export class DateMillisecond extends Date_ { constructor() { super(DateUnit.MILLISECOND); } } /** @ignore */ From 9c4eefd44531cd980c11071fc8a71ecb8fa4cc8d Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Tue, 2 Apr 2024 18:41:46 -0400 Subject: [PATCH 05/16] Timestamps as bigints --- js/src/type.ts | 11 +++++------ js/src/visitor/get.ts | 11 +++++------ js/src/visitor/set.ts | 20 +++++--------------- js/test/generate-test-data.ts | 10 +++------- js/test/unit/vector/date-vector-tests.ts | 2 +- js/test/unit/vector/vector-tests.ts | 6 +++--- 6 files changed, 22 insertions(+), 38 deletions(-) diff --git a/js/src/type.ts b/js/src/type.ts index a42552d65ad..f5bf7edd64b 100644 --- a/js/src/type.ts +++ b/js/src/type.ts @@ -417,9 +417,9 @@ export class TimeNanosecond extends Time_ { constructor() { type Timestamps = Type.Timestamp | Type.TimestampSecond | Type.TimestampMillisecond | Type.TimestampMicrosecond | Type.TimestampNanosecond; /** @ignore */ interface Timestamp_ extends DataType { - TArray: Int32Array; - TValue: number; - ArrayType: TypedArrayConstructor; + TArray: BigInt64Array; + TValue: bigint; + ArrayType: BigIntArrayConstructor; } /** @ignore */ @@ -432,7 +432,7 @@ class Timestamp_ extends DataType { protected static [Symbol.toStringTag] = ((proto: Timestamp_) => { (proto).unit = null; (proto).timezone = null; - (proto).ArrayType = Int32Array; + (proto).ArrayType = BigInt64Array; return proto[Symbol.toStringTag] = 'Timestamp'; })(Timestamp_.prototype); } @@ -483,7 +483,7 @@ type Durations = Type.Duration 
| Type.DurationSecond | Type.DurationMillisecond export interface Duration extends DataType { TArray: BigInt64Array; TValue: bigint; - ArrayType: BigInt64Array; + ArrayType: BigIntArrayConstructor; } /** @ignore */ @@ -737,7 +737,6 @@ export function strideForType(type: DataType) { const t: any = type; switch (type.typeId) { case Type.Decimal: return (type as Decimal).bitWidth / 32; - case Type.Timestamp: return 2; case Type.Date: return 1 + (t as Date_).unit; case Type.Interval: return 1 + (t as Interval_).unit; // case Type.Int: return 1 + +((t as Int_).bitWidth > 32); diff --git a/js/src/visitor/get.ts b/js/src/visitor/get.ts index 3ab3bcb68c3..28d442678f4 100644 --- a/js/src/visitor/get.ts +++ b/js/src/visitor/get.ts @@ -106,9 +106,8 @@ function wrapGet(fn: (data: Data, _1: any) => any) { } /** @ignore */const epochDaysToMs = (data: Int32Array, index: number) => 86400000 * data[index]; + /** @ignore */const epochMillisecondsLongToMs = (data: Int32Array, index: number) => 4294967296 * (data[index + 1]) + (data[index] >>> 0); -/** @ignore */const epochMicrosecondsLongToMs = (data: Int32Array, index: number) => 4294967296 * (data[index + 1] / 1000) + ((data[index] >>> 0) / 1000); -/** @ignore */const epochNanosecondsLongToMs = (data: Int32Array, index: number) => 4294967296 * (data[index + 1] / 1000000) + ((data[index] >>> 0) / 1000000); /** @ignore */const epochMillisecondsToDate = (epochMs: number) => new Date(epochMs); /** @ignore */const epochDaysToDate = (data: Int32Array, index: number) => epochMillisecondsToDate(epochDaysToMs(data, index)); @@ -178,13 +177,13 @@ const getDate = (data: Data, index: number): T['TValue'] => ); /** @ignore */ -const getTimestampSecond = ({ values }: Data, index: number): T['TValue'] => 1000 * epochMillisecondsLongToMs(values, index * 2); +const getTimestampSecond = ({ values }: Data, index: number): T['TValue'] => 1000n * values[index]; /** @ignore */ -const getTimestampMillisecond = ({ values }: Data, index: number): 
T['TValue'] => epochMillisecondsLongToMs(values, index * 2); +const getTimestampMillisecond = ({ values }: Data, index: number): T['TValue'] => values[index]; /** @ignore */ -const getTimestampMicrosecond = ({ values }: Data, index: number): T['TValue'] => epochMicrosecondsLongToMs(values, index * 2); +const getTimestampMicrosecond = ({ values }: Data, index: number): T['TValue'] => values[index] / 1000n; /** @ignore */ -const getTimestampNanosecond = ({ values }: Data, index: number): T['TValue'] => epochNanosecondsLongToMs(values, index * 2); +const getTimestampNanosecond = ({ values }: Data, index: number): T['TValue'] => values[index] / 1000000n; /* istanbul ignore next */ /** @ignore */ const getTimestamp = (data: Data, index: number): T['TValue'] => { diff --git a/js/src/visitor/set.ts b/js/src/visitor/set.ts index 5dc42283c36..fa2f7ea3799 100644 --- a/js/src/visitor/set.ts +++ b/js/src/visitor/set.ts @@ -114,16 +114,6 @@ export const setEpochMsToMillisecondsLong = (data: Int32Array, index: number, ep data[index] = Math.floor(epochMs % 4294967296); data[index + 1] = Math.floor(epochMs / 4294967296); }; -/** @ignore */ -export const setEpochMsToMicrosecondsLong = (data: Int32Array, index: number, epochMs: number) => { - data[index] = Math.floor((epochMs * 1000) % 4294967296); - data[index + 1] = Math.floor((epochMs * 1000) / 4294967296); -}; -/** @ignore */ -export const setEpochMsToNanosecondsLong = (data: Int32Array, index: number, epochMs: number) => { - data[index] = Math.floor((epochMs * 1000000) % 4294967296); - data[index + 1] = Math.floor((epochMs * 1000000) / 4294967296); -}; /** @ignore */ export const setVariableWidthBytes = (values: Uint8Array, valueOffsets: T, index: number, value: Uint8Array) => { @@ -178,16 +168,16 @@ export const setDate = (data: Data, index: number, value: T[ }; /** @ignore */ -export const setTimestampSecond = ({ values }: Data, index: number, value: T['TValue']): void => setEpochMsToMillisecondsLong(values, index * 2, value 
/ 1000); +export const setTimestampSecond = ({ values }: Data, index: number, value: T['TValue'] | number): void => { values[index] = BigInt(value) / 1000n; }; /** @ignore */ -export const setTimestampMillisecond = ({ values }: Data, index: number, value: T['TValue']): void => setEpochMsToMillisecondsLong(values, index * 2, value); +export const setTimestampMillisecond = ({ values }: Data, index: number, value: T['TValue'] | number): void => { values[index] = BigInt(value); }; /** @ignore */ -export const setTimestampMicrosecond = ({ values }: Data, index: number, value: T['TValue']): void => setEpochMsToMicrosecondsLong(values, index * 2, value); +export const setTimestampMicrosecond = ({ values }: Data, index: number, value: T['TValue'] | number): void => { values[index] = BigInt(value) * 1000n; }; /** @ignore */ -export const setTimestampNanosecond = ({ values }: Data, index: number, value: T['TValue']): void => setEpochMsToNanosecondsLong(values, index * 2, value); +export const setTimestampNanosecond = ({ values }: Data, index: number, value: T['TValue'] | number): void => { values[index] = BigInt(value) * 1000000n; }; /* istanbul ignore next */ /** @ignore */ -export const setTimestamp = (data: Data, index: number, value: T['TValue']): void => { +export const setTimestamp = (data: Data, index: number, value: T['TValue'] | number): void => { switch (data.type.unit) { case TimeUnit.SECOND: return setTimestampSecond(data as Data, index, value); case TimeUnit.MILLISECOND: return setTimestampMillisecond(data as Data, index, value); diff --git a/js/test/generate-test-data.ts b/js/test/generate-test-data.ts index 8e6e47de836..8be2f8bdad1 100644 --- a/js/test/generate-test-data.ts +++ b/js/test/generate-test-data.ts @@ -415,7 +415,7 @@ function generateTimestamp(this: TestDataVectorGenerator, t type.unit === TimeUnit.MICROSECOND ? 1000000 : type.unit === TimeUnit.MILLISECOND ? 
1000 : 1; const data = createTimestamp(length, nullBitmap, multiple, values); - return { values: () => values, vector: new Vector([makeData({ type, length, nullCount, nullBitmap, data })]) }; + return { values: () => values.map(v => v !== null ? BigInt(v) : v), vector: new Vector([makeData({ type, length, nullCount, nullBitmap, data })]) }; } function generateTime(this: TestDataVectorGenerator, type: T, length = 100, nullCount = Math.trunc(length * 0.2)): GeneratedVector { @@ -767,15 +767,11 @@ function createDate64(length: number, nullBitmap: Uint8Array, values: (number | function createTimestamp(length: number, nullBitmap: Uint8Array, multiple: number, values: (number | null)[] = []) { const mult = 86400 * multiple; - const data = new Int32Array(length * 2).fill(0); + const data = new BigInt64Array(length); const data32 = createDate32(length, nullBitmap, values); iterateBitmap(length, nullBitmap, (i, valid) => { if (valid) { - const value = data32[i] * mult; - const hi = Math.trunc(value / 4294967296); - const lo = Math.trunc(value - 4294967296 * hi); - data[i * 2 + 0] = lo; - data[i * 2 + 1] = hi; + data[i] = BigInt(data32[i] * mult); } }); return data; diff --git a/js/test/unit/vector/date-vector-tests.ts b/js/test/unit/vector/date-vector-tests.ts index e5cd49933ea..5b201d70ad0 100644 --- a/js/test/unit/vector/date-vector-tests.ts +++ b/js/test/unit/vector/date-vector-tests.ts @@ -22,7 +22,7 @@ describe(`TimestampVector`, () => { const date = new Date('2023-02-01T12:34:56Z'); const vec = vectorFromArray([date]); expect(vec.type).toBeInstanceOf(TimestampMillisecond); - expect(vec.get(0)).toBe(date.valueOf()); + expect(vec.get(0)).toBe(date.getTime()); }); }); diff --git a/js/test/unit/vector/vector-tests.ts b/js/test/unit/vector/vector-tests.ts index bfcf0d85478..10558b680bd 100644 --- a/js/test/unit/vector/vector-tests.ts +++ b/js/test/unit/vector/vector-tests.ts @@ -250,12 +250,12 @@ describe(`toArray()`, () => { }); test(`when stride is 2`, () => { - let d1 = 
vectorFromArray([0, 1, 2], new Timestamp(TimeUnit.MILLISECOND)).data[0]; - let d2 = vectorFromArray([3, 4, 5], new Timestamp(TimeUnit.MILLISECOND)).data[0]; + const d1 = vectorFromArray([0, 1, 2], new DateMillisecond()).data[0]; + const d2 = vectorFromArray([3, 4, 5], new DateMillisecond()).data[0]; const vector = new Vector([d1, d2]); - let array = Array.from(vector.toArray()); + const array = Array.from(vector.toArray()); expect(array).toHaveLength(6 * 2); expect(Array.from(array)).toMatchObject([0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0]); }); From 432e1eb6fdb52167084d52f56f4d1bfeaa8b1c23 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Tue, 2 Apr 2024 18:48:23 -0400 Subject: [PATCH 06/16] Numbers for timestamps in external interface --- js/src/type.ts | 2 +- js/src/util/bigint.ts | 13 +++++++++++++ js/src/visitor/get.ts | 10 +++++----- js/src/visitor/set.ts | 10 +++++----- js/test/generate-test-data.ts | 2 +- js/test/unit/vector/vector-tests.ts | 2 +- 6 files changed, 26 insertions(+), 13 deletions(-) diff --git a/js/src/type.ts b/js/src/type.ts index f5bf7edd64b..6aa471a7250 100644 --- a/js/src/type.ts +++ b/js/src/type.ts @@ -418,7 +418,7 @@ type Timestamps = Type.Timestamp | Type.TimestampSecond | Type.TimestampMillisec /** @ignore */ interface Timestamp_ extends DataType { TArray: BigInt64Array; - TValue: bigint; + TValue: number; ArrayType: BigIntArrayConstructor; } diff --git a/js/src/util/bigint.ts b/js/src/util/bigint.ts index 5af2f7f0523..470b83f5fba 100644 --- a/js/src/util/bigint.ts +++ b/js/src/util/bigint.ts @@ -24,3 +24,16 @@ export function bigIntToNumber(number: bigint | number): number { } return Number(number); } + +/** + * Divides the bigint number by the divisor and returns the result as a number. + * Dividing bigints always results in bigints so we don't get the remainder. + * This function gives us the remainder but assumes that the result fits into a number. + * + * @param number The number to divide. + * @param divisor The divisor. 
+ * @returns The result of the division as a number. + */ +export function divideBigInts(number: bigint, divisor: bigint): number { + return bigIntToNumber(number / divisor) + bigIntToNumber(number % divisor) / bigIntToNumber(divisor); +} diff --git a/js/src/visitor/get.ts b/js/src/visitor/get.ts index 28d442678f4..71bbbcae541 100644 --- a/js/src/visitor/get.ts +++ b/js/src/visitor/get.ts @@ -21,7 +21,7 @@ import { Vector } from '../vector.js'; import { Visitor } from '../visitor.js'; import { MapRow } from '../row/map.js'; import { StructRow, StructRowProxy } from '../row/struct.js'; -import { bigIntToNumber } from '../util/bigint.js'; +import { bigIntToNumber, divideBigInts } from '../util/bigint.js'; import { decodeUtf8 } from '../util/utf8.js'; import { TypeToDataType } from '../interfaces.js'; import { uint16ToFloat64 } from '../util/math.js'; @@ -177,13 +177,13 @@ const getDate = (data: Data, index: number): T['TValue'] => ); /** @ignore */ -const getTimestampSecond = ({ values }: Data, index: number): T['TValue'] => 1000n * values[index]; +const getTimestampSecond = ({ values }: Data, index: number): T['TValue'] => 1000 * bigIntToNumber(values[index]); /** @ignore */ -const getTimestampMillisecond = ({ values }: Data, index: number): T['TValue'] => values[index]; +const getTimestampMillisecond = ({ values }: Data, index: number): T['TValue'] => bigIntToNumber(values[index]); /** @ignore */ -const getTimestampMicrosecond = ({ values }: Data, index: number): T['TValue'] => values[index] / 1000n; +const getTimestampMicrosecond = ({ values }: Data, index: number): T['TValue'] => divideBigInts(values[index], 1000n); /** @ignore */ -const getTimestampNanosecond = ({ values }: Data, index: number): T['TValue'] => values[index] / 1000000n; +const getTimestampNanosecond = ({ values }: Data, index: number): T['TValue'] => divideBigInts(values[index], 1000000n); /* istanbul ignore next */ /** @ignore */ const getTimestamp = (data: Data, index: number): T['TValue'] => { 
diff --git a/js/src/visitor/set.ts b/js/src/visitor/set.ts index fa2f7ea3799..c8d4c318948 100644 --- a/js/src/visitor/set.ts +++ b/js/src/visitor/set.ts @@ -168,16 +168,16 @@ export const setDate = (data: Data, index: number, value: T[ }; /** @ignore */ -export const setTimestampSecond = ({ values }: Data, index: number, value: T['TValue'] | number): void => { values[index] = BigInt(value) / 1000n; }; +export const setTimestampSecond = ({ values }: Data, index: number, value: T['TValue']): void => { values[index] = BigInt(value / 1000); }; /** @ignore */ -export const setTimestampMillisecond = ({ values }: Data, index: number, value: T['TValue'] | number): void => { values[index] = BigInt(value); }; +export const setTimestampMillisecond = ({ values }: Data, index: number, value: T['TValue']): void => { values[index] = BigInt(value); }; /** @ignore */ -export const setTimestampMicrosecond = ({ values }: Data, index: number, value: T['TValue'] | number): void => { values[index] = BigInt(value) * 1000n; }; +export const setTimestampMicrosecond = ({ values }: Data, index: number, value: T['TValue']): void => { values[index] = BigInt(value * 1000); }; /** @ignore */ -export const setTimestampNanosecond = ({ values }: Data, index: number, value: T['TValue'] | number): void => { values[index] = BigInt(value) * 1000000n; }; +export const setTimestampNanosecond = ({ values }: Data, index: number, value: T['TValue']): void => { values[index] = BigInt(value * 1000000); }; /* istanbul ignore next */ /** @ignore */ -export const setTimestamp = (data: Data, index: number, value: T['TValue'] | number): void => { +export const setTimestamp = (data: Data, index: number, value: T['TValue']): void => { switch (data.type.unit) { case TimeUnit.SECOND: return setTimestampSecond(data as Data, index, value); case TimeUnit.MILLISECOND: return setTimestampMillisecond(data as Data, index, value); diff --git a/js/test/generate-test-data.ts b/js/test/generate-test-data.ts index 
8be2f8bdad1..a99c810963c 100644 --- a/js/test/generate-test-data.ts +++ b/js/test/generate-test-data.ts @@ -415,7 +415,7 @@ function generateTimestamp(this: TestDataVectorGenerator, t type.unit === TimeUnit.MICROSECOND ? 1000000 : type.unit === TimeUnit.MILLISECOND ? 1000 : 1; const data = createTimestamp(length, nullBitmap, multiple, values); - return { values: () => values.map(v => v !== null ? BigInt(v) : v), vector: new Vector([makeData({ type, length, nullCount, nullBitmap, data })]) }; + return { values: () => values, vector: new Vector([makeData({ type, length, nullCount, nullBitmap, data })]) }; } function generateTime(this: TestDataVectorGenerator, type: T, length = 100, nullCount = Math.trunc(length * 0.2)): GeneratedVector { diff --git a/js/test/unit/vector/vector-tests.ts b/js/test/unit/vector/vector-tests.ts index 10558b680bd..159a98bda7b 100644 --- a/js/test/unit/vector/vector-tests.ts +++ b/js/test/unit/vector/vector-tests.ts @@ -16,7 +16,7 @@ // under the License. import { - Bool, DateDay, DateMillisecond, Dictionary, Float64, Int32, List, makeVector, Struct, Timestamp, TimeUnit, Utf8, LargeUtf8, util, Vector, vectorFromArray + Bool, DateDay, DateMillisecond, Dictionary, Float64, Int32, List, makeVector, Struct, Utf8, LargeUtf8, util, Vector, vectorFromArray } from 'apache-arrow'; describe(`makeVectorFromArray`, () => { From 0ffb3c1932bde0bd88a6c9d13b2e358f57734955 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Tue, 2 Apr 2024 18:56:05 -0400 Subject: [PATCH 07/16] Test input and output explicitly --- js/test/unit/vector/date-vector-tests.ts | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/js/test/unit/vector/date-vector-tests.ts b/js/test/unit/vector/date-vector-tests.ts index 5b201d70ad0..59f30832fe4 100644 --- a/js/test/unit/vector/date-vector-tests.ts +++ b/js/test/unit/vector/date-vector-tests.ts @@ -15,7 +15,10 @@ // specific language governing permissions and limitations // under the License. 
-import { DateDay, DateMillisecond, TimestampMillisecond, RecordBatchReader, Table, vectorFromArray } from 'apache-arrow'; +import { + DateDay, DateMillisecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond, RecordBatchReader, + Table, vectorFromArray +} from 'apache-arrow'; describe(`TimestampVector`, () => { test(`Dates are stored in TimestampMillisecond`, () => { @@ -24,6 +27,22 @@ describe(`TimestampVector`, () => { expect(vec.type).toBeInstanceOf(TimestampMillisecond); expect(vec.get(0)).toBe(date.getTime()); }); + + test(`Correctly get back TimestampMicrosecond from Date`, () => { + const date = new Date('2023-02-01T12:34:56Z'); + const vec = vectorFromArray([date, 0.5], new TimestampMicrosecond); + expect(vec.type).toBeInstanceOf(TimestampMicrosecond); + expect(vec.get(0)).toBe(date.getTime()); + expect(vec.get(1)).toBe(0.5); + }); + + test(`Correctly get back TimestampNanosecond from Date`, () => { + const date = new Date('2023-02-01T12:34:56Z'); + const vec = vectorFromArray([date, 0.5], new TimestampNanosecond); + expect(vec.type).toBeInstanceOf(TimestampNanosecond); + expect(vec.get(0)).toBe(date.getTime()); + expect(vec.get(1)).toBe(0.5); + }); }); describe(`DateVector`, () => { From ee5ce9da9daf797b2b53d88b510b7a34b3bb006d Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Tue, 2 Apr 2024 19:33:00 -0400 Subject: [PATCH 08/16] Only use native iterator for millisecond timestamps because those are the only ones stored correctly --- js/src/visitor/iterator.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/js/src/visitor/iterator.ts b/js/src/visitor/iterator.ts index bf7e9d1591b..54398b0434f 100644 --- a/js/src/visitor/iterator.ts +++ b/js/src/visitor/iterator.ts @@ -17,7 +17,7 @@ import { Vector } from '../vector.js'; import { Visitor } from '../visitor.js'; -import { Type, Precision } from '../enum.js'; +import { Type, Precision, TimeUnit } from '../enum.js'; import { TypeToDataType } from 
'../interfaces.js'; import { DataType, Dictionary, @@ -101,10 +101,10 @@ function vectorIterator(vector: Vector): IterableIterator // Fast case, defer to native iterators if possible if (vector.nullCount === 0 && vector.stride === 1 && ( - (type.typeId === Type.Timestamp) || - (type instanceof Int && (type as Int).bitWidth !== 64) || - (type instanceof Time && (type as Time).bitWidth !== 64) || - (type instanceof Float && (type as Float).precision !== Precision.HALF) + (DataType.isTimestamp(type)) && type.unit === TimeUnit.MILLISECOND || + (DataType.isInt(type) && type.bitWidth !== 64) || + (DataType.isTime(type) && type.bitWidth !== 64) || + (DataType.isFloat(type) && type.precision !== Precision.HALF) )) { return new ChunkedIterator(vector.data.length, (chunkIndex) => { const data = vector.data[chunkIndex]; From 22684108a59b8d66afcff014643368b488a906a7 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Tue, 2 Apr 2024 19:41:21 -0400 Subject: [PATCH 09/16] fix iterator --- js/src/visitor/iterator.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/js/src/visitor/iterator.ts b/js/src/visitor/iterator.ts index 54398b0434f..80a5323bad8 100644 --- a/js/src/visitor/iterator.ts +++ b/js/src/visitor/iterator.ts @@ -101,7 +101,8 @@ function vectorIterator(vector: Vector): IterableIterator // Fast case, defer to native iterators if possible if (vector.nullCount === 0 && vector.stride === 1 && ( - (DataType.isTimestamp(type)) && type.unit === TimeUnit.MILLISECOND || + // Don't defer to native iterator for timestamps since Numbers are expected + // (DataType.isTimestamp(type)) && type.unit === TimeUnit.MILLISECOND || (DataType.isInt(type) && type.bitWidth !== 64) || (DataType.isTime(type) && type.bitWidth !== 64) || (DataType.isFloat(type) && type.precision !== Precision.HALF) From 5e0aa8f0668021e953cc1b4275ad670914a02cbe Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Tue, 2 Apr 2024 19:41:51 -0400 Subject: [PATCH 10/16] rm bad test --- 
js/test/unit/vector/vector-tests.ts | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/js/test/unit/vector/vector-tests.ts b/js/test/unit/vector/vector-tests.ts index 159a98bda7b..70b24bcc144 100644 --- a/js/test/unit/vector/vector-tests.ts +++ b/js/test/unit/vector/vector-tests.ts @@ -248,17 +248,6 @@ describe(`toArray()`, () => { const array = vector.toArray(); expect(array).toHaveLength(26); }); - - test(`when stride is 2`, () => { - const d1 = vectorFromArray([0, 1, 2], new DateMillisecond()).data[0]; - const d2 = vectorFromArray([3, 4, 5], new DateMillisecond()).data[0]; - - const vector = new Vector([d1, d2]); - - const array = Array.from(vector.toArray()); - expect(array).toHaveLength(6 * 2); - expect(Array.from(array)).toMatchObject([0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0]); - }); }); // Creates some basic tests for the given vector. From 2c726017ba9f4c37d2b1169568bcbcb7784f927c Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Tue, 2 Apr 2024 23:15:19 -0400 Subject: [PATCH 11/16] Store dates in 32 or 64 bits and don't auto-convert to Dates --- js/src/type.ts | 17 ++++++++++++++--- js/src/visitor/get.ts | 10 ++-------- js/src/visitor/iterator.ts | 2 +- js/src/visitor/set.ts | 7 +------ js/test/generate-test-data.ts | 16 ++++------------ js/test/unit/builders/date-tests.ts | 20 +++++++++----------- js/test/unit/builders/utils.ts | 19 +++++-------------- js/test/unit/vector/date-vector-tests.ts | 6 +++--- js/test/unit/vector/vector-tests.ts | 4 ++-- 9 files changed, 41 insertions(+), 60 deletions(-) diff --git a/js/src/type.ts b/js/src/type.ts index 6aa471a7250..88aadb864ec 100644 --- a/js/src/type.ts +++ b/js/src/type.ts @@ -333,16 +333,28 @@ export class Decimal extends DataType { /** @ignore */ export type Dates = Type.Date | Type.DateDay | Type.DateMillisecond; /** @ignore */ -export interface Date_ extends DataType { TArray: Int32Array; TValue: Date; ArrayType: TypedArrayConstructor } +type DateType = { + [Type.Date]: { TArray: Int32Array | 
BigInt64Array }; + [Type.DateDay]: { TArray: Int32Array }; + [Type.DateMillisecond]: { TArray: BigInt64Array }; +}; +/** @ignore */ +export interface Date_ extends DataType { + TArray: DateType[T]['TArray']; + TValue: number; +} /** @ignore */ export class Date_ extends DataType { constructor(public readonly unit: DateUnit) { super(Type.Date as T); } public toString() { return `Date${(this.unit + 1) * 32}<${DateUnit[this.unit]}>`; } + + public get ArrayType() { + return this.unit === DateUnit.DAY ? Int32Array : BigInt64Array; + } protected static [Symbol.toStringTag] = ((proto: Date_) => { (proto).unit = null; - (proto).ArrayType = Int32Array; return proto[Symbol.toStringTag] = 'Date'; })(Date_.prototype); } @@ -737,7 +749,6 @@ export function strideForType(type: DataType) { const t: any = type; switch (type.typeId) { case Type.Decimal: return (type as Decimal).bitWidth / 32; - case Type.Date: return 1 + (t as Date_).unit; case Type.Interval: return 1 + (t as Interval_).unit; // case Type.Int: return 1 + +((t as Int_).bitWidth > 32); // case Type.Time: return 1 + +((t as Time_).bitWidth > 32); diff --git a/js/src/visitor/get.ts b/js/src/visitor/get.ts index 71bbbcae541..72fd26dd45e 100644 --- a/js/src/visitor/get.ts +++ b/js/src/visitor/get.ts @@ -107,12 +107,6 @@ function wrapGet(fn: (data: Data, _1: any) => any) { /** @ignore */const epochDaysToMs = (data: Int32Array, index: number) => 86400000 * data[index]; -/** @ignore */const epochMillisecondsLongToMs = (data: Int32Array, index: number) => 4294967296 * (data[index + 1]) + (data[index] >>> 0); - -/** @ignore */const epochMillisecondsToDate = (epochMs: number) => new Date(epochMs); -/** @ignore */const epochDaysToDate = (data: Int32Array, index: number) => epochMillisecondsToDate(epochDaysToMs(data, index)); -/** @ignore */const epochMillisecondsLongToDate = (data: Int32Array, index: number) => epochMillisecondsToDate(epochMillisecondsLongToMs(data, index)); - /** @ignore */ const getNull = (_data: Data, 
_index: number): T['TValue'] => null; /** @ignore */ @@ -138,9 +132,9 @@ type Numeric1X = Int8 | Int16 | Int32 | Uint8 | Uint16 | Uint32 | Float32 | Floa type Numeric2X = Int64 | Uint64; /** @ignore */ -const getDateDay = ({ values }: Data, index: number): T['TValue'] => epochDaysToDate(values, index); +const getDateDay = ({ values }: Data, index: number): T['TValue'] => epochDaysToMs(values, index); /** @ignore */ -const getDateMillisecond = ({ values }: Data, index: number): T['TValue'] => epochMillisecondsLongToDate(values, index * 2); +const getDateMillisecond = ({ values }: Data, index: number): T['TValue'] => bigIntToNumber(values[index]); /** @ignore */ const getNumeric = ({ stride, values }: Data, index: number): T['TValue'] => values[stride * index]; /** @ignore */ diff --git a/js/src/visitor/iterator.ts b/js/src/visitor/iterator.ts index 80a5323bad8..2a55f743679 100644 --- a/js/src/visitor/iterator.ts +++ b/js/src/visitor/iterator.ts @@ -17,7 +17,7 @@ import { Vector } from '../vector.js'; import { Visitor } from '../visitor.js'; -import { Type, Precision, TimeUnit } from '../enum.js'; +import { Type, Precision } from '../enum.js'; import { TypeToDataType } from '../interfaces.js'; import { DataType, Dictionary, diff --git a/js/src/visitor/set.ts b/js/src/visitor/set.ts index c8d4c318948..5f2a3af4df1 100644 --- a/js/src/visitor/set.ts +++ b/js/src/visitor/set.ts @@ -109,11 +109,6 @@ function wrapSet(fn: (data: Data, _1: any, _2: any) => vo /** @ignore */ export const setEpochMsToDays = (data: Int32Array, index: number, epochMs: number) => { data[index] = Math.floor(epochMs / 86400000); }; -/** @ignore */ -export const setEpochMsToMillisecondsLong = (data: Int32Array, index: number, epochMs: number) => { - data[index] = Math.floor(epochMs % 4294967296); - data[index + 1] = Math.floor(epochMs / 4294967296); -}; /** @ignore */ export const setVariableWidthBytes = (values: Uint8Array, valueOffsets: T, index: number, value: Uint8Array) => { @@ -151,7 +146,7 @@ 
export const setAnyFloat = (data: Data, index: number, value /** @ignore */ export const setDateDay = ({ values }: Data, index: number, value: T['TValue']): void => { setEpochMsToDays(values, index, value.valueOf()); }; /** @ignore */ -export const setDateMillisecond = ({ values }: Data, index: number, value: T['TValue']): void => { setEpochMsToMillisecondsLong(values, index * 2, value.valueOf()); }; +export const setDateMillisecond = ({ values }: Data, index: number, value: T['TValue']): void => { values[index] = BigInt(value); }; /** @ignore */ export const setFixedSizeBinary = ({ stride, values }: Data, index: number, value: T['TValue']): void => { values.set(value.subarray(0, stride), stride * index); }; diff --git a/js/test/generate-test-data.ts b/js/test/generate-test-data.ts index a99c810963c..3796bb3f63c 100644 --- a/js/test/generate-test-data.ts +++ b/js/test/generate-test-data.ts @@ -402,10 +402,7 @@ function generateDate(this: TestDataVectorGenerator, type: T, l const data = type.unit === DateUnit.DAY ? createDate32(length, nullBitmap, values) : createDate64(length, nullBitmap, values); - return { - values: () => values.map((x) => x == null ? 
null : new Date(x)), - vector: new Vector([makeData({ type, length, nullCount, nullBitmap, data })]) - }; + return { values: () => values, vector: new Vector([makeData({ type, length, nullCount, nullBitmap, data })]) }; } function generateTimestamp(this: TestDataVectorGenerator, type: T, length = 100, nullCount = Math.trunc(length * 0.2)): GeneratedVector { @@ -750,16 +747,11 @@ function createDate32(length: number, nullBitmap: Uint8Array, values: (number | } function createDate64(length: number, nullBitmap: Uint8Array, values: (number | null)[] = []) { - const data = new Int32Array(length * 2).fill(0); + const data = new BigInt64Array(length).fill(0n); const data32 = createDate32(length, nullBitmap, values); iterateBitmap(length, nullBitmap, (i, valid) => { if (valid) { - const value = data32[i] * 86400000; - const hi = Math.trunc(value / 4294967296); - const lo = Math.trunc(value - 4294967296 * hi); - values[i] = value; - data[i * 2 + 0] = lo; - data[i * 2 + 1] = hi; + data[i] = BigInt(data32[i] * 86400000); } }); return data; @@ -767,7 +759,7 @@ function createDate64(length: number, nullBitmap: Uint8Array, values: (number | function createTimestamp(length: number, nullBitmap: Uint8Array, multiple: number, values: (number | null)[] = []) { const mult = 86400 * multiple; - const data = new BigInt64Array(length); + const data = new BigInt64Array(length).fill(0n); const data32 = createDate32(length, nullBitmap, values); iterateBitmap(length, nullBitmap, (i, valid) => { if (valid) { diff --git a/js/test/unit/builders/date-tests.ts b/js/test/unit/builders/date-tests.ts index 318bb7dfc4f..3fd3f7832f3 100644 --- a/js/test/unit/builders/date-tests.ts +++ b/js/test/unit/builders/date-tests.ts @@ -17,10 +17,8 @@ import 'web-streams-polyfill'; import { - date32sNoNulls, - date32sWithNulls, - date64sNoNulls, - date64sWithNulls, + dateNoNulls, + dateWithNulls, encodeAll, encodeEach, encodeEachDOM, @@ -41,14 +39,14 @@ describe('DateDayBuilder', () => { testDOMStreams && 
runTestsWithEncoder('encodeEachDOM: 25', encodeEachDOM(() => new DateDay(), 25)); testNodeStreams && runTestsWithEncoder('encodeEachNode: 25', encodeEachNode(() => new DateDay(), 25)); - function runTestsWithEncoder(name: string, encode: (vals: (Date | null)[], nullVals?: any[]) => Promise>) { + function runTestsWithEncoder(name: string, encode: (vals: (number | null)[], nullVals?: any[]) => Promise>) { describe(`${encode.name} ${name}`, () => { it(`encodes dates no nulls`, async () => { - const vals = date32sNoNulls(20); + const vals = dateNoNulls(20); validateVector(vals, await encode(vals, []), []); }); it(`encodes dates with nulls`, async () => { - const vals = date32sWithNulls(20); + const vals = dateWithNulls(20); validateVector(vals, await encode(vals, [null]), [null]); }); }); @@ -63,14 +61,14 @@ describe('DateMillisecondBuilder', () => { testDOMStreams && runTestsWithEncoder('encodeEachDOM: 25', encodeEachDOM(() => new DateMillisecond(), 25)); testNodeStreams && runTestsWithEncoder('encodeEachNode: 25', encodeEachNode(() => new DateMillisecond(), 25)); - function runTestsWithEncoder(name: string, encode: (vals: (Date | null)[], nullVals?: any[]) => Promise>) { + function runTestsWithEncoder(name: string, encode: (vals: (number | null)[], nullVals?: any[]) => Promise>) { describe(`${encode.name} ${name}`, () => { it(`encodes dates no nulls`, async () => { - const vals = date64sNoNulls(20); + const vals = dateNoNulls(20); validateVector(vals, await encode(vals, []), []); }); it(`encodes dates with nulls`, async () => { - const vals = date64sWithNulls(20); + const vals = dateWithNulls(20); validateVector(vals, await encode(vals, [null]), [null]); }); }); @@ -100,7 +98,7 @@ describe('DateMillisecondBuilder with nulls', () => { '2019-03-10T21:15:32.237Z', '2019-03-21T07:25:34.864Z', null - ].map((x) => x === null ? x : new Date(x)); + ].map((x) => x === null ? 
x : new Date(x).getTime()); it(`encodes dates with nulls`, async () => { const vals = dates.slice(); validateVector(vals, await encode(vals, [null]), [null]); diff --git a/js/test/unit/builders/utils.ts b/js/test/unit/builders/utils.ts index db4e80d0027..5bf2f26a116 100644 --- a/js/test/unit/builders/utils.ts +++ b/js/test/unit/builders/utils.ts @@ -32,22 +32,14 @@ const randnulls = (values: T[], n: TNull = null) => values export const randomBytes = (length: number) => fillRandom(Uint8Array, length); export const stringsNoNulls = (length = 20) => Array.from({ length }, (_) => randomString(1 + (Math.trunc(Math.random() * 19)))); -export const timestamp32sNoNulls = (length = 20, now = Math.trunc(Date.now() / 86400000)) => +export const timestampNoNulls = (length = 20, now = Math.trunc(Date.now() / 86400000)) => Array.from({ length }, (_) => (Math.trunc(now + (rand() * 10000 * (rand() > 0.5 ? -1 : 1)))) * 86400000); -export const timestamp64sNoNulls = (length = 20, now = Date.now()) => Array.from({ length }, (_) => { - const ms = now + (Math.trunc(rand() * 31557600000 * (rand() > 0.5 ? 
-1 : 1))); - return new Int32Array([Math.trunc(ms % 4294967296), Math.trunc(ms / 4294967296)]); -}); - -export const timestamp32sWithNulls = (length = 20) => randnulls(timestamp32sNoNulls(length), null); -export const timestamp64sWithNulls = (length = 20) => randnulls(timestamp64sNoNulls(length), null); -export const timestamp32sWithMaxInts = (length = 20) => randnulls(timestamp32sNoNulls(length), 0x7FFFFFFF); -export const timestamp64sWithMaxInts = (length = 20) => randnulls(timestamp64sNoNulls(length), 9223372034707292159n); +export const timestampWithNulls = (length = 20) => randnulls(timestampNoNulls(length), null); export const boolsNoNulls = (length = 20) => Array.from({ length }, () => rand() > 0.5); -export const date32sNoNulls = (length = 20) => timestamp32sNoNulls(length).map((x) => new Date(x)); -export const date64sNoNulls = (length = 20) => timestamp64sNoNulls(length).map((x) => new Date(4294967296 * x[1] + (x[0] >>> 0))); + +export const dateNoNulls = (length = 20) => timestampNoNulls(length); export const int8sNoNulls = (length = 20) => Array.from(new Int8Array(randomBytes(length * Int8Array.BYTES_PER_ELEMENT).buffer)); export const int16sNoNulls = (length = 20) => Array.from(new Int16Array(randomBytes(length * Int16Array.BYTES_PER_ELEMENT).buffer)); export const int32sNoNulls = (length = 20) => Array.from(new Int32Array(randomBytes(length * Int32Array.BYTES_PER_ELEMENT).buffer)); @@ -66,8 +58,7 @@ export const stringsWithNulls = (length = 20) => randnulls(stringsNoNulls(length export const stringsWithEmpties = (length = 20) => randnulls(stringsNoNulls(length), '\0'); export const boolsWithNulls = (length = 20) => randnulls(boolsNoNulls(length), null); -export const date32sWithNulls = (length = 20) => randnulls(date32sNoNulls(length), null); -export const date64sWithNulls = (length = 20) => randnulls(date64sNoNulls(length), null); +export const dateWithNulls = (length = 20) => randnulls(dateNoNulls(length), null); export const int8sWithNulls = 
(length = 20) => randnulls(int8sNoNulls(length), null); export const int16sWithNulls = (length = 20) => randnulls(int16sNoNulls(length), null); export const int32sWithNulls = (length = 20) => randnulls(int32sNoNulls(length), null); diff --git a/js/test/unit/vector/date-vector-tests.ts b/js/test/unit/vector/date-vector-tests.ts index 59f30832fe4..7b016a2f7fe 100644 --- a/js/test/unit/vector/date-vector-tests.ts +++ b/js/test/unit/vector/date-vector-tests.ts @@ -52,7 +52,7 @@ describe(`DateVector`, () => { const date32 = table.getChildAt(0)!; for (const date of date32) { const millis = expectedMillis.shift(); - expect(date).toEqual(millis === null ? null : new Date(millis!)); + expect(date).toEqual(millis); } }); @@ -62,7 +62,7 @@ describe(`DateVector`, () => { const date64 = table.getChildAt(1)!; for (const date of date64) { const millis = expectedMillis.shift(); - expect(date).toEqual(millis === null ? null : new Date(millis!)); + expect(date).toEqual(millis); } }); @@ -70,7 +70,7 @@ describe(`DateVector`, () => { const dates = [new Date(1950, 1, 0)]; const vec = vectorFromArray(dates, new DateMillisecond()); for (const date of vec) { - expect(date).toEqual(dates.shift()); + expect(date).toEqual(dates.shift()?.getTime()); } }); }); diff --git a/js/test/unit/vector/vector-tests.ts b/js/test/unit/vector/vector-tests.ts index 70b24bcc144..c47705ddafc 100644 --- a/js/test/unit/vector/vector-tests.ts +++ b/js/test/unit/vector/vector-tests.ts @@ -89,7 +89,7 @@ describe(`DateVector`, () => { new Date(1988, 3, 25, 4, 5, 6), new Date(1987, 2, 24, 7, 8, 9), new Date(2018, 4, 12, 17, 30, 0) - ]; + ].map(v => v.getTime()); const vector = vectorFromArray(values, new DateMillisecond); basicVectorTests(vector, values, extras); }); @@ -100,7 +100,7 @@ describe(`DateVector`, () => { new Date(Date.UTC(1988, 3, 25)), new Date(Date.UTC(1987, 2, 24)), new Date(Date.UTC(2018, 4, 12)) - ]; + ].map(v => v.getTime()); const vector = vectorFromArray(values, new DateDay); 
basicVectorTests(vector, values, extras); From 1802bc5771280c30176435fb03b5b1b057661fb2 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Thu, 4 Apr 2024 16:53:19 -0400 Subject: [PATCH 12/16] Add TimestampSecond test --- js/test/unit/vector/date-vector-tests.ts | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/js/test/unit/vector/date-vector-tests.ts b/js/test/unit/vector/date-vector-tests.ts index 7b016a2f7fe..4a6ffc1a825 100644 --- a/js/test/unit/vector/date-vector-tests.ts +++ b/js/test/unit/vector/date-vector-tests.ts @@ -16,7 +16,7 @@ // under the License. import { - DateDay, DateMillisecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond, RecordBatchReader, + DateDay, TimestampSecond, DateMillisecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond, RecordBatchReader, Table, vectorFromArray } from 'apache-arrow'; @@ -28,6 +28,13 @@ describe(`TimestampVector`, () => { expect(vec.get(0)).toBe(date.getTime()); }); + test(`Correctly get back TimestampSecond from Date`, () => { + const date = new Date('2023-02-01T12:34:56Z'); + const vec = vectorFromArray([date], new TimestampSecond); + expect(vec.type).toBeInstanceOf(TimestampSecond); + expect(vec.get(0)).toBe(date.getTime()); + }); + test(`Correctly get back TimestampMicrosecond from Date`, () => { const date = new Date('2023-02-01T12:34:56Z'); const vec = vectorFromArray([date, 0.5], new TimestampMicrosecond); From aca95f0e396a7149c5dffc302bdca8b1cd5a0f84 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Sun, 14 Apr 2024 21:16:46 -0400 Subject: [PATCH 13/16] fix merge --- js/test/unit/vector/vector-tests.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/test/unit/vector/vector-tests.ts b/js/test/unit/vector/vector-tests.ts index b2d7e69ef6b..01b7d92eeb1 100644 --- a/js/test/unit/vector/vector-tests.ts +++ b/js/test/unit/vector/vector-tests.ts @@ -16,7 +16,7 @@ // under the License. 
import { - Bool, DateDay, DateMillisecond, Dictionary, Float64, Int32, List, makeVector, Struct, Timestamp, TimeUnit, Utf8, LargeUtf8, util, Vector, vectorFromArray, makeData + Bool, DateDay, DateMillisecond, Dictionary, Float64, Int32, List, makeVector, Struct, Utf8, LargeUtf8, util, Vector, vectorFromArray, makeData } from 'apache-arrow'; describe(`makeVectorFromArray`, () => { From 8c86cf93547275a335faa8e018841cccb09918c1 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Sun, 14 Apr 2024 22:52:42 -0400 Subject: [PATCH 14/16] generate timestamps with meaningful accuracy --- js/test/generate-test-data.ts | 37 +++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/js/test/generate-test-data.ts b/js/test/generate-test-data.ts index 3796bb3f63c..65719f875c3 100644 --- a/js/test/generate-test-data.ts +++ b/js/test/generate-test-data.ts @@ -646,6 +646,7 @@ type TypedArrayConstructor = const rand = Math.random.bind(Math); +const randSign = () => rand() > 0.5 ? -1 : 1; const randomBytes = (length: number) => fillRandom(Uint8Array, length); const memoize = (fn: () => any) => ((x?: any) => () => x || (x = fn()))(); @@ -658,7 +659,7 @@ function fillRandom(ArrayType: T, length: numbe const BPE = ArrayType.BYTES_PER_ELEMENT; const array = new ArrayType(length); const max = (2 ** (8 * BPE)) - 1; - for (let i = -1; ++i < length; array[i] = rand() * max * (rand() > 0.5 ? -1 : 1)); + for (let i = -1; ++i < length; array[i] = rand() * max * randSign()); return array as InstanceType; } @@ -666,7 +667,7 @@ function fillRandomBigInt 0.5 ? -1 : 1))); + for (let i = -1; ++i < length; array[i] = BigInt(rand() * max * randSign())); return array as InstanceType; } @@ -732,6 +733,9 @@ function createVariableWidthBytes(length: number, nullBitmap: Uint8Array, offset return bytes; } +/** + * Creates timestamps with the accuracy of days (86400000 milliseconds).
+ */ function createDate32(length: number, nullBitmap: Uint8Array, values: (number | null)[] = []) { const data = new Int32Array(length).fill(Math.trunc(Date.now() / 86400000)); iterateBitmap(length, nullBitmap, (i, valid) => { @@ -739,7 +743,7 @@ function createDate32(length: number, nullBitmap: Uint8Array, values: (number | data[i] = 0; values[i] = null; } else { - data[i] = Math.trunc(data[i] + (rand() * 10000 * (rand() > 0.5 ? -1 : 1))); + data[i] = Math.trunc(data[i] + (rand() * 10000 * randSign())); values[i] = data[i] * 86400000; } }); @@ -747,23 +751,26 @@ function createDate32(length: number, nullBitmap: Uint8Array, values: (number | } function createDate64(length: number, nullBitmap: Uint8Array, values: (number | null)[] = []) { - const data = new BigInt64Array(length).fill(0n); const data32 = createDate32(length, nullBitmap, values); - iterateBitmap(length, nullBitmap, (i, valid) => { - if (valid) { - data[i] = BigInt(data32[i] * 86400000); - } - }); - return data; + return BigInt64Array.from(data32, x => BigInt(x * 86400000)); +} + +function divideBigInts(number: bigint, divisor: bigint): number { + return Number(number / divisor) + Number(number % divisor) / Number(divisor); } function createTimestamp(length: number, nullBitmap: Uint8Array, multiple: number, values: (number | null)[] = []) { - const mult = 86400 * multiple; const data = new BigInt64Array(length).fill(0n); - const data32 = createDate32(length, nullBitmap, values); + const tenYears = 10 * 365 * 24 * 60 * 60 * multiple; + const now = Math.trunc(Date.now() / 1000 * multiple); iterateBitmap(length, nullBitmap, (i, valid) => { - if (valid) { - data[i] = BigInt(data32[i] * mult); + if (!valid) { + data[i] = 0n; + values[i] = null; + } else { + const value = BigInt(now + Math.trunc(rand() * randSign() * tenYears)); + data[i] = value; + values[i] = divideBigInts(value * 1000n, BigInt(multiple)); } }); return data; @@ -776,7 +783,7 @@ function createTime32(length: number, nullBitmap: Uint8Array, 
multiple: number, data[i] = 0; values[i] = null; } else { - values[i] = data[i] = ((1000 * rand()) | 0 * multiple) * (rand() > 0.5 ? -1 : 1); + values[i] = data[i] = ((1000 * rand()) | 0 * multiple) * randSign(); } }); return data; From e9788f00fe10e5b5cf51f6136cded51b8bc324bd Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Tue, 16 Apr 2024 13:31:10 -0400 Subject: [PATCH 15/16] More range for dates --- js/test/unit/builders/utils.ts | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/js/test/unit/builders/utils.ts b/js/test/unit/builders/utils.ts index 5bf2f26a116..1d0707a6ca5 100644 --- a/js/test/unit/builders/utils.ts +++ b/js/test/unit/builders/utils.ts @@ -32,14 +32,11 @@ const randnulls = (values: T[], n: TNull = null) => values export const randomBytes = (length: number) => fillRandom(Uint8Array, length); export const stringsNoNulls = (length = 20) => Array.from({ length }, (_) => randomString(1 + (Math.trunc(Math.random() * 19)))); -export const timestampNoNulls = (length = 20, now = Math.trunc(Date.now() / 86400000)) => - Array.from({ length }, (_) => (Math.trunc(now + (rand() * 10000 * (rand() > 0.5 ? -1 : 1)))) * 86400000); - -export const timestampWithNulls = (length = 20) => randnulls(timestampNoNulls(length), null); export const boolsNoNulls = (length = 20) => Array.from({ length }, () => rand() > 0.5); -export const dateNoNulls = (length = 20) => timestampNoNulls(length); +export const dateNoNulls = (length = 20, now = Math.trunc(Date.now() / 86400000)) => + Array.from({ length }, (_) => (Math.trunc(now + (rand() * 100000 * (rand() > 0.5 ? 
-1 : 1)))) * 86400000); export const int8sNoNulls = (length = 20) => Array.from(new Int8Array(randomBytes(length * Int8Array.BYTES_PER_ELEMENT).buffer)); export const int16sNoNulls = (length = 20) => Array.from(new Int16Array(randomBytes(length * Int16Array.BYTES_PER_ELEMENT).buffer)); export const int32sNoNulls = (length = 20) => Array.from(new Int32Array(randomBytes(length * Int32Array.BYTES_PER_ELEMENT).buffer)); From a03a22733cf6ba2ce45afb1e09f0d1aeabf5fad8 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Tue, 16 Apr 2024 13:48:13 -0400 Subject: [PATCH 16/16] fix build on older js --- js/src/visitor/get.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/js/src/visitor/get.ts b/js/src/visitor/get.ts index 72fd26dd45e..ddfc04884f8 100644 --- a/js/src/visitor/get.ts +++ b/js/src/visitor/get.ts @@ -175,9 +175,9 @@ const getTimestampSecond = ({ values }: Data, inde /** @ignore */ const getTimestampMillisecond = ({ values }: Data, index: number): T['TValue'] => bigIntToNumber(values[index]); /** @ignore */ -const getTimestampMicrosecond = ({ values }: Data, index: number): T['TValue'] => divideBigInts(values[index], 1000n); +const getTimestampMicrosecond = ({ values }: Data, index: number): T['TValue'] => divideBigInts(values[index], BigInt(1000)); /** @ignore */ -const getTimestampNanosecond = ({ values }: Data, index: number): T['TValue'] => divideBigInts(values[index], 1000000n); +const getTimestampNanosecond = ({ values }: Data, index: number): T['TValue'] => divideBigInts(values[index], BigInt(1000000)); /* istanbul ignore next */ /** @ignore */ const getTimestamp = (data: Data, index: number): T['TValue'] => {