From 35227cdeef9d872890540f212ead9ccc6a9438c4 Mon Sep 17 00:00:00 2001 From: Kevin De Porre Date: Wed, 10 Sep 2025 09:49:29 +0200 Subject: [PATCH 1/3] Unit test that reproduces hashing bug with numbers --- packages/db-ivm/tests/utils.test.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/db-ivm/tests/utils.test.ts b/packages/db-ivm/tests/utils.test.ts index a77ddf358..44e3d4680 100644 --- a/packages/db-ivm/tests/utils.test.ts +++ b/packages/db-ivm/tests/utils.test.ts @@ -76,6 +76,8 @@ describe(`hash`, () => { // Same numbers should have same hash expect(hash(42)).toBe(result1) + expect(hash(2.0)).not.toBe(hash(2.5)) + expect(hash(3.14159)).toBe(result4) }) it(`should hash booleans`, () => { From b9497434c84976033838d3c74aa1db5da2a8ae89 Mon Sep 17 00:00:00 2001 From: Kevin De Porre Date: Wed, 10 Sep 2025 10:08:52 +0200 Subject: [PATCH 2/3] Fix hashing of numbers by hashing all 8 bytes instead of only 4 bytes --- packages/db-ivm/src/hashing/murmur.ts | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/packages/db-ivm/src/hashing/murmur.ts b/packages/db-ivm/src/hashing/murmur.ts index bb45f533c..a90b9b72c 100644 --- a/packages/db-ivm/src/hashing/murmur.ts +++ b/packages/db-ivm/src/hashing/murmur.ts @@ -20,6 +20,12 @@ export interface Hasher { digest: () => number } +// Allocate these once, outside the hot path +const buf = new ArrayBuffer(8) +// dv and u8 are 2 different views on the same buffer `buf` +const dv = new DataView(buf) +const u8 = new Uint8Array(buf) + /** * This implementation of Murmur hash uses a random initial seed and random markers. * This means that hashes aren't deterministic across app restarts. @@ -82,10 +88,15 @@ export class MurmurHashStream implements Hasher { } return case `number`: - this._writeByte(chunk & 0xff) - this._writeByte((chunk >>> 8) & 0xff) - this._writeByte((chunk >>> 16) & 0xff) - this._writeByte((chunk >>> 24) & 0xff) + dv.setFloat64(0, chunk, true) // fixed little-endian + this._writeByte(u8[0]!) + this._writeByte(u8[1]!) + this._writeByte(u8[2]!) + this._writeByte(u8[3]!) + this._writeByte(u8[4]!) + this._writeByte(u8[5]!) + this._writeByte(u8[6]!) + this._writeByte(u8[7]!) return case `bigint`: { let value = chunk From 27c83430cec4da8d7adfeb8206ec36dbbac197cc Mon Sep 17 00:00:00 2001 From: Kevin De Porre Date: Wed, 10 Sep 2025 10:27:04 +0200 Subject: [PATCH 3/3] Changeset --- .changeset/some-lands-matter.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/some-lands-matter.md diff --git a/.changeset/some-lands-matter.md b/.changeset/some-lands-matter.md new file mode 100644 index 000000000..47bc08f98 --- /dev/null +++ b/.changeset/some-lands-matter.md @@ -0,0 +1,5 @@ +--- +"@tanstack/db-ivm": patch +--- + +Fix bug where different numbers would hash to the same value. This caused distinct not to work properly.