From 62ee647576b3eff3b30a51926c1f567c1f720ef0 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Tue, 27 Apr 2021 10:08:29 -0700 Subject: [PATCH] InDimFilter: Fix cache key computation to avoid collisions. The prior code did not include separation between values, and encoded null ambiguously. This patch fixes both of those issues by encoding strings as length + value instead of just value. I think cache key computation was OK prior to #9800. Prior to that patch, the cache key was computed using CacheKeyBuilder.appendStrings, which encodes strings as UTF-8 and inserts a separator byte (0xff) between them that cannot appear in a UTF-8 stream. --- .../druid/query/filter/InDimFilter.java | 7 ++++- .../druid/query/filter/InDimFilterTest.java | 29 +++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java index ca12c2c19809..e65da3d0780c 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java @@ -410,14 +410,19 @@ private byte[] computeCacheKey() { final List sortedValues = new ArrayList<>(values); sortedValues.sort(Comparator.nullsFirst(Ordering.natural())); + + // Hash all values, in sorted order, as their length followed by their content. final Hasher hasher = Hashing.sha256().newHasher(); for (String v : sortedValues) { if (v == null) { - hasher.putInt(0); + // Encode null as length -1, no content. + hasher.putInt(-1); } else { + hasher.putInt(v.length()); hasher.putString(v, StandardCharsets.UTF_8); } } + return new CacheKeyBuilder(DimFilterUtils.IN_CACHE_ID) .appendString(dimension) .appendByte(DimFilterUtils.STRING_SEPARATOR) diff --git a/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java index 9cf5b8f67bdb..627bec0499cf 100644 --- a/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java +++ b/processing/src/test/java/org/apache/druid/query/filter/InDimFilterTest.java @@ -87,6 +87,19 @@ public void testGetCacheKeyReturningSameKeyForValuesOfDifferentOrders() Assert.assertArrayEquals(dimFilter1.getCacheKey(), dimFilter2.getCacheKey()); } + @Test + public void testGetCacheKeyForNullVsEmptyString() + { + final InDimFilter inDimFilter1 = new InDimFilter("dimTest", Arrays.asList(null, "abc"), null); + final InDimFilter inDimFilter2 = new InDimFilter("dimTest", Arrays.asList("", "abc"), null); + + if (NullHandling.sqlCompatible()) { + Assert.assertFalse(Arrays.equals(inDimFilter1.getCacheKey(), inDimFilter2.getCacheKey())); + } else { + Assert.assertArrayEquals(inDimFilter1.getCacheKey(), inDimFilter2.getCacheKey()); + } + } + @Test public void testGetCacheKeyDifferentKeysForListOfStringsAndSingleStringOfLists() { @@ -95,6 +108,22 @@ public void testGetCacheKeyDifferentKeysForListOfStringsAndSingleStringOfLists() Assert.assertFalse(Arrays.equals(inDimFilter1.getCacheKey(), inDimFilter2.getCacheKey())); } + @Test + public void testGetCacheKeyDifferentKeysForNullAndFourZeroChars() + { + final InDimFilter inDimFilter1 = new InDimFilter("dimTest", Arrays.asList(null, "abc"), null); + final InDimFilter inDimFilter2 = new InDimFilter("dimTest", Arrays.asList("\0\0\0\0", "abc"), null); + Assert.assertFalse(Arrays.equals(inDimFilter1.getCacheKey(), inDimFilter2.getCacheKey())); + } + + @Test + public void testGetCacheKeyDifferentKeysWhenStringBoundariesMove() + { + final InDimFilter inDimFilter1 = new InDimFilter("dimTest", Arrays.asList("bar", "foo"), null); + final InDimFilter inDimFilter2 = new InDimFilter("dimTest", Arrays.asList("barf", "oo"), null); + Assert.assertFalse(Arrays.equals(inDimFilter1.getCacheKey(), inDimFilter2.getCacheKey())); + } + @Test public void testGetCacheKeyDifferentKeysForListOfStringsAndSingleStringOfListsWithExtractFn() {