From 509b85e6f5e48907147a2a5085bb616a8c9bfe55 Mon Sep 17 00:00:00 2001 From: Laksh Singla Date: Tue, 30 Apr 2024 11:36:53 +0530 Subject: [PATCH] Prevent joining on nested arrays and complex types (#16349) #16068 modified DimensionHandlerUtils to accept complex types to be dimensions. This had an unintended side effect of allowing complex types to be joined upon (which wasn't guarded explicitly, it doesn't work). This PR modifies the IndexedTable to reject building the index on the complex types to prevent joining on complex types. The PR adds back the check in the same place, explicitly. --- .../segment/join/table/RowBasedIndexBuilder.java | 6 ++++++ .../sql/calcite/CalciteNestedDataQueryTest.java | 15 +++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/processing/src/main/java/org/apache/druid/segment/join/table/RowBasedIndexBuilder.java b/processing/src/main/java/org/apache/druid/segment/join/table/RowBasedIndexBuilder.java index 5574f607e83d..732039befc41 100644 --- a/processing/src/main/java/org/apache/druid/segment/join/table/RowBasedIndexBuilder.java +++ b/processing/src/main/java/org/apache/druid/segment/join/table/RowBasedIndexBuilder.java @@ -26,6 +26,7 @@ import it.unimi.dsi.fastutil.longs.Long2ObjectMap; import it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap; import it.unimi.dsi.fastutil.objects.ObjectIterator; +import org.apache.druid.error.InvalidInput; import org.apache.druid.java.util.common.ISE; import org.apache.druid.segment.DimensionHandlerUtils; import org.apache.druid.segment.column.ColumnType; @@ -62,6 +63,11 @@ public RowBasedIndexBuilder(ColumnType keyType) { this.keyType = keyType; + // Cannot build index on complex types, and non-primitive arrays + if (keyType.is(ValueType.COMPLEX) || keyType.isArray() && !keyType.isPrimitiveArray()) { + throw InvalidInput.exception("Cannot join when the join condition has column of type [%s]", keyType); + } + if (keyType.is(ValueType.LONG)) { // We're specializing the type even though we don't specialize usage in this class, for two reasons: // (1) It's still useful to reduce overall memory footprint. diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index dceddbaff436..997c7172eda3 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -32,6 +32,7 @@ import org.apache.druid.data.input.impl.LongDimensionSchema; import org.apache.druid.data.input.impl.StringDimensionSchema; import org.apache.druid.data.input.impl.TimestampSpec; +import org.apache.druid.error.DruidException; import org.apache.druid.error.DruidExceptionMatcher; import org.apache.druid.guice.DruidInjectorBuilder; import org.apache.druid.guice.NestedDataModule; @@ -77,6 +78,7 @@ import org.apache.druid.timeline.partition.LinearShardSpec; import org.hamcrest.CoreMatchers; import org.junit.internal.matchers.ThrowableMessageMatcher; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import java.util.Arrays; @@ -5392,6 +5394,19 @@ public void testGroupByRootSingleTypeStringMixed1SparseNotNull() ); } + @Test + public void testJoinOnNestedColumnThrows() + { + DruidException e = Assertions.assertThrows(DruidException.class, () -> { + testQuery( + "SELECT * FROM druid.nested a INNER JOIN druid.nested b ON a.nester = b.nester", + ImmutableList.of(), + ImmutableList.of() + ); + }); + Assertions.assertEquals("Cannot join when the join condition has column of type [COMPLEX]", e.getMessage()); + } + @Test public void testScanStringNotNullCast() {