From a9f8c6b03b609dc3d63d6a77478322e0558bfa22 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 29 Mar 2023 21:39:12 -0700 Subject: [PATCH 1/2] lower segment heap footprint and fix bug with expression type coercion rules --- .../common/task/CompactionTaskTest.java | 3 +- .../common/io/smoosh/SmooshedFileMapper.java | 6 ++- .../math/expr/ExpressionTypeConversion.java | 4 +- .../org/apache/druid/segment/IndexIO.java | 32 +++++++------ .../druid/segment/SimpleQueryableIndex.java | 45 +++++-------------- .../druid/math/expr/OutputTypeTest.java | 28 ++++++++++++ 6 files changed, 65 insertions(+), 53 deletions(-) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java index 374adcb43841..01a3f8475d39 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java @@ -2129,13 +2129,12 @@ void removeMetadata(File file) file, new SimpleQueryableIndex( index.getDataInterval(), - index.getColumnNames(), index.getAvailableDimensions(), index.getBitmapFactoryForDimensions(), index.getColumns(), index.getFileMapper(), null, - () -> index.getDimensionHandlers() + false ) ); } diff --git a/processing/src/main/java/org/apache/druid/java/util/common/io/smoosh/SmooshedFileMapper.java b/processing/src/main/java/org/apache/druid/java/util/common/io/smoosh/SmooshedFileMapper.java index dbc01ebf274f..69f25bb84159 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/io/smoosh/SmooshedFileMapper.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/io/smoosh/SmooshedFileMapper.java @@ -48,7 +48,11 @@ */ public class SmooshedFileMapper implements Closeable { - private static final Interner STRING_INTERNER = Interners.newWeakInterner(); + /** + * Interner for smoosh internal files, which includes all column names since very column has an internal file + * associated with it + */ + public static final Interner STRING_INTERNER = Interners.newWeakInterner(); public static SmooshedFileMapper load(File baseDir) throws IOException { diff --git a/processing/src/main/java/org/apache/druid/math/expr/ExpressionTypeConversion.java b/processing/src/main/java/org/apache/druid/math/expr/ExpressionTypeConversion.java index 8a3d6b19d4b3..584a77ed3682 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/ExpressionTypeConversion.java +++ b/processing/src/main/java/org/apache/druid/math/expr/ExpressionTypeConversion.java @@ -88,10 +88,10 @@ public static ExpressionType operator(@Nullable ExpressionType type, @Nullable E return type; } if (type.is(ExprType.COMPLEX) || other.is(ExprType.COMPLEX)) { - if (type.getElementType() == null) { + if (type.getComplexTypeName() == null) { return other; } - if (other.getElementType() == null) { + if (other.getComplexTypeName() == null) { return type; } if (!Objects.equals(type, other)) { diff --git a/processing/src/main/java/org/apache/druid/segment/IndexIO.java b/processing/src/main/java/org/apache/druid/segment/IndexIO.java index 9b74f71768bd..5477e8b616fb 100644 --- a/processing/src/main/java/org/apache/druid/segment/IndexIO.java +++ b/processing/src/main/java/org/apache/druid/segment/IndexIO.java @@ -60,7 +60,9 @@ import org.apache.druid.segment.data.CompressedColumnarLongsSupplier; import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.ImmutableRTreeObjectStrategy; +import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.data.IndexedIterable; +import org.apache.druid.segment.data.ListIndexed; import org.apache.druid.segment.data.VSizeColumnarMultiInts; import org.apache.druid.segment.serde.ComplexColumnPartSupplier; import org.apache.druid.segment.serde.DictionaryEncodedColumnSupplier; @@ -639,14 +641,12 @@ public QueryableIndex load(File inDir, ObjectMapper mapper, boolean lazy, Segmen loadFailed ); - final GenericIndexed finalCols, finalDims; + final Indexed finalCols, finalDims; if (allCols != null) { // To restore original column order, we merge allCols/allDims and nonNullCols/nonNullDims, respectively. - final List mergedCols = restoreColumns(nonNullCols, allCols); - final List mergedDims = restoreColumns(nonNullDims, allDims); - finalCols = GenericIndexed.fromIterable(mergedCols, GenericIndexed.STRING_STRATEGY); - finalDims = GenericIndexed.fromIterable(mergedDims, GenericIndexed.STRING_STRATEGY); + finalCols = new ListIndexed<>(restoreColumns(nonNullCols, allCols)); + finalDims = new ListIndexed<>(restoreColumns(nonNullDims, allDims)); } else { finalCols = nonNullCols; finalDims = nonNullDims; @@ -701,9 +701,9 @@ private List restoreColumns(GenericIndexed nonNullCols, GenericI + "while allColsIterator expects one. This is likely a potential bug in creating this segment. " + "Try reingesting your data with storeEmptyColumns setting to false in task context." ); - mergedCols.add(nonNullColsIterator.next()); + mergedCols.add(SmooshedFileMapper.STRING_INTERNER.intern(nonNullColsIterator.next())); } else { - mergedCols.add(next); + mergedCols.add(SmooshedFileMapper.STRING_INTERNER.intern(next)); } } @@ -712,7 +712,7 @@ private List restoreColumns(GenericIndexed nonNullCols, GenericI private void registerColumnHolders( File inDir, - GenericIndexed cols, + Indexed cols, boolean lazy, Map> columns, ObjectMapper mapper, @@ -726,7 +726,7 @@ private void registerColumnHolders( continue; } - ByteBuffer colBuffer = smooshedFiles.mapFile(columnName); + final ByteBuffer colBuffer = smooshedFiles.mapFile(columnName); registerColumnHolder( lazy, columns, @@ -749,12 +749,16 @@ private void registerColumnHolder( SegmentLazyLoadFailCallback loadFailed ) throws IOException { + + // we use the interner here too even though it might have already been added by restoreColumns(..) because that + // only happens if there are some null columns + final String internedColumnName = SmooshedFileMapper.STRING_INTERNER.intern(columnName); if (lazy) { - columns.put(columnName, Suppliers.memoize( + columns.put(internedColumnName, Suppliers.memoize( () -> { try { return deserializeColumn( - columnName, + internedColumnName, mapper, colBuffer, smooshedFiles @@ -768,13 +772,13 @@ private void registerColumnHolder( } )); } else { - ColumnHolder columnHolder = deserializeColumn( - columnName, + final ColumnHolder columnHolder = deserializeColumn( + internedColumnName, mapper, colBuffer, smooshedFiles ); - columns.put(columnName, () -> columnHolder); + columns.put(internedColumnName, () -> columnHolder); } } diff --git a/processing/src/main/java/org/apache/druid/segment/SimpleQueryableIndex.java b/processing/src/main/java/org/apache/druid/segment/SimpleQueryableIndex.java index ea009a85a667..1c1e0b614850 100644 --- a/processing/src/main/java/org/apache/druid/segment/SimpleQueryableIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/SimpleQueryableIndex.java @@ -83,40 +83,6 @@ public SimpleQueryableIndex( } } - private Map initDimensionHandlers(Indexed availableDimensions) - { - Map dimensionHandlerMap = Maps.newLinkedHashMap(); - for (String dim : availableDimensions) { - final ColumnHolder columnHolder = getColumnHolder(dim); - ColumnCapabilities capabilities = columnHolder.getHandlerCapabilities(); - DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dim, capabilities, null); - dimensionHandlerMap.put(dim, handler); - } - return dimensionHandlerMap; - } - - @VisibleForTesting - public SimpleQueryableIndex( - Interval interval, - List columnNames, - Indexed availableDimensions, - BitmapFactory bitmapFactory, - Map> columns, - SmooshedFileMapper fileMapper, - @Nullable Metadata metadata, - Supplier> dimensionHandlers - ) - { - this.dataInterval = interval; - this.columnNames = columnNames; - this.availableDimensions = availableDimensions; - this.bitmapFactory = bitmapFactory; - this.columns = columns; - this.fileMapper = fileMapper; - this.metadata = metadata; - this.dimensionHandlers = dimensionHandlers; - } - @Override public Interval getDataInterval() { @@ -193,4 +159,15 @@ public Map getDimensionHandlers() return dimensionHandlers.get(); } + private Map initDimensionHandlers(Indexed availableDimensions) + { + Map dimensionHandlerMap = Maps.newLinkedHashMap(); + for (String dim : availableDimensions) { + final ColumnHolder columnHolder = getColumnHolder(dim); + ColumnCapabilities capabilities = columnHolder.getHandlerCapabilities(); + DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dim, capabilities, null); + dimensionHandlerMap.put(dim, handler); + } + return dimensionHandlerMap; + } } diff --git a/processing/src/test/java/org/apache/druid/math/expr/OutputTypeTest.java b/processing/src/test/java/org/apache/druid/math/expr/OutputTypeTest.java index 00682b9632d8..eeedef5fd21c 100644 --- a/processing/src/test/java/org/apache/druid/math/expr/OutputTypeTest.java +++ b/processing/src/test/java/org/apache/druid/math/expr/OutputTypeTest.java @@ -21,6 +21,7 @@ import com.google.common.collect.ImmutableMap; import org.apache.druid.java.util.common.IAE; +import org.apache.druid.segment.nested.NestedDataComplexTypeSerde; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; import org.junit.Rule; @@ -537,6 +538,20 @@ public void testOperatorAutoConversion() ExpressionType.STRING_ARRAY, ExpressionTypeConversion.operator(ExpressionType.STRING_ARRAY, ExpressionType.STRING_ARRAY) ); + + ExpressionType nested = ExpressionType.fromColumnType(NestedDataComplexTypeSerde.TYPE); + Assert.assertEquals( + nested, + ExpressionTypeConversion.operator(nested, nested) + ); + Assert.assertEquals( + nested, + ExpressionTypeConversion.operator(nested, ExpressionType.UNKNOWN_COMPLEX) + ); + Assert.assertEquals( + nested, + ExpressionTypeConversion.operator(ExpressionType.UNKNOWN_COMPLEX, nested) + ); } @Test @@ -601,6 +616,19 @@ public void testFunctionAutoConversion() ExpressionType.STRING_ARRAY, ExpressionTypeConversion.function(ExpressionType.STRING_ARRAY, ExpressionType.STRING_ARRAY) ); + ExpressionType nested = ExpressionType.fromColumnType(NestedDataComplexTypeSerde.TYPE); + Assert.assertEquals( + nested, + ExpressionTypeConversion.function(nested, nested) + ); + Assert.assertEquals( + nested, + ExpressionTypeConversion.function(nested, ExpressionType.UNKNOWN_COMPLEX) + ); + Assert.assertEquals( + nested, + ExpressionTypeConversion.function(ExpressionType.UNKNOWN_COMPLEX, nested) + ); } @Test From a349472a9f8f579fe9d85cb951f0d17f12ce38de Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 31 Mar 2023 00:47:18 -0700 Subject: [PATCH 2/2] Update SmooshedFileMapper.java --- .../druid/java/util/common/io/smoosh/SmooshedFileMapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/java/util/common/io/smoosh/SmooshedFileMapper.java b/processing/src/main/java/org/apache/druid/java/util/common/io/smoosh/SmooshedFileMapper.java index 69f25bb84159..a7c92c595903 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/io/smoosh/SmooshedFileMapper.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/io/smoosh/SmooshedFileMapper.java @@ -49,7 +49,7 @@ public class SmooshedFileMapper implements Closeable { /** - * Interner for smoosh internal files, which includes all column names since very column has an internal file + * Interner for smoosh internal files, which includes all column names since every column has an internal file * associated with it */ public static final Interner STRING_INTERNER = Interners.newWeakInterner();