From 408e34348289c4273c3b42c91ac403e244e5e355 Mon Sep 17 00:00:00 2001 From: sreemanamala Date: Tue, 13 Aug 2024 08:49:09 +0530 Subject: [PATCH 01/21] String array frame reader --- .../columnar/StringFrameColumnReader.java | 11 ++++---- .../operator/window/RowsAndColumnsHelper.java | 6 +++++ ...ualColumnEvaluationRowsAndColumnsTest.java | 26 ++++++++++++++----- 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java index d9fb9d83a9f4..5a0c9dfa6493 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java +++ b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java @@ -92,17 +92,18 @@ public Column readRACColumn(Frame frame) final Memory memory = frame.region(columnNumber); validate(memory); - final long positionOfLengths = getStartOfStringLengthSection(frame.numRows(), false); - final long positionOfPayloads = getStartOfStringDataSection(memory, frame.numRows(), false); + final boolean multiValue = isMultiValue(memory); + final long positionOfLengths = getStartOfStringLengthSection(frame.numRows(), multiValue); + final long positionOfPayloads = getStartOfStringDataSection(memory, frame.numRows(), multiValue); StringFrameColumn frameCol = new StringFrameColumn( frame, - false, + multiValue, memory, positionOfLengths, positionOfPayloads, - asArray || isMultiValue(memory) // Read MVDs as String arrays + asArray || multiValue // Read MVDs as String arrays ); return new ColumnAccessorBasedColumn(frameCol); @@ -397,7 +398,7 @@ public int numRows() @Override protected Object getVal(int rowNum) { - return getString(frame.physicalRow(rowNum)); + return getRowAsObject(frame.physicalRow(rowNum), true); } @Override diff --git a/processing/src/test/java/org/apache/druid/query/operator/window/RowsAndColumnsHelper.java b/processing/src/test/java/org/apache/druid/query/operator/window/RowsAndColumnsHelper.java index 2636156b53cc..dc73e4f7ca51 100644 --- a/processing/src/test/java/org/apache/druid/query/operator/window/RowsAndColumnsHelper.java +++ b/processing/src/test/java/org/apache/druid/query/operator/window/RowsAndColumnsHelper.java @@ -280,6 +280,12 @@ public void validate(String msgBase, Column col) } else { Assert.assertEquals(msg, ((Long) expectedVal).longValue(), accessor.getLong(i)); } + } else if (expectedVal instanceof Object[]) { + if (expectedNulls[i]) { + Assert.assertEquals(msg, 0, accessor.getObject(i)); + } else { + Assert.assertArrayEquals(msg, (Object[]) expectedVals[i], (Object[]) accessor.getObject(i)); + } } else { if (expectedNulls[i]) { Assert.assertNull(msg, accessor.getObject(i)); diff --git a/processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/TestVirtualColumnEvaluationRowsAndColumnsTest.java b/processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/TestVirtualColumnEvaluationRowsAndColumnsTest.java index c26508694d40..cacaa85b04bf 100644 --- a/processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/TestVirtualColumnEvaluationRowsAndColumnsTest.java +++ b/processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/TestVirtualColumnEvaluationRowsAndColumnsTest.java @@ -49,10 +49,10 @@ public TestVirtualColumnEvaluationRowsAndColumnsTest(String name, Function Date: Tue, 13 Aug 2024 12:06:51 +0530 Subject: [PATCH 02/21] separate string array frame reader --- .../read/columnar/FrameColumnReaders.java | 2 +- .../columnar/StringFrameColumnReader.java | 87 ++++++++++++++++--- 2 files changed, 76 insertions(+), 13 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/frame/read/columnar/FrameColumnReaders.java b/processing/src/main/java/org/apache/druid/frame/read/columnar/FrameColumnReaders.java index 9b4dc85cb1e0..8e34b4c25ece 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/columnar/FrameColumnReaders.java +++ b/processing/src/main/java/org/apache/druid/frame/read/columnar/FrameColumnReaders.java @@ -59,7 +59,7 @@ public static FrameColumnReader create( case ARRAY: switch (columnType.getElementType().getType()) { case STRING: - return new StringFrameColumnReader(columnNumber, true); + return new StringArrayFrameColumnReader(columnNumber); case LONG: return new LongArrayFrameColumnReader(columnNumber); case FLOAT: diff --git a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java index 5a0c9dfa6493..7726473bc0c0 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java +++ b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java @@ -71,7 +71,7 @@ */ public class StringFrameColumnReader implements FrameColumnReader { - private final int columnNumber; + final int columnNumber; private final boolean asArray; /** @@ -103,7 +103,7 @@ public Column readRACColumn(Frame frame) memory, positionOfLengths, positionOfPayloads, - asArray || multiValue // Read MVDs as String arrays + multiValue // Read MVDs as String arrays ); return new ColumnAccessorBasedColumn(frameCol); @@ -121,10 +121,10 @@ public ColumnPlus readColumn(final Frame frame) final BaseColumn baseColumn; - if (asArray) { + if (multiValue) { baseColumn = new StringArrayFrameColumn( frame, - multiValue, + true, memory, startOfStringLengthSection, startOfStringDataSection @@ -132,7 +132,7 @@ public ColumnPlus readColumn(final Frame frame) } else { baseColumn = new StringFrameColumn( frame, - multiValue, + false, memory, startOfStringLengthSection, startOfStringDataSection, @@ -143,7 +143,7 @@ public ColumnPlus readColumn(final Frame frame) return new ColumnPlus( baseColumn, new ColumnCapabilitiesImpl().setType(asArray ? ColumnType.STRING_ARRAY : ColumnType.STRING) - .setHasMultipleValues(!asArray && multiValue) + .setHasMultipleValues(multiValue) .setDictionaryEncoded(false) .setHasBitmapIndexes(false) .setHasSpatialIndexes(false) @@ -152,7 +152,7 @@ public ColumnPlus readColumn(final Frame frame) ); } - private void validate(final Memory region) + void validate(final Memory region) { // Check if column is big enough for a header if (region.getCapacity() < StringFrameColumnWriter.DATA_OFFSET) { @@ -171,7 +171,7 @@ private void validate(final Memory region) } } - private static boolean isMultiValue(final Memory memory) + static boolean isMultiValue(final Memory memory) { return memory.getByte(1) == 1; } @@ -181,7 +181,7 @@ private static long getStartOfCumulativeLengthSection() return StringFrameColumnWriter.DATA_OFFSET; } - private static long getStartOfStringLengthSection( + static long getStartOfStringLengthSection( final int numRows, final boolean multiValue ) @@ -193,7 +193,7 @@ private static long getStartOfStringLengthSection( } } - private static long getStartOfStringDataSection( + static long getStartOfStringDataSection( final Memory memory, final int numRows, final boolean multiValue @@ -232,7 +232,7 @@ static class StringFrameColumn extends ObjectColumnAccessorBase implements Dicti */ private final boolean asArray; - private StringFrameColumn( + StringFrameColumn( Frame frame, boolean multiValue, Memory memory, @@ -678,7 +678,7 @@ static class StringArrayFrameColumn implements BaseColumn { private final StringFrameColumn delegate; - private StringArrayFrameColumn( + StringArrayFrameColumn( Frame frame, boolean multiValue, Memory memory, @@ -710,3 +710,66 @@ public void close() } } } + +class StringArrayFrameColumnReader extends StringFrameColumnReader +{ + StringArrayFrameColumnReader(int columnNumber) + { + super(columnNumber, true); + } + + @Override + public Column readRACColumn(Frame frame) + { + final Memory memory = frame.region(columnNumber); + validate(memory); + + // we expect the memory to be stored as if there are multi values + assert isMultiValue(memory); + final long positionOfLengths = getStartOfStringLengthSection(frame.numRows(), true); + final long positionOfPayloads = getStartOfStringDataSection(memory, frame.numRows(), true); + + StringFrameColumn frameCol = + new StringFrameColumn( + frame, + true, + memory, + positionOfLengths, + positionOfPayloads, + true + ); + + return new ColumnAccessorBasedColumn(frameCol); + } + + @Override + public ColumnPlus readColumn(final Frame frame) + { + final Memory memory = frame.region(columnNumber); + validate(memory); + + // we expect the memory to be stored as if there are multi values + assert isMultiValue(memory); + final long startOfStringLengthSection = getStartOfStringLengthSection(frame.numRows(), true); + final long startOfStringDataSection = getStartOfStringDataSection(memory, frame.numRows(), true); + + final BaseColumn baseColumn = new StringArrayFrameColumn( + frame, + true, + memory, + startOfStringLengthSection, + startOfStringDataSection + ); + + return new ColumnPlus( + baseColumn, + new ColumnCapabilitiesImpl().setType(ColumnType.STRING_ARRAY) + .setHasMultipleValues(false) + .setDictionaryEncoded(false) + .setHasBitmapIndexes(false) + .setHasSpatialIndexes(false) + .setHasNulls(ColumnCapabilities.Capable.UNKNOWN), + frame.numRows() + ); + } +} From 2a617bed447dced787ed2c2fc546dc64c132e842 Mon Sep 17 00:00:00 2001 From: sreemanamala Date: Tue, 13 Aug 2024 21:48:34 +0530 Subject: [PATCH 03/21] fix --- .../columnar/StringFrameColumnReader.java | 69 ++++++++----------- 1 file changed, 29 insertions(+), 40 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java index 7726473bc0c0..31f8112863ab 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java +++ b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java @@ -67,7 +67,7 @@ import java.util.List; /** - * Reader for {@link StringFrameColumnWriter}, types {@link ColumnType#STRING} and {@link ColumnType#STRING_ARRAY}. + * Reader for {@link StringFrameColumnWriter}, type {@link ColumnType#STRING}. */ public class StringFrameColumnReader implements FrameColumnReader { @@ -96,15 +96,14 @@ public Column readRACColumn(Frame frame) final long positionOfLengths = getStartOfStringLengthSection(frame.numRows(), multiValue); final long positionOfPayloads = getStartOfStringDataSection(memory, frame.numRows(), multiValue); - StringFrameColumn frameCol = - new StringFrameColumn( - frame, - multiValue, - memory, - positionOfLengths, - positionOfPayloads, - multiValue // Read MVDs as String arrays - ); + StringFrameColumn frameCol = new StringFrameColumn( + frame, + multiValue, + memory, + positionOfLengths, + positionOfPayloads, + multiValue // Read MVDs as String arrays + ); return new ColumnAccessorBasedColumn(frameCol); } @@ -119,30 +118,18 @@ public ColumnPlus readColumn(final Frame frame) final long startOfStringLengthSection = getStartOfStringLengthSection(frame.numRows(), multiValue); final long startOfStringDataSection = getStartOfStringDataSection(memory, frame.numRows(), multiValue); - final BaseColumn baseColumn; - - if (multiValue) { - baseColumn = new StringArrayFrameColumn( - frame, - true, - memory, - startOfStringLengthSection, - startOfStringDataSection - ); - } else { - baseColumn = new StringFrameColumn( - frame, - false, - memory, - startOfStringLengthSection, - startOfStringDataSection, - false - ); - } + final BaseColumn baseColumn = new StringFrameColumn( + frame, + multiValue, + memory, + startOfStringLengthSection, + startOfStringDataSection, + false + ); return new ColumnPlus( baseColumn, - new ColumnCapabilitiesImpl().setType(asArray ? ColumnType.STRING_ARRAY : ColumnType.STRING) + new ColumnCapabilitiesImpl().setType(ColumnType.STRING) .setHasMultipleValues(multiValue) .setDictionaryEncoded(false) .setHasBitmapIndexes(false) @@ -711,6 +698,9 @@ public void close() } } +/** + * Reader for {@link ColumnType#STRING_ARRAY}. + */ class StringArrayFrameColumnReader extends StringFrameColumnReader { StringArrayFrameColumnReader(int columnNumber) @@ -729,15 +719,14 @@ public Column readRACColumn(Frame frame) final long positionOfLengths = getStartOfStringLengthSection(frame.numRows(), true); final long positionOfPayloads = getStartOfStringDataSection(memory, frame.numRows(), true); - StringFrameColumn frameCol = - new StringFrameColumn( - frame, - true, - memory, - positionOfLengths, - positionOfPayloads, - true - ); + StringFrameColumn frameCol = new StringFrameColumn( + frame, + true, + memory, + positionOfLengths, + positionOfPayloads, + true + ); return new ColumnAccessorBasedColumn(frameCol); } From 2df2d86ee3f9497fabccba96e441792f9199c757 Mon Sep 17 00:00:00 2001 From: sreemanamala Date: Tue, 13 Aug 2024 22:10:13 +0530 Subject: [PATCH 04/21] refactor --- .../read/columnar/FrameColumnReaders.java | 2 +- .../columnar/StringFrameColumnReader.java | 30 ++++++++++++++----- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/frame/read/columnar/FrameColumnReaders.java b/processing/src/main/java/org/apache/druid/frame/read/columnar/FrameColumnReaders.java index 8e34b4c25ece..6ceb6b729176 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/columnar/FrameColumnReaders.java +++ b/processing/src/main/java/org/apache/druid/frame/read/columnar/FrameColumnReaders.java @@ -51,7 +51,7 @@ public static FrameColumnReader create( return new DoubleFrameColumnReader(columnNumber); case STRING: - return new StringFrameColumnReader(columnNumber, false); + return new StringFrameColumnReader(columnNumber); case COMPLEX: return new ComplexFrameColumnReader(columnNumber); diff --git a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java index 31f8112863ab..41698e188268 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java +++ b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java @@ -72,18 +72,15 @@ public class StringFrameColumnReader implements FrameColumnReader { final int columnNumber; - private final boolean asArray; /** * Create a new reader. * * @param columnNumber column number - * @param asArray true for {@link ColumnType#STRING_ARRAY}, false for {@link ColumnType#STRING} */ - StringFrameColumnReader(int columnNumber, boolean asArray) + StringFrameColumnReader(int columnNumber) { this.columnNumber = columnNumber; - this.asArray = asArray; } @Override @@ -139,7 +136,7 @@ public ColumnPlus readColumn(final Frame frame) ); } - void validate(final Memory region) + private void validate(final Memory region) { // Check if column is big enough for a header if (region.getCapacity() < StringFrameColumnWriter.DATA_OFFSET) { @@ -147,7 +144,7 @@ void validate(final Memory region) } final byte typeCode = region.getByte(0); - final byte expectedTypeCode = asArray ? FrameColumnWriters.TYPE_STRING_ARRAY : FrameColumnWriters.TYPE_STRING; + final byte expectedTypeCode = FrameColumnWriters.TYPE_STRING; if (typeCode != expectedTypeCode) { throw DruidException.defensive( "Column[%s] does not have the correct type code; expected[%s], got[%s]", @@ -705,7 +702,7 @@ class StringArrayFrameColumnReader extends StringFrameColumnReader { StringArrayFrameColumnReader(int columnNumber) { - super(columnNumber, true); + super(columnNumber); } @Override @@ -761,4 +758,23 @@ public ColumnPlus readColumn(final Frame frame) frame.numRows() ); } + + private void validate(final Memory region) + { + // Check if column is big enough for a header + if (region.getCapacity() < StringFrameColumnWriter.DATA_OFFSET) { + throw DruidException.defensive("Column[%s] is not big enough for a header", columnNumber); + } + + final byte typeCode = region.getByte(0); + final byte expectedTypeCode = FrameColumnWriters.TYPE_STRING_ARRAY; + if (typeCode != expectedTypeCode) { + throw DruidException.defensive( + "Column[%s] does not have the correct type code; expected[%s], got[%s]", + columnNumber, + expectedTypeCode, + typeCode + ); + } + } } From 8d6ddeef8b509f74c7f108183a1954d3861eca45 Mon Sep 17 00:00:00 2001 From: sreemanamala Date: Wed, 14 Aug 2024 08:09:59 +0530 Subject: [PATCH 05/21] test fix --- .../query/operator/window/RowsAndColumnsHelper.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/query/operator/window/RowsAndColumnsHelper.java b/processing/src/test/java/org/apache/druid/query/operator/window/RowsAndColumnsHelper.java index dc73e4f7ca51..0a10ebc118c8 100644 --- a/processing/src/test/java/org/apache/druid/query/operator/window/RowsAndColumnsHelper.java +++ b/processing/src/test/java/org/apache/druid/query/operator/window/RowsAndColumnsHelper.java @@ -29,6 +29,7 @@ import org.apache.druid.segment.column.ColumnType; import org.junit.Assert; +import java.util.ArrayList; import java.util.Collection; import java.util.LinkedHashMap; import java.util.Map; @@ -281,10 +282,15 @@ public void validate(String msgBase, Column col) Assert.assertEquals(msg, ((Long) expectedVal).longValue(), accessor.getLong(i)); } } else if (expectedVal instanceof Object[]) { + Object actualVal = accessor.getObject(i); if (expectedNulls[i]) { - Assert.assertEquals(msg, 0, accessor.getObject(i)); + Assert.assertEquals(msg, 0, actualVal); } else { - Assert.assertArrayEquals(msg, (Object[]) expectedVals[i], (Object[]) accessor.getObject(i)); + if (actualVal instanceof ArrayList) { + Assert.assertArrayEquals(msg, (Object[]) expectedVals[i], ((ArrayList) actualVal).toArray()); + } else { + Assert.assertArrayEquals(msg, (Object[]) expectedVals[i], (Object[]) actualVal); + } } } else { if (expectedNulls[i]) { From ccd1ba7592af8845e5698229694a9a2c63833778 Mon Sep 17 00:00:00 2001 From: sreemanamala Date: Fri, 16 Aug 2024 16:57:05 +0530 Subject: [PATCH 06/21] refactor --- .../StringArrayFrameColumnReader.java | 183 ++++++++++++++++++ .../columnar/StringFrameColumnReader.java | 140 +------------- .../operator/window/RowsAndColumnsHelper.java | 2 +- ...t.java => EvaluateRowsAndColumnsTest.java} | 4 +- 4 files changed, 193 insertions(+), 136 deletions(-) create mode 100644 processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java rename processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/{TestVirtualColumnEvaluationRowsAndColumnsTest.java => EvaluateRowsAndColumnsTest.java} (94%) diff --git a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java new file mode 100644 index 000000000000..e5c3414592a3 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java @@ -0,0 +1,183 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.frame.read.columnar; + +import org.apache.datasketches.memory.Memory; +import org.apache.druid.error.DruidException; +import org.apache.druid.frame.Frame; +import org.apache.druid.frame.write.columnar.FrameColumnWriters; +import org.apache.druid.frame.write.columnar.StringFrameColumnWriter; +import org.apache.druid.query.rowsandcols.column.Column; +import org.apache.druid.query.rowsandcols.column.ColumnAccessorBasedColumn; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.column.BaseColumn; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.data.ReadableOffset; + +/** + * Reader for {@link ColumnType#STRING_ARRAY}. + */ +public class StringArrayFrameColumnReader implements FrameColumnReader +{ + final int columnNumber; + + /** + * Create a new reader. + * + * @param columnNumber column number + */ + StringArrayFrameColumnReader(int columnNumber) + { + this.columnNumber = columnNumber; + } + + @Override + public Column readRACColumn(Frame frame) + { + final Memory memory = frame.region(columnNumber); + validate(memory); + + // String arrays always store multiple values per row. + assert isMultiValue(memory); + final long positionOfLengths = getStartOfStringLengthSection(frame.numRows()); + final long positionOfPayloads = getStartOfStringDataSection(memory, frame.numRows()); + + StringFrameColumnReader.StringFrameColumn frameCol = new StringFrameColumnReader.StringFrameColumn( + frame, + true, + memory, + positionOfLengths, + positionOfPayloads, + true + ); + + return new ColumnAccessorBasedColumn(frameCol); + } + + @Override + public ColumnPlus readColumn(final Frame frame) + { + final Memory memory = frame.region(columnNumber); + validate(memory); + + // String arrays always store multiple values per row. + assert isMultiValue(memory); + final long startOfStringLengthSection = getStartOfStringLengthSection(frame.numRows()); + final long startOfStringDataSection = getStartOfStringDataSection(memory, frame.numRows()); + + final BaseColumn baseColumn = new StringArrayFrameColumn( + frame, + memory, + startOfStringLengthSection, + startOfStringDataSection + ); + + return new ColumnPlus( + baseColumn, + new ColumnCapabilitiesImpl().setType(ColumnType.STRING_ARRAY) + .setHasMultipleValues(false) + .setDictionaryEncoded(false), + frame.numRows() + ); + } + + private void validate(final Memory region) + { + // Check if column is big enough for a header + if (region.getCapacity() < StringFrameColumnWriter.DATA_OFFSET) { + throw DruidException.defensive("Column[%s] is not big enough for a header", columnNumber); + } + + final byte typeCode = region.getByte(0); + if (typeCode != FrameColumnWriters.TYPE_STRING_ARRAY) { + throw DruidException.defensive( + "Column[%s] does not have the correct type code; expected[%s], got[%s]", + columnNumber, + FrameColumnWriters.TYPE_STRING_ARRAY, + typeCode + ); + } + } + + static boolean isMultiValue(final Memory memory) + { + return memory.getByte(1) == 1; + } + + private static long getStartOfCumulativeLengthSection() + { + return StringFrameColumnWriter.DATA_OFFSET; + } + + private static long getStartOfStringLengthSection(final int numRows) + { + return StringFrameColumnWriter.DATA_OFFSET + (long) Integer.BYTES * numRows; + } + + private long getStartOfStringDataSection( + final Memory memory, + final int numRows + ) + { + final int totalNumValues = FrameColumnReaderUtils.getAdjustedCumulativeRowLength( + memory, + getStartOfCumulativeLengthSection(), + numRows - 1 + ); + + return getStartOfStringLengthSection(numRows) + (long) Integer.BYTES * totalNumValues; + } + + private static class StringArrayFrameColumn implements BaseColumn + { + private final StringFrameColumnReader.StringFrameColumn delegate; + + StringArrayFrameColumn( + Frame frame, + Memory memory, + long startOfStringLengthSection, + long startOfStringDataSection + ) + { + this.delegate = new StringFrameColumnReader.StringFrameColumn( + frame, + true, + memory, + startOfStringLengthSection, + startOfStringDataSection, + true + ); + } + + @Override + @SuppressWarnings("rawtypes") + public ColumnValueSelector makeColumnValueSelector(ReadableOffset offset) + { + return delegate.makeDimensionSelectorInternal(offset, null); + } + + @Override + public void close() + { + delegate.close(); + } + } +} diff --git a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java index 41698e188268..8d5252b1cdc9 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java +++ b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java @@ -40,13 +40,11 @@ import org.apache.druid.query.rowsandcols.column.ColumnAccessorBasedColumn; import org.apache.druid.query.rowsandcols.column.accessor.ObjectColumnAccessorBase; import org.apache.druid.segment.BaseSingleValueDimensionSelector; -import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.DimensionDictionarySelector; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.DimensionSelectorUtils; import org.apache.druid.segment.IdLookup; import org.apache.druid.segment.column.BaseColumn; -import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.DictionaryEncodedColumn; @@ -128,10 +126,7 @@ public ColumnPlus readColumn(final Frame frame) baseColumn, new ColumnCapabilitiesImpl().setType(ColumnType.STRING) .setHasMultipleValues(multiValue) - .setDictionaryEncoded(false) - .setHasBitmapIndexes(false) - .setHasSpatialIndexes(false) - .setHasNulls(ColumnCapabilities.Capable.UNKNOWN), + .setDictionaryEncoded(false), frame.numRows() ); } @@ -144,18 +139,17 @@ private void validate(final Memory region) } final byte typeCode = region.getByte(0); - final byte expectedTypeCode = FrameColumnWriters.TYPE_STRING; - if (typeCode != expectedTypeCode) { + if (typeCode != FrameColumnWriters.TYPE_STRING) { throw DruidException.defensive( "Column[%s] does not have the correct type code; expected[%s], got[%s]", columnNumber, - expectedTypeCode, + FrameColumnWriters.TYPE_STRING, typeCode ); } } - static boolean isMultiValue(final Memory memory) + private static boolean isMultiValue(final Memory memory) { return memory.getByte(1) == 1; } @@ -165,7 +159,7 @@ private static long getStartOfCumulativeLengthSection() return StringFrameColumnWriter.DATA_OFFSET; } - static long getStartOfStringLengthSection( + private static long getStartOfStringLengthSection( final int numRows, final boolean multiValue ) @@ -177,7 +171,7 @@ static long getStartOfStringLengthSection( } } - static long getStartOfStringDataSection( + private static long getStartOfStringDataSection( final Memory memory, final int numRows, final boolean multiValue @@ -517,7 +511,7 @@ private List getRowAsListUtf8(final int physicalRow) * Selector used by this column. It's versatile: it can run as string array (asArray = true) or regular string * column (asArray = false). */ - private DimensionSelector makeDimensionSelectorInternal(ReadableOffset offset, @Nullable ExtractionFn extractionFn) + protected DimensionSelector makeDimensionSelectorInternal(ReadableOffset offset, @Nullable ExtractionFn extractionFn) { if (multiValue) { class MultiValueSelector implements DimensionSelector @@ -657,124 +651,4 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) } } } - - static class StringArrayFrameColumn implements BaseColumn - { - private final StringFrameColumn delegate; - - StringArrayFrameColumn( - Frame frame, - boolean multiValue, - Memory memory, - long startOfStringLengthSection, - long startOfStringDataSection - ) - { - this.delegate = new StringFrameColumn( - frame, - multiValue, - memory, - startOfStringLengthSection, - startOfStringDataSection, - true - ); - } - - @Override - @SuppressWarnings("rawtypes") - public ColumnValueSelector makeColumnValueSelector(ReadableOffset offset) - { - return delegate.makeDimensionSelectorInternal(offset, null); - } - - @Override - public void close() - { - delegate.close(); - } - } -} - -/** - * Reader for {@link ColumnType#STRING_ARRAY}. - */ -class StringArrayFrameColumnReader extends StringFrameColumnReader -{ - StringArrayFrameColumnReader(int columnNumber) - { - super(columnNumber); - } - - @Override - public Column readRACColumn(Frame frame) - { - final Memory memory = frame.region(columnNumber); - validate(memory); - - // we expect the memory to be stored as if there are multi values - assert isMultiValue(memory); - final long positionOfLengths = getStartOfStringLengthSection(frame.numRows(), true); - final long positionOfPayloads = getStartOfStringDataSection(memory, frame.numRows(), true); - - StringFrameColumn frameCol = new StringFrameColumn( - frame, - true, - memory, - positionOfLengths, - positionOfPayloads, - true - ); - - return new ColumnAccessorBasedColumn(frameCol); - } - - @Override - public ColumnPlus readColumn(final Frame frame) - { - final Memory memory = frame.region(columnNumber); - validate(memory); - - // we expect the memory to be stored as if there are multi values - assert isMultiValue(memory); - final long startOfStringLengthSection = getStartOfStringLengthSection(frame.numRows(), true); - final long startOfStringDataSection = getStartOfStringDataSection(memory, frame.numRows(), true); - - final BaseColumn baseColumn = new StringArrayFrameColumn( - frame, - true, - memory, - startOfStringLengthSection, - startOfStringDataSection - ); - - return new ColumnPlus( - baseColumn, - new ColumnCapabilitiesImpl().setType(ColumnType.STRING_ARRAY) - .setHasMultipleValues(false) - .setDictionaryEncoded(false) - .setHasBitmapIndexes(false) - .setHasSpatialIndexes(false) - .setHasNulls(ColumnCapabilities.Capable.UNKNOWN), - frame.numRows() - ); - } - - private void validate(final Memory region) - { - // Check if column is big enough for a header - if (region.getCapacity() < StringFrameColumnWriter.DATA_OFFSET) { - throw DruidException.defensive("Column[%s] is not big enough for a header", columnNumber); - } - - final byte typeCode = region.getByte(0); - final byte expectedTypeCode = FrameColumnWriters.TYPE_STRING_ARRAY; - if (typeCode != expectedTypeCode) { - throw DruidException.defensive( - "Column[%s] does not have the correct type code; expected[%s], got[%s]", - columnNumber, - expectedTypeCode, - typeCode - ); - } - } } diff --git a/processing/src/test/java/org/apache/druid/query/operator/window/RowsAndColumnsHelper.java b/processing/src/test/java/org/apache/druid/query/operator/window/RowsAndColumnsHelper.java index 0a10ebc118c8..cdc84620ab0f 100644 --- a/processing/src/test/java/org/apache/druid/query/operator/window/RowsAndColumnsHelper.java +++ b/processing/src/test/java/org/apache/druid/query/operator/window/RowsAndColumnsHelper.java @@ -284,7 +284,7 @@ public void validate(String msgBase, Column col) } else if (expectedVal instanceof Object[]) { Object actualVal = accessor.getObject(i); if (expectedNulls[i]) { - Assert.assertEquals(msg, 0, actualVal); + Assert.assertNull(msg, accessor.getObject(i)); } else { if (actualVal instanceof ArrayList) { Assert.assertArrayEquals(msg, (Object[]) expectedVals[i], ((ArrayList) actualVal).toArray()); diff --git a/processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/TestVirtualColumnEvaluationRowsAndColumnsTest.java b/processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/EvaluateRowsAndColumnsTest.java similarity index 94% rename from processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/TestVirtualColumnEvaluationRowsAndColumnsTest.java rename to processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/EvaluateRowsAndColumnsTest.java index cacaa85b04bf..2387994a6c60 100644 --- a/processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/TestVirtualColumnEvaluationRowsAndColumnsTest.java +++ b/processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/EvaluateRowsAndColumnsTest.java @@ -38,9 +38,9 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assume.assumeNotNull; -public class TestVirtualColumnEvaluationRowsAndColumnsTest extends SemanticTestBase +public class EvaluateRowsAndColumnsTest extends SemanticTestBase { - public TestVirtualColumnEvaluationRowsAndColumnsTest(String name, Function fn) + public EvaluateRowsAndColumnsTest(String name, Function fn) { super(name, fn); } From 0046edcfca543982dabcd54ededf0c67f35524bc Mon Sep 17 00:00:00 2001 From: sreemanamala Date: Mon, 19 Aug 2024 22:52:05 +0530 Subject: [PATCH 07/21] fix string array field reader --- .../druid/frame/field/StringFieldReader.java | 5 ++-- .../columnar/StringFrameColumnReader.java | 2 +- .../semantic/EvaluateRowsAndColumnsTest.java | 24 +++++++++++-------- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/frame/field/StringFieldReader.java b/processing/src/main/java/org/apache/druid/frame/field/StringFieldReader.java index 1c51a914e0d5..846cf4923f9f 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/StringFieldReader.java +++ b/processing/src/main/java/org/apache/druid/frame/field/StringFieldReader.java @@ -480,9 +480,8 @@ public int numRows() public boolean isNull(int rowNum) { final long fieldPosition = coach.computeFieldPosition(rowNum); - byte[] nullBytes = new byte[3]; - dataRegion.getByteArray(fieldPosition, nullBytes, 0, 3); - return Arrays.equals(nullBytes, EXPECTED_BYTES_FOR_NULL); + return dataRegion.getByte(fieldPosition) == StringFieldWriter.VALUE_TERMINATOR + && dataRegion.getByte(fieldPosition + 1) == StringFieldWriter.ROW_TERMINATOR; } @Override diff --git a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java index 8d5252b1cdc9..6fbba3dcc822 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java +++ b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java @@ -210,7 +210,7 @@ static class StringFrameColumn extends ObjectColumnAccessorBase implements Dicti */ private final boolean asArray; - StringFrameColumn( + protected StringFrameColumn( Frame frame, boolean multiValue, Memory memory, diff --git a/processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/EvaluateRowsAndColumnsTest.java b/processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/EvaluateRowsAndColumnsTest.java index 2387994a6c60..2d45115de4c6 100644 --- a/processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/EvaluateRowsAndColumnsTest.java +++ b/processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/EvaluateRowsAndColumnsTest.java @@ -46,30 +46,30 @@ public EvaluateRowsAndColumnsTest(String name, Function Date: Tue, 20 Aug 2024 07:20:16 +0530 Subject: [PATCH 08/21] checks --- .../java/org/apache/druid/frame/field/StringFieldReader.java | 2 +- .../druid/frame/read/columnar/StringArrayFrameColumnReader.java | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/frame/field/StringFieldReader.java b/processing/src/main/java/org/apache/druid/frame/field/StringFieldReader.java index 846cf4923f9f..be2263be138d 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/StringFieldReader.java +++ b/processing/src/main/java/org/apache/druid/frame/field/StringFieldReader.java @@ -480,7 +480,7 @@ public int numRows() public boolean isNull(int rowNum) { final long fieldPosition = coach.computeFieldPosition(rowNum); - return dataRegion.getByte(fieldPosition) == StringFieldWriter.VALUE_TERMINATOR + return dataRegion.getByte(fieldPosition) == StringFieldWriter.NULL_ROW && dataRegion.getByte(fieldPosition + 1) == StringFieldWriter.ROW_TERMINATOR; } diff --git a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java index e5c3414592a3..5b45f12c6a1f 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java +++ b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java @@ -137,6 +137,7 @@ private long getStartOfStringDataSection( final int numRows ) { + assert numRows >= 0; final int totalNumValues = FrameColumnReaderUtils.getAdjustedCumulativeRowLength( memory, getStartOfCumulativeLengthSection(), From 1d6ebe4cab95f8646eec84d0f1e1628ee88abff5 Mon Sep 17 00:00:00 2001 From: sreemanamala Date: Tue, 20 Aug 2024 18:16:25 +0530 Subject: [PATCH 09/21] checks and comments --- .../frame/read/columnar/StringArrayFrameColumnReader.java | 6 ++++-- .../druid/frame/read/columnar/StringFrameColumnReader.java | 2 +- .../druid/frame/write/columnar/StringFrameColumnWriter.java | 5 ++++- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java index 5b45f12c6a1f..752c16bebb2e 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java +++ b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java @@ -119,7 +119,7 @@ private void validate(final Memory region) static boolean isMultiValue(final Memory memory) { - return memory.getByte(1) == 1; + return memory.getByte(StringFrameColumnWriter.MULTI_VALUE_POSITION) == StringFrameColumnWriter.MULTI_VALUE_BYTE; } private static long getStartOfCumulativeLengthSection() @@ -137,7 +137,9 @@ private long getStartOfStringDataSection( final int numRows ) { - assert numRows >= 0; + if (numRows < 0) { + throw DruidException.defensive("Encountered -ve numRows [%s] while reading frame", numRows); + } final int totalNumValues = FrameColumnReaderUtils.getAdjustedCumulativeRowLength( memory, getStartOfCumulativeLengthSection(), diff --git a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java index 6fbba3dcc822..e0531be55a70 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java +++ b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java @@ -151,7 +151,7 @@ private void validate(final Memory region) private static boolean isMultiValue(final Memory memory) { - return memory.getByte(1) == 1; + return memory.getByte(StringFrameColumnWriter.MULTI_VALUE_POSITION) == StringFrameColumnWriter.MULTI_VALUE_BYTE; } private static long getStartOfCumulativeLengthSection() diff --git a/processing/src/main/java/org/apache/druid/frame/write/columnar/StringFrameColumnWriter.java b/processing/src/main/java/org/apache/druid/frame/write/columnar/StringFrameColumnWriter.java index 8eee0fd0cef9..75f64613388b 100644 --- a/processing/src/main/java/org/apache/druid/frame/write/columnar/StringFrameColumnWriter.java +++ b/processing/src/main/java/org/apache/druid/frame/write/columnar/StringFrameColumnWriter.java @@ -44,6 +44,9 @@ public abstract class StringFrameColumnWriter imp public static final long DATA_OFFSET = 1 /* type code */ + 1 /* single or multi-value? */; + public static final byte MULTI_VALUE_BYTE = (byte) 0x01; + public static final long MULTI_VALUE_POSITION = 1; + private final T selector; private final byte typeCode; protected final ColumnCapabilities.Capable multiValue; @@ -228,7 +231,7 @@ public long writeTo(final WritableMemory memory, final long startPosition) long currentPosition = startPosition; memory.putByte(currentPosition, typeCode); - memory.putByte(currentPosition + 1, writeMultiValue ? (byte) 1 : (byte) 0); + memory.putByte(currentPosition + 1, writeMultiValue ? MULTI_VALUE_BYTE : (byte) 0); currentPosition += 2; if (writeMultiValue) { From b25d56ee6e5228df4eb3d7fef50f8ed2735fbb3e Mon Sep 17 00:00:00 2001 From: sreemanamala Date: Sat, 24 Aug 2024 23:28:52 +0530 Subject: [PATCH 10/21] refactor --- .../StringArrayFrameColumnReader.java | 383 ++++++++++++++++-- .../columnar/StringFrameColumnReader.java | 323 +++++++-------- 2 files changed, 499 insertions(+), 207 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java index 752c16bebb2e..eb2a2006e02c 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java +++ b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java @@ -19,18 +19,45 @@ package org.apache.druid.frame.read.columnar; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.primitives.Ints; +import it.unimi.dsi.fastutil.objects.ObjectArrays; import org.apache.datasketches.memory.Memory; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.error.DruidException; import org.apache.druid.frame.Frame; +import org.apache.druid.frame.read.FrameReaderUtils; +import org.apache.druid.frame.write.FrameWriterUtils; import org.apache.druid.frame.write.columnar.FrameColumnWriters; import org.apache.druid.frame.write.columnar.StringFrameColumnWriter; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.query.extraction.ExtractionFn; +import org.apache.druid.query.filter.DruidPredicateFactory; +import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.query.rowsandcols.column.Column; import org.apache.druid.query.rowsandcols.column.ColumnAccessorBasedColumn; -import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.query.rowsandcols.column.accessor.ObjectColumnAccessorBase; +import org.apache.druid.segment.DimensionDictionarySelector; +import org.apache.druid.segment.DimensionSelector; +import org.apache.druid.segment.DimensionSelectorUtils; +import org.apache.druid.segment.IdLookup; import org.apache.druid.segment.column.BaseColumn; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.DictionaryEncodedColumn; +import org.apache.druid.segment.data.IndexedInts; +import org.apache.druid.segment.data.RangeIndexedInts; import org.apache.druid.segment.data.ReadableOffset; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.ReadableVectorInspector; +import org.apache.druid.segment.vector.ReadableVectorOffset; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; +import org.apache.druid.segment.vector.VectorObjectSelector; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; +import java.util.Comparator; /** * Reader for {@link ColumnType#STRING_ARRAY}. @@ -55,18 +82,14 @@ public Column readRACColumn(Frame frame) final Memory memory = frame.region(columnNumber); validate(memory); - // String arrays always store multiple values per row. - assert isMultiValue(memory); final long positionOfLengths = getStartOfStringLengthSection(frame.numRows()); final long positionOfPayloads = getStartOfStringDataSection(memory, frame.numRows()); - StringFrameColumnReader.StringFrameColumn frameCol = new StringFrameColumnReader.StringFrameColumn( + StringArrayFrameColumn frameCol = new StringArrayFrameColumn( frame, - true, memory, positionOfLengths, - positionOfPayloads, - true + positionOfPayloads ); return new ColumnAccessorBasedColumn(frameCol); @@ -78,8 +101,6 @@ public ColumnPlus readColumn(final Frame frame) final Memory memory = frame.region(columnNumber); validate(memory); - // String arrays always store multiple values per row. - assert isMultiValue(memory); final long startOfStringLengthSection = getStartOfStringLengthSection(frame.numRows()); final long startOfStringDataSection = getStartOfStringDataSection(memory, frame.numRows()); @@ -117,11 +138,6 @@ private void validate(final Memory region) } } - static boolean isMultiValue(final Memory memory) - { - return memory.getByte(StringFrameColumnWriter.MULTI_VALUE_POSITION) == StringFrameColumnWriter.MULTI_VALUE_BYTE; - } - private static long getStartOfCumulativeLengthSection() { return StringFrameColumnWriter.DATA_OFFSET; @@ -149,38 +165,347 @@ private long getStartOfStringDataSection( return getStartOfStringLengthSection(numRows) + (long) Integer.BYTES * totalNumValues; } - private static class StringArrayFrameColumn implements BaseColumn + @VisibleForTesting + private static class StringArrayFrameColumn extends ObjectColumnAccessorBase implements DictionaryEncodedColumn { - private final StringFrameColumnReader.StringFrameColumn delegate; + private final Frame frame; + private final Memory memory; + private final long startOfStringLengthSection; + private final long startOfStringDataSection; - StringArrayFrameColumn( + protected StringArrayFrameColumn( Frame frame, Memory memory, long startOfStringLengthSection, long startOfStringDataSection ) { - this.delegate = new StringFrameColumnReader.StringFrameColumn( - frame, - true, - memory, - startOfStringLengthSection, - startOfStringDataSection, - true - ); + this.frame = frame; + this.memory = memory; + this.startOfStringLengthSection = startOfStringLengthSection; + this.startOfStringDataSection = startOfStringDataSection; + } + + @Override + public boolean hasMultipleValues() + { + // Only used in segment tests that don't run on frames. + throw new UnsupportedOperationException(); + } + + @Override + public int getSingleValueRow(int rowNum) + { + // Only used in segment tests that don't run on frames. + throw new UnsupportedOperationException(); + } + + @Override + public IndexedInts getMultiValueRow(int rowNum) + { + // Only used in segment tests that don't run on frames. + throw new UnsupportedOperationException(); + } + + @Nullable + @Override + public String lookupName(int id) + { + // Only used on columns from segments, not frames. + throw new UnsupportedOperationException(); + } + + @Override + public int lookupId(String name) + { + // Only used on columns from segments, not frames. + throw new UnsupportedOperationException(); } @Override - @SuppressWarnings("rawtypes") - public ColumnValueSelector makeColumnValueSelector(ReadableOffset offset) + public int getCardinality() { - return delegate.makeDimensionSelectorInternal(offset, null); + return DimensionDictionarySelector.CARDINALITY_UNKNOWN; + } + + @Override + public DimensionSelector makeDimensionSelector(ReadableOffset offset, @Nullable ExtractionFn extractionFn) + { + class MultipleValueSelector implements DimensionSelector + { + @Override + public int getValueCardinality() + { + return CARDINALITY_UNKNOWN; + } + + @Nullable + @Override + public String lookupName(int id) + { + return null; + } + + @Nullable + @Override + public ByteBuffer lookupNameUtf8(int id) + { + return null; + } + + @Override + public boolean supportsLookupNameUtf8() + { + return extractionFn == null; + } + + @Override + public boolean nameLookupPossibleInAdvance() + { + return false; + } + + @Nullable + @Override + public IdLookup idLookup() + { + return null; + } + + @Override + public IndexedInts getRow() + { + return new RangeIndexedInts(); + } + + @Override + public ValueMatcher makeValueMatcher(@Nullable String value) + { + return DimensionSelectorUtils.makeValueMatcherGeneric(this, value); + } + + @Override + public ValueMatcher makeValueMatcher(DruidPredicateFactory predicateFactory) + { + return DimensionSelectorUtils.makeValueMatcherGeneric(this, predicateFactory); + } + + @Nullable + @Override + public Object getObject() + { + return getRowAsObject(frame.physicalRow(offset.getOffset()), true); + } + + @Override + public Class classOfObject() + { + return String.class; + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + // Do nothing. + } + } + + return new MultipleValueSelector(); + } + + @Override + public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector(ReadableVectorOffset offset) + { + // Callers should use object selectors, because we have no dictionary. + throw new UnsupportedOperationException(); + } + + @Override + public MultiValueDimensionVectorSelector makeMultiValueDimensionVectorSelector(ReadableVectorOffset vectorOffset) + { + // Callers should use object selectors, because we have no dictionary. + throw new UnsupportedOperationException(); + } + + @Override + public VectorObjectSelector makeVectorObjectSelector(final ReadableVectorOffset offset) + { + class StringArrayFrameVectorObjectSelector implements VectorObjectSelector + { + private final Object[] vector = new Object[offset.getMaxVectorSize()]; + private int id = ReadableVectorInspector.NULL_ID; + + @Override + public Object[] getObjectVector() + { + computeVectorIfNeeded(); + return vector; + } + + @Override + public int getMaxVectorSize() + { + return offset.getMaxVectorSize(); + } + + @Override + public int getCurrentVectorSize() + { + return offset.getCurrentVectorSize(); + } + + private void computeVectorIfNeeded() + { + if (id == offset.getId()) { + return; + } + + if (offset.isContiguous()) { + final int start = offset.getStartOffset(); + + for (int i = 0; i < offset.getCurrentVectorSize(); i++) { + final int physicalRow = frame.physicalRow(i + start); + vector[i] = getRowAsObject(physicalRow, true); + } + } else { + final int[] offsets = offset.getOffsets(); + + for (int i = 0; i < offset.getCurrentVectorSize(); i++) { + final int physicalRow = frame.physicalRow(offsets[i]); + vector[i] = getRowAsObject(physicalRow, true); + } + } + + id = offset.getId(); + } + } + + return new StringArrayFrameVectorObjectSelector(); + } + + @Override + public int length() + { + return frame.numRows(); } @Override public void close() { - delegate.close(); + // Do nothing. + } + + @Override + public ColumnType getType() + { + return ColumnType.STRING_ARRAY; + } + + @Override + public int numRows() + { + return length(); + } + + @Override + protected Object getVal(int rowNum) + { + return getRowAsObject(frame.physicalRow(rowNum), true); + } + + @Override + protected Comparator getComparator() + { + return Comparator.nullsFirst(Comparator.comparing(o -> ((String) o))); + } + + /** + * Returns a ByteBuffer containing UTF-8 encoded string number {@code index}. The ByteBuffer is always newly + * created, so it is OK to change its position, limit, etc. However, it may point to shared memory, so it is + * not OK to write to its contents. + */ + @Nullable + private ByteBuffer getStringUtf8(final int index) + { + final long dataStart; + final long dataEnd = + startOfStringDataSection + + memory.getInt(startOfStringLengthSection + (long) Integer.BYTES * index); + + if (index == 0) { + dataStart = startOfStringDataSection; + } else { + dataStart = + startOfStringDataSection + + memory.getInt(startOfStringLengthSection + (long) Integer.BYTES * (index - 1)); + } + + final int dataLength = Ints.checkedCast(dataEnd - dataStart); + + if ((dataLength == 0 && NullHandling.replaceWithDefault()) || + (dataLength == 1 && memory.getByte(dataStart) == FrameWriterUtils.NULL_STRING_MARKER)) { + return null; + } + + return FrameReaderUtils.readByteBuffer(memory, dataStart, dataLength); + } + + @Nullable + private String getString(final int index) + { + final ByteBuffer stringUtf8 = getStringUtf8(index); + + if (stringUtf8 == null) { + return null; + } else { + return StringUtils.fromUtf8(stringUtf8); + } + } + + /** + * Returns the object at the given physical row number. + * + * @param physicalRow physical row number + * @param decode if true, return java.lang.String. If false, return UTF-8 ByteBuffer. + */ + @Nullable + private Object getRowAsObject(final int physicalRow, final boolean decode) + { + final int cumulativeRowLength = FrameColumnReaderUtils.getCumulativeRowLength( + memory, + getStartOfCumulativeLengthSection(), + physicalRow + ); + final int rowLength; + + if (FrameColumnReaderUtils.isNullRow(cumulativeRowLength)) { + return null; + } else if (physicalRow == 0) { + rowLength = cumulativeRowLength; + } else { + rowLength = cumulativeRowLength - FrameColumnReaderUtils.getAdjustedCumulativeRowLength( + memory, + getStartOfCumulativeLengthSection(), + physicalRow - 1 + ); + } + + if (rowLength == 0) { + return ObjectArrays.EMPTY_ARRAY; + } else if (rowLength == 1) { + final int index = cumulativeRowLength - 1; + final Object o = decode ? getString(index) : getStringUtf8(index); + return new Object[]{o}; + } else { + final Object[] row = new Object[rowLength]; + + for (int i = 0; i < rowLength; i++) { + final int index = cumulativeRowLength - rowLength + i; + row[i] = decode ? getString(index) : getStringUtf8(index); + } + + return row; + } } } } diff --git a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java index e0531be55a70..3a505a17590e 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java +++ b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java @@ -21,7 +21,6 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.primitives.Ints; -import it.unimi.dsi.fastutil.objects.ObjectArrays; import org.apache.datasketches.memory.Memory; import org.apache.druid.common.config.NullHandling; import org.apache.druid.error.DruidException; @@ -30,7 +29,6 @@ import org.apache.druid.frame.write.FrameWriterUtils; import org.apache.druid.frame.write.columnar.FrameColumnWriters; import org.apache.druid.frame.write.columnar.StringFrameColumnWriter; -import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.query.extraction.ExtractionFn; import org.apache.druid.query.filter.DruidPredicateFactory; @@ -96,8 +94,7 @@ public Column readRACColumn(Frame frame) multiValue, memory, positionOfLengths, - positionOfPayloads, - multiValue // Read MVDs as String arrays + positionOfPayloads ); return new ColumnAccessorBasedColumn(frameCol); @@ -118,8 +115,7 @@ public ColumnPlus readColumn(final Frame frame) multiValue, memory, startOfStringLengthSection, - startOfStringDataSection, - false + startOfStringDataSection ); return new ColumnPlus( @@ -193,7 +189,7 @@ private static long getStartOfStringDataSection( } @VisibleForTesting - static class StringFrameColumn extends ObjectColumnAccessorBase implements DictionaryEncodedColumn + private static class StringFrameColumn extends ObjectColumnAccessorBase implements DictionaryEncodedColumn { private final Frame frame; private final Memory memory; @@ -205,18 +201,12 @@ static class StringFrameColumn extends ObjectColumnAccessorBase implements Dicti */ private final boolean multiValue; - /** - * Whether the column is being read as {@link ColumnType#STRING_ARRAY} (true) or {@link ColumnType#STRING} (false). - */ - private final boolean asArray; - protected StringFrameColumn( Frame frame, boolean multiValue, Memory memory, long startOfStringLengthSection, - long startOfStringDataSection, - final boolean asArray + long startOfStringDataSection ) { this.frame = frame; @@ -224,7 +214,6 @@ protected StringFrameColumn( this.memory = memory; this.startOfStringLengthSection = startOfStringLengthSection; this.startOfStringDataSection = startOfStringDataSection; - this.asArray = asArray; } @Override @@ -272,11 +261,142 @@ public int getCardinality() @Override public DimensionSelector makeDimensionSelector(ReadableOffset offset, @Nullable ExtractionFn extractionFn) { - if (asArray) { - throw new ISE("Cannot call makeDimensionSelector on field of type [%s]", ColumnType.STRING_ARRAY); - } + if (multiValue) { + class MultiValueSelector implements DimensionSelector + { + private int currentRow = -1; + private List currentValues = null; + private final RangeIndexedInts indexedInts = new RangeIndexedInts(); + + @Override + public int getValueCardinality() + { + return CARDINALITY_UNKNOWN; + } + + @Nullable + @Override + public String lookupName(int id) + { + populate(); + final ByteBuffer buf = currentValues.get(id); + final String s = buf == null ? null : StringUtils.fromUtf8(buf.duplicate()); + return extractionFn == null ? s : extractionFn.apply(s); + } + + @Nullable + @Override + public ByteBuffer lookupNameUtf8(int id) + { + assert supportsLookupNameUtf8(); + populate(); + return currentValues.get(id); + } + + @Override + public boolean supportsLookupNameUtf8() + { + return extractionFn == null; + } + + @Override + public boolean nameLookupPossibleInAdvance() + { + return false; + } + + @Nullable + @Override + public IdLookup idLookup() + { + return null; + } + + @Override + public IndexedInts getRow() + { + populate(); + return indexedInts; + } + + @Override + public ValueMatcher makeValueMatcher(@Nullable String value) + { + return DimensionSelectorUtils.makeValueMatcherGeneric(this, value); + } + + @Override + public ValueMatcher makeValueMatcher(DruidPredicateFactory predicateFactory) + { + return DimensionSelectorUtils.makeValueMatcherGeneric(this, predicateFactory); + } + + @Nullable + @Override + public Object getObject() + { + return getRowAsObject(frame.physicalRow(offset.getOffset()), true); + } + + @Override + public Class classOfObject() + { + return String.class; + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + // Do nothing. + } + + private void populate() + { + final int row = offset.getOffset(); + + if (row != currentRow) { + currentValues = getRowAsListUtf8(frame.physicalRow(row)); + indexedInts.setSize(currentValues.size()); + currentRow = row; + } + } + } - return makeDimensionSelectorInternal(offset, extractionFn); + return new MultiValueSelector(); + } else { + class SingleValueSelector extends BaseSingleValueDimensionSelector + { + @Nullable + @Override + protected String getValue() + { + final String s = getString(frame.physicalRow(offset.getOffset())); + return extractionFn == null ? s : extractionFn.apply(s); + } + + @Nullable + @Override + public ByteBuffer lookupNameUtf8(int id) + { + assert supportsLookupNameUtf8(); + return getStringUtf8(frame.physicalRow(offset.getOffset())); + } + + @Override + public boolean supportsLookupNameUtf8() + { + return extractionFn == null; + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + // Do nothing. + } + } + + return new SingleValueSelector(); + } } @Override @@ -364,7 +484,7 @@ public void close() @Override public ColumnType getType() { - return asArray ? ColumnType.STRING_ARRAY : ColumnType.STRING; + return ColumnType.STRING; } @Override @@ -431,10 +551,6 @@ private String getString(final int index) /** * Returns the object at the given physical row number. * - * When {@link #asArray}, the return value is always of type {@code Object[]}. Otherwise, the return value - * is either an empty list (if the row is empty), a single String (if the row has one value), or a List - * of Strings (if the row has more than one value). - * * @param physicalRow physical row number * @param decode if true, return java.lang.String. If false, return UTF-8 ByteBuffer. */ @@ -462,11 +578,11 @@ private Object getRowAsObject(final int physicalRow, final boolean decode) } if (rowLength == 0) { - return asArray ? ObjectArrays.EMPTY_ARRAY : Collections.emptyList(); + return Collections.emptyList(); } else if (rowLength == 1) { final int index = cumulativeRowLength - 1; final Object o = decode ? getString(index) : getStringUtf8(index); - return asArray ? new Object[]{o} : o; + return o; } else { final Object[] row = new Object[rowLength]; @@ -475,26 +591,21 @@ private Object getRowAsObject(final int physicalRow, final boolean decode) row[i] = decode ? getString(index) : getStringUtf8(index); } - return asArray ? row : Arrays.asList(row); + return Arrays.asList(row); } } else { final Object o = decode ? getString(physicalRow) : getStringUtf8(physicalRow); - return asArray ? new Object[]{o} : o; + return o; } } /** - * Returns the value at the given physical row number as a list of ByteBuffers. Only valid when !asArray, i.e., - * when type is {@link ColumnType#STRING}. + * Returns the value at the given physical row number as a list of ByteBuffers. * * @param physicalRow physical row number */ private List getRowAsListUtf8(final int physicalRow) { - if (asArray) { - throw DruidException.defensive("Unexpected call for array column"); - } - final Object object = getRowAsObject(physicalRow, false); if (object == null) { @@ -506,149 +617,5 @@ private List getRowAsListUtf8(final int physicalRow) return Collections.singletonList((ByteBuffer) object); } } - - /** - * Selector used by this column. It's versatile: it can run as string array (asArray = true) or regular string - * column (asArray = false). - */ - protected DimensionSelector makeDimensionSelectorInternal(ReadableOffset offset, @Nullable ExtractionFn extractionFn) - { - if (multiValue) { - class MultiValueSelector implements DimensionSelector - { - private int currentRow = -1; - private List currentValues = null; - private final RangeIndexedInts indexedInts = new RangeIndexedInts(); - - @Override - public int getValueCardinality() - { - return CARDINALITY_UNKNOWN; - } - - @Nullable - @Override - public String lookupName(int id) - { - populate(); - final ByteBuffer buf = currentValues.get(id); - final String s = buf == null ? null : StringUtils.fromUtf8(buf.duplicate()); - return extractionFn == null ? s : extractionFn.apply(s); - } - - @Nullable - @Override - public ByteBuffer lookupNameUtf8(int id) - { - assert supportsLookupNameUtf8(); - populate(); - return currentValues.get(id); - } - - @Override - public boolean supportsLookupNameUtf8() - { - return extractionFn == null; - } - - @Override - public boolean nameLookupPossibleInAdvance() - { - return false; - } - - @Nullable - @Override - public IdLookup idLookup() - { - return null; - } - - @Override - public IndexedInts getRow() - { - populate(); - return indexedInts; - } - - @Override - public ValueMatcher makeValueMatcher(@Nullable String value) - { - return DimensionSelectorUtils.makeValueMatcherGeneric(this, value); - } - - @Override - public ValueMatcher makeValueMatcher(DruidPredicateFactory predicateFactory) - { - return DimensionSelectorUtils.makeValueMatcherGeneric(this, predicateFactory); - } - - @Nullable - @Override - public Object getObject() - { - return getRowAsObject(frame.physicalRow(offset.getOffset()), true); - } - - @Override - public Class classOfObject() - { - return String.class; - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - // Do nothing. - } - - private void populate() - { - final int row = offset.getOffset(); - - if (row != currentRow) { - currentValues = getRowAsListUtf8(frame.physicalRow(row)); - indexedInts.setSize(currentValues.size()); - currentRow = row; - } - } - } - - return new MultiValueSelector(); - } else { - class SingleValueSelector extends BaseSingleValueDimensionSelector - { - @Nullable - @Override - protected String getValue() - { - final String s = getString(frame.physicalRow(offset.getOffset())); - return extractionFn == null ? s : extractionFn.apply(s); - } - - @Nullable - @Override - public ByteBuffer lookupNameUtf8(int id) - { - assert supportsLookupNameUtf8(); - return getStringUtf8(frame.physicalRow(offset.getOffset())); - } - - @Override - public boolean supportsLookupNameUtf8() - { - return extractionFn == null; - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - // Do nothing. - } - } - - return new SingleValueSelector(); - } - } } } From edb0d8772d2e666bf42ca2b58bf85ecf81ae71f6 Mon Sep 17 00:00:00 2001 From: sreemanamala Date: Sat, 24 Aug 2024 23:37:32 +0530 Subject: [PATCH 11/21] fix --- .../frame/read/columnar/StringArrayFrameColumnReader.java | 4 ++-- .../druid/frame/read/columnar/StringFrameColumnReader.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java index eb2a2006e02c..bf65f069a2f2 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java +++ b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java @@ -64,7 +64,7 @@ */ public class StringArrayFrameColumnReader implements FrameColumnReader { - final int columnNumber; + private final int columnNumber; /** * Create a new reader. @@ -300,7 +300,7 @@ public Object getObject() @Override public Class classOfObject() { - return String.class; + return String[].class; } @Override diff --git a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java index 3a505a17590e..860b13e38932 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java +++ b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java @@ -67,7 +67,7 @@ */ public class StringFrameColumnReader implements FrameColumnReader { - final int columnNumber; + private final int columnNumber; /** * Create a new reader. From a8b57e742435ac63b7ba72f657c157895be1e5f2 Mon Sep 17 00:00:00 2001 From: sreemanamala Date: Sun, 25 Aug 2024 08:02:57 +0530 Subject: [PATCH 12/21] codeql check --- .../frame/read/columnar/StringArrayFrameColumnReader.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java index bf65f069a2f2..ab7af226ea6e 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java +++ b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java @@ -427,6 +427,9 @@ protected Comparator getComparator() @Nullable private ByteBuffer getStringUtf8(final int index) { + if (memory.getCapacity() < startOfStringLengthSection + (long) Integer.BYTES * (index + 1)) { + throw DruidException.defensive("StringArrayFrameColumn trying to read outside frame memory!"); + } final long dataStart; final long dataEnd = startOfStringDataSection + From 905d12943a6bae20ad3c9b6638182fc5d577e761 Mon Sep 17 00:00:00 2001 From: sreemanamala Date: Sun, 25 Aug 2024 08:58:26 +0530 Subject: [PATCH 13/21] codeql check --- .../frame/read/columnar/StringArrayFrameColumnReader.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java index ab7af226ea6e..48f61b390a8a 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java +++ b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java @@ -427,8 +427,9 @@ protected Comparator getComparator() @Nullable private ByteBuffer getStringUtf8(final int index) { - if (memory.getCapacity() < startOfStringLengthSection + (long) Integer.BYTES * (index + 1)) { - throw DruidException.defensive("StringArrayFrameColumn trying to read outside frame memory!"); + if (index > Long.MAX_VALUE / Integer.BYTES + || startOfStringLengthSection > Long.MAX_VALUE - (long) Integer.BYTES * index) { + throw DruidException.defensive("DataEnd index would overflow trying to read the frame memory!"); } final long dataStart; final long dataEnd = From ddf9a3afeca2c8d4fba700df674ef260f5653303 Mon Sep 17 00:00:00 2001 From: sreemanamala Date: Sun, 25 Aug 2024 09:13:11 +0530 Subject: [PATCH 14/21] fix --- .../frame/read/columnar/StringArrayFrameColumnReader.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java index 48f61b390a8a..03c9334a27c9 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java +++ b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java @@ -427,8 +427,7 @@ protected Comparator getComparator() @Nullable private ByteBuffer getStringUtf8(final int index) { - if (index > Long.MAX_VALUE / Integer.BYTES - || startOfStringLengthSection > Long.MAX_VALUE - (long) Integer.BYTES * index) { + if (startOfStringLengthSection > Long.MAX_VALUE - (long) Integer.BYTES * index) { throw DruidException.defensive("DataEnd index would overflow trying to read the frame memory!"); } final long dataStart; From d7ebcc26abcaa982134789b9a7c7e2f77eff8c6a Mon Sep 17 00:00:00 2001 From: sreemanamala Date: Sun, 25 Aug 2024 10:35:13 +0530 Subject: [PATCH 15/21] checks --- .../StringArrayFrameColumnReader.java | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java index 03c9334a27c9..502b23ee18c6 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java +++ b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java @@ -428,19 +428,25 @@ protected Comparator getComparator() private ByteBuffer getStringUtf8(final int index) { if (startOfStringLengthSection > Long.MAX_VALUE - (long) Integer.BYTES * index) { - throw DruidException.defensive("DataEnd index would overflow trying to read the frame memory!"); + throw DruidException.defensive("length index would overflow trying to read the frame memory!"); } + + final int dataEndVariableIndex = memory.getInt(startOfStringLengthSection + (long) Integer.BYTES * index); + if (startOfStringDataSection > Long.MAX_VALUE - dataEndVariableIndex) { + throw DruidException.defensive("data end index would overflow trying to read the frame memory!"); + } + final long dataStart; - final long dataEnd = - startOfStringDataSection + - memory.getInt(startOfStringLengthSection + (long) Integer.BYTES * index); + final long dataEnd = startOfStringDataSection + dataEndVariableIndex; if (index == 0) { dataStart = startOfStringDataSection; } else { - dataStart = - startOfStringDataSection + - memory.getInt(startOfStringLengthSection + (long) Integer.BYTES * (index - 1)); + final int dataStartVariableIndex = memory.getInt(startOfStringLengthSection + (long) Integer.BYTES * (index - 1)); + if (startOfStringDataSection > Long.MAX_VALUE - dataStartVariableIndex) { + throw DruidException.defensive("data start index would overflow trying to read the frame memory!"); + } + dataStart = startOfStringDataSection + dataStartVariableIndex; } final int dataLength = Ints.checkedCast(dataEnd - dataStart); From 428fc4195de05610a6ef2dd64fcb506fb21156ad Mon Sep 17 00:00:00 2001 From: sreemanamala Date: Tue, 27 Aug 2024 08:01:41 +0530 Subject: [PATCH 16/21] change column value selector --- .../StringArrayFrameColumnReader.java | 156 ++---------------- .../semantic/EvaluateRowsAndColumnsTest.java | 4 +- 2 files changed, 13 insertions(+), 147 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java index 502b23ee18c6..b248a0366bf0 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java +++ b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java @@ -31,28 +31,18 @@ import org.apache.druid.frame.write.columnar.FrameColumnWriters; import org.apache.druid.frame.write.columnar.StringFrameColumnWriter; import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.query.extraction.ExtractionFn; -import org.apache.druid.query.filter.DruidPredicateFactory; -import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.query.rowsandcols.column.Column; import org.apache.druid.query.rowsandcols.column.ColumnAccessorBasedColumn; import org.apache.druid.query.rowsandcols.column.accessor.ObjectColumnAccessorBase; -import org.apache.druid.segment.DimensionDictionarySelector; -import org.apache.druid.segment.DimensionSelector; -import org.apache.druid.segment.DimensionSelectorUtils; -import org.apache.druid.segment.IdLookup; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.ObjectColumnSelector; import org.apache.druid.segment.column.BaseColumn; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.DictionaryEncodedColumn; -import org.apache.druid.segment.data.IndexedInts; -import org.apache.druid.segment.data.RangeIndexedInts; import org.apache.druid.segment.data.ReadableOffset; -import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; import org.apache.druid.segment.vector.ReadableVectorInspector; import org.apache.druid.segment.vector.ReadableVectorOffset; -import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import org.apache.druid.segment.vector.VectorObjectSelector; import javax.annotation.Nullable; @@ -166,7 +156,7 @@ private long getStartOfStringDataSection( } @VisibleForTesting - private static class StringArrayFrameColumn extends ObjectColumnAccessorBase implements DictionaryEncodedColumn + private static class StringArrayFrameColumn extends ObjectColumnAccessorBase implements BaseColumn { private final Frame frame; private final Memory memory; @@ -187,107 +177,14 @@ protected StringArrayFrameColumn( } @Override - public boolean hasMultipleValues() + public ColumnValueSelector makeColumnValueSelector(ReadableOffset offset) { - // Only used in segment tests that don't run on frames. - throw new UnsupportedOperationException(); - } - - @Override - public int getSingleValueRow(int rowNum) - { - // Only used in segment tests that don't run on frames. - throw new UnsupportedOperationException(); - } - - @Override - public IndexedInts getMultiValueRow(int rowNum) - { - // Only used in segment tests that don't run on frames. - throw new UnsupportedOperationException(); - } - - @Nullable - @Override - public String lookupName(int id) - { - // Only used on columns from segments, not frames. - throw new UnsupportedOperationException(); - } - - @Override - public int lookupId(String name) - { - // Only used on columns from segments, not frames. - throw new UnsupportedOperationException(); - } - - @Override - public int getCardinality() - { - return DimensionDictionarySelector.CARDINALITY_UNKNOWN; - } - - @Override - public DimensionSelector makeDimensionSelector(ReadableOffset offset, @Nullable ExtractionFn extractionFn) - { - class MultipleValueSelector implements DimensionSelector + return new ObjectColumnSelector() { @Override - public int getValueCardinality() - { - return CARDINALITY_UNKNOWN; - } - - @Nullable - @Override - public String lookupName(int id) - { - return null; - } - - @Nullable - @Override - public ByteBuffer lookupNameUtf8(int id) - { - return null; - } - - @Override - public boolean supportsLookupNameUtf8() - { - return extractionFn == null; - } - - @Override - public boolean nameLookupPossibleInAdvance() - { - return false; - } - - @Nullable - @Override - public IdLookup idLookup() - { - return null; - } - - @Override - public IndexedInts getRow() - { - return new RangeIndexedInts(); - } - - @Override - public ValueMatcher makeValueMatcher(@Nullable String value) - { - return DimensionSelectorUtils.makeValueMatcherGeneric(this, value); - } - - @Override - public ValueMatcher makeValueMatcher(DruidPredicateFactory predicateFactory) + public void inspectRuntimeShape(RuntimeShapeInspector inspector) { - return DimensionSelectorUtils.makeValueMatcherGeneric(this, predicateFactory); + // Do nothing. } @Nullable @@ -302,29 +199,7 @@ public Class classOfObject() { return String[].class; } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - // Do nothing. - } - } - - return new MultipleValueSelector(); - } - - @Override - public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector(ReadableVectorOffset offset) - { - // Callers should use object selectors, because we have no dictionary. - throw new UnsupportedOperationException(); - } - - @Override - public MultiValueDimensionVectorSelector makeMultiValueDimensionVectorSelector(ReadableVectorOffset vectorOffset) - { - // Callers should use object selectors, because we have no dictionary. - throw new UnsupportedOperationException(); + }; } @Override @@ -383,12 +258,6 @@ private void computeVectorIfNeeded() return new StringArrayFrameVectorObjectSelector(); } - @Override - public int length() - { - return frame.numRows(); - } - @Override public void close() { @@ -404,7 +273,7 @@ public ColumnType getType() @Override public int numRows() { - return length(); + return frame.numRows(); } @Override @@ -442,7 +311,8 @@ private ByteBuffer getStringUtf8(final int index) if (index == 0) { dataStart = startOfStringDataSection; } else { - final int dataStartVariableIndex = memory.getInt(startOfStringLengthSection + (long) Integer.BYTES * (index - 1)); + final int dataStartVariableIndex = memory.getInt(startOfStringLengthSection + (long) Integer.BYTES * (index + - 1)); if (startOfStringDataSection > Long.MAX_VALUE - dataStartVariableIndex) { throw DruidException.defensive("data start index would overflow trying to read the frame memory!"); } @@ -501,10 +371,6 @@ private Object getRowAsObject(final int physicalRow, final boolean decode) if (rowLength == 0) { return ObjectArrays.EMPTY_ARRAY; - } else if (rowLength == 1) { - final int index = cumulativeRowLength - 1; - final Object o = decode ? getString(index) : getStringUtf8(index); - return new Object[]{o}; } else { final Object[] row = new Object[rowLength]; diff --git a/processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/EvaluateRowsAndColumnsTest.java b/processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/EvaluateRowsAndColumnsTest.java index 2d45115de4c6..53202fe74ce0 100644 --- a/processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/EvaluateRowsAndColumnsTest.java +++ b/processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/EvaluateRowsAndColumnsTest.java @@ -50,7 +50,7 @@ public void testMaterializeColumns() { Object[][] vals = new Object[][] { {1L, "a", 123L, new Object[]{"xyz", "x"}, 0L}, - {2L, "a", 456L, new Object[]{"xyz", "x"}, 1L}, + {2L, "a", 456L, new Object[]{"abc"}, 1L}, {3L, "b", 789L, new Object[]{null}, 2L}, {4L, null, 123L, null, 3L}, }; @@ -67,7 +67,7 @@ public void testMaterializeColumns() Object[] expectedArr = new Object[][] { {"xyz", "x"}, - {"xyz", "x"}, + {"abc"}, {null}, null }; From 067fdf8532202581257790be14900c5b5a6ea4a1 Mon Sep 17 00:00:00 2001 From: sreemanamala Date: Tue, 27 Aug 2024 08:16:30 +0530 Subject: [PATCH 17/21] fix comparator --- .../druid/frame/read/columnar/StringArrayFrameColumnReader.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java index b248a0366bf0..40147340cb20 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java +++ b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java @@ -285,7 +285,7 @@ protected Object getVal(int rowNum) @Override protected Comparator getComparator() { - return Comparator.nullsFirst(Comparator.comparing(o -> ((String) o))); + return Comparator.nullsFirst(ColumnType.STRING_ARRAY.getStrategy()); } /** From eb5a3b38fcb2c79298fb8f7f4bf1523ecc33fe63 Mon Sep 17 00:00:00 2001 From: sreemanamala Date: Tue, 27 Aug 2024 08:23:25 +0530 Subject: [PATCH 18/21] make private --- .../druid/frame/read/columnar/StringArrayFrameColumnReader.java | 2 +- .../druid/frame/read/columnar/StringFrameColumnReader.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java index 40147340cb20..bbb36633f919 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java +++ b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java @@ -163,7 +163,7 @@ private static class StringArrayFrameColumn extends ObjectColumnAccessorBase imp private final long startOfStringLengthSection; private final long startOfStringDataSection; - protected StringArrayFrameColumn( + private StringArrayFrameColumn( Frame frame, Memory memory, long startOfStringLengthSection, diff --git a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java index 860b13e38932..69e6c6b3a3a8 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java +++ b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java @@ -201,7 +201,7 @@ private static class StringFrameColumn extends ObjectColumnAccessorBase implemen */ private final boolean multiValue; - protected StringFrameColumn( + private StringFrameColumn( Frame frame, boolean multiValue, Memory memory, From 6b85990086bbbc71e64eb16d5bfa78c421df067e Mon Sep 17 00:00:00 2001 From: sreemanamala Date: Mon, 2 Sep 2024 10:16:56 +0530 Subject: [PATCH 19/21] reject mvds in String reader readRACColumn --- .../read/columnar/StringArrayFrameColumnReader.java | 3 ++- .../frame/read/columnar/StringFrameColumnReader.java | 11 +++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java index bbb36633f919..297470276baf 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java +++ b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java @@ -51,6 +51,7 @@ /** * Reader for {@link ColumnType#STRING_ARRAY}. + * This is similar to {@link StringFrameColumnReader} reading mvds in reading bytes from frame */ public class StringArrayFrameColumnReader implements FrameColumnReader { @@ -197,7 +198,7 @@ public Object getObject() @Override public Class classOfObject() { - return String[].class; + return Object[].class; } }; } diff --git a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java index 69e6c6b3a3a8..edf1cee5f937 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java +++ b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java @@ -85,13 +85,16 @@ public Column readRACColumn(Frame frame) final Memory memory = frame.region(columnNumber); validate(memory); - final boolean multiValue = isMultiValue(memory); - final long positionOfLengths = getStartOfStringLengthSection(frame.numRows(), multiValue); - final long positionOfPayloads = getStartOfStringDataSection(memory, frame.numRows(), multiValue); + if (isMultiValue(memory)) { + throw DruidException.defensive("Only Window Functions invoke readRACColumn and they do not support MVDs. " + + "Use MV_TO_ARRAY to convert them to Arrays"); + } + final long positionOfLengths = getStartOfStringLengthSection(frame.numRows(), false); + final long positionOfPayloads = getStartOfStringDataSection(memory, frame.numRows(), false); StringFrameColumn frameCol = new StringFrameColumn( frame, - multiValue, + false, memory, positionOfLengths, positionOfPayloads From 807a6ad01fd1e7bfb916632036b4c3c8ecb8d8f5 Mon Sep 17 00:00:00 2001 From: sreemanamala Date: Sat, 7 Sep 2024 19:51:39 +0530 Subject: [PATCH 20/21] comments --- .../frame/read/columnar/StringArrayFrameColumnReader.java | 2 -- .../druid/frame/read/columnar/StringFrameColumnReader.java | 7 +++---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java index 297470276baf..31c56bf38e7d 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java +++ b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringArrayFrameColumnReader.java @@ -19,7 +19,6 @@ package org.apache.druid.frame.read.columnar; -import com.google.common.annotations.VisibleForTesting; import com.google.common.primitives.Ints; import it.unimi.dsi.fastutil.objects.ObjectArrays; import org.apache.datasketches.memory.Memory; @@ -156,7 +155,6 @@ private long getStartOfStringDataSection( return getStartOfStringLengthSection(numRows) + (long) Integer.BYTES * totalNumValues; } - @VisibleForTesting private static class StringArrayFrameColumn extends ObjectColumnAccessorBase implements BaseColumn { private final Frame frame; diff --git a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java index edf1cee5f937..2385c431e5b3 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java +++ b/processing/src/main/java/org/apache/druid/frame/read/columnar/StringFrameColumnReader.java @@ -19,11 +19,11 @@ package org.apache.druid.frame.read.columnar; -import com.google.common.annotations.VisibleForTesting; import com.google.common.primitives.Ints; import org.apache.datasketches.memory.Memory; import org.apache.druid.common.config.NullHandling; import org.apache.druid.error.DruidException; +import org.apache.druid.error.InvalidInput; import org.apache.druid.frame.Frame; import org.apache.druid.frame.read.FrameReaderUtils; import org.apache.druid.frame.write.FrameWriterUtils; @@ -86,8 +86,8 @@ public Column readRACColumn(Frame frame) validate(memory); if (isMultiValue(memory)) { - throw DruidException.defensive("Only Window Functions invoke readRACColumn and they do not support MVDs. " - + "Use MV_TO_ARRAY to convert them to Arrays"); + throw InvalidInput.exception("Encountered a multi value column. Window processing does not support MVDs. " + + "Consider using UNNEST or MV_TO_ARRAY."); } final long positionOfLengths = getStartOfStringLengthSection(frame.numRows(), false); final long positionOfPayloads = getStartOfStringDataSection(memory, frame.numRows(), false); @@ -191,7 +191,6 @@ private static long getStartOfStringDataSection( return getStartOfStringLengthSection(numRows, multiValue) + (long) Integer.BYTES * totalNumValues; } - @VisibleForTesting private static class StringFrameColumn extends ObjectColumnAccessorBase implements DictionaryEncodedColumn { private final Frame frame; From d81658e47dc516dc70611845c878aa1ed6619635 Mon Sep 17 00:00:00 2001 From: sreemanamala Date: Tue, 10 Sep 2024 10:31:08 +0530 Subject: [PATCH 21/21] fix conflict --- .../rowsandcols/semantic/EvaluateRowsAndColumnsTest.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/EvaluateRowsAndColumnsTest.java b/processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/EvaluateRowsAndColumnsTest.java index 29d1ef2397ab..e2cee35a8e9a 100644 --- a/processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/EvaluateRowsAndColumnsTest.java +++ b/processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/EvaluateRowsAndColumnsTest.java @@ -65,7 +65,6 @@ public void testMaterializeColumns() final RowsAndColumns base = make(MapOfColumnsRowsAndColumns.fromRowObjects(vals, siggy)); -<<<<<<< HEAD:processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/EvaluateRowsAndColumnsTest.java Object[] expectedArr = new Object[][] { {"xyz", "x"}, {"abc"}, @@ -77,10 +76,7 @@ public void testMaterializeColumns() .expectColumn("array", expectedArr, ColumnType.STRING_ARRAY) .validate(base); - assumeNotNull("skipping: StorageAdapter not supported", base.as(StorageAdapter.class)); -======= assumeNotNull("skipping: CursorFactory not supported", base.as(CursorFactory.class)); ->>>>>>> master:processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/TestVirtualColumnEvaluationRowsAndColumnsTest.java LazilyDecoratedRowsAndColumns ras = new LazilyDecoratedRowsAndColumns( base,