From be642682cd74a83d0d30408968311b5056dcb317 Mon Sep 17 00:00:00 2001 From: cryptoe Date: Thu, 3 Oct 2024 19:46:18 +0530 Subject: [PATCH 1/4] When replaceNullBytes is set, length calculations did not take into account null bytes. --- .../druid/frame/field/StringFieldWriter.java | 4 +-- .../druid/frame/write/FrameWriterUtils.java | 24 ++++++++++--- .../frame/field/StringFieldWriterTest.java | 12 +++++-- ...tringFieldWritersWithNullHandlingTest.java | 36 +++++++++++++++++++ 4 files changed, 66 insertions(+), 10 deletions(-) create mode 100644 processing/src/test/java/org/apache/druid/frame/field/StringFieldWritersWithNullHandlingTest.java diff --git a/processing/src/main/java/org/apache/druid/frame/field/StringFieldWriter.java b/processing/src/main/java/org/apache/druid/frame/field/StringFieldWriter.java index 2ffb79a12da7..5b304f8a0534 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/StringFieldWriter.java +++ b/processing/src/main/java/org/apache/druid/frame/field/StringFieldWriter.java @@ -128,14 +128,14 @@ static long writeUtf8ByteBuffers( written++; if (len > 0) { - FrameWriterUtils.copyByteBufferToMemoryDisallowingNullBytes( + int lenWritten = FrameWriterUtils.copyByteBufferToMemoryDisallowingNullBytes( utf8Datum, memory, position + written, len, removeNullBytes ); - written += len; + written += lenWritten; } } diff --git a/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java b/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java index 9ba2b29fe51b..541aae9ae5c6 100644 --- a/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java +++ b/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java @@ -34,6 +34,7 @@ import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.data.IndexedInts; +import javax.annotation.CheckReturnValue; import javax.annotation.Nullable; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -212,9 +213,12 @@ public static void copyByteBufferToMemoryAllowingNullBytes( /** * Copies {@code src} to {@code dst}, disallowing null bytes to be written to the destination. If {@code removeNullBytes} - * is true, the method will drop the null bytes, and if it is false, the method will throw an exception. + * is true, the method will drop the null bytes, and if it is false, the method will throw an exception. The written bytes + * can be less than "len" if the null bytes are dropped, and the callers must evaluate the return value to see the actual + * length of the buffer that is copied */ - public static void copyByteBufferToMemoryDisallowingNullBytes( + @CheckReturnValue + public static int copyByteBufferToMemoryDisallowingNullBytes( final ByteBuffer src, final WritableMemory dst, final long dstPosition, @@ -222,11 +226,16 @@ public static void copyByteBufferToMemoryDisallowingNullBytes( final boolean removeNullBytes ) { - copyByteBufferToMemory(src, dst, dstPosition, len, false, removeNullBytes); + return copyByteBufferToMemory(src, dst, dstPosition, len, false, removeNullBytes); } /** - * Copies "len" bytes from {@code src.position()} to "dstPosition" in "memory". Does not update the position of src. + * Tries to copy "len" bytes from {@code src.position()} to "dstPosition" in "memory". If removeNullBytes is set to true, + * it will remove the U+0000 bytes from the src buffer, and the written bytes will be less than "len". It is imperative that the + * callers check the number of written bytes when "removeNullBytes" can be set to true, i.e. this method is invoked via + * {@link #copyByteBufferToMemoryDisallowingNullBytes} + *

+ * Does not update the position of src. *

* Whenever "allowNullBytes" is true, "removeNullBytes" must be false. Use the methods {@link #copyByteBufferToMemoryAllowingNullBytes} * and {@link #copyByteBufferToMemoryDisallowingNullBytes} to copy between the memory @@ -234,7 +243,7 @@ public static void copyByteBufferToMemoryDisallowingNullBytes( * * @throws InvalidNullByteException if "allowNullBytes" and "removeNullBytes" is false and a null byte is encountered */ - private static void copyByteBufferToMemory( + private static int copyByteBufferToMemory( final ByteBuffer src, final WritableMemory dst, final long dstPosition, @@ -251,6 +260,7 @@ private static void copyByteBufferToMemory( } final int srcEnd = src.position() + len; + int writtenLength = 0; if (allowNullBytes) { if (src.hasArray()) { @@ -264,6 +274,8 @@ private static void copyByteBufferToMemory( dst.putByte(q, b); } } + // The method does not alter the length of the memory copied if null bytes are allowed + writtenLength = len; } else { long q = dstPosition; for (int p = src.position(); p < srcEnd; p++) { @@ -282,9 +294,11 @@ private static void copyByteBufferToMemory( } else { dst.putByte(q, b); q++; + writtenLength++; } } } + return writtenLength; } /** diff --git a/processing/src/test/java/org/apache/druid/frame/field/StringFieldWriterTest.java b/processing/src/test/java/org/apache/druid/frame/field/StringFieldWriterTest.java index 0108e772d94e..c8d5a53c3d04 100644 --- a/processing/src/test/java/org/apache/druid/frame/field/StringFieldWriterTest.java +++ b/processing/src/test/java/org/apache/druid/frame/field/StringFieldWriterTest.java @@ -57,9 +57,9 @@ public class StringFieldWriterTest extends InitializedNullHandlingTest @Mock public DimensionSelector selectorUtf8; - private WritableMemory memory; - private FieldWriter fieldWriter; - private FieldWriter fieldWriterUtf8; + protected WritableMemory memory; + protected FieldWriter fieldWriter; + protected FieldWriter fieldWriterUtf8; @Before public void setUp() @@ -106,6 +106,12 @@ public void testMultiValueStringContainingNulls() doTest(Arrays.asList("foo", NullHandling.emptyToNullIfNeeded(""), "bar", null)); } + @Test + public void testNullBytes() + { + doTest(Arrays.asList("abc", "foo" + NullHandling.emptyToNullIfNeeded("") + "bar", "def")); + } + private void doTest(final List values) { mockSelectors(values); diff --git a/processing/src/test/java/org/apache/druid/frame/field/StringFieldWritersWithNullHandlingTest.java b/processing/src/test/java/org/apache/druid/frame/field/StringFieldWritersWithNullHandlingTest.java new file mode 100644 index 000000000000..342bd3ad2d7e --- /dev/null +++ b/processing/src/test/java/org/apache/druid/frame/field/StringFieldWritersWithNullHandlingTest.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.frame.field; + +import org.apache.datasketches.memory.WritableMemory; +import org.junit.Before; + +public class StringFieldWritersWithNullHandlingTest extends StringFieldWriterTest +{ + + @Override + @Before + public void setUp() + { + memory = WritableMemory.allocate(1000); + fieldWriter = new StringFieldWriter(selector, true); + fieldWriterUtf8 = new StringFieldWriter(selectorUtf8, true); + } +} From 24896916bb8ea1bee43367e6ec3c18684136dcd6 Mon Sep 17 00:00:00 2001 From: cryptoe Date: Mon, 7 Oct 2024 14:35:24 +0530 Subject: [PATCH 2/4] Adding tests --- .../frame/field/StringFieldWriterTest.java | 100 ++++++++++++++---- ...tringFieldWritersWithNullHandlingTest.java | 36 ------- 2 files changed, 80 insertions(+), 56 deletions(-) delete mode 100644 processing/src/test/java/org/apache/druid/frame/field/StringFieldWritersWithNullHandlingTest.java diff --git a/processing/src/test/java/org/apache/druid/frame/field/StringFieldWriterTest.java b/processing/src/test/java/org/apache/druid/frame/field/StringFieldWriterTest.java index c8d5a53c3d04..35ab30644bd9 100644 --- a/processing/src/test/java/org/apache/druid/frame/field/StringFieldWriterTest.java +++ b/processing/src/test/java/org/apache/druid/frame/field/StringFieldWriterTest.java @@ -21,6 +21,7 @@ import org.apache.datasketches.memory.WritableMemory; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.frame.write.InvalidNullByteException; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.segment.ColumnValueSelector; @@ -40,9 +41,11 @@ import org.mockito.quality.Strictness; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.stream.Collectors; public class StringFieldWriterTest extends InitializedNullHandlingTest { @@ -57,9 +60,12 @@ public class StringFieldWriterTest extends InitializedNullHandlingTest @Mock public DimensionSelector selectorUtf8; - protected WritableMemory memory; - protected FieldWriter fieldWriter; - protected FieldWriter fieldWriterUtf8; + + private WritableMemory memory; + private FieldWriter fieldWriter; + private FieldWriter fieldWriterUtf8; + private FieldWriter fieldWriterRemoveNull; + private FieldWriter fieldWriterUtf8RemoveNull; @Before public void setUp() @@ -67,13 +73,32 @@ public void setUp() memory = WritableMemory.allocate(1000); fieldWriter = new StringFieldWriter(selector, false); fieldWriterUtf8 = new StringFieldWriter(selectorUtf8, false); + fieldWriterRemoveNull = new StringFieldWriter(selector, true); + fieldWriterUtf8RemoveNull = new StringFieldWriter(selectorUtf8, true); } @After public void tearDown() { - fieldWriter.close(); - fieldWriterUtf8.close(); + for (FieldWriter fw : getFieldWriter(FieldWritersType.ALL)) { + try { + fw.close(); + } + catch (Exception ignore) { + } + } + } + + + private List getFieldWriter(FieldWritersType fieldWritersType) + { + if (fieldWritersType == FieldWritersType.NULL_REPLACING) { + return Arrays.asList(fieldWriterRemoveNull, fieldWriterUtf8RemoveNull); + } else if (fieldWritersType == FieldWritersType.ALL) { + return Arrays.asList(fieldWriter, fieldWriterUtf8, fieldWriterRemoveNull, fieldWriterUtf8RemoveNull); + } else { + throw new ISE("Handler missing for type:[%s]", fieldWritersType); + } } @Test @@ -100,6 +125,7 @@ public void testMultiValueString() doTest(Arrays.asList("foo", "bar")); } + @Test public void testMultiValueStringContainingNulls() { @@ -107,30 +133,53 @@ public void testMultiValueStringContainingNulls() } @Test - public void testNullBytes() + public void testNullByteReplacement() { - doTest(Arrays.asList("abc", "foo" + NullHandling.emptyToNullIfNeeded("") + "bar", "def")); + doTest( + Arrays.asList("abc\u0000", "foo" + NullHandling.emptyToNullIfNeeded("") + "bar", "def"), + FieldWritersType.NULL_REPLACING + ); + } + + @Test + public void testNullByteNotReplaced() + { + mockSelectors(Arrays.asList("abc\u0000", "foo" + NullHandling.emptyToNullIfNeeded("") + "bar", "def")); + Assert.assertThrows(InvalidNullByteException.class, () -> { + doTestWithSpecificFieldWriter(fieldWriter); + }); + Assert.assertThrows(InvalidNullByteException.class, () -> { + doTestWithSpecificFieldWriter(fieldWriterUtf8); + }); } private void doTest(final List values) + { + doTest(values, FieldWritersType.ALL); + } + + private void doTest(final List values, FieldWritersType fieldWritersType) { mockSelectors(values); - // Non-UTF8 test - { - final long written = writeToMemory(fieldWriter); - final Object[] valuesRead = readFromMemory(written); - Assert.assertEquals("values read (non-UTF8)", values, Arrays.asList(valuesRead)); + List fieldWriters = getFieldWriter(fieldWritersType); + for (FieldWriter fw : fieldWriters) { + final Object[] valuesRead = doTestWithSpecificFieldWriter(fw); + List expectedResults = new ArrayList<>(values); + if (fieldWritersType == FieldWritersType.NULL_REPLACING) { + expectedResults = expectedResults.stream().map(val -> val.replace("\u0000", "")).collect(Collectors.toList()); + } + Assert.assertEquals("values read", expectedResults, Arrays.asList(valuesRead)); } + } - // UTF8 test - { - final long writtenUtf8 = writeToMemory(fieldWriterUtf8); - final Object[] valuesReadUtf8 = readFromMemory(writtenUtf8); - Assert.assertEquals("values read (UTF8)", values, Arrays.asList(valuesReadUtf8)); - } + private Object[] doTestWithSpecificFieldWriter(FieldWriter fieldWriter) + { + final long written = writeToMemory(fieldWriter); + return readFromMemory(written); } + private void mockSelectors(final List values) { final RangeIndexedInts row = new RangeIndexedInts(); @@ -189,9 +238,20 @@ private Object[] readFromMemory(final long written) memory.getByteArray(MEMORY_POSITION, bytes, 0, (int) written); final FieldReader fieldReader = FieldReaders.create("columnNameDoesntMatterHere", ColumnType.STRING_ARRAY); - final ColumnValueSelector selector = - fieldReader.makeColumnValueSelector(memory, new ConstantFieldPointer(MEMORY_POSITION, -1)); + final ColumnValueSelector selector = fieldReader.makeColumnValueSelector( + memory, + new ConstantFieldPointer( + MEMORY_POSITION, + -1 + ) + ); return (Object[]) selector.getObject(); } + + private enum FieldWritersType + { + NULL_REPLACING, // include null replacing writers only + ALL // include all writers + } } diff --git a/processing/src/test/java/org/apache/druid/frame/field/StringFieldWritersWithNullHandlingTest.java b/processing/src/test/java/org/apache/druid/frame/field/StringFieldWritersWithNullHandlingTest.java deleted file mode 100644 index 342bd3ad2d7e..000000000000 --- a/processing/src/test/java/org/apache/druid/frame/field/StringFieldWritersWithNullHandlingTest.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.frame.field; - -import org.apache.datasketches.memory.WritableMemory; -import org.junit.Before; - -public class StringFieldWritersWithNullHandlingTest extends StringFieldWriterTest -{ - - @Override - @Before - public void setUp() - { - memory = WritableMemory.allocate(1000); - fieldWriter = new StringFieldWriter(selector, true); - fieldWriterUtf8 = new StringFieldWriter(selectorUtf8, true); - } -} From 8837ffcdb5531f333a26b70e84f3d587f4dbe3b5 Mon Sep 17 00:00:00 2001 From: cryptoe Date: Mon, 7 Oct 2024 14:47:56 +0530 Subject: [PATCH 3/4] Fixing forbidden check --- .../org/apache/druid/frame/field/StringFieldWriterTest.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/processing/src/test/java/org/apache/druid/frame/field/StringFieldWriterTest.java b/processing/src/test/java/org/apache/druid/frame/field/StringFieldWriterTest.java index 35ab30644bd9..adde9f89cbd4 100644 --- a/processing/src/test/java/org/apache/druid/frame/field/StringFieldWriterTest.java +++ b/processing/src/test/java/org/apache/druid/frame/field/StringFieldWriterTest.java @@ -167,7 +167,9 @@ private void doTest(final List values, FieldWritersType fieldWritersType final Object[] valuesRead = doTestWithSpecificFieldWriter(fw); List expectedResults = new ArrayList<>(values); if (fieldWritersType == FieldWritersType.NULL_REPLACING) { - expectedResults = expectedResults.stream().map(val -> val.replace("\u0000", "")).collect(Collectors.toList()); + expectedResults = expectedResults.stream() + .map(val -> StringUtils.replace(val, "\u0000", "")) + .collect(Collectors.toList()); } Assert.assertEquals("values read", expectedResults, Arrays.asList(valuesRead)); } From 05568da43b5d75c680506bbdc4de1abf79db56cf Mon Sep 17 00:00:00 2001 From: cryptoe Date: Mon, 7 Oct 2024 15:02:44 +0530 Subject: [PATCH 4/4] Fixing java 17 check --- .../java/org/apache/druid/frame/write/FrameWriterUtils.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java b/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java index 541aae9ae5c6..a480767f1113 100644 --- a/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java +++ b/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java @@ -34,7 +34,6 @@ import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.data.IndexedInts; -import javax.annotation.CheckReturnValue; import javax.annotation.Nullable; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -217,7 +216,6 @@ public static void copyByteBufferToMemoryAllowingNullBytes( * can be less than "len" if the null bytes are dropped, and the callers must evaluate the return value to see the actual * length of the buffer that is copied */ - @CheckReturnValue public static int copyByteBufferToMemoryDisallowingNullBytes( final ByteBuffer src, final WritableMemory dst,