From 6d07aab28bf6f7e14f5c55d74ffa2863bac44b72 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Tue, 2 Jul 2024 23:25:28 +0530 Subject: [PATCH 01/12] Refactor SemanticUtils --- .../semantic}/SemanticCreator.java | 8 +- .../druid/common/semantic/SemanticUtils.java | 86 +++++++++++++++++++ .../apache/druid/error/DruidException.java | 27 +++++- .../apache/druid/error/NotYetImplemented.java | 72 ++++++++++++++++ .../druid/java/util/common/StringUtils.java | 6 ++ .../rowsandcols/ArrayListRowsAndColumns.java | 4 +- .../LazilyDecoratedRowsAndColumns.java | 4 +- .../query/rowsandcols/RowsAndColumns.java | 31 ------- .../QueryableIndexRowsAndColumns.java | 5 +- .../druid/segment/QueryableIndexSegment.java | 19 +++- .../semantic/SemanticCreatorUsageTest.java | 7 +- 11 files changed, 223 insertions(+), 46 deletions(-) rename processing/src/main/java/org/apache/druid/{query/rowsandcols => common/semantic}/SemanticCreator.java (84%) create mode 100644 processing/src/main/java/org/apache/druid/common/semantic/SemanticUtils.java create mode 100644 processing/src/main/java/org/apache/druid/error/NotYetImplemented.java rename processing/src/test/java/org/apache/druid/{query/rowsandcols => common}/semantic/SemanticCreatorUsageTest.java (96%) diff --git a/processing/src/main/java/org/apache/druid/query/rowsandcols/SemanticCreator.java b/processing/src/main/java/org/apache/druid/common/semantic/SemanticCreator.java similarity index 84% rename from processing/src/main/java/org/apache/druid/query/rowsandcols/SemanticCreator.java rename to processing/src/main/java/org/apache/druid/common/semantic/SemanticCreator.java index bb1af0e4d9f1..0142b3e8ed0a 100644 --- a/processing/src/main/java/org/apache/druid/query/rowsandcols/SemanticCreator.java +++ b/processing/src/main/java/org/apache/druid/common/semantic/SemanticCreator.java @@ -17,7 +17,9 @@ * under the License. 
*/ -package org.apache.druid.query.rowsandcols; +package org.apache.druid.common.semantic; + +import org.apache.druid.query.rowsandcols.RowsAndColumns; import java.lang.annotation.ElementType; import java.lang.annotation.Retention; @@ -26,8 +28,8 @@ /** * Annotation used to indicate that the method is used as a creator for a semantic interface. - * - * Used in conjuction with {@link RowsAndColumns#makeAsMap(Class)} to build maps for simplified implementation of + *

+ * Used in conjuction with {@link SemanticUtils#makeAsMap(Class)} to build maps for simplified implementation of * the {@link RowsAndColumns#as(Class)} method. */ @Retention(RetentionPolicy.RUNTIME) diff --git a/processing/src/main/java/org/apache/druid/common/semantic/SemanticUtils.java b/processing/src/main/java/org/apache/druid/common/semantic/SemanticUtils.java new file mode 100644 index 000000000000..b2ee869e2dd1 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/common/semantic/SemanticUtils.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.common.semantic; + +import org.apache.druid.error.DruidException; + +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.function.Function; + +public class SemanticUtils +{ + private static final Map, Map, Function>> OVERRIDES = new LinkedHashMap<>(); + + @SuppressWarnings("unused") + public static void registerAsOverride(Class clazz, Class asInterface, Function fn) + { + final Map, Function> classOverrides = OVERRIDES.computeIfAbsent( + clazz, + theClazz -> new LinkedHashMap<>() + ); + + final Function oldVal = classOverrides.get(asInterface); + if (oldVal != null) { + throw DruidException.defensive( + "Attempt to side-override the same interface [%s] multiple times for the same class [%s].", + asInterface, + clazz + ); + } else { + classOverrides.put(asInterface, fn); + } + } + + public static Map, Function> makeAsMap(Class clazz) + { + final Map, Function> retVal = new HashMap<>(); + + for (Method method : clazz.getMethods()) { + if (method.isAnnotationPresent(SemanticCreator.class)) { + if (method.getParameterCount() != 0) { + throw DruidException.defensive("Method [%s] annotated with SemanticCreator was not 0-argument.", method); + } + + retVal.put(method.getReturnType(), arg -> { + try { + return method.invoke(arg); + } + catch (InvocationTargetException | IllegalAccessException e) { + throw DruidException.defensive().build(e, "Problem invoking method [%s]", method); + } + }); + } + } + + final Map, Function> classOverrides = OVERRIDES.get(clazz); + if (classOverrides != null) { + for (Map.Entry, Function> overrideEntry : classOverrides.entrySet()) { + //noinspection unchecked + retVal.put(overrideEntry.getKey(), (Function) overrideEntry.getValue()); + } + } + + return retVal; + } +} diff --git a/processing/src/main/java/org/apache/druid/error/DruidException.java 
b/processing/src/main/java/org/apache/druid/error/DruidException.java index a04f3f6512cf..1ba318361cfa 100644 --- a/processing/src/main/java/org/apache/druid/error/DruidException.java +++ b/processing/src/main/java/org/apache/druid/error/DruidException.java @@ -24,6 +24,7 @@ import org.apache.druid.java.util.common.StringUtils; import javax.annotation.concurrent.NotThreadSafe; +import java.util.Arrays; import java.util.LinkedHashMap; import java.util.Map; @@ -130,6 +131,8 @@ @NotThreadSafe public class DruidException extends RuntimeException { + public static final String CLASS_NAME_STR = DruidException.class.getName(); + /** * Starts building a "general" DruidException targeting the specified persona. * @@ -167,7 +170,7 @@ public static DruidExceptionBuilder defensive() /** * Build a "defensive" exception, this is an exception that should never actually be triggered, but we are - * throwing it inside of a defensive check. + * throwing it inside a defensive check. * * @return A builder for a defensive exception. */ @@ -176,6 +179,17 @@ public static DruidException defensive(String format, Object... args) return defensive().build(format, args); } + /** + * Build a "defensive" exception, this is an exception that should never actually be triggered, but we are + * throwing it inside a defensive check. + * + * @return A builder for a defensive exception. + */ + public static DruidException defensive(Throwable cause, String format, Object... args) + { + return defensive().build(cause, format, args); + } + /** * Build a "defensive" exception, this is an exception that should never actually be triggered. Throw to * allow messages to be seen by developers @@ -467,7 +481,7 @@ public DruidException build(String formatMe, Object... vals) public DruidException build(Throwable cause, String formatMe, Object... 
vals) { - return new DruidException( + final DruidException retVal = new DruidException( cause, errorCode, targetPersona, @@ -475,6 +489,15 @@ public DruidException build(Throwable cause, String formatMe, Object... vals) StringUtils.nonStrictFormat(formatMe, vals), deserialized ); + + StackTraceElement[] stackTrace = retVal.getStackTrace(); + int firstNonDruidExceptionIndex = 0; + while (stackTrace[firstNonDruidExceptionIndex].getClassName().startsWith(CLASS_NAME_STR)) { + ++firstNonDruidExceptionIndex; + } + retVal.setStackTrace(Arrays.copyOfRange(stackTrace, firstNonDruidExceptionIndex, stackTrace.length)); + + return retVal; } } diff --git a/processing/src/main/java/org/apache/druid/error/NotYetImplemented.java b/processing/src/main/java/org/apache/druid/error/NotYetImplemented.java new file mode 100644 index 000000000000..3e8df191e32c --- /dev/null +++ b/processing/src/main/java/org/apache/druid/error/NotYetImplemented.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.error; + +/** + * A failure class that is used to indicate that something is just not implemented yet. 
This is useful when a + * developer builds something and they intentionally do not implement a specific branch of code or type of object. + *

+ * The lack of implementation is not necessarily a statement that it SHOULDN'T be implemented, it's just an indication + * that it has not YET been implemented. When one of these exceptions is seen, it is usually an indication that it is + * now time to actually implement the path that was previously elided. + *

+ * Often times, the code path wasn't implemented because the developer thought that it wasn't actually possible to + * see it executed. So, collecting and providing information about why the particular path got executed is often + * extremely helpful in understanding why it happened and accelerating the implementation of what the correct behavior + * should be. + */ +public class NotYetImplemented extends DruidException.Failure +{ + public static DruidException ex(String msg, Object... args) + { + return ex(null, msg, args); + } + + public static DruidException ex(Throwable t, String msg, Object... args) + { + return DruidException.fromFailure(new NotYetImplemented(t, msg, args)); + } + + private final Throwable t; + private final String msg; + private final Object[] args; + + public NotYetImplemented(Throwable t, String msg, Object[] args) + { + super("notYetImplemented"); + this.t = t; + this.msg = msg; + this.args = args; + } + + + @Override + protected DruidException makeException(DruidException.DruidExceptionBuilder bob) + { + bob = bob.forPersona(DruidException.Persona.DEVELOPER) + .ofCategory(DruidException.Category.DEFENSIVE); + + if (t == null) { + return bob.build(msg, args); + } else { + return bob.build(t, msg, args); + } + } +} diff --git a/processing/src/main/java/org/apache/druid/java/util/common/StringUtils.java b/processing/src/main/java/org/apache/druid/java/util/common/StringUtils.java index e5d9b2c8e4ec..7b27ab125fcb 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/StringUtils.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/StringUtils.java @@ -349,6 +349,12 @@ public static byte[] toUtf8Nullable(@Nullable final String string) return toUtf8(string); } + @SuppressWarnings("unused") + public static String nullableValueOf(@Nullable final Object obj) + { + return obj == null ? null : obj.toString(); + } + /** * Equivalent of String.format(Locale.ENGLISH, message, formatArgs). 
*/ diff --git a/processing/src/main/java/org/apache/druid/query/rowsandcols/ArrayListRowsAndColumns.java b/processing/src/main/java/org/apache/druid/query/rowsandcols/ArrayListRowsAndColumns.java index 6f5460095113..14b6f8a851a4 100644 --- a/processing/src/main/java/org/apache/druid/query/rowsandcols/ArrayListRowsAndColumns.java +++ b/processing/src/main/java/org/apache/druid/query/rowsandcols/ArrayListRowsAndColumns.java @@ -23,6 +23,8 @@ import it.unimi.dsi.fastutil.ints.IntArrayList; import it.unimi.dsi.fastutil.ints.IntComparator; import it.unimi.dsi.fastutil.ints.IntList; +import org.apache.druid.common.semantic.SemanticCreator; +import org.apache.druid.common.semantic.SemanticUtils; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.guava.Sequences; import org.apache.druid.query.operator.ColumnWithDirection; @@ -73,7 +75,7 @@ public class ArrayListRowsAndColumns implements AppendableRowsAndColumns { @SuppressWarnings("rawtypes") - private static final Map, Function> AS_MAP = RowsAndColumns + private static final Map, Function> AS_MAP = SemanticUtils .makeAsMap(ArrayListRowsAndColumns.class); private final ArrayList rows; diff --git a/processing/src/main/java/org/apache/druid/query/rowsandcols/LazilyDecoratedRowsAndColumns.java b/processing/src/main/java/org/apache/druid/query/rowsandcols/LazilyDecoratedRowsAndColumns.java index 0dae40467f3f..ce199a7803c5 100644 --- a/processing/src/main/java/org/apache/druid/query/rowsandcols/LazilyDecoratedRowsAndColumns.java +++ b/processing/src/main/java/org/apache/druid/query/rowsandcols/LazilyDecoratedRowsAndColumns.java @@ -20,6 +20,8 @@ package org.apache.druid.query.rowsandcols; import com.google.common.collect.ImmutableList; +import org.apache.druid.common.semantic.SemanticCreator; +import org.apache.druid.common.semantic.SemanticUtils; import org.apache.druid.frame.Frame; import org.apache.druid.frame.allocation.ArenaMemoryAllocatorFactory; import 
org.apache.druid.frame.key.KeyColumn; @@ -66,7 +68,7 @@ public class LazilyDecoratedRowsAndColumns implements RowsAndColumns { - private static final Map, Function> AS_MAP = RowsAndColumns + private static final Map, Function> AS_MAP = SemanticUtils .makeAsMap(LazilyDecoratedRowsAndColumns.class); private RowsAndColumns base; diff --git a/processing/src/main/java/org/apache/druid/query/rowsandcols/RowsAndColumns.java b/processing/src/main/java/org/apache/druid/query/rowsandcols/RowsAndColumns.java index d139265d147d..7b6a1f6215d3 100644 --- a/processing/src/main/java/org/apache/druid/query/rowsandcols/RowsAndColumns.java +++ b/processing/src/main/java/org/apache/druid/query/rowsandcols/RowsAndColumns.java @@ -19,19 +19,13 @@ package org.apache.druid.query.rowsandcols; -import org.apache.druid.error.DruidException; import org.apache.druid.query.rowsandcols.column.Column; import org.apache.druid.query.rowsandcols.semantic.AppendableRowsAndColumns; import org.apache.druid.query.rowsandcols.semantic.FramedOnHeapAggregatable; import javax.annotation.Nonnull; import javax.annotation.Nullable; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; import java.util.Collection; -import java.util.HashMap; -import java.util.Map; -import java.util.function.Function; /** * An interface representing a chunk of RowsAndColumns. 
Essentially a RowsAndColumns is just a batch of rows @@ -75,31 +69,6 @@ static AppendableRowsAndColumns expectAppendable(RowsAndColumns input) return retVal; } - static Map, Function> makeAsMap(Class clazz) - { - Map, Function> retVal = new HashMap<>(); - - for (Method method : clazz.getMethods()) { - if (method.isAnnotationPresent(SemanticCreator.class)) { - if (method.getParameterCount() != 0) { - throw DruidException.defensive("Method [%s] annotated with SemanticCreator was not 0-argument.", method); - } - - retVal.put(method.getReturnType(), arg -> { - try { - return method.invoke(arg); - } - catch (InvocationTargetException | IllegalAccessException e) { - throw DruidException.defensive().build(e, "Problem invoking method [%s]", method); - } - }); - } - } - - return retVal; - } - - /** * The set of column names available from the RowsAndColumns * diff --git a/processing/src/main/java/org/apache/druid/query/rowsandcols/concrete/QueryableIndexRowsAndColumns.java b/processing/src/main/java/org/apache/druid/query/rowsandcols/concrete/QueryableIndexRowsAndColumns.java index 209d4430b1d1..73fc72a1ee48 100644 --- a/processing/src/main/java/org/apache/druid/query/rowsandcols/concrete/QueryableIndexRowsAndColumns.java +++ b/processing/src/main/java/org/apache/druid/query/rowsandcols/concrete/QueryableIndexRowsAndColumns.java @@ -19,10 +19,11 @@ package org.apache.druid.query.rowsandcols.concrete; +import org.apache.druid.common.semantic.SemanticCreator; +import org.apache.druid.common.semantic.SemanticUtils; import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.query.rowsandcols.RowsAndColumns; -import org.apache.druid.query.rowsandcols.SemanticCreator; import org.apache.druid.query.rowsandcols.column.Column; import org.apache.druid.segment.CloseableShapeshifter; import org.apache.druid.segment.QueryableIndex; @@ -41,7 +42,7 @@ public class QueryableIndexRowsAndColumns implements RowsAndColumns, 
AutoCloseable, CloseableShapeshifter { - private static final Map, Function> AS_MAP = RowsAndColumns + private static final Map, Function> AS_MAP = SemanticUtils .makeAsMap(QueryableIndexRowsAndColumns.class); private final QueryableIndex index; diff --git a/processing/src/main/java/org/apache/druid/segment/QueryableIndexSegment.java b/processing/src/main/java/org/apache/druid/segment/QueryableIndexSegment.java index 9d75748b4162..b8d4d2d16cf9 100644 --- a/processing/src/main/java/org/apache/druid/segment/QueryableIndexSegment.java +++ b/processing/src/main/java/org/apache/druid/segment/QueryableIndexSegment.java @@ -19,17 +19,24 @@ package org.apache.druid.segment; +import org.apache.druid.common.semantic.SemanticCreator; +import org.apache.druid.common.semantic.SemanticUtils; import org.apache.druid.query.rowsandcols.concrete.QueryableIndexRowsAndColumns; import org.apache.druid.timeline.SegmentId; import org.joda.time.Interval; import javax.annotation.Nonnull; import javax.annotation.Nullable; +import java.util.Map; +import java.util.function.Function; /** */ public class QueryableIndexSegment implements Segment { + private static final Map, Function> AS_MAP = SemanticUtils + .makeAsMap(QueryableIndexSegment.class); + private final QueryableIndex index; private final QueryableIndexStorageAdapter storageAdapter; private final SegmentId segmentId; @@ -77,10 +84,18 @@ public void close() @Override public T as(@Nonnull Class clazz) { - if (CloseableShapeshifter.class.equals(clazz)) { - return (T) new QueryableIndexRowsAndColumns(index); + final Function fn = AS_MAP.get(clazz); + if (fn != null) { + return (T) fn.apply(this); } return Segment.super.as(clazz); } + + @SemanticCreator + @SuppressWarnings("unused") + public CloseableShapeshifter toCloseableShapeshifter() + { + return new QueryableIndexRowsAndColumns(index); + } } diff --git a/processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/SemanticCreatorUsageTest.java 
b/processing/src/test/java/org/apache/druid/common/semantic/SemanticCreatorUsageTest.java similarity index 96% rename from processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/SemanticCreatorUsageTest.java rename to processing/src/test/java/org/apache/druid/common/semantic/SemanticCreatorUsageTest.java index b5de751651e2..0dd61fb4b3ea 100644 --- a/processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/SemanticCreatorUsageTest.java +++ b/processing/src/test/java/org/apache/druid/common/semantic/SemanticCreatorUsageTest.java @@ -17,10 +17,9 @@ * under the License. */ -package org.apache.druid.query.rowsandcols.semantic; +package org.apache.druid.common.semantic; import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.query.rowsandcols.SemanticCreator; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -80,7 +79,7 @@ public void testPublic() /** * {@link SemanticCreator} must return with an interface. - * + *

* An exact implementation may indicate that some interface methods might be missing. */ @Test @@ -95,7 +94,7 @@ public void testReturnType() /** * {@link SemanticCreator} method names must follow the naming pattern toReturnType(). - * + *

* For example: a method returning with a type of Ball should be named as "toBall" */ @Test From 24045a3c2f00ae58f6fba5e7c93289244a37faf6 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Wed, 3 Jul 2024 00:01:15 +0530 Subject: [PATCH 02/12] Add segmentMorpher support to DataSourceMSQDestination --- .../apache/druid/msq/exec/ControllerImpl.java | 64 ++++++++++++++++++- .../destination/DataSourceMSQDestination.java | 40 +++++++++++- .../druid/msq/sql/MSQTaskQueryMaker.java | 3 +- .../msq/exec/MSQParseExceptionsTest.java | 4 +- .../msq/indexing/MSQControllerTaskTest.java | 3 +- .../resources/SqlStatementResourceTest.java | 1 + .../util/SqlStatementResourceHelperTest.java | 1 + 7 files changed, 106 insertions(+), 10 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index ad37c5380c56..f992be2c7846 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -146,6 +146,7 @@ import org.apache.druid.msq.input.stage.StageInputSpec; import org.apache.druid.msq.input.stage.StageInputSpecSlicer; import org.apache.druid.msq.input.table.TableInputSpec; +import org.apache.druid.msq.kernel.FrameProcessorFactory; import org.apache.druid.msq.kernel.QueryDefinition; import org.apache.druid.msq.kernel.QueryDefinitionBuilder; import org.apache.druid.msq.kernel.StageDefinition; @@ -1193,8 +1194,35 @@ private Int2ObjectMap makeWorkerFactoryInfosForStage( { if (MSQControllerTask.isIngestion(querySpec) && stageNumber == queryDef.getFinalStageDefinition().getStageNumber()) { - // noinspection unchecked,rawtypes - return (Int2ObjectMap) makeSegmentGeneratorWorkerFactoryInfos(workerInputs, segmentsToGenerate); + final DataSourceMSQDestination destination = 
(DataSourceMSQDestination) querySpec.getDestination(); + if (!destination.doesSegmentMorphing()) { + // noinspection unchecked,rawtypes + return (Int2ObjectMap) makeSegmentGeneratorWorkerFactoryInfos(workerInputs, segmentsToGenerate); + } else { + // worker info is the new lock version + if (destination.getReplaceTimeChunks().size() != 1) { + throw new ISE( + "Must have single interval in replaceTimeChunks, but got[%s]", + destination.getReplaceTimeChunks() + ); + } + try { + final List locks; + locks = context.taskActionClient().submit(new LockListAction()); + if (locks.size() == 1) { + final Int2ObjectMap retVal = new Int2ObjectAVLTreeMap<>(); + for (int worker : workerInputs.workers()) { + retVal.put(worker, locks.get(0).getVersion()); + } + return retVal; + } else { + throw new ISE("Got number of locks other than one: [%s]", locks); + } + } + catch (IOException e) { + throw new RuntimeException(e); + } + } } else { return null; } @@ -1810,6 +1838,35 @@ private static QueryDefinition makeQueryDefinition( } } + // Possibly add a segment morpher stage. 
+ if (((DataSourceMSQDestination) querySpec.getDestination()).doesSegmentMorphing()) { + final DataSourceMSQDestination destination = (DataSourceMSQDestination) querySpec.getDestination(); + final FrameProcessorFactory segmentMorphFactory = destination.getSegmentMorphFactory(); + + if (!destination.isReplaceTimeChunks()) { + throw new MSQException(UnknownFault.forMessage("segmentMorphFactory requires replaceTimeChunks")); + } + + builder.add( + StageDefinition.builder(queryDef.getNextStageNumber()) + .inputs( + new TableInputSpec( + destination.getDataSource(), + destination.getReplaceTimeChunks(), + null, + null + ), + new StageInputSpec(queryDef.getFinalStageDefinition().getStageNumber()) + ) + .broadcastInputs(IntSet.of(1)) + .maxWorkerCount(tuningConfig.getMaxNumWorkers()) + .processorFactory(segmentMorphFactory) + ); + + // If there was a segment morpher, return immediately; don't add a segment-generation stage. + return builder.build(); + } + // Then, add a segment-generation stage. final DataSchema dataSchema = makeDataSchemaForIngestion(querySpec, querySignature, queryClusterBy, columnMappings, jsonMapper); @@ -2723,7 +2780,8 @@ private void startStages() throws IOException, InterruptedException for (final StageId stageId : newStageIds) { // Allocate segments, if this is the final stage of an ingestion. 
if (MSQControllerTask.isIngestion(querySpec) - && stageId.getStageNumber() == queryDef.getFinalStageDefinition().getStageNumber()) { + && stageId.getStageNumber() == queryDef.getFinalStageDefinition().getStageNumber() + && !((DataSourceMSQDestination) querySpec.getDestination()).doesSegmentMorphing()) { populateSegmentsToGenerate(); } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/DataSourceMSQDestination.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/DataSourceMSQDestination.java index ea3072bfe45a..78df189cc658 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/DataSourceMSQDestination.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/DataSourceMSQDestination.java @@ -20,12 +20,14 @@ package org.apache.druid.msq.indexing.destination; import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.granularity.Granularity; +import org.apache.druid.msq.kernel.FrameProcessorFactory; import org.apache.druid.msq.querykit.ShuffleSpecFactories; import org.apache.druid.msq.querykit.ShuffleSpecFactory; import org.apache.druid.server.security.Resource; @@ -49,18 +51,24 @@ public class DataSourceMSQDestination implements MSQDestination @Nullable private final List replaceTimeChunks; + @Nullable + @SuppressWarnings("rawtypes") + private final FrameProcessorFactory segmentMorphFactory; + @JsonCreator public DataSourceMSQDestination( @JsonProperty("dataSource") String dataSource, @JsonProperty("segmentGranularity") Granularity segmentGranularity, 
@JsonProperty("segmentSortOrder") @Nullable List segmentSortOrder, - @JsonProperty("replaceTimeChunks") @Nullable List replaceTimeChunks + @JsonProperty("replaceTimeChunks") @Nullable List replaceTimeChunks, + @JsonProperty("segmentMorphFactory") @Nullable FrameProcessorFactory segmentMorphFactory ) { this.dataSource = Preconditions.checkNotNull(dataSource, "dataSource"); this.segmentGranularity = Preconditions.checkNotNull(segmentGranularity, "segmentGranularity"); this.segmentSortOrder = segmentSortOrder != null ? segmentSortOrder : Collections.emptyList(); this.replaceTimeChunks = replaceTimeChunks; + this.segmentMorphFactory = segmentMorphFactory; if (replaceTimeChunks != null) { // Verify that if replaceTimeChunks is provided, it is nonempty. @@ -98,6 +106,30 @@ public String getDataSource() return dataSource; } + /** + * Returns the segment morph factory, if one is present, else null. + *

+ * The segment morph factory if present, is a way to tell the MSQ task to funnel the results at the final stage to + * the {@link FrameProcessorFactory} instead of a segment generation stage. + */ + @Nullable + @JsonProperty + @JsonInclude(JsonInclude.Include.NON_NULL) + public FrameProcessorFactory getSegmentMorphFactory() + { + return segmentMorphFactory; + } + + /** + * Checks if the destination uses a segmentMorphFactory. If one is present, that means that the query would modify + * existing segments instead of generating new ones. + */ + @JsonIgnore + public boolean doesSegmentMorphing() + { + return segmentMorphFactory != null; + } + @JsonProperty public Granularity getSegmentGranularity() { @@ -158,13 +190,14 @@ public boolean equals(Object o) return Objects.equals(dataSource, that.dataSource) && Objects.equals(segmentGranularity, that.segmentGranularity) && Objects.equals(segmentSortOrder, that.segmentSortOrder) - && Objects.equals(replaceTimeChunks, that.replaceTimeChunks); + && Objects.equals(replaceTimeChunks, that.replaceTimeChunks) + && Objects.equals(segmentMorphFactory, that.segmentMorphFactory); } @Override public int hashCode() { - return Objects.hash(dataSource, segmentGranularity, segmentSortOrder, replaceTimeChunks); + return Objects.hash(dataSource, segmentGranularity, segmentSortOrder, replaceTimeChunks, segmentMorphFactory); } @Override @@ -175,6 +208,7 @@ public String toString() ", segmentGranularity=" + segmentGranularity + ", segmentSortOrder=" + segmentSortOrder + ", replaceTimeChunks=" + replaceTimeChunks + + (segmentMorphFactory != null ? 
", segmentMorphFactory=" + segmentMorphFactory : "") + '}'; } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java index c6396c0b3060..7af34a1eb55b 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java @@ -246,7 +246,8 @@ public QueryResponse runQuery(final DruidQuery druidQuery) targetDataSource.getDestinationName(), segmentGranularityObject, segmentSortOrder, - replaceTimeChunks + replaceTimeChunks, + null ); MultiStageQueryContext.validateAndGetTaskLockType(sqlQueryContext, dataSourceMSQDestination.isReplaceTimeChunks()); diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQParseExceptionsTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQParseExceptionsTest.java index 330f1cdbbe6d..bc8d517ffba2 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQParseExceptionsTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQParseExceptionsTest.java @@ -225,7 +225,7 @@ public void testIngestWithSanitizedNullByte() throws IOException new ColumnMapping("v1", "agent_category") ) )) - .destination(new DataSourceMSQDestination("foo1", Granularities.ALL, null, null)) + .destination(new DataSourceMSQDestination("foo1", Granularities.ALL, null, null, null)) .tuningConfig(MSQTuningConfig.defaultConfig()) .build()) .setQueryContext(DEFAULT_MSQ_CONTEXT) @@ -318,7 +318,7 @@ public void testIngestWithSanitizedNullByteUsingContextParameter() throws IOExce new ColumnMapping("agent_category", "agent_category") ) )) - .destination(new DataSourceMSQDestination("foo1", Granularities.ALL, null, null)) + .destination(new 
DataSourceMSQDestination("foo1", Granularities.ALL, null, null, null)) .tuningConfig(MSQTuningConfig.defaultConfig()) .build()) .setQueryContext(runtimeContext) diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/MSQControllerTaskTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/MSQControllerTaskTest.java index 9de14610f19c..bfb307449768 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/MSQControllerTaskTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/MSQControllerTaskTest.java @@ -58,7 +58,8 @@ public class MSQControllerTaskTest "target", Granularities.DAY, null, - INTERVALS + INTERVALS, + null )) .query(new Druids.ScanQueryBuilder() .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/sql/resources/SqlStatementResourceTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/sql/resources/SqlStatementResourceTest.java index 2da5fd42caf1..89df8180a5c6 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/sql/resources/SqlStatementResourceTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/sql/resources/SqlStatementResourceTest.java @@ -199,6 +199,7 @@ public class SqlStatementResourceTest extends MSQTestBase "test", Granularities.DAY, null, + null, null )) .tuningConfig( diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/util/SqlStatementResourceHelperTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/util/SqlStatementResourceHelperTest.java index 1966d1e5b10a..58856adf3664 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/util/SqlStatementResourceHelperTest.java +++ 
b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/util/SqlStatementResourceHelperTest.java @@ -375,6 +375,7 @@ public void testEmptyCountersForDataSourceDestination() "test", Granularities.DAY, null, + null, null ) ); From e255ef5db4ca83c26340e85a5f4446e59a7bf731 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Wed, 3 Jul 2024 00:09:14 +0530 Subject: [PATCH 03/12] Add some utilities classes --- .../apache/druid/frame/read/FrameReader.java | 9 +++-- .../apache/druid/guice/JsonConfigurator.java | 4 +-- .../druid/java/util/common/StringUtils.java | 6 ---- .../rowsandcols/column/IntArrayColumn.java | 2 +- .../segment/data/CompressionFactory.java | 9 +++++ .../segment/data/DeltaLongEncodingReader.java | 6 ++++ .../segment/data/LongsLongEncodingReader.java | 6 ++++ .../segment/data/LongsLongEncodingWriter.java | 14 +++++++- .../segment/data/TableLongEncodingReader.java | 6 ++++ .../org/apache/druid/segment/data/VByte.java | 36 +++++++++++++++++++ .../druid/segment/serde/cell/IOIterator.java | 6 ++++ .../segment/serde/cell/StorableBuffer.java | 18 ++++++++++ 12 files changed, 109 insertions(+), 13 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/frame/read/FrameReader.java b/processing/src/main/java/org/apache/druid/frame/read/FrameReader.java index 8ddf99325d39..7c6af7573b20 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/FrameReader.java +++ b/processing/src/main/java/org/apache/druid/frame/read/FrameReader.java @@ -20,6 +20,7 @@ package org.apache.druid.frame.read; import com.google.common.base.Preconditions; +import org.apache.druid.error.DruidException; import org.apache.druid.frame.Frame; import org.apache.druid.frame.field.FieldReader; import org.apache.druid.frame.field.FieldReaders; @@ -31,7 +32,9 @@ import org.apache.druid.frame.segment.row.FrameCursorFactory; import org.apache.druid.frame.write.FrameWriterUtils; import org.apache.druid.java.util.common.IAE; -import 
org.apache.druid.java.util.common.ISE; +import org.apache.druid.query.rowsandcols.RowsAndColumns; +import org.apache.druid.query.rowsandcols.concrete.ColumnBasedFrameRowsAndColumns; +import org.apache.druid.query.rowsandcols.concrete.RowBasedFrameRowsAndColumns; import org.apache.druid.segment.CursorFactory; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnType; @@ -44,7 +47,7 @@ /** * Embeds the logic to read frames with a given {@link RowSignature}. - * + *

* Stateless and immutable. */ public class FrameReader @@ -146,7 +149,7 @@ public CursorFactory makeCursorFactory(final Frame frame) case ROW_BASED: return new FrameCursorFactory(frame, this, fieldReaders); default: - throw new ISE("Unrecognized frame type [%s]", frame.type()); + throw DruidException.defensive("Unrecognized frame type [%s]", frame.type()); } } diff --git a/processing/src/main/java/org/apache/druid/guice/JsonConfigurator.java b/processing/src/main/java/org/apache/druid/guice/JsonConfigurator.java index 1e4f18dc1cd7..ed7e79df3f17 100644 --- a/processing/src/main/java/org/apache/druid/guice/JsonConfigurator.java +++ b/processing/src/main/java/org/apache/druid/guice/JsonConfigurator.java @@ -236,9 +236,9 @@ private static void hieraricalPutValue( // to configure ParametrizedUriEmitterConfig object. So skipping xxx=yyy key-value pair when configuring Emitter // doesn't make any difference. That is why we just log this situation, instead of throwing an exception. log.info( - "Skipping %s property: one of it's prefixes is also used as a property key. Prefix: %s", + "Skipping property [%s]: one of it's prefixes [%s] is also used as a property key.", originalProperty, - propertyPrefix + nestedKey ); return; } diff --git a/processing/src/main/java/org/apache/druid/java/util/common/StringUtils.java b/processing/src/main/java/org/apache/druid/java/util/common/StringUtils.java index 7b27ab125fcb..e5d9b2c8e4ec 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/StringUtils.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/StringUtils.java @@ -349,12 +349,6 @@ public static byte[] toUtf8Nullable(@Nullable final String string) return toUtf8(string); } - @SuppressWarnings("unused") - public static String nullableValueOf(@Nullable final Object obj) - { - return obj == null ? null : obj.toString(); - } - /** * Equivalent of String.format(Locale.ENGLISH, message, formatArgs). 
*/ diff --git a/processing/src/main/java/org/apache/druid/query/rowsandcols/column/IntArrayColumn.java b/processing/src/main/java/org/apache/druid/query/rowsandcols/column/IntArrayColumn.java index 673cebf0e2e4..07c083d7d9f8 100644 --- a/processing/src/main/java/org/apache/druid/query/rowsandcols/column/IntArrayColumn.java +++ b/processing/src/main/java/org/apache/druid/query/rowsandcols/column/IntArrayColumn.java @@ -195,7 +195,7 @@ public FindResult findString(int startIndex, int endIndex, String val) @Override public FindResult findComplex(int startIndex, int endIndex, Object val) { - return findDouble(startIndex, endIndex, (int) Numbers.tryParseLong(val, 0)); + return findInt(startIndex, endIndex, (int) Numbers.tryParseLong(val, 0)); } } } diff --git a/processing/src/main/java/org/apache/druid/segment/data/CompressionFactory.java b/processing/src/main/java/org/apache/druid/segment/data/CompressionFactory.java index dde6a440d9ea..98e28380b1ef 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/CompressionFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/data/CompressionFactory.java @@ -233,6 +233,13 @@ public interface LongEncodingWriter void write(long value) throws IOException; + default void write(long[] values, int offset, int length) throws IOException + { + for (int i = offset; i < length; ++i) { + write(values[i]); + } + } + /** * Flush the unwritten content to the current output. */ @@ -294,6 +301,8 @@ public interface LongEncodingReader * various duplicates. 
*/ LongEncodingReader duplicate(); + + LongEncodingStrategy getStrategy(); } public static Supplier getLongSupplier( diff --git a/processing/src/main/java/org/apache/druid/segment/data/DeltaLongEncodingReader.java b/processing/src/main/java/org/apache/druid/segment/data/DeltaLongEncodingReader.java index 435aa2ddfd1a..b7feb3b1dd34 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/DeltaLongEncodingReader.java +++ b/processing/src/main/java/org/apache/druid/segment/data/DeltaLongEncodingReader.java @@ -82,4 +82,10 @@ public CompressionFactory.LongEncodingReader duplicate() { return new DeltaLongEncodingReader(buffer.duplicate(), base, bitsPerValue); } + + @Override + public CompressionFactory.LongEncodingStrategy getStrategy() + { + return CompressionFactory.LongEncodingStrategy.AUTO; + } } diff --git a/processing/src/main/java/org/apache/druid/segment/data/LongsLongEncodingReader.java b/processing/src/main/java/org/apache/druid/segment/data/LongsLongEncodingReader.java index 2ed0459121af..7df866f22c7f 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/LongsLongEncodingReader.java +++ b/processing/src/main/java/org/apache/druid/segment/data/LongsLongEncodingReader.java @@ -71,4 +71,10 @@ public CompressionFactory.LongEncodingReader duplicate() { return new LongsLongEncodingReader(buffer.getByteBuffer(), buffer.getTypeByteOrder()); } + + @Override + public CompressionFactory.LongEncodingStrategy getStrategy() + { + return CompressionFactory.LongEncodingStrategy.LONGS; + } } diff --git a/processing/src/main/java/org/apache/druid/segment/data/LongsLongEncodingWriter.java b/processing/src/main/java/org/apache/druid/segment/data/LongsLongEncodingWriter.java index 2aeb194d9a8a..f2b198b7e7a8 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/LongsLongEncodingWriter.java +++ b/processing/src/main/java/org/apache/druid/segment/data/LongsLongEncodingWriter.java @@ -22,7 +22,6 @@ import 
org.apache.druid.segment.writeout.WriteOutBytes; import javax.annotation.Nullable; - import java.io.IOException; import java.io.OutputStream; import java.nio.ByteBuffer; @@ -75,6 +74,19 @@ public void write(long value) throws IOException } } + @Override + public void write(long[] values, int offset, int length) throws IOException + { + if (outBuffer != null) { + outBuffer.asLongBuffer().put(values, offset, length); + outBuffer.position(outBuffer.position() + (length * Long.BYTES)); + } else { + for (int i = offset; i < length; ++i) { + write(values[i]); + } + } + } + @Override public void flush() { diff --git a/processing/src/main/java/org/apache/druid/segment/data/TableLongEncodingReader.java b/processing/src/main/java/org/apache/druid/segment/data/TableLongEncodingReader.java index 6a5e17b1080b..7e9b1fdc927e 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/TableLongEncodingReader.java +++ b/processing/src/main/java/org/apache/druid/segment/data/TableLongEncodingReader.java @@ -88,4 +88,10 @@ public CompressionFactory.LongEncodingReader duplicate() { return new TableLongEncodingReader(buffer.duplicate(), table, bitsPerValue); } + + @Override + public CompressionFactory.LongEncodingStrategy getStrategy() + { + return CompressionFactory.LongEncodingStrategy.AUTO; + } } diff --git a/processing/src/main/java/org/apache/druid/segment/data/VByte.java b/processing/src/main/java/org/apache/druid/segment/data/VByte.java index 749382cc001e..7886ae7f070a 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/VByte.java +++ b/processing/src/main/java/org/apache/druid/segment/data/VByte.java @@ -19,7 +19,9 @@ package org.apache.druid.segment.data; +import java.io.IOException; import java.nio.ByteBuffer; +import java.nio.channels.WritableByteChannel; public class VByte { @@ -58,6 +60,40 @@ public static int readInt(ByteBuffer buffer) return v; } + public static int writeInt(WritableByteChannel out, int val) throws IOException + { + final byte[] 
bytes = new byte[5]; + final int numBytes; + if (val < (1 << 7)) { + bytes[0] = (byte) (val | (1 << 7)); + numBytes = 1; + } else if (val < (1 << 14)) { + bytes[0] = extract7bits(0, val); + bytes[1] = (byte) (extract7bitsmaskless(1, (val)) | (1 << 7)); + numBytes = 2; + } else if (val < (1 << 21)) { + bytes[0] = extract7bits(0, val); + bytes[1] = extract7bits(1, val); + bytes[2] = (byte) (extract7bitsmaskless(2, (val)) | (1 << 7)); + numBytes = 3; + } else if (val < (1 << 28)) { + bytes[0] = extract7bits(0, val); + bytes[1] = extract7bits(1, val); + bytes[2] = extract7bits(2, val); + bytes[3] = (byte) (extract7bitsmaskless(3, (val)) | (1 << 7)); + numBytes = 4; + } else { + bytes[0] = extract7bits(0, val); + bytes[1] = extract7bits(1, val); + bytes[2] = extract7bits(2, val); + bytes[3] = extract7bits(3, val); + bytes[4] = (byte) (extract7bitsmaskless(4, (val)) | (1 << 7)); + numBytes = 5; + } + out.write(ByteBuffer.wrap(bytes, 0, numBytes)); + return numBytes; + } + /** * Write a variable byte (vbyte) encoded integer to a {@link ByteBuffer} at the current position, advancing the buffer * position by the number of bytes required to represent the integer, between 1 and 5 bytes. diff --git a/processing/src/main/java/org/apache/druid/segment/serde/cell/IOIterator.java b/processing/src/main/java/org/apache/druid/segment/serde/cell/IOIterator.java index 887f1fb65ac6..3931601dd4f3 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/cell/IOIterator.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/cell/IOIterator.java @@ -22,6 +22,12 @@ import java.io.Closeable; import java.io.IOException; +/** + * An Iterator-like interface that is intentionally not extending Iterator. This is because it is Closeable + * and we never want to lose track of the fact that the object needs to be closed. 
+ * + * @param + */ public interface IOIterator extends Closeable { boolean hasNext() throws IOException; diff --git a/processing/src/main/java/org/apache/druid/segment/serde/cell/StorableBuffer.java b/processing/src/main/java/org/apache/druid/segment/serde/cell/StorableBuffer.java index f228a81904fc..7a3bcea371e0 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/cell/StorableBuffer.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/cell/StorableBuffer.java @@ -48,6 +48,24 @@ public int getSerializedSize() } }; + static StorableBuffer fromBytes(byte[] bytes) + { + return new StorableBuffer() + { + @Override + public void store(ByteBuffer byteBuffer) + { + byteBuffer.put(bytes); + } + + @Override + public int getSerializedSize() + { + return bytes.length; + } + }; + } + void store(ByteBuffer byteBuffer); int getSerializedSize(); From 643e6f48235156e8f5b6afe439aaabb2762f8727 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Wed, 3 Jul 2024 09:26:54 +0530 Subject: [PATCH 04/12] Refactor SimpleQueryableIndex to take a metadata supplier --- .../common/task/CompactionTaskTest.java | 18 ++++--- .../org/apache/druid/segment/IndexIO.java | 48 +++++++++++-------- .../druid/segment/SimpleQueryableIndex.java | 11 +++-- .../IndexIONullColumnsCompatibilityTest.java | 24 ++++++---- .../IndexMergerLongestSharedDimOrderTest.java | 2 +- 5 files changed, 57 insertions(+), 46 deletions(-) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java index 134f5305169d..eeb7aab9e047 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java @@ -2033,14 +2033,6 @@ private static class TestIndexIO extends IndexIO } } - final Metadata metadata = new Metadata( - null, - 
aggregatorFactories.toArray(new AggregatorFactory[0]), - null, - null, - null - ); - queryableIndexMap.put( entry.getValue(), new SimpleQueryableIndex( @@ -2049,7 +2041,13 @@ private static class TestIndexIO extends IndexIO null, columnMap, null, - metadata, + () -> new Metadata( + null, + aggregatorFactories.toArray(new AggregatorFactory[0]), + null, + null, + null + ), false ) ); @@ -2074,7 +2072,7 @@ void removeMetadata(File file) index.getBitmapFactoryForDimensions(), index.getColumns(), index.getFileMapper(), - null, + () -> null, false ) ); diff --git a/processing/src/main/java/org/apache/druid/segment/IndexIO.java b/processing/src/main/java/org/apache/druid/segment/IndexIO.java index dd0ac9ab1177..d7aeea060033 100644 --- a/processing/src/main/java/org/apache/druid/segment/IndexIO.java +++ b/processing/src/main/java/org/apache/druid/segment/IndexIO.java @@ -278,7 +278,7 @@ private static void validateRowValues( } } } else if (vals1 instanceof Object[]) { - if (!Arrays.deepEquals((Object[]) vals1, (Object[]) vals2)) { + if (!(vals2 instanceof Object[] && Arrays.deepEquals((Object[]) vals1, (Object[]) vals2))) { throw notEqualValidationException(dim1Name, vals1, vals2); } } else { @@ -510,7 +510,7 @@ public QueryableIndex load(File inDir, ObjectMapper mapper, boolean lazy, Segmen new ConciseBitmapFactory(), columns, index.getFileMapper(), - null, + () -> null, lazy ); } @@ -604,24 +604,32 @@ public QueryableIndex load(File inDir, ObjectMapper mapper, boolean lazy, Segmen allDims = null; } - Metadata metadata = null; - ByteBuffer metadataBB = smooshedFiles.mapFile("metadata.drd"); - if (metadataBB != null) { - try { - metadata = mapper.readValue( - SERIALIZER_UTILS.readBytes(metadataBB, metadataBB.remaining()), - Metadata.class - ); - } - catch (JsonParseException | JsonMappingException ex) { - // Any jackson deserialization errors are ignored e.g. 
if metadata contains some aggregator which - // is no longer supported then it is OK to not use the metadata instead of failing segment loading - log.warn(ex, "Failed to load metadata for segment [%s]", inDir); - } - catch (IOException ex) { - throw new IOException("Failed to read metadata", ex); + Supplier metadataSupplier = new Supplier() + { + @Override + @Nullable + public Metadata get() + { + try { + ByteBuffer metadataBB = smooshedFiles.mapFile("metadata.drd"); + if (metadataBB != null) { + return mapper.readValue( + SERIALIZER_UTILS.readBytes(metadataBB, metadataBB.remaining()), + Metadata.class + ); + } + } + catch (JsonParseException | JsonMappingException ex) { + // Any jackson deserialization errors are ignored e.g. if metadata contains some aggregator which + // is no longer supported then it is OK to not use the metadata instead of failing segment loading + log.warn(ex, "Failed to load metadata for segment [%s]", inDir); + } + catch (IOException ex) { + log.warn(ex, "Failed to read metadata for segment [%s]", inDir); + } + return null; } - } + }; Map> columns = new LinkedHashMap<>(); @@ -663,7 +671,7 @@ public QueryableIndex load(File inDir, ObjectMapper mapper, boolean lazy, Segmen segmentBitmapSerdeFactory.getBitmapFactory(), columns, smooshedFiles, - metadata, + metadataSupplier, lazy ); diff --git a/processing/src/main/java/org/apache/druid/segment/SimpleQueryableIndex.java b/processing/src/main/java/org/apache/druid/segment/SimpleQueryableIndex.java index 924c7911f8a3..dc97845cb229 100644 --- a/processing/src/main/java/org/apache/druid/segment/SimpleQueryableIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/SimpleQueryableIndex.java @@ -32,6 +32,7 @@ import org.joda.time.Interval; import javax.annotation.Nullable; +import javax.validation.constraints.NotNull; import java.util.List; import java.util.Map; @@ -46,8 +47,8 @@ public class SimpleQueryableIndex implements QueryableIndex private final BitmapFactory bitmapFactory; private 
final Map> columns; private final SmooshedFileMapper fileMapper; - @Nullable - private final Metadata metadata; + @NotNull + private final Supplier metadataSupplier; private final Supplier> dimensionHandlers; public SimpleQueryableIndex( @@ -56,7 +57,7 @@ public SimpleQueryableIndex( BitmapFactory bitmapFactory, Map> columns, SmooshedFileMapper fileMapper, - @Nullable Metadata metadata, + @NotNull Supplier metadataSupplier, boolean lazy ) { @@ -73,7 +74,7 @@ public SimpleQueryableIndex( this.bitmapFactory = bitmapFactory; this.columns = columns; this.fileMapper = fileMapper; - this.metadata = metadata; + this.metadataSupplier = metadataSupplier; if (lazy) { this.dimensionHandlers = Suppliers.memoize(() -> initDimensionHandlers(availableDimensions)); @@ -143,7 +144,7 @@ public void close() @Override public Metadata getMetadata() { - return metadata; + return metadataSupplier.get(); } @Override diff --git a/processing/src/test/java/org/apache/druid/segment/IndexIONullColumnsCompatibilityTest.java b/processing/src/test/java/org/apache/druid/segment/IndexIONullColumnsCompatibilityTest.java index 2e3284490758..07cfbdc2a0f2 100644 --- a/processing/src/test/java/org/apache/druid/segment/IndexIONullColumnsCompatibilityTest.java +++ b/processing/src/test/java/org/apache/druid/segment/IndexIONullColumnsCompatibilityTest.java @@ -31,6 +31,7 @@ import com.google.common.primitives.Ints; import org.apache.druid.data.input.MapBasedInputRow; import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.io.smoosh.Smoosh; @@ -184,19 +185,22 @@ public QueryableIndex load(File inDir, ObjectMapper mapper, boolean lazy, Segmen segmentBitmapSerdeFactory = new BitmapSerde.LegacyBitmapSerdeFactory(); } - Metadata metadata = null; - ByteBuffer metadataBB = smooshedFiles.mapFile("metadata.drd"); - if (metadataBB != 
null) { + Supplier metadataSupplier = () -> { try { - metadata = mapper.readValue( - IndexIO.SERIALIZER_UTILS.readBytes(metadataBB, metadataBB.remaining()), - Metadata.class - ); + ByteBuffer metadataBB = smooshedFiles.mapFile("metadata.drd"); + if (metadataBB != null) { + return mapper.readValue( + IndexIO.SERIALIZER_UTILS.readBytes(metadataBB, metadataBB.remaining()), + Metadata.class + ); + } else { + return null; + } } catch (IOException ex) { - throw new IOException("Failed to read metadata", ex); + throw DruidException.defensive(ex, "Failed to read metadata"); } - } + }; Map> columns = new HashMap<>(); @@ -251,7 +255,7 @@ public QueryableIndex load(File inDir, ObjectMapper mapper, boolean lazy, Segmen segmentBitmapSerdeFactory.getBitmapFactory(), columns, smooshedFiles, - metadata, + metadataSupplier, lazy ); diff --git a/processing/src/test/java/org/apache/druid/segment/IndexMergerLongestSharedDimOrderTest.java b/processing/src/test/java/org/apache/druid/segment/IndexMergerLongestSharedDimOrderTest.java index 2bb84c0eeb98..dc03749b2866 100644 --- a/processing/src/test/java/org/apache/druid/segment/IndexMergerLongestSharedDimOrderTest.java +++ b/processing/src/test/java/org/apache/druid/segment/IndexMergerLongestSharedDimOrderTest.java @@ -167,7 +167,7 @@ private QueryableIndexIndexableAdapter makeIndexWithDimensionList(List d mockBitmapFactory, ImmutableMap.of(ColumnHolder.TIME_COLUMN_NAME, mockSupplier), mockSmooshedFileMapper, - null, + () -> null, true ) ); From fcc34c151025f8048a9d1873542e8c67c0019326 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Wed, 3 Jul 2024 10:08:26 +0530 Subject: [PATCH 05/12] Add FrameMaker --- .../ColumnBasedFrameRowsAndColumns.java | 1 - .../semantic/DefaultFrameMaker.java | 81 +++++++++++++++++++ .../rowsandcols/semantic/FrameMaker.java | 40 +++++++++ 3 files changed, 121 insertions(+), 1 deletion(-) create mode 100644 processing/src/main/java/org/apache/druid/query/rowsandcols/semantic/DefaultFrameMaker.java create mode 
100644 processing/src/main/java/org/apache/druid/query/rowsandcols/semantic/FrameMaker.java diff --git a/processing/src/main/java/org/apache/druid/query/rowsandcols/concrete/ColumnBasedFrameRowsAndColumns.java b/processing/src/main/java/org/apache/druid/query/rowsandcols/concrete/ColumnBasedFrameRowsAndColumns.java index ada3da164ea1..0152156c5b58 100644 --- a/processing/src/main/java/org/apache/druid/query/rowsandcols/concrete/ColumnBasedFrameRowsAndColumns.java +++ b/processing/src/main/java/org/apache/druid/query/rowsandcols/concrete/ColumnBasedFrameRowsAndColumns.java @@ -80,7 +80,6 @@ public Column findColumn(String name) } } return colCache.get(name); - } @SuppressWarnings("unchecked") diff --git a/processing/src/main/java/org/apache/druid/query/rowsandcols/semantic/DefaultFrameMaker.java b/processing/src/main/java/org/apache/druid/query/rowsandcols/semantic/DefaultFrameMaker.java new file mode 100644 index 000000000000..204b5bd85489 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/rowsandcols/semantic/DefaultFrameMaker.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.rowsandcols.semantic; + +import org.apache.druid.frame.Frame; +import org.apache.druid.frame.allocation.ArenaMemoryAllocatorFactory; +import org.apache.druid.frame.write.FrameWriter; +import org.apache.druid.frame.write.FrameWriters; +import org.apache.druid.query.rowsandcols.RowsAndColumns; +import org.apache.druid.query.rowsandcols.column.Column; +import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.column.RowSignature; + +import java.util.Collections; +import java.util.concurrent.atomic.AtomicInteger; + +public class DefaultFrameMaker implements FrameMaker +{ + private final RowsAndColumns rac; + + public DefaultFrameMaker(RowsAndColumns rac) + { + this.rac = rac; + } + + @Override + public RowSignature computeSignature() + { + final RowSignature.Builder signatureBuilder = RowSignature.builder(); + for (String column : rac.getColumnNames()) { + final Column racColumn = rac.findColumn(column); + if (racColumn == null) { + continue; + } + signatureBuilder.add(column, racColumn.toAccessor().getType()); + } + + return signatureBuilder.build(); + } + + @Override + public Frame toColumnBasedFrame() + { + final AtomicInteger rowId = new AtomicInteger(0); + final int numRows = rac.numRows(); + final ColumnSelectorFactoryMaker csfm = ColumnSelectorFactoryMaker.fromRAC(rac); + final ColumnSelectorFactory selectorFactory = csfm.make(rowId); + + final ArenaMemoryAllocatorFactory memFactory = new ArenaMemoryAllocatorFactory(200 << 20); // 200 MB + + final FrameWriter frameWriter = FrameWriters.makeColumnBasedFrameWriterFactory( + memFactory, + computeSignature(), + Collections.emptyList() + ).newFrameWriter(selectorFactory); + + rowId.set(0); + for (; rowId.get() < numRows; rowId.incrementAndGet()) { + frameWriter.addSelection(); + } + + return Frame.wrap(frameWriter.toByteArray()); + } +} diff --git a/processing/src/main/java/org/apache/druid/query/rowsandcols/semantic/FrameMaker.java 
b/processing/src/main/java/org/apache/druid/query/rowsandcols/semantic/FrameMaker.java new file mode 100644 index 000000000000..095bfe1ed87c --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/rowsandcols/semantic/FrameMaker.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.rowsandcols.semantic; + +import org.apache.druid.frame.Frame; +import org.apache.druid.query.rowsandcols.RowsAndColumns; +import org.apache.druid.segment.column.RowSignature; + +public interface FrameMaker +{ + static FrameMaker fromRAC(RowsAndColumns rac) + { + FrameMaker retVal = rac.as(FrameMaker.class); + if (retVal == null) { + retVal = new DefaultFrameMaker(rac); + } + return retVal; + } + + RowSignature computeSignature(); + + Frame toColumnBasedFrame(); +} From 01b6fdd8472ae940df071cd871ba1ee48209dfee Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Wed, 3 Jul 2024 10:22:16 +0530 Subject: [PATCH 06/12] Modify TmpFileSegmentWriteOutMedium to use a heap based WriteOutBytes before falling back to a temporary file --- .../segment/writeout/FileWriteOutBytes.java | 37 +-- .../LazilyAllocatingHeapWriteOutBytes.java | 218 ++++++++++++++++++ .../writeout/SegmentWriteOutMedium.java | 5 +- .../TmpFileSegmentWriteOutMedium.java | 63 ++++- .../druid/segment/writeout/WriteOutBytes.java | 8 +- 5 files changed, 304 insertions(+), 27 deletions(-) create mode 100644 processing/src/main/java/org/apache/druid/segment/writeout/LazilyAllocatingHeapWriteOutBytes.java diff --git a/processing/src/main/java/org/apache/druid/segment/writeout/FileWriteOutBytes.java b/processing/src/main/java/org/apache/druid/segment/writeout/FileWriteOutBytes.java index e41a362fda8e..959066342ac0 100644 --- a/processing/src/main/java/org/apache/druid/segment/writeout/FileWriteOutBytes.java +++ b/processing/src/main/java/org/apache/druid/segment/writeout/FileWriteOutBytes.java @@ -33,7 +33,7 @@ import java.nio.channels.FileChannel; import java.nio.channels.WritableByteChannel; -final class FileWriteOutBytes extends WriteOutBytes +public final class FileWriteOutBytes extends WriteOutBytes { private final File file; private final FileChannel ch; @@ -90,22 +90,29 @@ public int write(ByteBuffer src) throws IOException { int len = src.remaining(); 
flushIfNeeded(len); - while (src.remaining() > buffer.capacity()) { - int srcLimit = src.limit(); - try { - src.limit(src.position() + buffer.capacity()); - buffer.put(src); - writeOutBytes += buffer.capacity(); - flush(); - } - finally { - // IOException may occur in flush(), reset src limit to the original - src.limit(srcLimit); + if (len > buffer.remaining()) { + // if a flush was required, flushIfNeeded should have forced a flush. So, if the len is greater than + // our buffer size, we should just dump it straight to the file instead of buffering + Channels.writeFully(ch, src); + writeOutBytes += len; + } else { + while (src.remaining() > buffer.capacity()) { + int srcLimit = src.limit(); + try { + src.limit(src.position() + buffer.capacity()); + buffer.put(src); + writeOutBytes += buffer.capacity(); + flush(); + } + finally { + // IOException may occur in flush(), reset src limit to the original + src.limit(srcLimit); + } } + int remaining = src.remaining(); + buffer.put(src); + writeOutBytes += remaining; } - int remaining = src.remaining(); - buffer.put(src); - writeOutBytes += remaining; return len; } diff --git a/processing/src/main/java/org/apache/druid/segment/writeout/LazilyAllocatingHeapWriteOutBytes.java b/processing/src/main/java/org/apache/druid/segment/writeout/LazilyAllocatingHeapWriteOutBytes.java new file mode 100644 index 000000000000..66d73e525574 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/writeout/LazilyAllocatingHeapWriteOutBytes.java @@ -0,0 +1,218 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.writeout; + +import org.apache.commons.io.input.NullInputStream; +import org.apache.druid.io.ByteBufferInputStream; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.BufferOverflowException; +import java.nio.BufferUnderflowException; +import java.nio.ByteBuffer; +import java.nio.channels.WritableByteChannel; +import java.util.function.Supplier; + +public class LazilyAllocatingHeapWriteOutBytes extends WriteOutBytes +{ + private final Supplier delegateSupplier; + + private ByteBuffer tmpBuffer = null; + private WriteOutBytes delegate = null; + + public LazilyAllocatingHeapWriteOutBytes(Supplier delegateSupplier) + { + this.delegateSupplier = delegateSupplier; + } + + @Override + public void writeInt(int v) throws IOException + { + final boolean useBuffer = ensureBytes(Integer.BYTES); + if (useBuffer) { + tmpBuffer.putInt(v); + return; + } + + delegate.writeInt(v); + } + + @Override + public long size() + { + if (delegate == null) { + return tmpBuffer == null ? 
0 : tmpBuffer.position(); + } else { + return delegate.size(); + } + } + + @Override + public void writeTo(WritableByteChannel channel) throws IOException + { + if (delegate == null) { + if (tmpBuffer == null) { + return; + } + + final ByteBuffer tmpBufCopy = tmpBuffer.asReadOnlyBuffer(); + tmpBufCopy.flip(); + channel.write(tmpBufCopy); + } else { + delegate.writeTo(channel); + } + } + + @Override + public InputStream asInputStream() throws IOException + { + if (delegate == null) { + if (tmpBuffer == null) { + return new NullInputStream(); + } + final ByteBuffer tmpBufCopy = tmpBuffer.asReadOnlyBuffer(); + tmpBufCopy.flip(); + return new ByteBufferInputStream(tmpBufCopy); + } else { + return delegate.asInputStream(); + } + } + + @Override + public void readFully(long pos, ByteBuffer buffer) throws IOException + { + if (delegate == null) { + if (tmpBuffer == null) { + return; + } + + if (pos < tmpBuffer.position()) { + final ByteBuffer tmpBufCopy = tmpBuffer.asReadOnlyBuffer(); + tmpBufCopy.flip().position((int) pos); + if (tmpBufCopy.remaining() < buffer.remaining()) { + throw new BufferUnderflowException(); + } + tmpBufCopy.limit(tmpBufCopy.position() + buffer.remaining()); + buffer.put(tmpBufCopy); + } else { + throw new BufferOverflowException(); + } + } else { + delegate.readFully(pos, buffer); + } + } + + @Override + public void write(int b) throws IOException + { + final boolean useBuffer = ensureBytes(1); + if (useBuffer) { + tmpBuffer.put((byte) b); + return; + } + + delegate.write(b); + } + + @Override + public void write(byte[] b, int off, int len) throws IOException + { + write(ByteBuffer.wrap(b, off, len)); + } + + @Override + public int write(ByteBuffer src) throws IOException + { + final int numToWrite = src.remaining(); + final boolean useBuffer = ensureBytes(numToWrite); + if (useBuffer) { + tmpBuffer.put(src); + return numToWrite; + } + + return delegate.write(src); + } + + @Override + public boolean isOpen() + { + return delegate == null ? 
true : delegate.isOpen(); + } + + /** + * Ensures bytes are available, returns a boolean indicating if the buffer should be used or the delegate + * + * @param numBytes the number of bytes that need writing + * @return boolean true if the buffer should be used, false if the delegate should be used + * @throws IOException if an issue with IO occurs (can primarily happen when copying buffers) + */ + private boolean ensureBytes(int numBytes) throws IOException + { + if (tmpBuffer == null) { + if (delegate == null) { + if (numBytes < 128) { + tmpBuffer = ByteBuffer.allocate(128); + } else if (numBytes < 4096) { + tmpBuffer = ByteBuffer.allocate(4096); + } else { + // We are likely dealing with something that's already buffering stuff, so just switch delegate immediately + delegate = delegateSupplier.get(); + return false; + } + return true; + } else { + return false; + } + } + + if (numBytes < tmpBuffer.remaining()) { + return true; + } + + final ByteBuffer newBuf; + switch (tmpBuffer.capacity()) { + case 128: + if (numBytes < 4096 - tmpBuffer.position()) { + newBuf = ByteBuffer.allocate(4096); + break; + } + case 4096: + if (numBytes < 16384 - tmpBuffer.position()) { + newBuf = ByteBuffer.allocate(16384); + break; + } + default: + newBuf = null; + } + + if (newBuf == null) { + delegate = delegateSupplier.get(); + tmpBuffer.flip(); + delegate.write(tmpBuffer); + tmpBuffer = null; + return false; + } else { + tmpBuffer.flip(); + newBuf.put(tmpBuffer); + tmpBuffer = newBuf; + return true; + } + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/writeout/SegmentWriteOutMedium.java b/processing/src/main/java/org/apache/druid/segment/writeout/SegmentWriteOutMedium.java index 7c2af89b1b66..16efd07cb1bf 100644 --- a/processing/src/main/java/org/apache/druid/segment/writeout/SegmentWriteOutMedium.java +++ b/processing/src/main/java/org/apache/druid/segment/writeout/SegmentWriteOutMedium.java @@ -28,7 +28,7 @@ * SegmentWriteOutMedium is an umbrella "resource 
disposer" for temporary buffers (in the form of {@link WriteOutBytes}, * obtained by calling {@link #makeWriteOutBytes()} on the SegmentWriteOutMedium instance), that are used during new Druid * segment creation, and other resources (see {@link #getCloser()}). - * + *

* When SegmentWriteOutMedium is closed, all child WriteOutBytes couldn't be used anymore. */ public interface SegmentWriteOutMedium extends Closeable @@ -37,6 +37,7 @@ public interface SegmentWriteOutMedium extends Closeable * Creates a new empty {@link WriteOutBytes}, attached to this SegmentWriteOutMedium. When this SegmentWriteOutMedium is * closed, the returned WriteOutBytes couldn't be used anymore. */ + @SuppressWarnings("RedundantThrows") WriteOutBytes makeWriteOutBytes() throws IOException; /** @@ -45,7 +46,7 @@ public interface SegmentWriteOutMedium extends Closeable * using a shared {@link SegmentWriteOutMedium} but which control the complete lifecycle of the {@link WriteOutBytes} * which they require to free the backing resources when they are finished, rather than waiting until * {@link #close()} is called for this medium. - * + *

* The 'child' medium will be closed when {@link #close()} is called, if not called explicitly prior to closing this * medium. */ diff --git a/processing/src/main/java/org/apache/druid/segment/writeout/TmpFileSegmentWriteOutMedium.java b/processing/src/main/java/org/apache/druid/segment/writeout/TmpFileSegmentWriteOutMedium.java index 007b93aa04b7..646be1451b3a 100644 --- a/processing/src/main/java/org/apache/druid/segment/writeout/TmpFileSegmentWriteOutMedium.java +++ b/processing/src/main/java/org/apache/druid/segment/writeout/TmpFileSegmentWriteOutMedium.java @@ -19,21 +19,47 @@ package org.apache.druid.segment.writeout; +import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.FileUtils; import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.java.util.common.logger.Logger; import java.io.File; import java.io.IOException; import java.nio.channels.FileChannel; import java.nio.file.StandardOpenOption; +import java.util.SortedMap; +import java.util.concurrent.ConcurrentSkipListMap; +import java.util.concurrent.atomic.AtomicInteger; +/** + * Builds segment write out medium objects that are based on temporary files. Some analysis of usage of these + * objects shows that they periodically get used for very small things, where the overhead of creating a tmp file + * is actually very large. It would be best to go back and look at usages and try to make them lazy such that + * they only actually use a medium when they need it. But, in an attempt to get some benefits, we "shim" in the + * laziness by returning a heap-based WriteOutBytes that only falls back to making a tmp file when it actually + * fills up. 
+ */ public final class TmpFileSegmentWriteOutMedium implements SegmentWriteOutMedium { + private static final Logger log = new Logger(TmpFileSegmentWriteOutMedium.class); + + private final AtomicInteger filesCreated; + private final SortedMap sizeDistribution; + private int numLocallyCreated = 0; + private final File dir; private final Closer closer = Closer.create(); TmpFileSegmentWriteOutMedium(File outDir) throws IOException { + this(outDir, new AtomicInteger(0), new ConcurrentSkipListMap<>()); + } + + private TmpFileSegmentWriteOutMedium(File outDir, AtomicInteger filesCreated, SortedMap sizeDistribution) throws IOException + { + this.filesCreated = filesCreated; + this.sizeDistribution = sizeDistribution; File tmpOutputFilesDir = new File(outDir, "tmpOutputFiles"); FileUtils.mkdirp(tmpOutputFilesDir); closer.register(() -> FileUtils.deleteDirectory(tmpOutputFilesDir)); @@ -43,21 +69,37 @@ public final class TmpFileSegmentWriteOutMedium implements SegmentWriteOutMedium @Override public WriteOutBytes makeWriteOutBytes() throws IOException { - File file = File.createTempFile("filePeon", null, dir); - FileChannel ch = FileChannel.open( - file.toPath(), - StandardOpenOption.READ, - StandardOpenOption.WRITE + return new LazilyAllocatingHeapWriteOutBytes( + () -> { + final int i = filesCreated.incrementAndGet(); + ++numLocallyCreated; + File file; + FileChannel ch; + try { + file = File.createTempFile("filePeon", null, dir); + ch = FileChannel.open(file.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE); + } + catch (IOException e) { + throw DruidException.defensive(e, "Failed"); + } + if (i % 1000 == 0) { + log.info("Created [%,d] tmp files. [%s]", i, dir); + } + final FileWriteOutBytes retVal = new FileWriteOutBytes(file, ch); + closer.register(file::delete); + closer.register(ch); + closer.register(() -> { + sizeDistribution.compute(retVal.size(), (key, val) -> val == null ? 
1 : val + 1); + }); + return retVal; + } ); - closer.register(file::delete); - closer.register(ch); - return new FileWriteOutBytes(file, ch); } @Override public SegmentWriteOutMedium makeChildWriteOutMedium() throws IOException { - TmpFileSegmentWriteOutMedium tmpFileSegmentWriteOutMedium = new TmpFileSegmentWriteOutMedium(dir); + TmpFileSegmentWriteOutMedium tmpFileSegmentWriteOutMedium = new TmpFileSegmentWriteOutMedium(dir, filesCreated, sizeDistribution); closer.register(tmpFileSegmentWriteOutMedium); return tmpFileSegmentWriteOutMedium; } @@ -71,6 +113,9 @@ public Closer getCloser() @Override public void close() throws IOException { + log.debug("Closing, files still open[%,d], filesBeingClosed[%,d], dir[%s]", filesCreated.get(), numLocallyCreated, dir); + filesCreated.set(filesCreated.get() - numLocallyCreated); + numLocallyCreated = 0; closer.close(); } } diff --git a/processing/src/main/java/org/apache/druid/segment/writeout/WriteOutBytes.java b/processing/src/main/java/org/apache/druid/segment/writeout/WriteOutBytes.java index e1c2972b1485..95ab2f7b01dc 100644 --- a/processing/src/main/java/org/apache/druid/segment/writeout/WriteOutBytes.java +++ b/processing/src/main/java/org/apache/druid/segment/writeout/WriteOutBytes.java @@ -30,7 +30,7 @@ * WritableByteChannel} and {@link #writeInt(int)} append to the sequence. Methods {@link * #writeTo(WritableByteChannel)} and {@link #asInputStream()} allow to write the sequence somewhere else. {@link * #readFully} allows to access the sequence randomly. - * + *

* WriteOutBytes is a resource that is managed by {@link SegmentWriteOutMedium}, so it's own {@link #close()} method * does nothing. However WriteOutBytes should appear closed, i. e. {@link #isOpen()} returns false, after the parental * SegmentWriteOutMedium is closed. @@ -47,6 +47,12 @@ public abstract class WriteOutBytes extends OutputStream implements WritableByte */ public abstract long size(); + @Override + public void write(byte[] b, int off, int len) throws IOException + { + write(ByteBuffer.wrap(b, off, len)); + } + /** * Takes all bytes that are written to this WriteOutBytes so far and writes them into the given channel. */ From 93ca7e7d0b238259427b58f7b5dcea06298401ed Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Wed, 3 Jul 2024 10:33:55 +0530 Subject: [PATCH 07/12] - Modify DictionaryWriter#write to return the index of the value written. - Add a readBufferCache to FrontCodedIntArrayIndexedWriter. - Track cardinality with FixedIndexedWriter. --- .../druid/segment/data/DictionaryWriter.java | 31 ++++++++++++++- .../data/EncodedStringDictionaryWriter.java | 4 +- .../segment/data/FixedIndexedWriter.java | 38 +++++++++---------- .../segment/data/FrontCodedIndexedWriter.java | 11 ++++-- .../data/FrontCodedIntArrayIndexedWriter.java | 35 +++++++++++++---- .../segment/data/GenericIndexedWriter.java | 5 ++- 6 files changed, 90 insertions(+), 34 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/data/DictionaryWriter.java b/processing/src/main/java/org/apache/druid/segment/data/DictionaryWriter.java index 170f6975f28e..5901e2e13205 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/DictionaryWriter.java +++ b/processing/src/main/java/org/apache/druid/segment/data/DictionaryWriter.java @@ -28,12 +28,41 @@ public interface DictionaryWriter extends Serializer { boolean isSorted(); + /** + * Prepares the writer for writing + * + * @throws IOException if there is a problem with IO + */ void open() throws IOException; - 
void write(@Nullable T objectToWrite) throws IOException; + /** + * Writes an object to the dictionary. + *

+ * Returns the index of the value that was just written. This is defined as the `int` value that can be passed + * into {@link #get} such that it will return the same value back. + * + * @param objectToWrite object to be written to the dictionary + * @return index of the value that was just written + * @throws IOException if there is a problem with IO + */ + int write(@Nullable T objectToWrite) throws IOException; + /** + * Returns an object that has already been written via the {@link #write} method. + * + * @param dictId index of the object to return + * @return the object identified by the given index + * @throws IOException if there is a problem with IO + */ @Nullable T get(int dictId) throws IOException; + /** + * Returns the number of items that have been written so far in this dictionary. Any number lower than this + * cardinality can be passed into {@link #get} and a value will be returned. If a value greater than or equal to + * the cardinality is passed into {@link #get} all sorts of things could happen, but likely none of them are good. 
+ * + * @return the number of items that have been written so far + */ int getCardinality(); } diff --git a/processing/src/main/java/org/apache/druid/segment/data/EncodedStringDictionaryWriter.java b/processing/src/main/java/org/apache/druid/segment/data/EncodedStringDictionaryWriter.java index 371a73bebd74..799ed3766f28 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/EncodedStringDictionaryWriter.java +++ b/processing/src/main/java/org/apache/druid/segment/data/EncodedStringDictionaryWriter.java @@ -58,9 +58,9 @@ public void open() throws IOException } @Override - public void write(@Nullable String objectToWrite) throws IOException + public int write(@Nullable String objectToWrite) throws IOException { - delegate.write(StringUtils.toUtf8Nullable(NullHandling.emptyToNullIfNeeded(objectToWrite))); + return delegate.write(StringUtils.toUtf8Nullable(NullHandling.emptyToNullIfNeeded(objectToWrite))); } @Nullable diff --git a/processing/src/main/java/org/apache/druid/segment/data/FixedIndexedWriter.java b/processing/src/main/java/org/apache/druid/segment/data/FixedIndexedWriter.java index b1b473b3419d..42ca16b78f42 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/FixedIndexedWriter.java +++ b/processing/src/main/java/org/apache/druid/segment/data/FixedIndexedWriter.java @@ -20,8 +20,8 @@ package org.apache.druid.segment.data; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.error.DruidException; import org.apache.druid.io.Channels; -import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; import org.apache.druid.segment.column.TypeStrategy; import org.apache.druid.segment.writeout.SegmentWriteOutMedium; @@ -46,14 +46,16 @@ public class FixedIndexedWriter implements DictionaryWriter private final Comparator comparator; private final ByteBuffer scratch; private final ByteBuffer readBuffer; - private int numWritten; + private final boolean isSorted; + 
private final int width; + + private int cardinality = 0; + @Nullable private WriteOutBytes valuesOut = null; private boolean hasNulls = false; - private boolean isSorted; @Nullable private T prevObject = null; - private final int width; public FixedIndexedWriter( SegmentWriteOutMedium segmentWriteOutMedium, @@ -87,7 +89,7 @@ public void open() throws IOException @Override public int getCardinality() { - return hasNulls ? numWritten + 1 : numWritten; + return cardinality; } @Override @@ -97,28 +99,31 @@ public long getSerializedSize() } @Override - public void write(@Nullable T objectToWrite) throws IOException + public int write(@Nullable T objectToWrite) throws IOException { if (prevObject != null && isSorted && comparator.compare(prevObject, objectToWrite) >= 0) { - throw new ISE( + throw DruidException.defensive( "Values must be sorted and unique. Element [%s] with value [%s] is before or equivalent to [%s]", - numWritten, + cardinality, objectToWrite, prevObject ); } if (objectToWrite == null) { + if (cardinality != 0) { + throw DruidException.defensive("Null must come first, got it at cardinality[%,d]!=0", cardinality); + } hasNulls = true; - return; + return cardinality++; } scratch.clear(); typeStrategy.write(scratch, objectToWrite, width); scratch.flip(); Channels.writeFully(valuesOut, scratch); - numWritten++; prevObject = objectToWrite; + return cardinality++; } @Override @@ -141,7 +146,7 @@ public void writeTo( scratch.flip(); Channels.writeFully(channel, scratch); scratch.clear(); - scratch.putInt(numWritten); + scratch.putInt(hasNulls ? cardinality - 1 : cardinality); // we don't actually write the null entry, so subtract 1 scratch.flip(); Channels.writeFully(channel, scratch); valuesOut.writeTo(channel); @@ -166,7 +171,7 @@ public T get(int index) throws IOException public Iterator getIterator() { final ByteBuffer iteratorBuffer = ByteBuffer.allocate(width * PAGE_SIZE).order(readBuffer.order()); - final int totalCount = hasNulls ? 
1 + numWritten : numWritten; + final int totalCount = cardinality; final int startPos = hasNulls ? 1 : 0; return new Iterator() @@ -197,13 +202,8 @@ private void readPage() { iteratorBuffer.clear(); try { - if (numWritten - (pos - startPos) < PAGE_SIZE) { - int size = (numWritten - (pos - startPos)) * width; - iteratorBuffer.limit(size); - valuesOut.readFully((long) (pos - startPos) * width, iteratorBuffer); - } else { - valuesOut.readFully((long) (pos - startPos) * width, iteratorBuffer); - } + iteratorBuffer.limit(Math.min(PAGE_SIZE, (cardinality - pos) * width)); + valuesOut.readFully((long) (pos - startPos) * width, iteratorBuffer); iteratorBuffer.clear(); } catch (IOException e) { diff --git a/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIndexedWriter.java b/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIndexedWriter.java index c24d2e55d71d..707e3894793a 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIndexedWriter.java +++ b/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIndexedWriter.java @@ -21,6 +21,7 @@ import com.google.common.primitives.Ints; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.error.DruidException; import org.apache.druid.io.Channels; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.ISE; @@ -102,7 +103,7 @@ public void open() throws IOException } @Override - public void write(@Nullable byte[] value) throws IOException + public int write(@Nullable byte[] value) throws IOException { if (prevObject != null && compareNullableUtf8UsingJavaStringOrdering(prevObject, value) >= 0) { throw new ISE( @@ -114,8 +115,11 @@ public void write(@Nullable byte[] value) throws IOException } if (value == null) { + if (numWritten != 0) { + throw DruidException.defensive("Null must come first, got it at cardinality[%,d]!=0", numWritten); + } hasNulls = true; - return; + return 0; } // if the bucket buffer is 
full, write the bucket @@ -143,8 +147,9 @@ public void write(@Nullable byte[] value) throws IOException bucketBuffer[numWritten % bucketSize] = value; - ++numWritten; + int retVal = numWritten++; prevObject = value; + return retVal + (hasNulls ? 1 : 0); } diff --git a/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIntArrayIndexedWriter.java b/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIntArrayIndexedWriter.java index 8116882191b0..50e350f3d640 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIntArrayIndexedWriter.java +++ b/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIntArrayIndexedWriter.java @@ -21,6 +21,7 @@ import com.google.common.primitives.Ints; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.error.DruidException; import org.apache.druid.io.Channels; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.ISE; @@ -81,6 +82,10 @@ public class FrontCodedIntArrayIndexedWriter implements DictionaryWriter private boolean isClosed = false; private boolean hasNulls = false; + private int readCachedBucket = -1; + @Nullable + private ByteBuffer readBufferCache = null; + public FrontCodedIntArrayIndexedWriter( SegmentWriteOutMedium segmentWriteOutMedium, ByteOrder byteOrder, @@ -107,7 +112,7 @@ public void open() throws IOException } @Override - public void write(@Nullable int[] value) throws IOException + public int write(@Nullable int[] value) throws IOException { if (prevObject != null && ARRAY_COMPARATOR.compare(prevObject, value) >= 0) { @@ -120,8 +125,11 @@ public void write(@Nullable int[] value) throws IOException } if (value == null) { + if (numWritten != 0) { + throw DruidException.defensive("Null must come first, got it at numWritten[%,d]!=0", numWritten); + } hasNulls = true; - return; + return 0; } // if the bucket buffer is full, write the bucket @@ -147,8 +155,9 @@ public void write(@Nullable int[] value) 
throws IOException bucketBuffer[numWritten % bucketSize] = value; - ++numWritten; + int retVal = numWritten++; prevObject = value; + return retVal + (hasNulls ? 1 : 0); } @@ -206,6 +215,11 @@ public int[] get(int index) throws IOException return bucketBuffer[relativeIndex]; } else { final int bucket = adjustedIndex >> div; + if (readCachedBucket == bucket) { + readBufferCache.position(0); + return getFromBucket(readBufferCache, relativeIndex); + } + long startOffset; if (bucket == 0) { startOffset = 0; @@ -217,10 +231,17 @@ public int[] get(int index) throws IOException if (currentBucketSize == 0) { return null; } - final ByteBuffer bucketBuffer = ByteBuffer.allocate(currentBucketSize).order(byteOrder); - valuesOut.readFully(startOffset, bucketBuffer); - bucketBuffer.clear(); - return getFromBucket(bucketBuffer, relativeIndex); + if (readBufferCache == null || readBufferCache.capacity() < currentBucketSize) { + readBufferCache = ByteBuffer.allocate(currentBucketSize).order(byteOrder); + } + readBufferCache.clear(); + readBufferCache.limit(currentBucketSize); + valuesOut.readFully(startOffset, readBufferCache); + + readCachedBucket = bucket; + + readBufferCache.position(0); + return getFromBucket(readBufferCache, relativeIndex); } } diff --git a/processing/src/main/java/org/apache/druid/segment/data/GenericIndexedWriter.java b/processing/src/main/java/org/apache/druid/segment/data/GenericIndexedWriter.java index 8b38125322ba..a87a61843fac 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/GenericIndexedWriter.java +++ b/processing/src/main/java/org/apache/druid/segment/data/GenericIndexedWriter.java @@ -242,7 +242,7 @@ void setIntMaxForCasting(final int intMaxForCasting) } @Override - public void write(@Nullable T objectToWrite) throws IOException + public int write(@Nullable T objectToWrite) throws IOException { if (objectsSorted && prevObject != null && strategy.compare(prevObject, objectToWrite) >= 0) { objectsSorted = false; @@ -263,7 +263,7 @@ 
public void write(@Nullable T objectToWrite) throws IOException // Increment number of values written. Important to do this after the check above, since numWritten is // accessed during "initializeHeaderOutLong" to determine the length of the header. - ++numWritten; + int retVal = numWritten++; if (!requireMultipleFiles) { headerOut.writeInt(checkedCastNonnegativeLongToInt(valuesOut.size())); @@ -280,6 +280,7 @@ public void write(@Nullable T objectToWrite) throws IOException if (objectsSorted) { prevObject = objectToWrite; } + return retVal; } @Nullable From 716eaca13b6976b69af15f1fdad2d02d42c5a913 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Wed, 3 Jul 2024 10:40:07 +0530 Subject: [PATCH 08/12] Refactor existing classes --- .../MapOfColumnsRowsAndColumns.java | 7 + .../rowsandcols/column/LongArrayColumn.java | 201 ++++++++++++++++++ .../concrete/ColumnHolderRACColumn.java | 2 +- .../DefaultColumnSelectorFactoryMaker.java | 1 + .../druid/segment/AutoTypeColumnMerger.java | 6 +- .../druid/segment/column/BaseColumn.java | 7 + .../druid/segment/column/LongsColumn.java | 9 + .../StringUtf8DictionaryEncodedColumn.java | 25 +-- .../BlockLayoutColumnarLongsSupplier.java | 10 +- .../druid/segment/data/ColumnarInts.java | 6 + .../druid/segment/data/ColumnarLongs.java | 13 +- .../segment/data/ColumnarLongsSerializer.java | 9 + .../MMappedQueryableSegmentizerFactory.java | 21 ++ .../CompressedNestedDataComplexColumn.java | 34 ++- .../segment/nested/DictionaryIdLookup.java | 2 +- .../ColumnBasedFrameRowsAndColumnsTest.java | 6 +- 16 files changed, 327 insertions(+), 32 deletions(-) create mode 100644 processing/src/main/java/org/apache/druid/query/rowsandcols/column/LongArrayColumn.java diff --git a/processing/src/main/java/org/apache/druid/query/rowsandcols/MapOfColumnsRowsAndColumns.java b/processing/src/main/java/org/apache/druid/query/rowsandcols/MapOfColumnsRowsAndColumns.java index 121e4863bcdc..42972f9340da 100644 --- 
a/processing/src/main/java/org/apache/druid/query/rowsandcols/MapOfColumnsRowsAndColumns.java +++ b/processing/src/main/java/org/apache/druid/query/rowsandcols/MapOfColumnsRowsAndColumns.java @@ -25,6 +25,7 @@ import org.apache.druid.query.rowsandcols.column.Column; import org.apache.druid.query.rowsandcols.column.DoubleArrayColumn; import org.apache.druid.query.rowsandcols.column.IntArrayColumn; +import org.apache.druid.query.rowsandcols.column.LongArrayColumn; import org.apache.druid.query.rowsandcols.column.ObjectArrayColumn; import org.apache.druid.query.rowsandcols.semantic.AppendableRowsAndColumns; import org.apache.druid.segment.column.ColumnType; @@ -170,6 +171,12 @@ public Builder add(String name, int[] vals) return add(name, new IntArrayColumn(vals)); } + @SuppressWarnings("unused") + public Builder add(String name, long[] vals) + { + return add(name, new LongArrayColumn(vals)); + } + public Builder add(String name, double[] vals) { return add(name, new DoubleArrayColumn(vals)); diff --git a/processing/src/main/java/org/apache/druid/query/rowsandcols/column/LongArrayColumn.java b/processing/src/main/java/org/apache/druid/query/rowsandcols/column/LongArrayColumn.java new file mode 100644 index 000000000000..6374c145b94f --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/rowsandcols/column/LongArrayColumn.java @@ -0,0 +1,201 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.rowsandcols.column; + +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.Numbers; +import org.apache.druid.query.rowsandcols.util.FindResult; +import org.apache.druid.segment.column.ColumnType; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.util.Arrays; + +public class LongArrayColumn implements Column +{ + private final long[] vals; + + public LongArrayColumn( + long[] vals + ) + { + this.vals = vals; + } + + @Nonnull + @Override + public ColumnAccessor toAccessor() + { + return new MyColumnAccessor(); + } + + @Nullable + @SuppressWarnings("unchecked") + @Override + public T as(Class clazz) + { + if (VectorCopier.class.equals(clazz)) { + return (T) (VectorCopier) (into, intoStart) -> { + if (Integer.MAX_VALUE - vals.length < intoStart) { + throw new ISE( + "too many rows!!! 
intoStart[%,d], vals.length[%,d] combine to exceed max_int", + intoStart, + vals.length + ); + } + for (int i = 0; i < vals.length; ++i) { + into[intoStart + i] = vals[i]; + } + }; + } + if (ColumnValueSwapper.class.equals(clazz)) { + return (T) (ColumnValueSwapper) (lhs, rhs) -> { + long tmp = vals[lhs]; + vals[lhs] = vals[rhs]; + vals[rhs] = tmp; + }; + } + return null; + } + + private class MyColumnAccessor implements BinarySearchableAccessor + { + @Override + public ColumnType getType() + { + return ColumnType.LONG; + } + + @Override + public int numRows() + { + return vals.length; + } + + @Override + public boolean isNull(int rowNum) + { + return false; + } + + @Override + public Object getObject(int rowNum) + { + return vals[rowNum]; + } + + @Override + public double getDouble(int rowNum) + { + return vals[rowNum]; + } + + @Override + public float getFloat(int rowNum) + { + return vals[rowNum]; + } + + @Override + public long getLong(int rowNum) + { + return vals[rowNum]; + } + + @Override + public int getInt(int rowNum) + { + return (int) vals[rowNum]; + } + + @Override + public int compareRows(int lhsRowNum, int rhsRowNum) + { + return Long.compare(vals[lhsRowNum], vals[rhsRowNum]); + } + + + @Override + public FindResult findNull(int startIndex, int endIndex) + { + return FindResult.notFound(endIndex); + } + + @Override + public FindResult findDouble(int startIndex, int endIndex, double val) + { + return findLong(startIndex, endIndex, (int) val); + } + + @Override + public FindResult findFloat(int startIndex, int endIndex, float val) + { + return findLong(startIndex, endIndex, (int) val); + } + + @Override + public FindResult findLong(int startIndex, int endIndex, long val) + { + if (vals[startIndex] == val) { + int end = startIndex + 1; + + while (end < endIndex && vals[end] == val) { + ++end; + } + return FindResult.found(startIndex, end); + } + + int i = Arrays.binarySearch(vals, startIndex, endIndex, val); + if (i > 0) { + int foundStart = i; + int 
foundEnd = i + 1; + + while (foundStart - 1 >= startIndex && vals[foundStart - 1] == val) { + --foundStart; + } + + while (foundEnd < endIndex && vals[foundEnd] == val) { + ++foundEnd; + } + + return FindResult.found(foundStart, foundEnd); + } else { + return FindResult.notFound(-(i + 1)); + } + } + + public FindResult findInt(int startIndex, int endIndex, int val) + { + return findLong(startIndex, endIndex, val); + } + + @Override + public FindResult findString(int startIndex, int endIndex, String val) + { + return findLong(startIndex, endIndex, (int) Numbers.tryParseLong(val, 0)); + } + + @Override + public FindResult findComplex(int startIndex, int endIndex, Object val) + { + return findLong(startIndex, endIndex, Numbers.tryParseLong(val, 0)); + } + } +} diff --git a/processing/src/main/java/org/apache/druid/query/rowsandcols/concrete/ColumnHolderRACColumn.java b/processing/src/main/java/org/apache/druid/query/rowsandcols/concrete/ColumnHolderRACColumn.java index ed4f8ead52e8..d68f8872bf46 100644 --- a/processing/src/main/java/org/apache/druid/query/rowsandcols/concrete/ColumnHolderRACColumn.java +++ b/processing/src/main/java/org/apache/druid/query/rowsandcols/concrete/ColumnHolderRACColumn.java @@ -91,7 +91,7 @@ public int numRows() public boolean isNull(int rowNum) { offset.set(rowNum); - return valueSelector.isNull(); + return valueSelector.getObject() == null; } @Nullable diff --git a/processing/src/main/java/org/apache/druid/query/rowsandcols/semantic/DefaultColumnSelectorFactoryMaker.java b/processing/src/main/java/org/apache/druid/query/rowsandcols/semantic/DefaultColumnSelectorFactoryMaker.java index 3c6d3cc08c9b..2eda694ec101 100644 --- a/processing/src/main/java/org/apache/druid/query/rowsandcols/semantic/DefaultColumnSelectorFactoryMaker.java +++ b/processing/src/main/java/org/apache/druid/query/rowsandcols/semantic/DefaultColumnSelectorFactoryMaker.java @@ -210,6 +210,7 @@ public PassThroughColumnValueSelector( break; case ARRAY: myClazz = 
List.class; + break; default: throw DruidException.defensive("this class cannot handle type [%s]", columnAccessor.getType()); } diff --git a/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnMerger.java b/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnMerger.java index 5d1198f5460a..801eaf112a5f 100644 --- a/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnMerger.java +++ b/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnMerger.java @@ -22,7 +22,7 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Iterators; import com.google.common.collect.PeekingIterator; -import org.apache.druid.java.util.common.ISE; +import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.segment.column.ColumnDescriptor; @@ -212,7 +212,7 @@ public void writeMergedValueDictionary(List adapters) throws I ); break; default: - throw new ISE( + throw DruidException.defensive( "How did we get here? 
Column [%s] with type [%s] does not have specialized serializer", name, logicalType @@ -349,7 +349,7 @@ public boolean hasOnlyNulls() @Override public ColumnDescriptor makeColumnDescriptor() { - ColumnDescriptor.Builder descriptorBuilder = new ColumnDescriptor.Builder(); + ColumnDescriptor.Builder descriptorBuilder = ColumnDescriptor.builder(); final NestedCommonFormatColumnPartSerde partSerde = NestedCommonFormatColumnPartSerde.serializerBuilder() diff --git a/processing/src/main/java/org/apache/druid/segment/column/BaseColumn.java b/processing/src/main/java/org/apache/druid/segment/column/BaseColumn.java index f22693365e13..e6011898d7db 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/BaseColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/column/BaseColumn.java @@ -26,6 +26,7 @@ import org.apache.druid.segment.vector.VectorObjectSelector; import org.apache.druid.segment.vector.VectorValueSelector; +import javax.annotation.Nullable; import java.io.Closeable; public interface BaseColumn extends Closeable @@ -41,4 +42,10 @@ default VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset offse { throw new UOE("Cannot make VectorObjectSelector for column with class[%s]", getClass().getName()); } + + @Nullable + default T as(Class clazz) + { + return null; + } } diff --git a/processing/src/main/java/org/apache/druid/segment/column/LongsColumn.java b/processing/src/main/java/org/apache/druid/segment/column/LongsColumn.java index 6f17dfb7c015..1f88ab044d10 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/LongsColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/column/LongsColumn.java @@ -28,6 +28,8 @@ import org.apache.druid.segment.vector.ReadableVectorOffset; import org.apache.druid.segment.vector.VectorValueSelector; +import javax.annotation.Nullable; + /** */ public class LongsColumn implements NumericColumn @@ -75,6 +77,13 @@ public long getLongSingleValueRow(int rowNum) return 
column.get(rowNum); } + @Override + @Nullable + public T as(Class clazz) + { + return column.as(clazz); + } + @Override public void close() { diff --git a/processing/src/main/java/org/apache/druid/segment/column/StringUtf8DictionaryEncodedColumn.java b/processing/src/main/java/org/apache/druid/segment/column/StringUtf8DictionaryEncodedColumn.java index fe8ade4a9ed6..c3ebde1854c0 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/StringUtf8DictionaryEncodedColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/column/StringUtf8DictionaryEncodedColumn.java @@ -528,11 +528,11 @@ public Indexed getStringDictionary() /** * Base type for a {@link SingleValueDimensionVectorSelector} for a dictionary encoded {@link ColumnType#STRING} * built around a {@link ColumnarInts}. Dictionary not included - BYO dictionary lookup methods. - * + *

* Assumes that all implementations return true for {@link #supportsLookupNameUtf8()}. */ public abstract static class StringSingleValueDimensionVectorSelector - implements SingleValueDimensionVectorSelector, IdLookup + implements SingleValueDimensionVectorSelector, IdLookup { private final ColumnarInts column; private final ReadableVectorOffset offset; @@ -540,8 +540,8 @@ public abstract static class StringSingleValueDimensionVectorSelector private int id = ReadableVectorInspector.NULL_ID; public StringSingleValueDimensionVectorSelector( - ColumnarInts column, - ReadableVectorOffset offset + ColumnarInts column, + ReadableVectorOffset offset ) { this.column = column; @@ -601,11 +601,11 @@ public int getMaxVectorSize() /** * Base type for a {@link MultiValueDimensionVectorSelector} for a dictionary encoded {@link ColumnType#STRING} * built around a {@link ColumnarMultiInts}. Dictionary not included - BYO dictionary lookup methods. - * + *

* Assumes that all implementations return true for {@link #supportsLookupNameUtf8()}. */ public abstract static class StringMultiValueDimensionVectorSelector - implements MultiValueDimensionVectorSelector, IdLookup + implements MultiValueDimensionVectorSelector, IdLookup { private final ColumnarMultiInts multiValueColumn; private final ReadableVectorOffset offset; @@ -614,8 +614,8 @@ public abstract static class StringMultiValueDimensionVectorSelector private int id = ReadableVectorInspector.NULL_ID; public StringMultiValueDimensionVectorSelector( - ColumnarMultiInts multiValueColumn, - ReadableVectorOffset offset + ColumnarMultiInts multiValueColumn, + ReadableVectorOffset offset ) { this.multiValueColumn = multiValueColumn; @@ -670,6 +670,7 @@ public IdLookup idLookup() { return this; } + @Override public int getCurrentVectorSize() { @@ -697,8 +698,8 @@ public abstract static class StringVectorObjectSelector implements VectorObjectS private int id = ReadableVectorInspector.NULL_ID; public StringVectorObjectSelector( - ColumnarInts column, - ReadableVectorOffset offset + ColumnarInts column, + ReadableVectorOffset offset ) { this.column = column; @@ -757,8 +758,8 @@ public abstract static class MultiValueStringVectorObjectSelector implements Vec private int id = ReadableVectorInspector.NULL_ID; public MultiValueStringVectorObjectSelector( - ColumnarMultiInts multiValueColumn, - ReadableVectorOffset offset + ColumnarMultiInts multiValueColumn, + ReadableVectorOffset offset ) { this.multiValueColumn = multiValueColumn; diff --git a/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarLongsSupplier.java b/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarLongsSupplier.java index 6fe04fbd31f9..36dbf5f5309f 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarLongsSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/data/BlockLayoutColumnarLongsSupplier.java @@ -47,7 +47,7 
@@ public BlockLayoutColumnarLongsSupplier( CompressionStrategy strategy ) { - baseLongBuffers = GenericIndexed.read(fromBuffer, DecompressingByteBufferObjectStrategy.of(order, strategy)); + this.baseLongBuffers = GenericIndexed.read(fromBuffer, DecompressingByteBufferObjectStrategy.of(order, strategy)); this.totalSize = totalSize; this.sizePer = sizePer; this.baseReader = reader; @@ -156,6 +156,12 @@ public long get(int index) @Override public void get(final long[] out, final int start, final int length) + { + get(out, 0, start, length); + } + + @Override + public void get(long[] out, int offset, int start, int length) { // division + remainder is optimized by the compiler so keep those together int bufferNum = start / sizePer; @@ -169,7 +175,7 @@ public void get(final long[] out, final int start, final int length) } final int limit = Math.min(length - p, sizePer - bufferIndex); - reader.read(out, p, bufferIndex, limit); + reader.read(out, offset + p, bufferIndex, limit); p += limit; bufferNum++; bufferIndex = 0; diff --git a/processing/src/main/java/org/apache/druid/segment/data/ColumnarInts.java b/processing/src/main/java/org/apache/druid/segment/data/ColumnarInts.java index dc2adbbb6710..e4633d032980 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/ColumnarInts.java +++ b/processing/src/main/java/org/apache/druid/segment/data/ColumnarInts.java @@ -28,4 +28,10 @@ */ public interface ColumnarInts extends IndexedInts, Closeable { + default void get(int[] out, int offset, int start, int length) + { + for (int i = 0; i < length; i++) { + out[offset + i] = get(i + start); + } + } } diff --git a/processing/src/main/java/org/apache/druid/segment/data/ColumnarLongs.java b/processing/src/main/java/org/apache/druid/segment/data/ColumnarLongs.java index 256c9934a21a..6d8162ef2670 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/ColumnarLongs.java +++ b/processing/src/main/java/org/apache/druid/segment/data/ColumnarLongs.java @@ -46,9 
+46,14 @@ public interface ColumnarLongs extends Closeable long get(int index); default void get(long[] out, int start, int length) + { + get(out, 0, start, length); + } + + default void get(long[] out, int offset, int start, int length) { for (int i = 0; i < length; i++) { - out[i] = get(i + start); + out[offset + i] = get(i + start); } } @@ -62,6 +67,12 @@ default void get(long[] out, int[] indexes, int length) @Override void close(); + @Nullable + default T as(Class clazz) + { + return null; + } + default ColumnValueSelector makeColumnValueSelector(ReadableOffset offset, ImmutableBitmap nullValueBitmap) { if (nullValueBitmap.isEmpty()) { diff --git a/processing/src/main/java/org/apache/druid/segment/data/ColumnarLongsSerializer.java b/processing/src/main/java/org/apache/druid/segment/data/ColumnarLongsSerializer.java index 05cf26439e35..2166874a9f33 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/ColumnarLongsSerializer.java +++ b/processing/src/main/java/org/apache/druid/segment/data/ColumnarLongsSerializer.java @@ -29,6 +29,15 @@ public interface ColumnarLongsSerializer extends Serializer { void open() throws IOException; + int size(); + void add(long value) throws IOException; + + default void addAll(long[] values, int start, int end) throws IOException + { + for (int i = start; i < end; ++i) { + add(values[i]); + } + } } diff --git a/processing/src/main/java/org/apache/druid/segment/loading/MMappedQueryableSegmentizerFactory.java b/processing/src/main/java/org/apache/druid/segment/loading/MMappedQueryableSegmentizerFactory.java index d3124cb122f9..4afd7691fdf6 100644 --- a/processing/src/main/java/org/apache/druid/segment/loading/MMappedQueryableSegmentizerFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/loading/MMappedQueryableSegmentizerFactory.java @@ -20,6 +20,7 @@ package org.apache.druid.segment.loading; import com.fasterxml.jackson.annotation.JacksonInject; +import com.fasterxml.jackson.annotation.JsonCreator; 
import com.google.common.base.Preconditions; import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.QueryableIndexSegment; @@ -34,9 +35,29 @@ */ public class MMappedQueryableSegmentizerFactory implements SegmentizerFactory { + /** + * A static method to make a segmentizer factory that can be serialized by Jackson. This exists because the object + * doesn't actually need the IndexIO object in order to be serialized by Jackson, but the public constructor + * *does* require it. We leave the public constructor alone because if indexIO is null, this SegmentizerFactory + * is largely useless, so we just create this one static method to enable creating the object for this singular + * use case. + * + * @return a SegmentizerFactory that can be used to be serialized by Jackson + */ + @SuppressWarnings("unused") + public static MMappedQueryableSegmentizerFactory makeForSerialization() + { + return new MMappedQueryableSegmentizerFactory(); + } private final IndexIO indexIO; + private MMappedQueryableSegmentizerFactory() + { + this.indexIO = null; + } + + @JsonCreator public MMappedQueryableSegmentizerFactory(@JacksonInject IndexIO indexIO) { this.indexIO = Preconditions.checkNotNull(indexIO, "Null IndexIO"); diff --git a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java index d3869bd9ef58..daf4f12a24a7 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java @@ -451,7 +451,10 @@ public VectorValueSelector makeVectorValueSelector(ReadableVectorOffset offset) @Override public int getLength() { - return -1; + if (compressedRawColumn == null) { + compressedRawColumn = closer.register(compressedRawColumnSupplier.get()); + } + return compressedRawColumn.size(); } 
@Override @@ -534,9 +537,14 @@ public ColumnValueSelector makeColumnValueSelector(List path, if (arrayFieldIndex >= 0) { final int elementNumber = ((NestedPathArrayElement) lastPath).getIndex(); if (elementNumber < 0) { - throw new IAE("Cannot make array element selector for path [%s], negative array index not supported for this selector", path); + throw new IAE( + "Cannot make array element selector for path [%s], negative array index not supported for this selector", + path); } - DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder(arrayField, arrayFieldIndex).getColumn(); + DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder( + arrayField, + arrayFieldIndex + ).getColumn(); ColumnValueSelector arraySelector = col.makeColumnValueSelector(readableOffset); return new ColumnValueSelector() { @@ -633,9 +641,15 @@ public VectorObjectSelector makeVectorObjectSelector(List path, if (arrayFieldIndex >= 0) { final int elementNumber = ((NestedPathArrayElement) lastPath).getIndex(); if (elementNumber < 0) { - throw new IAE("Cannot make array element selector for path [%s], negative array index not supported for this selector", path); + throw new IAE( + "Cannot make array element selector for path [%s], negative array index not supported for this selector", + path + ); } - DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder(arrayField, arrayFieldIndex).getColumn(); + DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder( + arrayField, + arrayFieldIndex + ).getColumn(); VectorObjectSelector arraySelector = col.makeVectorObjectSelector(readableOffset); return new VectorObjectSelector() @@ -701,9 +715,15 @@ public VectorValueSelector makeVectorValueSelector(List path, Re if (arrayFieldIndex >= 0) { final int elementNumber = ((NestedPathArrayElement) lastPath).getIndex(); if (elementNumber < 0) { - throw new IAE("Cannot make array element selector for path [%s], negative array index not supported for 
this selector", path); + throw new IAE( + "Cannot make array element selector for path [%s], negative array index not supported for this selector", + path + ); } - DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder(arrayField, arrayFieldIndex).getColumn(); + DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder( + arrayField, + arrayFieldIndex + ).getColumn(); VectorObjectSelector arraySelector = col.makeVectorObjectSelector(readableOffset); return new VectorValueSelector() diff --git a/processing/src/main/java/org/apache/druid/segment/nested/DictionaryIdLookup.java b/processing/src/main/java/org/apache/druid/segment/nested/DictionaryIdLookup.java index f4176db220cd..44c37da01f26 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/DictionaryIdLookup.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/DictionaryIdLookup.java @@ -51,7 +51,7 @@ /** * Value to dictionary id lookup, backed with memory mapped dictionaries populated lazily by the supplied - * @link DictionaryWriter}. + * {@link DictionaryWriter}. 
*/ public final class DictionaryIdLookup implements Closeable { diff --git a/processing/src/test/java/org/apache/druid/query/rowsandcols/concrete/ColumnBasedFrameRowsAndColumnsTest.java b/processing/src/test/java/org/apache/druid/query/rowsandcols/concrete/ColumnBasedFrameRowsAndColumnsTest.java index cd1bb1b81ecb..acfcbe6f83ed 100644 --- a/processing/src/test/java/org/apache/druid/query/rowsandcols/concrete/ColumnBasedFrameRowsAndColumnsTest.java +++ b/processing/src/test/java/org/apache/druid/query/rowsandcols/concrete/ColumnBasedFrameRowsAndColumnsTest.java @@ -33,17 +33,13 @@ public ColumnBasedFrameRowsAndColumnsTest() super(ColumnBasedFrameRowsAndColumns.class); } - public static Function MAKER = input -> { - - return buildFrame(input); - }; + public static Function MAKER = ColumnBasedFrameRowsAndColumnsTest::buildFrame; public static ColumnBasedFrameRowsAndColumns buildFrame(MapOfColumnsRowsAndColumns input) { LazilyDecoratedRowsAndColumns rac = new LazilyDecoratedRowsAndColumns(input, null, null, null, OffsetLimit.limit(Integer.MAX_VALUE), null, null); rac.numRows(); // materialize - return (ColumnBasedFrameRowsAndColumns) rac.getBase(); } } From 18efbf6f5e22c1e8817123d6a81dc9415b269e5f Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Thu, 4 Jul 2024 10:14:45 +0530 Subject: [PATCH 09/12] Add FieldReader#makeRACColumn for use with row based frames --- .../apache/druid/frame/field/FieldReader.java | 12 ++- .../concrete/RowBasedFrameRowsAndColumns.java | 17 +++- .../rowsandcols/RowsAndColumnsTestBase.java | 5 +- .../RowBasedFrameRowsAndColumnsTest.java | 80 +++++++++++++++++++ 4 files changed, 107 insertions(+), 7 deletions(-) create mode 100644 processing/src/test/java/org/apache/druid/query/rowsandcols/concrete/RowBasedFrameRowsAndColumnsTest.java diff --git a/processing/src/main/java/org/apache/druid/frame/field/FieldReader.java b/processing/src/main/java/org/apache/druid/frame/field/FieldReader.java index bc9d631361f3..8cf0378071d3 100644 --- 
a/processing/src/main/java/org/apache/druid/frame/field/FieldReader.java +++ b/processing/src/main/java/org/apache/druid/frame/field/FieldReader.java @@ -20,24 +20,32 @@ package org.apache.druid.frame.field; import org.apache.datasketches.memory.Memory; +import org.apache.druid.frame.Frame; import org.apache.druid.frame.key.RowKey; import org.apache.druid.frame.key.RowKeyReader; import org.apache.druid.query.extraction.ExtractionFn; +import org.apache.druid.query.rowsandcols.column.Column; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.DimensionSelector; +import org.apache.druid.segment.column.RowSignature; import javax.annotation.Nullable; /** * Embeds the logic to read a specific field from row-based frames or from {@link RowKey}. - * + *

* Most callers should use {@link org.apache.druid.frame.read.FrameReader} or * {@link RowKeyReader} rather than using this interface directly. - * + *

* Stateless and immutable. */ public interface FieldReader { + /** + * Create a {@link Column} which provides access to the rows in the frame, via the {@link Column#toAccessor()}. + */ + Column makeRACColumn(Frame frame, RowSignature signature, String columnName); + /** * Create a {@link ColumnValueSelector} backed by some memory and a moveable pointer. */ diff --git a/processing/src/main/java/org/apache/druid/query/rowsandcols/concrete/RowBasedFrameRowsAndColumns.java b/processing/src/main/java/org/apache/druid/query/rowsandcols/concrete/RowBasedFrameRowsAndColumns.java index 234410bc070c..fa17984e9ba5 100644 --- a/processing/src/main/java/org/apache/druid/query/rowsandcols/concrete/RowBasedFrameRowsAndColumns.java +++ b/processing/src/main/java/org/apache/druid/query/rowsandcols/concrete/RowBasedFrameRowsAndColumns.java @@ -19,12 +19,13 @@ package org.apache.druid.query.rowsandcols.concrete; +import org.apache.druid.error.DruidException; import org.apache.druid.frame.Frame; import org.apache.druid.frame.FrameType; +import org.apache.druid.frame.field.FieldReader; +import org.apache.druid.frame.field.FieldReaders; import org.apache.druid.frame.read.FrameReader; -import org.apache.druid.frame.read.columnar.FrameColumnReaders; import org.apache.druid.frame.segment.FrameStorageAdapter; -import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.query.rowsandcols.RowsAndColumns; import org.apache.druid.query.rowsandcols.column.Column; @@ -65,6 +66,7 @@ public int numRows() @Override public Column findColumn(String name) { + // Use contains so that we can negative cache.
if (!colCache.containsKey(name)) { final int columnIndex = signature.indexOf(name); if (columnIndex < 0) { @@ -72,9 +74,16 @@ public Column findColumn(String name) } else { final ColumnType columnType = signature .getColumnType(columnIndex) - .orElseThrow(() -> new ISE("just got the id, why is columnType not there?")); + .orElseThrow( + () -> DruidException.defensive( + "just got the id [%s][%s], why is columnType not there?", + columnIndex, + name + ) + ); - colCache.put(name, FrameColumnReaders.create(name, columnIndex, columnType).readRACColumn(frame)); + final FieldReader reader = FieldReaders.create(name, columnType); + colCache.put(name, reader.makeRACColumn(frame, signature, name)); } } return colCache.get(name); diff --git a/processing/src/test/java/org/apache/druid/query/rowsandcols/RowsAndColumnsTestBase.java b/processing/src/test/java/org/apache/druid/query/rowsandcols/RowsAndColumnsTestBase.java index 56be3d50f20e..7b639b3d48d4 100644 --- a/processing/src/test/java/org/apache/druid/query/rowsandcols/RowsAndColumnsTestBase.java +++ b/processing/src/test/java/org/apache/druid/query/rowsandcols/RowsAndColumnsTestBase.java @@ -24,6 +24,8 @@ import org.apache.druid.java.util.common.ISE; import org.apache.druid.query.rowsandcols.concrete.ColumnBasedFrameRowsAndColumns; import org.apache.druid.query.rowsandcols.concrete.ColumnBasedFrameRowsAndColumnsTest; +import org.apache.druid.query.rowsandcols.concrete.RowBasedFrameRowsAndColumns; +import org.apache.druid.query.rowsandcols.concrete.RowBasedFrameRowsAndColumnsTest; import org.junit.Assert; import org.junit.Test; @@ -67,7 +69,8 @@ private static ArrayList getMakers() new Object[]{ConcatRowsAndColumns.class, ConcatRowsAndColumnsTest.MAKER}, new Object[]{RearrangedRowsAndColumns.class, RearrangedRowsAndColumnsTest.MAKER}, new Object[]{ColumnBasedFrameRowsAndColumns.class, ColumnBasedFrameRowsAndColumnsTest.MAKER}, - new Object[]{StorageAdapterRowsAndColumns.class, StorageAdapterRowsAndColumnsTest.MAKER} + new 
Object[]{StorageAdapterRowsAndColumns.class, StorageAdapterRowsAndColumnsTest.MAKER}, + new Object[]{RowBasedFrameRowsAndColumns.class, RowBasedFrameRowsAndColumnsTest.MAKER} ); } diff --git a/processing/src/test/java/org/apache/druid/query/rowsandcols/concrete/RowBasedFrameRowsAndColumnsTest.java b/processing/src/test/java/org/apache/druid/query/rowsandcols/concrete/RowBasedFrameRowsAndColumnsTest.java new file mode 100644 index 000000000000..f9ad838796a8 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/query/rowsandcols/concrete/RowBasedFrameRowsAndColumnsTest.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.rowsandcols.concrete; + +import org.apache.druid.frame.Frame; +import org.apache.druid.frame.allocation.ArenaMemoryAllocatorFactory; +import org.apache.druid.frame.write.FrameWriter; +import org.apache.druid.frame.write.FrameWriters; +import org.apache.druid.query.rowsandcols.MapOfColumnsRowsAndColumns; +import org.apache.druid.query.rowsandcols.RowsAndColumnsTestBase; +import org.apache.druid.query.rowsandcols.column.Column; +import org.apache.druid.query.rowsandcols.semantic.ColumnSelectorFactoryMaker; +import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.column.RowSignature; + +import java.util.Collections; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Function; + +public class RowBasedFrameRowsAndColumnsTest extends RowsAndColumnsTestBase +{ + public RowBasedFrameRowsAndColumnsTest() +{ + super(RowBasedFrameRowsAndColumns.class); +} + + public static Function MAKER = RowBasedFrameRowsAndColumnsTest::buildFrame; + + private static RowBasedFrameRowsAndColumns buildFrame(MapOfColumnsRowsAndColumns rac) + { + final AtomicInteger rowId = new AtomicInteger(0); + final int numRows = rac.numRows(); + final ColumnSelectorFactoryMaker csfm = ColumnSelectorFactoryMaker.fromRAC(rac); + final ColumnSelectorFactory selectorFactory = csfm.make(rowId); + + final RowSignature.Builder sigBob = RowSignature.builder(); + final ArenaMemoryAllocatorFactory memFactory = new ArenaMemoryAllocatorFactory(200 << 20); + + + for (String column : rac.getColumnNames()) { + final Column racColumn = rac.findColumn(column); + if (racColumn == null) { + continue; + } + sigBob.add(column, racColumn.toAccessor().getType()); + } + + final RowSignature signature = sigBob.build(); + final FrameWriter frameWriter = FrameWriters.makeRowBasedFrameWriterFactory( + memFactory, + signature, + Collections.emptyList(), + false + ).newFrameWriter(selectorFactory); + + rowId.set(0); + for (; rowId.get() < 
numRows; rowId.incrementAndGet()) { + frameWriter.addSelection(); + } + + return new RowBasedFrameRowsAndColumns(Frame.wrap(frameWriter.toByteArray()), signature); + } +} From 54d18f4f8cedf952d93df0e5cc0a1e2888d548fe Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Thu, 4 Jul 2024 11:19:03 +0530 Subject: [PATCH 10/12] Add field readers for basic types --- .../druid/frame/field/ComplexFieldReader.java | 97 ++++- .../frame/field/DoubleArrayFieldReader.java | 10 + .../druid/frame/field/DoubleFieldReader.java | 140 +++++++ .../frame/field/FieldPositionHelper.java | 65 ++++ .../frame/field/FloatArrayFieldReader.java | 10 + .../druid/frame/field/FloatFieldReader.java | 140 +++++++ .../frame/field/LongArrayFieldReader.java | 10 + .../druid/frame/field/LongFieldReader.java | 141 ++++++- .../frame/field/NumericArrayFieldReader.java | 2 +- .../druid/frame/field/NumericFieldReader.java | 5 + .../druid/frame/field/StringFieldReader.java | 346 +++++++++++++++--- 11 files changed, 909 insertions(+), 57 deletions(-) create mode 100644 processing/src/main/java/org/apache/druid/frame/field/FieldPositionHelper.java diff --git a/processing/src/main/java/org/apache/druid/frame/field/ComplexFieldReader.java b/processing/src/main/java/org/apache/druid/frame/field/ComplexFieldReader.java index c0a0c872faec..d5d6c21cf9be 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/ComplexFieldReader.java +++ b/processing/src/main/java/org/apache/druid/frame/field/ComplexFieldReader.java @@ -21,24 +21,32 @@ import com.google.common.base.Preconditions; import org.apache.datasketches.memory.Memory; +import org.apache.druid.frame.Frame; +import org.apache.druid.frame.write.RowBasedFrameWriter; import org.apache.druid.java.util.common.ISE; import org.apache.druid.query.extraction.ExtractionFn; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.query.rowsandcols.column.Column; +import 
org.apache.druid.query.rowsandcols.column.ColumnAccessor; +import org.apache.druid.query.rowsandcols.column.accessor.ObjectColumnAccessorBase; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.ObjectColumnSelector; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.serde.ComplexMetricSerde; import org.apache.druid.segment.serde.ComplexMetrics; +import javax.annotation.Nonnull; import javax.annotation.Nullable; +import java.util.Comparator; /** * Reads values written by {@link ComplexFieldWriter}. - * + *

* Format: - * + *

* - 1 byte: {@link ComplexFieldWriter#NULL_BYTE} or {@link ComplexFieldWriter#NOT_NULL_BYTE} * - 4 bytes: length of serialized complex value, little-endian int * - N bytes: serialized complex value @@ -121,7 +129,7 @@ public static Object readFieldFromByteArray( * Alternative interface to read the field from the memory without creating a selector and field pointer */ @Nullable - public static Object readFieldFromMemory( + public static T readFieldFromMemory( final ComplexMetricSerde serde, final Memory memory, final long position @@ -136,7 +144,8 @@ public static Object readFieldFromMemory( final byte[] bytes = new byte[length]; memory.getByteArray(position + ComplexFieldWriter.HEADER_SIZE, bytes, 0, length); - return serde.fromBytes(bytes, 0, length); + //noinspection unchecked + return (T) serde.fromBytes(bytes, 0, length); } else { throw new ISE("Unexpected null byte [%s]", nullByte); } @@ -162,8 +171,8 @@ private Selector(Memory memory, ReadableFieldPointer fieldPointer, ComplexMetric @Override public T getObject() { - //noinspection unchecked - return (T) readFieldFromMemory(serde, memory, fieldPointer.position()); + final long fieldPosition = fieldPointer.position(); + return readFieldFromMemory(serde, memory, fieldPosition); } @Override @@ -178,4 +187,80 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) // Do nothing. 
} } + + @Override + public Column makeRACColumn(Frame frame, RowSignature signature, String columnName) + { + return new ComplexFieldReaderColumn(frame, signature.indexOf(columnName), signature.size()); + } + + private class ComplexFieldReaderColumn implements Column + { + private final Frame frame; + private final Memory dataRegion; + private final ColumnType type; + private final FieldPositionHelper coach; + + public ComplexFieldReaderColumn(Frame frame, int columnIndex, int numFields) + { + this.frame = frame; + dataRegion = frame.region(RowBasedFrameWriter.ROW_DATA_REGION); + + this.type = ColumnType.ofComplex(serde.getTypeName()); + this.coach = new FieldPositionHelper( + frame, + frame.region(RowBasedFrameWriter.ROW_OFFSET_REGION), + dataRegion, + columnIndex, + numFields + ); + } + + @Nonnull + @Override + public ColumnAccessor toAccessor() + { + return new ObjectColumnAccessorBase() + { + @Override + public ColumnType getType() + { + return type; + } + + @Override + public int numRows() + { + return frame.numRows(); + } + + @Override + public boolean isNull(int rowNum) + { + final long fieldPosition = coach.computeFieldPosition(rowNum); + return dataRegion.getByte(fieldPosition) == ComplexFieldWriter.NULL_BYTE; + } + + @Override + protected Object getVal(int rowNum) + { + return readFieldFromMemory(serde, dataRegion, coach.computeFieldPosition(rowNum)); + } + + @Override + protected Comparator getComparator() + { + return serde.getTypeStrategy(); + } + + }; + } + + @Nullable + @Override + public T as(Class clazz) + { + return null; + } + } } diff --git a/processing/src/main/java/org/apache/druid/frame/field/DoubleArrayFieldReader.java b/processing/src/main/java/org/apache/druid/frame/field/DoubleArrayFieldReader.java index ec7de095e12c..7b1740e1910a 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/DoubleArrayFieldReader.java +++ b/processing/src/main/java/org/apache/druid/frame/field/DoubleArrayFieldReader.java @@ -20,7 +20,11 @@ package 
org.apache.druid.frame.field; import org.apache.datasketches.memory.Memory; +import org.apache.druid.error.NotYetImplemented; +import org.apache.druid.frame.Frame; +import org.apache.druid.query.rowsandcols.column.Column; import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.column.RowSignature; import javax.annotation.Nullable; @@ -60,4 +64,10 @@ public int getIndividualFieldSize() } }; } + + @Override + public Column makeRACColumn(Frame frame, RowSignature signature, String columnName) + { + throw NotYetImplemented.ex("Class cannot create an RAC column."); + } } diff --git a/processing/src/main/java/org/apache/druid/frame/field/DoubleFieldReader.java b/processing/src/main/java/org/apache/druid/frame/field/DoubleFieldReader.java index 7f7a3f8639eb..3afadebe7111 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/DoubleFieldReader.java +++ b/processing/src/main/java/org/apache/druid/frame/field/DoubleFieldReader.java @@ -21,11 +21,20 @@ import org.apache.datasketches.memory.Memory; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.frame.Frame; +import org.apache.druid.frame.write.RowBasedFrameWriter; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.query.rowsandcols.column.Column; +import org.apache.druid.query.rowsandcols.column.ColumnAccessor; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.DoubleColumnSelector; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.column.ValueType; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + /** * Reads the values produced by {@link DoubleFieldWriter} * @@ -99,4 +108,135 @@ public boolean isNull() return super._isNull(); } } + + @Override + public Column makeRACColumn(Frame frame, RowSignature signature, String columnName) + { + return new 
DoubleFieldReaderColumn(frame, signature.indexOf(columnName), signature.size()); + } + + private class DoubleFieldReaderColumn implements Column + { + private final Frame frame; + private final Memory dataRegion; + private final FieldPositionHelper coach; + + public DoubleFieldReaderColumn(Frame frame, int columnIndex, int numFields) + { + this.frame = frame; + dataRegion = frame.region(RowBasedFrameWriter.ROW_DATA_REGION); + + this.coach = new FieldPositionHelper( + frame, + frame.region(RowBasedFrameWriter.ROW_OFFSET_REGION), + dataRegion, + columnIndex, + numFields + ); + } + + @Nonnull + @Override + public ColumnAccessor toAccessor() + { + return new ColumnAccessor() + { + @Override + public ColumnType getType() + { + return ColumnType.DOUBLE; + } + + @Override + public int numRows() + { + return frame.numRows(); + } + + @Override + public boolean isNull(int rowNum) + { + final long fieldPosition = coach.computeFieldPosition(rowNum); + return dataRegion.getByte(fieldPosition) == getNullIndicatorByte(); + } + + @Nullable + @Override + public Object getObject(int rowNum) + { + final long fieldPosition = coach.computeFieldPosition(rowNum); + + if (dataRegion.getByte(fieldPosition) == getNullIndicatorByte()) { + return null; + } else { + return getDoubleAtPosition(fieldPosition); + } + } + + @Override + public double getDouble(int rowNum) + { + final long fieldPosition = coach.computeFieldPosition(rowNum); + + if (dataRegion.getByte(fieldPosition) == getNullIndicatorByte()) { + return 0L; + } else { + return getDoubleAtPosition(fieldPosition); + } + } + + @Override + public float getFloat(int rowNum) + { + return (float) getDouble(rowNum); + } + + @Override + public long getLong(int rowNum) + { + return (long) getDouble(rowNum); + } + + @Override + public int getInt(int rowNum) + { + return (int) getDouble(rowNum); + } + + @Override + public int compareRows(int lhsRowNum, int rhsRowNum) + { + long lhsPosition = coach.computeFieldPosition(lhsRowNum); + long 
rhsPosition = coach.computeFieldPosition(rhsRowNum); + + final byte nullIndicatorByte = getNullIndicatorByte(); + if (dataRegion.getByte(lhsPosition) == nullIndicatorByte) { + if (dataRegion.getByte(rhsPosition) == nullIndicatorByte) { + return 0; + } else { + return -1; + } + } else { + if (dataRegion.getByte(rhsPosition) == nullIndicatorByte) { + return 1; + } else { + return Double.compare(getDoubleAtPosition(lhsPosition), getDoubleAtPosition(rhsPosition)); + } + } + } + + private double getDoubleAtPosition(long lhsPosition) + { + return TransformUtils.detransformToDouble(dataRegion.getLong(lhsPosition + Byte.BYTES)); + } + }; + } + + @Nullable + @Override + public T as(Class clazz) + { + return null; + } + } } diff --git a/processing/src/main/java/org/apache/druid/frame/field/FieldPositionHelper.java b/processing/src/main/java/org/apache/druid/frame/field/FieldPositionHelper.java new file mode 100644 index 000000000000..d4abe5300b49 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/frame/field/FieldPositionHelper.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.frame.field; + +import org.apache.datasketches.memory.Memory; +import org.apache.druid.frame.Frame; + +/** + * Helps compute the field position for a frame from the different regions in the frame. + */ +public class FieldPositionHelper +{ + private final Frame frame; + private final Memory offsetRegion; + private final Memory dataRegion; + private final int columnIndex; + private final long fieldsBytesSize; + + public FieldPositionHelper( + Frame frame, + Memory offsetRegion, + Memory dataRegion, + int columnIndex, + int numFields + ) + { + this.frame = frame; + this.offsetRegion = offsetRegion; + this.dataRegion = dataRegion; + this.columnIndex = columnIndex; + this.fieldsBytesSize = this.columnIndex == 0 + ? ((long) numFields) * Integer.BYTES + : ((long) (this.columnIndex - 1)) * Integer.BYTES; + } + + public long computeFieldPosition(int rowNum) + { + rowNum = frame.physicalRow(rowNum); + final long rowPosition = rowNum == 0 ? 0 : offsetRegion.getLong(((long) rowNum - 1) * Long.BYTES); + final long fieldPosition; + if (columnIndex == 0) { + fieldPosition = rowPosition + fieldsBytesSize; + } else { + fieldPosition = rowPosition + dataRegion.getInt(rowPosition + fieldsBytesSize); + } + return fieldPosition; + } +} diff --git a/processing/src/main/java/org/apache/druid/frame/field/FloatArrayFieldReader.java b/processing/src/main/java/org/apache/druid/frame/field/FloatArrayFieldReader.java index e97af071824e..a5c9d6d97601 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/FloatArrayFieldReader.java +++ b/processing/src/main/java/org/apache/druid/frame/field/FloatArrayFieldReader.java @@ -20,7 +20,11 @@ package org.apache.druid.frame.field; import org.apache.datasketches.memory.Memory; +import org.apache.druid.error.NotYetImplemented; +import org.apache.druid.frame.Frame; +import org.apache.druid.query.rowsandcols.column.Column; import org.apache.druid.segment.ColumnValueSelector; +import 
org.apache.druid.segment.column.RowSignature; import javax.annotation.Nullable; @@ -60,4 +64,10 @@ public int getIndividualFieldSize() } }; } + + @Override + public Column makeRACColumn(Frame frame, RowSignature signature, String columnName) + { + throw NotYetImplemented.ex("Class cannot create an RAC column."); + } } diff --git a/processing/src/main/java/org/apache/druid/frame/field/FloatFieldReader.java b/processing/src/main/java/org/apache/druid/frame/field/FloatFieldReader.java index 6617d563d679..3fc7213c73ed 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/FloatFieldReader.java +++ b/processing/src/main/java/org/apache/druid/frame/field/FloatFieldReader.java @@ -21,11 +21,20 @@ import org.apache.datasketches.memory.Memory; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.frame.Frame; +import org.apache.druid.frame.write.RowBasedFrameWriter; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.query.rowsandcols.column.Column; +import org.apache.druid.query.rowsandcols.column.ColumnAccessor; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.FloatColumnSelector; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.column.ValueType; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + /** * Reads values written by {@link FloatFieldWriter}. 
* @@ -99,4 +108,135 @@ public boolean isNull() return super._isNull(); } } + + @Override + public Column makeRACColumn(Frame frame, RowSignature signature, String columnName) + { + return new FloatFieldReaderColumn(frame, signature.indexOf(columnName), signature.size()); + } + + private class FloatFieldReaderColumn implements Column + { + private final Frame frame; + private final Memory dataRegion; + private final FieldPositionHelper coach; + + public FloatFieldReaderColumn(Frame frame, int columnIndex, int numFields) + { + this.frame = frame; + dataRegion = frame.region(RowBasedFrameWriter.ROW_DATA_REGION); + + this.coach = new FieldPositionHelper( + frame, + frame.region(RowBasedFrameWriter.ROW_OFFSET_REGION), + dataRegion, + columnIndex, + numFields + ); + } + + @Nonnull + @Override + public ColumnAccessor toAccessor() + { + return new ColumnAccessor() + { + @Override + public ColumnType getType() + { + return ColumnType.FLOAT; + } + + @Override + public int numRows() + { + return frame.numRows(); + } + + @Override + public boolean isNull(int rowNum) + { + final long fieldPosition = coach.computeFieldPosition(rowNum); + return dataRegion.getByte(fieldPosition) == getNullIndicatorByte(); + } + + @Nullable + @Override + public Object getObject(int rowNum) + { + final long fieldPosition = coach.computeFieldPosition(rowNum); + + if (dataRegion.getByte(fieldPosition) == getNullIndicatorByte()) { + return null; + } else { + return getFloatAtPosition(fieldPosition); + } + } + + @Override + public double getDouble(int rowNum) + { + return getFloat(rowNum); + } + + @Override + public float getFloat(int rowNum) + { + final long fieldPosition = coach.computeFieldPosition(rowNum); + + if (dataRegion.getByte(fieldPosition) == getNullIndicatorByte()) { + return 0L; + } else { + return getFloatAtPosition(fieldPosition); + } + } + + @Override + public long getLong(int rowNum) + { + return (long) getFloat(rowNum); + } + + @Override + public int getInt(int rowNum) + { + return 
(int) getFloat(rowNum); + } + + @Override + public int compareRows(int lhsRowNum, int rhsRowNum) + { + long lhsPosition = coach.computeFieldPosition(lhsRowNum); + long rhsPosition = coach.computeFieldPosition(rhsRowNum); + + final byte nullIndicatorByte = getNullIndicatorByte(); + if (dataRegion.getByte(lhsPosition) == nullIndicatorByte) { + if (dataRegion.getByte(rhsPosition) == nullIndicatorByte) { + return 0; + } else { + return -1; + } + } else { + if (dataRegion.getByte(rhsPosition) == nullIndicatorByte) { + return 1; + } else { + return Float.compare(getFloatAtPosition(lhsPosition), getFloatAtPosition(rhsPosition)); + } + } + } + + private float getFloatAtPosition(long rhsPosition) + { + return TransformUtils.detransformToFloat(dataRegion.getInt(rhsPosition + Byte.BYTES)); + } + }; + } + + @Nullable + @Override + public T as(Class clazz) + { + return null; + } + } } diff --git a/processing/src/main/java/org/apache/druid/frame/field/LongArrayFieldReader.java b/processing/src/main/java/org/apache/druid/frame/field/LongArrayFieldReader.java index 8f7578c07d38..8ab8978acbcb 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/LongArrayFieldReader.java +++ b/processing/src/main/java/org/apache/druid/frame/field/LongArrayFieldReader.java @@ -20,7 +20,11 @@ package org.apache.druid.frame.field; import org.apache.datasketches.memory.Memory; +import org.apache.druid.error.NotYetImplemented; +import org.apache.druid.frame.Frame; +import org.apache.druid.query.rowsandcols.column.Column; import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.column.RowSignature; import javax.annotation.Nullable; @@ -60,4 +64,10 @@ public int getIndividualFieldSize() } }; } + + @Override + public Column makeRACColumn(Frame frame, RowSignature signature, String columnName) + { + throw NotYetImplemented.ex("Class cannot create an RAC column."); + } } diff --git a/processing/src/main/java/org/apache/druid/frame/field/LongFieldReader.java 
b/processing/src/main/java/org/apache/druid/frame/field/LongFieldReader.java index 8f3bbbf04517..9b514c930873 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/LongFieldReader.java +++ b/processing/src/main/java/org/apache/druid/frame/field/LongFieldReader.java @@ -21,11 +21,20 @@ import org.apache.datasketches.memory.Memory; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.frame.Frame; +import org.apache.druid.frame.write.RowBasedFrameWriter; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.query.rowsandcols.column.Column; +import org.apache.druid.query.rowsandcols.column.ColumnAccessor; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.LongColumnSelector; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.column.ValueType; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + /** * Reads values written by {@link LongFieldWriter}. 
* @@ -68,7 +77,6 @@ public ColumnValueSelector getColumnValueSelector( private static class LongFieldSelector extends NumericFieldReader.Selector implements LongColumnSelector { - final Memory dataRegion; final ReadableFieldPointer fieldPointer; @@ -99,4 +107,135 @@ public boolean isNull() return super._isNull(); } } + + @Override + public Column makeRACColumn(Frame frame, RowSignature signature, String columnName) + { + return new LongFieldReaderColumn(frame, signature.indexOf(columnName), signature.size()); + } + + private class LongFieldReaderColumn implements Column + { + private final Frame frame; + private final Memory dataRegion; + private final FieldPositionHelper coach; + + public LongFieldReaderColumn(Frame frame, int columnIndex, int numFields) + { + this.frame = frame; + this.dataRegion = frame.region(RowBasedFrameWriter.ROW_DATA_REGION); + + this.coach = new FieldPositionHelper( + frame, + frame.region(RowBasedFrameWriter.ROW_OFFSET_REGION), + dataRegion, + columnIndex, + numFields + ); + } + + @Nonnull + @Override + public ColumnAccessor toAccessor() + { + return new ColumnAccessor() + { + @Override + public ColumnType getType() + { + return ColumnType.LONG; + } + + @Override + public int numRows() + { + return frame.numRows(); + } + + @Override + public boolean isNull(int rowNum) + { + final long fieldPosition = coach.computeFieldPosition(rowNum); + return dataRegion.getByte(fieldPosition) == getNullIndicatorByte(); + } + + @Nullable + @Override + public Object getObject(int rowNum) + { + final long fieldPosition = coach.computeFieldPosition(rowNum); + + if (dataRegion.getByte(fieldPosition) == getNullIndicatorByte()) { + return null; + } else { + return getLongAtPosition(fieldPosition); + } + } + + @Override + public double getDouble(int rowNum) + { + return getLong(rowNum); + } + + @Override + public float getFloat(int rowNum) + { + return getLong(rowNum); + } + + @Override + public long getLong(int rowNum) + { + final long fieldPosition = 
coach.computeFieldPosition(rowNum); + + if (dataRegion.getByte(fieldPosition) == getNullIndicatorByte()) { + return 0L; + } else { + return getLongAtPosition(fieldPosition); + } + } + + @Override + public int getInt(int rowNum) + { + return (int) getLong(rowNum); + } + + @Override + public int compareRows(int lhsRowNum, int rhsRowNum) + { + long lhsPosition = coach.computeFieldPosition(lhsRowNum); + long rhsPosition = coach.computeFieldPosition(rhsRowNum); + + final byte nullIndicatorByte = getNullIndicatorByte(); + if (dataRegion.getByte(lhsPosition) == nullIndicatorByte) { + if (dataRegion.getByte(rhsPosition) == nullIndicatorByte) { + return 0; + } else { + return -1; + } + } else { + if (dataRegion.getByte(rhsPosition) == nullIndicatorByte) { + return 1; + } else { + return Long.compare(getLongAtPosition(lhsPosition), getLongAtPosition(rhsPosition)); + } + } + } + + private long getLongAtPosition(long rhsPosition) + { + return TransformUtils.detransformToLong(dataRegion.getLong(rhsPosition + Byte.BYTES)); + } + }; + } + + @Nullable + @Override + public T as(Class clazz) + { + return null; + } + } } diff --git a/processing/src/main/java/org/apache/druid/frame/field/NumericArrayFieldReader.java b/processing/src/main/java/org/apache/druid/frame/field/NumericArrayFieldReader.java index 8d6f3958b1e4..92dfbc98596d 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/NumericArrayFieldReader.java +++ b/processing/src/main/java/org/apache/druid/frame/field/NumericArrayFieldReader.java @@ -29,7 +29,7 @@ /** * Reader class for the fields written by {@link NumericArrayFieldWriter}. See the Javadoc for the writer for more * information on the format - * + *

* The numeric array fields are byte comparable */ public abstract class NumericArrayFieldReader implements FieldReader diff --git a/processing/src/main/java/org/apache/druid/frame/field/NumericFieldReader.java b/processing/src/main/java/org/apache/druid/frame/field/NumericFieldReader.java index 1e11cfa65f37..03a79c8ae670 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/NumericFieldReader.java +++ b/processing/src/main/java/org/apache/druid/frame/field/NumericFieldReader.java @@ -50,6 +50,11 @@ public NumericFieldReader(boolean forArray) } } + public byte getNullIndicatorByte() + { + return nullIndicatorByte; + } + @Override public ColumnValueSelector makeColumnValueSelector(Memory memory, ReadableFieldPointer fieldPointer) { diff --git a/processing/src/main/java/org/apache/druid/frame/field/StringFieldReader.java b/processing/src/main/java/org/apache/druid/frame/field/StringFieldReader.java index 2513a2d24444..ca2ad896a3e7 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/StringFieldReader.java +++ b/processing/src/main/java/org/apache/druid/frame/field/StringFieldReader.java @@ -23,51 +23,64 @@ import it.unimi.dsi.fastutil.objects.ObjectArrays; import org.apache.datasketches.memory.Memory; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.error.DruidException; +import org.apache.druid.error.NotYetImplemented; +import org.apache.druid.frame.Frame; import org.apache.druid.frame.read.FrameReaderUtils; +import org.apache.druid.frame.write.RowBasedFrameWriter; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.query.extraction.ExtractionFn; import org.apache.druid.query.filter.DruidPredicateFactory; import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.query.rowsandcols.column.Column; +import 
org.apache.druid.query.rowsandcols.column.ColumnAccessor; +import org.apache.druid.query.rowsandcols.column.accessor.ObjectColumnAccessorBase; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.DimensionSelectorUtils; import org.apache.druid.segment.IdLookup; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.data.RangeIndexedInts; +import javax.annotation.Nonnull; import javax.annotation.Nullable; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; +import java.util.Comparator; import java.util.List; /** * Reads fields written by {@link StringFieldWriter} or {@link StringArrayFieldWriter}. - * + *

* Strings are written in UTF8 and terminated by {@link StringFieldWriter#VALUE_TERMINATOR}. Note that this byte * appears in valid UTF8 encodings if and only if the string contains a NUL (char 0). Therefore, this field writer * cannot write out strings containing NUL characters. - * + *

* All rows are terminated by {@link StringFieldWriter#ROW_TERMINATOR}. - * + *

* Empty rows are represented in one byte: solely that {@link StringFieldWriter#ROW_TERMINATOR}. Rows that are null * themselves (i.e., a null array) are represented as a {@link StringFieldWriter#NULL_ROW} followed by a * {@link StringFieldWriter#ROW_TERMINATOR}. This encoding for null arrays is decoded by older readers as an * empty array; null arrays are a feature that did not exist in earlier versions of the code. - * + *

* Null strings are stored as {@link StringFieldWriter#NULL_BYTE}. All other strings are prepended by * {@link StringFieldWriter#NOT_NULL_BYTE} byte to differentiate them from nulls. - * + *

* This encoding allows the encoded data to be compared as bytes in a way that matches the behavior of * {@link org.apache.druid.segment.StringDimensionHandler#DIMENSION_SELECTOR_COMPARATOR}, except null and * empty list are not considered equal. */ public class StringFieldReader implements FieldReader { + public static final byte[] EXPECTED_BYTES_FOR_NULL = { + StringFieldWriter.NULL_BYTE, StringFieldWriter.VALUE_TERMINATOR, StringFieldWriter.ROW_TERMINATOR + }; private final boolean asArray; public StringFieldReader() @@ -123,6 +136,16 @@ public boolean isNull(Memory memory, long position) } } + @Override + public Column makeRACColumn(Frame frame, RowSignature signature, String columnName) + { + if (asArray) { + return new StringArrayFieldReaderColumn(frame, signature.indexOf(columnName), signature.size()); + } else { + return new StringFieldReaderColumn(frame, signature.indexOf(columnName), signature.size()); + } + } + /** * Selector that reads a value from a location pointed to by {@link ReadableFieldPointer}. 
*/ @@ -297,70 +320,295 @@ private void updateCurrentUtf8Strings(final long fieldPosition) { currentUtf8StringsIsNull = false; currentUtf8Strings.clear(); + currentUtf8StringsIsNull = addStringsToList(memory, fieldPosition, currentUtf8Strings); + } + } - long position = fieldPosition; - long limit = memory.getCapacity(); + private static class StringFieldReaderColumn implements Column + { + private final Frame frame; + private final Memory dataRegion; + private final FieldPositionHelper coach; - boolean rowTerminatorSeen = false; + public StringFieldReaderColumn(Frame frame, int columnIndex, int numFields) + { + this.frame = frame; + this.dataRegion = frame.region(RowBasedFrameWriter.ROW_DATA_REGION); + + this.coach = new FieldPositionHelper( + frame, + frame.region(RowBasedFrameWriter.ROW_OFFSET_REGION), + dataRegion, + columnIndex, + numFields + ); + } - while (position < limit && !rowTerminatorSeen) { - final byte kind = memory.getByte(position); - position++; + @Nonnull + @Override + public ColumnAccessor toAccessor() + { + return new ObjectColumnAccessorBase() + { + @Override + public ColumnType getType() + { + return ColumnType.STRING; + } - switch (kind) { - case StringFieldWriter.VALUE_TERMINATOR: // Or NULL_ROW (same byte value) - if (position == fieldPosition + 1) { - // It was NULL_ROW. 
- currentUtf8StringsIsNull = true; + @Override + public int numRows() + { + return frame.numRows(); + } + + @Override + public boolean isNull(int rowNum) + { + final long fieldPosition = coach.computeFieldPosition(rowNum); + byte[] nullBytes = new byte[3]; + dataRegion.getByteArray(fieldPosition, nullBytes, 0, 3); + return Arrays.equals(nullBytes, EXPECTED_BYTES_FOR_NULL); + } + + @Override + public int compareRows(int lhsRowNum, int rhsRowNum) + { + ByteBuffer lhs = getUTF8BytesAtPosition(coach.computeFieldPosition(lhsRowNum)); + ByteBuffer rhs = getUTF8BytesAtPosition(coach.computeFieldPosition(rhsRowNum)); + + if (lhs == null) { + if (rhs == null) { + return 0; + } else { + return -1; + } + } else { + if (rhs == null) { + return 1; + } else { + return lhs.compareTo(rhs); } + } + } - // Skip; next byte will be a null/not-null byte or a row terminator. - break; + @Override + protected Object getVal(int rowNum) + { + return getStringAtPosition(coach.computeFieldPosition(rowNum)); + } - case StringFieldWriter.ROW_TERMINATOR: - // Skip; this is the end of the row, so we'll fall through to the return statement. 
- rowTerminatorSeen = true; - break; + @Override + protected Comparator getComparator() + { + // we implement compareRows and thus don't need to actually implement this method + throw new UnsupportedOperationException(); + } - case StringFieldWriter.NULL_BYTE: - currentUtf8Strings.add(null); - break; + @Nullable + private String getStringAtPosition(long fieldPosition) + { + return StringUtils.fromUtf8Nullable(getUTF8BytesAtPosition(fieldPosition)); + } - case StringFieldWriter.NOT_NULL_BYTE: - for (long i = position; ; i++) { - if (i >= limit) { - throw new ISE("Value overrun"); - } + @Nullable + private ByteBuffer getUTF8BytesAtPosition(long fieldPosition) + { + ArrayList buffers = new ArrayList<>(); + final boolean isNull = addStringsToList(dataRegion, fieldPosition, buffers); + if (isNull) { + return null; + } else { + if (buffers.size() > 1) { + throw DruidException.defensive( + "Can only work with single-valued strings, should use a COMPLEX or ARRAY typed Column instead" + ); + } + return buffers.get(0); + } + } + }; + } - final byte b = memory.getByte(i); + @Nullable + @Override + public T as(Class clazz) + { + return null; + } + } - if (b == StringFieldWriter.VALUE_TERMINATOR) { - final int len = Ints.checkedCast(i - position); + private static class StringArrayFieldReaderColumn implements Column + { + private final Frame frame; + private final Memory dataRegion; + private final FieldPositionHelper coach; + + public StringArrayFieldReaderColumn(Frame frame, int columnIndex, int numFields) + { + this.frame = frame; + this.dataRegion = frame.region(RowBasedFrameWriter.ROW_DATA_REGION); + + this.coach = new FieldPositionHelper( + frame, + frame.region(RowBasedFrameWriter.ROW_OFFSET_REGION), + this.dataRegion, + columnIndex, + numFields + ); + } + + @Nonnull + @Override + public ColumnAccessor toAccessor() + { + return new ObjectColumnAccessorBase() + { + @Override + public ColumnType getType() + { + return ColumnType.STRING_ARRAY; + } + + @Override + public int 
numRows() + { + return frame.numRows(); + } + + @Override + public boolean isNull(int rowNum) + { + final long fieldPosition = coach.computeFieldPosition(rowNum); + byte[] nullBytes = new byte[3]; + dataRegion.getByteArray(fieldPosition, nullBytes, 0, 3); + return Arrays.equals(nullBytes, EXPECTED_BYTES_FOR_NULL); + } + + @Override + public int compareRows(int lhsRowNum, int rhsRowNum) + { + throw NotYetImplemented.ex( + "Should implement this by comparing the actual bytes in the frame, they should be comparable" + ); + } + + @Override + protected Object getVal(int rowNum) + { + return getStringsAtPosition(coach.computeFieldPosition(rowNum)); + } + + @Override + protected Comparator getComparator() + { + // we implement compareRows and thus don't need to actually implement this method + throw new UnsupportedOperationException(); + } + + @Nullable + private List getStringsAtPosition(long fieldPosition) + { + final List bufs = getUTF8BytesAtPosition(fieldPosition); + if (bufs == null) { + return null; + } + + final ArrayList retVal = new ArrayList<>(bufs.size()); + for (ByteBuffer buf : bufs) { + retVal.add(StringUtils.fromUtf8Nullable(buf)); + } + return retVal; + } - if (len == 0 && NullHandling.replaceWithDefault()) { - // Empty strings and nulls are the same in this mode. 
- currentUtf8Strings.add(null); - } else { - final ByteBuffer buf = FrameReaderUtils.readByteBuffer(memory, position, len); - currentUtf8Strings.add(buf); - } + @Nullable + private List getUTF8BytesAtPosition(long fieldPosition) + { + ArrayList buffers = new ArrayList<>(); + final boolean isNull = addStringsToList(dataRegion, fieldPosition, buffers); + if (isNull) { + return null; + } else { + return buffers; + } + } + }; + } - position += len; + @Nullable + @Override + public T as(Class clazz) + { + return null; + } + } - break; + private static boolean addStringsToList(Memory memory, long fieldPosition, List list) + { + long position = fieldPosition; + long limit = memory.getCapacity(); + + boolean rowTerminatorSeen = false; + boolean isEffectivelyNull = false; + + while (position < limit && !rowTerminatorSeen) { + final byte kind = memory.getByte(position); + position++; + + switch (kind) { + case StringFieldWriter.VALUE_TERMINATOR: // Or NULL_ROW (same byte value) + if (position == fieldPosition + 1) { + // It was NULL_ROW. + isEffectivelyNull = true; + } + + // Skip; next byte will be a null/not-null byte or a row terminator. + break; + + case StringFieldWriter.ROW_TERMINATOR: + // Skip; this is the end of the row, so we'll fall through to the return statement. + rowTerminatorSeen = true; + break; + + case StringFieldWriter.NULL_BYTE: + list.add(null); + break; + + case StringFieldWriter.NOT_NULL_BYTE: + for (long i = position; ; i++) { + if (i >= limit) { + throw new ISE("Value overrun"); + } + + final byte b = memory.getByte(i); + + if (b == StringFieldWriter.VALUE_TERMINATOR) { + final int len = Ints.checkedCast(i - position); + + if (len == 0 && NullHandling.replaceWithDefault()) { + // Empty strings and nulls are the same in this mode. 
+ list.add(null); + } else { + final ByteBuffer buf = FrameReaderUtils.readByteBuffer(memory, position, len); + list.add(buf); } + + position += len; + + break; } + } - break; + break; - default: - throw new ISE("Invalid value start byte [%s]", kind); - } + default: + throw new ISE("Invalid value start byte [%s]", kind); } + } - if (!rowTerminatorSeen) { - throw new ISE("Unexpected end of field"); - } + if (!rowTerminatorSeen) { + throw new ISE("Unexpected end of field"); } + return isEffectivelyNull; } } From 63f1da077cb3692cfe6e3bc596bd5a983e978ff6 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Thu, 4 Jul 2024 11:22:54 +0530 Subject: [PATCH 11/12] Refactoring - Remove wire transfer mapping as it is unreachable. - Increase memory used by FileWriteOutBytes and change it to direct memory to improve performance. --- .../concrete/ColumnBasedFrameRowsAndColumns.java | 4 ---- .../org/apache/druid/segment/serde/MetaSerdeHelper.java | 6 +++++- .../apache/druid/segment/writeout/FileWriteOutBytes.java | 7 +++++-- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/rowsandcols/concrete/ColumnBasedFrameRowsAndColumns.java b/processing/src/main/java/org/apache/druid/query/rowsandcols/concrete/ColumnBasedFrameRowsAndColumns.java index 0152156c5b58..71c2541b387c 100644 --- a/processing/src/main/java/org/apache/druid/query/rowsandcols/concrete/ColumnBasedFrameRowsAndColumns.java +++ b/processing/src/main/java/org/apache/druid/query/rowsandcols/concrete/ColumnBasedFrameRowsAndColumns.java @@ -28,7 +28,6 @@ import org.apache.druid.java.util.common.Intervals; import org.apache.druid.query.rowsandcols.RowsAndColumns; import org.apache.druid.query.rowsandcols.column.Column; -import org.apache.druid.query.rowsandcols.semantic.WireTransferable; import org.apache.druid.segment.CloseableShapeshifter; import org.apache.druid.segment.StorageAdapter; import org.apache.druid.segment.column.ColumnType; @@ -90,9 +89,6 @@ public T 
as(Class clazz) if (StorageAdapter.class.equals(clazz)) { return (T) new FrameStorageAdapter(frame, FrameReader.create(signature), Intervals.ETERNITY); } - if (WireTransferable.class.equals(clazz)) { - return (T) this; - } return null; } diff --git a/processing/src/main/java/org/apache/druid/segment/serde/MetaSerdeHelper.java b/processing/src/main/java/org/apache/druid/segment/serde/MetaSerdeHelper.java index 113821cee150..08e97e3e5017 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/MetaSerdeHelper.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/MetaSerdeHelper.java @@ -112,7 +112,11 @@ public void writeTo(WritableByteChannel channel, T x) throws IOException public int size(T x) { - return fieldWriters.stream().mapToInt(w -> w.size(x)).sum(); + int retVal = 0; + for (FieldWriter fieldWriter : fieldWriters) { + retVal += fieldWriter.size(x); + } + return retVal; } public interface FieldWriter diff --git a/processing/src/main/java/org/apache/druid/segment/writeout/FileWriteOutBytes.java b/processing/src/main/java/org/apache/druid/segment/writeout/FileWriteOutBytes.java index 959066342ac0..f15ee18c7916 100644 --- a/processing/src/main/java/org/apache/druid/segment/writeout/FileWriteOutBytes.java +++ b/processing/src/main/java/org/apache/druid/segment/writeout/FileWriteOutBytes.java @@ -39,8 +39,11 @@ public final class FileWriteOutBytes extends WriteOutBytes private final FileChannel ch; private long writeOutBytes; - /** Purposely big-endian, for {@link #writeInt(int)} implementation */ - private final ByteBuffer buffer = ByteBuffer.allocate(4096); // 4K page sized buffer + /** + * Purposely big-endian, for {@link #writeInt(int)} implementation. 
+ * Direct because there is a material difference in performance when writing direct buffers + */ + private final ByteBuffer buffer = ByteBuffer.allocateDirect(32768); // 32K page sized buffer FileWriteOutBytes(File file, FileChannel ch) { From ced0131ca1b9d810204bf035cd25318cb351f99e Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Thu, 4 Jul 2024 12:36:28 +0530 Subject: [PATCH 12/12] Fix checkstyle --- .../main/java/org/apache/druid/frame/read/FrameReader.java | 3 --- .../concrete/RowBasedFrameRowsAndColumnsTest.java | 6 +++--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/frame/read/FrameReader.java b/processing/src/main/java/org/apache/druid/frame/read/FrameReader.java index 7c6af7573b20..46a848fb6b15 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/FrameReader.java +++ b/processing/src/main/java/org/apache/druid/frame/read/FrameReader.java @@ -32,9 +32,6 @@ import org.apache.druid.frame.segment.row.FrameCursorFactory; import org.apache.druid.frame.write.FrameWriterUtils; import org.apache.druid.java.util.common.IAE; -import org.apache.druid.query.rowsandcols.RowsAndColumns; -import org.apache.druid.query.rowsandcols.concrete.ColumnBasedFrameRowsAndColumns; -import org.apache.druid.query.rowsandcols.concrete.RowBasedFrameRowsAndColumns; import org.apache.druid.segment.CursorFactory; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnType; diff --git a/processing/src/test/java/org/apache/druid/query/rowsandcols/concrete/RowBasedFrameRowsAndColumnsTest.java b/processing/src/test/java/org/apache/druid/query/rowsandcols/concrete/RowBasedFrameRowsAndColumnsTest.java index f9ad838796a8..867e83d9e008 100644 --- a/processing/src/test/java/org/apache/druid/query/rowsandcols/concrete/RowBasedFrameRowsAndColumnsTest.java +++ b/processing/src/test/java/org/apache/druid/query/rowsandcols/concrete/RowBasedFrameRowsAndColumnsTest.java @@ -37,9 
+37,9 @@ public class RowBasedFrameRowsAndColumnsTest extends RowsAndColumnsTestBase { public RowBasedFrameRowsAndColumnsTest() -{ - super(RowBasedFrameRowsAndColumns.class); -} + { + super(RowBasedFrameRowsAndColumns.class); + } public static Function MAKER = RowBasedFrameRowsAndColumnsTest::buildFrame;