diff --git a/benchmarks/src/main/java/org/apache/druid/benchmark/ExpressionSelectorBenchmark.java b/benchmarks/src/main/java/org/apache/druid/benchmark/ExpressionSelectorBenchmark.java index 5b6d8a3d9c71..9953c0e3a40c 100644 --- a/benchmarks/src/main/java/org/apache/druid/benchmark/ExpressionSelectorBenchmark.java +++ b/benchmarks/src/main/java/org/apache/druid/benchmark/ExpressionSelectorBenchmark.java @@ -80,7 +80,16 @@ public void setup() { final BenchmarkSchemaInfo schemaInfo = new BenchmarkSchemaInfo( ImmutableList.of( - BenchmarkColumnSchema.makeNormal("n", ValueType.LONG, false, 1, 0d, 0d, 10000d, false), + BenchmarkColumnSchema.makeZipf( + "n", + ValueType.LONG, + false, + 1, + 0d, + 1000, + 10000, + 3d + ), BenchmarkColumnSchema.makeZipf( "s", ValueType.STRING, @@ -146,10 +155,7 @@ public void timeFloorUsingExpression(Blackhole blackhole) final List results = cursors .map(cursor -> { final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v"); - while (!cursor.isDone()) { - blackhole.consume(selector.getLong()); - cursor.advance(); - } + consumeLong(cursor, selector, blackhole); return null; }) .toList(); @@ -219,6 +225,71 @@ public void timeFloorUsingCursor(Blackhole blackhole) blackhole.consume(count); } + @Benchmark + public void timeFormatUsingExpression(Blackhole blackhole) + { + final Sequence cursors = new QueryableIndexStorageAdapter(index).makeCursors( + null, + index.getDataInterval(), + VirtualColumns.create( + ImmutableList.of( + new ExpressionVirtualColumn( + "v", + "timestamp_format(__time, 'yyyy-MM-dd')", + ValueType.STRING, + TestExprMacroTable.INSTANCE + ) + ) + ), + Granularities.ALL, + false, + null + ); + + final List results = cursors + .map(cursor -> { + final DimensionSelector selector = cursor.getColumnSelectorFactory().makeDimensionSelector( + DefaultDimensionSpec.of("v") + ); + consumeDimension(cursor, selector, blackhole); + return null; + }) + .toList(); + + blackhole.consume(results); + } + + @Benchmark + public void timeFormatUsingExtractionFn(Blackhole blackhole) + { + final Sequence cursors = new QueryableIndexStorageAdapter(index).makeCursors( + null, + index.getDataInterval(), + VirtualColumns.EMPTY, + Granularities.ALL, + false, + null + ); + + final List results = cursors + .map(cursor -> { + final DimensionSelector selector = cursor + .getColumnSelectorFactory() + .makeDimensionSelector( + new ExtractionDimensionSpec( + ColumnHolder.TIME_COLUMN_NAME, + "v", + new TimeFormatExtractionFn("yyyy-MM-dd", null, null, null, false) + ) + ); + consumeDimension(cursor, selector, blackhole); + return null; + }) + .toList(); + + blackhole.consume(results); + } + @Benchmark public void strlenUsingExpressionAsLong(Blackhole blackhole) { @@ -312,6 +383,70 @@ public void strlenUsingExtractionFn(Blackhole blackhole) blackhole.consume(results); } + @Benchmark + public void arithmeticOnLong(Blackhole blackhole) + { + final Sequence cursors = new QueryableIndexStorageAdapter(index).makeCursors( + null, + index.getDataInterval(), + VirtualColumns.create( + ImmutableList.of( + new ExpressionVirtualColumn( + "v", + "n + 1", + ValueType.LONG, + TestExprMacroTable.INSTANCE + ) + ) + ), + Granularities.ALL, + false, + null + ); + + final List results = cursors + .map(cursor -> { + final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v"); + consumeLong(cursor, selector, blackhole); + return null; + }) + .toList(); + + blackhole.consume(results); + } + + @Benchmark + public void stringConcatAndCompareOnLong(Blackhole blackhole) + { + final Sequence cursors = new QueryableIndexStorageAdapter(index).makeCursors( + null, + index.getDataInterval(), + VirtualColumns.create( + ImmutableList.of( + new ExpressionVirtualColumn( + "v", + "concat(n, ' is my favorite number') == '3 is my favorite number'", + ValueType.LONG, + TestExprMacroTable.INSTANCE + ) + ) + ), + Granularities.ALL, + false, + null + ); + + final List results = cursors + .map(cursor -> { + final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v"); + consumeLong(cursor, selector, blackhole); + return null; + }) + .toList(); + + blackhole.consume(results); + } + private void consumeDimension(final Cursor cursor, final DimensionSelector selector, final Blackhole blackhole) { if (selector.getValueCardinality() >= 0) { diff --git a/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/SegmentGenerator.java b/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/SegmentGenerator.java index 1fb709ec44cd..46904205d257 100644 --- a/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/SegmentGenerator.java +++ b/benchmarks/src/main/java/org/apache/druid/benchmark/datagen/SegmentGenerator.java @@ -27,6 +27,7 @@ import org.apache.druid.data.input.InputRow; import org.apache.druid.data.input.impl.DimensionSchema; import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.DoubleDimensionSchema; import org.apache.druid.data.input.impl.FloatDimensionSchema; import org.apache.druid.data.input.impl.LongDimensionSchema; import org.apache.druid.data.input.impl.StringDimensionSchema; @@ -99,6 +100,9 @@ public QueryableIndex generate( case LONG: dimensions.add(new LongDimensionSchema(columnSchema.getName())); break; + case DOUBLE: + dimensions.add(new DoubleDimensionSchema(columnSchema.getName())); + break; case FLOAT: dimensions.add(new FloatDimensionSchema(columnSchema.getName())); break; diff --git a/core/src/main/java/org/apache/druid/math/expr/ExprEval.java b/core/src/main/java/org/apache/druid/math/expr/ExprEval.java index 60009122de19..2b649d471351 100644 --- a/core/src/main/java/org/apache/druid/math/expr/ExprEval.java +++ b/core/src/main/java/org/apache/druid/math/expr/ExprEval.java @@ -30,6 +30,10 @@ */ public abstract class ExprEval { + // Cached String values. Protected so they can be used by subclasses. + private boolean stringValueValid = false; + private String stringValue; + public static ExprEval ofLong(@Nullable Number longValue) { return new LongExprEval(longValue); @@ -89,7 +93,7 @@ public static ExprEval bestEffortOf(@Nullable Object val) @Nullable final T value; - private ExprEval(T value) + private ExprEval(@Nullable T value) { this.value = value; } @@ -115,7 +119,17 @@ public Object value() @Nullable public String asString() { - return value == null ? null : String.valueOf(value); + if (!stringValueValid) { + if (value == null) { + stringValue = null; + } else { + stringValue = String.valueOf(value); + } + + stringValueValid = true; + } + + return stringValue; } public abstract boolean asBoolean(); @@ -126,7 +140,6 @@ public String asString() private abstract static class NumericExprEval extends ExprEval { - private NumericExprEval(@Nullable Number value) { super(value); @@ -247,6 +260,16 @@ public Expr toExpr() private static class StringExprEval extends ExprEval { + // Cached primitive values. + private boolean intValueValid = false; + private boolean longValueValid = false; + private boolean doubleValueValid = false; + private boolean booleanValueValid = false; + private int intValue; + private long longValue; + private double doubleValue; + private boolean booleanValue; + private static final StringExprEval OF_NULL = new StringExprEval(null); private Number numericVal; @@ -263,9 +286,48 @@ public final ExprType type() } @Override - public final int asInt() + public int asInt() + { + if (!intValueValid) { + intValue = computeInt(); + intValueValid = true; + } + + return intValue; + } + + @Override + public long asLong() + { + if (!longValueValid) { + longValue = computeLong(); + longValueValid = true; + } + + return longValue; + } + + @Override + public double asDouble() + { + if (!doubleValueValid) { + doubleValue = computeDouble(); + doubleValueValid = true; + } + + return doubleValue; + } + + @Nullable + @Override + public String asString() { - Number number = asNumber(); + return value; + } + + private int computeInt() + { + Number number = computeNumber(); if (number == null) { assert NullHandling.replaceWithDefault(); return 0; @@ -273,10 +335,9 @@ public final int asInt() return number.intValue(); } - @Override - public final long asLong() + private long computeLong() { - Number number = asNumber(); + Number number = computeNumber(); if (number == null) { assert NullHandling.replaceWithDefault(); return 0L; @@ -284,10 +345,9 @@ public final long asLong() return number.longValue(); } - @Override - public final double asDouble() + private double computeDouble() { - Number number = asNumber(); + Number number = computeNumber(); if (number == null) { assert NullHandling.replaceWithDefault(); return 0.0d; @@ -296,7 +356,7 @@ public final double asDouble() } @Nullable - private Number asNumber() + private Number computeNumber() { if (value == null) { return null; @@ -321,13 +381,18 @@ private Number asNumber() @Override public boolean isNumericNull() { - return asNumber() == null; + return computeNumber() == null; } @Override public final boolean asBoolean() { - return Evals.asBoolean(value); + if (!booleanValueValid) { + booleanValue = Evals.asBoolean(value); + booleanValueValid = true; + } + + return booleanValue; } @Override @@ -335,9 +400,9 @@ public final ExprEval castTo(ExprType castTo) { switch (castTo) { case DOUBLE: - return ExprEval.ofDouble(asNumber()); + return ExprEval.ofDouble(computeNumber()); case LONG: - return ExprEval.ofLong(asNumber()); + return ExprEval.ofLong(computeNumber()); case STRING: return this; } diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java index 14429032ceb0..b0d49bad47bf 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java @@ -137,12 +137,12 @@ public static ColumnValueSelector makeExprEvalSelector( final String column = Iterables.getOnlyElement(columns); final ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(column); - if (column.equals(ColumnHolder.TIME_COLUMN_NAME)) { - // Optimization for expressions that hit the __time column and nothing else. - // May be worth applying this optimization to all long columns? + if (capabilities != null && capabilities.getType() == ValueType.LONG) { + // Optimization for expressions that hit one long column and nothing else. return new SingleLongInputCachingExpressionColumnValueSelector( - columnSelectorFactory.makeColumnValueSelector(ColumnHolder.TIME_COLUMN_NAME), - expression + columnSelectorFactory.makeColumnValueSelector(column), + expression, + !ColumnHolder.TIME_COLUMN_NAME.equals(column) // __time doesn't need an LRU cache since it is sorted. ); } else if (capabilities != null && capabilities.getType() == ValueType.STRING diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/SingleLongInputCachingExpressionColumnValueSelector.java b/processing/src/main/java/org/apache/druid/segment/virtual/SingleLongInputCachingExpressionColumnValueSelector.java index f3ffe8318612..fa002c022036 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/SingleLongInputCachingExpressionColumnValueSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/SingleLongInputCachingExpressionColumnValueSelector.java @@ -20,6 +20,7 @@ package org.apache.druid.segment.virtual; import com.google.common.base.Preconditions; +import it.unimi.dsi.fastutil.longs.Long2ObjectLinkedOpenHashMap; import org.apache.druid.java.util.common.ISE; import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprEval; @@ -28,6 +29,7 @@ import org.apache.druid.segment.ColumnValueSelector; import javax.annotation.Nonnull; +import javax.annotation.Nullable; /** * Like {@link ExpressionColumnValueSelector}, but caches the most recently computed value and re-uses it in the case @@ -35,30 +37,26 @@ */ public class SingleLongInputCachingExpressionColumnValueSelector implements ColumnValueSelector { - enum Validity - { - NONE, - DOUBLE, - LONG, - EVAL - } + private static final int CACHE_SIZE = 1000; private final ColumnValueSelector selector; private final Expr expression; private final SingleInputBindings bindings = new SingleInputBindings(); - // Last read input value + @Nullable + private final LruEvalCache lruEvalCache; + + // Last read input value. private long lastInput; - // Last computed output values (validity determined by "validity" field) - private Validity validity = Validity.NONE; - private double lastDoubleOutput; - private long lastLongOutput; - private ExprEval lastEvalOutput; + // Last computed output value, or null if there is none. + @Nullable + private ExprEval lastOutput; public SingleLongInputCachingExpressionColumnValueSelector( final ColumnValueSelector selector, - final Expr expression + final Expr expression, + final boolean useLruCache ) { // Verify expression has just one binding. @@ -68,6 +66,7 @@ public SingleLongInputCachingExpressionColumnValueSelector( this.selector = Preconditions.checkNotNull(selector, "selector"); this.expression = Preconditions.checkNotNull(expression, "expression"); + this.lruEvalCache = useLruCache ? new LruEvalCache() : null; } @Override @@ -80,59 +79,41 @@ public void inspectRuntimeShape(final RuntimeShapeInspector inspector) @Override public double getDouble() { - // No Assert for null handling as delegate selector already have it. - final long currentInput = selector.getLong(); - - if (lastInput == currentInput && validity == Validity.DOUBLE) { - return lastDoubleOutput; - } else { - final double output = eval(currentInput).asDouble(); - lastInput = currentInput; - lastDoubleOutput = output; - validity = Validity.DOUBLE; - return output; - } + return getObject().asDouble(); } @Override public float getFloat() { - // No Assert for null handling as delegate selector already have it. - return (float) getDouble(); + return (float) getObject().asDouble(); } @Override public long getLong() { - // No Assert for null handling as delegate selector already have it. - final long currentInput = selector.getLong(); - - if (lastInput == currentInput && validity == Validity.LONG) { - return lastLongOutput; - } else { - final long output = eval(currentInput).asLong(); - lastInput = currentInput; - lastLongOutput = output; - validity = Validity.LONG; - return output; - } + return getObject().asLong(); } @Nonnull @Override public ExprEval getObject() { - final long currentInput = selector.getLong(); - - if (lastInput == currentInput && validity == Validity.EVAL) { - return lastEvalOutput; - } else { - final ExprEval output = eval(currentInput); - lastInput = currentInput; - lastEvalOutput = output; - validity = Validity.EVAL; - return output; + // No assert for null handling, as the delegate selector already has it. + final long input = selector.getLong(); + final boolean cached = input == lastInput && lastOutput != null; + + if (!cached) { + if (lruEvalCache == null) { + bindings.set(input); + lastOutput = expression.eval(bindings); + } else { + lastOutput = lruEvalCache.compute(input); + } + + lastInput = input; } + + return lastOutput; } @Override @@ -141,12 +122,6 @@ public Class classOfObject() return ExprEval.class; } - private ExprEval eval(final long value) - { - bindings.set(value); - return expression.eval(bindings); - } - @Override public boolean isNull() { @@ -155,4 +130,26 @@ public boolean isNull() // ExprEval.isNumericNull checks whether the parsed primitive value is null or not. return getObject().isNumericNull(); } + + public class LruEvalCache + { + private final Long2ObjectLinkedOpenHashMap m = new Long2ObjectLinkedOpenHashMap<>(); + + public ExprEval compute(final long n) + { + ExprEval value = m.getAndMoveToFirst(n); + + if (value == null) { + bindings.set(n); + value = expression.eval(bindings); + m.putAndMoveToFirst(n, value); + + if (m.size() > CACHE_SIZE) { + m.removeLast(); + } + } + + return value; + } + } }