Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions core/src/main/java/org/apache/druid/math/expr/Expr.java
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ default boolean isNullLiteral()
return false;
}

/**
 * Returns true if this expression is a direct reference to a binding (an identifier expression).
 * The default is false; identifier implementations override this to return true.
 */
default boolean isIdentifier()
{
return false;
}

/**
* Returns the value of expr if expr is a literal, or throws an exception otherwise.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,12 @@ public String getBinding()
return binding;
}

/**
 * This expression is a direct identifier reference to a binding, so always report true.
 */
@Override
public boolean isIdentifier()
{
return true;
}

@Nullable
@Override
public String getIdentifierIfIdentifier()
Expand Down
6 changes: 6 additions & 0 deletions core/src/test/java/org/apache/druid/math/expr/ParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,7 @@ private void validateLiteral(String expr, ExpressionType type, Object expected,
Expr parsedFlat = Parser.parse(expr, ExprMacroTable.nil(), true);
Assert.assertTrue(parsed.isLiteral());
Assert.assertTrue(parsedFlat.isLiteral());
Assert.assertFalse(parsed.isIdentifier());
Assert.assertEquals(type, parsed.getOutputType(emptyBinding));
Assert.assertEquals(type, parsedFlat.getOutputType(emptyBinding));
Assert.assertEquals(expected, parsed.getLiteralValue());
Expand Down Expand Up @@ -770,6 +771,11 @@ private void validateParser(
)
{
final Expr parsed = Parser.parse(expression, ExprMacroTable.nil());
if (parsed instanceof IdentifierExpr) {
Assert.assertTrue(parsed.isIdentifier());
} else {
Assert.assertFalse(parsed.isIdentifier());
}
final Expr.BindingAnalysis deets = parsed.analyzeInputs();
Assert.assertEquals(expression, expected, parsed.toString());
Assert.assertEquals(expression, identifiers, deets.getRequiredBindingsList());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,13 @@ public class GeneratorBasicSchemas
GeneratorColumnSchema.makeLazyDiscreteUniform("string4", ValueType.STRING, false, 1, null, 1, 10_000),
GeneratorColumnSchema.makeLazyDiscreteUniform("string5", ValueType.STRING, false, 1, 0.3, 1, 1_000_000),

// multi string dims
GeneratorColumnSchema.makeSequential("multi-string1", ValueType.STRING, false, 8, null, 0, 10000),
GeneratorColumnSchema.makeLazyZipf("multi-string2", ValueType.STRING, false, 8, null, 1, 100, 1.5),
GeneratorColumnSchema.makeLazyZipf("multi-string3", ValueType.STRING, false, 16, 0.1, 1, 1_000_000, 2.0),
GeneratorColumnSchema.makeLazyDiscreteUniform("multi-string4", ValueType.STRING, false, 4, null, 1, 10_000),
GeneratorColumnSchema.makeLazyDiscreteUniform("multi-string5", ValueType.STRING, false, 8, 0.3, 1, 1_000_000),

// numeric dims
GeneratorColumnSchema.makeSequential("long1", ValueType.LONG, false, 1, null, 0, 10000),
GeneratorColumnSchema.makeLazyZipf("long2", ValueType.LONG, false, 1, null, 1, 101, 1.5),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ public enum Trait
* expression has no inputs and can be optimized into a constant selector
*/
CONSTANT,
/**
* expression is a simple identifier expression, do not transform
*/
IDENTIFIER,
/**
* expression has a single, single valued input, and is dictionary encoded if the value is a string, and does
* not produce non-scalar output
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ public static ExpressionPlan plan(ColumnInspector inspector, Expr expression)
// check and set traits which allow optimized selectors to be created
if (columns.isEmpty()) {
traits.add(ExpressionPlan.Trait.CONSTANT);
} else if (expression.isIdentifier()) {
traits.add(ExpressionPlan.Trait.IDENTIFIER);
} else if (columns.size() == 1) {
final String column = Iterables.getOnlyElement(columns);
final ColumnCapabilities capabilities = inspector.getColumnCapabilities(column);
Expand Down Expand Up @@ -105,7 +107,14 @@ public static ExpressionPlan plan(ColumnInspector inspector, Expr expression)

// if we didn't eliminate this expression as a single input scalar or mappable expression, it might need
// automatic transformation to map across multi-valued inputs (or row by row detection in the worst case)
if (ExpressionPlan.none(traits, ExpressionPlan.Trait.SINGLE_INPUT_SCALAR)) {
if (
ExpressionPlan.none(
traits,
ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
ExpressionPlan.Trait.CONSTANT,
ExpressionPlan.Trait.IDENTIFIER
)
) {
final Set<String> definitelyMultiValued = new HashSet<>();
final Set<String> definitelyArray = new HashSet<>();
for (String column : analysis.getRequiredBindings()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,8 @@ public static Expr.ObjectBinding createBindings(
} else if (capabilities.is(ValueType.STRING)) {
supplier = supplierFromDimensionSelector(
columnSelectorFactory.makeDimensionSelector(new DefaultDimensionSpec(columnName, columnName)),
multiVal
multiVal,
homogenizeNullMultiValueStringArrays
);
} else {
// complex type just pass straight through
Expand Down Expand Up @@ -349,7 +350,8 @@ public ExpressionType getType(String name)
*
* @see org.apache.druid.segment.BaseNullableColumnValueSelector#isNull() for why this only works in the numeric case
*/
private static <T> Supplier<T> makeNullableNumericSupplier(
@VisibleForTesting
public static <T> Supplier<T> makeNullableNumericSupplier(
ColumnValueSelector selector,
Supplier<T> supplier
)
Expand All @@ -371,7 +373,7 @@ private static <T> Supplier<T> makeNullableNumericSupplier(
* arrays if specified.
*/
@VisibleForTesting
static Supplier<Object> supplierFromDimensionSelector(final DimensionSelector selector, boolean coerceArray)
static Supplier<Object> supplierFromDimensionSelector(final DimensionSelector selector, boolean coerceArray, boolean homogenize)
{
Preconditions.checkNotNull(selector, "selector");
return () -> {
Expand All @@ -381,8 +383,12 @@ static Supplier<Object> supplierFromDimensionSelector(final DimensionSelector se
return selector.lookupName(row.get(0));
} else {
// column selector factories hate you and use [] and [null] interchangeably for nullish data
if (row.size() == 0) {
return new Object[]{null};
if (row.size() == 0 || (row.size() == 1 && selector.getObject() == null)) {
if (homogenize) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe I am overthinking but does it make sense to take out this branch and return a different supplier itself? Might be unnecessary if this method is not very hot.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

eh, it is a hot path, though probably not the worst thing happening in non-vectorized expressions, so an extra branch probably doesn't do that much heh

that said, I think these input binding suppliers are probably worth thinking about either optimizing with additional specialized implementations, or at least using as an example of what not to do when we add multi-value string and array support for vectorized expressions.

return new Object[]{null};
} else {
return null;
}
}
final Object[] strings = new Object[row.size()];
// noinspection SSBasedInspection
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public SingleStringInputCachingExpressionColumnValueSelector(
this.selector = Preconditions.checkNotNull(selector, "selector");
this.expression = Preconditions.checkNotNull(expression, "expression");

final Supplier<Object> inputSupplier = ExpressionSelectors.supplierFromDimensionSelector(selector, false);
final Supplier<Object> inputSupplier = ExpressionSelectors.supplierFromDimensionSelector(selector, false, false);
this.bindings = InputBindings.singleProvider(ExpressionType.STRING, name -> inputSupplier.get());

if (selector.getValueCardinality() == DimensionDictionarySelector.CARDINALITY_UNKNOWN) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -816,10 +816,19 @@ public void testGroupByExpressionArrayExpressionFilter()
query
);

List<ResultRow> expectedResults = Arrays.asList(
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", NullHandling.replaceWithDefault() ? -1L : null, "count", 6L),
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", 1L, "count", 2L)
);
List<ResultRow> expectedResults;
if (NullHandling.replaceWithDefault()) {
expectedResults = Arrays.asList(
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", -1L, "count", 4L),
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", 0L, "count", 2L),
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", 1L, "count", 2L)
);
} else {
expectedResults = Arrays.asList(
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", null, "count", 6L),
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", 1L, "count", 2L)
);
}

TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-auto");
}
Expand Down Expand Up @@ -858,7 +867,7 @@ public void testGroupByExpressionArrayFnArg()
);

List<ResultRow> expectedResults = Arrays.asList(
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "foo", "count", 2L),
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", NullHandling.replaceWithDefault() ? null : "foo", "count", 2L),
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "foot1, foot2, foot3", "count", 2L),
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "foot3, foot4, foot5", "count", 2L),
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "foot5, foot6, foot7", "count", 2L)
Expand Down Expand Up @@ -977,7 +986,7 @@ public void testGroupByExpressionFoldArrayToStringWithConcats()
.setVirtualColumns(
new ExpressionVirtualColumn(
"tt",
"fold((tag, acc) -> concat(concat(acc, case_searched(acc == '', '', ', '), concat('foo', tag)))), tags, '')",
"fold((tag, acc) -> concat(concat(acc, case_searched(acc == '', '', ', '), concat('foo', tag))), tags, '')",
ColumnType.STRING,
TestExprMacroTable.INSTANCE
)
Expand All @@ -995,7 +1004,7 @@ public void testGroupByExpressionFoldArrayToStringWithConcats()
);

List<ResultRow> expectedResults = Arrays.asList(
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "foo", "count", 2L),
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", NullHandling.replaceWithDefault() ? null : "foo", "count", 2L),
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "foot1, foot2, foot3", "count", 2L),
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "foot3, foot4, foot5", "count", 2L),
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "foot5, foot6, foot7", "count", 2L)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.TestHelper;
import org.apache.druid.segment.data.RoaringBitmapSerdeFactory;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
import org.apache.druid.segment.serde.ComplexMetrics;
import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory;
Expand Down Expand Up @@ -211,6 +212,57 @@ public QueryableIndex generate(
return retVal;
}

/**
 * Generates an {@link IncrementalIndex} populated with {@code numRows} rows of synthetic data for the given
 * segment and schema. The segment id's hashCode seeds the data generator, so the same inputs always produce
 * the same rows, and a SHA-256 hash of all inputs keys the on-disk cache directory.
 *
 * @param dataSegment segment whose id seeds the generator and names the cache directory
 * @param schemaInfo  column schemas, dimensions, aggregators, and data interval used to generate rows
 * @param granularity query granularity applied to the incremental index schema
 * @param numRows     number of rows to generate
 * @return an incremental index containing the generated rows
 */
public IncrementalIndex generateIncrementalIndex(
    final DataSegment dataSegment,
    final GeneratorSchemaInfo schemaInfo,
    final Granularity granularity,
    final int numRows
)
{
  // In case we need to generate hyperUniques.
  ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde());

  // Hash of all inputs; identical requests map to the same cache directory.
  final String dataHash = Hashing.sha256()
                                 .newHasher()
                                 .putString(dataSegment.getId().toString(), StandardCharsets.UTF_8)
                                 .putString(schemaInfo.toString(), StandardCharsets.UTF_8)
                                 .putString(granularity.toString(), StandardCharsets.UTF_8)
                                 .putInt(numRows)
                                 .hash()
                                 .toString();

  final DataGenerator dataGenerator = new DataGenerator(
      schemaInfo.getColumnSchemas(),
      dataSegment.getId().hashCode(), /* Use segment identifier hashCode as seed */
      schemaInfo.getDataInterval(),
      numRows
  );

  final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
      .withDimensionsSpec(schemaInfo.getDimensionsSpec())
      .withMetrics(schemaInfo.getAggsArray())
      .withRollup(schemaInfo.isWithRollup())
      .withQueryGranularity(granularity)
      .build();

  // Presize: the final row count is known up front.
  final List<InputRow> rows = new ArrayList<>(numRows);

  for (int i = 0; i < numRows; i++) {
    rows.add(dataGenerator.nextRow());

    // Periodic progress logging for large row counts.
    if ((i + 1) % 20000 == 0) {
      log.info("%,d/%,d rows generated for[%s].", i + 1, numRows, dataSegment);
    }
  }

  log.info("%,d/%,d rows generated for[%s].", numRows, numRows, dataSegment);

  return makeIncrementalIndex(dataSegment.getId(), dataHash, 0, rows, indexSchema);
}

@Override
public void close() throws IOException
{
Expand All @@ -236,6 +288,23 @@ private QueryableIndex makeIndex(
.buildMMappedIndex();
}

/**
 * Builds an in-memory {@link IncrementalIndex} from the supplied rows, writing temporary data under a
 * numbered subdirectory of the per-segment cache directory keyed by the data hash.
 */
private IncrementalIndex makeIncrementalIndex(
    final SegmentId identifier,
    final String dataHash,
    final int indexNumber,
    final List<InputRow> rows,
    final IncrementalIndexSchema indexSchema
)
{
  // Temporary files live in a numbered subdirectory of this segment's cache directory.
  final File tmpDir = new File(getSegmentDir(identifier, dataHash), String.valueOf(indexNumber));
  final IndexBuilder builder = IndexBuilder
      .create()
      .schema(indexSchema)
      .tmpDir(tmpDir)
      .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
      .rows(rows);
  return builder.buildIncrementalIndex();
}

private File getSegmentDir(final SegmentId identifier, final String dataHash)
{
return new File(cacheDir, StringUtils.format("%s_%s", identifier, dataHash));
Expand Down
Loading