From 524660310b73890eda561c9512505a509097a18c Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Thu, 16 Jan 2020 13:15:44 -0800 Subject: [PATCH] Add javadocs and small improvements to join code. A follow-up to #9111. --- .../druid/segment/ColumnProcessorFactory.java | 3 ++ .../druid/segment/join/HashJoinEngine.java | 7 +++- .../segment/join/JoinConditionAnalysis.java | 8 ++++ .../druid/segment/join/JoinableClause.java | 10 ++++- .../join/PossiblyNullColumnValueSelector.java | 4 ++ .../segment/join/table/IndexedTable.java | 40 +++++++++++++++++++ .../join/table/IndexedTableJoinMatcher.java | 2 +- 7 files changed, 70 insertions(+), 4 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/ColumnProcessorFactory.java b/processing/src/main/java/org/apache/druid/segment/ColumnProcessorFactory.java index 86600f3473a3..979e0067f632 100644 --- a/processing/src/main/java/org/apache/druid/segment/ColumnProcessorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/ColumnProcessorFactory.java @@ -41,6 +41,9 @@ public interface ColumnProcessorFactory { /** * This default type will be used when the underlying column has an unknown type. + * + * This allows a column processor factory to specify what type it prefers to deal with (the most 'natural' type for + * whatever it is doing) when all else is equal. */ ValueType defaultType(); diff --git a/processing/src/main/java/org/apache/druid/segment/join/HashJoinEngine.java b/processing/src/main/java/org/apache/druid/segment/join/HashJoinEngine.java index 71a3526b3544..32f811596a89 100644 --- a/processing/src/main/java/org/apache/druid/segment/join/HashJoinEngine.java +++ b/processing/src/main/java/org/apache/druid/segment/join/HashJoinEngine.java @@ -45,8 +45,11 @@ private HashJoinEngine() * joinable clause's prefix (see {@link JoinableClause#getPrefix()}) will come from the Joinable's column selector * factory, and all other columns will come from the leftCursor's column selector factory. * - * Ensuing that the joinable clause's prefix does not conflict with any columns from "leftCursor" is the - * responsibility of the caller. + * Ensuring that the joinable clause's prefix does not conflict with any columns from "leftCursor" is the + * responsibility of the caller. If there is such a conflict (for example, if the joinable clause's prefix is "j.", + * and the leftCursor has a field named "j.j.abrams"), then the field from the leftCursor will be shadowed and will + * not be queryable through the returned Cursor. This happens even if the right-hand joinable doesn't actually have a + * column with this name. */ public static Cursor makeJoinCursor(final Cursor leftCursor, final JoinableClause joinableClause) { diff --git a/processing/src/main/java/org/apache/druid/segment/join/JoinConditionAnalysis.java b/processing/src/main/java/org/apache/druid/segment/join/JoinConditionAnalysis.java index dc643401d778..ca09682720dc 100644 --- a/processing/src/main/java/org/apache/druid/segment/join/JoinConditionAnalysis.java +++ b/processing/src/main/java/org/apache/druid/segment/join/JoinConditionAnalysis.java @@ -62,6 +62,14 @@ private JoinConditionAnalysis( this.nonEquiConditions = nonEquiConditions; } + /** + * Analyze a join condition. + * + * @param condition the condition expression + * @param rightPrefix prefix for the right-hand side of the join; will be used to determine which identifiers in + * the condition come from the right-hand side and which come from the left-hand side + * @param macroTable macro table for parsing the condition expression + */ public static JoinConditionAnalysis forExpression( final String condition, final String rightPrefix, diff --git a/processing/src/main/java/org/apache/druid/segment/join/JoinableClause.java b/processing/src/main/java/org/apache/druid/segment/join/JoinableClause.java index 21d75f3af3e0..4f985dbf3f50 100644 --- a/processing/src/main/java/org/apache/druid/segment/join/JoinableClause.java +++ b/processing/src/main/java/org/apache/druid/segment/join/JoinableClause.java @@ -48,7 +48,15 @@ public JoinableClause(@Nullable String prefix, Joinable joinable, JoinType joinT } /** - * The prefix to apply to all columns from the Joinable. + * The prefix to apply to all columns from the Joinable. The idea is that during a join, any columns that start with + * this prefix should be retrieved from our Joinable's {@link JoinMatcher#getColumnSelectorFactory()}. Any other + * columns should be returned from the left-hand side of the join. + * + * The prefix can be any string, as long as it is nonempty and not itself a prefix of the reserved column name + * {@code __time}. + * + * @see #getAvailableColumnsPrefixed() the list of columns from our {@link Joinable} with prefixes attached + * @see #unprefix a method for removing prefixes */ public String getPrefix() { diff --git a/processing/src/main/java/org/apache/druid/segment/join/PossiblyNullColumnValueSelector.java b/processing/src/main/java/org/apache/druid/segment/join/PossiblyNullColumnValueSelector.java index b42a048135f9..483ebbe5d1d4 100644 --- a/processing/src/main/java/org/apache/druid/segment/join/PossiblyNullColumnValueSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/join/PossiblyNullColumnValueSelector.java @@ -26,6 +26,10 @@ import javax.annotation.Nullable; import java.util.function.BooleanSupplier; +/** + * A {@link ColumnValueSelector} that wraps a base selector but might also generate null values on demand. This + * is used for "righty" joins (see {@link JoinType#isRighty()}), which may need to generate nulls on the left-hand side. + */ public class PossiblyNullColumnValueSelector implements ColumnValueSelector { private final ColumnValueSelector baseSelector; diff --git a/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTable.java b/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTable.java index ecfbe61aba20..a7b6b748b948 100644 --- a/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTable.java +++ b/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTable.java @@ -26,27 +26,67 @@ import java.util.List; import java.util.Map; +/** + * An interface to a table where some columns (the 'key columns') have indexes that enable fast lookups. + * + * The main user of this class is {@link IndexedTableJoinable}, and its main purpose is to participate in joins. + */ public interface IndexedTable { + /** + * Returns the columns of this table that have indexes. + */ List keyColumns(); + /** + * Returns all columns of this table, including the key and non-key columns. + */ List allColumns(); + /** + * Returns the signature of this table: a map where each key is a column from {@link #allColumns()} and each value + * is a type code. + */ Map rowSignature(); + /** + * Returns the number of rows in this table. It must not change over time, since it is used for things like algorithm + * selection and reporting of cardinality metadata. + */ int numRows(); + /** + * Returns the index for a particular column. The provided column number must be that column's position in + * {@link #allColumns()}. + */ Index columnIndex(int column); + /** + * Returns a reader for a particular column. The provided column number must be that column's position in + * {@link #allColumns()}. + */ Reader columnReader(int column); + /** + * Indexes support fast lookups on key columns. + */ interface Index { + /** + * Returns the list of row numbers where the column this Reader is based on contains 'key'. + */ IntList find(Object key); } + /** + * Readers support reading values out of any column. + */ interface Reader { + /** + * Read the value at a particular row number. Throws an exception if the row is out of bounds (must be between zero + * and {@link #numRows()}). + */ @Nullable Object read(int row); } diff --git a/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableJoinMatcher.java b/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableJoinMatcher.java index 5f48ff479f2c..16e322045a9b 100644 --- a/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableJoinMatcher.java +++ b/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableJoinMatcher.java @@ -75,7 +75,7 @@ public class IndexedTableJoinMatcher implements JoinMatcher if (condition.isAlwaysTrue()) { this.conditionMatchers = Collections.singletonList(() -> IntIterators.fromTo(0, table.numRows())); } else if (condition.isAlwaysFalse()) { - this.conditionMatchers = Collections.singletonList(() -> IntIterators.fromTo(0, 0)); + this.conditionMatchers = Collections.singletonList(() -> IntIterators.EMPTY_ITERATOR); } else if (condition.getNonEquiConditions().isEmpty()) { this.conditionMatchers = condition.getEquiConditions() .stream()