Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions docs/querying/datasource.md
Original file line number Diff line number Diff line change
Expand Up @@ -412,15 +412,13 @@ The `unnest` datasource uses the following syntax:
"type": "expression",
"expression": "\"column_reference\""
},
"outputName": "unnested_target_column",
"allowList": []
},
"outputName": "unnested_target_column"
}
```

* `dataSource.type`: Set this to `unnest`.
* `dataSource.base`: Defines the datasource you want to unnest.
* `dataSource.base.type`: The type of datasource you want to unnest, such as a table.
* `dataSource.virtualColumn`: [Virtual column](virtual-columns.md) that references the nested values. The output name of this column is reused as the name of the column that contains unnested values. You can replace the source column with the unnested column by specifying the source column's name or a new column by specifying a different name. Outputting it to a new column can help you verify that you get the results that you expect but isn't required.
* `dataSource.allowList`: Optional. The subset of values you want to unnest.

To learn more about how to use the `unnest` datasource, see the [unnest tutorial](../tutorials/tutorial-unnest-datasource.md).
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@
import org.apache.druid.segment.VirtualColumn;
import org.apache.druid.utils.JvmUtils;

import javax.annotation.Nullable;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
Expand All @@ -39,7 +37,6 @@

/**
* The data source for representing an unnest operation.
*
* An unnest data source has the following:
* a base data source which is to be unnested
* the column name of the MVD which will be unnested
Expand All @@ -50,27 +47,23 @@ public class UnnestDataSource implements DataSource
{
private final DataSource base;
private final VirtualColumn virtualColumn;
private final LinkedHashSet<String> allowList;

private UnnestDataSource(
DataSource dataSource,
VirtualColumn virtualColumn,
LinkedHashSet<String> allowList
VirtualColumn virtualColumn
)
{
this.base = dataSource;
this.virtualColumn = virtualColumn;
this.allowList = allowList;
}

@JsonCreator
public static UnnestDataSource create(
@JsonProperty("base") DataSource base,
@JsonProperty("virtualColumn") VirtualColumn virtualColumn,
@Nullable @JsonProperty("allowList") LinkedHashSet<String> allowList
@JsonProperty("virtualColumn") VirtualColumn virtualColumn
)
{
return new UnnestDataSource(base, virtualColumn, allowList);
return new UnnestDataSource(base, virtualColumn);
}

@JsonProperty("base")
Expand All @@ -85,12 +78,6 @@ public VirtualColumn getVirtualColumn()
return virtualColumn;
}

@JsonProperty("allowList")
public LinkedHashSet<String> getAllowList()
{
return allowList;
}

@Override
public Set<String> getTableNames()
{
Expand All @@ -109,7 +96,7 @@ public DataSource withChildren(List<DataSource> children)
if (children.size() != 1) {
throw new IAE("Expected [1] child, got [%d]", children.size());
}
return new UnnestDataSource(children.get(0), virtualColumn, allowList);
return new UnnestDataSource(children.get(0), virtualColumn);
}

@Override
Expand Down Expand Up @@ -146,17 +133,15 @@ public Function<SegmentReference, SegmentReference> createSegmentMapFunction(
baseSegment ->
new UnnestSegmentReference(
segmentMapFn.apply(baseSegment),
virtualColumn,
allowList
virtualColumn
)
);

}

@Override
public DataSource withUpdatedDataSource(DataSource newSource)
{
return new UnnestDataSource(newSource, virtualColumn, allowList);
return new UnnestDataSource(newSource, virtualColumn);
}

@Override
Expand Down Expand Up @@ -203,7 +188,6 @@ public String toString()
return "UnnestDataSource{" +
"base=" + base +
", column='" + virtualColumn + '\'' +
", allowList=" + allowList +
'}';
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
import javax.annotation.Nullable;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;

/**
Expand All @@ -50,9 +49,6 @@
* unnestCursor.advance() -> 'e'
* <p>
* <p>
* The allowSet if available helps skip over elements which are not in the allowList by moving the cursor to
* the next available match.
* <p>
* The index reference points to the index of each row that the unnest cursor is accessing through currentVal
* The index ranges from 0 to the size of the list in each row which is held in the unnestListForCurrentRow
* <p>
Expand All @@ -65,18 +61,17 @@ public class UnnestColumnValueSelectorCursor implements Cursor
private final ColumnValueSelector columnValueSelector;
private final VirtualColumn unnestColumn;
private final String outputName;
private final LinkedHashSet<String> allowSet;
private int index;
private Object currentVal;
private List<Object> unnestListForCurrentRow;
private boolean needInitialization;


public UnnestColumnValueSelectorCursor(
Cursor cursor,
ColumnSelectorFactory baseColumnSelectorFactory,
VirtualColumn unnestColumn,
String outputColumnName,
LinkedHashSet<String> allowSet
String outputColumnName
)
{
this.baseCursor = cursor;
Expand All @@ -89,7 +84,6 @@ public UnnestColumnValueSelectorCursor(
this.index = 0;
this.outputName = outputColumnName;
this.needInitialization = true;
this.allowSet = allowSet;
}

@Override
Expand Down Expand Up @@ -194,11 +188,7 @@ public boolean isNull()
public Object getObject()
{
if (!unnestListForCurrentRow.isEmpty()) {
if (allowSet == null || allowSet.isEmpty()) {
return unnestListForCurrentRow.get(index);
} else if (allowSet.contains((String) unnestListForCurrentRow.get(index))) {
return unnestListForCurrentRow.get(index);
}
return unnestListForCurrentRow.get(index);
}
return null;
}
Expand Down Expand Up @@ -253,9 +243,7 @@ public void advance()
@Override
public void advanceUninterruptibly()
{
do {
advanceAndUpdate();
} while (matchAndProceed());
advanceAndUpdate();
}

@Override
Expand Down Expand Up @@ -304,19 +292,10 @@ private void getNextRow()
/**
* This initializes the unnest cursor and creates data structures
* to start iterating over the values to be unnested.
* This would also create a bitset for dictonary encoded columns to
* check for matching values specified in allowedList of UnnestDataSource.
*/
private void initialize()
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The javadoc on this method seems out of date (it refers to allowList).

{
getNextRow();
if (allowSet != null) {
if (!allowSet.isEmpty()) {
if (!allowSet.contains((String) unnestListForCurrentRow.get(index))) {
advance();
}
}
}
needInitialization = false;
}

Expand All @@ -338,22 +317,4 @@ private void advanceAndUpdate()
index++;
}
}

/**
* This advances the unnest cursor in cases where an allowList is specified
* and the current value at the unnest cursor is not in the allowList.
* The cursor in such cases is moved till the next match is found.
*
* @return a boolean to indicate whether to stay or move cursor
*/
private boolean matchAndProceed()
{
boolean matchStatus;
if (allowSet == null || allowSet.isEmpty()) {
matchStatus = true;
} else {
matchStatus = allowSet.contains((String) unnestListForCurrentRow.get(index));
}
return !baseCursor.isDone() && !matchStatus;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@
import org.joda.time.DateTime;

import javax.annotation.Nullable;
import java.util.BitSet;
import java.util.LinkedHashSet;

/**
* The cursor to help unnest MVDs with dictionary encoding.
Expand All @@ -58,15 +56,6 @@
* <p>
* Total 5 advance calls above
* <p>
* The allowSet, if available, helps skip over elements that are not in the allowList by moving the cursor to
* the next available match. The hashSet is converted into a bitset (during initialization) for efficiency.
* If allowSet is ['c', 'd'] then the advance moves over to the next available match
* <p>
* advance() -> 2 -> 'c'
* advance() -> 3 -> 'd' (advances base cursor first)
* advance() -> 2 -> 'c'
* <p>
* Total 3 advance calls in this case
* <p>
* The index reference points to the index of each row that the unnest cursor is accessing
* The indexedInts for each row are held in the indexedIntsForCurrentRow object
Expand All @@ -79,8 +68,6 @@ public class UnnestDimensionCursor implements Cursor
private final DimensionSelector dimSelector;
private final VirtualColumn unnestColumn;
private final String outputName;
private final LinkedHashSet<String> allowSet;
private final BitSet allowedBitSet;
private final ColumnSelectorFactory baseColumnSelectorFactory;
private int index;
@Nullable
Expand All @@ -92,8 +79,7 @@ public UnnestDimensionCursor(
Cursor cursor,
ColumnSelectorFactory baseColumnSelectorFactory,
VirtualColumn unnestColumn,
String outputColumnName,
LinkedHashSet<String> allowSet
String outputColumnName
)
{
this.baseCursor = cursor;
Expand All @@ -106,8 +92,6 @@ public UnnestDimensionCursor(
this.index = 0;
this.outputName = outputColumnName;
this.needInitialization = true;
this.allowSet = allowSet;
this.allowedBitSet = new BitSet();
}

@Override
Expand Down Expand Up @@ -158,6 +142,9 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector)
@Override
public boolean matches()
{
if (indexedIntsForCurrentRow.size() <= 0) {
return false;
}
return idForLookup == indexedIntsForCurrentRow.get(index);
}

Expand Down Expand Up @@ -188,14 +175,7 @@ public Object getObject()
if (indexedIntsForCurrentRow == null || indexedIntsForCurrentRow.size() == 0) {
return null;
}
if (allowedBitSet.isEmpty()) {
if (allowSet == null || allowSet.isEmpty()) {
return lookupName(indexedIntsForCurrentRow.get(index));
}
} else if (allowedBitSet.get(indexedIntsForCurrentRow.get(index))) {
return lookupName(indexedIntsForCurrentRow.get(index));
}
return null;
return lookupName(indexedIntsForCurrentRow.get(index));
}

@Override
Expand All @@ -207,9 +187,6 @@ public Class<?> classOfObject()
@Override
public int getValueCardinality()
{
if (!allowedBitSet.isEmpty()) {
return allowedBitSet.cardinality();
}
return dimSelector.getValueCardinality();
}

Expand Down Expand Up @@ -290,9 +267,7 @@ public void advance()
@Override
public void advanceUninterruptibly()
{
do {
advanceAndUpdate();
} while (matchAndProceed());
advanceAndUpdate();
}

@Override
Expand Down Expand Up @@ -330,23 +305,13 @@ public void reset()
@Nullable
private void initialize()
{
IdLookup idLookup = dimSelector.idLookup();
index = 0;
this.indexIntsForRow = new SingleIndexInts();
if (allowSet != null && !allowSet.isEmpty() && idLookup != null) {
for (String s : allowSet) {
if (idLookup.lookupId(s) >= 0) {
allowedBitSet.set(idLookup.lookupId(s));
}
}
}

if (dimSelector.getObject() != null) {
this.indexedIntsForCurrentRow = dimSelector.getRow();
}
if (!allowedBitSet.isEmpty()) {
if (!allowedBitSet.get(indexedIntsForCurrentRow.get(index))) {
advance();
}
}

needInitialization = false;
}

Expand All @@ -362,6 +327,9 @@ private void advanceAndUpdate()
index = 0;
if (!baseCursor.isDone()) {
baseCursor.advanceUninterruptibly();
if (!baseCursor.isDone()) {
indexedIntsForCurrentRow = dimSelector.getRow();
}
}
} else {
if (index >= indexedIntsForCurrentRow.size() - 1) {
Expand All @@ -378,23 +346,6 @@ private void advanceAndUpdate()
}
}

/**
* This advances the unnest cursor in cases where an allowList is specified
* and the current value at the unnest cursor is not in the allowList.
* The cursor in such cases is moved till the next match is found.
*
* @return a boolean to indicate whether to stay or move cursor
*/
private boolean matchAndProceed()
{
boolean matchStatus;
if ((allowSet == null || allowSet.isEmpty()) && allowedBitSet.isEmpty()) {
matchStatus = true;
} else {
matchStatus = allowedBitSet.get(indexedIntsForCurrentRow.get(index));
}
return !baseCursor.isDone() && !matchStatus;
}

// Helper class to help in returning
// getRow from the dimensionSelector
Expand Down
Loading