Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -550,8 +550,8 @@ public static String rpad(String base, Integer len, String pad)
* Returns the string truncated to maxBytes.
* If given string input is shorter than maxBytes, then it remains the same.
*
* @param s The input string to possibly be truncated
* @param maxBytes The max bytes that string input will be truncated to
* @param s The input string to possibly be truncated
* @param maxBytes The max bytes that string input will be truncated to
*
* @return the string after truncated to maxBytes
*/
Expand All @@ -568,4 +568,17 @@ public static String chop(@Nullable final String s, final int maxBytes)
}
}

/**
* Shorten "s" to "maxBytes" chars. Fast and loose because these are *chars* not *bytes*. Use
* {@link #chop(String, int)} for slower, but accurate chopping.
*/
@Nullable
public static String fastLooseChop(@Nullable final String s, final int maxBytes)
{
if (s == null || s.length() <= maxBytes) {
return s;
} else {
return s.substring(0, maxBytes);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -246,4 +246,32 @@ public void testRpad()
Assert.assertEquals(s5, null);
}

@Test
public void testChop()
{
Assert.assertEquals("foo", StringUtils.chop("foo", 5));
Assert.assertEquals("fo", StringUtils.chop("foo", 2));
Assert.assertEquals("", StringUtils.chop("foo", 0));
Assert.assertEquals("smile 🙂 for", StringUtils.chop("smile 🙂 for the camera", 14));
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🙂

Assert.assertEquals("smile 🙂", StringUtils.chop("smile 🙂 for the camera", 10));
Assert.assertEquals("smile ", StringUtils.chop("smile 🙂 for the camera", 9));
Assert.assertEquals("smile ", StringUtils.chop("smile 🙂 for the camera", 8));
Assert.assertEquals("smile ", StringUtils.chop("smile 🙂 for the camera", 7));
Assert.assertEquals("smile ", StringUtils.chop("smile 🙂 for the camera", 6));
Assert.assertEquals("smile", StringUtils.chop("smile 🙂 for the camera", 5));
}

@Test
public void testFastLooseChop()
{
Assert.assertEquals("foo", StringUtils.fastLooseChop("foo", 5));
Assert.assertEquals("fo", StringUtils.fastLooseChop("foo", 2));
Assert.assertEquals("", StringUtils.fastLooseChop("foo", 0));
Assert.assertEquals("smile 🙂 for", StringUtils.fastLooseChop("smile 🙂 for the camera", 12));
Assert.assertEquals("smile 🙂 ", StringUtils.fastLooseChop("smile 🙂 for the camera", 9));
Assert.assertEquals("smile 🙂", StringUtils.fastLooseChop("smile 🙂 for the camera", 8));
Assert.assertEquals("smile \uD83D", StringUtils.fastLooseChop("smile 🙂 for the camera", 7));
Assert.assertEquals("smile ", StringUtils.fastLooseChop("smile 🙂 for the camera", 6));
Assert.assertEquals("smile", StringUtils.fastLooseChop("smile 🙂 for the camera", 5));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,28 +25,29 @@
import org.apache.druid.query.aggregation.SerializablePairLongString;
import org.apache.druid.segment.BaseLongColumnValueSelector;
import org.apache.druid.segment.BaseObjectColumnValueSelector;

import javax.annotation.Nullable;
import org.apache.druid.segment.DimensionHandlerUtils;

public class StringFirstAggregator implements Aggregator
{
@Nullable
private final BaseLongColumnValueSelector timeSelector;
private final BaseObjectColumnValueSelector valueSelector;
private final BaseObjectColumnValueSelector<?> valueSelector;
private final int maxStringBytes;
private final boolean needsFoldCheck;

protected long firstTime;
protected String firstValue;

public StringFirstAggregator(
@Nullable BaseLongColumnValueSelector timeSelector,
BaseObjectColumnValueSelector valueSelector,
int maxStringBytes
BaseLongColumnValueSelector timeSelector,
BaseObjectColumnValueSelector<?> valueSelector,
int maxStringBytes,
boolean needsFoldCheck
)
{
this.valueSelector = valueSelector;
this.timeSelector = timeSelector;
this.maxStringBytes = maxStringBytes;
this.needsFoldCheck = needsFoldCheck;

firstTime = DateTimes.MAX.getMillis();
firstValue = null;
Expand All @@ -55,17 +56,28 @@ public StringFirstAggregator(
@Override
public void aggregate()
{
final SerializablePairLongString inPair = StringFirstLastUtils.readPairFromSelectors(
timeSelector,
valueSelector
);
if (needsFoldCheck) {
// Less efficient code path when folding is a possibility (we must read the value selector first just in case
// it's a foldable object).
final SerializablePairLongString inPair = StringFirstLastUtils.readPairFromSelectors(
timeSelector,
valueSelector
);

if (inPair != null && inPair.rhs != null && inPair.lhs < firstTime) {
firstTime = inPair.lhs;
firstValue = StringUtils.fastLooseChop(inPair.rhs, maxStringBytes);
}
} else {
final long time = timeSelector.getLong();

if (inPair != null && inPair.rhs != null && inPair.lhs < firstTime) {
firstTime = inPair.lhs;
firstValue = inPair.rhs;
if (time < firstTime) {
final String value = DimensionHandlerUtils.convertObjectToString(valueSelector.getObject());

if (firstValue.length() > maxStringBytes) {
firstValue = firstValue.substring(0, maxStringBytes);
if (value != null) {
firstTime = time;
firstValue = StringUtils.fastLooseChop(value, maxStringBytes);
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.apache.druid.query.aggregation.BufferAggregator;
import org.apache.druid.query.aggregation.SerializablePairLongString;
import org.apache.druid.query.cache.CacheKeyBuilder;
import org.apache.druid.segment.BaseObjectColumnValueSelector;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.column.ColumnHolder;

Expand Down Expand Up @@ -118,20 +119,24 @@ public StringFirstAggregatorFactory(
@Override
public Aggregator factorize(ColumnSelectorFactory metricFactory)
{
final BaseObjectColumnValueSelector<?> valueSelector = metricFactory.makeColumnValueSelector(fieldName);
return new StringFirstAggregator(
metricFactory.makeColumnValueSelector(ColumnHolder.TIME_COLUMN_NAME),
metricFactory.makeColumnValueSelector(fieldName),
maxStringBytes
valueSelector,
maxStringBytes,
StringFirstLastUtils.selectorNeedsFoldCheck(valueSelector, metricFactory.getColumnCapabilities(fieldName))
);
}

@Override
public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory)
{
final BaseObjectColumnValueSelector<?> valueSelector = metricFactory.makeColumnValueSelector(fieldName);
return new StringFirstBufferAggregator(
metricFactory.makeColumnValueSelector(ColumnHolder.TIME_COLUMN_NAME),
metricFactory.makeColumnValueSelector(fieldName),
maxStringBytes
valueSelector,
maxStringBytes,
StringFirstLastUtils.selectorNeedsFoldCheck(valueSelector, metricFactory.getColumnCapabilities(fieldName))
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.BaseLongColumnValueSelector;
import org.apache.druid.segment.BaseObjectColumnValueSelector;
import org.apache.druid.segment.DimensionHandlerUtils;

import java.nio.ByteBuffer;

Expand All @@ -36,18 +37,21 @@ public class StringFirstBufferAggregator implements BufferAggregator
);

private final BaseLongColumnValueSelector timeSelector;
private final BaseObjectColumnValueSelector valueSelector;
private final BaseObjectColumnValueSelector<?> valueSelector;
private final int maxStringBytes;
private final boolean needsFoldCheck;

public StringFirstBufferAggregator(
BaseLongColumnValueSelector timeSelector,
BaseObjectColumnValueSelector valueSelector,
int maxStringBytes
BaseObjectColumnValueSelector<?> valueSelector,
int maxStringBytes,
boolean needsFoldCheck
)
{
this.timeSelector = timeSelector;
this.valueSelector = valueSelector;
this.maxStringBytes = maxStringBytes;
this.needsFoldCheck = needsFoldCheck;
}

@Override
Expand All @@ -59,20 +63,40 @@ public void init(ByteBuffer buf, int position)
@Override
public void aggregate(ByteBuffer buf, int position)
{
final SerializablePairLongString inPair = StringFirstLastUtils.readPairFromSelectors(
timeSelector,
valueSelector
);
if (needsFoldCheck) {
// Less efficient code path when folding is a possibility (we must read the value selector first just in case
// it's a foldable object).
final SerializablePairLongString inPair = StringFirstLastUtils.readPairFromSelectors(
timeSelector,
valueSelector
);

if (inPair != null && inPair.rhs != null) {
if (inPair != null && inPair.rhs != null) {
final long firstTime = buf.getLong(position);
if (inPair.lhs < firstTime) {
StringFirstLastUtils.writePair(
buf,
position,
new SerializablePairLongString(inPair.lhs, inPair.rhs),
maxStringBytes
);
}
}
} else {
final long time = timeSelector.getLong();
final long firstTime = buf.getLong(position);
if (inPair.lhs < firstTime) {
StringFirstLastUtils.writePair(
buf,
position,
new SerializablePairLongString(inPair.lhs, inPair.rhs),
maxStringBytes
);

if (time < firstTime) {
final String value = DimensionHandlerUtils.convertObjectToString(valueSelector.getObject());

if (value != null) {
StringFirstLastUtils.writePair(
buf,
position,
new SerializablePairLongString(time, value),
maxStringBytes
);
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
import org.apache.druid.segment.BaseLongColumnValueSelector;
import org.apache.druid.segment.BaseObjectColumnValueSelector;
import org.apache.druid.segment.DimensionHandlerUtils;
import org.apache.druid.segment.NilColumnValueSelector;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ValueType;

import javax.annotation.Nullable;
import java.nio.ByteBuffer;
Expand All @@ -32,10 +35,34 @@ public class StringFirstLastUtils
{
private static final int NULL_VALUE = -1;

/**
* Returns whether a given value selector *might* contain SerializablePairLongString objects.
*/
public static boolean selectorNeedsFoldCheck(
final BaseObjectColumnValueSelector<?> valueSelector,
@Nullable final ColumnCapabilities valueSelectorCapabilities
)
{
if (valueSelectorCapabilities != null && valueSelectorCapabilities.getType() != ValueType.COMPLEX) {
// Known, non-complex type.
return false;
}

if (valueSelector instanceof NilColumnValueSelector) {
// Nil column, definitely no SerializablePairLongStrings.
return false;
}

// Check if the selector class could possibly be a SerializablePairLongString (either a superclass or subclass).
final Class<?> clazz = valueSelector.classOfObject();
return clazz.isAssignableFrom(SerializablePairLongString.class)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, it looks to me like this method only returns true if a selector definitely is a SerializablePairLongString . Should this just be returning true if we made it here?

Alternatively, should needsFoldCheck be rebranded as isFold and the aggregate methods of the first last heap and buffer aggs just be reading a SerializablePairLongString from the value selector directly instead of calling readPairFromSelectors?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Apologies, I was wrong, I think this line + comment just isn't super intuitive on first glance that it is a catch-all that any ‘unknown’ selector types will also be true (since clazz will be Object.class in that case). Maybe expanding on the comment would make it require less thought?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed it to:

    // Check if the selector class could possibly be a SerializablePairLongString (either a superclass or subclass).

|| SerializablePairLongString.class.isAssignableFrom(clazz);
}

@Nullable
public static SerializablePairLongString readPairFromSelectors(
final BaseLongColumnValueSelector timeSelector,
final BaseObjectColumnValueSelector valueSelector
final BaseObjectColumnValueSelector<?> valueSelector
)
{
final long time;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,25 +26,29 @@
import org.apache.druid.query.aggregation.first.StringFirstLastUtils;
import org.apache.druid.segment.BaseLongColumnValueSelector;
import org.apache.druid.segment.BaseObjectColumnValueSelector;
import org.apache.druid.segment.DimensionHandlerUtils;

public class StringLastAggregator implements Aggregator
{
private final BaseLongColumnValueSelector timeSelector;
private final BaseObjectColumnValueSelector valueSelector;
private final BaseObjectColumnValueSelector<?> valueSelector;
private final int maxStringBytes;
private final boolean needsFoldCheck;

protected long lastTime;
protected String lastValue;

public StringLastAggregator(
BaseLongColumnValueSelector timeSelector,
BaseObjectColumnValueSelector valueSelector,
int maxStringBytes
final BaseLongColumnValueSelector timeSelector,
final BaseObjectColumnValueSelector<?> valueSelector,
final int maxStringBytes,
final boolean needsFoldCheck
)
{
this.valueSelector = valueSelector;
this.timeSelector = timeSelector;
this.maxStringBytes = maxStringBytes;
this.needsFoldCheck = needsFoldCheck;

lastTime = DateTimes.MIN.getMillis();
lastValue = null;
Expand All @@ -53,22 +57,28 @@ public StringLastAggregator(
@Override
public void aggregate()
{
final SerializablePairLongString inPair = StringFirstLastUtils.readPairFromSelectors(
timeSelector,
valueSelector
);
if (needsFoldCheck) {
// Less efficient code path when folding is a possibility (we must read the value selector first just in case
// it's a foldable object).
final SerializablePairLongString inPair = StringFirstLastUtils.readPairFromSelectors(
timeSelector,
valueSelector
);

if (inPair == null) {
// Don't aggregate nulls.
return;
}
if (inPair != null && inPair.rhs != null && inPair.lhs >= lastTime) {
lastTime = inPair.lhs;
lastValue = StringUtils.fastLooseChop(inPair.rhs, maxStringBytes);
}
} else {
final long time = timeSelector.getLong();

if (inPair != null && inPair.rhs != null && inPair.lhs >= lastTime) {
lastTime = inPair.lhs;
lastValue = inPair.rhs;
if (time >= lastTime) {
final String value = DimensionHandlerUtils.convertObjectToString(valueSelector.getObject());

if (lastValue.length() > maxStringBytes) {
lastValue = lastValue.substring(0, maxStringBytes);
if (value != null) {
lastTime = time;
lastValue = StringUtils.fastLooseChop(value, maxStringBytes);
}
}
}
}
Expand Down
Loading