Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -546,4 +546,39 @@ public static String rpad(String base, Integer len, String pad)
return new String(data);
}

/**
* Returns the string truncated to maxBytes.
* If given string input is shorter than maxBytes, then it remains the same.
*
* @param s The input string to possibly be truncated
* @param maxBytes The max bytes that string input will be truncated to
*
* @return the string after truncated to maxBytes
*/
@Nullable
public static String chop(@Nullable final String s, final int maxBytes)
{
if (s == null) {
return null;
} else {
// Shorten firstValue to what could fit in maxBytes as UTF-8.
final byte[] bytes = new byte[maxBytes];
final int len = StringUtils.toUtf8WithLimit(s, ByteBuffer.wrap(bytes));
return new String(bytes, 0, len, StandardCharsets.UTF_8);
}
}

/**
* Shorten "s" to "maxBytes" chars. Fast and loose because these are *chars* not *bytes*. Use
* {@link #chop(String, int)} for slower, but accurate chopping.
*/
@Nullable
public static String fastLooseChop(@Nullable final String s, final int maxBytes)
{
if (s == null || s.length() <= maxBytes) {
return s;
} else {
return s.substring(0, maxBytes);
}
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To resolve I moved chop and fastLooseChop to StringUtils since this has tests with them and renamed references in StringFirst/Last aggs

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -246,4 +246,32 @@ public void testRpad()
Assert.assertEquals(s5, null);
}

@Test
public void testChop()
{
Assert.assertEquals("foo", StringUtils.chop("foo", 5));
Assert.assertEquals("fo", StringUtils.chop("foo", 2));
Assert.assertEquals("", StringUtils.chop("foo", 0));
Assert.assertEquals("smile 🙂 for", StringUtils.chop("smile 🙂 for the camera", 14));
Assert.assertEquals("smile 🙂", StringUtils.chop("smile 🙂 for the camera", 10));
Assert.assertEquals("smile ", StringUtils.chop("smile 🙂 for the camera", 9));
Assert.assertEquals("smile ", StringUtils.chop("smile 🙂 for the camera", 8));
Assert.assertEquals("smile ", StringUtils.chop("smile 🙂 for the camera", 7));
Assert.assertEquals("smile ", StringUtils.chop("smile 🙂 for the camera", 6));
Assert.assertEquals("smile", StringUtils.chop("smile 🙂 for the camera", 5));
}

@Test
public void testFastLooseChop()
{
Assert.assertEquals("foo", StringUtils.fastLooseChop("foo", 5));
Assert.assertEquals("fo", StringUtils.fastLooseChop("foo", 2));
Assert.assertEquals("", StringUtils.fastLooseChop("foo", 0));
Assert.assertEquals("smile 🙂 for", StringUtils.fastLooseChop("smile 🙂 for the camera", 12));
Assert.assertEquals("smile 🙂 ", StringUtils.fastLooseChop("smile 🙂 for the camera", 9));
Assert.assertEquals("smile 🙂", StringUtils.fastLooseChop("smile 🙂 for the camera", 8));
Assert.assertEquals("smile \uD83D", StringUtils.fastLooseChop("smile 🙂 for the camera", 7));
Assert.assertEquals("smile ", StringUtils.fastLooseChop("smile 🙂 for the camera", 6));
Assert.assertEquals("smile", StringUtils.fastLooseChop("smile 🙂 for the camera", 5));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,32 +20,34 @@
package org.apache.druid.query.aggregation.first;

import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.aggregation.Aggregator;
import org.apache.druid.query.aggregation.SerializablePairLongString;
import org.apache.druid.segment.BaseLongColumnValueSelector;
import org.apache.druid.segment.BaseObjectColumnValueSelector;

import javax.annotation.Nullable;
import org.apache.druid.segment.DimensionHandlerUtils;

public class StringFirstAggregator implements Aggregator
{
@Nullable
private final BaseLongColumnValueSelector timeSelector;
private final BaseObjectColumnValueSelector valueSelector;
private final BaseObjectColumnValueSelector<?> valueSelector;
private final int maxStringBytes;
private final boolean needsFoldCheck;

protected long firstTime;
protected String firstValue;

public StringFirstAggregator(
@Nullable BaseLongColumnValueSelector timeSelector,
BaseObjectColumnValueSelector valueSelector,
int maxStringBytes
BaseLongColumnValueSelector timeSelector,
BaseObjectColumnValueSelector<?> valueSelector,
int maxStringBytes,
boolean needsFoldCheck
)
{
this.valueSelector = valueSelector;
this.timeSelector = timeSelector;
this.maxStringBytes = maxStringBytes;
this.needsFoldCheck = needsFoldCheck;

firstTime = DateTimes.MAX.getMillis();
firstValue = null;
Expand All @@ -54,25 +56,36 @@ public StringFirstAggregator(
@Override
public void aggregate()
{
final SerializablePairLongString inPair = StringFirstLastUtils.readPairFromSelectors(
timeSelector,
valueSelector
);
if (needsFoldCheck) {
// Less efficient code path when folding is a possibility (we must read the value selector first just in case
// it's a foldable object).
final SerializablePairLongString inPair = StringFirstLastUtils.readPairFromSelectors(
timeSelector,
valueSelector
);

if (inPair != null && inPair.rhs != null && inPair.lhs < firstTime) {
firstTime = inPair.lhs;
firstValue = StringUtils.fastLooseChop(inPair.rhs, maxStringBytes);
}
} else {
final long time = timeSelector.getLong();

if (inPair != null && inPair.rhs != null && inPair.lhs < firstTime) {
firstTime = inPair.lhs;
firstValue = inPair.rhs;
if (time < firstTime) {
final String value = DimensionHandlerUtils.convertObjectToString(valueSelector.getObject());

if (firstValue.length() > maxStringBytes) {
firstValue = firstValue.substring(0, maxStringBytes);
if (value != null) {
firstTime = time;
firstValue = StringUtils.fastLooseChop(value, maxStringBytes);
}
}
}
}

@Override
public Object get()
{
return new SerializablePairLongString(firstTime, StringFirstLastUtils.chop(firstValue, maxStringBytes));
return new SerializablePairLongString(firstTime, StringUtils.chop(firstValue, maxStringBytes));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.apache.druid.query.aggregation.BufferAggregator;
import org.apache.druid.query.aggregation.SerializablePairLongString;
import org.apache.druid.query.cache.CacheKeyBuilder;
import org.apache.druid.segment.BaseObjectColumnValueSelector;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.column.ColumnHolder;

Expand Down Expand Up @@ -118,20 +119,24 @@ public StringFirstAggregatorFactory(
@Override
public Aggregator factorize(ColumnSelectorFactory metricFactory)
{
final BaseObjectColumnValueSelector<?> valueSelector = metricFactory.makeColumnValueSelector(fieldName);
return new StringFirstAggregator(
metricFactory.makeColumnValueSelector(ColumnHolder.TIME_COLUMN_NAME),
metricFactory.makeColumnValueSelector(fieldName),
maxStringBytes
valueSelector,
maxStringBytes,
StringFirstLastUtils.selectorNeedsFoldCheck(valueSelector, metricFactory.getColumnCapabilities(fieldName))
);
}

@Override
public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory)
{
final BaseObjectColumnValueSelector<?> valueSelector = metricFactory.makeColumnValueSelector(fieldName);
return new StringFirstBufferAggregator(
metricFactory.makeColumnValueSelector(ColumnHolder.TIME_COLUMN_NAME),
metricFactory.makeColumnValueSelector(fieldName),
maxStringBytes
valueSelector,
maxStringBytes,
StringFirstLastUtils.selectorNeedsFoldCheck(valueSelector, metricFactory.getColumnCapabilities(fieldName))
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.BaseLongColumnValueSelector;
import org.apache.druid.segment.BaseObjectColumnValueSelector;
import org.apache.druid.segment.DimensionHandlerUtils;

import java.nio.ByteBuffer;

Expand All @@ -36,18 +37,21 @@ public class StringFirstBufferAggregator implements BufferAggregator
);

private final BaseLongColumnValueSelector timeSelector;
private final BaseObjectColumnValueSelector valueSelector;
private final BaseObjectColumnValueSelector<?> valueSelector;
private final int maxStringBytes;
private final boolean needsFoldCheck;

public StringFirstBufferAggregator(
BaseLongColumnValueSelector timeSelector,
BaseObjectColumnValueSelector valueSelector,
int maxStringBytes
BaseObjectColumnValueSelector<?> valueSelector,
int maxStringBytes,
boolean needsFoldCheck
)
{
this.timeSelector = timeSelector;
this.valueSelector = valueSelector;
this.maxStringBytes = maxStringBytes;
this.needsFoldCheck = needsFoldCheck;
}

@Override
Expand All @@ -59,20 +63,40 @@ public void init(ByteBuffer buf, int position)
@Override
public void aggregate(ByteBuffer buf, int position)
{
final SerializablePairLongString inPair = StringFirstLastUtils.readPairFromSelectors(
timeSelector,
valueSelector
);
if (needsFoldCheck) {
// Less efficient code path when folding is a possibility (we must read the value selector first just in case
// it's a foldable object).
final SerializablePairLongString inPair = StringFirstLastUtils.readPairFromSelectors(
timeSelector,
valueSelector
);

if (inPair != null && inPair.rhs != null) {
if (inPair != null && inPair.rhs != null) {
final long firstTime = buf.getLong(position);
if (inPair.lhs < firstTime) {
StringFirstLastUtils.writePair(
buf,
position,
new SerializablePairLongString(inPair.lhs, inPair.rhs),
maxStringBytes
);
}
}
} else {
final long time = timeSelector.getLong();
final long firstTime = buf.getLong(position);
if (inPair.lhs < firstTime) {
StringFirstLastUtils.writePair(
buf,
position,
new SerializablePairLongString(inPair.lhs, inPair.rhs),
maxStringBytes
);

if (time < firstTime) {
final String value = DimensionHandlerUtils.convertObjectToString(valueSelector.getObject());

if (value != null) {
StringFirstLastUtils.writePair(
buf,
position,
new SerializablePairLongString(time, value),
maxStringBytes
);
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,32 +24,45 @@
import org.apache.druid.segment.BaseLongColumnValueSelector;
import org.apache.druid.segment.BaseObjectColumnValueSelector;
import org.apache.druid.segment.DimensionHandlerUtils;
import org.apache.druid.segment.NilColumnValueSelector;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ValueType;

import javax.annotation.Nullable;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

public class StringFirstLastUtils
{
private static final int NULL_VALUE = -1;

@Nullable
public static String chop(@Nullable final String s, final int maxBytes)
/**
* Returns whether a given value selector *might* contain SerializablePairLongString objects.
*/
public static boolean selectorNeedsFoldCheck(
final BaseObjectColumnValueSelector<?> valueSelector,
@Nullable final ColumnCapabilities valueSelectorCapabilities
)
{
if (s == null) {
return null;
} else {
// Shorten firstValue to what could fit in maxBytes as UTF-8.
final byte[] bytes = new byte[maxBytes];
final int len = StringUtils.toUtf8WithLimit(s, ByteBuffer.wrap(bytes));
return new String(bytes, 0, len, StandardCharsets.UTF_8);
if (valueSelectorCapabilities != null && valueSelectorCapabilities.getType() != ValueType.COMPLEX) {
// Known, non-complex type.
return false;
}

if (valueSelector instanceof NilColumnValueSelector) {
// Nil column, definitely no SerializablePairLongStrings.
return false;
}

// Check if the selector class could possibly be a SerializablePairLongString (either a superclass or subclass).
final Class<?> clazz = valueSelector.classOfObject();
return clazz.isAssignableFrom(SerializablePairLongString.class)
|| SerializablePairLongString.class.isAssignableFrom(clazz);
}

@Nullable
public static SerializablePairLongString readPairFromSelectors(
final BaseLongColumnValueSelector timeSelector,
final BaseObjectColumnValueSelector valueSelector
final BaseObjectColumnValueSelector<?> valueSelector
)
{
final long time;
Expand Down
Loading