Skip to content
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ public class DataSketchesHllBenchmark
"hll",
null,
null,
null,
false
);

Expand Down
1 change: 1 addition & 0 deletions codestyle/spotbugs-exclude.xml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
<And>
<Bug pattern="EQ_CHECK_FOR_OPERAND_NOT_COMPATIBLE_WITH_THIS"/>
<Or>
<Class name="org.apache.druid.jackson.DefaultTrueJsonIncludeFilter"/>
<Class name="org.apache.druid.query.scan.ScanQuery$ScanRowsLimitJsonIncludeFilter"/>
<Class name="org.apache.druid.query.scan.ScanQuery$ScanTimeOrderJsonIncludeFilter"/>
<Class name="org.apache.druid.query.scan.ScanQuery$BatchSizeJsonIncludeFilter"/>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.druid.jackson;

import com.fasterxml.jackson.annotation.JsonInclude;

/**
 * {@link JsonInclude} value filter for boolean properties whose default is {@code true}.
 *
 * With {@code JsonInclude.Include.CUSTOM}, Jackson asks an instance of the filter class whether it "equals" the
 * value being serialized, and omits the value when the answer is true. This class therefore makes "creative" use
 * of equals, which requires warnings to be suppressed and also requires spotbugs exclusions
 * (see spotbugs-exclude.xml).
 */
@SuppressWarnings({"EqualsAndHashcode", "EqualsHashCode"})
public class DefaultTrueJsonIncludeFilter // lgtm [java/inconsistent-equals-and-hashcode]
{
  /**
   * Answers true for {@code null} and for a {@link Boolean} holding {@code true}; false for anything else.
   */
  @Override
  public boolean equals(Object obj)
  {
    if (obj == null) {
      return true;
    }
    // Boolean.equals is only true for another Boolean carrying the same value.
    return Boolean.TRUE.equals(obj);
  }
}
15 changes: 8 additions & 7 deletions docs/querying/sql-query-context.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,14 @@ Configure Druid SQL query planning using the parameters in the table below.
|Parameter|Description|Default value|
|---------|-----------|-------------|
|`sqlQueryId`|Unique identifier given to this SQL query. For HTTP client, it will be returned in `X-Druid-SQL-Query-Id` header.<br/><br/>To specify a unique identifier for SQL query, use `sqlQueryId` instead of [`queryId`](query-context.md). Setting `queryId` for a SQL request has no effect. All native queries underlying SQL use an auto-generated `queryId`.|auto-generated|
|`sqlTimeZone`|Sets the time zone for this connection, which will affect how time functions and timestamp literals behave. Should be a time zone name like "America/Los_Angeles" or offset like "-08:00".|druid.sql.planner.sqlTimeZone on the Broker (default: UTC)|
|`sqlStringifyArrays`|When set to true, result columns which return array values will be serialized into a JSON string in the response instead of as an array (default: true, except for JDBC connections, where it is always false)|
|`useApproximateCountDistinct`|Whether to use an approximate cardinality algorithm for `COUNT(DISTINCT foo)`.|druid.sql.planner.useApproximateCountDistinct on the Broker (default: true)|
|`useGroupingSetForExactDistinct`|Whether to use grouping sets to execute queries with multiple exact distinct aggregations.|druid.sql.planner.useGroupingSetForExactDistinct on the Broker (default: false)|
|`useApproximateTopN`|Whether to use approximate [TopN queries](topnquery.md) when a SQL query could be expressed as such. If false, exact [GroupBy queries](groupbyquery.md) will be used instead.|druid.sql.planner.useApproximateTopN on the Broker (default: true)|
|`enableTimeBoundaryPlanning`|If true, SQL queries will get converted to TimeBoundary queries wherever possible. TimeBoundary queries are very efficient for min-max calculation on __time column in a datasource |druid.query.default.context.enableTimeBoundaryPlanning on the Broker (default: false)|
|`useNativeQueryExplain`|If true, `EXPLAIN PLAN FOR` will return the explain plan as a JSON representation of equivalent native query(s), else it will return the original version of explain plan generated by Calcite.|`druid.sql.planner.useNativeQueryExplain` on the Broker (default: true)|
|`sqlTimeZone`|Sets the time zone for this connection, which will affect how time functions and timestamp literals behave. Should be a time zone name like "America/Los_Angeles" or offset like "-08:00".|`druid.sql.planner.sqlTimeZone` on the Broker (default: UTC)|
|`sqlStringifyArrays`|When set to true, result columns which return array values will be serialized into a JSON string in the response instead of as an array.|true, except for JDBC connections, where it is always false|
|`useApproximateCountDistinct`|Whether to use an approximate cardinality algorithm for `COUNT(DISTINCT foo)`.|`druid.sql.planner.useApproximateCountDistinct` on the Broker (default: true)|
|`useGroupingSetForExactDistinct`|Whether to use grouping sets to execute queries with multiple exact distinct aggregations.|`druid.sql.planner.useGroupingSetForExactDistinct` on the Broker (default: false)|
|`useApproximateTopN`|Whether to use approximate [TopN queries](topnquery.md) when a SQL query could be expressed as such. If false, exact [GroupBy queries](groupbyquery.md) will be used instead.|`druid.sql.planner.useApproximateTopN` on the Broker (default: true)|
|`enableTimeBoundaryPlanning`|If true, SQL queries will get converted to TimeBoundary queries wherever possible. TimeBoundary queries are very efficient for min-max calculation on the `__time` column in a datasource.|`druid.query.default.context.enableTimeBoundaryPlanning` on the Broker (default: false)|
|`useNativeQueryExplain`|If true, `EXPLAIN PLAN FOR` will return the explain plan as a JSON representation of equivalent native query(s), else it will return the original version of explain plan generated by Calcite.<br /><br />This property is provided for backwards compatibility. It is not recommended to use this parameter unless you were depending on the older behavior.|`druid.sql.planner.useNativeQueryExplain` on the Broker (default: true)|
|`sqlFinalizeOuterSketches`|If false (default behavior in Druid 25.0.0 and later), `DS_HLL`, `DS_THETA`, and `DS_QUANTILES_SKETCH` return sketches in query results, as documented. If true (default behavior in Druid 24.0.1 and earlier), sketches from these functions are finalized when they appear in query results.<br /><br />This property is provided for backwards compatibility with behavior in Druid 24.0.1 and earlier. It is not recommended to use this parameter unless you were depending on the older behavior. Instead, use a function that does not return a sketch, such as `APPROX_COUNT_DISTINCT_DS_HLL`, `APPROX_COUNT_DISTINCT_DS_THETA`, `APPROX_QUANTILE_DS`, `DS_THETA_ESTIMATE`, or `DS_GET_QUANTILE`.|`druid.query.default.context.sqlFinalizeOuterSketches` on the Broker (default: false)|

## Setting the query context
The query context parameters can be specified as a "context" object in the [JSON API](sql-api.md) or as a [JDBC connection properties object](sql-jdbc.md).
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.druid.query.aggregation.datasketches;

import org.apache.druid.query.QueryContexts;
import org.apache.druid.sql.calcite.planner.PlannerContext;

/**
 * Query-context keys and accessors used by the SQL sketch functions.
 */
public class SketchQueryContext
{
  public static final String CTX_FINALIZE_OUTER_SKETCHES = "sqlFinalizeOuterSketches";
  public static final boolean DEFAULT_FINALIZE_OUTER_SKETCHES = false;

  /**
   * Looks up {@link #CTX_FINALIZE_OUTER_SKETCHES} in the planner's query context map and interprets the entry
   * through {@code QueryContexts.getAsBoolean}, supplying {@link #DEFAULT_FINALIZE_OUTER_SKETCHES} as the default.
   *
   * @param plannerContext planner context whose query context map is consulted
   */
  public static boolean isFinalizeOuterSketches(final PlannerContext plannerContext)
  {
    final Object rawValue = plannerContext.queryContextMap().get(CTX_FINALIZE_OUTER_SKETCHES);
    return QueryContexts.getAsBoolean(CTX_FINALIZE_OUTER_SKETCHES, rawValue, DEFAULT_FINALIZE_OUTER_SKETCHES);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@

package org.apache.druid.query.aggregation.datasketches.hll;

import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import org.apache.datasketches.hll.HllSketch;
import org.apache.datasketches.hll.TgtHllType;
import org.apache.datasketches.hll.Union;
import org.apache.druid.jackson.DefaultTrueJsonIncludeFilter;
import org.apache.druid.query.aggregation.AggregateCombiner;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.ObjectAggregateCombiner;
Expand All @@ -42,6 +44,7 @@
public abstract class HllSketchAggregatorFactory extends AggregatorFactory
{
public static final boolean DEFAULT_ROUND = false;
public static final boolean DEFAULT_SHOULD_FINALIZE = true;
public static final int DEFAULT_LG_K = 12;
public static final TgtHllType DEFAULT_TGT_HLL_TYPE = TgtHllType.HLL_4;

Expand All @@ -52,20 +55,23 @@ public abstract class HllSketchAggregatorFactory extends AggregatorFactory
private final String fieldName;
private final int lgK;
private final TgtHllType tgtHllType;
private final boolean shouldFinalize;
private final boolean round;

/**
 * Shared constructor for the concrete HLL sketch aggregator factories.
 *
 * @param name           output name; must not be null
 * @param fieldName      input field name; must not be null
 * @param lgK            lgK value for the sketch; {@link #DEFAULT_LG_K} is used when null
 * @param tgtHllType     name of a {@link TgtHllType} constant, parsed with {@code TgtHllType.valueOf};
 *                       {@link #DEFAULT_TGT_HLL_TYPE} is used when null
 * @param shouldFinalize when false, {@code finalizeComputation} hands back its input unchanged;
 *                       {@link #DEFAULT_SHOULD_FINALIZE} is used when null
 * @param round          rounding flag, exposed through {@code isRound()}
 *
 * @throws NullPointerException if {@code name} or {@code fieldName} is null
 */
HllSketchAggregatorFactory(
    final String name,
    final String fieldName,
    @Nullable final Integer lgK,
    @Nullable final String tgtHllType,
    @Nullable final Boolean shouldFinalize,
    final boolean round
)
{
  this.name = Objects.requireNonNull(name);
  this.fieldName = Objects.requireNonNull(fieldName);
  this.lgK = lgK == null ? DEFAULT_LG_K : lgK;
  this.tgtHllType = tgtHllType == null ? DEFAULT_TGT_HLL_TYPE : TgtHllType.valueOf(tgtHllType);
  // Boxed so that an absent JSON property (null) can fall back to the default rather than to false.
  this.shouldFinalize = shouldFinalize == null ? DEFAULT_SHOULD_FINALIZE : shouldFinalize;
  this.round = round;
}

Expand Down Expand Up @@ -95,6 +101,14 @@ public String getTgtHllType()
}

/**
 * Whether finalization is enabled for this factory. Serialization of this property is governed by
 * {@link DefaultTrueJsonIncludeFilter}, so a value of {@code true} is left out of the JSON output.
 */
@JsonProperty
@JsonInclude(value = JsonInclude.Include.CUSTOM, valueFilter = DefaultTrueJsonIncludeFilter.class)
public boolean isShouldFinalize()
{
  return this.shouldFinalize;
}

@JsonProperty
@JsonInclude(JsonInclude.Include.NON_DEFAULT)
public boolean isRound()
{
return round;
Expand All @@ -114,7 +128,7 @@ public List<String> requiredFields()
public List<AggregatorFactory> getRequiredColumns()
{
return Collections.singletonList(
new HllSketchBuildAggregatorFactory(fieldName, fieldName, lgK, tgtHllType.toString(), round)
new HllSketchBuildAggregatorFactory(fieldName, fieldName, lgK, tgtHllType.toString(), shouldFinalize, round)
);
}

Expand Down Expand Up @@ -179,9 +193,10 @@ public ColumnType getResultType()
@Override
public Object finalizeComputation(@Nullable final Object object)
{
if (object == null) {
return null;
if (!shouldFinalize || object == null) {
return object;
}

final HllSketch sketch = (HllSketch) object;
final double estimate = sketch.getEstimate();

Expand All @@ -201,7 +216,14 @@ public Comparator<HllSketch> getComparator()
@Override
public AggregatorFactory getCombiningFactory()
{
return new HllSketchMergeAggregatorFactory(getName(), getName(), getLgK(), getTgtHllType(), isRound());
return new HllSketchMergeAggregatorFactory(
getName(),
getName(),
getLgK(),
getTgtHllType(),
isShouldFinalize(),
isRound()
);
}

@Override
Expand All @@ -212,51 +234,41 @@ public byte[] getCacheKey()
}

@Override
public boolean equals(final Object object)
public boolean equals(Object o)
{
if (this == object) {
if (this == o) {
return true;
}
if (object == null || !getClass().equals(object.getClass())) {
return false;
}
final HllSketchAggregatorFactory that = (HllSketchAggregatorFactory) object;
if (!name.equals(that.getName())) {
if (o == null || getClass() != o.getClass()) {
return false;
}
if (!fieldName.equals(that.getFieldName())) {
return false;
}
if (lgK != that.getLgK()) {
return false;
}
if (!tgtHllType.equals(that.tgtHllType)) {
return false;
}
if (round != that.round) {
return false;
}
return true;
HllSketchAggregatorFactory that = (HllSketchAggregatorFactory) o;
return lgK == that.lgK
&& shouldFinalize == that.shouldFinalize
&& round == that.round
&& Objects.equals(name, that.name)
&& Objects.equals(fieldName, that.fieldName)
&& tgtHllType == that.tgtHllType;
}

@Override
public int hashCode()
{
return Objects.hash(name, fieldName, lgK, tgtHllType);
return Objects.hash(name, fieldName, lgK, tgtHllType, shouldFinalize, round);
}

@Override
public String toString()
{
return getClass().getSimpleName() + " {"
+ " name=" + name
+ ", fieldName=" + fieldName
+ ", lgK=" + lgK
+ ", tgtHllType=" + tgtHllType
+ ", round=" + round
+ " }";
return getClass().getSimpleName() + "{" +
"name='" + name + '\'' +
", fieldName='" + fieldName + '\'' +
", lgK=" + lgK +
", tgtHllType=" + tgtHllType +
(shouldFinalize != DEFAULT_SHOULD_FINALIZE ? ", shouldFinalize=" + shouldFinalize : "") +
(round != DEFAULT_ROUND ? ", round=" + round : "") +
'}';
}

/**
 * Cache type id byte supplied by each concrete subclass.
 * NOTE(review): presumably consumed by getCacheKey() to tell factory types apart; confirm against that method.
 */
protected abstract byte getCacheTypeId();

}
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,11 @@ public HllSketchBuildAggregatorFactory(
@JsonProperty("fieldName") final String fieldName,
@JsonProperty("lgK") @Nullable final Integer lgK,
@JsonProperty("tgtHllType") @Nullable final String tgtHllType,
@JsonProperty("shouldFinalize") final Boolean shouldFinalize,
@JsonProperty("round") final boolean round
)
{
super(name, fieldName, lgK, tgtHllType, round);
super(name, fieldName, lgK, tgtHllType, shouldFinalize, round);
}


Expand Down Expand Up @@ -125,7 +126,14 @@ public int getMaxIntermediateSize()
@Override
public AggregatorFactory withName(String newName)
{
return new HllSketchBuildAggregatorFactory(newName, getFieldName(), getLgK(), getTgtHllType(), isRound());
return new HllSketchBuildAggregatorFactory(
newName,
getFieldName(),
getLgK(),
getTgtHllType(),
isShouldFinalize(),
isRound()
);
}

private void validateInputs(@Nullable ColumnCapabilities capabilities)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,11 @@ public HllSketchMergeAggregatorFactory(
@JsonProperty("fieldName") final String fieldName,
@JsonProperty("lgK") @Nullable final Integer lgK,
@JsonProperty("tgtHllType") @Nullable final String tgtHllType,
@JsonProperty("shouldFinalize") final Boolean shouldFinalize,
@JsonProperty("round") final boolean round
)
{
super(name, fieldName, lgK, tgtHllType, round);
super(name, fieldName, lgK, tgtHllType, shouldFinalize, round);
}

@Override
Expand All @@ -64,16 +65,19 @@ public AggregatorFactory getMergingFactory(AggregatorFactory other) throws Aggre
if (other.getName().equals(this.getName()) && other instanceof HllSketchMergeAggregatorFactory) {
HllSketchMergeAggregatorFactory castedOther = (HllSketchMergeAggregatorFactory) other;

return new HllSketchMergeAggregatorFactory(
getName(),
getName(),
Math.max(getLgK(), castedOther.getLgK()),
getTgtHllType().compareTo(castedOther.getTgtHllType()) < 0 ? castedOther.getTgtHllType() : getTgtHllType(),
isRound() || castedOther.isRound()
);
} else {
throw new AggregatorFactoryNotMergeableException(this, other);
if (castedOther.isShouldFinalize() == isShouldFinalize()) {
return new HllSketchMergeAggregatorFactory(
getName(),
getName(),
Math.max(getLgK(), castedOther.getLgK()),
getTgtHllType().compareTo(castedOther.getTgtHllType()) < 0 ? castedOther.getTgtHllType() : getTgtHllType(),
isShouldFinalize(),
isRound() || castedOther.isRound()
);
}
}

throw new AggregatorFactoryNotMergeableException(this, other);
}

@Override
Expand Down Expand Up @@ -134,7 +138,14 @@ public int getMaxIntermediateSize()
@Override
public AggregatorFactory withName(String newName)
{
return new HllSketchMergeAggregatorFactory(newName, getFieldName(), getLgK(), getTgtHllType(), isRound());
return new HllSketchMergeAggregatorFactory(
newName,
getFieldName(),
getLgK(),
getTgtHllType(),
isShouldFinalize(),
isRound()
);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ public class HllSketchApproxCountDistinctSqlAggregator extends HllSketchBaseSqlA

private static final SqlAggFunction FUNCTION_INSTANCE = new HllSketchApproxCountDistinctSqlAggFunction();

/**
 * Creates the aggregator, always passing {@code true} to the superclass constructor.
 * NOTE(review): the flag presumably tells the base SQL aggregator to finalize the sketch into a count estimate;
 * confirm against the superclass constructor.
 */
public HllSketchApproxCountDistinctSqlAggregator()
{
super(true);
}

@Override
public SqlAggFunction calciteFunction()
{
Expand Down
Loading