Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -221,9 +221,10 @@ public <T> Expression predicate(BoundPredicate<T> pred) {
// similarly, if partitioning by day(ts) and hour(ts), the more restrictive
// projection should be used. ts = 2019-01-01T01:00:00 produces day=2019-01-01 and
// hour=2019-01-01-01. the value will be in 2019-01-01-01 and not in 2019-01-01-02.
result = Expressions.and(
result,
((Transform<T, ?>) part.transform()).project(part.name(), pred));
UnboundPredicate<?> inclusiveProjection = ((Transform<T, ?>) part.transform()).project(part.name(), pred);
if (inclusiveProjection != null) {
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added null check

result = Expressions.and(result, inclusiveProjection);
}
}

return result;
Expand Down Expand Up @@ -251,9 +252,10 @@ public <T> Expression predicate(BoundPredicate<T> pred) {
// any timestamp where either projection predicate is true must match the original
// predicate. For example, ts = 2019-01-01T03:00:00 matches the hour projection but not
// the day, but does match the original predicate.
result = Expressions.or(
result,
((Transform<T, ?>) part.transform()).projectStrict(part.name(), pred));
UnboundPredicate<?> strictProjection = ((Transform<T, ?>) part.transform()).projectStrict(part.name(), pred);
if (strictProjection != null) {
result = Expressions.or(result, strictProjection);
}
}

return result;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,9 @@

package org.apache.iceberg.expressions;

import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import java.io.Serializable;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import org.apache.iceberg.PartitionField;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.StructLike;
Expand Down Expand Up @@ -200,42 +197,66 @@ public <T> Expression notEq(BoundReference<T> ref, Literal<T> lit) {
@Override
@SuppressWarnings("unchecked")
public <T> Expression predicate(BoundPredicate<T> pred) {
// Get the strict projection of this predicate in partition data, then use it to determine
// whether to return the original predicate. The strict projection returns true iff the
// original predicate would have returned true, so the predicate can be eliminated if the
// strict projection evaluates to true.
/**
* Get the strict projection and inclusive projection of this predicate in partition data,
* then use them to determine whether to return the original predicate. The strict projection
* returns true iff the original predicate would have returned true, so the predicate can be
* eliminated if the strict projection evaluates to true. Similarly the inclusive projection
* returns false iff the original predicate would have returned false, so the predicate can
* also be eliminated if the inclusive projection evaluates to false.
*/

//
// If there is no strict projection or if it evaluates to false, then return the predicate.
List<PartitionField> parts = spec.getFieldsBySourceId(pred.ref().fieldId());
if (parts == null) {
return pred; // not associated with a partition field, can't be evaluated
}

List<UnboundPredicate<?>> strictProjections = Lists.transform(parts,
part -> ((Transform<T, ?>) part.transform()).projectStrict(part.name(), pred));
for (PartitionField part : parts) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like how you restructured this to simply return alwaysTrue or alwaysFalse if any projection can determine the result. That's a lot simpler than before.


if (Iterables.all(strictProjections, Objects::isNull)) {
// if there are no strict projections, the predicate must be in the residual
return pred;
}
// checking the strict projection
UnboundPredicate<?> strictProjection = ((Transform<T, ?>) part.transform()).projectStrict(part.name(), pred);
Expression strictResult = null;

if (strictProjection != null) {
Expression bound = strictProjection.bind(spec.partitionType(), caseSensitive);
if (bound instanceof BoundPredicate) {
strictResult = super.predicate((BoundPredicate<?>) bound);
} else {
// if the result is not a predicate, then it must be a constant like alwaysTrue or alwaysFalse
strictResult = bound;
Comment thread
moulimukherjee marked this conversation as resolved.
}
}

Expression result = Expressions.alwaysFalse();
for (UnboundPredicate<?> strictProjection : strictProjections) {
if (strictProjection == null) {
continue;
if (strictResult != null && strictResult.op() == Expression.Operation.TRUE) {
// If strict is true, returning true
return Expressions.alwaysTrue();
}

Expression bound = strictProjection.bind(spec.partitionType(), caseSensitive);
if (bound instanceof BoundPredicate) {
// evaluate the bound predicate, which will return alwaysTrue or alwaysFalse
result = Expressions.or(result, super.predicate((BoundPredicate<?>) bound));
} else {
// update the result expression with the non-predicate residual (e.g. alwaysTrue)
result = Expressions.or(result, bound);
// checking the inclusive projection
UnboundPredicate<?> inclusiveProjection = ((Transform<T, ?>) part.transform()).project(part.name(), pred);
Expression inclusiveResult = null;
if (inclusiveProjection != null) {
Expression boundInclusive = inclusiveProjection.bind(spec.partitionType(), caseSensitive);
if (boundInclusive instanceof BoundPredicate) {
// using predicate method specific to inclusive
inclusiveResult = super.predicate((BoundPredicate<?>) boundInclusive);
} else {
// if the result is not a predicate, then it must be a constant like alwaysTrue or alwaysFalse
inclusiveResult = boundInclusive;
}
}

if (inclusiveResult != null && inclusiveResult.op() == Expression.Operation.FALSE) {
// If inclusive is false, returning false
return Expressions.alwaysFalse();
}

}

return result;
// neither the strict nor the inclusive projection was conclusive, returning the original pred
return pred;
}

@Override
Expand Down
7 changes: 5 additions & 2 deletions api/src/main/java/org/apache/iceberg/transforms/Dates.java
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,11 @@ public UnboundPredicate<Integer> project(String fieldName, BoundPredicate<Intege
}

@Override
public UnboundPredicate<Integer> projectStrict(String fieldName, BoundPredicate<Integer> predicate) {
return null;
public UnboundPredicate<Integer> projectStrict(String fieldName, BoundPredicate<Integer> pred) {
  // Anything other than a null check is handed to the shared integer strict-projection helper.
  if (pred.op() != NOT_NULL && pred.op() != IS_NULL) {
    return ProjectionUtil.truncateIntegerStrict(fieldName, pred, this);
  }

  // IS_NULL / NOT_NULL keep their operation and are simply re-targeted at the partition field.
  return Expressions.predicate(pred.op(), fieldName);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,90 @@ static <T> UnboundPredicate<T> truncateInteger(
}
}

static UnboundPredicate<Integer> truncateIntegerStrict(
String name, BoundPredicate<Integer> pred, Transform<Integer, Integer> transform) {
int boundary = pred.literal().value();
switch (pred.op()) {
case LT:
// predicate would be <= the previous partition
return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary) - 1);
case LT_EQ:
// Checking if the literal is at the upper partition boundary
if (transform.apply(boundary + 1).equals(transform.apply(boundary))) {
// Literal is not at upper boundary, for eg: 2019-07-02T02:12:34.0000
// the predicate can be < 2019-07-01
return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary) - 1);
} else {
// Literal is not at upper boundary, for eg: 2019-07-02T23:59:59.99999
// the predicate can be <= 2019-07-02
return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary));
}
case GT:
// predicate would be >= the next partition
return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary) + 1);
case GT_EQ:
// Checking if the literal is at the lower partition boundary
if (transform.apply(boundary - 1).equals(transform.apply(boundary))) {
// Literal is not at lower boundary, for eg: 2019-07-02T02:12:34.0000
// the predicate can be >= 2019-07-03
return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary) + 1);
} else {
// Literal was at the lower boundary, for eg: 2019-07-02T00:00:00.0000
// the predicate can be >= 2019-07-02
return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary));
}
case NOT_EQ:
return predicate(Expression.Operation.NOT_EQ, name, transform.apply(boundary));
case EQ:
// there is no predicate that guarantees equality because adjacent ints transform to the same value
return null;
default:
Comment thread
moulimukherjee marked this conversation as resolved.
return null;
}
}

static UnboundPredicate<Integer> truncateLongStrict(
String name, BoundPredicate<Long> pred, Transform<Long, Integer> transform) {
long boundary = pred.literal().value();
switch (pred.op()) {
case LT:
// predicate would be <= the previous partition
return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary) - 1);
case LT_EQ:
// Checking if the literal is at the upper partition boundary
if (transform.apply(boundary + 1L).equals(transform.apply(boundary))) {
Comment thread
moulimukherjee marked this conversation as resolved.
// Literal is not at upper boundary, for eg: 2019-07-02T02:12:34.0000
// the predicate can be <= 2019-07-01
return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary) - 1);
} else {
// Literal is not at upper boundary, for eg: 2019-07-02T23:59:59.99999
// the predicate can be <= 2019-07-02
return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary));
}
case GT:
// predicate would be >= the next partition
return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary) + 1);
case GT_EQ:
// Checking if the literal is at the lower partition boundary
if (transform.apply(boundary - 1L).equals(transform.apply(boundary))) {
// Literal is not at lower boundary, for eg: 2019-07-02T02:12:34.0000
// the predicate can be >= 2019-07-03
return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary) + 1);
} else {
// Literal was at the lower boundary, for eg: 2019-07-02T00:00:00.0000
// the predicate can be >= 2019-07-02
return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary));
}
case NOT_EQ:
return predicate(Expression.Operation.NOT_EQ, name, transform.apply(boundary));
case EQ:
// there is no predicate that guarantees equality because adjacent longs transform to the same value
return null;
default:
return null;
}
}

static <T> UnboundPredicate<T> truncateLong(
String name, BoundPredicate<Long> pred, Transform<Long, T> transform) {
long boundary = pred.literal().value();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,11 @@ public UnboundPredicate<Integer> project(String fieldName, BoundPredicate<Long>
}

@Override
public UnboundPredicate<Integer> projectStrict(String fieldName, BoundPredicate<Long> predicate) {
return null;
public UnboundPredicate<Integer> projectStrict(String fieldName, BoundPredicate<Long> pred) {
  // Anything other than a null check is handed to the shared long strict-projection helper.
  if (pred.op() != NOT_NULL && pred.op() != IS_NULL) {
    return ProjectionUtil.truncateLongStrict(fieldName, pred, this);
  }

  // IS_NULL / NOT_NULL keep their operation and are simply re-targeted at the partition field.
  return Expressions.predicate(pred.op(), fieldName);
}

@Override
Expand Down
Loading