diff --git a/api/src/main/java/org/apache/iceberg/expressions/Projections.java b/api/src/main/java/org/apache/iceberg/expressions/Projections.java index 50d0693d4590..f800b350858e 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/Projections.java +++ b/api/src/main/java/org/apache/iceberg/expressions/Projections.java @@ -221,9 +221,10 @@ public Expression predicate(BoundPredicate pred) { // similarly, if partitioning by day(ts) and hour(ts), the more restrictive // projection should be used. ts = 2019-01-01T01:00:00 produces day=2019-01-01 and // hour=2019-01-01-01. the value will be in 2019-01-01-01 and not in 2019-01-01-02. - result = Expressions.and( - result, - ((Transform) part.transform()).project(part.name(), pred)); + UnboundPredicate inclusiveProjection = ((Transform) part.transform()).project(part.name(), pred); + if (inclusiveProjection != null) { + result = Expressions.and(result, inclusiveProjection); + } } return result; @@ -251,9 +252,10 @@ public Expression predicate(BoundPredicate pred) { // any timestamp where either projection predicate is true must match the original // predicate. For example, ts = 2019-01-01T03:00:00 matches the hour projection but not // the day, but does match the original predicate. 
- result = Expressions.or( - result, - ((Transform) part.transform()).projectStrict(part.name(), pred)); + UnboundPredicate strictProjection = ((Transform) part.transform()).projectStrict(part.name(), pred); + if (strictProjection != null) { + result = Expressions.or(result, strictProjection); + } } return result; diff --git a/api/src/main/java/org/apache/iceberg/expressions/ResidualEvaluator.java b/api/src/main/java/org/apache/iceberg/expressions/ResidualEvaluator.java index 0fae40d3decb..dd3a0b8da8a0 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/ResidualEvaluator.java +++ b/api/src/main/java/org/apache/iceberg/expressions/ResidualEvaluator.java @@ -19,12 +19,9 @@ package org.apache.iceberg.expressions; -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; import java.io.Serializable; import java.util.Comparator; import java.util.List; -import java.util.Objects; import org.apache.iceberg.PartitionField; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.StructLike; @@ -200,10 +197,15 @@ public Expression notEq(BoundReference ref, Literal lit) { @Override @SuppressWarnings("unchecked") public Expression predicate(BoundPredicate pred) { - // Get the strict projection of this predicate in partition data, then use it to determine - // whether to return the original predicate. The strict projection returns true iff the - // original predicate would have returned true, so the predicate can be eliminated if the - // strict projection evaluates to true. + /** + * Get the strict projection and inclusive projection of this predicate in partition data, + * then use them to determine whether to return the original predicate. The strict projection + * returns true iff the original predicate would have returned true, so the predicate can be + * eliminated if the strict projection evaluates to true. 
Similarly the inclusive projection + * returns false iff the original predicate would have returned false, so the predicate can + * also be eliminated if the inclusive projection evaluates to false. + */ + // // If there is no strict projection or if it evaluates to false, then return the predicate. List parts = spec.getFieldsBySourceId(pred.ref().fieldId()); @@ -211,31 +213,50 @@ public Expression predicate(BoundPredicate pred) { return pred; // not associated inclusive a partition field, can't be evaluated } - List> strictProjections = Lists.transform(parts, - part -> ((Transform) part.transform()).projectStrict(part.name(), pred)); + for (PartitionField part : parts) { - if (Iterables.all(strictProjections, Objects::isNull)) { - // if there are no strict projections, the predicate must be in the residual - return pred; - } + // checking the strict projection + UnboundPredicate strictProjection = ((Transform) part.transform()).projectStrict(part.name(), pred); + Expression strictResult = null; + + if (strictProjection != null) { + Expression bound = strictProjection.bind(spec.partitionType(), caseSensitive); + if (bound instanceof BoundPredicate) { + strictResult = super.predicate((BoundPredicate) bound); + } else { + // if the result is not a predicate, then it must be a constant like alwaysTrue or alwaysFalse + strictResult = bound; + } + } - Expression result = Expressions.alwaysFalse(); - for (UnboundPredicate strictProjection : strictProjections) { - if (strictProjection == null) { - continue; + if (strictResult != null && strictResult.op() == Expression.Operation.TRUE) { + // If strict is true, returning true + return Expressions.alwaysTrue(); } - Expression bound = strictProjection.bind(spec.partitionType(), caseSensitive); - if (bound instanceof BoundPredicate) { - // evaluate the bound predicate, which will return alwaysTrue or alwaysFalse - result = Expressions.or(result, super.predicate((BoundPredicate) bound)); - } else { - // update the result 
expression with the non-predicate residual (e.g. alwaysTrue) - result = Expressions.or(result, bound); + // checking the inclusive projection + UnboundPredicate inclusiveProjection = ((Transform) part.transform()).project(part.name(), pred); + Expression inclusiveResult = null; + if (inclusiveProjection != null) { + Expression boundInclusive = inclusiveProjection.bind(spec.partitionType(), caseSensitive); + if (boundInclusive instanceof BoundPredicate) { + // using predicate method specific to inclusive + inclusiveResult = super.predicate((BoundPredicate) boundInclusive); + } else { + // if the result is not a predicate, then it must be a constant like alwaysTrue or alwaysFalse + inclusiveResult = boundInclusive; + } } + + if (inclusiveResult != null && inclusiveResult.op() == Expression.Operation.FALSE) { + // If inclusive is false, returning false + return Expressions.alwaysFalse(); + } + } - return result; + // neither strict nor inclusive predicate was conclusive, returning the original pred + return pred; } @Override diff --git a/api/src/main/java/org/apache/iceberg/transforms/Dates.java b/api/src/main/java/org/apache/iceberg/transforms/Dates.java index a57d6d95eaa4..94714d2e7069 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Dates.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Dates.java @@ -73,8 +73,11 @@ public UnboundPredicate project(String fieldName, BoundPredicate projectStrict(String fieldName, BoundPredicate predicate) { - return null; + public UnboundPredicate projectStrict(String fieldName, BoundPredicate pred) { + if (pred.op() == NOT_NULL || pred.op() == IS_NULL) { + return Expressions.predicate(pred.op(), fieldName); + } + return ProjectionUtil.truncateIntegerStrict(fieldName, pred, this); } @Override diff --git a/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java b/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java index 04da036e1637..ef1e0c3591ed 100644 --- 
a/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java +++ b/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java @@ -52,6 +52,90 @@ static UnboundPredicate truncateInteger( } } + static UnboundPredicate truncateIntegerStrict( + String name, BoundPredicate pred, Transform transform) { + int boundary = pred.literal().value(); + switch (pred.op()) { + case LT: + // predicate would be <= the previous partition + return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary) - 1); + case LT_EQ: + // Checking if the literal is at the upper partition boundary + if (transform.apply(boundary + 1).equals(transform.apply(boundary))) { + // Literal is not at upper boundary, for eg: 2019-07-02T02:12:34.0000 + // the predicate can be <= 2019-07-01 + return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary) - 1); + } else { + // Literal is at the upper boundary, for eg: 2019-07-02T23:59:59.99999 + // the predicate can be <= 2019-07-02 + return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary)); + } + case GT: + // predicate would be >= the next partition + return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary) + 1); + case GT_EQ: + // Checking if the literal is at the lower partition boundary + if (transform.apply(boundary - 1).equals(transform.apply(boundary))) { + // Literal is not at lower boundary, for eg: 2019-07-02T02:12:34.0000 + // the predicate can be >= 2019-07-03 + return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary) + 1); + } else { + // Literal was at the lower boundary, for eg: 2019-07-02T00:00:00.0000 + // the predicate can be >= 2019-07-02 + return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary)); + } + case NOT_EQ: + return predicate(Expression.Operation.NOT_EQ, name, transform.apply(boundary)); + case EQ: + // there is no predicate that guarantees equality because adjacent ints transform to the same value + return 
null; + default: + return null; + } + } + + static UnboundPredicate truncateLongStrict( + String name, BoundPredicate pred, Transform transform) { + long boundary = pred.literal().value(); + switch (pred.op()) { + case LT: + // predicate would be <= the previous partition + return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary) - 1); + case LT_EQ: + // Checking if the literal is at the upper partition boundary + if (transform.apply(boundary + 1L).equals(transform.apply(boundary))) { + // Literal is not at upper boundary, for eg: 2019-07-02T02:12:34.0000 + // the predicate can be <= 2019-07-01 + return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary) - 1); + } else { + // Literal is at the upper boundary, for eg: 2019-07-02T23:59:59.99999 + // the predicate can be <= 2019-07-02 + return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary)); + } + case GT: + // predicate would be >= the next partition + return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary) + 1); + case GT_EQ: + // Checking if the literal is at the lower partition boundary + if (transform.apply(boundary - 1L).equals(transform.apply(boundary))) { + // Literal is not at lower boundary, for eg: 2019-07-02T02:12:34.0000 + // the predicate can be >= 2019-07-03 + return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary) + 1); + } else { + // Literal was at the lower boundary, for eg: 2019-07-02T00:00:00.0000 + // the predicate can be >= 2019-07-02 + return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary)); + } + case NOT_EQ: + return predicate(Expression.Operation.NOT_EQ, name, transform.apply(boundary)); + case EQ: + // there is no predicate that guarantees equality because adjacent longs transform to the same value + return null; + default: + return null; + } + } + static UnboundPredicate truncateLong( String name, BoundPredicate pred, Transform transform) { long boundary = 
pred.literal().value(); diff --git a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java index f01ea050229c..7259defd85f3 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java @@ -76,8 +76,11 @@ public UnboundPredicate project(String fieldName, BoundPredicate } @Override - public UnboundPredicate projectStrict(String fieldName, BoundPredicate predicate) { - return null; + public UnboundPredicate projectStrict(String fieldName, BoundPredicate pred) { + if (pred.op() == NOT_NULL || pred.op() == IS_NULL) { + return Expressions.predicate(pred.op(), fieldName); + } + return ProjectionUtil.truncateLongStrict(fieldName, pred, this); } @Override diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestDatesProjection.java b/api/src/test/java/org/apache/iceberg/transforms/TestDatesProjection.java new file mode 100644 index 000000000000..7b4ee7844b5b --- /dev/null +++ b/api/src/test/java/org/apache/iceberg/transforms/TestDatesProjection.java @@ -0,0 +1,217 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.transforms; + +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.Schema; +import org.apache.iceberg.expressions.Expression; +import org.apache.iceberg.expressions.Literal; +import org.apache.iceberg.expressions.Projections; +import org.apache.iceberg.expressions.UnboundPredicate; +import org.apache.iceberg.types.Types; +import org.junit.Assert; +import org.junit.Test; + +import static org.apache.iceberg.TestHelpers.assertAndUnwrapUnbound; +import static org.apache.iceberg.expressions.Expressions.equal; +import static org.apache.iceberg.expressions.Expressions.greaterThan; +import static org.apache.iceberg.expressions.Expressions.greaterThanOrEqual; +import static org.apache.iceberg.expressions.Expressions.lessThan; +import static org.apache.iceberg.expressions.Expressions.lessThanOrEqual; +import static org.apache.iceberg.expressions.Expressions.notEqual; +import static org.apache.iceberg.types.Types.NestedField.optional; + +public class TestDatesProjection { + private static final Types.DateType TYPE = Types.DateType.get(); + private static final Schema SCHEMA = new Schema(optional(1, "date", TYPE)); + + public void assertProjectionStrict(PartitionSpec spec, UnboundPredicate filter, + Expression.Operation expectedOp, String expectedLiteral) { + + Expression projection = Projections.strict(spec).project(filter); + UnboundPredicate predicate = assertAndUnwrapUnbound(projection); + + Assert.assertEquals(expectedOp, predicate.op()); + + Literal literal = predicate.literal(); + Dates transform = (Dates) spec.getFieldsBySourceId(1).get(0).transform(); + String output = transform.toHumanString((int) literal.value()); + Assert.assertEquals(expectedLiteral, output); + } + + public void assertProjectionStrictValue(PartitionSpec spec, UnboundPredicate filter, + Expression.Operation expectedOp) { + + Expression projection = Projections.strict(spec).project(filter); + Assert.assertEquals(projection.op(), expectedOp); + } + + 
public void assertProjectionInclusiveValue(PartitionSpec spec, UnboundPredicate filter, + Expression.Operation expectedOp) { + + Expression projection = Projections.inclusive(spec).project(filter); + Assert.assertEquals(projection.op(), expectedOp); + } + + public void assertProjectionInclusive(PartitionSpec spec, UnboundPredicate filter, + Expression.Operation expectedOp, String expectedLiteral) { + Expression projection = Projections.inclusive(spec).project(filter); + UnboundPredicate predicate = assertAndUnwrapUnbound(projection); + + Assert.assertEquals(predicate.op(), expectedOp); + + Literal literal = predicate.literal(); + Dates transform = (Dates) spec.getFieldsBySourceId(1).get(0).transform(); + String output = transform.toHumanString((int) literal.value()); + Assert.assertEquals(expectedLiteral, output); + } + + @Test + public void testMonthStrictLowerBound() { + Integer date = (Integer) Literal.of("2017-01-01").to(TYPE).value(); + PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).month("date").build(); + + assertProjectionStrict(spec, lessThan("date", date), Expression.Operation.LT_EQ, "2016-12"); + assertProjectionStrict(spec, lessThanOrEqual("date", date), Expression.Operation.LT_EQ, "2016-12"); + assertProjectionStrict(spec, greaterThan("date", date), Expression.Operation.GT_EQ, "2017-02"); + assertProjectionStrict(spec, greaterThanOrEqual("date", date), Expression.Operation.GT_EQ, "2017-01"); + assertProjectionStrict(spec, notEqual("date", date), Expression.Operation.NOT_EQ, "2017-01"); + assertProjectionStrictValue(spec, equal("date", date), Expression.Operation.FALSE); + } + + @Test + public void testMonthStrictUpperBound() { + Integer date = (Integer) Literal.of("2017-12-31").to(TYPE).value(); + PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).month("date").build(); + + assertProjectionStrict(spec, lessThan("date", date), Expression.Operation.LT_EQ, "2017-11"); + assertProjectionStrict(spec, lessThanOrEqual("date", date), 
Expression.Operation.LT_EQ, "2017-12"); + assertProjectionStrict(spec, greaterThan("date", date), Expression.Operation.GT_EQ, "2018-01"); + assertProjectionStrict(spec, greaterThanOrEqual("date", date), Expression.Operation.GT_EQ, "2018-01"); + assertProjectionStrict(spec, notEqual("date", date), Expression.Operation.NOT_EQ, "2017-12"); + assertProjectionStrictValue(spec, equal("date", date), Expression.Operation.FALSE); + } + + @Test + public void testMonthInclusiveLowerBound() { + Integer date = (Integer) Literal.of("2017-12-01").to(TYPE).value(); + PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).month("date").build(); + + assertProjectionInclusive(spec, lessThan("date", date), Expression.Operation.LT_EQ, "2017-11"); + assertProjectionInclusive(spec, lessThanOrEqual("date", date), Expression.Operation.LT_EQ, "2017-12"); + assertProjectionInclusive(spec, greaterThan("date", date), Expression.Operation.GT_EQ, "2017-12"); + assertProjectionInclusive(spec, greaterThanOrEqual("date", date), Expression.Operation.GT_EQ, "2017-12"); + assertProjectionInclusive(spec, equal("date", date), Expression.Operation.EQ, "2017-12"); + assertProjectionInclusiveValue(spec, notEqual("date", date), Expression.Operation.TRUE); + } + + @Test + public void testMonthInclusiveUpperBound() { + Integer date = (Integer) Literal.of("2017-12-31").to(TYPE).value(); + PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).month("date").build(); + + assertProjectionInclusive(spec, lessThan("date", date), Expression.Operation.LT_EQ, "2017-12"); + assertProjectionInclusive(spec, lessThanOrEqual("date", date), Expression.Operation.LT_EQ, "2017-12"); + assertProjectionInclusive(spec, greaterThan("date", date), Expression.Operation.GT_EQ, "2018-01"); + assertProjectionInclusive(spec, greaterThanOrEqual("date", date), Expression.Operation.GT_EQ, "2017-12"); + assertProjectionInclusive(spec, equal("date", date), Expression.Operation.EQ, "2017-12"); + assertProjectionInclusiveValue(spec, 
notEqual("date", date), Expression.Operation.TRUE); + } + + @Test + public void testDayStrict() { + Integer date = (Integer) Literal.of("2017-01-01").to(TYPE).value(); + PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).day("date").build(); + + assertProjectionStrict(spec, lessThan("date", date), Expression.Operation.LT_EQ, "2016-12-31"); + // should be the same date for <= + assertProjectionStrict(spec, lessThanOrEqual("date", date), Expression.Operation.LT_EQ, "2017-01-01"); + assertProjectionStrict(spec, greaterThan("date", date), Expression.Operation.GT_EQ, "2017-01-02"); + // should be the same date for >= + assertProjectionStrict(spec, greaterThanOrEqual("date", date), Expression.Operation.GT_EQ, "2017-01-01"); + assertProjectionStrict(spec, notEqual("date", date), Expression.Operation.NOT_EQ, "2017-01-01"); + assertProjectionStrictValue(spec, equal("date", date), Expression.Operation.FALSE); + } + + @Test + public void testDayInclusive() { + Integer date = (Integer) Literal.of("2017-01-01").to(TYPE).value(); + PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).day("date").build(); + + assertProjectionInclusive(spec, lessThan("date", date), Expression.Operation.LT_EQ, "2016-12-31"); + assertProjectionInclusive(spec, lessThanOrEqual("date", date), Expression.Operation.LT_EQ, "2017-01-01"); + assertProjectionInclusive(spec, greaterThan("date", date), Expression.Operation.GT_EQ, "2017-01-02"); + assertProjectionInclusive(spec, greaterThanOrEqual("date", date), Expression.Operation.GT_EQ, "2017-01-01"); + assertProjectionInclusive(spec, equal("date", date), Expression.Operation.EQ, "2017-01-01"); + assertProjectionInclusiveValue(spec, notEqual("date", date), Expression.Operation.TRUE); + } + + @Test + public void testYearStrictLowerBound() { + Integer date = (Integer) Literal.of("2017-01-01").to(TYPE).value(); + PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).year("date").build(); + + assertProjectionStrict(spec, lessThan("date", date), 
Expression.Operation.LT_EQ, "2016"); + assertProjectionStrict(spec, lessThanOrEqual("date", date), Expression.Operation.LT_EQ, "2016"); + assertProjectionStrict(spec, greaterThan("date", date), Expression.Operation.GT_EQ, "2018"); + assertProjectionStrict(spec, greaterThanOrEqual("date", date), Expression.Operation.GT_EQ, "2017"); + assertProjectionStrict(spec, notEqual("date", date), Expression.Operation.NOT_EQ, "2017"); + assertProjectionStrictValue(spec, equal("date", date), Expression.Operation.FALSE); + } + + @Test + public void testYearStrictUpperBound() { + Integer date = (Integer) Literal.of("2017-12-31").to(TYPE).value(); + PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).year("date").build(); + + assertProjectionStrict(spec, lessThan("date", date), Expression.Operation.LT_EQ, "2016"); + assertProjectionStrict(spec, lessThanOrEqual("date", date), Expression.Operation.LT_EQ, "2017"); + assertProjectionStrict(spec, greaterThan("date", date), Expression.Operation.GT_EQ, "2018"); + assertProjectionStrict(spec, greaterThanOrEqual("date", date), Expression.Operation.GT_EQ, "2018"); + assertProjectionStrict(spec, notEqual("date", date), Expression.Operation.NOT_EQ, "2017"); + assertProjectionStrictValue(spec, equal("date", date), Expression.Operation.FALSE); + } + + @Test + public void testYearInclusiveLowerBound() { + Integer date = (Integer) Literal.of("2017-01-01").to(TYPE).value(); + PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).year("date").build(); + + assertProjectionInclusive(spec, lessThan("date", date), Expression.Operation.LT_EQ, "2016"); + assertProjectionInclusive(spec, lessThanOrEqual("date", date), Expression.Operation.LT_EQ, "2017"); + assertProjectionInclusive(spec, greaterThan("date", date), Expression.Operation.GT_EQ, "2017"); + assertProjectionInclusive(spec, greaterThanOrEqual("date", date), Expression.Operation.GT_EQ, "2017"); + assertProjectionInclusive(spec, equal("date", date), Expression.Operation.EQ, "2017"); + 
assertProjectionInclusiveValue(spec, notEqual("date", date), Expression.Operation.TRUE); + } + + @Test + public void testYearInclusiveUpperBound() { + Integer date = (Integer) Literal.of("2017-12-31").to(TYPE).value(); + PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).year("date").build(); + + assertProjectionInclusive(spec, lessThan("date", date), Expression.Operation.LT_EQ, "2017"); + assertProjectionInclusive(spec, lessThanOrEqual("date", date), Expression.Operation.LT_EQ, "2017"); + assertProjectionInclusive(spec, greaterThan("date", date), Expression.Operation.GT_EQ, "2018"); + assertProjectionInclusive(spec, greaterThanOrEqual("date", date), Expression.Operation.GT_EQ, "2017"); + assertProjectionInclusive(spec, equal("date", date), Expression.Operation.EQ, "2017"); + assertProjectionInclusiveValue(spec, notEqual("date", date), Expression.Operation.TRUE); + } +} diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTimestampsProjection.java b/api/src/test/java/org/apache/iceberg/transforms/TestTimestampsProjection.java new file mode 100644 index 000000000000..5d3c5b09085f --- /dev/null +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTimestampsProjection.java @@ -0,0 +1,293 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.transforms; + +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.Schema; +import org.apache.iceberg.expressions.Expression; +import org.apache.iceberg.expressions.Literal; +import org.apache.iceberg.expressions.Projections; +import org.apache.iceberg.expressions.UnboundPredicate; +import org.apache.iceberg.types.Types; +import org.junit.Assert; +import org.junit.Test; + +import static org.apache.iceberg.TestHelpers.assertAndUnwrapUnbound; +import static org.apache.iceberg.expressions.Expressions.equal; +import static org.apache.iceberg.expressions.Expressions.greaterThan; +import static org.apache.iceberg.expressions.Expressions.greaterThanOrEqual; +import static org.apache.iceberg.expressions.Expressions.lessThan; +import static org.apache.iceberg.expressions.Expressions.lessThanOrEqual; +import static org.apache.iceberg.expressions.Expressions.notEqual; +import static org.apache.iceberg.types.Types.NestedField.optional; + +public class TestTimestampsProjection { + private static final Types.TimestampType TYPE = Types.TimestampType.withoutZone(); + private static final Schema SCHEMA = new Schema(optional(1, "timestamp", TYPE)); + + public void assertProjectionStrict(PartitionSpec spec, UnboundPredicate filter, + Expression.Operation expectedOp, String expectedLiteral) { + + Expression projection = Projections.strict(spec).project(filter); + UnboundPredicate predicate = assertAndUnwrapUnbound(projection); + + Assert.assertEquals(expectedOp, predicate.op()); + + Literal literal = predicate.literal(); + Timestamps transform = (Timestamps) spec.getFieldsBySourceId(1).get(0).transform(); + String output = transform.toHumanString((int) literal.value()); + Assert.assertEquals(expectedLiteral, output); + } + + public void assertProjectionStrictValue(PartitionSpec spec, UnboundPredicate filter, + 
Expression.Operation expectedOp) { + + Expression projection = Projections.strict(spec).project(filter); + Assert.assertEquals(projection.op(), expectedOp); + } + + public void assertProjectionInclusiveValue(PartitionSpec spec, UnboundPredicate filter, + Expression.Operation expectedOp) { + + Expression projection = Projections.inclusive(spec).project(filter); + Assert.assertEquals(projection.op(), expectedOp); + } + + public void assertProjectionInclusive(PartitionSpec spec, UnboundPredicate filter, + Expression.Operation expectedOp, String expectedLiteral) { + Expression projection = Projections.inclusive(spec).project(filter); + UnboundPredicate predicate = assertAndUnwrapUnbound(projection); + + Assert.assertEquals(predicate.op(), expectedOp); + + Literal literal = predicate.literal(); + Timestamps transform = (Timestamps) spec.getFieldsBySourceId(1).get(0).transform(); + String output = transform.toHumanString((int) literal.value()); + Assert.assertEquals(expectedLiteral, output); + } + + @Test + public void testMonthStrictLowerBound() { + Long date = (long) Literal.of("2017-12-01T00:00:00.00000").to(TYPE).value(); + PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).month("timestamp").build(); + + assertProjectionStrict(spec, lessThan("timestamp", date), Expression.Operation.LT_EQ, "2017-11"); + assertProjectionStrict(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT_EQ, "2017-11"); + assertProjectionStrict(spec, greaterThan("timestamp", date), Expression.Operation.GT_EQ, "2018-01"); + assertProjectionStrict(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT_EQ, "2017-12"); + assertProjectionStrict(spec, notEqual("timestamp", date), Expression.Operation.NOT_EQ, "2017-12"); + assertProjectionStrictValue(spec, equal("timestamp", date), Expression.Operation.FALSE); + } + + @Test + public void testMonthStrictUpperBound() { + Long date = (long) Literal.of("2017-12-31T23:59:59.999999").to(TYPE).value(); + PartitionSpec spec = 
PartitionSpec.builderFor(SCHEMA).month("timestamp").build(); + + assertProjectionStrict(spec, lessThan("timestamp", date), Expression.Operation.LT_EQ, "2017-11"); + assertProjectionStrict(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT_EQ, "2017-12"); + assertProjectionStrict(spec, greaterThan("timestamp", date), Expression.Operation.GT_EQ, "2018-01"); + assertProjectionStrict(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT_EQ, "2018-01"); + assertProjectionStrict(spec, notEqual("timestamp", date), Expression.Operation.NOT_EQ, "2017-12"); + assertProjectionStrictValue(spec, equal("timestamp", date), Expression.Operation.FALSE); + } + + @Test + public void testMonthInclusiveLowerBound() { + Long date = (long) Literal.of("2017-12-01T00:00:00.00000").to(TYPE).value(); + PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).month("timestamp").build(); + + assertProjectionInclusive(spec, lessThan("timestamp", date), Expression.Operation.LT_EQ, "2017-11"); + assertProjectionInclusive(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT_EQ, "2017-12"); + assertProjectionInclusive(spec, greaterThan("timestamp", date), Expression.Operation.GT_EQ, "2017-12"); + assertProjectionInclusive(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT_EQ, "2017-12"); + assertProjectionInclusive(spec, equal("timestamp", date), Expression.Operation.EQ, "2017-12"); + assertProjectionInclusiveValue(spec, notEqual("timestamp", date), Expression.Operation.TRUE); + } + + @Test + public void testMonthInclusiveUpperBound() { + Long date = (long) Literal.of("2017-12-01T23:59:59.999999").to(TYPE).value(); + PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).month("timestamp").build(); + + assertProjectionInclusive(spec, lessThan("timestamp", date), Expression.Operation.LT_EQ, "2017-12"); + assertProjectionInclusive(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT_EQ, "2017-12"); + assertProjectionInclusive(spec, 
greaterThan("timestamp", date), Expression.Operation.GT_EQ, "2017-12"); + assertProjectionInclusive(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT_EQ, "2017-12"); + assertProjectionInclusive(spec, equal("timestamp", date), Expression.Operation.EQ, "2017-12"); + assertProjectionInclusiveValue(spec, notEqual("timestamp", date), Expression.Operation.TRUE); + } + + @Test + public void testDayStrictLowerBound() { + Long date = (long) Literal.of("2017-12-01T00:00:00.00000").to(TYPE).value(); + PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).day("timestamp").build(); + + assertProjectionStrict(spec, lessThan("timestamp", date), Expression.Operation.LT_EQ, "2017-11-30"); + assertProjectionStrict(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT_EQ, "2017-11-30"); + assertProjectionStrict(spec, greaterThan("timestamp", date), Expression.Operation.GT_EQ, "2017-12-02"); + assertProjectionStrict(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT_EQ, "2017-12-01"); + assertProjectionStrict(spec, notEqual("timestamp", date), Expression.Operation.NOT_EQ, "2017-12-01"); + assertProjectionStrictValue(spec, equal("timestamp", date), Expression.Operation.FALSE); + } + + @Test + public void testDayStrictUpperBound() { + Long date = (long) Literal.of("2017-12-01T23:59:59.999999").to(TYPE).value(); + PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).day("timestamp").build(); + + assertProjectionStrict(spec, lessThan("timestamp", date), Expression.Operation.LT_EQ, "2017-11-30"); + assertProjectionStrict(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT_EQ, "2017-12-01"); + assertProjectionStrict(spec, greaterThan("timestamp", date), Expression.Operation.GT_EQ, "2017-12-02"); + assertProjectionStrict(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT_EQ, "2017-12-02"); + assertProjectionStrict(spec, notEqual("timestamp", date), Expression.Operation.NOT_EQ, "2017-12-01"); + 
assertProjectionStrictValue(spec, equal("timestamp", date), Expression.Operation.FALSE); + } + + @Test + public void testDayInclusiveLowerBound() { + Long date = (long) Literal.of("2017-12-01T00:00:00.00000").to(TYPE).value(); + PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).day("timestamp").build(); + + assertProjectionInclusive(spec, lessThan("timestamp", date), Expression.Operation.LT_EQ, "2017-11-30"); + assertProjectionInclusive(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT_EQ, "2017-12-01"); + assertProjectionInclusive(spec, greaterThan("timestamp", date), Expression.Operation.GT_EQ, "2017-12-01"); + assertProjectionInclusive(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT_EQ, "2017-12-01"); + assertProjectionInclusive(spec, equal("timestamp", date), Expression.Operation.EQ, "2017-12-01"); + assertProjectionInclusiveValue(spec, notEqual("timestamp", date), Expression.Operation.TRUE); + } + + @Test + public void testDayInclusiveUpperBound() { + Long date = (long) Literal.of("2017-12-01T23:59:59.999999").to(TYPE).value(); + PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).day("timestamp").build(); + + assertProjectionInclusive(spec, lessThan("timestamp", date), Expression.Operation.LT_EQ, "2017-12-01"); + assertProjectionInclusive(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT_EQ, "2017-12-01"); + assertProjectionInclusive(spec, greaterThan("timestamp", date), Expression.Operation.GT_EQ, "2017-12-02"); + assertProjectionInclusive(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT_EQ, "2017-12-01"); + assertProjectionInclusive(spec, equal("timestamp", date), Expression.Operation.EQ, "2017-12-01"); + assertProjectionInclusiveValue(spec, notEqual("timestamp", date), Expression.Operation.TRUE); + } + + @Test + public void testYearStrictLowerBound() { + Long date = (long) Literal.of("2017-01-01T00:00:00.00000").to(TYPE).value(); + PartitionSpec spec = 
PartitionSpec.builderFor(SCHEMA).year("timestamp").build(); + + assertProjectionStrict(spec, lessThan("timestamp", date), Expression.Operation.LT_EQ, "2016"); + assertProjectionStrict(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT_EQ, "2016"); + assertProjectionStrict(spec, greaterThan("timestamp", date), Expression.Operation.GT_EQ, "2018"); + assertProjectionStrict(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT_EQ, "2017"); + assertProjectionStrict(spec, notEqual("timestamp", date), Expression.Operation.NOT_EQ, "2017"); + assertProjectionStrictValue(spec, equal("timestamp", date), Expression.Operation.FALSE); + } + + @Test + public void testYearStrictUpperBound() { + Long date = (long) Literal.of("2017-12-31T23:59:59.999999").to(TYPE).value(); + PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).year("timestamp").build(); + + assertProjectionStrict(spec, lessThan("timestamp", date), Expression.Operation.LT_EQ, "2016"); + assertProjectionStrict(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT_EQ, "2017"); + assertProjectionStrict(spec, greaterThan("timestamp", date), Expression.Operation.GT_EQ, "2018"); + assertProjectionStrict(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT_EQ, "2018"); + assertProjectionStrict(spec, notEqual("timestamp", date), Expression.Operation.NOT_EQ, "2017"); + assertProjectionStrictValue(spec, equal("timestamp", date), Expression.Operation.FALSE); + } + + @Test + public void testYearInclusiveLowerBound() { + Long date = (long) Literal.of("2017-01-01T00:00:00.00000").to(TYPE).value(); + PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).year("timestamp").build(); + + assertProjectionInclusive(spec, lessThan("timestamp", date), Expression.Operation.LT_EQ, "2016"); + assertProjectionInclusive(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT_EQ, "2017"); + assertProjectionInclusive(spec, greaterThan("timestamp", date), 
Expression.Operation.GT_EQ, "2017"); + assertProjectionInclusive(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT_EQ, "2017"); + assertProjectionInclusive(spec, equal("timestamp", date), Expression.Operation.EQ, "2017"); + assertProjectionInclusiveValue(spec, notEqual("timestamp", date), Expression.Operation.TRUE); + } + + @Test + public void testYearInclusiveUpperBound() { + Long date = (long) Literal.of("2017-12-31T23:59:59.999999").to(TYPE).value(); + PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).year("timestamp").build(); + + assertProjectionInclusive(spec, lessThan("timestamp", date), Expression.Operation.LT_EQ, "2017"); + assertProjectionInclusive(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT_EQ, "2017"); + assertProjectionInclusive(spec, greaterThan("timestamp", date), Expression.Operation.GT_EQ, "2018"); + assertProjectionInclusive(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT_EQ, "2017"); + assertProjectionInclusive(spec, equal("timestamp", date), Expression.Operation.EQ, "2017"); + assertProjectionInclusiveValue(spec, notEqual("timestamp", date), Expression.Operation.TRUE); + } + + @Test + public void testHourStrictLowerBound() { + Long date = (long) Literal.of("2017-12-01T10:00:00.00000").to(TYPE).value(); + PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).hour("timestamp").build(); + + assertProjectionStrict(spec, lessThan("timestamp", date), Expression.Operation.LT_EQ, "2017-12-01-09"); + assertProjectionStrict(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT_EQ, "2017-12-01-09"); + assertProjectionStrict(spec, greaterThan("timestamp", date), Expression.Operation.GT_EQ, "2017-12-01-11"); + assertProjectionStrict(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT_EQ, "2017-12-01-10"); + assertProjectionStrict(spec, notEqual("timestamp", date), Expression.Operation.NOT_EQ, "2017-12-01-10"); + assertProjectionStrictValue(spec, equal("timestamp", 
date), Expression.Operation.FALSE); + } + + @Test + public void testHourStrictUpperBound() { + Long date = (long) Literal.of("2017-12-01T10:59:59.999999").to(TYPE).value(); + PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).hour("timestamp").build(); + + assertProjectionStrict(spec, lessThan("timestamp", date), Expression.Operation.LT_EQ, "2017-12-01-09"); + assertProjectionStrict(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT_EQ, "2017-12-01-10"); + assertProjectionStrict(spec, greaterThan("timestamp", date), Expression.Operation.GT_EQ, "2017-12-01-11"); + assertProjectionStrict(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT_EQ, "2017-12-01-11"); + assertProjectionStrict(spec, notEqual("timestamp", date), Expression.Operation.NOT_EQ, "2017-12-01-10"); + assertProjectionStrictValue(spec, equal("timestamp", date), Expression.Operation.FALSE); + } + + @Test + public void testHourInclusiveLowerBound() { + Long date = (long) Literal.of("2017-12-01T10:00:00.00000").to(TYPE).value(); + PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).hour("timestamp").build(); + + assertProjectionInclusive(spec, lessThan("timestamp", date), Expression.Operation.LT_EQ, "2017-12-01-09"); + assertProjectionInclusive(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT_EQ, "2017-12-01-10"); + assertProjectionInclusive(spec, greaterThan("timestamp", date), Expression.Operation.GT_EQ, "2017-12-01-10"); + assertProjectionInclusive(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT_EQ, "2017-12-01-10"); + assertProjectionInclusive(spec, equal("timestamp", date), Expression.Operation.EQ, "2017-12-01-10"); + assertProjectionInclusiveValue(spec, notEqual("timestamp", date), Expression.Operation.TRUE); + } + + @Test + public void testHourInclusiveUpperBound() { + Long date = (long) Literal.of("2017-12-01T10:59:59.999999").to(TYPE).value(); + PartitionSpec spec = 
PartitionSpec.builderFor(SCHEMA).hour("timestamp").build(); + + assertProjectionInclusive(spec, lessThan("timestamp", date), Expression.Operation.LT_EQ, "2017-12-01-10"); + assertProjectionInclusive(spec, lessThanOrEqual("timestamp", date), Expression.Operation.LT_EQ, "2017-12-01-10"); + assertProjectionInclusive(spec, greaterThan("timestamp", date), Expression.Operation.GT_EQ, "2017-12-01-11"); + assertProjectionInclusive(spec, greaterThanOrEqual("timestamp", date), Expression.Operation.GT_EQ, "2017-12-01-10"); + assertProjectionInclusive(spec, equal("timestamp", date), Expression.Operation.EQ, "2017-12-01-10"); + assertProjectionInclusiveValue(spec, notEqual("timestamp", date), Expression.Operation.TRUE); + } +}