From 912e00e6e1665b0a9d2e7a9f8b1cee337b93f12d Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Wed, 31 Jul 2019 15:14:24 -0700 Subject: [PATCH] Use UnicodeUtil.truncateString for Truncate transform. This truncates by Unicode code point instead of Java chars. --- api/src/main/java/org/apache/iceberg/transforms/Truncate.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/api/src/main/java/org/apache/iceberg/transforms/Truncate.java b/api/src/main/java/org/apache/iceberg/transforms/Truncate.java index d65d6462c918..2eacaa235e86 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Truncate.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Truncate.java @@ -27,6 +27,7 @@ import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.expressions.UnboundPredicate; import org.apache.iceberg.types.Type; +import org.apache.iceberg.util.UnicodeUtil; import static org.apache.iceberg.expressions.Expression.Operation.IS_NULL; import static org.apache.iceberg.expressions.Expression.Operation.LT; @@ -233,7 +234,7 @@ public Integer width() { @Override public CharSequence apply(CharSequence value) { - return value.subSequence(0, Math.min(value.length(), length)); + return UnicodeUtil.truncateString(value, length); } @Override