From ade1a1ffada528e23e322c5a5a93ab9f7f99f9ee Mon Sep 17 00:00:00 2001
From: Russell_Spitzer <rspitzer@apple.com>
Date: Fri, 21 Jan 2022 16:46:46 -0600
Subject: [PATCH 01/12] Core: Adds Utility Class for Implementing ZOrdering

---
 build.gradle                                  |   1 +
 .../apache/iceberg/util/ZOrderByteUtils.java  | 128 +++++++++
 .../iceberg/util/TestZOrderByteUtil.java      | 244 ++++++++++++++++++
 versions.props                                |   1 +
 4 files changed, 374 insertions(+)
 create mode 100644 core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
 create mode 100644 core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java

diff --git a/build.gradle b/build.gradle
index fa509212fb8b..65346358fe33 100644
--- a/build.gradle
+++ b/build.gradle
@@ -222,6 +222,7 @@ project(':iceberg-core') {
     }
 
     testImplementation "org.xerial:sqlite-jdbc"
+    testImplementation "org.apache.commons:commons-lang3"
     testImplementation project(path: ':iceberg-api', configuration: 'testArtifacts')
   }
 }
diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
new file mode 100644
index 000000000000..4ef3120a2217
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.util;
+
+import java.util.Arrays;
+
+/**
+ * Within Z-Ordering the byte representations of objects being compared must be ordered,
+ * this requires several types to be transformed when converted to bytes. The goal is to
+ * map objects whose byte representations are not lexicographically ordered into representations
+ * that are lexicographically ordered.
+ * Most of these techniques are derived from
+ * https://aws.amazon.com/blogs/database/z-order-indexing-for-multifaceted-queries-in-amazon-dynamodb-part-2/
+ */
+public class ZOrderByteUtils {
+
+  private ZOrderByteUtils() {
+
+  }
+
+  /**
+   * Signed ints do not have their bytes in magnitude order because of the sign bit.
+   * To fix this, flip the sign bit so that all negatives are ordered before positives. This essentially
+   * shifts the 0 value so that we don't break our ordering when we cross the new 0 value.
+   */
+  public static byte[] orderIntLikeBytes(byte[] intBytes, int size) {
+    if (intBytes == null) {
+      return new byte[size];
+    }
+    intBytes[0] = (byte) (intBytes[0] ^ (1 << 7));
+    return intBytes;
+  }
+
+  /**
+   * IEEE 754 :
+   * “If two floating-point numbers in the same format are ordered (say, x < y),
+   * they are ordered the same way when their bits are reinterpreted as sign-magnitude integers.”
+   *
+   * Which means floats can be treated as sign magnitude integers which can then be converted into lexicographically
+   * comparable bytes
+   */
+  public static byte[] orderFloatLikeBytes(byte[] floatBytes, int size) {
+    if (floatBytes == null) {
+      return new byte[size];
+    }
+    if ((floatBytes[0] & (1 << 7)) == 0) {
+      // The signed magnitude is positive set the first bit (reversing the sign so positives order after negatives)
+      floatBytes[0] = (byte) (floatBytes[0] | (1 << 7));
+    } else {
+      // The signed magnitude is negative so flip the first bit (reversing the sign so positives order after negatives)
+      // Then flip all remaining bits so numbers with greater negative magnitude come before those
+      // with less magnitude (reverse the order)
+      for (int i = 0; i < floatBytes.length; i++) {
+        floatBytes[i] = (byte) ~floatBytes[i];
+      }
+    }
+    return floatBytes;
+  }
+
+  /**
+   * Strings are lexicographically sortable BUT differing byte array lengths will
+   * ruin the Z-Ordering. (ZOrder requires that a given column contribute the same number of bytes every time).
+   * This implementation just uses a set size to for all output byte representations. Truncating longer strings
+   * and right padding 0 for shorter strings.
+   */
+  public static byte[] orderUTF8LikeBytes(byte[] stringBytes, int size) {
+    if (stringBytes == null) {
+      return new byte[size];
+    }
+    return Arrays.copyOf(stringBytes, size);
+  }
+
+  /**
+   * Interleave bits using a naive loop.
+   * @param columnsBinary an array of byte arrays; the first must not be empty (it is read before any length check)
+   * @return their bits interleaved
+   */
+  public static byte[] interleaveBits(byte[][] columnsBinary) {
+    int interleavedSize = Arrays.stream(columnsBinary).mapToInt(a -> a.length).sum();
+    byte[] interleavedBytes = new byte[interleavedSize];
+    int sourceBit = 7;
+    int sourceByte = 0;
+    int sourceColumn = 0;
+    int interleaveBit = 7;
+    int interleaveByte = 0;
+    while (interleaveByte < interleavedSize) {
+      // Take what we have, Get the source Bit of the source Byte, move it to the interleaveBit position
+      interleavedBytes[interleaveByte] =
+          (byte) (interleavedBytes[interleaveByte] |
+              (columnsBinary[sourceColumn][sourceByte] & 1 << sourceBit) >> sourceBit << interleaveBit);
+
+      if (--interleaveBit == -1) {
+        // Finished a byte in our interleave byte array start a new byte
+        interleaveByte++;
+        interleaveBit = 7;
+      }
+
+      // Find next column with a byte we can use
+      do {
+        if (++sourceColumn == columnsBinary.length) {
+          sourceColumn = 0;
+          if (--sourceBit == -1) {
+            sourceByte++;
+            sourceBit = 7;
+          }
+        }
+      } while (columnsBinary[sourceColumn].length <= sourceByte && interleaveByte < interleavedSize);
+    }
+    return interleavedBytes;
+  }
+}
diff --git a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java
new file mode 100644
index 000000000000..87d69dc99182
--- /dev/null
+++ b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java
@@ -0,0 +1,244 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+package org.apache.iceberg.util;
+
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Random;
+import org.apache.commons.lang3.RandomStringUtils;
+import org.apache.iceberg.relocated.com.google.common.primitives.UnsignedBytes;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestZOrderByteUtil {
+  private static final byte IIIIIIII = (byte) 255;
+  private static final byte IOIOIOIO = (byte) 170;
+  private static final byte OIOIOIOI = (byte) 85;
+  private static final byte OOOOIIII = (byte) 15;
+  private static final byte OOOOOOOI = (byte) 1;
+  private static final byte OOOOOOOO = (byte) 0;
+
+  private static final int NUM_TESTS = 100000;
+
+  private final Random random = new Random(42);
+
+  private String bytesToString(byte[] bytes) {
+    StringBuilder result = new StringBuilder();
+    for (byte b : bytes) {
+      result.append(String.format("%8s", Integer.toBinaryString(b & 0xFF)).replace(' ', '0'));
+    }
+    return result.toString();
+  }
+
+  /**
+   * Returns a non-0 length byte array
+   */
+  private byte[]  generateRandomBytes() {
+    int length = Math.abs(random.nextInt(100) + 1);
+    byte[] result = new byte[length];
+    random.nextBytes(result);
+    return result;
+  }
+
+  /**
+   * Test method to ensure correctness of byte interleaving code
+   */
+  private String interleaveStrings(String[] strings) {
+    StringBuilder result = new StringBuilder();
+    int totalLength = Arrays.stream(strings).mapToInt(String::length).sum();
+    int substringIndex = 0;
+    int characterIndex = 0;
+    while (characterIndex < totalLength) {
+      for (String str : strings) {
+        if (substringIndex < str.length()) {
+          result.append(str.charAt(substringIndex));
+          characterIndex++;
+        }
+      }
+      substringIndex++;
+    }
+    return result.toString();
+  }
+
+  /**
+   * Compares the result of a string based interleaving algorithm implemented above
+   * versus the binary bit-shifting algorithm used in ZOrderByteUtils. Either both
+   * algorithms are identically wrong or are both identically correct.
+   */
+  @Test
+  public void testInterleaveRandomExamples() {
+    for (int test = 0; test < NUM_TESTS; test++) {
+      int numByteArrays = Math.abs(random.nextInt(6)) + 1;
+      byte[][] testBytes =  new byte[numByteArrays][];
+      String[] testStrings = new String[numByteArrays];
+      for (int byteIndex = 0;  byteIndex < numByteArrays; byteIndex++) {
+        testBytes[byteIndex] = generateRandomBytes();
+        testStrings[byteIndex] = bytesToString(testBytes[byteIndex]);
+      }
+      byte[] byteResult = ZOrderByteUtils.interleaveBits(testBytes);
+      String byteResultAsString = bytesToString(byteResult);
+
+      String stringResult = interleaveStrings(testStrings);
+
+      Assert.assertEquals("String interleave didn't match byte interleave", stringResult, byteResultAsString);
+    }
+  }
+
+  @Test
+  public void testInterleaveEmptyBits() {
+    byte[][] test = new byte[4][10];
+    byte[] expected = new byte[40];
+
+    Assert.assertArrayEquals("Should combine empty arrays",
+        expected, ZOrderByteUtils.interleaveBits(test));
+  }
+
+  @Test
+  public void testInterleaveFullBits() {
+    byte[][] test = new byte[4][];
+    test[0] = new byte[]{IIIIIIII, IIIIIIII};
+    test[1] = new byte[]{IIIIIIII};
+    test[2] = new byte[0];
+    test[3] = new byte[]{IIIIIIII, IIIIIIII, IIIIIIII};
+    byte[] expected = new byte[]{IIIIIIII, IIIIIIII, IIIIIIII, IIIIIIII, IIIIIIII, IIIIIIII};
+
+    Assert.assertArrayEquals("Should combine full arrays",
+        expected, ZOrderByteUtils.interleaveBits(test));
+  }
+
+  @Test
+  public void testInterleaveMixedBits() {
+    byte[][] test = new byte[4][];
+    test[0] = new byte[]{OOOOOOOI, IIIIIIII, OOOOOOOO, OOOOIIII};
+    test[1] = new byte[]{OOOOOOOI, OOOOOOOO, IIIIIIII};
+    test[2] = new byte[]{OOOOOOOI};
+    test[3] = new byte[]{OOOOOOOI};
+    byte[] expected = new byte[]{
+        OOOOOOOO, OOOOOOOO, OOOOOOOO, OOOOIIII,
+        IOIOIOIO, IOIOIOIO,
+        OIOIOIOI, OIOIOIOI,
+        OOOOIIII};
+    Assert.assertArrayEquals("Should combine mixed byte arrays",
+        expected, ZOrderByteUtils.interleaveBits(test));
+  }
+
+  @Test
+  public void testIntOrdering() {
+    for (int i = 0; i < NUM_TESTS; i++) {
+      int aInt = random.nextInt();
+      int bInt = random.nextInt();
+      int intCompare = Integer.compare(aInt, bInt);
+      byte[] aBytes = ZOrderByteUtils.orderIntLikeBytes(bytesOf(aInt), 4);
+      byte[] bBytes = ZOrderByteUtils.orderIntLikeBytes(bytesOf(bInt), 4);
+      int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes);
+
+      Assert.assertTrue(String.format(
+          "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ",
+          aInt, bInt, intCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare),
+          (intCompare ^ byteCompare) >= 0);
+    }
+  }
+
+  @Test
+  public void testLongOrdering() {
+    for (int i = 0; i < NUM_TESTS; i++) {
+      long aLong = random.nextInt();
+      long bLong = random.nextInt();
+      int longCompare = Long.compare(aLong, bLong);
+      byte[] aBytes = ZOrderByteUtils.orderIntLikeBytes(bytesOf(aLong), 8);
+      byte[] bBytes = ZOrderByteUtils.orderIntLikeBytes(bytesOf(bLong), 8);
+      int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes);
+
+      Assert.assertTrue(String.format(
+          "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ",
+          aLong, bLong, longCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare),
+          (longCompare ^ byteCompare) >= 0);
+    }
+  }
+
+  @Test
+  public void testFloatOrdering() {
+    for (int i = 0; i < NUM_TESTS; i++) {
+      float aFloat = random.nextFloat();
+      float bFloat = random.nextFloat();
+      int floatCompare = Float.compare(aFloat, bFloat);
+      byte[] aBytes = ZOrderByteUtils.orderFloatLikeBytes(bytesOf(aFloat), 4);
+      byte[] bBytes = ZOrderByteUtils.orderFloatLikeBytes(bytesOf(bFloat), 4);
+      int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes);
+
+      Assert.assertTrue(String.format(
+          "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ",
+          aFloat, bFloat, floatCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare),
+          (floatCompare ^ byteCompare) >= 0);
+    }
+  }
+
+  @Test
+  public void testDoubleOrdering() {
+    for (int i = 0; i < NUM_TESTS; i++) {
+      double aDouble = random.nextDouble();
+      double bDouble = random.nextDouble();
+      int doubleCompare = Double.compare(aDouble, bDouble);
+      byte[] aBytes = ZOrderByteUtils.orderFloatLikeBytes(bytesOf(aDouble), 8);
+      byte[] bBytes = ZOrderByteUtils.orderFloatLikeBytes(bytesOf(bDouble), 8);
+      int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes);
+
+      Assert.assertTrue(String.format(
+          "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ",
+          aDouble, bDouble, doubleCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare),
+          (doubleCompare ^ byteCompare) >= 0);
+    }
+  }
+
+  @Test
+  public void testStringOrdering() {
+    for (int i = 0; i < NUM_TESTS; i++) {
+      String aString = RandomStringUtils.random(random.nextInt(35), true, true);
+      String bString = RandomStringUtils.random(random.nextInt(35), true, true);
+      int stringCompare = aString.compareTo(bString);
+      byte[] aBytes = ZOrderByteUtils.orderUTF8LikeBytes(aString.getBytes(StandardCharsets.UTF_8), 128);
+      byte[] bBytes = ZOrderByteUtils.orderUTF8LikeBytes(bString.getBytes(StandardCharsets.UTF_8), 128);
+      int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes);
+
+      Assert.assertTrue(String.format(
+          "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ",
+          aString, bString, stringCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare),
+          (stringCompare ^ byteCompare) >= 0);
+    }
+  }
+
+  private byte[] bytesOf(int num) {
+    return ByteBuffer.allocate(4).putInt(num).array();
+  }
+
+  private byte[] bytesOf(long num) {
+    return ByteBuffer.allocate(8).putLong(num).array();
+  }
+
+  private byte[] bytesOf(float num) {
+    return ByteBuffer.allocate(4).putFloat(num).array();
+  }
+
+  private byte[] bytesOf(double num) {
+    return ByteBuffer.allocate(8).putDouble(num).array();
+  }
+}
diff --git a/versions.props b/versions.props
index c9ec027effe6..3dc3a5041f23 100644
--- a/versions.props
+++ b/versions.props
@@ -1,6 +1,7 @@
 org.slf4j:* = 1.7.25
 org.apache.avro:avro = 1.10.1
 org.apache.calcite:* = 1.10.0
+org.apache.commons:commons-lang3 = 3.12.0
 org.apache.flink:* = 1.12.5
 org.apache.hadoop:* = 2.7.3
 org.apache.hive:* = 2.3.8

From 781a12158534d6651264e3bbb93952b7c7435175 Mon Sep 17 00:00:00 2001
From: Russell_Spitzer <rspitzer@apple.com>
Date: Tue, 25 Jan 2022 15:45:02 -0600
Subject: [PATCH 02/12] Fix JavaDoc

---
 core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
index 4ef3120a2217..759f101b0cc5 100644
--- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
+++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
@@ -50,7 +50,7 @@ public static byte[] orderIntLikeBytes(byte[] intBytes, int size) {
 
   /**
    * IEEE 754 :
-   * “If two floating-point numbers in the same format are ordered (say, x < y),
+   * “If two floating-point numbers in the same format are ordered (say, x \< y),
    * they are ordered the same way when their bits are reinterpreted as sign-magnitude integers.”
    *
    * Which means floats can be treated as sign magnitude integers which can then be converted into lexicographically

From fda817cdb61140208b625bef885483c9866d0b0d Mon Sep 17 00:00:00 2001
From: Russell_Spitzer <rspitzer@apple.com>
Date: Mon, 31 Jan 2022 12:31:49 -0600
Subject: [PATCH 03/12] Switch Implementations to work on Primitives instead of
 ByteArrays

---
 .../apache/iceberg/util/ZOrderByteUtils.java  | 66 ++++++++------
 .../iceberg/util/TestZOrderByteUtil.java      | 86 ++++++++-----------
 2 files changed, 74 insertions(+), 78 deletions(-)

diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
index 759f101b0cc5..571ea24d5039 100644
--- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
+++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
@@ -19,6 +19,7 @@
 
 package org.apache.iceberg.util;
 
+import java.nio.ByteBuffer;
 import java.util.Arrays;
 
 /**
@@ -28,6 +29,9 @@
  * that are lexicographically ordered.
  * Most of these techniques are derived from
  * https://aws.amazon.com/blogs/database/z-order-indexing-for-multifaceted-queries-in-amazon-dynamodb-part-2/
+ *
+ * Some implementation is taken from
+ * https://github.com/apache/hbase/blob/master/hbase-common/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java
  */
 public class ZOrderByteUtils {
 
@@ -40,12 +44,19 @@ private ZOrderByteUtils() {
    * To fix this, flip the sign bit so that all negatives are ordered before positives. This essentially
    * shifts the 0 value so that we don't break our ordering when we cross the new 0 value.
    */
-  public static byte[] orderIntLikeBytes(byte[] intBytes, int size) {
-    if (intBytes == null) {
-      return new byte[size];
-    }
-    intBytes[0] = (byte) (intBytes[0] ^ (1 << 7));
-    return intBytes;
+  public static byte[] intToOrderedBytes(int val) {
+    ByteBuffer bytes = ByteBuffer.allocate(Integer.BYTES);
+    bytes.putInt(val ^ 0x80000000);
+    return bytes.array();
+  }
+
+  /**
+   * Signed longs are treated the same as the signed ints
+   */
+  public static byte[] longToOrderBytes(long val) {
+    ByteBuffer bytes = ByteBuffer.allocate(Long.BYTES);
+    bytes.putLong(val ^ 0x8000000000000000L);
+    return bytes.array();
   }
 
   /**
@@ -56,22 +67,23 @@ public static byte[] orderIntLikeBytes(byte[] intBytes, int size) {
    * Which means floats can be treated as sign magnitude integers which can then be converted into lexicographically
    * comparable bytes
    */
-  public static byte[] orderFloatLikeBytes(byte[] floatBytes, int size) {
-    if (floatBytes == null) {
-      return new byte[size];
-    }
-    if ((floatBytes[0] & (1 << 7)) == 0) {
-      // The signed magnitude is positive set the first bit (reversing the sign so positives order after negatives)
-      floatBytes[0] = (byte) (floatBytes[0] | (1 << 7));
-    } else {
-      // The signed magnitude is negative so flip the first bit (reversing the sign so positives order after negatives)
-      // Then flip all remaining bits so numbers with greater negative magnitude come before those
-      // with less magnitude (reverse the order)
-      for (int i = 0; i < floatBytes.length; i++) {
-        floatBytes[i] = (byte) ~floatBytes[i];
-      }
-    }
-    return floatBytes;
+  public static byte[] floatToOrderedBytes(float val) {
+    ByteBuffer bytes = ByteBuffer.allocate(Integer.BYTES);
+    int ival = Float.floatToIntBits(val);
+    ival ^= ((ival >> (Integer.SIZE - 1)) | Integer.MIN_VALUE);
+    bytes.putInt(ival);
+    return bytes.array();
+  }
+
+  /**
+   * Doubles are treated the same as floats
+   */
+  public static byte[] doubleToOrderedBytes(double val) {
+    ByteBuffer bytes = ByteBuffer.allocate(Long.BYTES);
+    long lng = Double.doubleToLongBits(val);
+    lng ^= ((lng >> (Long.SIZE - 1)) | Long.MIN_VALUE);
+    bytes.putLong(lng);
+    return bytes.array();
   }
 
   /**
@@ -80,11 +92,13 @@ public static byte[] orderFloatLikeBytes(byte[] floatBytes, int size) {
    * This implementation just uses a set size to for all output byte representations. Truncating longer strings
    * and right padding 0 for shorter strings.
    */
-  public static byte[] orderUTF8LikeBytes(byte[] stringBytes, int size) {
-    if (stringBytes == null) {
-      return new byte[size];
+  public static byte[] stringToOrderedBytes(String val, int length) {
+    ByteBuffer bytes = ByteBuffer.allocate(length);
+    if (val != null) {
+      byte[] utf8 = val.getBytes(java.nio.charset.StandardCharsets.UTF_8);
+      bytes.put(utf8, 0, Math.min(length, utf8.length)); // cap by encoded byte count, not char count
     }
-    return Arrays.copyOf(stringBytes, size);
+    return bytes.array();
   }
 
   /**
diff --git a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java
index 87d69dc99182..b34f950f90c8 100644
--- a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java
+++ b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java
@@ -20,8 +20,6 @@
 
 package org.apache.iceberg.util;
 
-import java.nio.ByteBuffer;
-import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 import java.util.Random;
 import org.apache.commons.lang3.RandomStringUtils;
@@ -146,15 +144,15 @@ public void testIntOrdering() {
     for (int i = 0; i < NUM_TESTS; i++) {
       int aInt = random.nextInt();
       int bInt = random.nextInt();
-      int intCompare = Integer.compare(aInt, bInt);
-      byte[] aBytes = ZOrderByteUtils.orderIntLikeBytes(bytesOf(aInt), 4);
-      byte[] bBytes = ZOrderByteUtils.orderIntLikeBytes(bytesOf(bInt), 4);
-      int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes);
+      int intCompare = Integer.signum(Integer.compare(aInt, bInt));
+      byte[] aBytes = ZOrderByteUtils.intToOrderedBytes(aInt);
+      byte[] bBytes = ZOrderByteUtils.intToOrderedBytes(bInt);
+      int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes));
 
-      Assert.assertTrue(String.format(
+      Assert.assertEquals(String.format(
           "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ",
           aInt, bInt, intCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare),
-          (intCompare ^ byteCompare) >= 0);
+          intCompare, byteCompare);
     }
   }
 
@@ -163,15 +161,15 @@ public void testLongOrdering() {
     for (int i = 0; i < NUM_TESTS; i++) {
-      long aLong = random.nextInt();
-      long bLong = random.nextInt();
+      long aLong = random.nextLong();
+      long bLong = random.nextLong();
-      int longCompare = Long.compare(aLong, bLong);
-      byte[] aBytes = ZOrderByteUtils.orderIntLikeBytes(bytesOf(aLong), 8);
-      byte[] bBytes = ZOrderByteUtils.orderIntLikeBytes(bytesOf(bLong), 8);
-      int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes);
+      int longCompare = Integer.signum(Long.compare(aLong, bLong));
+      byte[] aBytes = ZOrderByteUtils.longToOrderBytes(aLong);
+      byte[] bBytes = ZOrderByteUtils.longToOrderBytes(bLong);
+      int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes));
 
-      Assert.assertTrue(String.format(
-          "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ",
+      Assert.assertEquals(String.format(
+          "Ordering of longs should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ",
           aLong, bLong, longCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare),
-          (longCompare ^ byteCompare) >= 0);
+          longCompare, byteCompare);
     }
   }
 
@@ -180,15 +178,15 @@ public void testFloatOrdering() {
     for (int i = 0; i < NUM_TESTS; i++) {
       float aFloat = random.nextFloat();
       float bFloat = random.nextFloat();
-      int floatCompare = Float.compare(aFloat, bFloat);
-      byte[] aBytes = ZOrderByteUtils.orderFloatLikeBytes(bytesOf(aFloat), 4);
-      byte[] bBytes = ZOrderByteUtils.orderFloatLikeBytes(bytesOf(bFloat), 4);
-      int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes);
+      int floatCompare = Integer.signum(Float.compare(aFloat, bFloat));
+      byte[] aBytes = ZOrderByteUtils.floatToOrderedBytes(aFloat);
+      byte[] bBytes = ZOrderByteUtils.floatToOrderedBytes(bFloat);
+      int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes));
 
-      Assert.assertTrue(String.format(
-          "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ",
+      Assert.assertEquals(String.format(
+          "Ordering of floats should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ",
           aFloat, bFloat, floatCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare),
-          (floatCompare ^ byteCompare) >= 0);
+          floatCompare, byteCompare);
     }
   }
 
@@ -197,15 +195,15 @@ public void testDoubleOrdering() {
     for (int i = 0; i < NUM_TESTS; i++) {
       double aDouble = random.nextDouble();
       double bDouble = random.nextDouble();
-      int doubleCompare = Double.compare(aDouble, bDouble);
-      byte[] aBytes = ZOrderByteUtils.orderFloatLikeBytes(bytesOf(aDouble), 8);
-      byte[] bBytes = ZOrderByteUtils.orderFloatLikeBytes(bytesOf(bDouble), 8);
-      int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes);
+      int doubleCompare = Integer.signum(Double.compare(aDouble, bDouble));
+      byte[] aBytes = ZOrderByteUtils.doubleToOrderedBytes(aDouble);
+      byte[] bBytes = ZOrderByteUtils.doubleToOrderedBytes(bDouble);
+      int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes));
 
-      Assert.assertTrue(String.format(
-          "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ",
+      Assert.assertEquals(String.format(
+          "Ordering of doubles should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ",
           aDouble, bDouble, doubleCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare),
-          (doubleCompare ^ byteCompare) >= 0);
+          doubleCompare, byteCompare);
     }
   }
 
@@ -214,31 +212,15 @@ public void testStringOrdering() {
     for (int i = 0; i < NUM_TESTS; i++) {
       String aString = RandomStringUtils.random(random.nextInt(35), true, true);
       String bString = RandomStringUtils.random(random.nextInt(35), true, true);
-      int stringCompare = aString.compareTo(bString);
-      byte[] aBytes = ZOrderByteUtils.orderUTF8LikeBytes(aString.getBytes(StandardCharsets.UTF_8), 128);
-      byte[] bBytes = ZOrderByteUtils.orderUTF8LikeBytes(bString.getBytes(StandardCharsets.UTF_8), 128);
-      int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes);
+      int stringCompare = Integer.signum(aString.compareTo(bString));
+      byte[] aBytes = ZOrderByteUtils.stringToOrderedBytes(aString, 128);
+      byte[] bBytes = ZOrderByteUtils.stringToOrderedBytes(bString, 128);
+      int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes));
 
-      Assert.assertTrue(String.format(
-          "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ",
+      Assert.assertEquals(String.format(
+          "Ordering of strings should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ",
           aString, bString, stringCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare),
-          (stringCompare ^ byteCompare) >= 0);
+          stringCompare, byteCompare);
     }
   }
-
-  private byte[] bytesOf(int num) {
-    return ByteBuffer.allocate(4).putInt(num).array();
-  }
-
-  private byte[] bytesOf(long num) {
-    return ByteBuffer.allocate(8).putLong(num).array();
-  }
-
-  private byte[] bytesOf(float num) {
-    return ByteBuffer.allocate(4).putFloat(num).array();
-  }
-
-  private byte[] bytesOf(double num) {
-    return ByteBuffer.allocate(8).putDouble(num).array();
-  }
 }

From c6954e6b544a9da070329e12a01bc69d0a572d6d Mon Sep 17 00:00:00 2001
From: Russell_Spitzer <rspitzer@apple.com>
Date: Mon, 31 Jan 2022 16:47:36 -0600
Subject: [PATCH 04/12] Clean up RandomStringUtilUsage

---
 build.gradle                                                | 1 -
 .../main/java/org/apache/iceberg/util/ZOrderByteUtils.java  | 6 ++++--
 .../java/org/apache/iceberg/util/TestZOrderByteUtil.java    | 6 +++---
 versions.props                                              | 1 -
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/build.gradle b/build.gradle
index 65346358fe33..fa509212fb8b 100644
--- a/build.gradle
+++ b/build.gradle
@@ -222,7 +222,6 @@ project(':iceberg-core') {
     }
 
     testImplementation "org.xerial:sqlite-jdbc"
-    testImplementation "org.apache.commons:commons-lang3"
     testImplementation project(path: ':iceberg-api', configuration: 'testArtifacts')
   }
 }
diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
index 571ea24d5039..750831a9a5e7 100644
--- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
+++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
@@ -120,7 +120,8 @@ public static byte[] interleaveBits(byte[][] columnsBinary) {
           (byte) (interleavedBytes[interleaveByte] |
               (columnsBinary[sourceColumn][sourceByte] & 1 << sourceBit) >> sourceBit << interleaveBit);
 
-      if (--interleaveBit == -1) {
+      --interleaveBit;
+      if (interleaveBit == -1) {
         // Finished a byte in our interleave byte array start a new byte
         interleaveByte++;
         interleaveBit = 7;
@@ -128,7 +129,8 @@ public static byte[] interleaveBits(byte[][] columnsBinary) {
 
       // Find next column with a byte we can use
       do {
-        if (++sourceColumn == columnsBinary.length) {
+        ++sourceColumn;
+        if (sourceColumn == columnsBinary.length) {
           sourceColumn = 0;
           if (--sourceBit == -1) {
             sourceByte++;
diff --git a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java
index b34f950f90c8..17f19ec01af7 100644
--- a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java
+++ b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java
@@ -22,8 +22,8 @@
 
 import java.util.Arrays;
 import java.util.Random;
-import org.apache.commons.lang3.RandomStringUtils;
 import org.apache.iceberg.relocated.com.google.common.primitives.UnsignedBytes;
+import org.apache.iceberg.types.Types;
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -210,8 +210,8 @@ public void testDoubleOrdering() {
   @Test
   public void testStringOrdering() {
     for (int i = 0; i < NUM_TESTS; i++) {
-      String aString = RandomStringUtils.random(random.nextInt(35), true, true);
-      String bString = RandomStringUtils.random(random.nextInt(35), true, true);
+      String aString =  (String) RandomUtil.generatePrimitive(Types.StringType.get(), random);
+      String bString =  (String) RandomUtil.generatePrimitive(Types.StringType.get(), random);
       int stringCompare = Integer.signum(aString.compareTo(bString));
       byte[] aBytes = ZOrderByteUtils.stringToOrderedBytes(aString, 128);
       byte[] bBytes = ZOrderByteUtils.stringToOrderedBytes(bString, 128);
diff --git a/versions.props b/versions.props
index 3dc3a5041f23..c9ec027effe6 100644
--- a/versions.props
+++ b/versions.props
@@ -1,7 +1,6 @@
 org.slf4j:* = 1.7.25
 org.apache.avro:avro = 1.10.1
 org.apache.calcite:* = 1.10.0
-org.apache.commons:commons-lang3 = 3.12.0
 org.apache.flink:* = 1.12.5
 org.apache.hadoop:* = 2.7.3
 org.apache.hive:* = 2.3.8

From 3f6fc921ae83cb6d9643fbdb1522682dcbcdf065 Mon Sep 17 00:00:00 2001
From: Russell_Spitzer <rspitzer@apple.com>
Date: Mon, 31 Jan 2022 16:56:07 -0600
Subject: [PATCH 05/12] Fix JavaDoc

---
 core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
index 750831a9a5e7..e41a5b3be763 100644
--- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
+++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
@@ -61,7 +61,7 @@ public static byte[] longToOrderBytes(long val) {
 
   /**
    * IEEE 754 :
-   * “If two floating-point numbers in the same format are ordered (say, x \< y),
+   * “If two floating-point numbers in the same format are ordered (say, x {@literal <} y),
    * they are ordered the same way when their bits are reinterpreted as sign-magnitude integers.”
    *
    * Which means floats can be treated as sign magnitude integers which can then be converted into lexicographically

From 5ccf8f45bfd57a9305dc935e48807e500d7ebe3f Mon Sep 17 00:00:00 2001
From: Russell_Spitzer <rspitzer@apple.com>
Date: Mon, 31 Jan 2022 17:41:05 -0600
Subject: [PATCH 06/12] Add Functions for Smaller Types

---
 .../apache/iceberg/util/ZOrderByteUtils.java  | 18 ++++++++++
 .../iceberg/util/TestZOrderByteUtil.java      | 34 +++++++++++++++++++
 2 files changed, 52 insertions(+)

diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
index e41a5b3be763..ae68b69b745e 100644
--- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
+++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
@@ -59,6 +59,24 @@ public static byte[] longToOrderBytes(long val) {
     return bytes.array();
   }
 
+  /**
+   * Signed shorts are treated the same as the signed ints
+   */
+  public static byte[] shortToOrderBytes(short val) {
+    ByteBuffer bytes = ByteBuffer.allocate(Short.BYTES);
+    bytes.putShort((short) (val ^ (0x8000)));
+    return bytes.array();
+  }
+
+  /**
+   * Signed tiny ints are treated the same as the signed ints
+   */
+  public static byte[] tinyintToOrderedBytes(byte val) {
+    ByteBuffer bytes = ByteBuffer.allocate(Byte.BYTES);
+    bytes.put((byte) (val ^ (0x80)));
+    return bytes.array();
+  }
+
   /**
    * IEEE 754 :
    * “If two floating-point numbers in the same format are ordered (say, x {@literal <} y),
diff --git a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java
index 17f19ec01af7..81caf0ad0fb3 100644
--- a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java
+++ b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java
@@ -173,6 +173,40 @@ public void testLongOrdering() {
     }
   }
 
+  @Test
+  public void testShortOrdering() {
+    for (int i = 0; i < NUM_TESTS; i++) {
+      short aShort = (short) (random.nextInt() % (Short.MAX_VALUE + 1));
+      short bShort = (short) (random.nextInt() % (Short.MAX_VALUE + 1));
+      int longCompare = Integer.signum(Long.compare(aShort, bShort));
+      byte[] aBytes = ZOrderByteUtils.longToOrderBytes(aShort);
+      byte[] bBytes = ZOrderByteUtils.longToOrderBytes(bShort);
+      int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes));
+
+      Assert.assertEquals(String.format(
+              "Ordering of longs should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ",
+              aShort, bShort, longCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare),
+          longCompare, byteCompare);
+    }
+  }
+
+  @Test
+  public void testTinyOrdering() {
+    for (int i = 0; i < NUM_TESTS; i++) {
+      long aByte = (byte) (random.nextInt() % (Byte.MAX_VALUE + 1));
+      long bByte = (byte) (random.nextInt() % (Byte.MAX_VALUE + 1));
+      int longCompare = Integer.signum(Long.compare(aByte, bByte));
+      byte[] aBytes = ZOrderByteUtils.longToOrderBytes(aByte);
+      byte[] bBytes = ZOrderByteUtils.longToOrderBytes(bByte);
+      int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes));
+
+      Assert.assertEquals(String.format(
+              "Ordering of longs should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ",
+              aByte, bByte, longCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare),
+          longCompare, byteCompare);
+    }
+  }
+
   @Test
   public void testFloatOrdering() {
     for (int i = 0; i < NUM_TESTS; i++) {

From 30c4633f36faaf5c12f56cc1e9b07259d5a6eca3 Mon Sep 17 00:00:00 2001
From: Russell_Spitzer <rspitzer@apple.com>
Date: Mon, 7 Feb 2022 15:00:45 -0600
Subject: [PATCH 07/12] Updates for reviewer comments

---
 .../org/apache/iceberg/util/ByteBuffers.java  | 10 +++
 .../apache/iceberg/util/ZOrderByteUtils.java  | 84 +++++++++++--------
 .../iceberg/util/TestZOrderByteUtil.java      | 50 +++++++----
 3 files changed, 93 insertions(+), 51 deletions(-)

diff --git a/api/src/main/java/org/apache/iceberg/util/ByteBuffers.java b/api/src/main/java/org/apache/iceberg/util/ByteBuffers.java
index 213b222dc507..efc05f179f82 100644
--- a/api/src/main/java/org/apache/iceberg/util/ByteBuffers.java
+++ b/api/src/main/java/org/apache/iceberg/util/ByteBuffers.java
@@ -21,6 +21,7 @@
 
 import java.nio.ByteBuffer;
 import java.util.Arrays;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
 
 public class ByteBuffers {
 
@@ -46,6 +47,15 @@ public static byte[] toByteArray(ByteBuffer buffer) {
     }
   }
 
+  public static ByteBuffer reuse(ByteBuffer reuse, int length) {
+    Preconditions.checkArgument(reuse.hasArray() && reuse.arrayOffset() == 0 && reuse.capacity() == length,
+        "Cannot reuse buffer: Should be an array %s, should have an offset of 0 %s, should be of size %s was %s",
+        reuse.hasArray(), reuse.arrayOffset(), length, reuse.capacity());
+    reuse.position(0);
+    reuse.limit(length);
+    return reuse;
+  }
+
   public static ByteBuffer copy(ByteBuffer buffer) {
     if (buffer == null) {
       return null;
diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
index ae68b69b745e..deab4450a61d 100644
--- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
+++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
@@ -44,35 +44,35 @@ private ZOrderByteUtils() {
    * To fix this, flip the sign bit so that all negatives are ordered before positives. This essentially
    * shifts the 0 value so that we don't break our ordering when we cross the new 0 value.
    */
-  public static byte[] intToOrderedBytes(int val) {
-    ByteBuffer bytes = ByteBuffer.allocate(Integer.BYTES);
+  public static byte[] intToOrderedBytes(int val, ByteBuffer reuse) {
+    ByteBuffer bytes = ByteBuffers.reuse(reuse, Integer.BYTES);
     bytes.putInt(val ^ 0x80000000);
     return bytes.array();
   }
 
   /**
-   * Signed longs are treated the same as the signed ints
+   * Signed longs are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)}
    */
-  public static byte[] longToOrderBytes(long val) {
-    ByteBuffer bytes = ByteBuffer.allocate(Long.BYTES);
+  public static byte[] longToOrderedBytes(long val, ByteBuffer reuse) {
+    ByteBuffer bytes = ByteBuffers.reuse(reuse, Long.BYTES);
     bytes.putLong(val ^ 0x8000000000000000L);
     return bytes.array();
   }
 
   /**
-   * Signed shorts are treated the same as the signed ints
+   * Signed shorts are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)}
    */
-  public static byte[] shortToOrderBytes(short val) {
-    ByteBuffer bytes = ByteBuffer.allocate(Short.BYTES);
+  public static byte[] shortToOrderedBytes(short val, ByteBuffer reuse) {
+    ByteBuffer bytes = ByteBuffers.reuse(reuse, Short.BYTES);
     bytes.putShort((short) (val ^ (0x8000)));
     return bytes.array();
   }
 
   /**
-   * Signed tiny ints are treated the same as the signed ints
+   * Signed tiny ints are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)}
    */
-  public static byte[] tinyintToOrderedBytes(byte val) {
-    ByteBuffer bytes = ByteBuffer.allocate(Byte.BYTES);
+  public static byte[] tinyintToOrderedBytes(byte val, ByteBuffer reuse) {
+    ByteBuffer bytes = ByteBuffers.reuse(reuse, Byte.BYTES);
     bytes.put((byte) (val ^ (0x80)));
     return bytes.array();
   }
@@ -85,8 +85,8 @@ public static byte[] tinyintToOrderedBytes(byte val) {
    * Which means floats can be treated as sign magnitude integers which can then be converted into lexicographically
    * comparable bytes
    */
-  public static byte[] floatToOrderedBytes(float val) {
-    ByteBuffer bytes = ByteBuffer.allocate(Integer.BYTES);
+  public static byte[] floatToOrderedBytes(float val, ByteBuffer reuse) {
+    ByteBuffer bytes = ByteBuffers.reuse(reuse, Float.BYTES);
     int ival = Float.floatToIntBits(val);
     ival ^= ((ival >> (Integer.SIZE - 1)) | Integer.MIN_VALUE);
     bytes.putInt(ival);
@@ -94,10 +94,10 @@ public static byte[] floatToOrderedBytes(float val) {
   }
 
   /**
-   * Doubles are treated the same as floats
+   * Doubles are treated the same as floats in {@link #floatToOrderedBytes(float, ByteBuffer)}
    */
-  public static byte[] doubleToOrderedBytes(double val) {
-    ByteBuffer bytes = ByteBuffer.allocate(Long.BYTES);
+  public static byte[] doubleToOrderedBytes(double val, ByteBuffer reuse) {
+    ByteBuffer bytes = ByteBuffers.reuse(reuse, Double.BYTES);
     long lng = Double.doubleToLongBits(val);
     lng ^= ((lng >> (Long.SIZE - 1)) | Long.MIN_VALUE);
     bytes.putLong(lng);
@@ -108,54 +108,70 @@ public static byte[] doubleToOrderedBytes(double val) {
    * Strings are lexicographically sortable BUT if different byte array lengths will
    * ruin the Z-Ordering. (ZOrder requires that a given column contribute the same number of bytes every time).
    * This implementation just uses a set size to for all output byte representations. Truncating longer strings
-   * and right padding 0 for shorter strings.
+   * and right padding 0 for shorter strings. Requires UTF8 (or ASCII) encoding for ordering guarantees to hold.
    */
-  public static byte[] stringToOrderedBytes(String val, int length) {
-    ByteBuffer bytes = ByteBuffer.allocate(length);
+  public static byte[] stringToOrderedBytes(String val, int length, ByteBuffer reuse) {
+    ByteBuffer bytes = ByteBuffers.reuse(reuse, length);
+    Arrays.fill(bytes.array(), 0, length, (byte) 0x00);
     if (val != null) {
       int maxLength = Math.min(length, val.length());
+      // We may truncate mid-character
       bytes.put(val.getBytes(), 0, maxLength);
     }
     return bytes.array();
   }
 
   /**
-   * Interleave bits using a naive loop.
-   * @param columnsBinary an array of byte arrays, none of which are empty
-   * @return their bits interleaved
+   * Interleave bits using a naive loop. Variable length inputs are allowed but to get a consistent ordering it is
+   * required that every column contribute the same number of bytes in each invocation. Bits are interleaved from all
+   * columns that have a bit available at that position. Once a Column has no more bits to produce it is skipped in the
+   * interleaving.
+   * @param columnsBinary an array of ordered byte representations of the columns being ZOrdered
+   * @return the columnbytes interleaved
    */
   public static byte[] interleaveBits(byte[][] columnsBinary) {
     int interleavedSize = Arrays.stream(columnsBinary).mapToInt(a -> a.length).sum();
     byte[] interleavedBytes = new byte[interleavedSize];
-    int sourceBit = 7;
-    int sourceByte = 0;
     int sourceColumn = 0;
-    int interleaveBit = 7;
+    int sourceByte = 0;
+    int sourceBit = 7;
     int interleaveByte = 0;
-    while (interleaveByte < interleavedSize) {
-      // Take what we have, Get the source Bit of the source Byte, move it to the interleaveBit position
-      interleavedBytes[interleaveByte] =
-          (byte) (interleavedBytes[interleaveByte] |
-              (columnsBinary[sourceColumn][sourceByte] & 1 << sourceBit) >> sourceBit << interleaveBit);
+    int interleaveBit = 7;
 
+    while (interleaveByte < interleavedSize) {
+      // Take the source bit from source byte and move it to the output bit position
+      interleavedBytes[interleaveByte] |=
+              (columnsBinary[sourceColumn][sourceByte] & 1 << sourceBit) >>> sourceBit << interleaveBit;
       --interleaveBit;
+
+      // Check if an output byte has been completed
       if (interleaveBit == -1) {
-        // Finished a byte in our interleave byte array start a new byte
+        // Move to the next output byte
         interleaveByte++;
+        // Move to the highest order bit of the new output byte
         interleaveBit = 7;
       }
 
-      // Find next column with a byte we can use
+      // Check if the last output byte has been completed
+      if (interleaveByte == interleavedSize) {
+        break;
+      }
+
+      // Find the next source bit to interleave
       do {
+        // Move to next column
         ++sourceColumn;
         if (sourceColumn == columnsBinary.length) {
+          // If the last source column was used, reset to next bit of first column
           sourceColumn = 0;
-          if (--sourceBit == -1) {
+          --sourceBit;
+          if (sourceBit == -1) {
+            // If the last bit of the source byte was used, reset to the highest bit of the next byte
             sourceByte++;
             sourceBit = 7;
           }
         }
-      } while (columnsBinary[sourceColumn].length <= sourceByte && interleaveByte < interleavedSize);
+      } while (columnsBinary[sourceColumn].length <= sourceByte);
     }
     return interleavedBytes;
   }
diff --git a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java
index 81caf0ad0fb3..e2ff29d76c3a 100644
--- a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java
+++ b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java
@@ -20,6 +20,7 @@
 
 package org.apache.iceberg.util;
 
+import java.nio.ByteBuffer;
 import java.util.Arrays;
 import java.util.Random;
 import org.apache.iceberg.relocated.com.google.common.primitives.UnsignedBytes;
@@ -36,6 +37,7 @@ public class TestZOrderByteUtil {
   private static final byte OOOOOOOO = (byte) 0;
 
   private static final int NUM_TESTS = 100000;
+  private static final int NUM_INTERLEAVE_TESTS = 1000;
 
   private final Random random = new Random(42);
 
@@ -84,7 +86,7 @@ private String interleaveStrings(String[] strings) {
    */
   @Test
   public void testInterleaveRandomExamples() {
-    for (int test = 0; test < NUM_TESTS; test++) {
+    for (int test = 0; test < NUM_INTERLEAVE_TESTS; test++) {
       int numByteArrays = Math.abs(random.nextInt(6)) + 1;
       byte[][] testBytes =  new byte[numByteArrays][];
       String[] testStrings = new String[numByteArrays];
@@ -141,12 +143,14 @@ public void testInterleaveMixedBits() {
 
   @Test
   public void testIntOrdering() {
+    ByteBuffer aBuffer = ByteBuffer.allocate(Integer.BYTES);
+    ByteBuffer bBuffer = ByteBuffer.allocate(Integer.BYTES);
     for (int i = 0; i < NUM_TESTS; i++) {
       int aInt = random.nextInt();
       int bInt = random.nextInt();
       int intCompare = Integer.signum(Integer.compare(aInt, bInt));
-      byte[] aBytes = ZOrderByteUtils.intToOrderedBytes(aInt);
-      byte[] bBytes = ZOrderByteUtils.intToOrderedBytes(bInt);
+      byte[] aBytes = ZOrderByteUtils.intToOrderedBytes(aInt, aBuffer);
+      byte[] bBytes = ZOrderByteUtils.intToOrderedBytes(bInt, bBuffer);
       int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes));
 
       Assert.assertEquals(String.format(
@@ -158,12 +162,14 @@ public void testIntOrdering() {
 
   @Test
   public void testLongOrdering() {
+    ByteBuffer aBuffer = ByteBuffer.allocate(Long.BYTES);
+    ByteBuffer bBuffer = ByteBuffer.allocate(Long.BYTES);
     for (int i = 0; i < NUM_TESTS; i++) {
       long aLong = random.nextInt();
       long bLong = random.nextInt();
       int longCompare = Integer.signum(Long.compare(aLong, bLong));
-      byte[] aBytes = ZOrderByteUtils.longToOrderBytes(aLong);
-      byte[] bBytes = ZOrderByteUtils.longToOrderBytes(bLong);
+      byte[] aBytes = ZOrderByteUtils.longToOrderedBytes(aLong, aBuffer);
+      byte[] bBytes = ZOrderByteUtils.longToOrderedBytes(bLong, bBuffer);
       int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes));
 
       Assert.assertEquals(String.format(
@@ -175,12 +181,14 @@ public void testLongOrdering() {
 
   @Test
   public void testShortOrdering() {
+    ByteBuffer aBuffer = ByteBuffer.allocate(Short.BYTES);
+    ByteBuffer bBuffer = ByteBuffer.allocate(Short.BYTES);
     for (int i = 0; i < NUM_TESTS; i++) {
       short aShort = (short) (random.nextInt() % (Short.MAX_VALUE + 1));
       short bShort = (short) (random.nextInt() % (Short.MAX_VALUE + 1));
       int longCompare = Integer.signum(Long.compare(aShort, bShort));
-      byte[] aBytes = ZOrderByteUtils.longToOrderBytes(aShort);
-      byte[] bBytes = ZOrderByteUtils.longToOrderBytes(bShort);
+      byte[] aBytes = ZOrderByteUtils.shortToOrderedBytes(aShort, aBuffer);
+      byte[] bBytes = ZOrderByteUtils.shortToOrderedBytes(bShort, bBuffer);
       int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes));
 
       Assert.assertEquals(String.format(
@@ -192,12 +200,14 @@ public void testShortOrdering() {
 
   @Test
   public void testTinyOrdering() {
+    ByteBuffer aBuffer = ByteBuffer.allocate(Byte.BYTES);
+    ByteBuffer bBuffer = ByteBuffer.allocate(Byte.BYTES);
     for (int i = 0; i < NUM_TESTS; i++) {
-      long aByte = (byte) (random.nextInt() % (Byte.MAX_VALUE + 1));
-      long bByte = (byte) (random.nextInt() % (Byte.MAX_VALUE + 1));
+      byte aByte = (byte) (random.nextInt() % (Byte.MAX_VALUE + 1));
+      byte bByte = (byte) (random.nextInt() % (Byte.MAX_VALUE + 1));
       int longCompare = Integer.signum(Long.compare(aByte, bByte));
-      byte[] aBytes = ZOrderByteUtils.longToOrderBytes(aByte);
-      byte[] bBytes = ZOrderByteUtils.longToOrderBytes(bByte);
+      byte[] aBytes = ZOrderByteUtils.tinyintToOrderedBytes(aByte, aBuffer);
+      byte[] bBytes = ZOrderByteUtils.tinyintToOrderedBytes(bByte, bBuffer);
       int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes));
 
       Assert.assertEquals(String.format(
@@ -209,12 +219,14 @@ public void testTinyOrdering() {
 
   @Test
   public void testFloatOrdering() {
+    ByteBuffer aBuffer = ByteBuffer.allocate(Float.BYTES);
+    ByteBuffer bBuffer = ByteBuffer.allocate(Float.BYTES);
     for (int i = 0; i < NUM_TESTS; i++) {
       float aFloat = random.nextFloat();
       float bFloat = random.nextFloat();
       int floatCompare = Integer.signum(Float.compare(aFloat, bFloat));
-      byte[] aBytes = ZOrderByteUtils.floatToOrderedBytes(aFloat);
-      byte[] bBytes = ZOrderByteUtils.floatToOrderedBytes(bFloat);
+      byte[] aBytes = ZOrderByteUtils.floatToOrderedBytes(aFloat, aBuffer);
+      byte[] bBytes = ZOrderByteUtils.floatToOrderedBytes(bFloat, bBuffer);
       int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes));
 
       Assert.assertEquals(String.format(
@@ -226,12 +238,14 @@ public void testFloatOrdering() {
 
   @Test
   public void testDoubleOrdering() {
+    ByteBuffer aBuffer = ByteBuffer.allocate(Double.BYTES);
+    ByteBuffer bBuffer = ByteBuffer.allocate(Double.BYTES);
     for (int i = 0; i < NUM_TESTS; i++) {
       double aDouble = random.nextDouble();
       double bDouble = random.nextDouble();
       int doubleCompare = Integer.signum(Double.compare(aDouble, bDouble));
-      byte[] aBytes = ZOrderByteUtils.doubleToOrderedBytes(aDouble);
-      byte[] bBytes = ZOrderByteUtils.doubleToOrderedBytes(bDouble);
+      byte[] aBytes = ZOrderByteUtils.doubleToOrderedBytes(aDouble, aBuffer);
+      byte[] bBytes = ZOrderByteUtils.doubleToOrderedBytes(bDouble, bBuffer);
       int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes));
 
       Assert.assertEquals(String.format(
@@ -243,12 +257,14 @@ public void testDoubleOrdering() {
 
   @Test
   public void testStringOrdering() {
+    ByteBuffer aBuffer = ByteBuffer.allocate(128);
+    ByteBuffer bBuffer = ByteBuffer.allocate(128);
     for (int i = 0; i < NUM_TESTS; i++) {
       String aString =  (String) RandomUtil.generatePrimitive(Types.StringType.get(), random);
       String bString =  (String) RandomUtil.generatePrimitive(Types.StringType.get(), random);
       int stringCompare = Integer.signum(aString.compareTo(bString));
-      byte[] aBytes = ZOrderByteUtils.stringToOrderedBytes(aString, 128);
-      byte[] bBytes = ZOrderByteUtils.stringToOrderedBytes(bString, 128);
+      byte[] aBytes = ZOrderByteUtils.stringToOrderedBytes(aString, 128, aBuffer);
+      byte[] bBytes = ZOrderByteUtils.stringToOrderedBytes(bString, 128, bBuffer);
       int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes));
 
       Assert.assertEquals(String.format(

From 10e561c1a53c60341853631fc1d8936c60d6e41f Mon Sep 17 00:00:00 2001
From: Russell_Spitzer <rspitzer@apple.com>
Date: Mon, 7 Feb 2022 19:45:49 -0600
Subject: [PATCH 08/12] Specify Output Size

---
 .../org/apache/iceberg/util/ZOrderByteUtils.java     | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
index deab4450a61d..f4d28572be84 100644
--- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
+++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
@@ -121,16 +121,24 @@ public static byte[] stringToOrderedBytes(String val, int length, ByteBuffer reu
     return bytes.array();
   }
 
+  /**
+   * For Testing interleave all available bytes
+   */
+  static byte[] interleaveBits(byte[][] columnsBinary) {
+    return interleaveBits(columnsBinary,
+        Arrays.stream(columnsBinary).mapToInt(column -> column.length).max().getAsInt());
+  }
+
   /**
    * Interleave bits using a naive loop. Variable length inputs are allowed but to get a consistent ordering it is
    * required that every column contribute the same number of bytes in each invocation. Bits are interleaved from all
    * columns that have a bit available at that position. Once a Column has no more bits to produce it is skipped in the
    * interleaving.
    * @param columnsBinary an array of ordered byte representations of the columns being ZOrdered
+   * @param interleavedSize the number of bytes to use in the output
    * @return the columnbytes interleaved
    */
-  public static byte[] interleaveBits(byte[][] columnsBinary) {
-    int interleavedSize = Arrays.stream(columnsBinary).mapToInt(a -> a.length).sum();
+  public static byte[] interleaveBits(byte[][] columnsBinary, int interleavedSize) {
     byte[] interleavedBytes = new byte[interleavedSize];
     int sourceColumn = 0;
     int sourceByte = 0;

From 74d20a4ad15d620ba35e55bbfecf774cca663037 Mon Sep 17 00:00:00 2001
From: Russell_Spitzer <rspitzer@apple.com>
Date: Tue, 8 Feb 2022 08:48:29 -0600
Subject: [PATCH 09/12] Fix Encoding

Also a patch for the test interleave method length calculation
---
 .../main/java/org/apache/iceberg/util/ZOrderByteUtils.java | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
index f4d28572be84..52180cac7a2d 100644
--- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
+++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
@@ -20,6 +20,7 @@
 package org.apache.iceberg.util;
 
 import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 
 /**
@@ -108,7 +109,7 @@ public static byte[] doubleToOrderedBytes(double val, ByteBuffer reuse) {
    * Strings are lexicographically sortable BUT if different byte array lengths will
    * ruin the Z-Ordering. (ZOrder requires that a given column contribute the same number of bytes every time).
    * This implementation just uses a set size to for all output byte representations. Truncating longer strings
-   * and right padding 0 for shorter strings. Requires UTF8 (or ASCII) encoding for ordering guarantees to hold.
+   * and right padding 0 for shorter strings.
    */
   public static byte[] stringToOrderedBytes(String val, int length, ByteBuffer reuse) {
     ByteBuffer bytes = ByteBuffers.reuse(reuse, length);
@@ -116,7 +117,7 @@ public static byte[] stringToOrderedBytes(String val, int length, ByteBuffer reu
     if (val != null) {
       int maxLength = Math.min(length, val.length());
       // We may truncate mid-character
-      bytes.put(val.getBytes(), 0, maxLength);
+      bytes.put(val.getBytes(StandardCharsets.UTF_8), 0, maxLength);
     }
     return bytes.array();
   }
@@ -126,7 +127,7 @@ public static byte[] stringToOrderedBytes(String val, int length, ByteBuffer reu
    */
   static byte[] interleaveBits(byte[][] columnsBinary) {
     return interleaveBits(columnsBinary,
-        Arrays.stream(columnsBinary).mapToInt(column -> column.length).max().getAsInt());
+        Arrays.stream(columnsBinary).mapToInt(column -> column.length).sum());
   }
 
   /**

From 848de3b11a2fc414438c9029e98b8dfae80e278f Mon Sep 17 00:00:00 2001
From: Russell_Spitzer <rspitzer@apple.com>
Date: Tue, 8 Feb 2022 11:58:08 -0600
Subject: [PATCH 10/12] Methods return ByteBuffers, Strings are fit into our
 buffer using CharsetEncoder.encode

---
 .../apache/iceberg/util/ZOrderByteUtils.java  | 32 ++++++++++---------
 .../iceberg/util/TestZOrderByteUtil.java      | 31 ++++++++++--------
 2 files changed, 34 insertions(+), 29 deletions(-)

diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
index 52180cac7a2d..967aa0bf7c5c 100644
--- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
+++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
@@ -20,6 +20,8 @@
 package org.apache.iceberg.util;
 
 import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharsetEncoder;
 import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 
@@ -45,37 +47,37 @@ private ZOrderByteUtils() {
    * To fix this, flip the sign bit so that all negatives are ordered before positives. This essentially
    * shifts the 0 value so that we don't break our ordering when we cross the new 0 value.
    */
-  public static byte[] intToOrderedBytes(int val, ByteBuffer reuse) {
+  public static ByteBuffer intToOrderedBytes(int val, ByteBuffer reuse) {
     ByteBuffer bytes = ByteBuffers.reuse(reuse, Integer.BYTES);
     bytes.putInt(val ^ 0x80000000);
-    return bytes.array();
+    return bytes;
   }
 
   /**
    * Signed longs are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)}
    */
-  public static byte[] longToOrderedBytes(long val, ByteBuffer reuse) {
+  public static ByteBuffer longToOrderedBytes(long val, ByteBuffer reuse) {
     ByteBuffer bytes = ByteBuffers.reuse(reuse, Long.BYTES);
     bytes.putLong(val ^ 0x8000000000000000L);
-    return bytes.array();
+    return bytes;
   }
 
   /**
    * Signed shorts are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)}
    */
-  public static byte[] shortToOrderedBytes(short val, ByteBuffer reuse) {
+  public static ByteBuffer shortToOrderedBytes(short val, ByteBuffer reuse) {
     ByteBuffer bytes = ByteBuffers.reuse(reuse, Short.BYTES);
     bytes.putShort((short) (val ^ (0x8000)));
-    return bytes.array();
+    return bytes;
   }
 
   /**
    * Signed tiny ints are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)}
    */
-  public static byte[] tinyintToOrderedBytes(byte val, ByteBuffer reuse) {
+  public static ByteBuffer tinyintToOrderedBytes(byte val, ByteBuffer reuse) {
     ByteBuffer bytes = ByteBuffers.reuse(reuse, Byte.BYTES);
     bytes.put((byte) (val ^ (0x80)));
-    return bytes.array();
+    return bytes;
   }
 
   /**
@@ -86,23 +88,23 @@ public static byte[] tinyintToOrderedBytes(byte val, ByteBuffer reuse) {
    * Which means floats can be treated as sign magnitude integers which can then be converted into lexicographically
    * comparable bytes
    */
-  public static byte[] floatToOrderedBytes(float val, ByteBuffer reuse) {
+  public static ByteBuffer floatToOrderedBytes(float val, ByteBuffer reuse) {
     ByteBuffer bytes = ByteBuffers.reuse(reuse, Float.BYTES);
     int ival = Float.floatToIntBits(val);
     ival ^= ((ival >> (Integer.SIZE - 1)) | Integer.MIN_VALUE);
     bytes.putInt(ival);
-    return bytes.array();
+    return bytes;
   }
 
   /**
    * Doubles are treated the same as floats in {@link #floatToOrderedBytes(float, ByteBuffer)}
    */
-  public static byte[] doubleToOrderedBytes(double val, ByteBuffer reuse) {
+  public static ByteBuffer doubleToOrderedBytes(double val, ByteBuffer reuse) {
     ByteBuffer bytes = ByteBuffers.reuse(reuse, Double.BYTES);
     long lng = Double.doubleToLongBits(val);
     lng ^= ((lng >> (Long.SIZE - 1)) | Long.MIN_VALUE);
     bytes.putLong(lng);
-    return bytes.array();
+    return bytes;
   }
 
   /**
@@ -111,15 +113,15 @@ public static byte[] doubleToOrderedBytes(double val, ByteBuffer reuse) {
    * This implementation just uses a set size to for all output byte representations. Truncating longer strings
    * and right padding 0 for shorter strings.
    */
-  public static byte[] stringToOrderedBytes(String val, int length, ByteBuffer reuse) {
+  public static ByteBuffer stringToOrderedBytes(String val, int length, ByteBuffer reuse, CharsetEncoder encoder) {
     ByteBuffer bytes = ByteBuffers.reuse(reuse, length);
     Arrays.fill(bytes.array(), 0, length, (byte) 0x00);
     if (val != null) {
       int maxLength = Math.min(length, val.length());
       // We may truncate mid-character
-      bytes.put(val.getBytes(StandardCharsets.UTF_8), 0, maxLength);
+      encoder.encode(CharBuffer.wrap(val), bytes, true);
     }
-    return bytes.array();
+    return bytes;
   }
 
   /**
diff --git a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java
index e2ff29d76c3a..bf84319d0d45 100644
--- a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java
+++ b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java
@@ -21,6 +21,8 @@
 package org.apache.iceberg.util;
 
 import java.nio.ByteBuffer;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 import java.util.Random;
 import org.apache.iceberg.relocated.com.google.common.primitives.UnsignedBytes;
@@ -149,8 +151,8 @@ public void testIntOrdering() {
       int aInt = random.nextInt();
       int bInt = random.nextInt();
       int intCompare = Integer.signum(Integer.compare(aInt, bInt));
-      byte[] aBytes = ZOrderByteUtils.intToOrderedBytes(aInt, aBuffer);
-      byte[] bBytes = ZOrderByteUtils.intToOrderedBytes(bInt, bBuffer);
+      byte[] aBytes = ZOrderByteUtils.intToOrderedBytes(aInt, aBuffer).array();
+      byte[] bBytes = ZOrderByteUtils.intToOrderedBytes(bInt, bBuffer).array();
       int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes));
 
       Assert.assertEquals(String.format(
@@ -168,8 +170,8 @@ public void testLongOrdering() {
       long aLong = random.nextInt();
       long bLong = random.nextInt();
       int longCompare = Integer.signum(Long.compare(aLong, bLong));
-      byte[] aBytes = ZOrderByteUtils.longToOrderedBytes(aLong, aBuffer);
-      byte[] bBytes = ZOrderByteUtils.longToOrderedBytes(bLong, bBuffer);
+      byte[] aBytes = ZOrderByteUtils.longToOrderedBytes(aLong, aBuffer).array();
+      byte[] bBytes = ZOrderByteUtils.longToOrderedBytes(bLong, bBuffer).array();
       int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes));
 
       Assert.assertEquals(String.format(
@@ -187,8 +189,8 @@ public void testShortOrdering() {
       short aShort = (short) (random.nextInt() % (Short.MAX_VALUE + 1));
       short bShort = (short) (random.nextInt() % (Short.MAX_VALUE + 1));
       int longCompare = Integer.signum(Long.compare(aShort, bShort));
-      byte[] aBytes = ZOrderByteUtils.shortToOrderedBytes(aShort, aBuffer);
-      byte[] bBytes = ZOrderByteUtils.shortToOrderedBytes(bShort, bBuffer);
+      byte[] aBytes = ZOrderByteUtils.shortToOrderedBytes(aShort, aBuffer).array();
+      byte[] bBytes = ZOrderByteUtils.shortToOrderedBytes(bShort, bBuffer).array();
       int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes));
 
       Assert.assertEquals(String.format(
@@ -206,8 +208,8 @@ public void testTinyOrdering() {
       byte aByte = (byte) (random.nextInt() % (Byte.MAX_VALUE + 1));
       byte bByte = (byte) (random.nextInt() % (Byte.MAX_VALUE + 1));
       int longCompare = Integer.signum(Long.compare(aByte, bByte));
-      byte[] aBytes = ZOrderByteUtils.tinyintToOrderedBytes(aByte, aBuffer);
-      byte[] bBytes = ZOrderByteUtils.tinyintToOrderedBytes(bByte, bBuffer);
+      byte[] aBytes = ZOrderByteUtils.tinyintToOrderedBytes(aByte, aBuffer).array();
+      byte[] bBytes = ZOrderByteUtils.tinyintToOrderedBytes(bByte, bBuffer).array();
       int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes));
 
       Assert.assertEquals(String.format(
@@ -225,8 +227,8 @@ public void testFloatOrdering() {
       float aFloat = random.nextFloat();
       float bFloat = random.nextFloat();
       int floatCompare = Integer.signum(Float.compare(aFloat, bFloat));
-      byte[] aBytes = ZOrderByteUtils.floatToOrderedBytes(aFloat, aBuffer);
-      byte[] bBytes = ZOrderByteUtils.floatToOrderedBytes(bFloat, bBuffer);
+      byte[] aBytes = ZOrderByteUtils.floatToOrderedBytes(aFloat, aBuffer).array();
+      byte[] bBytes = ZOrderByteUtils.floatToOrderedBytes(bFloat, bBuffer).array();
       int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes));
 
       Assert.assertEquals(String.format(
@@ -244,8 +246,8 @@ public void testDoubleOrdering() {
       double aDouble = random.nextDouble();
       double bDouble = random.nextDouble();
       int doubleCompare = Integer.signum(Double.compare(aDouble, bDouble));
-      byte[] aBytes = ZOrderByteUtils.doubleToOrderedBytes(aDouble, aBuffer);
-      byte[] bBytes = ZOrderByteUtils.doubleToOrderedBytes(bDouble, bBuffer);
+      byte[] aBytes = ZOrderByteUtils.doubleToOrderedBytes(aDouble, aBuffer).array();
+      byte[] bBytes = ZOrderByteUtils.doubleToOrderedBytes(bDouble, bBuffer).array();
       int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes));
 
       Assert.assertEquals(String.format(
@@ -257,14 +259,15 @@ public void testDoubleOrdering() {
 
   @Test
   public void testStringOrdering() {
+    CharsetEncoder encoder =  StandardCharsets.UTF_8.newEncoder();
     ByteBuffer aBuffer = ByteBuffer.allocate(128);
     ByteBuffer bBuffer = ByteBuffer.allocate(128);
     for (int i = 0; i < NUM_TESTS; i++) {
       String aString =  (String) RandomUtil.generatePrimitive(Types.StringType.get(), random);
       String bString =  (String) RandomUtil.generatePrimitive(Types.StringType.get(), random);
       int stringCompare = Integer.signum(aString.compareTo(bString));
-      byte[] aBytes = ZOrderByteUtils.stringToOrderedBytes(aString, 128, aBuffer);
-      byte[] bBytes = ZOrderByteUtils.stringToOrderedBytes(bString, 128, bBuffer);
+      byte[] aBytes = ZOrderByteUtils.stringToOrderedBytes(aString, 128, aBuffer, encoder).array();
+      byte[] bBytes = ZOrderByteUtils.stringToOrderedBytes(bString, 128, bBuffer, encoder).array();
       int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes));
 
       Assert.assertEquals(String.format(

From 83586f1f5262490509bb599649357d8f8189bfc6 Mon Sep 17 00:00:00 2001
From: Russell_Spitzer <rspitzer@apple.com>
Date: Tue, 8 Feb 2022 21:49:55 -0600
Subject: [PATCH 11/12] Remove unused string length

---
 .../java/org/apache/iceberg/util/ZOrderByteUtils.java    | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
index 967aa0bf7c5c..3ec4c0f430f4 100644
--- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
+++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
@@ -24,6 +24,7 @@
 import java.nio.charset.CharsetEncoder;
 import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
 
 /**
  * Within Z-Ordering the byte representations of objects being compared must be ordered,
@@ -114,12 +115,14 @@ public static ByteBuffer doubleToOrderedBytes(double val, ByteBuffer reuse) {
    * and right padding 0 for shorter strings.
    */
   public static ByteBuffer stringToOrderedBytes(String val, int length, ByteBuffer reuse, CharsetEncoder encoder) {
+    Preconditions.checkArgument(encoder.charset().equals(StandardCharsets.UTF_8),
+        "Cannot use an encoder not using UTF_8 as its Charset");
+
     ByteBuffer bytes = ByteBuffers.reuse(reuse, length);
     Arrays.fill(bytes.array(), 0, length, (byte) 0x00);
     if (val != null) {
-      int maxLength = Math.min(length, val.length());
-      // We may truncate mid-character
-      encoder.encode(CharBuffer.wrap(val), bytes, true);
+      CharBuffer inputBuffer = CharBuffer.wrap(val);
+      encoder.encode(inputBuffer, bytes, true);
     }
     return bytes;
   }

From 57e1462fb27a70d659c1bdaaac15c659cfa22bbb Mon Sep 17 00:00:00 2001
From: Russell_Spitzer <rspitzer@apple.com>
Date: Tue, 8 Feb 2022 22:05:29 -0600
Subject: [PATCH 12/12] Update docs

---
 .../main/java/org/apache/iceberg/util/ZOrderByteUtils.java  | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
index 3ec4c0f430f4..b008461ea8ca 100644
--- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
+++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
@@ -30,10 +30,12 @@
  * Within Z-Ordering the byte representations of objects being compared must be ordered,
  * this requires several types to be transformed when converted to bytes. The goal is to
  * map object's whose byte representation are not lexicographically ordered into representations
- * that are lexicographically ordered.
+ * that are lexicographically ordered. Bytes produced should be compared lexicographically as
+ * unsigned bytes, big-endian.
+ * <p>
  * Most of these techniques are derived from
  * https://aws.amazon.com/blogs/database/z-order-indexing-for-multifaceted-queries-in-amazon-dynamodb-part-2/
- *
+ * <p>
  * Some implementation is taken from
  * https://github.com/apache/hbase/blob/master/hbase-common/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java
  */