patrickfav · patrickfav · Aug 12, 2018 · Aug 12, 2018 · Aug 12, 2018 · Aug 12, 2018
diff --git a/CHANGELOG b/CHANGELOG
@@ -3,6 +3,11 @@
 ## v0.8.0
 
 * add radix encoding/parsing and fix radix tests #6, #20
+* add support for Base32 RFC4648 non-hex alphabet encoding/parsing #21
+
+### Deprecations (will be removed in v1.0+)
+
+* `parseBase36()`/`encodeBase36()` - use `parseRadix(36)`/`encodeRadix(36)` instead
 
 ## v0.7.1
 
@@ -29,6 +34,10 @@
  * add constructor/converter from/to UUID #9
  * add `empty()` constructor, creating empty byte array
 
+### Deprecations (will be removed in v1.0+)
+
+* `toObjectArray()` renamed to `toBoxedArray()`
+
 ## v0.5.0
 
  * better resource handling for compression

diff --git a/README.md b/README.md
@@ -35,7 +35,7 @@ It's main features include:
 * **Creation** from a wide variety of sources: multiple arrays, integers, [streams](https://docs.oracle.com/javase/7/docs/api/java/io/InputStream.html), random, strings, files, uuid, ...
 * **Transformation** with many built-in: append, [xor](https://en.wikipedia.org/wiki/Exclusive_or), [and](https://en.wikipedia.org/wiki/Logical_conjunction), [hash](https://en.wikipedia.org/wiki/Cryptographic_hash_function), [shifts](https://en.wikipedia.org/wiki/Bitwise_operation#Bit_shifts), shuffle, reverse, [checksum](https://en.wikipedia.org/wiki/Checksum), ...
 * **Validators** with the ability to arbitrarily combine multiple ones with logical expressions
-* **Parsing and Encoding** in most common binary-to-text-encodings: [hex](https://en.wikipedia.org/wiki/Hexadecimal), [base36](https://en.wikipedia.org/wiki/Base36), [base64](https://en.wikipedia.org/wiki/Base64), ...
+* **Parsing and Encoding** in most common binary-to-text-encodings: [hex](https://en.wikipedia.org/wiki/Hexadecimal), [base32](https://en.wikipedia.org/wiki/Base32), [base64](https://en.wikipedia.org/wiki/Base64), ...
 * **Immutable, Mutable and Read-Only** versions
 * **Handling Strings** with encoding and normalizing strings for arbitrary charset
 * **Utility Features** like `indexOf`, `count`, `isEmpty`, `bitAt`, `contains` ...
@@ -312,7 +312,14 @@ Bytes.from(array).encodeBase64(); //"SpT9/x6v7Q=="
 Bytes.from(array).encodeBase64Url(); //"SpT9_x6v7Q=="
  ```
 
-Additionally the following encodings are supported:
+There is also a **Base32** encoder (using the RFC 4648 non-hex alphabet):
+
+```java
+Bytes.parseBase32("MZXQ====");
+Bytes.from(array).encodeBase32();
+ ```
+
+Additionally, the following radix encodings are supported:
 
 ```java
 Bytes.from(array).encodeBinary(); //1110110110101111

diff --git a/pom.xml b/pom.xml
@@ -215,6 +215,18 @@
             <version>4.12</version>
             <scope>test</scope>
         </dependency>
+        <dependency>
+            <groupId>org.openjdk.jmh</groupId>
+            <artifactId>jmh-core</artifactId>
+            <version>1.21</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.openjdk.jmh</groupId>
+            <artifactId>jmh-generator-annprocess</artifactId>
+            <version>1.21</version>
+            <scope>test</scope>
+        </dependency>
     </dependencies>
     <developers>
         <developer>

diff --git a/src/main/java/at/favre/lib/bytes/BaseEncoding.java b/src/main/java/at/favre/lib/bytes/BaseEncoding.java
@@ -0,0 +1,210 @@
+/*
+ * Copyright 2018 Patrick Favre-Bulle
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package at.favre.lib.bytes;
+
+import java.io.IOException;
+import java.nio.ByteOrder;
+import java.util.Arrays;
+import java.util.Objects;
+
+/**
+ * Encoder/decoder for binary-to-text encodings with an arbitrary alphabet and optional padding.
+ * <p>
+ * The input is handled in chunks of <code>bytesPerChunk</code> bytes which are mapped to
+ * <code>charsPerChunk</code> characters; if a padding character is set, a shorter final chunk
+ * is filled up with it.
+ * <p>
+ * Derived from Google Guava's common/io/ BaseEncoding
+ * <p>
+ * See: https://github.com/google/guava/blob/v26.0/guava/src/com/google/common/io/BaseEncoding.java
+ */
+final class BaseEncoding implements BinaryToTextEncoding.EncoderDecoder {
+    private static final char ASCII_MAX = 127;
+
+    // NOTE: the names spell "RFC4848", but this is the Base32 alphabet of RFC 4648; the identifiers
+    // are left untouched because Bytes references them.
+    static final Alphabet BASE32_RFC4848 = new Alphabet("ABCDEFGHIJKLMNOPQRSTUVWXYZ234567".toCharArray());
+    static final char BASE32_RFC4848_PADDING = '=';
+
+    private final Alphabet alphabet;
+    private final Character paddingChar;
+
+    /**
+     * @param alphabet    characters used for encoding/decoding, must not be null
+     * @param paddingChar character used to fill up incomplete chunks or null to disable padding
+     */
+    BaseEncoding(Alphabet alphabet, Character paddingChar) {
+        this.alphabet = Objects.requireNonNull(alphabet);
+        this.paddingChar = paddingChar;
+    }
+
+    /**
+     * Number of characters of whole chunks needed to encode the given number of bytes.
+     */
+    private int maxEncodedSize(int bytes) {
+        return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk);
+    }
+
+    /**
+     * Encodes the whole array with the configured alphabet.
+     *
+     * @param array     bytes to encode
+     * @param byteOrder not used by this implementation, bytes are encoded in array order
+     * @return the encoded string
+     */
+    @Override
+    public String encode(byte[] array, ByteOrder byteOrder) {
+        return encode(array, 0, array.length);
+    }
+
+    /**
+     * Encodes <code>len</code> bytes starting at <code>off</code> into a new string.
+     */
+    private String encode(byte[] bytes, int off, int len) {
+        StringBuilder result = new StringBuilder(maxEncodedSize(len));
+        try {
+            encodeTo(result, bytes, off, len);
+        } catch (IOException impossible) {
+            // the target is a StringBuilder; the code in this class never raises IOException itself
+            throw new AssertionError(impossible);
+        }
+        return result.toString();
+    }
+
+    /**
+     * Splits the given range into chunks of at most <code>bytesPerChunk</code> bytes and encodes each of them.
+     */
+    private void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
+        Objects.requireNonNull(target);
+        for (int i = 0; i < len; i += alphabet.bytesPerChunk) {
+            encodeChunkTo(target, bytes, off + i, Math.min(alphabet.bytesPerChunk, len - i));
+        }
+    }
+
+    /**
+     * Encodes a single chunk of <code>len</code> bytes and appends padding for the missing characters
+     * if a padding character is set.
+     */
+    private void encodeChunkTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
+        Objects.requireNonNull(target);
+        long bitBuffer = 0;
+        for (int i = 0; i < len; ++i) {
+            bitBuffer |= bytes[off + i] & 0xFF;
+            bitBuffer <<= 8; // Add additional zero byte in the end.
+        }
+        // Position of first character is length of bitBuffer minus bitsPerChar.
+        final int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar;
+        int bitsProcessed = 0;
+        while (bitsProcessed < len * 8) {
+            int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask;
+            target.append(alphabet.encode(charIndex));
+            bitsProcessed += alphabet.bitsPerChar;
+        }
+        if (paddingChar != null) {
+            while (bitsProcessed < alphabet.bytesPerChunk * 8) {
+                target.append(paddingChar);
+                bitsProcessed += alphabet.bitsPerChar;
+            }
+        }
+    }
+
+    /**
+     * Upper bound of bytes that the given number of characters can decode to.
+     */
+    private int maxDecodedSize(int chars) {
+        return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
+    }
+
+    /**
+     * Returns the given characters without trailing padding characters; if no padding character is
+     * set the content is returned unchanged.
+     */
+    private String trimTrailingPadding(CharSequence chars) {
+        Objects.requireNonNull(chars);
+        if (paddingChar == null) {
+            return chars.toString();
+        }
+        int l;
+        for (l = chars.length() - 1; l >= 0; l--) {
+            if (chars.charAt(l) != paddingChar) {
+                break;
+            }
+        }
+        return chars.subSequence(0, l + 1).toString();
+    }
+
+    /**
+     * Decodes the given string; trailing padding is removed before decoding.
+     *
+     * @param encoded the encoded string, must not be null
+     * @return the decoded bytes
+     * @throws IllegalArgumentException if a character (after removing trailing padding) is not part of the alphabet
+     */
+    @Override
+    public byte[] decode(String encoded) {
+        String unpadded = trimTrailingPadding(encoded);
+        byte[] tmp = new byte[maxDecodedSize(unpadded.length())];
+        int len = decodeTo(tmp, unpadded);
+        return extract(tmp, len);
+    }
+
+    /**
+     * Returns the first <code>length</code> bytes of the array, the array itself if nothing needs to be cut off.
+     */
+    private static byte[] extract(byte[] result, int length) {
+        if (length == result.length) {
+            return result;
+        }
+        return Arrays.copyOf(result, length);
+    }
+
+    /**
+     * Decodes the characters chunk by chunk into target.
+     *
+     * @param target receives the decoded bytes
+     * @param chars  encoded characters, trailing padding must already be removed by the caller
+     * @return number of bytes written to target
+     */
+    private int decodeTo(byte[] target, CharSequence chars) {
+        Objects.requireNonNull(target);
+        int bytesWritten = 0;
+        for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) {
+            long chunk = 0;
+            int charsProcessed = 0;
+            for (int i = 0; i < alphabet.charsPerChunk; i++) {
+                chunk <<= alphabet.bitsPerChar;
+                if (charIdx + i < chars.length()) {
+                    chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++));
+                }
+            }
+            // only emit bytes that are fully covered by the characters actually read in this chunk
+            final int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar;
+            for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) {
+                target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF);
+            }
+        }
+        return bytesWritten;
+    }
+
+    /**
+     * Immutable set of ASCII characters together with the derived chunk sizes and a reverse lookup table.
+     */
+    static final class Alphabet {
+        // this is meant to be immutable -- don't modify it!
+        private final char[] chars;
+        final int mask;
+        final int bitsPerChar;
+        final int charsPerChunk;
+        final int bytesPerChunk;
+        private final byte[] decodabet;
+
+        /**
+         * @param chars characters of the alphabet; the bit masking below relies on the length being a power of two
+         * @throws IllegalArgumentException if a character is outside of the ASCII range
+         */
+        Alphabet(char[] chars) {
+            // defensive copy so later changes to the passed array cannot alter this alphabet
+            this.chars = Objects.requireNonNull(chars).clone();
+            this.bitsPerChar = log2(this.chars.length);
+
+            /*
+             * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. This makes
+             * for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8.
+             */
+            int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar));
+            this.charsPerChunk = 8 / gcd;
+            this.bytesPerChunk = bitsPerChar / gcd;
+            this.mask = this.chars.length - 1;
+
+            byte[] decodabet = new byte[ASCII_MAX + 1];
+            Arrays.fill(decodabet, (byte) -1);
+            for (int i = 0; i < this.chars.length; i++) {
+                char c = this.chars[i];
+                if (c > ASCII_MAX) {
+                    throw new IllegalArgumentException("alphabet must only contain ASCII characters, found: '" + c + "'");
+                }
+                decodabet[c] = (byte) i;
+            }
+            this.decodabet = decodabet;
+        }
+
+        /**
+         * Returns the character for the given index into the alphabet.
+         */
+        char encode(int bits) {
+            return chars[bits];
+        }
+
+        /**
+         * Returns the index of the given character within the alphabet.
+         *
+         * @throws IllegalArgumentException if the character is not part of the alphabet
+         */
+        int decode(char ch) {
+            // without this check unknown characters would yield -1 and silently corrupt the decoded chunk
+            if (ch > ASCII_MAX || decodabet[ch] == -1) {
+                throw new IllegalArgumentException("unrecognized character: '" + ch + "'");
+            }
+            return decodabet[ch];
+        }
+    }
+
+    /**
+     * Integer division of p by q, rounded towards positive infinity.
+     */
+    private static int divide(int p, int q) {
+        int div = p / q;
+        int rem = p - q * div; // equal to p % q
+
+        if (rem == 0) {
+            return div;
+        }
+        // +1 if p and q have the same sign, -1 otherwise
+        int signum = 1 | ((p ^ q) >> (Integer.SIZE - 1));
+        return signum > 0 ? div + signum : div;
+    }
+
+    /**
+     * Position of the highest set bit of x, i.e. the base 2 logarithm rounded down for positive x.
+     */
+    private static int log2(int x) {
+        return (Integer.SIZE - 1) - Integer.numberOfLeadingZeros(x);
+    }
+}
diff --git a/src/main/java/at/favre/lib/bytes/Bytes.java b/src/main/java/at/favre/lib/bytes/Bytes.java
@@ -156,8 +156,7 @@ public static Bytes wrap(byte[] array) {
      * @return new instance
      */
     public static Bytes wrap(byte[] array, ByteOrder byteOrder) {
-        Objects.requireNonNull(array, "passed array must not be null");
-        return new Bytes(array, byteOrder);
+        return new Bytes(Objects.requireNonNull(array, "passed array must not be null"), byteOrder);
     }
 
     /**
@@ -168,8 +167,7 @@ public static Bytes wrap(byte[] array, ByteOrder byteOrder) {
      * @return new instance
      */
     public static Bytes from(byte[] byteArrayToCopy) {
-        Objects.requireNonNull(byteArrayToCopy, "must at least pass a single byte");
-        return wrap(Arrays.copyOf(byteArrayToCopy, byteArrayToCopy.length));
+        return wrap(Arrays.copyOf(Objects.requireNonNull(byteArrayToCopy, "must at least pass a single byte"), byteArrayToCopy.length));
     }
 
     /**
@@ -318,8 +316,7 @@ public static Bytes from(int integer4byte) {
      * @return new instance
      */
     public static Bytes from(int... intArray) {
-        Objects.requireNonNull(intArray, "must provide at least a single int");
-        return wrap(Util.toByteArray(intArray));
+        return wrap(Util.toByteArray(Objects.requireNonNull(intArray, "must provide at least a single int")));
     }
 
     /**
@@ -339,8 +336,7 @@ public static Bytes from(long long8byte) {
      * @return new instance
      */
     public static Bytes from(long... longArray) {
-        Objects.requireNonNull(longArray, "must provide at least a single long");
-        return wrap(Util.toByteArray(longArray));
+        return wrap(Util.toByteArray(Objects.requireNonNull(longArray, "must provide at least a single long")));
     }
 
     /**
@@ -470,9 +466,7 @@ public static Bytes from(CharSequence utf8String, Normalizer.Form form) {
      * @return new instance
      */
     public static Bytes from(CharSequence string, Charset charset) {
-        Objects.requireNonNull(string, "provided string must not be null");
-        Objects.requireNonNull(charset, "provided charset must not be null");
-        return wrap(string.toString().getBytes(charset));
+        return wrap(Objects.requireNonNull(string, "provided string must not be null").toString().getBytes(Objects.requireNonNull(charset, "provided charset must not be null")));
     }
 
     /**
@@ -567,6 +561,18 @@ public static Bytes parseHex(String hexString) {
         return parse(hexString, new BinaryToTextEncoding.Hex());
     }
 
+    /**
+     * Parsing of base32/RFC 4648 encoded byte arrays.
+     * <p>
+     * Uses the RFC 4648 non-hex alphabet, see <a href="https://en.wikipedia.org/wiki/Base32#RFC_4648_Base32_alphabet">Base32 alphabet</a>.
+     *
+     * @param base32Rfc4648String the encoded string
+     * @return decoded instance
+     */
+    public static Bytes parseBase32(String base32Rfc4648String) {
+        return parse(base32Rfc4648String, new BaseEncoding(BaseEncoding.BASE32_RFC4848, BaseEncoding.BASE32_RFC4848_PADDING));
+    }
+
     /**
      * Parsing of base36 encoded byte arrays.
      * <p>
@@ -1538,6 +1544,19 @@ public String encodeHex(boolean upperCase) {
         return encode(new BinaryToTextEncoding.Hex(upperCase));
     }
 
+    /**
+     * Base32 RFC4648 string representation of the internal byte array (not Base32 hex alphabet extension)
+     * <p>
+     * Example: <code>MZXW6YQ=</code>
+     * <p>
+     * See <a href="https://tools.ietf.org/html/rfc4648">RFC 4648</a>
+     *
+     * @return base32 string
+     */
+    public String encodeBase32() {
+        return encode(new BaseEncoding(BaseEncoding.BASE32_RFC4848, BaseEncoding.BASE32_RFC4848_PADDING));
+    }
+
     /**
      * DO NOT USE AS DATA ENCODING, ONLY FOR NUMBERS!
      * <p>
@@ -1597,8 +1616,7 @@ public String encodeUtf8() {
      * @return encoded string
      */
     public String encodeCharset(Charset charset) {
-        Objects.requireNonNull(charset, "given charset must not be null");
-        return new String(internalArray(), charset);
+        return new String(internalArray(), Objects.requireNonNull(charset, "given charset must not be null"));
     }
 
     /**