From b21ed765a54196ab64940fa623904ad854fc1c6a Mon Sep 17 00:00:00 2001 From: pfavre Date: Sun, 12 Aug 2018 13:49:35 +0200 Subject: [PATCH 1/3] Implement Base32 encoding powered by Guava's BaseEncoding fixes #21 --- CHANGELOG | 1 + README.md | 11 +- .../java/at/favre/lib/bytes/BaseEncoding.java | 208 ++++++++++++++++++ src/main/java/at/favre/lib/bytes/Bytes.java | 44 ++-- .../lib/bytes/BinaryToTextEncodingTest.java | 119 ++++++++-- .../lib/bytes/BytesParseAndEncodingTest.java | 10 + 6 files changed, 360 insertions(+), 33 deletions(-) create mode 100644 src/main/java/at/favre/lib/bytes/BaseEncoding.java diff --git a/CHANGELOG b/CHANGELOG index dd96189..ebf5d7b 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -3,6 +3,7 @@ ## v0.8.0 * add radix encoding/parsing and fix radix tests #6, #20 +* add support for Base32 RFC4648 non-hex alphabet encoding/parsing #21 ## v0.7.1 diff --git a/README.md b/README.md index a033bf7..90c7c67 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ It's main features include: * **Creation** from a wide variety of sources: multiple arrays, integers, [streams](https://docs.oracle.com/javase/7/docs/api/java/io/InputStream.html), random, strings, files, uuid, ... * **Transformation** with many built-in: append, [xor](https://en.wikipedia.org/wiki/Exclusive_or), [and](https://en.wikipedia.org/wiki/Logical_conjunction), [hash](https://en.wikipedia.org/wiki/Cryptographic_hash_function), [shifts](https://en.wikipedia.org/wiki/Bitwise_operation#Bit_shifts), shuffle, reverse, [checksum](https://en.wikipedia.org/wiki/Checksum), ... * **Validators** with the ability to arbitrarily combine multiple ones with logical expressions -* **Parsing and Encoding** in most common binary-to-text-encodings: [hex](https://en.wikipedia.org/wiki/Hexadecimal), [base36](https://en.wikipedia.org/wiki/Base36), [base64](https://en.wikipedia.org/wiki/Base64), ... 
+* **Parsing and Encoding** in most common binary-to-text-encodings: [hex](https://en.wikipedia.org/wiki/Hexadecimal), [base32](https://en.wikipedia.org/wiki/Base32), [base64](https://en.wikipedia.org/wiki/Base64), ... * **Immutable, Mutable and Read-Only** versions * **Handling Strings** with encoding and normalizing strings for arbitrary charset * **Utility Features** like `indexOf`, `count`, `isEmpty`, `bitAt`, `contains` ... @@ -312,7 +312,14 @@ Bytes.from(array).encodeBase64(); //"SpT9/x6v7Q==" Bytes.from(array).encodeBase64Url(); //"SpT9_x6v7Q==" ``` -Additionally the following encodings are supported: +also a **Base32** encoder (using the RFC4648 non-hex alphabet): + +```java +Bytes.parseBase32("MZXQ===="); +Bytes.from(array).encodeBase32(); + ``` + +Additionally the following radix encodings are supported: ```java Bytes.from(array).encodeBinary(); //1110110110101111 diff --git a/src/main/java/at/favre/lib/bytes/BaseEncoding.java b/src/main/java/at/favre/lib/bytes/BaseEncoding.java new file mode 100644 index 0000000..e932036 --- /dev/null +++ b/src/main/java/at/favre/lib/bytes/BaseEncoding.java @@ -0,0 +1,208 @@ +/* + * Copyright 2018 Patrick Favre-Bulle + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package at.favre.lib.bytes; + +import java.io.IOException; +import java.nio.ByteOrder; +import java.util.Arrays; +import java.util.Objects; + +/** + * Derived from Google Guava's common/io/ BaseEncoding + *

+ * See: https://github.com/google/guava/blob/v26.0/guava/src/com/google/common/io/BaseEncoding.java + */ +final class BaseEncoding implements BinaryToTextEncoding.EncoderDecoder { + private static final char ASCII_MAX = 127; + + static final Alphabet BASE32_RFC4848 = new Alphabet("ABCDEFGHIJKLMNOPQRSTUVWXYZ234567".toCharArray()); + static final char BASE32_RFC4848_PADDING = '='; + + private final Alphabet alphabet; + private final Character paddingChar; + + BaseEncoding(Alphabet alphabet, Character paddingChar) { + this.alphabet = Objects.requireNonNull(alphabet); + this.paddingChar = paddingChar; + } + + private int maxEncodedSize(int bytes) { + return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk); + } + + @Override + public String encode(byte[] array, ByteOrder byteOrder) { + return encode(array, 0, array.length); + } + + private String encode(byte[] bytes, int off, int len) { + StringBuilder result = new StringBuilder(maxEncodedSize(len)); + try { + encodeTo(result, bytes, off, len); + } catch (IOException impossible) { + throw new AssertionError(impossible); + } + return result.toString(); + } + + private void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { + Objects.requireNonNull(target); + for (int i = 0; i < len; i += alphabet.bytesPerChunk) { + encodeChunkTo(target, bytes, off + i, Math.min(alphabet.bytesPerChunk, len - i)); + } + } + + private void encodeChunkTo(Appendable target, byte[] bytes, int off, int len) throws IOException { + Objects.requireNonNull(target); + long bitBuffer = 0; + for (int i = 0; i < len; ++i) { + bitBuffer |= bytes[off + i] & 0xFF; + bitBuffer <<= 8; // Add additional zero byte in the end. + } + // Position of first character is length of bitBuffer minus bitsPerChar. 
+ final int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar; + int bitsProcessed = 0; + while (bitsProcessed < len * 8) { + int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask; + target.append(alphabet.encode(charIndex)); + bitsProcessed += alphabet.bitsPerChar; + } + if (paddingChar != null) { + while (bitsProcessed < alphabet.bytesPerChunk * 8) { + target.append(paddingChar); + bitsProcessed += alphabet.bitsPerChar; + } + } + } + + private int maxDecodedSize(int chars) { + return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L); + } + + private String trimTrailingPadding(CharSequence chars) { + Objects.requireNonNull(chars); + if (paddingChar == null) { + return chars.toString(); + } + int l; + for (l = chars.length() - 1; l >= 0; l--) { + if (chars.charAt(l) != paddingChar) { + break; + } + } + return chars.subSequence(0, l + 1).toString(); + } + + @Override + public byte[] decode(String encoded) { + encoded = trimTrailingPadding(encoded); + byte[] tmp = new byte[maxDecodedSize(encoded.length())]; + int len = decodeTo(tmp, encoded); + return extract(tmp, len); + } + + private static byte[] extract(byte[] result, int length) { + if (length == result.length) { + return result; + } else { + byte[] trunc = new byte[length]; + System.arraycopy(result, 0, trunc, 0, length); + return trunc; + } + } + + private int decodeTo(byte[] target, CharSequence chars) { + Objects.requireNonNull(target); + chars = trimTrailingPadding(chars); + int bytesWritten = 0; + for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) { + long chunk = 0; + int charsProcessed = 0; + for (int i = 0; i < alphabet.charsPerChunk; i++) { + chunk <<= alphabet.bitsPerChar; + if (charIdx + i < chars.length()) { + chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++)); + } + } + final int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar; + for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset 
>= minOffset; offset -= 8) { + target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF); + } + } + return bytesWritten; + } + + private static final class Alphabet { + // this is meant to be immutable -- don't modify it! + private final char[] chars; + final int mask; + final int bitsPerChar; + final int charsPerChunk; + final int bytesPerChunk; + private final byte[] decodabet; + + Alphabet(char[] chars) { + this.chars = Objects.requireNonNull(chars); + this.bitsPerChar = log2(chars.length); + + /* + * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. This makes + * for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8. + */ + int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar)); + this.charsPerChunk = 8 / gcd; + this.bytesPerChunk = bitsPerChar / gcd; + this.mask = chars.length - 1; + + byte[] decodabet = new byte[ASCII_MAX + 1]; + Arrays.fill(decodabet, (byte) -1); + for (int i = 0; i < chars.length; i++) { + char c = chars[i]; + decodabet[c] = (byte) i; + } + this.decodabet = decodabet; + } + + char encode(int bits) { + return chars[bits]; + } + + int decode(char ch) { + return (int) decodabet[ch]; + } + } + + private static int divide(int p, int q) { + int div = p / q; + int rem = p - q * div; // equal to p % q + + if (rem == 0) { + return div; + } + int signum = 1 | ((p ^ q) >> (Integer.SIZE - 1)); + return signum > 0 ? 
div + signum : div; + } + + private static int log2(int x) { + return (Integer.SIZE - 1) - Integer.numberOfLeadingZeros(x); + } +} diff --git a/src/main/java/at/favre/lib/bytes/Bytes.java b/src/main/java/at/favre/lib/bytes/Bytes.java index 8e89020..829ba69 100644 --- a/src/main/java/at/favre/lib/bytes/Bytes.java +++ b/src/main/java/at/favre/lib/bytes/Bytes.java @@ -156,8 +156,7 @@ public static Bytes wrap(byte[] array) { * @return new instance */ public static Bytes wrap(byte[] array, ByteOrder byteOrder) { - Objects.requireNonNull(array, "passed array must not be null"); - return new Bytes(array, byteOrder); + return new Bytes(Objects.requireNonNull(array, "passed array must not be null"), byteOrder); } /** @@ -168,8 +167,7 @@ public static Bytes wrap(byte[] array, ByteOrder byteOrder) { * @return new instance */ public static Bytes from(byte[] byteArrayToCopy) { - Objects.requireNonNull(byteArrayToCopy, "must at least pass a single byte"); - return wrap(Arrays.copyOf(byteArrayToCopy, byteArrayToCopy.length)); + return wrap(Arrays.copyOf(Objects.requireNonNull(byteArrayToCopy, "must at least pass a single byte"), byteArrayToCopy.length)); } /** @@ -318,8 +316,7 @@ public static Bytes from(int integer4byte) { * @return new instance */ public static Bytes from(int... intArray) { - Objects.requireNonNull(intArray, "must provide at least a single int"); - return wrap(Util.toByteArray(intArray)); + return wrap(Util.toByteArray(Objects.requireNonNull(intArray, "must provide at least a single int"))); } /** @@ -339,8 +336,7 @@ public static Bytes from(long long8byte) { * @return new instance */ public static Bytes from(long... 
longArray) { - Objects.requireNonNull(longArray, "must provide at least a single long"); - return wrap(Util.toByteArray(longArray)); + return wrap(Util.toByteArray(Objects.requireNonNull(longArray, "must provide at least a single long"))); } /** @@ -470,9 +466,7 @@ public static Bytes from(CharSequence utf8String, Normalizer.Form form) { * @return new instance */ public static Bytes from(CharSequence string, Charset charset) { - Objects.requireNonNull(string, "provided string must not be null"); - Objects.requireNonNull(charset, "provided charset must not be null"); - return wrap(string.toString().getBytes(charset)); + return wrap(Objects.requireNonNull(string, "provided string must not be null").toString().getBytes(Objects.requireNonNull(charset, "provided charset must not be null"))); } /** @@ -567,6 +561,18 @@ public static Bytes parseHex(String hexString) { return parse(hexString, new BinaryToTextEncoding.Hex()); } + /** + * Parsing of base32/RFC 4648 encoded byte arrays. + *

+ * Uses the RFC 4648 non-hex alphabet, see Base32 alphabet. + * + * @param base32Rfc4648String the encoded string + * @return decoded instance + */ + public static Bytes parseBase32(String base32Rfc4648String) { + return parse(base32Rfc4648String, new BaseEncoding(BaseEncoding.BASE32_RFC4848, BaseEncoding.BASE32_RFC4848_PADDING)); + } + /** * Parsing of base36 encoded byte arrays. *

@@ -1538,6 +1544,19 @@ public String encodeHex(boolean upperCase) { return encode(new BinaryToTextEncoding.Hex(upperCase)); } + /** + * Base32 RFC4648 string representation of the internal byte array (not Base32 hex alphabet extension) + *

+ * Example: MZXW6YQ= + *

+ * See RFC 4648 + * + * @return base32 string + */ + public String encodeBase32() { + return encode(new BaseEncoding(BaseEncoding.BASE32_RFC4848, BaseEncoding.BASE32_RFC4848_PADDING)); + } + /** * DO NOT USE AS DATA ENCODING, ONLY FOR NUMBERS! *

@@ -1597,8 +1616,7 @@ public String encodeUtf8() { * @return encoded string */ public String encodeCharset(Charset charset) { - Objects.requireNonNull(charset, "given charset must not be null"); - return new String(internalArray(), charset); + return new String(internalArray(), Objects.requireNonNull(charset, "given charset must not be null")); } /** diff --git a/src/test/java/at/favre/lib/bytes/BinaryToTextEncodingTest.java b/src/test/java/at/favre/lib/bytes/BinaryToTextEncodingTest.java index bf763e0..526d35e 100644 --- a/src/test/java/at/favre/lib/bytes/BinaryToTextEncodingTest.java +++ b/src/test/java/at/favre/lib/bytes/BinaryToTextEncodingTest.java @@ -40,6 +40,27 @@ public void decodeHexShouldFail() { new BinaryToTextEncoding.Hex(false).decode("AAI="); } + @Test + public void testBase16Reference() { + BinaryToTextEncoding.EncoderDecoder base16Encoding = new BinaryToTextEncoding.Hex(true); + // see: https://tools.ietf.org/html/rfc4648 + assertEquals("", base16Encoding.encode(Bytes.from("").array(), ByteOrder.BIG_ENDIAN)); + assertEquals("66", base16Encoding.encode(Bytes.from("f").array(), ByteOrder.BIG_ENDIAN)); + assertEquals("666F", base16Encoding.encode(Bytes.from("fo").array(), ByteOrder.BIG_ENDIAN)); + assertEquals("666F6F", base16Encoding.encode(Bytes.from("foo").array(), ByteOrder.BIG_ENDIAN)); + assertEquals("666F6F62", base16Encoding.encode(Bytes.from("foob").array(), ByteOrder.BIG_ENDIAN)); + assertEquals("666F6F6261", base16Encoding.encode(Bytes.from("fooba").array(), ByteOrder.BIG_ENDIAN)); + assertEquals("666F6F626172", base16Encoding.encode(Bytes.from("foobar").array(), ByteOrder.BIG_ENDIAN)); + + assertArrayEquals(Bytes.from("").array(), base16Encoding.decode("")); + assertArrayEquals(Bytes.from("f").array(), base16Encoding.decode("66")); + assertArrayEquals(Bytes.from("fo").array(), base16Encoding.decode("666F")); + assertArrayEquals(Bytes.from("foo").array(), base16Encoding.decode("666F6F")); + assertArrayEquals(Bytes.from("foob").array(), 
base16Encoding.decode("666F6F62")); + assertArrayEquals(Bytes.from("fooba").array(), base16Encoding.decode("666F6F6261")); + assertArrayEquals(Bytes.from("foobar").array(), base16Encoding.decode("666F6F626172")); + } + @Test public void encodeBaseRadix() { assertEquals("100211", new BinaryToTextEncoding.BaseRadixNumber(16).encode(new byte[]{16, 2, 17}, ByteOrder.BIG_ENDIAN)); @@ -89,35 +110,44 @@ public void encodeDecodeRadixZeros() { } @Test - public void encodeDecodeBase64() { + public void encodeDecodeBase64Random() { BinaryToTextEncoding.EncoderDecoder encoderPad = new BinaryToTextEncoding.Base64Encoding(false, true); BinaryToTextEncoding.EncoderDecoder encoderUrlPad = new BinaryToTextEncoding.Base64Encoding(true, true); BinaryToTextEncoding.EncoderDecoder encoderNoPad = new BinaryToTextEncoding.Base64Encoding(false, false); for (int i = 0; i < 32; i += 4) { - Bytes rnd = Bytes.random(i); - String encodedBigEndian = encoderPad.encode(rnd.array(), ByteOrder.BIG_ENDIAN); - byte[] decoded = encoderPad.decode(encodedBigEndian); - assertEquals(rnd, Bytes.wrap(decoded)); - - String encodedBigEndianUrlPad = encoderUrlPad.encode(rnd.array(), ByteOrder.BIG_ENDIAN); - byte[] decodedUrlPad = encoderPad.decode(encodedBigEndianUrlPad); - assertEquals(rnd, Bytes.wrap(decodedUrlPad)); - - String encodedBigEndianNoPad = encoderNoPad.encode(rnd.array(), ByteOrder.BIG_ENDIAN); - byte[] decodedNoPad = encoderPad.decode(encodedBigEndianNoPad); - assertEquals(rnd, Bytes.wrap(decodedNoPad)); + testRndEncodeDecode(encoderPad, i); + testRndEncodeDecode(encoderUrlPad, i); + testRndEncodeDecode(encoderNoPad, i); } } + @Test + public void testBase64Reference() { + BinaryToTextEncoding.EncoderDecoder base64Encoding = new BinaryToTextEncoding.Base64Encoding(); + // see: https://tools.ietf.org/html/rfc4648 + assertEquals("", base64Encoding.encode(Bytes.from("").array(), ByteOrder.BIG_ENDIAN)); + assertEquals("Zg==", base64Encoding.encode(Bytes.from("f").array(), ByteOrder.BIG_ENDIAN)); + 
assertEquals("Zm8=", base64Encoding.encode(Bytes.from("fo").array(), ByteOrder.BIG_ENDIAN)); + assertEquals("Zm9v", base64Encoding.encode(Bytes.from("foo").array(), ByteOrder.BIG_ENDIAN)); + assertEquals("Zm9vYg==", base64Encoding.encode(Bytes.from("foob").array(), ByteOrder.BIG_ENDIAN)); + assertEquals("Zm9vYmE=", base64Encoding.encode(Bytes.from("fooba").array(), ByteOrder.BIG_ENDIAN)); + assertEquals("Zm9vYmFy", base64Encoding.encode(Bytes.from("foobar").array(), ByteOrder.BIG_ENDIAN)); + + assertArrayEquals(Bytes.from("").array(), base64Encoding.decode("")); + assertArrayEquals(Bytes.from("f").array(), base64Encoding.decode("Zg==")); + assertArrayEquals(Bytes.from("fo").array(), base64Encoding.decode("Zm8=")); + assertArrayEquals(Bytes.from("foo").array(), base64Encoding.decode("Zm9v")); + assertArrayEquals(Bytes.from("foob").array(), base64Encoding.decode("Zm9vYg==")); + assertArrayEquals(Bytes.from("fooba").array(), base64Encoding.decode("Zm9vYmE=")); + assertArrayEquals(Bytes.from("foobar").array(), base64Encoding.decode("Zm9vYmFy")); + } + @Test public void encodeDecodeHex() { for (int i = 4; i < 32; i += 4) { - Bytes rnd = Bytes.random(i); - BinaryToTextEncoding.EncoderDecoder encoding = new BinaryToTextEncoding.Hex(); - String encodedBigEndian = encoding.encode(rnd.array(), ByteOrder.BIG_ENDIAN); - byte[] decoded = encoding.decode(encodedBigEndian); - assertEquals(rnd, Bytes.wrap(decoded)); + testRndEncodeDecode(new BinaryToTextEncoding.Hex(), i); + testRndEncodeDecode(new BinaryToTextEncoding.Hex(true), i); } } @@ -168,4 +198,57 @@ public void encodeRadixIllegalTooLow() { public void encodeRadixIllegalTooLow2() { new BinaryToTextEncoding.BaseRadixNumber(0); } + + @Test + public void testEncodeDecodeRndBase32() { + BaseEncoding base32Encoding = new BaseEncoding(BaseEncoding.BASE32_RFC4848, BaseEncoding.BASE32_RFC4848_PADDING); + for (int i = 0; i < 128; i++) { + testRndEncodeDecode(base32Encoding, i); + } + } + + private byte[] 
testRndEncodeDecode(BinaryToTextEncoding.EncoderDecoder encoder, int dataLength) { + Bytes rnd = Bytes.random(dataLength); + String encoded = encoder.encode(rnd.array(), ByteOrder.BIG_ENDIAN); + byte[] decoded = encoder.decode(encoded); + assertEquals(rnd, Bytes.wrap(decoded)); + return decoded; + } + + @Test + public void testBase32Reference() { + BinaryToTextEncoding.EncoderDecoder base32Encoding = new BaseEncoding(BaseEncoding.BASE32_RFC4848, BaseEncoding.BASE32_RFC4848_PADDING); + // see: https://tools.ietf.org/html/rfc4648 + assertEquals("", base32Encoding.encode(Bytes.from("").array(), ByteOrder.BIG_ENDIAN)); + assertEquals("MY======", base32Encoding.encode(Bytes.from("f").array(), ByteOrder.BIG_ENDIAN)); + assertEquals("MZXQ====", base32Encoding.encode(Bytes.from("fo").array(), ByteOrder.BIG_ENDIAN)); + assertEquals("MZXW6===", base32Encoding.encode(Bytes.from("foo").array(), ByteOrder.BIG_ENDIAN)); + assertEquals("MZXW6YQ=", base32Encoding.encode(Bytes.from("foob").array(), ByteOrder.BIG_ENDIAN)); + assertEquals("MZXW6YTB", base32Encoding.encode(Bytes.from("fooba").array(), ByteOrder.BIG_ENDIAN)); + assertEquals("MZXW6YTBOI======", base32Encoding.encode(Bytes.from("foobar").array(), ByteOrder.BIG_ENDIAN)); + + assertArrayEquals(Bytes.from("").array(), base32Encoding.decode("")); + assertArrayEquals(Bytes.from("f").array(), base32Encoding.decode("MY======")); + assertArrayEquals(Bytes.from("fo").array(), base32Encoding.decode("MZXQ====")); + assertArrayEquals(Bytes.from("foo").array(), base32Encoding.decode("MZXW6===")); + assertArrayEquals(Bytes.from("foob").array(), base32Encoding.decode("MZXW6YQ=")); + assertArrayEquals(Bytes.from("fooba").array(), base32Encoding.decode("MZXW6YTB")); + assertArrayEquals(Bytes.from("foobar").array(), base32Encoding.decode("MZXW6YTBOI======")); + } + + @Test + public void testBase64BigData() { + for (int i = 0; i < 5; i++) { + byte[] out = testRndEncodeDecode(new BinaryToTextEncoding.Base64Encoding(), 1024 * 1024); + 
System.out.println(out.length); + } + } + + @Test + public void testBase32BigData() { + for (int i = 0; i < 5; i++) { + byte[] out = testRndEncodeDecode(new BaseEncoding(BaseEncoding.BASE32_RFC4848, BaseEncoding.BASE32_RFC4848_PADDING), 1024 * 1024); + System.out.println(out.length); + } + } } diff --git a/src/test/java/at/favre/lib/bytes/BytesParseAndEncodingTest.java b/src/test/java/at/favre/lib/bytes/BytesParseAndEncodingTest.java index 5b64ec8..68fbcf8 100644 --- a/src/test/java/at/favre/lib/bytes/BytesParseAndEncodingTest.java +++ b/src/test/java/at/favre/lib/bytes/BytesParseAndEncodingTest.java @@ -83,6 +83,16 @@ public void encodeBase64Url() { assertEquals("SpT9_x6v7Q==", Bytes.from(encodingExample).encodeBase64Url()); } + @Test + public void parseBase32() { + assertArrayEquals(encodingExample, Bytes.parseBase32("JKKP37Y6V7WQ====").array()); + } + + @Test + public void encodeBase32() { + assertEquals("JKKP37Y6V7WQ====", Bytes.from(encodingExample).encodeBase32()); + } + @Test public void encodeBinary() { byte[] defaultArray = new byte[]{(byte) 0xA0, (byte) 0xE1}; From 3615d7712e47adb7dd471ad86bffa6bcc71926de Mon Sep 17 00:00:00 2001 From: pfavre Date: Sun, 12 Aug 2018 22:57:23 +0200 Subject: [PATCH 2/3] Add JMH encoding benchmark --- pom.xml | 12 +++ .../java/at/favre/lib/bytes/BaseEncoding.java | 2 +- .../favre/lib/bytes/EncodingJmhBenchmark.java | 99 +++++++++++++++++++ 3 files changed, 112 insertions(+), 1 deletion(-) create mode 100644 src/test/java/at/favre/lib/bytes/EncodingJmhBenchmark.java diff --git a/pom.xml b/pom.xml index 9283937..c47c5a9 100644 --- a/pom.xml +++ b/pom.xml @@ -215,6 +215,18 @@ 4.12 test + + org.openjdk.jmh + jmh-core + 1.21 + test + + + org.openjdk.jmh + jmh-generator-annprocess + 1.21 + test + diff --git a/src/main/java/at/favre/lib/bytes/BaseEncoding.java b/src/main/java/at/favre/lib/bytes/BaseEncoding.java index e932036..163285c 100644 --- a/src/main/java/at/favre/lib/bytes/BaseEncoding.java +++ 
b/src/main/java/at/favre/lib/bytes/BaseEncoding.java @@ -151,7 +151,7 @@ private int decodeTo(byte[] target, CharSequence chars) { return bytesWritten; } - private static final class Alphabet { + static final class Alphabet { // this is meant to be immutable -- don't modify it! private final char[] chars; final int mask; diff --git a/src/test/java/at/favre/lib/bytes/EncodingJmhBenchmark.java b/src/test/java/at/favre/lib/bytes/EncodingJmhBenchmark.java new file mode 100644 index 0000000..2792f78 --- /dev/null +++ b/src/test/java/at/favre/lib/bytes/EncodingJmhBenchmark.java @@ -0,0 +1,99 @@ +/* + * Copyright 2018 Patrick Favre-Bulle + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package at.favre.lib.bytes; + +import org.openjdk.jmh.annotations.*; + +import java.nio.ByteOrder; +import java.util.HashMap; +import java.util.Map; +import java.util.Random; +import java.util.concurrent.TimeUnit; + +/* +# JMH version: 1.21 +# VM version: JDK 1.8.0_172, Java HotSpot(TM) 64-Bit Server VM, 25.172-b11 +# i7 7700K / 24G + +Benchmark (byteLength) Mode Cnt Score Error Units +EncodingJmhBenchmark.encodeBase64Guava 1 thrpt 4 10361634,745 ± 152739,710 ops/s +EncodingJmhBenchmark.encodeBase64Guava 16 thrpt 4 4360485,804 ± 44729,417 ops/s +EncodingJmhBenchmark.encodeBase64Guava 128 thrpt 4 790407,010 ± 8095,476 ops/s +EncodingJmhBenchmark.encodeBase64Guava 512 thrpt 4 192448,674 ± 2196,035 ops/s +EncodingJmhBenchmark.encodeBase64Guava 1000000 thrpt 4 102,780 ± 2,949 ops/s +EncodingJmhBenchmark.encodeBase64Okio 1 thrpt 4 12658987,399 ± 361955,366 ops/s +EncodingJmhBenchmark.encodeBase64Okio 16 thrpt 4 7059404,777 ± 293665,348 ops/s +EncodingJmhBenchmark.encodeBase64Okio 128 thrpt 4 1749131,031 ± 85915,325 ops/s +EncodingJmhBenchmark.encodeBase64Okio 512 thrpt 4 239764,488 ± 6204,540 ops/s +EncodingJmhBenchmark.encodeBase64Okio 1000000 thrpt 4 107,868 ± 0,569 ops/s + */ + +@State(Scope.Thread) +@Fork(1) +@Warmup(iterations = 2, time = 2) +@Measurement(iterations = 4, time = 5) +@BenchmarkMode(Mode.Throughput) +@OutputTimeUnit(TimeUnit.SECONDS) +public class EncodingJmhBenchmark { + + @Param({"1", "16", "128", "512", "1000000"}) + private int byteLength; + private Map rndMap; + + private BinaryToTextEncoding.EncoderDecoder base64Okio; + private BinaryToTextEncoding.EncoderDecoder base64Guava; + private Random random; + + @Setup(Level.Trial) + public void setup() { + random = new Random(); + base64Okio = new BinaryToTextEncoding.Base64Encoding(); + base64Guava = new BaseEncoding(new BaseEncoding.Alphabet("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray()), BaseEncoding.BASE32_RFC4848_PADDING); + rndMap = new HashMap<>(); + 
int[] lengths = new int[]{1, 16, 128, 512, 1000000}; + for (int length : lengths) { + int count = 10; + rndMap.put(length, new Bytes[count]); + for (int i = 0; i < count; i++) { + rndMap.get(length)[i] = Bytes.random(length); + } + } + } + + @Benchmark + public byte[] encodeBase64Okio() { + return encodeDecode(base64Okio); + } + + @Benchmark + public byte[] encodeBase64Guava() { + return encodeDecode(base64Guava); + } + + private byte[] encodeDecode(BinaryToTextEncoding.EncoderDecoder encoder) { + Bytes[] bytes = rndMap.get(byteLength); + int rndNum = random.nextInt(bytes.length); + + String encoded = encoder.encode(bytes[rndNum].array(), ByteOrder.BIG_ENDIAN); + return encoder.decode(encoded); + } +} From f88de4915c4c0f08780e8b671f8a824b37fc967f Mon Sep 17 00:00:00 2001 From: pfavre Date: Sun, 12 Aug 2018 23:02:49 +0200 Subject: [PATCH 3/3] Add better doc and update Changelog --- CHANGELOG | 8 ++++++++ src/main/java/at/favre/lib/bytes/BaseEncoding.java | 2 ++ 2 files changed, 10 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index ebf5d7b..2896301 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -5,6 +5,10 @@ * add radix encoding/parsing and fix radix tests #6, #20 * add support for Base32 RFC4648 non-hex alphabet encoding/parsing #21 +### Deprecations (will be removed in v1.0+) + +* `parseBase36()`/`encodeBase36()` - use `parseRadix(36)`/`encodeRadix(36)` instead + ## v0.7.1 * sign AFTER ProGuard so optimized version has correct jar signature @@ -30,6 +34,10 @@ * add constructor/converter from/to UUID #9 * add `empty()` constructor, creating empty byte array +### Deprecations (will be removed in v1.0+) + +* `toObjectArray()` renamed to `toBoxedArray()` + ## v0.5.0 * better resource handling for compression diff --git a/src/main/java/at/favre/lib/bytes/BaseEncoding.java b/src/main/java/at/favre/lib/bytes/BaseEncoding.java index 163285c..667242c 100644 --- a/src/main/java/at/favre/lib/bytes/BaseEncoding.java +++ b/src/main/java/at/favre/lib/bytes/BaseEncoding.java @@ 
-27,6 +27,8 @@ import java.util.Objects; /** + * Encoder which supports arbitrary alphabet and padding. + * * Derived from Google Guava's common/io/ BaseEncoding *

* See: https://github.com/google/guava/blob/v26.0/guava/src/com/google/common/io/BaseEncoding.java