Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
## v0.8.0

* add radix encoding/parsing and fix radix tests #6, #20
* add support for Base32 RFC4648 non-hex alphabet encoding/parsing #21

### Deprecations (will be removed in v1.0+)

* `parseBase36()`/`encodeBase36()` - use `parseRadix(36)`/`encodeRadix(36)` instead

## v0.7.1

Expand All @@ -29,6 +34,10 @@
* add constructor/converter from/to UUID #9
* add `empty()` constructor, creating empty byte array

### Deprecations (will be removed in v1.0+)

* `toObjectArray()` renamed to `toBoxedArray()`

## v0.5.0

* better resource handling for compression
Expand Down
11 changes: 9 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ It's main features include:
* **Creation** from a wide variety of sources: multiple arrays, integers, [streams](https://docs.oracle.com/javase/7/docs/api/java/io/InputStream.html), random, strings, files, uuid, ...
* **Transformation** with many built-in: append, [xor](https://en.wikipedia.org/wiki/Exclusive_or), [and](https://en.wikipedia.org/wiki/Logical_conjunction), [hash](https://en.wikipedia.org/wiki/Cryptographic_hash_function), [shifts](https://en.wikipedia.org/wiki/Bitwise_operation#Bit_shifts), shuffle, reverse, [checksum](https://en.wikipedia.org/wiki/Checksum), ...
* **Validators** with the ability to arbitrarily combine multiple ones with logical expressions
* **Parsing and Encoding** in most common binary-to-text-encodings: [hex](https://en.wikipedia.org/wiki/Hexadecimal), [base36](https://en.wikipedia.org/wiki/Base36), [base64](https://en.wikipedia.org/wiki/Base64), ...
* **Parsing and Encoding** in most common binary-to-text-encodings: [hex](https://en.wikipedia.org/wiki/Hexadecimal), [base32](https://en.wikipedia.org/wiki/Base32), [base64](https://en.wikipedia.org/wiki/Base64), ...
* **Immutable, Mutable and Read-Only** versions
* **Handling Strings** with encoding and normalizing strings for arbitrary charset
* **Utility Features** like `indexOf`, `count`, `isEmpty`, `bitAt`, `contains` ...
Expand Down Expand Up @@ -312,7 +312,14 @@ Bytes.from(array).encodeBase64(); //"SpT9/x6v7Q=="
Bytes.from(array).encodeBase64Url(); //"SpT9_x6v7Q=="
```

Additionally the following encodings are supported:
There is also a **Base32** encoder (using the RFC 4648 non-hex alphabet):

```java
Bytes.parseBase32("MZXQ====");
Bytes.from(array).encodeBase32();
```

Additionally the following radix encodings are supported:

```java
Bytes.from(array).encodeBinary(); //1110110110101111
Expand Down
12 changes: 12 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,18 @@
<version>4.12</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.openjdk.jmh</groupId>
<artifactId>jmh-core</artifactId>
<version>1.21</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.openjdk.jmh</groupId>
<artifactId>jmh-generator-annprocess</artifactId>
<version>1.21</version>
<scope>test</scope>
</dependency>
</dependencies>
<developers>
<developer>
Expand Down
210 changes: 210 additions & 0 deletions src/main/java/at/favre/lib/bytes/BaseEncoding.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
/*
* Copyright 2018 Patrick Favre-Bulle
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package at.favre.lib.bytes;

import java.io.IOException;
import java.nio.ByteOrder;
import java.util.Arrays;
import java.util.Objects;

/**
 * Binary-to-text encoder/decoder for alphabets whose size is a power of two (e.g. Base32),
 * with an optional padding character.
 * <p>
 * Bytes are always processed in array order; the byte order passed to
 * {@link #encode(byte[], ByteOrder)} is not consulted.
 * <p>
 * Derived from Google Guava's common/io/ BaseEncoding
 * <p>
 * See: https://github.com/google/guava/blob/v26.0/guava/src/com/google/common/io/BaseEncoding.java
 */
final class BaseEncoding implements BinaryToTextEncoding.EncoderDecoder {
    private static final char ASCII_MAX = 127;

    // NOTE: the names read "4848", but the alphabet below is the RFC 4648 Base32 alphabet.
    // The identifiers are kept unchanged because other classes of this package reference them.
    static final Alphabet BASE32_RFC4848 = new Alphabet("ABCDEFGHIJKLMNOPQRSTUVWXYZ234567".toCharArray());
    static final char BASE32_RFC4848_PADDING = '=';

    private final Alphabet alphabet;
    private final Character paddingChar;

    /**
     * Creates a new encoding.
     *
     * @param alphabet    the alphabet used for encoding and decoding, must not be null
     * @param paddingChar character appended to fill up incomplete chunks, or null for no padding
     */
    BaseEncoding(Alphabet alphabet, Character paddingChar) {
        this.alphabet = Objects.requireNonNull(alphabet);
        this.paddingChar = paddingChar;
    }

    /**
     * Number of characters needed to encode the given amount of bytes, counting full
     * (i.e. padded) chunks. Used as initial capacity of the output buffer.
     */
    private int maxEncodedSize(int bytes) {
        return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk);
    }

    /**
     * Encodes the whole array with the configured alphabet and padding.
     *
     * @param array     bytes to encode
     * @param byteOrder ignored by this implementation
     * @return the encoded string
     */
    @Override
    public String encode(byte[] array, ByteOrder byteOrder) {
        return encode(array, 0, array.length);
    }

    private String encode(byte[] bytes, int off, int len) {
        StringBuilder result = new StringBuilder(maxEncodedSize(len));
        try {
            encodeTo(result, bytes, off, len);
        } catch (IOException impossible) {
            // a StringBuilder never throws IOException on append
            throw new AssertionError(impossible);
        }
        return result.toString();
    }

    /**
     * Encodes {@code len} bytes starting at {@code off}, one chunk of
     * {@code alphabet.bytesPerChunk} bytes at a time (the last chunk may be shorter).
     */
    private void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
        Objects.requireNonNull(target);
        for (int i = 0; i < len; i += alphabet.bytesPerChunk) {
            encodeChunkTo(target, bytes, off + i, Math.min(alphabet.bytesPerChunk, len - i));
        }
    }

    /**
     * Encodes a single chunk of at most {@code alphabet.bytesPerChunk} bytes and, if a padding
     * character is configured, pads the output up to {@code alphabet.charsPerChunk} characters.
     */
    private void encodeChunkTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
        Objects.requireNonNull(target);
        long bitBuffer = 0;
        for (int i = 0; i < len; ++i) {
            bitBuffer |= bytes[off + i] & 0xFF;
            bitBuffer <<= 8; // Add additional zero byte in the end.
        }
        // The buffer now holds (len + 1) * 8 bits; the trailing zero byte supplies the zero
        // bits that complete the last, possibly partial, character.
        // Position of first character is length of bitBuffer minus bitsPerChar.
        final int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar;
        int bitsProcessed = 0;
        while (bitsProcessed < len * 8) {
            int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask;
            target.append(alphabet.encode(charIndex));
            bitsProcessed += alphabet.bitsPerChar;
        }
        if (paddingChar != null) {
            while (bitsProcessed < alphabet.bytesPerChunk * 8) {
                target.append(paddingChar);
                bitsProcessed += alphabet.bitsPerChar;
            }
        }
    }

    /**
     * Upper bound of bytes that the given amount of characters can decode to
     * (bits rounded up to whole bytes).
     */
    private int maxDecodedSize(int chars) {
        return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
    }

    /**
     * Returns the given sequence without its trailing padding characters; returned unchanged
     * (as string) if no padding character is configured.
     */
    private String trimTrailingPadding(CharSequence chars) {
        Objects.requireNonNull(chars);
        if (paddingChar == null) {
            return chars.toString();
        }
        final char padding = paddingChar;
        int end = chars.length();
        while (end > 0 && chars.charAt(end - 1) == padding) {
            end--;
        }
        return chars.subSequence(0, end).toString();
    }

    /**
     * Decodes the given string; trailing padding characters are ignored.
     *
     * @param encoded the encoded string, must not be null
     * @return the decoded bytes
     * @throws IllegalArgumentException if the string contains a character that is not part of the alphabet
     */
    @Override
    public byte[] decode(String encoded) {
        String trimmed = trimTrailingPadding(encoded);
        byte[] tmp = new byte[maxDecodedSize(trimmed.length())];
        int len = decodeTo(tmp, trimmed);
        return extract(tmp, len);
    }

    /**
     * Returns the first {@code length} bytes of {@code result}; the array itself is returned
     * if it already has exactly that length.
     */
    private static byte[] extract(byte[] result, int length) {
        return length == result.length ? result : Arrays.copyOf(result, length);
    }

    /**
     * Decodes the (already padding-free) characters into {@code target}.
     *
     * @return number of bytes written to {@code target}
     */
    private int decodeTo(byte[] target, CharSequence chars) {
        Objects.requireNonNull(target);
        int bytesWritten = 0;
        for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) {
            long chunk = 0;
            int charsProcessed = 0;
            for (int i = 0; i < alphabet.charsPerChunk; i++) {
                // always shift, so a short last chunk ends up left-aligned and zero-filled
                chunk <<= alphabet.bitsPerChar;
                if (charIdx + i < chars.length()) {
                    chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++));
                }
            }
            // only emit bytes that are completely covered by the characters actually read
            final int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar;
            for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) {
                target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF);
            }
        }
        return bytesWritten;
    }

    /**
     * Immutable description of an encoding alphabet: the characters, the derived chunk
     * geometry and the reverse lookup table.
     */
    static final class Alphabet {
        private final char[] chars;
        final int mask;
        final int bitsPerChar;
        final int charsPerChunk;
        final int bytesPerChunk;
        private final byte[] decodabet;

        /**
         * @param chars the alphabet; its length must be a power of two (at least 2) and it must
         *              consist of distinct ASCII characters
         * @throws IllegalArgumentException if the alphabet violates these constraints
         */
        Alphabet(char[] chars) {
            Objects.requireNonNull(chars);
            // mask and bitsPerChar below are only meaningful for power-of-two sizes
            if (chars.length < 2 || Integer.bitCount(chars.length) != 1) {
                throw new IllegalArgumentException("alphabet length must be a power of two and at least 2, was " + chars.length);
            }
            // defensive copy, so later changes to the passed array cannot alter this alphabet
            this.chars = chars.clone();
            this.bitsPerChar = log2(this.chars.length);

            /*
             * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. This makes
             * for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8.
             */
            int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar));
            this.charsPerChunk = 8 / gcd;
            this.bytesPerChunk = bitsPerChar / gcd;
            this.mask = this.chars.length - 1;

            byte[] table = new byte[ASCII_MAX + 1];
            Arrays.fill(table, (byte) -1);
            for (int i = 0; i < this.chars.length; i++) {
                char c = this.chars[i];
                if (c > ASCII_MAX) {
                    throw new IllegalArgumentException("non-ASCII character in alphabet at index " + i);
                }
                if (table[c] != -1) {
                    throw new IllegalArgumentException("duplicate character '" + c + "' in alphabet");
                }
                table[c] = (byte) i;
            }
            this.decodabet = table;
        }

        /**
         * @param bits value in the range {@code [0, mask]}
         * @return the alphabet character representing the value
         */
        char encode(int bits) {
            return chars[bits];
        }

        /**
         * @param ch character to look up
         * @return the value represented by the character
         * @throws IllegalArgumentException if the character is not part of this alphabet
         */
        int decode(char ch) {
            if (ch > ASCII_MAX || decodabet[ch] == -1) {
                throw new IllegalArgumentException("unrecognized character '" + ch + "' (0x" + Integer.toHexString(ch) + ")");
            }
            return decodabet[ch];
        }
    }

    /**
     * Integer division of {@code p} by {@code q}, rounded towards positive infinity.
     */
    private static int divide(int p, int q) {
        int div = p / q;
        int rem = p - q * div; // equal to p % q

        if (rem == 0) {
            return div;
        }
        // 1 if p and q have the same sign (exact result is positive), -1 otherwise
        int signum = 1 | ((p ^ q) >> (Integer.SIZE - 1));
        return signum > 0 ? div + signum : div;
    }

    /**
     * Floor of the base-2 logarithm of {@code x}.
     */
    private static int log2(int x) {
        return (Integer.SIZE - 1) - Integer.numberOfLeadingZeros(x);
    }
}
44 changes: 31 additions & 13 deletions src/main/java/at/favre/lib/bytes/Bytes.java
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,7 @@ public static Bytes wrap(byte[] array) {
* @return new instance
*/
public static Bytes wrap(byte[] array, ByteOrder byteOrder) {
Objects.requireNonNull(array, "passed array must not be null");
return new Bytes(array, byteOrder);
return new Bytes(Objects.requireNonNull(array, "passed array must not be null"), byteOrder);
}

/**
Expand All @@ -168,8 +167,7 @@ public static Bytes wrap(byte[] array, ByteOrder byteOrder) {
* @return new instance
*/
public static Bytes from(byte[] byteArrayToCopy) {
Objects.requireNonNull(byteArrayToCopy, "must at least pass a single byte");
return wrap(Arrays.copyOf(byteArrayToCopy, byteArrayToCopy.length));
return wrap(Arrays.copyOf(Objects.requireNonNull(byteArrayToCopy, "must at least pass a single byte"), byteArrayToCopy.length));
}

/**
Expand Down Expand Up @@ -318,8 +316,7 @@ public static Bytes from(int integer4byte) {
* @return new instance
*/
public static Bytes from(int... intArray) {
Objects.requireNonNull(intArray, "must provide at least a single int");
return wrap(Util.toByteArray(intArray));
return wrap(Util.toByteArray(Objects.requireNonNull(intArray, "must provide at least a single int")));
}

/**
Expand All @@ -339,8 +336,7 @@ public static Bytes from(long long8byte) {
* @return new instance
*/
public static Bytes from(long... longArray) {
Objects.requireNonNull(longArray, "must provide at least a single long");
return wrap(Util.toByteArray(longArray));
return wrap(Util.toByteArray(Objects.requireNonNull(longArray, "must provide at least a single long")));
}

/**
Expand Down Expand Up @@ -470,9 +466,7 @@ public static Bytes from(CharSequence utf8String, Normalizer.Form form) {
* @return new instance
*/
public static Bytes from(CharSequence string, Charset charset) {
Objects.requireNonNull(string, "provided string must not be null");
Objects.requireNonNull(charset, "provided charset must not be null");
return wrap(string.toString().getBytes(charset));
return wrap(Objects.requireNonNull(string, "provided string must not be null").toString().getBytes(Objects.requireNonNull(charset, "provided charset must not be null")));
}

/**
Expand Down Expand Up @@ -567,6 +561,18 @@ public static Bytes parseHex(String hexString) {
return parse(hexString, new BinaryToTextEncoding.Hex());
}

/**
 * Decodes a Base32 string as specified in RFC 4648.
 * <p>
 * The standard (non-hex) alphabet is used, see
 * <a href="https://en.wikipedia.org/wiki/Base32#RFC_4648_Base32_alphabet">Base32 alphabet</a>.
 *
 * @param base32Rfc4648String the Base32 encoded string
 * @return decoded instance
 */
public static Bytes parseBase32(String base32Rfc4648String) {
    final BaseEncoding base32Codec = new BaseEncoding(BaseEncoding.BASE32_RFC4848, BaseEncoding.BASE32_RFC4848_PADDING);
    return parse(base32Rfc4648String, base32Codec);
}

/**
* Parsing of base36 encoded byte arrays.
* <p>
Expand Down Expand Up @@ -1538,6 +1544,19 @@ public String encodeHex(boolean upperCase) {
return encode(new BinaryToTextEncoding.Hex(upperCase));
}

/**
 * Encodes the internal byte array as Base32 string according to RFC 4648,
 * using the standard alphabet (not the "extended hex" alphabet).
 * <p>
 * Example: <code>MZXW6YQ=</code>
 * <p>
 * See <a href="https://tools.ietf.org/html/rfc4648">RFC 4648</a>
 *
 * @return base32 string
 */
public String encodeBase32() {
    final BaseEncoding base32Codec = new BaseEncoding(BaseEncoding.BASE32_RFC4848, BaseEncoding.BASE32_RFC4848_PADDING);
    return encode(base32Codec);
}

/**
* DO NOT USE AS DATA ENCODING, ONLY FOR NUMBERS!
* <p>
Expand Down Expand Up @@ -1597,8 +1616,7 @@ public String encodeUtf8() {
* @return encoded string
*/
public String encodeCharset(Charset charset) {
Objects.requireNonNull(charset, "given charset must not be null");
return new String(internalArray(), charset);
return new String(internalArray(), Objects.requireNonNull(charset, "given charset must not be null"));
}

/**
Expand Down
Loading