From 4774deb5a710ce39cc88083f0b43e49b10df0d6b Mon Sep 17 00:00:00 2001
From: Krzysztof Kocel
Date: Fri, 12 Apr 2024 15:19:48 +0200
Subject: [PATCH 1/4] add custom alphabet for Base64
---
.../apache/commons/codec/binary/Base64.java | 92 +++++++++++++++++--
.../commons/codec/binary/Base64Test.java | 50 ++++++++++
2 files changed, 132 insertions(+), 10 deletions(-)
diff --git a/src/main/java/org/apache/commons/codec/binary/Base64.java b/src/main/java/org/apache/commons/codec/binary/Base64.java
index 61e411ae7e..d460b6f933 100644
--- a/src/main/java/org/apache/commons/codec/binary/Base64.java
+++ b/src/main/java/org/apache/commons/codec/binary/Base64.java
@@ -18,6 +18,7 @@
package org.apache.commons.codec.binary;
import java.math.BigInteger;
+import java.util.Arrays;
import java.util.Objects;
import org.apache.commons.codec.CodecPolicy;
@@ -106,7 +107,7 @@ public class Base64 extends BaseNCodec {
* https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
*
*/
- private static final byte[] DECODE_TABLE = {
+ private static final byte[] DEFAULT_DECODE_TABLE = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
@@ -347,7 +348,7 @@ public static boolean isArrayByteBase64(final byte[] arrayOctet) {
* @since 1.4
*/
public static boolean isBase64(final byte octet) {
- return octet == PAD_DEFAULT || octet >= 0 && octet < DECODE_TABLE.length && DECODE_TABLE[octet] != -1;
+ return octet == PAD_DEFAULT || octet >= 0 && octet < DEFAULT_DECODE_TABLE.length && DEFAULT_DECODE_TABLE[octet] != -1;
}
/**
@@ -415,14 +416,17 @@ static byte[] toIntegerBytes(final BigInteger bigInt) {
}
/**
- * Encode table to use: either STANDARD or URL_SAFE. Note: the DECODE_TABLE above remains static because it is able
+ * Encode table to use: STANDARD, URL_SAFE, or a custom table.
+ * Note: the DECODE_TABLE above remains static because it is able
* to decode both STANDARD and URL_SAFE streams, but the encodeTable must be a member variable so we can switch
* between the two modes.
*/
private final byte[] encodeTable;
- /** Only one decode table currently; keep for consistency with Base32 code. */
- private final byte[] decodeTable = DECODE_TABLE;
+ /**
+ * Decode table to use
+ */
+ private final byte[] decodeTable;
/**
* Line separator for encoding. Not used when decoding. Only used if lineLength > 0.
@@ -552,7 +556,18 @@ public Base64(final int lineLength, final byte[] lineSeparator) {
* @since 1.4
*/
public Base64(final int lineLength, final byte[] lineSeparator, final boolean urlSafe) {
- this(lineLength, lineSeparator, urlSafe, DECODING_POLICY_DEFAULT);
+ this(lineLength, lineSeparator, urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE, DECODING_POLICY_DEFAULT);
+ }
+
+
+ /**
+ * Creates a Base64 codec for decoding and encoding with a custom 64-byte encode table, without line chunking.
+ *
+ * @param encodeTable
+ * The manual encodeTable - a byte array of 64 chars
+ */
+ public Base64(byte[] encodeTable) {
+ this(0, CHUNK_SEPARATOR, encodeTable, DECODING_POLICY_DEFAULT);
}
/**
@@ -583,13 +598,54 @@ public Base64(final int lineLength, final byte[] lineSeparator, final boolean ur
* Thrown when the {@code lineSeparator} contains Base64 characters.
* @since 1.15
*/
- public Base64(final int lineLength, final byte[] lineSeparator, final boolean urlSafe,
+ public Base64(final int lineLength, final byte[] lineSeparator, boolean urlSafe,
+ final CodecPolicy decodingPolicy){
+ this(lineLength, lineSeparator,urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE, decodingPolicy);
+ }
+
+ /**
+ * Creates a Base64 codec used for decoding and encoding with the given encode table.
+ *
+ * When encoding, the line length and line separator are given in the constructor, and the encoding table is
+ * the supplied {@code encodeTable}.
+ *
+ *
+ * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
+ *
+ *
+ * When decoding, all variants are supported for the built-in tables; a custom table decodes only its own alphabet.
+ *
+ *
+ * @param lineLength
+ * Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of
+ * 4). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when
+ * decoding.
+ * @param lineSeparator
+ * Each line of encoded data will end with this sequence of bytes.
+ * @param encodeTable
+ * The manual encodeTable - a byte array of 64 chars.
+ * @param decodingPolicy The decoding policy.
+ * @throws IllegalArgumentException
+ * Thrown when the {@code lineSeparator} contains Base64 characters.
+ * @since 1.15
+ */
+ public Base64(final int lineLength, final byte[] lineSeparator, final byte[] encodeTable,
final CodecPolicy decodingPolicy) {
super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK,
lineLength,
lineSeparator == null ? 0 : lineSeparator.length,
PAD_DEFAULT,
decodingPolicy);
+ this.encodeTable = encodeTable;
+
+ if (encodeTable == STANDARD_ENCODE_TABLE || encodeTable == URL_SAFE_ENCODE_TABLE) {
+ decodeTable = DEFAULT_DECODE_TABLE;
+ } else {
+ if (encodeTable.length != 64) {
+ throw new IllegalArgumentException("encodeTable must be exactly 64 bytes long");
+ }
+ decodeTable = calculateDecodeTable(encodeTable);
+ }
// TODO could be simplified if there is no requirement to reject invalid line sep when length <=0
// @see test case Base64Test.testConstructors()
if (lineSeparator != null) {
@@ -609,7 +665,6 @@ public Base64(final int lineLength, final byte[] lineSeparator, final boolean ur
this.lineSeparator = null;
}
this.decodeSize = this.encodeSize - 1;
- this.encodeTable = urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE;
}
// Implementation of the Encoder Interface
@@ -655,8 +710,8 @@ void decode(final byte[] input, int inPos, final int inAvail, final Context cont
context.eof = true;
break;
}
- if (b >= 0 && b < DECODE_TABLE.length) {
- final int result = DECODE_TABLE[b];
+ if (b >= 0 && b < decodeTable.length) {
+ final int result = decodeTable[b];
if (result >= 0) {
context.modulus = (context.modulus+1) % BYTES_PER_ENCODED_BLOCK;
context.ibitWorkArea = (context.ibitWorkArea << BITS_PER_ENCODED_BYTE) + result;
@@ -804,6 +859,23 @@ protected boolean isInAlphabet(final byte octet) {
return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
}
+ /**
+ * calculates a decode table for a given encode table
+ *
+ * @param encodeTable that is used to determine decode lookup table
+ * @return decodeTable
+ */
+ private byte[] calculateDecodeTable(byte[] encodeTable) {
+ byte[] decodeTable = new byte[256];
+ for (int i=0; i < 256; i++) {
+ decodeTable[i] = -1;
+ }
+ for (int i=0; i < encodeTable.length; i++) {
+ decodeTable[encodeTable[i]] = (byte) i;
+ }
+ return decodeTable;
+ }
+
/**
* Returns our current encode mode. True if we're URL-SAFE, false otherwise.
*
diff --git a/src/test/java/org/apache/commons/codec/binary/Base64Test.java b/src/test/java/org/apache/commons/codec/binary/Base64Test.java
index 7919ea8859..6626a9188d 100644
--- a/src/test/java/org/apache/commons/codec/binary/Base64Test.java
+++ b/src/test/java/org/apache/commons/codec/binary/Base64Test.java
@@ -167,6 +167,56 @@ public void testBase64() {
assertEquals("Hello World", decodeString, "decode hello world");
}
+ @Test
+ public void testCustomEncodingAlphabet_illegal() {
+ // Only 13 entries: a custom encode table that is not exactly 64 bytes long
+ // must be rejected by the constructor.
+ final byte[] truncatedTable = {'.', '-', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M'};
+ assertThrows(IllegalArgumentException.class, () -> new Base64(truncatedTable));
+ }
+
+ @Test
+ public void testCustomEncodingAlphabet() {
+ // Copy of STANDARD_ENCODE_TABLE with two symbols swapped for bytes that are
+ // not otherwise present in the alphabet:
+ // A => . B => -
+ final byte[] encodeTable = {
+ '.', '-', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+ 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+ 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
+ };
+
+ // One codec with the default table, one with the customized table.
+ final Base64 b64 = new Base64();
+ final Base64 b64customEncoding = new Base64(encodeTable);
+
+ final String content = "! Hello World - this §$%";
+
+ final byte[] encodedBytes = b64.encode(StringUtils.getBytesUtf8(content));
+ final String encodedContent = StringUtils.newStringUtf8(encodedBytes);
+
+ final byte[] encodedBytesCustom = b64customEncoding.encode(StringUtils.getBytesUtf8(content));
+ final String encodedContentCustom = StringUtils.newStringUtf8(encodedBytesCustom);
+
+ // Guard: the default encoding must actually use both remapped symbols.
+ assertTrue(encodedContent.contains("A") && encodedContent.contains("B"),
+ "testing precondition not met - encodedContent should contain parts of modified table");
+
+ // Literal replacements (no regex needed) map the default output onto the custom alphabet.
+ final String expectedCustom = encodedContent
+ .replace("A", ".").replace("B", "-") // replace alphabet adjustments
+ .replace("=", ""); // drop padding, which the custom-table output is expected not to contain
+ assertEquals(expectedCustom, encodedContentCustom);
+
+ // The custom codec must round-trip its own output.
+ final byte[] decode = b64customEncoding.decode(encodedBytesCustom);
+ final String decodeString = StringUtils.newStringUtf8(decode);
+
+ assertEquals(content, decodeString);
+ }
+
@Test
public void testBase64AtBufferEnd() {
testBase64InBuffer(100, 0);
From 4e2f46acec5889c07fecb5358e910bbc4963f78f Mon Sep 17 00:00:00 2001
From: Krzysztof Kocel
Date: Sun, 14 Apr 2024 21:51:33 +0200
Subject: [PATCH 2/4] checkstyle
---
src/main/java/org/apache/commons/codec/binary/Base64.java | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/main/java/org/apache/commons/codec/binary/Base64.java b/src/main/java/org/apache/commons/codec/binary/Base64.java
index 96c5a0a3d1..9e13e71764 100644
--- a/src/main/java/org/apache/commons/codec/binary/Base64.java
+++ b/src/main/java/org/apache/commons/codec/binary/Base64.java
@@ -597,8 +597,8 @@ public Base64(byte[] encodeTable) {
* @since 1.15
*/
public Base64(final int lineLength, final byte[] lineSeparator, boolean urlSafe,
- final CodecPolicy decodingPolicy){
- this(lineLength, lineSeparator,urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE, decodingPolicy);
+ final CodecPolicy decodingPolicy) {
+ this(lineLength, lineSeparator, urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE, decodingPolicy);
}
/**
@@ -865,10 +865,10 @@ protected boolean isInAlphabet(final byte octet) {
*/
private byte[] calculateDecodeTable(byte[] encodeTable) {
byte[] decodeTable = new byte[256];
- for (int i=0; i < 256; i++) {
+ for (int i = 0; i < 256; i++) {
decodeTable[i] = -1;
}
- for (int i=0; i < encodeTable.length; i++) {
+ for (int i = 0; i < encodeTable.length; i++) {
decodeTable[encodeTable[i]] = (byte) i;
}
return decodeTable;
From 8061fb1ed55c75bd574b9d0f8956f6c4fe7f5946 Mon Sep 17 00:00:00 2001
From: Krzysztof Kocel
Date: Mon, 15 Apr 2024 10:18:10 +0200
Subject: [PATCH 3/4] revert table name, improve javadoc, add constants
---
.../apache/commons/codec/binary/Base64.java | 22 +++++++++----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/src/main/java/org/apache/commons/codec/binary/Base64.java b/src/main/java/org/apache/commons/codec/binary/Base64.java
index 9e13e71764..adf4ebeb54 100644
--- a/src/main/java/org/apache/commons/codec/binary/Base64.java
+++ b/src/main/java/org/apache/commons/codec/binary/Base64.java
@@ -64,6 +64,8 @@ public class Base64 extends BaseNCodec {
private static final int BITS_PER_ENCODED_BYTE = 6;
private static final int BYTES_PER_UNENCODED_BLOCK = 3;
private static final int BYTES_PER_ENCODED_BLOCK = 4;
+ private static final int ALPHABET_LENGTH = 64;
+ private static final int DECODING_TABLE_LENGTH = 256;
/**
* This array is a lookup table that translates 6-bit positive integer index values into their "Base64 Alphabet"
@@ -107,7 +109,7 @@ public class Base64 extends BaseNCodec {
* https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
*
*/
- private static final byte[] DEFAULT_DECODE_TABLE = {
+ private static final byte[] DECODE_TABLE = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
@@ -346,7 +348,7 @@ public static boolean isArrayByteBase64(final byte[] arrayOctet) {
* @since 1.4
*/
public static boolean isBase64(final byte octet) {
- return octet == PAD_DEFAULT || octet >= 0 && octet < DEFAULT_DECODE_TABLE.length && DEFAULT_DECODE_TABLE[octet] != -1;
+ return octet == PAD_DEFAULT || octet >= 0 && octet < DECODE_TABLE.length && DECODE_TABLE[octet] != -1;
}
/**
@@ -422,7 +424,7 @@ static byte[] toIntegerBytes(final BigInteger bigInt) {
private final byte[] encodeTable;
/**
- * Decode table to use
+ * Decode table to use.
*/
private final byte[] decodeTable;
@@ -625,7 +627,7 @@ public Base64(final int lineLength, final byte[] lineSeparator, boolean urlSafe,
* @param decodingPolicy The decoding policy.
* @throws IllegalArgumentException
* Thrown when the {@code lineSeparator} contains Base64 characters.
- * @since 1.15
+ * @since 1.17.0
*/
public Base64(final int lineLength, final byte[] lineSeparator, final byte[] encodeTable,
final CodecPolicy decodingPolicy) {
@@ -637,9 +639,9 @@ public Base64(final int lineLength, final byte[] lineSeparator, final byte[] enc
this.encodeTable = encodeTable;
if (encodeTable == STANDARD_ENCODE_TABLE || encodeTable == URL_SAFE_ENCODE_TABLE) {
- decodeTable = DEFAULT_DECODE_TABLE;
+ decodeTable = DECODE_TABLE;
} else {
- if (encodeTable.length != 64) {
+ if (encodeTable.length != ALPHABET_LENGTH) {
throw new IllegalArgumentException("encodeTable must be exactly 64 bytes long");
}
decodeTable = calculateDecodeTable(encodeTable);
@@ -858,16 +860,14 @@ protected boolean isInAlphabet(final byte octet) {
}
/**
- * calculates a decode table for a given encode table
+ * Calculates a decode table for a given encode table.
*
* @param encodeTable that is used to determine decode lookup table
* @return decodeTable
*/
private byte[] calculateDecodeTable(byte[] encodeTable) {
- byte[] decodeTable = new byte[256];
- for (int i = 0; i < 256; i++) {
- decodeTable[i] = -1;
- }
+ byte[] decodeTable = new byte[DECODING_TABLE_LENGTH];
+ Arrays.fill(decodeTable, (byte) -1);
for (int i = 0; i < encodeTable.length; i++) {
decodeTable[encodeTable[i]] = (byte) i;
}
From 8a568f48c1d12e06c569285fb59ef7f9cf28266f Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Mon, 15 Apr 2024 17:03:31 -0400
Subject: [PATCH 4/4] Add missing Javadoc tag
---
src/main/java/org/apache/commons/codec/binary/Base64.java | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/main/java/org/apache/commons/codec/binary/Base64.java b/src/main/java/org/apache/commons/codec/binary/Base64.java
index adf4ebeb54..7fffc9f942 100644
--- a/src/main/java/org/apache/commons/codec/binary/Base64.java
+++ b/src/main/java/org/apache/commons/codec/binary/Base64.java
@@ -565,6 +565,7 @@ public Base64(final int lineLength, final byte[] lineSeparator, final boolean ur
*
* @param encodeTable
* The manual encodeTable - a byte array of 64 chars
+ * @since 1.17.0
*/
public Base64(byte[] encodeTable) {
this(0, CHUNK_SEPARATOR, encodeTable, DECODING_POLICY_DEFAULT);