diff --git a/src/main/java/com/metamx/common/CompressionUtils.java b/src/main/java/com/metamx/common/CompressionUtils.java index 70915dd1..7b060b46 100644 --- a/src/main/java/com/metamx/common/CompressionUtils.java +++ b/src/main/java/com/metamx/common/CompressionUtils.java @@ -22,7 +22,6 @@ import com.google.common.io.ByteSource; import com.google.common.io.ByteStreams; import com.google.common.io.Files; -import com.metamx.common.guava.CloseQuietly; import com.metamx.common.logger.Logger; import java.io.BufferedInputStream; @@ -99,7 +98,7 @@ public static long zip(File directory, OutputStream out) throws IOException totalSize += Files.asByteSource(file).copyTo(zipOut); } zipOut.closeEntry(); - // Workarround for http://hg.openjdk.java.net/jdk8/jdk8/jdk/rev/759aa847dcaf + // Workaround for http://hg.openjdk.java.net/jdk8/jdk8/jdk/rev/759aa847dcaf zipOut.flush(); } diff --git a/src/main/java/com/metamx/common/Pair.java b/src/main/java/com/metamx/common/Pair.java index 9ff98493..1d06755a 100644 --- a/src/main/java/com/metamx/common/Pair.java +++ b/src/main/java/com/metamx/common/Pair.java @@ -112,4 +112,20 @@ public int compare(Pair o1, Pair o2) } }; } + + public static Comparator> comparator() + { + return new Comparator>() + { + @Override + public int compare(Pair o1, Pair o2) + { + int compare = o1.lhs.compareTo(o2.lhs); + if (compare == 0) { + return o1.rhs.compareTo(o2.rhs); + } + return compare; + } + }; + } } diff --git a/src/main/java/com/metamx/common/StreamUtils.java b/src/main/java/com/metamx/common/StreamUtils.java index cc588e8d..ba15b0f1 100644 --- a/src/main/java/com/metamx/common/StreamUtils.java +++ b/src/main/java/com/metamx/common/StreamUtils.java @@ -56,7 +56,7 @@ public static long copyToFileAndClose(InputStream is, File file) throws IOExcept file.getParentFile().mkdirs(); try (OutputStream os = new BufferedOutputStream(new FileOutputStream(file))) { final long result = ByteStreams.copy(is, os); - // Workarround for 
http://hg.openjdk.java.net/jdk8/jdk8/jdk/rev/759aa847dcaf + // Workaround for http://hg.openjdk.java.net/jdk8/jdk8/jdk/rev/759aa847dcaf os.flush(); return result; } @@ -83,7 +83,7 @@ public static long copyToFileAndClose(InputStream is, File file, long timeout) t file.getParentFile().mkdirs(); try (OutputStream os = new BufferedOutputStream(new FileOutputStream(file))) { final long retval = copyWithTimeout(is, os, timeout); - // Workarround for http://hg.openjdk.java.net/jdk8/jdk8/jdk/rev/759aa847dcaf + // Workaround for http://hg.openjdk.java.net/jdk8/jdk8/jdk/rev/759aa847dcaf os.flush(); return retval; } @@ -106,7 +106,7 @@ public static long copyAndClose(InputStream is, OutputStream os) throws IOExcept { try { final long retval = ByteStreams.copy(is, os); - // Workarround for http://hg.openjdk.java.net/jdk8/jdk8/jdk/rev/759aa847dcaf + // Workaround for http://hg.openjdk.java.net/jdk8/jdk8/jdk/rev/759aa847dcaf os.flush(); return retval; } @@ -127,7 +127,7 @@ public static long copyAndClose(InputStream is, OutputStream os) throws IOExcept * @return The total size of bytes written to `os` * * @throws IOException - * @throws TimeoutException If `tiemout` is exceeded + * @throws TimeoutException If `timeout` is exceeded */ public static long copyWithTimeout(InputStream is, OutputStream os, long timeout) throws IOException, TimeoutException { @@ -170,7 +170,7 @@ public Long call() throws Exception try (InputStream inputStream = byteSource.openStream()) { try (OutputStream outputStream = byteSink.openStream()) { final long retval = ByteStreams.copy(inputStream, outputStream); - // Workarround for http://hg.openjdk.java.net/jdk8/jdk8/jdk/rev/759aa847dcaf + // Workaround for http://hg.openjdk.java.net/jdk8/jdk8/jdk/rev/759aa847dcaf outputStream.flush(); return retval; } diff --git a/src/main/java/com/metamx/common/StringUtils.java b/src/main/java/com/metamx/common/StringUtils.java index cbe4c0a4..6a677e39 100644 --- a/src/main/java/com/metamx/common/StringUtils.java +++ 
b/src/main/java/com/metamx/common/StringUtils.java @@ -25,7 +25,7 @@ import java.util.IllegalFormatException; /** - * As of right now (Dec 2014) the JVM is optimized around String charset variablse instead of Charset passing. + * As of right now (Dec 2014) the JVM is optimized around String charset variables instead of Charset passing. */ public class StringUtils { @@ -70,7 +70,7 @@ public static byte[] toUtf8(final String string) public static String safeFormat(String message, Object... formatArgs) { - if(formatArgs == null || formatArgs.length == 0) { + if (formatArgs == null || formatArgs.length == 0) { return message; } try { @@ -84,4 +84,26 @@ public static String safeFormat(String message, Object... formatArgs) return bob.toString(); } } + + // should be used only for estimation + // returns the same result as StringUtils.toUtf8(value).length for valid string values + // does not check validity of format and returns over-estimated result for invalid string (see UT) + public static int binaryLengthAsUTF8(String value) + { + int length = 0; + for (int i = 0; i < value.length(); i++) { + final char val = value.charAt(i); + if (val < 0x80) { + length += 1; + } else if (val < 0x800) { + length += 2; + } else if (Character.isSurrogate(val)) { + length += 4; + i++; + } else { + length += 3; + } + } + return length; + } } diff --git a/src/test/java/com/metamx/common/StringUtilsTest.java b/src/test/java/com/metamx/common/StringUtilsTest.java index 28e44857..62fca869 100644 --- a/src/test/java/com/metamx/common/StringUtilsTest.java +++ b/src/test/java/com/metamx/common/StringUtilsTest.java @@ -26,6 +26,54 @@ */ public class StringUtilsTest { + // copied from https://github.com/druid-io/druid/pull/2612 + public final static String[] TEST_STRINGS = new String[]{ + "peach", "péché", "pêche", "sin", "", + "☃", "C", "c", "Ç", "ç", "G", "g", "Ğ", "ğ", "I", "ı", "İ", "i", + "O", "o", "Ö", "ö", "S", "s", "Ş", "ş", "U", "u", "Ü", "ü", "ä", + "\uD841\uDF0E", + "\uD841\uDF31", 
+ "\uD844\uDC5C", + "\uD84F\uDCB7", + "\uD860\uDEE2", + "\uD867\uDD98", + "\u006E\u0303", + "\u006E", + "\uFB00", + "\u0066\u0066", + "Å", + "\u00C5", + "\u212B" + }; + + @Test + public void binaryLengthAsUTF8Test() throws UnsupportedEncodingException + { + for (String string : TEST_STRINGS) { + Assert.assertEquals(StringUtils.toUtf8(string).length, StringUtils.binaryLengthAsUTF8(string)); + } + } + + @Test + public void binaryLengthAsUTF8InvalidTest() throws UnsupportedEncodingException + { + String invalid = "\uD841"; // high only + Assert.assertEquals(1, StringUtils.toUtf8(invalid).length); + Assert.assertEquals(4, StringUtils.binaryLengthAsUTF8(invalid)); + + invalid = "\uD841\uD841"; // high + high + Assert.assertEquals(2, StringUtils.toUtf8(invalid).length); + Assert.assertEquals(4, StringUtils.binaryLengthAsUTF8(invalid)); + + invalid = "\uD841\u0050"; // high + char + Assert.assertEquals(2, StringUtils.toUtf8(invalid).length); + Assert.assertEquals(4, StringUtils.binaryLengthAsUTF8(invalid)); + + invalid = "\uDEE2\uD841"; // low + high + Assert.assertEquals(2, StringUtils.toUtf8(invalid).length); + Assert.assertEquals(4, StringUtils.binaryLengthAsUTF8(invalid)); + } + @Test public void fromUtf8ConversionTest() throws UnsupportedEncodingException {