Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions src/main/java/com/metamx/common/CompressionUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import com.google.common.io.ByteSource;
import com.google.common.io.ByteStreams;
import com.google.common.io.Files;
import com.metamx.common.guava.CloseQuietly;
import com.metamx.common.logger.Logger;

import java.io.BufferedInputStream;
Expand Down Expand Up @@ -99,7 +98,7 @@ public static long zip(File directory, OutputStream out) throws IOException
totalSize += Files.asByteSource(file).copyTo(zipOut);
}
zipOut.closeEntry();
// Workarround for http://hg.openjdk.java.net/jdk8/jdk8/jdk/rev/759aa847dcaf
// Workaround for http://hg.openjdk.java.net/jdk8/jdk8/jdk/rev/759aa847dcaf
zipOut.flush();
}

Expand Down
16 changes: 16 additions & 0 deletions src/main/java/com/metamx/common/Pair.java
Original file line number Diff line number Diff line change
Expand Up @@ -112,4 +112,20 @@ public int compare(Pair<T1, ?> o1, Pair<T1, ?> o2)
}
};
}

/**
 * Creates a comparator that orders pairs by their {@code lhs} value and, when the two
 * {@code lhs} values compare as equal, by their {@code rhs} value.
 *
 * Both components are compared through their own {@code compareTo}; no null checks are
 * performed on the pairs or on their components.
 *
 * @param <T1> type of the left-hand component
 * @param <T2> type of the right-hand component
 *
 * @return a new comparator over {@code Pair<T1, T2>}
 */
public static <T1 extends Comparable, T2 extends Comparable> Comparator<Pair<T1, T2>> comparator()
{
  return new Comparator<Pair<T1, T2>>()
  {
    @Override
    public int compare(Pair<T1, T2> left, Pair<T1, T2> right)
    {
      // The left-hand side decides the order; the right-hand side only breaks ties.
      final int byLhs = left.lhs.compareTo(right.lhs);
      return byLhs != 0 ? byLhs : left.rhs.compareTo(right.rhs);
    }
  };
}
}
10 changes: 5 additions & 5 deletions src/main/java/com/metamx/common/StreamUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public static long copyToFileAndClose(InputStream is, File file) throws IOExcept
file.getParentFile().mkdirs();
try (OutputStream os = new BufferedOutputStream(new FileOutputStream(file))) {
final long result = ByteStreams.copy(is, os);
// Workarround for http://hg.openjdk.java.net/jdk8/jdk8/jdk/rev/759aa847dcaf
// Workaround for http://hg.openjdk.java.net/jdk8/jdk8/jdk/rev/759aa847dcaf
os.flush();
return result;
}
Expand All @@ -83,7 +83,7 @@ public static long copyToFileAndClose(InputStream is, File file, long timeout) t
file.getParentFile().mkdirs();
try (OutputStream os = new BufferedOutputStream(new FileOutputStream(file))) {
final long retval = copyWithTimeout(is, os, timeout);
// Workarround for http://hg.openjdk.java.net/jdk8/jdk8/jdk/rev/759aa847dcaf
// Workaround for http://hg.openjdk.java.net/jdk8/jdk8/jdk/rev/759aa847dcaf
os.flush();
return retval;
}
Expand All @@ -106,7 +106,7 @@ public static long copyAndClose(InputStream is, OutputStream os) throws IOExcept
{
try {
final long retval = ByteStreams.copy(is, os);
// Workarround for http://hg.openjdk.java.net/jdk8/jdk8/jdk/rev/759aa847dcaf
// Workaround for http://hg.openjdk.java.net/jdk8/jdk8/jdk/rev/759aa847dcaf
os.flush();
return retval;
}
Expand All @@ -127,7 +127,7 @@ public static long copyAndClose(InputStream is, OutputStream os) throws IOExcept
* @return The total size of bytes written to `os`
*
* @throws IOException
* @throws TimeoutException If `tiemout` is exceeded
* @throws TimeoutException If `timeout` is exceeded
*/
public static long copyWithTimeout(InputStream is, OutputStream os, long timeout) throws IOException, TimeoutException
{
Expand Down Expand Up @@ -170,7 +170,7 @@ public Long call() throws Exception
try (InputStream inputStream = byteSource.openStream()) {
try (OutputStream outputStream = byteSink.openStream()) {
final long retval = ByteStreams.copy(inputStream, outputStream);
// Workarround for http://hg.openjdk.java.net/jdk8/jdk8/jdk/rev/759aa847dcaf
// Workaround for http://hg.openjdk.java.net/jdk8/jdk8/jdk/rev/759aa847dcaf
outputStream.flush();
return retval;
}
Expand Down
26 changes: 24 additions & 2 deletions src/main/java/com/metamx/common/StringUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
import java.util.IllegalFormatException;

/**
* As of right now (Dec 2014) the JVM is optimized around String charset variablse instead of Charset passing.
* As of right now (Dec 2014) the JVM is optimized around String charset variable instead of Charset passing.
*/
public class StringUtils
{
Expand Down Expand Up @@ -70,7 +70,7 @@ public static byte[] toUtf8(final String string)

public static String safeFormat(String message, Object... formatArgs)
{
if(formatArgs == null || formatArgs.length == 0) {
if (formatArgs == null || formatArgs.length == 0) {
return message;
}
try {
Expand All @@ -84,4 +84,26 @@ public static String safeFormat(String message, Object... formatArgs)
return bob.toString();
}
}

/**
 * Estimates how many bytes {@code value} occupies when encoded as UTF-8, without encoding it.
 *
 * Intended for estimation only. For well-formed strings the result equals
 * {@code StringUtils.toUtf8(value).length}. The input is not validated: any surrogate char is
 * counted as a 4-byte sequence and the char that follows it is skipped, so malformed strings
 * (unpaired or misordered surrogates) yield an over-estimate (see the unit tests).
 *
 * @param value the string to measure
 *
 * @return the estimated UTF-8 encoded length in bytes
 */
public static int binaryLengthAsUTF8(String value)
{
  final int charCount = value.length();
  int bytes = 0;
  int pos = 0;
  while (pos < charCount) {
    final char current = value.charAt(pos);
    if (Character.isSurrogate(current)) {
      // Treat this char and the next one as a single supplementary code point.
      bytes += 4;
      pos += 2;
      continue;
    }
    if (current < 0x80) {
      bytes += 1;
    } else if (current < 0x800) {
      bytes += 2;
    } else {
      bytes += 3;
    }
    pos++;
  }
  return bytes;
}
}
48 changes: 48 additions & 0 deletions src/test/java/com/metamx/common/StringUtilsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,54 @@
*/
public class StringUtilsTest
{
// copied from https://github.com/druid-io/druid/pull/2612
// Sample inputs for the UTF-8 tests: plain ASCII words, accented Latin letters, the empty
// string, a symbol outside Latin, surrogate pairs, and several composed/decomposed forms.
public final static String[] TEST_STRINGS = new String[]{
"peach", "péché", "pêche", "sin", "",
"☃", "C", "c", "Ç", "ç", "G", "g", "Ğ", "ğ", "I", "ı", "İ", "i",
"O", "o", "Ö", "ö", "S", "s", "Ş", "ş", "U", "u", "Ü", "ü", "ä",
// surrogate pairs: one supplementary code point written as two chars
"\uD841\uDF0E",
"\uD841\uDF31",
"\uD844\uDC5C",
"\uD84F\uDCB7",
"\uD860\uDEE2",
"\uD867\uDD98",
// 'n' followed by the combining tilde U+0303, then plain 'n'
"\u006E\u0303",
"\u006E",
// the "ff" ligature U+FB00, then "ff" as two separate letters
"\uFB00",
"\u0066\u0066",
// A-with-ring variants: a literal, U+00C5, and the Angstrom sign U+212B
"Å",
"\u00C5",
"\u212B"
};

/**
 * Checks that, for every sample in {@code TEST_STRINGS}, the estimated UTF-8 length equals the
 * length of the byte array actually produced by {@code StringUtils.toUtf8}.
 */
@Test
public void binaryLengthAsUTF8Test() throws UnsupportedEncodingException
{
  for (String sample : TEST_STRINGS) {
    final int encodedSize = StringUtils.toUtf8(sample).length;
    final int estimatedSize = StringUtils.binaryLengthAsUTF8(sample);
    Assert.assertEquals(encodedSize, estimatedSize);
  }
}

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's also add a test where the lengths are not equal, to clarify that this is the expected behavior.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok. I'll add invalid char cases.

/**
 * Pins the documented over-estimation for malformed input: each case contains surrogate chars
 * that do not form a valid pair, and the estimate is expected to be 4 even though the encoded
 * array is expected to be shorter.
 */
@Test
public void binaryLengthAsUTF8InvalidTest() throws UnsupportedEncodingException
{
  // a high surrogate on its own
  final String highOnly = "\uD841";
  Assert.assertEquals(1, StringUtils.toUtf8(highOnly).length);
  Assert.assertEquals(4, StringUtils.binaryLengthAsUTF8(highOnly));

  // two high surrogates in a row
  final String highThenHigh = "\uD841\uD841";
  Assert.assertEquals(2, StringUtils.toUtf8(highThenHigh).length);
  Assert.assertEquals(4, StringUtils.binaryLengthAsUTF8(highThenHigh));

  // a high surrogate followed by an ordinary char
  final String highThenChar = "\uD841\u0050";
  Assert.assertEquals(2, StringUtils.toUtf8(highThenChar).length);
  Assert.assertEquals(4, StringUtils.binaryLengthAsUTF8(highThenChar));

  // a low surrogate followed by a high surrogate (wrong order)
  final String lowThenHigh = "\uDEE2\uD841";
  Assert.assertEquals(2, StringUtils.toUtf8(lowThenHigh).length);
  Assert.assertEquals(4, StringUtils.binaryLengthAsUTF8(lowThenHigh));
}

@Test
public void fromUtf8ConversionTest() throws UnsupportedEncodingException
{
Expand Down