Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file not shown.
Binary file not shown.
6 changes: 6 additions & 0 deletions extensions-core/orc-extensions/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,12 @@
<version>${project.parent.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.druid</groupId>
<artifactId>druid-processing</artifactId>
<version>${project.parent.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.druid</groupId>
<artifactId>druid-indexing-hadoop</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ private static List<Object> convertList(TypeDescription fieldDescription, OrcLis
return new ArrayList<Object>(orcList);
}

private static Map<Object, Object> convertMap(
private Map<Object, Object> convertMap(
TypeDescription fieldDescription,
OrcMap<? extends WritableComparable, ? extends WritableComparable> map,
boolean binaryAsString
Expand All @@ -75,11 +75,7 @@ private static Map<Object, Object> convertMap(
TypeDescription valueDescription = fieldDescription.getChildren().get(1);
for (WritableComparable key : map.navigableKeySet()) {
Object newKey = convertPrimitive(keyDescription, key, binaryAsString);
if (valueDescription.getCategory().isPrimitive()) {
converted.put(newKey, convertPrimitive(valueDescription, map.get(key), binaryAsString));
} else {
converted.put(newKey, map.get(key));
}
converted.put(newKey, convertField(valueDescription, map.get(key)));
}
return converted;
}
Expand Down Expand Up @@ -148,6 +144,63 @@ private static Object convertPrimitive(
}
}

@Nullable
public Object tryConvertPrimitive(@Nullable WritableComparable field)
{
if (field == null) {
return null;
}
/*
ORC TYPE WRITABLE TYPE
binary org.apache.hadoop.io.BytesWritable
bigint org.apache.hadoop.io.LongWritable
boolean org.apache.hadoop.io.BooleanWritable
char org.apache.hadoop.io.Text
date org.apache.hadoop.hive.serde2.io.DateWritable
decimal org.apache.hadoop.hive.serde2.io.HiveDecimalWritable
double org.apache.hadoop.io.DoubleWritable
float org.apache.hadoop.io.FloatWritable
int org.apache.hadoop.io.IntWritable
smallint org.apache.hadoop.io.ShortWritable
string org.apache.hadoop.io.Text
timestamp org.apache.orc.mapred.OrcTimestamp
tinyint org.apache.hadoop.io.ByteWritable
varchar org.apache.hadoop.io.Text
*/
if (field instanceof Text) {
return ((Text) field).toString();
} else if (field instanceof BooleanWritable) {
return ((BooleanWritable) field).get();
} else if (field instanceof ByteWritable) {
return ((ByteWritable) field).get();
} else if (field instanceof ShortWritable) {
return ((ShortWritable) field).get();
} else if (field instanceof IntWritable) {
return ((IntWritable) field).get();
} else if (field instanceof LongWritable) {
return ((LongWritable) field).get();
} else if (field instanceof FloatWritable) {
return ((FloatWritable) field).get();
} else if (field instanceof DoubleWritable) {
return ((DoubleWritable) field).get();
} else if (field instanceof HiveDecimalWritable) {
return ((HiveDecimalWritable) field).getHiveDecimal().doubleValue();
} else if (field instanceof OrcTimestamp) {
return ((OrcTimestamp) field).getTime();
} else if (field instanceof DateWritable) {
return DateTimes.utc(((DateWritable) field).get().getTime());
} else if (field instanceof BytesWritable) {
byte[] bytes = ((BytesWritable) field).getBytes();
if (binaryAsString) {
return StringUtils.fromUtf8(bytes);
} else {
return bytes;
}
}
// unknown conversion, return null
return null;
}

private final boolean binaryAsString;
private Object2IntMap<String> fieldIndexCache;

Expand Down Expand Up @@ -204,6 +257,12 @@ Object convertField(OrcStruct struct, int fieldIndex)
return null;
}

return convertField(fieldDescription, fieldValue);
}

@Nullable
Object convertField(TypeDescription fieldDescription, WritableComparable fieldValue)
{
if (fieldDescription.getCategory().isPrimitive()) {
return convertPrimitive(fieldDescription, fieldValue, binaryAsString);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import com.jayway.jsonpath.spi.json.JsonProvider;
import org.apache.druid.java.util.common.parsers.NotImplementedMappingProvider;
import org.apache.druid.java.util.common.parsers.ObjectFlatteners;
import org.apache.hadoop.io.WritableComparable;
import org.apache.orc.TypeDescription;
import org.apache.orc.mapred.OrcList;
import org.apache.orc.mapred.OrcMap;
Expand Down Expand Up @@ -71,7 +72,7 @@ public Iterable<String> discoverRootFields(OrcStruct obj)
@Override
public Object getRootField(OrcStruct obj, String key)
{
return finalizeConversion(converter.convertRootField(obj, key));
return toPlainJavaType(converter.convertRootField(obj, key));
}

@Override
Expand Down Expand Up @@ -107,11 +108,19 @@ public JsonProvider getJsonProvider()
return orcJsonProvider;
}

@Override
public Object finalizeConversionForMap(Object o)
{
return finalizeConversion(o);
}

private Object finalizeConversion(Object o)
{
// replace any remaining complex types with null
// recursively convert any complex types
if (o instanceof OrcStruct || o instanceof OrcMap || o instanceof OrcList) {
return null;
return toPlainJavaType(o);
} else if (o instanceof WritableComparable) {
return converter.tryConvertPrimitive((WritableComparable) o);
}
return o;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@

import com.jayway.jsonpath.InvalidJsonException;
import com.jayway.jsonpath.spi.json.JsonProvider;
import org.apache.hadoop.io.Text;
import org.apache.orc.mapred.OrcMap;
import org.apache.orc.mapred.OrcStruct;

import java.io.InputStream;
Expand Down Expand Up @@ -103,6 +105,9 @@ public Object getMapValue(final Object o, final String s)
{
if (o == null) {
return null;
} else if (o instanceof OrcMap) {
OrcMap map = (OrcMap) o;
return map.get(new Text(s));
} else if (o instanceof Map) {
return ((Map) o).get(s);
} else if (o instanceof OrcStruct) {
Expand Down
Loading