Skip to content
This repository was archived by the owner on May 12, 2021. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ public boolean equals(Object obj) {
}
}

/**
 * Returns the nested record schema stored on this instance.
 *
 * @return the current value of the {@code nestedRecordSchema} field
 */
public Schema getNestedSchema() {
  return this.nestedRecordSchema;
}

/**
 * Computes a hash code from the hash of {@code dataType} combined with
 * {@code nestedRecordSchema}.
 *
 * NOTE(review): presumably mirrors the fields compared in {@code equals(Object)};
 * the body of {@code equals} is not visible here, so confirm the two stay in sync.
 *
 * @return the combined hash code
 */
@Override
public int hashCode() {
  return Objects.hashCode(dataType.hashCode(), nestedRecordSchema);
}
Expand Down
198 changes: 142 additions & 56 deletions tajo-catalog/tajo-catalog-drivers/tajo-hive/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<parquet.version>1.5.0</parquet.version>
<parquet.format.version>2.1.0</parquet.format.version>
</properties>

<build>
Expand Down Expand Up @@ -136,19 +134,35 @@
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<artifactId>zookeeper</artifactId>
<groupId>org.apache.zookeeper</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<artifactId>hadoop-yarn-common</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>netty</artifactId>
<groupId>io.netty</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<artifactId>hive-metastore</artifactId>
<version>${hive.version}</version>
<scope>provided</scope>
<exclusions>
Expand All @@ -158,129 +172,201 @@
</exclusion>
<exclusion>
<groupId>org.apache.hive</groupId>
<artifactId>hive-contrib</artifactId>
<artifactId>hive-serde</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hive</groupId>
<artifactId>hive-hbase-handler</artifactId>
<artifactId>hive-shims</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hive</groupId>
<artifactId>hive-metastore</artifactId>
<groupId>org.apache.thrift</groupId>
<artifactId>libfb303</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hive</groupId>
<artifactId>hive-serde</artifactId>
<groupId>org.apache.thrift</groupId>
<artifactId>libthrift</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hive</groupId>
<artifactId>hive-shims</artifactId>
<groupId>com.jolbox</groupId>
<artifactId>bonecp</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hive</groupId>
<artifactId>hive-testutils</artifactId>
<artifactId>tephra-hbase-compat-1.0</artifactId>
<groupId>co.cask.tephra</groupId>
</exclusion>
<exclusion>
<groupId>org.apache.thrift</groupId>
<artifactId>libfb303</artifactId>
<artifactId>tephra-core</artifactId>
<groupId>co.cask.tephra</groupId>
</exclusion>
<exclusion>
<groupId>org.apache.thrift</groupId>
<artifactId>libthrift</artifactId>
<artifactId>tephra-api</artifactId>
<groupId>co.cask.tephra</groupId>
</exclusion>
<exclusion>
<groupId>com.jolbox</groupId>
<artifactId>bonecp</artifactId>
<artifactId>hbase-client</artifactId>
<groupId>org.apache.hbase</groupId>
</exclusion>
<exclusion>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
<artifactId>hadoop-yarn-server-resourcemanager</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<groupId>org.apache.calcite</groupId>
<artifactId>calcite-core</artifactId>
<artifactId>antlr-runtime</artifactId>
<groupId>org.antlr</groupId>
</exclusion>
<exclusion>
<groupId>org.apache.calcite</groupId>
<artifactId>calcite-avatica</artifactId>
<artifactId>log4j-slf4j-impl</artifactId>
<groupId>org.apache.logging.log4j</groupId>
</exclusion>
<exclusion>
<artifactId>zookeeper</artifactId>
<groupId>org.apache.zookeeper</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-metastore</artifactId>
<artifactId>hive-common</artifactId>
<version>${hive.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>org.apache.hive</groupId>
<artifactId>hive-common</artifactId>
<artifactId>jetty-all</artifactId>
<groupId>org.eclipse.jetty.aggregate</groupId>
</exclusion>
<exclusion>
<groupId>org.apache.hive</groupId>
<artifactId>hive-serde</artifactId>
<artifactId>javax.servlet</artifactId>
<groupId>org.eclipse.jetty.orbit</groupId>
</exclusion>
<exclusion>
<groupId>org.apache.hive</groupId>
<artifactId>hive-shims</artifactId>
<artifactId>joda-time</artifactId>
<groupId>joda-time</groupId>
</exclusion>
<exclusion>
<groupId>org.apache.thrift</groupId>
<artifactId>libfb303</artifactId>
<artifactId>jackson-databind</artifactId>
<groupId>com.fasterxml.jackson.core</groupId>
</exclusion>
<exclusion>
<groupId>org.apache.thrift</groupId>
<artifactId>libthrift</artifactId>
<artifactId>metrics-json</artifactId>
<groupId>io.dropwizard.metrics</groupId>
</exclusion>
<exclusion>
<groupId>com.jolbox</groupId>
<artifactId>bonecp</artifactId>
<artifactId>metrics-jvm</artifactId>
<groupId>io.dropwizard.metrics</groupId>
</exclusion>
<exclusion>
<artifactId>metrics-core</artifactId>
<groupId>io.dropwizard.metrics</groupId>
</exclusion>
<exclusion>
<artifactId>ant</artifactId>
<groupId>org.apache.ant</groupId>
</exclusion>
<exclusion>
<artifactId>json</artifactId>
<groupId>org.json</groupId>
</exclusion>
<exclusion>
<artifactId>log4j-slf4j-impl</artifactId>
<groupId>org.apache.logging.log4j</groupId>
</exclusion>
<exclusion>
<artifactId>log4j-web</artifactId>
<groupId>org.apache.logging.log4j</groupId>
</exclusion>
<exclusion>
<artifactId>log4j-1.2-api</artifactId>
<groupId>org.apache.logging.log4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-cli</artifactId>
<artifactId>hive-exec</artifactId>
<version>${hive.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<artifactId>hive-ant</artifactId>
<groupId>org.apache.hive</groupId>
<artifactId>hive-common</artifactId>
</exclusion>
<exclusion>
<artifactId>hive-llap-tez</artifactId>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hive</groupId>
<artifactId>hive-metastore</artifactId>
<artifactId>ST4</artifactId>
<groupId>org.antlr</groupId>
</exclusion>
<exclusion>
<groupId>org.apache.hive</groupId>
<artifactId>hive-serde</artifactId>
<artifactId>ivy</artifactId>
<groupId>org.apache.ivy</groupId>
</exclusion>
<exclusion>
<groupId>org.apache.hive</groupId>
<artifactId>hive-service</artifactId>
<artifactId>curator-framework</artifactId>
<groupId>org.apache.curator</groupId>
</exclusion>
<exclusion>
<groupId>org.apache.hive</groupId>
<artifactId>hive-shims</artifactId>
<artifactId>apache-curator</artifactId>
<groupId>org.apache.curator</groupId>
</exclusion>
<exclusion>
<groupId>com.jolbox</groupId>
<artifactId>bonecp</artifactId>
<artifactId>groovy-all</artifactId>
<groupId>org.codehaus.groovy</groupId>
</exclusion>
<exclusion>
<artifactId>calcite-core</artifactId>
<groupId>org.apache.calcite</groupId>
</exclusion>
<exclusion>
<artifactId>calcite-avatica</artifactId>
<groupId>org.apache.calcite</groupId>
</exclusion>
<exclusion>
<artifactId>stax-api</artifactId>
<groupId>stax</groupId>
</exclusion>
<exclusion>
<groupId>jline</groupId>
<artifactId>jline</artifactId>
<groupId>jline</groupId>
</exclusion>
<exclusion>
<artifactId>log4j-1.2-api</artifactId>
<groupId>org.apache.logging.log4j</groupId>
</exclusion>
<exclusion>
<artifactId>log4j-slf4j-impl</artifactId>
<groupId>org.apache.logging.log4j</groupId>
</exclusion>
<exclusion>
<artifactId>ant</artifactId>
<groupId>org.apache.ant</groupId>
</exclusion>
<exclusion>
<artifactId>zookeeper</artifactId>
<groupId>org.apache.zookeeper</groupId>
</exclusion>
<exclusion>
<artifactId>antlr-runtime</artifactId>
<groupId>org.antlr</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-serde</artifactId>
<version>${hive.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<artifactId>opencsv</artifactId>
<groupId>net.sf.opencsv</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.twitter</groupId>
<artifactId>parquet-hive-bundle</artifactId>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-hadoop-bundle</artifactId>
<version>${parquet.version}</version>
</dependency>
</dependencies>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,15 @@
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.orc.OrcConf;
import org.apache.parquet.hadoop.ParquetOutputFormat;
import org.apache.tajo.BuiltinStorages;
import org.apache.tajo.TajoConstants;
import org.apache.tajo.algebra.Expr;
import org.apache.tajo.algebra.IsNullPredicate;
import org.apache.tajo.algebra.JsonHelper;
import org.apache.tajo.catalog.*;
import org.apache.tajo.catalog.TableMeta;
import org.apache.tajo.catalog.partition.PartitionMethodDesc;
import org.apache.tajo.catalog.proto.CatalogProtos;
import org.apache.tajo.catalog.proto.CatalogProtos.*;
Expand All @@ -56,10 +59,8 @@
import org.apache.tajo.storage.StorageConstants;
import org.apache.tajo.util.KeyValueSet;
import org.apache.thrift.TException;
import parquet.hadoop.ParquetOutputFormat;

import java.io.File;
import java.io.IOException;
import java.util.*;

public class HiveCatalogStore extends CatalogConstants implements CatalogStore {
Expand Down Expand Up @@ -564,6 +565,16 @@ public final void createTable(final CatalogProtos.TableDescProto tableDescProto)
table.putToParameters(ParquetOutputFormat.COMPRESSION,
tableDesc.getMeta().getProperty(ParquetOutputFormat.COMPRESSION));
}
} else if (tableDesc.getMeta().getDataFormat().equalsIgnoreCase(BuiltinStorages.ORC)) {
StorageFormatDescriptor descriptor = storageFormatFactory.get(IOConstants.ORC);
sd.setInputFormat(descriptor.getInputFormat());
sd.setOutputFormat(descriptor.getOutputFormat());
sd.getSerdeInfo().setSerializationLib(descriptor.getSerde());

if (tableDesc.getMeta().containsProperty(OrcConf.COMPRESS.getAttribute())) {
table.putToParameters(OrcConf.COMPRESS.getAttribute(),
tableDesc.getMeta().getProperty(OrcConf.COMPRESS.getAttribute()));
}
} else {
throw new UnsupportedException(tableDesc.getMeta().getDataFormat() + " in HivecatalogStore");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.serde.serdeConstants;
Expand Down Expand Up @@ -137,6 +138,8 @@ public static String getDataFormat(StorageDescriptor descriptor) {
return BuiltinStorages.PARQUET;
} else if (AvroSerDe.class.getName().equals(serde)) {
return BuiltinStorages.AVRO;
} else if (OrcSerde.class.getName().equals(serde)) {
return BuiltinStorages.ORC;
} else {
throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ public static void setUp() throws Exception {
conf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, warehousePath.toUri().toString());
conf.set(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname, jdbcUri);
conf.set(TajoConf.ConfVars.WAREHOUSE_DIR.varname, warehousePath.toUri().toString());
conf.setBoolean("datanucleus.schema.autoCreateAll", true);

// create local HiveCatalogStore.
TajoConf tajoConf = new TajoConf(conf);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ private static void dumpDatabase(TajoClient client, String databaseName, PrintWr
}
}
writer.write("\n\n");
} catch (Exception e) {
} catch (Throwable e) {
// dump for each table can throw any exception. We need to skip the exception case.
// here, the error message prints out via stderr.
System.err.println("ERROR:" + tableName + "," + e.getMessage());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ public String toString() {

/**
*
* @param tm TimeMEta
* @param tm TimeMeta
* @param timeZone Timezone
* @param includeTimeZone Add timezone if it is true. It is usually used for TIMEZONEZ
* @return A timestamp string
Expand Down
Loading