HiveProperties.java (new file)
@@ -0,0 +1,155 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.datasource.hive;

import com.google.common.collect.ImmutableSet;
import org.apache.hadoop.hive.metastore.api.Table;

import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.Set;

public class HiveProperties {
public static final String PROP_FIELD_DELIMITER = "field.delim";
public static final String PROP_SEPARATOR_CHAR = "separatorChar";
public static final String PROP_SERIALIZATION_FORMAT = "serialization.format";
public static final String DEFAULT_FIELD_DELIMITER = "\1"; // "\x01"

public static final String PROP_LINE_DELIMITER = "line.delim";
public static final String DEFAULT_LINE_DELIMITER = "\n";

public static final String PROP_QUOTE_CHAR = "quoteChar";

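// Hive 2 ships the misspelled serde key "colelction.delim"; Hive 3 corrects the spelling, so both keys are checked.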
public static final String PROP_COLLECTION_DELIMITER_HIVE2 = "colelction.delim";
public static final String PROP_COLLECTION_DELIMITER_HIVE3 = "collection.delim";
public static final String DEFAULT_COLLECTION_DELIMITER = "\2";

public static final String PROP_MAP_KV_DELIMITER = "mapkey.delim";
public static final String DEFAULT_MAP_KV_DELIMITER = "\003";

public static final String PROP_ESCAPE_DELIMITER = "escape.delim";
public static final String DEFAULT_ESCAPE_DELIMITER = "\\";

public static final String PROP_NULL_FORMAT = "serialization.null.format";
public static final String DEFAULT_NULL_FORMAT = "\\N";

public static final Set<String> HIVE_SERDE_PROPERTIES = ImmutableSet.of(
PROP_FIELD_DELIMITER,
PROP_COLLECTION_DELIMITER_HIVE2,
PROP_COLLECTION_DELIMITER_HIVE3,
PROP_SEPARATOR_CHAR,
PROP_SERIALIZATION_FORMAT,
PROP_LINE_DELIMITER,
PROP_QUOTE_CHAR,
PROP_MAP_KV_DELIMITER,
PROP_ESCAPE_DELIMITER,
PROP_NULL_FORMAT
);

public static String getFieldDelimiter(Table table) {
// This method is for the text file format.
// For compatibility with the csv format, use `getColumnSeparator` instead.
Optional<String> fieldDelim = HiveMetaStoreClientHelper.getSerdeProperty(table, PROP_FIELD_DELIMITER);
Optional<String> serFormat = HiveMetaStoreClientHelper.getSerdeProperty(table, PROP_SERIALIZATION_FORMAT);
return HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault(
DEFAULT_FIELD_DELIMITER, fieldDelim, serFormat));
}

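// Lookup order: field.delim, then separatorChar (csv), then serialization.format; falls back to Ctrl-A ("\1").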
public static String getColumnSeparator(Table table) {
Optional<String> fieldDelim = HiveMetaStoreClientHelper.getSerdeProperty(table, PROP_FIELD_DELIMITER);
Optional<String> columnSeparator = HiveMetaStoreClientHelper.getSerdeProperty(table, PROP_SEPARATOR_CHAR);
Optional<String> serFormat = HiveMetaStoreClientHelper.getSerdeProperty(table, PROP_SERIALIZATION_FORMAT);
return HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault(
DEFAULT_FIELD_DELIMITER, fieldDelim, columnSeparator, serFormat));
}


public static String getLineDelimiter(Table table) {
Optional<String> lineDelim = HiveMetaStoreClientHelper.getSerdeProperty(table, PROP_LINE_DELIMITER);
return HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault(
DEFAULT_LINE_DELIMITER, lineDelim));
}

public static String getMapKvDelimiter(Table table) {
Optional<String> mapkvDelim = HiveMetaStoreClientHelper.getSerdeProperty(table, PROP_MAP_KV_DELIMITER);
return HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault(
DEFAULT_MAP_KV_DELIMITER, mapkvDelim));
}

public static String getCollectionDelimiter(Table table) {
Optional<String> collectionDelimHive2 = HiveMetaStoreClientHelper.getSerdeProperty(table,
PROP_COLLECTION_DELIMITER_HIVE2);
Optional<String> collectionDelimHive3 = HiveMetaStoreClientHelper.getSerdeProperty(table,
PROP_COLLECTION_DELIMITER_HIVE3);
return HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault(
DEFAULT_COLLECTION_DELIMITER, collectionDelimHive2, collectionDelimHive3));
}

public static Optional<String> getQuoteChar(Table table) {
Map<String, String> serdeParams = table.getSd().getSerdeInfo().getParameters();
if (serdeParams.containsKey(PROP_QUOTE_CHAR)) {
return Optional.of(serdeParams.get(PROP_QUOTE_CHAR));
}
return Optional.empty();
}

public static Optional<String> getEscapeDelimiter(Table table) {
Optional<String> escapeDelim = HiveMetaStoreClientHelper.getSerdeProperty(table, PROP_ESCAPE_DELIMITER);
if (escapeDelim.isPresent()) {
String escape = HiveMetaStoreClientHelper.getByte(escapeDelim.get());
if (escape != null) {
return Optional.of(escape);
} else {
return Optional.of(DEFAULT_ESCAPE_DELIMITER);
}
}
return Optional.empty();
}

public static String getNullFormat(Table table) {
Optional<String> nullFormat = HiveMetaStoreClientHelper.getSerdeProperty(table, PROP_NULL_FORMAT);
return HiveMetaStoreClientHelper.firstPresentOrDefault(DEFAULT_NULL_FORMAT, nullFormat);
}

// Split the given properties into serde properties and table parameters, then apply both to the table.
public static void setTableProperties(Table table, Map<String, String> properties) {
HashMap<String, String> serdeProps = new HashMap<>();
HashMap<String, String> tblProps = new HashMap<>();

for (String k : properties.keySet()) {
if (HIVE_SERDE_PROPERTIES.contains(k)) {
serdeProps.put(k, properties.get(k));
} else {
tblProps.put(k, properties.get(k));
}
}

if (table.getParameters() == null) {
table.setParameters(tblProps);
} else {
table.getParameters().putAll(tblProps);
}

if (table.getSd().getSerdeInfo().getParameters() == null) {
table.getSd().getSerdeInfo().setParameters(serdeProps);
} else {
table.getSd().getSerdeInfo().getParameters().putAll(serdeProps);
}
}
}
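The class above consolidates the serde-property handling that HiveScanNode previously inlined (see the HiveScanNode.java diff below). One behavior worth noting: setTableProperties routes each entry by key, sending anything in HIVE_SERDE_PROPERTIES to the storage descriptor's SerDeInfo and everything else to the table-level parameters. A minimal sketch of that split, with the Table wired up by hand purely for illustration (in practice Doris builds it via toHiveTable):

import java.util.HashMap;
import java.util.Map;

import org.apache.doris.datasource.hive.HiveProperties;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;

public class HivePropertiesDemo {
    public static void main(String[] args) {
        Table table = new Table();
        StorageDescriptor sd = new StorageDescriptor();
        sd.setSerdeInfo(new SerDeInfo());
        table.setSd(sd);

        Map<String, String> props = new HashMap<>();
        props.put("field.delim", ",");       // serde key: lands in SerDeInfo parameters
        props.put("comment", "demo table");  // anything else: lands in table parameters

        HiveProperties.setTableProperties(table, props);

        System.out.println(table.getSd().getSerdeInfo().getParameters()); // {field.delim=,}
        System.out.println(table.getParameters());                        // {comment=demo table}
    }
}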
HiveUtil.java
@@ -25,6 +25,7 @@
import org.apache.doris.fs.remote.BrokerFileSystem;
import org.apache.doris.fs.remote.RemoteFileSystem;
import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.qe.ConnectContext;

import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
@@ -69,6 +70,8 @@ public final class HiveUtil {
public static final String COMPRESSION_KEY = "compression";
public static final Set<String> SUPPORTED_ORC_COMPRESSIONS = ImmutableSet.of("plain", "zlib", "snappy", "zstd");
public static final Set<String> SUPPORTED_PARQUET_COMPRESSIONS = ImmutableSet.of("plain", "snappy", "zstd");
public static final Set<String> SUPPORTED_TEXT_COMPRESSIONS =
ImmutableSet.of("plain", "gzip", "zstd", "bzip2", "lz4", "snappy");

private HiveUtil() {
}
@@ -191,7 +194,6 @@ public static Table toHiveTable(HiveTableMetadata hiveTable) {
Table table = new Table();
table.setDbName(hiveTable.getDbName());
table.setTableName(hiveTable.getTableName());
// table.setOwner("");
int createTime = (int) (System.currentTimeMillis() / 1000); // metastore create/access times are in seconds
table.setCreateTime(createTime);
table.setLastAccessTime(createTime);
@@ -211,10 +213,10 @@
setCompressType(hiveTable, props);
// set the hive table comment via table properties
props.put("comment", hiveTable.getComment());
table.setParameters(props);
if (props.containsKey("owner")) {
table.setOwner(props.get("owner"));
}
HiveProperties.setTableProperties(table, props);
return table;
}

@@ -232,6 +234,12 @@ private static void setCompressType(HiveTableMetadata hiveTable, Map<String, Str
throw new AnalysisException("Unsupported orc compression type " + compression);
}
props.putIfAbsent("orc.compress", StringUtils.isEmpty(compression) ? "zlib" : compression);
} else if (fileFormat.equalsIgnoreCase("text")) {
if (StringUtils.isNotEmpty(compression) && !SUPPORTED_TEXT_COMPRESSIONS.contains(compression)) {
throw new AnalysisException("Unsupported text compression type " + compression);
}
props.putIfAbsent("text.compression", StringUtils.isEmpty(compression)
? ConnectContext.get().getSessionVariable().hiveTextCompression() : compression);
} else {
throw new IllegalArgumentException("Compression is not supported on " + fileFormat);
}
@@ -249,7 +257,7 @@ private static StorageDescriptor toHiveStorageDesc(List<FieldSchema> columns,
sd.setBucketCols(bucketCols);
sd.setNumBuckets(numBuckets);
Map<String, String> parameters = new HashMap<>();
parameters.put("tag", "doris external hive talbe");
parameters.put("tag", "doris external hive table");
sd.setParameters(parameters);
return sd;
}
@@ -266,6 +274,10 @@ private static void setFileFormat(String fileFormat, StorageDescriptor sd) {
inputFormat = "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat";
outputFormat = "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat";
serDe = "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe";
} else if (fileFormat.equalsIgnoreCase("text")) {
inputFormat = "org.apache.hadoop.mapred.TextInputFormat";
outputFormat = "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat";
serDe = "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe";
} else {
throw new IllegalArgumentException("Creating table with an unsupported file format: " + fileFormat);
}
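With this change toHiveTable can emit text-format tables: setFileFormat wires in the classic Hadoop text classes, and setCompressType validates the codec against SUPPORTED_TEXT_COMPRESSIONS, falling back to the session default (hiveTextCompression()) when none is given. A hedged sketch of the storage descriptor the text branch describes; the class names are taken verbatim from the diff, but the hand assembly below is illustrative only:

import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;

class TextStorageDescSketch {
    // Mirrors the new text branch of setFileFormat above.
    static StorageDescriptor textStorageDesc() {
        StorageDescriptor sd = new StorageDescriptor();
        sd.setInputFormat("org.apache.hadoop.mapred.TextInputFormat");
        sd.setOutputFormat("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat");
        SerDeInfo serDeInfo = new SerDeInfo();
        serDeInfo.setSerializationLib("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe");
        sd.setSerdeInfo(serDeInfo);
        return sd;
    }
}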
HiveScanNode.java
@@ -38,6 +38,7 @@
import org.apache.doris.datasource.hive.HiveMetaStoreCache.FileCacheValue;
import org.apache.doris.datasource.hive.HiveMetaStoreClientHelper;
import org.apache.doris.datasource.hive.HivePartition;
import org.apache.doris.datasource.hive.HiveProperties;
import org.apache.doris.datasource.hive.HiveTransaction;
import org.apache.doris.datasource.hive.source.HiveSplit.HiveSplitCreator;
import org.apache.doris.nereids.trees.plans.logical.LogicalFileScan.SelectedPartitions;
@@ -57,6 +58,7 @@
import lombok.Setter;
import org.apache.hadoop.hive.common.ValidWriteIdList;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

@@ -65,7 +67,6 @@
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Random;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executor;
@@ -77,26 +78,6 @@
public class HiveScanNode extends FileQueryScanNode {
private static final Logger LOG = LogManager.getLogger(HiveScanNode.class);

public static final String PROP_FIELD_DELIMITER = "field.delim";
public static final String DEFAULT_FIELD_DELIMITER = "\1"; // "\x01"
public static final String PROP_LINE_DELIMITER = "line.delim";
public static final String DEFAULT_LINE_DELIMITER = "\n";
public static final String PROP_SEPARATOR_CHAR = "separatorChar";
public static final String PROP_QUOTE_CHAR = "quoteChar";
public static final String PROP_SERIALIZATION_FORMAT = "serialization.format";

public static final String PROP_COLLECTION_DELIMITER_HIVE2 = "colelction.delim";
public static final String PROP_COLLECTION_DELIMITER_HIVE3 = "collection.delim";
public static final String DEFAULT_COLLECTION_DELIMITER = "\2";

public static final String PROP_MAP_KV_DELIMITER = "mapkey.delim";
public static final String DEFAULT_MAP_KV_DELIMITER = "\003";

public static final String PROP_ESCAPE_DELIMITER = "escape.delim";
public static final String DEFAULT_ESCAPE_DELIMIER = "\\";
public static final String PROP_NULL_FORMAT = "serialization.null.format";
public static final String DEFAULT_NULL_FORMAT = "\\N";

protected final HMSExternalTable hmsTable;
private HiveTransaction hiveTransaction = null;

@@ -431,57 +412,21 @@ protected Map<String, String> getLocationProperties() throws UserException {
@Override
protected TFileAttributes getFileAttributes() throws UserException {
TFileTextScanRangeParams textParams = new TFileTextScanRangeParams();

Table table = hmsTable.getRemoteTable();
// 1. set column separator
Optional<String> fieldDelim = HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
PROP_FIELD_DELIMITER);
Optional<String> serFormat = HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
PROP_SERIALIZATION_FORMAT);
Optional<String> columnSeparator = HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
PROP_SEPARATOR_CHAR);
textParams.setColumnSeparator(HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault(
DEFAULT_FIELD_DELIMITER, fieldDelim, columnSeparator, serFormat)));
textParams.setColumnSeparator(HiveProperties.getColumnSeparator(table));
// 2. set line delimiter
Optional<String> lineDelim = HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
PROP_LINE_DELIMITER);
textParams.setLineDelimiter(HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault(
DEFAULT_LINE_DELIMITER, lineDelim)));
textParams.setLineDelimiter(HiveProperties.getLineDelimiter(table));
// 3. set mapkv delimiter
Optional<String> mapkvDelim = HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
PROP_MAP_KV_DELIMITER);
textParams.setMapkvDelimiter(HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault(
DEFAULT_MAP_KV_DELIMITER, mapkvDelim)));
textParams.setMapkvDelimiter(HiveProperties.getMapKvDelimiter(table));
// 4. set collection delimiter
Optional<String> collectionDelimHive2 = HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
PROP_COLLECTION_DELIMITER_HIVE2);
Optional<String> collectionDelimHive3 = HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
PROP_COLLECTION_DELIMITER_HIVE3);
textParams.setCollectionDelimiter(
HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault(
DEFAULT_COLLECTION_DELIMITER, collectionDelimHive2, collectionDelimHive3)));
textParams.setCollectionDelimiter(HiveProperties.getCollectionDelimiter(table));
// 5. set quote char
Map<String, String> serdeParams = hmsTable.getRemoteTable().getSd().getSerdeInfo().getParameters();
if (serdeParams.containsKey(PROP_QUOTE_CHAR)) {
textParams.setEnclose(serdeParams.get(PROP_QUOTE_CHAR).getBytes()[0]);
}
HiveProperties.getQuoteChar(table).ifPresent(d -> textParams.setEnclose(d.getBytes()[0]));
// 6. set escape delimiter
Optional<String> escapeDelim = HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
PROP_ESCAPE_DELIMITER);
if (escapeDelim.isPresent()) {
String escape = HiveMetaStoreClientHelper.getByte(
escapeDelim.get());
if (escape != null) {
textParams
.setEscape(escape.getBytes()[0]);
} else {
textParams.setEscape(DEFAULT_ESCAPE_DELIMIER.getBytes()[0]);
}
}
HiveProperties.getEscapeDelimiter(table).ifPresent(d -> textParams.setEscape(d.getBytes()[0]));
// 7. set null format
Optional<String> nullFormat = HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
PROP_NULL_FORMAT);
textParams.setNullFormat(HiveMetaStoreClientHelper.firstPresentOrDefault(
DEFAULT_NULL_FORMAT, nullFormat));
textParams.setNullFormat(HiveProperties.getNullFormat(table));

TFileAttributes fileAttributes = new TFileAttributes();
fileAttributes.setTextParams(textParams);
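The rewritten getFileAttributes keeps the old lookup precedence while delegating to HiveProperties: for the column separator, field.delim wins over separatorChar, which wins over serialization.format, with Ctrl-A ("\1") as the final fallback. firstPresentOrDefault is an existing Doris helper in HiveMetaStoreClientHelper; its presumed semantics, reconstructed below purely for illustration from how it is called:

import java.util.Optional;

class FirstPresentSketch {
    // Assumed behavior of HiveMetaStoreClientHelper.firstPresentOrDefault:
    // return the first present candidate, otherwise the default.
    @SafeVarargs
    static <T> T firstPresentOrDefault(T defaultValue, Optional<T>... candidates) {
        for (Optional<T> candidate : candidates) {
            if (candidate.isPresent()) {
                return candidate.get();
            }
        }
        return defaultValue;
    }
}

Also note that the quote and escape characters are applied with getBytes()[0], so only the first byte of the configured value takes effect.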
@@ -21,12 +21,10 @@
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.MetaNotFoundException;
import org.apache.doris.common.UserException;
import org.apache.doris.datasource.ExternalCatalog;
import org.apache.doris.datasource.iceberg.IcebergExternalTable;
import org.apache.doris.datasource.iceberg.IcebergUtils;
import org.apache.doris.planner.ColumnRange;
import org.apache.doris.thrift.TFileAttributes;

import org.apache.iceberg.Table;

@@ -74,11 +72,6 @@ public TableIf getTargetTable() {
return icebergExtTable;
}

@Override
public TFileAttributes getFileAttributes() throws UserException {
return new TFileAttributes();
}

@Override
public ExternalCatalog getCatalog() {
return icebergExtTable.getCatalog();