diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/ColumnSeparatorTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/ColumnSeparatorTest.java
index 76e5feddf52957..8426992319617e 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/analysis/ColumnSeparatorTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/ColumnSeparatorTest.java
@@ -17,11 +17,11 @@
 package org.apache.doris.analysis;
 
+import org.apache.doris.common.AnalysisException;
+
 import org.junit.Assert;
 import org.junit.Test;
 
-import org.apache.doris.common.AnalysisException;
-
 public class ColumnSeparatorTest {
 
     @Test
     public void testNormal() throws AnalysisException {
@@ -42,6 +42,16 @@ public void testNormal() throws AnalysisException {
         separator.analyze();
         Assert.assertEquals("'\\x0001'", separator.toSql());
         Assert.assertEquals("\0\1", separator.getColumnSeparator());
+
+        separator = new ColumnSeparator("|");
+        separator.analyze();
+        Assert.assertEquals("'|'", separator.toSql());
+        Assert.assertEquals("|", separator.getColumnSeparator());
+
+        separator = new ColumnSeparator("\\|");
+        separator.analyze();
+        Assert.assertEquals("'\\|'", separator.toSql());
+        Assert.assertEquals("\\|", separator.getColumnSeparator());
     }
 
     @Test(expected = AnalysisException.class)
diff --git a/fe/spark-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/SparkDpp.java b/fe/spark-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/SparkDpp.java
index bc0a6cc0eda0a1..6e5a714f1b5ce6 100644
--- a/fe/spark-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/SparkDpp.java
+++ b/fe/spark-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/SparkDpp.java
@@ -17,14 +17,15 @@
 package org.apache.doris.load.loadv2.dpp;
 
-import org.apache.commons.lang3.tuple.Pair;
+import scala.Tuple2;
+
 import org.apache.doris.common.SparkDppException;
 import org.apache.doris.load.loadv2.etl.EtlJobConfig;
-
 import com.google.common.base.Strings;
 import com.google.gson.Gson;
 import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.tuple.Pair;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileStatus;
@@ -55,6 +56,7 @@
 import org.apache.spark.sql.types.StructType;
 import org.apache.spark.storage.StorageLevel;
 import org.apache.spark.util.LongAccumulator;
+import org.apache.spark.util.SerializableConfiguration;
 
 import java.io.IOException;
 import java.math.BigInteger;
@@ -71,9 +73,6 @@
 import java.util.Queue;
 import java.util.Set;
 
-import org.apache.spark.util.SerializableConfiguration;
-import scala.Tuple2;
-
 // This class is a Spark-based data preprocessing program,
 // which will make use of the distributed compute framework of spark to
 // do ETL job/sort/preaggregate jobs in spark job
diff --git a/fe/spark-dpp/src/main/java/org/apache/doris/load/loadv2/etl/EtlJobConfig.java b/fe/spark-dpp/src/main/java/org/apache/doris/load/loadv2/etl/EtlJobConfig.java
index fe81aeb1d26bd4..9ee4d83688b70b 100644
--- a/fe/spark-dpp/src/main/java/org/apache/doris/load/loadv2/etl/EtlJobConfig.java
+++ b/fe/spark-dpp/src/main/java/org/apache/doris/load/loadv2/etl/EtlJobConfig.java
@@ -17,8 +17,9 @@
 package org.apache.doris.load.loadv2.etl;
 
-import com.google.common.collect.Lists;
+import com.google.common.base.Strings;
 import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
 import com.google.gson.ExclusionStrategy;
 import com.google.gson.FieldAttributes;
 import com.google.gson.Gson;
@@ -502,13 +503,20 @@ public EtlFileGroup(SourceType sourceType, List<String> filePaths, List<String>
             this.filePaths = filePaths;
             this.fileFieldNames = fileFieldNames;
             this.columnsFromPath = columnsFromPath;
-            this.columnSeparator = columnSeparator;
             this.lineDelimiter = lineDelimiter;
             this.isNegative = isNegative;
             this.fileFormat = fileFormat;
             this.columnMappings = columnMappings;
             this.where = where;
             this.partitions = partitions;
+
+            // Convert some special characters in column separator
+            char sep = Strings.isNullOrEmpty(columnSeparator) ? '\t' : columnSeparator.charAt(0);
+            if (".$|()[]{}^?*+\\".indexOf(sep) != -1) {
+                this.columnSeparator = new String(new char[]{'\\', sep});
+            } else {
+                this.columnSeparator = Character.toString(sep);
+            }
         }
 
         // for data from table
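The escaping added to `EtlFileGroup` matters because the separator is presumably handed to a regex-based split on the Spark side (a `String.split`-style call), where an unescaped `|`, `.`, or `$` acts as a regex metacharacter instead of a literal delimiter. The snippet below is a minimal, self-contained sketch of that behavior: `escapeSeparator` mirrors the logic added in this patch, while the `SeparatorEscapeDemo` class and method name are illustrative only and the use of `String.split` is an assumption, not code taken from SparkDpp.

```java
import java.util.Arrays;

// Standalone demo (not part of the patch) showing why the column separator
// must be escaped before it is used as a regex pattern.
public class SeparatorEscapeDemo {

    // Mirrors the escaping added to EtlFileGroup's constructor in this patch.
    static String escapeSeparator(String columnSeparator) {
        char sep = (columnSeparator == null || columnSeparator.isEmpty())
                ? '\t' : columnSeparator.charAt(0);
        // Same metacharacter set as the patch: prefix regex-special chars with '\'.
        if (".$|()[]{}^?*+\\".indexOf(sep) != -1) {
            return new String(new char[]{'\\', sep});
        }
        return Character.toString(sep);
    }

    public static void main(String[] args) {
        String line = "a|b|c";

        // Unescaped "|" is regex alternation of two empty branches, which matches
        // the empty string at every position; on Java 8+ this prints [a, |, b, |, c],
        // i.e. every character becomes its own one-character field.
        System.out.println(Arrays.toString(line.split("|")));

        // The escaped form "\\|" matches the literal pipe and prints [a, b, c].
        System.out.println(Arrays.toString(line.split(escapeSeparator("|"))));
    }
}
```

Doing the escaping once in the `EtlFileGroup` constructor keeps the DPP side free of per-separator special cases: whatever the user configured, the value stored in the job config is already safe to use as a pattern.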