diff --git a/be/src/exec/broker_scanner.cpp b/be/src/exec/broker_scanner.cpp index c394424092799a..d9453fecf03d9c 100644 --- a/be/src/exec/broker_scanner.cpp +++ b/be/src/exec/broker_scanner.cpp @@ -339,19 +339,20 @@ void BrokerScanner::split_line(const Slice& line) { delete[] ptr; } else { const char* value = line.data; - size_t start = 0; // point to the start pos of next col value. - size_t curpos = 0; // point to the start pos of separator matching sequence. - size_t p1 = 0; // point to the current pos of separator matching sequence. + size_t start = 0; // point to the start pos of next col value. + size_t curpos = 0; // point to the start pos of separator matching sequence. + size_t p1 = 0; // point to the current pos of separator matching sequence. + size_t non_space = 0; // point to the pos just past the last non-space character of the col value. // Separator: AAAA // - // curpos + // p1 // ▼ // AAAA // 1000AAAA2000AAAA // ▲ ▲ // Start │ - // p1 + // curpos while (curpos < line.size) { if (*(value + curpos + p1) != _value_separator[p1]) { @@ -362,16 +363,30 @@ void BrokerScanner::split_line(const Slice& line) { p1++; if (p1 == _value_separator_length) { // Match a separator - _split_values.emplace_back(value + start, curpos - start); + non_space = curpos; + // Trim trailing spaces. Be consistent with Hive and Trino's behavior. 
+ if (_state->trim_tailing_spaces_for_external_table_query()) { + while (non_space > start && *(value + non_space - 1) == ' ') { + non_space--; + } + } + _split_values.emplace_back(value + start, non_space - start); start = curpos + _value_separator_length; curpos = start; p1 = 0; + non_space = 0; } } } CHECK(curpos == line.size) << curpos << " vs " << line.size; - _split_values.emplace_back(value + start, curpos - start); + non_space = curpos; + if (_state->trim_tailing_spaces_for_external_table_query()) { + while (non_space > start && *(value + non_space - 1) == ' ') { + non_space--; + } + } + _split_values.emplace_back(value + start, non_space - start); } } diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h index 8f5c0b86a1fdec..eed32d8b82f568 100644 --- a/be/src/runtime/runtime_state.h +++ b/be/src/runtime/runtime_state.h @@ -326,6 +326,10 @@ class RuntimeState { bool enable_vectorized_exec() const { return _query_options.enable_vectorized_engine; } + bool trim_tailing_spaces_for_external_table_query() const { + return _query_options.trim_tailing_spaces_for_external_table_query; + } + bool return_object_data_as_binary() const { return _query_options.return_object_data_as_binary; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index c6caa2cb3d2520..4d2a42d4e52d6e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -180,6 +180,8 @@ public class SessionVariable implements Serializable, Writable { public static final String ENABLE_PROJECTION = "enable_projection"; + public static final String TRIM_TAILING_SPACES_FOR_EXTERNAL_TABLE_QUERY = "trim_tailing_spaces_for_external_table_query"; + // session origin value public Map sessionOriginValue = new HashMap(); // check stmt is or not [select /*+ SET_VAR(...)*/ ...] 
@@ -442,6 +444,9 @@ public class SessionVariable implements Serializable, Writable { @VariableMgr.VarAttr(name = ENABLE_PROJECTION) private boolean enableProjection = false; + @VariableMgr.VarAttr(name = TRIM_TAILING_SPACES_FOR_EXTERNAL_TABLE_QUERY, needForward = true) + public boolean trimTailingSpacesForExternalTableQuery = false; + public String getBlockEncryptionMode() { return blockEncryptionMode; } @@ -906,6 +911,14 @@ public boolean isEnableProjection() { return enableProjection; } + public boolean isTrimTailingSpacesForExternalTableQuery() { + return trimTailingSpacesForExternalTableQuery; + } + + public void setTrimTailingSpacesForExternalTableQuery(boolean trimTailingSpacesForExternalTableQuery) { + this.trimTailingSpacesForExternalTableQuery = trimTailingSpacesForExternalTableQuery; + } + // Serialize to thrift object // used for rest api public TQueryOptions toThrift() { @@ -923,6 +936,7 @@ public TQueryOptions toThrift() { tResult.setCodegenLevel(codegenLevel); tResult.setEnableVectorizedEngine(enableVectorizedEngine); tResult.setReturnObjectDataAsBinary(returnObjectDataAsBinary); + tResult.setTrimTailingSpacesForExternalTableQuery(trimTailingSpacesForExternalTableQuery); tResult.setBatchSize(batchSize); tResult.setDisableStreamPreaggregations(disableStreamPreaggregations); diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index a67a43d2d46bbb..4787513baff845 100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -160,6 +160,9 @@ struct TQueryOptions { // show bitmap data in result, if use this in mysql cli may make the terminal // output corrupted character 43: optional bool return_object_data_as_binary = false + + // trim trailing spaces while querying external table and stream load + 44: optional bool trim_tailing_spaces_for_external_table_query = false }