From 48e9329f434e8c1f8d102cf7c206c48af99d0428 Mon Sep 17 00:00:00 2001 From: James Date: Fri, 18 Apr 2025 09:55:02 +0800 Subject: [PATCH] [fix](nereids)Use utf-8 when convert string like literal to double. (#50085) Use UTF-8 when converting a string-like literal to double. String-like columns in Doris are all stored with UTF-8 encoding, so we need to use UTF-8 encoding to read the column statistics min/max values. Otherwise, Java will use the system default encoding, in which case Doris may read wrong statistics min/max values. --- .../trees/expressions/literal/StringLikeLiteral.java | 3 ++- .../trees/expressions/literal/StringLikeLiteralTest.java | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/StringLikeLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/StringLikeLiteral.java index dba9247fe70213..d1d98fe218afd5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/StringLikeLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/StringLikeLiteral.java @@ -19,6 +19,7 @@ import org.apache.doris.nereids.types.DataType; +import java.nio.charset.StandardCharsets; import java.util.Objects; /** @@ -46,7 +47,7 @@ public double getDouble() { * get double value */ public static double getDouble(String str) { - byte[] bytes = str.getBytes(); + byte[] bytes = str.getBytes(StandardCharsets.UTF_8); long v = 0; int pos = 0; int len = Math.min(bytes.length, 7); diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/literal/StringLikeLiteralTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/literal/StringLikeLiteralTest.java index c1e9bc0e839b2a..d9724f6b324f60 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/literal/StringLikeLiteralTest.java +++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/literal/StringLikeLiteralTest.java @@ -31,4 +31,11 @@ public void testStrToDouble() { double d2 = StringLikeLiteral.getDouble(maxStr); Assertions.assertTrue(d1 < d2); } + + @Test + public void testUtf8() { + System.setProperty("file.encoding", "ANSI_X3.4-1968"); + double d1 = StringLikeLiteral.getDouble("一般风险准备"); + Assertions.assertEquals(d1, 6.4379158486625512E16); + } }