From 2fbef1097243c61abec6163bbec099772f726787 Mon Sep 17 00:00:00 2001 From: weixiaoxing Date: Mon, 30 Jun 2025 23:25:05 +0800 Subject: [PATCH] [fix](hive-view) Fix case sensitivity issue in Hive view SQL processing Fix uppercase identifiers in Hive view SQL causing query failures. Add HiveViewSqlTransformer to normalize SQL while preserving quoted strings. --- .../common/util/HiveViewSqlTransformer.java | 66 ++++++++++++++++ .../nereids/rules/analysis/BindRelation.java | 3 +- .../util/HiveViewSqlTransformerTest.java | 75 +++++++++++++++++++ 3 files changed, 143 insertions(+), 1 deletion(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/common/util/HiveViewSqlTransformer.java create mode 100644 fe/fe-core/src/test/java/org/apache/doris/common/util/HiveViewSqlTransformerTest.java diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/HiveViewSqlTransformer.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/HiveViewSqlTransformer.java new file mode 100644 index 00000000000000..1d2e4882abadb6 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/HiveViewSqlTransformer.java @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+package org.apache.doris.common.util;
+
+import java.util.Locale;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Normalizes Hive view SQL so that it resolves consistently in Doris.
+ * Everything outside single- or double-quoted literals (keywords, table and column
+ * names) is lowercased; the literals themselves are copied through unchanged.
+ */
+public final class HiveViewSqlTransformer {
+
+    // A quoted literal in either quote style. A backslash escapes the next character, so
+    // an escaped quote such as \' does not terminate the literal.
+    private static final Pattern QUOTE_PATTERN = Pattern.compile(
+            "\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\"|'[^'\\\\]*(?:\\\\.[^'\\\\]*)*'", Pattern.DOTALL);
+
+    private HiveViewSqlTransformer() {
+        // static utility, not instantiable
+    }
+
+    /**
+     * Lowercases all text outside quoted literals.
+     * Lowercasing uses {@link Locale#ROOT} so the result does not depend on the JVM default
+     * locale (a Turkish default would otherwise map {@code I} to a dotless i). An
+     * unterminated quote is not treated as a literal; the text after it is lowercased.
+     *
+     * @param input the original SQL string from the Hive view definition, may be null
+     * @return the SQL with non-quoted content in lowercase, or null if input is null
+     */
+    public static String format(String input) {
+        if (input == null) {
+            return null;
+        }
+        Matcher quoteMatcher = QUOTE_PATTERN.matcher(input);
+        StringBuilder result = new StringBuilder(input.length());
+        int lastIndex = 0;
+        while (quoteMatcher.find()) {
+            // Lowercase the gap before this literal, then copy the literal verbatim.
+            result.append(input.substring(lastIndex, quoteMatcher.start()).toLowerCase(Locale.ROOT));
+            result.append(input, quoteMatcher.start(), quoteMatcher.end());
+            lastIndex = quoteMatcher.end();
+        }
+        // Lowercase whatever follows the last literal (the whole input if there was none).
+        result.append(input.substring(lastIndex).toLowerCase(Locale.ROOT));
+        return result.toString();
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java
index 51722778bfcf5d..15030615b3c199 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java @@ -33,6 +33,7 @@ import org.apache.doris.common.Config; import org.apache.doris.common.IdGenerator; import org.apache.doris.common.Pair; +import org.apache.doris.common.util.HiveViewSqlTransformer; import org.apache.doris.common.util.Util; import org.apache.doris.datasource.ExternalTable; import org.apache.doris.datasource.hive.HMSExternalTable; @@ -413,7 +414,7 @@ private LogicalPlan getLogicalPlan(TableIf table, UnboundRelation unboundRelatio isView = true; String hiveCatalog = hmsTable.getCatalog().getName(); String hiveDb = hmsTable.getDatabase().getFullName(); - String ddlSql = hmsTable.getViewText(); + String ddlSql = HiveViewSqlTransformer.format(hmsTable.getViewText()); Plan hiveViewPlan = parseAndAnalyzeExternalView( hmsTable, hiveCatalog, hiveDb, ddlSql, cascadesContext); return new LogicalSubQueryAlias<>(qualifiedTableName, hiveViewPlan); diff --git a/fe/fe-core/src/test/java/org/apache/doris/common/util/HiveViewSqlTransformerTest.java b/fe/fe-core/src/test/java/org/apache/doris/common/util/HiveViewSqlTransformerTest.java new file mode 100644 index 00000000000000..b2043d548b4d06 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/common/util/HiveViewSqlTransformerTest.java @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.common.util;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class HiveViewSqlTransformerTest {
+
+    /** Asserts that formatting {@code sql} yields exactly {@code expected}. */
+    private static void assertFormatted(String expected, String sql) {
+        Assert.assertEquals(expected, HiveViewSqlTransformer.format(sql));
+    }
+
+    @Test
+    public void testFormat() {
+        // A query that is already lowercase outside its literal comes back untouched.
+        assertFormatted("select 'TEST1'", "select 'TEST1'");
+
+        // Keywords and identifiers are lowercased; the double-quoted literal keeps its case.
+        assertFormatted("select col1, col2, col3 from test_db.test_table where col1 = \"TEST\"",
+                "SELECT col1, col2, col3 from test_db.TEST_TABLE where COL1 = \"TEST\"");
+
+        // Function names are lowercased too; both quote styles appear in one statement.
+        assertFormatted("select count(col1), sum(col2), col3, col4 from test_db.test_table group by "
+                + "col3, col4 where col3 = \"TEST\" and col4='YES'",
+                "SELECT count(col1), SUM(col2), col3, col4 from TEST_DB.TEST_TABLE GROUP BY COL3, col4 "
+                + "where COL3 = \"TEST\" and COL4='YES'");
+
+        // Two single-quoted literals.
+        assertFormatted("select col1, col2 from test_db.test_table where col1 = 'TEST' and col2='YES'",
+                "SELECT COL1, COL2 from TEST_DB.TEST_TABLE where COL1 = 'TEST' and COL2='YES'");
+
+        // Two double-quoted literals.
+        assertFormatted("select col1, col2 from test_db.test_table where col1 = \"TEST\" and col2=\"YES\"",
+                "SELECT COL1, COL2 from TEST_DB.TEST_TABLE where COL1 = \"TEST\" and COL2=\"YES\"");
+
+        // One literal of each quote style.
+        assertFormatted("select col1, col2 from test_db.test_table where col1 = 'TEST' and col2=\"YES\"",
+                "SELECT COL1, COL2 from TEST_DB.TEST_TABLE where COL1 = 'TEST' and COL2=\"YES\"");
+    }
+
+    @Test
+    public void testNullInput() {
+        Assert.assertNull(HiveViewSqlTransformer.format(null));
+    }
+
+    @Test
+    public void testEmptyInput() {
+        assertFormatted("", "");
+    }
+
+    @Test
+    public void testComplexQuotedStrings() {
+        // Literals containing backslash-escaped quotes must come through byte-for-byte.
+        assertFormatted(
+                "select * from table_name where col = 'It\\'s a test' and col2 = \"Quote \\\"test\\\"\"",
+                "SELECT * FROM TABLE_NAME WHERE col = 'It\\'s a test' AND col2 = \"Quote \\\"test\\\"\"");
+    }
+}