From da1043d0171772408c5dec44fa1904bb34ba3131 Mon Sep 17 00:00:00 2001 From: airborne12 Date: Tue, 13 Jan 2026 22:12:40 +0800 Subject: [PATCH 1/6] [feature](search) Add multi-field search support with fields parameter Add `fields` parameter to the search() function, allowing queries to search across multiple fields with a single query term. Similar to Elasticsearch's query_string `fields` parameter. Usage examples: - search('hello', '{"fields":["title","content"]}') -> Equivalent to: (title:hello OR content:hello) - search('hello world', '{"fields":["title","content"],"default_operator":"and"}') -> Equivalent to: (title:hello OR content:hello) AND (title:world OR content:world) - search('a AND b', '{"fields":["title","content"],"mode":"lucene"}') -> Multi-field with Lucene boolean semantics Key changes: - Extended SearchOptions with `fields` array and `isMultiFieldMode()` - Added common helper methods for DRY compliance (parseWithVisitor, expandItemAcrossFields) - Added FieldTrackingVisitor interface for polymorphic visitor handling - Added parseDslMultiFieldMode() and parseDslMultiFieldLuceneMode() - Added comprehensive unit tests (15+ test cases) - Added regression tests (18 test cases) Co-Authored-By: Claude Opus 4.5 --- .../functions/scalar/SearchDslParser.java | 448 +++++++++++++++++- .../functions/scalar/SearchDslParserTest.java | 297 +++++++++++- .../data/search/test_search_multi_field.out | 87 ++++ .../search/test_search_multi_field.groovy | 229 +++++++++ 4 files changed, 1045 insertions(+), 16 deletions(-) create mode 100644 regression-test/data/search/test_search_multi_field.out create mode 100644 regression-test/suites/search/test_search_multi_field.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java index b4c880546a700c..a29bedede80882 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java @@ -36,6 +36,8 @@ import java.util.List; import java.util.Objects; import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; /** * Search DSL Parser using ANTLR-generated parser. @@ -74,7 +76,9 @@ public static QsPlan parseDsl(String dsl) { * - default_operator: "and" or "or" for multi-term queries * - mode: "standard" or "lucene" * - minimum_should_match: integer for Lucene mode + * - fields: array of field names for multi-field search * Example: '{"default_field":"title","mode":"lucene","minimum_should_match":0}' + * Example: '{"fields":["title","content"],"default_operator":"and"}' * @return Parsed QsPlan */ public static QsPlan parseDsl(String dsl, String optionsJson) { @@ -87,9 +91,19 @@ public static QsPlan parseDsl(String dsl, String optionsJson) { // Use Lucene mode parser if specified if (searchOptions.isLuceneMode()) { + // Multi-field + Lucene mode: first expand DSL, then parse with Lucene semantics + if (searchOptions.isMultiFieldMode()) { + return parseDslMultiFieldLuceneMode(dsl, searchOptions.getFields(), + defaultOperator, searchOptions); + } return parseDslLuceneMode(dsl, defaultField, defaultOperator, searchOptions); } + // Multi-field mode parsing (standard mode) + if (searchOptions.isMultiFieldMode()) { + return parseDslMultiFieldMode(dsl, searchOptions.getFields(), defaultOperator); + } + // Standard mode parsing return parseDslStandardMode(dsl, defaultField, defaultOperator); } @@ -461,6 +475,361 @@ private static boolean containsWildcard(String term) { return false; } + // ============ Common Helper Methods ============ + + /** + * Create an error QsPlan for empty DSL input. 
+ */ + private static QsPlan createEmptyDslErrorPlan() { + return new QsPlan(new QsNode(QsClauseType.TERM, "error", "empty_dsl"), new ArrayList<>()); + } + + /** + * Validate that DSL is not null or empty. + * @return true if DSL is valid (non-null, non-empty) + */ + private static boolean isValidDsl(String dsl) { + return dsl != null && !dsl.trim().isEmpty(); + } + + /** + * Validate fields list for multi-field mode. + * @throws IllegalArgumentException if fields is null or empty + */ + private static void validateFieldsList(List fields) { + if (fields == null || fields.isEmpty()) { + throw new IllegalArgumentException("fields list cannot be null or empty for multi-field mode"); + } + } + + /** + * Common ANTLR parsing helper with visitor pattern. + * Reduces code duplication across parsing methods. + * + * @param expandedDsl The expanded DSL string to parse + * @param visitorFactory Factory function to create the appropriate visitor + * @param originalDsl Original DSL for error messages + * @param modeDescription Description of the parsing mode for error messages + * @return Parsed QsPlan + */ + private static QsPlan parseWithVisitor(String expandedDsl, + Function visitorFactory, + String originalDsl, String modeDescription) { + try { + // Create ANTLR lexer and parser + SearchLexer lexer = new SearchLexer(new ANTLRInputStream(expandedDsl)); + CommonTokenStream tokens = new CommonTokenStream(lexer); + SearchParser parser = new SearchParser(tokens); + + // Add error listener + parser.removeErrorListeners(); + parser.addErrorListener(new org.antlr.v4.runtime.BaseErrorListener() { + @Override + public void syntaxError(org.antlr.v4.runtime.Recognizer recognizer, + Object offendingSymbol, + int line, int charPositionInLine, + String msg, org.antlr.v4.runtime.RecognitionException e) { + throw new RuntimeException("Invalid search DSL syntax at line " + line + + ":" + charPositionInLine + " " + msg); + } + }); + + ParseTree tree = parser.search(); + if (tree == null) { + 
throw new RuntimeException("Invalid search DSL syntax"); + } + + // Build AST using provided visitor + FieldTrackingVisitor visitor = visitorFactory.apply(parser); + QsNode root = visitor.visit(tree); + + // Extract field bindings + Set fieldNames = visitor.getFieldNames(); + List bindings = new ArrayList<>(); + int slotIndex = 0; + for (String fieldName : fieldNames) { + bindings.add(new QsFieldBinding(fieldName, slotIndex++)); + } + + return new QsPlan(root, bindings); + + } catch (Exception e) { + LOG.error("Failed to parse search DSL in {}: '{}' (expanded: '{}')", + modeDescription, originalDsl, expandedDsl, e); + throw new RuntimeException("Invalid search DSL syntax: " + originalDsl + + ". Error: " + e.getMessage(), e); + } + } + + /** + * Expand a single item (term or function) across multiple fields with OR. + * Example: "hello" + ["title", "content"] -> "(title:hello OR content:hello)" + * Example: "EXACT(foo)" + ["title", "content"] -> "(title:EXACT(foo) OR content:EXACT(foo))" + * + * @param item The term or function to expand + * @param fields List of field names + * @return Expanded DSL string + */ + private static String expandItemAcrossFields(String item, List fields) { + if (fields.size() == 1) { + return fields.get(0) + ":" + item; + } + return fields.stream() + .map(field -> field + ":" + item) + .collect(Collectors.joining(" OR ", "(", ")")); + } + + // ============ Multi-Field Expansion Methods ============ + + /** + * Parse DSL in multi-field mode. + * Each term without field prefix is expanded to OR across all specified fields. 
+ * + * @param dsl DSL query string + * @param fields List of field names to search + * @param defaultOperator "and" or "or" for joining term groups + * @return Parsed QsPlan + */ + private static QsPlan parseDslMultiFieldMode(String dsl, List fields, String defaultOperator) { + if (!isValidDsl(dsl)) { + return createEmptyDslErrorPlan(); + } + validateFieldsList(fields); + + String expandedDsl = expandMultiFieldDsl(dsl.trim(), fields, normalizeDefaultOperator(defaultOperator)); + return parseWithVisitor(expandedDsl, parser -> new QsAstBuilder(), dsl, "multi-field mode"); + } + + /** + * Parse DSL in multi-field mode with Lucene boolean semantics. + * First expands DSL across fields, then applies Lucene-style MUST/SHOULD/MUST_NOT logic. + * + * @param dsl DSL query string + * @param fields List of field names to search + * @param defaultOperator "and" or "or" for joining term groups + * @param options Search options containing Lucene mode settings + * @return Parsed QsPlan with Lucene boolean semantics + */ + private static QsPlan parseDslMultiFieldLuceneMode(String dsl, List fields, + String defaultOperator, SearchOptions options) { + if (!isValidDsl(dsl)) { + return createEmptyDslErrorPlan(); + } + validateFieldsList(fields); + + String expandedDsl = expandMultiFieldDsl(dsl.trim(), fields, normalizeDefaultOperator(defaultOperator)); + return parseWithVisitor(expandedDsl, parser -> new QsLuceneModeAstBuilder(options), + dsl, "multi-field Lucene mode"); + } + + /** + * Expand simplified DSL to multi-field format. + * Each term without field prefix is expanded to OR across all fields. 
+ * + * @param dsl Simple DSL string + * @param fields List of field names to search + * @param defaultOperator "and" or "or" for joining term groups + * @return Expanded full DSL + */ + private static String expandMultiFieldDsl(String dsl, List fields, String defaultOperator) { + if (fields == null || fields.isEmpty()) { + throw new IllegalArgumentException("fields list cannot be null or empty"); + } + + if (fields.size() == 1) { + // Single field - delegate to existing method + return expandSimplifiedDsl(dsl, fields.get(0), defaultOperator); + } + + // 1. If DSL already contains field names, handle mixed case + if (containsFieldReference(dsl)) { + return expandOperatorExpressionAcrossFields(dsl, fields); + } + + // 2. Check if DSL starts with a function keyword (EXACT, ANY, ALL, IN) + if (startsWithFunction(dsl)) { + // Expand function across fields: EXACT(foo) -> (f1:EXACT(foo) OR f2:EXACT(foo)) + return expandFunctionAcrossFields(dsl, fields); + } + + // 3. Check for explicit boolean operators in DSL + if (containsExplicitOperators(dsl)) { + return expandOperatorExpressionAcrossFields(dsl, fields); + } + + // 4. Tokenize and analyze terms + List terms = tokenizeDsl(dsl); + if (terms.isEmpty()) { + return expandTermAcrossFields(dsl, fields); + } + + // 5. Single term - expand across fields + if (terms.size() == 1) { + return expandTermAcrossFields(terms.get(0), fields); + } + + // 6. Multiple terms - expand each across fields, join with operator + String joinOperator = "and".equals(defaultOperator) ? " AND " : " OR "; + StringBuilder result = new StringBuilder(); + for (int i = 0; i < terms.size(); i++) { + if (i > 0) { + result.append(joinOperator); + } + result.append(expandTermAcrossFields(terms.get(i), fields)); + } + return result.toString(); + } + + /** + * Expand a single term across multiple fields with OR. + * Example: "hello" + ["title", "content"] -> "(title:hello OR content:hello)" + * Delegates to expandItemAcrossFields for DRY compliance. 
+ */ + private static String expandTermAcrossFields(String term, List fields) { + return expandItemAcrossFields(term, fields); + } + + /** + * Expand a function call across multiple fields. + * Example: "EXACT(foo bar)" + ["title", "content"] -> "(title:EXACT(foo bar) OR content:EXACT(foo bar))" + * Delegates to expandItemAcrossFields for DRY compliance. + */ + private static String expandFunctionAcrossFields(String dsl, List fields) { + return expandItemAcrossFields(dsl, fields); + } + + /** + * Handle DSL with explicit operators (AND/OR/NOT). + * Each operand without field prefix is expanded across fields. + * Example: "hello AND world" + ["title", "content"] -> + * "(title:hello OR content:hello) AND (title:world OR content:world)" + */ + private static String expandOperatorExpressionAcrossFields(String dsl, List fields) { + StringBuilder result = new StringBuilder(); + StringBuilder currentTerm = new StringBuilder(); + int i = 0; + + while (i < dsl.length()) { + // Skip whitespace + while (i < dsl.length() && Character.isWhitespace(dsl.charAt(i))) { + i++; + } + if (i >= dsl.length()) { + break; + } + + // Handle escape sequences + if (dsl.charAt(i) == '\\' && i + 1 < dsl.length()) { + currentTerm.append(dsl.charAt(i)); + currentTerm.append(dsl.charAt(i + 1)); + i += 2; + continue; + } + + // Handle parentheses - include entire group as a term + if (dsl.charAt(i) == '(') { + int depth = 1; + currentTerm.append('('); + i++; + while (i < dsl.length() && depth > 0) { + char c = dsl.charAt(i); + if (c == '(') { + depth++; + } else if (c == ')') { + depth--; + } + currentTerm.append(c); + i++; + } + continue; + } + + // Try to match operators + String remaining = dsl.substring(i); + String upperRemaining = remaining.toUpperCase(); + + // Check for AND operator + if (matchesOperatorWord(upperRemaining, "AND")) { + flushTermAcrossFields(result, currentTerm, fields); + appendWithSpace(result, "AND"); + i += 3; + continue; + } + + // Check for OR operator + if 
(matchesOperatorWord(upperRemaining, "OR")) { + flushTermAcrossFields(result, currentTerm, fields); + appendWithSpace(result, "OR"); + i += 2; + continue; + } + + // Check for NOT operator + if (matchesOperatorWord(upperRemaining, "NOT")) { + flushTermAcrossFields(result, currentTerm, fields); + appendWithSpace(result, "NOT"); + i += 3; + continue; + } + + // Accumulate term character + currentTerm.append(dsl.charAt(i)); + i++; + } + + // Flush final term + flushTermAcrossFields(result, currentTerm, fields); + + return result.toString().trim(); + } + + /** + * Check if the string starts with an operator word followed by whitespace or end of string. + */ + private static boolean matchesOperatorWord(String upper, String op) { + if (!upper.startsWith(op)) { + return false; + } + int opLen = op.length(); + // Must be followed by whitespace or end of string + return upper.length() == opLen || Character.isWhitespace(upper.charAt(opLen)); + } + + /** + * Flush accumulated term, expanding across fields if needed. 
+ */ + private static void flushTermAcrossFields(StringBuilder result, StringBuilder term, List fields) { + String trimmed = term.toString().trim(); + if (!trimmed.isEmpty()) { + // Check if term already has a field reference + if (containsFieldReference(trimmed)) { + appendWithSpace(result, trimmed); + } else if (trimmed.startsWith("(") && trimmed.endsWith(")")) { + // Parenthesized expression - recursively expand + String inner = trimmed.substring(1, trimmed.length() - 1).trim(); + String expanded = expandOperatorExpressionAcrossFields(inner, fields); + appendWithSpace(result, "(" + expanded + ")"); + } else if (startsWithFunction(trimmed)) { + // Function - expand across fields + appendWithSpace(result, expandFunctionAcrossFields(trimmed, fields)); + } else { + // Regular term - expand across fields + appendWithSpace(result, expandTermAcrossFields(trimmed, fields)); + } + term.setLength(0); + } + } + + /** + * Append text to StringBuilder with a leading space if not empty. + */ + private static void appendWithSpace(StringBuilder sb, String text) { + if (sb.length() > 0) { + sb.append(" "); + } + sb.append(text); + } + /** * Clause types supported */ @@ -490,10 +859,20 @@ public enum QsOccur { MUST_NOT // Term must not appear (equivalent to -term) } + /** + * Common interface for AST builders that track field names. + * Both QsAstBuilder and QsLuceneModeAstBuilder implement this interface. 
+ */ + private interface FieldTrackingVisitor { + Set getFieldNames(); + + QsNode visit(ParseTree tree); + } + /** * ANTLR visitor to build QsNode AST from parse tree */ - private static class QsAstBuilder extends SearchParserBaseVisitor { + private static class QsAstBuilder extends SearchParserBaseVisitor implements FieldTrackingVisitor { private final Set fieldNames = new HashSet<>(); // Context stack to track current field name during parsing private String currentFieldName = null; @@ -676,7 +1055,9 @@ public QsNode visitSearchValue(SearchParser.SearchValueContext ctx) { return createExactNode(fieldName, ctx.exactValue().getText()); } - // Fallback for unknown types + // Fallback for unknown types - should not normally reach here + LOG.warn("Unexpected search value type encountered, falling back to TERM: field={}, text={}", + fieldName, ctx.getText()); return createTermNode(fieldName, ctx.getText()); } @@ -747,8 +1128,10 @@ private QsNode createAnyAllNode(String fieldName, String anyAllText) { return new QsNode(QsClauseType.ALL, fieldName, sanitizedContent); } - // Fallback to ANY for unknown cases - return new QsNode(QsClauseType.ANY, fieldName, sanitizedContent); + // Unknown ANY/ALL clause type - this should not happen with valid grammar + throw new IllegalArgumentException( + "Unknown ANY/ALL clause type: '" + anyAllText + "'. " + + "Expected ANY(...) 
or ALL(...)."); } private QsNode createExactNode(String fieldName, String exactText) { @@ -808,8 +1191,8 @@ public static QsPlan fromJson(String json) { try { return JSON_MAPPER.readValue(json, QsPlan.class); } catch (JsonProcessingException e) { - LOG.warn("Failed to parse QsPlan from JSON: {}", json, e); - return new QsPlan(new QsNode(QsClauseType.TERM, "error", null), new ArrayList<>()); + throw new IllegalArgumentException( + "Failed to parse search plan from JSON: " + e.getMessage(), e); } } @@ -820,8 +1203,7 @@ public String toJson() { try { return JSON_MAPPER.writeValueAsString(this); } catch (JsonProcessingException e) { - LOG.warn("Failed to serialize QsPlan to JSON", e); - return "{}"; + throw new RuntimeException("Failed to serialize QsPlan to JSON", e); } } @@ -980,12 +1362,14 @@ public boolean equals(Object o) { * - default_operator: "and" or "or" for multi-term queries (default: "or") * - mode: "standard" (default) or "lucene" (ES/Lucene-style boolean parsing) * - minimum_should_match: integer for Lucene mode (default: 0 for filter context) + * - fields: array of field names for multi-field search (mutually exclusive with default_field) */ public static class SearchOptions { private String defaultField = null; private String defaultOperator = null; private String mode = "standard"; private Integer minimumShouldMatch = null; + private List fields = null; public String getDefaultField() { return defaultField; @@ -1022,6 +1406,22 @@ public Integer getMinimumShouldMatch() { public void setMinimumShouldMatch(Integer minimumShouldMatch) { this.minimumShouldMatch = minimumShouldMatch; } + + public List getFields() { + return fields; + } + + public void setFields(List fields) { + this.fields = fields; + } + + /** + * Check if multi-field mode is enabled. + * Multi-field mode is active when fields array is non-null and non-empty. 
+ */ + public boolean isMultiFieldMode() { + return fields != null && !fields.isEmpty(); + } } /** @@ -1031,6 +1431,7 @@ public void setMinimumShouldMatch(Integer minimumShouldMatch) { * - default_operator: "and" or "or" for multi-term queries * - mode: "standard" or "lucene" * - minimum_should_match: integer for Lucene mode + * - fields: array of field names for multi-field search */ private static SearchOptions parseOptions(String optionsJson) { SearchOptions options = new SearchOptions(); @@ -1054,8 +1455,34 @@ private static SearchOptions parseOptions(String optionsJson) { if (jsonNode.has("minimum_should_match")) { options.setMinimumShouldMatch(jsonNode.get("minimum_should_match").asInt()); } + // Parse fields array for multi-field search + if (jsonNode.has("fields")) { + com.fasterxml.jackson.databind.JsonNode fieldsNode = jsonNode.get("fields"); + if (fieldsNode.isArray()) { + List fieldsList = new ArrayList<>(); + for (com.fasterxml.jackson.databind.JsonNode fieldNode : fieldsNode) { + String fieldValue = fieldNode.asText().trim(); + if (!fieldValue.isEmpty()) { + fieldsList.add(fieldValue); + } + } + if (!fieldsList.isEmpty()) { + options.setFields(fieldsList); + } + } + } + + // Validation: fields and default_field are mutually exclusive + if (options.getFields() != null && !options.getFields().isEmpty() + && options.getDefaultField() != null && !options.getDefaultField().isEmpty()) { + throw new IllegalArgumentException( + "'fields' and 'default_field' are mutually exclusive. Use only one."); + } + } catch (IllegalArgumentException e) { + throw e; } catch (Exception e) { - LOG.warn("Failed to parse search options JSON: {}", optionsJson, e); + throw new IllegalArgumentException( + "Invalid search options JSON: '" + optionsJson + "'. Error: " + e.getMessage(), e); } return options; @@ -1138,7 +1565,8 @@ public void syntaxError(org.antlr.v4.runtime.Recognizer recognizer, * ANTLR visitor for Lucene-mode AST building. 
* Transforms standard boolean expressions into Lucene-style OCCUR_BOOLEAN queries. */ - private static class QsLuceneModeAstBuilder extends SearchParserBaseVisitor { + private static class QsLuceneModeAstBuilder extends SearchParserBaseVisitor + implements FieldTrackingVisitor { private final Set fieldNames = new HashSet<>(); private final SearchOptions options; private String currentFieldName = null; diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java index 6279aead20a708..c38deaa18887ef 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java @@ -741,14 +741,12 @@ public void testStandardModeUnchanged() { @Test public void testLuceneModeInvalidJson() { - // Test: invalid JSON options should fall back to standard mode + // Test: invalid JSON options should throw an exception String dsl = "field:a AND field:b"; String options = "not valid json"; - QsPlan plan = SearchDslParser.parseDsl(dsl, options); - - Assertions.assertNotNull(plan); - // Should fall back to standard mode (AND type) - Assertions.assertEquals(QsClauseType.AND, plan.root.type); + Assertions.assertThrows(IllegalArgumentException.class, () -> { + SearchDslParser.parseDsl(dsl, options); + }); } @Test @@ -925,4 +923,291 @@ public void testNoEscapeWithoutBackslash() { Assertions.assertEquals(QsClauseType.TERM, plan.root.type); Assertions.assertEquals("normalterm", plan.root.value); } + + // ============ Tests for Multi-Field Search ============ + + @Test + public void testMultiFieldSimpleTerm() { + // Test: "hello" + fields=["title","content"] → "(title:hello OR content:hello)" + String dsl = "hello"; + String options = 
"{\"fields\":[\"title\",\"content\"]}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.OR, plan.root.type); + Assertions.assertEquals(2, plan.root.children.size()); + + // Verify both fields are in bindings + Assertions.assertEquals(2, plan.fieldBindings.size()); + Assertions.assertTrue(plan.fieldBindings.stream() + .anyMatch(b -> "title".equals(b.fieldName))); + Assertions.assertTrue(plan.fieldBindings.stream() + .anyMatch(b -> "content".equals(b.fieldName))); + } + + @Test + public void testMultiFieldMultiTermAnd() { + // Test: "hello world" + fields=["title","content"] + default_operator="and" + // → "(title:hello OR content:hello) AND (title:world OR content:world)" + String dsl = "hello world"; + String options = "{\"fields\":[\"title\",\"content\"],\"default_operator\":\"and\"}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.AND, plan.root.type); + Assertions.assertEquals(2, plan.root.children.size()); + + // Each child should be an OR of two fields + for (QsNode child : plan.root.children) { + Assertions.assertEquals(QsClauseType.OR, child.type); + Assertions.assertEquals(2, child.children.size()); + } + } + + @Test + public void testMultiFieldMultiTermOr() { + // Test: "hello world" + fields=["title","content"] + default_operator="or" + // → "(title:hello OR content:hello) OR (title:world OR content:world)" + String dsl = "hello world"; + String options = "{\"fields\":[\"title\",\"content\"],\"default_operator\":\"or\"}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.OR, plan.root.type); + } + + @Test + public void testMultiFieldExplicitAndOperator() { + // Test: "hello AND world" + fields=["title","content"] + String dsl = "hello AND world"; + String options = "{\"fields\":[\"title\",\"content\"]}"; + 
QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.AND, plan.root.type); + } + + @Test + public void testMultiFieldMixedWithExplicitField() { + // Test: "hello AND category:tech" + fields=["title","content"] + // → "(title:hello OR content:hello) AND category:tech" + String dsl = "hello AND category:tech"; + String options = "{\"fields\":[\"title\",\"content\"]}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.AND, plan.root.type); + Assertions.assertEquals(2, plan.root.children.size()); + + // Verify "category" is preserved + Assertions.assertTrue(plan.fieldBindings.stream() + .anyMatch(b -> "category".equals(b.fieldName))); + } + + @Test + public void testMultiFieldWithWildcard() { + // Test: "hello*" + fields=["title","content"] + String dsl = "hello*"; + String options = "{\"fields\":[\"title\",\"content\"]}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.OR, plan.root.type); + Assertions.assertEquals(2, plan.root.children.size()); + + // Both should be PREFIX type + for (QsNode child : plan.root.children) { + Assertions.assertEquals(QsClauseType.PREFIX, child.type); + } + } + + @Test + public void testMultiFieldWithExactFunction() { + // Test: "EXACT(foo bar)" + fields=["title","content"] + String dsl = "EXACT(foo bar)"; + String options = "{\"fields\":[\"title\",\"content\"]}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.OR, plan.root.type); + Assertions.assertEquals(2, plan.root.children.size()); + + // Both should be EXACT type + for (QsNode child : plan.root.children) { + Assertions.assertEquals(QsClauseType.EXACT, child.type); + } + } + + @Test + public void testMultiFieldThreeFields() { + // Test: "hello" + 
fields=["title","content","tags"] + String dsl = "hello"; + String options = "{\"fields\":[\"title\",\"content\",\"tags\"]}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.OR, plan.root.type); + Assertions.assertEquals(3, plan.root.children.size()); + Assertions.assertEquals(3, plan.fieldBindings.size()); + } + + @Test + public void testFieldsAndDefaultFieldMutuallyExclusive() { + // Test: specifying both fields and default_field should throw error + String dsl = "hello"; + String options = "{\"fields\":[\"title\",\"content\"],\"default_field\":\"tags\"}"; + + IllegalArgumentException exception = Assertions.assertThrows(IllegalArgumentException.class, () -> { + SearchDslParser.parseDsl(dsl, options); + }); + Assertions.assertTrue(exception.getMessage().contains("mutually exclusive")); + } + + @Test + public void testSingleFieldInArray() { + // Test: single field in array should work like default_field + String dsl = "hello"; + String options = "{\"fields\":[\"title\"]}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.TERM, plan.root.type); + Assertions.assertEquals("title", plan.root.field); + Assertions.assertEquals(1, plan.fieldBindings.size()); + } + + @Test + public void testMultiFieldNotOperator() { + // Test: "NOT hello" + fields=["title","content"] + String dsl = "NOT hello"; + String options = "{\"fields\":[\"title\",\"content\"]}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.NOT, plan.root.type); + Assertions.assertEquals(1, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.OR, plan.root.children.get(0).type); + } + + // ============ Tests for Multi-Field + Lucene Mode ============ + + @Test + public void testMultiFieldLuceneModeSimpleAnd() { + // Test: "a AND b" + 
fields=["title","content"] + lucene mode + // Expanded: "(title:a OR content:a) AND (title:b OR content:b)" + // With Lucene semantics: both groups are MUST + String dsl = "a AND b"; + String options = "{\"fields\":[\"title\",\"content\"],\"mode\":\"lucene\",\"minimum_should_match\":0}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.root.type); + + // Should have 2 children (two OR groups), both with MUST + // Note: In Lucene mode, OR groups are also wrapped as OCCUR_BOOLEAN + Assertions.assertEquals(2, plan.root.children.size()); + for (QsNode child : plan.root.children) { + Assertions.assertEquals(QsOccur.MUST, child.occur); + // The child is OCCUR_BOOLEAN wrapping the OR group + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, child.type); + } + } + + @Test + public void testMultiFieldLuceneModeSimpleOr() { + // Test: "a OR b" + fields=["title","content"] + lucene mode + // Expanded: "(title:a OR content:a) OR (title:b OR content:b)" + // With Lucene semantics: both groups are SHOULD + String dsl = "a OR b"; + String options = "{\"fields\":[\"title\",\"content\"],\"mode\":\"lucene\"}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.root.type); + + // Should have 2 children, both with SHOULD + Assertions.assertEquals(2, plan.root.children.size()); + for (QsNode child : plan.root.children) { + Assertions.assertEquals(QsOccur.SHOULD, child.occur); + } + + // minimum_should_match should be 1 + Assertions.assertEquals(Integer.valueOf(1), plan.root.minimumShouldMatch); + } + + @Test + public void testMultiFieldLuceneModeAndOrMixed() { + // Test: "a AND b OR c" + fields=["title","content"] + lucene mode + minimum_should_match=0 + // With Lucene semantics and minimum_should_match=0: SHOULD groups are discarded + // Only "a" (MUST) remains - wrapped in 
OCCUR_BOOLEAN + String dsl = "a AND b OR c"; + String options = "{\"fields\":[\"title\",\"content\"],\"mode\":\"lucene\",\"minimum_should_match\":0}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + // With minimum_should_match=0, only (title:a OR content:a) remains + // In Lucene mode, this is wrapped as OCCUR_BOOLEAN + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.root.type); + } + + @Test + public void testMultiFieldLuceneModeWithNot() { + // Test: "a AND NOT b" + fields=["title","content"] + lucene mode + // Expanded: "(title:a OR content:a) AND NOT (title:b OR content:b)" + String dsl = "a AND NOT b"; + String options = "{\"fields\":[\"title\",\"content\"],\"mode\":\"lucene\",\"minimum_should_match\":0}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.root.type); + + // Should have 2 children: a (MUST), b (MUST_NOT) + Assertions.assertEquals(2, plan.root.children.size()); + + // Find MUST and MUST_NOT children + boolean hasMust = plan.root.children.stream().anyMatch(c -> c.occur == QsOccur.MUST); + boolean hasMustNot = plan.root.children.stream().anyMatch(c -> c.occur == QsOccur.MUST_NOT); + Assertions.assertTrue(hasMust); + Assertions.assertTrue(hasMustNot); + } + + @Test + public void testMultiFieldLuceneModeSingleTerm() { + // Test: single term with multi-field + lucene mode + String dsl = "hello"; + String options = "{\"fields\":[\"title\",\"content\"],\"mode\":\"lucene\"}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + // In Lucene mode, even single term OR groups are wrapped as OCCUR_BOOLEAN + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.root.type); + // The OCCUR_BOOLEAN contains the OR group's children with SHOULD occur + Assertions.assertEquals(2, plan.root.children.size()); + } + + @Test + public void 
testMultiFieldLuceneModeComplexQuery() { + // Test: "(a OR b) AND NOT c" + fields=["f1","f2"] + lucene mode + String dsl = "(a OR b) AND NOT c"; + String options = "{\"fields\":[\"f1\",\"f2\"],\"mode\":\"lucene\",\"minimum_should_match\":0}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + // Should have proper structure with MUST and MUST_NOT + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.root.type); + } + + @Test + public void testMultiFieldLuceneModeMinimumShouldMatchOne() { + // Test: "a AND b OR c" with minimum_should_match=1 keeps all clauses + String dsl = "a AND b OR c"; + String options = "{\"fields\":[\"title\",\"content\"],\"mode\":\"lucene\",\"minimum_should_match\":1}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.root.type); + // All 3 groups should be present + Assertions.assertEquals(3, plan.root.children.size()); + Assertions.assertEquals(Integer.valueOf(1), plan.root.minimumShouldMatch); + } } diff --git a/regression-test/data/search/test_search_multi_field.out b/regression-test/data/search/test_search_multi_field.out new file mode 100644 index 00000000000000..8ba93f3eba84f9 --- /dev/null +++ b/regression-test/data/search/test_search_multi_field.out @@ -0,0 +1,87 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !multi_field_single_term -- +1 machine learning basics +4 machine maintenance +8 cooking machine reviews + +-- !multi_field_multi_term_and -- +1 machine learning basics + +-- !multi_field_multi_term_or -- +1 machine learning basics +4 machine maintenance +5 learning guitar +6 deep learning neural networks +8 cooking machine reviews + +-- !multi_field_explicit_and -- +1 machine learning basics + +-- !multi_field_mixed -- +1 machine learning basics tech + +-- !three_fields -- +1 machine learning basics +3 AI in healthcare +6 deep learning neural networks + +-- !multi_field_wildcard -- +1 machine learning basics +5 learning guitar +6 deep learning neural networks + +-- !multi_field_not -- +1 machine learning basics +4 machine maintenance + +-- !multi_field_complex -- +1 machine learning basics +3 AI in healthcare +4 machine maintenance +6 deep learning neural networks + +-- !single_field_array -- +1 machine learning basics +4 machine maintenance +8 cooking machine reviews + +-- !multi_field_lucene_and -- +1 machine learning basics + +-- !multi_field_lucene_or -- +1 machine learning basics +2 cooking recipes +4 machine maintenance +8 cooking machine reviews + +-- !multi_field_lucene_and_or -- +1 machine learning basics +4 machine maintenance +8 cooking machine reviews + +-- !multi_field_lucene_min_should_1 -- +1 machine learning basics +8 cooking machine reviews + +-- !multi_field_lucene_and_not -- +1 machine learning basics +8 cooking machine reviews + +-- !compare_default_field -- +1 machine learning basics +4 machine maintenance +8 cooking machine reviews + +-- !compare_fields_single -- +1 machine learning basics +4 machine maintenance +8 cooking machine reviews + +-- !multi_field_exact -- + +-- !multi_field_any -- +1 machine learning basics +2 cooking recipes +4 machine maintenance +8 cooking machine reviews + diff --git a/regression-test/suites/search/test_search_multi_field.groovy 
b/regression-test/suites/search/test_search_multi_field.groovy new file mode 100644 index 00000000000000..38262e02e3dd6e --- /dev/null +++ b/regression-test/suites/search/test_search_multi_field.groovy @@ -0,0 +1,229 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +/** + * Tests for multi-field search support in search() function. + * + * The 'fields' parameter allows searching across multiple fields with a single query term. + * This is similar to Elasticsearch's query_string 'fields' parameter. + * + * Example: + * search('hello', '{"fields":["title","content"]}') + * -> Equivalent to: (title:hello OR content:hello) + * + * search('hello world', '{"fields":["title","content"],"default_operator":"and"}') + * -> Equivalent to: (title:hello OR content:hello) AND (title:world OR content:world) + * + * Multi-field search can also be combined with Lucene mode for MUST/SHOULD/MUST_NOT semantics. 
+ */ +suite("test_search_multi_field") { + def tableName = "search_multi_field_test" + + sql "DROP TABLE IF EXISTS ${tableName}" + + // Create table with inverted indexes on multiple fields + sql """ + CREATE TABLE ${tableName} ( + id INT, + title VARCHAR(200), + content VARCHAR(500), + tags VARCHAR(100), + category VARCHAR(50), + INDEX idx_title(title) USING INVERTED PROPERTIES("parser" = "english"), + INDEX idx_content(content) USING INVERTED PROPERTIES("parser" = "english"), + INDEX idx_tags(tags) USING INVERTED PROPERTIES("parser" = "english"), + INDEX idx_category(category) USING INVERTED + ) ENGINE=OLAP + DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES ("replication_allocation" = "tag.location.default: 1") + """ + + // Insert test data + sql """INSERT INTO ${tableName} VALUES + (1, 'machine learning basics', 'introduction to AI and ML', 'ml ai tutorial', 'tech'), + (2, 'cooking recipes', 'how to make pasta', 'food cooking', 'lifestyle'), + (3, 'AI in healthcare', 'artificial intelligence applications', 'health ai', 'tech'), + (4, 'machine maintenance', 'keeping machines running', 'industrial', 'engineering'), + (5, 'learning guitar', 'music lessons for beginners', 'music learning', 'entertainment'), + (6, 'deep learning neural networks', 'advanced AI concepts', 'ai ml deep', 'tech'), + (7, 'car maintenance guide', 'vehicle repair tips', 'auto maintenance', 'automotive'), + (8, 'cooking machine reviews', 'kitchen appliance ratings', 'cooking appliances', 'lifestyle') + """ + + // Wait for index building + Thread.sleep(5000) + + // ============ Test 1: Single term across multiple fields ============ + // "machine" in title OR content + qt_multi_field_single_term """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine', '{"fields":["title","content"]}') + ORDER BY id + """ + + // ============ Test 2: Multiple terms with AND ============ + // "machine" AND "learning" across 
title,content + qt_multi_field_multi_term_and """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine learning', '{"fields":["title","content"],"default_operator":"and"}') + ORDER BY id + """ + + // ============ Test 3: Multiple terms with OR (default) ============ + qt_multi_field_multi_term_or """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine learning', '{"fields":["title","content"],"default_operator":"or"}') + ORDER BY id + """ + + // ============ Test 4: Explicit AND operator in DSL ============ + qt_multi_field_explicit_and """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine AND learning', '{"fields":["title","content"]}') + ORDER BY id + """ + + // ============ Test 5: Mixed - some terms with explicit field ============ + qt_multi_field_mixed """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title, category + FROM ${tableName} + WHERE search('machine AND category:tech', '{"fields":["title","content"]}') + ORDER BY id + """ + + // ============ Test 6: Three fields ============ + qt_three_fields """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('ai', '{"fields":["title","content","tags"]}') + ORDER BY id + """ + + // ============ Test 7: Wildcard across fields ============ + qt_multi_field_wildcard """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('learn*', '{"fields":["title","content","tags"]}') + ORDER BY id + """ + + // ============ Test 8: NOT operator ============ + qt_multi_field_not """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine AND NOT cooking', '{"fields":["title","content"]}') + ORDER BY id + """ + + // ============ Test 9: Complex boolean ============ + 
qt_multi_field_complex """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('(machine OR ai) AND NOT cooking', '{"fields":["title","content"]}') + ORDER BY id + """ + + // ============ Test 10: Single field in array (backward compatible) ============ + qt_single_field_array """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine', '{"fields":["title"]}') + ORDER BY id + """ + + // ============ Test 11: Multi-field with Lucene mode - simple AND ============ + qt_multi_field_lucene_and """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine AND learning', '{"fields":["title","content"],"mode":"lucene","minimum_should_match":0}') + ORDER BY id + """ + + // ============ Test 12: Multi-field with Lucene mode - OR ============ + qt_multi_field_lucene_or """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine OR cooking', '{"fields":["title","content"],"mode":"lucene"}') + ORDER BY id + """ + + // ============ Test 13: Multi-field with Lucene mode - AND OR mixed ============ + // With minimum_should_match=0, SHOULD clauses are discarded when MUST exists + qt_multi_field_lucene_and_or """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine AND learning OR cooking', '{"fields":["title","content"],"mode":"lucene","minimum_should_match":0}') + ORDER BY id + """ + + // ============ Test 14: Multi-field with Lucene mode - minimum_should_match=1 ============ + qt_multi_field_lucene_min_should_1 """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine AND learning OR cooking', '{"fields":["title","content"],"mode":"lucene","minimum_should_match":1}') + ORDER BY id + """ + + // ============ Test 15: Multi-field with Lucene mode - AND NOT 
============ + qt_multi_field_lucene_and_not """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine AND NOT maintenance', '{"fields":["title","content"],"mode":"lucene","minimum_should_match":0}') + ORDER BY id + """ + + // ============ Test 16: Comparison - same query with default_field vs fields ============ + // Using default_field (single field) + qt_compare_default_field """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine', '{"default_field":"title"}') + ORDER BY id + """ + + // Using fields array with single field (should be same as default_field) + qt_compare_fields_single """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine', '{"fields":["title"]}') + ORDER BY id + """ + + // ============ Test 17: EXACT function across fields ============ + qt_multi_field_exact """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('EXACT(machine learning)', '{"fields":["title","content"]}') + ORDER BY id + """ + + // ============ Test 18: ANY function across fields ============ + qt_multi_field_any """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('ANY(machine cooking)', '{"fields":["title","content"]}') + ORDER BY id + """ + + // Cleanup + sql "DROP TABLE IF EXISTS ${tableName}" +} From 7c0d17897b2775d1826146cd7c2505e5fd40a022 Mon Sep 17 00:00:00 2001 From: airborne12 Date: Wed, 14 Jan 2026 15:51:51 +0800 Subject: [PATCH 2/6] [test](search) Add test cases for multi-field search Lucene mode and cross_fields behavior - Add test data id=9 to verify cross_fields vs best_fields semantics - Add Test 2b: multi_field_multi_term_and_lucene to test default_operator:and with mode:lucene - Add Test 11b: multi_field_cross_fields_verify to explicitly verify cross_fields behavior - Update expected 
output file with new test case results Co-Authored-By: Claude Opus 4.5 --- .../data/search/test_search_multi_field.out | 25 +++++++++++++ .../search/test_search_multi_field.groovy | 36 ++++++++++++++++++- 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/regression-test/data/search/test_search_multi_field.out b/regression-test/data/search/test_search_multi_field.out index 8ba93f3eba84f9..fe08a891087385 100644 --- a/regression-test/data/search/test_search_multi_field.out +++ b/regression-test/data/search/test_search_multi_field.out @@ -3,9 +3,15 @@ 1 machine learning basics 4 machine maintenance 8 cooking machine reviews +9 machine guide -- !multi_field_multi_term_and -- 1 machine learning basics +9 machine guide + +-- !multi_field_multi_term_and_lucene -- +1 machine learning basics +9 machine guide -- !multi_field_multi_term_or -- 1 machine learning basics @@ -13,12 +19,15 @@ 5 learning guitar 6 deep learning neural networks 8 cooking machine reviews +9 machine guide -- !multi_field_explicit_and -- 1 machine learning basics +9 machine guide -- !multi_field_mixed -- 1 machine learning basics tech +9 machine guide tech -- !three_fields -- 1 machine learning basics @@ -29,53 +38,68 @@ 1 machine learning basics 5 learning guitar 6 deep learning neural networks +9 machine guide -- !multi_field_not -- 1 machine learning basics 4 machine maintenance +9 machine guide -- !multi_field_complex -- 1 machine learning basics 3 AI in healthcare 4 machine maintenance 6 deep learning neural networks +9 machine guide -- !single_field_array -- 1 machine learning basics 4 machine maintenance 8 cooking machine reviews +9 machine guide -- !multi_field_lucene_and -- 1 machine learning basics +9 machine guide + +-- !multi_field_cross_fields_verify -- +1 machine learning basics introduction to AI and ML +9 machine guide learning tips -- !multi_field_lucene_or -- 1 machine learning basics 2 cooking recipes 4 machine maintenance 8 cooking machine reviews +9 machine guide -- 
!multi_field_lucene_and_or -- 1 machine learning basics 4 machine maintenance 8 cooking machine reviews +9 machine guide -- !multi_field_lucene_min_should_1 -- 1 machine learning basics 8 cooking machine reviews +9 machine guide -- !multi_field_lucene_and_not -- 1 machine learning basics 8 cooking machine reviews +9 machine guide -- !compare_default_field -- 1 machine learning basics 4 machine maintenance 8 cooking machine reviews +9 machine guide -- !compare_fields_single -- 1 machine learning basics 4 machine maintenance 8 cooking machine reviews +9 machine guide -- !multi_field_exact -- @@ -84,4 +108,5 @@ 2 cooking recipes 4 machine maintenance 8 cooking machine reviews +9 machine guide diff --git a/regression-test/suites/search/test_search_multi_field.groovy b/regression-test/suites/search/test_search_multi_field.groovy index 38262e02e3dd6e..712bd0eabd508e 100644 --- a/regression-test/suites/search/test_search_multi_field.groovy +++ b/regression-test/suites/search/test_search_multi_field.groovy @@ -54,6 +54,9 @@ suite("test_search_multi_field") { """ // Insert test data + // Note: id=9 is specifically designed to test cross_fields vs best_fields behavior + // - cross_fields: matches (title has 'machine', content has 'learning') + // - best_fields: does NOT match (no single field has both terms) sql """INSERT INTO ${tableName} VALUES (1, 'machine learning basics', 'introduction to AI and ML', 'ml ai tutorial', 'tech'), (2, 'cooking recipes', 'how to make pasta', 'food cooking', 'lifestyle'), @@ -62,7 +65,8 @@ suite("test_search_multi_field") { (5, 'learning guitar', 'music lessons for beginners', 'music learning', 'entertainment'), (6, 'deep learning neural networks', 'advanced AI concepts', 'ai ml deep', 'tech'), (7, 'car maintenance guide', 'vehicle repair tips', 'auto maintenance', 'automotive'), - (8, 'cooking machine reviews', 'kitchen appliance ratings', 'cooking appliances', 'lifestyle') + (8, 'cooking machine reviews', 'kitchen appliance ratings', 
'cooking appliances', 'lifestyle'), + (9, 'machine guide', 'learning tips', 'howto', 'tech') """ // Wait for index building @@ -86,6 +90,20 @@ suite("test_search_multi_field") { ORDER BY id """ + // ============ Test 2b: Multiple terms with AND in Lucene mode ============ + // Same as Test 2 but with mode:lucene - should have same result + // This tests that default_operator:and works correctly with Lucene mode + // ES behavior comparison: + // - ES best_fields (default): only id=1 (both terms must be in same field) + // - ES cross_fields: id=1 and id=9 (terms can be across different fields) + // - Doris uses cross_fields semantics + qt_multi_field_multi_term_and_lucene """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine learning', '{"fields":["title","content"],"default_operator":"and","mode":"lucene"}') + ORDER BY id + """ + // ============ Test 3: Multiple terms with OR (default) ============ qt_multi_field_multi_term_or """ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title @@ -151,6 +169,11 @@ suite("test_search_multi_field") { """ // ============ Test 11: Multi-field with Lucene mode - simple AND ============ + // This is equivalent to Test 2 but uses Lucene mode with explicit AND operator + // Expected: Same result as Test 2 - cross_fields semantics + // - ES best_fields would return: id=1 only (both terms in same field) + // - Doris cross_fields returns: id=1, id=9 (terms can be in different fields) + // id=9: title='machine guide', content='learning tips' - matches cross_fields but not best_fields qt_multi_field_lucene_and """ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title FROM ${tableName} @@ -158,6 +181,17 @@ suite("test_search_multi_field") { ORDER BY id """ + // ============ Test 11b: Verify cross_fields behavior explicitly ============ + // This test verifies that our implementation uses cross_fields semantics (like ES type:cross_fields) + // Query: "machine 
AND learning" across title and content + // id=9 has 'machine' in title and 'learning' in content - should match with cross_fields + qt_multi_field_cross_fields_verify """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title, content + FROM ${tableName} + WHERE search('machine AND learning', '{"fields":["title","content"]}') + ORDER BY id + """ + // ============ Test 12: Multi-field with Lucene mode - OR ============ qt_multi_field_lucene_or """ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title From 098956f72a6288468b7cb560a2df2480273ba758 Mon Sep 17 00:00:00 2001 From: airborne12 Date: Wed, 14 Jan 2026 22:14:05 +0800 Subject: [PATCH 3/6] [feature](search) Add type parameter for multi-field search with best_fields/cross_fields modes Add support for "type" option in multi-field search DSL to control how terms are matched across fields: - best_fields (default): All terms must match within the SAME field Example: "machine learning" -> (title:machine AND title:learning) OR (content:machine AND content:learning) - cross_fields: Terms can match across DIFFERENT fields Example: "machine learning" -> (title:machine OR content:machine) AND (title:learning OR content:learning) This aligns with Elasticsearch's default behavior where best_fields is the default mode. Users can explicitly set type:"cross_fields" when they need terms to be distributed across multiple fields. 
Also includes: - Input validation in setType() with IllegalArgumentException for invalid values - Explicit type checking to prevent silent fallback behavior - Unit tests for type parameter parsing and expansion - Regression tests for both modes in standard and Lucene modes Co-Authored-By: Claude Opus 4.5 --- .../functions/scalar/SearchDslParser.java | 191 +++++++++++++++++- .../functions/scalar/SearchDslParserTest.java | 140 +++++++++++-- .../data/search/test_search_multi_field.out | 14 ++ .../search/test_search_multi_field.groovy | 98 ++++++--- 4 files changed, 389 insertions(+), 54 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java index a29bedede80882..b499219a142cc3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java @@ -77,8 +77,10 @@ public static QsPlan parseDsl(String dsl) { * - mode: "standard" or "lucene" * - minimum_should_match: integer for Lucene mode * - fields: array of field names for multi-field search + * - type: "best_fields" (default) or "cross_fields" for multi-field semantics * Example: '{"default_field":"title","mode":"lucene","minimum_should_match":0}' * Example: '{"fields":["title","content"],"default_operator":"and"}' + * Example: '{"fields":["title","content"],"type":"cross_fields"}' * @return Parsed QsPlan */ public static QsPlan parseDsl(String dsl, String optionsJson) { @@ -101,7 +103,7 @@ public static QsPlan parseDsl(String dsl, String optionsJson) { // Multi-field mode parsing (standard mode) if (searchOptions.isMultiFieldMode()) { - return parseDslMultiFieldMode(dsl, searchOptions.getFields(), defaultOperator); + return parseDslMultiFieldMode(dsl, 
searchOptions.getFields(), defaultOperator, searchOptions); } // Standard mode parsing @@ -583,31 +585,48 @@ private static String expandItemAcrossFields(String item, List fields) { /** * Parse DSL in multi-field mode. - * Each term without field prefix is expanded to OR across all specified fields. + * Expansion behavior depends on the type option: + * - best_fields (default): all terms must match within the same field + * - cross_fields: terms can match across different fields * * @param dsl DSL query string * @param fields List of field names to search * @param defaultOperator "and" or "or" for joining term groups + * @param options Search options containing type setting * @return Parsed QsPlan */ - private static QsPlan parseDslMultiFieldMode(String dsl, List fields, String defaultOperator) { + private static QsPlan parseDslMultiFieldMode(String dsl, List fields, String defaultOperator, + SearchOptions options) { if (!isValidDsl(dsl)) { return createEmptyDslErrorPlan(); } validateFieldsList(fields); - String expandedDsl = expandMultiFieldDsl(dsl.trim(), fields, normalizeDefaultOperator(defaultOperator)); + String normalizedOperator = normalizeDefaultOperator(defaultOperator); + String expandedDsl; + if (options.isCrossFieldsMode()) { + // cross_fields: terms can be across different fields + expandedDsl = expandMultiFieldDsl(dsl.trim(), fields, normalizedOperator); + } else if (options.isBestFieldsMode()) { + // best_fields: all terms must be in the same field + expandedDsl = expandMultiFieldDslBestFields(dsl.trim(), fields, normalizedOperator); + } else { + // Should never happen due to setType() validation, but provide fallback + throw new IllegalStateException( + "Invalid type value: '" + options.getType() + "'. Expected 'best_fields' or 'cross_fields'"); + } return parseWithVisitor(expandedDsl, parser -> new QsAstBuilder(), dsl, "multi-field mode"); } /** * Parse DSL in multi-field mode with Lucene boolean semantics. 
* First expands DSL across fields, then applies Lucene-style MUST/SHOULD/MUST_NOT logic. + * Expansion behavior depends on the type option (best_fields or cross_fields). * * @param dsl DSL query string * @param fields List of field names to search * @param defaultOperator "and" or "or" for joining term groups - * @param options Search options containing Lucene mode settings + * @param options Search options containing Lucene mode settings and type * @return Parsed QsPlan with Lucene boolean semantics */ private static QsPlan parseDslMultiFieldLuceneMode(String dsl, List fields, @@ -617,7 +636,19 @@ private static QsPlan parseDslMultiFieldLuceneMode(String dsl, List fiel } validateFieldsList(fields); - String expandedDsl = expandMultiFieldDsl(dsl.trim(), fields, normalizeDefaultOperator(defaultOperator)); + String normalizedOperator = normalizeDefaultOperator(defaultOperator); + String expandedDsl; + if (options.isCrossFieldsMode()) { + // cross_fields: terms can be across different fields + expandedDsl = expandMultiFieldDsl(dsl.trim(), fields, normalizedOperator); + } else if (options.isBestFieldsMode()) { + // best_fields: all terms must be in the same field + expandedDsl = expandMultiFieldDslBestFields(dsl.trim(), fields, normalizedOperator); + } else { + // Should never happen due to setType() validation, but provide fallback + throw new IllegalStateException( + "Invalid type value: '" + options.getType() + "'. Expected 'best_fields' or 'cross_fields'"); + } return parseWithVisitor(expandedDsl, parser -> new QsLuceneModeAstBuilder(options), dsl, "multi-field Lucene mode"); } @@ -680,6 +711,107 @@ private static String expandMultiFieldDsl(String dsl, List fields, Strin return result.toString(); } + /** + * Expand multi-field DSL using best_fields semantics. + * Each field is wrapped with all terms joined by the default operator, then fields are ORed. 
+ * + * Example: "machine learning" with fields ["title", "content"] and default_operator "and" + * Result: (title:machine AND title:learning) OR (content:machine AND content:learning) + * + * @param dsl Simple DSL string + * @param fields List of field names to search + * @param defaultOperator "and" or "or" for joining terms within each field + * @return Expanded full DSL with best_fields semantics + */ + private static String expandMultiFieldDslBestFields(String dsl, List fields, + String defaultOperator) { + if (fields == null || fields.isEmpty()) { + throw new IllegalArgumentException("fields list cannot be null or empty"); + } + + if (fields.size() == 1) { + // Single field - delegate to existing method + return expandSimplifiedDsl(dsl, fields.get(0), defaultOperator); + } + + // 1. Check for leading NOT - must use cross_fields semantics for correct negation + // "NOT hello" should expand to "NOT (title:hello OR content:hello)" + // rather than "(NOT title:hello) OR (NOT content:hello)" which has wrong semantics + String trimmedDsl = dsl.trim(); + if (trimmedDsl.toUpperCase().startsWith("NOT ") + || trimmedDsl.toUpperCase().startsWith("NOT\t")) { + // Use cross_fields expansion for leading NOT + return expandOperatorExpressionAcrossFields(dsl, fields); + } + + // 2. If DSL contains field references or explicit operators, apply best_fields + // by expanding the entire expression per field and ORing the results + if (containsFieldReference(dsl) || containsExplicitOperators(dsl)) { + return expandOperatorExpressionAcrossFieldsBestFields(dsl, fields, defaultOperator); + } + + // 3. Check if DSL starts with a function keyword (EXACT, ANY, ALL, IN) + if (startsWithFunction(dsl)) { + // For functions, use cross_fields approach (function applied to each field) + return expandFunctionAcrossFields(dsl, fields); + } + + // 4. 
Tokenize and analyze terms + List terms = tokenizeDsl(dsl); + if (terms.isEmpty()) { + // Single term case - expand across fields with OR + return expandTermAcrossFields(dsl, fields); + } + + // 5. Single term - expand across fields with OR + if (terms.size() == 1) { + return expandTermAcrossFields(terms.get(0), fields); + } + + // 6. Multiple terms - best_fields: each field with all terms, then OR across fields + String termOperator = "and".equals(defaultOperator) ? " AND " : " OR "; + + StringBuilder result = new StringBuilder(); + for (int fieldIdx = 0; fieldIdx < fields.size(); fieldIdx++) { + if (fieldIdx > 0) { + result.append(" OR "); + } + + String field = fields.get(fieldIdx); + // Build: (field:term1 AND field:term2 AND ...) + result.append("("); + for (int termIdx = 0; termIdx < terms.size(); termIdx++) { + if (termIdx > 0) { + result.append(termOperator); + } + result.append(field).append(":").append(terms.get(termIdx)); + } + result.append(")"); + } + return result.toString(); + } + + /** + * Handle DSL with explicit operators using best_fields semantics. + * For complex expressions, we group by field and OR across fields. + */ + private static String expandOperatorExpressionAcrossFieldsBestFields(String dsl, + List fields, String defaultOperator) { + // For expressions with explicit operators, we apply the entire expression to each field + // and OR the results: (title:expr) OR (content:expr) + StringBuilder result = new StringBuilder(); + for (int i = 0; i < fields.size(); i++) { + if (i > 0) { + result.append(" OR "); + } + String field = fields.get(i); + // Expand the DSL for this single field + String fieldDsl = expandSimplifiedDsl(dsl, field, defaultOperator); + result.append("(").append(fieldDsl).append(")"); + } + return result.toString(); + } + /** * Expand a single term across multiple fields with OR. 
* Example: "hello" + ["title", "content"] -> "(title:hello OR content:hello)" @@ -1370,6 +1502,7 @@ public static class SearchOptions { private String mode = "standard"; private Integer minimumShouldMatch = null; private List fields = null; + private String type = "best_fields"; // "best_fields" (default) or "cross_fields" public String getDefaultField() { return defaultField; @@ -1422,6 +1555,47 @@ public void setFields(List fields) { public boolean isMultiFieldMode() { return fields != null && !fields.isEmpty(); } + + /** + * Get the multi-field search type ("best_fields" or "cross_fields"). + */ + public String getType() { + return type; + } + + /** + * Set the multi-field search type. + * @param type Either "best_fields" or "cross_fields" (case-insensitive) + * @throws IllegalArgumentException if type is invalid + */ + public void setType(String type) { + if (type == null) { + this.type = "best_fields"; + return; + } + String normalized = type.trim().toLowerCase(); + if (!"cross_fields".equals(normalized) && !"best_fields".equals(normalized)) { + throw new IllegalArgumentException( + "'type' must be 'cross_fields' or 'best_fields', got: " + type); + } + this.type = normalized; + } + + /** + * Check if best_fields mode is enabled (default). + * In best_fields mode, all terms must match within the same field. + */ + public boolean isBestFieldsMode() { + return "best_fields".equals(type); + } + + /** + * Check if cross_fields mode is enabled. + * In cross_fields mode, terms can match across different fields. 
+ */ + public boolean isCrossFieldsMode() { + return "cross_fields".equals(type); + } } /** @@ -1432,6 +1606,7 @@ public boolean isMultiFieldMode() { * - mode: "standard" or "lucene" * - minimum_should_match: integer for Lucene mode * - fields: array of field names for multi-field search + * - type: "best_fields" (default) or "cross_fields" for multi-field search semantics */ private static SearchOptions parseOptions(String optionsJson) { SearchOptions options = new SearchOptions(); @@ -1471,6 +1646,10 @@ private static SearchOptions parseOptions(String optionsJson) { } } } + // Parse type for multi-field search semantics + if (jsonNode.has("type")) { + options.setType(jsonNode.get("type").asText()); + } // Validation: fields and default_field are mutually exclusive if (options.getFields() != null && !options.getFields().isEmpty() diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java index c38deaa18887ef..b864caa2400ca5 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java @@ -947,10 +947,10 @@ public void testMultiFieldSimpleTerm() { @Test public void testMultiFieldMultiTermAnd() { - // Test: "hello world" + fields=["title","content"] + default_operator="and" + // Test: "hello world" + fields=["title","content"] + default_operator="and" + type="cross_fields" // → "(title:hello OR content:hello) AND (title:world OR content:world)" String dsl = "hello world"; - String options = "{\"fields\":[\"title\",\"content\"],\"default_operator\":\"and\"}"; + String options = "{\"fields\":[\"title\",\"content\"],\"default_operator\":\"and\",\"type\":\"cross_fields\"}"; QsPlan plan = SearchDslParser.parseDsl(dsl, 
options); Assertions.assertNotNull(plan); @@ -978,9 +978,10 @@ public void testMultiFieldMultiTermOr() { @Test public void testMultiFieldExplicitAndOperator() { - // Test: "hello AND world" + fields=["title","content"] + // Test: "hello AND world" + fields=["title","content"] + cross_fields + // → "(title:hello OR content:hello) AND (title:world OR content:world)" String dsl = "hello AND world"; - String options = "{\"fields\":[\"title\",\"content\"]}"; + String options = "{\"fields\":[\"title\",\"content\"],\"type\":\"cross_fields\"}"; QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); @@ -989,10 +990,10 @@ public void testMultiFieldExplicitAndOperator() { @Test public void testMultiFieldMixedWithExplicitField() { - // Test: "hello AND category:tech" + fields=["title","content"] + // Test: "hello AND category:tech" + fields=["title","content"] + cross_fields // → "(title:hello OR content:hello) AND category:tech" String dsl = "hello AND category:tech"; - String options = "{\"fields\":[\"title\",\"content\"]}"; + String options = "{\"fields\":[\"title\",\"content\"],\"type\":\"cross_fields\"}"; QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); @@ -1093,11 +1094,11 @@ public void testMultiFieldNotOperator() { @Test public void testMultiFieldLuceneModeSimpleAnd() { - // Test: "a AND b" + fields=["title","content"] + lucene mode + // Test: "a AND b" + fields=["title","content"] + lucene mode + cross_fields // Expanded: "(title:a OR content:a) AND (title:b OR content:b)" // With Lucene semantics: both groups are MUST String dsl = "a AND b"; - String options = "{\"fields\":[\"title\",\"content\"],\"mode\":\"lucene\",\"minimum_should_match\":0}"; + String options = "{\"fields\":[\"title\",\"content\"],\"mode\":\"lucene\",\"minimum_should_match\":0,\"type\":\"cross_fields\"}"; QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); @@ -1137,11 +1138,11 @@ public void 
testMultiFieldLuceneModeSimpleOr() { @Test public void testMultiFieldLuceneModeAndOrMixed() { - // Test: "a AND b OR c" + fields=["title","content"] + lucene mode + minimum_should_match=0 + // Test: "a AND b OR c" + fields=["title","content"] + lucene mode + minimum_should_match=0 + cross_fields // With Lucene semantics and minimum_should_match=0: SHOULD groups are discarded // Only "a" (MUST) remains - wrapped in OCCUR_BOOLEAN String dsl = "a AND b OR c"; - String options = "{\"fields\":[\"title\",\"content\"],\"mode\":\"lucene\",\"minimum_should_match\":0}"; + String options = "{\"fields\":[\"title\",\"content\"],\"mode\":\"lucene\",\"minimum_should_match\":0,\"type\":\"cross_fields\"}"; QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); @@ -1152,10 +1153,10 @@ public void testMultiFieldLuceneModeAndOrMixed() { @Test public void testMultiFieldLuceneModeWithNot() { - // Test: "a AND NOT b" + fields=["title","content"] + lucene mode + // Test: "a AND NOT b" + fields=["title","content"] + lucene mode + cross_fields // Expanded: "(title:a OR content:a) AND NOT (title:b OR content:b)" String dsl = "a AND NOT b"; - String options = "{\"fields\":[\"title\",\"content\"],\"mode\":\"lucene\",\"minimum_should_match\":0}"; + String options = "{\"fields\":[\"title\",\"content\"],\"mode\":\"lucene\",\"minimum_should_match\":0,\"type\":\"cross_fields\"}"; QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); @@ -1187,9 +1188,9 @@ public void testMultiFieldLuceneModeSingleTerm() { @Test public void testMultiFieldLuceneModeComplexQuery() { - // Test: "(a OR b) AND NOT c" + fields=["f1","f2"] + lucene mode + // Test: "(a OR b) AND NOT c" + fields=["f1","f2"] + lucene mode + cross_fields String dsl = "(a OR b) AND NOT c"; - String options = "{\"fields\":[\"f1\",\"f2\"],\"mode\":\"lucene\",\"minimum_should_match\":0}"; + String options = 
"{\"fields\":[\"f1\",\"f2\"],\"mode\":\"lucene\",\"minimum_should_match\":0,\"type\":\"cross_fields\"}"; QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); @@ -1199,9 +1200,9 @@ public void testMultiFieldLuceneModeComplexQuery() { @Test public void testMultiFieldLuceneModeMinimumShouldMatchOne() { - // Test: "a AND b OR c" with minimum_should_match=1 keeps all clauses + // Test: "a AND b OR c" with minimum_should_match=1 keeps all clauses + cross_fields String dsl = "a AND b OR c"; - String options = "{\"fields\":[\"title\",\"content\"],\"mode\":\"lucene\",\"minimum_should_match\":1}"; + String options = "{\"fields\":[\"title\",\"content\"],\"mode\":\"lucene\",\"minimum_should_match\":1,\"type\":\"cross_fields\"}"; QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); @@ -1210,4 +1211,111 @@ public void testMultiFieldLuceneModeMinimumShouldMatchOne() { Assertions.assertEquals(3, plan.root.children.size()); Assertions.assertEquals(Integer.valueOf(1), plan.root.minimumShouldMatch); } + + // ============ Tests for type parameter (best_fields vs cross_fields) ============ + + @Test + public void testMultiFieldBestFieldsDefault() { + // Test: best_fields is the default when type is not specified + // "hello world" with fields ["title", "content"] and default_operator "and" + // Expands to: (title:hello AND title:world) OR (content:hello AND content:world) + String dsl = "hello world"; + String options = "{\"fields\":[\"title\",\"content\"],\"default_operator\":\"and\"}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + // Root should be OR (joining fields) + Assertions.assertEquals(QsClauseType.OR, plan.root.type); + Assertions.assertEquals(2, plan.root.children.size()); // 2 fields + + // Each child should be an AND of terms for that field + for (QsNode fieldGroup : plan.root.children) { + Assertions.assertEquals(QsClauseType.AND, fieldGroup.type); + 
Assertions.assertEquals(2, fieldGroup.children.size()); // 2 terms + } + } + + @Test + public void testMultiFieldBestFieldsExplicit() { + // Test: explicitly specify type=best_fields + String dsl = "hello world"; + String options = "{\"fields\":[\"title\",\"content\"],\"default_operator\":\"and\",\"type\":\"best_fields\"}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.OR, plan.root.type); + Assertions.assertEquals(2, plan.root.children.size()); + } + + @Test + public void testMultiFieldCrossFields() { + // Test: cross_fields mode + // "hello world" with fields ["title", "content"] and default_operator "and" + // Expands to: (title:hello OR content:hello) AND (title:world OR content:world) + String dsl = "hello world"; + String options = "{\"fields\":[\"title\",\"content\"],\"default_operator\":\"and\",\"type\":\"cross_fields\"}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + // Root should be AND (joining term groups) + Assertions.assertEquals(QsClauseType.AND, plan.root.type); + Assertions.assertEquals(2, plan.root.children.size()); // 2 term groups + + // Each child should be an OR of the same term across fields + for (QsNode termGroup : plan.root.children) { + Assertions.assertEquals(QsClauseType.OR, termGroup.type); + Assertions.assertEquals(2, termGroup.children.size()); // 2 fields + } + } + + @Test + public void testMultiFieldBestFieldsLuceneMode() { + // Test: best_fields with Lucene mode + String dsl = "hello world"; + String options = "{\"fields\":[\"title\",\"content\"],\"default_operator\":\"and\",\"mode\":\"lucene\",\"type\":\"best_fields\"}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.root.type); + } + + @Test + public void testMultiFieldCrossFieldsLuceneMode() { + // Test: cross_fields with Lucene mode + String 
dsl = "hello world"; + String options = "{\"fields\":[\"title\",\"content\"],\"default_operator\":\"and\",\"mode\":\"lucene\",\"type\":\"cross_fields\"}"; + QsPlan plan = SearchDslParser.parseDsl(dsl, options); + + Assertions.assertNotNull(plan); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.root.type); + } + + @Test + public void testMultiFieldInvalidType() { + // Test: invalid type value should throw exception + String dsl = "hello"; + String options = "{\"fields\":[\"title\",\"content\"],\"type\":\"invalid_type\"}"; + + Assertions.assertThrows(IllegalArgumentException.class, () -> { + SearchDslParser.parseDsl(dsl, options); + }); + } + + @Test + public void testMultiFieldSingleTermSameResultForBothTypes() { + // Test: single term should have same structure for both types + // since there's only one term, no difference between best_fields and cross_fields + String dsl = "hello"; + String optionsBestFields = "{\"fields\":[\"title\",\"content\"],\"type\":\"best_fields\"}"; + String optionsCrossFields = "{\"fields\":[\"title\",\"content\"],\"type\":\"cross_fields\"}"; + + QsPlan planBest = SearchDslParser.parseDsl(dsl, optionsBestFields); + QsPlan planCross = SearchDslParser.parseDsl(dsl, optionsCrossFields); + + Assertions.assertNotNull(planBest); + Assertions.assertNotNull(planCross); + // Both should have same structure: (title:hello OR content:hello) + Assertions.assertEquals(planBest.root.type, planCross.root.type); + Assertions.assertEquals(planBest.root.children.size(), planCross.root.children.size()); + } } diff --git a/regression-test/data/search/test_search_multi_field.out b/regression-test/data/search/test_search_multi_field.out index fe08a891087385..4a4923a4c3b50e 100644 --- a/regression-test/data/search/test_search_multi_field.out +++ b/regression-test/data/search/test_search_multi_field.out @@ -110,3 +110,17 @@ 8 cooking machine reviews 9 machine guide +-- !multi_field_best_fields_default -- +1 machine learning basics + +-- 
!multi_field_cross_fields -- +1 machine learning basics +9 machine guide + +-- !multi_field_best_fields_lucene -- +1 machine learning basics + +-- !multi_field_cross_fields_lucene -- +1 machine learning basics +9 machine guide + diff --git a/regression-test/suites/search/test_search_multi_field.groovy b/regression-test/suites/search/test_search_multi_field.groovy index 712bd0eabd508e..f71db33f2b050f 100644 --- a/regression-test/suites/search/test_search_multi_field.groovy +++ b/regression-test/suites/search/test_search_multi_field.groovy @@ -81,26 +81,25 @@ suite("test_search_multi_field") { ORDER BY id """ - // ============ Test 2: Multiple terms with AND ============ - // "machine" AND "learning" across title,content + // ============ Test 2: Multiple terms with AND (cross_fields) ============ + // "machine" AND "learning" across title,content with cross_fields semantics + // cross_fields: terms can be across different fields + // id=1: title has both terms + // id=9: title has "machine", content has "learning" (cross_fields match) qt_multi_field_multi_term_and """ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title FROM ${tableName} - WHERE search('machine learning', '{"fields":["title","content"],"default_operator":"and"}') + WHERE search('machine learning', '{"fields":["title","content"],"default_operator":"and","type":"cross_fields"}') ORDER BY id """ - // ============ Test 2b: Multiple terms with AND in Lucene mode ============ + // ============ Test 2b: Multiple terms with AND in Lucene mode (cross_fields) ============ // Same as Test 2 but with mode:lucene - should have same result - // This tests that default_operator:and works correctly with Lucene mode - // ES behavior comparison: - // - ES best_fields (default): only id=1 (both terms must be in same field) - // - ES cross_fields: id=1 and id=9 (terms can be across different fields) - // - Doris uses cross_fields semantics + // Uses cross_fields semantics explicitly 
qt_multi_field_multi_term_and_lucene """ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title FROM ${tableName} - WHERE search('machine learning', '{"fields":["title","content"],"default_operator":"and","mode":"lucene"}') + WHERE search('machine learning', '{"fields":["title","content"],"default_operator":"and","mode":"lucene","type":"cross_fields"}') ORDER BY id """ @@ -112,19 +111,20 @@ suite("test_search_multi_field") { ORDER BY id """ - // ============ Test 4: Explicit AND operator in DSL ============ + // ============ Test 4: Explicit AND operator in DSL (cross_fields) ============ + // Uses explicit type:cross_fields for backward compatibility qt_multi_field_explicit_and """ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title FROM ${tableName} - WHERE search('machine AND learning', '{"fields":["title","content"]}') + WHERE search('machine AND learning', '{"fields":["title","content"],"type":"cross_fields"}') ORDER BY id """ - // ============ Test 5: Mixed - some terms with explicit field ============ + // ============ Test 5: Mixed - some terms with explicit field (cross_fields) ============ qt_multi_field_mixed """ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title, category FROM ${tableName} - WHERE search('machine AND category:tech', '{"fields":["title","content"]}') + WHERE search('machine AND category:tech', '{"fields":["title","content"],"type":"cross_fields"}') ORDER BY id """ @@ -144,19 +144,19 @@ suite("test_search_multi_field") { ORDER BY id """ - // ============ Test 8: NOT operator ============ + // ============ Test 8: NOT operator (cross_fields) ============ qt_multi_field_not """ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title FROM ${tableName} - WHERE search('machine AND NOT cooking', '{"fields":["title","content"]}') + WHERE search('machine AND NOT cooking', '{"fields":["title","content"],"type":"cross_fields"}') ORDER BY id """ - // ============ Test 9: Complex boolean ============ + // 
============ Test 9: Complex boolean (cross_fields) ============ qt_multi_field_complex """ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title FROM ${tableName} - WHERE search('(machine OR ai) AND NOT cooking', '{"fields":["title","content"]}') + WHERE search('(machine OR ai) AND NOT cooking', '{"fields":["title","content"],"type":"cross_fields"}') ORDER BY id """ @@ -168,27 +168,24 @@ suite("test_search_multi_field") { ORDER BY id """ - // ============ Test 11: Multi-field with Lucene mode - simple AND ============ + // ============ Test 11: Multi-field with Lucene mode - simple AND (cross_fields) ============ // This is equivalent to Test 2 but uses Lucene mode with explicit AND operator - // Expected: Same result as Test 2 - cross_fields semantics - // - ES best_fields would return: id=1 only (both terms in same field) - // - Doris cross_fields returns: id=1, id=9 (terms can be in different fields) - // id=9: title='machine guide', content='learning tips' - matches cross_fields but not best_fields + // Uses cross_fields semantics explicitly qt_multi_field_lucene_and """ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title FROM ${tableName} - WHERE search('machine AND learning', '{"fields":["title","content"],"mode":"lucene","minimum_should_match":0}') + WHERE search('machine AND learning', '{"fields":["title","content"],"mode":"lucene","minimum_should_match":0,"type":"cross_fields"}') ORDER BY id """ // ============ Test 11b: Verify cross_fields behavior explicitly ============ - // This test verifies that our implementation uses cross_fields semantics (like ES type:cross_fields) + // This test verifies cross_fields semantics (like ES type:cross_fields) // Query: "machine AND learning" across title and content // id=9 has 'machine' in title and 'learning' in content - should match with cross_fields qt_multi_field_cross_fields_verify """ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title, content FROM ${tableName} - WHERE 
search('machine AND learning', '{"fields":["title","content"]}') + WHERE search('machine AND learning', '{"fields":["title","content"],"type":"cross_fields"}') ORDER BY id """ @@ -200,28 +197,28 @@ suite("test_search_multi_field") { ORDER BY id """ - // ============ Test 13: Multi-field with Lucene mode - AND OR mixed ============ + // ============ Test 13: Multi-field with Lucene mode - AND OR mixed (cross_fields) ============ // With minimum_should_match=0, SHOULD clauses are discarded when MUST exists qt_multi_field_lucene_and_or """ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title FROM ${tableName} - WHERE search('machine AND learning OR cooking', '{"fields":["title","content"],"mode":"lucene","minimum_should_match":0}') + WHERE search('machine AND learning OR cooking', '{"fields":["title","content"],"mode":"lucene","minimum_should_match":0,"type":"cross_fields"}') ORDER BY id """ - // ============ Test 14: Multi-field with Lucene mode - minimum_should_match=1 ============ + // ============ Test 14: Multi-field with Lucene mode - minimum_should_match=1 (cross_fields) ============ qt_multi_field_lucene_min_should_1 """ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title FROM ${tableName} - WHERE search('machine AND learning OR cooking', '{"fields":["title","content"],"mode":"lucene","minimum_should_match":1}') + WHERE search('machine AND learning OR cooking', '{"fields":["title","content"],"mode":"lucene","minimum_should_match":1,"type":"cross_fields"}') ORDER BY id """ - // ============ Test 15: Multi-field with Lucene mode - AND NOT ============ + // ============ Test 15: Multi-field with Lucene mode - AND NOT (cross_fields) ============ qt_multi_field_lucene_and_not """ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title FROM ${tableName} - WHERE search('machine AND NOT maintenance', '{"fields":["title","content"],"mode":"lucene","minimum_should_match":0}') + WHERE search('machine AND NOT maintenance', 
'{"fields":["title","content"],"mode":"lucene","minimum_should_match":0,"type":"cross_fields"}') ORDER BY id """ @@ -258,6 +255,43 @@ suite("test_search_multi_field") { ORDER BY id """ + // ============ Test 19: best_fields mode (default) ============ + // With best_fields, all terms must be in the SAME field + // Only id=1 matches: title has both "machine" and "learning" + // id=9 does NOT match: "machine" in title, "learning" in content (different fields) + qt_multi_field_best_fields_default """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine learning', '{"fields":["title","content"],"default_operator":"and"}') + ORDER BY id + """ + + // ============ Test 20: cross_fields mode (explicit) ============ + // With cross_fields, terms can be in DIFFERENT fields + // Both id=1 and id=9 match + qt_multi_field_cross_fields """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine learning', '{"fields":["title","content"],"default_operator":"and","type":"cross_fields"}') + ORDER BY id + """ + + // ============ Test 21: best_fields with Lucene mode ============ + qt_multi_field_best_fields_lucene """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine learning', '{"fields":["title","content"],"default_operator":"and","mode":"lucene","type":"best_fields"}') + ORDER BY id + """ + + // ============ Test 22: cross_fields with Lucene mode ============ + qt_multi_field_cross_fields_lucene """ + SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title + FROM ${tableName} + WHERE search('machine learning', '{"fields":["title","content"],"default_operator":"and","mode":"lucene","type":"cross_fields"}') + ORDER BY id + """ + // Cleanup sql "DROP TABLE IF EXISTS ${tableName}" } From ddcaa768e3fa57e7c575e9de8ce6f5dd8b0048c9 Mon Sep 17 00:00:00 2001 From: airborne12 Date: Thu, 15 Jan 2026 13:52:37 
+0800 Subject: [PATCH 4/6] [refactor](search) Improve encapsulation for SearchDslParser data classes Make fields private with proper getter/setter methods in QsNode, QsPlan, and QsFieldBinding classes. This follows Java encapsulation best practices and allows for future validation or transformation logic. - Make all fields private with @JsonProperty getters for serialization - Add setters for QsNode.field and QsFieldBinding.fieldName (needed for field name normalization in RewriteSearchToSlots) - Update all usages to use getter/setter methods instead of direct field access - Add Javadoc to QsNode constructors Co-Authored-By: Claude Opus 4.5 --- .../doris/analysis/SearchPredicate.java | 57 +- .../rules/rewrite/RewriteSearchToSlots.java | 20 +- .../functions/scalar/SearchDslParser.java | 181 ++++-- .../rewrite/RewriteSearchToSlotsTest.java | 28 +- .../expressions/SearchExpressionTest.java | 2 +- .../functions/scalar/SearchDslParserTest.java | 568 +++++++++--------- .../functions/scalar/SearchTest.java | 12 +- 7 files changed, 484 insertions(+), 384 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SearchPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SearchPredicate.java index 8440d70b334750..b53386206e9fe4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SearchPredicate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SearchPredicate.java @@ -88,12 +88,12 @@ protected void toThrift(TExprNode msg) { // Print QsPlan details if (qsPlan != null) { LOG.info("SearchPredicate.toThrift: QsPlan fieldBindings.size={}", - qsPlan.fieldBindings != null ? qsPlan.fieldBindings.size() : 0); - if (qsPlan.fieldBindings != null) { - for (int i = 0; i < qsPlan.fieldBindings.size(); i++) { - SearchDslParser.QsFieldBinding binding = qsPlan.fieldBindings.get(i); + qsPlan.getFieldBindings() != null ? 
qsPlan.getFieldBindings().size() : 0); + if (qsPlan.getFieldBindings() != null) { + for (int i = 0; i < qsPlan.getFieldBindings().size(); i++) { + SearchDslParser.QsFieldBinding binding = qsPlan.getFieldBindings().get(i); LOG.info("SearchPredicate.toThrift: binding[{}] fieldName='{}', slotIndex={}", - i, binding.fieldName, binding.slotIndex); + i, binding.getFieldName(), binding.getSlotIndex()); } } } @@ -142,14 +142,14 @@ public int hashCode() { private TSearchParam buildThriftParam() { TSearchParam param = new TSearchParam(); param.setOriginalDsl(dslString); - param.setRoot(convertQsNodeToThrift(qsPlan.root)); + param.setRoot(convertQsNodeToThrift(qsPlan.getRoot())); List bindings = new ArrayList<>(); - for (int i = 0; i < qsPlan.fieldBindings.size(); i++) { - SearchDslParser.QsFieldBinding binding = qsPlan.fieldBindings.get(i); + for (int i = 0; i < qsPlan.getFieldBindings().size(); i++) { + SearchDslParser.QsFieldBinding binding = qsPlan.getFieldBindings().get(i); TSearchFieldBinding thriftBinding = new TSearchFieldBinding(); - String fieldPath = binding.fieldName; + String fieldPath = binding.getFieldName(); thriftBinding.setFieldName(fieldPath); // Check if this is a variant subcolumn (contains dot) @@ -176,9 +176,10 @@ private TSearchParam buildThriftParam() { SlotRef slotRef = (SlotRef) this.children.get(i); int actualSlotId = slotRef.getSlotId().asInt(); thriftBinding.setSlotIndex(actualSlotId); - LOG.info("buildThriftParam: binding field='{}', actual slotId={}", binding.fieldName, actualSlotId); + LOG.info("buildThriftParam: binding field='{}', actual slotId={}", + binding.getFieldName(), actualSlotId); } else { - LOG.warn("buildThriftParam: No corresponding SlotRef for field '{}'", binding.fieldName); + LOG.warn("buildThriftParam: No corresponding SlotRef for field '{}'", binding.getFieldName()); thriftBinding.setSlotIndex(i); // fallback to position } @@ -230,10 +231,10 @@ private boolean isExplainVerboseContext() { private List 
buildDslAstExplainLines() { List lines = new ArrayList<>(); - if (qsPlan == null || qsPlan.root == null) { + if (qsPlan == null || qsPlan.getRoot() == null) { return lines; } - TSearchClause rootClause = convertQsNodeToThrift(qsPlan.root); + TSearchClause rootClause = convertQsNodeToThrift(qsPlan.getRoot()); appendClauseExplain(rootClause, lines, 0); return lines; } @@ -258,11 +259,11 @@ private void appendClauseExplain(TSearchClause clause, List lines, int d private List buildFieldBindingExplainLines() { List lines = new ArrayList<>(); - if (qsPlan == null || qsPlan.fieldBindings == null || qsPlan.fieldBindings.isEmpty()) { + if (qsPlan == null || qsPlan.getFieldBindings() == null || qsPlan.getFieldBindings().isEmpty()) { return lines; } - IntStream.range(0, qsPlan.fieldBindings.size()).forEach(index -> { - SearchDslParser.QsFieldBinding binding = qsPlan.fieldBindings.get(index); + IntStream.range(0, qsPlan.getFieldBindings().size()).forEach(index -> { + SearchDslParser.QsFieldBinding binding = qsPlan.getFieldBindings().get(index); String slotDesc = ""; if (index < children.size() && children.get(index) instanceof SlotRef) { SlotRef slotRef = (SlotRef) children.get(index); @@ -272,7 +273,7 @@ private List buildFieldBindingExplainLines() { } else if (index < children.size()) { slotDesc = children.get(index).toSqlWithoutTbl(); } - lines.add(binding.fieldName + " -> " + slotDesc); + lines.add(binding.getFieldName() + " -> " + slotDesc); }); return lines; } @@ -304,29 +305,29 @@ private TSearchClause convertQsNodeToThrift( TSearchClause clause = new TSearchClause(); // Convert clause type - clause.setClauseType(node.type.name()); + clause.setClauseType(node.getType().name()); - if (node.field != null) { - clause.setFieldName(node.field); + if (node.getField() != null) { + clause.setFieldName(node.getField()); } - if (node.value != null) { - clause.setValue(node.value); + if (node.getValue() != null) { + clause.setValue(node.getValue()); } // Convert occur type for 
Lucene-style boolean queries - if (node.occur != null) { - clause.setOccur(convertQsOccurToThrift(node.occur)); + if (node.getOccur() != null) { + clause.setOccur(convertQsOccurToThrift(node.getOccur())); } // Convert minimum_should_match for OCCUR_BOOLEAN - if (node.minimumShouldMatch != null) { - clause.setMinimumShouldMatch(node.minimumShouldMatch); + if (node.getMinimumShouldMatch() != null) { + clause.setMinimumShouldMatch(node.getMinimumShouldMatch()); } - if (node.children != null && !node.children.isEmpty()) { + if (node.getChildren() != null && !node.getChildren().isEmpty()) { List childClauses = new ArrayList<>(); - for (SearchDslParser.QsNode child : node.children) { + for (SearchDslParser.QsNode child : node.getChildren()) { childClauses.add(convertQsNodeToThrift(child)); } clause.setChildren(childClauses); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java index 3114fff0593b4d..83da8f99a96821 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java @@ -93,7 +93,7 @@ private Expression rewriteSearch(Search search, LogicalOlapScan scan) { try { // Parse DSL to get field bindings SearchDslParser.QsPlan qsPlan = search.getQsPlan(); - if (qsPlan == null || qsPlan.fieldBindings == null || qsPlan.fieldBindings.isEmpty()) { + if (qsPlan == null || qsPlan.getFieldBindings() == null || qsPlan.getFieldBindings().isEmpty()) { LOG.warn("Search function has no field bindings: {}", search.getDslString()); return search; } @@ -102,8 +102,8 @@ private Expression rewriteSearch(Search search, LogicalOlapScan scan) { // Create slot reference children from field bindings List slotChildren = new ArrayList<>(); - for (SearchDslParser.QsFieldBinding binding : qsPlan.fieldBindings) { - String 
originalFieldName = binding.fieldName; + for (SearchDslParser.QsFieldBinding binding : qsPlan.getFieldBindings()) { + String originalFieldName = binding.getFieldName(); Expression childExpr; String normalizedFieldName; @@ -151,14 +151,14 @@ private Expression rewriteSearch(Search search, LogicalOlapScan scan) { } normalizedFields.put(originalFieldName, normalizedFieldName); - binding.fieldName = normalizedFieldName; + binding.setFieldName(normalizedFieldName); slotChildren.add(childExpr); } LOG.info("Rewriting search function: dsl='{}' with {} slot children", search.getDslString(), slotChildren.size()); - normalizePlanFields(qsPlan.root, normalizedFields); + normalizePlanFields(qsPlan.getRoot(), normalizedFields); // Create SearchExpression with slot children return new SearchExpression(search.getDslString(), qsPlan, slotChildren); @@ -182,16 +182,16 @@ private void normalizePlanFields(SearchDslParser.QsNode node, Map entry : normalized.entrySet()) { - if (entry.getKey().equalsIgnoreCase(node.field)) { - node.field = entry.getValue(); + if (entry.getKey().equalsIgnoreCase(node.getField())) { + node.setField(entry.getValue()); break; } } } - if (node.children != null) { - for (SearchDslParser.QsNode child : node.children) { + if (node.getChildren() != null) { + for (SearchDslParser.QsNode child : node.getChildren()) { normalizePlanFields(child, normalized); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java index b499219a142cc3..099f6b0e7518ff 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java @@ -32,7 +32,8 @@ import org.apache.logging.log4j.Logger; import java.util.ArrayList; -import java.util.HashSet; +import 
java.util.Collections; +import java.util.LinkedHashSet; import java.util.List; import java.util.Objects; import java.util.Set; @@ -179,7 +180,7 @@ public void syntaxError(org.antlr.v4.runtime.Recognizer recognizer, return new QsPlan(root, bindings); - } catch (Exception e) { + } catch (RuntimeException e) { LOG.error("Failed to parse search DSL: '{}' (expanded: '{}')", dsl, expandedDsl, e); throw new RuntimeException("Invalid search DSL syntax: " + dsl + ". Error: " + e.getMessage(), e); } @@ -555,7 +556,7 @@ public void syntaxError(org.antlr.v4.runtime.Recognizer recognizer, return new QsPlan(root, bindings); - } catch (Exception e) { + } catch (RuntimeException e) { LOG.error("Failed to parse search DSL in {}: '{}' (expanded: '{}')", modeDescription, originalDsl, expandedDsl, e); throw new RuntimeException("Invalid search DSL syntax: " + originalDsl @@ -1005,12 +1006,12 @@ private interface FieldTrackingVisitor { * ANTLR visitor to build QsNode AST from parse tree */ private static class QsAstBuilder extends SearchParserBaseVisitor implements FieldTrackingVisitor { - private final Set fieldNames = new HashSet<>(); + private final Set fieldNames = new LinkedHashSet<>(); // Context stack to track current field name during parsing private String currentFieldName = null; public Set getFieldNames() { - return fieldNames; + return Collections.unmodifiableSet(fieldNames); } @Override @@ -1300,20 +1301,29 @@ private String stripOuterQuotes(String text) { } /** - * Intermediate Representation for search DSL parsing result + * Intermediate Representation for search DSL parsing result. + * This class is immutable after construction. 
*/ public static class QsPlan { @JsonProperty("root") - public QsNode root; + private final QsNode root; @JsonProperty("fieldBindings") - public List fieldBindings; + private final List fieldBindings; @JsonCreator public QsPlan(@JsonProperty("root") QsNode root, @JsonProperty("fieldBindings") List fieldBindings) { - this.root = root; - this.fieldBindings = fieldBindings != null ? fieldBindings : new ArrayList<>(); + this.root = Objects.requireNonNull(root, "root cannot be null"); + this.fieldBindings = fieldBindings != null ? new ArrayList<>(fieldBindings) : new ArrayList<>(); + } + + public QsNode getRoot() { + return root; + } + + public List getFieldBindings() { + return Collections.unmodifiableList(fieldBindings); } /** @@ -1353,32 +1363,38 @@ public boolean equals(Object o) { return false; } QsPlan qsPlan = (QsPlan) o; - return Objects.equals(root, qsPlan.root) - && Objects.equals(fieldBindings, qsPlan.fieldBindings); + return Objects.equals(root, qsPlan.getRoot()) + && Objects.equals(fieldBindings, qsPlan.getFieldBindings()); } } /** - * Search AST node representing a clause in the DSL + * Search AST node representing a clause in the DSL. + * + * <p><b>Warning:</b> This class is mutable. The {@code occur}, {@code children}, + * and other fields can be modified after construction. Although this class implements + * {@code equals()} and {@code hashCode()}, it should NOT be used as a key in + * {@code HashMap} or element in {@code HashSet} if any field may be modified after + * insertion, as this will break the hash-based collection contract. */ public static class QsNode { @JsonProperty("type") - public QsClauseType type; + private final QsClauseType type; @JsonProperty("field") - public String field; + private String field; @JsonProperty("value") - public String value; + private final String value; @JsonProperty("children") - public List children; + private final List children; @JsonProperty("occur") - public QsOccur occur; + private QsOccur occur; @JsonProperty("minimumShouldMatch") - public Integer minimumShouldMatch; + private final Integer minimumShouldMatch; /** * Constructor for JSON deserialization @@ -1400,30 +1416,96 @@ public QsNode(@JsonProperty("type") QsClauseType type, this.type = type; this.field = field; this.value = value; - this.children = children != null ? children : new ArrayList<>(); + this.children = children != null ? new ArrayList<>(children) : new ArrayList<>(); this.occur = occur; this.minimumShouldMatch = minimumShouldMatch; } + /** + * Constructor for leaf nodes (TERM, PHRASE, PREFIX, etc.) + * + * @param type the clause type + * @param field the field name + * @param value the field value + */ public QsNode(QsClauseType type, String field, String value) { this.type = type; this.field = field; this.value = value; this.children = new ArrayList<>(); + this.occur = null; + this.minimumShouldMatch = null; } + /** + * Constructor for compound nodes (AND, OR, NOT) + * + * @param type the clause type + * @param children the child nodes + */ public QsNode(QsClauseType type, List children) { this.type = type; - this.children = children != null ?
children : new ArrayList<>(); + this.field = null; + this.value = null; + this.children = children != null ? new ArrayList<>(children) : new ArrayList<>(); + this.occur = null; + this.minimumShouldMatch = null; } + /** + * Constructor for OCCUR_BOOLEAN nodes with minimum_should_match + * + * @param type the clause type + * @param children the child nodes + * @param minimumShouldMatch the minimum number of SHOULD clauses that must match + */ public QsNode(QsClauseType type, List children, Integer minimumShouldMatch) { this.type = type; - this.children = children != null ? children : new ArrayList<>(); + this.field = null; + this.value = null; + this.children = children != null ? new ArrayList<>(children) : new ArrayList<>(); + this.occur = null; this.minimumShouldMatch = minimumShouldMatch; } - public QsNode withOccur(QsOccur occur) { + public QsClauseType getType() { + return type; + } + + public String getField() { + return field; + } + + /** + * Sets the field name for this node (used for field name normalization). + * @param field the normalized field name + */ + public void setField(String field) { + this.field = field; + } + + public String getValue() { + return value; + } + + public List getChildren() { + return Collections.unmodifiableList(children); + } + + public QsOccur getOccur() { + return occur; + } + + public Integer getMinimumShouldMatch() { + return minimumShouldMatch; + } + + /** + * Sets the occur type for this node. 
+ * @param occur the occur type (MUST, SHOULD, MUST_NOT) + * @return this node for method chaining + */ + public QsNode setOccur(QsOccur occur) { this.occur = occur; return this; } @@ -1442,24 +1524,25 @@ public boolean equals(Object o) { return false; } QsNode qsNode = (QsNode) o; - return type == qsNode.type - && Objects.equals(field, qsNode.field) - && Objects.equals(value, qsNode.value) - && Objects.equals(children, qsNode.children) - && occur == qsNode.occur - && Objects.equals(minimumShouldMatch, qsNode.minimumShouldMatch); + return type == qsNode.getType() + && Objects.equals(field, qsNode.getField()) + && Objects.equals(value, qsNode.getValue()) + && Objects.equals(children, qsNode.getChildren()) + && occur == qsNode.getOccur() + && Objects.equals(minimumShouldMatch, qsNode.getMinimumShouldMatch()); } } /** - * Field binding information extracted from DSL + * Field binding information extracted from DSL. + * The fieldName may be modified for normalization purposes. */ public static class QsFieldBinding { @JsonProperty("fieldName") - public String fieldName; + private String fieldName; @JsonProperty("slotIndex") - public int slotIndex; + private final int slotIndex; @JsonCreator public QsFieldBinding(@JsonProperty("fieldName") String fieldName, @@ -1468,6 +1551,22 @@ public QsFieldBinding(@JsonProperty("fieldName") String fieldName, this.slotIndex = slotIndex; } + public String getFieldName() { + return fieldName; + } + + /** + * Sets the field name (used for field name normalization). + * @param fieldName the normalized field name + */ + public void setFieldName(String fieldName) { + this.fieldName = fieldName; + } + + public int getSlotIndex() { + return slotIndex; + } + @Override public int hashCode() { return Objects.hash(fieldName, slotIndex); @@ -1541,11 +1640,11 @@ public void setMinimumShouldMatch(Integer minimumShouldMatch) { } public List getFields() { - return fields; + return fields == null ? 
null : Collections.unmodifiableList(fields); } public void setFields(List fields) { - this.fields = fields; + this.fields = fields == null ? null : new ArrayList<>(fields); } /** @@ -1659,7 +1758,7 @@ private static SearchOptions parseOptions(String optionsJson) { } } catch (IllegalArgumentException e) { throw e; - } catch (Exception e) { + } catch (JsonProcessingException e) { throw new IllegalArgumentException( "Invalid search options JSON: '" + optionsJson + "'. Error: " + e.getMessage(), e); } @@ -1734,7 +1833,7 @@ public void syntaxError(org.antlr.v4.runtime.Recognizer recognizer, return new QsPlan(root, bindings); - } catch (Exception e) { + } catch (RuntimeException e) { LOG.error("Failed to parse search DSL in Lucene mode: '{}' (expanded: '{}')", dsl, expandedDsl, e); throw new RuntimeException("Invalid search DSL syntax: " + dsl + ". Error: " + e.getMessage(), e); } @@ -1746,7 +1845,7 @@ public void syntaxError(org.antlr.v4.runtime.Recognizer recognizer, */ private static class QsLuceneModeAstBuilder extends SearchParserBaseVisitor implements FieldTrackingVisitor { - private final Set fieldNames = new HashSet<>(); + private final Set fieldNames = new LinkedHashSet<>(); private final SearchOptions options; private String currentFieldName = null; @@ -1755,7 +1854,7 @@ public QsLuceneModeAstBuilder(SearchOptions options) { } public Set getFieldNames() { - return fieldNames; + return Collections.unmodifiableSet(fieldNames); } @Override @@ -1791,7 +1890,7 @@ private QsNode processLuceneBooleanChain(SearchParser.OrClauseContext ctx) { TermWithOccur singleTerm = terms.get(0); if (singleTerm.isNegated) { // Single negated term - must wrap in OCCUR_BOOLEAN for BE to handle MUST_NOT - singleTerm.node.occur = QsOccur.MUST_NOT; + singleTerm.node.setOccur(QsOccur.MUST_NOT); List children = new ArrayList<>(); children.add(singleTerm.node); return new QsNode(QsClauseType.OCCUR_BOOLEAN, children, 0); @@ -1831,7 +1930,7 @@ private QsNode 
processLuceneBooleanChain(SearchParser.OrClauseContext ctx) { TermWithOccur remainingTerm = terms.get(0); if (remainingTerm.occur == QsOccur.MUST_NOT) { // Single MUST_NOT term - must wrap in OCCUR_BOOLEAN for BE to handle - remainingTerm.node.occur = QsOccur.MUST_NOT; + remainingTerm.node.setOccur(QsOccur.MUST_NOT); List children = new ArrayList<>(); children.add(remainingTerm.node); return new QsNode(QsClauseType.OCCUR_BOOLEAN, children, 0); @@ -1842,7 +1941,7 @@ private QsNode processLuceneBooleanChain(SearchParser.OrClauseContext ctx) { // Build OCCUR_BOOLEAN node List children = new ArrayList<>(); for (TermWithOccur term : terms) { - term.node.occur = term.occur; + term.node.setOccur(term.occur); children.add(term.node); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlotsTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlotsTest.java index 152a7e75929f60..76e25cc3879e65 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlotsTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlotsTest.java @@ -127,8 +127,8 @@ public void testQsPlanParsing() { try { SearchDslParser.QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan, "Plan should not be null for DSL: " + dsl); - Assertions.assertNotNull(plan.root, "Plan root should not be null for DSL: " + dsl); - Assertions.assertTrue(plan.fieldBindings.size() > 0, "Should have field bindings for DSL: " + dsl); + Assertions.assertNotNull(plan.getRoot(), "Plan root should not be null for DSL: " + dsl); + Assertions.assertTrue(plan.getFieldBindings().size() > 0, "Should have field bindings for DSL: " + dsl); } catch (Exception e) { // DSL parsing might fail for complex cases - that's acceptable System.out.println("DSL parsing failed for: " + dsl + " - " + e.getMessage()); @@ -142,10 +142,10 @@ public void testFieldNameExtraction() { 
SearchDslParser.QsPlan plan = SearchDslParser.parseDsl(dsl); // Should extract 3 unique field names - Assertions.assertEquals(3, plan.fieldBindings.size()); + Assertions.assertEquals(3, plan.getFieldBindings().size()); - List fieldNames = plan.fieldBindings.stream() - .map(binding -> binding.fieldName) + List fieldNames = plan.getFieldBindings().stream() + .map(binding -> binding.getFieldName()) .distinct() .collect(java.util.stream.Collectors.toList()); @@ -163,12 +163,12 @@ public void testCaseInsensitiveFieldNames() { SearchDslParser.QsPlan plan2 = SearchDslParser.parseDsl(dsl2); // Both should work and extract field names - Assertions.assertEquals(1, plan1.fieldBindings.size()); - Assertions.assertEquals(1, plan2.fieldBindings.size()); + Assertions.assertEquals(1, plan1.getFieldBindings().size()); + Assertions.assertEquals(1, plan2.getFieldBindings().size()); // Field names should be consistent (implementation dependent) - Assertions.assertNotNull(plan1.fieldBindings.get(0).fieldName); - Assertions.assertNotNull(plan2.fieldBindings.get(0).fieldName); + Assertions.assertNotNull(plan1.getFieldBindings().get(0).getFieldName()); + Assertions.assertNotNull(plan2.getFieldBindings().get(0).getFieldName()); } @Test @@ -198,10 +198,10 @@ public void testComplexDslStructures() { try { SearchDslParser.QsPlan plan = SearchDslParser.parseDsl(complexDsl); Assertions.assertNotNull(plan); - Assertions.assertNotNull(plan.root); + Assertions.assertNotNull(plan.getRoot()); // Should have multiple field bindings - Assertions.assertTrue(plan.fieldBindings.size() >= 2); + Assertions.assertTrue(plan.getFieldBindings().size() >= 2); } catch (Exception e) { // Complex DSL might not be fully supported yet @@ -215,7 +215,7 @@ public void testSlotReferenceConsistency() { SearchDslParser.QsPlan plan = SearchDslParser.parseDsl(dsl); // Create slot reference matching the field binding - String fieldName = plan.fieldBindings.get(0).fieldName; + String fieldName = 
plan.getFieldBindings().get(0).getFieldName(); SlotReference slot = new SlotReference(fieldName, StringType.INSTANCE, true, Arrays.asList()); SearchExpression expr = new SearchExpression(dsl, plan, Arrays.asList(slot)); @@ -246,8 +246,8 @@ public void testRewriteSearchHandlesCaseInsensitiveField() throws Exception { Assertions.assertEquals("name", slot.getName()); SearchDslParser.QsPlan normalizedPlan = searchExpression.getQsPlan(); - Assertions.assertEquals("name", normalizedPlan.fieldBindings.get(0).fieldName); - Assertions.assertEquals("name", normalizedPlan.root.field); + Assertions.assertEquals("name", normalizedPlan.getFieldBindings().get(0).getFieldName()); + Assertions.assertEquals("name", normalizedPlan.getRoot().getField()); } @Test diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/SearchExpressionTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/SearchExpressionTest.java index 9d6996b928decd..43de6d03205354 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/SearchExpressionTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/SearchExpressionTest.java @@ -212,7 +212,7 @@ public void testMultipleSlotChildren() { Assertions.assertEquals(2, searchExpr.children().size()); Assertions.assertEquals(titleSlot, searchExpr.children().get(0)); Assertions.assertEquals(contentSlot, searchExpr.children().get(1)); - Assertions.assertEquals(2, searchExpr.getQsPlan().fieldBindings.size()); + Assertions.assertEquals(2, searchExpr.getQsPlan().getFieldBindings().size()); } @Test diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java index b864caa2400ca5..8586b3cd83b4af 100644 --- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java @@ -37,14 +37,14 @@ public void testSimpleTermQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertNotNull(plan.root); - Assertions.assertEquals(QsClauseType.TERM, plan.root.type); - Assertions.assertEquals("title", plan.root.field); - Assertions.assertEquals("hello", plan.root.value); + Assertions.assertNotNull(plan.getRoot()); + Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getType()); + Assertions.assertEquals("title", plan.getRoot().getField()); + Assertions.assertEquals("hello", plan.getRoot().getValue()); - Assertions.assertEquals(1, plan.fieldBindings.size()); - QsFieldBinding binding = plan.fieldBindings.get(0); - Assertions.assertEquals("title", binding.fieldName); + Assertions.assertEquals(1, plan.getFieldBindings().size()); + QsFieldBinding binding = plan.getFieldBindings().get(0); + Assertions.assertEquals("title", binding.getFieldName()); } @Test @@ -53,9 +53,9 @@ public void testPhraseQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.PHRASE, plan.root.type); - Assertions.assertEquals("content", plan.root.field); - Assertions.assertEquals("hello world", plan.root.value); + Assertions.assertEquals(QsClauseType.PHRASE, plan.getRoot().getType()); + Assertions.assertEquals("content", plan.getRoot().getField()); + Assertions.assertEquals("hello world", plan.getRoot().getValue()); } @Test @@ -64,9 +64,9 @@ public void testPrefixQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.PREFIX, plan.root.type); - Assertions.assertEquals("title", plan.root.field); - Assertions.assertEquals("hello*", plan.root.value); + 
Assertions.assertEquals(QsClauseType.PREFIX, plan.getRoot().getType()); + Assertions.assertEquals("title", plan.getRoot().getField()); + Assertions.assertEquals("hello*", plan.getRoot().getValue()); } @Test @@ -75,9 +75,9 @@ public void testWildcardQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.WILDCARD, plan.root.type); - Assertions.assertEquals("title", plan.root.field); - Assertions.assertEquals("h*llo", plan.root.value); + Assertions.assertEquals(QsClauseType.WILDCARD, plan.getRoot().getType()); + Assertions.assertEquals("title", plan.getRoot().getField()); + Assertions.assertEquals("h*llo", plan.getRoot().getValue()); } @Test @@ -86,9 +86,9 @@ public void testRegexpQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.REGEXP, plan.root.type); - Assertions.assertEquals("title", plan.root.field); - Assertions.assertEquals("[a-z]+", plan.root.value); // slashes removed + Assertions.assertEquals(QsClauseType.REGEXP, plan.getRoot().getType()); + Assertions.assertEquals("title", plan.getRoot().getField()); + Assertions.assertEquals("[a-z]+", plan.getRoot().getValue()); // slashes removed } @Test @@ -97,9 +97,9 @@ public void testRangeQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.RANGE, plan.root.type); - Assertions.assertEquals("age", plan.root.field); - Assertions.assertEquals("[18 TO 65]", plan.root.value); + Assertions.assertEquals(QsClauseType.RANGE, plan.getRoot().getType()); + Assertions.assertEquals("age", plan.getRoot().getField()); + Assertions.assertEquals("[18 TO 65]", plan.getRoot().getValue()); } @Test @@ -108,9 +108,9 @@ public void testListQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.LIST, plan.root.type); - Assertions.assertEquals("category", 
plan.root.field); - Assertions.assertEquals("IN(tech news)", plan.root.value); + Assertions.assertEquals(QsClauseType.LIST, plan.getRoot().getType()); + Assertions.assertEquals("category", plan.getRoot().getField()); + Assertions.assertEquals("IN(tech news)", plan.getRoot().getValue()); } @Test @@ -119,9 +119,9 @@ public void testAnyQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.ANY, plan.root.type); - Assertions.assertEquals("tags", plan.root.field); - Assertions.assertEquals("java python", plan.root.value); + Assertions.assertEquals(QsClauseType.ANY, plan.getRoot().getType()); + Assertions.assertEquals("tags", plan.getRoot().getField()); + Assertions.assertEquals("java python", plan.getRoot().getValue()); } @Test @@ -130,9 +130,9 @@ public void testAllQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.ALL, plan.root.type); - Assertions.assertEquals("tags", plan.root.field); - Assertions.assertEquals("programming language", plan.root.value); + Assertions.assertEquals(QsClauseType.ALL, plan.getRoot().getType()); + Assertions.assertEquals("tags", plan.getRoot().getField()); + Assertions.assertEquals("programming language", plan.getRoot().getValue()); } @Test @@ -141,9 +141,9 @@ public void testAllQueryWithQuotes() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.ALL, plan.root.type); - Assertions.assertEquals("redirect", plan.root.field); - Assertions.assertEquals("Rainbowman", plan.root.value); + Assertions.assertEquals(QsClauseType.ALL, plan.getRoot().getType()); + Assertions.assertEquals("redirect", plan.getRoot().getField()); + Assertions.assertEquals("Rainbowman", plan.getRoot().getValue()); } @Test @@ -152,9 +152,9 @@ public void testAnyQueryWithQuotes() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - 
Assertions.assertEquals(QsClauseType.ANY, plan.root.type); - Assertions.assertEquals("tags", plan.root.field); - Assertions.assertEquals("Mandy Patinkin", plan.root.value); + Assertions.assertEquals(QsClauseType.ANY, plan.getRoot().getType()); + Assertions.assertEquals("tags", plan.getRoot().getField()); + Assertions.assertEquals("Mandy Patinkin", plan.getRoot().getValue()); } @Test @@ -163,23 +163,23 @@ public void testAndQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.AND, plan.root.type); - Assertions.assertEquals(2, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); - QsNode leftChild = plan.root.children.get(0); - Assertions.assertEquals(QsClauseType.TERM, leftChild.type); - Assertions.assertEquals("title", leftChild.field); - Assertions.assertEquals("hello", leftChild.value); + QsNode leftChild = plan.getRoot().getChildren().get(0); + Assertions.assertEquals(QsClauseType.TERM, leftChild.getType()); + Assertions.assertEquals("title", leftChild.getField()); + Assertions.assertEquals("hello", leftChild.getValue()); - QsNode rightChild = plan.root.children.get(1); - Assertions.assertEquals(QsClauseType.TERM, rightChild.type); - Assertions.assertEquals("content", rightChild.field); - Assertions.assertEquals("world", rightChild.value); + QsNode rightChild = plan.getRoot().getChildren().get(1); + Assertions.assertEquals(QsClauseType.TERM, rightChild.getType()); + Assertions.assertEquals("content", rightChild.getField()); + Assertions.assertEquals("world", rightChild.getValue()); // Should have 2 field bindings - Assertions.assertEquals(2, plan.fieldBindings.size()); - Assertions.assertTrue(plan.fieldBindings.stream().anyMatch(b -> "title".equals(b.fieldName))); - Assertions.assertTrue(plan.fieldBindings.stream().anyMatch(b -> "content".equals(b.fieldName))); + 
Assertions.assertEquals(2, plan.getFieldBindings().size()); + Assertions.assertTrue(plan.getFieldBindings().stream().anyMatch(b -> "title".equals(b.getFieldName()))); + Assertions.assertTrue(plan.getFieldBindings().stream().anyMatch(b -> "content".equals(b.getFieldName()))); } @Test @@ -188,8 +188,8 @@ public void testOrQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OR, plan.root.type); - Assertions.assertEquals(2, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); } @Test @@ -198,13 +198,13 @@ public void testNotQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.NOT, plan.root.type); - Assertions.assertEquals(1, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.NOT, plan.getRoot().getType()); + Assertions.assertEquals(1, plan.getRoot().getChildren().size()); - QsNode child = plan.root.children.get(0); - Assertions.assertEquals(QsClauseType.TERM, child.type); - Assertions.assertEquals("title", child.field); - Assertions.assertEquals("spam", child.value); + QsNode child = plan.getRoot().getChildren().get(0); + Assertions.assertEquals(QsClauseType.TERM, child.getType()); + Assertions.assertEquals("title", child.getField()); + Assertions.assertEquals("spam", child.getValue()); } @Test @@ -213,14 +213,14 @@ public void testComplexQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.AND, plan.root.type); - Assertions.assertEquals(2, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); // Should have 3 field bindings - Assertions.assertEquals(3, plan.fieldBindings.size()); - 
Assertions.assertTrue(plan.fieldBindings.stream().anyMatch(b -> "title".equals(b.fieldName))); - Assertions.assertTrue(plan.fieldBindings.stream().anyMatch(b -> "content".equals(b.fieldName))); - Assertions.assertTrue(plan.fieldBindings.stream().anyMatch(b -> "category".equals(b.fieldName))); + Assertions.assertEquals(3, plan.getFieldBindings().size()); + Assertions.assertTrue(plan.getFieldBindings().stream().anyMatch(b -> "title".equals(b.getFieldName()))); + Assertions.assertTrue(plan.getFieldBindings().stream().anyMatch(b -> "content".equals(b.getFieldName()))); + Assertions.assertTrue(plan.getFieldBindings().stream().anyMatch(b -> "category".equals(b.getFieldName()))); } @Test @@ -229,9 +229,9 @@ public void testEmptyDsl() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.TERM, plan.root.type); - Assertions.assertEquals("error", plan.root.field); - Assertions.assertEquals("empty_dsl", plan.root.value); + Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getType()); + Assertions.assertEquals("error", plan.getRoot().getField()); + Assertions.assertEquals("empty_dsl", plan.getRoot().getValue()); } @Test @@ -258,9 +258,9 @@ public void testQsPlanSerialization() { QsPlan deserialized = QsPlan.fromJson(json); Assertions.assertNotNull(deserialized); - Assertions.assertEquals(plan.root.type, deserialized.root.type); - Assertions.assertEquals(plan.root.field, deserialized.root.field); - Assertions.assertEquals(plan.root.value, deserialized.root.value); + Assertions.assertEquals(plan.getRoot().getType(), deserialized.getRoot().getType()); + Assertions.assertEquals(plan.getRoot().getField(), deserialized.getRoot().getField()); + Assertions.assertEquals(plan.getRoot().getValue(), deserialized.getRoot().getValue()); } @Test @@ -269,9 +269,9 @@ public void testQuotedFieldNames() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals("field name", 
plan.root.field); - Assertions.assertEquals(1, plan.fieldBindings.size()); - Assertions.assertEquals("field name", plan.fieldBindings.get(0).fieldName); + Assertions.assertEquals("field name", plan.getRoot().getField()); + Assertions.assertEquals(1, plan.getFieldBindings().size()); + Assertions.assertEquals("field name", plan.getFieldBindings().get(0).getFieldName()); } // ============ Tests for Default Field and Operator Support ============ @@ -283,11 +283,11 @@ public void testDefaultFieldWithSimpleTerm() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", null); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.TERM, plan.root.type); - Assertions.assertEquals("tags", plan.root.field); - Assertions.assertEquals("foo", plan.root.value); - Assertions.assertEquals(1, plan.fieldBindings.size()); - Assertions.assertEquals("tags", plan.fieldBindings.get(0).fieldName); + Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getType()); + Assertions.assertEquals("tags", plan.getRoot().getField()); + Assertions.assertEquals("foo", plan.getRoot().getValue()); + Assertions.assertEquals(1, plan.getFieldBindings().size()); + Assertions.assertEquals("tags", plan.getFieldBindings().get(0).getFieldName()); } @Test @@ -297,9 +297,9 @@ public void testDefaultFieldWithMultiTermAnd() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", "and"); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.ALL, plan.root.type); - Assertions.assertEquals("tags", plan.root.field); - Assertions.assertEquals("foo bar", plan.root.value); + Assertions.assertEquals(QsClauseType.ALL, plan.getRoot().getType()); + Assertions.assertEquals("tags", plan.getRoot().getField()); + Assertions.assertEquals("foo bar", plan.getRoot().getValue()); } @Test @@ -309,9 +309,9 @@ public void testDefaultFieldWithMultiTermOr() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", "or"); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.ANY, 
plan.root.type); - Assertions.assertEquals("tags", plan.root.field); - Assertions.assertEquals("foo bar", plan.root.value); + Assertions.assertEquals(QsClauseType.ANY, plan.getRoot().getType()); + Assertions.assertEquals("tags", plan.getRoot().getField()); + Assertions.assertEquals("foo bar", plan.getRoot().getValue()); } @Test @@ -321,9 +321,9 @@ public void testDefaultFieldWithMultiTermDefaultOr() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", null); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.ANY, plan.root.type); - Assertions.assertEquals("tags", plan.root.field); - Assertions.assertEquals("foo bar", plan.root.value); + Assertions.assertEquals(QsClauseType.ANY, plan.getRoot().getType()); + Assertions.assertEquals("tags", plan.getRoot().getField()); + Assertions.assertEquals("foo bar", plan.getRoot().getValue()); } @Test @@ -333,9 +333,9 @@ public void testDefaultFieldWithWildcardSingleTerm() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", null); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.PREFIX, plan.root.type); - Assertions.assertEquals("tags", plan.root.field); - Assertions.assertEquals("foo*", plan.root.value); + Assertions.assertEquals(QsClauseType.PREFIX, plan.getRoot().getType()); + Assertions.assertEquals("tags", plan.getRoot().getField()); + Assertions.assertEquals("foo*", plan.getRoot().getValue()); } @Test @@ -345,18 +345,18 @@ public void testDefaultFieldWithWildcardMultiTermAnd() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", "and"); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.AND, plan.root.type); - Assertions.assertEquals(2, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); - QsNode firstChild = plan.root.children.get(0); - Assertions.assertEquals(QsClauseType.PREFIX, firstChild.type); - Assertions.assertEquals("tags", 
firstChild.field); - Assertions.assertEquals("foo*", firstChild.value); + QsNode firstChild = plan.getRoot().getChildren().get(0); + Assertions.assertEquals(QsClauseType.PREFIX, firstChild.getType()); + Assertions.assertEquals("tags", firstChild.getField()); + Assertions.assertEquals("foo*", firstChild.getValue()); - QsNode secondChild = plan.root.children.get(1); - Assertions.assertEquals(QsClauseType.PREFIX, secondChild.type); - Assertions.assertEquals("tags", secondChild.field); - Assertions.assertEquals("bar*", secondChild.value); + QsNode secondChild = plan.getRoot().getChildren().get(1); + Assertions.assertEquals(QsClauseType.PREFIX, secondChild.getType()); + Assertions.assertEquals("tags", secondChild.getField()); + Assertions.assertEquals("bar*", secondChild.getValue()); } @Test @@ -366,8 +366,8 @@ public void testDefaultFieldWithWildcardMultiTermOr() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", "or"); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OR, plan.root.type); - Assertions.assertEquals(2, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); } @Test @@ -377,16 +377,16 @@ public void testDefaultFieldWithExplicitOperatorOverride() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", "and"); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OR, plan.root.type); - Assertions.assertEquals(2, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); - QsNode firstChild = plan.root.children.get(0); - Assertions.assertEquals("tags", firstChild.field); - Assertions.assertEquals("foo", firstChild.value); + QsNode firstChild = plan.getRoot().getChildren().get(0); + Assertions.assertEquals("tags", firstChild.getField()); + Assertions.assertEquals("foo", firstChild.getValue()); - QsNode 
secondChild = plan.root.children.get(1); - Assertions.assertEquals("tags", secondChild.field); - Assertions.assertEquals("bar", secondChild.value); + QsNode secondChild = plan.getRoot().getChildren().get(1); + Assertions.assertEquals("tags", secondChild.getField()); + Assertions.assertEquals("bar", secondChild.getValue()); } @Test @@ -396,8 +396,8 @@ public void testDefaultFieldWithExplicitAndOperator() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", "or"); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.AND, plan.root.type); - Assertions.assertEquals(2, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); } @Test @@ -407,9 +407,9 @@ public void testDefaultFieldWithExactFunction() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", null); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.EXACT, plan.root.type); - Assertions.assertEquals("tags", plan.root.field); - Assertions.assertEquals("foo bar", plan.root.value); + Assertions.assertEquals(QsClauseType.EXACT, plan.getRoot().getType()); + Assertions.assertEquals("tags", plan.getRoot().getField()); + Assertions.assertEquals("foo bar", plan.getRoot().getValue()); } @Test @@ -419,9 +419,9 @@ public void testDefaultFieldWithAnyFunction() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", null); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.ANY, plan.root.type); - Assertions.assertEquals("tags", plan.root.field); - Assertions.assertEquals("foo bar", plan.root.value); + Assertions.assertEquals(QsClauseType.ANY, plan.getRoot().getType()); + Assertions.assertEquals("tags", plan.getRoot().getField()); + Assertions.assertEquals("foo bar", plan.getRoot().getValue()); } @Test @@ -431,9 +431,9 @@ public void testDefaultFieldWithAllFunction() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", null); Assertions.assertNotNull(plan); - 
Assertions.assertEquals(QsClauseType.ALL, plan.root.type); - Assertions.assertEquals("tags", plan.root.field); - Assertions.assertEquals("foo bar", plan.root.value); + Assertions.assertEquals(QsClauseType.ALL, plan.getRoot().getType()); + Assertions.assertEquals("tags", plan.getRoot().getField()); + Assertions.assertEquals("foo bar", plan.getRoot().getValue()); } @Test @@ -443,9 +443,9 @@ public void testDefaultFieldIgnoredWhenDslHasFieldReference() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", "and"); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.TERM, plan.root.type); - Assertions.assertEquals("title", plan.root.field); // Should be "title", not "tags" - Assertions.assertEquals("hello", plan.root.value); + Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getType()); + Assertions.assertEquals("title", plan.getRoot().getField()); // Should be "title", not "tags" + Assertions.assertEquals("hello", plan.getRoot().getValue()); } @Test @@ -468,15 +468,15 @@ public void testDefaultOperatorCaseInsensitive() { // Test "AND" QsPlan plan1 = SearchDslParser.parseDsl(dsl, "tags", "AND"); - Assertions.assertEquals(QsClauseType.ALL, plan1.root.type); + Assertions.assertEquals(QsClauseType.ALL, plan1.getRoot().getType()); // Test "Or" QsPlan plan2 = SearchDslParser.parseDsl(dsl, "tags", "Or"); - Assertions.assertEquals(QsClauseType.ANY, plan2.root.type); + Assertions.assertEquals(QsClauseType.ANY, plan2.getRoot().getType()); // Test "aNd" QsPlan plan3 = SearchDslParser.parseDsl(dsl, "tags", "aNd"); - Assertions.assertEquals(QsClauseType.ALL, plan3.root.type); + Assertions.assertEquals(QsClauseType.ALL, plan3.getRoot().getType()); } @Test @@ -486,9 +486,9 @@ public void testDefaultFieldWithComplexWildcard() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", null); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.WILDCARD, plan.root.type); - Assertions.assertEquals("tags", plan.root.field); - 
Assertions.assertEquals("*foo*", plan.root.value); + Assertions.assertEquals(QsClauseType.WILDCARD, plan.getRoot().getType()); + Assertions.assertEquals("tags", plan.getRoot().getField()); + Assertions.assertEquals("*foo*", plan.getRoot().getValue()); } @Test @@ -499,8 +499,8 @@ public void testDefaultFieldWithMixedWildcards() { Assertions.assertNotNull(plan); // Should create AND query because it contains wildcards - Assertions.assertEquals(QsClauseType.AND, plan.root.type); - Assertions.assertEquals(3, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType()); + Assertions.assertEquals(3, plan.getRoot().getChildren().size()); } @Test @@ -510,9 +510,9 @@ public void testDefaultFieldWithQuotedPhrase() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", "and"); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.PHRASE, plan.root.type); - Assertions.assertEquals("tags", plan.root.field); - Assertions.assertEquals("hello world", plan.root.value); + Assertions.assertEquals(QsClauseType.PHRASE, plan.getRoot().getType()); + Assertions.assertEquals("tags", plan.getRoot().getField()); + Assertions.assertEquals("hello world", plan.getRoot().getValue()); } @Test @@ -522,13 +522,13 @@ public void testDefaultFieldWithNotOperator() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", null); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.NOT, plan.root.type); - Assertions.assertEquals(1, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.NOT, plan.getRoot().getType()); + Assertions.assertEquals(1, plan.getRoot().getChildren().size()); - QsNode child = plan.root.children.get(0); - Assertions.assertEquals(QsClauseType.TERM, child.type); - Assertions.assertEquals("tags", child.field); - Assertions.assertEquals("foo", child.value); + QsNode child = plan.getRoot().getChildren().get(0); + Assertions.assertEquals(QsClauseType.TERM, child.getType()); + Assertions.assertEquals("tags", 
child.getField()); + Assertions.assertEquals("foo", child.getValue()); } @Test @@ -552,7 +552,7 @@ public void testDefaultFieldWithNullOperator() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", null); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.ANY, plan.root.type); // Defaults to OR/ANY + Assertions.assertEquals(QsClauseType.ANY, plan.getRoot().getType()); // Defaults to OR/ANY } @Test @@ -562,9 +562,9 @@ public void testDefaultFieldWithSingleWildcardTerm() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", "and"); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.WILDCARD, plan.root.type); - Assertions.assertEquals("tags", plan.root.field); - Assertions.assertEquals("f?o", plan.root.value); + Assertions.assertEquals(QsClauseType.WILDCARD, plan.getRoot().getType()); + Assertions.assertEquals("tags", plan.getRoot().getField()); + Assertions.assertEquals("f?o", plan.getRoot().getValue()); } @Test @@ -574,9 +574,9 @@ public void testDefaultFieldPreservesFieldBindings() { QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", "and"); Assertions.assertNotNull(plan); - Assertions.assertEquals(1, plan.fieldBindings.size()); - Assertions.assertEquals("tags", plan.fieldBindings.get(0).fieldName); - Assertions.assertEquals(0, plan.fieldBindings.get(0).slotIndex); + Assertions.assertEquals(1, plan.getFieldBindings().size()); + Assertions.assertEquals("tags", plan.getFieldBindings().get(0).getFieldName()); + Assertions.assertEquals(0, plan.getFieldBindings().get(0).getSlotIndex()); } // ============ Tests for Lucene Mode Parsing ============ @@ -589,13 +589,13 @@ public void testLuceneModeSimpleAndQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.root.type); - Assertions.assertEquals(2, plan.root.children.size()); - Assertions.assertEquals(Integer.valueOf(0), plan.root.minimumShouldMatch); + 
Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); + Assertions.assertEquals(Integer.valueOf(0), plan.getRoot().getMinimumShouldMatch()); // Both children should have MUST occur - for (QsNode child : plan.root.children) { - Assertions.assertEquals(SearchDslParser.QsOccur.MUST, child.occur); + for (QsNode child : plan.getRoot().getChildren()) { + Assertions.assertEquals(SearchDslParser.QsOccur.MUST, child.getOccur()); } } @@ -607,16 +607,16 @@ public void testLuceneModeSimpleOrQuery() { QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.root.type); - Assertions.assertEquals(3, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType()); + Assertions.assertEquals(3, plan.getRoot().getChildren().size()); // All children should have SHOULD occur - for (QsNode child : plan.root.children) { - Assertions.assertEquals(SearchDslParser.QsOccur.SHOULD, child.occur); + for (QsNode child : plan.getRoot().getChildren()) { + Assertions.assertEquals(SearchDslParser.QsOccur.SHOULD, child.getOccur()); } // minimum_should_match should be 1 (at least one must match) - Assertions.assertEquals(Integer.valueOf(1), plan.root.minimumShouldMatch); + Assertions.assertEquals(Integer.valueOf(1), plan.getRoot().getMinimumShouldMatch()); } @Test @@ -630,9 +630,9 @@ public void testLuceneModeAndOrMixed() { Assertions.assertNotNull(plan); // With minimum_should_match=0 and MUST clauses present, SHOULD is discarded // Only "a" remains with MUST - Assertions.assertEquals(QsClauseType.TERM, plan.root.type); - Assertions.assertEquals("field", plan.root.field); - Assertions.assertEquals("a", plan.root.value); + Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getType()); + Assertions.assertEquals("field", plan.getRoot().getField()); + 
Assertions.assertEquals("a", plan.getRoot().getValue()); } @Test @@ -650,23 +650,23 @@ public void testLuceneModeAndOrNotMixed() { QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.root.type); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType()); // Should have 3 children: a(MUST), c(MUST_NOT), d(MUST) // b is filtered out because it becomes SHOULD - Assertions.assertEquals(3, plan.root.children.size()); + Assertions.assertEquals(3, plan.getRoot().getChildren().size()); - QsNode nodeA = plan.root.children.get(0); - Assertions.assertEquals("a", nodeA.value); - Assertions.assertEquals(SearchDslParser.QsOccur.MUST, nodeA.occur); + QsNode nodeA = plan.getRoot().getChildren().get(0); + Assertions.assertEquals("a", nodeA.getValue()); + Assertions.assertEquals(SearchDslParser.QsOccur.MUST, nodeA.getOccur()); - QsNode nodeC = plan.root.children.get(1); - Assertions.assertEquals("c", nodeC.value); - Assertions.assertEquals(SearchDslParser.QsOccur.MUST_NOT, nodeC.occur); + QsNode nodeC = plan.getRoot().getChildren().get(1); + Assertions.assertEquals("c", nodeC.getValue()); + Assertions.assertEquals(SearchDslParser.QsOccur.MUST_NOT, nodeC.getOccur()); - QsNode nodeD = plan.root.children.get(2); - Assertions.assertEquals("d", nodeD.value); - Assertions.assertEquals(SearchDslParser.QsOccur.MUST, nodeD.occur); + QsNode nodeD = plan.getRoot().getChildren().get(2); + Assertions.assertEquals("d", nodeD.getValue()); + Assertions.assertEquals(SearchDslParser.QsOccur.MUST, nodeD.getOccur()); } @Test @@ -680,9 +680,9 @@ public void testLuceneModeWithDefaultField() { Assertions.assertNotNull(plan); // With minimum_should_match=0, only aterm (MUST) remains - Assertions.assertEquals(QsClauseType.TERM, plan.root.type); - Assertions.assertEquals("firstname", plan.root.field); - Assertions.assertEquals("aterm", plan.root.value); + 
Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getType()); + Assertions.assertEquals("firstname", plan.getRoot().getField()); + Assertions.assertEquals("aterm", plan.getRoot().getValue()); } @Test @@ -695,10 +695,10 @@ public void testLuceneModeNotOperator() { QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.root.type); - Assertions.assertEquals(1, plan.root.children.size()); - Assertions.assertEquals(QsClauseType.TERM, plan.root.children.get(0).type); - Assertions.assertEquals(QsOccur.MUST_NOT, plan.root.children.get(0).occur); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType()); + Assertions.assertEquals(1, plan.getRoot().getChildren().size()); + Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getChildren().get(0).getType()); + Assertions.assertEquals(QsOccur.MUST_NOT, plan.getRoot().getChildren().get(0).getOccur()); } @Test @@ -709,10 +709,10 @@ public void testLuceneModeMinimumShouldMatchExplicit() { QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.root.type); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType()); // All 3 terms should be present - Assertions.assertEquals(3, plan.root.children.size()); - Assertions.assertEquals(Integer.valueOf(1), plan.root.minimumShouldMatch); + Assertions.assertEquals(3, plan.getRoot().getChildren().size()); + Assertions.assertEquals(Integer.valueOf(1), plan.getRoot().getMinimumShouldMatch()); } @Test @@ -723,9 +723,9 @@ public void testLuceneModeSingleTerm() { QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.TERM, plan.root.type); - Assertions.assertEquals("field", plan.root.field); - Assertions.assertEquals("hello", plan.root.value); + Assertions.assertEquals(QsClauseType.TERM, 
plan.getRoot().getType()); + Assertions.assertEquals("field", plan.getRoot().getField()); + Assertions.assertEquals("hello", plan.getRoot().getValue()); } @Test @@ -736,7 +736,7 @@ public void testStandardModeUnchanged() { Assertions.assertNotNull(plan); // Standard mode uses traditional boolean algebra: OR at top level - Assertions.assertEquals(QsClauseType.OR, plan.root.type); + Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType()); } @Test @@ -756,7 +756,7 @@ public void testLuceneModeEmptyOptions() { QsPlan plan = SearchDslParser.parseDsl(dsl, ""); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.AND, plan.root.type); + Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType()); } // ============ Tests for Escape Handling ============ @@ -769,10 +769,10 @@ public void testEscapedSpaceInTerm() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.TERM, plan.root.type); - Assertions.assertEquals("field", plan.root.field); + Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getType()); + Assertions.assertEquals("field", plan.getRoot().getField()); // After unescape: "First\ Value" -> "First Value" - Assertions.assertEquals("First Value", plan.root.value); + Assertions.assertEquals("First Value", plan.getRoot().getValue()); } @Test @@ -783,10 +783,10 @@ public void testEscapedParentheses() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.TERM, plan.root.type); - Assertions.assertEquals("field", plan.root.field); + Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getType()); + Assertions.assertEquals("field", plan.getRoot().getField()); // After unescape: "hello\(world\)" -> "hello(world)" - Assertions.assertEquals("hello(world)", plan.root.value); + Assertions.assertEquals("hello(world)", plan.getRoot().getValue()); } @Test @@ -797,10 +797,10 @@ public void 
testEscapedColon() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.TERM, plan.root.type); - Assertions.assertEquals("field", plan.root.field); + Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getType()); + Assertions.assertEquals("field", plan.getRoot().getField()); // After unescape: "value\:with\:colons" -> "value:with:colons" - Assertions.assertEquals("value:with:colons", plan.root.value); + Assertions.assertEquals("value:with:colons", plan.getRoot().getValue()); } @Test @@ -811,10 +811,10 @@ public void testEscapedBackslash() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.TERM, plan.root.type); - Assertions.assertEquals("field", plan.root.field); + Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getType()); + Assertions.assertEquals("field", plan.getRoot().getField()); // After unescape: "path\\to\\file" -> "path\to\file" - Assertions.assertEquals("path\\to\\file", plan.root.value); + Assertions.assertEquals("path\\to\\file", plan.getRoot().getValue()); } @Test @@ -824,8 +824,8 @@ public void testUppercaseAndOperator() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.AND, plan.root.type); - Assertions.assertEquals(2, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); } @Test @@ -838,7 +838,7 @@ public void testLowercaseAndOperator() { Assertions.assertNotNull(plan); // Current behavior: lowercase 'and' IS an operator - Assertions.assertEquals(QsClauseType.AND, plan.root.type); + Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType()); // TODO: If PDF requires only uppercase, this should fail and return OR or different structure } @@ -849,8 +849,8 @@ public void testUppercaseOrOperator() { QsPlan plan = 
SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OR, plan.root.type); - Assertions.assertEquals(2, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); } @Test @@ -862,7 +862,7 @@ public void testLowercaseOrOperator() { Assertions.assertNotNull(plan); // Current behavior: lowercase 'or' IS an operator - Assertions.assertEquals(QsClauseType.OR, plan.root.type); + Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType()); // TODO: If PDF requires only uppercase, this should fail } @@ -873,7 +873,7 @@ public void testUppercaseNotOperator() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.NOT, plan.root.type); + Assertions.assertEquals(QsClauseType.NOT, plan.getRoot().getType()); } @Test @@ -885,7 +885,7 @@ public void testLowercaseNotOperator() { Assertions.assertNotNull(plan); // Current behavior: lowercase 'not' IS an operator - Assertions.assertEquals(QsClauseType.NOT, plan.root.type); + Assertions.assertEquals(QsClauseType.NOT, plan.getRoot().getType()); // TODO: If PDF requires only uppercase, this should fail } @@ -897,7 +897,7 @@ public void testExclamationNotOperator() { Assertions.assertNotNull(plan); // Current behavior: ! 
IS a NOT operator - Assertions.assertEquals(QsClauseType.NOT, plan.root.type); + Assertions.assertEquals(QsClauseType.NOT, plan.getRoot().getType()); } @Test @@ -909,8 +909,8 @@ public void testEscapedSpecialCharactersInQuoted() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.PHRASE, plan.root.type); - Assertions.assertEquals("hello\\\"world", plan.root.value); + Assertions.assertEquals(QsClauseType.PHRASE, plan.getRoot().getType()); + Assertions.assertEquals("hello\\\"world", plan.getRoot().getValue()); } @Test @@ -920,8 +920,8 @@ public void testNoEscapeWithoutBackslash() { QsPlan plan = SearchDslParser.parseDsl(dsl); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.TERM, plan.root.type); - Assertions.assertEquals("normalterm", plan.root.value); + Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getType()); + Assertions.assertEquals("normalterm", plan.getRoot().getValue()); } // ============ Tests for Multi-Field Search ============ @@ -934,15 +934,15 @@ public void testMultiFieldSimpleTerm() { QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OR, plan.root.type); - Assertions.assertEquals(2, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); // Verify both fields are in bindings - Assertions.assertEquals(2, plan.fieldBindings.size()); - Assertions.assertTrue(plan.fieldBindings.stream() - .anyMatch(b -> "title".equals(b.fieldName))); - Assertions.assertTrue(plan.fieldBindings.stream() - .anyMatch(b -> "content".equals(b.fieldName))); + Assertions.assertEquals(2, plan.getFieldBindings().size()); + Assertions.assertTrue(plan.getFieldBindings().stream() + .anyMatch(b -> "title".equals(b.getFieldName()))); + Assertions.assertTrue(plan.getFieldBindings().stream() + .anyMatch(b -> 
"content".equals(b.getFieldName()))); } @Test @@ -954,13 +954,13 @@ public void testMultiFieldMultiTermAnd() { QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.AND, plan.root.type); - Assertions.assertEquals(2, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); // Each child should be an OR of two fields - for (QsNode child : plan.root.children) { - Assertions.assertEquals(QsClauseType.OR, child.type); - Assertions.assertEquals(2, child.children.size()); + for (QsNode child : plan.getRoot().getChildren()) { + Assertions.assertEquals(QsClauseType.OR, child.getType()); + Assertions.assertEquals(2, child.getChildren().size()); } } @@ -973,7 +973,7 @@ public void testMultiFieldMultiTermOr() { QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OR, plan.root.type); + Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType()); } @Test @@ -985,7 +985,7 @@ public void testMultiFieldExplicitAndOperator() { QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.AND, plan.root.type); + Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType()); } @Test @@ -997,12 +997,12 @@ public void testMultiFieldMixedWithExplicitField() { QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.AND, plan.root.type); - Assertions.assertEquals(2, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); // Verify "category" is preserved - Assertions.assertTrue(plan.fieldBindings.stream() - .anyMatch(b -> "category".equals(b.fieldName))); + 
Assertions.assertTrue(plan.getFieldBindings().stream() + .anyMatch(b -> "category".equals(b.getFieldName()))); } @Test @@ -1013,12 +1013,12 @@ public void testMultiFieldWithWildcard() { QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OR, plan.root.type); - Assertions.assertEquals(2, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); // Both should be PREFIX type - for (QsNode child : plan.root.children) { - Assertions.assertEquals(QsClauseType.PREFIX, child.type); + for (QsNode child : plan.getRoot().getChildren()) { + Assertions.assertEquals(QsClauseType.PREFIX, child.getType()); } } @@ -1030,12 +1030,12 @@ public void testMultiFieldWithExactFunction() { QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OR, plan.root.type); - Assertions.assertEquals(2, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); // Both should be EXACT type - for (QsNode child : plan.root.children) { - Assertions.assertEquals(QsClauseType.EXACT, child.type); + for (QsNode child : plan.getRoot().getChildren()) { + Assertions.assertEquals(QsClauseType.EXACT, child.getType()); } } @@ -1047,9 +1047,9 @@ public void testMultiFieldThreeFields() { QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OR, plan.root.type); - Assertions.assertEquals(3, plan.root.children.size()); - Assertions.assertEquals(3, plan.fieldBindings.size()); + Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType()); + Assertions.assertEquals(3, plan.getRoot().getChildren().size()); + Assertions.assertEquals(3, plan.getFieldBindings().size()); } @Test @@ -1072,9 +1072,9 
@@ public void testSingleFieldInArray() { QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.TERM, plan.root.type); - Assertions.assertEquals("title", plan.root.field); - Assertions.assertEquals(1, plan.fieldBindings.size()); + Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getType()); + Assertions.assertEquals("title", plan.getRoot().getField()); + Assertions.assertEquals(1, plan.getFieldBindings().size()); } @Test @@ -1085,9 +1085,9 @@ public void testMultiFieldNotOperator() { QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.NOT, plan.root.type); - Assertions.assertEquals(1, plan.root.children.size()); - Assertions.assertEquals(QsClauseType.OR, plan.root.children.get(0).type); + Assertions.assertEquals(QsClauseType.NOT, plan.getRoot().getType()); + Assertions.assertEquals(1, plan.getRoot().getChildren().size()); + Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getChildren().get(0).getType()); } // ============ Tests for Multi-Field + Lucene Mode ============ @@ -1102,15 +1102,15 @@ public void testMultiFieldLuceneModeSimpleAnd() { QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.root.type); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType()); // Should have 2 children (two OR groups), both with MUST // Note: In Lucene mode, OR groups are also wrapped as OCCUR_BOOLEAN - Assertions.assertEquals(2, plan.root.children.size()); - for (QsNode child : plan.root.children) { - Assertions.assertEquals(QsOccur.MUST, child.occur); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); + for (QsNode child : plan.getRoot().getChildren()) { + Assertions.assertEquals(QsOccur.MUST, child.getOccur()); // The child is OCCUR_BOOLEAN wrapping the OR group - 
Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, child.type); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, child.getType()); } } @@ -1124,16 +1124,16 @@ public void testMultiFieldLuceneModeSimpleOr() { QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.root.type); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType()); // Should have 2 children, both with SHOULD - Assertions.assertEquals(2, plan.root.children.size()); - for (QsNode child : plan.root.children) { - Assertions.assertEquals(QsOccur.SHOULD, child.occur); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); + for (QsNode child : plan.getRoot().getChildren()) { + Assertions.assertEquals(QsOccur.SHOULD, child.getOccur()); } // minimum_should_match should be 1 - Assertions.assertEquals(Integer.valueOf(1), plan.root.minimumShouldMatch); + Assertions.assertEquals(Integer.valueOf(1), plan.getRoot().getMinimumShouldMatch()); } @Test @@ -1148,7 +1148,7 @@ public void testMultiFieldLuceneModeAndOrMixed() { Assertions.assertNotNull(plan); // With minimum_should_match=0, only (title:a OR content:a) remains // In Lucene mode, this is wrapped as OCCUR_BOOLEAN - Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.root.type); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType()); } @Test @@ -1160,14 +1160,14 @@ public void testMultiFieldLuceneModeWithNot() { QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.root.type); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType()); // Should have 2 children: a (MUST), b (MUST_NOT) - Assertions.assertEquals(2, plan.root.children.size()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); // Find MUST and MUST_NOT children - boolean hasMust = 
plan.root.children.stream().anyMatch(c -> c.occur == QsOccur.MUST); - boolean hasMustNot = plan.root.children.stream().anyMatch(c -> c.occur == QsOccur.MUST_NOT); + boolean hasMust = plan.getRoot().getChildren().stream().anyMatch(c -> c.getOccur() == QsOccur.MUST); + boolean hasMustNot = plan.getRoot().getChildren().stream().anyMatch(c -> c.getOccur() == QsOccur.MUST_NOT); Assertions.assertTrue(hasMust); Assertions.assertTrue(hasMustNot); } @@ -1181,9 +1181,9 @@ public void testMultiFieldLuceneModeSingleTerm() { Assertions.assertNotNull(plan); // In Lucene mode, even single term OR groups are wrapped as OCCUR_BOOLEAN - Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.root.type); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType()); // The OCCUR_BOOLEAN contains the OR group's children with SHOULD occur - Assertions.assertEquals(2, plan.root.children.size()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); } @Test @@ -1195,7 +1195,7 @@ public void testMultiFieldLuceneModeComplexQuery() { Assertions.assertNotNull(plan); // Should have proper structure with MUST and MUST_NOT - Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.root.type); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType()); } @Test @@ -1206,10 +1206,10 @@ public void testMultiFieldLuceneModeMinimumShouldMatchOne() { QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.root.type); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType()); // All 3 groups should be present - Assertions.assertEquals(3, plan.root.children.size()); - Assertions.assertEquals(Integer.valueOf(1), plan.root.minimumShouldMatch); + Assertions.assertEquals(3, plan.getRoot().getChildren().size()); + Assertions.assertEquals(Integer.valueOf(1), plan.getRoot().getMinimumShouldMatch()); } // ============ Tests for type parameter 
(best_fields vs cross_fields) ============ @@ -1225,13 +1225,13 @@ public void testMultiFieldBestFieldsDefault() { Assertions.assertNotNull(plan); // Root should be OR (joining fields) - Assertions.assertEquals(QsClauseType.OR, plan.root.type); - Assertions.assertEquals(2, plan.root.children.size()); // 2 fields + Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); // 2 fields // Each child should be an AND of terms for that field - for (QsNode fieldGroup : plan.root.children) { - Assertions.assertEquals(QsClauseType.AND, fieldGroup.type); - Assertions.assertEquals(2, fieldGroup.children.size()); // 2 terms + for (QsNode fieldGroup : plan.getRoot().getChildren()) { + Assertions.assertEquals(QsClauseType.AND, fieldGroup.getType()); + Assertions.assertEquals(2, fieldGroup.getChildren().size()); // 2 terms } } @@ -1243,8 +1243,8 @@ public void testMultiFieldBestFieldsExplicit() { QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OR, plan.root.type); - Assertions.assertEquals(2, plan.root.children.size()); + Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); } @Test @@ -1258,13 +1258,13 @@ public void testMultiFieldCrossFields() { Assertions.assertNotNull(plan); // Root should be AND (joining term groups) - Assertions.assertEquals(QsClauseType.AND, plan.root.type); - Assertions.assertEquals(2, plan.root.children.size()); // 2 term groups + Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); // 2 term groups // Each child should be an OR of the same term across fields - for (QsNode termGroup : plan.root.children) { - Assertions.assertEquals(QsClauseType.OR, termGroup.type); - Assertions.assertEquals(2, termGroup.children.size()); // 2 fields + for 
(QsNode termGroup : plan.getRoot().getChildren()) { + Assertions.assertEquals(QsClauseType.OR, termGroup.getType()); + Assertions.assertEquals(2, termGroup.getChildren().size()); // 2 fields } } @@ -1276,7 +1276,7 @@ public void testMultiFieldBestFieldsLuceneMode() { QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.root.type); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType()); } @Test @@ -1287,7 +1287,7 @@ public void testMultiFieldCrossFieldsLuceneMode() { QsPlan plan = SearchDslParser.parseDsl(dsl, options); Assertions.assertNotNull(plan); - Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.root.type); + Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType()); } @Test @@ -1315,7 +1315,7 @@ public void testMultiFieldSingleTermSameResultForBothTypes() { Assertions.assertNotNull(planBest); Assertions.assertNotNull(planCross); // Both should have same structure: (title:hello OR content:hello) - Assertions.assertEquals(planBest.root.type, planCross.root.type); - Assertions.assertEquals(planBest.root.children.size(), planCross.root.children.size()); + Assertions.assertEquals(planBest.getRoot().getType(), planCross.getRoot().getType()); + Assertions.assertEquals(planBest.getRoot().getChildren().size(), planCross.getRoot().getChildren().size()); } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchTest.java index fd6f03743aebf6..38d801b14f4626 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchTest.java @@ -58,9 +58,9 @@ public void testGetQsPlan() { SearchDslParser.QsPlan plan = searchFunc.getQsPlan(); 
Assertions.assertNotNull(plan); - Assertions.assertNotNull(plan.root); - Assertions.assertEquals(SearchDslParser.QsClauseType.AND, plan.root.type); - Assertions.assertEquals(2, plan.fieldBindings.size()); + Assertions.assertNotNull(plan.getRoot()); + Assertions.assertEquals(SearchDslParser.QsClauseType.AND, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getFieldBindings().size()); } @Test @@ -151,11 +151,11 @@ public void testComplexDslParsing() { SearchDslParser.QsPlan plan = searchFunc.getQsPlan(); Assertions.assertNotNull(plan); - Assertions.assertEquals(SearchDslParser.QsClauseType.AND, plan.root.type); - Assertions.assertEquals(2, plan.root.children.size()); + Assertions.assertEquals(SearchDslParser.QsClauseType.AND, plan.getRoot().getType()); + Assertions.assertEquals(2, plan.getRoot().getChildren().size()); // Should detect 3 unique fields: title, content, category - Assertions.assertEquals(3, plan.fieldBindings.size()); + Assertions.assertEquals(3, plan.getFieldBindings().size()); } @Test From d989599d6093df9801e6e2cbfb2c4f85da9c2b9f Mon Sep 17 00:00:00 2001 From: airborne12 Date: Thu, 15 Jan 2026 14:47:52 +0800 Subject: [PATCH 5/6] [refactor](search) Improve SearchDslParser code quality and error handling - Add SearchDslSyntaxException for clearer DSL syntax error messages - Add @Nullable annotations to public parseDsl method parameters - Replace deprecated ANTLRInputStream with CharStreams.fromString() - Remove duplicate fields validation in expandMultiFieldDsl methods - Add context (actual fields value) to validation exception messages - Improve error handling with specific catch blocks for different exception types (syntax errors, argument errors, internal errors) Co-Authored-By: Claude Opus 4.5 --- .../functions/scalar/SearchDslParser.java | 135 ++++++++++++++---- .../functions/scalar/SearchDslParserTest.java | 4 +- .../functions/scalar/SearchTest.java | 2 +- 3 files changed, 109 insertions(+), 32 deletions(-) diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java index 099f6b0e7518ff..a42189b3eda1f0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java @@ -25,7 +25,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; -import org.antlr.v4.runtime.ANTLRInputStream; +import org.antlr.v4.runtime.CharStreams; import org.antlr.v4.runtime.CommonTokenStream; import org.antlr.v4.runtime.tree.ParseTree; import org.apache.logging.log4j.LogManager; @@ -39,6 +39,7 @@ import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; +import javax.annotation.Nullable; /** * Search DSL Parser using ANTLR-generated parser. @@ -60,6 +61,22 @@ public class SearchDslParser { private static final Logger LOG = LogManager.getLogger(SearchDslParser.class); private static final ObjectMapper JSON_MAPPER = new ObjectMapper(); + /** + * Exception for search DSL syntax errors. + * This exception is thrown when the DSL string cannot be parsed due to syntax issues. + * It is distinct from programming errors (NullPointerException, etc.) to provide + * clearer error messages to users. 
+ */ + public static class SearchDslSyntaxException extends RuntimeException { + public SearchDslSyntaxException(String message) { + super(message); + } + + public SearchDslSyntaxException(String message, Throwable cause) { + super(message, cause); + } + } + /** * Parse DSL string and return intermediate representation */ @@ -84,7 +101,7 @@ public static QsPlan parseDsl(String dsl) { * Example: '{"fields":["title","content"],"type":"cross_fields"}' * @return Parsed QsPlan */ - public static QsPlan parseDsl(String dsl, String optionsJson) { + public static QsPlan parseDsl(String dsl, @Nullable String optionsJson) { // Parse options from JSON SearchOptions searchOptions = parseOptions(optionsJson); @@ -120,7 +137,7 @@ public static QsPlan parseDsl(String dsl, String optionsJson) { * @param defaultOperator Default operator ("and" or "or") for multi-term queries (optional, defaults to "or") * @return Parsed QsPlan */ - public static QsPlan parseDsl(String dsl, String defaultField, String defaultOperator) { + public static QsPlan parseDsl(String dsl, @Nullable String defaultField, @Nullable String defaultOperator) { return parseDslStandardMode(dsl, defaultField, defaultOperator); } @@ -141,7 +158,7 @@ private static QsPlan parseDslStandardMode(String dsl, String defaultField, Stri try { // Create ANTLR lexer and parser - SearchLexer lexer = new SearchLexer(new ANTLRInputStream(expandedDsl)); + SearchLexer lexer = new SearchLexer(CharStreams.fromString(expandedDsl)); CommonTokenStream tokens = new CommonTokenStream(lexer); SearchParser parser = new SearchParser(tokens); @@ -153,7 +170,7 @@ public void syntaxError(org.antlr.v4.runtime.Recognizer recognizer, Object offendingSymbol, int line, int charPositionInLine, String msg, org.antlr.v4.runtime.RecognitionException e) { - throw new RuntimeException("Invalid search DSL syntax at line " + line + throw new SearchDslSyntaxException("Syntax error at line " + line + ":" + charPositionInLine + " " + msg); } }); @@ -163,7 
+180,7 @@ public void syntaxError(org.antlr.v4.runtime.Recognizer recognizer, // Check if parsing was successful if (tree == null) { - throw new RuntimeException("Invalid search DSL syntax"); + throw new SearchDslSyntaxException("Invalid search DSL syntax: parsing returned null"); } // Build AST using visitor pattern @@ -180,9 +197,28 @@ public void syntaxError(org.antlr.v4.runtime.Recognizer recognizer, return new QsPlan(root, bindings); - } catch (RuntimeException e) { + } catch (SearchDslSyntaxException e) { + // Syntax error in DSL - user input issue LOG.error("Failed to parse search DSL: '{}' (expanded: '{}')", dsl, expandedDsl, e); - throw new RuntimeException("Invalid search DSL syntax: " + dsl + ". Error: " + e.getMessage(), e); + throw new SearchDslSyntaxException("Invalid search DSL: " + dsl + ". " + e.getMessage(), e); + } catch (IllegalArgumentException e) { + // Invalid argument - user input issue + LOG.error("Invalid argument in search DSL: '{}' (expanded: '{}')", dsl, expandedDsl, e); + throw new IllegalArgumentException("Invalid search DSL argument: " + dsl + ". " + e.getMessage(), e); + } catch (NullPointerException e) { + // Internal error - programming bug + LOG.error("Internal error (NPE) while parsing search DSL: '{}' (expanded: '{}')", dsl, expandedDsl, e); + throw new RuntimeException("Internal error while parsing search DSL: " + dsl + + ". This may be a bug. Details: " + e.getMessage(), e); + } catch (IndexOutOfBoundsException e) { + // Internal error - programming bug + LOG.error("Internal error (IOOB) while parsing search DSL: '{}' (expanded: '{}')", dsl, expandedDsl, e); + throw new RuntimeException("Internal error while parsing search DSL: " + dsl + + ". This may be a bug. 
Details: " + e.getMessage(), e); + } catch (RuntimeException e) { + // Other runtime errors + LOG.error("Unexpected error while parsing search DSL: '{}' (expanded: '{}')", dsl, expandedDsl, e); + throw new RuntimeException("Unexpected error parsing search DSL: " + dsl + ". " + e.getMessage(), e); } } @@ -501,7 +537,8 @@ private static boolean isValidDsl(String dsl) { */ private static void validateFieldsList(List fields) { if (fields == null || fields.isEmpty()) { - throw new IllegalArgumentException("fields list cannot be null or empty for multi-field mode"); + throw new IllegalArgumentException( + "fields list cannot be null or empty for multi-field mode, got: " + fields); } } @@ -520,7 +557,7 @@ private static QsPlan parseWithVisitor(String expandedDsl, String originalDsl, String modeDescription) { try { // Create ANTLR lexer and parser - SearchLexer lexer = new SearchLexer(new ANTLRInputStream(expandedDsl)); + SearchLexer lexer = new SearchLexer(CharStreams.fromString(expandedDsl)); CommonTokenStream tokens = new CommonTokenStream(lexer); SearchParser parser = new SearchParser(tokens); @@ -532,14 +569,14 @@ public void syntaxError(org.antlr.v4.runtime.Recognizer recognizer, Object offendingSymbol, int line, int charPositionInLine, String msg, org.antlr.v4.runtime.RecognitionException e) { - throw new RuntimeException("Invalid search DSL syntax at line " + line + throw new SearchDslSyntaxException("Syntax error at line " + line + ":" + charPositionInLine + " " + msg); } }); ParseTree tree = parser.search(); if (tree == null) { - throw new RuntimeException("Invalid search DSL syntax"); + throw new SearchDslSyntaxException("Invalid search DSL syntax: parsing returned null"); } // Build AST using provided visitor @@ -556,11 +593,35 @@ public void syntaxError(org.antlr.v4.runtime.Recognizer recognizer, return new QsPlan(root, bindings); - } catch (RuntimeException e) { + } catch (SearchDslSyntaxException e) { + // Syntax error in DSL - user input issue 
LOG.error("Failed to parse search DSL in {}: '{}' (expanded: '{}')", modeDescription, originalDsl, expandedDsl, e); - throw new RuntimeException("Invalid search DSL syntax: " + originalDsl - + ". Error: " + e.getMessage(), e); + throw new SearchDslSyntaxException("Invalid search DSL: " + originalDsl + ". " + e.getMessage(), e); + } catch (IllegalArgumentException e) { + // Invalid argument - user input issue + LOG.error("Invalid argument in search DSL ({}): '{}' (expanded: '{}')", + modeDescription, originalDsl, expandedDsl, e); + throw new IllegalArgumentException("Invalid search DSL argument: " + originalDsl + + ". " + e.getMessage(), e); + } catch (NullPointerException e) { + // Internal error - programming bug + LOG.error("Internal error (NPE) while parsing search DSL in {}: '{}' (expanded: '{}')", + modeDescription, originalDsl, expandedDsl, e); + throw new RuntimeException("Internal error while parsing search DSL: " + originalDsl + + ". This may be a bug. Details: " + e.getMessage(), e); + } catch (IndexOutOfBoundsException e) { + // Internal error - programming bug + LOG.error("Internal error (IOOB) while parsing search DSL in {}: '{}' (expanded: '{}')", + modeDescription, originalDsl, expandedDsl, e); + throw new RuntimeException("Internal error while parsing search DSL: " + originalDsl + + ". This may be a bug. Details: " + e.getMessage(), e); + } catch (RuntimeException e) { + // Other runtime errors + LOG.error("Unexpected error while parsing search DSL in {}: '{}' (expanded: '{}')", + modeDescription, originalDsl, expandedDsl, e); + throw new RuntimeException("Unexpected error parsing search DSL: " + originalDsl + + ". 
" + e.getMessage(), e); } } @@ -664,10 +725,7 @@ private static QsPlan parseDslMultiFieldLuceneMode(String dsl, List fiel * @return Expanded full DSL */ private static String expandMultiFieldDsl(String dsl, List fields, String defaultOperator) { - if (fields == null || fields.isEmpty()) { - throw new IllegalArgumentException("fields list cannot be null or empty"); - } - + // Note: fields validation is done by validateFieldsList() before calling this method if (fields.size() == 1) { // Single field - delegate to existing method return expandSimplifiedDsl(dsl, fields.get(0), defaultOperator); @@ -726,10 +784,7 @@ private static String expandMultiFieldDsl(String dsl, List fields, Strin */ private static String expandMultiFieldDslBestFields(String dsl, List fields, String defaultOperator) { - if (fields == null || fields.isEmpty()) { - throw new IllegalArgumentException("fields list cannot be null or empty"); - } - + // Note: fields validation is done by validateFieldsList() before calling this method if (fields.size() == 1) { // Single field - delegate to existing method return expandSimplifiedDsl(dsl, fields.get(0), defaultOperator); @@ -1796,7 +1851,7 @@ private static QsPlan parseDslLuceneMode(String dsl, String defaultField, String try { // Create ANTLR lexer and parser - SearchLexer lexer = new SearchLexer(new ANTLRInputStream(expandedDsl)); + SearchLexer lexer = new SearchLexer(CharStreams.fromString(expandedDsl)); CommonTokenStream tokens = new CommonTokenStream(lexer); SearchParser parser = new SearchParser(tokens); @@ -1808,7 +1863,7 @@ public void syntaxError(org.antlr.v4.runtime.Recognizer recognizer, Object offendingSymbol, int line, int charPositionInLine, String msg, org.antlr.v4.runtime.RecognitionException e) { - throw new RuntimeException("Invalid search DSL syntax at line " + line + throw new SearchDslSyntaxException("Syntax error at line " + line + ":" + charPositionInLine + " " + msg); } }); @@ -1816,7 +1871,7 @@ public void 
syntaxError(org.antlr.v4.runtime.Recognizer recognizer, // Parse using standard parser first ParseTree tree = parser.search(); if (tree == null) { - throw new RuntimeException("Invalid search DSL syntax"); + throw new SearchDslSyntaxException("Invalid search DSL syntax: parsing returned null"); } // Build AST using Lucene-mode visitor @@ -1833,9 +1888,31 @@ public void syntaxError(org.antlr.v4.runtime.Recognizer recognizer, return new QsPlan(root, bindings); - } catch (RuntimeException e) { + } catch (SearchDslSyntaxException e) { + // Syntax error in DSL - user input issue LOG.error("Failed to parse search DSL in Lucene mode: '{}' (expanded: '{}')", dsl, expandedDsl, e); - throw new RuntimeException("Invalid search DSL syntax: " + dsl + ". Error: " + e.getMessage(), e); + throw new SearchDslSyntaxException("Invalid search DSL: " + dsl + ". " + e.getMessage(), e); + } catch (IllegalArgumentException e) { + // Invalid argument - user input issue + LOG.error("Invalid argument in search DSL (Lucene mode): '{}' (expanded: '{}')", dsl, expandedDsl, e); + throw new IllegalArgumentException("Invalid search DSL argument: " + dsl + ". " + e.getMessage(), e); + } catch (NullPointerException e) { + // Internal error - programming bug + LOG.error("Internal error (NPE) while parsing search DSL in Lucene mode: '{}' (expanded: '{}')", + dsl, expandedDsl, e); + throw new RuntimeException("Internal error while parsing search DSL: " + dsl + + ". This may be a bug. Details: " + e.getMessage(), e); + } catch (IndexOutOfBoundsException e) { + // Internal error - programming bug + LOG.error("Internal error (IOOB) while parsing search DSL in Lucene mode: '{}' (expanded: '{}')", + dsl, expandedDsl, e); + throw new RuntimeException("Internal error while parsing search DSL: " + dsl + + ". This may be a bug. 
Details: " + e.getMessage(), e); + } catch (RuntimeException e) { + // Other runtime errors + LOG.error("Unexpected error while parsing search DSL in Lucene mode: '{}' (expanded: '{}')", + dsl, expandedDsl, e); + throw new RuntimeException("Unexpected error parsing search DSL: " + dsl + ". " + e.getMessage(), e); } } @@ -1918,7 +1995,7 @@ private QsNode processLuceneBooleanChain(SearchParser.OrClauseContext ctx) { if (hasMust) { terms = terms.stream() .filter(t -> t.occur != QsOccur.SHOULD) - .collect(java.util.stream.Collectors.toList()); + .collect(Collectors.toList()); } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java index 8586b3cd83b4af..e45790b4efbdc3 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java @@ -242,7 +242,7 @@ public void testInvalidDsl() { SearchDslParser.parseDsl(dsl); }); - Assertions.assertTrue(exception.getMessage().contains("Invalid search DSL syntax")); + Assertions.assertTrue(exception.getMessage().contains("Invalid search DSL")); } @Test @@ -542,7 +542,7 @@ public void testDefaultFieldWithEmptyString() { SearchDslParser.parseDsl(dsl, "", "and"); }); - Assertions.assertTrue(exception.getMessage().contains("Invalid search DSL syntax")); + Assertions.assertTrue(exception.getMessage().contains("Invalid search DSL")); } @Test diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchTest.java index 38d801b14f4626..919bebea2524fc 100644 --- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchTest.java @@ -169,7 +169,7 @@ public void testInvalidDslHandling() { searchFunc.getQsPlan(); Assertions.assertTrue(false, "Expected exception for invalid DSL"); } catch (RuntimeException e) { - Assertions.assertTrue(e.getMessage().contains("Invalid search DSL syntax")); + Assertions.assertTrue(e.getMessage().contains("Invalid search DSL")); } } } From fc21cbce3d725aff907d6a5b660fec4059c5b7e5 Mon Sep 17 00:00:00 2001 From: airborne12 Date: Thu, 15 Jan 2026 16:16:00 +0800 Subject: [PATCH 6/6] [refactor](search) Use Jackson annotations for SearchOptions and optimize StringBuilder - Add @JsonProperty/@JsonSetter annotations to SearchOptions for declarative JSON mapping - Simplify parseOptions() from 58 lines to 20 lines using JSON_MAPPER.readValue() - Add validate() method for mutual exclusion and range checks - Add minimum_should_match negative value validation - Reuse StringBuilder with setLength(0) instead of creating new instances in tokenizeDsl() Co-Authored-By: Claude Opus 4.5 --- .../functions/scalar/SearchDslParser.java | 121 ++++++++++-------- 1 file changed, 65 insertions(+), 56 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java index a42189b3eda1f0..82cbdcdf2401c2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonSetter; import 
com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import org.antlr.v4.runtime.CharStreams; @@ -472,7 +473,7 @@ private static List tokenizeDsl(String dsl) { // End of term (only if not escaped - handled above) if (currentTerm.length() > 0) { terms.add(currentTerm.toString()); - currentTerm = new StringBuilder(); + currentTerm.setLength(0); // Reuse StringBuilder instead of creating new one } } else { currentTerm.append(c); @@ -1651,11 +1652,21 @@ public boolean equals(Object o) { * - fields: array of field names for multi-field search (mutually exclusive with default_field) */ public static class SearchOptions { + @JsonProperty("default_field") private String defaultField = null; + + @JsonProperty("default_operator") private String defaultOperator = null; + + @JsonProperty("mode") private String mode = "standard"; + + @JsonProperty("minimum_should_match") private Integer minimumShouldMatch = null; + private List fields = null; + + @JsonProperty("type") private String type = "best_fields"; // "best_fields" (default) or "cross_fields" public String getDefaultField() { @@ -1698,8 +1709,22 @@ public List getFields() { return fields == null ? null : Collections.unmodifiableList(fields); } + /** + * Set fields with empty element filtering. + * Empty or whitespace-only strings are filtered out. + */ + @JsonSetter("fields") public void setFields(List fields) { - this.fields = fields == null ? null : new ArrayList<>(fields); + if (fields == null) { + this.fields = null; + return; + } + // Filter out empty or whitespace-only elements + List filtered = fields.stream() + .filter(f -> f != null && !f.trim().isEmpty()) + .map(String::trim) + .collect(Collectors.toList()); + this.fields = filtered.isEmpty() ? 
null : new ArrayList<>(filtered); } /** @@ -1750,75 +1775,59 @@ public boolean isBestFieldsMode() { public boolean isCrossFieldsMode() { return "cross_fields".equals(type); } + + /** + * Validate the options after deserialization. + * Checks for: + * - Mutual exclusion between fields and default_field + * - minimum_should_match is non-negative if specified + * + * @throws IllegalArgumentException if validation fails + */ + public void validate() { + // Validation: fields and default_field are mutually exclusive + if (fields != null && !fields.isEmpty() + && defaultField != null && !defaultField.isEmpty()) { + throw new IllegalArgumentException( + "'fields' and 'default_field' are mutually exclusive. Use only one."); + } + // Validation: minimum_should_match should be non-negative + if (minimumShouldMatch != null && minimumShouldMatch < 0) { + throw new IllegalArgumentException( + "'minimum_should_match' must be non-negative, got: " + minimumShouldMatch); + } + } } /** - * Parse options JSON string. - * Supports the following fields: - * - default_field: default field name when DSL doesn't specify field - * - default_operator: "and" or "or" for multi-term queries - * - mode: "standard" or "lucene" - * - minimum_should_match: integer for Lucene mode - * - fields: array of field names for multi-field search - * - type: "best_fields" (default) or "cross_fields" for multi-field search semantics + * Parse options JSON string using Jackson databind. + * The SearchOptions class uses @JsonProperty annotations for field mapping + * and @JsonSetter for custom deserialization logic (e.g., filtering empty fields). 
+ * + * @param optionsJson JSON string containing search options + * @return Parsed and validated SearchOptions + * @throws IllegalArgumentException if JSON is invalid or validation fails */ private static SearchOptions parseOptions(String optionsJson) { - SearchOptions options = new SearchOptions(); if (optionsJson == null || optionsJson.trim().isEmpty()) { - return options; + return new SearchOptions(); } try { - // Parse JSON using Jackson - com.fasterxml.jackson.databind.JsonNode jsonNode = JSON_MAPPER.readTree(optionsJson); - - if (jsonNode.has("default_field")) { - options.setDefaultField(jsonNode.get("default_field").asText()); - } - if (jsonNode.has("default_operator")) { - options.setDefaultOperator(jsonNode.get("default_operator").asText()); - } - if (jsonNode.has("mode")) { - options.setMode(jsonNode.get("mode").asText()); - } - if (jsonNode.has("minimum_should_match")) { - options.setMinimumShouldMatch(jsonNode.get("minimum_should_match").asInt()); - } - // Parse fields array for multi-field search - if (jsonNode.has("fields")) { - com.fasterxml.jackson.databind.JsonNode fieldsNode = jsonNode.get("fields"); - if (fieldsNode.isArray()) { - List fieldsList = new ArrayList<>(); - for (com.fasterxml.jackson.databind.JsonNode fieldNode : fieldsNode) { - String fieldValue = fieldNode.asText().trim(); - if (!fieldValue.isEmpty()) { - fieldsList.add(fieldValue); - } - } - if (!fieldsList.isEmpty()) { - options.setFields(fieldsList); - } - } - } - // Parse type for multi-field search semantics - if (jsonNode.has("type")) { - options.setType(jsonNode.get("type").asText()); - } - - // Validation: fields and default_field are mutually exclusive - if (options.getFields() != null && !options.getFields().isEmpty() - && options.getDefaultField() != null && !options.getDefaultField().isEmpty()) { - throw new IllegalArgumentException( - "'fields' and 'default_field' are mutually exclusive. 
Use only one."); - } + // Use Jackson to deserialize directly into SearchOptions + // @JsonProperty annotations handle field mapping + // @JsonSetter on setFields() handles empty element filtering + SearchOptions options = JSON_MAPPER.readValue(optionsJson, SearchOptions.class); + // Run validation checks (mutual exclusion, range checks, etc.) + options.validate(); + return options; } catch (IllegalArgumentException e) { + // Re-throw validation errors as-is throw e; } catch (JsonProcessingException e) { throw new IllegalArgumentException( "Invalid search options JSON: '" + optionsJson + "'. Error: " + e.getMessage(), e); } - - return options; } /**