From db99822f7fca8f7b50f75243c5a9b7a7581f4b9e Mon Sep 17 00:00:00 2001 From: Ritvi Bhatt Date: Thu, 11 Sep 2025 11:42:21 -0700 Subject: [PATCH 01/17] add non-numeric support for max/min Signed-off-by: Ritvi Bhatt --- .../sql/calcite/remote/CalciteExplainIT.java | 16 +++++ .../calcite/explain_max_string_field.json | 6 ++ .../calcite/explain_min_string_field.json | 6 ++ .../explain_max_string_field.json | 6 ++ .../explain_min_string_field.json | 6 ++ .../ppl/explain_max_string_field.json | 23 +++++++ .../ppl/explain_min_string_field.json | 23 +++++++ .../opensearch/request/AggregateAnalyzer.java | 47 +++++++++++++-- .../calcite/CalcitePPLAggregationTest.java | 60 +++++++++++++++++++ 9 files changed, 187 insertions(+), 6 deletions(-) create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_max_string_field.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_min_string_field.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_max_string_field.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_min_string_field.json create mode 100644 integ-test/src/test/resources/expectedOutput/ppl/explain_max_string_field.json create mode 100644 integ-test/src/test/resources/expectedOutput/ppl/explain_min_string_field.json diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index c0265dd2738..1d35050d558 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -556,6 +556,22 @@ public void testPushdownLimitIntoAggregation() throws IOException { + " head 100 | head 10 from 10 ")); } + @Test + public void testExplainMaxOnStringField() throws IOException { + String expected = loadExpectedPlan("explain_max_string_field.json"); + assertJsonEqualsIgnoreId( + expected, + explainQueryToString("source=opensearch-sql_test_index_account | stats max(firstname)")); + } + + @Test + public void testExplainMinOnStringField() throws IOException { + String expected = loadExpectedPlan("explain_min_string_field.json"); + assertJsonEqualsIgnoreId( + expected, + explainQueryToString("source=opensearch-sql_test_index_account | stats min(firstname)")); + } + /** * Executes the PPL query and returns the result as a string with windows-style line breaks * replaced with Unix-style ones. diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_max_string_field.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_max_string_field.json new file mode 100644 index 00000000000..961cdc4687f --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_max_string_field.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalAggregate(group=[{}], max(firstname)=[MAX($0)])\n LogicalProject(firstname=[$1])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableLimit(fetch=[10000])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},max(firstname)=MAX($0))], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"max(firstname)\":{\"top_hits\":{\"from\":0,\"size\":1,\"version\":false,\"seq_no_primary_term\":false,\"explain\":false,\"_source\":{\"includes\":[\"firstname\"],\"excludes\":[]},\"sort\":[{\"firstname.keyword\":{\"order\":\"desc\"}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_min_string_field.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_min_string_field.json new file mode 100644 index 00000000000..41a14f5e84e --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_min_string_field.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalAggregate(group=[{}], min(firstname)=[MIN($0)])\n LogicalProject(firstname=[$1])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableLimit(fetch=[10000])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},min(firstname)=MIN($0))], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"min(firstname)\":{\"top_hits\":{\"from\":0,\"size\":1,\"version\":false,\"seq_no_primary_term\":false,\"explain\":false,\"_source\":{\"includes\":[\"firstname\"],\"excludes\":[]},\"sort\":[{\"firstname.keyword\":{\"order\":\"asc\"}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_max_string_field.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_max_string_field.json new file mode 100644 index 00000000000..8a84763e33c --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_max_string_field.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalAggregate(group=[{}], max(firstname)=[MAX($0)])\n LogicalProject(firstname=[$1])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableAggregate(group=[{}], max(firstname)=[MAX($1)])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_min_string_field.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_min_string_field.json new file mode 100644 index 00000000000..320b519c442 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_min_string_field.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalAggregate(group=[{}], min(firstname)=[MIN($0)])\n LogicalProject(firstname=[$1])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableAggregate(group=[{}], min(firstname)=[MIN($1)])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_max_string_field.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_max_string_field.json new file mode 100644 index 00000000000..419f1c3d61c --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_max_string_field.json @@ -0,0 +1,23 @@ +{ + "root": { + "type": "OpenSearchPlan", + "children": [ + { + "type": "LogicalAggregate", + "description": "group=[{}], max(firstname)=[MAX($0)]", + "children": [ + { + "type": "LogicalProject", + "description": "firstname=[$3]", + "children": [ + { + "type": "LogicalTableScan", + "description": "table=[[opensearch-sql_test_index_account]]" + } + ] + } + ] + } + ] + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_min_string_field.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_min_string_field.json new file mode 100644 index 00000000000..8e4f353d9f6 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_min_string_field.json @@ -0,0 +1,23 @@ +{ + "root": { + "type": "OpenSearchPlan", + "children": [ + { + "type": "LogicalAggregate", + "description": "group=[{}], min(firstname)=[MIN($0)]", + "children": [ + { + "type": "LogicalProject", + "description": "firstname=[$3]", + "children": [ + { + "type": "LogicalTableScan", + "description": "table=[[opensearch-sql_test_index_account]]" + } + ] + } + ] + } + ] + } +} \ No newline at end of file diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index 86986713b68..e66a99b1892 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -68,6 +68,7 @@ import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.opensearch.data.type.OpenSearchTextType; import org.opensearch.sql.opensearch.request.PredicateAnalyzer.NamedFieldExpression; import org.opensearch.sql.opensearch.response.agg.ArgMaxMinParser; import org.opensearch.sql.opensearch.response.agg.CompositeAggregationParser; @@ -278,12 +279,46 @@ private static Pair createRegularAggregation( helper.build( !args.isEmpty() ? args.getFirst() : null, AggregationBuilders.count(aggFieldName)), new SingleValueParser(aggFieldName)); - case MIN -> Pair.of( - helper.build(args.getFirst(), AggregationBuilders.min(aggFieldName)), - new SingleValueParser(aggFieldName)); - case MAX -> Pair.of( - helper.build(args.getFirst(), AggregationBuilders.max(aggFieldName)), - new SingleValueParser(aggFieldName)); + case MIN -> { + String fieldName = helper.inferNamedField(args.getFirst()).getRootName(); + ExprType fieldType = helper.fieldTypes.get(fieldName); + + if (fieldType instanceof OpenSearchTextType) { + yield Pair.of( + AggregationBuilders.topHits(aggFieldName) + .fetchSource(helper.inferNamedField(args.getFirst()).getRootName(), null) + .size(1) + .from(0) + .sort( + helper.inferNamedField(args.getFirst()).getReferenceForTermQuery(), + SortOrder.ASC), + new ArgMaxMinParser(aggFieldName)); + } else { + yield Pair.of( + helper.build(args.getFirst(), AggregationBuilders.min(aggFieldName)), + new SingleValueParser(aggFieldName)); + } + } + case MAX -> { + String fieldName = helper.inferNamedField(args.getFirst()).getRootName(); + ExprType fieldType = helper.fieldTypes.get(fieldName); + + if (fieldType instanceof OpenSearchTextType) { + yield Pair.of( + AggregationBuilders.topHits(aggFieldName) + .fetchSource(helper.inferNamedField(args.getFirst()).getRootName(), null) + .size(1) + .from(0) + .sort( + helper.inferNamedField(args.getFirst()).getReferenceForTermQuery(), + SortOrder.DESC), + new ArgMaxMinParser(aggFieldName)); + } else { + yield Pair.of( + helper.build(args.getFirst(), AggregationBuilders.max(aggFieldName)), + new SingleValueParser(aggFieldName)); + } + } case VAR_SAMP -> Pair.of( helper.build(args.getFirst(), AggregationBuilders.extendedStats(aggFieldName)), new StatsParser(ExtendedStats::getVarianceSampling, aggFieldName)); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java index 1a67fb7101d..962b222e980 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java @@ -711,4 +711,64 @@ public void testMedian() { "SELECT `percentile_approx`(`SAL`, 50.0, DECIMAL) `median(SAL)`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } + + @Test + public void testMaxOnStringField() { + String ppl = "source=EMP | stats max(ENAME) as max_name"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalAggregate(group=[{}], max_name=[MAX($0)])\n" + + " LogicalProject(ENAME=[$1])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedResult = "max_name=WARD\n"; + verifyResult(root, expectedResult); + } + + @Test + public void testMinOnStringField() { + String ppl = "source=EMP | stats min(ENAME) as min_name"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalAggregate(group=[{}], min_name=[MIN($0)])\n" + + " LogicalProject(ENAME=[$1])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedResult = "min_name=ADAMS\n"; + verifyResult(root, expectedResult); + } + + @Test + public void testMaxOnTimeField() { + String ppl = "source=EMP | stats max(HIREDATE) as max_hire_date"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalAggregate(group=[{}], max_hire_date=[MAX($0)])\n" + + " LogicalProject(HIREDATE=[$4])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedResult = "max_hire_date=1987-05-23\n"; + verifyResult(root, expectedResult); + } + + @Test + public void testMinOnTimeField() { + String ppl = "source=EMP | stats min(HIREDATE) as min_hire_date"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalAggregate(group=[{}], min_hire_date=[MIN($0)])\n" + + " LogicalProject(HIREDATE=[$4])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedResult = "min_hire_date=1980-12-17\n"; + verifyResult(root, expectedResult); + } } From 2201a96915991cd4f2256a77c4bbd8ae72f71ff4 Mon Sep 17 00:00:00 2001 From: Ritvi Bhatt Date: Thu, 11 Sep 2025 12:21:20 -0700 Subject: [PATCH 02/17] fix mixed field behavior Signed-off-by: Ritvi Bhatt --- .../sql/calcite/remote/CalciteExplainIT.java | 8 ++-- .../opensearch/request/AggregateAnalyzer.java | 41 +++++++++--------- .../opensearch/response/agg/MaxMinParser.java | 42 +++++++++++++++++++ .../calcite/CalcitePPLAggregationTest.java | 24 +++++------ 4 files changed, 79 insertions(+), 36 deletions(-) create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MaxMinParser.java diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 1d35050d558..83edc4d207d 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -560,16 +560,16 @@ public void testPushdownLimitIntoAggregation() throws IOException { public void testExplainMaxOnStringField() throws IOException { String expected = loadExpectedPlan("explain_max_string_field.json"); assertJsonEqualsIgnoreId( - expected, - explainQueryToString("source=opensearch-sql_test_index_account | stats max(firstname)")); + expected, + explainQueryToString("source=opensearch-sql_test_index_account | stats max(firstname)")); } @Test public void testExplainMinOnStringField() throws IOException { String expected = loadExpectedPlan("explain_min_string_field.json"); assertJsonEqualsIgnoreId( - expected, - explainQueryToString("source=opensearch-sql_test_index_account | stats min(firstname)")); + expected, + explainQueryToString("source=opensearch-sql_test_index_account | stats min(firstname)")); } /** diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index e66a99b1892..de0b4328f09 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -72,6 +72,7 @@ import org.opensearch.sql.opensearch.request.PredicateAnalyzer.NamedFieldExpression; import org.opensearch.sql.opensearch.response.agg.ArgMaxMinParser; import org.opensearch.sql.opensearch.response.agg.CompositeAggregationParser; +import org.opensearch.sql.opensearch.response.agg.MaxMinParser; import org.opensearch.sql.opensearch.response.agg.MetricParser; import org.opensearch.sql.opensearch.response.agg.NoBucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; @@ -285,18 +286,18 @@ private static Pair createRegularAggregation( if (fieldType instanceof OpenSearchTextType) { yield Pair.of( - AggregationBuilders.topHits(aggFieldName) - .fetchSource(helper.inferNamedField(args.getFirst()).getRootName(), null) - .size(1) - .from(0) - .sort( - helper.inferNamedField(args.getFirst()).getReferenceForTermQuery(), - SortOrder.ASC), - new ArgMaxMinParser(aggFieldName)); + AggregationBuilders.topHits(aggFieldName) + .fetchSource(helper.inferNamedField(args.getFirst()).getRootName(), null) + .size(1) + .from(0) + .sort( + helper.inferNamedField(args.getFirst()).getReferenceForTermQuery(), + SortOrder.ASC), + new MaxMinParser(aggFieldName)); } else { yield Pair.of( - helper.build(args.getFirst(), AggregationBuilders.min(aggFieldName)), - new SingleValueParser(aggFieldName)); + helper.build(args.getFirst(), AggregationBuilders.min(aggFieldName)), + new SingleValueParser(aggFieldName)); } } case MAX -> { @@ -305,18 +306,18 @@ private static Pair createRegularAggregation( if (fieldType instanceof OpenSearchTextType) { yield Pair.of( - AggregationBuilders.topHits(aggFieldName) - .fetchSource(helper.inferNamedField(args.getFirst()).getRootName(), null) - .size(1) - .from(0) - .sort( - helper.inferNamedField(args.getFirst()).getReferenceForTermQuery(), - SortOrder.DESC), - new ArgMaxMinParser(aggFieldName)); + AggregationBuilders.topHits(aggFieldName) + .fetchSource(helper.inferNamedField(args.getFirst()).getRootName(), null) + .size(1) + .from(0) + .sort( + helper.inferNamedField(args.getFirst()).getReferenceForTermQuery(), + SortOrder.DESC), + new MaxMinParser(aggFieldName)); } else { yield Pair.of( - helper.build(args.getFirst(), AggregationBuilders.max(aggFieldName)), - new SingleValueParser(aggFieldName)); + helper.build(args.getFirst(), AggregationBuilders.max(aggFieldName)), + new SingleValueParser(aggFieldName)); } } case VAR_SAMP -> Pair.of( diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MaxMinParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MaxMinParser.java new file mode 100644 index 00000000000..14a89ce32d6 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MaxMinParser.java @@ -0,0 +1,42 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.response.agg; + +import java.util.Collections; +import java.util.Map; +import lombok.Value; +import org.opensearch.search.SearchHit; +import org.opensearch.search.aggregations.Aggregation; +import org.opensearch.search.aggregations.metrics.TopHits; + +/** {@link TopHits} metric parser for MAX/MIN aggregations on text fields. */ +@Value +public class MaxMinParser implements MetricParser { + + String name; + + @Override + public Map parse(Aggregation agg) { + TopHits topHits = (TopHits) agg; + SearchHit[] hits = topHits.getHits().getHits(); + + if (hits.length == 0) { + return Collections.singletonMap(agg.getName(), null); + } + + Map source = hits[0].getSourceAsMap(); + + if (source.isEmpty()) { + return Collections.singletonMap(agg.getName(), null); + } else { + Object value = source.values().iterator().next(); + // Convert all values to strings to handle mixed types consistently with lexicographical + // sorting + String stringValue = value != null ? value.toString() : null; + return Collections.singletonMap(agg.getName(), stringValue); + } + } +} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java index 962b222e980..b849d610cf6 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java @@ -718,9 +718,9 @@ public void testMaxOnStringField() { RelNode root = getRelNode(ppl); String expectedLogical = - "LogicalAggregate(group=[{}], max_name=[MAX($0)])\n" - + " LogicalProject(ENAME=[$1])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; + "LogicalAggregate(group=[{}], max_name=[MAX($0)])\n" + + " LogicalProject(ENAME=[$1])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedResult = "max_name=WARD\n"; @@ -733,9 +733,9 @@ public void testMinOnStringField() { RelNode root = getRelNode(ppl); String expectedLogical = - "LogicalAggregate(group=[{}], min_name=[MIN($0)])\n" - + " LogicalProject(ENAME=[$1])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; + "LogicalAggregate(group=[{}], min_name=[MIN($0)])\n" + + " LogicalProject(ENAME=[$1])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedResult = "min_name=ADAMS\n"; @@ -748,9 +748,9 @@ public void testMaxOnTimeField() { RelNode root = getRelNode(ppl); String expectedLogical = - "LogicalAggregate(group=[{}], max_hire_date=[MAX($0)])\n" - + " LogicalProject(HIREDATE=[$4])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; + "LogicalAggregate(group=[{}], max_hire_date=[MAX($0)])\n" + + " LogicalProject(HIREDATE=[$4])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedResult = "max_hire_date=1987-05-23\n"; @@ -763,9 +763,9 @@ public void testMinOnTimeField() { RelNode root = getRelNode(ppl); String expectedLogical = - "LogicalAggregate(group=[{}], min_hire_date=[MIN($0)])\n" - + " LogicalProject(HIREDATE=[$4])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; + "LogicalAggregate(group=[{}], min_hire_date=[MIN($0)])\n" + + " LogicalProject(HIREDATE=[$4])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedResult = "min_hire_date=1980-12-17\n"; From 87e5b6a2904498c3f06cf1c0096412eed6d81a5c Mon Sep 17 00:00:00 2001 From: Ritvi Bhatt Date: Thu, 11 Sep 2025 12:23:53 -0700 Subject: [PATCH 03/17] update doc Signed-off-by: Ritvi Bhatt --- docs/user/ppl/cmd/stats.rst | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/docs/user/ppl/cmd/stats.rst b/docs/user/ppl/cmd/stats.rst index 9212efcd8e4..c2204d2d30e 100644 --- a/docs/user/ppl/cmd/stats.rst +++ b/docs/user/ppl/cmd/stats.rst @@ -177,6 +177,10 @@ Description Usage: MAX(expr). Returns the maximum value of expr. +For non-numeric fields, MAX performs lexicographical comparison. When fields contain mixed data types (strings and numbers), all values are converted to strings for consistent lexicographical ordering. For example, with values ["apple", 123, "banana", 45], the maximum would be "banana" since lexicographically: "123" < "45" < "apple" < "banana". + +Note: Non-numeric field support requires Calcite to be enabled (see `Configuration`_ section above). Available since version 3.3.0. + Example:: os> source=accounts | stats max(age); @@ -187,6 +191,16 @@ Example:: | 36 | +----------+ +Example with text field:: + + os> source=accounts | stats max(firstname); + fetched rows / total rows = 1/1 + +----------------+ + | max(firstname) | + |----------------| + | Nanette | + +----------------+ + MIN --- @@ -195,6 +209,10 @@ Description Usage: MIN(expr). Returns the minimum value of expr. +For non-numeric fields, MIN performs lexicographical comparison. When fields contain mixed data types (strings and numbers), all values are converted to strings for consistent lexicographical ordering. For example, with values ["apple", 123, "banana", 45], the minimum would be "123" since lexicographically: "123" < "45" < "apple" < "banana". + +Note: Non-numeric field support requires Calcite to be enabled (see `Configuration`_ section above). Available since version 3.3.0. + Example:: os> source=accounts | stats min(age); @@ -205,6 +223,16 @@ Example:: | 28 | +----------+ +Example with text field:: + + os> source=accounts | stats min(firstname); + fetched rows / total rows = 1/1 + +----------------+ + | min(firstname) | + |----------------| + | Amber | + +----------------+ + VAR_SAMP -------- From 2580bcd5ed1faa59859b6ca7f8c6a11544324f88 Mon Sep 17 00:00:00 2001 From: Ritvi Bhatt Date: Thu, 11 Sep 2025 13:34:31 -0700 Subject: [PATCH 04/17] update doc Signed-off-by: Ritvi Bhatt --- docs/user/ppl/cmd/stats.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/user/ppl/cmd/stats.rst b/docs/user/ppl/cmd/stats.rst index c2204d2d30e..fde1a911f49 100644 --- a/docs/user/ppl/cmd/stats.rst +++ b/docs/user/ppl/cmd/stats.rst @@ -177,7 +177,7 @@ Description Usage: MAX(expr). Returns the maximum value of expr. -For non-numeric fields, MAX performs lexicographical comparison. When fields contain mixed data types (strings and numbers), all values are converted to strings for consistent lexicographical ordering. For example, with values ["apple", 123, "banana", 45], the maximum would be "banana" since lexicographically: "123" < "45" < "apple" < "banana". +For non-numeric fields, values are sorted lexicographically. Note: Non-numeric field support requires Calcite to be enabled (see `Configuration`_ section above). Available since version 3.3.0. @@ -209,7 +209,7 @@ Description Usage: MIN(expr). Returns the minimum value of expr. -For non-numeric fields, MIN performs lexicographical comparison. When fields contain mixed data types (strings and numbers), all values are converted to strings for consistent lexicographical ordering. For example, with values ["apple", 123, "banana", 45], the minimum would be "123" since lexicographically: "123" < "45" < "apple" < "banana". +For non-numeric fields, values are sorted lexicographically. Note: Non-numeric field support requires Calcite to be enabled (see `Configuration`_ section above). Available since version 3.3.0. From de51eb1739fb0044527a6420a9f8b71ffcad5eb2 Mon Sep 17 00:00:00 2001 From: Ritvi Bhatt Date: Thu, 11 Sep 2025 13:36:10 -0700 Subject: [PATCH 05/17] update formatting Signed-off-by: Ritvi Bhatt --- .../opensearch/sql/opensearch/response/agg/MaxMinParser.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MaxMinParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MaxMinParser.java index 14a89ce32d6..b61dd7b7c39 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MaxMinParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MaxMinParser.java @@ -33,8 +33,7 @@ public Map parse(Aggregation agg) { return Collections.singletonMap(agg.getName(), null); } else { Object value = source.values().iterator().next(); - // Convert all values to strings to handle mixed types consistently with lexicographical - // sorting + // Convert all values to strings to handle mixed types consistently with lexicographical sorting String stringValue = value != null ? value.toString() : null; return Collections.singletonMap(agg.getName(), stringValue); } From 987809b20caf10ffd2cb9604a9e8e580ba97c7ad Mon Sep 17 00:00:00 2001 From: Ritvi Bhatt Date: Thu, 11 Sep 2025 15:16:30 -0700 Subject: [PATCH 06/17] add tests Signed-off-by: Ritvi Bhatt --- .../calcite/remote/CalcitePPLAggregationIT.java | 16 ++++++++++++++++ .../opensearch/request/AggregateAnalyzer.java | 13 +++++++++++-- .../ppl/calcite/CalcitePPLAggregationTest.java | 12 ++++++++++++ 3 files changed, 39 insertions(+), 2 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java index 2971621dade..b14ecb03637 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java @@ -1176,4 +1176,20 @@ public void testMedian() throws IOException { verifySchema(actual, schema("median(balance)", "bigint")); verifyDataRows(actual, rows(32838)); } + + @Test + public void testStatsMaxOnStringField() throws IOException { + JSONObject actual = + executeQuery(String.format("source=%s | stats max(firstname)", TEST_INDEX_BANK)); + verifySchema(actual, schema("max(firstname)", "string")); + verifyDataRows(actual, rows("Virginia")); + } + + @Test + public void testStatsMinOnStringField() throws IOException { + JSONObject actual = + executeQuery(String.format("source=%s | stats min(firstname)", TEST_INDEX_BANK)); + verifySchema(actual, schema("min(firstname)", "string")); + verifyDataRows(actual, rows("Amber JOHnny")); + } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index de0b4328f09..7b754095dec 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -66,6 +66,7 @@ import org.opensearch.search.sort.SortOrder; import org.opensearch.sql.ast.expression.SpanUnit; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; +import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.opensearch.data.type.OpenSearchTextType; @@ -284,7 +285,7 @@ private static Pair createRegularAggregation( String fieldName = helper.inferNamedField(args.getFirst()).getRootName(); ExprType fieldType = helper.fieldTypes.get(fieldName); - if (fieldType instanceof OpenSearchTextType) { + if (isStringType(fieldType)) { yield Pair.of( AggregationBuilders.topHits(aggFieldName) .fetchSource(helper.inferNamedField(args.getFirst()).getRootName(), null) @@ -304,7 +305,7 @@ private static Pair createRegularAggregation( String fieldName = helper.inferNamedField(args.getFirst()).getRootName(); ExprType fieldType = helper.fieldTypes.get(fieldName); - if (fieldType instanceof OpenSearchTextType) { + if (isStringType(fieldType)) { yield Pair.of( AggregationBuilders.topHits(aggFieldName) .fetchSource(helper.inferNamedField(args.getFirst()).getRootName(), null) @@ -399,6 +400,14 @@ yield switch (functionName) { }; } + /** + * Check if the field type is a string-like type that requires TopHits aggregation + * for lexicographical sorting instead of native OpenSearch min/max aggregations. + */ + private static boolean isStringType(ExprType fieldType) { + return fieldType instanceof OpenSearchTextType || fieldType == ExprCoreType.STRING; + } + private static List> createCompositeBuckets( List groupList, Project project, AggregateAnalyzer.AggregateBuilderHelper helper) { ImmutableList.Builder> resultBuilder = ImmutableList.builder(); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java index b849d610cf6..12b94f4c111 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java @@ -725,6 +725,9 @@ public void testMaxOnStringField() { String expectedResult = "max_name=WARD\n"; verifyResult(root, expectedResult); + + String expectedSparkSql = "SELECT MAX(`ENAME`) `max_name`\nFROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); } @Test @@ -740,6 +743,9 @@ public void testMinOnStringField() { String expectedResult = "min_name=ADAMS\n"; verifyResult(root, expectedResult); + + String expectedSparkSql = "SELECT MIN(`ENAME`) `min_name`\nFROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); } @Test @@ -755,6 +761,9 @@ public void testMaxOnTimeField() { String expectedResult = "max_hire_date=1987-05-23\n"; verifyResult(root, expectedResult); + + String expectedSparkSql = "SELECT MAX(`HIREDATE`) `max_hire_date`\nFROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); } @Test @@ -770,5 +779,8 @@ public void testMinOnTimeField() { String expectedResult = "min_hire_date=1980-12-17\n"; verifyResult(root, expectedResult); + + String expectedSparkSql = "SELECT MIN(`HIREDATE`) `min_hire_date`\nFROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); } } From 864e3795495cf9417f06b86eeef7f127ec653608 Mon Sep 17 00:00:00 2001 From: Ritvi Bhatt Date: Thu, 11 Sep 2025 15:34:26 -0700 Subject: [PATCH 07/17] fix formatting Signed-off-by: Ritvi Bhatt --- .../opensearch/sql/opensearch/request/AggregateAnalyzer.java | 4 ++-- .../opensearch/sql/opensearch/response/agg/MaxMinParser.java | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index 7b754095dec..cc978b46913 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -401,8 +401,8 @@ yield switch (functionName) { } /** - * Check if the field type is a string-like type that requires TopHits aggregation - * for lexicographical sorting instead of native OpenSearch min/max aggregations. + * Check if the field type is a string-like type that requires TopHits aggregation for + * lexicographical sorting instead of native OpenSearch min/max aggregations. */ private static boolean isStringType(ExprType fieldType) { return fieldType instanceof OpenSearchTextType || fieldType == ExprCoreType.STRING; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MaxMinParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MaxMinParser.java index b61dd7b7c39..14a89ce32d6 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MaxMinParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MaxMinParser.java @@ -33,7 +33,8 @@ public Map parse(Aggregation agg) { return Collections.singletonMap(agg.getName(), null); } else { Object value = source.values().iterator().next(); - // Convert all values to strings to handle mixed types consistently with lexicographical sorting + // Convert all values to strings to handle mixed types consistently with lexicographical + // sorting String stringValue = value != null ? value.toString() : null; return Collections.singletonMap(agg.getName(), stringValue); } From 83b5f508f4592ab2b9fe4603660144f90de3565f Mon Sep 17 00:00:00 2001 From: Ritvi Bhatt Date: Thu, 11 Sep 2025 15:36:00 -0700 Subject: [PATCH 08/17] empty Signed-off-by: Ritvi Bhatt From 6dbf48724bcdb487eeb15644ec53b6cb9860ce50 Mon Sep 17 00:00:00 2001 From: Ritvi Bhatt Date: Thu, 11 Sep 2025 16:11:39 -0700 Subject: [PATCH 09/17] fix formatting Signed-off-by: Ritvi Bhatt --- .../org/opensearch/sql/calcite/remote/CalciteExplainIT.java | 2 +- .../opensearch/sql/opensearch/request/AggregateAnalyzer.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 5c428313f89..9cd3bef9763 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -624,7 +624,7 @@ public void testExplainMinOnStringField() throws IOException { expected, explainQueryToString("source=opensearch-sql_test_index_account | stats min(firstname)")); } - + @Test public void testExplainSortOnMetricsNoBucketNullable() throws IOException { // TODO enhancement later: https://github.com/opensearch-project/sql/issues/4282 diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index 3e534f11ec3..c780edea12c 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -423,7 +423,7 @@ yield switch (functionName) { private static boolean isStringType(ExprType fieldType) { return fieldType instanceof OpenSearchTextType || fieldType == ExprCoreType.STRING; } - + private static ValuesSourceAggregationBuilder createBucketAggregation( Integer group, Project project, AggregateAnalyzer.AggregateBuilderHelper helper) { return createBucket(group, project, helper); From 1fbd3fb8d7e635dc324a2cfa4dddd376bf710e50 Mon Sep 17 00:00:00 2001 From: Ritvi Bhatt Date: Thu, 11 Sep 2025 16:37:38 -0700 Subject: [PATCH 10/17] fix Signed-off-by: Ritvi Bhatt From 67d23df9b0d7f826fe64f920166877713255f87a Mon Sep 17 00:00:00 2001 From: Ritvi Bhatt Date: Mon, 15 Sep 2025 17:21:41 -0700 Subject: [PATCH 11/17] support ip type max/min Signed-off-by: Ritvi Bhatt --- .../opensearch/sql/opensearch/request/AggregateAnalyzer.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index c780edea12c..f8a1853b0c2 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -421,7 +421,9 @@ yield switch (functionName) { } private static boolean isStringType(ExprType fieldType) { - return fieldType instanceof OpenSearchTextType || fieldType == ExprCoreType.STRING; + return fieldType instanceof OpenSearchTextType || + fieldType == ExprCoreType.STRING || + fieldType == ExprCoreType.IP; } private static ValuesSourceAggregationBuilder createBucketAggregation( From a648c2ed53480cf9722fa395bfac285fb5b0b39c Mon Sep 17 00:00:00 2001 From: Ritvi Bhatt Date: Mon, 15 Sep 2025 21:43:48 -0700 Subject: [PATCH 12/17] fix formatting Signed-off-by: Ritvi Bhatt --- .../sql/opensearch/request/AggregateAnalyzer.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index f8a1853b0c2..6858e2287ea 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -421,9 +421,9 @@ yield switch (functionName) { } private static boolean isStringType(ExprType fieldType) { - return fieldType instanceof OpenSearchTextType || - fieldType == ExprCoreType.STRING || - fieldType == ExprCoreType.IP; + return fieldType instanceof OpenSearchTextType + || fieldType == ExprCoreType.STRING + || fieldType == ExprCoreType.IP; } private static ValuesSourceAggregationBuilder createBucketAggregation( From c3080d8835e269cd4be0b1ac2b35a05f41ff9abd Mon Sep 17 00:00:00 2001 From: Ritvi Bhatt Date: Tue, 16 Sep 2025 09:34:07 -0700 Subject: [PATCH 13/17] use tophitsparser Signed-off-by: Ritvi Bhatt --- .../opensearch/request/AggregateAnalyzer.java | 5 +-- .../opensearch/response/agg/MaxMinParser.java | 42 ------------------- 2 files changed, 2 insertions(+), 45 deletions(-) delete mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MaxMinParser.java diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index 6858e2287ea..c489f023c41 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -77,7 +77,6 @@ import org.opensearch.sql.opensearch.response.agg.ArgMaxMinParser; import org.opensearch.sql.opensearch.response.agg.BucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.CompositeAggregationParser; -import org.opensearch.sql.opensearch.response.agg.MaxMinParser; import org.opensearch.sql.opensearch.response.agg.MetricParser; import org.opensearch.sql.opensearch.response.agg.NoBucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; @@ -314,7 +313,7 @@ private static Pair createRegularAggregation( .sort( helper.inferNamedField(args.getFirst()).getReferenceForTermQuery(), SortOrder.ASC), - new MaxMinParser(aggFieldName)); + new TopHitsParser(aggFieldName, true)); } else { yield Pair.of( helper.build(args.getFirst(), AggregationBuilders.min(aggFieldName)), @@ -334,7 +333,7 @@ private static Pair createRegularAggregation( .sort( helper.inferNamedField(args.getFirst()).getReferenceForTermQuery(), SortOrder.DESC), - new MaxMinParser(aggFieldName)); + new TopHitsParser(aggFieldName, true)); } else { yield Pair.of( helper.build(args.getFirst(), AggregationBuilders.max(aggFieldName)), diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MaxMinParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MaxMinParser.java deleted file mode 100644 index 14a89ce32d6..00000000000 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MaxMinParser.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.opensearch.response.agg; - -import java.util.Collections; -import java.util.Map; -import lombok.Value; -import org.opensearch.search.SearchHit; -import org.opensearch.search.aggregations.Aggregation; -import org.opensearch.search.aggregations.metrics.TopHits; - -/** {@link TopHits} metric parser for MAX/MIN aggregations on text fields. */ -@Value -public class MaxMinParser implements MetricParser { - - String name; - - @Override - public Map parse(Aggregation agg) { - TopHits topHits = (TopHits) agg; - SearchHit[] hits = topHits.getHits().getHits(); - - if (hits.length == 0) { - return Collections.singletonMap(agg.getName(), null); - } - - Map source = hits[0].getSourceAsMap(); - - if (source.isEmpty()) { - return Collections.singletonMap(agg.getName(), null); - } else { - Object value = source.values().iterator().next(); - // Convert all values to strings to handle mixed types consistently with lexicographical - // sorting - String stringValue = value != null ? value.toString() : null; - return Collections.singletonMap(agg.getName(), stringValue); - } - } -} From 16edabe39fa26d85014f96484233655e15c5f595 Mon Sep 17 00:00:00 2001 From: Ritvi Bhatt Date: Tue, 16 Sep 2025 09:48:29 -0700 Subject: [PATCH 14/17] remove v2 explain Signed-off-by: Ritvi Bhatt --- .../ppl/explain_max_string_field.json | 23 ------------------- .../ppl/explain_min_string_field.json | 23 ------------------- 2 files changed, 46 deletions(-) delete mode 100644 integ-test/src/test/resources/expectedOutput/ppl/explain_max_string_field.json delete mode 100644 integ-test/src/test/resources/expectedOutput/ppl/explain_min_string_field.json diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_max_string_field.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_max_string_field.json deleted file mode 100644 index 419f1c3d61c..00000000000 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_max_string_field.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "root": { - "type": "OpenSearchPlan", - "children": [ - { - "type": "LogicalAggregate", - "description": "group=[{}], max(firstname)=[MAX($0)]", - "children": [ - { - "type": "LogicalProject", - "description": "firstname=[$3]", - "children": [ - { - "type": "LogicalTableScan", - "description": "table=[[opensearch-sql_test_index_account]]" - } - ] - } - ] - } - ] - } -} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_min_string_field.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_min_string_field.json deleted file mode 100644 index 8e4f353d9f6..00000000000 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_min_string_field.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "root": { - "type": "OpenSearchPlan", - "children": [ - { - "type": "LogicalAggregate", - "description": "group=[{}], min(firstname)=[MIN($0)]", - "children": [ - { - "type": "LogicalProject", - "description": "firstname=[$3]", - "children": [ - { - "type": "LogicalTableScan", - "description": "table=[[opensearch-sql_test_index_account]]" - } - ] - } - ] - } - ] - } -} \ No newline at end of file From d2200e84fd970bf9372f1e994d3f45f21e740e96 Mon Sep 17 00:00:00 2001 From: Ritvi Bhatt Date: Tue, 16 Sep 2025 12:37:14 -0700 Subject: [PATCH 15/17] check for numeric fields for native max/min Signed-off-by: Ritvi Bhatt --- .../opensearch/request/AggregateAnalyzer.java | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index c489f023c41..970b9b40a9c 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -304,7 +304,11 @@ private static Pair createRegularAggregation( String fieldName = helper.inferNamedField(args.getFirst()).getRootName(); ExprType fieldType = helper.fieldTypes.get(fieldName); - if (isStringType(fieldType)) { + if (isNativeMaxMinSupportedType(fieldType)) { + yield Pair.of( + helper.build(args.getFirst(), AggregationBuilders.min(aggFieldName)), + new SingleValueParser(aggFieldName)); + } else { yield Pair.of( AggregationBuilders.topHits(aggFieldName) .fetchSource(helper.inferNamedField(args.getFirst()).getRootName(), null) @@ -314,17 +318,17 @@ private static Pair createRegularAggregation( helper.inferNamedField(args.getFirst()).getReferenceForTermQuery(), SortOrder.ASC), new TopHitsParser(aggFieldName, true)); - } else { - yield Pair.of( - helper.build(args.getFirst(), AggregationBuilders.min(aggFieldName)), - new SingleValueParser(aggFieldName)); } } case MAX -> { String fieldName = helper.inferNamedField(args.getFirst()).getRootName(); ExprType fieldType = helper.fieldTypes.get(fieldName); - if (isStringType(fieldType)) { + if (isNativeMaxMinSupportedType(fieldType)) { + yield Pair.of( + helper.build(args.getFirst(), AggregationBuilders.max(aggFieldName)), + new SingleValueParser(aggFieldName)); + } else { yield Pair.of( AggregationBuilders.topHits(aggFieldName) .fetchSource(helper.inferNamedField(args.getFirst()).getRootName(), null) @@ -334,10 +338,6 @@ private static Pair createRegularAggregation( helper.inferNamedField(args.getFirst()).getReferenceForTermQuery(), SortOrder.DESC), new TopHitsParser(aggFieldName, true)); - } else { - yield Pair.of( - helper.build(args.getFirst(), AggregationBuilders.max(aggFieldName)), - new SingleValueParser(aggFieldName)); } } case VAR_SAMP -> Pair.of( @@ -425,6 +425,13 @@ private static boolean isStringType(ExprType fieldType) { || fieldType == ExprCoreType.IP; } + private static boolean isNativeMaxMinSupportedType(ExprType fieldType) { + return ExprCoreType.numberTypes().contains(fieldType) + || fieldType == ExprCoreType.DATE + || fieldType == ExprCoreType.TIME + || fieldType == ExprCoreType.TIMESTAMP; + } + private static ValuesSourceAggregationBuilder createBucketAggregation( Integer group, Project project, AggregateAnalyzer.AggregateBuilderHelper helper) { return createBucket(group, project, helper); From e692b9d15309eead67d8ec617d68938fc001046e Mon Sep 17 00:00:00 2001 From: Ritvi Bhatt Date: Tue, 16 Sep 2025 12:45:06 -0700 Subject: [PATCH 16/17] change names Signed-off-by: Ritvi Bhatt --- .../sql/opensearch/request/AggregateAnalyzer.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index 970b9b40a9c..3d47d46304d 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -304,7 +304,7 @@ private static Pair createRegularAggregation( String fieldName = helper.inferNamedField(args.getFirst()).getRootName(); ExprType fieldType = helper.fieldTypes.get(fieldName); - if (isNativeMaxMinSupportedType(fieldType)) { + if (supportsMaxMinAggregation(fieldType)) { yield Pair.of( helper.build(args.getFirst(), AggregationBuilders.min(aggFieldName)), new SingleValueParser(aggFieldName)); @@ -324,7 +324,7 @@ private static Pair createRegularAggregation( String fieldName = helper.inferNamedField(args.getFirst()).getRootName(); ExprType fieldType = helper.fieldTypes.get(fieldName); - if (isNativeMaxMinSupportedType(fieldType)) { + if (supportsMaxMinAggregation(fieldType)) { yield Pair.of( helper.build(args.getFirst(), AggregationBuilders.max(aggFieldName)), new SingleValueParser(aggFieldName)); @@ -425,7 +425,7 @@ private static boolean isStringType(ExprType fieldType) { || fieldType == ExprCoreType.IP; } - private static boolean isNativeMaxMinSupportedType(ExprType fieldType) { + private static boolean supportsMaxMinAggregation(ExprType fieldType) { return ExprCoreType.numberTypes().contains(fieldType) || fieldType == ExprCoreType.DATE || fieldType == ExprCoreType.TIME From 27fe823cf70f390c311ac8e0a7b7b40e97b5bf82 Mon Sep 17 00:00:00 2001 From: Ritvi Bhatt Date: Tue, 16 Sep 2025 12:58:41 -0700 Subject: [PATCH 17/17] fix type checking Signed-off-by: Ritvi Bhatt --- .../opensearch/request/AggregateAnalyzer.java | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index 3d47d46304d..5fc8bf9f6c0 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -72,7 +72,7 @@ import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.function.BuiltinFunctionName; -import org.opensearch.sql.opensearch.data.type.OpenSearchTextType; +import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; import org.opensearch.sql.opensearch.request.PredicateAnalyzer.NamedFieldExpression; import org.opensearch.sql.opensearch.response.agg.ArgMaxMinParser; import org.opensearch.sql.opensearch.response.agg.BucketAggregationParser; @@ -419,17 +419,16 @@ yield switch (functionName) { }; } - private static boolean isStringType(ExprType fieldType) { - return fieldType instanceof OpenSearchTextType - || fieldType == ExprCoreType.STRING - || fieldType == ExprCoreType.IP; - } - private static boolean supportsMaxMinAggregation(ExprType fieldType) { - return ExprCoreType.numberTypes().contains(fieldType) - || fieldType == ExprCoreType.DATE - || fieldType == ExprCoreType.TIME - || fieldType == ExprCoreType.TIMESTAMP; + ExprType coreType = + (fieldType instanceof OpenSearchDataType) + ? ((OpenSearchDataType) fieldType).getExprType() + : fieldType; + + return ExprCoreType.numberTypes().contains(coreType) + || coreType == ExprCoreType.DATE + || coreType == ExprCoreType.TIME + || coreType == ExprCoreType.TIMESTAMP; } private static ValuesSourceAggregationBuilder createBucketAggregation(