From fe6d01a312afbe382c61fd90208c1919e1fa318e Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Thu, 27 Nov 2025 15:06:16 +0800 Subject: [PATCH 1/4] Remove unnecessary filter for DateHistogram aggregation Signed-off-by: Lantao Jin --- .../org/opensearch/sql/ppl/ExplainIT.java | 16 +- .../opensearch/sql/ppl/PPLIntegTestCase.java | 2 + .../org/opensearch/sql/util/MatcherUtils.java | 7 +- .../composite_date_histogram_daily.ppl | 3 +- .../big5/queries/composite_terms.ppl | 3 +- .../big5/queries/composite_terms_keyword.ppl | 3 +- .../queries/optimized/composite_terms.ppl | 3 +- .../optimized/composite_terms_keyword.ppl | 3 +- .../agg_composite_date_range_push.yaml | 2 +- .../big5/composite_date_histogram_daily.yaml | 15 +- .../calcite/big5/composite_terms.yaml | 4 +- .../calcite/big5/composite_terms_keyword.yaml | 4 +- .../big5/date_histogram_hourly_agg.yaml | 2 +- .../big5/date_histogram_minute_agg.yaml | 2 +- .../calcite/chart_with_timestamp_span.yaml | 2 +- .../calcite/clickbench/q43.yaml | 2 +- .../calcite/explain_filter_with_search.yaml | 2 +- .../calcite/explain_stats_by_timespan.json | 6 - .../calcite/explain_stats_by_timespan.yaml | 10 + .../calcite/explain_stats_by_timespan2.json | 6 - .../calcite/explain_stats_by_timespan2.yaml | 10 + .../explain_stats_by_timespan.json | 6 - .../explain_stats_by_timespan.yaml | 14 ++ .../explain_stats_by_timespan2.json | 6 - .../explain_stats_by_timespan2.yaml | 14 ++ .../big5/composite_date_histogram_daily.yaml | 32 ++- .../ppl/big5/composite_terms.yaml | 36 +-- .../ppl/big5/composite_terms_keyword.yaml | 40 +-- .../ppl/big5/date_histogram_hourly_agg.yaml | 2 +- .../ppl/big5/date_histogram_minute_agg.yaml | 2 +- .../ppl/explain_stats_by_timespan.json | 15 -- .../ppl/explain_stats_by_timespan.yaml | 15 ++ .../ppl/explain_stats_by_timespan2.json | 15 -- .../ppl/explain_stats_by_timespan2.yaml | 15 ++ .../opensearch/request/PredicateAnalyzer.java | 9 +- .../dsl/BucketAggregationBuilder.java | 132 ---------- 
.../dsl/CompositeAggregationBuilder.java | 29 +-- .../AggregationQueryBuilderTest.java | 2 +- .../dsl/BucketAggregationBuilderTest.java | 234 ------------------ 39 files changed, 202 insertions(+), 523 deletions(-) delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan2.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan2.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan.json create mode 100644 integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan2.json create mode 100644 integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan2.yaml delete mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilder.java delete mode 100644 opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilderTest.java diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java index 42a9ddfb98a..62eadd7ef5e 100644 --- 
a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java @@ -464,22 +464,22 @@ public void testStatsBySpanNonBucketNullable() throws IOException { @Test public void testStatsByTimeSpan() throws IOException { - String expected = loadExpectedPlan("explain_stats_by_timespan.json"); - assertJsonEqualsIgnoreId( + String expected = loadExpectedPlan("explain_stats_by_timespan.yaml"); + assertYamlEqualsIgnoreId( expected, - explainQueryToString( + explainQueryYaml( String.format("source=%s | stats count() by span(birthdate,1m)", TEST_INDEX_BANK))); - expected = loadExpectedPlan("explain_stats_by_timespan2.json"); - assertJsonEqualsIgnoreId( + expected = loadExpectedPlan("explain_stats_by_timespan2.yaml"); + assertYamlEqualsIgnoreId( expected, - explainQueryToString( + explainQueryYaml( String.format("source=%s | stats count() by span(birthdate,1M)", TEST_INDEX_BANK))); // bucket_nullable doesn't impact by-span-time - assertJsonEqualsIgnoreId( + assertYamlEqualsIgnoreId( expected, - explainQueryToString( + explainQueryYaml( String.format( "source=%s | stats bucket_nullable=false count() by span(birthdate,1M)", TEST_INDEX_BANK))); diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/PPLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/ppl/PPLIntegTestCase.java index 5c2e45f1af1..81eaad47298 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/PPLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/PPLIntegTestCase.java @@ -59,6 +59,8 @@ protected String executeQueryToString(String query) throws IOException { return getResponseBody(response, true); } + /** Deprecated, use {@link #explainQueryYaml(String)} */ + @Deprecated protected String explainQueryToString(String query) throws IOException { return explainQueryToString(query, false); } diff --git a/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java 
b/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java index b7e030e1295..65018eda0eb 100644 --- a/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java +++ b/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java @@ -411,7 +411,11 @@ public static void assertJsonEquals(String expected, String actual) { JsonParser.parseString(eliminatePid(actual))); } - /** Compare two JSON string are equals with ignoring the RelNode id in the Calcite plan. */ + /** + * Compare two JSON string are equals with ignoring the RelNode id in the Calcite plan. + * Deprecated, use {@link #assertYamlEqualsIgnoreId(String, String)} + */ + @Deprecated // use assertYamlEqualsIgnoreId instead public static void assertJsonEqualsIgnoreId(String expected, String actual) { assertJsonEquals(cleanUpId(expected), cleanUpId(actual)); } @@ -434,6 +438,7 @@ private static String eliminatePid(String s) { return s.replaceAll("pitId=[^,]+,", "pitId=*,"); } + /** Compare two YAML strings are equals with ignoring the RelNode id in the Calcite plan. 
*/ public static void assertYamlEqualsIgnoreId(String expectedYaml, String actualYaml) { String cleanedYaml = cleanUpYaml(actualYaml); assertYamlEquals(expectedYaml, cleanedYaml); diff --git a/integ-test/src/test/resources/big5/queries/composite_date_histogram_daily.ppl b/integ-test/src/test/resources/big5/queries/composite_date_histogram_daily.ppl index 656289b0603..10679eb2ea6 100644 --- a/integ-test/src/test/resources/big5/queries/composite_date_histogram_daily.ppl +++ b/integ-test/src/test/resources/big5/queries/composite_date_histogram_daily.ppl @@ -31,4 +31,5 @@ */ source = big5 | where `@timestamp` >= '2022-12-30 00:00:00' and `@timestamp` < '2023-01-07 12:00:00' -| stats count() by span(`@timestamp`, 1d) \ No newline at end of file +| stats count() by span(`@timestamp`, 1d) +| head 10 \ No newline at end of file diff --git a/integ-test/src/test/resources/big5/queries/composite_terms.ppl b/integ-test/src/test/resources/big5/queries/composite_terms.ppl index 07edca09e69..4a9a4a3244c 100644 --- a/integ-test/src/test/resources/big5/queries/composite_terms.ppl +++ b/integ-test/src/test/resources/big5/queries/composite_terms.ppl @@ -29,4 +29,5 @@ source = big5 | where `@timestamp` >= '2023-01-02 00:00:00' and `@timestamp` < '2023-01-02 10:00:00' | stats count() by `process.name`, `cloud.region` -| sort - `process.name`, + `cloud.region` \ No newline at end of file +| sort - `process.name`, + `cloud.region` +| head 10 \ No newline at end of file diff --git a/integ-test/src/test/resources/big5/queries/composite_terms_keyword.ppl b/integ-test/src/test/resources/big5/queries/composite_terms_keyword.ppl index 42b8c9585a4..e0a92c0b6e6 100644 --- a/integ-test/src/test/resources/big5/queries/composite_terms_keyword.ppl +++ b/integ-test/src/test/resources/big5/queries/composite_terms_keyword.ppl @@ -30,4 +30,5 @@ source = big5 | where `@timestamp` >= '2023-01-02 00:00:00' and `@timestamp` < '2023-01-02 10:00:00' | stats count() by `process.name`, `cloud.region`, 
`aws.cloudwatch.log_stream` -| sort - `process.name`, + `cloud.region`, + `aws.cloudwatch.log_stream` \ No newline at end of file +| sort - `process.name`, + `cloud.region`, + `aws.cloudwatch.log_stream` +| head 10 \ No newline at end of file diff --git a/integ-test/src/test/resources/big5/queries/optimized/composite_terms.ppl b/integ-test/src/test/resources/big5/queries/optimized/composite_terms.ppl index 97897e227de..6161a31a0c8 100644 --- a/integ-test/src/test/resources/big5/queries/optimized/composite_terms.ppl +++ b/integ-test/src/test/resources/big5/queries/optimized/composite_terms.ppl @@ -29,4 +29,5 @@ source = big5 | where `@timestamp` >= '2023-01-02 00:00:00' and `@timestamp` < '2023-01-02 10:00:00' | stats bucket_nullable = false count() by `process.name`, `cloud.region` -| sort - `process.name`, + `cloud.region` \ No newline at end of file +| sort - `process.name`, + `cloud.region` +| head 10 \ No newline at end of file diff --git a/integ-test/src/test/resources/big5/queries/optimized/composite_terms_keyword.ppl b/integ-test/src/test/resources/big5/queries/optimized/composite_terms_keyword.ppl index 04d12b4fb0e..224f5575aaa 100644 --- a/integ-test/src/test/resources/big5/queries/optimized/composite_terms_keyword.ppl +++ b/integ-test/src/test/resources/big5/queries/optimized/composite_terms_keyword.ppl @@ -30,4 +30,5 @@ source = big5 | where `@timestamp` >= '2023-01-02 00:00:00' and `@timestamp` < '2023-01-02 10:00:00' | stats bucket_nullable = false count() by `process.name`, `cloud.region`, `aws.cloudwatch.log_stream` -| sort - `process.name`, + `cloud.region`, + `aws.cloudwatch.log_stream` \ No newline at end of file +| sort - `process.name`, + `cloud.region`, + `aws.cloudwatch.log_stream` +| head 10 \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_date_range_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_date_range_push.yaml index 30e4762d325..44383b0ab04 
100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_date_range_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_date_range_push.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(@timestamp=[$0], category=[$1], value=[$2], timestamp=[$3], _id=[$4], _index=[$5], _score=[$6], _maxscore=[$7], _sort=[$8], _routing=[$9], value_range=[CASE(<($2, 7000), 'small':VARCHAR, 'large':VARCHAR)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[FILTER->IS NOT NULL($0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},avg(value)=AVG($1)), PROJECT->[avg(value), span(@timestamp,1h), value_range], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"exists":{"field":"@timestamp","boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(@timestamp,1h)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1h"}}}]},"aggregations":{"value_range":{"range":{"field":"value","ranges":[{"key":"small","to":7000.0},{"key":"large","from":7000.0}],"keyed":true},"aggregations":{"avg(value)":{"avg":{"field":"value"}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[FILTER->IS NOT NULL($0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},avg(value)=AVG($1)), PROJECT->[avg(value), span(@timestamp,1h), value_range], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"exists":{"field":"@timestamp","boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(@timestamp,1h)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","calendar_interval":"1h"}}}]},"aggregations":{"value_range":{"range":{"field":"value","ranges":[{"key":"small","to":7000.0},{"key":"large","from":7000.0}],"keyed":true},"aggregations":{"avg(value)":{"avg":{"field":"value"}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml index 9b69c67b74c..5b6c7913f9e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml @@ -1,11 +1,12 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(count()=[$1], span(`@timestamp`,1d)=[$0]) - LogicalAggregate(group=[{0}], count()=[COUNT()]) - LogicalProject(span(`@timestamp`,1d)=[SPAN($17, 1, 'd')]) - LogicalFilter(condition=[IS NOT NULL($17)]) - LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2022-12-30 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-07 12:00:00':VARCHAR)))]) - CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) + LogicalSort(fetch=[10]) + LogicalProject(count()=[$1], span(`@timestamp`,1d)=[$0]) + LogicalAggregate(group=[{0}], count()=[COUNT()]) + LogicalProject(span(`@timestamp`,1d)=[SPAN($17, 1, 'd')]) + LogicalFilter(condition=[IS NOT NULL($17)]) + LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2022-12-30 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-07 12:00:00':VARCHAR)))]) + CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) 
physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2022-12-30 00:00:00':VARCHAR..'2023-01-07 12:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1d)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2022-12-30T00:00:00.000Z","to":"2023-01-07T12:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"@timestamp","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1d)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2022-12-30 00:00:00':VARCHAR..'2023-01-07 12:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1d)], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2022-12-30T00:00:00.000Z","to":"2023-01-07T12:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"span(`@timestamp`,1d)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","calendar_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, 
startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml index 8720f023f80..cc3af323ddf 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml @@ -1,7 +1,7 @@ calcite: logical: | LogicalSystemLimit(sort0=[$1], sort1=[$2], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalSort(sort0=[$1], sort1=[$2], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first]) + LogicalSort(sort0=[$1], sort1=[$2], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first], fetch=[10]) LogicalProject(count()=[$2], process.name=[$0], cloud.region=[$1]) LogicalAggregate(group=[{0, 1}], count()=[COUNT()]) LogicalProject(process.name=[$7], cloud.region=[$14]) @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-02 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-02 10:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp], FILTER->SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 10:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), process.name, cloud.region], SORT->[1 DESC LAST, 2 ASC FIRST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["process.name","cloud.region","@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp], FILTER->SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 10:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), process.name, cloud.region], SORT->[1 DESC LAST, 2 ASC FIRST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["process.name","cloud.region","@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml index ac251d900f0..9e546a26dbf 
100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml @@ -1,7 +1,7 @@ calcite: logical: | LogicalSystemLimit(sort0=[$1], sort1=[$2], sort2=[$3], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first], dir2=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalSort(sort0=[$1], sort1=[$2], sort2=[$3], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first], dir2=[ASC-nulls-first]) + LogicalSort(sort0=[$1], sort1=[$2], sort2=[$3], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first], dir2=[ASC-nulls-first], fetch=[10]) LogicalProject(count()=[$3], process.name=[$0], cloud.region=[$1], aws.cloudwatch.log_stream=[$2]) LogicalAggregate(group=[{0, 1, 2}], count()=[COUNT()]) LogicalProject(process.name=[$7], cloud.region=[$14], aws.cloudwatch.log_stream=[$34]) @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-02 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-02 10:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp, aws.cloudwatch.log_stream], FILTER->SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 10:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},count()=COUNT()), PROJECT->[count(), process.name, cloud.region, aws.cloudwatch.log_stream], SORT->[1 DESC LAST, 2 ASC FIRST, 3 ASC FIRST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["process.name","cloud.region","@timestamp","aws.cloudwatch.log_stream"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}},{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp, aws.cloudwatch.log_stream], FILTER->SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 10:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},count()=COUNT()), PROJECT->[count(), process.name, cloud.region, aws.cloudwatch.log_stream], SORT->[1 DESC LAST, 2 ASC FIRST, 3 ASC FIRST], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["process.name","cloud.region","@timestamp","aws.cloudwatch.log_stream"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}},{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_hourly_agg.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_hourly_agg.yaml index a97ca073a21..b6b0a42062f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_hourly_agg.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_hourly_agg.yaml @@ -7,4 +7,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($17)]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1h)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1h)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1h"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], 
PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1h)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1h)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","calendar_interval":"1h"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml index c715c2c2a42..9ab2af83445 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1m)], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"@timestamp","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1m)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1m)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1m)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","calendar_interval":"1m"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/chart_with_timestamp_span.yaml b/integ-test/src/test/resources/expectedOutput/calcite/chart_with_timestamp_span.yaml index a07c92033d0..052680e2aa9 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/chart_with_timestamp_span.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/calcite/chart_with_timestamp_span.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($0)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count=COUNT()), SORT->[0], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"@timestamp0":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count=COUNT()), SORT->[0], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"@timestamp0":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","calendar_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml index 3bba5e1ed82..0466a6a16dd 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml @@ -12,4 +12,4 @@ calcite: EnumerableCalc(expr#0..1=[{inputs}], PageViews=[$t1], M=[$t0]) EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[1000], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, 
EventTime, DontCountHits, IsRefresh, CounterID], FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-15 00:00:00':VARCHAR]]:VARCHAR), =($3, 0), =($2, 0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), SORT->[0 ASC FIRST], LIMIT->[10 from 1000]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-15T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","EventTime","DontCountHits","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1010,"sources":[{"M":{"date_histogram":{"field":"EventTime","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, EventTime, DontCountHits, IsRefresh, CounterID], FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-15 00:00:00':VARCHAR]]:VARCHAR), =($3, 0), =($2, 0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), SORT->[0 ASC FIRST], LIMIT->[10 from 1000]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-15T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","EventTime","DontCountHits","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1010,"sources":[{"M":{"date_histogram":{"field":"EventTime","missing_bucket":false,"order":"asc","calendar_interval":"1m"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml index 29ebac7168f..f2744c79991 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(>=($3, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($3, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[birthdate], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1d)], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"birthdate":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"birthdate","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["birthdate"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1d)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[birthdate], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1d)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"birthdate":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["birthdate"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1d)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","calendar_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan.json deleted file mode 100644 index f84aa0cb018..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan.json +++ /dev/null @@ -1,6 +0,0 @@ 
-{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(count()=[$1], span(birthdate,1m)=[$0])\n LogicalAggregate(group=[{0}], count()=[COUNT()])\n LogicalProject(span(birthdate,1m)=[SPAN($3, 1, 'm')])\n LogicalFilter(condition=[IS NOT NULL($3)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1m)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(birthdate,1m)\":{\"date_histogram\":{\"field\":\"birthdate\",\"missing_bucket\":false,\"order\":\"asc\",\"fixed_interval\":\"1m\"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" - } -} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan.yaml new file mode 100644 index 00000000000..fec4f9cf9d5 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(count()=[$1], span(birthdate,1m)=[$0]) + LogicalAggregate(group=[{0}], count()=[COUNT()]) + LogicalProject(span(birthdate,1m)=[SPAN($3, 1, 'm')]) + LogicalFilter(condition=[IS NOT NULL($3)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1m)], 
LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1m)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","calendar_interval":"1m"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.json deleted file mode 100644 index 036547978b1..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(count()=[$1], span(birthdate,1M)=[$0])\n LogicalAggregate(group=[{0}], count()=[COUNT()])\n LogicalProject(span(birthdate,1M)=[SPAN($3, 1, 'M')])\n LogicalFilter(condition=[IS NOT NULL($3)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1M)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(birthdate,1M)\":{\"date_histogram\":{\"field\":\"birthdate\",\"missing_bucket\":false,\"order\":\"asc\",\"calendar_interval\":\"1M\"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" - } -} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.yaml new file mode 100644 index 
00000000000..5021adf62b8 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(count()=[$1], span(birthdate,1M)=[$0]) + LogicalAggregate(group=[{0}], count()=[COUNT()]) + LogicalProject(span(birthdate,1M)=[SPAN($3, 1, 'M')]) + LogicalFilter(condition=[IS NOT NULL($3)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1M)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1M)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","calendar_interval":"1M"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan.json deleted file mode 100644 index 1b846e6c16e..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(count()=[$1], span(birthdate,1m)=[$0])\n LogicalAggregate(group=[{0}], count()=[COUNT()])\n LogicalProject(span(birthdate,1m)=[SPAN($3, 1, 'm')])\n LogicalFilter(condition=[IS NOT NULL($3)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n 
EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], span(birthdate,1m)=[$t0])\n EnumerableAggregate(group=[{0}], count()=[COUNT()])\n EnumerableCalc(expr#0..18=[{inputs}], expr#19=[1], expr#20=['m'], expr#21=[SPAN($t3, $t19, $t20)], expr#22=[IS NOT NULL($t3)], span(birthdate,1m)=[$t21], $condition=[$t22])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n" - } -} diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan.yaml new file mode 100644 index 00000000000..3843b2bce4a --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan.yaml @@ -0,0 +1,14 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(count()=[$1], span(birthdate,1m)=[$0]) + LogicalAggregate(group=[{0}], count()=[COUNT()]) + LogicalProject(span(birthdate,1m)=[SPAN($3, 1, 'm')]) + LogicalFilter(condition=[IS NOT NULL($3)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], span(birthdate,1m)=[$t0]) + EnumerableAggregate(group=[{0}], count()=[COUNT()]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[1], expr#20=['m'], expr#21=[SPAN($t3, $t19, $t20)], expr#22=[IS NOT NULL($t3)], span(birthdate,1m)=[$t21], $condition=[$t22]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan2.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan2.json deleted file mode 100644 index 9cec9bcf190..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan2.json 
+++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(count()=[$1], span(birthdate,1M)=[$0])\n LogicalAggregate(group=[{0}], count()=[COUNT()])\n LogicalProject(span(birthdate,1M)=[SPAN($3, 1, 'M')])\n LogicalFilter(condition=[IS NOT NULL($3)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], span(birthdate,1M)=[$t0])\n EnumerableAggregate(group=[{0}], count()=[COUNT()])\n EnumerableCalc(expr#0..18=[{inputs}], expr#19=[1], expr#20=['M'], expr#21=[SPAN($t3, $t19, $t20)], expr#22=[IS NOT NULL($t3)], span(birthdate,1M)=[$t21], $condition=[$t22])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n" - } -} diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan2.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan2.yaml new file mode 100644 index 00000000000..af739c44d85 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan2.yaml @@ -0,0 +1,14 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(count()=[$1], span(birthdate,1M)=[$0]) + LogicalAggregate(group=[{0}], count()=[COUNT()]) + LogicalProject(span(birthdate,1M)=[SPAN($3, 1, 'M')]) + LogicalFilter(condition=[IS NOT NULL($3)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], span(birthdate,1M)=[$t0]) + EnumerableAggregate(group=[{0}], count()=[COUNT()]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[1], expr#20=['M'], expr#21=[SPAN($t3, $t19, $t20)], expr#22=[IS NOT NULL($t3)], span(birthdate,1M)=[$t21], $condition=[$t22]) + 
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_date_histogram_daily.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_date_histogram_daily.yaml index 9a0882dc49a..0a057446400 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_date_histogram_daily.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_date_histogram_daily.yaml @@ -3,17 +3,23 @@ root: description: fields: "[count(), span(`@timestamp`,1d)]" children: - - name: OpenSearchIndexScan + - name: LimitOperator description: - request: "OpenSearchQueryRequest(indexName=big5, sourceBuilder={\"from\":0,\"\ - size\":0,\"timeout\":\"1m\",\"query\":{\"bool\":{\"filter\":[{\"range\"\ - :{\"@timestamp\":{\"from\":1672358400000,\"to\":null,\"include_lower\":true,\"\ - include_upper\":true,\"boost\":1.0}}},{\"range\":{\"@timestamp\":{\"from\"\ - :null,\"to\":1673092800000,\"include_lower\":true,\"include_upper\":false,\"\ - boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"aggregations\"\ - :{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(`@timestamp`,1d)\"\ - :{\"date_histogram\":{\"field\":\"@timestamp\",\"missing_bucket\":false,\"\ - order\":\"asc\",\"fixed_interval\":\"1d\"}}}]},\"aggregations\":{\"count()\"\ - :{\"value_count\":{\"field\":\"_index\"}}}}}}, needClean=true, searchDone=false,\ - \ pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" - children: [] \ No newline at end of file + limit: 10 + offset: 0 + children: + - name: OpenSearchIndexScan + description: + request: "OpenSearchQueryRequest(indexName=big5, sourceBuilder={\"from\"\ + :0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"bool\":{\"filter\":[{\"\ + range\":{\"@timestamp\":{\"from\":1672358400000,\"to\":null,\"include_lower\"\ + 
:true,\"include_upper\":true,\"boost\":1.0}}},{\"range\":{\"@timestamp\"\ + :{\"from\":null,\"to\":1673092800000,\"include_lower\":true,\"include_upper\"\ + :false,\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"\ + aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"\ + sources\":[{\"span(`@timestamp`,1d)\":{\"date_histogram\":{\"field\"\ + :\"@timestamp\",\"missing_bucket\":false,\"order\":\"asc\",\"calendar_interval\"\ + :\"1d\"}}}]},\"aggregations\":{\"count()\":{\"value_count\":{\"field\"\ + :\"_index\"}}}}}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=null,\ + \ searchAfter=null, searchResponse=null)" + children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_terms.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_terms.yaml index 481d9cdd423..6a0e0c660da 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_terms.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_terms.yaml @@ -3,19 +3,25 @@ root: description: fields: "[count(), process.name, cloud.region]" children: - - name: OpenSearchIndexScan + - name: LimitOperator description: - request: "OpenSearchQueryRequest(indexName=big5, sourceBuilder={\"from\":0,\"\ - size\":0,\"timeout\":\"1m\",\"query\":{\"bool\":{\"filter\":[{\"range\"\ - :{\"@timestamp\":{\"from\":1672617600000,\"to\":null,\"include_lower\":true,\"\ - include_upper\":true,\"boost\":1.0}}},{\"range\":{\"@timestamp\":{\"from\"\ - :null,\"to\":1672653600000,\"include_lower\":true,\"include_upper\":false,\"\ - boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"aggregations\"\ - :{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"process.name\"\ - :{\"terms\":{\"field\":\"process.name\",\"missing_bucket\":true,\"missing_order\"\ - :\"last\",\"order\":\"desc\"}}},{\"cloud.region\":{\"terms\":{\"field\"\ - 
:\"cloud.region\",\"missing_bucket\":true,\"missing_order\":\"first\",\"\ - order\":\"asc\"}}}]},\"aggregations\":{\"count()\":{\"value_count\":{\"\ - field\":\"_index\"}}}}}}, needClean=true, searchDone=false, pitId=*,\ - \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" - children: [] \ No newline at end of file + limit: 10 + offset: 0 + children: + - name: OpenSearchIndexScan + description: + request: "OpenSearchQueryRequest(indexName=big5, sourceBuilder={\"from\"\ + :0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"bool\":{\"filter\":[{\"\ + range\":{\"@timestamp\":{\"from\":1672617600000,\"to\":null,\"include_lower\"\ + :true,\"include_upper\":true,\"boost\":1.0}}},{\"range\":{\"@timestamp\"\ + :{\"from\":null,\"to\":1672653600000,\"include_lower\":true,\"include_upper\"\ + :false,\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"\ + aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"\ + sources\":[{\"process.name\":{\"terms\":{\"field\":\"process.name\"\ + ,\"missing_bucket\":true,\"missing_order\":\"last\",\"order\":\"desc\"\ + }}},{\"cloud.region\":{\"terms\":{\"field\":\"cloud.region\",\"missing_bucket\"\ + :true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\"\ + :{\"count()\":{\"value_count\":{\"field\":\"_index\"}}}}}}, needClean=true,\ + \ searchDone=false, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" + children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_terms_keyword.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_terms_keyword.yaml index a7f12407647..5c77f33d0cd 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_terms_keyword.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_terms_keyword.yaml @@ -3,21 +3,27 @@ root: description: fields: "[count(), process.name, cloud.region, aws.cloudwatch.log_stream]" children: - - 
name: OpenSearchIndexScan + - name: LimitOperator description: - request: "OpenSearchQueryRequest(indexName=big5, sourceBuilder={\"from\":0,\"\ - size\":0,\"timeout\":\"1m\",\"query\":{\"bool\":{\"filter\":[{\"range\"\ - :{\"@timestamp\":{\"from\":1672617600000,\"to\":null,\"include_lower\":true,\"\ - include_upper\":true,\"boost\":1.0}}},{\"range\":{\"@timestamp\":{\"from\"\ - :null,\"to\":1672653600000,\"include_lower\":true,\"include_upper\":false,\"\ - boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"aggregations\"\ - :{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"process.name\"\ - :{\"terms\":{\"field\":\"process.name\",\"missing_bucket\":true,\"missing_order\"\ - :\"last\",\"order\":\"desc\"}}},{\"cloud.region\":{\"terms\":{\"field\"\ - :\"cloud.region\",\"missing_bucket\":true,\"missing_order\":\"first\",\"\ - order\":\"asc\"}}},{\"aws.cloudwatch.log_stream\":{\"terms\":{\"field\"\ - :\"aws.cloudwatch.log_stream\",\"missing_bucket\":true,\"missing_order\"\ - :\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"count()\":{\"value_count\"\ - :{\"field\":\"_index\"}}}}}}, needClean=true, searchDone=false, pitId=*,\ - \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" - children: [] \ No newline at end of file + limit: 10 + offset: 0 + children: + - name: OpenSearchIndexScan + description: + request: "OpenSearchQueryRequest(indexName=big5, sourceBuilder={\"from\"\ + :0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"bool\":{\"filter\":[{\"\ + range\":{\"@timestamp\":{\"from\":1672617600000,\"to\":null,\"include_lower\"\ + :true,\"include_upper\":true,\"boost\":1.0}}},{\"range\":{\"@timestamp\"\ + :{\"from\":null,\"to\":1672653600000,\"include_lower\":true,\"include_upper\"\ + :false,\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"\ + aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"\ + sources\":[{\"process.name\":{\"terms\":{\"field\":\"process.name\"\ + 
,\"missing_bucket\":true,\"missing_order\":\"last\",\"order\":\"desc\"\ + }}},{\"cloud.region\":{\"terms\":{\"field\":\"cloud.region\",\"missing_bucket\"\ + :true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"aws.cloudwatch.log_stream\"\ + :{\"terms\":{\"field\":\"aws.cloudwatch.log_stream\",\"missing_bucket\"\ + :true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\"\ + :{\"count()\":{\"value_count\":{\"field\":\"_index\"}}}}}}, needClean=true,\ + \ searchDone=false, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" + children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/date_histogram_hourly_agg.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/date_histogram_hourly_agg.yaml index 72549142297..d8f57661ce2 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/date_histogram_hourly_agg.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/date_histogram_hourly_agg.yaml @@ -9,7 +9,7 @@ root: size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"\ composite\":{\"size\":1000,\"sources\":[{\"span(`@timestamp`,1h)\":{\"date_histogram\"\ :{\"field\":\"@timestamp\",\"missing_bucket\":false,\"order\":\"asc\",\"\ - fixed_interval\":\"1h\"}}}]},\"aggregations\":{\"count()\":{\"value_count\"\ + calendar_interval\":\"1h\"}}}]},\"aggregations\":{\"count()\":{\"value_count\"\ :{\"field\":\"_index\"}}}}}}, needClean=true, searchDone=false, pitId=*,\ \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/date_histogram_minute_agg.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/date_histogram_minute_agg.yaml index be30d2a0801..ec8d58454ff 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/date_histogram_minute_agg.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/ppl/big5/date_histogram_minute_agg.yaml @@ -13,7 +13,7 @@ root: boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"aggregations\"\ :{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(`@timestamp`,1m)\"\ :{\"date_histogram\":{\"field\":\"@timestamp\",\"missing_bucket\":false,\"\ - order\":\"asc\",\"fixed_interval\":\"1m\"}}}]},\"aggregations\":{\"count()\"\ + order\":\"asc\",\"calendar_interval\":\"1m\"}}}]},\"aggregations\":{\"count()\"\ :{\"value_count\":{\"field\":\"_index\"}}}}}}, needClean=true, searchDone=false,\ \ pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan.json deleted file mode 100644 index 7d345202ce4..00000000000 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "root": { - "name": "ProjectOperator", - "description": { - "fields": "[count(), span(birthdate,1m)]" - }, - "children": [{ - "name": "OpenSearchIndexScan", - "description": { - "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_bank, sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(birthdate,1m)\":{\"date_histogram\":{\"field\":\"birthdate\",\"missing_bucket\":false,\"order\":\"asc\",\"fixed_interval\":\"1m\"}}}]},\"aggregations\":{\"count()\":{\"value_count\":{\"field\":\"_index\"}}}}}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" - }, - "children": [] - }] - } -} diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan.yaml 
b/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan.yaml new file mode 100644 index 00000000000..102995d5f02 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan.yaml @@ -0,0 +1,15 @@ +root: + name: ProjectOperator + description: + fields: "[count(), span(birthdate,1m)]" + children: + - name: OpenSearchIndexScan + description: + request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_bank,\ + \ sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\"\ + :{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(birthdate,1m)\"\ + :{\"date_histogram\":{\"field\":\"birthdate\",\"missing_bucket\":false,\"\ + order\":\"asc\",\"calendar_interval\":\"1m\"}}}]},\"aggregations\":{\"count()\"\ + :{\"value_count\":{\"field\":\"_index\"}}}}}}, needClean=true, searchDone=false,\ + \ pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan2.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan2.json deleted file mode 100644 index a5b9e210f09..00000000000 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan2.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "root": { - "name": "ProjectOperator", - "description": { - "fields": "[count(), span(birthdate,1M)]" - }, - "children": [{ - "name": "OpenSearchIndexScan", - "description": { - "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_bank, sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(birthdate,1M)\":{\"date_histogram\":{\"field\":\"birthdate\",\"missing_bucket\":false,\"order\":\"asc\",\"calendar_interval\":\"1M\"}}}]},\"aggregations\":{\"count()\":{\"value_count\":{\"field\":\"_index\"}}}}}}, needClean=true, 
searchDone=false, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" - }, - "children": [] - }] - } -} diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan2.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan2.yaml new file mode 100644 index 00000000000..167328625f1 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan2.yaml @@ -0,0 +1,15 @@ +root: + name: ProjectOperator + description: + fields: "[count(), span(birthdate,1M)]" + children: + - name: OpenSearchIndexScan + description: + request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_bank,\ + \ sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\"\ + :{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(birthdate,1M)\"\ + :{\"date_histogram\":{\"field\":\"birthdate\",\"missing_bucket\":false,\"\ + order\":\"asc\",\"calendar_interval\":\"1M\"}}}]},\"aggregations\":{\"count()\"\ + :{\"value_count\":{\"field\":\"_index\"}}}}}}, needClean=true, searchDone=false,\ + \ pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] \ No newline at end of file diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java index 43d06c5e6b1..1f28580ed70 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java @@ -658,11 +658,10 @@ private QueryExpression binary(RexCall call) { RexUnknownAs nullAs = getNullAsForSearch(call); QueryExpression finalExpression = switch (nullAs) { - // e.g. 
where isNotNull(a) and (a = 1 or a = 2) - // TODO: For this case, seems return `expression` should be equivalent - case FALSE -> - CompoundQueryExpression.and( - false, expression, QueryExpression.create(pair.getKey()).exists()); + // e.g. where isNotNull(a) and (a = 1 or a = 2) + // In this case, returning `expression` alone is equivalent + // The extra DSL `bool.must` could slow down the query, so we return `expression` + case FALSE -> expression; // e.g. where isNull(a) or a = 1 or a = 2 case TRUE -> CompoundQueryExpression.or( diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilder.java deleted file mode 100644 index 8fa2c916288..00000000000 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilder.java +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.opensearch.storage.script.aggregation.dsl; - -import static org.opensearch.sql.data.type.ExprCoreType.DATE; -import static org.opensearch.sql.data.type.ExprCoreType.TIME; -import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP; -import static org.opensearch.sql.opensearch.storage.script.aggregation.AggregationQueryBuilder.AGGREGATION_BUCKET_SIZE; - -import java.util.List; -import java.util.stream.Collectors; -import org.opensearch.search.aggregations.BucketOrder; -import org.opensearch.search.aggregations.bucket.histogram.AutoDateHistogramAggregationBuilder; -import org.opensearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder; -import org.opensearch.search.aggregations.bucket.histogram.DateHistogramInterval; -import org.opensearch.search.aggregations.bucket.histogram.HistogramAggregationBuilder; -import
org.opensearch.search.aggregations.bucket.terms.MultiTermsAggregationBuilder; -import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; -import org.opensearch.search.aggregations.support.MultiTermsValuesSourceConfig; -import org.opensearch.search.aggregations.support.ValueType; -import org.opensearch.search.aggregations.support.ValuesSourceAggregationBuilder; -import org.opensearch.sql.ast.expression.SpanUnit; -import org.opensearch.sql.expression.NamedExpression; -import org.opensearch.sql.expression.span.SpanExpression; -import org.opensearch.sql.opensearch.data.type.OpenSearchDateType; -import org.opensearch.sql.opensearch.storage.serde.ExpressionSerializer; - -/** Bucket Aggregation Builder. */ -public class BucketAggregationBuilder { - - private final AggregationBuilderHelper helper; - - public BucketAggregationBuilder(ExpressionSerializer serializer) { - this.helper = new AggregationBuilderHelper(serializer); - } - - /** Build the ValuesSourceAggregationBuilder. 
*/ - public ValuesSourceAggregationBuilder build(NamedExpression expr) { - if (expr.getDelegated() instanceof SpanExpression) { - SpanExpression spanExpr = (SpanExpression) expr.getDelegated(); - return buildHistogram( - expr.getName(), - spanExpr.getField().toString(), - spanExpr.getValue().valueOf().doubleValue(), - spanExpr.getUnit()); - } else { - TermsAggregationBuilder sourceBuilder = new TermsAggregationBuilder(expr.getName()); - sourceBuilder.size(AGGREGATION_BUCKET_SIZE); - sourceBuilder.order(BucketOrder.key(true)); - // Time types values are converted to LONG in ExpressionAggregationScript::execute - if ((expr.getDelegated().type() instanceof OpenSearchDateType - && List.of(TIMESTAMP, TIME, DATE) - .contains(((OpenSearchDateType) expr.getDelegated().type()).getExprCoreType())) - || List.of(TIMESTAMP, TIME, DATE).contains(expr.getDelegated().type())) { - sourceBuilder.userValueTypeHint(ValueType.LONG); - } - return helper.build(expr.getDelegated(), sourceBuilder::field, sourceBuilder::script); - } - } - - /** Build the MultiTermsAggregationBuilder. 
*/ - public MultiTermsAggregationBuilder buildMultipleTerms(List exprs) { - MultiTermsAggregationBuilder sourceBuilder = - new MultiTermsAggregationBuilder( - exprs.stream().map(NamedExpression::getName).collect(Collectors.joining("_"))); - sourceBuilder.terms( - exprs.stream() - .map( - expr -> { - MultiTermsValuesSourceConfig.Builder config = - new MultiTermsValuesSourceConfig.Builder(); - config.setFieldName(expr.getName()); - // Time types values are converted to LONG in ExpressionAggregationScript::execute - if ((expr.getDelegated().type() instanceof OpenSearchDateType - && List.of(TIMESTAMP, TIME, DATE) - .contains( - ((OpenSearchDateType) expr.getDelegated().type()) - .getExprCoreType())) - || List.of(TIMESTAMP, TIME, DATE).contains(expr.getDelegated().type())) { - config.setUserValueTypeHint(ValueType.LONG); - } - return config.build(); - }) - .toList()); - sourceBuilder.size(AGGREGATION_BUCKET_SIZE); - return sourceBuilder; - } - - public static ValuesSourceAggregationBuilder buildHistogram( - String name, String field, Double value, SpanUnit unit) { - switch (unit) { - case NONE: - return new HistogramAggregationBuilder(name).field(field).interval(value); - case UNKNOWN: - throw new IllegalStateException("Invalid span unit"); - default: - return buildDateHistogram(name, field, value.intValue(), unit); - } - } - - public static ValuesSourceAggregationBuilder buildAutoDateHistogram( - String name, String field, Integer bucketSize) { - return new AutoDateHistogramAggregationBuilder(name).field(field).setNumBuckets(bucketSize); - } - - public static ValuesSourceAggregationBuilder buildDateHistogram( - String name, String field, Integer value, SpanUnit unit) { - String spanValue = value + unit.getName(); - switch (unit) { - case MILLISECOND: - case MS: - case SECOND: - case S: - case MINUTE: - case m: - case HOUR: - case H: - case DAY: - case D: - return new DateHistogramAggregationBuilder(name) - .field(field) - .fixedInterval(new 
DateHistogramInterval(spanValue)); - default: - return new DateHistogramAggregationBuilder(name) - .field(field) - .calendarInterval(new DateHistogramInterval(spanValue)); - } - } -} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/CompositeAggregationBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/CompositeAggregationBuilder.java index 954bbc93f8e..b84128d68df 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/CompositeAggregationBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/CompositeAggregationBuilder.java @@ -16,6 +16,7 @@ import org.opensearch.search.aggregations.bucket.composite.DateHistogramValuesSourceBuilder; import org.opensearch.search.aggregations.bucket.composite.HistogramValuesSourceBuilder; import org.opensearch.search.aggregations.bucket.composite.TermsValuesSourceBuilder; +import org.opensearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder; import org.opensearch.search.aggregations.bucket.histogram.DateHistogramInterval; import org.opensearch.search.aggregations.bucket.missing.MissingOrder; import org.opensearch.search.aggregations.support.ValueType; @@ -105,24 +106,14 @@ public static CompositeValuesSourceBuilder buildHistogram( public static CompositeValuesSourceBuilder buildDateHistogram( String name, String field, Integer value, SpanUnit unit) { String spanValue = value + unit.getName(); - switch (unit) { - case MILLISECOND: - case MS: - case SECOND: - case S: - case MINUTE: - case m: - case HOUR: - case H: - case DAY: - case D: - return new DateHistogramValuesSourceBuilder(name) - .field(field) - .fixedInterval(new DateHistogramInterval(spanValue)); - default: - return new DateHistogramValuesSourceBuilder(name) - .field(field) - .calendarInterval(new DateHistogramInterval(spanValue)); - } + 
DateHistogramValuesSourceBuilder builder = + new DateHistogramValuesSourceBuilder(name).field(field); + return useCalendarInterval(spanValue) + ? builder.calendarInterval(new DateHistogramInterval(spanValue)) + : builder.fixedInterval(new DateHistogramInterval(spanValue)); + } + + private static boolean useCalendarInterval(String spanValue) { + return DateHistogramAggregationBuilder.DATE_FIELD_UNITS.containsKey(spanValue); } } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilderTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilderTest.java index 721b0b66a4c..08472a74c8d 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilderTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilderTest.java @@ -609,7 +609,7 @@ void fixed_interval_time_span() { + " \"field\" : \"timestamp\",%n" + " \"missing_bucket\" : false,%n" + " \"order\" : \"asc\",%n" - + " \"fixed_interval\" : \"1h\"%n" + + " \"calendar_interval\" : \"1h\"%n" + " }%n" + " }%n" + " } ]%n" diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilderTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilderTest.java deleted file mode 100644 index 26b303fbf45..00000000000 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilderTest.java +++ /dev/null @@ -1,234 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.opensearch.storage.script.aggregation.dsl; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.mockito.Mockito.when; -import static 
org.opensearch.core.xcontent.ToXContent.EMPTY_PARAMS; -import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; -import static org.opensearch.sql.data.type.ExprCoreType.STRING; -import static org.opensearch.sql.expression.DSL.literal; -import static org.opensearch.sql.expression.DSL.named; -import static org.opensearch.sql.expression.DSL.ref; - -import java.util.Map; -import lombok.SneakyThrows; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.DisplayNameGeneration; -import org.junit.jupiter.api.DisplayNameGenerator; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.EnumSource; -import org.mockito.Mock; -import org.mockito.junit.jupiter.MockitoExtension; -import org.opensearch.common.xcontent.XContentFactory; -import org.opensearch.core.common.bytes.BytesReference; -import org.opensearch.core.xcontent.XContentBuilder; -import org.opensearch.search.aggregations.support.ValuesSourceAggregationBuilder; -import org.opensearch.sql.data.type.ExprCoreType; -import org.opensearch.sql.data.type.ExprType; -import org.opensearch.sql.expression.DSL; -import org.opensearch.sql.expression.NamedExpression; -import org.opensearch.sql.expression.parse.ParseExpression; -import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; -import org.opensearch.sql.opensearch.data.type.OpenSearchDateType; -import org.opensearch.sql.opensearch.data.type.OpenSearchTextType; -import org.opensearch.sql.opensearch.storage.serde.ExpressionSerializer; - -@DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) -@ExtendWith(MockitoExtension.class) -class BucketAggregationBuilderTest { - - @Mock private ExpressionSerializer serializer; - - private BucketAggregationBuilder aggregationBuilder; - - @BeforeEach - void set_up() { - aggregationBuilder = new BucketAggregationBuilder(serializer); - } - - @Test - void 
should_build_bucket_with_field() { - assertEquals( - "{\n" - + " \"age\" : {\n" - + " \"terms\" : {\n" - + " \"field\" : \"age\",\n" - + " \"size\" : 1000,\n" - + " \"min_doc_count\" : 1,\n" - + " \"shard_min_doc_count\" : 0,\n" - + " \"show_term_doc_count_error\" : false,\n" - + " \"order\" : {\n" - + " \"_key\" : \"asc\"\n" - + " }\n" - + " }\n" - + " }\n" - + "}", - buildQuery(named("age", ref("age", INTEGER)))); - } - - @Test - void should_build_bucket_with_literal() { - var literal = literal(1); - when(serializer.serialize(literal)).thenReturn("mock-serialize"); - assertEquals( - "{\n" - + " \"1\" : {\n" - + " \"terms\" : {\n" - + " \"script\" : {\n" - + " \"source\" :" - + " \"{\\\"langType\\\":\\\"v2\\\",\\\"script\\\":\\\"mock-serialize\\\"}\",\n" - + " \"lang\" : \"opensearch_compounded_script\"\n" - + " },\n" - + " \"size\" : 1000,\n" - + " \"min_doc_count\" : 1,\n" - + " \"shard_min_doc_count\" : 0,\n" - + " \"show_term_doc_count_error\" : false,\n" - + " \"order\" : {\n" - + " \"_key\" : \"asc\"\n" - + " }\n" - + " }\n" - + " }\n" - + "}", - buildQuery(named(literal))); - } - - @Test - void should_build_bucket_with_keyword_field() { - assertEquals( - "{\n" - + " \"name\" : {\n" - + " \"terms\" : {\n" - + " \"field\" : \"name.keyword\",\n" - + " \"size\" : 1000,\n" - + " \"min_doc_count\" : 1,\n" - + " \"shard_min_doc_count\" : 0,\n" - + " \"show_term_doc_count_error\" : false,\n" - + " \"order\" : {\n" - + " \"_key\" : \"asc\"\n" - + " }\n" - + " }\n" - + " }\n" - + "}", - buildQuery( - named( - "name", - ref( - "name", - OpenSearchTextType.of( - Map.of( - "words", - OpenSearchDataType.of(OpenSearchDataType.MappingType.Keyword))))))); - } - - @Test - void should_build_bucket_with_parse_expression() { - ParseExpression parseExpression = - DSL.regex(ref("name.keyword", STRING), DSL.literal("(?\\w+)"), DSL.literal("name")); - when(serializer.serialize(parseExpression)).thenReturn("mock-serialize"); - assertEquals( - "{\n" - + " \"name\" : {\n" - + " 
\"terms\" : {\n" - + " \"script\" : {\n" - + " \"source\" :" - + " \"{\\\"langType\\\":\\\"v2\\\",\\\"script\\\":\\\"mock-serialize\\\"}\",\n" - + " \"lang\" : \"opensearch_compounded_script\"\n" - + " },\n" - + " \"size\" : 1000,\n" - + " \"min_doc_count\" : 1,\n" - + " \"shard_min_doc_count\" : 0,\n" - + " \"show_term_doc_count_error\" : false,\n" - + " \"order\" : {\n" - + " \"_key\" : \"asc\"\n" - + " }\n" - + " }\n" - + " }\n" - + "}", - buildQuery(named("name", parseExpression))); - } - - @Test - void terms_bucket_for_opensearchdate_type_uses_long() { - OpenSearchDateType dataType = OpenSearchDateType.of(ExprCoreType.TIMESTAMP); - - assertEquals( - "{\n" - + " \"date\" : {\n" - + " \"terms\" : {\n" - + " \"field\" : \"date\",\n" - + " \"value_type\" : \"long\",\n" - + " \"size\" : 1000,\n" - + " \"min_doc_count\" : 1,\n" - + " \"shard_min_doc_count\" : 0,\n" - + " \"show_term_doc_count_error\" : false,\n" - + " \"order\" : {\n" - + " \"_key\" : \"asc\"\n" - + " }\n" - + " }\n" - + " }\n" - + "}", - buildQuery(named("date", ref("date", dataType)))); - } - - @Test - void terms_bucket_for_opensearchdate_type_uses_long_false() { - OpenSearchDateType dataType = OpenSearchDateType.of(STRING); - - assertEquals( - "{\n" - + " \"date\" : {\n" - + " \"terms\" : {\n" - + " \"field\" : \"date\",\n" - + " \"size\" : 1000,\n" - + " \"min_doc_count\" : 1,\n" - + " \"shard_min_doc_count\" : 0,\n" - + " \"show_term_doc_count_error\" : false,\n" - + " \"order\" : {\n" - + " \"_key\" : \"asc\"\n" - + " }\n" - + " }\n" - + " }\n" - + "}", - buildQuery(named("date", ref("date", dataType)))); - } - - @ParameterizedTest(name = "{0}") - @EnumSource( - value = ExprCoreType.class, - names = {"TIMESTAMP", "TIME", "DATE"}) - void terms_bucket_for_datetime_types_uses_long(ExprType dataType) { - assertEquals( - "{\n" - + " \"date\" : {\n" - + " \"terms\" : {\n" - + " \"field\" : \"date\",\n" - + " \"value_type\" : \"long\",\n" - + " \"size\" : 1000,\n" - + " \"min_doc_count\" : 1,\n" - + 
" \"shard_min_doc_count\" : 0,\n" - + " \"show_term_doc_count_error\" : false,\n" - + " \"order\" : {\n" - + " \"_key\" : \"asc\"\n" - + " }\n" - + " }\n" - + " }\n" - + "}", - buildQuery(named("date", ref("date", dataType)))); - } - - @SneakyThrows - private String buildQuery(NamedExpression groupByExpression) { - XContentBuilder builder = XContentFactory.jsonBuilder().prettyPrint(); - builder.startObject(); - ValuesSourceAggregationBuilder sourceBuilder = aggregationBuilder.build(groupByExpression); - sourceBuilder.toXContent(builder, EMPTY_PARAMS); - builder.endObject(); - return BytesReference.bytes(builder).utf8ToString(); - } -} From d318d887e66aae874efec815fce1b576af3b91aa Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Thu, 27 Nov 2025 18:28:30 +0800 Subject: [PATCH 2/4] revert changes of interval selection in DataHistogram Signed-off-by: Lantao Jin --- .../org/opensearch/sql/util/MatcherUtils.java | 2 +- .../agg_composite_date_range_push.yaml | 2 +- .../big5/composite_date_histogram_daily.yaml | 2 +- .../big5/date_histogram_hourly_agg.yaml | 2 +- .../big5/date_histogram_minute_agg.yaml | 2 +- .../calcite/chart_with_timestamp_span.yaml | 2 +- .../calcite/clickbench/q43.yaml | 2 +- .../calcite/explain_filter_with_search.yaml | 2 +- .../calcite/explain_stats_by_timespan.yaml | 2 +- .../calcite/explain_stats_by_timespan2.yaml | 2 +- .../big5/composite_date_histogram_daily.yaml | 2 +- .../ppl/big5/date_histogram_hourly_agg.yaml | 2 +- .../ppl/big5/date_histogram_minute_agg.yaml | 2 +- .../ppl/explain_stats_by_timespan.yaml | 2 +- .../ppl/explain_stats_by_timespan2.yaml | 2 +- .../dsl/CompositeAggregationBuilder.java | 29 ++++++++++++------- .../AggregationQueryBuilderTest.java | 2 +- 17 files changed, 35 insertions(+), 26 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java b/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java index 65018eda0eb..bf9b214bd44 100644 --- 
a/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java +++ b/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java @@ -415,7 +415,7 @@ public static void assertJsonEquals(String expected, String actual) { * Compare two JSON string are equals with ignoring the RelNode id in the Calcite plan. * Deprecated, use {@link #assertYamlEqualsIgnoreId(String, String)} */ - @Deprecated // use assertYamlEqualsIgnoreId instead + @Deprecated public static void assertJsonEqualsIgnoreId(String expected, String actual) { assertJsonEquals(cleanUpId(expected), cleanUpId(actual)); } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_date_range_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_date_range_push.yaml index 44383b0ab04..576f93485e2 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_date_range_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_date_range_push.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(@timestamp=[$0], category=[$1], value=[$2], timestamp=[$3], _id=[$4], _index=[$5], _score=[$6], _maxscore=[$7], _sort=[$8], _routing=[$9], value_range=[CASE(<($2, 7000), 'small':VARCHAR, 'large':VARCHAR)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[FILTER->IS NOT NULL($0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},avg(value)=AVG($1)), PROJECT->[avg(value), span(@timestamp,1h), value_range], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"exists":{"field":"@timestamp","boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(@timestamp,1h)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","calendar_interval":"1h"}}}]},"aggregations":{"value_range":{"range":{"field":"value","ranges":[{"key":"small","to":7000.0},{"key":"large","from":7000.0}],"keyed":true},"aggregations":{"avg(value)":{"avg":{"field":"value"}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[FILTER->IS NOT NULL($0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},avg(value)=AVG($1)), PROJECT->[avg(value), span(@timestamp,1h), value_range], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"exists":{"field":"@timestamp","boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(@timestamp,1h)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1h"}}}]},"aggregations":{"value_range":{"range":{"field":"value","ranges":[{"key":"small","to":7000.0},{"key":"large","from":7000.0}],"keyed":true},"aggregations":{"avg(value)":{"avg":{"field":"value"}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml index 5b6c7913f9e..10023133a38 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml @@ -9,4 +9,4 @@ calcite: 
LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2022-12-30 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-07 12:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2022-12-30 00:00:00':VARCHAR..'2023-01-07 12:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1d)], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2022-12-30T00:00:00.000Z","to":"2023-01-07T12:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"span(`@timestamp`,1d)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","calendar_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2022-12-30 00:00:00':VARCHAR..'2023-01-07 12:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1d)], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2022-12-30T00:00:00.000Z","to":"2023-01-07T12:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"span(`@timestamp`,1d)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_hourly_agg.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_hourly_agg.yaml index b6b0a42062f..3a6609dc2d8 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_hourly_agg.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_hourly_agg.yaml @@ -7,4 +7,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($17)]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1h)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1h)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","calendar_interval":"1h"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1h)], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1h)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1h"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml index 9ab2af83445..44b15522967 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1m)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1m)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","calendar_interval":"1m"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + 
CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1m)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1m)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/chart_with_timestamp_span.yaml b/integ-test/src/test/resources/expectedOutput/calcite/chart_with_timestamp_span.yaml index 052680e2aa9..4f79d775ebe 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/chart_with_timestamp_span.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/chart_with_timestamp_span.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($0)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count=COUNT()), SORT->[0], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"@timestamp0":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","calendar_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count=COUNT()), SORT->[0], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"@timestamp0":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml index 0466a6a16dd..3bba5e1ed82 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml @@ -12,4 +12,4 @@ calcite: EnumerableCalc(expr#0..1=[{inputs}], PageViews=[$t1], M=[$t0]) EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[1000], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, EventTime, DontCountHits, IsRefresh, CounterID], FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-15 00:00:00':VARCHAR]]:VARCHAR), =($3, 0), =($2, 0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), SORT->[0 ASC FIRST], LIMIT->[10 from 1000]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-15T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","EventTime","DontCountHits","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1010,"sources":[{"M":{"date_histogram":{"field":"EventTime","missing_bucket":false,"order":"asc","calendar_interval":"1m"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, EventTime, DontCountHits, IsRefresh, CounterID], FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-15 00:00:00':VARCHAR]]:VARCHAR), =($3, 0), =($2, 0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), SORT->[0 ASC FIRST], LIMIT->[10 from 1000]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-15T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","EventTime","DontCountHits","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1010,"sources":[{"M":{"date_histogram":{"field":"EventTime","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, 
requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml index f2744c79991..bd8114a7989 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(>=($3, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($3, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[birthdate], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1d)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"birthdate":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["birthdate"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1d)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","calendar_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[birthdate], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), 
AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1d)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"birthdate":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["birthdate"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1d)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan.yaml index fec4f9cf9d5..b4384528c0c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan.yaml @@ -7,4 +7,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($3)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1m)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1m)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","calendar_interval":"1m"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], 
PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1m)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1m)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.yaml index 5021adf62b8..c97d07c93a2 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.yaml @@ -7,4 +7,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($3)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1M)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1M)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","calendar_interval":"1M"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1M)], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1M)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","fixed_interval":"1M"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_date_histogram_daily.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_date_histogram_daily.yaml index 0a057446400..073078ddf0d 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_date_histogram_daily.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_date_histogram_daily.yaml @@ -18,7 +18,7 @@ root: :false,\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"\ aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"\ sources\":[{\"span(`@timestamp`,1d)\":{\"date_histogram\":{\"field\"\ - :\"@timestamp\",\"missing_bucket\":false,\"order\":\"asc\",\"calendar_interval\"\ + :\"@timestamp\",\"missing_bucket\":false,\"order\":\"asc\",\"fixed_interval\"\ :\"1d\"}}}]},\"aggregations\":{\"count()\":{\"value_count\":{\"field\"\ :\"_index\"}}}}}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=null,\ \ searchAfter=null, searchResponse=null)" diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/date_histogram_hourly_agg.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/date_histogram_hourly_agg.yaml index d8f57661ce2..72549142297 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/date_histogram_hourly_agg.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/date_histogram_hourly_agg.yaml @@ -9,7 +9,7 @@ root: size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"\ composite\":{\"size\":1000,\"sources\":[{\"span(`@timestamp`,1h)\":{\"date_histogram\"\ 
:{\"field\":\"@timestamp\",\"missing_bucket\":false,\"order\":\"asc\",\"\ - calendar_interval\":\"1h\"}}}]},\"aggregations\":{\"count()\":{\"value_count\"\ + fixed_interval\":\"1h\"}}}]},\"aggregations\":{\"count()\":{\"value_count\"\ :{\"field\":\"_index\"}}}}}}, needClean=true, searchDone=false, pitId=*,\ \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/date_histogram_minute_agg.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/date_histogram_minute_agg.yaml index ec8d58454ff..be30d2a0801 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/date_histogram_minute_agg.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/date_histogram_minute_agg.yaml @@ -13,7 +13,7 @@ root: boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"aggregations\"\ :{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(`@timestamp`,1m)\"\ :{\"date_histogram\":{\"field\":\"@timestamp\",\"missing_bucket\":false,\"\ - order\":\"asc\",\"calendar_interval\":\"1m\"}}}]},\"aggregations\":{\"count()\"\ + order\":\"asc\",\"fixed_interval\":\"1m\"}}}]},\"aggregations\":{\"count()\"\ :{\"value_count\":{\"field\":\"_index\"}}}}}}, needClean=true, searchDone=false,\ \ pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan.yaml index 102995d5f02..3fd26cb19a6 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan.yaml @@ -9,7 +9,7 @@ root: \ sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\"\ 
:{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(birthdate,1m)\"\ :{\"date_histogram\":{\"field\":\"birthdate\",\"missing_bucket\":false,\"\ - order\":\"asc\",\"calendar_interval\":\"1m\"}}}]},\"aggregations\":{\"count()\"\ + order\":\"asc\",\"fixed_interval\":\"1m\"}}}]},\"aggregations\":{\"count()\"\ :{\"value_count\":{\"field\":\"_index\"}}}}}}, needClean=true, searchDone=false,\ \ pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan2.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan2.yaml index 167328625f1..6eeeebe044f 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan2.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan2.yaml @@ -9,7 +9,7 @@ root: \ sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\"\ :{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(birthdate,1M)\"\ :{\"date_histogram\":{\"field\":\"birthdate\",\"missing_bucket\":false,\"\ - order\":\"asc\",\"calendar_interval\":\"1M\"}}}]},\"aggregations\":{\"count()\"\ + order\":\"asc\",\"fixed_interval\":\"1M\"}}}]},\"aggregations\":{\"count()\"\ :{\"value_count\":{\"field\":\"_index\"}}}}}}, needClean=true, searchDone=false,\ \ pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/CompositeAggregationBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/CompositeAggregationBuilder.java index b84128d68df..954bbc93f8e 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/CompositeAggregationBuilder.java +++ 
b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/CompositeAggregationBuilder.java @@ -16,7 +16,6 @@ import org.opensearch.search.aggregations.bucket.composite.DateHistogramValuesSourceBuilder; import org.opensearch.search.aggregations.bucket.composite.HistogramValuesSourceBuilder; import org.opensearch.search.aggregations.bucket.composite.TermsValuesSourceBuilder; -import org.opensearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder; import org.opensearch.search.aggregations.bucket.histogram.DateHistogramInterval; import org.opensearch.search.aggregations.bucket.missing.MissingOrder; import org.opensearch.search.aggregations.support.ValueType; @@ -106,14 +105,24 @@ public static CompositeValuesSourceBuilder buildHistogram( public static CompositeValuesSourceBuilder buildDateHistogram( String name, String field, Integer value, SpanUnit unit) { String spanValue = value + unit.getName(); - DateHistogramValuesSourceBuilder builder = - new DateHistogramValuesSourceBuilder(name).field(field); - return useCalendarInterval(spanValue) - ? 
builder.calendarInterval(new DateHistogramInterval(spanValue)) - : builder.fixedInterval(new DateHistogramInterval(spanValue)); - } - - private static boolean useCalendarInterval(String spanValue) { - return DateHistogramAggregationBuilder.DATE_FIELD_UNITS.containsKey(spanValue); + switch (unit) { + case MILLISECOND: + case MS: + case SECOND: + case S: + case MINUTE: + case m: + case HOUR: + case H: + case DAY: + case D: + return new DateHistogramValuesSourceBuilder(name) + .field(field) + .fixedInterval(new DateHistogramInterval(spanValue)); + default: + return new DateHistogramValuesSourceBuilder(name) + .field(field) + .calendarInterval(new DateHistogramInterval(spanValue)); + } } } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilderTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilderTest.java index 08472a74c8d..721b0b66a4c 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilderTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilderTest.java @@ -609,7 +609,7 @@ void fixed_interval_time_span() { + " \"field\" : \"timestamp\",%n" + " \"missing_bucket\" : false,%n" + " \"order\" : \"asc\",%n" - + " \"calendar_interval\" : \"1h\"%n" + + " \"fixed_interval\" : \"1h\"%n" + " }%n" + " }%n" + " } ]%n" From 1b49d496e57698097c486f4ecb0abfe608958440 Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Thu, 27 Nov 2025 18:35:51 +0800 Subject: [PATCH 3/4] typo Signed-off-by: Lantao Jin --- .../expectedOutput/calcite/agg_composite_date_range_push.yaml | 2 +- .../expectedOutput/calcite/big5/date_histogram_hourly_agg.yaml | 2 +- .../expectedOutput/calcite/chart_with_timestamp_span.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_date_range_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_date_range_push.yaml index 576f93485e2..30e4762d325 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_date_range_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_date_range_push.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(@timestamp=[$0], category=[$1], value=[$2], timestamp=[$3], _id=[$4], _index=[$5], _score=[$6], _maxscore=[$7], _sort=[$8], _routing=[$9], value_range=[CASE(<($2, 7000), 'small':VARCHAR, 'large':VARCHAR)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[FILTER->IS NOT NULL($0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},avg(value)=AVG($1)), PROJECT->[avg(value), span(@timestamp,1h), value_range], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"exists":{"field":"@timestamp","boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(@timestamp,1h)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1h"}}}]},"aggregations":{"value_range":{"range":{"field":"value","ranges":[{"key":"small","to":7000.0},{"key":"large","from":7000.0}],"keyed":true},"aggregations":{"avg(value)":{"avg":{"field":"value"}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[FILTER->IS NOT NULL($0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},avg(value)=AVG($1)), PROJECT->[avg(value), span(@timestamp,1h), value_range], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"exists":{"field":"@timestamp","boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(@timestamp,1h)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1h"}}}]},"aggregations":{"value_range":{"range":{"field":"value","ranges":[{"key":"small","to":7000.0},{"key":"large","from":7000.0}],"keyed":true},"aggregations":{"avg(value)":{"avg":{"field":"value"}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_hourly_agg.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_hourly_agg.yaml index 3a6609dc2d8..a97ca073a21 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_hourly_agg.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_hourly_agg.yaml @@ -7,4 +7,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($17)]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1h)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1h)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1h"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1h)], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1h)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1h"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/chart_with_timestamp_span.yaml b/integ-test/src/test/resources/expectedOutput/calcite/chart_with_timestamp_span.yaml index 4f79d775ebe..a07c92033d0 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/chart_with_timestamp_span.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/chart_with_timestamp_span.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($0)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count=COUNT()), SORT->[0], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"@timestamp0":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count=COUNT()), SORT->[0], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"@timestamp0":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, 
pageSize=null, startFrom=0)]) From 30fa208bc7369d4e4b4ff2a3fba2cb8f891335d3 Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Thu, 27 Nov 2025 19:05:03 +0800 Subject: [PATCH 4/4] revert one it Signed-off-by: Lantao Jin --- .../expectedOutput/calcite/explain_stats_by_timespan2.yaml | 2 +- .../expectedOutput/ppl/explain_stats_by_timespan2.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.yaml index c97d07c93a2..5021adf62b8 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.yaml @@ -7,4 +7,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($3)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1M)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1M)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","fixed_interval":"1M"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1M)], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1M)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","calendar_interval":"1M"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan2.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan2.yaml index 6eeeebe044f..167328625f1 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan2.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan2.yaml @@ -9,7 +9,7 @@ root: \ sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\"\ :{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(birthdate,1M)\"\ :{\"date_histogram\":{\"field\":\"birthdate\",\"missing_bucket\":false,\"\ - order\":\"asc\",\"fixed_interval\":\"1M\"}}}]},\"aggregations\":{\"count()\"\ + order\":\"asc\",\"calendar_interval\":\"1M\"}}}]},\"aggregations\":{\"count()\"\ :{\"value_count\":{\"field\":\"_index\"}}}}}}, needClean=true, searchDone=false,\ \ pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file