From aad6c82bfa3158ac0ccb60ca29456ca46eecc53a Mon Sep 17 00:00:00 2001 From: Abhishek Radhakrishnan Date: Mon, 29 Apr 2024 11:31:36 -0700 Subject: [PATCH] Support for filters in the Druid Delta Lake connector (#16288) * Delta Lake support for filters. * Updates * cleanup comments * Docs * Remove Enclosed runner * Rename * Cleanup test * Serde test for the Delta input source and fix jackson annotation. * Updates and docs. * Update error messages to be clearer * Fixes * Handle NumberFormatException to provide a nicer error message. * Apply suggestions from code review Co-authored-by: 317brian <53799971+317brian@users.noreply.github.com> * Doc fixes based on feedback * Yes -> yes in docs; reword slightly. * Update docs/ingestion/input-sources.md Co-authored-by: Laksh Singla * Update docs/ingestion/input-sources.md Co-authored-by: Laksh Singla * Documentation, javadoc and more updates. * Not with an or expression end-to-end test. * Break up =, >, >=, <, <= into its own types instead of sub-classing. 
--------- Co-authored-by: 317brian <53799971+317brian@users.noreply.github.com> Co-authored-by: Laksh Singla --- .../extensions-contrib/delta-lake.md | 10 +- docs/ingestion/input-sources.md | 114 ++++- .../druid-deltalake-extensions/pom.xml | 26 ++ .../druid/delta/filter/DeltaAndFilter.java | 67 +++ .../druid/delta/filter/DeltaEqualsFilter.java | 66 +++ .../druid/delta/filter/DeltaFilter.java | 54 +++ .../druid/delta/filter/DeltaFilterUtils.java | 92 ++++ .../delta/filter/DeltaGreaterThanFilter.java | 66 +++ .../DeltaGreaterThanOrEqualsFilter.java | 66 +++ .../delta/filter/DeltaLessThanFilter.java | 66 +++ .../filter/DeltaLessThanOrEqualsFilter.java | 66 +++ .../druid/delta/filter/DeltaNotFilter.java | 55 +++ .../druid/delta/filter/DeltaOrFilter.java | 67 +++ .../druid/delta/input/DeltaInputSource.java | 70 ++- .../delta/input/DeltaInputSourceReader.java | 4 +- .../delta/filter/DeltaAndFilterTest.java | 110 +++++ .../delta/filter/DeltaEqualsFilterTest.java | 103 +++++ .../filter/DeltaGreaterThanFilterTest.java | 56 +++ .../DeltaGreaterThanOrEqualsFilterTest.java | 57 +++ .../delta/filter/DeltaLessThanFilterTest.java | 56 +++ .../DeltaLessThanOrEqualsFilterTest.java | 75 +++ .../delta/filter/DeltaNotFilterTest.java | 111 +++++ .../druid/delta/filter/DeltaOrFilterTest.java | 110 +++++ .../druid/delta/input/DeltaInputRowTest.java | 58 ++- .../input/DeltaInputSourceSerdeTest.java | 142 ++++++ .../delta/input/DeltaInputSourceTest.java | 429 ++++++++++++------ .../druid/delta/input/DeltaTestUtils.java | 285 +----------- .../delta/input/NonPartitionedDeltaTable.java | 306 +++++++++++++ .../delta/input/PartitionedDeltaTable.java | 258 +++++++++++ .../druid/delta/input/RowSerdeTest.java | 23 +- .../src/test/resources/README.md | 38 +- .../src/test/resources/create_delta_table.py | 23 +- .../_delta_log/.00000000000000000000.json.crc | Bin 0 -> 44 bytes .../_delta_log/.00000000000000000001.json.crc | Bin 0 -> 40 bytes .../_delta_log/.00000000000000000002.json.crc | Bin 0 -> 
40 bytes .../_delta_log/00000000000000000000.json | 8 + .../_delta_log/00000000000000000001.json | 6 + .../_delta_log/00000000000000000002.json | 6 + ...-9c4e-d5fa896dcefe.c000.snappy.parquet.crc | Bin 0 -> 28 bytes ...-936e-34132f176203.c000.snappy.parquet.crc | Bin 0 -> 24 bytes ...-ac1a-b8decc09e05a.c000.snappy.parquet.crc | Bin 0 -> 28 bytes ...4065-9c4e-d5fa896dcefe.c000.snappy.parquet | Bin 0 -> 2049 bytes ...4378-936e-34132f176203.c000.snappy.parquet | Bin 0 -> 2034 bytes ...47ef-ac1a-b8decc09e05a.c000.snappy.parquet | Bin 0 -> 2187 bytes ...-8962-03543375d133.c000.snappy.parquet.crc | Bin 0 -> 24 bytes ...-97b3-faa0f302984a.c000.snappy.parquet.crc | Bin 0 -> 28 bytes ...-91ec-9f398300c083.c000.snappy.parquet.crc | Bin 0 -> 28 bytes ...4b24-8962-03543375d133.c000.snappy.parquet | Bin 0 -> 2033 bytes ...4794-97b3-faa0f302984a.c000.snappy.parquet | Bin 0 -> 2187 bytes ...4b70-91ec-9f398300c083.c000.snappy.parquet | Bin 0 -> 2186 bytes ...-8ecf-03add4ee14b8.c000.snappy.parquet.crc | Bin 0 -> 28 bytes ...-9eac-c09a3802cd78.c000.snappy.parquet.crc | Bin 0 -> 24 bytes ...-90d3-96f00b1b2e22.c000.snappy.parquet.crc | Bin 0 -> 28 bytes ...407e-8ecf-03add4ee14b8.c000.snappy.parquet | Bin 0 -> 2187 bytes ...47a3-9eac-c09a3802cd78.c000.snappy.parquet | Bin 0 -> 2034 bytes ...4a50-90d3-96f00b1b2e22.c000.snappy.parquet | Bin 0 -> 2049 bytes ...-b21c-84d385a8218a.c000.snappy.parquet.crc | Bin 0 -> 28 bytes ...-8dd2-1ed5e561f801.c000.snappy.parquet.crc | Bin 0 -> 28 bytes ...-8340-58cb6d797963.c000.snappy.parquet.crc | Bin 0 -> 24 bytes ...4e92-b21c-84d385a8218a.c000.snappy.parquet | Bin 0 -> 2049 bytes ...4bd1-8dd2-1ed5e561f801.c000.snappy.parquet | Bin 0 -> 2049 bytes ...4c31-8340-58cb6d797963.c000.snappy.parquet | Bin 0 -> 2033 bytes ...-a816-c73420234b25.c000.snappy.parquet.crc | Bin 0 -> 28 bytes ...-89ee-195b76453643.c000.snappy.parquet.crc | Bin 0 -> 24 bytes ...-9283-f2bff0c0dfad.c000.snappy.parquet.crc | Bin 0 -> 28 bytes 
...434f-a816-c73420234b25.c000.snappy.parquet | Bin 0 -> 2049 bytes ...4d68-89ee-195b76453643.c000.snappy.parquet | Bin 0 -> 2034 bytes ...4a37-9283-f2bff0c0dfad.c000.snappy.parquet | Bin 0 -> 2187 bytes .../src/test/resources/requirements.txt | 3 +- 69 files changed, 2670 insertions(+), 482 deletions(-) create mode 100644 extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaAndFilter.java create mode 100644 extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaEqualsFilter.java create mode 100644 extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaFilter.java create mode 100644 extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaFilterUtils.java create mode 100644 extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaGreaterThanFilter.java create mode 100644 extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaGreaterThanOrEqualsFilter.java create mode 100644 extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaLessThanFilter.java create mode 100644 extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaLessThanOrEqualsFilter.java create mode 100644 extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaNotFilter.java create mode 100644 extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaOrFilter.java create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaAndFilterTest.java create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaEqualsFilterTest.java create mode 100644 
extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaGreaterThanFilterTest.java create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaGreaterThanOrEqualsFilterTest.java create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaLessThanFilterTest.java create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaLessThanOrEqualsFilterTest.java create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaNotFilterTest.java create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaOrFilterTest.java create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/input/DeltaInputSourceSerdeTest.java create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/input/NonPartitionedDeltaTable.java create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/input/PartitionedDeltaTable.java create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/_delta_log/.00000000000000000000.json.crc create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/_delta_log/.00000000000000000001.json.crc create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/_delta_log/.00000000000000000002.json.crc create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/_delta_log/00000000000000000000.json create mode 100644 
extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/_delta_log/00000000000000000001.json create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/_delta_log/00000000000000000002.json create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee1/.part-00001-1b911f24-6d69-4065-9c4e-d5fa896dcefe.c000.snappy.parquet.crc create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee1/.part-00001-4f11e631-348f-4378-936e-34132f176203.c000.snappy.parquet.crc create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee1/.part-00001-615707f3-eb13-47ef-ac1a-b8decc09e05a.c000.snappy.parquet.crc create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee1/part-00001-1b911f24-6d69-4065-9c4e-d5fa896dcefe.c000.snappy.parquet create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee1/part-00001-4f11e631-348f-4378-936e-34132f176203.c000.snappy.parquet create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee1/part-00001-615707f3-eb13-47ef-ac1a-b8decc09e05a.c000.snappy.parquet create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee2/.part-00003-07285317-1943-4b24-8962-03543375d133.c000.snappy.parquet.crc create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee2/.part-00003-090fd396-1c53-4794-97b3-faa0f302984a.c000.snappy.parquet.crc create mode 100644 
extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee2/.part-00003-62ce8217-f361-4b70-91ec-9f398300c083.c000.snappy.parquet.crc create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee2/part-00003-07285317-1943-4b24-8962-03543375d133.c000.snappy.parquet create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee2/part-00003-090fd396-1c53-4794-97b3-faa0f302984a.c000.snappy.parquet create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee2/part-00003-62ce8217-f361-4b70-91ec-9f398300c083.c000.snappy.parquet create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee3/.part-00005-32e5492c-7ebf-407e-8ecf-03add4ee14b8.c000.snappy.parquet.crc create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee3/.part-00005-ac0ede62-3abc-47a3-9eac-c09a3802cd78.c000.snappy.parquet.crc create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee3/.part-00005-c33dc31c-d3a8-4a50-90d3-96f00b1b2e22.c000.snappy.parquet.crc create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee3/part-00005-32e5492c-7ebf-407e-8ecf-03add4ee14b8.c000.snappy.parquet create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee3/part-00005-ac0ede62-3abc-47a3-9eac-c09a3802cd78.c000.snappy.parquet create mode 100644 
extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee3/part-00005-c33dc31c-d3a8-4a50-90d3-96f00b1b2e22.c000.snappy.parquet create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee4/.part-00007-45c2fd36-d1e1-4e92-b21c-84d385a8218a.c000.snappy.parquet.crc create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee4/.part-00007-b6e49fa4-cb41-4bd1-8dd2-1ed5e561f801.c000.snappy.parquet.crc create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee4/.part-00007-d88803d4-2bb0-4c31-8340-58cb6d797963.c000.snappy.parquet.crc create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee4/part-00007-45c2fd36-d1e1-4e92-b21c-84d385a8218a.c000.snappy.parquet create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee4/part-00007-b6e49fa4-cb41-4bd1-8dd2-1ed5e561f801.c000.snappy.parquet create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee4/part-00007-d88803d4-2bb0-4c31-8340-58cb6d797963.c000.snappy.parquet create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee5/.part-00009-079ed08f-dd8d-434f-a816-c73420234b25.c000.snappy.parquet.crc create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee5/.part-00009-b8de3a44-b0e9-4d68-89ee-195b76453643.c000.snappy.parquet.crc create mode 100644 
extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee5/.part-00009-f87803c3-6cfd-4a37-9283-f2bff0c0dfad.c000.snappy.parquet.crc create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee5/part-00009-079ed08f-dd8d-434f-a816-c73420234b25.c000.snappy.parquet create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee5/part-00009-b8de3a44-b0e9-4d68-89ee-195b76453643.c000.snappy.parquet create mode 100644 extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee5/part-00009-f87803c3-6cfd-4a37-9283-f2bff0c0dfad.c000.snappy.parquet diff --git a/docs/development/extensions-contrib/delta-lake.md b/docs/development/extensions-contrib/delta-lake.md index bab07ee3dbbe..503fbfdc55df 100644 --- a/docs/development/extensions-contrib/delta-lake.md +++ b/docs/development/extensions-contrib/delta-lake.md @@ -22,16 +22,14 @@ title: "Delta Lake extension" ~ under the License. --> -## Delta Lake extension - Delta Lake is an open source storage framework that enables building a Lakehouse architecture with various compute engines. [DeltaLakeInputSource](../../ingestion/input-sources.md#delta-lake-input-source) lets you ingest data stored in a Delta Lake table into Apache Druid. To use the Delta Lake extension, add the `druid-deltalake-extensions` to the list of loaded extensions. See [Loading extensions](../../configuration/extensions.md#loading-extensions) for more information. -The Delta input source reads the configured Delta Lake table and extracts all the underlying delta files in the table's latest snapshot. -These Delta Lake files are versioned Parquet files. 
+The Delta input source reads the configured Delta Lake table and extracts the underlying Delta files in the table's latest snapshot +based on an optional Delta filter. These Delta Lake files are versioned Parquet files. ## Version support @@ -57,5 +55,5 @@ See [Loading community extensions](../../configuration/extensions.md#loading-com ## Known limitations -- This extension relies on the Delta Kernel API and can only read from the latest Delta table snapshot. -- Column filtering isn't supported. The extension reads all columns in the configured table. \ No newline at end of file +This extension relies on the Delta Kernel API and can only read from the latest Delta table snapshot. Ability to read from +arbitrary snapshots is tracked [here](https://github.com/delta-io/delta/issues/2581). \ No newline at end of file diff --git a/docs/ingestion/input-sources.md b/docs/ingestion/input-sources.md index ee5c42cb2e59..f01bb26096cd 100644 --- a/docs/ingestion/input-sources.md +++ b/docs/ingestion/input-sources.md @@ -1141,7 +1141,86 @@ To use the Delta Lake input source, load the extension [`druid-deltalake-extensi You can use the Delta input source to read data stored in a Delta Lake table. For a given table, the input source scans the latest snapshot from the configured table. Druid ingests the underlying delta files from the table. -The following is a sample spec: + | Property|Description|Required| +|---------|-----------|--------| +| type|Set this value to `delta`.|yes| +| tablePath|The location of the Delta table.|yes| +| filter|The JSON Object that filters data files within a snapshot.|no| + +### Delta filter object + +You can use these filters to filter out data files from a snapshot, reducing the number of files Druid has to ingest from +a Delta table. This input source provides the following filters: `and`, `or`, `not`, `=`, `>`, `>=`, `<`, `<=`. 
+ +When a filter is applied on non-partitioned columns, the filtering is best-effort as the Delta Kernel solely relies +on statistics collected when the non-partitioned table is created. In this scenario, this Druid connector may ingest +data that doesn't match the filter. To guarantee that the Delta Kernel prunes out unnecessary column values, only use +filters on partitioned columns. + + +`and` filter: + +| Property | Description | Required | +|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------|----------| +| type | Set this value to `and`. | yes | +| filters | List of Delta filter predicates that get evaluated using logical AND where both conditions need to be true. `and` filter requires two filter predicates. | yes | + +`or` filter: + +| Property | Description | Required | +|----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------|----------| +| type | Set this value to `or`. | yes | +| filters | List of Delta filter predicates that get evaluated using logical OR where only one condition needs to be true. `or` filter requires two filter predicates. | yes | + +`not` filter: + +| Property | Description | Required | +|----------|---------------------------------------------------------------------------------------------------------------|----------| +| type | Set this value to `not`. | yes | +| filter | The Delta filter predicate that gets evaluated using logical NOT. `not` filter requires one filter predicate. | yes | + +`=` filter: + +| Property | Description | Required | +|----------|------------------------------------------|----------| +| type | Set this value to `=`. | yes | +| column | The table column to apply the filter on. | yes | +| value | The value to use in the filter. 
| yes | + +`>` filter: + +| Property | Description | Required | +|----------|------------------------------------------|----------| +| type | Set this value to `>`. | yes | +| column | The table column to apply the filter on. | yes | +| value | The value to use in the filter. | yes | + +`>=` filter: + +| Property | Description | Required | +|----------|------------------------------------------|----------| +| type | Set this value to `>=`. | yes | +| column | The table column to apply the filter on. | yes | +| value | The value to use in the filter. | yes | + +`<` filter: + +| Property | Description | Required | +|----------|------------------------------------------|----------| +| type | Set this value to `<`. | yes | +| column | The table column to apply the filter on. | yes | +| value | The value to use in the filter. | yes | + +`<=` filter: + +| Property | Description | Required | +|----------|------------------------------------------|----------| +| type | Set this value to `<=`. | yes | +| column | The table column to apply the filter on. | yes | +| value | The value to use in the filter. | yes | + + +The following is a sample spec to read all records from the Delta table `/delta-table/foo`: ```json ... @@ -1149,14 +1228,35 @@ The following is a sample spec: "type": "index_parallel", "inputSource": { "type": "delta", - "tablePath": "/delta-table/directory" + "tablePath": "/delta-table/foo" }, } -} ``` -| Property|Description|Required| -|---------|-----------|--------| -| type|Set this value to `delta`.|yes| -| tablePath|The location of the Delta table.|yes| +The following is a sample spec to read records from the Delta table `/delta-table/foo` to select records where `name = 'Employee4' and age >= 30`: +```json +... 
+ "ioConfig": { + "type": "index_parallel", + "inputSource": { + "type": "delta", + "tablePath": "/delta-table/foo", + "filter": { + "type": "and", + "filters": [ + { + "type": "=", + "column": "name", + "value": "Employee4" + }, + { + "type": ">=", + "column": "age", + "value": "30" + } + ] + } + }, + } +``` diff --git a/extensions-contrib/druid-deltalake-extensions/pom.xml b/extensions-contrib/druid-deltalake-extensions/pom.xml index 6f0f9a3ddc01..9aa7821be78e 100644 --- a/extensions-contrib/druid-deltalake-extensions/pom.xml +++ b/extensions-contrib/druid-deltalake-extensions/pom.xml @@ -118,11 +118,37 @@ + junit junit test + + org.junit.jupiter + junit-jupiter-api + test + + + org.junit.jupiter + junit-jupiter-engine + test + + + org.junit.jupiter + junit-jupiter-migrationsupport + test + + + org.junit.jupiter + junit-jupiter-params + test + + + org.junit.vintage + junit-vintage-engine + test + org.apache.druid druid-processing diff --git a/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaAndFilter.java b/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaAndFilter.java new file mode 100644 index 000000000000..218370917b40 --- /dev/null +++ b/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaAndFilter.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.delta.filter; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import io.delta.kernel.expressions.And; +import io.delta.kernel.expressions.Predicate; +import io.delta.kernel.types.StructType; +import org.apache.druid.error.InvalidInput; + +import java.util.List; + +/** + * Druid {@link DeltaFilter} that maps to a canonical {@link And} predicate. + * @implNote currently this filter only allows 2 filter predicates. However, this can be relaxed by recursively + * flattening the filters to allow complex expressions. + */ +public class DeltaAndFilter implements DeltaFilter +{ + @JsonProperty + private final List filters; + + @JsonCreator + public DeltaAndFilter(@JsonProperty("filters") final List filters) + { + if (filters == null) { + throw InvalidInput.exception( + "Delta and filter requires 2 filter predicates and must be non-empty. None provided." + ); + } + if (filters.size() != 2) { + throw InvalidInput.exception( + "Delta and filter requires 2 filter predicates, but provided [%d].", + filters.size() + ); + } + this.filters = filters; + } + + @Override + public Predicate getFilterPredicate(StructType snapshotSchema) + { + // This is simple for now. We can do a recursive flatten. 
+ return new And( + filters.get(0).getFilterPredicate(snapshotSchema), + filters.get(1).getFilterPredicate(snapshotSchema) + ); + } +} diff --git a/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaEqualsFilter.java b/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaEqualsFilter.java new file mode 100644 index 000000000000..39d22e98176c --- /dev/null +++ b/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaEqualsFilter.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.delta.filter; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.ImmutableList; +import io.delta.kernel.expressions.Column; +import io.delta.kernel.expressions.Predicate; +import io.delta.kernel.types.StructType; +import org.apache.druid.error.InvalidInput; + +/** + * Druid {@link DeltaFilter} that maps to a Delta predicate of type = for the supplied column and value. 
+ */ +public class DeltaEqualsFilter implements DeltaFilter +{ + @JsonProperty + private final String column; + @JsonProperty + private final String value; + + @JsonCreator + public DeltaEqualsFilter(@JsonProperty("column") final String column, @JsonProperty("value") final String value) + { + if (column == null) { + throw InvalidInput.exception("column is a required field for = filter."); + } + if (value == null) { + throw InvalidInput.exception( + "value is a required field for = filter. None provided for column[%s].", column + ); + } + this.column = column; + this.value = value; + } + + @Override + public Predicate getFilterPredicate(StructType snapshotSchema) + { + return new Predicate( + "=", + ImmutableList.of( + new Column(column), + DeltaFilterUtils.dataTypeToLiteral(snapshotSchema, column, value) + ) + ); + } +} diff --git a/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaFilter.java b/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaFilter.java new file mode 100644 index 000000000000..4d1dcb4258b4 --- /dev/null +++ b/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaFilter.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.delta.filter; + +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import io.delta.kernel.expressions.Predicate; +import io.delta.kernel.types.StructType; + +/** + * Druid filters that translate to the underlying Delta Kernel {@link Predicate}s. Implementations should + * provide an expression tree syntax to provide more flexibility to users. + * + *

<p> + * A user-facing Druid {@link DeltaFilter} should be translated to a canonical Delta Kernel {@link Predicate}. + * Implementations should provide this one-to-one translation. + * </p>

+ */ +@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") +@JsonSubTypes(value = { + @JsonSubTypes.Type(name = "=", value = DeltaEqualsFilter.class), + @JsonSubTypes.Type(name = ">", value = DeltaGreaterThanFilter.class), + @JsonSubTypes.Type(name = ">=", value = DeltaGreaterThanOrEqualsFilter.class), + @JsonSubTypes.Type(name = "<", value = DeltaLessThanFilter.class), + @JsonSubTypes.Type(name = "<=", value = DeltaLessThanOrEqualsFilter.class), + @JsonSubTypes.Type(name = "and", value = DeltaAndFilter.class), + @JsonSubTypes.Type(name = "or", value = DeltaOrFilter.class), + @JsonSubTypes.Type(name = "not", value = DeltaNotFilter.class), +}) +public interface DeltaFilter +{ + /** + * Return a Delta predicate expression. The {@code snapshotSchema} should be used to perform any validations + * and derive sub-expressions to be used in the resulting {@link Predicate}. + */ + Predicate getFilterPredicate(StructType snapshotSchema); +} diff --git a/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaFilterUtils.java b/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaFilterUtils.java new file mode 100644 index 000000000000..dd6c6e27d1e5 --- /dev/null +++ b/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaFilterUtils.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.delta.filter;
+
+import io.delta.kernel.expressions.Literal;
+import io.delta.kernel.types.DataType;
+import io.delta.kernel.types.DateType;
+import io.delta.kernel.types.DoubleType;
+import io.delta.kernel.types.FloatType;
+import io.delta.kernel.types.IntegerType;
+import io.delta.kernel.types.LongType;
+import io.delta.kernel.types.ShortType;
+import io.delta.kernel.types.StringType;
+import io.delta.kernel.types.StructField;
+import io.delta.kernel.types.StructType;
+import org.apache.druid.error.InvalidInput;
+
+import java.sql.Date;
+import java.time.LocalDate;
+import java.time.temporal.ChronoUnit;
+
+public class DeltaFilterUtils
+{
+  /**
+   * Converts the string {@code value} of a filter into a Delta typed {@link Literal}. The literal's type is inferred
+   * from the data type of {@code column} in the supplied snapshot schema, so the column must be present in it.
+   * String, integer, short, long, float, double and date columns are supported. Date values are parsed with
+   * {@link Date#valueOf(String)} and converted to the number of days since the epoch.
+   *
+   * @param snapshotSchema schema used to look up the data type of {@code column}
+   * @param column         name of the column the filter applies to
+   * @param value          filter value in its string form
+   * @return a Delta typed literal for {@code value}
+   * @throws org.apache.druid.error.DruidException an invalid-input error if the column doesn't exist in the schema,
+   *         the column's data type is unsupported, or the value cannot be parsed as that data type
+   */
+  static Literal dataTypeToLiteral(
+      final StructType snapshotSchema,
+      final String column,
+      final String value
+  )
+  {
+    if (!snapshotSchema.fieldNames().contains(column)) {
+      throw InvalidInput.exception(
+          "column[%s] doesn't exist in schema[%s]", column, snapshotSchema
+      );
+    }
+
+    final StructField structField = snapshotSchema.get(column);
+    final DataType dataType = structField.getDataType();
+    try {
+      if (dataType instanceof StringType) {
+        return Literal.ofString(value);
+      } else if (dataType instanceof IntegerType) {
+        return Literal.ofInt(Integer.parseInt(value));
+      } else if (dataType instanceof ShortType) {
+        return Literal.ofShort(Short.parseShort(value));
+      } else if (dataType instanceof LongType) {
+        return Literal.ofLong(Long.parseLong(value));
+      } else if (dataType instanceof FloatType) {
+        return Literal.ofFloat(Float.parseFloat(value));
+      } else if (dataType instanceof DoubleType) {
+        return Literal.ofDouble(Double.parseDouble(value));
+      } else if (dataType instanceof DateType) {
+        return Literal.ofDate(parseDaysSinceEpoch(column, value, dataType));
+      } else {
+        throw InvalidInput.exception(
+            "Unsupported data type[%s] for column[%s] with value[%s].",
+            dataType, column, value
+        );
+      }
+    }
+    catch (NumberFormatException e) {
+      // Only the numeric branches reach this handler; date parsing failures are translated in the helper below.
+      throw InvalidInput.exception(
+          e,
+          "column[%s] has an invalid value[%s]. The value must be a number, as the column's data type is [%s].",
+          column, value, dataType
+      );
+    }
+  }
+
+  /**
+   * Parses {@code value} as a date and returns the number of days between the epoch and that date.
+   * {@link Date#valueOf(String)} signals malformed input with an {@link IllegalArgumentException} (which includes
+   * {@link NumberFormatException}), so it is translated here into an invalid-input error that mentions the
+   * expected date format instead of leaking out or being reported as an invalid number.
+   */
+  private static int parseDaysSinceEpoch(final String column, final String value, final DataType dataType)
+  {
+    final Date dateVal;
+    try {
+      dateVal = Date.valueOf(value);
+    }
+    catch (IllegalArgumentException e) {
+      throw InvalidInput.exception(
+          e,
+          "column[%s] has an invalid value[%s]. The value must be a date in the format yyyy-[m]m-[d]d,"
+          + " as the column's data type is [%s].",
+          column, value, dataType
+      );
+    }
+    return (int) ChronoUnit.DAYS.between(LocalDate.ofEpochDay(0), dateVal.toLocalDate());
+  }
+}
diff --git a/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaGreaterThanFilter.java b/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaGreaterThanFilter.java
new file mode 100644
index 000000000000..c57f626c3520
--- /dev/null
+++ b/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaGreaterThanFilter.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.delta.filter;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.collect.ImmutableList;
+import io.delta.kernel.expressions.Column;
+import io.delta.kernel.expressions.Predicate;
+import io.delta.kernel.types.StructType;
+import org.apache.druid.error.InvalidInput;
+
+/**
+ * Druid {@link DeltaFilter} that maps to a Delta predicate of type > for the supplied column and value.
+ */
+public class DeltaGreaterThanFilter implements DeltaFilter
+{
+  @JsonProperty
+  private final String column;
+  @JsonProperty
+  private final String value;
+
+  /**
+   * Creates a {@code >} filter. Both {@code column} and {@code value} are required; an invalid-input error is
+   * raised if either is missing, with {@code column} checked first.
+   */
+  @JsonCreator
+  public DeltaGreaterThanFilter(@JsonProperty("column") final String column, @JsonProperty("value") final String value)
+  {
+    this.column = requireColumn(column);
+    this.value = requireValue(column, value);
+  }
+
+  /**
+   * Builds the {@code >} predicate comparing the column with the value, where the value is converted to a literal
+   * typed after the column's data type in {@code snapshotSchema}.
+   */
+  @Override
+  public Predicate getFilterPredicate(StructType snapshotSchema)
+  {
+    return new Predicate(
+        ">",
+        ImmutableList.of(new Column(column), DeltaFilterUtils.dataTypeToLiteral(snapshotSchema, column, value))
+    );
+  }
+
+  private static String requireColumn(final String column)
+  {
+    if (column != null) {
+      return column;
+    }
+    throw InvalidInput.exception("column is a required field for > filter.");
+  }
+
+  private static String requireValue(final String column, final String value)
+  {
+    if (value != null) {
+      return value;
+    }
+    throw InvalidInput.exception(
+        "value is a required field for > filter. None provided for column[%s].", column
+    );
+  }
+}
diff --git a/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaGreaterThanOrEqualsFilter.java b/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaGreaterThanOrEqualsFilter.java
new file mode 100644
index 000000000000..89d3394ed787
--- /dev/null
+++ b/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaGreaterThanOrEqualsFilter.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.delta.filter;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.collect.ImmutableList;
+import io.delta.kernel.expressions.Column;
+import io.delta.kernel.expressions.Predicate;
+import io.delta.kernel.types.StructType;
+import org.apache.druid.error.InvalidInput;
+
+/**
+ * Druid {@link DeltaFilter} that maps to a Delta predicate of type >= for the supplied column and value.
+ */
+public class DeltaGreaterThanOrEqualsFilter implements DeltaFilter
+{
+  @JsonProperty
+  private final String column;
+  @JsonProperty
+  private final String value;
+
+  /**
+   * Creates a {@code >=} filter. Both {@code column} and {@code value} are required; an invalid-input error is
+   * raised if either is missing, with {@code column} checked first.
+   */
+  @JsonCreator
+  public DeltaGreaterThanOrEqualsFilter(@JsonProperty("column") final String column, @JsonProperty("value") final String value)
+  {
+    this.column = requireColumn(column);
+    this.value = requireValue(column, value);
+  }
+
+  /**
+   * Builds the {@code >=} predicate comparing the column with the value, where the value is converted to a literal
+   * typed after the column's data type in {@code snapshotSchema}.
+   */
+  @Override
+  public Predicate getFilterPredicate(StructType snapshotSchema)
+  {
+    return new Predicate(
+        ">=",
+        ImmutableList.of(new Column(column), DeltaFilterUtils.dataTypeToLiteral(snapshotSchema, column, value))
+    );
+  }
+
+  private static String requireColumn(final String column)
+  {
+    if (column != null) {
+      return column;
+    }
+    throw InvalidInput.exception("column is a required field for >= filter.");
+  }
+
+  private static String requireValue(final String column, final String value)
+  {
+    if (value != null) {
+      return value;
+    }
+    throw InvalidInput.exception(
+        "value is a required field for >= filter. None provided for column[%s].", column
+    );
+  }
+}
diff --git a/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaLessThanFilter.java b/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaLessThanFilter.java
new file mode 100644
index 000000000000..b98c93f3a8bf
--- /dev/null
+++ b/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaLessThanFilter.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.delta.filter;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.collect.ImmutableList;
+import io.delta.kernel.expressions.Column;
+import io.delta.kernel.expressions.Predicate;
+import io.delta.kernel.types.StructType;
+import org.apache.druid.error.InvalidInput;
+
+/**
+ * Druid {@link DeltaFilter} that maps to a Delta predicate of type < for the supplied column and value.
+ */
+public class DeltaLessThanFilter implements DeltaFilter
+{
+  @JsonProperty
+  private final String column;
+  @JsonProperty
+  private final String value;
+
+  /**
+   * Creates a {@code <} filter. Both {@code column} and {@code value} are required; an invalid-input error is
+   * raised if either is missing, with {@code column} checked first.
+   */
+  @JsonCreator
+  public DeltaLessThanFilter(@JsonProperty("column") final String column, @JsonProperty("value") final String value)
+  {
+    this.column = requireColumn(column);
+    this.value = requireValue(column, value);
+  }
+
+  /**
+   * Builds the {@code <} predicate comparing the column with the value, where the value is converted to a literal
+   * typed after the column's data type in {@code snapshotSchema}.
+   */
+  @Override
+  public Predicate getFilterPredicate(StructType snapshotSchema)
+  {
+    return new Predicate(
+        "<",
+        ImmutableList.of(new Column(column), DeltaFilterUtils.dataTypeToLiteral(snapshotSchema, column, value))
+    );
+  }
+
+  private static String requireColumn(final String column)
+  {
+    if (column != null) {
+      return column;
+    }
+    throw InvalidInput.exception("column is a required field for < filter.");
+  }
+
+  private static String requireValue(final String column, final String value)
+  {
+    if (value != null) {
+      return value;
+    }
+    throw InvalidInput.exception(
+        "value is a required field for < filter. None provided for column[%s].", column
+    );
+  }
+}
diff --git a/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaLessThanOrEqualsFilter.java b/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaLessThanOrEqualsFilter.java
new file mode 100644
index 000000000000..84fb12f88901
--- /dev/null
+++ b/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaLessThanOrEqualsFilter.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.delta.filter;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.collect.ImmutableList;
+import io.delta.kernel.expressions.Column;
+import io.delta.kernel.expressions.Predicate;
+import io.delta.kernel.types.StructType;
+import org.apache.druid.error.InvalidInput;
+
+/**
+ * Druid {@link DeltaFilter} that maps to a Delta predicate of type <= for the supplied column and value.
+ */
+public class DeltaLessThanOrEqualsFilter implements DeltaFilter
+{
+  @JsonProperty
+  private final String column;
+  @JsonProperty
+  private final String value;
+
+  /**
+   * Creates a {@code <=} filter. Both {@code column} and {@code value} are required; an invalid-input error is
+   * raised if either is missing, with {@code column} checked first.
+   */
+  @JsonCreator
+  public DeltaLessThanOrEqualsFilter(@JsonProperty("column") final String column, @JsonProperty("value") final String value)
+  {
+    this.column = requireColumn(column);
+    this.value = requireValue(column, value);
+  }
+
+  /**
+   * Builds the {@code <=} predicate comparing the column with the value, where the value is converted to a literal
+   * typed after the column's data type in {@code snapshotSchema}.
+   */
+  @Override
+  public Predicate getFilterPredicate(StructType snapshotSchema)
+  {
+    return new Predicate(
+        "<=",
+        ImmutableList.of(new Column(column), DeltaFilterUtils.dataTypeToLiteral(snapshotSchema, column, value))
+    );
+  }
+
+  private static String requireColumn(final String column)
+  {
+    if (column != null) {
+      return column;
+    }
+    throw InvalidInput.exception("column is a required field for <= filter.");
+  }
+
+  private static String requireValue(final String column, final String value)
+  {
+    if (value != null) {
+      return value;
+    }
+    throw InvalidInput.exception(
+        "value is a required field for <= filter. None provided for column[%s].", column
+    );
+  }
+}
diff --git a/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaNotFilter.java b/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaNotFilter.java
new file mode 100644
index 000000000000..86978bf641fd
--- /dev/null
+++ b/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaNotFilter.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.delta.filter;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import io.delta.kernel.expressions.Predicate;
+import io.delta.kernel.types.StructType;
+import org.apache.druid.error.InvalidInput;
+
+/**
+ * Druid {@link DeltaFilter} that maps to a canonical NOT {@link Predicate}.
+ */
+public class DeltaNotFilter implements DeltaFilter
+{
+  @JsonProperty
+  private final DeltaFilter filter;
+
+  /**
+   * Creates a NOT filter wrapping {@code filter}. The wrapped filter is required; an invalid-input error is
+   * raised if it is missing.
+   */
+  @JsonCreator
+  public DeltaNotFilter(@JsonProperty("filter") final DeltaFilter filter)
+  {
+    this.filter = requireFilter(filter);
+  }
+
+  /**
+   * Builds the wrapped filter's predicate against {@code snapshotSchema} and wraps it in a NOT predicate.
+   */
+  @Override
+  public Predicate getFilterPredicate(StructType snapshotSchema)
+  {
+    final Predicate innerPredicate = filter.getFilterPredicate(snapshotSchema);
+    return new Predicate("NOT", innerPredicate);
+  }
+
+  private static DeltaFilter requireFilter(final DeltaFilter filter)
+  {
+    if (filter != null) {
+      return filter;
+    }
+    // NOTE(review): "requiers" is a typo for "requires"; the text is kept as-is because tests or callers may
+    // match on the exact message.
+    throw InvalidInput.exception(
+        "Delta not filter requiers 1 filter predicate and must be non-empty. None provided."
+    );
+  }
+}
diff --git a/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaOrFilter.java b/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaOrFilter.java
new file mode 100644
index 000000000000..b9e2501df26c
--- /dev/null
+++ b/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/filter/DeltaOrFilter.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.delta.filter;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import io.delta.kernel.expressions.Or;
+import io.delta.kernel.expressions.Predicate;
+import io.delta.kernel.types.StructType;
+import org.apache.druid.error.InvalidInput;
+
+import java.util.List;
+
+/**
+ * Druid {@link DeltaFilter} that maps to a canonical {@link Or} predicate.
+ * @implNote currently this filter only allows 2 filter predicates. However, this can be relaxed by recursively
+ * flattening the filters to allow complex expressions.
+ */
+public class DeltaOrFilter implements DeltaFilter
+{
+  // Typed as List<DeltaFilter>: with a raw List the elements are Objects and getFilterPredicate() cannot be called.
+  @JsonProperty
+  private final List<DeltaFilter> filters;
+
+  /**
+   * Creates an OR filter over exactly two child filters.
+   *
+   * @param filters the two filters to combine; an invalid-input error is raised if the list is null or its size
+   *                is not 2
+   */
+  @JsonCreator
+  public DeltaOrFilter(@JsonProperty("filters") final List<DeltaFilter> filters)
+  {
+    if (filters == null) {
+      throw InvalidInput.exception(
+          "Delta or filter requires 2 filter predicates and must be non-empty. None provided."
+      );
+    }
+    if (filters.size() != 2) {
+      throw InvalidInput.exception(
+          "Delta or filter requires 2 filter predicates, but provided [%d].",
+          filters.size()
+      );
+    }
+    this.filters = filters;
+  }
+
+  /**
+   * Builds the predicates of both child filters against {@code snapshotSchema} and combines them with {@link Or}.
+   */
+  @Override
+  public Predicate getFilterPredicate(StructType snapshotSchema)
+  {
+    // This is simple for now. We can do a recursive flatten.
+    return new Or(
+        filters.get(0).getFilterPredicate(snapshotSchema),
+        filters.get(1).getFilterPredicate(snapshotSchema)
+    );
+  }
+}
diff --git a/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/input/DeltaInputSource.java b/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/input/DeltaInputSource.java
index cbce419291fe..56ccd2a41ae9 100644
--- a/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/input/DeltaInputSource.java
+++ b/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/input/DeltaInputSource.java
@@ -21,9 +21,11 @@
 
 import com.fasterxml.jackson.annotation.JsonCreator;
 import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.Iterators;
 import com.google.common.primitives.Ints;
 import io.delta.kernel.Scan;
+import io.delta.kernel.ScanBuilder;
 import io.delta.kernel.Snapshot;
 import io.delta.kernel.Table;
 import io.delta.kernel.TableNotFoundException;
@@ -32,6 +34,7 @@
 import io.delta.kernel.data.FilteredColumnarBatch;
 import io.delta.kernel.data.Row;
 import io.delta.kernel.defaults.client.DefaultTableClient;
+import io.delta.kernel.expressions.Predicate;
 import io.delta.kernel.internal.InternalScanFileUtils;
 import io.delta.kernel.internal.data.ScanStateRow;
 import io.delta.kernel.internal.util.Utils;
@@ -47,6 +50,7 @@
 import org.apache.druid.data.input.InputSplit;
 import org.apache.druid.data.input.SplitHintSpec;
 import org.apache.druid.data.input.impl.SplittableInputSource;
+import org.apache.druid.delta.filter.DeltaFilter;
 import org.apache.druid.error.InvalidInput;
 import org.apache.druid.utils.Streams;
 import org.apache.hadoop.conf.Configuration;
@@ -62,11 +66,20 @@
 import java.util.stream.Stream;
 
 /**
- * Input source to ingest data from a Delta Lake.
- * This input source reads the latest snapshot from a Delta table specified by {@code tablePath} parameter.
+ * Input source to ingest data from a Delta Lake. This input source reads the latest snapshot from a Delta table
+ * specified by {@code tablePath} parameter. If {@code filter} is specified, it's used at the Kernel level
+ * for data pruning. The filtering behavior is as follows:
+ * <ul>
+ * <li> When a filter is applied on a partitioned table using the partitioning columns, the filtering is guaranteed. </li>
+ * <li> When a filter is applied on non-partitioned columns, the filtering is best-effort as the Delta
+ * Kernel solely relies on statistics collected when the non-partitioned table is created. In this scenario, this input
+ * source connector may ingest data that doesn't match the filter. </li>
+ * </ul>
+ * <p>
  * We leverage the Delta Kernel APIs to interact with a Delta table. The Kernel API abstracts away the
  * complexities of the Delta protocol itself.
  * Note: currently, the Kernel table API only supports reading from the latest snapshot.
+ * </p>
*/ public class DeltaInputSource implements SplittableInputSource { @@ -79,10 +92,15 @@ public class DeltaInputSource implements SplittableInputSource @Nullable private final DeltaSplit deltaSplit; + @JsonProperty + @Nullable + private final DeltaFilter filter; + @JsonCreator public DeltaInputSource( - @JsonProperty("tablePath") String tablePath, - @JsonProperty("deltaSplit") @Nullable DeltaSplit deltaSplit + @JsonProperty("tablePath") final String tablePath, + @JsonProperty("deltaSplit") @Nullable final DeltaSplit deltaSplit, + @JsonProperty("filter") @Nullable final DeltaFilter filter ) { if (tablePath == null) { @@ -90,6 +108,7 @@ public DeltaInputSource( } this.tablePath = tablePath; this.deltaSplit = deltaSplit; + this.filter = filter; } @Override @@ -127,17 +146,23 @@ public InputSourceReader reader( for (String file : deltaSplit.getFiles()) { final Row scanFile = deserialize(tableClient, file); scanFileDataIters.add( - getTransformedDataIterator(tableClient, scanState, scanFile, physicalReadSchema) + getTransformedDataIterator(tableClient, scanState, scanFile, physicalReadSchema, Optional.empty()) ); } } else { final Table table = Table.forPath(tableClient, tablePath); final Snapshot latestSnapshot = table.getLatestSnapshot(tableClient); + final StructType fullSnapshotSchema = latestSnapshot.getSchema(tableClient); final StructType prunedSchema = pruneSchema( - latestSnapshot.getSchema(tableClient), + fullSnapshotSchema, inputRowSchema.getColumnsFilter() ); - final Scan scan = latestSnapshot.getScanBuilder(tableClient).withReadSchema(tableClient, prunedSchema).build(); + + final ScanBuilder scanBuilder = latestSnapshot.getScanBuilder(tableClient); + if (filter != null) { + scanBuilder.withFilter(tableClient, filter.getFilterPredicate(fullSnapshotSchema)); + } + final Scan scan = scanBuilder.withReadSchema(tableClient, prunedSchema).build(); final CloseableIterator scanFilesIter = scan.getScanFiles(tableClient); final Row scanState = 
scan.getScanState(tableClient); @@ -151,7 +176,7 @@ public InputSourceReader reader( while (scanFileRows.hasNext()) { final Row scanFile = scanFileRows.next(); scanFileDataIters.add( - getTransformedDataIterator(tableClient, scanState, scanFile, physicalReadSchema) + getTransformedDataIterator(tableClient, scanState, scanFile, physicalReadSchema, scan.getRemainingFilter()) ); } } @@ -187,7 +212,13 @@ public Stream> createSplits(InputFormat inputFormat, @Nul catch (TableNotFoundException e) { throw InvalidInput.exception(e, "tablePath[%s] not found.", tablePath); } - final Scan scan = latestSnapshot.getScanBuilder(tableClient).build(); + final StructType fullSnapshotSchema = latestSnapshot.getSchema(tableClient); + + final ScanBuilder scanBuilder = latestSnapshot.getScanBuilder(tableClient); + if (filter != null) { + scanBuilder.withFilter(tableClient, filter.getFilterPredicate(fullSnapshotSchema)); + } + final Scan scan = scanBuilder.withReadSchema(tableClient, fullSnapshotSchema).build(); // scan files iterator for the current snapshot final CloseableIterator scanFilesIterator = scan.getScanFiles(tableClient); @@ -220,7 +251,8 @@ public InputSource withSplit(InputSplit split) { return new DeltaInputSource( tablePath, - split.get() + split.get(), + filter ); } @@ -279,7 +311,8 @@ private CloseableIterator getTransformedDataIterator( final TableClient tableClient, final Row scanState, final Row scanFile, - final StructType physicalReadSchema + final StructType physicalReadSchema, + final Optional optionalPredicate ) throws IOException { final FileStatus fileStatus = InternalScanFileUtils.getAddFileStatus(scanFile); @@ -287,8 +320,9 @@ private CloseableIterator getTransformedDataIterator( final CloseableIterator physicalDataIter = tableClient.getParquetHandler().readParquetFiles( Utils.singletonCloseableIterator(fileStatus), physicalReadSchema, - Optional.empty() + optionalPredicate ); + return Scan.transformPhysicalData( tableClient, scanState, @@ -296,4 +330,16 @@ 
private CloseableIterator getTransformedDataIterator( physicalDataIter ); } + + @VisibleForTesting + String getTablePath() + { + return tablePath; + } + + @VisibleForTesting + DeltaFilter getFilter() + { + return filter; + } } diff --git a/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/input/DeltaInputSourceReader.java b/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/input/DeltaInputSourceReader.java index 02421997f432..9ac7c253ef35 100644 --- a/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/input/DeltaInputSourceReader.java +++ b/extensions-contrib/druid-deltalake-extensions/src/main/java/org/apache/druid/delta/input/DeltaInputSourceReader.java @@ -45,7 +45,6 @@ public class DeltaInputSourceReader implements InputSourceReader public DeltaInputSourceReader( Iterator> filteredColumnarBatchIterators, InputRowSchema inputRowSchema - ) { this.filteredColumnarBatchIterators = filteredColumnarBatchIterators; @@ -120,7 +119,8 @@ public boolean hasNext() filteredColumnarBatchIterators.next(); while (filteredBatchIterator.hasNext()) { - currentBatch = filteredBatchIterator.next().getRows(); + final FilteredColumnarBatch nextBatch = filteredBatchIterator.next(); + currentBatch = nextBatch.getRows(); if (currentBatch.hasNext()) { return true; } diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaAndFilterTest.java b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaAndFilterTest.java new file mode 100644 index 000000000000..946a832f0780 --- /dev/null +++ b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaAndFilterTest.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.delta.filter;
+
+import io.delta.kernel.expressions.And;
+import io.delta.kernel.expressions.Predicate;
+import io.delta.kernel.types.LongType;
+import io.delta.kernel.types.StringType;
+import io.delta.kernel.types.StructField;
+import io.delta.kernel.types.StructType;
+import org.apache.druid.error.DruidException;
+import org.apache.druid.error.DruidExceptionMatcher;
+import org.apache.druid.java.util.common.StringUtils;
+import org.hamcrest.MatcherAssert;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.Collections;
+
+/**
+ * Tests for {@link DeltaAndFilter}: predicate construction and validation of its inputs.
+ */
+public class DeltaAndFilterTest
+{
+  private static final StructType SCHEMA = new StructType()
+      .add(new StructField("name", StringType.STRING, true))
+      .add(new StructField("age", LongType.LONG, false))
+      .add(new StructField("bar", StringType.STRING, true));
+
+  @Test
+  public void testAndFilter()
+  {
+    final Predicate andPredicate = new DeltaAndFilter(
+        Arrays.asList(
+            new DeltaEqualsFilter("name", "Employee1"),
+            new DeltaGreaterThanOrEqualsFilter("age", "8")
+        )
+    ).getFilterPredicate(SCHEMA);
+
+    Assert.assertTrue(andPredicate instanceof And);
+    Assert.assertEquals(2, andPredicate.getChildren().size());
+  }
+
+  @Test
+  public void testAndFilterWithInvalidColumn()
+  {
+    // "name2" is not part of SCHEMA, so building the predicate must fail.
+    final DeltaAndFilter filterOnUnknownColumn = new DeltaAndFilter(
+        Arrays.asList(
+            new DeltaEqualsFilter("name2", "Employee1"),
+            new DeltaGreaterThanOrEqualsFilter("age", "8")
+        )
+    );
+
+    final DruidException thrown = Assert.assertThrows(
+        DruidException.class,
+        () -> filterOnUnknownColumn.getFilterPredicate(SCHEMA)
+    );
+    MatcherAssert.assertThat(
+        thrown,
+        DruidExceptionMatcher.invalidInput().expectMessageIs(
+            StringUtils.format("column[name2] doesn't exist in schema[%s]", SCHEMA)
+        )
+    );
+  }
+
+  @Test
+  public void testAndFilterWithNoFilterPredicates()
+  {
+    final DruidException thrown = Assert.assertThrows(
+        DruidException.class,
+        () -> new DeltaAndFilter(null)
+    );
+    MatcherAssert.assertThat(
+        thrown,
+        DruidExceptionMatcher.invalidInput().expectMessageIs(
+            "Delta and filter requires 2 filter predicates and must be non-empty. None provided."
+        )
+    );
+  }
+
+  @Test
+  public void testAndFilterWithOneFilterPredicate()
+  {
+    final DruidException thrown = Assert.assertThrows(
+        DruidException.class,
+        () -> new DeltaAndFilter(
+            Collections.singletonList(
+                new DeltaEqualsFilter("name", "Employee1")
+            )
+        )
+    );
+    MatcherAssert.assertThat(
+        thrown,
+        DruidExceptionMatcher.invalidInput().expectMessageIs(
+            "Delta and filter requires 2 filter predicates, but provided [1]."
+        )
+    );
+  }
+}
diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaEqualsFilterTest.java b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaEqualsFilterTest.java
new file mode 100644
index 000000000000..26761b73da5f
--- /dev/null
+++ b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaEqualsFilterTest.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.delta.filter;
+
+import io.delta.kernel.expressions.Predicate;
+import io.delta.kernel.types.DateType;
+import io.delta.kernel.types.DoubleType;
+import io.delta.kernel.types.FloatType;
+import io.delta.kernel.types.IntegerType;
+import io.delta.kernel.types.LongType;
+import io.delta.kernel.types.ShortType;
+import io.delta.kernel.types.StringType;
+import io.delta.kernel.types.StructField;
+import io.delta.kernel.types.StructType;
+import org.apache.druid.error.DruidException;
+import org.apache.druid.error.DruidExceptionMatcher;
+import org.hamcrest.MatcherAssert;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests for {@link DeltaEqualsFilter}: predicate construction, required fields and value parsing errors.
+ */
+public class DeltaEqualsFilterTest
+{
+  private static final StructType SCHEMA = new StructType()
+      .add(new StructField("str_col", StringType.STRING, true))
+      .add(new StructField("int_col", IntegerType.INTEGER, true))
+      .add(new StructField("short_col", ShortType.SHORT, true))
+      .add(new StructField("long_col", LongType.LONG, false))
+      .add(new StructField("float_col", FloatType.FLOAT, true))
+      .add(new StructField("double_col", DoubleType.DOUBLE, true))
+      .add(new StructField("date_col", DateType.DATE, true));
+
+  @Test
+  public void testEqualsFilter()
+  {
+    final Predicate eqPredicate = new DeltaEqualsFilter("str_col", "Employee1").getFilterPredicate(SCHEMA);
+
+    Assert.assertEquals("=", eqPredicate.getName());
+    Assert.assertEquals(2, eqPredicate.getChildren().size());
+  }
+
+  @Test
+  public void testFilterWithNullColumn()
+  {
+    final DruidException thrown = Assert.assertThrows(
+        DruidException.class,
+        () -> new DeltaEqualsFilter(null, "Employee1")
+    );
+    MatcherAssert.assertThat(
+        thrown,
+        DruidExceptionMatcher.invalidInput().expectMessageIs(
+            "column is a required field for = filter."
+        )
+    );
+  }
+
+  @Test
+  public void testFilterWithNullValue()
+  {
+    final DruidException thrown = Assert.assertThrows(
+        DruidException.class,
+        () -> new DeltaEqualsFilter("str_col", null)
+    );
+    MatcherAssert.assertThat(
+        thrown,
+        DruidExceptionMatcher.invalidInput().expectMessageIs(
+            "value is a required field for = filter. None provided for column[str_col]."
+        )
+    );
+  }
+
+  @Test
+  public void testFilterWithInvalidNumericValue()
+  {
+    // The value is only parsed when the predicate is built, so construction itself succeeds.
+    final DeltaEqualsFilter filterWithBadNumber = new DeltaEqualsFilter("long_col", "twentyOne");
+
+    final DruidException thrown = Assert.assertThrows(
+        DruidException.class,
+        () -> filterWithBadNumber.getFilterPredicate(SCHEMA)
+    );
+    MatcherAssert.assertThat(
+        thrown,
+        DruidExceptionMatcher.invalidInput().expectMessageIs(
+            "column[long_col] has an invalid value[twentyOne]. The value must be a number, as the column's data type is [long]."
+        )
+    );
+  }
+}
diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaGreaterThanFilterTest.java b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaGreaterThanFilterTest.java
new file mode 100644
index 000000000000..53bf42a7e71a
--- /dev/null
+++ b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaGreaterThanFilterTest.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.delta.filter; + +import io.delta.kernel.expressions.Predicate; +import io.delta.kernel.types.DateType; +import io.delta.kernel.types.DoubleType; +import io.delta.kernel.types.FloatType; +import io.delta.kernel.types.IntegerType; +import io.delta.kernel.types.LongType; +import io.delta.kernel.types.ShortType; +import io.delta.kernel.types.StringType; +import io.delta.kernel.types.StructField; +import io.delta.kernel.types.StructType; +import org.junit.Assert; +import org.junit.Test; + +public class DeltaGreaterThanFilterTest +{ + private static final StructType SCHEMA = new StructType() + .add(new StructField("str_col", StringType.STRING, true)) + .add(new StructField("int_col", IntegerType.INTEGER, true)) + .add(new StructField("short_col", ShortType.SHORT, true)) + .add(new StructField("long_col", LongType.LONG, false)) + .add(new StructField("float_col", FloatType.FLOAT, true)) + .add(new StructField("double_col", DoubleType.DOUBLE, true)) + .add(new StructField("date_col", DateType.DATE, true)); + + @Test + public void testGreaterThanFilter() + { + DeltaGreaterThanFilter gtFilter = new DeltaGreaterThanFilter("int_col", "123"); + + Predicate predicate = gtFilter.getFilterPredicate(SCHEMA); + + Assert.assertEquals(">", predicate.getName()); + Assert.assertEquals(2, predicate.getChildren().size()); + } +} diff --git 
a/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaGreaterThanOrEqualsFilterTest.java b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaGreaterThanOrEqualsFilterTest.java new file mode 100644 index 000000000000..cf9fdf9c8bc7 --- /dev/null +++ b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaGreaterThanOrEqualsFilterTest.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.delta.filter; + +import io.delta.kernel.expressions.Predicate; +import io.delta.kernel.types.DateType; +import io.delta.kernel.types.DoubleType; +import io.delta.kernel.types.FloatType; +import io.delta.kernel.types.IntegerType; +import io.delta.kernel.types.LongType; +import io.delta.kernel.types.ShortType; +import io.delta.kernel.types.StringType; +import io.delta.kernel.types.StructField; +import io.delta.kernel.types.StructType; +import org.junit.Assert; +import org.junit.Test; + +public class DeltaGreaterThanOrEqualsFilterTest +{ + private static final StructType SCHEMA = new StructType() + .add(new StructField("str_col", StringType.STRING, true)) + .add(new StructField("int_col", IntegerType.INTEGER, true)) + .add(new StructField("short_col", ShortType.SHORT, true)) + .add(new StructField("long_col", LongType.LONG, false)) + .add(new StructField("float_col", FloatType.FLOAT, true)) + .add(new StructField("double_col", DoubleType.DOUBLE, true)) + .add(new StructField("date_col", DateType.DATE, true)); + + + @Test + public void testGreaterThanOrEqualsFilter() + { + DeltaGreaterThanOrEqualsFilter gteFilter = new DeltaGreaterThanOrEqualsFilter("long_col", "1234343232323"); + + Predicate predicate = gteFilter.getFilterPredicate(SCHEMA); + + Assert.assertEquals(">=", predicate.getName()); + Assert.assertEquals(2, predicate.getChildren().size()); + } +} diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaLessThanFilterTest.java b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaLessThanFilterTest.java new file mode 100644 index 000000000000..2d716214e61a --- /dev/null +++ b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaLessThanFilterTest.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.delta.filter; + +import io.delta.kernel.expressions.Predicate; +import io.delta.kernel.types.DateType; +import io.delta.kernel.types.DoubleType; +import io.delta.kernel.types.FloatType; +import io.delta.kernel.types.IntegerType; +import io.delta.kernel.types.LongType; +import io.delta.kernel.types.ShortType; +import io.delta.kernel.types.StringType; +import io.delta.kernel.types.StructField; +import io.delta.kernel.types.StructType; +import org.junit.Assert; +import org.junit.Test; + +public class DeltaLessThanFilterTest +{ + private static final StructType SCHEMA = new StructType() + .add(new StructField("str_col", StringType.STRING, true)) + .add(new StructField("int_col", IntegerType.INTEGER, true)) + .add(new StructField("short_col", ShortType.SHORT, true)) + .add(new StructField("long_col", LongType.LONG, false)) + .add(new StructField("float_col", FloatType.FLOAT, true)) + .add(new StructField("double_col", DoubleType.DOUBLE, true)) + .add(new StructField("date_col", DateType.DATE, true)); + + @Test + public void testLessThanFilter() + { + DeltaLessThanFilter ltFilter = new DeltaLessThanFilter("double_col", "123.2323"); + + Predicate predicate = ltFilter.getFilterPredicate(SCHEMA); + + Assert.assertEquals("<", predicate.getName()); + 
Assert.assertEquals(2, predicate.getChildren().size()); + } +} diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaLessThanOrEqualsFilterTest.java b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaLessThanOrEqualsFilterTest.java new file mode 100644 index 000000000000..a526c57e3893 --- /dev/null +++ b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaLessThanOrEqualsFilterTest.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.delta.filter; + +import io.delta.kernel.expressions.Predicate; +import io.delta.kernel.types.DateType; +import io.delta.kernel.types.DoubleType; +import io.delta.kernel.types.FloatType; +import io.delta.kernel.types.IntegerType; +import io.delta.kernel.types.LongType; +import io.delta.kernel.types.ShortType; +import io.delta.kernel.types.StringType; +import io.delta.kernel.types.StructField; +import io.delta.kernel.types.StructType; +import org.apache.druid.error.DruidException; +import org.apache.druid.error.DruidExceptionMatcher; +import org.hamcrest.MatcherAssert; +import org.junit.Assert; +import org.junit.Test; + +public class DeltaLessThanOrEqualsFilterTest +{ + private static final StructType SCHEMA = new StructType() + .add(new StructField("str_col", StringType.STRING, true)) + .add(new StructField("int_col", IntegerType.INTEGER, true)) + .add(new StructField("short_col", ShortType.SHORT, true)) + .add(new StructField("long_col", LongType.LONG, false)) + .add(new StructField("float_col", FloatType.FLOAT, true)) + .add(new StructField("double_col", DoubleType.DOUBLE, true)) + .add(new StructField("date_col", DateType.DATE, true)); + + @Test + public void testLessThanOrEqualsFilter() + { + DeltaLessThanOrEqualsFilter lteFilter = new DeltaLessThanOrEqualsFilter("date_col", "2024-01-01"); + + Predicate predicate = lteFilter.getFilterPredicate(SCHEMA); + + Assert.assertEquals("<=", predicate.getName()); + Assert.assertEquals(2, predicate.getChildren().size()); + } + + @Test + public void testFilterWithInvalidNumericValue() + { + DeltaLessThanOrEqualsFilter lteFilter = new DeltaLessThanOrEqualsFilter("long_col", "twentyOne"); + + MatcherAssert.assertThat( + Assert.assertThrows( + DruidException.class, + () -> lteFilter.getFilterPredicate(SCHEMA) + ), + DruidExceptionMatcher.invalidInput().expectMessageIs( + "column[long_col] has an invalid value[twentyOne]. The value must be a number, as the column's data type is [long]." 
+ ) + ); + } +} diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaNotFilterTest.java b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaNotFilterTest.java new file mode 100644 index 000000000000..dae3d37d2522 --- /dev/null +++ b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaNotFilterTest.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.delta.filter; + +import io.delta.kernel.expressions.Predicate; +import io.delta.kernel.types.LongType; +import io.delta.kernel.types.StringType; +import io.delta.kernel.types.StructField; +import io.delta.kernel.types.StructType; +import org.apache.druid.error.DruidException; +import org.apache.druid.error.DruidExceptionMatcher; +import org.apache.druid.java.util.common.StringUtils; +import org.hamcrest.MatcherAssert; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Arrays; + +public class DeltaNotFilterTest +{ + private static final StructType SCHEMA = new StructType() + .add(new StructField("name", StringType.STRING, true)) + .add(new StructField("age", LongType.LONG, false)) + .add(new StructField("bar", StringType.STRING, true)); + + @Test + public void testNotFilterWithEqualsExpression() + { + DeltaEqualsFilter equalsFilter = new DeltaEqualsFilter( + "name", + "Employee1" + ); + DeltaNotFilter notFilter = new DeltaNotFilter(equalsFilter); + + Predicate predicate = notFilter.getFilterPredicate(SCHEMA); + + Assert.assertEquals("NOT", predicate.getName()); + Assert.assertEquals(1, predicate.getChildren().size()); + } + + @Test + public void testNotFilterWithAndExpression() + { + DeltaAndFilter andFilter = new DeltaAndFilter( + Arrays.asList( + new DeltaEqualsFilter( + "name", + "Employee1" + ), + new DeltaEqualsFilter( + "name", + "Employee2" + ) + ) + ); + DeltaNotFilter notFilter = new DeltaNotFilter(andFilter); + + Predicate predicate = notFilter.getFilterPredicate(SCHEMA); + + Assert.assertEquals("NOT", predicate.getName()); + Assert.assertEquals(1, predicate.getChildren().size()); + } + + @Test + public void testNotFilterWithInvalidColumn() + { + DeltaEqualsFilter equalsFilter = new DeltaEqualsFilter( + "name2", + "Employee1" + ); + DeltaNotFilter notFilter = new DeltaNotFilter(equalsFilter); + + MatcherAssert.assertThat( + Assert.assertThrows(DruidException.class, () -> 
notFilter.getFilterPredicate(SCHEMA)), + DruidExceptionMatcher.invalidInput().expectMessageIs( + StringUtils.format("column[name2] doesn't exist in schema[%s]", SCHEMA) + ) + ); + } + + @Test + public void testNotFilterWithNoFilterPredicates() + { + MatcherAssert.assertThat( + Assert.assertThrows( + DruidException.class, + () -> new DeltaNotFilter(null) + ), + DruidExceptionMatcher.invalidInput().expectMessageIs( + "Delta not filter requiers 1 filter predicate and must be non-empty. None provided." + ) + ); + } +} diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaOrFilterTest.java b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaOrFilterTest.java new file mode 100644 index 000000000000..402c679dcd63 --- /dev/null +++ b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/filter/DeltaOrFilterTest.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.delta.filter; + +import io.delta.kernel.expressions.Or; +import io.delta.kernel.expressions.Predicate; +import io.delta.kernel.types.LongType; +import io.delta.kernel.types.StringType; +import io.delta.kernel.types.StructField; +import io.delta.kernel.types.StructType; +import org.apache.druid.error.DruidException; +import org.apache.druid.error.DruidExceptionMatcher; +import org.apache.druid.java.util.common.StringUtils; +import org.hamcrest.MatcherAssert; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Arrays; +import java.util.Collections; + +public class DeltaOrFilterTest +{ + private static final StructType SCHEMA = new StructType() + .add(new StructField("name", StringType.STRING, true)) + .add(new StructField("age", LongType.LONG, false)) + .add(new StructField("bar", StringType.STRING, true)); + + @Test + public void testOrFilter() + { + DeltaOrFilter orFilter = new DeltaOrFilter( + Arrays.asList( + new DeltaEqualsFilter("name", "Employee1"), + new DeltaLessThanOrEqualsFilter("age", "8") + ) + ); + + Predicate predicate = orFilter.getFilterPredicate(SCHEMA); + + Assert.assertTrue(predicate instanceof Or); + Assert.assertEquals(2, predicate.getChildren().size()); + } + + @Test + public void testOrFilterWithInvalidColumn() + { + DeltaOrFilter orFilter = new DeltaOrFilter( + Arrays.asList( + new DeltaEqualsFilter("name2", "Employee1"), + new DeltaLessThanOrEqualsFilter("age", "8") + ) + ); + + MatcherAssert.assertThat( + Assert.assertThrows(DruidException.class, () -> orFilter.getFilterPredicate(SCHEMA)), + DruidExceptionMatcher.invalidInput().expectMessageIs( + StringUtils.format("column[name2] doesn't exist in schema[%s]", SCHEMA) + ) + ); + } + + @Test + public void testOrFilterWithNoFilterPredicates() + { + MatcherAssert.assertThat( + Assert.assertThrows( + DruidException.class, + () -> new DeltaOrFilter(null) + ), + DruidExceptionMatcher.invalidInput().expectMessageIs( + "Delta or filter requires 2 
filter predicates and must be non-empty. None provided." + ) + ); + } + + @Test + public void testOrFilterWithOneFilterPredicate() + { + MatcherAssert.assertThat( + Assert.assertThrows( + DruidException.class, + () -> new DeltaOrFilter( + Collections.singletonList( + new DeltaEqualsFilter("name", "Employee1") + ) + ) + ), + DruidExceptionMatcher.invalidInput().expectMessageIs( + "Delta or filter requires 2 filter predicates, but provided [1]." + ) + ); + } +} diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/input/DeltaInputRowTest.java b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/input/DeltaInputRowTest.java index 7069d79b55c6..9e597894d05c 100644 --- a/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/input/DeltaInputRowTest.java +++ b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/input/DeltaInputRowTest.java @@ -32,21 +32,44 @@ import io.delta.kernel.types.StructType; import io.delta.kernel.utils.CloseableIterator; import io.delta.kernel.utils.FileStatus; +import org.apache.druid.data.input.InputRowSchema; +import org.apache.druid.error.DruidException; +import org.apache.druid.error.DruidExceptionMatcher; import org.apache.hadoop.conf.Configuration; +import org.hamcrest.MatcherAssert; import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; import java.util.Map; import java.util.Optional; public class DeltaInputRowTest { - @Test - public void testDeltaInputRow() throws TableNotFoundException, IOException + public static Collection data() + { + Object[][] data = new Object[][]{ + {NonPartitionedDeltaTable.DELTA_TABLE_PATH, NonPartitionedDeltaTable.FULL_SCHEMA, NonPartitionedDeltaTable.DIMENSIONS, 
NonPartitionedDeltaTable.EXPECTED_ROWS}, + {PartitionedDeltaTable.DELTA_TABLE_PATH, PartitionedDeltaTable.FULL_SCHEMA, PartitionedDeltaTable.DIMENSIONS, PartitionedDeltaTable.EXPECTED_ROWS} + }; + return Arrays.asList(data); + } + + @MethodSource("data") + @ParameterizedTest(name = "{index}:with context {0}") + public void testDeltaInputRow( + final String deltaTablePath, + final InputRowSchema schema, + final List dimensions, + final List> expectedRows + ) throws TableNotFoundException, IOException { final TableClient tableClient = DefaultTableClient.create(new Configuration()); - final Scan scan = DeltaTestUtils.getScan(tableClient); + final Scan scan = DeltaTestUtils.getScan(tableClient, deltaTablePath); final Row scanState = scan.getScanState(tableClient); final StructType physicalReadSchema = ScanStateRow.getPhysicalDataReadSchema(tableClient, scanState); @@ -76,13 +99,13 @@ public void testDeltaInputRow() throws TableNotFoundException, IOException while (dataIter.hasNext()) { FilteredColumnarBatch dataReadResult = dataIter.next(); Row next = dataReadResult.getRows().next(); - DeltaInputRow deltaInputRow = new DeltaInputRow(next, DeltaTestUtils.FULL_SCHEMA); + DeltaInputRow deltaInputRow = new DeltaInputRow(next, schema); Assert.assertNotNull(deltaInputRow); - Assert.assertEquals(DeltaTestUtils.DIMENSIONS, deltaInputRow.getDimensions()); + Assert.assertEquals(dimensions, deltaInputRow.getDimensions()); - Map expectedRow = DeltaTestUtils.EXPECTED_ROWS.get(totalRecordCount); + Map expectedRow = expectedRows.get(totalRecordCount); for (String key : expectedRow.keySet()) { - if (DeltaTestUtils.FULL_SCHEMA.getTimestampSpec().getTimestampColumn().equals(key)) { + if (schema.getTimestampSpec().getTimestampColumn().equals(key)) { final long expectedMillis = ((Long) expectedRow.get(key)) * 1000; Assert.assertEquals(expectedMillis, deltaInputRow.getTimestampFromEpoch()); } else { @@ -93,6 +116,23 @@ public void testDeltaInputRow() throws TableNotFoundException, 
IOException } } } - Assert.assertEquals(DeltaTestUtils.EXPECTED_ROWS.size(), totalRecordCount); + Assert.assertEquals(expectedRows.size(), totalRecordCount); + } + + @MethodSource("data") + @ParameterizedTest(name = "{index}:with context {0}") + public void testReadNonExistentTable() + { + final DeltaInputSource deltaInputSource = new DeltaInputSource("non-existent-table", null, null); + + MatcherAssert.assertThat( + Assert.assertThrows( + DruidException.class, + () -> deltaInputSource.reader(null, null, null) + ), + DruidExceptionMatcher.invalidInput().expectMessageIs( + "tablePath[non-existent-table] not found." + ) + ); } } diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/input/DeltaInputSourceSerdeTest.java b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/input/DeltaInputSourceSerdeTest.java new file mode 100644 index 000000000000..b6b223d9e0a6 --- /dev/null +++ b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/input/DeltaInputSourceSerdeTest.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.delta.input; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.exc.ValueInstantiationException; +import org.apache.druid.delta.common.DeltaLakeDruidModule; +import org.apache.druid.delta.filter.DeltaAndFilter; +import org.apache.druid.delta.filter.DeltaLessThanFilter; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.junit.Assert; +import org.junit.Test; + +public class DeltaInputSourceSerdeTest +{ + private static final ObjectMapper OBJECT_MAPPER = new DefaultObjectMapper() + .registerModules(new DeltaLakeDruidModule().getJacksonModules()); + + @Test + public void testDeltaInputSourceDeserializationWithNoFilter() throws JsonProcessingException + { + final String payload = "{\n" + + " \"type\": \"delta\",\n" + + " \"tablePath\": \"foo/bar\"\n" + + " }"; + + final DeltaInputSource deltaInputSource = OBJECT_MAPPER.readValue(payload, DeltaInputSource.class); + Assert.assertEquals("foo/bar", deltaInputSource.getTablePath()); + Assert.assertNull(deltaInputSource.getFilter()); + } + + @Test + public void testDeltaInputSourceDeserializationWithLessThanFilter() throws JsonProcessingException + { + final String payload = "{\n" + + " \"type\": \"delta\",\n" + + " \"tablePath\": \"foo/bar\",\n" + + " \"filter\": {\n" + + " \"type\": \"<\",\n" + + " \"column\": \"age\",\n" + + " \"value\": \"20\"\n" + + " }\n" + + " }"; + + final DeltaInputSource deltaInputSource = OBJECT_MAPPER.readValue(payload, DeltaInputSource.class); + Assert.assertEquals("foo/bar", deltaInputSource.getTablePath()); + Assert.assertTrue(deltaInputSource.getFilter() instanceof DeltaLessThanFilter); + } + + @Test + public void testDeltaInputSourceDeserializationWithAndFilter() throws JsonProcessingException + { + final String payload = "{\n" + + " \"type\": \"delta\",\n" + + " \"tablePath\": \"s3://foo/bar/baz\",\n" + + " \"filter\": {\n" + + " \"type\": 
\"and\",\n" + + " \"filters\": [\n" + + " {\n" + + " \"type\": \"<=\",\n" + + " \"column\": \"age\",\n" + + " \"value\": \"30\"\n" + + " },\n" + + " {\n" + + " \"type\": \">=\",\n" + + " \"column\": \"name\",\n" + + " \"value\": \"Employee4\"\n" + + " }\n" + + " ]\n" + + " }\n" + + " }"; + + final DeltaInputSource deltaInputSource = OBJECT_MAPPER.readValue(payload, DeltaInputSource.class); + Assert.assertEquals("s3://foo/bar/baz", deltaInputSource.getTablePath()); + Assert.assertTrue(deltaInputSource.getFilter() instanceof DeltaAndFilter); + } + + @Test + public void testDeltaInputSourceDeserializationWithNoTablePath() + { + final String payload = "{\n" + + " \"type\": \"delta\",\n" + + " \"filter\": {\n" + + " \"type\": \"<\",\n" + + " \"column\": \"age\",\n" + + " \"value\": \"20\"\n" + + " }\n" + + " }"; + + final ValueInstantiationException exception = Assert.assertThrows( + ValueInstantiationException.class, + () -> OBJECT_MAPPER.readValue(payload, DeltaInputSource.class) + ); + + Assert.assertTrue( + exception.getCause().getMessage().contains( + "tablePath cannot be null." 
+ ) + ); + } + + @Test + public void testDeltaInputSourceDeserializationWithNoFilterColumn() + { + final String payload = "{\n" + + " \"type\": \"delta\",\n" + + " \"tablePath\": \"foo/bar\",\n" + + " \"filter\": {\n" + + " \"type\": \">=\",\n" + + " \"value\": \"20\"\n" + + " }\n" + + " }"; + + final ValueInstantiationException exception = Assert.assertThrows( + ValueInstantiationException.class, + () -> OBJECT_MAPPER.readValue(payload, DeltaInputSource.class) + ); + + Assert.assertEquals( + "column is a required field for >= filter.", + exception.getCause().getMessage() + ); + } +} diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/input/DeltaInputSourceTest.java b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/input/DeltaInputSourceTest.java index 24b1096abe1e..3fe42676498e 100644 --- a/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/input/DeltaInputSourceTest.java +++ b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/input/DeltaInputSourceTest.java @@ -23,7 +23,14 @@ import org.apache.druid.data.input.InputRowListPlusRawValues; import org.apache.druid.data.input.InputRowSchema; import org.apache.druid.data.input.InputSourceReader; -import org.apache.druid.data.input.InputSplit; +import org.apache.druid.delta.filter.DeltaAndFilter; +import org.apache.druid.delta.filter.DeltaEqualsFilter; +import org.apache.druid.delta.filter.DeltaFilter; +import org.apache.druid.delta.filter.DeltaGreaterThanFilter; +import org.apache.druid.delta.filter.DeltaGreaterThanOrEqualsFilter; +import org.apache.druid.delta.filter.DeltaLessThanOrEqualsFilter; +import org.apache.druid.delta.filter.DeltaNotFilter; +import org.apache.druid.delta.filter.DeltaOrFilter; import org.apache.druid.error.DruidException; import org.apache.druid.error.DruidExceptionMatcher; import org.apache.druid.java.util.common.DateTimes; @@ -32,11 +39,15 @@ import 
org.junit.Assert; import org.junit.Before; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.function.Predicate; import java.util.stream.Collectors; public class DeltaInputSourceTest @@ -47,169 +58,311 @@ public void setUp() System.setProperty("user.timezone", "UTC"); } - @Test - public void testSampleDeltaTable() throws IOException + @RunWith(Parameterized.class) + public static class TablePathParameterTests { - final DeltaInputSource deltaInputSource = new DeltaInputSource(DeltaTestUtils.DELTA_TABLE_PATH, null); - final InputSourceReader inputSourceReader = deltaInputSource.reader(DeltaTestUtils.FULL_SCHEMA, null, null); + @Parameterized.Parameters + public static Object[][] data() + { + return new Object[][]{ + { + NonPartitionedDeltaTable.DELTA_TABLE_PATH, + NonPartitionedDeltaTable.FULL_SCHEMA, + NonPartitionedDeltaTable.EXPECTED_ROWS + }, + { + NonPartitionedDeltaTable.DELTA_TABLE_PATH, + NonPartitionedDeltaTable.SCHEMA_1, + NonPartitionedDeltaTable.EXPECTED_ROWS + }, + { + NonPartitionedDeltaTable.DELTA_TABLE_PATH, + NonPartitionedDeltaTable.SCHEMA_2, + NonPartitionedDeltaTable.EXPECTED_ROWS + }, + { + PartitionedDeltaTable.DELTA_TABLE_PATH, + PartitionedDeltaTable.FULL_SCHEMA, + PartitionedDeltaTable.EXPECTED_ROWS + } + }; + } - List actualSampledRows = sampleAllRows(inputSourceReader); - Assert.assertEquals(DeltaTestUtils.EXPECTED_ROWS.size(), actualSampledRows.size()); + @Parameterized.Parameter(0) + public String deltaTablePath; + @Parameterized.Parameter(1) + public InputRowSchema schema; + @Parameterized.Parameter(2) + public List> expectedRows; - for (int idx = 0; idx < DeltaTestUtils.EXPECTED_ROWS.size(); idx++) { - Map expectedRow = DeltaTestUtils.EXPECTED_ROWS.get(idx); - InputRowListPlusRawValues actualSampledRow = actualSampledRows.get(idx); - 
Assert.assertNull(actualSampledRow.getParseException()); + @Test + public void testSampleDeltaTable() throws IOException + { + final DeltaInputSource deltaInputSource = new DeltaInputSource(deltaTablePath, null, null); + final InputSourceReader inputSourceReader = deltaInputSource.reader(schema, null, null); - Map actualSampledRawVals = actualSampledRow.getRawValues(); - Assert.assertNotNull(actualSampledRawVals); - Assert.assertNotNull(actualSampledRow.getRawValuesList()); - Assert.assertEquals(1, actualSampledRow.getRawValuesList().size()); + List actualSampledRows = sampleAllRows(inputSourceReader); + Assert.assertEquals(expectedRows.size(), actualSampledRows.size()); - for (String key : expectedRow.keySet()) { - if (DeltaTestUtils.FULL_SCHEMA.getTimestampSpec().getTimestampColumn().equals(key)) { - final long expectedMillis = (Long) expectedRow.get(key); - Assert.assertEquals(expectedMillis, actualSampledRawVals.get(key)); - } else { - Assert.assertEquals(expectedRow.get(key), actualSampledRawVals.get(key)); + for (int idx = 0; idx < expectedRows.size(); idx++) { + Map expectedRow = expectedRows.get(idx); + InputRowListPlusRawValues actualSampledRow = actualSampledRows.get(idx); + Assert.assertNull(actualSampledRow.getParseException()); + + Map actualSampledRawVals = actualSampledRow.getRawValues(); + Assert.assertNotNull(actualSampledRawVals); + Assert.assertNotNull(actualSampledRow.getRawValuesList()); + Assert.assertEquals(1, actualSampledRow.getRawValuesList().size()); + + for (String key : expectedRow.keySet()) { + if (!schema.getColumnsFilter().apply(key)) { + Assert.assertNull(actualSampledRawVals.get(key)); + } else { + if (schema.getTimestampSpec().getTimestampColumn().equals(key)) { + final long expectedMillis = (Long) expectedRow.get(key); + Assert.assertEquals(expectedMillis, actualSampledRawVals.get(key)); + } else { + Assert.assertEquals(expectedRow.get(key), actualSampledRawVals.get(key)); + } + } } } } - } - @Test - public void 
testReadAllDeltaTable() throws IOException - { - final DeltaInputSource deltaInputSource = new DeltaInputSource(DeltaTestUtils.DELTA_TABLE_PATH, null); - final InputSourceReader inputSourceReader = deltaInputSource.reader( - DeltaTestUtils.FULL_SCHEMA, - null, - null - ); - final List actualReadRows = readAllRows(inputSourceReader); - validateRows(DeltaTestUtils.EXPECTED_ROWS, actualReadRows, DeltaTestUtils.FULL_SCHEMA); + @Test + public void testReadDeltaTable() throws IOException + { + final DeltaInputSource deltaInputSource = new DeltaInputSource(deltaTablePath, null, null); + final InputSourceReader inputSourceReader = deltaInputSource.reader(schema, null, null); + final List actualReadRows = readAllRows(inputSourceReader); + validateRows(expectedRows, actualReadRows, schema); + } } - @Test - public void testReadAllDeltaTableSubSchema1() throws IOException + @RunWith(Parameterized.class) + public static class FilterParameterTests { - final DeltaInputSource deltaInputSource = new DeltaInputSource(DeltaTestUtils.DELTA_TABLE_PATH, null); - final InputSourceReader inputSourceReader = deltaInputSource.reader( - DeltaTestUtils.SCHEMA_1, - null, - null - ); - final List actualReadRows = readAllRows(inputSourceReader); - validateRows(DeltaTestUtils.EXPECTED_ROWS, actualReadRows, DeltaTestUtils.SCHEMA_1); - } + @Parameterized.Parameters + public static Object[][] data() + { + return new Object[][]{ + { + PartitionedDeltaTable.DELTA_TABLE_PATH, + new DeltaEqualsFilter("name", "Employee2"), + PartitionedDeltaTable.FULL_SCHEMA, + filterExpectedRows( + PartitionedDeltaTable.EXPECTED_ROWS, + row -> row.get("name").equals("Employee2") + ) + }, + { + PartitionedDeltaTable.DELTA_TABLE_PATH, + new DeltaGreaterThanFilter("name", "Employee3"), + PartitionedDeltaTable.FULL_SCHEMA, + filterExpectedRows( + PartitionedDeltaTable.EXPECTED_ROWS, + row -> ((String) row.get("name")).compareTo("Employee3") > 0 + ) + }, + { + PartitionedDeltaTable.DELTA_TABLE_PATH, + new 
DeltaLessThanOrEqualsFilter("name", "Employee4"), + PartitionedDeltaTable.FULL_SCHEMA, + filterExpectedRows( + PartitionedDeltaTable.EXPECTED_ROWS, + row -> ((String) row.get("name")).compareTo("Employee4") <= 0 + ) + }, + { + PartitionedDeltaTable.DELTA_TABLE_PATH, + new DeltaAndFilter( + Arrays.asList( + new DeltaEqualsFilter("name", "Employee1"), + new DeltaEqualsFilter("name", "Employee4") + ) + ), + PartitionedDeltaTable.FULL_SCHEMA, + filterExpectedRows( + PartitionedDeltaTable.EXPECTED_ROWS, + row -> row.get("name").equals("Employee1") && row.get("name").equals("Employee4") + ) + }, + { + PartitionedDeltaTable.DELTA_TABLE_PATH, + new DeltaOrFilter( + Arrays.asList( + new DeltaEqualsFilter("name", "Employee5"), + new DeltaEqualsFilter("name", "Employee1") + ) + ), + PartitionedDeltaTable.FULL_SCHEMA, + filterExpectedRows( + PartitionedDeltaTable.EXPECTED_ROWS, + row -> row.get("name").equals("Employee5") || row.get("name").equals("Employee1") + ) + }, + { + PartitionedDeltaTable.DELTA_TABLE_PATH, + new DeltaNotFilter( + new DeltaOrFilter( + Arrays.asList( + new DeltaEqualsFilter("name", "Employee5"), + new DeltaEqualsFilter("name", "Employee1") + ) + ) + ), + PartitionedDeltaTable.FULL_SCHEMA, + filterExpectedRows( + PartitionedDeltaTable.EXPECTED_ROWS, + row -> !(row.get("name").equals("Employee5") || row.get("name").equals("Employee1")) + ) + }, + { + PartitionedDeltaTable.DELTA_TABLE_PATH, + new DeltaNotFilter( + new DeltaAndFilter( + Arrays.asList( + new DeltaEqualsFilter("name", "Employee1"), + new DeltaEqualsFilter("name", "Employee4") + ) + ) + ), + PartitionedDeltaTable.FULL_SCHEMA, + filterExpectedRows( + PartitionedDeltaTable.EXPECTED_ROWS, + row -> (!(row.get("name").equals("Employee1") && row.get("name").equals("Employee4"))) + ) + }, + { + PartitionedDeltaTable.DELTA_TABLE_PATH, + new DeltaNotFilter( + new DeltaOrFilter( + Arrays.asList( + new DeltaEqualsFilter("name", "Employee1"), + new DeltaGreaterThanOrEqualsFilter("name", "Employee4") + ) + 
) + ), + PartitionedDeltaTable.FULL_SCHEMA, + filterExpectedRows( + PartitionedDeltaTable.EXPECTED_ROWS, + row -> (!(row.get("name").equals("Employee1") || ((String) row.get("name")).compareTo("Employee4") >= 0)) + ) + } + }; + } - @Test - public void testReadAllDeltaTableWithSubSchema2() throws IOException - { - final DeltaInputSource deltaInputSource = new DeltaInputSource(DeltaTestUtils.DELTA_TABLE_PATH, null); - final InputSourceReader inputSourceReader = deltaInputSource.reader( - DeltaTestUtils.SCHEMA_2, - null, - null - ); - final List actualReadRows = readAllRows(inputSourceReader); - validateRows(DeltaTestUtils.EXPECTED_ROWS, actualReadRows, DeltaTestUtils.SCHEMA_2); - } + @Parameterized.Parameter(0) + public String deltaTablePath; + @Parameterized.Parameter(1) + public DeltaFilter filter; + @Parameterized.Parameter(2) + public InputRowSchema schema; + @Parameterized.Parameter(3) + public List> expectedRows; - @Test - public void testDeltaLakeWithCreateSplits() - { - final DeltaInputSource deltaInputSource = new DeltaInputSource(DeltaTestUtils.DELTA_TABLE_PATH, null); - final List> splits = deltaInputSource.createSplits(null, null) - .collect(Collectors.toList()); - Assert.assertEquals(DeltaTestUtils.SPLIT_TO_EXPECTED_ROWS.size(), splits.size()); + @Test + public void testSampleDeltaTable() throws IOException + { + final DeltaInputSource deltaInputSource = new DeltaInputSource(deltaTablePath, null, filter); + final InputSourceReader inputSourceReader = deltaInputSource.reader(schema, null, null); - for (InputSplit split : splits) { - final DeltaSplit deltaSplit = split.get(); - final DeltaInputSource deltaInputSourceWithSplit = new DeltaInputSource( - DeltaTestUtils.DELTA_TABLE_PATH, - deltaSplit - ); - List> splitsResult = deltaInputSourceWithSplit.createSplits(null, null) - .collect(Collectors.toList()); - Assert.assertEquals(1, splitsResult.size()); - Assert.assertEquals(deltaSplit, splitsResult.get(0).get()); + List actualSampledRows = 
sampleAllRows(inputSourceReader); + Assert.assertEquals(expectedRows.size(), actualSampledRows.size()); + + for (int idx = 0; idx < expectedRows.size(); idx++) { + Map expectedRow = expectedRows.get(idx); + InputRowListPlusRawValues actualSampledRow = actualSampledRows.get(idx); + Assert.assertNull(actualSampledRow.getParseException()); + + Map actualSampledRawVals = actualSampledRow.getRawValues(); + Assert.assertNotNull(actualSampledRawVals); + Assert.assertNotNull(actualSampledRow.getRawValuesList()); + Assert.assertEquals(1, actualSampledRow.getRawValuesList().size()); + + for (String key : expectedRow.keySet()) { + if (!schema.getColumnsFilter().apply(key)) { + Assert.assertNull(actualSampledRawVals.get(key)); + } else { + if (schema.getTimestampSpec().getTimestampColumn().equals(key)) { + final long expectedMillis = (Long) expectedRow.get(key); + Assert.assertEquals(expectedMillis, actualSampledRawVals.get(key)); + } else { + Assert.assertEquals(expectedRow.get(key), actualSampledRawVals.get(key)); + } + } + } + } } - } - @Test - public void testDeltaLakeWithReadSplits() throws IOException - { - final DeltaInputSource deltaInputSource = new DeltaInputSource(DeltaTestUtils.DELTA_TABLE_PATH, null); - final List> splits = deltaInputSource.createSplits(null, null) - .collect(Collectors.toList()); - Assert.assertEquals(DeltaTestUtils.SPLIT_TO_EXPECTED_ROWS.size(), splits.size()); + private static List> filterExpectedRows( + final List> rows, + final Predicate> filter + ) + { + return rows.stream().filter(filter).collect(Collectors.toList()); + } - for (int idx = 0; idx < splits.size(); idx++) { - final InputSplit split = splits.get(idx); - final DeltaSplit deltaSplit = split.get(); - final DeltaInputSource deltaInputSourceWithSplit = new DeltaInputSource( - DeltaTestUtils.DELTA_TABLE_PATH, - deltaSplit - ); - final InputSourceReader inputSourceReader = deltaInputSourceWithSplit.reader( - DeltaTestUtils.FULL_SCHEMA, - null, - null - ); - final List 
actualRowsInSplit = readAllRows(inputSourceReader); - final List> expectedRowsInSplit = DeltaTestUtils.SPLIT_TO_EXPECTED_ROWS.get(idx); - validateRows(expectedRowsInSplit, actualRowsInSplit, DeltaTestUtils.FULL_SCHEMA); + @Test + public void testReadDeltaTable() throws IOException + { + final DeltaInputSource deltaInputSource = new DeltaInputSource(deltaTablePath, null, filter); + final InputSourceReader inputSourceReader = deltaInputSource.reader(schema, null, null); + final List actualReadRows = readAllRows(inputSourceReader); + validateRows(expectedRows, actualReadRows, schema); } } - @Test - public void testNullTable() + public static class InvalidInputTests { - MatcherAssert.assertThat( - Assert.assertThrows( - DruidException.class, - () -> new DeltaInputSource(null, null) - ), - DruidExceptionMatcher.invalidInput().expectMessageIs( - "tablePath cannot be null." - ) - ); - } + @Test + public void testNullTable() + { + MatcherAssert.assertThat( + Assert.assertThrows( + DruidException.class, + () -> new DeltaInputSource(null, null, null) + ), + DruidExceptionMatcher.invalidInput().expectMessageIs( + "tablePath cannot be null." + ) + ); + } - @Test - public void testSplitNonExistentTable() - { - final DeltaInputSource deltaInputSource = new DeltaInputSource("non-existent-table", null); + @Test + public void testSplitNonExistentTable() + { + final DeltaInputSource deltaInputSource = new DeltaInputSource("non-existent-table", null, null); - MatcherAssert.assertThat( - Assert.assertThrows( - DruidException.class, - () -> deltaInputSource.createSplits(null, null) - ), - DruidExceptionMatcher.invalidInput().expectMessageIs( - "tablePath[non-existent-table] not found." - ) - ); - } + MatcherAssert.assertThat( + Assert.assertThrows( + DruidException.class, + () -> deltaInputSource.createSplits(null, null) + ), + DruidExceptionMatcher.invalidInput().expectMessageIs( + "tablePath[non-existent-table] not found." 
+ ) + ); + } - @Test - public void testReadNonExistentTable() - { - final DeltaInputSource deltaInputSource = new DeltaInputSource("non-existent-table", null); + @Test + public void testReadNonExistentTable() + { + final DeltaInputSource deltaInputSource = new DeltaInputSource("non-existent-table", null, null); - MatcherAssert.assertThat( - Assert.assertThrows( - DruidException.class, - () -> deltaInputSource.reader(null, null, null) - ), - DruidExceptionMatcher.invalidInput().expectMessageIs( - "tablePath[non-existent-table] not found." - ) - ); + MatcherAssert.assertThat( + Assert.assertThrows( + DruidException.class, + () -> deltaInputSource.reader(null, null, null) + ), + DruidExceptionMatcher.invalidInput().expectMessageIs( + "tablePath[non-existent-table] not found." + ) + ); + } } - private List sampleAllRows(InputSourceReader reader) throws IOException + private static List sampleAllRows(InputSourceReader reader) throws IOException { List rows = new ArrayList<>(); try (CloseableIterator iterator = reader.sample()) { @@ -218,7 +371,7 @@ private List sampleAllRows(InputSourceReader reader) return rows; } - private List readAllRows(InputSourceReader reader) throws IOException + private static List readAllRows(InputSourceReader reader) throws IOException { final List rows = new ArrayList<>(); try (CloseableIterator iterator = reader.read()) { @@ -227,7 +380,7 @@ private List readAllRows(InputSourceReader reader) throws IOException return rows; } - private void validateRows( + private static void validateRows( final List> expectedRows, final List actualReadRows, final InputRowSchema schema diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/input/DeltaTestUtils.java b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/input/DeltaTestUtils.java index 180adaefcfb5..96696e7b6a43 100644 --- 
a/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/input/DeltaTestUtils.java +++ b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/input/DeltaTestUtils.java @@ -19,9 +19,6 @@ package org.apache.druid.delta.input; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; import io.delta.kernel.Scan; import io.delta.kernel.ScanBuilder; import io.delta.kernel.Snapshot; @@ -29,290 +26,12 @@ import io.delta.kernel.TableNotFoundException; import io.delta.kernel.client.TableClient; import io.delta.kernel.types.StructType; -import org.apache.druid.data.input.ColumnsFilter; -import org.apache.druid.data.input.InputRowSchema; -import org.apache.druid.data.input.impl.DimensionsSpec; -import org.apache.druid.data.input.impl.DoubleDimensionSchema; -import org.apache.druid.data.input.impl.FloatDimensionSchema; -import org.apache.druid.data.input.impl.LongDimensionSchema; -import org.apache.druid.data.input.impl.StringDimensionSchema; -import org.apache.druid.data.input.impl.TimestampSpec; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - -/** - * Refer to extensions-contrib/druid-deltalake-extensions/src/test/resources/README.md to generate the - * sample Delta Lake table used in the unit tests. - */ public class DeltaTestUtils { - /** - * The Delta table path used by unit tests. - */ - public static final String DELTA_TABLE_PATH = "src/test/resources/employee-delta-table"; - /** - * The list of dimensions in the Delta table {@link #DELTA_TABLE_PATH}. 
- */ - public static final List DIMENSIONS = ImmutableList.of( - "id", - "birthday", - "name", - "age", - "salary", - "bonus", - "yoe", - "is_fulltime", - "last_vacation_time" - ); - - /** - * The expected set of rows from the first checkpoint file {@code DELTA_TABLE_PATH/_delta_log/00000000000000000000.json} - */ - private static final List> SPLIT_0_EXPECTED_ROWS = new ArrayList<>( - ImmutableList.of( - ImmutableMap.of( - "birthday", 1057881600L, - "name", "Employee1", - "id", 867799346L, - "salary", 87642.55209817083, - "age", (short) 20, - "yoe", 4 - ), - ImmutableMap.of( - "birthday", 1035417600L, - "is_fulltime", false, - "name", "Employee2", - "id", 9963151889L, - "salary", 79404.63969727767, - "age", (short) 21, - "yoe", 2 - ), - ImmutableMap.of( - "birthday", 890179200L, - "name", "Employee3", - "id", 2766777393L, - "salary", 92418.21424435009, - "age", (short) 25, - "yoe", 9 - ), - ImmutableMap.of( - "birthday", 1073001600L, - "name", "Employee4", - "id", 6320361986L, - "salary", 97907.76612488469, - "age", (short) 20, - "yoe", 3 - ), - ImmutableMap.of( - "birthday", 823996800L, - "is_fulltime", true, - "bonus", 4982.215f, - "name", "Employee5", - "id", 7068152260L, - "salary", 79037.77202099308, - "last_vacation_time", 1706256972000L, - "age", (short) 27, - "yoe", 9 - ) - ) - ); - - /** - * The expected rows from second checkpoint file {@code DELTA_TABLE_PATH/_delta_log/00000000000000000001.json} - */ - private static final List> SPLIT_1_EXPECTED_ROWS = new ArrayList<>( - ImmutableList.of( - ImmutableMap.of( - "birthday", 937526400L, - "is_fulltime", false, - "name", "Employee1", - "id", 4693651733L, - "salary", 83845.11357786917, - "age", (short) 24, - "yoe", 3 - ), - ImmutableMap.of( - "birthday", 810777600L, - "is_fulltime", false, - "name", "Employee2", - "id", 7132772589L, - "salary", 90140.44051385639, - "age", (short) 28, - "yoe", 8 - ), - ImmutableMap.of( - "birthday", 1104969600L, - "is_fulltime", true, - "bonus", 3699.0881f, - "name", 
"Employee3", - "id", 6627278510L, - "salary", 58857.27649436368, - "last_vacation_time", 1706458554000L, - "age", (short) 19, - "yoe", 4 - ), - ImmutableMap.of( - "birthday", 763257600L, - "is_fulltime", true, - "bonus", 2334.6675f, - "name", "Employee4", - "id", 4786204912L, - "salary", 93646.81222022788, - "last_vacation_time", 1706390154000L, - "age", (short) 29, - "yoe", 5 - ), - ImmutableMap.of( - "birthday", 1114646400L, - "name", "Employee5", - "id", 2773939764L, - "salary", 66300.05339373322, - "age", (short) 18, - "yoe", 3 - ), - ImmutableMap.of( - "birthday", 913334400L, - "is_fulltime", false, - "name", "Employee6", - "id", 8333438088L, - "salary", 59219.5257906128, - "age", (short) 25, - "yoe", 4 - ), - ImmutableMap.of( - "birthday", 893894400L, - "is_fulltime", false, - "name", "Employee7", - "id", 8397454007L, - "salary", 61909.733851830584, - "age", (short) 25, - "yoe", 8 - ), - ImmutableMap.of( - "birthday", 1038873600L, - "is_fulltime", true, - "bonus", 3000.0154f, - "name", "Employee8", - "id", 8925359945L, - "salary", 76588.05471316943, - "last_vacation_time", 1706195754000L, - "age", (short) 21, - "yoe", 1 - ), - ImmutableMap.of( - "birthday", 989798400L, - "is_fulltime", true, - "bonus", 4463.3833f, - "name", "Employee9", - "id", 8154788551L, - "salary", 59787.98539015684, - "last_vacation_time", 1706181354000L, - "age", (short) 22, - "yoe", 4 - ), - ImmutableMap.of( - "birthday", 912297600L, - "is_fulltime", false, - "name", "Employee10", - "id", 5884382356L, - "salary", 51565.91965119349, - "age", (short) 25, - "yoe", 9 - ) - ) - ); - - /** - * Mapping of checkpoint file identifier to the list of expected rows in that checkpoint. - */ - public static final Map>> SPLIT_TO_EXPECTED_ROWS = new HashMap<>( - ImmutableMap.of( - 0, SPLIT_0_EXPECTED_ROWS, - 1, SPLIT_1_EXPECTED_ROWS - ) - ); - - /** - * Complete set of expected rows across all checkpoint files for {@link #DELTA_TABLE_PATH}. 
- */ - public static final List> EXPECTED_ROWS = SPLIT_TO_EXPECTED_ROWS.values().stream() - .flatMap(List::stream) - .collect(Collectors.toList()); - - /** - * The Druid schema used for ingestion of {@link #DELTA_TABLE_PATH}. - */ - public static final InputRowSchema FULL_SCHEMA = new InputRowSchema( - new TimestampSpec("birthday", "posix", null), - new DimensionsSpec( - ImmutableList.of( - new LongDimensionSchema("id"), - new LongDimensionSchema("birthday"), - new StringDimensionSchema("name"), - new LongDimensionSchema("age"), - new DoubleDimensionSchema("salary"), - new FloatDimensionSchema("bonus"), - new LongDimensionSchema("yoe"), - new StringDimensionSchema("is_fulltime"), - new LongDimensionSchema("last_vacation_time") - ) - ), - ColumnsFilter.all() - ); - - /** - * Similar to {@link #FULL_SCHEMA}, but with a smaller set of columns with an inclusion filter applied. - */ - public static final InputRowSchema SCHEMA_1 = new InputRowSchema( - new TimestampSpec("birthday", "posix", null), - new DimensionsSpec( - ImmutableList.of( - new LongDimensionSchema("id"), - new LongDimensionSchema("birthday"), - new StringDimensionSchema("name"), - new LongDimensionSchema("age"), - new DoubleDimensionSchema("salary"), - new FloatDimensionSchema("bonus"), - new LongDimensionSchema("yoe"), - new StringDimensionSchema("is_fulltime"), - new LongDimensionSchema("last_vacation_time") - ) - ), - ColumnsFilter.inclusionBased(ImmutableSet.of("id", "birthday", "name", "is_fulltime")) - ); - - /** - * Similar to {@link #FULL_SCHEMA}, but with a smaller set of columns with an exclusion filter applied. A non-existent - * column is added to the exclusion filter - it should silently get thrown away. 
- */ - public static final InputRowSchema SCHEMA_2 = new InputRowSchema( - new TimestampSpec("birthday", "posix", null), - new DimensionsSpec( - ImmutableList.of( - new LongDimensionSchema("id"), - new LongDimensionSchema("birthday"), - new StringDimensionSchema("name"), - new LongDimensionSchema("age"), - new DoubleDimensionSchema("salary"), - new FloatDimensionSchema("bonus"), - new LongDimensionSchema("yoe"), - new StringDimensionSchema("is_fulltime"), - new LongDimensionSchema("last_vacation_time") - ) - ), - ColumnsFilter.exclusionBased(ImmutableSet.of("last_vacation_time", "bonus", "non_existent_column")) - ); - - /** - * A simple wrapper that builds the table scan for {@link #DELTA_TABLE_PATH} meant to be used in tests. - */ - public static Scan getScan(final TableClient tableClient) throws TableNotFoundException + public static Scan getScan(final TableClient tableClient, final String deltaTablePath) throws TableNotFoundException { - final Table table = Table.forPath(tableClient, DELTA_TABLE_PATH); + final Table table = Table.forPath(tableClient, deltaTablePath); final Snapshot snapshot = table.getLatestSnapshot(tableClient); final StructType readSchema = snapshot.getSchema(tableClient); final ScanBuilder scanBuilder = snapshot.getScanBuilder(tableClient) diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/input/NonPartitionedDeltaTable.java b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/input/NonPartitionedDeltaTable.java new file mode 100644 index 000000000000..eaf19ad418ce --- /dev/null +++ b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/input/NonPartitionedDeltaTable.java @@ -0,0 +1,306 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.delta.input; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import org.apache.druid.data.input.ColumnsFilter; +import org.apache.druid.data.input.InputRowSchema; +import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.DoubleDimensionSchema; +import org.apache.druid.data.input.impl.FloatDimensionSchema; +import org.apache.druid.data.input.impl.LongDimensionSchema; +import org.apache.druid.data.input.impl.StringDimensionSchema; +import org.apache.druid.data.input.impl.TimestampSpec; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +/** + * Refer to extensions-contrib/druid-deltalake-extensions/src/test/resources/README.md to generate the + * sample Delta Lake table used in the unit tests. + * + *

<p> + * For a partitioned delta table sample, see {@link PartitionedDeltaTable}. + * </p>

+ */ +public class NonPartitionedDeltaTable +{ + /** + * The non-partitioned Delta table path used by unit tests. + */ + public static final String DELTA_TABLE_PATH = "src/test/resources/employee-delta-table"; + /** + * The list of dimensions in the Delta table {@link #DELTA_TABLE_PATH}. + */ + public static final List DIMENSIONS = ImmutableList.of( + "id", + "birthday", + "name", + "age", + "salary", + "bonus", + "yoe", + "is_fulltime", + "last_vacation_time" + ); + + /** + * The expected set of rows from the first checkpoint file {@code {@link #DELTA_TABLE_PATH}/_delta_log/00000000000000000000.json} + */ + private static final List> SPLIT_0_EXPECTED_ROWS = new ArrayList<>( + ImmutableList.of( + ImmutableMap.of( + "birthday", 1057881600L, + "name", "Employee1", + "id", 867799346L, + "salary", 87642.55209817083, + "age", (short) 20, + "yoe", 4 + ), + ImmutableMap.of( + "birthday", 1035417600L, + "is_fulltime", false, + "name", "Employee2", + "id", 9963151889L, + "salary", 79404.63969727767, + "age", (short) 21, + "yoe", 2 + ), + ImmutableMap.of( + "birthday", 890179200L, + "name", "Employee3", + "id", 2766777393L, + "salary", 92418.21424435009, + "age", (short) 25, + "yoe", 9 + ), + ImmutableMap.of( + "birthday", 1073001600L, + "name", "Employee4", + "id", 6320361986L, + "salary", 97907.76612488469, + "age", (short) 20, + "yoe", 3 + ), + ImmutableMap.of( + "birthday", 823996800L, + "is_fulltime", true, + "bonus", 4982.215f, + "name", "Employee5", + "id", 7068152260L, + "salary", 79037.77202099308, + "last_vacation_time", 1706256972000L, + "age", (short) 27, + "yoe", 9 + ) + ) + ); + + /** + * The expected rows from second checkpoint file {@code DELTA_TABLE_PATH/_delta_log/00000000000000000001.json} + */ + private static final List> SPLIT_1_EXPECTED_ROWS = new ArrayList<>( + ImmutableList.of( + ImmutableMap.of( + "birthday", 937526400L, + "is_fulltime", false, + "name", "Employee1", + "id", 4693651733L, + "salary", 83845.11357786917, + "age", (short) 24, + "yoe", 3 
+ ), + ImmutableMap.of( + "birthday", 810777600L, + "is_fulltime", false, + "name", "Employee2", + "id", 7132772589L, + "salary", 90140.44051385639, + "age", (short) 28, + "yoe", 8 + ), + ImmutableMap.of( + "birthday", 1104969600L, + "is_fulltime", true, + "bonus", 3699.0881f, + "name", "Employee3", + "id", 6627278510L, + "salary", 58857.27649436368, + "last_vacation_time", 1706458554000L, + "age", (short) 19, + "yoe", 4 + ), + ImmutableMap.of( + "birthday", 763257600L, + "is_fulltime", true, + "bonus", 2334.6675f, + "name", "Employee4", + "id", 4786204912L, + "salary", 93646.81222022788, + "last_vacation_time", 1706390154000L, + "age", (short) 29, + "yoe", 5 + ), + ImmutableMap.of( + "birthday", 1114646400L, + "name", "Employee5", + "id", 2773939764L, + "salary", 66300.05339373322, + "age", (short) 18, + "yoe", 3 + ), + ImmutableMap.of( + "birthday", 913334400L, + "is_fulltime", false, + "name", "Employee6", + "id", 8333438088L, + "salary", 59219.5257906128, + "age", (short) 25, + "yoe", 4 + ), + ImmutableMap.of( + "birthday", 893894400L, + "is_fulltime", false, + "name", "Employee7", + "id", 8397454007L, + "salary", 61909.733851830584, + "age", (short) 25, + "yoe", 8 + ), + ImmutableMap.of( + "birthday", 1038873600L, + "is_fulltime", true, + "bonus", 3000.0154f, + "name", "Employee8", + "id", 8925359945L, + "salary", 76588.05471316943, + "last_vacation_time", 1706195754000L, + "age", (short) 21, + "yoe", 1 + ), + ImmutableMap.of( + "birthday", 989798400L, + "is_fulltime", true, + "bonus", 4463.3833f, + "name", "Employee9", + "id", 8154788551L, + "salary", 59787.98539015684, + "last_vacation_time", 1706181354000L, + "age", (short) 22, + "yoe", 4 + ), + ImmutableMap.of( + "birthday", 912297600L, + "is_fulltime", false, + "name", "Employee10", + "id", 5884382356L, + "salary", 51565.91965119349, + "age", (short) 25, + "yoe", 9 + ) + ) + ); + + /** + * Mapping of checkpoint file identifier to the list of expected rows in that checkpoint. 
+ */ + public static final Map>> SPLIT_TO_EXPECTED_ROWS = new HashMap<>( + ImmutableMap.of( + 0, SPLIT_0_EXPECTED_ROWS, + 1, SPLIT_1_EXPECTED_ROWS + ) + ); + + /** + * Complete set of expected rows across all checkpoint files for {@link #DELTA_TABLE_PATH}. + */ + public static final List> EXPECTED_ROWS = SPLIT_TO_EXPECTED_ROWS.values().stream() + .flatMap(List::stream) + .collect(Collectors.toList()); + + /** + * The Druid schema used for ingestion of {@link #DELTA_TABLE_PATH}. + */ + public static final InputRowSchema FULL_SCHEMA = new InputRowSchema( + new TimestampSpec("birthday", "posix", null), + new DimensionsSpec( + ImmutableList.of( + new LongDimensionSchema("id"), + new LongDimensionSchema("birthday"), + new StringDimensionSchema("name"), + new LongDimensionSchema("age"), + new DoubleDimensionSchema("salary"), + new FloatDimensionSchema("bonus"), + new LongDimensionSchema("yoe"), + new StringDimensionSchema("is_fulltime"), + new LongDimensionSchema("last_vacation_time") + ) + ), + ColumnsFilter.all() + ); + + /** + * Similar to {@link #FULL_SCHEMA}, but with a smaller set of columns with an inclusion filter applied. + */ + public static final InputRowSchema SCHEMA_1 = new InputRowSchema( + new TimestampSpec("birthday", "posix", null), + new DimensionsSpec( + ImmutableList.of( + new LongDimensionSchema("id"), + new LongDimensionSchema("birthday"), + new StringDimensionSchema("name"), + new LongDimensionSchema("age"), + new DoubleDimensionSchema("salary"), + new FloatDimensionSchema("bonus"), + new LongDimensionSchema("yoe"), + new StringDimensionSchema("is_fulltime"), + new LongDimensionSchema("last_vacation_time") + ) + ), + ColumnsFilter.inclusionBased(ImmutableSet.of("id", "birthday", "name", "is_fulltime")) + ); + + /** + * Similar to {@link #FULL_SCHEMA}, but with a smaller set of columns with an exclusion filter applied. A non-existent + * column is added to the exclusion filter - it should silently get thrown away. 
+ */ + public static final InputRowSchema SCHEMA_2 = new InputRowSchema( + new TimestampSpec("birthday", "posix", null), + new DimensionsSpec( + ImmutableList.of( + new LongDimensionSchema("id"), + new LongDimensionSchema("birthday"), + new StringDimensionSchema("name"), + new LongDimensionSchema("age"), + new DoubleDimensionSchema("salary"), + new FloatDimensionSchema("bonus"), + new LongDimensionSchema("yoe"), + new StringDimensionSchema("is_fulltime"), + new LongDimensionSchema("last_vacation_time") + ) + ), + ColumnsFilter.exclusionBased(ImmutableSet.of("last_vacation_time", "bonus", "non_existent_column")) + ); +} diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/input/PartitionedDeltaTable.java b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/input/PartitionedDeltaTable.java new file mode 100644 index 000000000000..60f6bb6f0646 --- /dev/null +++ b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/input/PartitionedDeltaTable.java @@ -0,0 +1,258 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.delta.input; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.apache.druid.data.input.ColumnsFilter; +import org.apache.druid.data.input.InputRowSchema; +import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.DoubleDimensionSchema; +import org.apache.druid.data.input.impl.FloatDimensionSchema; +import org.apache.druid.data.input.impl.LongDimensionSchema; +import org.apache.druid.data.input.impl.StringDimensionSchema; +import org.apache.druid.data.input.impl.TimestampSpec; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +/** + * Refer to extensions-contrib/druid-deltalake-extensions/src/test/resources/README.md to generate the + * sample partitioned Delta Lake table used in the unit tests. + * + *

<p> + * For an unpartitioned delta table sample, see {@link NonPartitionedDeltaTable}. + * </p>

+ */ +public class PartitionedDeltaTable +{ + /** + * The Delta table path used by unit tests. + */ + public static final String DELTA_TABLE_PATH = "src/test/resources/employee-delta-table-partitioned-name"; + + /** + * The list of dimensions in the Delta table {@link #DELTA_TABLE_PATH}. + */ + public static final List<String> DIMENSIONS = ImmutableList.of( + "id", + "birthday", + "name", + "age", + "salary", + "bonus", + "yoe", + "is_fulltime", + "last_vacation_time" + ); + + /** + * The expected rows of split 0, i.e. the rows added by {@code DELTA_TABLE_PATH/_delta_log/00000000000000000002.json} + */ + private static final List<Map<String, Object>> SPLIT_0_EXPECTED_ROWS = new ArrayList<>( + ImmutableList.of( + ImmutableMap.of( + "birthday", 898992000L, + "name", "Employee1", + "id", 1726247710L, + "salary", 77928.75048595395, + "age", (short) 25, + "yoe", 3 + ), + ImmutableMap.of( + "birthday", 783475200L, + "is_fulltime", true, + "name", "Employee2", + "id", 6142474489L, + "salary", 57807.64358288189, + "age", (short) 29, + "yoe", 1 + ), + ImmutableMap.of( + "birthday", 989712000L, + "name", "Employee3", + "id", 3550221591L, + "salary", 58226.41814823942, + "age", (short) 22, + "yoe", 6 + ), + ImmutableMap.of( + "birthday", 1130025600L, + "name", "Employee4", + "id", 3822742702L, + "salary", 63581.29293955827, + "age", (short) 18, + "yoe", 2 + ), + ImmutableMap.of( + "birthday", 1001116800L, + "name", "Employee5", + "id", 5611620190L, + "salary", 76076.68269796186, + "age", (short) 22, + "yoe", 3 + ) + ) + ); + + /** + * The expected rows of split 1, i.e. the rows added by {@code DELTA_TABLE_PATH/_delta_log/00000000000000000001.json} + */ + private static final List<Map<String, Object>> SPLIT_1_EXPECTED_ROWS = new ArrayList<>( + ImmutableList.of( + ImmutableMap.of( + "birthday", 1058227200L, + "is_fulltime", false, + "name", "Employee1", + "id", 74065452L, + "salary", 73109.56096784897, + "age", (short) 20, + "yoe", 3 + ), + ImmutableMap.of( + "birthday", 930528000L, + "is_fulltime", true, + "name",
"Employee2", + "id", 7246574606L, + "salary", 54723.608212239684, + "age", (short) 24, + "yoe", 5 + ), + ImmutableMap.of( + "birthday", 863654400L, + "is_fulltime", true, + "bonus", 1424.9856f, + "name", "Employee3", + "id", 743868531L, + "salary", 59595.17550553535, + "last_vacation_time", 1712918081000L, + "age", (short) 26, + "yoe", 8 + ), + ImmutableMap.of( + "birthday", 850780800L, + "name", "Employee4", + "id", 4750981713L, + "salary", 85673.13564089558, + "age", (short) 27, + "yoe", 8 + ), + ImmutableMap.of( + "birthday", 986256000L, + "name", "Employee5", + "id", 2605140287L, + "salary", 56740.37076828715, + "age", (short) 23, + "yoe", 5 + ) + ) + ); + + /** + * The expected rows of split 2, i.e. the rows added by {@code DELTA_TABLE_PATH/_delta_log/00000000000000000000.json} + */ + private static final List<Map<String, Object>> SPLIT_2_EXPECTED_ROWS = new ArrayList<>( + ImmutableList.of( + ImmutableMap.of( + "birthday", 885168000L, + "name", "Employee1", + "id", 4922151803L, + "salary", 63418.10754490299, + "age", (short) 26, + "yoe", 10 + ), + ImmutableMap.of( + "birthday", 806198400L, + "name", "Employee2", + "id", 9345771736L, + "salary", 58610.730719740226, + "age", (short) 28, + "yoe", 10 + ), + ImmutableMap.of( + "birthday", 1120435200L, + "name", "Employee3", + "id", 4740025087L, + "salary", 63256.1008903906, + "age", (short) 18, + "yoe", 1 + ), + ImmutableMap.of( + "birthday", 968284800L, + "is_fulltime", false, + "name", "Employee4", + "id", 655456941L, + "salary", 95552.47057273184, + "age", (short) 23, + "yoe", 1 + ), + ImmutableMap.of( + "birthday", 1124841600L, + "name", "Employee5", + "id", 5565370685L, + "salary", 74066.92920109774, + "age", (short) 18, + "yoe", 1 + ) + ) + ); + + /** + * Mapping of split number to the list of expected rows in that split.
+ */ + public static final Map<Integer, List<Map<String, Object>>> SPLIT_TO_EXPECTED_ROWS = new HashMap<>( + ImmutableMap.of( + 0, SPLIT_0_EXPECTED_ROWS, + 1, SPLIT_1_EXPECTED_ROWS, + 2, SPLIT_2_EXPECTED_ROWS + ) + ); + + /** + * Complete set of expected rows across all checkpoint files for {@link #DELTA_TABLE_PATH}. + */ + public static final List<Map<String, Object>> EXPECTED_ROWS = SPLIT_TO_EXPECTED_ROWS.values().stream() + .flatMap(List::stream) + .collect(Collectors.toList()); + + /** + * The Druid schema used for ingestion of {@link #DELTA_TABLE_PATH}. + */ + public static final InputRowSchema FULL_SCHEMA = new InputRowSchema( + new TimestampSpec("birthday", "posix", null), + new DimensionsSpec( + ImmutableList.of( + new LongDimensionSchema("id"), + new LongDimensionSchema("birthday"), + new StringDimensionSchema("name"), + new LongDimensionSchema("age"), + new DoubleDimensionSchema("salary"), + new FloatDimensionSchema("bonus"), + new LongDimensionSchema("yoe"), + new StringDimensionSchema("is_fulltime"), + new LongDimensionSchema("last_vacation_time") + ) + ), + ColumnsFilter.all() + ); +} diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/input/RowSerdeTest.java b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/input/RowSerdeTest.java index eb06f532a021..fe2b85d2f3f7 100644 --- a/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/input/RowSerdeTest.java +++ b/extensions-contrib/druid-deltalake-extensions/src/test/java/org/apache/druid/delta/input/RowSerdeTest.java @@ -25,15 +25,29 @@ import io.delta.kernel.defaults.client.DefaultTableClient; import org.apache.hadoop.conf.Configuration; import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; + +import java.util.Arrays; +import java.util.Collection; public class RowSerdeTest { - @Test - public void testSerializeDeserializeRoundtrip() throws
TableNotFoundException + public static Collection data() + { + Object[][] data = new Object[][]{ + {NonPartitionedDeltaTable.DELTA_TABLE_PATH}, + {PartitionedDeltaTable.DELTA_TABLE_PATH} + }; + return Arrays.asList(data); + } + + @MethodSource("data") + @ParameterizedTest(name = "{index}:with context {0}") + public void testSerializeDeserializeRoundtrip(final String tablePath) throws TableNotFoundException { final DefaultTableClient tableClient = DefaultTableClient.create(new Configuration()); - final Scan scan = DeltaTestUtils.getScan(tableClient); + final Scan scan = DeltaTestUtils.getScan(tableClient, tablePath); final Row scanState = scan.getScanState(tableClient); final String rowJson = RowSerde.serializeRowToJson(scanState); @@ -41,5 +55,4 @@ public void testSerializeDeserializeRoundtrip() throws TableNotFoundException Assert.assertEquals(scanState.getSchema(), row.getSchema()); } - } diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/README.md b/extensions-contrib/druid-deltalake-extensions/src/test/resources/README.md index c524adec4b58..f1ac54fb8b30 100644 --- a/extensions-contrib/druid-deltalake-extensions/src/test/resources/README.md +++ b/extensions-contrib/druid-deltalake-extensions/src/test/resources/README.md @@ -44,25 +44,43 @@ Delta table to `resources/employee-delta-table`. You can override the defaults b ```shell python3 create_delta_table.py -h -usage: create_delta_table.py [-h] [--save_mode {append,overwrite}] [--save_path SAVE_PATH] [--num_records NUM_RECORDS] +usage: create_delta_table.py [-h] --save_path SAVE_PATH [--save_mode {append,overwrite}] [--partitioned_by {date,name}] [--num_records NUM_RECORDS] Script to write a Delta Lake table. 
-optional arguments: +options: -h, --help show this help message and exit + --save_path SAVE_PATH + Save path for Delta table (default: None) --save_mode {append,overwrite} Specify write mode (append/overwrite) (default: append) - --save_path SAVE_PATH - Save path for Delta table (default: /druid/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table) + --partitioned_by {date,name} + Column to partition the Delta table (default: None) --num_records NUM_RECORDS - Specify number of Delta records to write (default: 10) + Specify number of Delta records to write (default: 5) ``` -The test data in `resources/employee-delta-table` was generated by: +### Non-partitioned table `employee-delta-table`: + +The test data in `resources/employee-delta-table` contains 15 Delta records generated over 2 snapshots. +The table was generated by running the following commands: ```shell -python3 create_delta_table.py -python3 create_delta_table.py --num_records=5 --save_mode=append +python3 create_delta_table.py --save_path=employee-delta-table --num_records=10 +python3 create_delta_table.py --save_path=employee-delta-table +``` + +The resulting Delta table is checked in to the repo. The expected rows to be used in tests are updated in +`NonPartitionedDeltaTable.java` accordingly. + +### Partitioned table `employee-delta-table-partitioned-name`: + +The test data in `resources/employee-delta-table-partitioned-name` contains 15 Delta records generated over 3 snapshots. +This table is partitioned by the name column. The table was generated by running the following commands: +```shell +python3 create_delta_table.py --save_path=employee-delta-table-partitioned-name --partitioned_by=name +python3 create_delta_table.py --save_path=employee-delta-table-partitioned-name --partitioned_by=name +python3 create_delta_table.py --save_path=employee-delta-table-partitioned-name --partitioned_by=name ``` -This creates a total of 15 Delta records across two transactional commits.
The resulting Delta table is checked in -to the repo. The expectated rows `DeltaTestUtils.java` are updated accordingly. +The resulting Delta table is checked in to the repo. The expected rows to be used in tests are updated in +`PartitionedDeltaTable.java` accordingly. diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/create_delta_table.py b/extensions-contrib/druid-deltalake-extensions/src/test/resources/create_delta_table.py index ab9ec87fb005..34a649773047 100755 --- a/extensions-contrib/druid-deltalake-extensions/src/test/resources/create_delta_table.py +++ b/extensions-contrib/druid-deltalake-extensions/src/test/resources/create_delta_table.py @@ -18,7 +18,7 @@ import os import argparse -import delta +from delta import * import pyspark from pyspark.sql.types import StructType, StructField, ShortType, StringType, TimestampType, LongType, IntegerType, DoubleType, FloatType, DateType, BooleanType from datetime import datetime, timedelta @@ -34,7 +34,7 @@ def config_spark_with_delta_lake(): "org.apache.spark.sql.delta.catalog.DeltaCatalog", ) ) - spark = delta.configure_spark_with_delta_pip(builder).getOrCreate() + spark = configure_spark_with_delta_pip(builder).getOrCreate() spark.sparkContext.setLogLevel("ERROR") return spark @@ -94,28 +94,33 @@ def main(): parser = argparse.ArgumentParser(description="Script to write a Delta Lake table.", formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--save_mode', choices=('append', 'overwrite'), default="overwrite", + parser.add_argument('--save_path', default=None, required=True, help="Save path for Delta table") + parser.add_argument('--save_mode', choices=('append', 'overwrite'), default="append", help="Specify write mode (append/overwrite)") - parser.add_argument('--save_path', default=os.path.join(os.getcwd(), "employee-delta-table"), - help="Save path for Delta table") - parser.add_argument('--num_records', type=int, default=10, - help="Specify number of
Delta records to write") + parser.add_argument('--partitioned_by', choices=("date", "name"), default=None, + help="Column to partition the Delta table") + parser.add_argument('--num_records', type=int, default=5, help="Specify number of Delta records to write") args = parser.parse_args() save_mode = args.save_mode save_path = args.save_path num_records = args.num_records + partitioned_by = args.partitioned_by spark = config_spark_with_delta_lake() data, schema = create_dataset(num_records=num_records) df = spark.createDataFrame(data, schema=schema) - df.write.format("delta").mode(save_mode).save(save_path) + if not partitioned_by: + df.write.format("delta").mode(save_mode).save(save_path) + else: + df.write.format("delta").partitionBy("name").mode(save_mode).save(save_path) df.show() - print(f"Generated Delta records to {save_path} in {save_mode} mode with {num_records} records.") + print(f"Generated Delta table records partitioned by {partitioned_by} in {save_path} in {save_mode} mode" + f" with {num_records} records.") if __name__ == "__main__": diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/_delta_log/.00000000000000000000.json.crc b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/_delta_log/.00000000000000000000.json.crc new file mode 100644 index 0000000000000000000000000000000000000000..6f5648550c537145366d7600aaa10a1669f7a00d GIT binary patch literal 44 zcmV+{0Mq|ta$^7h00IDfNhd{Gr%An^bk{i8Pp=zx=Uz-6+Y}W$f#8z`*Tv{%qLD~q Cl@dn) literal 0 HcmV?d00001 diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/_delta_log/.00000000000000000001.json.crc b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/_delta_log/.00000000000000000001.json.crc new file mode 100644 index 
0000000000000000000000000000000000000000..c9e3f49b6c52e4483f91f2652a2a5429bc266e82 GIT binary patch literal 40 wcmYc;N@ieSU}6ZE&Lp3=OO#6}``0UJyF^ZzCkzv!`ON>t`+U9gg?V`o0Pq10f&c&j literal 0 HcmV?d00001 diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/_delta_log/.00000000000000000002.json.crc b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/_delta_log/.00000000000000000002.json.crc new file mode 100644 index 0000000000000000000000000000000000000000..5c6706ead42b36c9243e3f54950dfd72cc370339 GIT binary patch literal 40 wcmYc;N@ieSU}A6-zI|b4`PHxmbC?!&{MHdYGk+UPb&-84*Y3Uy7M9f<05Dt+MF0Q* literal 0 HcmV?d00001 diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/_delta_log/00000000000000000000.json b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..17d65f965e6f --- /dev/null +++ b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/_delta_log/00000000000000000000.json @@ -0,0 +1,8 @@ +{"commitInfo":{"timestamp":1713151902031,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[\"name\"]"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"5","numOutputRows":"5","numOutputBytes":"10199"},"engineInfo":"Apache-Spark/3.5.1 Delta-Lake/3.1.0","txnId":"c79386c6-581f-4624-a5a5-b04298b173d2"}} 
+{"metaData":{"id":"45cfe982-fdfe-4d8c-ad30-7d0eb3acf821","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"birthday\",\"type\":\"date\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"age\",\"type\":\"short\",\"nullable\":true,\"metadata\":{}},{\"name\":\"salary\",\"type\":\"double\",\"nullable\":true,\"metadata\":{}},{\"name\":\"bonus\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"yoe\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"is_fulltime\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"last_vacation_time\",\"type\":\"timestamp\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["name"],"configuration":{},"createdTime":1713151899961}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"add":{"path":"name=Employee1/part-00001-4f11e631-348f-4378-936e-34132f176203.c000.snappy.parquet","partitionValues":{"name":"Employee1"},"size":2034,"modificationTime":1713151901969,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":4922151803,\"birthday\":\"1998-01-19\",\"age\":26,\"salary\":63418.10754490299,\"yoe\":10},\"maxValues\":{\"id\":4922151803,\"birthday\":\"1998-01-19\",\"age\":26,\"salary\":63418.10754490299,\"yoe\":10},\"nullCount\":{\"id\":0,\"birthday\":0,\"age\":0,\"salary\":0,\"bonus\":1,\"yoe\":0,\"is_fulltime\":1,\"last_vacation_time\":1}}"}} 
+{"add":{"path":"name=Employee2/part-00003-07285317-1943-4b24-8962-03543375d133.c000.snappy.parquet","partitionValues":{"name":"Employee2"},"size":2033,"modificationTime":1713151901968,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":9345771736,\"birthday\":\"1995-07-20\",\"age\":28,\"salary\":58610.730719740226,\"yoe\":10},\"maxValues\":{\"id\":9345771736,\"birthday\":\"1995-07-20\",\"age\":28,\"salary\":58610.730719740226,\"yoe\":10},\"nullCount\":{\"id\":0,\"birthday\":0,\"age\":0,\"salary\":0,\"bonus\":1,\"yoe\":0,\"is_fulltime\":1,\"last_vacation_time\":1}}"}} +{"add":{"path":"name=Employee3/part-00005-ac0ede62-3abc-47a3-9eac-c09a3802cd78.c000.snappy.parquet","partitionValues":{"name":"Employee3"},"size":2034,"modificationTime":1713151901968,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":4740025087,\"birthday\":\"2005-07-04\",\"age\":18,\"salary\":63256.1008903906,\"yoe\":1},\"maxValues\":{\"id\":4740025087,\"birthday\":\"2005-07-04\",\"age\":18,\"salary\":63256.1008903906,\"yoe\":1},\"nullCount\":{\"id\":0,\"birthday\":0,\"age\":0,\"salary\":0,\"bonus\":1,\"yoe\":0,\"is_fulltime\":1,\"last_vacation_time\":1}}"}} +{"add":{"path":"name=Employee4/part-00007-45c2fd36-d1e1-4e92-b21c-84d385a8218a.c000.snappy.parquet","partitionValues":{"name":"Employee4"},"size":2049,"modificationTime":1713151901969,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":655456941,\"birthday\":\"2000-09-07\",\"age\":23,\"salary\":95552.47057273184,\"yoe\":1},\"maxValues\":{\"id\":655456941,\"birthday\":\"2000-09-07\",\"age\":23,\"salary\":95552.47057273184,\"yoe\":1},\"nullCount\":{\"id\":0,\"birthday\":0,\"age\":0,\"salary\":0,\"bonus\":1,\"yoe\":0,\"is_fulltime\":0,\"last_vacation_time\":1}}"}} 
+{"add":{"path":"name=Employee5/part-00009-079ed08f-dd8d-434f-a816-c73420234b25.c000.snappy.parquet","partitionValues":{"name":"Employee5"},"size":2049,"modificationTime":1713151901969,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":5565370685,\"birthday\":\"2005-08-24\",\"age\":18,\"salary\":74066.92920109774,\"yoe\":1},\"maxValues\":{\"id\":5565370685,\"birthday\":\"2005-08-24\",\"age\":18,\"salary\":74066.92920109774,\"yoe\":1},\"nullCount\":{\"id\":0,\"birthday\":0,\"age\":0,\"salary\":0,\"bonus\":1,\"yoe\":0,\"is_fulltime\":0,\"last_vacation_time\":1}}"}} diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/_delta_log/00000000000000000001.json b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/_delta_log/00000000000000000001.json new file mode 100644 index 000000000000..f4f7c9e2e9f8 --- /dev/null +++ b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/_delta_log/00000000000000000001.json @@ -0,0 +1,6 @@ +{"commitInfo":{"timestamp":1713152087613,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[\"name\"]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"5","numOutputRows":"5","numOutputBytes":"10643"},"engineInfo":"Apache-Spark/3.5.1 Delta-Lake/3.1.0","txnId":"b1dfdd19-fd45-40e0-bda3-c19beb391488"}} 
+{"add":{"path":"name=Employee1/part-00001-1b911f24-6d69-4065-9c4e-d5fa896dcefe.c000.snappy.parquet","partitionValues":{"name":"Employee1"},"size":2049,"modificationTime":1713152085839,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":74065452,\"birthday\":\"2003-07-15\",\"age\":20,\"salary\":73109.56096784897,\"yoe\":3},\"maxValues\":{\"id\":74065452,\"birthday\":\"2003-07-15\",\"age\":20,\"salary\":73109.56096784897,\"yoe\":3},\"nullCount\":{\"id\":0,\"birthday\":0,\"age\":0,\"salary\":0,\"bonus\":1,\"yoe\":0,\"is_fulltime\":0,\"last_vacation_time\":1}}"}} +{"add":{"path":"name=Employee2/part-00003-090fd396-1c53-4794-97b3-faa0f302984a.c000.snappy.parquet","partitionValues":{"name":"Employee2"},"size":2187,"modificationTime":1713152085839,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":7246574606,\"birthday\":\"1999-06-28\",\"age\":24,\"salary\":54723.608212239684,\"bonus\":1260.9291,\"yoe\":5,\"last_vacation_time\":\"2024-04-13T06:34:41.385-07:00\"},\"maxValues\":{\"id\":7246574606,\"birthday\":\"1999-06-28\",\"age\":24,\"salary\":54723.608212239684,\"bonus\":1260.9291,\"yoe\":5,\"last_vacation_time\":\"2024-04-13T06:34:41.385-07:00\"},\"nullCount\":{\"id\":0,\"birthday\":0,\"age\":0,\"salary\":0,\"bonus\":0,\"yoe\":0,\"is_fulltime\":0,\"last_vacation_time\":0}}"}} 
+{"add":{"path":"name=Employee3/part-00005-32e5492c-7ebf-407e-8ecf-03add4ee14b8.c000.snappy.parquet","partitionValues":{"name":"Employee3"},"size":2187,"modificationTime":1713152085839,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":743868531,\"birthday\":\"1997-05-15\",\"age\":26,\"salary\":59595.17550553535,\"bonus\":1424.9856,\"yoe\":8,\"last_vacation_time\":\"2024-04-12T03:34:41.385-07:00\"},\"maxValues\":{\"id\":743868531,\"birthday\":\"1997-05-15\",\"age\":26,\"salary\":59595.17550553535,\"bonus\":1424.9856,\"yoe\":8,\"last_vacation_time\":\"2024-04-12T03:34:41.385-07:00\"},\"nullCount\":{\"id\":0,\"birthday\":0,\"age\":0,\"salary\":0,\"bonus\":0,\"yoe\":0,\"is_fulltime\":0,\"last_vacation_time\":0}}"}} +{"add":{"path":"name=Employee4/part-00007-d88803d4-2bb0-4c31-8340-58cb6d797963.c000.snappy.parquet","partitionValues":{"name":"Employee4"},"size":2033,"modificationTime":1713152085839,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":4750981713,\"birthday\":\"1996-12-17\",\"age\":27,\"salary\":85673.13564089558,\"yoe\":8},\"maxValues\":{\"id\":4750981713,\"birthday\":\"1996-12-17\",\"age\":27,\"salary\":85673.13564089558,\"yoe\":8},\"nullCount\":{\"id\":0,\"birthday\":0,\"age\":0,\"salary\":0,\"bonus\":1,\"yoe\":0,\"is_fulltime\":1,\"last_vacation_time\":1}}"}} 
+{"add":{"path":"name=Employee5/part-00009-f87803c3-6cfd-4a37-9283-f2bff0c0dfad.c000.snappy.parquet","partitionValues":{"name":"Employee5"},"size":2187,"modificationTime":1713152085839,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":2605140287,\"birthday\":\"2001-04-03\",\"age\":23,\"salary\":56740.37076828715,\"bonus\":3912.511,\"yoe\":5,\"last_vacation_time\":\"2024-04-13T14:34:41.385-07:00\"},\"maxValues\":{\"id\":2605140287,\"birthday\":\"2001-04-03\",\"age\":23,\"salary\":56740.37076828715,\"bonus\":3912.511,\"yoe\":5,\"last_vacation_time\":\"2024-04-13T14:34:41.385-07:00\"},\"nullCount\":{\"id\":0,\"birthday\":0,\"age\":0,\"salary\":0,\"bonus\":0,\"yoe\":0,\"is_fulltime\":0,\"last_vacation_time\":0}}"}} diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/_delta_log/00000000000000000002.json b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/_delta_log/00000000000000000002.json new file mode 100644 index 000000000000..f3bd11c62fdf --- /dev/null +++ b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/_delta_log/00000000000000000002.json @@ -0,0 +1,6 @@ +{"commitInfo":{"timestamp":1713152124948,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[\"name\"]"},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"5","numOutputRows":"5","numOutputBytes":"10505"},"engineInfo":"Apache-Spark/3.5.1 Delta-Lake/3.1.0","txnId":"87a05abb-fae3-47c4-af4a-b185e23004c5"}} 
+{"add":{"path":"name=Employee1/part-00001-615707f3-eb13-47ef-ac1a-b8decc09e05a.c000.snappy.parquet","partitionValues":{"name":"Employee1"},"size":2187,"modificationTime":1713152123251,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":1726247710,\"birthday\":\"1998-06-28\",\"age\":25,\"salary\":77928.75048595395,\"bonus\":4976.98,\"yoe\":3,\"last_vacation_time\":\"2024-04-13T22:35:19.168-07:00\"},\"maxValues\":{\"id\":1726247710,\"birthday\":\"1998-06-28\",\"age\":25,\"salary\":77928.75048595395,\"bonus\":4976.98,\"yoe\":3,\"last_vacation_time\":\"2024-04-13T22:35:19.168-07:00\"},\"nullCount\":{\"id\":0,\"birthday\":0,\"age\":0,\"salary\":0,\"bonus\":0,\"yoe\":0,\"is_fulltime\":0,\"last_vacation_time\":0}}"}} +{"add":{"path":"name=Employee2/part-00003-62ce8217-f361-4b70-91ec-9f398300c083.c000.snappy.parquet","partitionValues":{"name":"Employee2"},"size":2186,"modificationTime":1713152123251,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":6142474489,\"birthday\":\"1994-10-30\",\"age\":29,\"salary\":57807.64358288189,\"bonus\":3662.5002,\"yoe\":1,\"last_vacation_time\":\"2024-04-14T16:35:19.168-07:00\"},\"maxValues\":{\"id\":6142474489,\"birthday\":\"1994-10-30\",\"age\":29,\"salary\":57807.64358288189,\"bonus\":3662.5002,\"yoe\":1,\"last_vacation_time\":\"2024-04-14T16:35:19.168-07:00\"},\"nullCount\":{\"id\":0,\"birthday\":0,\"age\":0,\"salary\":0,\"bonus\":0,\"yoe\":0,\"is_fulltime\":0,\"last_vacation_time\":0}}"}} 
+{"add":{"path":"name=Employee3/part-00005-c33dc31c-d3a8-4a50-90d3-96f00b1b2e22.c000.snappy.parquet","partitionValues":{"name":"Employee3"},"size":2049,"modificationTime":1713152123251,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":3550221591,\"birthday\":\"2001-05-13\",\"age\":22,\"salary\":58226.41814823942,\"yoe\":6},\"maxValues\":{\"id\":3550221591,\"birthday\":\"2001-05-13\",\"age\":22,\"salary\":58226.41814823942,\"yoe\":6},\"nullCount\":{\"id\":0,\"birthday\":0,\"age\":0,\"salary\":0,\"bonus\":1,\"yoe\":0,\"is_fulltime\":0,\"last_vacation_time\":1}}"}} +{"add":{"path":"name=Employee4/part-00007-b6e49fa4-cb41-4bd1-8dd2-1ed5e561f801.c000.snappy.parquet","partitionValues":{"name":"Employee4"},"size":2049,"modificationTime":1713152123251,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":3822742702,\"birthday\":\"2005-10-23\",\"age\":18,\"salary\":63581.29293955827,\"yoe\":2},\"maxValues\":{\"id\":3822742702,\"birthday\":\"2005-10-23\",\"age\":18,\"salary\":63581.29293955827,\"yoe\":2},\"nullCount\":{\"id\":0,\"birthday\":0,\"age\":0,\"salary\":0,\"bonus\":1,\"yoe\":0,\"is_fulltime\":0,\"last_vacation_time\":1}}"}} +{"add":{"path":"name=Employee5/part-00009-b8de3a44-b0e9-4d68-89ee-195b76453643.c000.snappy.parquet","partitionValues":{"name":"Employee5"},"size":2034,"modificationTime":1713152123251,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":5611620190,\"birthday\":\"2001-09-22\",\"age\":22,\"salary\":76076.68269796186,\"yoe\":3},\"maxValues\":{\"id\":5611620190,\"birthday\":\"2001-09-22\",\"age\":22,\"salary\":76076.68269796186,\"yoe\":3},\"nullCount\":{\"id\":0,\"birthday\":0,\"age\":0,\"salary\":0,\"bonus\":1,\"yoe\":0,\"is_fulltime\":1,\"last_vacation_time\":1}}"}} diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee1/.part-00001-1b911f24-6d69-4065-9c4e-d5fa896dcefe.c000.snappy.parquet.crc 
b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee1/.part-00001-1b911f24-6d69-4065-9c4e-d5fa896dcefe.c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..4f11ee873021e0c1a126aa3028a63194aac4b612 GIT binary patch literal 28 kcmYc;N@ieSU}E4#F8xzVm)N0E~(Zwg3PC literal 0 HcmV?d00001 diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee1/.part-00001-4f11e631-348f-4378-936e-34132f176203.c000.snappy.parquet.crc b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee1/.part-00001-4f11e631-348f-4378-936e-34132f176203.c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..8ac6acc78a223930005f0a9b94aa5c309efff54b GIT binary patch literal 24 gcmYc;N@ieSU}Bi4Gsl|cpU$gg3ukY9b^fFP0ASz>3;+NC literal 0 HcmV?d00001 diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee1/.part-00001-615707f3-eb13-47ef-ac1a-b8decc09e05a.c000.snappy.parquet.crc b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee1/.part-00001-615707f3-eb13-47ef-ac1a-b8decc09e05a.c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..3d7ad988bc8758c83127167f8ff2e3850a9a192b GIT binary patch literal 28 kcmYc;N@ieSU}8A-^mxz?3634ouS_>P@o9M_zLC5K0Ew;&ga7~l literal 0 HcmV?d00001 diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee1/part-00001-1b911f24-6d69-4065-9c4e-d5fa896dcefe.c000.snappy.parquet b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee1/part-00001-1b911f24-6d69-4065-9c4e-d5fa896dcefe.c000.snappy.parquet new file 
mode 100644 index 0000000000000000000000000000000000000000..c80c8128ff0568c837add76699f3f2fe8b6fbe5a GIT binary patch literal 2049 zcmb7_O=#3W6o6;5yJ@IJs&On+A#6uNC%Iqd>quEWHWR+5S z@hHWMC$ad07ZDF4MG!A~^U$+cPf|f|wPKO#$v63%prR{;?3*|5eeb<_Z?b2Oo;5JS zC0xcYHt&5DOIU!Vh$Vz}4hjgNe6FHhDu94M--l(q_V(3}tXcQI$n5Q*)a351o7cKB zq{O5zqOf2uZ*Q*TGA2o|bI$Yc=hwcBL$Ds)i@$wc{m`8;6Z^toNiL7y-MycsB!Ti# zpq+R({``9F{f-QwOODcIh`&Wpb_t3sCADWnEGPt_aJ#^2R^;#}t7dhAIxLr4OG#}2 zOT@%0r^w)e*iwkutOkIDrEo|Nbh*$Dc1uLUry&_8P2l6Pw46)D^<29DM8IRQ!G@3; ztmN@{tli2-29U$@i5`EJdY32t#*EdZxEJ9b4imm~_6s=aj>ankn;i z+UhT#dc3E17gKeEO7y8iy?KNsSQP7StL?|O^R5LA0PPOjpPXttPGC1dBZ6kPL;Ya# z3T@E9c3YF-1`(8!vZP5RTu_EYBq=NxED(X5B&Rj0#v8>Yg7&BF<61Z>Xgp+r5ZM-S zA9-59V@R8T35m&RaUuU0Ij&9Aq83MQ;D!y4R$1Os7w|R{J1gMeCZS-a}@>LeL&R`EkR^v(@V#GOQ zXCj2w$T%3(VDRAqT=<&>Z8Ojt5vxes0+1Mk{xC{uWn1}QD!U1r`7F-ZocVHLXtcsct)zO8; z#Y*a#+)C~bVXe;nVNSWLKjV5V!46|bH=VniZkr|Cv48k+;Ov>`-u4gZO;A+cX1KPC*$Ec*!0Trw3Q9rsOm;}f4yxx zX4$L{8Y5L>;D|X?KU^KO8be2_1Lnx#frd#fW7x0;2S!Y@P7f$cC4~O?pif1F&cL7l EPYd|Q3;+NC literal 0 HcmV?d00001 diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee1/part-00001-4f11e631-348f-4378-936e-34132f176203.c000.snappy.parquet b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee1/part-00001-4f11e631-348f-4378-936e-34132f176203.c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..3bf43d99b023003e48b7ff2c9b9569a50a116651 GIT binary patch literal 2034 zcmb7_O=#Rk6o6-EwX)b6V>g~bDoa^Jy*S_qi?tKS7?Y-8iZRCI;7ciHw32NR?aG$4 z3C8#ue8?#&1j^o0N)I`h;(*&jk3EDOb4zn^dtTZtv_1BX{zgemvH>H%c|YHKZ|2SH z#^<+8f=G?j$)jJs`Dvs^Ff3)FAT&0I5klqC;U#OT1QO^ciAuiv@9Beb!L9mP;#Qf? 
zUGDw-=uhbamroK3Pxj-}vxgTYYhb+p#lQUXmZwyp-EdFj-C=`Z#wuE$7-nDnbRlFGs^Y z7Tn$nTf0u1k#R(%DXe1GhWbEIEH{XDZFVpzUu z93Y~=qGI`-ZkYNW_zoN(Xg_kpt?jPoMQ$53GH5>bSQu^HVJ#N9erGE-P^{Io`wgW= zByC<+U{|0Pf+?n}biJW0iDMEHMbr7=2`y|D%oS9D5FN;5lAcIp88tRxf>LtcT+F{o zR~nm3$pI%{;Ks+DtctvQBf#rR#6XQ+NdWoA~9hCbUKV3Fkkn@a9i&jVLKUIi(oCNSDF1bQQ)??Afn8>{uqB zqDCLWvP~<)f#yB-G4#l%Hu>@%#i|JVjl;f|d5@>?o}jdb;!Na~oR;6HWWa-8#FY#n zSWbg47@wRmc$0Cg5Q^xrL~%o*KS+~slV()a4|WW;$6)r2_!<~@oFIf*R4Ywm-Y}7R z`7Mud-!tIG?022W2>iQ7+liRXBBnl;4)y3@&(Rn4FbcY@NT1QSUB|OSeetV9y~Env z;PkR2vY#JZix*>Qs5a~GZ+L9u+_b6971%G*zsfboYbj97c` zeYmfWkh+=<&y{viPp7SJ8fMcl>(f@(_3XNB&6x{LbN0GDZ(VE7Ij#BY%~^Zl+HA{a dj(OE|=4Kac+hUis`!$3v{6jA*2yMWx{r~m!wK4zz literal 0 HcmV?d00001 diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee1/part-00001-615707f3-eb13-47ef-ac1a-b8decc09e05a.c000.snappy.parquet b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee1/part-00001-615707f3-eb13-47ef-ac1a-b8decc09e05a.c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..1b97203b8927ccdddfa10fb9043743d518abeed1 GIT binary patch literal 2187 zcmb7`U1%It6oBvi?CcOjNMr701_oIMv!t*ccG<+HVUZYUkrGPrAumP3-JMA?IJ=Y1 z%vM4S_}~wefc57|S4G4ZEgHcmDM8Q|MSLpN{ya$VN$P_YLJPHe?)=@623=vfbMCq4 zeCM2d&%Nx~r=DqGgbiH5uRr+Vj%r{LrV3UO8k-RjLXz-U_{%B?2=tR!!=Da*`(~tI zHu894_R(~1a^vMMZz_kF=m~`d`}W@5H-&;p7VP9}e+oOw9}8))>z}{(U8!J}1^e;U zcTT1zmv6lPL6MZ1NJ+3UJb}Ny`17SArK}|>3s{DN41DX2@86YLP`QlIXs!PH{<)9aAGhMa7TY9s(8=|@CeQ?o59h*;DKLh-W9wno?DPf^Xb1Jv=Z zaBDT&0)S;=(TkN1UhxiaFPW7K>bPpI76e@rgw)+2Dn0{AA>AO$_*5zlgk0Pg^7UOY z*B)^3ii8(Zw>=pzG7;E=r$@sL8a%fWcGjIP#p4LeQ=%riHmri6w%s6Fx9R58=#m$^ zfKsAs&QxXT@}uKNm}&%6N=Q8#Y$B|}q}29%{V)MYo4x}_0oq60aAmdcd6C-%jRKkp zkA~681=^vJ>-SdT1tRK(zEg!}mi1W`DLTsqi$o-+$(gF!;s?ehf~NCh!WOCySOcd(wlO(t zAOMUtNHQ`QpA}&b%9xPbF>D`TBPlH1(!BP4;N@M#C02aaYg}98zoi2IZG{!TD;ii$ 
z`N=8e>F$bWa;u*5%)2}{iLwk4*-|f4hNmX4j7E}b22g*pA}(WG1x0*2D>7` zd8N4(dnJap$O0I&;6lI;aPhYYy2e0Xl(C7bd$NHQ04cJ5Sc}EH7syKj7*_8~hBjJ! zHU2q+NdK~m^cuV>GISWU8T%k|8`41Js!YVHO76;&aA9kj<_BvQ-Jo#AtoXKB7n~r3 zme)#6Yu0KY?dU@m;d#%3i+R59L{{KGZ*`rB+BBlthw_#gZEiT`yctG8zZ01?bJcY` zJ2dCdZJ9mVWlOFN>eS5hduuT3!QG>64_3j=shJtkoZs5suH~NDh3Ebd)*0R(t^jup z*ZqJcIArYP5#jE(-)9LA?H|88+Hmlh4a|MrUgWGf!SJyQuN$Ydecy9vZ+ObX zFCvGr_c4rUcjLcs&u$~_blN<}x}9!AdDh*@?RVR6+;k literal 0 HcmV?d00001 diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee2/.part-00003-090fd396-1c53-4794-97b3-faa0f302984a.c000.snappy.parquet.crc b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee2/.part-00003-090fd396-1c53-4794-97b3-faa0f302984a.c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..6dcebe632d77e6c3205f98a56c9e8417fd4e8f3b GIT binary patch literal 28 jcmYc;N@ieSU}9jE-|JAkvPxRWx5&(iPs=OujpRK5V@3!Z literal 0 HcmV?d00001 diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee2/.part-00003-62ce8217-f361-4b70-91ec-9f398300c083.c000.snappy.parquet.crc b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee2/.part-00003-62ce8217-f361-4b70-91ec-9f398300c083.c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..a2b1da3d0cffa712eeaf0af5c44cff72d3a2c734 GIT binary patch literal 28 kcmYc;N@ieSU}AV@*rs2+Q>7W@();+kUgw;k{ojKw3k)b!+P=J7Yl)1JaJFUju1Mp-pB8~SM{pq z+PUi{M!1S=`1S9*?~YZm1WOq!2n|k32%%!()!4B!OalD~R&n>>+54k8x5}x+Ej*RE z{Q2y8{Sc380%5sc{f3tZckGT;4TrtNVND%iO+ z5U$hU_Hx)CAgsWmZ29d@$bC0_2M!RlN8NCFrQ>;#+X9UYn&Tb~qvgA_Nh8;9FUJNVX;tk( zU8&-dHYF>tD^LrTh(s=vt94~w9Fvd;8p{q(Xkn{huAmBpNKeKiWUqu5P<;s|h?BGC zV*Yt@vA#5)9B}dlZhYLyD$l!r1bE#NHc)-1Ab@ORV%8$L00w}uu}HoQCRTaauSYN; 
zzm*y60ALkf9pBQd_XFT%vm_+ed^%x5o9FK_{@-QR{2tswh4YhBit%$~0Zj8}VI0Mt zJ!N6rGCqmwU3PhSSP>31+pn$!J+i4yzN|;F%ER6;*h881xL<@E9G0JntdjlWSC#Zw z@Qb*T9t6ug_>l3*7`?9<+X^O#Y?p{sSIDE%2;8J`RrP~agRWDUeIvdG#$6`}VHVYL z!SwXIxK-`rPK$)_4|~-b4`(X`TM#VKzl*xaJ3} zz&>*)w_AkUeuov@cYORHi0~E9r$3P$CQihyxM)0v!@v2BZ<)*irva_Ap=TSn*_{Mdg66Megw literal 0 HcmV?d00001 diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee2/part-00003-090fd396-1c53-4794-97b3-faa0f302984a.c000.snappy.parquet b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee2/part-00003-090fd396-1c53-4794-97b3-faa0f302984a.c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..555a4c1b5ccdbfc39874dfca58b4632cde1010e5 GIT binary patch literal 2187 zcmb7`QD_uL7=UN@_U_i0rZHxQUD$_XX$~#dCCeom!%<==A|fJv$V(|pFeV~|a>Bsxzpe=&fqob(_}lGI zuH-XjeQ(8PLvN=hFMheZHn59{5>r^PuU~w8c~8cq8|=Faou%N5?WrW#MY`$58$|Z#QD@yZI z+^!(>^6Chyc@V1p?f&(+ASvD&S&IQEr|uI~Mec4&1wl%5}eX?q{9iWbO zgBeDWqy-1|LhLRUs9(gmisf zOtm{)e3ZjeiQ9&Rryr4dmP9Sc@Lkn4^gE?BfP(tpy8 zTtJzl^Y%#T>=JFz(D7Qc(E<@wO}$o@H7u#)1thC17c3HyoFJ#la*ZDtmk2tL9wXPnRzV9u z76_5eA|58^^Y~p|EweJ8ftsM?Jh3zmH>*si6Fh0w|zGARTMFxA6 zgY!yq%l1kHt&u4(sKJGRAK;>I7PQGgpBJ%?${Uh~3jmU!d3I^BhR*_nPS4e^mgEnIyM4m$$h&+*qSeD73(lA`uilTV_yg?T!TrnfMZN`%A z2hj3LzG{pcCQ=T*ViBBi4Y-(R+IDF8-dUq*ht#4WRX&rJ^{}&O>yvsA`t3%jSM)i@ zcCA34{9svc(I#7REKnzAuGgA}SqttSt-G)aZgxfQ67|XDm6b~B*}d@GAHv$b`@Qp7 q%x;Vysg7C`hesP0war7OJvKUFS#^3)y`~|w>kmyA5IPNiL;eA|vGW!H literal 0 HcmV?d00001 diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee2/part-00003-62ce8217-f361-4b70-91ec-9f398300c083.c000.snappy.parquet b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee2/part-00003-62ce8217-f361-4b70-91ec-9f398300c083.c000.snappy.parquet new file mode 100644 
index 0000000000000000000000000000000000000000..4900fb2729ded00018a5a7a08a42e70747b9f6b5 GIT binary patch literal 2186 zcmb7`UuYaf7{F)uHhW8Y#I$CHU0CH<&5^>nWVys9=^-)HBBhq%Lta$E-0dbg+`CP8 z_bMUgtsvA2rB4;|Ac!bhvYoi6V z(PtAi|CwCn-H(5~rR}5gR6=3SuH3o#;E94t=Iq<*(aVGR2aBn*hnIe-6tps5(mnU3 zRO5$tK7VvzKb6GS7(Rr*KmYT^k%CHDPa>AE0{NKuy-z>*PH95o3PP3o)a!R|9DN0$ zzs^sxl*b|Iho4>iVvGTlq%xYB08q&|C>y$#Rj4q;vOy5y%L=QEq4Q5s%&GxmUKF9N zWfcHeHY{4P)WM2ZKv*ekgP53QEyR*!=#rEQOrqga5EL>@vW$k z6$WyyaJ_^VQ|YdPm)H>4J14HQB z$swOc@yc1+rg10et?&gR8>Vrg1|6svvl>zj77Lb%OiqwfHLWQ&OpFMc%C}KSp;XWU z5CuZyb{S8QM+$xc)s~?bQgWUg=P!}twdH2A)yW&UeB+Z@k#~>rc1N&{YCDnuvclx7 z9SLBpL6SFviCGc$XBiW6pTqV5HkwA4mNd_O4|sW3i4jXa>op-Q@-Jz?zpS$4ugWIY zQhu^a9KT48!4ibaIx~5*aK)^M`-I_sQd#Z;z-7f`8=dDq;4q#$>uL_mXGPdI40f)} zVAo5qUukT`Ug6LtSp0k z^-A&9_~#5f{mUvc>Ts(l&|%PK?1RXC2m_H@3Xy9X`AwOC16$YiAY8NQ28APL^V4RZ z^}+~RULR@Lv$liu!;fi1XM7tD=9#`1+hOpQ-SuMX(wOQWDqB{(x#3w0RuqT*c5KzH z)sE-8k+tyJmer$Ow$yP!oT~XjZw+QWID53^!z!40bt@xU3tQXU_1rQ$@WLM4dc)hp z5fG-)dJwVzhxDCXBEsAa`Yhm~?fJbU%vb$@{txfaz{0lEi@h~39Nu@~b`z9V5cnSL z4NrOeMiel1KSnX_Zu}SSjcuf#NSo)mZdgsbt?JlL!*;4ut$xRMt8Qz?nQJ)H^X_cx rNMpup&(1fd-MJ&vZI^n^^Nu$&J?FYDdf2#NBDC*=mQ;jJ!(YhXHgWB; literal 0 HcmV?d00001 diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee3/.part-00005-32e5492c-7ebf-407e-8ecf-03add4ee14b8.c000.snappy.parquet.crc b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee3/.part-00005-32e5492c-7ebf-407e-8ecf-03add4ee14b8.c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..0fe0b9b3c61be5d549bb110780e5a64d4d56be6d GIT binary patch literal 28 kcmYc;N@ieSU}BIKc=Kn2{SD#z#-HU*d|F literal 0 HcmV?d00001 diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee3/.part-00005-ac0ede62-3abc-47a3-9eac-c09a3802cd78.c000.snappy.parquet.crc 
b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee3/.part-00005-ac0ede62-3abc-47a3-9eac-c09a3802cd78.c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..b4299aa5c9535aecae61adfca3b19f4b96369491 GIT binary patch literal 24 gcmYc;N@ieSU}BhFtNSQ2JamiWqL4eU&Yu(j0ATkCyZ`_I literal 0 HcmV?d00001 diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee3/.part-00005-c33dc31c-d3a8-4a50-90d3-96f00b1b2e22.c000.snappy.parquet.crc b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee3/.part-00005-c33dc31c-d3a8-4a50-90d3-96f00b1b2e22.c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..abea94855b25ba10dd39377aac11fa4da0f0a5dc GIT binary patch literal 28 kcmYc;N@ieSU}8{gtT|c|Tx@m!%fH%VUDX`TciwLY0FUzwxBvhE literal 0 HcmV?d00001 diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee3/part-00005-32e5492c-7ebf-407e-8ecf-03add4ee14b8.c000.snappy.parquet b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee3/part-00005-32e5492c-7ebf-407e-8ecf-03add4ee14b8.c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..7cd5d550b354ddd6b1242020d33f78b232085bb1 GIT binary patch literal 2187 zcmb7`UuYaf7{F)u_U@K=hP2HLyRzW1m}?8yCCepk8V-p;ij-1{4|x#@bGMUZaql+W z-HU`YASgv*ANrsaB);f__y-$`FKP{fFXEfnmewc9qeRd?qxdpj~l;V&Qr=-lCp#qD9FHfu53LRZbIb>LY4ac?^fRa z*hJ``jWG@wfvWF*arx>n&sIscMl})uD(DAfT~qs8loPS66NKVrg>Q|n#h<*IZwIL3 zT@lu5e+vNC7xP}cbnuFIKzJ#rgy`urelVvg^Pq2x`j>q9uo}j#bWh zu?r{*bdil!&wP1klO?3;JQpkznVcqPYid&*n2-oMk{zSa!d5{G zKo$s*TV*^--YVi3QEd)-Ath(U#r#=vsy5e5jyf5E8y|c!%k%!=yzL^kP;FBZK)x{< zYf}OkZ;)i3ux!+1X@BZrAu9`*x=T`Y6h 
z^#Yt%3d{FO3~iDbFlfSsfFI!EZyt1ugT5|f6Vgh|he4b147nW=5;a%$zpEDCo5#vu-Z9 z%yU9>`n5H)OFMkYbwHh(d46{hW?i^@wB^AnxUss~Cz{i1>+AK*vwz`*KZIq2`@BbuiK5-A`1qOoqOE`rRDn`qus$N zkH3fl#^1*O&N6ZTZY9zW?!woWuA pSbOqhW89fKG2V74vya8&WV^On+A#6vG4Ws=>ti<@lS-6*Bj zV_T%?U2+jYJ&5#B{{#g^Z{GCOf)^o&2p-ip`!%Z;jZn64-pB8~H}ht4U+byI>;!uz*uI1JGi|_kW=D3!q+yXfiY&#gr$j6$1fjs3V=XIk@Gq@qZGt-JmU~NaYXFPII4Yya5P{fIh}f(JfCQaH zkYBxuM8cP$6r>c%EIuDe?Mx!>WD@(9fM+6yj*vKPXA^BV5}|_jg??{|x;N&$`l8vO zcmUztupkST0ri0()hyRvH0bhh|0SCpC{t$Xf;n8e^!)I^A*L#TO7y5r-DQL&SmbL? zv*ks;%Z>>L2-^LYH#gt1ZQp8uMg+~EO+A0^7Ohj?a+-62fe1=b*(^&%oKq%533df) z!2%J;MRK_;P4i>o5fe`76c!+G}@C+)?!URR+q`829j+`&gPKO5^ zet{btceqOP*7|_g$zctZ+ZhgI8M^U*0VezJA z;XsofI|B5`rZ)Vt9{DN_`@~>RMAqXryaXt!Aw3gGCEMBGD(SM|4?!he2o|H@J;oM*%xzLeEbg&@20K6 zjk(z}ea&@lYYo$<2KA}@IJctu%S)zORXyKr)qQnTowrQe@YL$H6}3ql%w!p$j@)dg zxd5vsTnbvV!3tsKs2UU1>dNZsXc8IUL>>=m&EDf-HhHMG=(w!F9&?Acn}-`tixu2+ zeDEOf@Oj&z|B)Rgj>lR}-&`==-gBp(9Zsn^j&0Ir@035EDh^{0m*>;Q(!X%mSCM=% z8lDpkw-ilVN!Rp>rk6%)Ez34aMr~Z5tmtD?#zgIOW!$V!OjX8=$jTOu51<&+Vc;6%p-IKe(iq(@r%Dh literal 0 HcmV?d00001 diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee3/part-00005-c33dc31c-d3a8-4a50-90d3-96f00b1b2e22.c000.snappy.parquet b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee3/part-00005-c33dc31c-d3a8-4a50-90d3-96f00b1b2e22.c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..2e2369cc25f0605a5e2d3e8e7651c0ef37a4f94e GIT binary patch literal 2049 zcmb7_O=#3W6vtquEVOvT7;4 z2wqC@s341of~a^95pUwfgGlkB2!cYX2R%p?EaK5O`I?|bR|wgeH}C!4`_H_|o;Yw) z#|Rg25nsLca!07JDRC`g zn5Y+T#a~~%TiKK%bje}44Dq)N$`(P9rC57vhy?|71j{+5S&@UEw3+Dyb0C*{OR+Y9 z#ltu%qsS0}*i?wvOanjyDIAgkTPE;>)dG?5@gP}&OyEP&sGUiMtC?i|p@7FChczK_ zc$~%Kk##*AIzSEW!#&*`>^ue~iEzHynCefodb>|S4V3Myn&6XGW&N(I=09ZF!-sDuvwtcGs77;94ZR+`x 
z=V_h#meZUJ7Koq}l%4{^Y0yswdMjcTmDT|yMxYH^DN(kb{Y}{w2@^yXb3`afklhq=R>&ch9-#R~2?KKMBB@G0A&|B)TKkH=a~ z-<&qx&U2?eL7Y-^9NVPL&MAL>wm6J^#5|uiX8(mdy@2E+QSRembyagT-+8|s0G*EwVgLXD literal 0 HcmV?d00001 diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee4/.part-00007-b6e49fa4-cb41-4bd1-8dd2-1ed5e561f801.c000.snappy.parquet.crc b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee4/.part-00007-b6e49fa4-cb41-4bd1-8dd2-1ed5e561f801.c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..c317822bb8f45d91ecfdc8ec3e1d618f9d6c5d65 GIT binary patch literal 28 kcmYc;N@ieSU}BiWa&|@Lez5|k7PEcFx~e&v@4Vj*0D;~MMgRZ+ literal 0 HcmV?d00001 diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee4/.part-00007-d88803d4-2bb0-4c31-8340-58cb6d797963.c000.snappy.parquet.crc b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee4/.part-00007-d88803d4-2bb0-4c31-8340-58cb6d797963.c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..cc02fde8a578c4e8757f1b71fa0d3abf4c504900 GIT binary patch literal 24 fcmYc;N@ieSU}BJns|nv>*|OV9=3<7W?8IFFQeX%4 literal 0 HcmV?d00001 diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee4/part-00007-45c2fd36-d1e1-4e92-b21c-84d385a8218a.c000.snappy.parquet b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee4/part-00007-45c2fd36-d1e1-4e92-b21c-84d385a8218a.c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..374e9b2e74e4510f42de5887e045eb6652e70d0b GIT binary patch literal 2049 zcmb7_U1-!;6o79snM|lfTJ60iFvt+Lvx+q|OslmFr3x#Hh=?qGS)|;Vq-`)WsYynu 
zrO$#?AAItmh$4tCyNfL1a?2clqUE{`w& z`Ef^%lI9}Bfp+3u`0dl@|L09gYMd@Zd@X|3FKVii)}9lws1bysZHd*asnJhf&FTbo zR4%tx(%Jx)im8{Nso;UwQcT#a27pARaEJgpA+m$rGEwj;NQOyM_;@0%2${GhWcv?9 zJdqfzioJ4OCqbIFKDo z###{o#s(vPGZ=a+%=fB0{I(&;Jt>+`>Zu literal 0 HcmV?d00001 diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee4/part-00007-b6e49fa4-cb41-4bd1-8dd2-1ed5e561f801.c000.snappy.parquet b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee4/part-00007-b6e49fa4-cb41-4bd1-8dd2-1ed5e561f801.c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..0bea438691813e3874e5086b1a0a02e32d012011 GIT binary patch literal 2049 zcmb7_O=#3W6o6;5yJ>8Tw6!xPu*eeZQlW;1ZEY<}sUAc`L_G8&vdnJMHkjSiB&(Ft zlP62@tWZSo;141qBA&gdSP(%4JxK+G-uK>{c{4eEfNr*DG1h$d+_(Iwa=X?GqFDi7Uy#P>E(B6N}LNb z0>W&59g+(rQ*GsDpC3wG`I| zuvm<}GKvfyhz*5^&C&!&Pzr}+K$i*ZV75RcdKojQ}=w;tIwGYihB^w4G6Me84wu+sb;zUoIw``x=-3{shKiM zXU&1)$@_bHdYP&dRH8?1>MkHG!609Ank_G~U2sg;0MK?>UUjBr+rHHRjR=}Pn|glr zBCS*3a+=j(f(S}MSt&tx=9Ezhi3-aF3q&9%$f=T4;Ty#zf(DcA<62lM*m%eSA@W1S z{p3*&PomN^bVx)_iVOHh$+6OOCEVih4cuVE!%>>I(gD10Ijo`5Dmx(&kS$EYTFn5A zHAeVmFg{AdZgyZoZsZwk9blaiELzf}_I2PT-NYqUeB5JPo8~`Z{IxtQ{-IF7Qp68; zDZo#VNtmh}f`;U4w#M9+MZ6D{Hoz^Z5jHfbaYKL_S>J|lRwEy!Ve1TbM`SfVhWi3V zIi!0cfL6#97*wF~;Q?ImO@n?g(0dW9sI(0rF#_$-N=aqg+21O=Dq(`ia*hZkiQLQe z!>JmQWyhV>=sbmUpan-lyJ)%|^rf6HYonTu1(cYNo$xsHKcryG(oo_VpGxizVa@jap-;K1H|Mx4!8T)uC!M<+PKzbnwtsLraQ7M8q5lybx{v!> zP2Zd~-S%Urt{_gSIgV}8X8V*suPqK^mzd|%#{9o<=a!IsB5Iz44YwF|TT$2avZfaY zYc0z*ibicjA1~{}hmFzNSb4;(j~*@$8{=ccb%UDvA>AAq9yg2{J)o=<5ZduUU-Jl^ HhM)g04Vlh= literal 0 HcmV?d00001 diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee4/part-00007-d88803d4-2bb0-4c31-8340-58cb6d797963.c000.snappy.parquet 
b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee4/part-00007-d88803d4-2bb0-4c31-8340-58cb6d797963.c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..fdc2935f9844274aecae276d5c1c6db81ac25528 GIT binary patch literal 2033 zcmb7_Pe>F|7{K4o&W?jwSot0^w8S#xqGW@^YT6=F9YRFHI(UhYw>zT_&F<*TtRdn} zB&bsnM0hKrL+rs$U86%mmx3tjl-*(v3q^G7d-K1P+CawdeSd!6_r3SMx3lLiYZ&1? zF5q`tZ+{-lV-A)A77;o$!XbpRnM-B$SOz4}_hAWtzH{S!F6GvJB5=E2j9s37xVhQ0 zkIS)u;wNi9H&Y%_uE($Lp1l3_`&r1V_EY5rPZuUQ2Wg$-eS}mz#=h<%E%H#AhzT}HfsSO zekUR1cdtAV@kJ;F$$2t|FND%|CKh)ycqY^~IXoGPEiQK0$;R4tC_)9>Q$6l7b*|03 z)g_}waWBGrKPPdf4)uW`R!qlR(&<`%&y2_mL)#CsA<@CMYDw&H4QEkg|6 zx76oNkr}W-^U*9N5^H|1HqCD`{>vO|zRBgW81jQt^6_Od38snDFpkNdye&=m-U#e(y)&V_E2Cw?qnfHSVMXy;!1Y1e^kKBp)K$ZAVHTxaNgY)+Bn|HI za91o9Zp=!<@KndXuGS2X>eQps1Ablc)|L&Wthk=jsCvqfvS1pP?keRg>q?#0n90;Z z9lBX|eGyi5xD>Qvffd4xAtfRz<@Jq?p*S+SiAg-9H9C)n*-S#+CEH;Ic9=W3-ATA^ zH(0?P$NLXL624&B^gpu0#3iw2-7^*qr}NyYXD6XlY}+zuy>rUmrz(N5hs*V7ZTVlg zs~bp~4u|Jd%_)S_R?t+fq-uqMO2f4Df?gTX#!K4p8GW=eRvIy?qi0IP`uNyzRi}n_ bS~Et5$926z2jyFNg!cVIpK=Jz!jJtgq)fv> literal 0 HcmV?d00001 diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee5/.part-00009-079ed08f-dd8d-434f-a816-c73420234b25.c000.snappy.parquet.crc b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee5/.part-00009-079ed08f-dd8d-434f-a816-c73420234b25.c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..18452a068188888abd902e42367e3d0b819135cc GIT binary patch literal 28 kcmYc;N@ieSU}6wl>2)U5Q%U!c)6}QOx~e&v@4Vj*0Dlb&ZU6uP literal 0 HcmV?d00001 diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee5/.part-00009-b8de3a44-b0e9-4d68-89ee-195b76453643.c000.snappy.parquet.crc 
b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee5/.part-00009-b8de3a44-b0e9-4d68-89ee-195b76453643.c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..620acf23a6c1a3dc81ba4e8dc2895b999fce7643 GIT binary patch literal 24 gcmYc;N@ieSU}AV|Q>eYkwk!9As{XH6=T8a%0APCwqyPW_ literal 0 HcmV?d00001 diff --git a/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee5/.part-00009-f87803c3-6cfd-4a37-9283-f2bff0c0dfad.c000.snappy.parquet.crc b/extensions-contrib/druid-deltalake-extensions/src/test/resources/employee-delta-table-partitioned-name/name=Employee5/.part-00009-f87803c3-6cfd-4a37-9283-f2bff0c0dfad.c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..a4943f6399d275ede52466cc0078e833687674a4 GIT binary patch literal 28 jcmYc;N@ieSU}89yxjIJ3&g8VY$81?AJ}s}rHT4mpB7EY#3=t=4ju>Vt@ih!1@cDSMZ+4dyO2$tk7u zS&-siP(MJy4@5*nd>2KbJ_ss^AW}i!s%ViuiL?3IprR*)-0bYkZ)SF8bI0}_H!#9Q zT*A-4+`N`6ViA@SmJ!-CBqD@z!rm7T4+|iHz85R_(e;a$q>NeTw#e-5&eY`k$7e6I zq{O5rqOf33uD-gwF+)j$)z^i0I{rEi!Mboa{{C(GLubZJ>J5V>xja61=f{T3b`mH~ zcN6ZxpP#;YpEc=FqI4PJZxPhZqN>PA?HLh^DnTgR&a;|TCH%>%S)HH`%jMQ`QX9Y$ zG4T>q1w0U2iZPqj0Fba04iP{ngm$o7Br-k<$uMaGABd$TAr)7IbpNi1$6|w3F*UfK z!{f1bEf*O;4oe5S{8{Rqn)K^4R)gYhg!6r(BHAWI20^abUNB?QxxTI=4(lmX=IFH5 zS2}WgOZR4`>I9YKQ-^wU2+Oc2)ZAvvk8S5%3mO304Yofy)pDG`Zh%GtO^-wUVDb#D z)4+C{li>yt)uOssmWw#A4ofne0px;3B9cSoXj!iEMsbOt?P>eC7LE!U4_P2Y)+O9a z?&a|qDo?-=#pJZOkiVZCC{I+Q7DsR3h7FHaS>9p?@V?}+j><~{2eN}nSxW-I*kD9& z2IH$N>`DhF&snJqK7^Ag^Q%B$d(99j+4#8t=pR0>wFG zXCj1F$v7BPVesJrT=<&>tuxST32Ugl1|TU0{b7{S%GPqfRCYne1d;Q3B9>)xE8h#Z zYCut3Z(66b6z+i@UJ3n-<@qp|N}-|;>jqN#f3xsUJ38E+(=98|J@>5MumWn*fGSt> z^I9-BYiXmJA9$^LpbcnKw&j?Dx^!7wAzMJ`~gKoiiq?gS|-_Ty12>K-Hj4U zueAqHdXtE#ARY=9>7O7TMDgOuAD0TAO27!7eY3x{MI)5$n>X)$-uv#n*<3t*NyP}~ za2~He{rvqv4s)>Nv4GIQ2@WBYPF=fmdp-pg_=mBG-{0T;HjoJGKjw#>9g9MKzIyT{ 
zNeV-VPhrL0y?D4TBq(9A?U$E#-`wk-f?|Vs2><-?_G^D4j34%jg}rQkeDpY#*bWP& z{X+jB{B2u0#7UwMN>7-WlR$>IonewC(fgNlGnwG_)W*?5C=Fnt8D^y<5i$^4asiu3 z0Enj~fczenBLbfHO6AA`o(rsYDl&Ic(f$^PXM=za7X@smBX2h_p@a6hL1%^9*B71Y zvRsjXHRE@EkU&<1XH$Rq7g6W6|3noRpL97GPR3(xiZR ziS>dx!jbc2z97uR#}r!xjYfwTdy$lbxq>bbB3&L2ldTM%MTG^Jpnx2S^Y~}TTw!6x zKVbg}T<^I3Rg$;f2fR)OE2z+BBN+hMi-}n66u{Uy_)i9lS4r4|K1@hE%V0YI>knYT zD~+`8056)Q*kaf*Wyv4BPP&lH2g&=4 zPsHf{$=DDuL1ZICxPm~QXNKXv6-CjqYYJVVF#C#k4V0U@?Z7OG*^)A;s7O4%C&Ia5 zDsW?NG<8?8ty@Z6cd15QDn8Au%5G~#m&>x_+Rdsf7v&{GH#J8tUs;tKw9d8+4ctMP zX*FuFYQUwS6%%$LO)ttJQ7*5ptrerp@FvFjP*(3fA7(R7b(SrgHP~Zq|8~den$=_t z_nhy25aRSD)1v=T9VRZ$H5#s7)9v1CCq6qdrD9p8P8+>b_I|2j82fNJF0HTp3wL!5 ziRXji8LQj*VA}GkqLvgjKU!%Trk2+#6Y5k+9iP@FDjz-ZF04iz6MNL(*BPvX>s1bzXZHXO?rpBM5m>ma*<5S_< zN_GSQ%ceyumO5C)C%~;FmM)0nthrbaG*u8%bDb#oG6aP*oh;({bk-O0>4A{%?~1wf zkWU|!@N8{do*O;RL_;8wL+a6B9bp9)<(A*=g$bav?mKW5AU)=WOUpgai`))KWRRTj zXc#SBrEMCyes?L}Afl#gw`z)xC2dMZipFBWA`!_&a=E57`GxTrK_~NTASWc{7x4i}xn4jGKqb22g*Uln0rFxd49gFPt0 zeWkEst;EnKnFWO=Gz9zr7k`VOhYWPTf(=yLlXNTtNP^-yqQqj}+vHsV6l+gPx;k8Z zHU7B*Pye!uv^u;h5>yzJ8T%me7{Wm08;OWDh5R9nLBrNn)elxox<;YJ%(&akt4%*N(sJ-5snp4)?4XK;IH0d5+u z`T+}YK;OwD!p&{J#{wSMK7Mz&`LgHJ|KS}9nA^JD$XRiM!F?BAH%@8!zUR>H;FM=y zL=I!`V;IrS+JE8R+(hcdw0KT;f@)fARm-#*rd2)J>bag>wOf