From 14a0a8f07dda88776002b59f5630198c6fd50b84 Mon Sep 17 00:00:00 2001 From: Socrates Date: Mon, 10 Feb 2025 16:10:10 +0800 Subject: [PATCH] [test](hudi) add more hudi jni test cases (#47431) add more hudi jni test cases. These tests depends on #47192 and part of #47299(the jni-util part) --- .../hudi/test_hudi_incremental.out | 174 ++++++++++++++++++ .../hudi/test_hudi_schema_evolution.out | 32 ++++ .../hudi/test_hudi_snapshot.out | Bin 696105 -> 1044838 bytes .../hudi/test_hudi_timetravel.out | 120 ++++++++++++ .../hudi/test_hudi_incremental.groovy | 17 +- .../hudi/test_hudi_schema_evolution.groovy | 11 +- .../hudi/test_hudi_snapshot.groovy | 15 +- .../hudi/test_hudi_timetravel.groovy | 15 +- 8 files changed, 356 insertions(+), 28 deletions(-) diff --git a/regression-test/data/external_table_p2/hudi/test_hudi_incremental.out b/regression-test/data/external_table_p2/hudi/test_hudi_incremental.out index 50644f34961942..852aebe48961b5 100644 --- a/regression-test/data/external_table_p2/hudi/test_hudi_incremental.out +++ b/regression-test/data/external_table_p2/hudi/test_hudi_incremental.out @@ -521,3 +521,177 @@ -- !incremental_9_10 -- 1000 +-- !incremental_1_end -- +9000 + +-- !incremental_earliest_1 -- +1000 + +-- !incremental_2_end -- +8000 + +-- !incremental_earliest_2 -- +2000 + +-- !incremental_1_2 -- +1000 + +-- !incremental_3_end -- +7000 + +-- !incremental_earliest_3 -- +3000 + +-- !incremental_2_3 -- +1000 + +-- !incremental_4_end -- +6000 + +-- !incremental_earliest_4 -- +4000 + +-- !incremental_3_4 -- +1000 + +-- !incremental_5_end -- +5000 + +-- !incremental_earliest_5 -- +5000 + +-- !incremental_4_5 -- +1000 + +-- !incremental_6_end -- +4000 + +-- !incremental_earliest_6 -- +6000 + +-- !incremental_5_6 -- +1000 + +-- !incremental_7_end -- +3000 + +-- !incremental_earliest_7 -- +7000 + +-- !incremental_6_7 -- +1000 + +-- !incremental_8_end -- +2000 + +-- !incremental_earliest_8 -- +8000 + +-- !incremental_7_8 -- +1000 + +-- !incremental_9_end -- +1000 + +-- !incremental_earliest_9 -- +9000 + +-- !incremental_8_9 -- +1000 + +-- !incremental_10_end -- +0 + +-- !incremental_earliest_10 -- +10000 + +-- !incremental_9_10 -- +1000 + +-- !incremental_1_end -- +9000 + +-- !incremental_earliest_1 -- +1000 + +-- !incremental_2_end -- +8000 + +-- !incremental_earliest_2 -- +2000 + +-- !incremental_1_2 -- +1000 + +-- !incremental_3_end -- +7000 + +-- !incremental_earliest_3 -- +3000 + +-- !incremental_2_3 -- +1000 + +-- !incremental_4_end -- +6000 + +-- !incremental_earliest_4 -- +4000 + +-- !incremental_3_4 -- +1000 + +-- !incremental_5_end -- +5000 + +-- !incremental_earliest_5 -- +5000 + +-- !incremental_4_5 -- +1000 + +-- !incremental_6_end -- +4000 + +-- !incremental_earliest_6 -- +6000 + +-- !incremental_5_6 -- +1000 + +-- !incremental_7_end -- +3000 + +-- !incremental_earliest_7 -- +7000 + +-- !incremental_6_7 -- +1000 + +-- !incremental_8_end -- +2000 + +-- !incremental_earliest_8 -- +8000 + +-- !incremental_7_8 -- +1000 + +-- !incremental_9_end -- +1000 + +-- !incremental_earliest_9 -- +9000 + +-- !incremental_8_9 -- +1000 + +-- !incremental_10_end -- +0 + +-- !incremental_earliest_10 -- +10000 + +-- !incremental_9_10 -- +1000 + diff --git a/regression-test/data/external_table_p2/hudi/test_hudi_schema_evolution.out b/regression-test/data/external_table_p2/hudi/test_hudi_schema_evolution.out index da7273d4c14ef9..79f36e7124cd14 100644 --- a/regression-test/data/external_table_p2/hudi/test_hudi_schema_evolution.out +++ b/regression-test/data/external_table_p2/hudi/test_hudi_schema_evolution.out @@ -63,3 +63,35 @@ 20241118012149007 20241118012149007_0_4 5 185d101f-a484-45ce-b236-03ccd33c521b-0_0-208-622_20241118012149007.parquet 5 Eva {"age":31.5, "address":"Chengdu"} 20241118012149007 20241118012149007_0_5 6 185d101f-a484-45ce-b236-03ccd33c521b-0_0-208-622_20241118012149007.parquet 6 Frank {"age":29.2, "address":"Wuhan"} +-- !adding_simple_columns_table -- +20241118012126237 20241118012126237_0_1 1 5166112a-90d8-4ba8-8646-337fbeb2a375-0_0-35-121_20241118012132306.parquet 1 Alice \N +20241118012126237 20241118012126237_0_0 2 5166112a-90d8-4ba8-8646-337fbeb2a375-0_0-35-121_20241118012132306.parquet 2 Bob \N +20241118012126237 20241118012126237_0_2 3 5166112a-90d8-4ba8-8646-337fbeb2a375-0_0-35-121_20241118012132306.parquet 3 Cathy \N +20241118012132306 20241118012132306_0_3 4 5166112a-90d8-4ba8-8646-337fbeb2a375-0_0-35-121_20241118012132306.parquet 4 David 25 +20241118012132306 20241118012132306_0_4 5 5166112a-90d8-4ba8-8646-337fbeb2a375-0_0-35-121_20241118012132306.parquet 5 Eva 30 +20241118012132306 20241118012132306_0_5 6 5166112a-90d8-4ba8-8646-337fbeb2a375-0_0-35-121_20241118012132306.parquet 6 Frank 28 + +-- !altering_simple_columns_table -- +20241118012136512 20241118012136512_0_0 1 203f0f43-ae9d-4c17-8d5d-834f0dbc62c9-0_0-78-246_20241118012138287.parquet 1 Alice 25.0 +20241118012136512 20241118012136512_0_2 2 203f0f43-ae9d-4c17-8d5d-834f0dbc62c9-0_0-78-246_20241118012138287.parquet 2 Bob 30.0 +20241118012136512 20241118012136512_0_1 3 203f0f43-ae9d-4c17-8d5d-834f0dbc62c9-0_0-78-246_20241118012138287.parquet 3 Cathy 28.0 +20241118012138287 20241118012138287_0_3 4 203f0f43-ae9d-4c17-8d5d-834f0dbc62c9-0_0-78-246_20241118012138287.parquet 4 David 26.0 +20241118012138287 20241118012138287_0_4 5 203f0f43-ae9d-4c17-8d5d-834f0dbc62c9-0_0-78-246_20241118012138287.parquet 5 Eva 31.5 +20241118012138287 20241118012138287_0_5 6 203f0f43-ae9d-4c17-8d5d-834f0dbc62c9-0_0-78-246_20241118012138287.parquet 6 Frank 29.2 + +-- !adding_complex_columns_table -- +20241118012144831 20241118012144831_0_1 1 3c038df9-a652-4878-9b8a-221ae443448e-0_0-165-497_20241118012146150.parquet 1 Alice {"age":25, "address":"Guangzhou", "email":null} +20241118012144831 20241118012144831_0_0 2 3c038df9-a652-4878-9b8a-221ae443448e-0_0-165-497_20241118012146150.parquet 2 Bob {"age":30, "address":"Shanghai", "email":null} +20241118012144831 20241118012144831_0_2 3 3c038df9-a652-4878-9b8a-221ae443448e-0_0-165-497_20241118012146150.parquet 3 Cathy {"age":28, "address":"Beijing", "email":null} +20241118012146150 20241118012146150_0_3 4 3c038df9-a652-4878-9b8a-221ae443448e-0_0-165-497_20241118012146150.parquet 4 David {"age":25, "address":"Shenzhen", "email":"david@example.com"} +20241118012146150 20241118012146150_0_4 5 3c038df9-a652-4878-9b8a-221ae443448e-0_0-165-497_20241118012146150.parquet 5 Eva {"age":30, "address":"Chengdu", "email":"eva@example.com"} +20241118012146150 20241118012146150_0_5 6 3c038df9-a652-4878-9b8a-221ae443448e-0_0-165-497_20241118012146150.parquet 6 Frank {"age":28, "address":"Wuhan", "email":"frank@example.com"} + +-- !altering_complex_columns_table -- +20241118012147879 20241118012147879_0_0 1 185d101f-a484-45ce-b236-03ccd33c521b-0_0-208-622_20241118012149007.parquet 1 Alice {"age":25, "address":"Guangzhou"} +20241118012147879 20241118012147879_0_2 2 185d101f-a484-45ce-b236-03ccd33c521b-0_0-208-622_20241118012149007.parquet 2 Bob {"age":30, "address":"Shanghai"} +20241118012147879 20241118012147879_0_1 3 185d101f-a484-45ce-b236-03ccd33c521b-0_0-208-622_20241118012149007.parquet 3 Cathy {"age":28, "address":"Beijing"} +20241118012149007 20241118012149007_0_3 4 185d101f-a484-45ce-b236-03ccd33c521b-0_0-208-622_20241118012149007.parquet 4 David {"age":26, "address":"Shenzhen"} +20241118012149007 20241118012149007_0_4 5 185d101f-a484-45ce-b236-03ccd33c521b-0_0-208-622_20241118012149007.parquet 5 Eva {"age":31.5, "address":"Chengdu"} +20241118012149007 20241118012149007_0_5 6 185d101f-a484-45ce-b236-03ccd33c521b-0_0-208-622_20241118012149007.parquet 6 Frank {"age":29.2, "address":"Wuhan"} + diff --git a/regression-test/data/external_table_p2/hudi/test_hudi_snapshot.out b/regression-test/data/external_table_p2/hudi/test_hudi_snapshot.out index 114ca5b0a896040cec99a3d30193b758e909f9d2..833ccab2ccd12dae28e686ee317363de74f30063 100644 GIT binary patch delta 127 zcmZ3vPwUwu`wbq9+iUYV7BNqE$YSK4EO1r2-8Y;Sh}nRc9f&!Am~*>tIM*Hb=?OuM vV$=1^8D*z8oM7Fyz5WmLR_5lvEbV_;7=f4xh?#+yW&2+i)($_s((`=*H}o;e delta 117 zcmaF%$bRKMtqmTG+x^A3S29m8;N{?&UT}t0X0ib5_U%1O*reH~zjx-AX+OW33y8Ua zmU8ldD$0j!2{xIw1?GCcci;#?H7GP}`U}XeiCLm@8VwUX!tgLt3 E0pyM<#Q*>R diff --git a/regression-test/data/external_table_p2/hudi/test_hudi_timetravel.out b/regression-test/data/external_table_p2/hudi/test_hudi_timetravel.out index 00d15805baf04e..4e6d98a2aacf35 100644 --- a/regression-test/data/external_table_p2/hudi/test_hudi_timetravel.out +++ b/regression-test/data/external_table_p2/hudi/test_hudi_timetravel.out @@ -239,3 +239,123 @@ -- !timetravel10 -- 10000 +-- !timetravel1 -- +1000 + +-- !timetravel2 -- +2000 + +-- !timetravel3 -- +3000 + +-- !timetravel4 -- +4000 + +-- !timetravel5 -- +5000 + +-- !timetravel6 -- +6000 + +-- !timetravel7 -- +7000 + +-- !timetravel8 -- +8000 + +-- !timetravel9 -- +9000 + +-- !timetravel10 -- +10000 + +-- !timetravel1 -- +1000 + +-- !timetravel2 -- +2000 + +-- !timetravel3 -- +3000 + +-- !timetravel4 -- +4000 + +-- !timetravel5 -- +5000 + +-- !timetravel6 -- +6000 + +-- !timetravel7 -- +7000 + +-- !timetravel8 -- +8000 + +-- !timetravel9 -- +9000 + +-- !timetravel10 -- +10000 + +-- !timetravel1 -- +1000 + +-- !timetravel2 -- +2000 + +-- !timetravel3 -- +3000 + +-- !timetravel4 -- +4000 + +-- !timetravel5 -- +5000 + +-- !timetravel6 -- +6000 + +-- !timetravel7 -- +7000 + +-- !timetravel8 -- +8000 + +-- !timetravel9 -- +9000 + +-- !timetravel10 -- +10000 + +-- !timetravel1 -- +1000 + +-- !timetravel2 -- +2000 + +-- !timetravel3 -- +3000 + +-- !timetravel4 -- +4000 + +-- !timetravel5 -- +5000 + +-- !timetravel6 -- +6000 + +-- !timetravel7 -- +7000 + +-- !timetravel8 -- +8000 + +-- !timetravel9 -- +9000 + +-- !timetravel10 -- +10000 + diff --git a/regression-test/suites/external_table_p2/hudi/test_hudi_incremental.groovy b/regression-test/suites/external_table_p2/hudi/test_hudi_incremental.groovy index 845fc9ee0d9d3e..c079d20be812d0 100644 --- a/regression-test/suites/external_table_p2/hudi/test_hudi_incremental.groovy +++ b/regression-test/suites/external_table_p2/hudi/test_hudi_incremental.groovy @@ -104,19 +104,18 @@ suite("test_hudi_incremental", "p2,external,hudi,external_remote,external_remote "20241114152334111", ] + sql """set force_jni_scanner=true;""" + sql """set hudi_jni_scanner='spark';""" + // test_hudi_incremental_querys("user_activity_log_cow_non_partition", timestamps_cow_non_partition) + // test_hudi_incremental_querys("user_activity_log_cow_partition", timestamps_cow_partition) + test_hudi_incremental_querys("user_activity_log_mor_non_partition", timestamps_mor_non_partition) + test_hudi_incremental_querys("user_activity_log_mor_partition", timestamps_mor_partition) + + sql """set force_jni_scanner=false;""" test_hudi_incremental_querys("user_activity_log_cow_non_partition", timestamps_cow_non_partition) test_hudi_incremental_querys("user_activity_log_cow_partition", timestamps_cow_partition) test_hudi_incremental_querys("user_activity_log_mor_non_partition", timestamps_mor_non_partition) test_hudi_incremental_querys("user_activity_log_mor_partition", timestamps_mor_partition) - // disable jni scanner because the old hudi jni reader based on spark can't read the emr hudi data - // sql """set force_jni_scanner=true;""" - // don't support incremental query for cow table by jni reader - // test_hudi_incremental_querys("user_activity_log_cow_non_partition", timestamps_cow_non_partition) - // test_hudi_incremental_querys("user_activity_log_cow_partition", timestamps_cow_partition) - // test_hudi_incremental_querys("user_activity_log_mor_non_partition", timestamps_mor_non_partition) - // test_hudi_incremental_querys("user_activity_log_mor_partition", timestamps_mor_partition) - // sql """set force_jni_scanner=false;""" - sql """drop catalog if exists ${catalog_name};""" } diff --git a/regression-test/suites/external_table_p2/hudi/test_hudi_schema_evolution.groovy b/regression-test/suites/external_table_p2/hudi/test_hudi_schema_evolution.groovy index 032e195f75fe62..50e24a3b9f522f 100644 --- a/regression-test/suites/external_table_p2/hudi/test_hudi_schema_evolution.groovy +++ b/regression-test/suites/external_table_p2/hudi/test_hudi_schema_evolution.groovy @@ -35,6 +35,9 @@ suite("test_hudi_schema_evolution", "p2,external,hudi,external_remote,external_r sql """ use regression_hudi;""" sql """ set enable_fallback_to_original_planner=false """ + sql """set force_jni_scanner = true;""" + + sql """set hudi_jni_scanner='spark';""" qt_adding_simple_columns_table """ select * from adding_simple_columns_table order by id """ qt_altering_simple_columns_table """ select * from altering_simple_columns_table order by id """ // qt_deleting_simple_columns_table """ select * from deleting_simple_columns_table order by id """ @@ -45,10 +48,9 @@ suite("test_hudi_schema_evolution", "p2,external,hudi,external_remote,external_r // qt_deleting_complex_columns_table """ select * from deleting_complex_columns_table order by id """ // qt_renaming_complex_columns_table """ select * from renaming_complex_columns_table order by id """ - // disable jni scanner because the old hudi jni reader based on spark can't read the emr hudi data - // sql """set force_jni_scanner = true;""" - // qt_adding_simple_columns_table """ select * from adding_simple_columns_table order by id """ - // qt_altering_simple_columns_table """ select * from altering_simple_columns_table order by id """ + sql """set force_jni_scanner = false;""" + qt_adding_simple_columns_table """ select * from adding_simple_columns_table order by id """ + qt_altering_simple_columns_table """ select * from altering_simple_columns_table order by id """ // qt_deleting_simple_columns_table """ select * from deleting_simple_columns_table order by id """ // qt_renaming_simple_columns_table """ select * from renaming_simple_columns_table order by id """ @@ -56,7 +58,6 @@ suite("test_hudi_schema_evolution", "p2,external,hudi,external_remote,external_r // qt_altering_complex_columns_table """ select * from altering_complex_columns_table order by id """ // qt_deleting_complex_columns_table """ select * from deleting_complex_columns_table order by id """ // qt_renaming_complex_columns_table """ select * from renaming_complex_columns_table order by id """ - // sql """set force_jni_scanner = false;""" sql """drop catalog if exists ${catalog_name};""" } diff --git a/regression-test/suites/external_table_p2/hudi/test_hudi_snapshot.groovy b/regression-test/suites/external_table_p2/hudi/test_hudi_snapshot.groovy index 3800c962825ef4..2b43f1e0621aa2 100644 --- a/regression-test/suites/external_table_p2/hudi/test_hudi_snapshot.groovy +++ b/regression-test/suites/external_table_p2/hudi/test_hudi_snapshot.groovy @@ -82,18 +82,19 @@ suite("test_hudi_snapshot", "p2,external,hudi,external_remote,external_remote_hu qt_q15 """SELECT user_id, array_size(purchases) AS purchase_count FROM ${table_name} ORDER BY user_id LIMIT 5;""" } + sql """set force_jni_scanner=true;""" + sql """set hudi_jni_scanner='spark';""" + test_hudi_snapshot_querys("user_activity_log_mor_non_partition") + test_hudi_snapshot_querys("user_activity_log_mor_partition") + test_hudi_snapshot_querys("user_activity_log_cow_non_partition") + test_hudi_snapshot_querys("user_activity_log_cow_partition") + + sql """set force_jni_scanner=false;""" test_hudi_snapshot_querys("user_activity_log_mor_non_partition") test_hudi_snapshot_querys("user_activity_log_mor_partition") test_hudi_snapshot_querys("user_activity_log_cow_non_partition") test_hudi_snapshot_querys("user_activity_log_cow_partition") - // disable jni scanner because the old hudi jni reader based on spark can't read the emr hudi data - // sql """set force_jni_scanner=true;""" - // test_hudi_snapshot_querys("user_activity_log_mor_non_partition") - // test_hudi_snapshot_querys("user_activity_log_mor_partition") - // test_hudi_snapshot_querys("user_activity_log_cow_non_partition") - // test_hudi_snapshot_querys("user_activity_log_cow_partition") - // sql """set force_jni_scanner=false;""" sql """drop catalog if exists ${catalog_name};""" } diff --git a/regression-test/suites/external_table_p2/hudi/test_hudi_timetravel.groovy b/regression-test/suites/external_table_p2/hudi/test_hudi_timetravel.groovy index 812ae5780667c5..0beb2d6a6a93e6 100644 --- a/regression-test/suites/external_table_p2/hudi/test_hudi_timetravel.groovy +++ b/regression-test/suites/external_table_p2/hudi/test_hudi_timetravel.groovy @@ -98,18 +98,19 @@ suite("test_hudi_timetravel", "p2,external,hudi,external_remote,external_remote_ "20241114152334111", ] + sql """set force_jni_scanner=true;""" + sql """set hudi_jni_scanner='spark';""" + test_hudi_timetravel_querys("user_activity_log_cow_non_partition", timestamps_cow_non_partition) + test_hudi_timetravel_querys("user_activity_log_cow_partition", timestamps_cow_partition) + test_hudi_timetravel_querys("user_activity_log_mor_non_partition", timestamps_mor_non_partition) + test_hudi_timetravel_querys("user_activity_log_mor_partition", timestamps_mor_partition) + + sql """set force_jni_scanner=false;""" test_hudi_timetravel_querys("user_activity_log_cow_non_partition", timestamps_cow_non_partition) test_hudi_timetravel_querys("user_activity_log_cow_partition", timestamps_cow_partition) test_hudi_timetravel_querys("user_activity_log_mor_non_partition", timestamps_mor_non_partition) test_hudi_timetravel_querys("user_activity_log_mor_partition", timestamps_mor_partition) - // disable jni scanner because the old hudi jni reader based on spark can't read the emr hudi data - // sql """set force_jni_scanner=true;""" - // test_hudi_timetravel_querys("user_activity_log_cow_non_partition", timestamps_cow_non_partition) - // test_hudi_timetravel_querys("user_activity_log_cow_partition", timestamps_cow_partition) - // test_hudi_timetravel_querys("user_activity_log_mor_non_partition", timestamps_mor_non_partition) - // test_hudi_timetravel_querys("user_activity_log_mor_partition", timestamps_mor_partition) - // sql """set force_jni_scanner=false;""" sql """drop catalog if exists ${catalog_name};""" }