From a018d70f620578e271dba691cfaf9682e9a6ec07 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Tue, 31 Mar 2026 13:03:48 -0400 Subject: [PATCH 1/9] add test for int96 migrated table to CometIcebergNativeSuite. --- native/Cargo.lock | 244 ++++++++---------- native/Cargo.toml | 4 +- .../comet/CometIcebergNativeSuite.scala | 137 ++++++++++ 3 files changed, 253 insertions(+), 132 deletions(-) diff --git a/native/Cargo.lock b/native/Cargo.lock index 598b18d58c..8d841b70ab 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -98,9 +98,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstyle" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" [[package]] name = "anyhow" @@ -135,9 +135,9 @@ dependencies = [ [[package]] name = "arc-swap" -version = "1.8.2" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9f3647c145568cec02c42054e07bdf9a5a698e15b466fb2341bfc393cd24aa5" +checksum = "a07d1f37ff60921c83bdfc7407723bdefe89b44b98a9b772f225c8f9d67141a6" dependencies = [ "rustversion", ] @@ -600,9 +600,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.16.1" +version = "1.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94bffc006df10ac2a68c83692d734a465f8ee6c5b384d8545a636f81d858f4bf" +checksum = "a054912289d18629dc78375ba2c3726a3afe3ff71b4edba9dedfca0e3446d1fc" dependencies = [ "aws-lc-sys", "zeroize", @@ -610,9 +610,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.38.0" +version = "0.39.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4321e568ed89bb5a7d291a7f37997c2c0df89809d7b6d12062c81ddb54aa782e" +checksum = "83a25cf98105baa966497416dbd42565ce3a8cf8dbfd59803ec9ad46f3126399" dependencies = [ "cc", "cmake", @@ -647,9 +647,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.96.0" +version = "1.97.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f64a6eded248c6b453966e915d32aeddb48ea63ad17932682774eb026fbef5b1" +checksum = "9aadc669e184501caaa6beafb28c6267fc1baef0810fb58f9b205485ca3f2567" dependencies = [ "aws-credential-types", "aws-runtime", @@ -671,9 +671,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.98.0" +version = "1.99.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db96d720d3c622fcbe08bae1c4b04a72ce6257d8b0584cb5418da00ae20a344f" +checksum = "1342a7db8f358d3de0aed2007a0b54e875458e39848d54cc1d46700b2bfcb0a8" dependencies = [ "aws-credential-types", "aws-runtime", @@ -695,9 +695,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.100.0" +version = "1.101.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fafbdda43b93f57f699c5dfe8328db590b967b8a820a13ccdd6687355dfcc7ca" +checksum = "ab41ad64e4051ecabeea802d6a17845a91e83287e1dd249e6963ea1ba78c428a" dependencies = [ "aws-credential-types", "aws-runtime", @@ -868,9 +868,9 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.4.6" +version = "1.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2b1117b3b2bbe166d11199b540ceed0d0f7676e36e7b962b5a437a9971eac75" +checksum = "9d73dbfbaa8e4bc57b9045137680b958d274823509a360abfd8e1d514d40c95c" dependencies = [ "base64-simd", "bytes", @@ -1017,7 +1017,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "rustc-hash 2.1.1", + "rustc-hash 2.1.2", "shlex", "syn 2.0.117", ] @@ -1045,16 +1045,16 @@ dependencies = [ [[package]] name = "blake3" -version = "1.8.3" +version = "1.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d" +checksum = "4d2d5991425dfd0785aed03aedcf0b321d61975c9b5b3689c774a2610ae0b51e" dependencies = [ "arrayref", "arrayvec", "cc", "cfg-if", "constant_time_eq", - "cpufeatures 0.2.17", + "cpufeatures 0.3.0", ] [[package]] @@ -1100,9 +1100,9 @@ dependencies = [ [[package]] name = "bon" -version = "3.9.0" +version = "3.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d13a61f2963b88eef9c1be03df65d42f6996dfeac1054870d950fcf66686f83" +checksum = "f47dbe92550676ee653353c310dfb9cf6ba17ee70396e1f7cf0a2020ad49b2fe" dependencies = [ "bon-macros", "rustversion", @@ -1110,9 +1110,9 @@ dependencies = [ [[package]] name = "bon-macros" -version = "3.9.0" +version = "3.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d314cc62af2b6b0c65780555abb4d02a03dd3b799cd42419044f0c38d99738c0" +checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c" dependencies = [ "darling 0.23.0", "ident_case", @@ -1326,18 +1326,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.60" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a" +checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.60" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" dependencies = [ "anstyle", "clap_lex", @@ -1345,15 +1345,15 @@ dependencies = [ [[package]] name = "clap_lex" -version = "1.0.0" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" [[package]] name = "cmake" -version = "0.1.57" +version = "0.1.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d" +checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678" dependencies = [ "cc", ] @@ -1617,16 +1617,6 @@ dependencies = [ "darling_macro 0.20.11", ] -[[package]] -name = "darling" -version = "0.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" -dependencies = [ - "darling_core 0.21.3", - "darling_macro 0.21.3", -] - [[package]] name = "darling" version = "0.23.0" @@ -1651,20 +1641,6 @@ dependencies = [ "syn 2.0.117", ] -[[package]] -name = "darling_core" -version = "0.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4" -dependencies = [ - "fnv", - "ident_case", - "proc-macro2", - "quote", - "strsim", - "syn 2.0.117", -] - [[package]] name = "darling_core" version = "0.23.0" @@ -1689,17 +1665,6 @@ dependencies = [ "syn 2.0.117", ] -[[package]] -name = "darling_macro" -version = "0.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" -dependencies = [ - "darling_core 0.21.3", - "quote", - "syn 2.0.117", -] - [[package]] name = "darling_macro" version = "0.23.0" @@ -2675,9 +2640,9 @@ dependencies = [ [[package]] name = "dissimilar" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8975ffdaa0ef3661bfe02dbdcc06c9f829dfafe6a3c474de366a8d5e44276921" +checksum = "aeda16ab4059c5fd2a83f2b9c9e9c981327b18aa8e3b313f7e6563799d4f093e" [[package]] name = "dlv-list" @@ -3268,9 +3233,9 @@ checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" [[package]] name = "hyper" -version = "1.8.1" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" +checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca" dependencies = [ "atomic-waker", "bytes", @@ -3282,7 +3247,6 @@ dependencies = [ "httparse", "itoa", "pin-project-lite", - "pin-utils", "smallvec", "tokio", "want", @@ -3356,7 +3320,7 @@ dependencies = [ [[package]] name = "iceberg" version = "0.9.0" -source = "git+https://github.com/apache/iceberg-rust?tag=v0.9.0-rc.1#7ef4063926f76f4ab3037227a9fa7a53e21e717f" +source = "git+https://github.com/mbutrovich/iceberg-rust.git?rev=a472d89#a472d8946b712821ffd6f68f06554a67af97693c" dependencies = [ "anyhow", "apache-avro", @@ -3409,7 +3373,7 @@ dependencies = [ [[package]] name = "iceberg-storage-opendal" version = "0.9.0" -source = "git+https://github.com/apache/iceberg-rust?tag=v0.9.0-rc.1#7ef4063926f76f4ab3037227a9fa7a53e21e717f" +source = "git+https://github.com/mbutrovich/iceberg-rust.git?rev=a472d89#a472d8946b712821ffd6f68f06554a67af97693c" dependencies = [ "anyhow", "async-trait", @@ -3597,9 +3561,9 @@ checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" [[package]] name = "inventory" -version = "0.3.22" +version = "0.3.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "009ae045c87e7082cb72dab0ccd01ae075dd00141ddc108f43a0ea150a9e7227" +checksum = "a4f0c30c76f2f4ccee3fe55a2435f691ca00c0e4bd87abe4f4a851b1d4dac39b" dependencies = [ "rustversion", ] @@ -3612,9 +3576,9 @@ checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" [[package]] name = "iri-string" -version = "0.7.10" +version = "0.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" +checksum = "25e659a4bb38e810ebc252e53b5814ff908a8c58c2a9ce2fae1bbec24cbf4e20" dependencies = [ "memchr", "serde", @@ -3651,9 +3615,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" [[package]] name = "java-locator" @@ -3715,7 +3679,7 @@ dependencies = [ "cfg-if", "combine", "java-locator", - "jni-sys", + "jni-sys 0.3.1", "libloading 0.7.4", "log", "thiserror 1.0.69", @@ -3725,9 +3689,31 @@ dependencies = [ [[package]] name = "jni-sys" -version = "0.3.0" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41a652e1f9b6e0275df1f15b32661cf0d4b78d4d87ddec5e0c3c20f097433258" +dependencies = [ + "jni-sys 0.4.1", +] + +[[package]] +name = "jni-sys" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" +checksum = "c6377a88cb3910bee9b0fa88d4f42e1d2da8e79915598f65fb0c7ee14c878af2" +dependencies = [ + "jni-sys-macros", +] + +[[package]] +name = "jni-sys-macros" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38c0b942f458fe50cdac086d2f946512305e5631e720728f2a61aabcd47a6264" +dependencies = [ + "quote", + "syn 2.0.117", +] [[package]] name = "jobserver" @@ -3741,10 +3727,12 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.91" +version = "0.3.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" +checksum = "797146bb2677299a1eb6b7b50a890f4c361b29ef967addf5b2fa45dae1bb6d7d" dependencies = [ + "cfg-if", + "futures-util", "once_cell", "wasm-bindgen", ] @@ -4067,9 +4055,9 @@ dependencies = [ [[package]] name = "mio" -version = "1.1.1" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" +checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" dependencies = [ "libc", "wasi", @@ -4084,9 +4072,9 @@ checksum = "dce6dd36094cac388f119d2e9dc82dc730ef91c32a6222170d630e5414b956e6" [[package]] name = "moka" -version = "0.12.14" +version = "0.12.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85f8024e1c8e71c778968af91d43700ce1d11b219d127d79fb2934153b82b42b" +checksum = "957228ad12042ee839f93c8f257b62b4c0ab5eaae1d4fa60de53b27c9d7c5046" dependencies = [ "async-lock", "crossbeam-channel", @@ -4187,9 +4175,9 @@ dependencies = [ [[package]] name = "num-conv" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" +checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" [[package]] name = "num-format" @@ -4715,9 +4703,9 @@ checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "portable-atomic-util" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a9db96d7fa8782dd8c15ce32ffe8680bbd1e978a43bf51a34d39483540495f5" +checksum = "091397be61a01d4be58e7841595bd4bfedb15f1cd54977d79b8271e94ed799a3" dependencies = [ "portable-atomic", ] @@ -4908,7 +4896,7 @@ dependencies = [ "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash 2.1.1", + "rustc-hash 2.1.2", "rustls", "socket2", "thiserror 2.0.18", @@ -4928,7 +4916,7 @@ dependencies = [ "lru-slab", "rand 0.9.2", "ring", - "rustc-hash 2.1.1", + "rustc-hash 2.1.2", "rustls", "rustls-pki-types", "slab", @@ -5286,9 +5274,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustc-hash" -version = "2.1.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" +checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" [[package]] name = "rustc_version" @@ -5595,9 +5583,9 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.17.0" +version = "3.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "381b283ce7bc6b476d903296fb59d0d36633652b633b27f64db4fb46dcbfc3b9" +checksum = "dd5414fad8e6907dbdd5bc441a50ae8d6e26151a03b1de04d89a5576de61d01f" dependencies = [ "base64", "chrono", @@ -5614,11 +5602,11 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.17.0" +version = "3.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6d4e30573c8cb306ed6ab1dca8423eec9a463ea0e155f45399455e0368b27e0" +checksum = "d3db8978e608f1fe7357e211969fd9abdcae80bac1ba7a3369bb7eb6b404eb65" dependencies = [ - "darling 0.21.3", + "darling 0.23.0", "proc-macro2", "quote", "syn 2.0.117", @@ -5836,9 +5824,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "symbolic-common" -version = "12.17.2" +version = "12.17.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "751a2823d606b5d0a7616499e4130a516ebd01a44f39811be2b9600936509c23" +checksum = "52ca086c1eb5c7ee74b151ba83c6487d5d33f8c08ad991b86f3f58f6629e68d5" dependencies = [ "debugid", "memmap2", @@ -5848,9 +5836,9 @@ dependencies = [ [[package]] name = "symbolic-demangle" -version = "12.17.2" +version = "12.17.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79b237cfbe320601dd24b4ac817a5b68bb28f5508e33f08d42be0682cadc8ac9" +checksum = "baa911a28a62823aaf2cc2e074212492a3ee69d0d926cc8f5b12b4a108ff5c0c" dependencies = [ "cpp_demangle", "rustc-demangle", @@ -6072,9 +6060,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" +checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" dependencies = [ "tinyvec_macros", ] @@ -6300,9 +6288,9 @@ checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "unicode-segmentation" -version = "1.12.0" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c" [[package]] name = "unicode-width" @@ -6436,9 +6424,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.114" +version = "0.2.116" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" +checksum = "7dc0882f7b5bb01ae8c5215a1230832694481c1a4be062fd410e12ea3da5b631" dependencies = [ "cfg-if", "once_cell", @@ -6449,23 +6437,19 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.64" +version = "0.4.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8" +checksum = "19280959e2844181895ef62f065c63e0ca07ece4771b53d89bfdb967d97cbf05" dependencies = [ - "cfg-if", - "futures-util", "js-sys", - "once_cell", "wasm-bindgen", - "web-sys", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.114" +version = "0.2.116" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" +checksum = "75973d3066e01d035dbedaad2864c398df42f8dd7b1ea057c35b8407c015b537" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -6473,9 +6457,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.114" +version = "0.2.116" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" +checksum = "91af5e4be765819e0bcfee7322c14374dc821e35e72fa663a830bbc7dc199eac" dependencies = [ "bumpalo", "proc-macro2", @@ -6486,9 +6470,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.114" +version = "0.2.116" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" +checksum = "c9bf0406a78f02f336bf1e451799cca198e8acde4ffa278f0fb20487b150a633" dependencies = [ "unicode-ident", ] @@ -6542,9 +6526,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.91" +version = "0.3.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9" +checksum = "749466a37ee189057f54748b200186b59a03417a117267baf3fd89cecc9fb837" dependencies = [ "js-sys", "wasm-bindgen", @@ -7027,18 +7011,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.42" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2578b716f8a7a858b7f02d5bd870c14bf4ddbbcf3a4c05414ba6503640505e3" +checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.42" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e6cc098ea4d3bd6246687de65af3f920c430e236bee1e3bf2e441463f08a02f" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" dependencies = [ "proc-macro2", "quote", diff --git a/native/Cargo.toml b/native/Cargo.toml index c626743be1..a1cf8e5a9e 100644 --- a/native/Cargo.toml +++ b/native/Cargo.toml @@ -58,8 +58,8 @@ object_store = { version = "0.12.3", features = ["gcp", "azure", "aws", "http"] url = "2.2" aws-config = "1.8.14" aws-credential-types = "1.2.13" -iceberg = { git = "https://github.com/apache/iceberg-rust", tag = "v0.9.0-rc.1" } -iceberg-storage-opendal = { git = "https://github.com/apache/iceberg-rust", tag = "v0.9.0-rc.1", features = ["opendal-all"] } +iceberg = { git = "https://github.com/mbutrovich/iceberg-rust.git", rev = "a472d89" } +iceberg-storage-opendal = { git = "https://github.com/mbutrovich/iceberg-rust.git", rev = "a472d89", features = ["opendal-all"] } [profile.release] debug = true diff --git a/spark/src/test/scala/org/apache/comet/CometIcebergNativeSuite.scala b/spark/src/test/scala/org/apache/comet/CometIcebergNativeSuite.scala index 033b634e0f..85e4542a54 100644 --- a/spark/src/test/scala/org/apache/comet/CometIcebergNativeSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometIcebergNativeSuite.scala @@ -2242,6 +2242,143 @@ class CometIcebergNativeSuite extends CometTestBase with RESTCatalogHelper { } } + import scala.collection.JavaConverters._ + + import org.apache.spark.sql.internal.SQLConf + + test("migration - INT96 timestamp with hour partitioning") { + assume(icebergAvailable, "Iceberg not available in classpath") + + withTempIcebergDir { warehouseDir => + withSQLConf( + "spark.sql.catalog.test_cat" -> "org.apache.iceberg.spark.SparkCatalog", + "spark.sql.catalog.test_cat.type" -> "hadoop", + "spark.sql.catalog.test_cat.warehouse" -> warehouseDir.getAbsolutePath, + CometConf.COMET_ENABLED.key -> "true", + CometConf.COMET_EXEC_ENABLED.key -> "true", + CometConf.COMET_ICEBERG_NATIVE_ENABLED.key -> "true") { + + import org.apache.comet.testing.{DataGenOptions, FuzzDataGenerator} + import org.apache.spark.sql.functions.monotonically_increasing_id + import org.apache.spark.sql.types._ + + val dataPath = s"${warehouseDir.getAbsolutePath}/int96_data" + val numRows = 50 + val r = new scala.util.Random(42) + + // Schema for FuzzDataGenerator - just timestamp and value columns + val fuzzSchema = StructType( + Seq( + StructField("outputTimestamp", TimestampType, nullable = true), + StructField("value", DoubleType, nullable = true))) + + // Use FuzzDataGenerator with default options (year 3333 baseDate for INT96) + val dataGenOptions = DataGenOptions(allowNull = false) + val fuzzDf = + FuzzDataGenerator.generateDataFrame(r, spark, fuzzSchema, numRows, dataGenOptions) + + // Add unique id and geohash columns + val df = fuzzDf + .withColumn("id", monotonically_increasing_id()) + .selectExpr( + "id", + "outputTimestamp", + "concat(substring('0123456789bcdefghjkmnpqrstuvwxyz', 1 + int(id % 32), 1), " + + "substring('0123456789bcdefghjkmnpqrstuvwxyz', 1 + int((id / 32) % 32), 1), " + + "substring('0123456789bcdefghjkmnpqrstuvwxyz', 1 + int((id / 1024) % 32), 1)) as geohash3", + "value") + + // Write Parquet with INT96 timestamps + withSQLConf(SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key -> "INT96") { + df.write.mode("overwrite").parquet(dataPath) + } + + // Verify the Parquet files actually contain INT96 timestamps + val parquetFiles = new java.io.File(dataPath) + .listFiles() + .filter(f => f.getName.endsWith(".parquet")) + assert(parquetFiles.nonEmpty, "Expected at least one Parquet file") + + val parquetFile = parquetFiles.head + val reader = org.apache.parquet.hadoop.ParquetFileReader.open( + org.apache.parquet.hadoop.util.HadoopInputFile.fromPath( + new org.apache.hadoop.fs.Path(parquetFile.getAbsolutePath), + spark.sessionState.newHadoopConf())) + try { + val parquetSchema = reader.getFooter.getFileMetaData.getSchema + val timestampColumn = parquetSchema.getColumns.asScala + .find(_.getPath.mkString(".") == "outputTimestamp") + assert(timestampColumn.isDefined, "Expected outputTimestamp column in Parquet schema") + assert( + timestampColumn.get.getPrimitiveType.getPrimitiveTypeName == + org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT96, + s"Expected INT96 type for outputTimestamp but got ${timestampColumn.get.getPrimitiveType.getPrimitiveTypeName}") + } finally { + reader.close() + } + + // Create Iceberg table with hour(timestamp) + truncate(geohash, 3) partitioning + spark.sql("CREATE NAMESPACE IF NOT EXISTS test_cat.db") + spark.sql(s""" + CREATE TABLE test_cat.db.int96_hour_test ( + id BIGINT, + outputTimestamp TIMESTAMP, + geohash3 STRING, + value DOUBLE + ) USING iceberg + PARTITIONED BY (hours(outputTimestamp), truncate(geohash3, 3)) + """) + + // Use SparkTableUtil.importSparkTable to import the Parquet files + try { + val tableUtilClass = Class.forName("org.apache.iceberg.spark.SparkTableUtil") + val sparkCatalog = spark.sessionState.catalogManager + .catalog("test_cat") + .asInstanceOf[org.apache.iceberg.spark.SparkCatalog] + val ident = + org.apache.spark.sql.connector.catalog.Identifier.of(Array("db"), "int96_hour_test") + val sparkTable = sparkCatalog + .loadTable(ident) + .asInstanceOf[org.apache.iceberg.spark.source.SparkTable] + val table = sparkTable.table() + + val stagingDir = s"${warehouseDir.getAbsolutePath}/staging" + + // Create a temp table pointing to the parquet path + spark.sql(s"""CREATE TABLE parquet_temp USING parquet LOCATION '$dataPath'""") + val sourceIdent = new org.apache.spark.sql.catalyst.TableIdentifier("parquet_temp") + + val importMethod = tableUtilClass.getMethod( + "importSparkTable", + classOf[org.apache.spark.sql.SparkSession], + classOf[org.apache.spark.sql.catalyst.TableIdentifier], + classOf[org.apache.iceberg.Table], + classOf[String]) + importMethod.invoke(null, spark, sourceIdent, table, stagingDir) + + // Query the table and verify no duplicates + val distinctCount = spark + .sql("SELECT COUNT(DISTINCT id) FROM test_cat.db.int96_hour_test") + .collect()(0) + .getLong(0) + assert( + distinctCount == numRows, + s"Expected $numRows distinct IDs but got $distinctCount") + + checkIcebergNativeScan("SELECT * FROM test_cat.db.int96_hour_test ORDER BY id") + checkIcebergNativeScan( + "SELECT id, outputTimestamp FROM test_cat.db.int96_hour_test WHERE id < 50 ORDER BY id") + + spark.sql("DROP TABLE test_cat.db.int96_hour_test") + spark.sql("DROP TABLE parquet_temp") + } catch { + case _: ClassNotFoundException => + cancel("SparkTableUtil not available") + } + } + } + } + test("REST catalog with native Iceberg scan") { assume(icebergAvailable, "Iceberg not available in classpath") From f4cf07fd0e19f9fbef515ea6548a745d710b1f68 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Tue, 31 Mar 2026 13:57:41 -0400 Subject: [PATCH 2/9] fix spotless --- .../test/scala/org/apache/comet/CometIcebergNativeSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spark/src/test/scala/org/apache/comet/CometIcebergNativeSuite.scala b/spark/src/test/scala/org/apache/comet/CometIcebergNativeSuite.scala index 85e4542a54..cca745c196 100644 --- a/spark/src/test/scala/org/apache/comet/CometIcebergNativeSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometIcebergNativeSuite.scala @@ -2319,7 +2319,7 @@ class CometIcebergNativeSuite extends CometTestBase with RESTCatalogHelper { // Create Iceberg table with hour(timestamp) + truncate(geohash, 3) partitioning spark.sql("CREATE NAMESPACE IF NOT EXISTS test_cat.db") - spark.sql(s""" + spark.sql(""" CREATE TABLE test_cat.db.int96_hour_test ( id BIGINT, outputTimestamp TIMESTAMP, From 918808ce30c8349958a76b19f30ab18482886450 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Tue, 31 Mar 2026 14:45:19 -0400 Subject: [PATCH 3/9] bump commit after PR feedback --- native/Cargo.lock | 4 ++-- native/Cargo.toml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/native/Cargo.lock b/native/Cargo.lock index 8d841b70ab..d3be81b05e 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -3320,7 +3320,7 @@ dependencies = [ [[package]] name = "iceberg" version = "0.9.0" -source = "git+https://github.com/mbutrovich/iceberg-rust.git?rev=a472d89#a472d8946b712821ffd6f68f06554a67af97693c" +source = "git+https://github.com/mbutrovich/iceberg-rust.git?rev=26e6424#26e6424053e6fa95c9283a9b04c9e22afb2fd05e" dependencies = [ "anyhow", "apache-avro", @@ -3373,7 +3373,7 @@ dependencies = [ [[package]] name = "iceberg-storage-opendal" version = "0.9.0" -source = "git+https://github.com/mbutrovich/iceberg-rust.git?rev=a472d89#a472d8946b712821ffd6f68f06554a67af97693c" +source = "git+https://github.com/mbutrovich/iceberg-rust.git?rev=26e6424#26e6424053e6fa95c9283a9b04c9e22afb2fd05e" dependencies = [ "anyhow", "async-trait", diff --git a/native/Cargo.toml b/native/Cargo.toml index a1cf8e5a9e..3064c149a3 100644 --- a/native/Cargo.toml +++ b/native/Cargo.toml @@ -58,8 +58,8 @@ object_store = { version = "0.12.3", features = ["gcp", "azure", "aws", "http"] url = "2.2" aws-config = "1.8.14" aws-credential-types = "1.2.13" -iceberg = { git = "https://github.com/mbutrovich/iceberg-rust.git", rev = "a472d89" } -iceberg-storage-opendal = { git = "https://github.com/mbutrovich/iceberg-rust.git", rev = "a472d89", features = ["opendal-all"] } +iceberg = { git = "https://github.com/mbutrovich/iceberg-rust.git", rev = "26e6424" } +iceberg-storage-opendal = { git = "https://github.com/mbutrovich/iceberg-rust.git", rev = "26e6424", features = ["opendal-all"] } [profile.release] debug = true From 7aaf88170933b3bff832cdc524fedc98321c7313 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Tue, 31 Mar 2026 15:31:36 -0400 Subject: [PATCH 4/9] Fix test. --- .../comet/CometIcebergNativeSuite.scala | 40 ++++++------------- 1 file changed, 13 insertions(+), 27 deletions(-) diff --git a/spark/src/test/scala/org/apache/comet/CometIcebergNativeSuite.scala b/spark/src/test/scala/org/apache/comet/CometIcebergNativeSuite.scala index cca745c196..99122da08d 100644 --- a/spark/src/test/scala/org/apache/comet/CometIcebergNativeSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometIcebergNativeSuite.scala @@ -2246,7 +2246,7 @@ class CometIcebergNativeSuite extends CometTestBase with RESTCatalogHelper { import org.apache.spark.sql.internal.SQLConf - test("migration - INT96 timestamp with hour partitioning") { + test("migration - INT96 timestamp") { assume(icebergAvailable, "Iceberg not available in classpath") withTempIcebergDir { warehouseDir => @@ -2266,27 +2266,18 @@ class CometIcebergNativeSuite extends CometTestBase with RESTCatalogHelper { val numRows = 50 val r = new scala.util.Random(42) - // Schema for FuzzDataGenerator - just timestamp and value columns val fuzzSchema = StructType( Seq( StructField("outputTimestamp", TimestampType, nullable = true), StructField("value", DoubleType, nullable = true))) - // Use FuzzDataGenerator with default options (year 3333 baseDate for INT96) + // Default FuzzDataGenerator baseDate is year 3333, outside the i64 nanosecond + // range (~1677-2262). This triggers the INT96 overflow bug if coercion is missing. val dataGenOptions = DataGenOptions(allowNull = false) val fuzzDf = FuzzDataGenerator.generateDataFrame(r, spark, fuzzSchema, numRows, dataGenOptions) - // Add unique id and geohash columns - val df = fuzzDf - .withColumn("id", monotonically_increasing_id()) - .selectExpr( - "id", - "outputTimestamp", - "concat(substring('0123456789bcdefghjkmnpqrstuvwxyz', 1 + int(id % 32), 1), " + - "substring('0123456789bcdefghjkmnpqrstuvwxyz', 1 + int((id / 32) % 32), 1), " + - "substring('0123456789bcdefghjkmnpqrstuvwxyz', 1 + int((id / 1024) % 32), 1)) as geohash3", - "value") + val df = fuzzDf.withColumn("id", monotonically_increasing_id()) // Write Parquet with INT96 timestamps withSQLConf(SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key -> "INT96") { @@ -2317,26 +2308,23 @@ class CometIcebergNativeSuite extends CometTestBase with RESTCatalogHelper { reader.close() } - // Create Iceberg table with hour(timestamp) + truncate(geohash, 3) partitioning + // Create an unpartitioned Iceberg table and import the Parquet files spark.sql("CREATE NAMESPACE IF NOT EXISTS test_cat.db") spark.sql(""" - CREATE TABLE test_cat.db.int96_hour_test ( - id BIGINT, + CREATE TABLE test_cat.db.int96_test ( outputTimestamp TIMESTAMP, - geohash3 STRING, - value DOUBLE + value DOUBLE, + id BIGINT ) USING iceberg - PARTITIONED BY (hours(outputTimestamp), truncate(geohash3, 3)) """) - // Use SparkTableUtil.importSparkTable to import the Parquet files try { val tableUtilClass = Class.forName("org.apache.iceberg.spark.SparkTableUtil") val sparkCatalog = spark.sessionState.catalogManager .catalog("test_cat") .asInstanceOf[org.apache.iceberg.spark.SparkCatalog] val ident = - org.apache.spark.sql.connector.catalog.Identifier.of(Array("db"), "int96_hour_test") + org.apache.spark.sql.connector.catalog.Identifier.of(Array("db"), "int96_test") val sparkTable = sparkCatalog .loadTable(ident) .asInstanceOf[org.apache.iceberg.spark.source.SparkTable] @@ -2344,7 +2332,6 @@ class CometIcebergNativeSuite extends CometTestBase with RESTCatalogHelper { val stagingDir = s"${warehouseDir.getAbsolutePath}/staging" - // Create a temp table pointing to the parquet path spark.sql(s"""CREATE TABLE parquet_temp USING parquet LOCATION '$dataPath'""") val sourceIdent = new org.apache.spark.sql.catalyst.TableIdentifier("parquet_temp") @@ -2356,20 +2343,19 @@ class CometIcebergNativeSuite extends CometTestBase with RESTCatalogHelper { classOf[String]) importMethod.invoke(null, spark, sourceIdent, table, stagingDir) - // Query the table and verify no duplicates val distinctCount = spark - .sql("SELECT COUNT(DISTINCT id) FROM test_cat.db.int96_hour_test") + .sql("SELECT COUNT(DISTINCT id) FROM test_cat.db.int96_test") .collect()(0) .getLong(0) assert( distinctCount == numRows, s"Expected $numRows distinct IDs but got $distinctCount") - checkIcebergNativeScan("SELECT * FROM test_cat.db.int96_hour_test ORDER BY id") + checkIcebergNativeScan("SELECT * FROM test_cat.db.int96_test ORDER BY id") checkIcebergNativeScan( - "SELECT id, outputTimestamp FROM test_cat.db.int96_hour_test WHERE id < 50 ORDER BY id") + "SELECT id, outputTimestamp FROM test_cat.db.int96_test WHERE id < 50 ORDER BY id") - spark.sql("DROP TABLE test_cat.db.int96_hour_test") + spark.sql("DROP TABLE test_cat.db.int96_test") spark.sql("DROP TABLE parquet_temp") } catch { case _: ClassNotFoundException => From 460293caf2ace4dc84c68c448a761df9acc13bf9 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Tue, 31 Mar 2026 16:40:01 -0400 Subject: [PATCH 5/9] Bump to new commit based on PR feedback. --- native/Cargo.lock | 4 ++-- native/Cargo.toml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/native/Cargo.lock b/native/Cargo.lock index ce7bd3f02c..82cdbafe41 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -3321,7 +3321,7 @@ dependencies = [ [[package]] name = "iceberg" version = "0.9.0" -source = "git+https://github.com/mbutrovich/iceberg-rust.git?rev=26e6424#26e6424053e6fa95c9283a9b04c9e22afb2fd05e" +source = "git+https://github.com/mbutrovich/iceberg-rust.git?rev=fe9264c#fe9264c6c43eb869a158df22e2a603b583a567e4" dependencies = [ "anyhow", "apache-avro", @@ -3374,7 +3374,7 @@ dependencies = [ [[package]] name = "iceberg-storage-opendal" version = "0.9.0" -source = "git+https://github.com/mbutrovich/iceberg-rust.git?rev=26e6424#26e6424053e6fa95c9283a9b04c9e22afb2fd05e" +source = "git+https://github.com/mbutrovich/iceberg-rust.git?rev=fe9264c#fe9264c6c43eb869a158df22e2a603b583a567e4" dependencies = [ "anyhow", "async-trait", diff --git a/native/Cargo.toml b/native/Cargo.toml index 3064c149a3..b58b0090d1 100644 --- a/native/Cargo.toml +++ b/native/Cargo.toml @@ -58,8 +58,8 @@ object_store = { version = "0.12.3", features = ["gcp", "azure", "aws", "http"] url = "2.2" aws-config = "1.8.14" aws-credential-types = "1.2.13" -iceberg = { git = "https://github.com/mbutrovich/iceberg-rust.git", rev = "26e6424" } -iceberg-storage-opendal = { git = "https://github.com/mbutrovich/iceberg-rust.git", rev = "26e6424", features = ["opendal-all"] } +iceberg = { git = "https://github.com/mbutrovich/iceberg-rust.git", rev = "fe9264c" } +iceberg-storage-opendal = { git = "https://github.com/mbutrovich/iceberg-rust.git", rev = "fe9264c", features = ["opendal-all"] } [profile.release] debug = true From 3d1d08b916b994c4d4cddfc6e393b5adb653cbbc Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Tue, 31 Mar 2026 17:11:41 -0400 Subject: [PATCH 6/9] more robust test --- .../comet/CometIcebergNativeSuite.scala | 62 +++++++++++++++---- 1 file changed, 50 insertions(+), 12 deletions(-) diff --git a/spark/src/test/scala/org/apache/comet/CometIcebergNativeSuite.scala b/spark/src/test/scala/org/apache/comet/CometIcebergNativeSuite.scala index 99122da08d..6731a6d425 100644 --- a/spark/src/test/scala/org/apache/comet/CometIcebergNativeSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometIcebergNativeSuite.scala @@ -2266,10 +2266,22 @@ class CometIcebergNativeSuite extends CometTestBase with RESTCatalogHelper { val numRows = 50 val r = new scala.util.Random(42) + // Exercise INT96 coercion in flat columns, structs, arrays, and maps val fuzzSchema = StructType( Seq( - StructField("outputTimestamp", TimestampType, nullable = true), - StructField("value", DoubleType, nullable = true))) + StructField("ts", TimestampType, nullable = true), + StructField("value", DoubleType, nullable = true), + StructField( + "ts_struct", + StructType(Seq( + StructField("inner_ts", TimestampType, nullable = true), + StructField("inner_val", DoubleType, nullable = true))), + nullable = true), + StructField("ts_array", ArrayType(TimestampType, containsNull = true), nullable = true), + StructField( + "ts_map", + MapType(IntegerType, TimestampType), + nullable = true))) // Default FuzzDataGenerator baseDate is year 3333, outside the i64 nanosecond // range (~1677-2262). This triggers the INT96 overflow bug if coercion is missing. @@ -2284,7 +2296,7 @@ class CometIcebergNativeSuite extends CometTestBase with RESTCatalogHelper { df.write.mode("overwrite").parquet(dataPath) } - // Verify the Parquet files actually contain INT96 timestamps + // Verify all timestamp columns in the Parquet file use INT96 val parquetFiles = new java.io.File(dataPath) .listFiles() .filter(f => f.getName.endsWith(".parquet")) @@ -2297,13 +2309,15 @@ class CometIcebergNativeSuite extends CometTestBase with RESTCatalogHelper { spark.sessionState.newHadoopConf())) try { val parquetSchema = reader.getFooter.getFileMetaData.getSchema - val timestampColumn = parquetSchema.getColumns.asScala - .find(_.getPath.mkString(".") == "outputTimestamp") - assert(timestampColumn.isDefined, "Expected outputTimestamp column in Parquet schema") + val int96Columns = parquetSchema.getColumns.asScala + .filter( + _.getPrimitiveType.getPrimitiveTypeName == + org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT96) + .map(_.getPath.mkString(".")) + // Expect INT96 for: ts, ts_struct.inner_ts, ts_array.list.element, ts_map.value assert( - timestampColumn.get.getPrimitiveType.getPrimitiveTypeName == - org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT96, - s"Expected INT96 type for outputTimestamp but got ${timestampColumn.get.getPrimitiveType.getPrimitiveTypeName}") + int96Columns.size >= 4, + s"Expected at least 4 INT96 columns but found ${int96Columns.size}: ${int96Columns.mkString(", ")}") } finally { reader.close() } @@ -2312,8 +2326,11 @@ class CometIcebergNativeSuite extends CometTestBase with RESTCatalogHelper { spark.sql("CREATE NAMESPACE IF NOT EXISTS test_cat.db") spark.sql(""" CREATE TABLE test_cat.db.int96_test ( - outputTimestamp TIMESTAMP, + ts TIMESTAMP, value DOUBLE, + ts_struct STRUCT, + ts_array ARRAY, + ts_map MAP, id BIGINT ) USING iceberg """) @@ -2351,9 +2368,30 @@ class CometIcebergNativeSuite extends CometTestBase with RESTCatalogHelper { distinctCount == numRows, s"Expected $numRows distinct IDs but got $distinctCount") - checkIcebergNativeScan("SELECT * FROM test_cat.db.int96_test ORDER BY id") + // Spark's Iceberg reader returns null for INT96 timestamps inside structs, + // so we can't use checkIcebergNativeScan (which compares against Spark) for + // ts_struct. Instead, compare Comet's read against the raw Parquet source. + checkIcebergNativeScan( + "SELECT id, ts, value, ts_array, ts_map FROM test_cat.db.int96_test ORDER BY id") + checkIcebergNativeScan( + "SELECT id, ts FROM test_cat.db.int96_test ORDER BY id") checkIcebergNativeScan( - "SELECT id, outputTimestamp FROM test_cat.db.int96_test WHERE id < 50 ORDER BY id") + "SELECT id, ts_array FROM test_cat.db.int96_test ORDER BY id") + checkIcebergNativeScan( + "SELECT id, ts_map FROM test_cat.db.int96_test ORDER BY id") + + // Validate ts_struct against raw Parquet since Spark's Iceberg reader can't read it + val icebergStructDf = spark + .sql("SELECT id, ts_struct FROM test_cat.db.int96_test ORDER BY id") + .collect() + val parquetStructDf = spark.read + .parquet(dataPath) + .select("id", "ts_struct") + .orderBy("id") + .collect() + assert( + icebergStructDf.sameElements(parquetStructDf), + s"ts_struct mismatch between Comet Iceberg read and raw Parquet") spark.sql("DROP TABLE test_cat.db.int96_test") spark.sql("DROP TABLE parquet_temp") From 081404df7d340cf5d7549a0b42912b073d490e67 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Tue, 31 Mar 2026 18:04:42 -0400 Subject: [PATCH 7/9] make format. --- .../comet/CometIcebergNativeSuite.scala | 31 +++++++++---------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/spark/src/test/scala/org/apache/comet/CometIcebergNativeSuite.scala b/spark/src/test/scala/org/apache/comet/CometIcebergNativeSuite.scala index 6731a6d425..52b5f271d6 100644 --- a/spark/src/test/scala/org/apache/comet/CometIcebergNativeSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometIcebergNativeSuite.scala @@ -2273,15 +2273,16 @@ class CometIcebergNativeSuite extends CometTestBase with RESTCatalogHelper { StructField("value", DoubleType, nullable = true), StructField( "ts_struct", - StructType(Seq( - StructField("inner_ts", TimestampType, nullable = true), - StructField("inner_val", DoubleType, nullable = true))), + StructType( + Seq( + StructField("inner_ts", TimestampType, nullable = true), + StructField("inner_val", DoubleType, nullable = true))), nullable = true), - StructField("ts_array", ArrayType(TimestampType, containsNull = true), nullable = true), StructField( - "ts_map", - MapType(IntegerType, TimestampType), - nullable = true))) + "ts_array", + ArrayType(TimestampType, containsNull = true), + nullable = true), + StructField("ts_map", MapType(IntegerType, TimestampType), nullable = true))) // Default FuzzDataGenerator baseDate is year 3333, outside the i64 nanosecond // range (~1677-2262). This triggers the INT96 overflow bug if coercion is missing. @@ -2310,9 +2311,8 @@ class CometIcebergNativeSuite extends CometTestBase with RESTCatalogHelper { try { val parquetSchema = reader.getFooter.getFileMetaData.getSchema val int96Columns = parquetSchema.getColumns.asScala - .filter( - _.getPrimitiveType.getPrimitiveTypeName == - org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT96) + .filter(_.getPrimitiveType.getPrimitiveTypeName == + org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT96) .map(_.getPath.mkString(".")) // Expect INT96 for: ts, ts_struct.inner_ts, ts_array.list.element, ts_map.value assert( @@ -2373,12 +2373,9 @@ class CometIcebergNativeSuite extends CometTestBase with RESTCatalogHelper { // ts_struct. Instead, compare Comet's read against the raw Parquet source. checkIcebergNativeScan( "SELECT id, ts, value, ts_array, ts_map FROM test_cat.db.int96_test ORDER BY id") - checkIcebergNativeScan( - "SELECT id, ts FROM test_cat.db.int96_test ORDER BY id") - checkIcebergNativeScan( - "SELECT id, ts_array FROM test_cat.db.int96_test ORDER BY id") - checkIcebergNativeScan( - "SELECT id, ts_map FROM test_cat.db.int96_test ORDER BY id") + checkIcebergNativeScan("SELECT id, ts FROM test_cat.db.int96_test ORDER BY id") + checkIcebergNativeScan("SELECT id, ts_array FROM test_cat.db.int96_test ORDER BY id") + checkIcebergNativeScan("SELECT id, ts_map FROM test_cat.db.int96_test ORDER BY id") // Validate ts_struct against raw Parquet since Spark's Iceberg reader can't read it val icebergStructDf = spark @@ -2391,7 +2388,7 @@ class CometIcebergNativeSuite extends CometTestBase with RESTCatalogHelper { .collect() assert( icebergStructDf.sameElements(parquetStructDf), - s"ts_struct mismatch between Comet Iceberg read and raw Parquet") + "ts_struct mismatch between Comet Iceberg read and raw Parquet") spark.sql("DROP TABLE test_cat.db.int96_test") spark.sql("DROP TABLE parquet_temp") From b93c6073f7a00bf6fa77ffd8e8403d5cf4db6f87 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Wed, 1 Apr 2026 11:18:43 -0400 Subject: [PATCH 8/9] Bump to new commit based on PR feedback. --- native/Cargo.lock | 36 ++++++++++++++++++------------------ native/Cargo.toml | 4 ++-- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/native/Cargo.lock b/native/Cargo.lock index 82cdbafe41..cc55fd860e 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -3321,7 +3321,7 @@ dependencies = [ [[package]] name = "iceberg" version = "0.9.0" -source = "git+https://github.com/mbutrovich/iceberg-rust.git?rev=fe9264c#fe9264c6c43eb869a158df22e2a603b583a567e4" +source = "git+https://github.com/mbutrovich/iceberg-rust.git?rev=e48b177#e48b17777db7c221a416be288d2357ad0baa4ac8" dependencies = [ "anyhow", "apache-avro", @@ -3374,7 +3374,7 @@ dependencies = [ [[package]] name = "iceberg-storage-opendal" version = "0.9.0" -source = "git+https://github.com/mbutrovich/iceberg-rust.git?rev=fe9264c#fe9264c6c43eb869a158df22e2a603b583a567e4" +source = "git+https://github.com/mbutrovich/iceberg-rust.git?rev=e48b177#e48b17777db7c221a416be288d2357ad0baa4ac8" dependencies = [ "anyhow", "async-trait", @@ -3728,9 +3728,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.93" +version = "0.3.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "797146bb2677299a1eb6b7b50a890f4c361b29ef967addf5b2fa45dae1bb6d7d" +checksum = "2e04e2ef80ce82e13552136fabeef8a5ed1f985a96805761cbb9a2c34e7664d9" dependencies = [ "cfg-if", "futures-util", @@ -3848,9 +3848,9 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" [[package]] name = "libc" -version = "0.2.183" +version = "0.2.184" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" +checksum = "48f5d2a454e16a5ea0f4ced81bd44e4cfc7bd3a507b61887c99fd3538b28e4af" [[package]] name = "libloading" @@ -6425,9 +6425,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.116" +version = "0.2.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dc0882f7b5bb01ae8c5215a1230832694481c1a4be062fd410e12ea3da5b631" +checksum = "0551fc1bb415591e3372d0bc4780db7e587d84e2a7e79da121051c5c4b89d0b0" dependencies = [ "cfg-if", "once_cell", @@ -6438,9 +6438,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.66" +version = "0.4.67" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19280959e2844181895ef62f065c63e0ca07ece4771b53d89bfdb967d97cbf05" +checksum = "03623de6905b7206edd0a75f69f747f134b7f0a2323392d664448bf2d3c5d87e" dependencies = [ "js-sys", "wasm-bindgen", @@ -6448,9 +6448,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.116" +version = "0.2.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75973d3066e01d035dbedaad2864c398df42f8dd7b1ea057c35b8407c015b537" +checksum = "7fbdf9a35adf44786aecd5ff89b4563a90325f9da0923236f6104e603c7e86be" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -6458,9 +6458,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.116" +version = "0.2.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91af5e4be765819e0bcfee7322c14374dc821e35e72fa663a830bbc7dc199eac" +checksum = "dca9693ef2bab6d4e6707234500350d8dad079eb508dca05530c85dc3a529ff2" dependencies = [ "bumpalo", "proc-macro2", @@ -6471,9 +6471,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.116" +version = "0.2.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9bf0406a78f02f336bf1e451799cca198e8acde4ffa278f0fb20487b150a633" +checksum = "39129a682a6d2d841b6c429d0c51e5cb0ed1a03829d8b3d1e69a011e62cb3d3b" dependencies = [ "unicode-ident", ] @@ -6527,9 +6527,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.93" +version = "0.3.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "749466a37ee189057f54748b200186b59a03417a117267baf3fd89cecc9fb837" +checksum = "cd70027e39b12f0849461e08ffc50b9cd7688d942c1c8e3c7b22273236b4dd0a" dependencies = [ "js-sys", "wasm-bindgen", diff --git a/native/Cargo.toml b/native/Cargo.toml index b58b0090d1..65a449ba5a 100644 --- a/native/Cargo.toml +++ b/native/Cargo.toml @@ -58,8 +58,8 @@ object_store = { version = "0.12.3", features = ["gcp", "azure", "aws", "http"] url = "2.2" aws-config = "1.8.14" aws-credential-types = "1.2.13" -iceberg = { git = "https://github.com/mbutrovich/iceberg-rust.git", rev = "fe9264c" } -iceberg-storage-opendal = { git = "https://github.com/mbutrovich/iceberg-rust.git", rev = "fe9264c", features = ["opendal-all"] } +iceberg = { git = "https://github.com/mbutrovich/iceberg-rust.git", rev = "e48b177" } +iceberg-storage-opendal = { git = "https://github.com/mbutrovich/iceberg-rust.git", rev = "e48b177", features = ["opendal-all"] } [profile.release] debug = true From 2e4bb2455229b1514d5767d2081f22e934be601b Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Thu, 2 Apr 2026 14:07:25 -0400 Subject: [PATCH 9/9] bump to latest commit to reflect PR feedback upstream. --- native/Cargo.lock | 73 ++++++++++++++++++++++++----------------------- native/Cargo.toml | 4 +-- 2 files changed, 39 insertions(+), 38 deletions(-) diff --git a/native/Cargo.lock b/native/Cargo.lock index cc55fd860e..9bd4bd3df9 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -3321,7 +3321,7 @@ dependencies = [ [[package]] name = "iceberg" version = "0.9.0" -source = "git+https://github.com/mbutrovich/iceberg-rust.git?rev=e48b177#e48b17777db7c221a416be288d2357ad0baa4ac8" +source = "git+https://github.com/mbutrovich/iceberg-rust.git?rev=e47517b#e47517b2311bd0e0428c35efbf1e02bc7aa2bdc1" dependencies = [ "anyhow", "apache-avro", @@ -3374,7 +3374,7 @@ dependencies = [ [[package]] name = "iceberg-storage-opendal" version = "0.9.0" -source = "git+https://github.com/mbutrovich/iceberg-rust.git?rev=e48b177#e48b17777db7c221a416be288d2357ad0baa4ac8" +source = "git+https://github.com/mbutrovich/iceberg-rust.git?rev=e47517b#e47517b2311bd0e0428c35efbf1e02bc7aa2bdc1" dependencies = [ "anyhow", "async-trait", @@ -3391,12 +3391,13 @@ dependencies = [ [[package]] name = "icu_collections" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" +checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c" dependencies = [ "displaydoc", "potential_utf", + "utf8_iter", "yoke", "zerofrom", "zerovec", @@ -3404,9 +3405,9 @@ dependencies = [ [[package]] name = "icu_locale_core" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" +checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29" dependencies = [ "displaydoc", "litemap", @@ -3417,9 +3418,9 @@ dependencies = [ [[package]] name = "icu_normalizer" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" +checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4" dependencies = [ "icu_collections", "icu_normalizer_data", @@ -3431,15 +3432,15 @@ dependencies = [ [[package]] name = "icu_normalizer_data" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" +checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38" [[package]] name = "icu_properties" -version = "2.1.2" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" +checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de" dependencies = [ "icu_collections", "icu_locale_core", @@ -3451,15 +3452,15 @@ dependencies = [ [[package]] name = "icu_properties_data" -version = "2.1.2" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" +checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14" [[package]] name = "icu_provider" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" +checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421" dependencies = [ "displaydoc", "icu_locale_core", @@ -3922,9 +3923,9 @@ checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" [[package]] name = "litemap" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" +checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0" [[package]] name = "lock_api" @@ -4713,9 +4714,9 @@ dependencies = [ [[package]] name = "potential_utf" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" +checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564" dependencies = [ "zerovec", ] @@ -6041,9 +6042,9 @@ dependencies = [ [[package]] name = "tinystr" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d" dependencies = [ "displaydoc", "zerovec", @@ -6989,9 +6990,9 @@ checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" [[package]] name = "yoke" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" +checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" dependencies = [ "stable_deref_trait", "yoke-derive", @@ -7000,9 +7001,9 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" +checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e" dependencies = [ "proc-macro2", "quote", @@ -7032,18 +7033,18 @@ dependencies = [ [[package]] name = "zerofrom" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +checksum = "69faa1f2a1ea75661980b013019ed6687ed0e83d069bc1114e2cc74c6c04c4df" dependencies = [ "zerofrom-derive", ] [[package]] name = "zerofrom-derive" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1" dependencies = [ "proc-macro2", "quote", @@ -7059,9 +7060,9 @@ checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" [[package]] name = "zerotrie" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" +checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf" dependencies = [ "displaydoc", "yoke", @@ -7070,9 +7071,9 @@ dependencies = [ [[package]] name = "zerovec" -version = "0.11.5" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239" dependencies = [ "yoke", "zerofrom", @@ -7081,9 +7082,9 @@ dependencies = [ [[package]] name = "zerovec-derive" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" +checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555" dependencies = [ "proc-macro2", "quote", diff --git a/native/Cargo.toml b/native/Cargo.toml index 65a449ba5a..c71525eb61 100644 --- a/native/Cargo.toml +++ b/native/Cargo.toml @@ -58,8 +58,8 @@ object_store = { version = "0.12.3", features = ["gcp", "azure", "aws", "http"] url = "2.2" aws-config = "1.8.14" aws-credential-types = "1.2.13" -iceberg = { git = "https://github.com/mbutrovich/iceberg-rust.git", rev = "e48b177" } -iceberg-storage-opendal = { git = "https://github.com/mbutrovich/iceberg-rust.git", rev = "e48b177", features = ["opendal-all"] } +iceberg = { git = "https://github.com/mbutrovich/iceberg-rust.git", rev = "e47517b" } +iceberg-storage-opendal = { git = "https://github.com/mbutrovich/iceberg-rust.git", rev = "e47517b", features = ["opendal-all"] } [profile.release] debug = true