From f56ab2247daabc70c01b7d0e120157d57de3c5b5 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Mon, 24 Nov 2025 14:30:26 -0800 Subject: [PATCH 1/2] Remove substrait-expr dependency --- rust/lance-datafusion/Cargo.toml | 1 - rust/lance-datafusion/src/substrait.rs | 117 ++++++++++++++++++++----- 2 files changed, 94 insertions(+), 24 deletions(-) diff --git a/rust/lance-datafusion/Cargo.toml b/rust/lance-datafusion/Cargo.toml index 9ee37d77c7a..80213621d88 100644 --- a/rust/lance-datafusion/Cargo.toml +++ b/rust/lance-datafusion/Cargo.toml @@ -37,7 +37,6 @@ tracing.workspace = true [dev-dependencies] lance-datagen.workspace = true -substrait-expr = {version = "0.2.3"} [features] substrait = ["dep:datafusion-substrait"] diff --git a/rust/lance-datafusion/src/substrait.rs b/rust/lance-datafusion/src/substrait.rs index 2acae573fc4..af3236f4d6d 100644 --- a/rust/lance-datafusion/src/substrait.rs +++ b/rust/lance-datafusion/src/substrait.rs @@ -329,12 +329,24 @@ mod tests { prelude::{Expr, SessionContext}, }; use datafusion_common::{Column, ScalarValue}; - use prost::Message; - use substrait_expr::functions::functions_comparison::FunctionsComparisonExt; - use substrait_expr::{ - builder::{schema::SchemaBuildersExt, BuilderParams, ExpressionsBuilder}, - helpers::{literals::literal, schema::SchemaInfo}, + use datafusion_substrait::substrait::proto::{ + expression::{ + field_reference::{ReferenceType, RootReference, RootType}, + literal::LiteralType, + reference_segment::{self, StructField}, + FieldReference, Literal, ReferenceSegment, RexType, ScalarFunction, + }, + expression_reference::ExprType, + extensions::{ + simple_extension_declaration::{ExtensionFunction, MappingType}, + SimpleExtensionDeclaration, SimpleExtensionUri, + }, + function_argument::ArgType, + r#type::{Boolean, Kind, Nullability, Struct, I32}, + Expression, ExpressionReference, ExtendedExpression, FunctionArgument, NamedStruct, Type, + Version, }; + use prost::Message; use crate::substrait::{encode_substrait, parse_substrait}; @@ -345,24 +357,83 @@ mod tests { #[tokio::test] async fn test_substrait_conversion() { - let schema = SchemaInfo::new_full() - .field("x", substrait_expr::helpers::types::i32(true)) - .build(); - let expr_builder = ExpressionsBuilder::new(schema, BuilderParams::default()); - expr_builder - .add_expression( - "filter_mask", - expr_builder - .functions() - .lt( - expr_builder.fields().resolve_by_name("x").unwrap(), - literal(0_i32), - ) - .build() - .unwrap(), - ) - .unwrap(); - let expr = expr_builder.build(); + let expr = ExtendedExpression { + version: Some(Version { + major_number: 0, + minor_number: 63, + patch_number: 1, + git_hash: "".to_string(), + producer: "unit-test".to_string(), + }), + extension_uris: vec![ + SimpleExtensionUri { + extension_uri_anchor: 1, + uri: "https://github.com/substrait-io/substrait/blob/main/extensions/functions_comparison.yaml".to_string(), + } + ], + extensions: vec![ + SimpleExtensionDeclaration { + mapping_type: Some(MappingType::ExtensionFunction(ExtensionFunction { + extension_uri_reference: 1, + function_anchor: 1, + name: "lt".to_string(), + })), + } + ], + referred_expr: vec![ExpressionReference { + output_names: vec!["filter_mask".to_string()], + expr_type: Some(ExprType::Expression(Expression { + rex_type: Some(RexType::ScalarFunction(ScalarFunction { + function_reference: 1, + arguments: vec![ + FunctionArgument { + arg_type: Some(ArgType::Value(Expression { + rex_type: Some(RexType::Selection(Box::new(FieldReference { + reference_type: Some(ReferenceType::DirectReference(ReferenceSegment { + reference_type: Some(reference_segment::ReferenceType::StructField(Box::new(StructField { field: 0, child: None }))) + })), + root_type: Some(RootType::RootReference(RootReference {})) + }))) + })) + }, + FunctionArgument { + arg_type: Some(ArgType::Value(Expression { + rex_type: Some(RexType::Literal(Literal { + nullable: false, + type_variation_reference: 0, + literal_type: Some(LiteralType::I32(0)) + })) + })) + } + ], + options: vec![], + output_type: Some(Type { + kind: Some(Kind::Bool(Boolean { + type_variation_reference: 0, + nullability: Nullability::Required as i32, + })), + }), + #[allow(deprecated)] + args: vec![], + })) + })), + }], + base_schema: Some(NamedStruct { + names: vec!["x".to_string()], + r#struct: Some(Struct { + types: vec![Type { + kind: Some(Kind::I32(I32 { + type_variation_reference: 0, + nullability: Nullability::Nullable as i32, + })), + }], + type_variation_reference: 0, + nullability: Nullability::Required as i32, + }), + }), + advanced_extensions: None, + expected_type_urls: vec![], + }; let expr_bytes = expr.encode_to_vec(); let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Int32, true)])); From c34c165bc5a3276077be47e7a4a289a5f88424e5 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Tue, 25 Nov 2025 06:37:05 -0800 Subject: [PATCH 2/2] Fix cargo lock after rebase --- Cargo.lock | 130 +++-------------------------------------------------- 1 file changed, 5 insertions(+), 125 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 783d6ddff23..46380b6437c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1532,15 +1532,6 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" -[[package]] -name = "convert_case" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" -dependencies = [ - "unicode-segmentation", -] - [[package]] name = "core-foundation" version = "0.9.4" @@ -2547,7 +2538,7 @@ dependencies = [ "object_store", "pbjson-types", "prost", - "substrait 0.58.0", + "substrait", "tokio", "url", "uuid", @@ -4343,7 +4334,6 @@ dependencies = [ "pin-project", "prost", "snafu", - "substrait-expr", "tokio", "tracing", ] @@ -7693,28 +7683,6 @@ dependencies = [ "syn 2.0.111", ] -[[package]] -name = "substrait" -version = "0.50.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1772d041c37cc7e6477733c76b2acf4ee36bd52b2ae4d9ea0ec9c87d003db32" -dependencies = [ - "heck", - "prettyplease", - "prost", - "prost-build", - "prost-types", - "regress", - "schemars", - "semver", - "serde", - "serde_json", - "serde_yaml", - "syn 2.0.111", - "typify 0.2.0", - "walkdir", -] - [[package]] name = "substrait" version = "0.58.0" @@ -7736,51 +7704,10 @@ dependencies = [ "serde_json", "serde_yaml", "syn 2.0.111", - "typify 0.4.3", + "typify", "walkdir", ] -[[package]] -name = "substrait-expr" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d091cf06bc7808bd81eb01f5f5b77b2b14288bb022501a2dcad78633c65262f" -dependencies = [ - "once_cell", - "prost", - "substrait 0.50.4", - "substrait-expr-funcgen", - "substrait-expr-macros", - "thiserror 2.0.17", -] - -[[package]] -name = "substrait-expr-funcgen" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bee762399b891e8c84b9777e67a4c3193bc499c176c18d22f39341df61166092" -dependencies = [ - "convert_case", - "prettyplease", - "proc-macro2", - "quote", - "serde_yaml", - "substrait 0.50.4", - "syn 2.0.111", - "thiserror 2.0.17", -] - -[[package]] -name = "substrait-expr-macros" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e42af5525699cb9924c8fdd3aa233d2b067efde29f68c00090ca0c8eada8269" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.111", -] - [[package]] name = "subtle" version = "2.6.1" @@ -8539,44 +8466,14 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" -[[package]] -name = "typify" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4c644dda9862f0fef3a570d8ddb3c2cfb1d5ac824a1f2ddfa7bc8f071a5ad8a" -dependencies = [ - "typify-impl 0.2.0", - "typify-macro 0.2.0", -] - [[package]] name = "typify" version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7144144e97e987c94758a3017c920a027feac0799df325d6df4fc8f08d02068e" dependencies = [ - "typify-impl 0.4.3", - "typify-macro 0.4.3", -] - -[[package]] -name = "typify-impl" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d59ab345b6c0d8ae9500b9ff334a4c7c0d316c1c628dc55726b95887eb8dbd11" -dependencies = [ - "heck", - "log", - "proc-macro2", - "quote", - "regress", - "schemars", - "semver", - "serde", - "serde_json", - "syn 2.0.111", - "thiserror 1.0.69", - "unicode-ident", + "typify-impl", + "typify-macro", ] [[package]] @@ -8599,23 +8496,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "typify-macro" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "785e2cdcef0df8160fdd762ed548a637aaec1e83704fdbc14da0df66013ee8d0" -dependencies = [ - "proc-macro2", - "quote", - "schemars", - "semver", - "serde", - "serde_json", - "serde_tokenstream", - "syn 2.0.111", - "typify-impl 0.2.0", -] - [[package]] name = "typify-macro" version = "0.4.3" @@ -8630,7 +8510,7 @@ dependencies = [ "serde_json", "serde_tokenstream", "syn 2.0.111", - "typify-impl 0.4.3", + "typify-impl", ] [[package]]