From ad36b48c8bf46abfaedab0dfec9f265ba92e7630 Mon Sep 17 00:00:00 2001 From: Oleks V Date: Wed, 25 Mar 2026 20:37:56 -0700 Subject: [PATCH] chore: Optimize schema rewriter usages (#21158) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Which issue does this PR close? - Closes #. ## Rationale for this change The rewriter actually has 3 responsibilities: 1. Index remapping — column indices in expressions may not match the file schema 2. Type casting — when logical and physical field types differ 3. Missing column handling — replacing references to absent columns with nulls Skip the schema rewrite, and the expression-tree walk it costs, when no predicate is set and the logical file schema equals the physical file schema ## What changes are included in this PR? ## Are these changes tested? ## Are there any user-facing changes? --- datafusion/datasource-parquet/src/opener.rs | 32 ++++++++++++++------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/datafusion/datasource-parquet/src/opener.rs b/datafusion/datasource-parquet/src/opener.rs index 108e8c5752017..f657b709fe099 100644 --- a/datafusion/datasource-parquet/src/opener.rs +++ b/datafusion/datasource-parquet/src/opener.rs @@ -410,17 +410,27 @@ impl FileOpener for ParquetOpener { // and we can avoid doing any more work on the file (bloom filters, loading the page index, etc.). // Additionally, if any casts were inserted we can move casts from the column to the literal side: // `CAST(col AS INT) = 5` can become `col = CAST(5 AS )`, which can be evaluated statically. 
- let rewriter = expr_adapter_factory.create( - Arc::clone(&logical_file_schema), - Arc::clone(&physical_file_schema), - )?; - let simplifier = PhysicalExprSimplifier::new(&physical_file_schema); - predicate = predicate - .map(|p| simplifier.simplify(rewriter.rewrite(p)?)) - .transpose()?; - // Adapt projections to the physical file schema as well - projection = projection - .try_map_exprs(|p| simplifier.simplify(rewriter.rewrite(p)?))?; + // + // When the schemas are identical and there is no predicate, the + // rewriter is a no-op: column indices already match (partition + // columns are appended after file columns in the table schema), + // types are the same, and there are no missing columns. Skip the + // tree walk entirely in that case. + let needs_rewrite = + predicate.is_some() || logical_file_schema != physical_file_schema; + if needs_rewrite { + let rewriter = expr_adapter_factory.create( + Arc::clone(&logical_file_schema), + Arc::clone(&physical_file_schema), + )?; + let simplifier = PhysicalExprSimplifier::new(&physical_file_schema); + predicate = predicate + .map(|p| simplifier.simplify(rewriter.rewrite(p)?)) + .transpose()?; + // Adapt projections to the physical file schema as well + projection = projection + .try_map_exprs(|p| simplifier.simplify(rewriter.rewrite(p)?))?; + } // Build predicates for this specific file let (pruning_predicate, page_pruning_predicate) = build_pruning_predicates(