From 9a808eb7f63385e3e3b02051ed30eee91daeb613 Mon Sep 17 00:00:00 2001 From: Jeffrey Smith II Date: Mon, 6 Jan 2025 13:29:57 -0500 Subject: [PATCH] fix: yield when the next file is ready to open to prevent CPU starvation --- .../core/src/datasource/physical_plan/file_stream.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/datafusion/core/src/datasource/physical_plan/file_stream.rs b/datafusion/core/src/datasource/physical_plan/file_stream.rs index 18cda4524ab25..f5144a241b3ca 100644 --- a/datafusion/core/src/datasource/physical_plan/file_stream.rs +++ b/datafusion/core/src/datasource/physical_plan/file_stream.rs @@ -478,7 +478,12 @@ impl FileStream { reader, )), partition_values, - } + }; + // Return control to the runtime when we're ready to open the next file + // to prevent uncancellable queries in scenarios with many large files. + // This functions similarly to a `tokio::task::yield_now()`. + cx.waker().wake_by_ref(); + return Poll::Pending; } } }