-
Notifications
You must be signed in to change notification settings - Fork 307
chore: Revise array import to more follow C Data Interface semantics #905
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
3a082dc
chore: Revise array import to more follow C Data Interface semantics
viirya 2606461
more
viirya 8a23a6e
fix
viirya 8614192
For review
viirya ddb70b1
Try
viirya 4beb196
check alignment
viirya b07cf44
Try
viirya ad61ee9
Add comment
viirya File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,10 +17,7 @@ | |
|
|
||
| //! Define JNI APIs which can be called from Java/Scala. | ||
|
|
||
| use arrow::{ | ||
| datatypes::DataType as ArrowDataType, | ||
| ffi::{FFI_ArrowArray, FFI_ArrowSchema}, | ||
| }; | ||
| use arrow::datatypes::DataType as ArrowDataType; | ||
| use arrow_array::RecordBatch; | ||
| use datafusion::{ | ||
| execution::{ | ||
|
|
@@ -78,8 +75,6 @@ struct ExecutionContext { | |
| pub input_sources: Vec<Arc<GlobalRef>>, | ||
| /// The record batch stream to pull results from | ||
| pub stream: Option<SendableRecordBatchStream>, | ||
| /// The FFI arrays. We need to keep them alive here. | ||
| pub ffi_arrays: Vec<(Arc<FFI_ArrowArray>, Arc<FFI_ArrowSchema>)>, | ||
| /// Configurations for DF execution | ||
| pub conf: HashMap<String, String>, | ||
| /// The Tokio runtime used for async. | ||
|
|
@@ -177,7 +172,6 @@ pub unsafe extern "system" fn Java_org_apache_comet_Native_createPlan( | |
| scans: vec![], | ||
| input_sources, | ||
| stream: None, | ||
| ffi_arrays: vec![], | ||
| conf: configs, | ||
| runtime, | ||
| metrics, | ||
|
|
@@ -265,14 +259,33 @@ fn parse_bool(conf: &HashMap<String, String>, name: &str) -> CometResult<bool> { | |
| } | ||
|
|
||
| /// Prepares arrow arrays for output. | ||
| fn prepare_output( | ||
| unsafe fn prepare_output( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it possible to narrow down the area of
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Okay |
||
| env: &mut JNIEnv, | ||
| array_addrs: jlongArray, | ||
| schema_addrs: jlongArray, | ||
| output_batch: RecordBatch, | ||
| exec_context: &mut ExecutionContext, | ||
| ) -> CometResult<jlongArray> { | ||
| ) -> CometResult<jlong> { | ||
| let array_address_array = JLongArray::from_raw(array_addrs); | ||
| let num_cols = env.get_array_length(&array_address_array)? as usize; | ||
|
|
||
| let array_addrs = env.get_array_elements(&array_address_array, ReleaseMode::NoCopyBack)?; | ||
| let array_addrs = &*array_addrs; | ||
|
|
||
| let schema_address_array = JLongArray::from_raw(schema_addrs); | ||
| let schema_addrs = env.get_array_elements(&schema_address_array, ReleaseMode::NoCopyBack)?; | ||
| let schema_addrs = &*schema_addrs; | ||
|
|
||
| let results = output_batch.columns(); | ||
| let num_rows = output_batch.num_rows(); | ||
|
|
||
| if results.len() != num_cols { | ||
| return Err(CometError::Internal(format!( | ||
| "Output column count mismatch: expected {num_cols}, got {}", | ||
| results.len() | ||
| ))); | ||
| } | ||
|
|
||
| if exec_context.debug_native { | ||
| // Validate the output arrays. | ||
| for array in results.iter() { | ||
|
|
@@ -283,35 +296,20 @@ fn prepare_output( | |
| } | ||
| } | ||
|
|
||
| let return_flag = 1; | ||
|
|
||
| let long_array = env.new_long_array((results.len() * 2) as i32 + 2)?; | ||
| env.set_long_array_region(&long_array, 0, &[return_flag, num_rows as jlong])?; | ||
|
|
||
| let mut arrays = vec![]; | ||
|
|
||
| let mut i = 0; | ||
| while i < results.len() { | ||
| let array_ref = results.get(i).ok_or(CometError::IndexOutOfBounds(i))?; | ||
| let (array, schema) = array_ref.to_data().to_spark()?; | ||
|
|
||
| unsafe { | ||
| let arrow_array = Arc::from_raw(array as *const FFI_ArrowArray); | ||
| let arrow_schema = Arc::from_raw(schema as *const FFI_ArrowSchema); | ||
| arrays.push((arrow_array, arrow_schema)); | ||
| } | ||
| array_ref | ||
| .to_data() | ||
| .move_to_spark(array_addrs[i], schema_addrs[i])?; | ||
|
|
||
| env.set_long_array_region(&long_array, (i * 2) as i32 + 2, &[array, schema])?; | ||
| i += 1; | ||
| } | ||
|
|
||
| // Update metrics | ||
| update_metrics(env, exec_context)?; | ||
|
|
||
| // Record the pointer to allocated Arrow Arrays | ||
| exec_context.ffi_arrays = arrays; | ||
|
|
||
| Ok(long_array.into_raw()) | ||
| Ok(num_rows as jlong) | ||
| } | ||
|
|
||
| /// Pull the next input from JVM. Note that we cannot pull input batches in | ||
|
|
@@ -337,7 +335,9 @@ pub unsafe extern "system" fn Java_org_apache_comet_Native_executePlan( | |
| e: JNIEnv, | ||
| _class: JClass, | ||
| exec_context: jlong, | ||
| ) -> jlongArray { | ||
| array_addrs: jlongArray, | ||
| schema_addrs: jlongArray, | ||
| ) -> jlong { | ||
| try_unwrap_or_throw(&e, |mut env| { | ||
| // Retrieve the query | ||
| let exec_context = get_execution_context(exec_context); | ||
|
|
@@ -383,7 +383,13 @@ pub unsafe extern "system" fn Java_org_apache_comet_Native_executePlan( | |
|
|
||
| match poll_output { | ||
| Poll::Ready(Some(output)) => { | ||
| return prepare_output(&mut env, output?, exec_context); | ||
| return prepare_output( | ||
| &mut env, | ||
| array_addrs, | ||
| schema_addrs, | ||
| output?, | ||
| exec_context, | ||
| ); | ||
| } | ||
| Poll::Ready(None) => { | ||
| // Reaches EOF of output. | ||
|
|
@@ -399,10 +405,7 @@ pub unsafe extern "system" fn Java_org_apache_comet_Native_executePlan( | |
| } | ||
| } | ||
|
|
||
| let long_array = env.new_long_array(1)?; | ||
| env.set_long_array_region(&long_array, 0, &[-1])?; | ||
|
|
||
| return Ok(long_array.into_raw()); | ||
| return Ok(-1); | ||
| } | ||
| // A poll pending means there are more than one blocking operators, | ||
| // we don't need go back-forth between JVM/Native. Just keeping polling. | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.