diff --git a/Cargo.lock b/Cargo.lock index da6719404..0ce6f8163 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3044,6 +3044,7 @@ dependencies = [ "datafusion", "deltalake", "futures", + "modelardb_compression", "modelardb_embedded", "modelardb_storage", "modelardb_types", diff --git a/crates/modelardb_bulkloader/Cargo.toml b/crates/modelardb_bulkloader/Cargo.toml index 456e32316..08d13ecc0 100644 --- a/crates/modelardb_bulkloader/Cargo.toml +++ b/crates/modelardb_bulkloader/Cargo.toml @@ -31,6 +31,7 @@ arrow = { workspace = true, features = ["ffi"] } datafusion.workspace = true deltalake.workspace = true futures.workspace = true +modelardb_compression = { path = "../modelardb_compression" } modelardb_embedded = { path = "../modelardb_embedded" } modelardb_storage = { path = "../modelardb_storage" } modelardb_types = { path = "../modelardb_types" } diff --git a/crates/modelardb_bulkloader/src/main.rs b/crates/modelardb_bulkloader/src/main.rs index 63b1481e9..e39746d01 100644 --- a/crates/modelardb_bulkloader/src/main.rs +++ b/crates/modelardb_bulkloader/src/main.rs @@ -39,8 +39,7 @@ use deltalake::{ObjectStore, Path}; use futures::stream::StreamExt; use modelardb_embedded::error::{ModelarDbEmbeddedError, Result}; use modelardb_embedded::operations::Operations; -use modelardb_embedded::operations::data_folder::DataFolder; -use modelardb_storage::delta_lake::DeltaTableWriter; +use modelardb_storage::data_folder::{DataFolder, DeltaTableWriter}; use modelardb_types::types::TimeSeriesTableMetadata; use sysinfo::System; @@ -168,8 +167,9 @@ async fn import( data_folder.read(sql).await?; } - if let Some(time_series_table_metadata) = - data_folder.time_series_table_metadata(table_name).await + if let Some(time_series_table_metadata) = data_folder + .time_series_table_metadata_for_registered_time_series_table(table_name) + .await { import_time_series_table( input_stream, @@ -205,7 +205,7 @@ async fn import_time_series_table( cast_double_to_float: bool, ) -> Result<()> { let 
table_name = &time_series_table_metadata.name; - let mut delta_table_writer = data_folder.writer(table_name).await?; + let mut delta_table_writer = data_folder.table_writer(table_name).await?; let mut system = System::new(); let mut current_batch = vec![]; @@ -220,7 +220,6 @@ async fn import_time_series_table( system.refresh_memory(); if current_batch_size > (system.available_memory() as usize / 10 * 8) && let Err(write_error) = import_and_clear_time_series_table_batch( - data_folder, &mut delta_table_writer, time_series_table_metadata, &mut current_batch, @@ -234,7 +233,6 @@ async fn import_time_series_table( } if let Err(write_error) = import_and_clear_time_series_table_batch( - data_folder, &mut delta_table_writer, time_series_table_metadata, &mut current_batch, @@ -258,7 +256,7 @@ async fn import_normal_table( table_name: &str, data_folder: &mut DataFolder, ) -> Result<()> { - let mut delta_table_writer = data_folder.writer(table_name).await?; + let mut delta_table_writer = data_folder.table_writer(table_name).await?; while let Some(record_batch) = input_stream.next().await { let record_batch = record_batch?; @@ -386,12 +384,11 @@ fn cast_record_batch(record_batch: RecordBatch, cast_double_to_float: bool) -> R RecordBatch::try_new(cast_schema, cast_columns).map_err(|error| error.into()) } -/// Import the `current_batch` into the time series table with `time_series_table_metadata` in -/// `data_folder` using `delta_table_writer`. Then clear `current_batch` and zero -/// `current_batch_size`. If a [`RecordBatch`] in `current_batch` has a different schema, the -/// compression fails, or the write fails, a [`ModelarDbEmbeddedError`] is returned. +/// Import the `current_batch` into the time series table with `time_series_table_metadata` using +/// `delta_table_writer`. Then clear `current_batch` and zero `current_batch_size`. 
If a +/// [`RecordBatch`] in `current_batch` has a different schema, the compression fails, or the write +/// fails, a [`ModelarDbEmbeddedError`] is returned. async fn import_and_clear_time_series_table_batch( - data_folder: &DataFolder, delta_table_writer: &mut DeltaTableWriter, time_series_table_metadata: &TimeSeriesTableMetadata, current_batch: &mut Vec, @@ -400,9 +397,10 @@ async fn import_and_clear_time_series_table_batch( if *current_batch_size != 0 { let schema = current_batch[0].schema(); let uncompressed_data = compute::concat_batches(&schema, &*current_batch)?; - let compressed_data = data_folder - .compress_all(time_series_table_metadata, &uncompressed_data) - .await?; + let compressed_data = modelardb_compression::try_compress_multivariate_time_series( + time_series_table_metadata, + &uncompressed_data, + )?; delta_table_writer.write_all(&compressed_data).await?; current_batch.clear(); *current_batch_size = 0; @@ -502,16 +500,21 @@ async fn create_data_folder(data_folder_path: &str) -> Result { secret_access_key, ) .await + .map_err(|error| error.into()) } Some(("az", container_name)) => { let account_name = env::var("AZURE_STORAGE_ACCOUNT_NAME")?; let access_key = env::var("AZURE_STORAGE_ACCESS_KEY")?; - DataFolder::open_azure(account_name, access_key, container_name.to_owned()).await + DataFolder::open_azure(account_name, access_key, container_name.to_owned()) + .await + .map_err(|error| error.into()) } _ => { let data_folder_path = StdPath::new(data_folder_path); - DataFolder::open_local(data_folder_path).await + DataFolder::open_local(data_folder_path) + .await + .map_err(|error| error.into()) } } } diff --git a/crates/modelardb_compression/src/compression.rs b/crates/modelardb_compression/src/compression.rs index f2bcd83da..db48ee612 100644 --- a/crates/modelardb_compression/src/compression.rs +++ b/crates/modelardb_compression/src/compression.rs @@ -19,9 +19,11 @@ use std::sync::Arc; +use arrow::array::StringArray; +use arrow::compute::{self, 
SortColumn, SortOptions}; use arrow::datatypes::Schema; use arrow::record_batch::RecordBatch; -use modelardb_types::types::{ErrorBound, TimestampArray, ValueArray}; +use modelardb_types::types::{ErrorBound, TimeSeriesTableMetadata, TimestampArray, ValueArray}; use crate::error::{ModelarDbCompressionError, Result}; use crate::models::macaque_v::MacaqueV; @@ -35,6 +37,146 @@ use crate::types::{CompressedSegmentBatchBuilder, CompressedSegmentBuilder, Mode /// that are marked as residuals are stored as separate segments to allow for efficient pruning. const RESIDUAL_VALUES_MAX_LENGTH: u8 = 255; +/// Compress the `uncompressed_time_series` from the table with `time_series_table_metadata` and +/// return the resulting segments. +pub fn try_compress_multivariate_time_series( + time_series_table_metadata: &TimeSeriesTableMetadata, + uncompressed_time_series: &RecordBatch, +) -> Result> { + // Sort by all tags and then time to simplify splitting the data into time series. + let sorted_uncompressed_data = + sort_time_series_by_tags_and_time(time_series_table_metadata, uncompressed_time_series)?; + + // Split the sorted uncompressed data into time series and compress them separately. + let mut compressed_data = vec![]; + + let tag_column_arrays: Vec<&StringArray> = time_series_table_metadata + .tag_column_indices + .iter() + .map(|index| modelardb_types::array!(sorted_uncompressed_data, *index, StringArray)) + .collect(); + + let mut tag_values = Vec::with_capacity(tag_column_arrays.len()); + for tag_column_array in &tag_column_arrays { + tag_values.push(tag_column_array.value(0).to_owned()); + } + + // The index of the first data point of each time series must be stored so slices + // containing only data points for each time series can be extracted and compressed. + let mut row_index_start = 0; + for row_index in 0..sorted_uncompressed_data.num_rows() { + // If any of the tags differ, the data point is from a new time series. 
+ let mut is_new_time_series = false; + for tag_column_index in 0..tag_column_arrays.len() { + is_new_time_series |= tag_values[tag_column_index] + != tag_column_arrays[tag_column_index].value(row_index); + } + + if is_new_time_series { + let time_series_length = row_index - row_index_start; + let uncompressed_time_series = + sorted_uncompressed_data.slice(row_index_start, time_series_length); + + try_split_and_compress_univariate_time_series( + time_series_table_metadata, + &uncompressed_time_series, + &tag_values, + &mut compressed_data, + )?; + + for (tag_column_index, tag_column_array) in tag_column_arrays.iter().enumerate() { + tag_values[tag_column_index] = tag_column_array.value(row_index).to_owned(); + } + + row_index_start = row_index; + } + } + + let time_series_length = sorted_uncompressed_data.num_rows() - row_index_start; + let uncompressed_time_series = + sorted_uncompressed_data.slice(row_index_start, time_series_length); + + try_split_and_compress_univariate_time_series( + time_series_table_metadata, + &uncompressed_time_series, + &tag_values, + &mut compressed_data, + )?; + + Ok(compressed_data) +} + +/// Sort the `uncompressed_data` from the time series table with `time_series_table_metadata` +/// according to its tags and then timestamps. 
+fn sort_time_series_by_tags_and_time( + time_series_table_metadata: &TimeSeriesTableMetadata, + uncompressed_time_series: &RecordBatch, +) -> Result { + let mut sort_columns = vec![]; + + let sort_options = Some(SortOptions { + descending: false, + nulls_first: false, + }); + + for tag_column_index in &time_series_table_metadata.tag_column_indices { + let tag_column = uncompressed_time_series.column(*tag_column_index); + sort_columns.push(SortColumn { + values: (*tag_column).clone(), + options: sort_options, + }); + } + + let timestamp_column_index = time_series_table_metadata.timestamp_column_index; + let timestamp_column = uncompressed_time_series.column(timestamp_column_index); + sort_columns.push(SortColumn { + values: (*timestamp_column).clone(), + options: sort_options, + }); + + let indices = compute::lexsort_to_indices(&sort_columns, None)?; + let sorted_columns = compute::take_arrays(uncompressed_time_series.columns(), &indices, None)?; + RecordBatch::try_new(uncompressed_time_series.schema(), sorted_columns).map_err(|error| error.into()) +} + +/// Compress the field columns in `uncompressed_time_series` from the table with +/// `time_series_table_metadata` using [`try_compress_univariate_time_series()`] and append the +/// result to `compressed_data`. It is assumed that all data points in `uncompressed_time_series` +/// have the same tags as in `tag_values`. 
+pub fn try_split_and_compress_univariate_time_series( + time_series_table_metadata: &TimeSeriesTableMetadata, + uncompressed_time_series: &RecordBatch, + tag_values: &[String], + compressed_time_series: &mut Vec, +) -> Result<()> { + let uncompressed_timestamps = modelardb_types::array!( + uncompressed_time_series, + time_series_table_metadata.timestamp_column_index, + TimestampArray + ); + + for field_column_index in &time_series_table_metadata.field_column_indices { + let uncompressed_values = + modelardb_types::array!(uncompressed_time_series, *field_column_index, ValueArray); + + let error_bound = time_series_table_metadata.error_bounds[*field_column_index]; + + let compressed_segments = try_compress_univariate_time_series( + uncompressed_timestamps, + uncompressed_values, + error_bound, + time_series_table_metadata.compressed_schema.clone(), + tag_values.to_vec(), + *field_column_index as i16, + ) + .expect("uncompressed_timestamps and uncompressed_values should have the same length."); + + compressed_time_series.push(compressed_segments); + } + + Ok(()) +} + /// Compress `uncompressed_timestamps` using a start time, end time, and a sampling interval if /// regular and delta-of-deltas followed by a variable length binary encoding if irregular. /// `uncompressed_values` is compressed within `error_bound` using the model types in `models`. @@ -45,7 +187,7 @@ const RESIDUAL_VALUES_MAX_LENGTH: u8 = 255; /// `uncompressed_values` have different lengths or if `compressed_schema` is not a valid schema for /// compressed segments, otherwise the resulting compressed segments are returned as a /// [`RecordBatch`] with the `compressed_schema` schema. 
-pub fn try_compress( +pub fn try_compress_univariate_time_series( uncompressed_timestamps: &TimestampArray, uncompressed_values: &ValueArray, error_bound: ErrorBound, @@ -275,10 +417,10 @@ mod tests { const ADD_NOISE_RANGE: Option> = Some(1.0..1.05); const TRY_COMPRESS_TEST_LENGTH: usize = 50; - // Tests for try_compress(). + // Tests for try_compress_univariate_time_series(). #[test] fn test_try_compress_empty_time_series_within_lossless_error_bound() { - let compressed_record_batch = try_compress( + let compressed_record_batch = try_compress_univariate_time_series( &TimestampBuilder::new().finish(), &ValueBuilder::new().finish(), ErrorBound::Lossless, @@ -440,7 +582,7 @@ mod tests { let uncompressed_values = data_generation::generate_values(uncompressed_timestamps.values(), values_structure); - let compressed_record_batch = try_compress( + let compressed_record_batch = try_compress_univariate_time_series( &uncompressed_timestamps, &uncompressed_values, error_bound, @@ -544,7 +686,7 @@ mod tests { let uncompressed_values = uncompressed_values.finish(); assert_eq!(uncompressed_timestamps.len(), uncompressed_values.len()); - let compressed_record_batch = try_compress( + let compressed_record_batch = try_compress_univariate_time_series( &uncompressed_timestamps, &uncompressed_values, error_bound, @@ -701,7 +843,7 @@ mod tests { 100.0..200.0, ); - let compressed_record_batch = try_compress( + let compressed_record_batch = try_compress_univariate_time_series( &uncompressed_timestamps, &uncompressed_values, error_bound, diff --git a/crates/modelardb_compression/src/error.rs b/crates/modelardb_compression/src/error.rs index 5553a6cd2..35b4ac1ed 100644 --- a/crates/modelardb_compression/src/error.rs +++ b/crates/modelardb_compression/src/error.rs @@ -19,12 +19,16 @@ use std::error::Error; use std::fmt::{Display, Formatter}; use std::result::Result as StdResult; +use arrow::error::ArrowError; + /// Result type used throughout `modelardb_compression`. 
pub type Result = StdResult; /// Error type used throughout `modelardb_compression`. #[derive(Debug)] pub enum ModelarDbCompressionError { + /// Error returned by Apache Arrow. + Arrow(ArrowError), /// Error returned when an invalid argument was passed. InvalidArgument(String), } @@ -32,6 +36,7 @@ pub enum ModelarDbCompressionError { impl Display for ModelarDbCompressionError { fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { match self { + Self::Arrow(reason) => write!(f, "Arrow Error: {reason}"), Self::InvalidArgument(reason) => write!(f, "Invalid Argument Error: {reason}"), } } @@ -41,7 +46,14 @@ impl Error for ModelarDbCompressionError { fn source(&self) -> Option<&(dyn Error + 'static)> { // Return the error that caused self to occur if one exists. match self { + Self::Arrow(reason) => Some(reason), Self::InvalidArgument(_reason) => None, } } } + +impl From for ModelarDbCompressionError { + fn from(error: ArrowError) -> Self { + Self::Arrow(error) + } +} diff --git a/crates/modelardb_compression/src/lib.rs b/crates/modelardb_compression/src/lib.rs index 888adfc18..8b96c60c2 100644 --- a/crates/modelardb_compression/src/lib.rs +++ b/crates/modelardb_compression/src/lib.rs @@ -25,7 +25,9 @@ mod models; mod types; // Re-export the few functions and types users are meant to use. 
-pub use compression::try_compress; +pub use compression::try_compress_multivariate_time_series; +pub use compression::try_compress_univariate_time_series; +pub use compression::try_split_and_compress_univariate_time_series; pub use models::grid; pub use models::is_value_within_error_bound; pub use models::len; diff --git a/crates/modelardb_compression/src/models/swing.rs b/crates/modelardb_compression/src/models/swing.rs index 7a136c96f..88ec217e7 100644 --- a/crates/modelardb_compression/src/models/swing.rs +++ b/crates/modelardb_compression/src/models/swing.rs @@ -749,7 +749,7 @@ mod tests { compressed_schema_fields.push(Arc::new(Field::new("tag", DataType::Utf8, false))); let compressed_schema = Arc::new(Schema::new(compressed_schema_fields)); - let segments = crate::try_compress( + let segments = crate::try_compress_univariate_time_series( ×tamps, &values, error_bound, diff --git a/crates/modelardb_embedded/src/capi.rs b/crates/modelardb_embedded/src/capi.rs index ff2a8a44c..911bc24e8 100644 --- a/crates/modelardb_embedded/src/capi.rs +++ b/crates/modelardb_embedded/src/capi.rs @@ -39,13 +39,14 @@ use std::sync::{Arc, LazyLock}; use arrow::array::{self, Array, Float32Array, Int8Array, MapArray, StringArray, StructArray}; use arrow::ffi::{self, FFI_ArrowArray, FFI_ArrowSchema}; use arrow::record_batch::RecordBatch; +use modelardb_storage::data_folder::DataFolder; use modelardb_types::types::ErrorBound; use tokio::runtime::Runtime; use crate::error::{ModelarDbEmbeddedError, Result}; use crate::operations::Operations; use crate::operations::client::{Client, Node}; -use crate::operations::data_folder::DataFolder; +use crate::operations::data_folder::DataFolderDataSink; use crate::record_batch_stream_to_record_batch; use crate::{Aggregate, TableType}; @@ -86,9 +87,12 @@ pub unsafe extern "C" fn modelardb_embedded_open_memory() -> *const c_void { set_error_and_return_value_ptr(maybe_data_folder) } -/// See documentation for [`modelardb_embedded_open_memory`]. 
+/// See documentation for [`modelardb_embedded_open_memory()`]. fn open_memory() -> Result { - TOKIO_RUNTIME.block_on(DataFolder::open_memory()) + let data_folder = TOKIO_RUNTIME.block_on(DataFolder::open_memory())?; + let data_sink = Arc::new(DataFolderDataSink::new()); + TOKIO_RUNTIME.block_on(data_folder.register_tables(data_sink))?; + Ok(data_folder) } /// Creates a [`DataFolder`] that manages data in the local folder at `data_folder_path_path` and @@ -102,12 +106,15 @@ pub unsafe extern "C" fn modelardb_embedded_open_local( set_error_and_return_value_ptr(maybe_data_folder) } -/// See documentation for [`modelardb_embedded_open_local`]. +/// See documentation for [`modelardb_embedded_open_local()`]. unsafe fn open_local(data_folder_path_ptr: *const c_char) -> Result { let data_folder_str = unsafe { c_char_ptr_to_str(data_folder_path_ptr)? }; let data_folder_path = StdPath::new(data_folder_str); - TOKIO_RUNTIME.block_on(DataFolder::open_local(data_folder_path)) + let data_folder = TOKIO_RUNTIME.block_on(DataFolder::open_local(data_folder_path))?; + let data_sink = Arc::new(DataFolderDataSink::new()); + TOKIO_RUNTIME.block_on(data_folder.register_tables(data_sink))?; + Ok(data_folder) } /// Creates a [`DataFolder`] that manages data in an object store with a S3-compatible API and @@ -133,7 +140,7 @@ pub unsafe extern "C" fn modelardb_embedded_open_s3( set_error_and_return_value_ptr(maybe_data_folder) } -/// See documentation for [`modelardb_embedded_open_s3`]. +/// See documentation for [`modelardb_embedded_open_s3()`]. unsafe fn open_s3( endpoint_ptr: *const c_char, bucket_name_ptr: *const c_char, @@ -145,12 +152,15 @@ unsafe fn open_s3( let access_key_id = unsafe { c_char_ptr_to_str(access_key_id_ptr)? }; let secret_access_key = unsafe { c_char_ptr_to_str(secret_access_key_ptr)? 
}; - TOKIO_RUNTIME.block_on(DataFolder::open_s3( + let data_folder = TOKIO_RUNTIME.block_on(DataFolder::open_s3( endpoint.to_owned(), bucket_name.to_owned(), access_key_id.to_owned(), secret_access_key.to_owned(), - )) + ))?; + let data_sink = Arc::new(DataFolderDataSink::new()); + TOKIO_RUNTIME.block_on(data_folder.register_tables(data_sink))?; + Ok(data_folder) } /// Creates a [`DataFolder`] that manages data in an object store with an Azure-compatible API and @@ -167,7 +177,7 @@ pub unsafe extern "C" fn modelardb_embedded_open_azure( set_error_and_return_value_ptr(maybe_data_folder) } -/// See documentation for [`modelardb_embedded_open_azure`]. +/// See documentation for [`modelardb_embedded_open_azure()`]. unsafe fn open_azure( account_name_ptr: *const c_char, access_key_ptr: *const c_char, @@ -177,11 +187,14 @@ unsafe fn open_azure( let access_key = unsafe { c_char_ptr_to_str(access_key_ptr)? }; let container_name = unsafe { c_char_ptr_to_str(container_name_ptr)? }; - TOKIO_RUNTIME.block_on(DataFolder::open_azure( + let data_folder = TOKIO_RUNTIME.block_on(DataFolder::open_azure( account_name.to_owned(), access_key.to_owned(), container_name.to_owned(), - )) + ))?; + let data_sink = Arc::new(DataFolderDataSink::new()); + TOKIO_RUNTIME.block_on(data_folder.register_tables(data_sink))?; + Ok(data_folder) } /// Creates a [`Client`] that is connected to the Apache Arrow Flight server URL in `node_url_ptr` @@ -196,7 +209,7 @@ pub unsafe extern "C" fn modelardb_embedded_connect( set_error_and_return_value_ptr(maybe_client) } -/// See documentation for [`modelardb_embedded_connect`]. +/// See documentation for [`modelardb_embedded_connect()`]. unsafe fn connect(node_url_ptr: *const c_char, is_server_node: bool) -> Result { let node_url_str = unsafe { c_char_ptr_to_str(node_url_ptr)? }; @@ -292,7 +305,7 @@ pub unsafe extern "C" fn modelardb_embedded_create( set_error_and_return_code(maybe_unit) } -/// See documentation for [`modelardb_embedded_create`]. 
+/// See documentation for [`modelardb_embedded_create()`]. #[allow(clippy::too_many_arguments)] unsafe fn create( maybe_operations_ptr: *mut c_void, @@ -389,7 +402,7 @@ pub unsafe extern "C" fn modelardb_embedded_tables( set_error_and_return_code(maybe_unit) } -/// See documentation for [`modelardb_embedded_tables`]. +/// See documentation for [`modelardb_embedded_tables()`]. unsafe fn tables( maybe_operations_ptr: *mut c_void, is_data_folder: bool, @@ -436,7 +449,7 @@ pub unsafe extern "C" fn modelardb_embedded_schema( set_error_and_return_code(maybe_unit) } -/// See documentation for [`modelardb_embedded_schema`]. +/// See documentation for [`modelardb_embedded_schema()`]. unsafe fn schema( maybe_operations_ptr: *mut c_void, is_data_folder: bool, @@ -488,7 +501,7 @@ pub unsafe extern "C" fn modelardb_embedded_write( set_error_and_return_code(maybe_unit) } -/// See documentation for [`modelardb_embedded_write`]. +/// See documentation for [`modelardb_embedded_write()`]. unsafe fn write( maybe_operations_ptr: *mut c_void, is_data_folder: bool, @@ -532,7 +545,7 @@ pub unsafe extern "C" fn modelardb_embedded_read( set_error_and_return_code(maybe_unit) } -/// See documentation for [`modelardb_embedded_read`]. +/// See documentation for [`modelardb_embedded_read()`]. unsafe fn read( maybe_operations_ptr: *mut c_void, is_data_folder: bool, @@ -583,7 +596,7 @@ pub unsafe extern "C" fn modelardb_embedded_copy( set_error_and_return_code(maybe_unit) } -/// See documentation for [`modelardb_embedded_copy`]. +/// See documentation for [`modelardb_embedded_copy()`]. unsafe fn copy( maybe_source_operations_ptr: *mut c_void, is_data_folder: bool, @@ -649,7 +662,7 @@ pub unsafe extern "C" fn modelardb_embedded_read_time_series_table( set_error_and_return_code(maybe_unit) } -/// See documentation for [`modelardb_embedded_read_time_series_table`]. +/// See documentation for [`modelardb_embedded_read_time_series_table()`]. 
#[allow(clippy::too_many_arguments)] unsafe fn read_time_series_table( maybe_operations_ptr: *mut c_void, @@ -776,7 +789,7 @@ pub unsafe extern "C" fn modelardb_embedded_copy_time_series_table( set_error_and_return_code(maybe_unit) } -/// See documentation for [`modelardb_embedded_copy_time_series_table`]. +/// See documentation for [`modelardb_embedded_copy_time_series_table()`]. #[allow(clippy::too_many_arguments)] unsafe fn copy_time_series_table( maybe_source_operations_ptr: *mut c_void, @@ -866,7 +879,7 @@ pub unsafe extern "C" fn modelardb_embedded_move( set_error_and_return_code(maybe_unit) } -/// See documentation for [`modelardb_embedded_move`]. +/// See documentation for [`modelardb_embedded_move()`]. unsafe fn r#move( maybe_source_operations_ptr: *mut c_void, is_data_folder: bool, @@ -896,7 +909,7 @@ pub unsafe extern "C" fn modelardb_embedded_truncate( set_error_and_return_code(maybe_unit) } -/// See documentation for [`modelardb_embedded_truncate`]. +/// See documentation for [`modelardb_embedded_truncate()`]. unsafe fn truncate( maybe_operations_ptr: *mut c_void, is_data_folder: bool, @@ -921,7 +934,7 @@ pub unsafe extern "C" fn modelardb_embedded_drop( set_error_and_return_code(maybe_unit) } -/// See documentation for [`modelardb_embedded_drop`]. +/// See documentation for [`modelardb_embedded_drop()`]. unsafe fn drop( maybe_operations_ptr: *mut c_void, is_data_folder: bool, @@ -958,7 +971,7 @@ pub unsafe extern "C" fn modelardb_embedded_vacuum( set_error_and_return_code(maybe_unit) } -/// See documentation for [`modelardb_embedded_vacuum`]. +/// See documentation for [`modelardb_embedded_vacuum()`]. 
unsafe fn vacuum( maybe_operations_ptr: *mut c_void, is_data_folder: bool, diff --git a/crates/modelardb_embedded/src/error.rs b/crates/modelardb_embedded/src/error.rs index bb4bb9395..e0aff4c88 100644 --- a/crates/modelardb_embedded/src/error.rs +++ b/crates/modelardb_embedded/src/error.rs @@ -26,6 +26,7 @@ use arrow::error::ArrowError; use datafusion::error::DataFusionError; use datafusion::parquet::errors::ParquetError; use deltalake::{DeltaTableError, ObjectStoreError}; +use modelardb_compression::error::ModelarDbCompressionError; use modelardb_storage::error::ModelarDbStorageError; use modelardb_types::error::ModelarDbTypesError; use tonic::Status as TonicStatusError; @@ -47,6 +48,8 @@ pub enum ModelarDbEmbeddedError { EnvironmentVar(VarError), /// Error returned when an invalid argument was passed. InvalidArgument(String), + /// Error returned by modelardb_compression. + ModelarDbCompression(ModelarDbCompressionError), /// Error returned by modelardb_storage. ModelarDbStorage(ModelarDbStorageError), /// Error returned by modelardb_types. 
@@ -73,6 +76,9 @@ impl Display for ModelarDbEmbeddedError { Self::DeltaLake(reason) => write!(f, "Delta Lake Error: {reason}"), Self::EnvironmentVar(reason) => write!(f, "Environment Variable Error: {reason}"), Self::InvalidArgument(reason) => write!(f, "Invalid Argument Error: {reason}"), + Self::ModelarDbCompression(reason) => { + write!(f, "ModelarDB Compression Error: {reason}") + } Self::ModelarDbStorage(reason) => write!(f, "ModelarDB Storage Error: {reason}"), Self::ModelarDbTypes(reason) => write!(f, "ModelarDB Types Error: {reason}"), Self::ObjectStore(reason) => write!(f, "Object Store Error: {reason}"), @@ -93,6 +99,7 @@ impl Error for ModelarDbEmbeddedError { Self::DeltaLake(reason) => Some(reason), Self::EnvironmentVar(reason) => Some(reason), Self::InvalidArgument(_reason) => None, + Self::ModelarDbCompression(reason) => Some(reason), Self::ModelarDbStorage(reason) => Some(reason), Self::ModelarDbTypes(reason) => Some(reason), Self::ObjectStore(reason) => Some(reason), @@ -129,6 +136,12 @@ impl From for ModelarDbEmbeddedError { } } +impl From for ModelarDbEmbeddedError { + fn from(error: ModelarDbCompressionError) -> Self { + Self::ModelarDbCompression(error) + } +} + impl From for ModelarDbEmbeddedError { fn from(error: ModelarDbStorageError) -> Self { Self::ModelarDbStorage(error) diff --git a/crates/modelardb_embedded/src/operations/data_folder.rs b/crates/modelardb_embedded/src/operations/data_folder.rs index 091f751a9..1300f62d1 100644 --- a/crates/modelardb_embedded/src/operations/data_folder.rs +++ b/crates/modelardb_embedded/src/operations/data_folder.rs @@ -18,29 +18,20 @@ use std::any::Any; use std::collections::HashMap; use std::fmt::{Debug, Formatter, Result as FmtResult}; -use std::path::Path as StdPath; use std::pin::Pin; use std::result::Result as StdResult; use std::sync::Arc; use arrow::array::RecordBatch; -use arrow::array::{Float32Array, StringArray}; -use arrow::compute::SortOptions; use arrow::datatypes::Schema; use 
async_trait::async_trait; use datafusion::datasource::sink::DataSink; use datafusion::error::DataFusionError; use datafusion::execution::{RecordBatchStream, SendableRecordBatchStream, TaskContext}; -use datafusion::physical_expr::{LexOrdering, PhysicalSortExpr}; -use datafusion::physical_plan::expressions::Column; use datafusion::physical_plan::metrics::MetricsSet; -use datafusion::physical_plan::sorts::sort; use datafusion::physical_plan::{DisplayAs, DisplayFormatType, common}; -use datafusion::prelude::SessionContext; use futures::TryStreamExt; -use modelardb_storage::delta_lake::{DeltaLake, DeltaTableWriter}; -use modelardb_storage::metadata::table_metadata_manager::TableMetadataManager; -use modelardb_types::types::{TimeSeriesTableMetadata, TimestampArray}; +use modelardb_storage::data_folder::DataFolder; use crate::error::{ModelarDbEmbeddedError, Result}; use crate::operations::{ @@ -48,20 +39,27 @@ use crate::operations::{ }; use crate::{Aggregate, TableType}; -/// [`DataSink`] that rejects INSERT statements passed to [`DataFolder.read()`]. -struct DataFolderDataSink { +/// [`DataSink`] that rejects INSERT statements passed to [`DataFolder::read()`]. +pub struct DataFolderDataSink { /// The schema of the data sink is empty since it rejects everything. schema: Arc, } impl DataFolderDataSink { - fn new() -> Self { + pub fn new() -> Self { Self { schema: Arc::new(Schema::empty()), } } } +impl Default for DataFolderDataSink { + // Trait implemented to silence clippy warning. + fn default() -> Self { + Self::new() + } +} + #[async_trait] impl DataSink for DataFolderDataSink { /// Return `self` as [`Any`] so it can be downcast. @@ -107,321 +105,6 @@ impl DisplayAs for DataFolderDataSink { } } -/// Provides access to modelardb_embedded's components. -pub struct DataFolder { - /// Delta Lake for storing metadata and data in Apache Parquet files. - delta_lake: DeltaLake, - /// Metadata manager for providing access to metadata related to tables. 
It is stored in an - /// [`Arc`] because it is shared with each of the time series tables for use in query planning. - table_metadata_manager: Arc, - /// Context providing access to a specific session of Apache DataFusion. - session_context: SessionContext, -} - -impl DataFolder { - /// Creates a [`DataFolder`] that manages data in memory and returns it. If the metadata tables - /// could not be created, [`ModelarDbEmbeddedError`] is returned. - pub async fn open_memory() -> Result { - let delta_lake = DeltaLake::new_in_memory(); - let table_metadata_manager = Arc::new(TableMetadataManager::try_new_in_memory().await?); - - Self::try_new_and_register_tables(delta_lake, table_metadata_manager).await - } - - /// Creates a [`DataFolder`] that manages data in the local folder at `data_folder_path` and - /// returns it. If the folder does not exist and could not be created or the metadata tables - /// could not be created, [`ModelarDbEmbeddedError`] is returned. - pub async fn open_local(data_folder_path: &StdPath) -> Result { - let delta_lake = DeltaLake::try_from_local_path(data_folder_path)?; - - let table_metadata_manager = - Arc::new(TableMetadataManager::try_from_path(data_folder_path).await?); - - Self::try_new_and_register_tables(delta_lake, table_metadata_manager).await - } - - /// Creates a [`DataFolder`] that manages data in an object store with an S3-compatible API and - /// returns it. If a connection to the object store could not be established or the metadata - /// tables could not be created, [`ModelarDbEmbeddedError`] is returned. - pub async fn open_s3( - endpoint: String, - bucket_name: String, - access_key_id: String, - secret_access_key: String, - ) -> Result { - // Register the S3 storage handlers to allow the use of Amazon S3 object stores. This is - // required at runtime to initialize the S3 storage implementation in the deltalake_aws - // storage subcrate. 
It is safe to call this function multiple times as the handlers are - // stored in a DashMap, thus, the handlers are simply overwritten with the same each time. - deltalake::aws::register_handlers(None); - - // Construct data folder. - let delta_lake = DeltaLake::try_from_s3_configuration( - endpoint.clone(), - bucket_name.clone(), - access_key_id.clone(), - secret_access_key.clone(), - )?; - - let table_metadata_manager = Arc::new( - TableMetadataManager::try_from_s3_configuration( - endpoint, - bucket_name, - access_key_id, - secret_access_key, - ) - .await?, - ); - - Self::try_new_and_register_tables(delta_lake, table_metadata_manager).await - } - - /// Creates a [`DataFolder`] that manages data in an object store with an Azure-compatible API - /// and returns it. If a connection to the object store could not be established or the metadata - /// tables could not be created, [`ModelarDbEmbeddedError`] is returned. - pub async fn open_azure( - account_name: String, - access_key: String, - container_name: String, - ) -> Result { - let delta_lake = DeltaLake::try_from_azure_configuration( - account_name.clone(), - access_key.clone(), - container_name.clone(), - )?; - - let table_metadata_manager = Arc::new( - TableMetadataManager::try_from_azure_configuration( - account_name, - access_key, - container_name, - ) - .await?, - ); - - Self::try_new_and_register_tables(delta_lake, table_metadata_manager).await - } - - /// Create a [`DataFolder`], register all normal tables and time series tables in it with its - /// [`SessionContext`], and return it. If the tables could not be registered, - /// [`ModelarDbEmbeddedError`] is returned. - async fn try_new_and_register_tables( - delta_lake: DeltaLake, - table_metadata_manager: Arc, - ) -> Result { - // Construct data folder. - let session_context = modelardb_storage::create_session_context(); - - let data_folder = DataFolder { - delta_lake, - table_metadata_manager, - session_context, - }; - - // Register normal tables. 
- let data_sink = Arc::new(DataFolderDataSink::new()); - - for normal_table_name in data_folder - .table_metadata_manager - .normal_table_names() - .await? - { - let delta_table = data_folder - .delta_lake - .delta_table(&normal_table_name) - .await?; - - modelardb_storage::register_normal_table( - &data_folder.session_context, - &normal_table_name, - delta_table, - data_sink.clone(), - )?; - } - - // Register time series tables. - for metadata in data_folder - .table_metadata_manager - .time_series_table_metadata() - .await? - { - let delta_table = data_folder.delta_lake.delta_table(&metadata.name).await?; - - modelardb_storage::register_time_series_table( - &data_folder.session_context, - delta_table, - metadata, - data_sink.clone(), - )?; - } - - Ok(data_folder) - } - - /// Compress the `uncompressed_data` from the table with `time_series_table_metadata` and return the - /// resulting segments. - pub async fn compress_all( - &self, - time_series_table_metadata: &TimeSeriesTableMetadata, - uncompressed_data: &RecordBatch, - ) -> Result> { - // Sort by all tags and then time to simplify splitting the data into time series. - let sorted_uncompressed_data = - sort_record_batch_by_tags_and_time(time_series_table_metadata, uncompressed_data)?; - - // Split the sorted uncompressed data into time series and compress them separately. - let mut compressed_data = vec![]; - - let tag_column_arrays: Vec<&StringArray> = time_series_table_metadata - .tag_column_indices - .iter() - .map(|index| modelardb_types::array!(sorted_uncompressed_data, *index, StringArray)) - .collect(); - - let mut tag_values = Vec::with_capacity(tag_column_arrays.len()); - for tag_column_array in &tag_column_arrays { - tag_values.push(tag_column_array.value(0).to_owned()); - } - - // The index of the first data point of each time series must be stored so slices - // containing only data points for each time series can be extracted and compressed. 
- let mut row_index_start = 0; - for row_index in 0..sorted_uncompressed_data.num_rows() { - // If any of the tags differ, the data point is from a new time series. - let mut is_new_time_series = false; - for tag_column_index in 0..tag_column_arrays.len() { - is_new_time_series |= tag_values[tag_column_index] - != tag_column_arrays[tag_column_index].value(row_index); - } - - if is_new_time_series { - let time_series_length = row_index - row_index_start; - let uncompressed_time_series = - sorted_uncompressed_data.slice(row_index_start, time_series_length); - - self.compress( - time_series_table_metadata, - &uncompressed_time_series, - &tag_values, - &mut compressed_data, - ) - .await?; - - for (tag_column_index, tag_column_array) in tag_column_arrays.iter().enumerate() { - tag_values[tag_column_index] = tag_column_array.value(row_index).to_owned(); - } - - row_index_start = row_index; - } - } - - let time_series_length = sorted_uncompressed_data.num_rows() - row_index_start; - let uncompressed_time_series = - sorted_uncompressed_data.slice(row_index_start, time_series_length); - - self.compress( - time_series_table_metadata, - &uncompressed_time_series, - &tag_values, - &mut compressed_data, - ) - .await?; - - Ok(compressed_data) - } - - /// Compress the field columns in `uncompressed_time_series` from the table with - /// `time_series_table_metadata` and append the result to `compressed_data`. It is assumed that - /// all data points in `uncompressed_time_series` have the same tags as in `tag_values`. 
- async fn compress( - &self, - time_series_table_metadata: &TimeSeriesTableMetadata, - uncompressed_time_series: &RecordBatch, - tag_values: &[String], - compressed_data: &mut Vec, - ) -> Result<()> { - let uncompressed_timestamps = modelardb_types::array!( - uncompressed_time_series, - time_series_table_metadata.timestamp_column_index, - TimestampArray - ); - - for field_column_index in &time_series_table_metadata.field_column_indices { - let uncompressed_values = modelardb_types::array!( - uncompressed_time_series, - *field_column_index, - Float32Array - ); - - let error_bound = time_series_table_metadata.error_bounds[*field_column_index]; - - let compressed_time_series = modelardb_compression::try_compress( - uncompressed_timestamps, - uncompressed_values, - error_bound, - time_series_table_metadata.compressed_schema.clone(), - tag_values.to_vec(), - *field_column_index as i16, - ) - .expect("uncompressed_timestamps and uncompressed_values should have the same length."); - - compressed_data.push(compressed_time_series); - } - - Ok(()) - } - - /// Create a writer for writing multiple batches of data to the table with the table name in - /// `table_name`. If the table does not exist or a writer for it could not be created, a - /// [`ModelarDbEmbeddedError`] is returned. - pub async fn writer(&self, table_name: &str) -> Result { - let delta_table = self.delta_lake.delta_table(table_name).await?; - if self.time_series_table_metadata(table_name).await.is_some() { - self.delta_lake - .time_series_table_writer(delta_table) - .await - .map_err(|error| error.into()) - } else { - self.delta_lake - .normal_or_metadata_table_writer(delta_table) - .await - .map_err(|error| error.into()) - } - } - - /// Return the schema of the table with the name in `table_name` if it is a normal table. If the - /// table does not exist or the table is not a normal table, return [`None`]. 
- async fn normal_table_schema(&self, table_name: &str) -> Option { - if self - .table_metadata_manager - .is_normal_table(table_name) - .await - .is_ok_and(|is_normal_table| is_normal_table) - { - self.delta_lake - .delta_table(table_name) - .await - .expect("Delta Lake table should exist if the table is in the metadata Delta Lake.") - .get_schema() - .expect("Delta Lake table should be loaded and metadata should be in the log.") - .try_into() - .ok() - } else { - None - } - } - - /// Return [`TimeSeriesTableMetadata`] for the table with `table_name` if it exists, is registered - /// with Apache DataFusion, and is a time series table. - pub async fn time_series_table_metadata( - &self, - table_name: &str, - ) -> Option> { - let table_provider = self.session_context.table_provider(table_name).await.ok()?; - modelardb_storage::maybe_table_provider_to_time_series_table_metadata(table_provider) - } -} - #[async_trait] impl Operations for DataFolder { /// Return `self` as [`Any`] so it can be downcast. 
@@ -435,19 +118,14 @@ impl Operations for DataFolder { async fn create(&mut self, table_name: &str, table_type: TableType) -> Result<()> { match table_type { TableType::NormalTable(schema) => { - let delta_table = self - .delta_lake - .create_normal_table(table_name, &schema) - .await?; + let delta_table = self.create_normal_table(table_name, &schema).await?; - self.table_metadata_manager - .save_normal_table_metadata(table_name) - .await?; + self.save_normal_table_metadata(table_name).await?; let data_sink = Arc::new(DataFolderDataSink::new()); modelardb_storage::register_normal_table( - &self.session_context, + self.session_context(), table_name, delta_table, data_sink.clone(), @@ -462,18 +140,16 @@ impl Operations for DataFolder { )?); let delta_table = self - .delta_lake .create_time_series_table(&time_series_table_metadata) .await?; - self.table_metadata_manager - .save_time_series_table_metadata(&time_series_table_metadata) + self.save_time_series_table_metadata(&time_series_table_metadata) .await?; let data_sink = Arc::new(DataFolderDataSink::new()); modelardb_storage::register_time_series_table( - &self.session_context, + self.session_context(), delta_table, time_series_table_metadata, data_sink.clone(), @@ -484,19 +160,18 @@ impl Operations for DataFolder { Ok(()) } - /// Returns the name of all the tables. If the table names could not be retrieved from the - /// metadata Delta Lake, [`ModelarDbEmbeddedError`] is returned. + /// Returns the name of all the tables. If the table names could not be retrieved from the Delta + /// Lake, [`ModelarDbEmbeddedError`] is returned. async fn tables(&mut self) -> Result> { - self.table_metadata_manager - .table_names() - .await - .map_err(|error| error.into()) + self.table_names().await.map_err(|error| error.into()) } /// Returns the schema of the table with the name in `table_name`. If the table does not exist, /// [`ModelarDbEmbeddedError`] is returned. 
async fn schema(&mut self, table_name: &str) -> Result { - if let Some(time_series_table_metadata) = self.time_series_table_metadata(table_name).await + if let Some(time_series_table_metadata) = self + .time_series_table_metadata_for_registered_time_series_table(table_name) + .await { Ok((*time_series_table_metadata.query_schema).to_owned()) } else if let Some(normal_table_schema) = self.normal_table_schema(table_name).await { @@ -523,7 +198,9 @@ impl Operations for DataFolder { "The uncompressed data does not match the schema for the table: {table_name}." )); - if let Some(time_series_table_metadata) = self.time_series_table_metadata(table_name).await + if let Some(time_series_table_metadata) = self + .time_series_table_metadata_for_registered_time_series_table(table_name) + .await { // Time series table. if !schemas_are_compatible( @@ -533,12 +210,12 @@ impl Operations for DataFolder { return Err(schema_mismatch_error); } - let compressed_data = self - .compress_all(&time_series_table_metadata, &uncompressed_data) - .await?; + let compressed_data = modelardb_compression::try_compress_multivariate_time_series( + &time_series_table_metadata, + &uncompressed_data, + )?; - self.delta_lake - .write_compressed_segments_to_time_series_table(table_name, compressed_data) + self.write_compressed_segments_to_time_series_table(table_name, compressed_data) .await?; } else if let Some(normal_table_schema) = self.normal_table_schema(table_name).await { // Normal table. @@ -546,8 +223,7 @@ impl Operations for DataFolder { return Err(schema_mismatch_error); } - self.delta_lake - .write_record_batches_to_normal_table(table_name, vec![uncompressed_data]) + self.write_record_batches_to_normal_table(table_name, vec![uncompressed_data]) .await?; } else { return Err(ModelarDbEmbeddedError::InvalidArgument(format!( @@ -561,7 +237,7 @@ impl Operations for DataFolder { /// Executes the SQL in `sql` and returns the result as a [`RecordBatchStream`]. 
If the SQL /// could not be executed, [`ModelarDbEmbeddedError`] is returned. async fn read(&mut self, sql: &str) -> Result>> { - let data_frame = self.session_context.sql(sql).await?; + let data_frame = self.session_context().sql(sql).await?; data_frame .execute_stream() @@ -606,7 +282,6 @@ impl Operations for DataFolder { let record_batches = common::collect(record_batch_stream).await?; target_data_folder - .delta_lake .write_record_batches_to_normal_table(target_table_name, record_batches) .await?; @@ -628,8 +303,9 @@ impl Operations for DataFolder { tags: HashMap, ) -> Result>> { // DataFolder.read() interface is designed for time series tables. - let time_series_table_medata = if let Some(time_series_table_metadata) = - self.time_series_table_metadata(table_name).await + let time_series_table_medata = if let Some(time_series_table_metadata) = self + .time_series_table_metadata_for_registered_time_series_table(table_name) + .await { time_series_table_metadata } else { @@ -674,7 +350,7 @@ impl Operations for DataFolder { // DataFolder.copy_time_series_table() interface is designed for time series tables. let source_time_series_table_metadata = self - .time_series_table_metadata(source_table_name) + .time_series_table_metadata_for_registered_time_series_table(source_table_name) .await .ok_or_else(|| { ModelarDbEmbeddedError::InvalidArgument(format!( @@ -683,7 +359,7 @@ impl Operations for DataFolder { })?; let target_time_series_table_metadata = target_data_folder - .time_series_table_metadata(target_table_name) + .time_series_table_metadata_for_registered_time_series_table(target_table_name) .await .ok_or_else(|| { ModelarDbEmbeddedError::InvalidArgument(format!( @@ -722,9 +398,9 @@ impl Operations for DataFolder { let sql = format!("SELECT * FROM {source_table_name} {where_clause}"); // Read data to copy from source_table_name in source. 
- let source_table = Arc::new(self.delta_lake.delta_table(source_table_name).await?); + let source_table = Arc::new(self.delta_table(source_table_name).await?); - let session_context = SessionContext::new(); + let session_context = modelardb_storage::create_session_context(); session_context.register_table(source_table_name, source_table)?; let df = session_context.sql(&sql).await?; @@ -732,7 +408,6 @@ impl Operations for DataFolder { // Write read data to target_table_name in target. target_data_folder - .delta_lake .write_compressed_segments_to_time_series_table(target_table_name, record_batches) .await?; @@ -761,9 +436,10 @@ impl Operations for DataFolder { )); if let (Some(source_time_series_table_metadata), Some(target_time_series_table_metadata)) = ( - self.time_series_table_metadata(source_table_name).await, + self.time_series_table_metadata_for_registered_time_series_table(source_table_name) + .await, target_data_folder - .time_series_table_metadata(target_table_name) + .time_series_table_metadata_for_registered_time_series_table(target_table_name) .await, ) { // If both tables are time series tables, check if their schemas match and write the @@ -775,12 +451,11 @@ impl Operations for DataFolder { return Err(schema_mismatch_error); } - let delta_ops = self.delta_lake.delta_ops(source_table_name).await?; + let delta_ops = self.delta_ops(source_table_name).await?; let (_table, stream) = delta_ops.load().await?; let record_batches: Vec = stream.try_collect().await?; target_data_folder - .delta_lake .write_compressed_segments_to_time_series_table(target_table_name, record_batches) .await?; } else if let (Some(source_normal_table_schema), Some(target_normal_table_schema)) = ( @@ -795,12 +470,11 @@ impl Operations for DataFolder { return Err(schema_mismatch_error); } - let delta_ops = self.delta_lake.delta_ops(source_table_name).await?; + let delta_ops = self.delta_ops(source_table_name).await?; let (_table, stream) = delta_ops.load().await?; let record_batches: 
Vec = stream.try_collect().await?; target_data_folder - .delta_lake .write_record_batches_to_normal_table(target_table_name, record_batches) .await?; } else { @@ -820,8 +494,7 @@ impl Operations for DataFolder { /// Delta Lake. If the data could not be deleted, [`ModelarDbEmbeddedError`] is returned. async fn truncate(&mut self, table_name: &str) -> Result<()> { if self.tables().await?.contains(&table_name.to_owned()) { - self.delta_lake - .truncate_table(table_name) + self.truncate_table(table_name) .await .map_err(|error| error.into()) } else { @@ -832,21 +505,18 @@ impl Operations for DataFolder { } /// Drop the table with the name in `table_name` by deregistering the table from the Apache - /// Arrow DataFusion session, deleting all the table files from the data Delta Lake, and - /// deleting the table metadata from the metadata Delta Lake. If the table could not be - /// deregistered or the metadata or data could not be dropped, [`ModelarDbEmbeddedError`] is - /// returned. + /// Arrow DataFusion session, deleting all the table files from the Delta Lake, and deleting the + /// table metadata from the Delta Lake. If the table could not be deregistered or the metadata + /// or data could not be dropped, [`ModelarDbEmbeddedError`] is returned. async fn drop(&mut self, table_name: &str) -> Result<()> { // Drop the table from the Apache Arrow DataFusion session. - self.session_context.deregister_table(table_name)?; + self.session_context().deregister_table(table_name)?; - // Delete the table metadata from the metadata Delta Lake. - self.table_metadata_manager - .drop_table_metadata(table_name) - .await?; + // Delete the table metadata from the Delta Lake. + self.drop_table_metadata(table_name).await?; // Drop the table from the Delta Lake. 
- self.delta_lake.drop_table(table_name).await?; + self.drop_table(table_name).await?; Ok(()) } @@ -863,8 +533,7 @@ impl Operations for DataFolder { maybe_retention_period_in_seconds: Option, ) -> Result<()> { if self.tables().await?.contains(&table_name.to_owned()) { - self.delta_lake - .vacuum_table(table_name, maybe_retention_period_in_seconds) + self.vacuum_table(table_name, maybe_retention_period_in_seconds) .await .map_err(|error| error.into()) } else { @@ -875,44 +544,6 @@ impl Operations for DataFolder { } } -/// Sort the `uncompressed_data` from the time series table with `time_series_table_metadata` -/// according to its tags and then timestamps. -fn sort_record_batch_by_tags_and_time( - time_series_table_metadata: &TimeSeriesTableMetadata, - uncompressed_data: &RecordBatch, -) -> Result { - let mut physical_sort_exprs = vec![]; - - let sort_options = SortOptions { - descending: false, - nulls_first: false, - }; - - for tag_column_index in &time_series_table_metadata.tag_column_indices { - let field = time_series_table_metadata.schema.field(*tag_column_index); - physical_sort_exprs.push(PhysicalSortExpr { - expr: Arc::new(Column::new(field.name(), *tag_column_index)), - options: sort_options, - }); - } - - let timestamp_column_index = time_series_table_metadata.timestamp_column_index; - let field = time_series_table_metadata - .schema - .field(timestamp_column_index); - physical_sort_exprs.push(PhysicalSortExpr { - expr: Arc::new(Column::new(field.name(), timestamp_column_index)), - options: sort_options, - }); - - sort::sort_batch( - uncompressed_data, - &LexOrdering::new(physical_sort_exprs), - None, - ) - .map_err(|error| error.into()) -} - /// Compare `source_schema` and `target_schema` and return [`true`] if they have the same number of /// columns, their columns have the same types, and their columns nullability is less or equally /// restrictive in `source_schema`. Otherwise [`False`] is returned. 
@@ -944,13 +575,21 @@ fn schemas_are_compatible(source_schema: &Schema, target_schema: &Schema) -> boo mod tests { use super::*; - use arrow::array::{Array, Float64Array, Int8Array, Int16Array, Int32Array, Int64Array}; + use arrow::array::{ + Array, Float32Array, Float64Array, Int8Array, Int16Array, Int32Array, Int64Array, + StringArray, + }; + use arrow::compute::SortOptions; use arrow::datatypes::{ArrowPrimitiveType, DataType, Field}; use arrow_flight::flight_service_client::FlightServiceClient; use datafusion::datasource::TableProvider; use datafusion::logical_expr::col; + use datafusion::physical_expr::{LexOrdering, PhysicalSortExpr}; + use datafusion::physical_plan::expressions::Column; + use datafusion::physical_plan::sorts::sort; use modelardb_types::types::{ - ArrowTimestamp, ArrowValue, ErrorBound, GeneratedColumn, ValueArray, + ArrowTimestamp, ArrowValue, ErrorBound, GeneratedColumn, TimeSeriesTableMetadata, + TimestampArray, ValueArray, }; use tempfile::TempDir; use tonic::transport::Channel; @@ -993,15 +632,20 @@ mod tests { // Create a new data folder and verify that the existing normal tables are registered. let new_data_folder = DataFolder::open_local(temp_dir.path()).await.unwrap(); + let data_sink = Arc::new(DataFolderDataSink::new()); + new_data_folder + .register_tables(data_sink) + .await + .unwrap(); assert!( new_data_folder - .session_context + .session_context() .table_exist("normal_table_1") .unwrap() ); assert!( new_data_folder - .session_context + .session_context() .table_exist("normal_table_2") .unwrap() ); @@ -1163,15 +807,20 @@ mod tests { // Create a new data folder and verify that the existing time series tables are registered. 
let new_data_folder = DataFolder::open_local(temp_dir.path()).await.unwrap(); + let data_sink = Arc::new(DataFolderDataSink::new()); + new_data_folder + .register_tables(data_sink) + .await + .unwrap(); assert!( new_data_folder - .session_context + .session_context() .table_exist("time_series_table_1") .unwrap() ); assert!( new_data_folder - .session_context + .session_context() .table_exist("time_series_table_2") .unwrap() ); @@ -1295,11 +944,7 @@ mod tests { #[tokio::test] async fn test_write_to_normal_table() { let (_temp_dir, mut data_folder) = create_data_folder_with_normal_table().await; - let mut delta_table = data_folder - .delta_lake - .delta_table(NORMAL_TABLE_NAME) - .await - .unwrap(); + let mut delta_table = data_folder.delta_table(NORMAL_TABLE_NAME).await.unwrap(); assert_eq!(delta_table.get_files_count(), 0); @@ -1346,7 +991,6 @@ mod tests { async fn test_write_to_time_series_table() { let (_temp_dir, mut data_folder) = create_data_folder_with_time_series_table().await; let mut delta_table = data_folder - .delta_lake .delta_table(TIME_SERIES_TABLE_NAME) .await .unwrap(); @@ -2401,7 +2045,7 @@ mod tests { assert!( data_folder - .session_context + .session_context() .table_exist(NORMAL_TABLE_NAME) .unwrap() ); @@ -2411,28 +2055,21 @@ mod tests { // Verify that the normal table was deregistered from Apache DataFusion. assert!( !data_folder - .session_context + .session_context() .table_exist(NORMAL_TABLE_NAME) .unwrap() ); - // Verify that the normal table was dropped from the metadata Delta Lake. + // Verify that the normal table was dropped from the Delta Lake. assert!( !data_folder - .table_metadata_manager .is_normal_table(NORMAL_TABLE_NAME) .await .unwrap() ); // Verify that the normal table was dropped from the Delta Lake. 
- assert!( - data_folder - .delta_lake - .delta_table(NORMAL_TABLE_NAME) - .await - .is_err() - ); + assert!(data_folder.delta_table(NORMAL_TABLE_NAME).await.is_err()); } #[tokio::test] @@ -2441,7 +2078,7 @@ mod tests { assert!( data_folder - .session_context + .session_context() .table_exist(TIME_SERIES_TABLE_NAME) .unwrap() ); @@ -2451,15 +2088,14 @@ mod tests { // Verify that the time series table was deregistered from Apache DataFusion. assert!( !data_folder - .session_context + .session_context() .table_exist(TIME_SERIES_TABLE_NAME) .unwrap() ); - // Verify that the time series table was dropped from the metadata Delta Lake. + // Verify that the time series table was dropped from the Delta Lake. assert!( !data_folder - .table_metadata_manager .is_time_series_table(TIME_SERIES_TABLE_NAME) .await .unwrap() @@ -2468,7 +2104,6 @@ mod tests { // Verify that the time series table was dropped from the Delta Lake. assert!( data_folder - .delta_lake .delta_table(TIME_SERIES_TABLE_NAME) .await .is_err() @@ -2500,11 +2135,7 @@ mod tests { .await .unwrap(); - let mut delta_table = data_folder - .delta_lake - .delta_table(NORMAL_TABLE_NAME) - .await - .unwrap(); + let mut delta_table = data_folder.delta_table(NORMAL_TABLE_NAME).await.unwrap(); assert_eq!(delta_table.get_files_count(), 1); @@ -2527,7 +2158,6 @@ mod tests { .unwrap(); let mut delta_table = data_folder - .delta_lake .delta_table(TIME_SERIES_TABLE_NAME) .await .unwrap(); @@ -2653,11 +2283,7 @@ mod tests { .await .unwrap(); - let mut delta_table = source - .delta_lake - .delta_table(NORMAL_TABLE_NAME) - .await - .unwrap(); + let mut delta_table = source.delta_table(NORMAL_TABLE_NAME).await.unwrap(); assert_eq!(delta_table.get_files_count(), 1); @@ -2682,26 +2308,21 @@ mod tests { expected_schema: Schema, ) { // Verify that the normal table exists in the Delta Lake. 
- let delta_table = data_folder - .delta_lake - .delta_table(table_name) - .await - .unwrap(); + let delta_table = data_folder.delta_table(table_name).await.unwrap(); let actual_schema = TableProvider::schema(&delta_table); assert_eq!(actual_schema, Arc::new(expected_schema)); - // Verify that the normal table exists in the metadata Delta Lake. + // Verify that the normal table exists in the Delta Lake. + assert!(data_folder.is_normal_table(table_name).await.unwrap()); + + // Verify that the normal table is registered with Apache DataFusion. assert!( data_folder - .table_metadata_manager - .is_normal_table(table_name) - .await + .session_context() + .table_exist(table_name) .unwrap() - ); - - // Verify that the normal table is registered with Apache DataFusion. - assert!(data_folder.session_context.table_exist(table_name).unwrap()) + ) } #[tokio::test] @@ -2822,11 +2443,7 @@ mod tests { .await .unwrap(); - let mut delta_table = source - .delta_lake - .delta_table(TIME_SERIES_TABLE_NAME) - .await - .unwrap(); + let mut delta_table = source.delta_table(TIME_SERIES_TABLE_NAME).await.unwrap(); assert_eq!(delta_table.get_files_count(), 2); @@ -2865,11 +2482,7 @@ mod tests { .await .unwrap(); - let mut delta_table = source - .delta_lake - .delta_table(TIME_SERIES_TABLE_NAME) - .await - .unwrap(); + let mut delta_table = source.delta_table(TIME_SERIES_TABLE_NAME).await.unwrap(); assert_eq!(delta_table.get_files_count(), 2); @@ -2904,11 +2517,10 @@ mod tests { expected_schema: Schema, ) -> TimeSeriesTableMetadata { // Verify that the time series table exists in the Delta Lake. - assert!(data_folder.delta_lake.delta_table(table_name).await.is_ok()); + assert!(data_folder.delta_table(table_name).await.is_ok()); - // Verify that the time series table exists in the metadata Delta Lake with the correct schema. + // Verify that the time series table exists in the Delta Lake with the correct schema. 
let time_series_table_metadata = data_folder - .table_metadata_manager .time_series_table_metadata_for_time_series_table(table_name) .await .unwrap(); @@ -2917,7 +2529,12 @@ mod tests { assert_eq!(*time_series_table_metadata.query_schema, expected_schema); // Verify that the time series table is registered with Apache DataFusion. - assert!(data_folder.session_context.table_exist(table_name).unwrap()); + assert!( + data_folder + .session_context() + .table_exist(table_name) + .unwrap() + ); time_series_table_metadata } diff --git a/crates/modelardb_manager/src/main.rs b/crates/modelardb_manager/src/main.rs index 7b3b8a678..8e0525f7d 100644 --- a/crates/modelardb_manager/src/main.rs +++ b/crates/modelardb_manager/src/main.rs @@ -23,7 +23,7 @@ mod remote; use std::sync::{Arc, LazyLock}; use std::{env, process}; -use modelardb_storage::delta_lake::DeltaLake; +use modelardb_storage::data_folder::DataFolder; use modelardb_types::flight::protocol; use tokio::sync::RwLock; use tonic::metadata::errors::InvalidMetadataValue; @@ -32,63 +32,20 @@ use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; use crate::cluster::Cluster; use crate::error::{ModelarDbManagerError, Result}; -use crate::metadata::MetadataManager; +use crate::metadata::ManagerMetadata; use crate::remote::start_apache_arrow_flight_server; /// The port of the Apache Arrow Flight Server. If the environment variable is not set, 9998 is used. pub static PORT: LazyLock = LazyLock::new(|| env::var("MODELARDBM_PORT").map_or(9998, |value| value.parse().unwrap())); -/// Stores the storage configuration with the remote data folder to ensure that the information -/// is consistent with the remote data folder. -pub struct RemoteDataFolder { - /// Storage configuration encoded as a [`StorageConfiguration`](protocol::manager_metadata::StorageConfiguration) - /// protobuf message to make it possible to transfer the configuration using Apache Arrow Flight. 
- storage_configuration: protocol::manager_metadata::StorageConfiguration, - /// Remote object store for storing data and metadata in Apache Parquet files. - delta_lake: Arc, - /// Manager for the access to the metadata Delta Lake. - metadata_manager: Arc, -} - -impl RemoteDataFolder { - pub fn new( - storage_configuration: protocol::manager_metadata::StorageConfiguration, - delta_lake: Arc, - metadata_manager: Arc, - ) -> Self { - Self { - storage_configuration, - delta_lake, - metadata_manager, - } - } - - /// Create a [`RemoteDataFolder`] from `remote_data_folder_str`. If `remote_data_folder_str` - /// cannot be parsed or a connection to the object store cannot be created, - /// [`ModelarDbManagerError`] is returned. - async fn try_new(remote_data_folder_str: &str) -> Result { - let storage_configuration = - modelardb_types::flight::argument_to_storage_configuration(remote_data_folder_str)?; - - let delta_lake = - DeltaLake::try_remote_from_storage_configuration(storage_configuration.clone())?; - - let metadata_manager = - MetadataManager::try_from_storage_configuration(storage_configuration.clone()).await?; - - Ok(Self::new( - storage_configuration, - Arc::new(delta_lake), - Arc::new(metadata_manager), - )) - } -} - /// Provides access to the managers components. pub struct Context { - /// Folder for storing metadata and data in Apache Parquet files in a remote object store. - pub remote_data_folder: RemoteDataFolder, + /// [`DataFolder`] for storing metadata and data in Apache Parquet files. + pub remote_data_folder: DataFolder, + /// Storage configuration encoded as a [`StorageConfiguration`](protocol::manager_metadata::StorageConfiguration) + /// protobuf message to make it possible to transfer the configuration using Apache Arrow Flight. + pub remote_storage_configuration: protocol::manager_metadata::StorageConfiguration, /// Cluster of nodes currently controlled by the manager. pub cluster: RwLock, /// Key used to identify requests coming from the manager. 
@@ -112,18 +69,23 @@ async fn main() -> Result<()> { _ => print_usage_and_exit_with_error("remote_data_folder"), }; - let remote_data_folder = RemoteDataFolder::try_new(remote_data_folder_str).await?; + let remote_storage_configuration = + modelardb_types::flight::argument_to_storage_configuration(remote_data_folder_str)?; + let remote_data_folder = + DataFolder::open_object_store(remote_storage_configuration.clone()).await?; - let nodes = remote_data_folder.metadata_manager.nodes().await?; + remote_data_folder + .create_and_register_manager_metadata_data_folder_tables() + .await?; let mut cluster = Cluster::new(); + let nodes = remote_data_folder.nodes().await?; for node in nodes { cluster.register_node(node)?; } // Retrieve and parse the key to a tonic metadata value since it is used in tonic requests. let key = remote_data_folder - .metadata_manager .manager_key() .await? .to_string() @@ -135,6 +97,7 @@ async fn main() -> Result<()> { // Create the Context. let context = Arc::new(Context { remote_data_folder, + remote_storage_configuration, cluster: RwLock::new(cluster), key, }); diff --git a/crates/modelardb_manager/src/metadata.rs b/crates/modelardb_manager/src/metadata.rs index ed057c280..00fdda038 100644 --- a/crates/modelardb_manager/src/metadata.rs +++ b/crates/modelardb_manager/src/metadata.rs @@ -13,8 +13,8 @@ * limitations under the License. */ -//! Management of the metadata Delta Lake for the manager. Metadata which is unique to the manager, -//! such as metadata about registered edges, is handled here. +//! Management of the Delta Lake for the manager. Metadata which is unique to the manager, such as +//! metadata about registered edges, is handled here. 
use std::str::FromStr; use std::sync::Arc; @@ -23,55 +23,24 @@ use arrow::array::{Array, StringArray}; use arrow::datatypes::{DataType, Field, Schema}; use deltalake::DeltaTableError; use deltalake::datafusion::logical_expr::{col, lit}; -use deltalake::datafusion::prelude::SessionContext; -use modelardb_storage::delta_lake::DeltaLake; -use modelardb_storage::metadata::table_metadata_manager::TableMetadataManager; +use modelardb_storage::data_folder::DataFolder; use modelardb_storage::{register_metadata_table, sql_and_concat}; -use modelardb_types::flight::protocol; use modelardb_types::types::{Node, ServerMode}; use uuid::Uuid; use crate::error::Result; -/// Stores the metadata required for reading from and writing to the normal tables and time series tables -/// and persisting edges. The data that needs to be persisted is stored in the metadata Delta Lake. -pub struct MetadataManager { - /// Delta Lake with functionality to read and write to and from the manager metadata tables. - delta_lake: DeltaLake, - /// Metadata manager used to interface with the subset of the manager metadata Delta Lake - /// related to normal tables and time series tables. - pub(crate) table_metadata_manager: TableMetadataManager, - /// Session context used to query the manager metadata Delta Lake tables using Apache DataFusion. - session_context: Arc, +/// Stores the metadata required for reading from and writing to the normal tables and time series +/// tables and persisting edges. The data that needs to be persisted is stored in the Delta Lake. 
+pub trait ManagerMetadata { + async fn create_and_register_manager_metadata_data_folder_tables(&self) -> Result<()>; + async fn manager_key(&self) -> Result; + async fn save_node(&self, node: Node) -> Result<()>; + async fn remove_node(&self, url: &str) -> Result<()>; + async fn nodes(&self) -> Result>; } -impl MetadataManager { - /// Create a new [`MetadataManager`] that saves the metadata to a remote object store given by - /// `storage_configuration` and initialize the metadata tables. If a connection could not be - /// made or the metadata tables could not be created, return - /// [`ModelarDbManagerError`](crate::error::ModelarDbManagerError). - pub async fn try_from_storage_configuration( - storage_configuration: protocol::manager_metadata::StorageConfiguration, - ) -> Result { - let metadata_manager = Self { - delta_lake: DeltaLake::try_remote_from_storage_configuration( - storage_configuration.clone(), - )?, - table_metadata_manager: TableMetadataManager::try_from_storage_configuration( - storage_configuration, - ) - .await?, - session_context: Arc::new(SessionContext::new()), - }; - - // Create the necessary tables in the metadata Delta Lake. - metadata_manager - .create_and_register_manager_metadata_delta_lake_tables() - .await?; - - Ok(metadata_manager) - } - +impl ManagerMetadata for DataFolder { /// If they do not already exist, create the tables that are specific to the manager metadata /// Delta Lake and register them with the Apache DataFusion session context. /// * The `manager_metadata` table contains metadata for the manager itself. It is assumed that @@ -80,21 +49,19 @@ impl MetadataManager { /// /// If the tables exist or were created, return [`Ok`], otherwise return /// [`ModelarDbManagerError`](crate::error::ModelarDbManagerError). 
- async fn create_and_register_manager_metadata_delta_lake_tables(&self) -> Result<()> { + async fn create_and_register_manager_metadata_data_folder_tables(&self) -> Result<()> { // Create and register the manager_metadata table if it does not exist. let delta_table = self - .delta_lake .create_metadata_table( "manager_metadata", &Schema::new(vec![Field::new("key", DataType::Utf8, false)]), ) .await?; - register_metadata_table(&self.session_context, "manager_metadata", delta_table)?; + register_metadata_table(self.session_context(), "manager_metadata", delta_table)?; // Create and register the nodes table if it does not exist. let delta_table = self - .delta_lake .create_metadata_table( "nodes", &Schema::new(vec![ @@ -104,7 +71,7 @@ impl MetadataManager { ) .await?; - register_metadata_table(&self.session_context, "nodes", delta_table)?; + register_metadata_table(self.session_context(), "nodes", delta_table)?; Ok(()) } @@ -112,21 +79,20 @@ impl MetadataManager { /// Retrieve the key for the manager from the `manager_metadata` table. If a key does not /// already exist, create one and save it to the Delta Lake. If a key could not be retrieved /// or created, return [`ModelarDbManagerError`](crate::error::ModelarDbManagerError). - pub async fn manager_key(&self) -> Result { - let sql = "SELECT key FROM manager_metadata"; - let batch = sql_and_concat(&self.session_context, sql).await?; + async fn manager_key(&self) -> Result { + let sql = "SELECT key FROM metadata.manager_metadata"; + let batch = sql_and_concat(self.session_context(), sql).await?; let keys = modelardb_types::array!(batch, 0, StringArray); if keys.is_empty() { let manager_key = Uuid::new_v4(); // Add a new row to the manager_metadata table to persist the key. 
- self.delta_lake - .write_columns_to_metadata_table( - "manager_metadata", - vec![Arc::new(StringArray::from(vec![manager_key.to_string()]))], - ) - .await?; + self.write_columns_to_metadata_table( + "manager_metadata", + vec![Arc::new(StringArray::from(vec![manager_key.to_string()]))], + ) + .await?; Ok(manager_key) } else { @@ -138,18 +104,17 @@ impl MetadataManager { } } - /// Save the node to the metadata Delta Lake and return [`Ok`]. If the node could not be saved, - /// return [`ModelarDbManagerError`](crate::error::ModelarDbManagerError). - pub async fn save_node(&self, node: Node) -> Result<()> { - self.delta_lake - .write_columns_to_metadata_table( - "nodes", - vec![ - Arc::new(StringArray::from(vec![node.url])), - Arc::new(StringArray::from(vec![node.mode.to_string()])), - ], - ) - .await?; + /// Save the node to the Delta Lake and return [`Ok`]. If the node could not be saved, return + /// [`ModelarDbManagerError`](crate::error::ModelarDbManagerError). + async fn save_node(&self, node: Node) -> Result<()> { + self.write_columns_to_metadata_table( + "nodes", + vec![ + Arc::new(StringArray::from(vec![node.url])), + Arc::new(StringArray::from(vec![node.mode.to_string()])), + ], + ) + .await?; Ok(()) } @@ -157,8 +122,8 @@ impl MetadataManager { /// Remove the row in the `nodes` table that corresponds to the node with `url` and return /// [`Ok`]. If the row could not be removed, return /// [`ModelarDbManagerError`](crate::error::ModelarDbManagerError). - pub async fn remove_node(&self, url: &str) -> Result<()> { - let delta_ops = self.delta_lake.metadata_delta_ops("nodes").await?; + async fn remove_node(&self, url: &str) -> Result<()> { + let delta_ops = self.metadata_delta_ops("nodes").await?; delta_ops .delete() @@ -168,14 +133,14 @@ impl MetadataManager { Ok(()) } - /// Return the nodes currently controlled by the manager that have been persisted to the - /// metadata Delta Lake. 
If the nodes could not be retrieved, + /// Return the nodes currently controlled by the manager that have been persisted to the Delta + /// Lake. If the nodes could not be retrieved, /// [`ModelarDbManagerError`](crate::error::ModelarDbManagerError) is returned. - pub async fn nodes(&self) -> Result> { + async fn nodes(&self) -> Result> { let mut nodes: Vec = vec![]; - let sql = "SELECT url, mode FROM nodes"; - let batch = sql_and_concat(&self.session_context, sql).await?; + let sql = "SELECT url, mode FROM metadata.nodes"; + let batch = sql_and_concat(self.session_context(), sql).await?; let url_array = modelardb_types::array!(batch, 0, StringArray); let mode_array = modelardb_types::array!(batch, 1, StringArray); @@ -202,22 +167,22 @@ mod tests { // Tests for MetadataManager. #[tokio::test] - async fn test_create_manager_metadata_delta_lake_tables() { - let (_temp_dir, metadata_manager) = create_metadata_manager().await; + async fn test_create_manager_metadata_data_folder_tables() { + let (_temp_dir, data_folder) = create_data_folder().await; // Verify that the tables were created, registered, and has the expected columns. assert!( - metadata_manager - .session_context - .sql("SELECT key FROM manager_metadata") + data_folder + .session_context() + .sql("SELECT key FROM metadata.manager_metadata") .await .is_ok() ); assert!( - metadata_manager - .session_context - .sql("SELECT url, mode FROM nodes") + data_folder + .session_context() + .sql("SELECT url, mode FROM metadata.nodes") .await .is_ok() ); @@ -225,13 +190,13 @@ mod tests { #[tokio::test] async fn test_new_manager_key() { - let (_temp_dir, metadata_manager) = create_metadata_manager().await; + let (_temp_dir, data_folder) = create_data_folder().await; // Verify that the manager key is created and saved correctly. 
- let manager_key = metadata_manager.manager_key().await.unwrap(); + let manager_key = data_folder.manager_key().await.unwrap(); - let sql = "SELECT key FROM manager_metadata"; - let batch = sql_and_concat(&metadata_manager.session_context, sql) + let sql = "SELECT key FROM metadata.manager_metadata"; + let batch = sql_and_concat(data_folder.session_context(), sql) .await .unwrap(); @@ -243,14 +208,14 @@ mod tests { #[tokio::test] async fn test_existing_manager_key() { - let (_temp_dir, metadata_manager) = create_metadata_manager().await; + let (_temp_dir, data_folder) = create_data_folder().await; // Verify that only a single key is created and saved when retrieving multiple times. - let manager_key_1 = metadata_manager.manager_key().await.unwrap(); - let manager_key_2 = metadata_manager.manager_key().await.unwrap(); + let manager_key_1 = data_folder.manager_key().await.unwrap(); + let manager_key_2 = data_folder.manager_key().await.unwrap(); - let sql = "SELECT key FROM manager_metadata"; - let batch = sql_and_concat(&metadata_manager.session_context, sql) + let sql = "SELECT key FROM metadata.manager_metadata"; + let batch = sql_and_concat(data_folder.session_context(), sql) .await .unwrap(); @@ -260,17 +225,17 @@ mod tests { #[tokio::test] async fn test_save_node() { - let (_temp_dir, metadata_manager) = create_metadata_manager().await; + let (_temp_dir, data_folder) = create_data_folder().await; let node_1 = Node::new("url_1".to_string(), ServerMode::Edge); - metadata_manager.save_node(node_1.clone()).await.unwrap(); + data_folder.save_node(node_1.clone()).await.unwrap(); let node_2 = Node::new("url_2".to_string(), ServerMode::Edge); - metadata_manager.save_node(node_2.clone()).await.unwrap(); + data_folder.save_node(node_2.clone()).await.unwrap(); // Verify that the nodes are saved correctly. 
- let sql = "SELECT url, mode FROM nodes"; - let batch = sql_and_concat(&metadata_manager.session_context, sql) + let sql = "SELECT url, mode FROM metadata.nodes"; + let batch = sql_and_concat(data_folder.session_context(), sql) .await .unwrap(); @@ -286,19 +251,19 @@ mod tests { #[tokio::test] async fn test_remove_node() { - let (_temp_dir, metadata_manager) = create_metadata_manager().await; + let (_temp_dir, data_folder) = create_data_folder().await; let node_1 = Node::new("url_1".to_string(), ServerMode::Edge); - metadata_manager.save_node(node_1.clone()).await.unwrap(); + data_folder.save_node(node_1.clone()).await.unwrap(); let node_2 = Node::new("url_2".to_string(), ServerMode::Edge); - metadata_manager.save_node(node_2.clone()).await.unwrap(); + data_folder.save_node(node_2.clone()).await.unwrap(); - metadata_manager.remove_node(&node_1.url).await.unwrap(); + data_folder.remove_node(&node_1.url).await.unwrap(); // Verify that node_1 is removed correctly. - let sql = "SELECT url, mode FROM nodes"; - let batch = sql_and_concat(&metadata_manager.session_context, sql) + let sql = "SELECT url, mode FROM metadata.nodes"; + let batch = sql_and_concat(data_folder.session_context(), sql) .await .unwrap(); @@ -314,37 +279,29 @@ mod tests { #[tokio::test] async fn test_nodes() { - let (_temp_dir, metadata_manager) = create_metadata_manager().await; + let (_temp_dir, data_folder) = create_data_folder().await; let node_1 = Node::new("url_1".to_string(), ServerMode::Edge); - metadata_manager.save_node(node_1.clone()).await.unwrap(); + data_folder.save_node(node_1.clone()).await.unwrap(); let node_2 = Node::new("url_2".to_string(), ServerMode::Edge); - metadata_manager.save_node(node_2.clone()).await.unwrap(); + data_folder.save_node(node_2.clone()).await.unwrap(); - let nodes = metadata_manager.nodes().await.unwrap(); + let nodes = data_folder.nodes().await.unwrap(); assert_eq!(nodes, vec![node_2, node_1]); } - async fn create_metadata_manager() -> (TempDir, 
MetadataManager) { + async fn create_data_folder() -> (TempDir, DataFolder) { let temp_dir = tempfile::tempdir().unwrap(); - let table_metadata_manager = TableMetadataManager::try_from_path(temp_dir.path()) - .await - .unwrap(); - - let metadata_manager = MetadataManager { - delta_lake: DeltaLake::try_from_local_path(temp_dir.path()).unwrap(), - table_metadata_manager, - session_context: Arc::new(SessionContext::new()), - }; + let data_folder = DataFolder::open_local(temp_dir.path()).await.unwrap(); - metadata_manager - .create_and_register_manager_metadata_delta_lake_tables() + data_folder + .create_and_register_manager_metadata_data_folder_tables() .await .unwrap(); - (temp_dir, metadata_manager) + (temp_dir, data_folder) } } diff --git a/crates/modelardb_manager/src/remote.rs b/crates/modelardb_manager/src/remote.rs index 0619ebc87..6c690126b 100644 --- a/crates/modelardb_manager/src/remote.rs +++ b/crates/modelardb_manager/src/remote.rs @@ -44,6 +44,7 @@ use tracing::info; use crate::Context; use crate::error::{ModelarDbManagerError, Result}; +use crate::metadata::ManagerMetadata; /// Start an Apache Arrow Flight server on 0.0.0.0:`port`. pub async fn start_apache_arrow_flight_server(context: Arc, port: u16) -> Result<()> { @@ -95,13 +96,9 @@ impl FlightServiceHandler { /// Return the schema of the table with the name `table_name`. If the table does not exist or /// the schema cannot be retrieved, return [`Status`]. async fn table_schema(&self, table_name: &str) -> StdResult, Status> { - let table_metadata_manager = &self - .context - .remote_data_folder - .metadata_manager - .table_metadata_manager; + let data_folder = &self.context.remote_data_folder; - if table_metadata_manager + if data_folder .is_normal_table(table_name) .await .map_err(error_to_status_internal)? 
@@ -109,7 +106,6 @@ impl FlightServiceHandler { let delta_table = self .context .remote_data_folder - .delta_lake .delta_table(table_name) .await .map_err(error_to_status_internal)?; @@ -121,12 +117,12 @@ impl FlightServiceHandler { .map_err(error_to_status_internal)?; Ok(Arc::new(schema)) - } else if table_metadata_manager + } else if data_folder .is_time_series_table(table_name) .await .map_err(error_to_status_internal)? { - let time_series_table_metadata = table_metadata_manager + let time_series_table_metadata = data_folder .time_series_table_metadata_for_time_series_table(table_name) .await .map_err(error_to_status_internal)?; @@ -145,8 +141,6 @@ impl FlightServiceHandler { let existing_tables = self .context .remote_data_folder - .metadata_manager - .table_metadata_manager .table_names() .await .map_err(error_to_status_internal)?; @@ -162,9 +156,9 @@ impl FlightServiceHandler { } } - /// Create a normal table, save it to the metadata Delta Lake and create it for each node - /// controlled by the manager. If the normal table cannot be saved to the metadata Delta Lake or - /// created for each node, return [`Status`]. + /// Create a normal table, save it to the Delta Lake and create it for each node controlled by + /// the manager. If the normal table cannot be saved to the Delta Lake or created for each node, + /// return [`Status`]. async fn save_and_create_cluster_normal_table( &self, table_name: &str, @@ -173,16 +167,13 @@ impl FlightServiceHandler { // Create an empty Delta Lake table. self.context .remote_data_folder - .delta_lake .create_normal_table(table_name, schema) .await .map_err(error_to_status_internal)?; - // Persist the new normal table to the metadata Delta Lake. + // Persist the new normal table to the Delta Lake. 
self.context .remote_data_folder - .metadata_manager - .table_metadata_manager .save_normal_table_metadata(table_name) .await .map_err(error_to_status_internal)?; @@ -210,9 +201,9 @@ impl FlightServiceHandler { Ok(()) } - /// Create a time series table, save it to the metadata Delta Lake and create it for each node - /// controlled by the manager. If the time series table cannot be saved to the metadata Delta - /// Lake or created for each node, return [`Status`]. + /// Create a time series table, save it to the Delta Lake and create it for each node controlled + /// by the manager. If the time series table cannot be saved to the Delta Lake or created for + /// each node, return [`Status`]. async fn save_and_create_cluster_time_series_table( &self, time_series_table_metadata: Arc, @@ -220,16 +211,13 @@ impl FlightServiceHandler { // Create an empty Delta Lake table. self.context .remote_data_folder - .delta_lake .create_time_series_table(&time_series_table_metadata) .await .map_err(error_to_status_internal)?; - // Persist the new time series table to the metadata Delta Lake. + // Persist the new time series table to the Delta Lake. self.context .remote_data_folder - .metadata_manager - .table_metadata_manager .save_time_series_table_metadata(&time_series_table_metadata) .await .map_err(error_to_status_internal)?; @@ -262,24 +250,21 @@ impl FlightServiceHandler { Ok(()) } - /// Drop the table from the metadata Delta Lake, the data Delta Lake, and from each node - /// controlled by the manager. If the table does not exist or the table cannot be dropped from - /// the remote data folder and from each node, return [`Status`]. + /// Drop the table's metadata and data from the Delta Lake and drop the table from each node + /// controlled by the manager. If the table does not exist or the table cannot be dropped from + /// the remote data folder and from each node, return [`Status`].
async fn drop_cluster_table(&self, table_name: &str) -> StdResult<(), Status> { - // Drop the table from the remote data folder metadata Delta Lake. This will return an error - // if the table does not exist. + // Drop the table metadata from the remote data folder Delta Lake. This will return an + // error if the table does not exist. self.context .remote_data_folder - .metadata_manager - .table_metadata_manager .drop_table_metadata(table_name) .await .map_err(error_to_status_internal)?; - // Drop the table from the remote data folder data Delta lake. + // Drop the table from the remote data folder Delta Lake. self.context .remote_data_folder - .delta_lake .drop_table(table_name) .await .map_err(error_to_status_internal)?; @@ -306,10 +291,9 @@ impl FlightServiceHandler { ))); } - // Truncate the table in the remote data folder data Delta lake. + // Truncate the table in the remote data folder Delta Lake. self.context .remote_data_folder - .delta_lake .truncate_table(table_name) .await .map_err(error_to_status_internal)?; @@ -334,10 +318,9 @@ impl FlightServiceHandler { table_name: &str, maybe_retention_period_in_seconds: Option, ) -> StdResult<(), Status> { - // Vacuum the table in the remote data folder Delta lake. + // Vacuum the table in the remote data folder Delta Lake. self.context .remote_data_folder - .delta_lake .vacuum_table(table_name, maybe_retention_period_in_seconds) .await .map_err(error_to_status_internal)?; @@ -392,12 +375,10 @@ impl FlightService for FlightServiceHandler { &self, _request: Request, ) -> StdResult, Status> { - // Retrieve the table names from the metadata Delta Lake. + // Retrieve the table names from the Delta Lake.
let table_names = self .context .remote_data_folder - .metadata_manager - .table_metadata_manager .table_names() .await .map_err(error_to_status_internal)?; @@ -529,8 +510,6 @@ impl FlightService for FlightServiceHandler { table_names = self .context .remote_data_folder - .metadata_manager - .table_metadata_manager .table_names() .await .map_err(error_to_status_internal)?; @@ -633,12 +612,11 @@ impl FlightService for FlightServiceHandler { .register_node(node.clone()) .map_err(error_to_status_internal)?; - // Use the metadata manager to persist the node to the metadata Delta Lake. Note that if - // this fails, the metadata Delta Lake and the cluster will be out of sync until the - // manager is restarted. + // Persist the node to the Delta Lake through the remote data folder. Note that if this + // fails, the Delta Lake and the cluster will be out of sync until the manager is + // restarted. self.context .remote_data_folder - .metadata_manager .save_node(node) .await .map_err(error_to_status_internal)?; @@ -650,12 +628,7 @@ impl FlightService for FlightServiceHandler { .to_str() .expect("key should not contain invalid characters.") .to_owned(), - storage_configuration: Some( - self.context - .remote_data_folder - .storage_configuration - .clone(), - ), + storage_configuration: Some(self.context.remote_storage_configuration.clone()), }; let protobuf_bytes = manager_metadata.encode_to_vec(); @@ -670,16 +643,15 @@ impl FlightService for FlightServiceHandler { let node_metadata = protocol::NodeMetadata::decode(action.body) .map_err(error_to_status_invalid_argument)?; - // Remove the node with the given url from the metadata Delta Lake. + // Remove the node with the given url from the Delta Lake. self.context .remote_data_folder - .metadata_manager .remove_node(&node_metadata.url) .await .map_err(error_to_status_internal)?; - // Remove the node with the given url from the cluster and kill it.
Note that if this fails, - // the cluster and metadata Delta Lake will be out of sync until the manager is restarted. + // Remove the node with the given url from the cluster and kill it. Note that if this + // fails, the cluster and Delta Lake will be out of sync until the manager is restarted. self.context .cluster .write() diff --git a/crates/modelardb_server/src/configuration.rs b/crates/modelardb_server/src/configuration.rs index 9984d25c5..90ff04a92 100644 --- a/crates/modelardb_server/src/configuration.rs +++ b/crates/modelardb_server/src/configuration.rs @@ -241,11 +241,12 @@ mod tests { use std::sync::Arc; + use modelardb_storage::data_folder::DataFolder; use tempfile::TempDir; use tokio::sync::RwLock; use uuid::Uuid; - use crate::data_folders::{DataFolder, DataFolders}; + use crate::data_folders::DataFolders; use crate::manager::Manager; use crate::storage::StorageEngine; @@ -407,11 +408,11 @@ mod tests { Arc>, ) { let local_url = temp_dir.path().to_str().unwrap(); - let local_data_folder = DataFolder::try_from_local_url(local_url).await.unwrap(); + let local_data_folder = DataFolder::open_local_url(local_url).await.unwrap(); let target_dir = tempfile::tempdir().unwrap(); let target_url = target_dir.path().to_str().unwrap(); - let remote_data_folder = DataFolder::try_from_local_url(target_url).await.unwrap(); + let remote_data_folder = DataFolder::open_local_url(target_url).await.unwrap(); let data_folders = DataFolders::new( local_data_folder.clone(), diff --git a/crates/modelardb_server/src/context.rs b/crates/modelardb_server/src/context.rs index 387bcceab..0c88e1b0a 100644 --- a/crates/modelardb_server/src/context.rs +++ b/crates/modelardb_server/src/context.rs @@ -20,7 +20,6 @@ use std::sync::Arc; use datafusion::arrow::datatypes::Schema; use datafusion::catalog::{SchemaProvider, TableProvider}; -use datafusion::prelude::SessionContext; use modelardb_types::types::TimeSeriesTableMetadata; use tokio::sync::RwLock; use tracing::info; @@ -37,8 +36,6 @@ 
pub struct Context { pub data_folders: DataFolders, /// Updatable configuration of the server. pub configuration_manager: Arc>, - /// Main interface for Apache DataFusion. - pub session_context: SessionContext, /// Manages all uncompressed and compressed data in the system. pub storage_engine: Arc>, } @@ -50,8 +47,6 @@ impl Context { pub async fn try_new(data_folders: DataFolders, cluster_mode: ClusterMode) -> Result { let configuration_manager = Arc::new(RwLock::new(ConfigurationManager::new(cluster_mode))); - let session_context = modelardb_storage::create_session_context(); - let storage_engine = Arc::new(RwLock::new( StorageEngine::try_new(data_folders.clone(), &configuration_manager).await?, )); @@ -59,7 +54,6 @@ impl Context { Ok(Context { data_folders, configuration_manager, - session_context, storage_engine, }) } @@ -84,7 +78,6 @@ impl Context { // Create an empty Delta Lake table. self.data_folders .local_data_folder - .delta_lake .create_normal_table(table_name, schema) .await?; @@ -94,7 +87,6 @@ impl Context { // Persist the new normal table to the Delta Lake. self.data_folders .local_data_folder - .table_metadata_manager .save_normal_table_metadata(table_name) .await?; @@ -127,7 +119,6 @@ impl Context { // Create an empty Delta Lake table. self.data_folders .local_data_folder - .delta_lake .create_time_series_table(time_series_table_metadata) .await?; @@ -135,10 +126,9 @@ impl Context { self.register_time_series_table(Arc::new(time_series_table_metadata.clone())) .await?; - // Persist the new time series table to the metadata Delta Lake. + // Persist the new time series table to the Delta Lake. self.data_folders .local_data_folder - .table_metadata_manager .save_time_series_table_metadata(time_series_table_metadata) .await?; @@ -150,16 +140,15 @@ impl Context { Ok(()) } - /// For each normal table saved in the metadata Delta Lake, register the normal table in Apache - /// DataFusion. 
If the normal tables could not be retrieved from the metadata Delta Lake or a - /// normal table could not be registered, return [`ModelarDbServerError`]. + /// For each normal table saved in the Delta Lake, register the normal table in Apache + /// DataFusion. If the normal tables could not be retrieved from the Delta Lake or a normal + /// table could not be registered, return [`ModelarDbServerError`]. pub async fn register_normal_tables(&self) -> Result<()> { // We register the normal tables in the local data folder to avoid registering tables that // NormalTableDataSink cannot write data to. let table_names = self .data_folders .local_data_folder - .table_metadata_manager .normal_table_names() .await?; @@ -172,12 +161,14 @@ impl Context { /// Register the normal table with `table_name` in Apache DataFusion. If the normal table does /// not exist or could not be registered with Apache DataFusion, return - /// [`ModelarDbServerError`]. + /// [`ModelarDbServerError`]. [`modelardb_storage::data_folder::DataFolder::register_tables()`] + /// is not used so a unique [`NormalTableDataSink`] can be passed per table. async fn register_normal_table(&self, table_name: &str) -> Result<()> { + let session_context = self.data_folders.query_data_folder.session_context(); + let delta_table = self .data_folders .query_data_folder - .delta_lake .delta_table(table_name) .await?; @@ -188,7 +179,7 @@ impl Context { )); modelardb_storage::register_normal_table( - &self.session_context, + session_context, table_name, delta_table, normal_table_data_sink, @@ -199,16 +190,15 @@ impl Context { Ok(()) } - /// For each time series table saved in the metadata Delta Lake, register the time series table - /// in Apache DataFusion. If the time series tables could not be retrieved from the metadata - /// Delta Lake or a time series table could not be registered, return [`ModelarDbServerError`]. 
+ /// For each time series table saved in the Delta Lake, register the time series table in Apache + /// DataFusion. If the time series tables could not be retrieved from the Delta Lake or a time + /// series table could not be registered, return [`ModelarDbServerError`]. pub async fn register_time_series_tables(&self) -> Result<()> { // We register the time series tables in the local data folder to avoid registering tables // that TimeSeriesTableDataSink cannot write data to. let time_series_table_metadata = self .data_folders .local_data_folder - .table_metadata_manager .time_series_table_metadata() .await?; @@ -219,17 +209,20 @@ impl Context { Ok(()) } - /// Register the time series table with `time_series_table_metadata` in Apache DataFusion. If the - /// time series table does not exist or could not be registered with Apache DataFusion, return - /// [`ModelarDbServerError`]. + /// Register the time series table with `time_series_table_metadata` in Apache DataFusion. If + /// the time series table does not exist or could not be registered with Apache DataFusion, + /// return [`ModelarDbServerError`]. + /// [`modelardb_storage::data_folder::DataFolder::register_tables()`] is not used so a unique + /// [`TimeSeriesTableDataSink`] can be passed per table. async fn register_time_series_table( &self, time_series_table_metadata: Arc, ) -> Result<()> { + let session_context = self.data_folders.query_data_folder.session_context(); + let delta_table = self .data_folders .query_data_folder - .delta_lake .delta_table(&time_series_table_metadata.name) .await?; @@ -239,7 +232,7 @@ impl Context { )); modelardb_storage::register_time_series_table( - &self.session_context, + session_context, delta_table, time_series_table_metadata.clone(), time_series_table_data_sink, @@ -254,9 +247,8 @@ impl Context { } /// Drop the table with `table_name` if it exists. 
The table is deregistered from the Apache - /// Arrow Datafusion session context and deleted from the storage engine, metadata Delta Lake, - /// and data Delta Lake. If the table does not exist or if it could not be dropped, - /// [`ModelarDbServerError`] is returned. + /// Arrow Datafusion session context and deleted from the storage engine and Delta Lake. If the + /// table does not exist or if it could not be dropped, [`ModelarDbServerError`] is returned. pub async fn drop_table(&self, table_name: &str) -> Result<()> { // Deregistering the table from the Apache DataFusion session context and deleting the table // from the storage engine does not require the table to exist, so the table is checked first. @@ -266,21 +258,20 @@ impl Context { // Deregister the table from the Apache DataFusion session context. This is done first to // avoid data being ingested into the table while it is being deleted. - self.session_context.deregister_table(table_name)?; + let session_context = self.data_folders.query_data_folder.session_context(); + session_context.deregister_table(table_name)?; self.drop_table_from_storage_engine(table_name).await?; - // Drop the table metadata from the metadata Delta Lake. + // Drop the table metadata from the Delta Lake. self.data_folders .local_data_folder - .table_metadata_manager .drop_table_metadata(table_name) .await?; // Drop the table from the Delta Lake. self.data_folders .local_data_folder - .delta_lake .drop_table(table_name) .await?; @@ -288,8 +279,8 @@ impl Context { } /// Delete all data from the table with `table_name` if it exists. The table data is deleted - /// from the storage engine and data Delta Lake. If the table does not exist or if it could not - /// be truncated, [`ModelarDbServerError`] is returned. + /// from the storage engine and Delta Lake. If the table does not exist or if it could not be + /// truncated, [`ModelarDbServerError`] is returned. 
pub async fn truncate_table(&self, table_name: &str) -> Result<()> { // Deleting the table from the storage engine does not require the table to exist, so the // table is checked first. @@ -299,10 +290,9 @@ impl Context { self.drop_table_from_storage_engine(table_name).await?; - // Delete the table data from the data Delta Lake. + // Delete the table data from the Delta Lake. self.data_folders .local_data_folder - .delta_lake .truncate_table(table_name) .await?; @@ -339,7 +329,6 @@ impl Context { self.data_folders .local_data_folder - .delta_lake .vacuum_table(table_name, maybe_retention_period_in_seconds) .await?; @@ -401,7 +390,7 @@ impl Context { /// Return the default database schema if it exists, otherwise a [`ModelarDbServerError`] /// indicating at what level the lookup failed is returned. pub fn default_database_schema(&self) -> Result> { - let session_context = self.session_context.clone(); + let session_context = self.data_folders.query_data_folder.session_context(); let catalog = session_context.catalog("datafusion").ok_or_else(|| { ModelarDbServerError::InvalidState("Default catalog does not exist.".to_owned()) @@ -424,12 +413,11 @@ fn table_does_not_exist_error(table_name: &str) -> ModelarDbServerError { mod tests { use super::*; + use modelardb_storage::data_folder::DataFolder; use modelardb_test::table::{self, NORMAL_TABLE_NAME, TIME_SERIES_TABLE_NAME}; use modelardb_types::types::MAX_RETENTION_PERIOD_IN_SECONDS; use tempfile::TempDir; - use crate::data_folders::DataFolder; - // Tests for Context. #[tokio::test] async fn test_create_normal_table() { @@ -450,12 +438,11 @@ mod tests { assert!(folder_path.exists()); - // The normal table should be saved to the metadata Delta Lake. + // The normal table should be saved to the Delta Lake. 
assert!( context .data_folders .local_data_folder - .table_metadata_manager .is_normal_table(NORMAL_TABLE_NAME) .await .unwrap() @@ -499,11 +486,10 @@ mod tests { .await .unwrap(); - // The time series table should be saved to the metadata Delta Lake. + // The time series table should be saved to the Delta Lake. let time_series_table_metadata = context .data_folders .local_data_folder - .table_metadata_manager .time_series_table_metadata() .await .unwrap(); @@ -545,7 +531,7 @@ mod tests { async fn test_register_normal_tables() { // The test succeeds if none of the unwrap()s fails. - // Save a normal table to the metadata Delta Lake. + // Save a normal table to the Delta Lake. let temp_dir = tempfile::tempdir().unwrap(); let context = create_context(&temp_dir).await; @@ -565,7 +551,7 @@ mod tests { async fn test_register_time_series_tables() { // The test succeeds if none of the unwrap()s fails. - // Save a time series table to the metadata Delta Lake. + // Save a time series table to the Delta Lake. let temp_dir = tempfile::tempdir().unwrap(); let context = create_context(&temp_dir).await; @@ -603,12 +589,11 @@ mod tests { .is_ok() ); - // The normal table should be deleted from the metadata Delta Lake. + // The normal table should be deleted from the Delta Lake. assert!( !context .data_folders .local_data_folder - .table_metadata_manager .is_normal_table(NORMAL_TABLE_NAME) .await .unwrap() @@ -640,12 +625,11 @@ mod tests { .is_ok() ); - // The time series table should be deleted from the metadata Delta Lake. + // The time series table should be deleted from the Delta Lake. 
assert!( !context .data_folders .local_data_folder - .table_metadata_manager .is_time_series_table(TIME_SERIES_TABLE_NAME) .await .unwrap() @@ -675,7 +659,6 @@ mod tests { let local_data_folder = &context.data_folders.local_data_folder; let mut delta_table = local_data_folder - .delta_lake .delta_table(NORMAL_TABLE_NAME) .await .unwrap(); @@ -684,10 +667,9 @@ mod tests { context.truncate_table(NORMAL_TABLE_NAME).await.unwrap(); - // The normal table should not be deleted from the metadata Delta Lake. + // The normal table should not be deleted from the Delta Lake. assert!( local_data_folder - .table_metadata_manager .is_normal_table(NORMAL_TABLE_NAME) .await .unwrap() @@ -705,7 +687,6 @@ mod tests { let local_data_folder = &context.data_folders.local_data_folder; let mut delta_table = local_data_folder - .delta_lake .delta_table(TIME_SERIES_TABLE_NAME) .await .unwrap(); @@ -717,10 +698,9 @@ mod tests { .await .unwrap(); - // The time series table should not be deleted from the metadata Delta Lake. + // The time series table should not be deleted from the Delta Lake. assert!( local_data_folder - .table_metadata_manager .is_time_series_table(TIME_SERIES_TABLE_NAME) .await .unwrap() @@ -782,7 +762,6 @@ mod tests { // Write data to the normal table. let local_data_folder = &context.data_folders.local_data_folder; local_data_folder - .delta_lake .write_record_batches_to_normal_table( NORMAL_TABLE_NAME, vec![table::normal_table_record_batch()], @@ -854,7 +833,6 @@ mod tests { // Write data to the time series table. let local_data_folder = &context.data_folders.local_data_folder; local_data_folder - .delta_lake .write_compressed_segments_to_time_series_table( TIME_SERIES_TABLE_NAME, vec![table::compressed_segments_record_batch()], @@ -1002,7 +980,7 @@ mod tests { /// Create a simple [`Context`] that uses `temp_dir` as the local data folder and query data folder. 
async fn create_context(temp_dir: &TempDir) -> Arc { let temp_dir_url = temp_dir.path().to_str().unwrap(); - let local_data_folder = DataFolder::try_from_local_url(temp_dir_url).await.unwrap(); + let local_data_folder = DataFolder::open_local_url(temp_dir_url).await.unwrap(); Arc::new( Context::try_new( diff --git a/crates/modelardb_server/src/data_folders.rs b/crates/modelardb_server/src/data_folders.rs index 24adf1a50..f495eccc1 100644 --- a/crates/modelardb_server/src/data_folders.rs +++ b/crates/modelardb_server/src/data_folders.rs @@ -15,69 +15,14 @@ //! Implementation of a struct that provides access to the local and remote data storage components. -use std::sync::Arc; - -use modelardb_storage::delta_lake::DeltaLake; -use modelardb_storage::metadata::table_metadata_manager::TableMetadataManager; -use modelardb_types::flight::protocol; +use modelardb_storage::data_folder::DataFolder; use modelardb_types::types::ServerMode; -use tracing::warn; use crate::ClusterMode; use crate::Result; use crate::error::ModelarDbServerError; use crate::manager::Manager; -/// Folder for storing metadata and data in Apache Parquet files. -#[derive(Clone)] -pub struct DataFolder { - /// Delta Lake for storing metadata and data in Apache Parquet files. - pub delta_lake: Arc, - /// Metadata manager for providing access to metadata related to tables. - pub table_metadata_manager: Arc, -} - -impl DataFolder { - /// Return a [`DataFolder`] with a local [`DeltaLake`] and [`TableMetadataManager`] created from - /// `local_url`. If `local_url` is a folder that does not exist, it is created. If `local_url` - /// could not be parsed, if the folder does not exist and could not be created, or if the - /// metadata tables could not be created, [`ModelarDbServerError`] is returned. 
- pub async fn try_from_local_url(local_url: &str) -> Result { - let delta_lake = DeltaLake::try_from_local_url(local_url)?; - let table_metadata_manager = TableMetadataManager::try_from_local_url(local_url).await?; - - if local_url.starts_with("memory://") { - warn!( - "The local data folder is in memory. Data will not be persisted. Spilling data will \ - not decrease memory usage. Configured memory limitations may be exceeded." - ); - }; - - Ok(Self { - delta_lake: Arc::new(delta_lake), - table_metadata_manager: Arc::new(table_metadata_manager), - }) - } - - /// Return a [`DataFolder`] created from `storage_configuration`. If a connection could - /// not be made or if the metadata tables could not be created, [`ModelarDbServerError`] is - /// returned. - pub async fn try_from_storage_configuration( - storage_configuration: protocol::manager_metadata::StorageConfiguration, - ) -> Result { - let remote_delta_lake = - DeltaLake::try_remote_from_storage_configuration(storage_configuration.clone())?; - - let remote_table_metadata_manager = - TableMetadataManager::try_from_storage_configuration(storage_configuration).await?; - - Ok(Self { - delta_lake: Arc::new(remote_delta_lake), - table_metadata_manager: Arc::new(remote_table_metadata_manager), - }) - } -} - /// Folders for storing metadata and data in Apache Parquet files locally and remotely. #[derive(Clone)] pub struct DataFolders { @@ -114,8 +59,7 @@ impl DataFolders { // Match the provided command line arguments to the supported inputs. 
match arguments { &["edge", local_data_folder_url] | &[local_data_folder_url] => { - let local_data_folder = - DataFolder::try_from_local_url(local_data_folder_url).await?; + let local_data_folder = DataFolder::open_local_url(local_data_folder_url).await?; Ok(( ClusterMode::SingleNode, @@ -126,11 +70,10 @@ impl DataFolders { let (manager, storage_configuration) = Manager::register_node(manager_url, ServerMode::Cloud).await?; - let local_data_folder = - DataFolder::try_from_local_url(local_data_folder_url).await?; + let local_data_folder = DataFolder::open_local_url(local_data_folder_url).await?; let remote_data_folder = - DataFolder::try_from_storage_configuration(storage_configuration).await?; + DataFolder::open_object_store(storage_configuration).await?; Ok(( ClusterMode::MultiNode(manager), @@ -146,11 +89,10 @@ impl DataFolders { let (manager, storage_configuration) = Manager::register_node(manager_url, ServerMode::Edge).await?; - let local_data_folder = - DataFolder::try_from_local_url(local_data_folder_url).await?; + let local_data_folder = DataFolder::open_local_url(local_data_folder_url).await?; let remote_data_folder = - DataFolder::try_from_storage_configuration(storage_configuration).await?; + DataFolder::open_object_store(storage_configuration).await?; Ok(( ClusterMode::MultiNode(manager), diff --git a/crates/modelardb_server/src/main.rs b/crates/modelardb_server/src/main.rs index 23171b7be..75f6f5833 100644 --- a/crates/modelardb_server/src/main.rs +++ b/crates/modelardb_server/src/main.rs @@ -52,11 +52,11 @@ pub enum ClusterMode { /// Setup tracing that prints to stdout, parse the command line arguments to extract /// [`DataFolders`], construct a [`Context`] with the systems components, initialize the normal -/// tables and time series tables in the metadata Delta Lake, initialize a CTRL+C handler that -/// flushes the data in memory to disk, and start the Apache Arrow Flight interface. 
Returns -/// [`ModelarDbServerError`](error::ModelarDbServerError) if the command line arguments -/// cannot be parsed, if the metadata cannot be read from the database, or if the Apache Arrow -/// Flight interface cannot be started. +/// tables and time series tables in the Delta Lake, initialize a CTRL+C handler that flushes the +/// data in memory to disk, and start the Apache Arrow Flight interface. Returns +/// [`ModelarDbServerError`](error::ModelarDbServerError) if the command line arguments cannot be +/// parsed, if the metadata cannot be read from the database, or if the Apache Arrow Flight +/// interface cannot be started. #[tokio::main] async fn main() -> Result<()> { // Initialize a tracing layer that logs events to stdout. diff --git a/crates/modelardb_server/src/manager.rs b/crates/modelardb_server/src/manager.rs index 1d1dc7980..2e00e39aa 100644 --- a/crates/modelardb_server/src/manager.rs +++ b/crates/modelardb_server/src/manager.rs @@ -14,7 +14,7 @@ */ //! Interface to connect to and interact with the manager, used if the server is started with a -//! manager and needs to interact with it to initialize the metadata Delta Lake. +//! manager and needs to interact with it to initialize the Delta Lake. use std::sync::Arc; use std::{env, str}; @@ -23,6 +23,7 @@ use arrow_flight::flight_service_client::FlightServiceClient; use arrow_flight::{Action, Result as FlightResult}; use datafusion::arrow::datatypes::Schema; use datafusion::catalog::TableProvider; +use modelardb_storage::data_folder::DataFolder; use modelardb_types::flight::protocol; use modelardb_types::types::{Node, ServerMode, TimeSeriesTableMetadata}; use prost::Message; @@ -33,7 +34,6 @@ use tonic::transport::Channel; use crate::PORT; use crate::context::Context; -use crate::data_folders::DataFolder; use crate::error::{ModelarDbServerError, Result}; /// Manages metadata related to the manager and provides functionality for interacting with the manager. 
@@ -179,14 +179,8 @@ async fn validate_local_tables_exist_remotely( local_data_folder: &DataFolder, remote_data_folder: &DataFolder, ) -> Result<()> { - let local_table_names = local_data_folder - .table_metadata_manager - .table_names() - .await?; - let remote_table_names = remote_data_folder - .table_metadata_manager - .table_names() - .await?; + let local_table_names = local_data_folder.table_names().await?; + let remote_table_names = remote_data_folder.table_names().await?; let invalid_tables: Vec = local_table_names .iter() @@ -214,10 +208,7 @@ async fn validate_normal_tables( ) -> Result)>> { let mut missing_normal_tables = vec![]; - let remote_normal_tables = remote_data_folder - .table_metadata_manager - .normal_table_names() - .await?; + let remote_normal_tables = remote_data_folder.normal_table_names().await?; for table_name in remote_normal_tables { let remote_schema = normal_table_schema(remote_data_folder, &table_name).await?; @@ -240,7 +231,7 @@ async fn validate_normal_tables( /// Retrieve the schema of a normal table from the Delta Lake in the data folder. If the table does /// not exist, or the schema could not be retrieved, return [`ModelarDbServerError`]. 
async fn normal_table_schema(data_folder: &DataFolder, table_name: &str) -> Result> { - let delta_table = data_folder.delta_lake.delta_table(table_name).await?; + let delta_table = data_folder.delta_table(table_name).await?; Ok(TableProvider::schema(&delta_table)) } @@ -254,19 +245,14 @@ async fn validate_time_series_tables( ) -> Result> { let mut missing_time_series_tables = vec![]; - let remote_time_series_tables = remote_data_folder - .table_metadata_manager - .time_series_table_names() - .await?; + let remote_time_series_tables = remote_data_folder.time_series_table_names().await?; for table_name in remote_time_series_tables { let remote_metadata = remote_data_folder - .table_metadata_manager .time_series_table_metadata_for_time_series_table(&table_name) .await?; if let Ok(local_metadata) = local_data_folder - .table_metadata_manager .time_series_table_metadata_for_time_series_table(&table_name) .await { diff --git a/crates/modelardb_server/src/remote.rs b/crates/modelardb_server/src/remote.rs index 11ef4fc0d..e0d96ce62 100644 --- a/crates/modelardb_server/src/remote.rs +++ b/crates/modelardb_server/src/remote.rs @@ -227,8 +227,8 @@ pub fn flight_data_to_record_batch( .map_err(|error| Status::invalid_argument(error.to_string())) } -/// Return the table stored as the first element in [`FlightDescriptor.path`], otherwise a -/// [`Status`] that specifies that the table name is missing. +/// Return the table stored as the first element in `FlightDescriptor.path`, otherwise a [`Status`] +/// that specifies that the table name is missing. 
pub fn table_name_from_flight_descriptor( flight_descriptor: &FlightDescriptor, ) -> StdResult<&String, Status> { @@ -469,13 +469,23 @@ impl FlightService for FlightServiceHandler { Ok(empty_record_batch_stream()) } ModelarDbStatement::Statement(statement) => { - modelardb_storage::execute_statement(&self.context.session_context, statement) + let session_context = self + .context + .data_folders + .query_data_folder + .session_context(); + modelardb_storage::execute_statement(session_context, statement) .await .map_err(|error| error.into()) } ModelarDbStatement::IncludeSelect(statement, addresses) => { + let session_context = self + .context + .data_folders + .query_data_folder + .session_context(); let local_sendable_record_batch_stream = - modelardb_storage::execute_statement(&self.context.session_context, statement) + modelardb_storage::execute_statement(session_context, statement) .await .map_err(error_to_status_internal)?; diff --git a/crates/modelardb_server/src/storage/compressed_data_manager.rs b/crates/modelardb_server/src/storage/compressed_data_manager.rs index 56f430640..38e90e949 100644 --- a/crates/modelardb_server/src/storage/compressed_data_manager.rs +++ b/crates/modelardb_server/src/storage/compressed_data_manager.rs @@ -21,11 +21,11 @@ use std::sync::Arc; use crossbeam_queue::SegQueue; use dashmap::DashMap; use datafusion::arrow::record_batch::RecordBatch; +use modelardb_storage::data_folder::DataFolder; use tokio::runtime::Handle; use tokio::sync::RwLock; use tracing::{debug, error, info}; -use crate::data_folders::DataFolder; use crate::error::Result; use crate::storage::compressed_data_buffer::{CompressedDataBuffer, CompressedSegmentBatch}; use crate::storage::data_transfer::DataTransfer; @@ -87,7 +87,6 @@ impl CompressedDataManager { let record_batch_size_in_bytes = record_batch.get_array_memory_size(); self.local_data_folder - .delta_lake .write_record_batches_to_normal_table(table_name, vec![record_batch]) .await?; @@ -247,7 +246,6 @@ impl 
CompressedDataManager { let compressed_data_buffer_size_in_bytes = compressed_data_buffer.size_in_bytes; let compressed_segments = compressed_data_buffer.record_batches(); self.local_data_folder - .delta_lake .write_compressed_segments_to_time_series_table(table_name, compressed_segments) .await?; @@ -317,7 +315,6 @@ mod tests { let local_data_folder = data_manager.local_data_folder.clone(); let mut delta_table = local_data_folder - .delta_lake .create_normal_table(NORMAL_TABLE_NAME, &record_batch.schema()) .await .unwrap(); @@ -391,7 +388,6 @@ mod tests { let local_data_folder = data_manager.local_data_folder.clone(); let mut delta_table = local_data_folder - .delta_lake .create_time_series_table(&table::time_series_table_metadata()) .await .unwrap(); @@ -451,7 +447,6 @@ mod tests { let segments = compressed_segments_record_batch(); local_data_folder - .delta_lake .create_time_series_table(&segments.time_series_table_metadata) .await .unwrap(); @@ -507,7 +502,6 @@ mod tests { // Insert data that should be saved when the remaining memory is decreased. let segments = compressed_segments_record_batch(); local_data_folder - .delta_lake .create_time_series_table(&segments.time_series_table_metadata) .await .unwrap(); @@ -559,13 +553,12 @@ mod tests { COMPRESSED_RESERVED_MEMORY_IN_BYTES, )); - // Create a local data folder and save a single time series table to the metadata Delta Lake. + // Create a local data folder and save a single time series table to the Delta Lake. 
let temp_dir_url = temp_dir.path().to_str().unwrap(); - let local_data_folder = DataFolder::try_from_local_url(temp_dir_url).await.unwrap(); + let local_data_folder = DataFolder::open_local_url(temp_dir_url).await.unwrap(); let time_series_table_metadata = table::time_series_table_metadata(); local_data_folder - .table_metadata_manager .save_time_series_table_metadata(&time_series_table_metadata) .await .unwrap(); diff --git a/crates/modelardb_server/src/storage/data_transfer.rs b/crates/modelardb_server/src/storage/data_transfer.rs index c96c0c468..08b9916e5 100644 --- a/crates/modelardb_server/src/storage/data_transfer.rs +++ b/crates/modelardb_server/src/storage/data_transfer.rs @@ -23,11 +23,11 @@ use std::time::Duration; use dashmap::DashMap; use deltalake::arrow::array::RecordBatch; use futures::TryStreamExt; +use modelardb_storage::data_folder::DataFolder; use tokio::sync::RwLock; use tokio::task::JoinHandle as TaskJoinHandle; use tracing::debug; -use crate::data_folders::DataFolder; use crate::error::Result; // TODO: Handle the case where a connection can not be established when transferring data. @@ -62,18 +62,12 @@ impl DataTransfer { remote_data_folder: DataFolder, transfer_batch_size_in_bytes: Option, ) -> Result { - let table_names = local_data_folder - .table_metadata_manager - .table_names() - .await?; + let table_names = local_data_folder.table_names().await?; // The size of tables is computed manually as datafusion_table_statistics() is not exact. 
let table_size_in_bytes = DashMap::with_capacity(table_names.len()); for table_name in table_names { - let delta_table = local_data_folder - .delta_lake - .delta_table(&table_name) - .await?; + let delta_table = local_data_folder.delta_table(&table_name).await?; let mut table_size_in_bytes = table_size_in_bytes.entry(table_name).or_insert(0); @@ -241,11 +235,7 @@ impl DataTransfer { .expect("table_size_in_bytes should contain table_name since the table contains data.") .value(); - let local_delta_ops = self - .local_data_folder - .delta_lake - .delta_ops(table_name) - .await?; + let local_delta_ops = self.local_data_folder.delta_ops(table_name).await?; // Read the data that is currently stored for the table with table_name. let (_table, stream) = local_delta_ops.load().await?; @@ -256,26 +246,20 @@ impl DataTransfer { // Write the data to the remote Delta Lake. if self .local_data_folder - .table_metadata_manager .is_time_series_table(table_name) .await? { self.remote_data_folder - .delta_lake .write_compressed_segments_to_time_series_table(table_name, record_batches) .await?; } else { self.remote_data_folder - .delta_lake .write_record_batches_to_normal_table(table_name, record_batches) .await?; } // Delete the data that has been transferred to the remote Delta Lake. - self.local_data_folder - .delta_lake - .truncate_table(table_name) - .await?; + self.local_data_folder.truncate_table(table_name).await?; // Remove the transferred data from the in-memory tracking of compressed files. *self.table_size_in_bytes.get_mut(table_name).unwrap() -= current_size_in_bytes; @@ -481,17 +465,15 @@ mod tests { async fn create_local_data_folder_with_tables() -> (TempDir, DataFolder) { let temp_dir = tempfile::tempdir().unwrap(); let temp_dir_url = temp_dir.path().to_str().unwrap(); - let local_data_folder = DataFolder::try_from_local_url(temp_dir_url).await.unwrap(); + let local_data_folder = DataFolder::open_local_url(temp_dir_url).await.unwrap(); // Create a normal table. 
local_data_folder - .delta_lake .create_normal_table(NORMAL_TABLE_NAME, &table::normal_table_schema()) .await .unwrap(); local_data_folder - .table_metadata_manager .save_normal_table_metadata(NORMAL_TABLE_NAME) .await .unwrap(); @@ -499,13 +481,11 @@ mod tests { // Create a time series table. let time_series_table_metadata = table::time_series_table_metadata(); local_data_folder - .delta_lake .create_time_series_table(&time_series_table_metadata) .await .unwrap(); local_data_folder - .table_metadata_manager .save_time_series_table_metadata(&time_series_table_metadata) .await .unwrap(); @@ -522,7 +502,6 @@ mod tests { for _ in 0..batch_write_count { // Write to the normal table. local_data_folder - .delta_lake .write_record_batches_to_normal_table( NORMAL_TABLE_NAME, vec![table::normal_table_record_batch()], @@ -532,7 +511,6 @@ mod tests { // Write to the time series table. local_data_folder - .delta_lake .write_compressed_segments_to_time_series_table( TIME_SERIES_TABLE_NAME, vec![table::compressed_segments_record_batch()], @@ -549,11 +527,7 @@ mod tests { /// Return the total size of the files in the table with `table_name` in `local_data_folder`. async fn table_files_size(local_data_folder: &DataFolder, table_name: &str) -> u64 { - let delta_table = local_data_folder - .delta_lake - .delta_table(table_name) - .await - .unwrap(); + let delta_table = local_data_folder.delta_table(table_name).await.unwrap(); let mut files_size = 0; for file_path in delta_table.get_files_iter().unwrap() { @@ -570,9 +544,7 @@ mod tests { ) -> (TempDir, DataTransfer) { let target_dir = tempfile::tempdir().unwrap(); let target_dir_url = target_dir.path().to_str().unwrap(); - let remote_data_folder = DataFolder::try_from_local_url(target_dir_url) - .await - .unwrap(); + let remote_data_folder = DataFolder::open_local_url(target_dir_url).await.unwrap(); // Set the transfer batch size so that data is transferred if three batches are written. 
let data_transfer = DataTransfer::try_new( diff --git a/crates/modelardb_server/src/storage/mod.rs b/crates/modelardb_server/src/storage/mod.rs index 5d574cade..65c8e6f0c 100644 --- a/crates/modelardb_server/src/storage/mod.rs +++ b/crates/modelardb_server/src/storage/mod.rs @@ -19,7 +19,7 @@ //! metadata and models in in-memory buffers to batch them before saving them to immutable Apache //! Parquet files. The path to the Apache Parquet files containing relevant compressed data points //! for a query can be retrieved by the query engine using -//! [`DeltaLake`](modelardb_storage::delta_lake::DeltaLake). +//! [`DataFolder`](modelardb_storage::data_folder::DataFolder). mod compressed_data_buffer; mod compressed_data_manager; diff --git a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs index 9a3400f61..a7274e815 100644 --- a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs +++ b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs @@ -24,13 +24,13 @@ use std::sync::atomic::{AtomicU64, Ordering}; use dashmap::DashMap; use futures::StreamExt; +use modelardb_storage::data_folder::DataFolder; use modelardb_types::types::{TimeSeriesTableMetadata, Timestamp, Value}; use object_store::path::{Path, PathPart}; use tokio::runtime::Handle; use tracing::{debug, error, warn}; use crate::context::Context; -use crate::data_folders::DataFolder; use crate::error::Result; use crate::storage::UNCOMPRESSED_DATA_FOLDER; use crate::storage::compressed_data_buffer::CompressedSegmentBatch; @@ -86,7 +86,7 @@ impl UncompressedDataManager { /// Add references to the [`UncompressedDataBuffers`](UncompressedDataBuffer) currently on disk /// to [`UncompressedDataManager`] which immediately will start compressing them. 
pub(super) async fn initialize(&self, context: &Context) -> Result<()> { - let local_data_folder = self.local_data_folder.delta_lake.object_store(); + let local_data_folder = self.local_data_folder.object_store(); let mut spilled_buffers = local_data_folder.list(Some(&Path::from(UNCOMPRESSED_DATA_FOLDER))); @@ -225,9 +225,9 @@ impl UncompressedDataManager { /// Insert a single data point into the in-memory buffer with the tag hash that corresponds to /// `tag_values` if one exists. If the buffer has been spilled, read it back into memory. If no /// buffer exists for the tag hash, allocate a new buffer that will be compressed within the - /// error bound in `time_series_table_metadata`. Returns [`true`] if a buffer was spilled, [`false`] - /// if not, and [`ModelarDbServerError`](crate::error::ModelarDbServerError) if the error bound - /// cannot be retrieved from the metadata Delta Lake. + /// error bound in `time_series_table_metadata`. Returns [`true`] if a buffer was spilled, + /// [`false`] if not, and [`ModelarDbServerError`](crate::error::ModelarDbServerError) if the + /// error bound cannot be retrieved from the Delta Lake. async fn insert_data_point( &self, tag_values: Vec, @@ -398,7 +398,7 @@ impl UncompressedDataManager { .1; let maybe_uncompressed_on_disk_data_buffer = uncompressed_in_memory_data_buffer - .spill_to_apache_parquet(self.local_data_folder.delta_lake.object_store()) + .spill_to_apache_parquet(self.local_data_folder.object_store()) .await; // If an error occurs the in-memory buffer must be re-added to the map before returning. 
@@ -591,7 +591,7 @@ impl UncompressedDataManager { .map(|(uncompressed_values, field_column_index)| { let error_bound = time_series_table_metadata.error_bounds[*field_column_index]; - modelardb_compression::try_compress( + modelardb_compression::try_compress_univariate_time_series( uncompressed_timestamps, uncompressed_values, error_bound, @@ -683,7 +683,7 @@ mod tests { async fn test_can_compress_existing_on_disk_data_buffers_when_initializing() { let temp_dir = tempfile::tempdir().unwrap(); let temp_dir_url = temp_dir.path().to_str().unwrap(); - let local_data_folder = DataFolder::try_from_local_url(temp_dir_url).await.unwrap(); + let local_data_folder = DataFolder::open_local_url(temp_dir_url).await.unwrap(); // Create a context with a storage engine. let context = Arc::new( @@ -727,7 +727,6 @@ mod tests { let spilled_buffers = storage_engine .uncompressed_data_manager .local_data_folder - .delta_lake .object_store() .list(Some(&Path::from(UNCOMPRESSED_DATA_FOLDER))) .collect::>() @@ -1069,7 +1068,6 @@ mod tests { // The UncompressedDataBuffer should be spilled to tag hash in the uncompressed folder. let spilled_buffers = data_manager .local_data_folder - .delta_lake .object_store() .list(Some(&Path::from(UNCOMPRESSED_DATA_FOLDER))) .collect::>() @@ -1281,13 +1279,12 @@ mod tests { temp_dir: &TempDir, ) -> (UncompressedDataManager, Arc) { let temp_dir_url = temp_dir.path().to_str().unwrap(); - let local_data_folder = DataFolder::try_from_local_url(temp_dir_url).await.unwrap(); + let local_data_folder = DataFolder::open_local_url(temp_dir_url).await.unwrap(); // Ensure the expected metadata is available through the metadata manager. 
let time_series_table_metadata = table::time_series_table_metadata(); local_data_folder - .table_metadata_manager .save_time_series_table_metadata(&time_series_table_metadata) .await .unwrap(); diff --git a/crates/modelardb_storage/src/data_folder.rs b/crates/modelardb_storage/src/data_folder.rs new file mode 100644 index 000000000..86f2a66ec --- /dev/null +++ b/crates/modelardb_storage/src/data_folder.rs @@ -0,0 +1,1680 @@ +/* Copyright 2025 The ModelarDB Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +//! Implementation of the type used to interact with local and remote storage through a Delta Lake. 
+ +use std::collections::HashMap; +use std::fs; +use std::path::Path as StdPath; +use std::sync::Arc; + +use arrow::array::{ + ArrayRef, ArrowPrimitiveType, BinaryArray, BooleanArray, Float32Array, Int16Array, RecordBatch, + StringArray, +}; +use arrow::datatypes::{DataType, Field, Schema}; +use chrono::TimeDelta; +use dashmap::DashMap; +use datafusion::catalog::TableProvider; +use datafusion::common::{DFSchema, ToDFSchema}; +use datafusion::datasource::sink::DataSink; +use datafusion::logical_expr::{Expr, lit}; +use datafusion::parquet::file::properties::WriterProperties; +use datafusion::parquet::format::SortingColumn; +use datafusion::prelude::{SessionContext, col}; +use datafusion_proto::bytes::Serializeable; +use deltalake::delta_datafusion::DeltaDataChecker; +use deltalake::kernel::transaction::{CommitBuilder, CommitProperties}; +use deltalake::kernel::{Action, Add, StructField}; +use deltalake::operations::create::CreateBuilder; +use deltalake::operations::write::writer::{DeltaWriter, WriterConfig}; +use deltalake::protocol::{DeltaOperation, SaveMode}; +use deltalake::{DeltaOps, DeltaTable, DeltaTableError}; +use futures::{StreamExt, TryStreamExt}; +use modelardb_types::flight::protocol; +use modelardb_types::functions::{try_convert_bytes_to_schema, try_convert_schema_to_bytes}; +use modelardb_types::schemas::{COMPRESSED_SCHEMA, FIELD_COLUMN}; +use modelardb_types::types::{ + ArrowValue, ErrorBound, GeneratedColumn, MAX_RETENTION_PERIOD_IN_SECONDS, + TimeSeriesTableMetadata, +}; +use object_store::ObjectStore; +use object_store::aws::AmazonS3Builder; +use object_store::local::LocalFileSystem; +use object_store::memory::InMemory; +use object_store::path::Path; +use url::Url; +use uuid::Uuid; + +use crate::error::{ModelarDbStorageError, Result}; +use crate::{ + METADATA_FOLDER, TABLE_FOLDER, apache_parquet_writer_properties, register_metadata_table, + sql_and_concat, +}; + +/// Types of tables supported by ModelarDB. 
+enum TableType { + NormalTable, + TimeSeriesTable, +} + +/// Functionality for managing Delta Lake tables in a local folder or an object store. +#[derive(Clone)] +pub struct DataFolder { + /// URL to access the root of the Delta Lake. + location: String, + /// Storage options required to access Delta Lake. + storage_options: HashMap, + /// [`ObjectStore`] to access the root of the Delta Lake. + object_store: Arc, + /// Cache of Delta tables to avoid opening the same table multiple times. + delta_table_cache: DashMap, + /// Session context used to query the tables using Apache DataFusion. + session_context: Arc, +} + +impl DataFolder { + /// Create a new [`DataFolder`] that manages the Delta tables at `local_url`. If `local_url` has + /// the scheme `file` or no scheme, the Delta tables are managed in a local data folder. If + /// `local_url` has the scheme `memory`, the Delta tables are managed in memory. Returns + /// [`ModelarDbStorageError`] if `local_url` has any other scheme or the metadata tables cannot + /// be created. + pub async fn open_local_url(local_url: &str) -> Result { + // A URL without `://` is used as a local path as is; for `file` the part after `://` is the + // path, and for `memory` the part after `://` is ignored. + match local_url.split_once("://") { + None => Self::open_local(StdPath::new(local_url)).await, + Some(("file", local_path)) => Self::open_local(StdPath::new(local_path)).await, + Some(("memory", _)) => Self::open_memory().await, + _ => Err(ModelarDbStorageError::InvalidArgument(format!( + "{local_url} is not a valid local URL." + ))), + } + } + + /// Create a new [`DataFolder`] that manages the Delta tables in memory. Returns a + /// [`ModelarDbStorageError`] if the metadata tables cannot be created. + pub async fn open_memory() -> Result { + let data_folder = Self { + location: "memory:///modelardb".to_owned(), + storage_options: HashMap::new(), + object_store: Arc::new(InMemory::new()), + delta_table_cache: DashMap::new(), + session_context: Arc::new(crate::create_session_context()), + }; + + data_folder.create_and_register_metadata_tables().await?; + + Ok(data_folder) + } + + /// Create a new [`DataFolder`] that manages the Delta tables in `data_folder_path`.
Returns a + /// [`ModelarDbStorageError`] if `data_folder_path` does not exist and could not be created, is + /// not valid UTF-8, or the metadata tables cannot be created. + pub async fn open_local(data_folder_path: &StdPath) -> Result { + // Ensure the directories in the path exist, as LocalFileSystem otherwise returns an error. + fs::create_dir_all(data_folder_path) + .map_err(|error| DeltaTableError::generic(error.to_string()))?; + + // Use with_automatic_cleanup to ensure empty directories are deleted automatically. + let object_store = LocalFileSystem::new_with_prefix(data_folder_path) + .map_err(|error| DeltaTableError::generic(error.to_string()))? + .with_automatic_cleanup(true); + + // The location is stored as a string, so a path that is not UTF-8 is rejected. + let location = data_folder_path + .to_str() + .ok_or_else(|| DeltaTableError::generic("Local data folder path is not UTF-8."))? + .to_owned(); + + let data_folder = Self { + location, + storage_options: HashMap::new(), + object_store: Arc::new(object_store), + delta_table_cache: DashMap::new(), + session_context: Arc::new(crate::create_session_context()), + }; + + data_folder.create_and_register_metadata_tables().await?; + + Ok(data_folder) + } + + /// Create a new [`DataFolder`] that manages Delta tables in the remote object store given by + /// `storage_configuration`. Returns [`ModelarDbStorageError`] if a connection to the specified + /// object store could not be created. + pub async fn open_object_store( + storage_configuration: protocol::manager_metadata::StorageConfiguration, + ) -> Result { + match storage_configuration { + protocol::manager_metadata::StorageConfiguration::S3Configuration(s3_configuration) => { + // Register the S3 storage handlers to allow the use of Amazon S3 object stores. This is + // required at runtime to initialize the S3 storage implementation in the deltalake_aws + // storage subcrate. It is safe to call this function multiple times as the handlers are + // stored in a DashMap, so they are simply overwritten with the same handlers each time.
+ deltalake::aws::register_handlers(None); + + Self::open_s3( + s3_configuration.endpoint, + s3_configuration.bucket_name, + s3_configuration.access_key_id, + s3_configuration.secret_access_key, + ) + .await + } + protocol::manager_metadata::StorageConfiguration::AzureConfiguration( + azure_configuration, + ) => { + Self::open_azure( + azure_configuration.account_name, + azure_configuration.access_key, + azure_configuration.container_name, + ) + .await + } + } + } + + /// Create a new [`DataFolder`] that manages the Delta tables in an object store with an + /// S3-compatible API. Returns a [`ModelarDbStorageError`] if a connection to the object store + /// could not be made or the metadata tables cannot be created. + pub async fn open_s3( + endpoint: String, + bucket_name: String, + access_key_id: String, + secret_access_key: String, + ) -> Result { + let location = format!("s3://{bucket_name}"); + + // TODO: Determine if it is safe to use AWS_S3_ALLOW_UNSAFE_RENAME. + let storage_options = HashMap::from([ + ("aws_access_key_id".to_owned(), access_key_id), + ("aws_secret_access_key".to_owned(), secret_access_key), + ("aws_endpoint_url".to_owned(), endpoint), + ("aws_bucket_name".to_owned(), bucket_name), + ("aws_s3_allow_unsafe_rename".to_owned(), "true".to_owned()), + ]); + + let url = Url::parse(&location) + .map_err(|error| ModelarDbStorageError::InvalidArgument(error.to_string()))?; + + // Build the Amazon S3 object store with the given storage options manually to allow http. 
+ let object_store = storage_options + .iter() + .fold( + AmazonS3Builder::new() + .with_url(url.to_string()) + .with_allow_http(true), + |builder, (key, value)| match key.parse() { + Ok(k) => builder.with_config(k, value), + Err(_) => builder, + }, + ) + .build()?; + + let data_folder = DataFolder { + location, + storage_options, + object_store: Arc::new(object_store), + delta_table_cache: DashMap::new(), + session_context: Arc::new(crate::create_session_context()), + }; + + data_folder.create_and_register_metadata_tables().await?; + + Ok(data_folder) + } + + /// Create a new [`DataFolder`] that manages the Delta tables in an object store with an + /// Azure-compatible API. Returns a [`ModelarDbStorageError`] if a connection to the object + /// store could not be made or the metadata tables cannot be created. + pub async fn open_azure( + account_name: String, + access_key: String, + container_name: String, + ) -> Result { + let location = format!("az://{container_name}"); + + // TODO: Needs to be tested. + let storage_options = HashMap::from([ + ("azure_storage_account_name".to_owned(), account_name), + ("azure_storage_account_key".to_owned(), access_key), + ("azure_container_name".to_owned(), container_name), + ]); + let url = Url::parse(&location) + .map_err(|error| ModelarDbStorageError::InvalidArgument(error.to_string()))?; + let (object_store, _path) = object_store::parse_url_opts(&url, &storage_options)?; + + let data_folder = DataFolder { + location, + storage_options, + object_store: Arc::new(object_store), + delta_table_cache: DashMap::new(), + session_context: Arc::new(crate::create_session_context()), + }; + + data_folder.create_and_register_metadata_tables().await?; + + Ok(data_folder) + } + + /// If they do not already exist, create the tables in the Delta Lake for normal table and time + /// series table metadata and register them with the Apache DataFusion session context. 
+ /// * The `normal_table_metadata` table contains the metadata for normal tables. + /// * The `time_series_table_metadata` table contains the main metadata for time series tables. + /// * The `time_series_table_field_columns` table contains the name, index, error bound value, + /// whether error bound is relative, and generation expression of the field columns in each + /// time series table. + /// + /// If the tables exist or were created, return [`Ok`], otherwise return + /// [`ModelarDbStorageError`]. + async fn create_and_register_metadata_tables(&self) -> Result<()> { + // Create and register the normal_table_metadata table if it does not exist. + let delta_table = self + .create_metadata_table( + "normal_table_metadata", + &Schema::new(vec![Field::new("table_name", DataType::Utf8, false)]), + ) + .await?; + + register_metadata_table(&self.session_context, "normal_table_metadata", delta_table)?; + + // Create and register the time_series_table_metadata table if it does not exist. + let delta_table = self + .create_metadata_table( + "time_series_table_metadata", + &Schema::new(vec![ + Field::new("table_name", DataType::Utf8, false), + Field::new("query_schema", DataType::Binary, false), + ]), + ) + .await?; + + register_metadata_table( + &self.session_context, + "time_series_table_metadata", + delta_table, + )?; + + // Create and register the time_series_table_field_columns table if it does not exist. Note + // that column_index will only use a maximum of 10 bits. generated_column_expr is NULL if + // the fields are stored as segments. 
+ let delta_table = self + .create_metadata_table( + "time_series_table_field_columns", + &Schema::new(vec![ + Field::new("table_name", DataType::Utf8, false), + Field::new("column_name", DataType::Utf8, false), + Field::new("column_index", DataType::Int16, false), + Field::new("error_bound_value", DataType::Float32, false), + Field::new("error_bound_is_relative", DataType::Boolean, false), + Field::new("generated_column_expr", DataType::Binary, true), + ]), + ) + .await?; + + register_metadata_table( + &self.session_context, + "time_series_table_field_columns", + delta_table, + )?; + + Ok(()) + } + + /// Register all normal tables and time series tables in `self` with its [`SessionContext`]. + /// `data_sink` is set as the [`DataSink`] for all of the tables. If the tables could not be + /// registered, [`ModelarDbStorageError`] is returned. + pub async fn register_tables( + &self, + data_sink: Arc, + ) -> Result<()> { + // Register normal tables. + for normal_table_name in self.normal_table_names().await? { + let delta_table = self.delta_table(&normal_table_name).await?; + + crate::register_normal_table( + &self.session_context, + &normal_table_name, + delta_table, + data_sink.clone(), + )?; + } + + // Register time series tables. + for metadata in self.time_series_table_metadata().await? { + let delta_table = self.delta_table(&metadata.name).await?; + + crate::register_time_series_table( + &self.session_context, + delta_table, + metadata, + data_sink.clone(), + )?; + } + + Ok(()) + } + + /// Return the session context used to query the tables using Apache DataFusion. + pub fn session_context(&self) -> &SessionContext { + &self.session_context + } + + /// Return an [`ObjectStore`] to access the root of the Delta Lake. 
+ pub fn object_store(&self) -> Arc { + self.object_store.clone() + } + + /// Return a [`DeltaTable`] for manipulating the metadata table with `table_name` in the + /// Delta Lake, or a [`ModelarDbStorageError`] if a connection to the Delta Lake cannot be + /// established or the table does not exist. + pub async fn metadata_delta_table(&self, table_name: &str) -> Result { + let table_path = self.location_of_metadata_table(table_name); + self.delta_table_from_path(&table_path).await + } + + /// Return a [`DeltaTable`] for manipulating the table with `table_name` in the Delta Lake, or a + /// [`ModelarDbStorageError`] if a connection to the Delta Lake cannot be established or the + /// table does not exist. + pub async fn delta_table(&self, table_name: &str) -> Result { + let table_path = self.location_of_table(table_name); + self.delta_table_from_path(&table_path).await + } + + /// Return a [`DeltaOps`] for manipulating the metadata table with `table_name` in the Delta + /// Lake, or a [`ModelarDbStorageError`] if a connection to the Delta Lake cannot be established + /// or the table does not exist. + pub async fn metadata_delta_ops(&self, table_name: &str) -> Result { + let table_path = self.location_of_metadata_table(table_name); + self.delta_table_from_path(&table_path) + .await + .map(Into::into) + } + + /// Return a [`DeltaOps`] for manipulating the table with `table_name` in the Delta Lake, or a + /// [`ModelarDbStorageError`] if a connection to the Delta Lake cannot be established or the + /// table does not exist. + pub async fn delta_ops(&self, table_name: &str) -> Result { + let table_path = self.location_of_table(table_name); + self.delta_table_from_path(&table_path) + .await + .map(Into::into) + } + + /// Return a [`DeltaTable`] for manipulating the table at `table_path` in the Delta Lake, or a + /// [`ModelarDbStorageError`] if a connection to the Delta Lake cannot be established or the + /// table does not exist. 
+ async fn delta_table_from_path(&self, table_path: &str) -> Result { + // Use the cache if possible and load to get the latest table data. + // NOTE(review): the guard returned by get_mut() stays alive across the load().await below; + // confirm that holding it over a suspension point cannot block or deadlock other users of + // the cache. + if let Some(mut delta_table) = self.delta_table_cache.get_mut(table_path) { + delta_table.load().await?; + Ok(delta_table.clone()) + } else { + // If the table is not in the cache, open it and add it to the cache before returning. + let delta_table = deltalake::open_table_with_storage_options( + &table_path, + self.storage_options.clone(), + ) + .await?; + + self.delta_table_cache + .insert(table_path.to_owned(), delta_table.clone()); + + Ok(delta_table) + } + } + + /// Return `true` if the table with `table_name` is a normal table, otherwise return `false`. + /// If the normal table names cannot be retrieved, [`ModelarDbStorageError`] is returned. + pub async fn is_normal_table(&self, table_name: &str) -> Result { + Ok(self + .normal_table_names() + .await? + .contains(&table_name.to_owned())) + } + + /// Return `true` if the table with `table_name` is a time series table, otherwise return `false`. + /// If the time series table names cannot be retrieved, [`ModelarDbStorageError`] is returned. + pub async fn is_time_series_table(&self, table_name: &str) -> Result { + Ok(self + .time_series_table_names() + .await? + .contains(&table_name.to_owned())) + } + + /// Return the name of each table currently in the Delta Lake. The names of the normal tables + /// are listed before the names of the time series tables. If the table names cannot be + /// retrieved, [`ModelarDbStorageError`] is returned. + pub async fn table_names(&self) -> Result> { + let normal_table_names = self.normal_table_names().await?; + let time_series_table_names = self.time_series_table_names().await?; + + let mut table_names = normal_table_names; + table_names.extend(time_series_table_names); + + Ok(table_names) + } + + /// Return the name of each normal table currently in the Delta Lake. Note that this does not + /// include time series tables. If the normal table names cannot be retrieved, + /// [`ModelarDbStorageError`] is returned.
+ pub async fn normal_table_names(&self) -> Result> { + self.table_names_of_type(TableType::NormalTable).await + } + + /// Return the schema of the table with the name in `table_name` if it is a normal table. If the + /// table does not exist or the table is not a normal table, return [`None`]. [`None`] is also + /// returned if the type of the table cannot be determined or the schema of the Delta table + /// cannot be converted. + /// + /// # Panics + /// + /// Panics if the table is listed as a normal table but its Delta table cannot be opened or its + /// schema cannot be read. + pub async fn normal_table_schema(&self, table_name: &str) -> Option { + // An error while checking the table type is treated the same as the table not being a + // normal table. + if self + .is_normal_table(table_name) + .await + .is_ok_and(|is_normal_table| is_normal_table) + { + self.delta_table(table_name) + .await + .expect("Delta Lake table should exist if the metadata is in the Delta Lake.") + .get_schema() + .expect("Delta Lake table should be loaded and metadata should be in the log.") + .try_into() + .ok() + } else { + None + } + } + + /// Return the name of each time series table currently in the Delta Lake. Note that this does + /// not include normal tables. If the time series table names cannot be retrieved, + /// [`ModelarDbStorageError`] is returned. + pub async fn time_series_table_names(&self) -> Result> { + self.table_names_of_type(TableType::TimeSeriesTable).await + } + + /// Return the name of tables of `table_type`. Returns [`ModelarDbStorageError`] if the table + /// names cannot be retrieved. + async fn table_names_of_type(&self, table_type: TableType) -> Result> { + // Map the table type to the prefix of the name of the metadata table that is queried. + let table_type = match table_type { + TableType::NormalTable => "normal_table", + TableType::TimeSeriesTable => "time_series_table", + }; + + let sql = format!("SELECT table_name FROM metadata.{table_type}_metadata"); + let batch = sql_and_concat(&self.session_context, &sql).await?; + + // The table names are read from the first column of the result; flatten() skips entries + // that are None. + let table_names = modelardb_types::array!(batch, 0, StringArray); + Ok(table_names.iter().flatten().map(str::to_owned).collect()) + } + + /// Return a [`DeltaTableWriter`] for writing to the table with `table_name` in the Delta Lake, + /// or a [`ModelarDbStorageError`] if a connection to the Delta Lake cannot be established or + /// the table does not exist.
+ pub async fn table_writer(&self, table_name: &str) -> Result { + let delta_table = self.delta_table(table_name).await?; + if self + .time_series_table_metadata_for_registered_time_series_table(table_name) + .await + .is_some() + { + self.time_series_table_writer(delta_table).await + } else { + self.normal_or_metadata_table_writer(delta_table).await + } + } + + /// Return a [`DeltaTableWriter`] for writing to the time series table corresponding to + /// `delta_table` in the Delta Lake, or a [`ModelarDbStorageError`] if a connection to the Delta + /// Lake cannot be established or the table does not exist. + pub async fn time_series_table_writer( + &self, + delta_table: DeltaTable, + ) -> Result { + let partition_columns = vec![FIELD_COLUMN.to_owned()]; + + // Specify that the file must be sorted by the tag columns and then by start_time. + let base_compressed_schema_len = COMPRESSED_SCHEMA.0.fields().len(); + let compressed_schema_len = TableProvider::schema(&delta_table).fields().len(); + let sorting_columns_len = (compressed_schema_len - base_compressed_schema_len) + 1; + let mut sorting_columns = Vec::with_capacity(sorting_columns_len); + + // Compressed segments have the tag columns at the end of the schema. + for tag_column_index in base_compressed_schema_len..compressed_schema_len { + sorting_columns.push(SortingColumn::new(tag_column_index as i32, false, false)); + } + + // Compressed segments store the first timestamp in the second column. + sorting_columns.push(SortingColumn::new(1, false, false)); + + let writer_properties = apache_parquet_writer_properties(Some(sorting_columns)); + DeltaTableWriter::try_new(delta_table, partition_columns, writer_properties) + } + + /// Return a [`DeltaTableWriter`] for writing to the table corresponding to `delta_table` in the + /// Delta Lake, or a [`ModelarDbStorageError`] if a connection to the Delta Lake cannot be + /// established or the table does not exist. 
+ pub async fn normal_or_metadata_table_writer( + &self, + delta_table: DeltaTable, + ) -> Result { + let writer_properties = apache_parquet_writer_properties(None); + DeltaTableWriter::try_new(delta_table, vec![], writer_properties) + } + + /// Create a Delta Lake table for a metadata table with `table_name` and `schema` if it does not + /// already exist. If the metadata table could not be created, [`ModelarDbStorageError`] is + /// returned. An error is not returned if the metadata table already exists. + pub async fn create_metadata_table( + &self, + table_name: &str, + schema: &Schema, + ) -> Result { + self.create_table( + table_name, + schema, + &[], + self.location_of_metadata_table(table_name), + SaveMode::Ignore, + ) + .await + } + + /// Return the location of the metadata table with `table_name`. + fn location_of_metadata_table(&self, table_name: &str) -> String { + format!("{}/{METADATA_FOLDER}/{table_name}", self.location) + } + + /// Create a Delta Lake table for a normal table with `table_name` and `schema` if it does not + /// already exist. If the normal table could not be created, e.g., because it already exists, + /// [`ModelarDbStorageError`] is returned. + pub async fn create_normal_table( + &self, + table_name: &str, + schema: &Schema, + ) -> Result { + self.create_table( + table_name, + schema, + &[], + self.location_of_table(table_name), + SaveMode::ErrorIfExists, + ) + .await + } + + /// Create a Delta Lake table for a time series table with `time_series_table_metadata` if it + /// does not already exist. Returns [`DeltaTable`] if the table could be created and + /// [`ModelarDbStorageError`] if it could not. 
+ pub async fn create_time_series_table( + &self, + time_series_table_metadata: &TimeSeriesTableMetadata, + ) -> Result { + self.create_table( + &time_series_table_metadata.name, + &time_series_table_metadata.compressed_schema, + &[FIELD_COLUMN.to_owned()], + self.location_of_table(&time_series_table_metadata.name), + SaveMode::ErrorIfExists, + ) + .await + } + + /// Return the location of the table with `table_name`. + fn location_of_table(&self, table_name: &str) -> String { + format!("{}/{TABLE_FOLDER}/{table_name}", self.location) + } + + /// Create a Delta Lake table with `table_name`, `schema`, and `partition_columns` if it does + /// not already exist. Returns [`DeltaTable`] if the table could be created and + /// [`ModelarDbStorageError`] if it could not. + async fn create_table( + &self, + table_name: &str, + schema: &Schema, + partition_columns: &[String], + location: String, + save_mode: SaveMode, + ) -> Result { + let mut columns: Vec = Vec::with_capacity(schema.fields().len()); + for field in schema.fields() { + let field: &Field = field; + + // Delta Lake does not support unsigned integers. Thus tables containing the Apache + // Arrow types UInt8, UInt16, UInt32, and UInt64 must currently be rejected. + match field.data_type() { + DataType::UInt8 | DataType::UInt16 | DataType::UInt32 | DataType::UInt64 => { + Err(DeltaTableError::SchemaMismatch { + msg: "Unsigned integers are not supported.".to_owned(), + })? + } + _ => {} // All possible cases must be handled. + } + + let struct_field: StructField = field.try_into()?; + columns.push(struct_field); + } + + let delta_table = CreateBuilder::new() + .with_storage_options(self.storage_options.clone()) + .with_table_name(table_name) + .with_location(location.clone()) + .with_columns(columns) + .with_partition_columns(partition_columns) + .with_save_mode(save_mode) + .await?; + + // If the table was created successfully, add it to the cache. 
+ self.delta_table_cache.insert(location, delta_table.clone()); + + Ok(delta_table) + } + + /// Drop the metadata table with `table_name` from the Delta Lake by deleting every file related + /// to the table. The table folder cannot be deleted directly since folders do not exist in + /// object stores and therefore cannot be operated upon. If the table was dropped successfully, + /// the paths to the deleted files are returned, otherwise a [`ModelarDbStorageError`] is + /// returned. + pub async fn drop_metadata_table(&self, table_name: &str) -> Result> { + let table_path = format!("{METADATA_FOLDER}/{table_name}"); + self.delete_table_files(&table_path).await + } + + /// Drop the Delta Lake table with `table_name` from the Delta Lake by deleting every file + /// related to the table. The table folder cannot be deleted directly since folders do not exist + /// in object stores and therefore cannot be operated upon. If the table was dropped + /// successfully, the paths to the deleted files are returned, otherwise a + /// [`ModelarDbStorageError`] is returned. + pub async fn drop_table(&self, table_name: &str) -> Result> { + let table_path = format!("{TABLE_FOLDER}/{table_name}"); + self.delete_table_files(&table_path).await + } + + /// Delete all files in the folder at `table_path` using bulk operations if available. If the + /// files were deleted successfully, the paths to the deleted files are returned. + async fn delete_table_files(&self, table_path: &str) -> Result> { + let file_locations = self + .object_store + .list(Some(&Path::from(table_path))) + .map_ok(|object_meta| object_meta.location) + .boxed(); + + let deleted_paths = self + .object_store + .delete_stream(file_locations) + .try_collect::>() + .await?; + + // Remove the table from the cache. 
+ let delta_table_path = format!("{}/{}", self.location, table_path); + self.delta_table_cache.remove(&delta_table_path); + + Ok(deleted_paths) + } + + /// Truncate the Delta Lake table with `table_name` by deleting all rows in the table. If the + /// rows could not be deleted, a [`ModelarDbStorageError`] is returned. + pub async fn truncate_table(&self, table_name: &str) -> Result<()> { + let delta_table_ops = self.delta_ops(table_name).await?; + delta_table_ops.delete().await?; + + Ok(()) + } + + /// Vacuum the Delta Lake table with `table_name` by deleting stale files that are older than + /// `maybe_retention_period_in_seconds` seconds. If a retention period is not given, the + /// default retention period of 7 days is used. If the retention period is larger than + /// [`MAX_RETENTION_PERIOD_IN_SECONDS`] seconds or the files could not be deleted, a + /// [`ModelarDbStorageError`] is returned. + pub async fn vacuum_table( + &self, + table_name: &str, + maybe_retention_period_in_seconds: Option, + ) -> Result<()> { + let delta_table_ops = self.delta_ops(table_name).await?; + + // Fall back to 7 days expressed in seconds if no retention period is given. + let retention_period_in_seconds = + maybe_retention_period_in_seconds.unwrap_or(60 * 60 * 24 * 7); + + // NOTE(review): the retention period is not compared against MAX_RETENTION_PERIOD_IN_SECONDS + // here; the error is only produced when TimeDelta::new() returns None, and the `as i64` cast + // is applied before that check. Confirm that both agree with the documented limit. + let retention_period = TimeDelta::new(retention_period_in_seconds as i64, 0).ok_or( + ModelarDbStorageError::InvalidArgument(format!( + "Retention period cannot be more than {MAX_RETENTION_PERIOD_IN_SECONDS} seconds." + )), + )?; + + // NOTE(review): enforcement of the retention duration is explicitly disabled, presumably so + // the requested period is used even if it is short. Verify against the delta-rs documentation. + delta_table_ops + .vacuum() + .with_retention_period(retention_period) + .with_enforce_retention_duration(false) + .await?; + + Ok(()) + } + + /// Save the created normal table to the Delta Lake. This consists of adding a row to the + /// `normal_table_metadata` table with the `name` of the table. If the normal table metadata was + /// saved, return [`Ok`], otherwise return [`ModelarDbStorageError`].
+ pub async fn save_normal_table_metadata(&self, name: &str) -> Result<()> { + self.write_columns_to_metadata_table( + "normal_table_metadata", + vec![Arc::new(StringArray::from(vec![name]))], + ) + .await?; + + Ok(()) + } + + /// Save the created time series table to the Delta Lake. This includes adding a row to the + /// `time_series_table_metadata` table and adding a row to the `time_series_table_field_columns` + /// table for each field column. + pub async fn save_time_series_table_metadata( + &self, + time_series_table_metadata: &TimeSeriesTableMetadata, + ) -> Result<()> { + // Convert the query schema to bytes, so it can be saved in the Delta Lake. + let query_schema_bytes = + try_convert_schema_to_bytes(&time_series_table_metadata.query_schema)?; + + // Add a new row in the time_series_table_metadata table to persist the time series table. + self.write_columns_to_metadata_table( + "time_series_table_metadata", + vec![ + Arc::new(StringArray::from(vec![ + time_series_table_metadata.name.clone(), + ])), + Arc::new(BinaryArray::from_vec(vec![&query_schema_bytes])), + ], + ) + .await?; + + // Add a row for each field column to the time_series_table_field_columns table. + for (query_schema_index, field) in time_series_table_metadata + .query_schema + .fields() + .iter() + .enumerate() + { + if field.data_type() == &ArrowValue::DATA_TYPE { + // Convert the generated column expression to bytes, if it exists. + let maybe_generated_column_expr = match time_series_table_metadata + .generated_columns + .get(query_schema_index) + { + Some(Some(generated_column)) => { + Some(generated_column.expr.to_bytes()?.to_vec()) + } + _ => None, + }; + + // error_bounds matches schema and not query_schema to simplify looking up the error + // bound during ingestion as it occurs far more often than creation of time series tables. 
+ let (error_bound_value, error_bound_is_relative) = if let Ok(schema_index) = + time_series_table_metadata.schema.index_of(field.name()) + { + match time_series_table_metadata.error_bounds[schema_index] { + ErrorBound::Absolute(value) => (value, false), + ErrorBound::Relative(value) => (value, true), + ErrorBound::Lossless => (0.0, false), + } + } else { + (0.0, false) + }; + + // query_schema_index is simply cast as a time series table contains at most 32767 columns. + self.write_columns_to_metadata_table( + "time_series_table_field_columns", + vec![ + Arc::new(StringArray::from(vec![ + time_series_table_metadata.name.clone(), + ])), + Arc::new(StringArray::from(vec![field.name().clone()])), + Arc::new(Int16Array::from(vec![query_schema_index as i16])), + Arc::new(Float32Array::from(vec![error_bound_value])), + Arc::new(BooleanArray::from(vec![error_bound_is_relative])), + Arc::new(BinaryArray::from_opt_vec(vec![ + maybe_generated_column_expr.as_deref(), + ])), + ], + ) + .await?; + } + } + + Ok(()) + } + + /// Write `columns` to a Delta Lake table with `table_name`. Returns an updated [`DeltaTable`] + /// version if the file was written successfully, otherwise returns [`ModelarDbStorageError`]. + pub async fn write_columns_to_metadata_table( + &self, + table_name: &str, + columns: Vec, + ) -> Result { + let delta_table = self.metadata_delta_table(table_name).await?; + let record_batch = RecordBatch::try_new(TableProvider::schema(&delta_table), columns)?; + let delta_table_writer = self.normal_or_metadata_table_writer(delta_table).await?; + self.write_record_batches_to_table(delta_table_writer, vec![record_batch]) + .await + } + + /// Write `record_batches` to a Delta Lake table for a normal table with `table_name`. Returns + /// an updated [`DeltaTable`] version if the file was written successfully, otherwise returns + /// [`ModelarDbStorageError`]. 
+ pub async fn write_record_batches_to_normal_table( + &self, + table_name: &str, + record_batches: Vec, + ) -> Result { + let delta_table = self.delta_table(table_name).await?; + let delta_table_writer = self.normal_or_metadata_table_writer(delta_table).await?; + self.write_record_batches_to_table(delta_table_writer, record_batches) + .await + } + + /// Write `compressed_segments` to a Delta Lake table for a time series table with `table_name`. + /// Returns an updated [`DeltaTable`] if the file was written successfully, otherwise returns + /// [`ModelarDbStorageError`]. + pub async fn write_compressed_segments_to_time_series_table( + &self, + table_name: &str, + compressed_segments: Vec, + ) -> Result { + let delta_table = self.delta_table(table_name).await?; + let delta_table_writer = self.time_series_table_writer(delta_table).await?; + self.write_record_batches_to_table(delta_table_writer, compressed_segments) + .await + } + + /// Write `record_batches` to the `delta_table_writer` and commit. Returns an updated + /// [`DeltaTable`] if all `record_batches` are written and committed successfully, otherwise it + /// rolls back all writes done using `delta_table_writer` and returns [`ModelarDbStorageError`]. + async fn write_record_batches_to_table( + &self, + mut delta_table_writer: DeltaTableWriter, + record_batches: Vec, + ) -> Result { + match delta_table_writer.write_all(&record_batches).await { + Ok(_) => delta_table_writer.commit().await, + Err(error) => { + delta_table_writer.rollback().await?; + Err(error) + } + } + } + + /// Depending on the type of the table with `table_name`, drop either the normal table metadata + /// or the time series table metadata from the Delta Lake. If the table does not exist or the + /// metadata could not be dropped, [`ModelarDbStorageError`] is returned. + pub async fn drop_table_metadata(&self, table_name: &str) -> Result<()> { + if self.is_normal_table(table_name).await? 
{ + self.drop_normal_table_metadata(table_name).await + } else if self.is_time_series_table(table_name).await? { + self.drop_time_series_table_metadata(table_name).await + } else { + Err(ModelarDbStorageError::InvalidArgument(format!( + "Table with name '{table_name}' does not exist." + ))) + } + } + + /// Drop the metadata for the normal table with `table_name` from the `normal_table_metadata` + /// table in the Delta Lake. If the metadata could not be dropped, [`ModelarDbStorageError`] is + /// returned. + async fn drop_normal_table_metadata(&self, table_name: &str) -> Result<()> { + let delta_ops = self.metadata_delta_ops("normal_table_metadata").await?; + + delta_ops + .delete() + .with_predicate(col("table_name").eq(lit(table_name))) + .await?; + + Ok(()) + } + + /// Drop the metadata for the time series table with `table_name` from the Delta Lake. This + /// includes deleting a row from the `time_series_table_metadata` table and deleting a row from + /// the `time_series_table_field_columns` table for each field column. If the metadata could not + /// be dropped, [`ModelarDbStorageError`] is returned. + async fn drop_time_series_table_metadata(&self, table_name: &str) -> Result<()> { + // Delete the table metadata from the time_series_table_metadata table. + self.metadata_delta_ops("time_series_table_metadata") + .await? + .delete() + .with_predicate(col("table_name").eq(lit(table_name))) + .await?; + + // Delete the column metadata from the time_series_table_field_columns table. + self.metadata_delta_ops("time_series_table_field_columns") + .await? + .delete() + .with_predicate(col("table_name").eq(lit(table_name))) + .await?; + + Ok(()) + } + + /// Return the [`TimeSeriesTableMetadata`] of each time series table currently in the metadata + /// Delta Lake. If the [`TimeSeriesTableMetadata`] cannot be retrieved, + /// [`ModelarDbStorageError`] is returned. 
+ pub async fn time_series_table_metadata(&self) -> Result>> { + let sql = "SELECT table_name, query_schema FROM metadata.time_series_table_metadata"; + let batch = sql_and_concat(&self.session_context, sql).await?; + + let mut time_series_table_metadata: Vec> = vec![]; + let table_name_array = modelardb_types::array!(batch, 0, StringArray); + let query_schema_bytes_array = modelardb_types::array!(batch, 1, BinaryArray); + + for row_index in 0..batch.num_rows() { + let table_name = table_name_array.value(row_index); + let query_schema_bytes = query_schema_bytes_array.value(row_index); + + let metadata = self + .time_series_table_metadata_row_to_time_series_table_metadata( + table_name, + query_schema_bytes, + ) + .await?; + + time_series_table_metadata.push(Arc::new(metadata)) + } + + Ok(time_series_table_metadata) + } + + /// Return the [`TimeSeriesTableMetadata`] for the time series table with `table_name` in the + /// Delta Lake. If the [`TimeSeriesTableMetadata`] cannot be retrieved, + /// [`ModelarDbStorageError`] is returned. + pub async fn time_series_table_metadata_for_time_series_table( + &self, + table_name: &str, + ) -> Result { + let sql = format!( + "SELECT table_name, query_schema FROM metadata.time_series_table_metadata WHERE table_name = '{table_name}'" + ); + let batch = sql_and_concat(&self.session_context, &sql).await?; + + if batch.num_rows() == 0 { + return Err(ModelarDbStorageError::InvalidArgument(format!( + "No metadata for time series table named '{table_name}'." 
+ ))); + } + + let table_name_array = modelardb_types::array!(batch, 0, StringArray); + let query_schema_bytes_array = modelardb_types::array!(batch, 1, BinaryArray); + + let table_name = table_name_array.value(0); + let query_schema_bytes = query_schema_bytes_array.value(0); + + self.time_series_table_metadata_row_to_time_series_table_metadata( + table_name, + query_schema_bytes, + ) + .await + } + + /// Return [`TimeSeriesTableMetadata`] for the time series table with `table_name` if it exists, + /// is registered with Apache DataFusion, and is a time series table. + pub async fn time_series_table_metadata_for_registered_time_series_table( + &self, + table_name: &str, + ) -> Option> { + let table_provider = self.session_context.table_provider(table_name).await.ok()?; + crate::maybe_table_provider_to_time_series_table_metadata(table_provider) + } + + /// Convert a row from the table "time_series_table_metadata" to an instance of + /// [`TimeSeriesTableMetadata`]. Returns [`ModelarDbStorageError`] if a time_series table with + /// `table_name` does not exist or the bytes in `query_schema_bytes` are not a valid schema. + async fn time_series_table_metadata_row_to_time_series_table_metadata( + &self, + table_name: &str, + query_schema_bytes: &[u8], + ) -> Result { + let query_schema = try_convert_bytes_to_schema(query_schema_bytes.into())?; + + let error_bounds = self + .error_bounds(table_name, query_schema.fields().len()) + .await?; + + let df_query_schema = query_schema.clone().to_dfschema()?; + let generated_columns = self.generated_columns(table_name, &df_query_schema).await?; + + TimeSeriesTableMetadata::try_new( + table_name.to_owned(), + Arc::new(query_schema), + error_bounds, + generated_columns, + ) + .map_err(|error| error.into()) + } + + /// Return the error bounds for the columns in the time series table with `table_name`. If a + /// time series table with `table_name` does not exist, [`ModelarDbStorageError`] is returned. 
+    async fn error_bounds(
+        &self,
+        table_name: &str,
+        query_schema_columns: usize,
+    ) -> Result<Vec<ErrorBound>> {
+        // Single quotes are doubled so table_name cannot terminate the SQL string literal.
+        let escaped_table_name = table_name.replace('\'', "''");
+        let sql = format!(
+            "SELECT column_index, error_bound_value, error_bound_is_relative
+             FROM metadata.time_series_table_field_columns
+             WHERE table_name = '{escaped_table_name}'
+             ORDER BY column_index"
+        );
+        let batch = sql_and_concat(&self.session_context, &sql).await?;
+
+        // Columns without a row in the metadata table, or with the value 0.0, are lossless.
+        let mut column_to_error_bound = vec![ErrorBound::Lossless; query_schema_columns];
+
+        let column_index_array = modelardb_types::array!(batch, 0, Int16Array);
+        let error_bound_value_array = modelardb_types::array!(batch, 1, Float32Array);
+        let error_bound_is_relative_array = modelardb_types::array!(batch, 2, BooleanArray);
+
+        for row_index in 0..batch.num_rows() {
+            let error_bound_index = column_index_array.value(row_index);
+            let error_bound_value = error_bound_value_array.value(row_index);
+            let error_bound_is_relative = error_bound_is_relative_array.value(row_index);
+
+            if error_bound_value == 0.0 {
+                continue;
+            }
+
+            let error_bound = if error_bound_is_relative {
+                ErrorBound::try_new_relative(error_bound_value)
+            } else {
+                ErrorBound::try_new_absolute(error_bound_value)
+            }?;
+
+            // The stored index is validated instead of cast so that a negative or too large index
+            // in the metadata results in an error instead of a panic.
+            let column_index = usize::try_from(error_bound_index)
+                .ok()
+                .filter(|column_index| *column_index < column_to_error_bound.len())
+                .ok_or_else(|| {
+                    ModelarDbStorageError::InvalidArgument(format!(
+                        "Column index {error_bound_index} is out of bounds for time series table '{table_name}'."
+                    ))
+                })?;
+
+            column_to_error_bound[column_index] = error_bound;
+        }
+
+        Ok(column_to_error_bound)
+    }
+
+    /// Return the generated columns for the time series table with `table_name` and `df_schema`. If
+    /// a time series table with `table_name` does not exist, [`ModelarDbStorageError`] is returned.
+ async fn generated_columns( + &self, + table_name: &str, + df_schema: &DFSchema, + ) -> Result>> { + let sql = format!( + "SELECT column_index, generated_column_expr + FROM metadata.time_series_table_field_columns + WHERE table_name = '{table_name}' + ORDER BY column_index" + ); + let batch = sql_and_concat(&self.session_context, &sql).await?; + + let mut generated_columns = vec![None; df_schema.fields().len()]; + + let column_index_array = modelardb_types::array!(batch, 0, Int16Array); + let generated_column_expr_array = modelardb_types::array!(batch, 1, BinaryArray); + + for row_index in 0..batch.num_rows() { + let generated_column_index = column_index_array.value(row_index); + let expr_bytes = generated_column_expr_array.value(row_index); + + // If generated_column_expr is null, it is saved as empty bytes in the column values. + if !expr_bytes.is_empty() { + let expr = Expr::from_bytes(expr_bytes)?; + let generated_column = GeneratedColumn::try_from_expr(expr, df_schema)?; + + generated_columns[generated_column_index as usize] = Some(generated_column); + } + } + + Ok(generated_columns) + } +} + +/// Functionality for transactionally writing [`RecordBatches`](RecordBatch) to a Delta table stored +/// in an object store. +pub struct DeltaTableWriter { + /// Delta table that all of the record batches will be written to. + delta_table: DeltaTable, + /// Checker that ensures all of the record batches match the table. + delta_data_checker: DeltaDataChecker, + /// Write operation that will be committed to the Delta table. + delta_operation: DeltaOperation, + /// Unique identifier for this write operation to the Delta table. + operation_id: Uuid, + /// Writes record batches to the Delta table as Apache Parquet files. + delta_writer: DeltaWriter, +} + +impl DeltaTableWriter { + /// Create a new [`DeltaTableWriter`]. Returns a [`ModelarDbStorageError`] if the state of the + /// Delta table cannot be loaded from `delta_table`. 
+ pub fn try_new( + delta_table: DeltaTable, + partition_columns: Vec, + writer_properties: WriterProperties, + ) -> Result { + // Checker for if record batches match the table’s invariants, constraints, and nullability. + let snapshot = delta_table.snapshot()?; + let delta_data_checker = DeltaDataChecker::new(snapshot); + + // Operation that will be committed. + let delta_operation = DeltaOperation::Write { + mode: SaveMode::Append, + partition_by: if partition_columns.is_empty() { + None + } else { + Some(partition_columns.clone()) + }, + predicate: None, + }; + + // A UUID version 4 is used as the operation id to match the existing Operation trait in the + // deltalake crate as it is pub(trait) and thus cannot be used directly in DeltaTableWriter. + let operation_id = Uuid::new_v4(); + + // Writer that will write the record batches. + let object_store = delta_table.log_store().object_store(Some(operation_id)); + let table_schema: Arc = TableProvider::schema(&delta_table); + let num_indexed_cols = table_schema.fields.len() as i32; + let writer_config = WriterConfig::new( + table_schema, + partition_columns, + Some(writer_properties), + None, + None, + num_indexed_cols, + None, + ); + let delta_writer = DeltaWriter::new(object_store, writer_config); + + Ok(Self { + delta_table, + delta_data_checker, + delta_operation, + operation_id, + delta_writer, + }) + } + + /// Write `record_batch` to the Delta table. Returns a [`ModelarDbStorageError`] if the + /// [`RecordBatches`](RecordBatch) does not match the schema of the Delta table or if the + /// writing fails. + pub async fn write(&mut self, record_batch: &RecordBatch) -> Result<()> { + self.delta_data_checker.check_batch(record_batch).await?; + self.delta_writer.write(record_batch).await?; + Ok(()) + } + + /// Write all `record_batches` to the Delta table. 
Returns a [`ModelarDbStorageError`] if one of + /// the [`RecordBatches`](RecordBatch) does not match the schema of the Delta table or if the + /// writing fails. + pub async fn write_all(&mut self, record_batches: &[RecordBatch]) -> Result<()> { + for record_batch in record_batches { + self.write(record_batch).await?; + } + Ok(()) + } + + /// Consume the [`DeltaTableWriter`], finish the writing, and commit the files that have been + /// written to the log. If an error occurs before the commit is finished, the already written + /// files are deleted if possible. Returns a [`ModelarDbStorageError`] if an error occurs when + /// finishing the writing, committing the files that have been written, deleting the written + /// files, or updating the [`DeltaTable`]. + pub async fn commit(mut self) -> Result { + // Write the remaining buffered files. + let added_files = self.delta_writer.close().await?; + + // Clone added_files in case of rollback. + let actions = added_files + .clone() + .into_iter() + .map(Action::Add) + .collect::>(); + + // Prepare all inputs to the commit. + let object_store = self.delta_table.object_store(); + let commit_properties = CommitProperties::default(); + let table_data = match self.delta_table.snapshot() { + Ok(table_data) => table_data, + Err(delta_table_error) => { + delete_added_files(&object_store, added_files).await?; + return Err(ModelarDbStorageError::DeltaLake(delta_table_error)); + } + }; + let log_store = self.delta_table.log_store(); + + // Construct the commit to be written. + let commit_builder = CommitBuilder::from(commit_properties) + .with_actions(actions) + .with_operation_id(self.operation_id) + .build(Some(table_data), log_store, self.delta_operation); + + // Write the commit to the Delta table. 
+ let _finalized_commit = match commit_builder.await { + Ok(finalized_commit) => finalized_commit, + Err(delta_table_error) => { + delete_added_files(&object_store, added_files).await?; + return Err(ModelarDbStorageError::DeltaLake(delta_table_error)); + } + }; + + // Return Delta table with the commit. + self.delta_table.load().await?; + Ok(self.delta_table) + } + + /// Consume the [`DeltaTableWriter`], abort the writing, and delete all of the files that have + /// already been written. Returns a [`ModelarDbStorageError`] if an error occurs when aborting + /// the writing or deleting the files that have already been written. Rollback is not called + /// automatically as drop() is not async and async_drop() is not yet a stable API. + pub async fn rollback(self) -> Result { + let object_store = self.delta_table.object_store(); + let added_files = self.delta_writer.close().await?; + delete_added_files(&object_store, added_files).await?; + Ok(self.delta_table) + } +} + +/// Delete the `added_files` from `object_store`. Returns a [`ModelarDbStorageError`] if a file +/// could not be deleted. It is a function instead of a method on [`DeltaTableWriter`] so it can be +/// called by [`DeltaTableWriter`] after the [`DeltaWriter`] is closed without lifetime issues. +async fn delete_added_files(object_store: &dyn ObjectStore, added_files: Vec) -> Result<()> { + for add_file in added_files { + let path: Path = Path::from(add_file.path); + object_store.delete(&path).await?; + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + use arrow::datatypes::{ArrowPrimitiveType, Field}; + use datafusion::arrow::datatypes::DataType; + use datafusion::common::ScalarValue::Int64; + use datafusion::logical_expr::Expr::Literal; + use modelardb_test::table as test; + use modelardb_types::types::ArrowTimestamp; + use tempfile::TempDir; + + // Tests for DataFolder. 
+ #[tokio::test] + async fn test_create_metadata_data_folder_tables() { + let temp_dir = tempfile::tempdir().unwrap(); + let data_folder = DataFolder::open_local(temp_dir.path()).await.unwrap(); + + assert!( + data_folder + .session_context + .sql("SELECT table_name FROM metadata.normal_table_metadata") + .await + .is_ok() + ); + + assert!( + data_folder + .session_context + .sql("SELECT table_name, query_schema FROM metadata.time_series_table_metadata") + .await + .is_ok() + ); + + assert!(data_folder + .session_context + .sql("SELECT table_name, column_name, column_index, error_bound_value, error_bound_is_relative, \ + generated_column_expr FROM metadata.time_series_table_field_columns") + .await + .is_ok()); + } + + #[tokio::test] + async fn test_normal_table_is_normal_table() { + let (_temp_dir, data_folder) = create_data_folder_and_save_normal_tables().await; + assert!(data_folder.is_normal_table("normal_table_1").await.unwrap()); + } + + #[tokio::test] + async fn test_time_series_table_is_not_normal_table() { + let (_temp_dir, data_folder) = create_data_folder_and_save_time_series_table().await; + assert!( + !data_folder + .is_normal_table(test::TIME_SERIES_TABLE_NAME) + .await + .unwrap() + ); + } + + #[tokio::test] + async fn test_time_series_table_is_time_series_table() { + let (_temp_dir, data_folder) = create_data_folder_and_save_time_series_table().await; + assert!( + data_folder + .is_time_series_table(test::TIME_SERIES_TABLE_NAME) + .await + .unwrap() + ); + } + + #[tokio::test] + async fn test_normal_table_is_not_time_series_table() { + let (_temp_dir, data_folder) = create_data_folder_and_save_normal_tables().await; + assert!( + !data_folder + .is_time_series_table("normal_table_1") + .await + .unwrap() + ); + } + + #[tokio::test] + async fn test_table_names() { + let (_temp_dir, data_folder) = create_data_folder_and_save_normal_tables().await; + + let time_series_table_metadata = test::time_series_table_metadata(); + data_folder + 
.save_time_series_table_metadata(&time_series_table_metadata) + .await + .unwrap(); + + let table_names = data_folder.table_names().await.unwrap(); + assert_eq!( + table_names, + vec![ + "normal_table_2", + "normal_table_1", + test::TIME_SERIES_TABLE_NAME + ] + ); + } + + #[tokio::test] + async fn test_normal_table_names() { + let (_temp_dir, data_folder) = create_data_folder_and_save_normal_tables().await; + + let normal_table_names = data_folder.normal_table_names().await.unwrap(); + assert_eq!(normal_table_names, vec!["normal_table_2", "normal_table_1"]); + } + + #[tokio::test] + async fn test_time_series_table_names() { + let (_temp_dir, data_folder) = create_data_folder_and_save_time_series_table().await; + + let time_series_table_names = data_folder.time_series_table_names().await.unwrap(); + assert_eq!(time_series_table_names, vec![test::TIME_SERIES_TABLE_NAME]); + } + + #[tokio::test] + async fn test_save_normal_table_metadata() { + let (_temp_dir, data_folder) = create_data_folder_and_save_normal_tables().await; + + // Retrieve the normal table from the Delta Lake. + let sql = "SELECT table_name FROM metadata.normal_table_metadata ORDER BY table_name"; + let batch = sql_and_concat(&data_folder.session_context, sql) + .await + .unwrap(); + + assert_eq!( + **batch.column(0), + StringArray::from(vec!["normal_table_1", "normal_table_2"]) + ); + } + + #[tokio::test] + async fn test_save_time_series_table_metadata() { + let (_temp_dir, data_folder) = create_data_folder_and_save_time_series_table().await; + + // Check that a row has been added to the time_series_table_metadata table. 
+ let sql = "SELECT table_name, query_schema FROM metadata.time_series_table_metadata"; + let batch = sql_and_concat(&data_folder.session_context, sql) + .await + .unwrap(); + + assert_eq!( + **batch.column(0), + StringArray::from(vec![test::TIME_SERIES_TABLE_NAME]) + ); + assert_eq!( + **batch.column(1), + BinaryArray::from_vec(vec![ + &try_convert_schema_to_bytes(&test::time_series_table_metadata().query_schema) + .unwrap() + ]) + ); + + // Check that a row has been added to the time_series_table_field_columns table for each field column. + let sql = "SELECT table_name, column_name, column_index, error_bound_value, error_bound_is_relative, \ + generated_column_expr FROM metadata.time_series_table_field_columns ORDER BY column_name"; + let batch = sql_and_concat(&data_folder.session_context, sql) + .await + .unwrap(); + + assert_eq!( + **batch.column(0), + StringArray::from(vec![ + test::TIME_SERIES_TABLE_NAME, + test::TIME_SERIES_TABLE_NAME + ]) + ); + assert_eq!( + **batch.column(1), + StringArray::from(vec!["field_1", "field_2"]) + ); + assert_eq!(**batch.column(2), Int16Array::from(vec![1, 2])); + assert_eq!(**batch.column(3), Float32Array::from(vec![1.0, 5.0])); + assert_eq!(**batch.column(4), BooleanArray::from(vec![false, true])); + assert_eq!( + **batch.column(5), + BinaryArray::from_opt_vec(vec![None, None]) + ); + } + + #[tokio::test] + async fn test_drop_normal_table_metadata() { + let (_temp_dir, data_folder) = create_data_folder_and_save_normal_tables().await; + + data_folder + .drop_table_metadata("normal_table_2") + .await + .unwrap(); + + // Verify that normal_table_2 was deleted from the normal_table_metadata table. 
+ let sql = "SELECT table_name FROM metadata.normal_table_metadata"; + let batch = sql_and_concat(&data_folder.session_context, sql) + .await + .unwrap(); + + assert_eq!(**batch.column(0), StringArray::from(vec!["normal_table_1"])); + } + + #[tokio::test] + async fn test_drop_time_series_table_metadata() { + let (_temp_dir, data_folder) = create_data_folder_and_save_time_series_table().await; + + data_folder + .drop_table_metadata(test::TIME_SERIES_TABLE_NAME) + .await + .unwrap(); + + // Verify that the time series table was deleted from the time_series_table_metadata table. + let sql = "SELECT table_name FROM metadata.time_series_table_metadata"; + let batch = sql_and_concat(&data_folder.session_context, sql) + .await + .unwrap(); + + assert_eq!(batch.num_rows(), 0); + + // Verify that the field columns were deleted from the time_series_table_field_columns table. + let sql = "SELECT table_name FROM metadata.time_series_table_field_columns"; + let batch = sql_and_concat(&data_folder.session_context, sql) + .await + .unwrap(); + + assert_eq!(batch.num_rows(), 0); + } + + #[tokio::test] + async fn test_drop_table_metadata_for_missing_table() { + let (_temp_dir, data_folder) = create_data_folder_and_save_normal_tables().await; + + assert!( + data_folder + .drop_table_metadata("missing_table") + .await + .is_err() + ); + } + + async fn create_data_folder_and_save_normal_tables() -> (TempDir, DataFolder) { + let temp_dir = tempfile::tempdir().unwrap(); + let data_folder = DataFolder::open_local(temp_dir.path()).await.unwrap(); + + data_folder + .save_normal_table_metadata("normal_table_1") + .await + .unwrap(); + + data_folder + .save_normal_table_metadata("normal_table_2") + .await + .unwrap(); + + (temp_dir, data_folder) + } + + #[tokio::test] + async fn test_time_series_table_metadata() { + let (_temp_dir, data_folder) = create_data_folder_and_save_time_series_table().await; + + let time_series_table_metadata = 
data_folder.time_series_table_metadata().await.unwrap(); + + assert_eq!( + time_series_table_metadata.first().unwrap().name, + test::time_series_table_metadata().name, + ); + } + + #[tokio::test] + async fn test_time_series_table_metadata_for_existing_time_series_table() { + let (_temp_dir, data_folder) = create_data_folder_and_save_time_series_table().await; + + let time_series_table_metadata = data_folder + .time_series_table_metadata_for_time_series_table(test::TIME_SERIES_TABLE_NAME) + .await + .unwrap(); + + assert_eq!( + time_series_table_metadata.name, + test::time_series_table_metadata().name, + ); + } + + #[tokio::test] + async fn test_time_series_table_metadata_for_missing_time_series_table() { + let (_temp_dir, data_folder) = create_data_folder_and_save_time_series_table().await; + + let time_series_table_metadata = data_folder + .time_series_table_metadata_for_time_series_table("missing_table") + .await; + + assert!(time_series_table_metadata.is_err()); + } + + #[tokio::test] + async fn test_error_bound() { + let (_temp_dir, data_folder) = create_data_folder_and_save_time_series_table().await; + + let error_bounds = data_folder + .error_bounds(test::TIME_SERIES_TABLE_NAME, 4) + .await + .unwrap(); + + let values: Vec = error_bounds + .iter() + .map(|error_bound| match error_bound { + ErrorBound::Absolute(value) => *value, + ErrorBound::Relative(value) => *value, + ErrorBound::Lossless => 0.0, + }) + .collect(); + + assert_eq!(values, &[0.0, 1.0, 5.0, 0.0]); + } + + #[tokio::test] + async fn test_generated_columns() { + let temp_dir = tempfile::tempdir().unwrap(); + let data_folder = DataFolder::open_local(temp_dir.path()).await.unwrap(); + + let query_schema = Arc::new(Schema::new(vec![ + Field::new("timestamp", ArrowTimestamp::DATA_TYPE, false), + Field::new("field_1", ArrowValue::DATA_TYPE, false), + Field::new("field_2", ArrowValue::DATA_TYPE, false), + Field::new("tag", DataType::Utf8, false), + Field::new("generated_column_1", 
ArrowValue::DATA_TYPE, false), + Field::new("generated_column_2", ArrowValue::DATA_TYPE, false), + ])); + + let error_bounds = vec![ErrorBound::Lossless; query_schema.fields.len()]; + + let plus_one_column = Some(GeneratedColumn { + expr: col("field_1") + Literal(Int64(Some(1))), + source_columns: vec![1], + }); + + let addition_column = Some(GeneratedColumn { + expr: col("field_1") + col("field_2"), + source_columns: vec![1, 2], + }); + + let expected_generated_columns = + vec![None, None, None, None, plus_one_column, addition_column]; + + let time_series_table_metadata = TimeSeriesTableMetadata::try_new( + "generated_columns_table".to_owned(), + query_schema, + error_bounds, + expected_generated_columns.clone(), + ) + .unwrap(); + + data_folder + .save_time_series_table_metadata(&time_series_table_metadata) + .await + .unwrap(); + + let df_schema = time_series_table_metadata + .query_schema + .to_dfschema() + .unwrap(); + let generated_columns = data_folder + .generated_columns("generated_columns_table", &df_schema) + .await + .unwrap(); + + assert_eq!( + generated_columns[0..generated_columns.len() - 1], + expected_generated_columns[0..expected_generated_columns.len() - 1] + ); + + // Sort the source columns to ensure the order is consistent. + let mut last_generated_column = generated_columns.last().unwrap().clone().unwrap(); + last_generated_column.source_columns.sort(); + + assert_eq!( + &Some(last_generated_column), + expected_generated_columns.last().unwrap() + ); + } + + async fn create_data_folder_and_save_time_series_table() -> (TempDir, DataFolder) { + let temp_dir = tempfile::tempdir().unwrap(); + let data_folder = DataFolder::open_local(temp_dir.path()).await.unwrap(); + + // Save a time series table to the Delta Lake. 
+ let time_series_table_metadata = test::time_series_table_metadata(); + data_folder + .save_time_series_table_metadata(&time_series_table_metadata) + .await + .unwrap(); + + (temp_dir, data_folder) + } +} diff --git a/crates/modelardb_storage/src/delta_lake.rs b/crates/modelardb_storage/src/delta_lake.rs deleted file mode 100644 index c80893e11..000000000 --- a/crates/modelardb_storage/src/delta_lake.rs +++ /dev/null @@ -1,724 +0,0 @@ -/* Copyright 2024 The ModelarDB Contributors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -//! Implementation of the type used to interact with local and remote storage through a Delta Lake. 
- -use std::collections::HashMap; -use std::fs; -use std::path::Path as StdPath; -use std::sync::Arc; - -use arrow::array::{ArrayRef, RecordBatch}; -use arrow::datatypes::{DataType, Field, Schema}; -use chrono::TimeDelta; -use dashmap::DashMap; -use datafusion::catalog::TableProvider; -use datafusion::parquet::file::properties::WriterProperties; -use datafusion::parquet::format::SortingColumn; -use deltalake::delta_datafusion::DeltaDataChecker; -use deltalake::kernel::transaction::{CommitBuilder, CommitProperties}; -use deltalake::kernel::{Action, Add, StructField}; -use deltalake::operations::create::CreateBuilder; -use deltalake::operations::write::writer::{DeltaWriter, WriterConfig}; -use deltalake::protocol::{DeltaOperation, SaveMode}; -use deltalake::{DeltaOps, DeltaTable, DeltaTableError}; -use futures::{StreamExt, TryStreamExt}; -use modelardb_types::flight::protocol; -use modelardb_types::schemas::{COMPRESSED_SCHEMA, FIELD_COLUMN}; -use modelardb_types::types::{MAX_RETENTION_PERIOD_IN_SECONDS, TimeSeriesTableMetadata}; -use object_store::ObjectStore; -use object_store::aws::AmazonS3Builder; -use object_store::local::LocalFileSystem; -use object_store::memory::InMemory; -use object_store::path::Path; -use url::Url; -use uuid::Uuid; - -use crate::error::{ModelarDbStorageError, Result}; -use crate::{METADATA_FOLDER, TABLE_FOLDER, apache_parquet_writer_properties}; - -/// Functionality for managing Delta Lake tables in a local folder or an object store. -pub struct DeltaLake { - /// URL to access the root of the Delta Lake. - location: String, - /// Storage options required to access Delta Lake. - storage_options: HashMap, - /// [`ObjectStore`] to access the root of the Delta Lake. - object_store: Arc, - /// Cache of Delta tables to avoid opening the same table multiple times. - delta_table_cache: DashMap, -} - -impl DeltaLake { - /// Create a new [`DeltaLake`] that manages the Delta tables at `local_url`. 
If `local_url` has - /// the schema `file` or no schema, the Delta tables are managed in a local data folder. If - /// `local_url` has the schema `memory`, the Delta tables are managed in memory. Return - /// [`ModelarDbStorageError`] if `local_url` cannot be parsed. - pub fn try_from_local_url(local_url: &str) -> Result { - match local_url.split_once("://") { - None => Self::try_from_local_path(StdPath::new(local_url)), - Some(("file", local_path)) => Self::try_from_local_path(StdPath::new(local_path)), - Some(("memory", _)) => Ok(Self::new_in_memory()), - _ => Err(ModelarDbStorageError::InvalidArgument(format!( - "{local_url} is not a valid local URL." - ))), - } - } - - /// Create a new [`DeltaLake`] that manages the Delta tables in memory. - pub fn new_in_memory() -> Self { - Self { - location: "memory:///modelardb".to_owned(), - storage_options: HashMap::new(), - object_store: Arc::new(InMemory::new()), - delta_table_cache: DashMap::new(), - } - } - - /// Create a new [`DeltaLake`] that manages the Delta tables in `data_folder_path`. Returns a - /// [`ModelarDbStorageError`] if `data_folder_path` does not exist and could not be created. - pub fn try_from_local_path(data_folder_path: &StdPath) -> Result { - // Ensure the directories in the path exists as LocalFileSystem otherwise returns an error. - fs::create_dir_all(data_folder_path) - .map_err(|error| DeltaTableError::generic(error.to_string()))?; - - // Use with_automatic_cleanup to ensure empty directories are deleted automatically. - let object_store = LocalFileSystem::new_with_prefix(data_folder_path) - .map_err(|error| DeltaTableError::generic(error.to_string()))? - .with_automatic_cleanup(true); - - let location = data_folder_path - .to_str() - .ok_or_else(|| DeltaTableError::generic("Local data folder path is not UTF-8."))? 
- .to_owned(); - - Ok(Self { - location, - storage_options: HashMap::new(), - object_store: Arc::new(object_store), - delta_table_cache: DashMap::new(), - }) - } - - /// Create a new [`DeltaLake`] that manages Delta tables in the remote object store given by - /// `storage_configuration`. Returns [`ModelarDbStorageError`] if a connection to the specified - /// object store could not be created. - pub fn try_remote_from_storage_configuration( - storage_configuration: protocol::manager_metadata::StorageConfiguration, - ) -> Result { - match storage_configuration { - protocol::manager_metadata::StorageConfiguration::S3Configuration(s3_configuration) => { - // Register the S3 storage handlers to allow the use of Amazon S3 object stores. - // This is required at runtime to initialize the S3 storage implementation in the - // deltalake_aws storage subcrate. - deltalake::aws::register_handlers(None); - - Self::try_from_s3_configuration( - s3_configuration.endpoint, - s3_configuration.bucket_name, - s3_configuration.access_key_id, - s3_configuration.secret_access_key, - ) - } - protocol::manager_metadata::StorageConfiguration::AzureConfiguration( - azure_configuration, - ) => Self::try_from_azure_configuration( - azure_configuration.account_name, - azure_configuration.access_key, - azure_configuration.container_name, - ), - } - } - - /// Create a new [`DeltaLake`] that manages the Delta tables in an object store with an - /// S3-compatible API. Returns a [`ModelarDbStorageError`] if a connection to the object store - /// could not be made. - pub fn try_from_s3_configuration( - endpoint: String, - bucket_name: String, - access_key_id: String, - secret_access_key: String, - ) -> Result { - let location = format!("s3://{bucket_name}"); - - // TODO: Determine if it is safe to use AWS_S3_ALLOW_UNSAFE_RENAME. 
- let storage_options = HashMap::from([ - ("aws_access_key_id".to_owned(), access_key_id), - ("aws_secret_access_key".to_owned(), secret_access_key), - ("aws_endpoint_url".to_owned(), endpoint), - ("aws_bucket_name".to_owned(), bucket_name), - ("aws_s3_allow_unsafe_rename".to_owned(), "true".to_owned()), - ]); - - let url = Url::parse(&location) - .map_err(|error| ModelarDbStorageError::InvalidArgument(error.to_string()))?; - - // Build the Amazon S3 object store with the given storage options manually to allow http. - let object_store = storage_options - .iter() - .fold( - AmazonS3Builder::new() - .with_url(url.to_string()) - .with_allow_http(true), - |builder, (key, value)| match key.parse() { - Ok(k) => builder.with_config(k, value), - Err(_) => builder, - }, - ) - .build()?; - - Ok(DeltaLake { - location, - storage_options, - object_store: Arc::new(object_store), - delta_table_cache: DashMap::new(), - }) - } - - /// Create a new [`DeltaLake`] that manages the Delta tables in an object store with an - /// Azure-compatible API. Returns a [`ModelarDbStorageError`] if a connection to the object - /// store could not be made. - pub fn try_from_azure_configuration( - account_name: String, - access_key: String, - container_name: String, - ) -> Result { - let location = format!("az://{container_name}"); - - // TODO: Needs to be tested. - let storage_options = HashMap::from([ - ("azure_storage_account_name".to_owned(), account_name), - ("azure_storage_account_key".to_owned(), access_key), - ("azure_container_name".to_owned(), container_name), - ]); - let url = Url::parse(&location) - .map_err(|error| ModelarDbStorageError::InvalidArgument(error.to_string()))?; - let (object_store, _path) = object_store::parse_url_opts(&url, &storage_options)?; - - Ok(DeltaLake { - location, - storage_options, - object_store: Arc::new(object_store), - delta_table_cache: DashMap::new(), - }) - } - - /// Return an [`ObjectStore`] to access the root of the Delta Lake. 
- pub fn object_store(&self) -> Arc { - self.object_store.clone() - } - - /// Return a [`DeltaTable`] for manipulating the metadata table with `table_name` in the - /// Delta Lake, or a [`ModelarDbStorageError`] if a connection to the Delta Lake cannot be - /// established or the table does not exist. - pub async fn metadata_delta_table(&self, table_name: &str) -> Result { - let table_path = self.location_of_metadata_table(table_name); - self.delta_table_from_path(&table_path).await - } - - /// Return a [`DeltaTable`] for manipulating the table with `table_name` in the Delta Lake, or a - /// [`ModelarDbStorageError`] if a connection to the Delta Lake cannot be established or the - /// table does not exist. - pub async fn delta_table(&self, table_name: &str) -> Result { - let table_path = self.location_of_compressed_table(table_name); - self.delta_table_from_path(&table_path).await - } - - /// Return a [`DeltaOps`] for manipulating the metadata table with `table_name` in the Delta - /// Lake, or a [`ModelarDbStorageError`] if a connection to the Delta Lake cannot be established - /// or the table does not exist. - pub async fn metadata_delta_ops(&self, table_name: &str) -> Result { - let table_path = self.location_of_metadata_table(table_name); - self.delta_table_from_path(&table_path) - .await - .map(Into::into) - } - - /// Return a [`DeltaOps`] for manipulating the table with `table_name` in the Delta Lake, or a - /// [`ModelarDbStorageError`] if a connection to the Delta Lake cannot be established or the - /// table does not exist. - pub async fn delta_ops(&self, table_name: &str) -> Result { - let table_path = self.location_of_compressed_table(table_name); - self.delta_table_from_path(&table_path) - .await - .map(Into::into) - } - - /// Return a [`DeltaTable`] for manipulating the table at `table_path` in the Delta Lake, or a - /// [`ModelarDbStorageError`] if a connection to the Delta Lake cannot be established or the - /// table does not exist. 
- async fn delta_table_from_path(&self, table_path: &str) -> Result { - // Use the cache if possible and load to get the latest table data. - if let Some(mut delta_table) = self.delta_table_cache.get_mut(table_path) { - delta_table.load().await?; - Ok(delta_table.clone()) - } else { - // If the table is not in the cache, open it and add it to the cache before returning. - let delta_table = deltalake::open_table_with_storage_options( - &table_path, - self.storage_options.clone(), - ) - .await?; - - self.delta_table_cache - .insert(table_path.to_owned(), delta_table.clone()); - - Ok(delta_table) - } - } - - /// Return a [`DeltaTableWriter`] for writing to the time series table with `delta_table` in the - /// Delta Lake, or a [`ModelarDbStorageError`] if a connection to the Delta Lake cannot be - /// established or the table does not exist. - pub async fn time_series_table_writer( - &self, - delta_table: DeltaTable, - ) -> Result { - let partition_columns = vec![FIELD_COLUMN.to_owned()]; - - // Specify that the file must be sorted by the tag columns and then by start_time. - let base_compressed_schema_len = COMPRESSED_SCHEMA.0.fields().len(); - let compressed_schema_len = TableProvider::schema(&delta_table).fields().len(); - let sorting_columns_len = (compressed_schema_len - base_compressed_schema_len) + 1; - let mut sorting_columns = Vec::with_capacity(sorting_columns_len); - - // Compressed segments have the tag columns at the end of the schema. - for tag_column_index in base_compressed_schema_len..compressed_schema_len { - sorting_columns.push(SortingColumn::new(tag_column_index as i32, false, false)); - } - - // Compressed segments store the first timestamp in the second column. 
- sorting_columns.push(SortingColumn::new(1, false, false)); - - let writer_properties = apache_parquet_writer_properties(Some(sorting_columns)); - DeltaTableWriter::try_new(delta_table, partition_columns, writer_properties) - } - - /// Return a [`DeltaTableWriter`] for writing to the table with `delta_table` in the Delta Lake, - /// or a [`ModelarDbStorageError`] if a connection to the Delta Lake cannot be established or - /// the table does not exist. - pub async fn normal_or_metadata_table_writer( - &self, - delta_table: DeltaTable, - ) -> Result { - let writer_properties = apache_parquet_writer_properties(None); - DeltaTableWriter::try_new(delta_table, vec![], writer_properties) - } - - /// Create a Delta Lake table for a metadata table with `table_name` and `schema` if it does not - /// already exist. If the metadata table could not be created, [`ModelarDbStorageError`] is - /// returned. An error is not returned if the metadata table already exists. - pub async fn create_metadata_table( - &self, - table_name: &str, - schema: &Schema, - ) -> Result { - self.create_table( - table_name, - schema, - &[], - self.location_of_metadata_table(table_name), - SaveMode::Ignore, - ) - .await - } - - /// Create a Delta Lake table for a normal table with `table_name` and `schema` if it does not - /// already exist. If the normal table could not be created, e.g., because it already exists, - /// [`ModelarDbStorageError`] is returned. - pub async fn create_normal_table( - &self, - table_name: &str, - schema: &Schema, - ) -> Result { - self.create_table( - table_name, - schema, - &[], - self.location_of_compressed_table(table_name), - SaveMode::ErrorIfExists, - ) - .await - } - - /// Create a Delta Lake table for a time series table with `time_series_table_metadata` if it - /// does not already exist. Returns [`DeltaTable`] if the table could be created and - /// [`ModelarDbStorageError`] if it could not. 
- pub async fn create_time_series_table( - &self, - time_series_table_metadata: &TimeSeriesTableMetadata, - ) -> Result { - self.create_table( - &time_series_table_metadata.name, - &time_series_table_metadata.compressed_schema, - &[FIELD_COLUMN.to_owned()], - self.location_of_compressed_table(&time_series_table_metadata.name), - SaveMode::ErrorIfExists, - ) - .await - } - - /// Create a Delta Lake table with `table_name`, `schema`, and `partition_columns` if it does - /// not already exist. Returns [`DeltaTable`] if the table could be created and - /// [`ModelarDbStorageError`] if it could not. - async fn create_table( - &self, - table_name: &str, - schema: &Schema, - partition_columns: &[String], - location: String, - save_mode: SaveMode, - ) -> Result { - let mut columns: Vec = Vec::with_capacity(schema.fields().len()); - for field in schema.fields() { - let field: &Field = field; - - // Delta Lake does not support unsigned integers. Thus tables containing the Apache - // Arrow types UInt8, UInt16, UInt32, and UInt64 must currently be rejected. - match field.data_type() { - DataType::UInt8 | DataType::UInt16 | DataType::UInt32 | DataType::UInt64 => { - Err(DeltaTableError::SchemaMismatch { - msg: "Unsigned integers are not supported.".to_owned(), - })? - } - _ => {} // All possible cases must be handled. - } - - let struct_field: StructField = field.try_into()?; - columns.push(struct_field); - } - - let delta_table = CreateBuilder::new() - .with_storage_options(self.storage_options.clone()) - .with_table_name(table_name) - .with_location(location.clone()) - .with_columns(columns) - .with_partition_columns(partition_columns) - .with_save_mode(save_mode) - .await?; - - // If the table was created successfully, add it to the cache. - self.delta_table_cache.insert(location, delta_table.clone()); - - Ok(delta_table) - } - - /// Drop the metadata Delta Lake table with `table_name` from the Delta Lake by deleting every - /// file related to the table. 
The table folder cannot be deleted directly since folders do not - /// exist in object stores and therefore cannot be operated upon. If the table was dropped - /// successfully, the paths to the deleted files are returned, otherwise a - /// [`ModelarDbStorageError`] is returned. - pub async fn drop_metadata_table(&self, table_name: &str) -> Result> { - let table_path = format!("{METADATA_FOLDER}/{table_name}"); - self.delete_table_files(&table_path).await - } - - /// Drop the Delta Lake table with `table_name` from the Delta Lake by deleting every file - /// related to the table. The table folder cannot be deleted directly since folders do not exist - /// in object stores and therefore cannot be operated upon. If the table was dropped - /// successfully, the paths to the deleted files are returned, otherwise a - /// [`ModelarDbStorageError`] is returned. - pub async fn drop_table(&self, table_name: &str) -> Result> { - let table_path = format!("{TABLE_FOLDER}/{table_name}"); - self.delete_table_files(&table_path).await - } - - /// Delete all files in the folder at `table_path` using bulk operations if available. If the - /// files were deleted successfully, the paths to the deleted files are returned. - async fn delete_table_files(&self, table_path: &str) -> Result> { - let file_locations = self - .object_store - .list(Some(&Path::from(table_path))) - .map_ok(|object_meta| object_meta.location) - .boxed(); - - let deleted_paths = self - .object_store - .delete_stream(file_locations) - .try_collect::>() - .await?; - - // Remove the table from the cache. - let delta_table_path = format!("{}/{}", self.location, table_path); - self.delta_table_cache.remove(&delta_table_path); - - Ok(deleted_paths) - } - - /// Truncate the Delta Lake table with `table_name` by deleting all rows in the table. If the - /// rows could not be deleted, a [`ModelarDbStorageError`] is returned. 
- pub async fn truncate_table(&self, table_name: &str) -> Result<()> { - let delta_table_ops = self.delta_ops(table_name).await?; - delta_table_ops.delete().await?; - - Ok(()) - } - - /// Vacuum the Delta Lake table with `table_name` by deleting stale files that are older than - /// `maybe_retention_period_in_seconds` seconds. If a retention period is not given, the - /// default retention period of 7 days is used. If the retention period is larger than - /// [`MAX_RETENTION_PERIOD_IN_SECONDS`] seconds or the files could not be deleted, a - /// [`ModelarDbStorageError`] is returned. - pub async fn vacuum_table( - &self, - table_name: &str, - maybe_retention_period_in_seconds: Option, - ) -> Result<()> { - let delta_table_ops = self.delta_ops(table_name).await?; - - let retention_period_in_seconds = - maybe_retention_period_in_seconds.unwrap_or(60 * 60 * 24 * 7); - - let retention_period = TimeDelta::new(retention_period_in_seconds as i64, 0).ok_or( - ModelarDbStorageError::InvalidArgument(format!( - "Retention period cannot be more than {MAX_RETENTION_PERIOD_IN_SECONDS} seconds." - )), - )?; - - delta_table_ops - .vacuum() - .with_retention_period(retention_period) - .with_enforce_retention_duration(false) - .await?; - - Ok(()) - } - - /// Write `columns` to a metadata Delta Lake table with `table_name`. Returns an updated - /// [`DeltaTable`] version if the file was written successfully, otherwise returns - /// [`ModelarDbStorageError`]. - pub async fn write_columns_to_metadata_table( - &self, - table_name: &str, - columns: Vec, - ) -> Result { - let delta_table = self.metadata_delta_table(table_name).await?; - let record_batch = RecordBatch::try_new(TableProvider::schema(&delta_table), columns)?; - let delta_table_writer = self.normal_or_metadata_table_writer(delta_table).await?; - self.write_record_batches_to_table(delta_table_writer, vec![record_batch]) - .await - } - - /// Write `record_batches` to a Delta Lake table for a normal table with `table_name`. 
Returns - /// an updated [`DeltaTable`] version if the file was written successfully, otherwise returns - /// [`ModelarDbStorageError`]. - pub async fn write_record_batches_to_normal_table( - &self, - table_name: &str, - record_batches: Vec, - ) -> Result { - let delta_table = self.delta_table(table_name).await?; - let delta_table_writer = self.normal_or_metadata_table_writer(delta_table).await?; - self.write_record_batches_to_table(delta_table_writer, record_batches) - .await - } - - /// Write `record_batches` with segments to a Delta Lake table for a time series table with - /// `table_name`. Returns an updated [`DeltaTable`] if the file was written successfully, - /// otherwise returns [`ModelarDbStorageError`]. - pub async fn write_compressed_segments_to_time_series_table( - &self, - table_name: &str, - compressed_segments: Vec, - ) -> Result { - let delta_table = self.delta_table(table_name).await?; - let delta_table_writer = self.time_series_table_writer(delta_table).await?; - self.write_record_batches_to_table(delta_table_writer, compressed_segments) - .await - } - - /// Write `record_batches` to the `delta_table_writer` and commit. Returns an updated - /// [`DeltaTable`] if all `record_batches` are written and committed successfully, otherwise it - /// rollback all writes done using `delta_table_writer` and returns [`ModelarDbStorageError`]. - async fn write_record_batches_to_table( - &self, - mut delta_table_writer: DeltaTableWriter, - record_batches: Vec, - ) -> Result { - match delta_table_writer.write_all(&record_batches).await { - Ok(_) => delta_table_writer.commit().await, - Err(error) => { - delta_table_writer.rollback().await?; - Err(error) - } - } - } - - /// Return the location of the compressed time series or normal table with `table_name`. - fn location_of_compressed_table(&self, table_name: &str) -> String { - format!("{}/{TABLE_FOLDER}/{table_name}", self.location) - } - - /// Return the location of the metadata table with `table_name`. 
- fn location_of_metadata_table(&self, table_name: &str) -> String { - format!("{}/{METADATA_FOLDER}/{table_name}", self.location) - } -} - -/// Functionality for transactionally writing [`RecordBatches`](RecordBatch) to a Delta table stored -/// in an object store. -pub struct DeltaTableWriter { - /// Delta table that all of the record batches will be written to. - delta_table: DeltaTable, - /// Checker that ensures all of the record batches match the table. - delta_data_checker: DeltaDataChecker, - /// Write operation that will be committed to the Delta table. - delta_operation: DeltaOperation, - /// Unique identifier for this write operation to the Delta table. - operation_id: Uuid, - /// Writes record batches to the Delta table as Apache Parquet files. - delta_writer: DeltaWriter, -} - -impl DeltaTableWriter { - /// Create a new [`DeltaTableWriter`]. Returns a [`ModelarDbStorageError`] if the state of the - /// Delta table cannot be loaded from `delta_table`. - pub fn try_new( - delta_table: DeltaTable, - partition_columns: Vec, - writer_properties: WriterProperties, - ) -> Result { - // Checker for if record batches match the table’s invariants, constraints, and nullability. - let snapshot = delta_table.snapshot()?; - let delta_data_checker = DeltaDataChecker::new(snapshot); - - // Operation that will be committed. - let delta_operation = DeltaOperation::Write { - mode: SaveMode::Append, - partition_by: if partition_columns.is_empty() { - None - } else { - Some(partition_columns.clone()) - }, - predicate: None, - }; - - // A UUID version 4 is used as the operation id to match the existing Operation trait in the - // deltalake crate as it is pub(trait) and thus cannot be used directly in DeltaTableWriter. - let operation_id = Uuid::new_v4(); - - // Writer that will write the record batches. 
- let object_store = delta_table.log_store().object_store(Some(operation_id)); - let table_schema: Arc = TableProvider::schema(&delta_table); - let num_indexed_cols = table_schema.fields.len() as i32; - let writer_config = WriterConfig::new( - table_schema, - partition_columns, - Some(writer_properties), - None, - None, - num_indexed_cols, - None, - ); - let delta_writer = DeltaWriter::new(object_store, writer_config); - - Ok(Self { - delta_table, - delta_data_checker, - delta_operation, - operation_id, - delta_writer, - }) - } - - /// Write `record_batch` to the Delta table. Returns a [`ModelarDbStorageError`] if the - /// [`RecordBatches`](RecordBatch) does not match the schema of the Delta table or if the - /// writing fails. - pub async fn write(&mut self, record_batch: &RecordBatch) -> Result<()> { - self.delta_data_checker.check_batch(record_batch).await?; - self.delta_writer.write(record_batch).await?; - Ok(()) - } - - /// Write all `record_batches` to the Delta table. Returns a [`ModelarDbStorageError`] if one of - /// the [`RecordBatches`](RecordBatch) does not match the schema of the Delta table or if the - /// writing fails. - pub async fn write_all(&mut self, record_batches: &[RecordBatch]) -> Result<()> { - for record_batch in record_batches { - self.write(record_batch).await?; - } - Ok(()) - } - - /// Consume the [`DeltaTableWriter`], finish the writing, and commit the files that have been - /// written to the log. If an error occurs before the commit is finished, the already written - /// files are deleted if possible. Returns a [`ModelarDbStorageError`] if an error occurs when - /// finishing the writing, committing the files that have been written, deleting the written - /// files, or updating the [`DeltaTable`]. - pub async fn commit(mut self) -> Result { - // Write the remaining buffered files. - let added_files = self.delta_writer.close().await?; - - // Clone added_files in case of rollback. 
- let actions = added_files - .clone() - .into_iter() - .map(Action::Add) - .collect::>(); - - // Prepare all inputs to the commit. - let object_store = self.delta_table.object_store(); - let commit_properties = CommitProperties::default(); - let table_data = match self.delta_table.snapshot() { - Ok(table_data) => table_data, - Err(delta_table_error) => { - delete_added_files(&object_store, added_files).await?; - return Err(ModelarDbStorageError::DeltaLake(delta_table_error)); - } - }; - let log_store = self.delta_table.log_store(); - - // Construct the commit to be written. - let commit_builder = CommitBuilder::from(commit_properties) - .with_actions(actions) - .with_operation_id(self.operation_id) - .build(Some(table_data), log_store, self.delta_operation); - - // Write the commit to the Delta table. - let _finalized_commit = match commit_builder.await { - Ok(finalized_commit) => finalized_commit, - Err(delta_table_error) => { - delete_added_files(&object_store, added_files).await?; - return Err(ModelarDbStorageError::DeltaLake(delta_table_error)); - } - }; - - // Return Delta table with the commit. - self.delta_table.load().await?; - Ok(self.delta_table) - } - - /// Consume the [`DeltaTableWriter`], abort the writing, and delete all of the files that have - /// already been written. Returns a [`ModelarDbStorageError`] if an error occurs when aborting - /// the writing or deleting the files that have already been written. Rollback is not called - /// automatically as drop() is not async and async_drop() is not yet a stable API. - pub async fn rollback(self) -> Result { - let object_store = self.delta_table.object_store(); - let added_files = self.delta_writer.close().await?; - delete_added_files(&object_store, added_files).await?; - Ok(self.delta_table) - } -} - -/// Delete the `added_files` from `object_store`. Returns a [`ModelarDbStorageError`] if a file -/// could not be deleted. 
It is a function instead of a method on [`DeltaTableWriter`] so it can be -/// called by [`DeltaTableWriter`] after the [`DeltaWriter`] is closed without lifetime issues. -async fn delete_added_files(object_store: &dyn ObjectStore, added_files: Vec) -> Result<()> { - for add_file in added_files { - let path: Path = Path::from(add_file.path); - object_store.delete(&path).await?; - } - Ok(()) -} diff --git a/crates/modelardb_storage/src/lib.rs b/crates/modelardb_storage/src/lib.rs index 12facfa4b..67f78167f 100644 --- a/crates/modelardb_storage/src/lib.rs +++ b/crates/modelardb_storage/src/lib.rs @@ -16,9 +16,8 @@ //! Utility functions to register metadata tables, normal tables, and time series tables with Apache //! DataFusion and to read and write Apache Parquet files to and from an object store. -pub mod delta_lake; +pub mod data_folder; pub mod error; -pub mod metadata; mod optimizer; pub mod parser; mod query; @@ -31,7 +30,7 @@ use arrow::compute; use arrow::compute::concat_batches; use arrow::datatypes::Schema; use bytes::Bytes; -use datafusion::catalog::TableProvider; +use datafusion::catalog::{MemorySchemaProvider, TableProvider}; use datafusion::datasource::sink::DataSink; use datafusion::execution::SendableRecordBatchStream; use datafusion::execution::session_state::SessionStateBuilder; @@ -44,6 +43,7 @@ use datafusion::parquet::errors::ParquetError; use datafusion::parquet::file::properties::{EnabledStatistics, WriterProperties}; use datafusion::parquet::format::SortingColumn; use datafusion::prelude::SessionContext; +use datafusion::sql::TableReference; use datafusion::sql::parser::Statement as DFStatement; use deltalake::DeltaTable; use futures::StreamExt; @@ -78,7 +78,15 @@ pub fn create_session_context() -> SessionContext { } let session_state = session_state_builder.build(); - SessionContext::new_with_state(session_state) + let session_context = SessionContext::new_with_state(session_state); + let default_catalog = session_context + 
.catalog("datafusion") + .expect("The datafusion catalog should always exist."); + default_catalog + .register_schema("metadata", Arc::new(MemorySchemaProvider::new())) + .expect("Catalog register schema should never fail."); + + session_context } /// Register the metadata table stored in `delta_table` with `table_name` in `session_context`. If @@ -89,8 +97,9 @@ pub fn register_metadata_table( table_name: &str, delta_table: DeltaTable, ) -> Result<()> { + let table_reference = TableReference::partial("metadata", table_name); let metadata_table = Arc::new(MetadataTable::new(delta_table)); - session_context.register_table(table_name, metadata_table)?; + session_context.register_table(table_reference, metadata_table)?; Ok(()) } diff --git a/crates/modelardb_storage/src/metadata/mod.rs b/crates/modelardb_storage/src/metadata/mod.rs deleted file mode 100644 index 088d50532..000000000 --- a/crates/modelardb_storage/src/metadata/mod.rs +++ /dev/null @@ -1,19 +0,0 @@ -/* Copyright 2023 The ModelarDB Contributors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -//! Implementation of [`TableMetadataManager`](table_metadata_manager::TableMetadataManager) which -//! provides functionality to access table related metadata in the metadata Delta Lake. 
- -pub mod table_metadata_manager; diff --git a/crates/modelardb_storage/src/metadata/table_metadata_manager.rs b/crates/modelardb_storage/src/metadata/table_metadata_manager.rs deleted file mode 100644 index 7bde72bf6..000000000 --- a/crates/modelardb_storage/src/metadata/table_metadata_manager.rs +++ /dev/null @@ -1,1016 +0,0 @@ -/* Copyright 2024 The ModelarDB Contributors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -//! Table metadata manager that includes functionality used to access both the server metadata Delta Lake -//! and the manager metadata Delta Lake. Note that the entire server metadata Delta Lake can be accessed -//! through this metadata manager, while it only supports a subset of the manager metadata Delta Lake. 
- -use std::path::Path as StdPath; -use std::sync::Arc; - -use arrow::array::{Array, BinaryArray, BooleanArray, Float32Array, Int16Array, StringArray}; -use arrow::datatypes::{ArrowPrimitiveType, DataType, Field, Schema}; -use datafusion::common::{DFSchema, ToDFSchema}; -use datafusion::logical_expr::{Expr, lit}; -use datafusion::prelude::{SessionContext, col}; -use datafusion_proto::bytes::Serializeable; -use modelardb_types::flight::protocol; -use modelardb_types::functions::{try_convert_bytes_to_schema, try_convert_schema_to_bytes}; -use modelardb_types::types::{ArrowValue, ErrorBound, GeneratedColumn, TimeSeriesTableMetadata}; - -use crate::delta_lake::DeltaLake; -use crate::error::{ModelarDbStorageError, Result}; -use crate::{register_metadata_table, sql_and_concat}; - -/// Types of tables supported by ModelarDB. -enum TableType { - NormalTable, - TimeSeriesTable, -} - -/// Stores the metadata required for reading from and writing to the normal tables and time series -/// tables. The data that needs to be persisted is stored in the metadata Delta Lake. -pub struct TableMetadataManager { - /// Delta Lake with functionality to read and write to and from the metadata tables. - delta_lake: DeltaLake, - /// Session context used to query the metadata Delta Lake tables using Apache DataFusion. - session_context: Arc, -} - -impl TableMetadataManager { - /// Create a new [`TableMetadataManager`] that saves the metadata to an object store given by - /// `local_url` and initialize the metadata tables. If `local_url` could not be parsed or the - /// metadata tables could not be created, return [`ModelarDbStorageError`]. 
- pub async fn try_from_local_url(local_url: &str) -> Result { - let table_metadata_manager = Self { - delta_lake: DeltaLake::try_from_local_url(local_url)?, - session_context: Arc::new(SessionContext::new()), - }; - - table_metadata_manager - .create_and_register_metadata_delta_lake_tables() - .await?; - - Ok(table_metadata_manager) - } - - /// Create a new [`TableMetadataManager`] that saves the metadata to an in-memory Delta Lake and - /// initialize the metadata tables. If the metadata tables could not be created, return - /// [`ModelarDbStorageError`]. - pub async fn try_new_in_memory() -> Result { - let table_metadata_manager = Self { - delta_lake: DeltaLake::new_in_memory(), - session_context: Arc::new(SessionContext::new()), - }; - - table_metadata_manager - .create_and_register_metadata_delta_lake_tables() - .await?; - - Ok(table_metadata_manager) - } - - /// Create a new [`TableMetadataManager`] that saves the metadata to `folder_path` and - /// initialize the metadata tables. If the metadata tables could not be created, return - /// [`ModelarDbStorageError`]. - pub async fn try_from_path(folder_path: &StdPath) -> Result { - let table_metadata_manager = Self { - delta_lake: DeltaLake::try_from_local_path(folder_path)?, - session_context: Arc::new(SessionContext::new()), - }; - - table_metadata_manager - .create_and_register_metadata_delta_lake_tables() - .await?; - - Ok(table_metadata_manager) - } - - /// Create a new [`TableMetadataManager`] that saves the metadata to a remote object store given - /// by `storage_configuration` and initialize the metadata tables. If a connection could not be - /// made or the metadata tables could not be created, return [`ModelarDbStorageError`]. 
- pub async fn try_from_storage_configuration( - storage_configuration: protocol::manager_metadata::StorageConfiguration, - ) -> Result { - let table_metadata_manager = Self { - delta_lake: DeltaLake::try_remote_from_storage_configuration(storage_configuration)?, - session_context: Arc::new(SessionContext::new()), - }; - - table_metadata_manager - .create_and_register_metadata_delta_lake_tables() - .await?; - - Ok(table_metadata_manager) - } - - /// Create a new [`TableMetadataManager`] that saves the metadata to a remote S3-compatible - /// object store and initialize the metadata tables. If the connection cannot be made or the - /// metadata tables could not be created, return [`ModelarDbStorageError`]. - pub async fn try_from_s3_configuration( - endpoint: String, - bucket_name: String, - access_key_id: String, - secret_access_key: String, - ) -> Result { - let table_metadata_manager = Self { - delta_lake: DeltaLake::try_from_s3_configuration( - endpoint, - bucket_name, - access_key_id, - secret_access_key, - )?, - session_context: Arc::new(SessionContext::new()), - }; - - table_metadata_manager - .create_and_register_metadata_delta_lake_tables() - .await?; - - Ok(table_metadata_manager) - } - - /// Create a new [`TableMetadataManager`] that saves the metadata to a remote Azure-compatible - /// object store and initialize the metadata tables. If the connection cannot be made or the - /// metadata tables could not be created, return [`ModelarDbStorageError`]. 
- pub async fn try_from_azure_configuration( - account_name: String, - access_key: String, - container_name: String, - ) -> Result { - let table_metadata_manager = Self { - delta_lake: DeltaLake::try_from_azure_configuration( - account_name, - access_key, - container_name, - )?, - session_context: Arc::new(SessionContext::new()), - }; - - table_metadata_manager - .create_and_register_metadata_delta_lake_tables() - .await?; - - Ok(table_metadata_manager) - } - - /// If they do not already exist, create the tables in the metadata Delta Lake for normal table - /// and time series table metadata and register them with the Apache DataFusion session context. - /// * The `normal_table_metadata` table contains the metadata for normal tables. - /// * The `time_series_table_metadata` table contains the main metadata for time series tables. - /// * The `time_series_table_field_columns` table contains the name, index, error bound value, - /// whether error bound is relative, and generation expression of the field columns in each - /// time series table. - /// - /// If the tables exist or were created, return [`Ok`], otherwise return - /// [`ModelarDbStorageError`]. - async fn create_and_register_metadata_delta_lake_tables(&self) -> Result<()> { - // Create and register the normal_table_metadata table if it does not exist. - let delta_table = self - .delta_lake - .create_metadata_table( - "normal_table_metadata", - &Schema::new(vec![Field::new("table_name", DataType::Utf8, false)]), - ) - .await?; - - register_metadata_table(&self.session_context, "normal_table_metadata", delta_table)?; - - // Create and register the time_series_table_metadata table if it does not exist. 
- let delta_table = self - .delta_lake - .create_metadata_table( - "time_series_table_metadata", - &Schema::new(vec![ - Field::new("table_name", DataType::Utf8, false), - Field::new("query_schema", DataType::Binary, false), - ]), - ) - .await?; - - register_metadata_table( - &self.session_context, - "time_series_table_metadata", - delta_table, - )?; - - // Create and register the time_series_table_field_columns table if it does not exist. Note - // that column_index will only use a maximum of 10 bits. generated_column_expr is NULL if - // the fields are stored as segments. - let delta_table = self - .delta_lake - .create_metadata_table( - "time_series_table_field_columns", - &Schema::new(vec![ - Field::new("table_name", DataType::Utf8, false), - Field::new("column_name", DataType::Utf8, false), - Field::new("column_index", DataType::Int16, false), - Field::new("error_bound_value", DataType::Float32, false), - Field::new("error_bound_is_relative", DataType::Boolean, false), - Field::new("generated_column_expr", DataType::Binary, true), - ]), - ) - .await?; - - register_metadata_table( - &self.session_context, - "time_series_table_field_columns", - delta_table, - )?; - - Ok(()) - } - - /// Return `true` if the table with `table_name` is a normal table, otherwise return `false`. - pub async fn is_normal_table(&self, table_name: &str) -> Result { - Ok(self - .normal_table_names() - .await? - .contains(&table_name.to_owned())) - } - - /// Return `true` if the table with `table_name` is a time series table, otherwise return `false`. - pub async fn is_time_series_table(&self, table_name: &str) -> Result { - Ok(self - .time_series_table_names() - .await? - .contains(&table_name.to_owned())) - } - - /// Return the name of each table currently in the metadata Delta Lake. If the table names - /// cannot be retrieved, [`ModelarDbStorageError`] is returned. 
- pub async fn table_names(&self) -> Result> { - let normal_table_names = self.normal_table_names().await?; - let time_series_table_names = self.time_series_table_names().await?; - - let mut table_names = normal_table_names; - table_names.extend(time_series_table_names); - - Ok(table_names) - } - - /// Return the name of each normal table currently in the metadata Delta Lake. Note that this - /// does not include time series tables. If the normal table names cannot be retrieved, - /// [`ModelarDbStorageError`] is returned. - pub async fn normal_table_names(&self) -> Result> { - self.table_names_of_type(TableType::NormalTable).await - } - - /// Return the name of each time series table currently in the metadata Delta Lake. Note that - /// this does not include normal tables. If the time series table names cannot be retrieved, - /// [`ModelarDbStorageError`] is returned. - pub async fn time_series_table_names(&self) -> Result> { - self.table_names_of_type(TableType::TimeSeriesTable).await - } - - /// Return the name of tables of `table_type`. Returns [`ModelarDbStorageError`] if the table - /// names cannot be retrieved. - async fn table_names_of_type(&self, table_type: TableType) -> Result> { - let table_type = match table_type { - TableType::NormalTable => "normal_table", - TableType::TimeSeriesTable => "time_series_table", - }; - - let sql = format!("SELECT table_name FROM {table_type}_metadata"); - let batch = sql_and_concat(&self.session_context, &sql).await?; - - let table_names = modelardb_types::array!(batch, 0, StringArray); - Ok(table_names.iter().flatten().map(str::to_owned).collect()) - } - - /// Save the created normal table to the metadata Delta Lake. This consists of adding a row to - /// the `normal_table_metadata` table with the `name` of the table. If the normal table metadata - /// was saved, return [`Ok`], otherwise return [`ModelarDbStorageError`]. 
- pub async fn save_normal_table_metadata(&self, name: &str) -> Result<()> { - self.delta_lake - .write_columns_to_metadata_table( - "normal_table_metadata", - vec![Arc::new(StringArray::from(vec![name]))], - ) - .await?; - - Ok(()) - } - - /// Save the created time series table to the metadata Delta Lake. This includes adding a row to - /// the `time_series_table_metadata` table and adding a row to the `time_series_table_field_columns` - /// table for each field column. - pub async fn save_time_series_table_metadata( - &self, - time_series_table_metadata: &TimeSeriesTableMetadata, - ) -> Result<()> { - // Convert the query schema to bytes, so it can be saved in the metadata Delta Lake. - let query_schema_bytes = - try_convert_schema_to_bytes(&time_series_table_metadata.query_schema)?; - - // Add a new row in the time_series_table_metadata table to persist the time series table. - self.delta_lake - .write_columns_to_metadata_table( - "time_series_table_metadata", - vec![ - Arc::new(StringArray::from(vec![ - time_series_table_metadata.name.clone(), - ])), - Arc::new(BinaryArray::from_vec(vec![&query_schema_bytes])), - ], - ) - .await?; - - // Add a row for each field column to the time_series_table_field_columns table. - for (query_schema_index, field) in time_series_table_metadata - .query_schema - .fields() - .iter() - .enumerate() - { - if field.data_type() == &ArrowValue::DATA_TYPE { - // Convert the generated column expression to bytes, if it exists. - let maybe_generated_column_expr = match time_series_table_metadata - .generated_columns - .get(query_schema_index) - { - Some(Some(generated_column)) => { - Some(generated_column.expr.to_bytes()?.to_vec()) - } - _ => None, - }; - - // error_bounds matches schema and not query_schema to simplify looking up the error - // bound during ingestion as it occurs far more often than creation of time series tables. 
- let (error_bound_value, error_bound_is_relative) = if let Ok(schema_index) = - time_series_table_metadata.schema.index_of(field.name()) - { - match time_series_table_metadata.error_bounds[schema_index] { - ErrorBound::Absolute(value) => (value, false), - ErrorBound::Relative(value) => (value, true), - ErrorBound::Lossless => (0.0, false), - } - } else { - (0.0, false) - }; - - // query_schema_index is simply cast as a time series table contains at most 32767 columns. - self.delta_lake - .write_columns_to_metadata_table( - "time_series_table_field_columns", - vec![ - Arc::new(StringArray::from(vec![ - time_series_table_metadata.name.clone(), - ])), - Arc::new(StringArray::from(vec![field.name().clone()])), - Arc::new(Int16Array::from(vec![query_schema_index as i16])), - Arc::new(Float32Array::from(vec![error_bound_value])), - Arc::new(BooleanArray::from(vec![error_bound_is_relative])), - Arc::new(BinaryArray::from_opt_vec(vec![ - maybe_generated_column_expr.as_deref(), - ])), - ], - ) - .await?; - } - } - - Ok(()) - } - - /// Depending on the type of the table with `table_name`, drop either the normal table - /// metadata or the time series table metadata from the metadata Delta Lake. If the table does - /// not exist or the metadata could not be dropped, [`ModelarDbStorageError`] is returned. - pub async fn drop_table_metadata(&self, table_name: &str) -> Result<()> { - if self.is_normal_table(table_name).await? { - self.drop_normal_table_metadata(table_name).await - } else if self.is_time_series_table(table_name).await? { - self.drop_time_series_table_metadata(table_name).await - } else { - Err(ModelarDbStorageError::InvalidArgument(format!( - "Table with name '{table_name}' does not exist." - ))) - } - } - - /// Drop the metadata for the normal table with `table_name` from the `normal_table_metadata` - /// table in the metadata Delta Lake. If the metadata could not be dropped, - /// [`ModelarDbStorageError`] is returned. 
- async fn drop_normal_table_metadata(&self, table_name: &str) -> Result<()> { - let delta_ops = self - .delta_lake - .metadata_delta_ops("normal_table_metadata") - .await?; - - delta_ops - .delete() - .with_predicate(col("table_name").eq(lit(table_name))) - .await?; - - Ok(()) - } - - /// Drop the metadata for the time series table with `table_name` from the metadata Delta Lake. - /// This includes deleting a row from the `time_series_table_metadata` table and deleting a row - /// from the `time_series_table_field_columns` table for each field column. If the metadata - /// could not be dropped, [`ModelarDbStorageError`] is returned. - async fn drop_time_series_table_metadata(&self, table_name: &str) -> Result<()> { - // Delete the table metadata from the time_series_table_metadata table. - self.delta_lake - .metadata_delta_ops("time_series_table_metadata") - .await? - .delete() - .with_predicate(col("table_name").eq(lit(table_name))) - .await?; - - // Delete the column metadata from the time_series_table_field_columns table. - self.delta_lake - .metadata_delta_ops("time_series_table_field_columns") - .await? - .delete() - .with_predicate(col("table_name").eq(lit(table_name))) - .await?; - - Ok(()) - } - - /// Return the [`TimeSeriesTableMetadata`] of each time series table currently in the metadata - /// Delta Lake. If the [`TimeSeriesTableMetadata`] cannot be retrieved, - /// [`ModelarDbStorageError`] is returned. 
- pub async fn time_series_table_metadata(&self) -> Result>> { - let sql = "SELECT table_name, query_schema FROM time_series_table_metadata"; - let batch = sql_and_concat(&self.session_context, sql).await?; - - let mut time_series_table_metadata: Vec> = vec![]; - let table_name_array = modelardb_types::array!(batch, 0, StringArray); - let query_schema_bytes_array = modelardb_types::array!(batch, 1, BinaryArray); - - for row_index in 0..batch.num_rows() { - let table_name = table_name_array.value(row_index); - let query_schema_bytes = query_schema_bytes_array.value(row_index); - - let metadata = self - .time_series_table_metadata_row_to_time_series_table_metadata( - table_name, - query_schema_bytes, - ) - .await?; - - time_series_table_metadata.push(Arc::new(metadata)) - } - - Ok(time_series_table_metadata) - } - - /// Return the [`TimeSeriesTableMetadata`] for the time series table with `table_name` in the - /// metadata Delta Lake. If the [`TimeSeriesTableMetadata`] cannot be retrieved, - /// [`ModelarDbStorageError`] is returned. - pub async fn time_series_table_metadata_for_time_series_table( - &self, - table_name: &str, - ) -> Result { - let sql = format!( - "SELECT table_name, query_schema FROM time_series_table_metadata WHERE table_name = '{table_name}'" - ); - let batch = sql_and_concat(&self.session_context, &sql).await?; - - if batch.num_rows() == 0 { - return Err(ModelarDbStorageError::InvalidArgument(format!( - "No metadata for time series table named '{table_name}'." 
- ))); - } - - let table_name_array = modelardb_types::array!(batch, 0, StringArray); - let query_schema_bytes_array = modelardb_types::array!(batch, 1, BinaryArray); - - let table_name = table_name_array.value(0); - let query_schema_bytes = query_schema_bytes_array.value(0); - - self.time_series_table_metadata_row_to_time_series_table_metadata( - table_name, - query_schema_bytes, - ) - .await - } - - /// Convert a row from the table "time_series_table_metadata" to an instance of - /// [`TimeSeriesTableMetadata`]. Returns [`ModelarDbStorageError`] if a time_series table with - /// `table_name` does not exist or the bytes in `query_schema_bytes` are not a valid schema. - async fn time_series_table_metadata_row_to_time_series_table_metadata( - &self, - table_name: &str, - query_schema_bytes: &[u8], - ) -> Result { - let query_schema = try_convert_bytes_to_schema(query_schema_bytes.into())?; - - let error_bounds = self - .error_bounds(table_name, query_schema.fields().len()) - .await?; - - let df_query_schema = query_schema.clone().to_dfschema()?; - let generated_columns = self.generated_columns(table_name, &df_query_schema).await?; - - TimeSeriesTableMetadata::try_new( - table_name.to_owned(), - Arc::new(query_schema), - error_bounds, - generated_columns, - ) - .map_err(|error| error.into()) - } - - /// Return the error bounds for the columns in the time series table with `table_name`. If a - /// time series table with `table_name` does not exist, [`ModelarDbStorageError`] is returned. 
- async fn error_bounds( - &self, - table_name: &str, - query_schema_columns: usize, - ) -> Result> { - let sql = format!( - "SELECT column_index, error_bound_value, error_bound_is_relative - FROM time_series_table_field_columns - WHERE table_name = '{table_name}' - ORDER BY column_index" - ); - let batch = sql_and_concat(&self.session_context, &sql).await?; - - let mut column_to_error_bound = vec![ErrorBound::Lossless; query_schema_columns]; - - let column_index_array = modelardb_types::array!(batch, 0, Int16Array); - let error_bound_value_array = modelardb_types::array!(batch, 1, Float32Array); - let error_bound_is_relative_array = modelardb_types::array!(batch, 2, BooleanArray); - - for row_index in 0..batch.num_rows() { - let error_bound_index = column_index_array.value(row_index); - let error_bound_value = error_bound_value_array.value(row_index); - let error_bound_is_relative = error_bound_is_relative_array.value(row_index); - - if error_bound_value != 0.0 { - let error_bound = if error_bound_is_relative { - ErrorBound::try_new_relative(error_bound_value) - } else { - ErrorBound::try_new_absolute(error_bound_value) - }?; - - column_to_error_bound[error_bound_index as usize] = error_bound; - } - } - - Ok(column_to_error_bound) - } - - /// Return the generated columns for the time series table with `table_name` and `df_schema`. If - /// a time series table with `table_name` does not exist, [`ModelarDbStorageError`] is returned. 
- async fn generated_columns( - &self, - table_name: &str, - df_schema: &DFSchema, - ) -> Result>> { - let sql = format!( - "SELECT column_index, generated_column_expr - FROM time_series_table_field_columns - WHERE table_name = '{table_name}' - ORDER BY column_index" - ); - let batch = sql_and_concat(&self.session_context, &sql).await?; - - let mut generated_columns = vec![None; df_schema.fields().len()]; - - let column_index_array = modelardb_types::array!(batch, 0, Int16Array); - let generated_column_expr_array = modelardb_types::array!(batch, 1, BinaryArray); - - for row_index in 0..batch.num_rows() { - let generated_column_index = column_index_array.value(row_index); - let expr_bytes = generated_column_expr_array.value(row_index); - - // If generated_column_expr is null, it is saved as empty bytes in the column values. - if !expr_bytes.is_empty() { - let expr = Expr::from_bytes(expr_bytes)?; - let generated_column = GeneratedColumn::try_from_expr(expr, df_schema)?; - - generated_columns[generated_column_index as usize] = Some(generated_column); - } - } - - Ok(generated_columns) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - use arrow::datatypes::{ArrowPrimitiveType, Field}; - use datafusion::arrow::datatypes::DataType; - use datafusion::common::ScalarValue::Int64; - use datafusion::logical_expr::Expr::Literal; - use modelardb_test::table::{self, TIME_SERIES_TABLE_NAME}; - use modelardb_types::types::{ArrowTimestamp, ArrowValue}; - use tempfile::TempDir; - - // Tests for TableMetadataManager. - #[tokio::test] - async fn test_create_metadata_delta_lake_tables() { - let temp_dir = tempfile::tempdir().unwrap(); - let metadata_manager = TableMetadataManager::try_from_path(temp_dir.path()) - .await - .unwrap(); - - // Verify that the tables were created, registered, and has the expected columns. 
- assert!( - metadata_manager - .session_context - .sql("SELECT table_name FROM normal_table_metadata") - .await - .is_ok() - ); - - assert!( - metadata_manager - .session_context - .sql("SELECT table_name, query_schema FROM time_series_table_metadata") - .await - .is_ok() - ); - - assert!(metadata_manager - .session_context - .sql("SELECT table_name, column_name, column_index, error_bound_value, error_bound_is_relative, \ - generated_column_expr FROM time_series_table_field_columns") - .await - .is_ok()); - } - - #[tokio::test] - async fn test_normal_table_is_normal_table() { - let (_temp_dir, metadata_manager) = create_metadata_manager_and_save_normal_tables().await; - assert!( - metadata_manager - .is_normal_table("normal_table_1") - .await - .unwrap() - ); - } - - #[tokio::test] - async fn test_time_series_table_is_not_normal_table() { - let (_temp_dir, metadata_manager) = - create_metadata_manager_and_save_time_series_table().await; - assert!( - !metadata_manager - .is_normal_table(TIME_SERIES_TABLE_NAME) - .await - .unwrap() - ); - } - - #[tokio::test] - async fn test_time_series_table_is_time_series_table() { - let (_temp_dir, metadata_manager) = - create_metadata_manager_and_save_time_series_table().await; - assert!( - metadata_manager - .is_time_series_table(TIME_SERIES_TABLE_NAME) - .await - .unwrap() - ); - } - - #[tokio::test] - async fn test_normal_table_is_not_time_series_table() { - let (_temp_dir, metadata_manager) = create_metadata_manager_and_save_normal_tables().await; - assert!( - !metadata_manager - .is_time_series_table("normal_table_1") - .await - .unwrap() - ); - } - - #[tokio::test] - async fn test_table_names() { - let (_temp_dir, metadata_manager) = create_metadata_manager_and_save_normal_tables().await; - - let time_series_table_metadata = table::time_series_table_metadata(); - metadata_manager - .save_time_series_table_metadata(&time_series_table_metadata) - .await - .unwrap(); - - let table_names = 
metadata_manager.table_names().await.unwrap(); - assert_eq!( - table_names, - vec!["normal_table_2", "normal_table_1", TIME_SERIES_TABLE_NAME] - ); - } - - #[tokio::test] - async fn test_normal_table_names() { - let (_temp_dir, metadata_manager) = create_metadata_manager_and_save_normal_tables().await; - - let normal_table_names = metadata_manager.normal_table_names().await.unwrap(); - assert_eq!(normal_table_names, vec!["normal_table_2", "normal_table_1"]); - } - - #[tokio::test] - async fn test_time_series_table_names() { - let (_temp_dir, metadata_manager) = - create_metadata_manager_and_save_time_series_table().await; - - let time_series_table_names = metadata_manager.time_series_table_names().await.unwrap(); - assert_eq!(time_series_table_names, vec![TIME_SERIES_TABLE_NAME]); - } - - #[tokio::test] - async fn test_save_normal_table_metadata() { - let (_temp_dir, metadata_manager) = create_metadata_manager_and_save_normal_tables().await; - - // Retrieve the normal table from the metadata Delta Lake. - let sql = "SELECT table_name FROM normal_table_metadata ORDER BY table_name"; - let batch = sql_and_concat(&metadata_manager.session_context, sql) - .await - .unwrap(); - - assert_eq!( - **batch.column(0), - StringArray::from(vec!["normal_table_1", "normal_table_2"]) - ); - } - - #[tokio::test] - async fn test_save_time_series_table_metadata() { - let (_temp_dir, metadata_manager) = - create_metadata_manager_and_save_time_series_table().await; - - // Check that a row has been added to the time_series_table_metadata table. 
- let sql = "SELECT table_name, query_schema FROM time_series_table_metadata"; - let batch = sql_and_concat(&metadata_manager.session_context, sql) - .await - .unwrap(); - - assert_eq!( - **batch.column(0), - StringArray::from(vec![TIME_SERIES_TABLE_NAME]) - ); - assert_eq!( - **batch.column(1), - BinaryArray::from_vec(vec![ - &try_convert_schema_to_bytes(&table::time_series_table_metadata().query_schema) - .unwrap() - ]) - ); - - // Check that a row has been added to the time_series_table_field_columns table for each field column. - let sql = "SELECT table_name, column_name, column_index, error_bound_value, error_bound_is_relative, \ - generated_column_expr FROM time_series_table_field_columns ORDER BY column_name"; - let batch = sql_and_concat(&metadata_manager.session_context, sql) - .await - .unwrap(); - - assert_eq!( - **batch.column(0), - StringArray::from(vec![TIME_SERIES_TABLE_NAME, TIME_SERIES_TABLE_NAME]) - ); - assert_eq!( - **batch.column(1), - StringArray::from(vec!["field_1", "field_2"]) - ); - assert_eq!(**batch.column(2), Int16Array::from(vec![1, 2])); - assert_eq!(**batch.column(3), Float32Array::from(vec![1.0, 5.0])); - assert_eq!(**batch.column(4), BooleanArray::from(vec![false, true])); - assert_eq!( - **batch.column(5), - BinaryArray::from_opt_vec(vec![None, None]) - ); - } - - #[tokio::test] - async fn test_drop_normal_table_metadata() { - let (_temp_dir, metadata_manager) = create_metadata_manager_and_save_normal_tables().await; - - metadata_manager - .drop_table_metadata("normal_table_2") - .await - .unwrap(); - - // Verify that normal_table_2 was deleted from the normal_table_metadata table. 
- let sql = "SELECT table_name FROM normal_table_metadata"; - let batch = sql_and_concat(&metadata_manager.session_context, sql) - .await - .unwrap(); - - assert_eq!(**batch.column(0), StringArray::from(vec!["normal_table_1"])); - } - - #[tokio::test] - async fn test_drop_time_series_table_metadata() { - let (_temp_dir, metadata_manager) = - create_metadata_manager_and_save_time_series_table().await; - - metadata_manager - .drop_table_metadata(TIME_SERIES_TABLE_NAME) - .await - .unwrap(); - - // Verify that the time series table was deleted from the time_series_table_metadata table. - let sql = "SELECT table_name FROM time_series_table_metadata"; - let batch = sql_and_concat(&metadata_manager.session_context, sql) - .await - .unwrap(); - - assert_eq!(batch.num_rows(), 0); - - // Verify that the field columns were deleted from the time_series_table_field_columns table. - let sql = "SELECT table_name FROM time_series_table_field_columns"; - let batch = sql_and_concat(&metadata_manager.session_context, sql) - .await - .unwrap(); - - assert_eq!(batch.num_rows(), 0); - } - - #[tokio::test] - async fn test_drop_table_metadata_for_missing_table() { - let (_temp_dir, metadata_manager) = create_metadata_manager_and_save_normal_tables().await; - - let result = metadata_manager.drop_table_metadata("missing_table").await; - - assert_eq!( - result.unwrap_err().to_string(), - "Invalid Argument Error: Table with name 'missing_table' does not exist." 
- ); - } - - async fn create_metadata_manager_and_save_normal_tables() -> (TempDir, TableMetadataManager) { - let temp_dir = tempfile::tempdir().unwrap(); - let metadata_manager = TableMetadataManager::try_from_path(temp_dir.path()) - .await - .unwrap(); - - metadata_manager - .save_normal_table_metadata("normal_table_1") - .await - .unwrap(); - - metadata_manager - .save_normal_table_metadata("normal_table_2") - .await - .unwrap(); - - (temp_dir, metadata_manager) - } - - #[tokio::test] - async fn test_time_series_table_metadata() { - let (_temp_dir, metadata_manager) = - create_metadata_manager_and_save_time_series_table().await; - - let time_series_table_metadata = - metadata_manager.time_series_table_metadata().await.unwrap(); - - assert_eq!( - time_series_table_metadata.first().unwrap().name, - table::time_series_table_metadata().name, - ); - } - - #[tokio::test] - async fn test_time_series_table_metadata_for_existing_time_series_table() { - let (_temp_dir, metadata_manager) = - create_metadata_manager_and_save_time_series_table().await; - - let time_series_table_metadata = metadata_manager - .time_series_table_metadata_for_time_series_table(TIME_SERIES_TABLE_NAME) - .await - .unwrap(); - - assert_eq!( - time_series_table_metadata.name, - table::time_series_table_metadata().name, - ); - } - - #[tokio::test] - async fn test_time_series_table_metadata_for_missing_time_series_table() { - let (_temp_dir, metadata_manager) = - create_metadata_manager_and_save_time_series_table().await; - - let result = metadata_manager - .time_series_table_metadata_for_time_series_table("missing_table") - .await; - - assert_eq!( - result.unwrap_err().to_string(), - "Invalid Argument Error: No metadata for time series table named 'missing_table'." 
- ); - } - - #[tokio::test] - async fn test_error_bound() { - let (_temp_dir, metadata_manager) = - create_metadata_manager_and_save_time_series_table().await; - - let error_bounds = metadata_manager - .error_bounds(TIME_SERIES_TABLE_NAME, 4) - .await - .unwrap(); - - let values: Vec = error_bounds - .iter() - .map(|error_bound| match error_bound { - ErrorBound::Absolute(value) => *value, - ErrorBound::Relative(value) => *value, - ErrorBound::Lossless => 0.0, - }) - .collect(); - - assert_eq!(values, &[0.0, 1.0, 5.0, 0.0]); - } - - #[tokio::test] - async fn test_generated_columns() { - let temp_dir = tempfile::tempdir().unwrap(); - let metadata_manager = TableMetadataManager::try_from_path(temp_dir.path()) - .await - .unwrap(); - - let query_schema = Arc::new(Schema::new(vec![ - Field::new("generated_column_1", ArrowValue::DATA_TYPE, false), - Field::new("timestamp", ArrowTimestamp::DATA_TYPE, false), - Field::new("field_1", ArrowValue::DATA_TYPE, false), - Field::new("field_2", ArrowValue::DATA_TYPE, false), - Field::new("generated_column_2", ArrowValue::DATA_TYPE, false), - Field::new("tag", DataType::Utf8, false), - ])); - - let error_bounds = vec![ErrorBound::Lossless; query_schema.fields.len()]; - - let plus_one_column = Some(GeneratedColumn { - expr: col("field_1") + Literal(Int64(Some(1))), - source_columns: vec![2], - }); - - let addition_column = Some(GeneratedColumn { - expr: col("field_1") + col("field_2"), - source_columns: vec![2, 3], - }); - - let mut expected_generated_columns = - vec![plus_one_column, None, None, None, addition_column, None]; - - let time_series_table_metadata = TimeSeriesTableMetadata::try_new( - "generated_columns_table".to_owned(), - query_schema, - error_bounds, - expected_generated_columns.clone(), - ) - .unwrap(); - - metadata_manager - .save_time_series_table_metadata(&time_series_table_metadata) - .await - .unwrap(); - - let df_schema = time_series_table_metadata - .query_schema - .to_dfschema() - .unwrap(); - let mut 
generated_columns = metadata_manager - .generated_columns("generated_columns_table", &df_schema) - .await - .unwrap(); - - let mut actual_addition_column = generated_columns.remove(4).unwrap(); - let expected_addition_column = expected_generated_columns.remove(4).unwrap(); - - // Sort the source columns to ensure the order is consistent. - actual_addition_column.source_columns.sort(); - assert_eq!(actual_addition_column, expected_addition_column); - - assert_eq!(generated_columns, expected_generated_columns); - } - - async fn create_metadata_manager_and_save_time_series_table() -> (TempDir, TableMetadataManager) - { - let temp_dir = tempfile::tempdir().unwrap(); - let metadata_manager = TableMetadataManager::try_from_path(temp_dir.path()) - .await - .unwrap(); - - // Save a time series table to the metadata Delta Lake. - let time_series_table_metadata = table::time_series_table_metadata(); - metadata_manager - .save_time_series_table_metadata(&time_series_table_metadata) - .await - .unwrap(); - - (temp_dir, metadata_manager) - } -} diff --git a/crates/modelardb_storage/src/optimizer/model_simple_aggregates.rs b/crates/modelardb_storage/src/optimizer/model_simple_aggregates.rs index 50d0e52dc..8fe12f04e 100644 --- a/crates/modelardb_storage/src/optimizer/model_simple_aggregates.rs +++ b/crates/modelardb_storage/src/optimizer/model_simple_aggregates.rs @@ -628,7 +628,6 @@ mod tests { use datafusion::arrow::datatypes::Schema; use datafusion::datasource::sink::DataSink; - use datafusion::execution::session_state::SessionStateBuilder; use datafusion::execution::{SendableRecordBatchStream, TaskContext}; use datafusion::physical_plan::aggregates::AggregateExec; use datafusion::physical_plan::coalesce_batches::CoalesceBatchesExec; @@ -636,13 +635,11 @@ mod tests { use datafusion::physical_plan::filter::FilterExec; use datafusion::physical_plan::metrics::MetricsSet; use datafusion::physical_plan::{DisplayAs, DisplayFormatType}; - use datafusion::prelude::SessionContext; use 
modelardb_test::table::{self, TIME_SERIES_TABLE_NAME}; use tempfile::TempDir; use tonic::async_trait; - use crate::delta_lake::DeltaLake; - use crate::optimizer; + use crate::data_folder::DataFolder; use crate::query::grid_exec::GridExec; use crate::query::time_series_table::TimeSeriesTable; @@ -777,24 +774,15 @@ mod tests { ) -> Arc { // Setup access to data and metadata in data folder. let data_folder_path = temp_dir.path(); - let delta_lake = DeltaLake::try_from_local_path(data_folder_path).unwrap(); + let data_folder = DataFolder::open_local(data_folder_path).await.unwrap(); // Setup access to Apache DataFusion. - let mut session_state_builder = SessionStateBuilder::new().with_default_features(); - - // Uses the rule method instead of the rules method as the rules method replaces the built-ins. - for physical_optimizer_rule in optimizer::physical_optimizer_rules() { - session_state_builder = - session_state_builder.with_physical_optimizer_rule(physical_optimizer_rule); - } - - let session_state = session_state_builder.build(); - let session_context = SessionContext::new_with_state(session_state); + let session_context = crate::create_session_context(); // Create time series table. let time_series_table_metadata = table::time_series_table_metadata_arc(); - let delta_table = delta_lake + let delta_table = data_folder .create_time_series_table(&time_series_table_metadata) .await .unwrap(); diff --git a/crates/modelardb_storage/src/query/metadata_table.rs b/crates/modelardb_storage/src/query/metadata_table.rs index 5a814070f..d93bce269 100644 --- a/crates/modelardb_storage/src/query/metadata_table.rs +++ b/crates/modelardb_storage/src/query/metadata_table.rs @@ -70,7 +70,7 @@ impl TableProvider for MetadataTable { filters: &[Expr], limit: Option, ) -> DataFusionResult> { - // Clone the Delta Lake table and update it to the latest version. self.delta_lake.load( + // Clone the Delta Lake table and update it to the latest version. 
self.data_folder.load( // &mut self) is not an option due to TypeProvider::scan(&self, ...). Storing the DeltaTable // in a Mutex and RwLock is also not an option since most of the methods in TypeProvider // return a reference and the locks will be dropped at the end of the method. diff --git a/crates/modelardb_storage/src/query/normal_table.rs b/crates/modelardb_storage/src/query/normal_table.rs index d256c4f79..a30f74d73 100644 --- a/crates/modelardb_storage/src/query/normal_table.rs +++ b/crates/modelardb_storage/src/query/normal_table.rs @@ -100,7 +100,7 @@ impl TableProvider for NormalTable { filters: &[Expr], limit: Option, ) -> DataFusionResult> { - // Clone the Delta Lake table and update it to the latest version. self.delta_lake.load( + // Clone the Delta Lake table and update it to the latest version. self.data_folder.load( // &mut self) is not an option due to TypeProvider::scan(&self, ...). Storing the DeltaTable // in a Mutex and RwLock is also not an option since most of the methods in TypeProvider // return a reference and the locks will be dropped at the end of the method. @@ -129,7 +129,7 @@ impl TableProvider for NormalTable { /// Create an [`ExecutionPlan`] that will insert the result of `input` into the normal table. /// Generally, [`arrow_flight::flight_service_server::FlightService::do_put()`] should be used /// instead of this method as it is more efficient. Returns a [`DataFusionError::Plan`] if the - /// necessary metadata cannot be retrieved from the metadata Delta Lake. + /// necessary metadata cannot be retrieved from the Delta Lake. async fn insert_into( &self, _state: &dyn Session, diff --git a/crates/modelardb_storage/src/query/sorted_join_exec.rs b/crates/modelardb_storage/src/query/sorted_join_exec.rs index 145432a7e..890778ef3 100644 --- a/crates/modelardb_storage/src/query/sorted_join_exec.rs +++ b/crates/modelardb_storage/src/query/sorted_join_exec.rs @@ -14,11 +14,9 @@ */ //! 
Implementation of the Apache DataFusion execution plan [`SortedJoinExec`] and its corresponding -//! stream [`SortedJoinStream`] which joins multiple sorted array produced by -//! [`GridExecs`](crate::query::grid_exec::GridExec) streams and combines them with the time series -//! tags retrieved from the [`TableMetadataManager`](metadata::table_metadata_manager::TableMetadataManager) -//! to create the complete results containing a timestamp column, one or more field columns, and zero -//! or more tag columns. +//! stream [`SortedJoinStream`] which joins multiple sorted arrays produced by +//! [`GridExecs`](crate::query::grid_exec::GridExec) streams to create the complete results +//! containing a timestamp column, one or more field columns, and zero or more tag columns. use std::any::Any; use std::fmt::{Formatter, Result as FmtResult}; diff --git a/crates/modelardb_storage/src/query/time_series_table.rs b/crates/modelardb_storage/src/query/time_series_table.rs index afa742479..82ef41b4c 100644 --- a/crates/modelardb_storage/src/query/time_series_table.rs +++ b/crates/modelardb_storage/src/query/time_series_table.rs @@ -407,7 +407,7 @@ fn convert_logical_expr_to_physical_expr( /// Create an [`ExecutionPlan`] that will return the compressed segments that represent the data /// points for `field_column_index` in `delta_table`. Returns a [`DataFusionError`] if the necessary -/// metadata cannot be retrieved from the metadata Delta Lake. +/// metadata cannot be retrieved from the Delta Lake. fn new_data_source_exec( delta_table: &DeltaTable, partition_filters: &[PartitionFilter], @@ -515,7 +515,7 @@ impl TableProvider for TimeSeriesTable { let query_schema = &self.time_series_table_metadata.query_schema; let generated_columns = &self.time_series_table_metadata.generated_columns; - // Clone the Delta Lake table and update it to the latest version. self.delta_lake.load( + // Clone the Delta Lake table and update it to the latest version. 
self.data_folder.load( // &mut self) is not an option due to TypeProvider::scan(&self, ...). Storing the DeltaTable // in a Mutex and RwLock is also not an option since most of the methods in TypeProvider // return a reference and the locks will be dropped at the end of the method.