From 879e0cb146f275f04dc6bb4ab265f80f4e646ae4 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Tue, 4 Feb 2025 23:16:07 +0100 Subject: [PATCH 01/69] Fixed outdated documentation --- crates/modelardb_server/src/storage/compressed_data_buffer.rs | 4 ++-- .../modelardb_server/src/storage/compressed_data_manager.rs | 4 ++-- .../modelardb_server/src/storage/uncompressed_data_manager.rs | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/modelardb_server/src/storage/compressed_data_buffer.rs b/crates/modelardb_server/src/storage/compressed_data_buffer.rs index 46a1c5c47..68402ff93 100644 --- a/crates/modelardb_server/src/storage/compressed_data_buffer.rs +++ b/crates/modelardb_server/src/storage/compressed_data_buffer.rs @@ -23,8 +23,8 @@ use modelardb_types::schemas::COMPRESSED_SCHEMA; use crate::error::{ModelarDbServerError, Result}; -/// Compressed segments representing data points from a column in a model table as one -/// [`RecordBatch`]. +/// Batch of compressed segments that were compressed together and are ready to be inserted into a +/// [`CompressedDataBuffer`] for a model table. #[derive(Clone, Debug)] pub(super) struct CompressedSegmentBatch { /// Metadata of the model table to insert the data points into. diff --git a/crates/modelardb_server/src/storage/compressed_data_manager.rs b/crates/modelardb_server/src/storage/compressed_data_manager.rs index 41254f034..861986431 100644 --- a/crates/modelardb_server/src/storage/compressed_data_manager.rs +++ b/crates/modelardb_server/src/storage/compressed_data_manager.rs @@ -129,8 +129,8 @@ impl CompressedDataManager { Ok(()) } - /// Insert the `compressed_segments` into the in-memory compressed data buffer for the model table - /// with `table_name`. If `compressed_segments` is saved successfully, return [`Ok`], otherwise + /// Insert `compressed_segment_batch` into the in-memory [`CompressedDataBuffer`] for the model + /// table. 
If `compressed_segment_batch` is inserted successfully, return [`Ok`], otherwise /// return [`ModelarDbServerError`](crate::error::ModelarDbServerError). async fn insert_compressed_segments( &self, diff --git a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs index 0b4800077..b22556452 100644 --- a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs +++ b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs @@ -199,7 +199,7 @@ impl UncompressedDataManager { .collect(); // For each data point, compute a hash from the tags and pass the fields to the storage - // engine so they can be added to the appropriate [`UncompressedDataBuffer`]. + // engine so they can be added to the appropriate UncompressedDataBuffer. for (index, timestamp) in timestamp_column_array.iter().enumerate() { let tag_values: Vec = tag_column_arrays .iter() From cc891942ef01ccee95506faf0eca2d3c74f225d3 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Tue, 11 Feb 2025 22:19:24 +0100 Subject: [PATCH 02/69] Add table name to file path for spilled buffers --- .../src/storage/uncompressed_data_buffer.rs | 39 +++++++++++-------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs b/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs index 06ffa95e2..14329ae01 100644 --- a/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs +++ b/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs @@ -256,10 +256,11 @@ impl UncompressedOnDiskDataBuffer { ) -> Result { // Create a path that uses the first timestamp as the filename. 
let timestamps = modelardb_types::array!(data_points, 0, TimestampArray); - let file_path = Path::from(format!( - "{UNCOMPRESSED_DATA_FOLDER}/{tag_hash}/{}.parquet", - timestamps.value(0) - )); + let file_path = spilled_buffer_file_path( + &model_table_metadata.name, + tag_hash, + &format!("{}.parquet", timestamps.value(0)), + ); modelardb_storage::write_record_batch_to_apache_parquet_file( &file_path, @@ -288,7 +289,7 @@ impl UncompressedOnDiskDataBuffer { local_data_folder: Arc, file_name: &str, ) -> Result { - let file_path = Path::from(format!("{UNCOMPRESSED_DATA_FOLDER}/{tag_hash}/{file_name}")); + let file_path = spilled_buffer_file_path(&model_table_metadata.name, tag_hash, file_name); Ok(Self { tag_hash, @@ -377,6 +378,14 @@ impl Debug for UncompressedOnDiskDataBuffer { } } +/// Return the [`Path`] for a spilled buffer for the time series with `tag_hash` in the table with +/// `table_name`. +fn spilled_buffer_file_path(table_name: &str, tag_hash: u64, file_name: &str) -> Path { + Path::from(format!( + "{UNCOMPRESSED_DATA_FOLDER}/{table_name}/{tag_hash}/{file_name}", + )) +} + #[cfg(test)] mod tests { use super::*; @@ -555,9 +564,10 @@ mod tests { .await .unwrap(); - let uncompressed_path = temp_dir - .path() - .join(format!("{UNCOMPRESSED_DATA_FOLDER}/1")); + let uncompressed_path = temp_dir.path().join(format!( + "{UNCOMPRESSED_DATA_FOLDER}/{}/1", + test::MODEL_TABLE_NAME + )); assert_eq!(uncompressed_path.read_dir().unwrap().count(), 1) } @@ -579,9 +589,10 @@ mod tests { .await .unwrap(); - let uncompressed_path = temp_dir - .path() - .join(format!("{UNCOMPRESSED_DATA_FOLDER}/1")); + let uncompressed_path = temp_dir.path().join(format!( + "{UNCOMPRESSED_DATA_FOLDER}/{}/1", + test::MODEL_TABLE_NAME + )); assert_eq!(uncompressed_path.read_dir().unwrap().count(), 1) } @@ -594,6 +605,7 @@ mod tests { let spilled_buffer_path = temp_dir .path() .join(UNCOMPRESSED_DATA_FOLDER) + .join(test::MODEL_TABLE_NAME) .join("1") .join("1234567890123.parquet"); 
assert!(spilled_buffer_path.exists()); @@ -603,11 +615,6 @@ mod tests { assert_eq!(data.num_columns(), 3); assert_eq!(data.num_rows(), *UNCOMPRESSED_DATA_BUFFER_CAPACITY); - let spilled_buffer_path = temp_dir - .path() - .join(UNCOMPRESSED_DATA_FOLDER) - .join("1") - .join("1234567890123.parquet"); assert!(!spilled_buffer_path.exists()); } From b00ebd60549602b18c323322510fe7e724a9665c Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Tue, 11 Feb 2025 22:27:38 +0100 Subject: [PATCH 03/69] Use the table name in the spilled buffer file path when initializing --- .../src/storage/uncompressed_data_manager.rs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs index b22556452..d57214a31 100644 --- a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs +++ b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs @@ -99,17 +99,14 @@ impl UncompressedDataManager { let spilled_buffer = maybe_spilled_buffer?; let path_parts: Vec = spilled_buffer.location.parts().collect(); + // unwrap() is safe since all spilled buffers are partitioned by their table name. + let table_name = path_parts.get(1).unwrap().as_ref(); + // unwrap() is safe since all spilled buffers are partitioned by their tag hash. - let tag_hash = path_parts.get(1).unwrap().as_ref().parse::().unwrap(); + let tag_hash = path_parts.get(2).unwrap().as_ref().parse::().unwrap(); // unwrap() is safe since all spilled buffers have a name generated by the system. - let file_name = path_parts.get(2).unwrap().as_ref(); - - let table_name = self - .local_data_folder - .table_metadata_manager - .tag_hash_to_model_table_name(tag_hash) - .await?; + let file_name = path_parts.get(3).unwrap().as_ref(); // unwrap() is safe as data cannot be ingested into a model table that does not exist. 
let model_table_metadata = context From a78eae8c668118dfb56caa24b029b9286a1108c5 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Tue, 11 Feb 2025 22:49:31 +0100 Subject: [PATCH 04/69] Remove model_table_hash_table_name from metadata Delta Lake --- .../src/metadata/table_metadata_manager.rs | 191 ++---------------- 1 file changed, 13 insertions(+), 178 deletions(-) diff --git a/crates/modelardb_storage/src/metadata/table_metadata_manager.rs b/crates/modelardb_storage/src/metadata/table_metadata_manager.rs index 2f1903608..f3e2dbc60 100644 --- a/crates/modelardb_storage/src/metadata/table_metadata_manager.rs +++ b/crates/modelardb_storage/src/metadata/table_metadata_manager.rs @@ -161,8 +161,6 @@ impl TableMetadataManager { /// and model table metadata and register them with the Apache DataFusion session context. /// * The `normal_table_metadata` table contains the metadata for normal tables. /// * The `model_table_metadata` table contains the main metadata for model tables. - /// * The `model_table_hash_table_name` contains a mapping from each tag hash to the name of the - /// model table that contains the time series with that tag hash. /// * The `model_table_field_columns` table contains the name, index, error bound value, whether /// error bound is relative, and generation expression of the field columns in each model table. /// @@ -194,24 +192,6 @@ impl TableMetadataManager { register_metadata_table(&self.session_context, "model_table_metadata", delta_table)?; - // Create and register the model_table_hash_table_name table if it does not exist. 
- let delta_table = self - .delta_lake - .create_metadata_table( - "model_table_hash_table_name", - &Schema::new(vec![ - Field::new("hash", DataType::Int64, false), - Field::new("table_name", DataType::Utf8, false), - ]), - ) - .await?; - - register_metadata_table( - &self.session_context, - "model_table_hash_table_name", - delta_table, - )?; - // Create and register the model_table_field_columns table if it does not exist. Note that // column_index will only use a maximum of 10 bits. generated_column_expr is NULL if the // fields are stored as segments. @@ -454,8 +434,8 @@ impl TableMetadataManager { /// Drop the metadata for the model table with `table_name` from the metadata Delta Lake. /// This includes dropping the tags table for the model table, deleting a row from the /// `model_table_metadata` table, deleting a row from the `model_table_field_columns` table for - /// each field column, and deleting the tag metadata from the `model_table_hash_table_name` table - /// and the tag cache. If the metadata could not be dropped, [`ModelarDbStorageError`] is returned. + /// each field column, and deleting the tag metadata from the tag cache. If the metadata could + /// not be dropped, [`ModelarDbStorageError`] is returned. async fn drop_model_table_metadata(&self, table_name: &str) -> Result<()> { // Drop and deregister the model_table_name_tags table. let tags_table_name = format!("{table_name}_tags"); @@ -506,8 +486,8 @@ impl TableMetadataManager { /// Truncate the metadata for the model table with `table_name` from the metadata Delta Lake. /// This includes truncating the tags table for the model table and deleting the tag metadata - /// from the `model_table_hash_table_name` table and the tag cache. If the metadata could not - /// be truncated, [`ModelarDbStorageError`] is returned. + /// from the tag cache. If the metadata could not be truncated, [`ModelarDbStorageError`] is + /// returned. 
async fn truncate_model_table_metadata(&self, table_name: &str) -> Result<()> { // Truncate the model_table_name_tags table. self.delta_lake @@ -522,18 +502,9 @@ impl TableMetadataManager { Ok(()) } - /// Delete the tag hash metadata for the model table with `table_name` from the - /// `model_table_hash_table_name` table and the tag cache. If the metadata could not be deleted, - /// [`ModelarDbStorageError`] is returned. + /// Delete the tag hash metadata for the model table with `table_name` from the tag cache. If + /// the metadata could not be deleted, [`ModelarDbStorageError`] is returned. async fn delete_tag_hash_metadata(&self, table_name: &str) -> Result<()> { - // Delete the tag metadata from the model_table_hash_table_name table. - self.delta_lake - .metadata_delta_ops("model_table_hash_table_name") - .await? - .delete() - .with_predicate(col("table_name").eq(lit(table_name))) - .await?; - // Delete the tag metadata from the tag cache. The table name is always the last part of // the cache key. self.tag_value_hashes @@ -696,11 +667,10 @@ impl TableMetadataManager { /// Return the tag hash for the given list of tag values either by retrieving it from a cache /// or, if the combination of tag values is not in the cache, by computing a new hash. If the - /// hash is not in the cache, it is saved to the cache, persisted to the `model_table_tags` - /// table if it does not already contain it, and persisted to the `model_table_hash_table_name` + /// hash is not in the cache, it is saved to the cache and persisted to the `model_table_tags` /// table if it does not already contain it. If the hash was saved to the metadata Delta Lake, - /// also return [`true`]. If the `model_table_tags` or the `model_table_hash_table_name` table - /// cannot be accessed, [`ModelarDbStorageError`] is returned. + /// also return [`true`]. If the `model_table_tags` table cannot be accessed, + /// [`ModelarDbStorageError`] is returned. 
pub async fn lookup_or_compute_tag_hash( &self, model_table_metadata: &ModelTableMetadata, @@ -743,10 +713,9 @@ impl TableMetadataManager { } /// Save the given tag hash metadata to the `model_table_tags` table if it does not already - /// contain it, and to the `model_table_hash_table_name` table if it does not already contain it. - /// If the tables did not contain the tag hash, meaning it is a new tag combination, return - /// [`true`]. If the metadata cannot be inserted into either `model_table_tags` or - /// `model_table_hash_table_name`, [`ModelarDbStorageError`] is returned. + /// contain it. If the table did not contain the tag hash, meaning it is a new tag combination, + /// return [`true`]. If the metadata cannot be inserted into `model_table_tags`, + /// [`ModelarDbStorageError`] is returned. pub async fn save_tag_hash_metadata( &self, model_table_metadata: &ModelTableMetadata, @@ -798,39 +767,7 @@ impl TableMetadataManager { })? .await?; - // Save the tag hash metadata in the model_table_hash_table_name table if it does not - // already contain it. - let source = self - .metadata_table_data_frame( - "model_table_hash_table_name", - vec![ - Arc::new(Int64Array::from(vec![signed_tag_hash])), - Arc::new(StringArray::from(vec![table_name])), - ], - ) - .await?; - - let delta_ops = self - .delta_lake - .metadata_delta_ops("model_table_hash_table_name") - .await?; - - // Merge the tag hash metadata in the source DataFrame into the model_table_hash_table_name - // table. For each hash, if the hash is not already in the target table, insert the hash and - // the table name from the source DataFrame. - let (_table, insert_into_hash_table_name_metrics) = delta_ops - .merge(source, col("target.hash").eq(col("source.hash"))) - .with_source_alias("source") - .with_target_alias("target") - .when_not_matched_insert(|insert| { - insert - .set("hash", col("source.hash")) - .set("table_name", col("source.table_name")) - })? 
- .await?; - - Ok(insert_into_tags_metrics.num_target_rows_inserted > 0 - || insert_into_hash_table_name_metrics.num_target_rows_inserted > 0) + Ok(insert_into_tags_metrics.num_target_rows_inserted > 0) } /// Return a [`DataFrame`] with the given `rows` for the metadata table with the given @@ -850,30 +787,6 @@ impl TableMetadataManager { Ok(self.session_context.read_batch(batch)?) } - /// Return the name of the model table that contains the time series with `tag_hash`. Returns a - /// [`ModelarDbStorageError`] if the necessary data cannot be retrieved from the metadata Delta - /// Lake. - pub async fn tag_hash_to_model_table_name(&self, tag_hash: u64) -> Result { - let signed_tag_hash = i64::from_ne_bytes(tag_hash.to_ne_bytes()); - - let sql = format!( - "SELECT table_name - FROM model_table_hash_table_name - WHERE hash = '{signed_tag_hash}' - LIMIT 1" - ); - let batch = sql_and_concat(&self.session_context, &sql).await?; - - let table_names = modelardb_types::array!(batch, 0, StringArray); - if table_names.is_empty() { - Err(ModelarDbStorageError::InvalidArgument(format!( - "No model table contains a time series with tag hash '{tag_hash}'." - ))) - } else { - Ok(table_names.value(0).to_owned()) - } - } - /// Return a mapping from tag hashes to the tags in the columns with the names in /// `tag_column_names` for the time series in the model table with the name `model_table_name`. /// Returns a [`ModelarDbStorageError`] if the necessary data cannot be retrieved from the @@ -955,12 +868,6 @@ mod tests { .await .is_ok()); - assert!(metadata_manager - .session_context - .sql("SELECT hash, table_name FROM model_table_hash_table_name") - .await - .is_ok()); - assert!(metadata_manager .session_context .sql("SELECT table_name, column_name, column_index, error_bound_value, error_bound_is_relative, \ @@ -1199,14 +1106,6 @@ mod tests { assert_eq!(batch.num_rows(), 0); - // Verify that the tag metadata was deleted from the model_table_hash_table_name table. 
- let sql = "SELECT table_name FROM model_table_hash_table_name"; - let batch = sql_and_concat(&metadata_manager.session_context, sql) - .await - .unwrap(); - - assert_eq!(batch.num_rows(), 0); - // Verify that the tag cache was cleared. assert!(metadata_manager.tag_value_hashes.is_empty()); } @@ -1265,14 +1164,6 @@ mod tests { assert_eq!(batch.num_rows(), 0); - // Verify that the tag metadata was deleted from the model_table_hash_table_name table. - let sql = "SELECT table_name FROM model_table_hash_table_name"; - let batch = sql_and_concat(&metadata_manager.session_context, sql) - .await - .unwrap(); - - assert_eq!(batch.num_rows(), 0); - // Verify that the tag cache was cleared. assert!(metadata_manager.tag_value_hashes.is_empty()); } @@ -1465,24 +1356,6 @@ mod tests { ]) ); assert_eq!(**batch.column(1), StringArray::from(vec!["tag2", "tag1"])); - - // The tag hashes should be saved in the model_table_hash_table_name table. - let sql = "SELECT hash, table_name FROM model_table_hash_table_name"; - let batch = sql_and_concat(&metadata_manager.session_context, sql) - .await - .unwrap(); - - assert_eq!( - **batch.column(0), - Int64Array::from(vec![ - i64::from_ne_bytes(tag_hash_2.to_ne_bytes()), - i64::from_ne_bytes(tag_hash_1.to_ne_bytes()), - ]) - ); - assert_eq!( - **batch.column(1), - StringArray::from(vec![test::MODEL_TABLE_NAME, test::MODEL_TABLE_NAME]) - ); } #[tokio::test] @@ -1538,44 +1411,6 @@ mod tests { .unwrap(); assert!(batch.column(0).is_empty()); - - let sql = "SELECT hash FROM model_table_hash_table_name"; - let batch = sql_and_concat(&metadata_manager.session_context, sql) - .await - .unwrap(); - - assert!(batch.column(0).is_empty()); - } - - #[tokio::test] - async fn test_tag_hash_to_model_table_name() { - let (_temp_dir, metadata_manager) = create_metadata_manager_and_save_model_table().await; - - let model_table_metadata = test::model_table_metadata(); - let (tag_hash, _tag_hash_is_saved) = metadata_manager - 
.lookup_or_compute_tag_hash(&model_table_metadata, &["tag1".to_owned()]) - .await - .unwrap(); - - let table_name = metadata_manager - .tag_hash_to_model_table_name(tag_hash) - .await - .unwrap(); - - assert_eq!(table_name, test::MODEL_TABLE_NAME); - } - - #[tokio::test] - async fn test_invalid_tag_hash_to_model_table_name() { - let temp_dir = tempfile::tempdir().unwrap(); - let metadata_manager = TableMetadataManager::try_from_path(temp_dir.path(), None) - .await - .unwrap(); - - assert!(metadata_manager - .tag_hash_to_model_table_name(0) - .await - .is_err()); } #[tokio::test] From c5baba5828d84192ff6cf2a1655d6f01dafb7dfc Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 12 Feb 2025 21:32:19 +0100 Subject: [PATCH 05/69] Remove limitation of 1024 on number of field columns --- .../src/metadata/model_table_metadata.rs | 9 --------- .../src/metadata/table_metadata_manager.rs | 1 - 2 files changed, 10 deletions(-) diff --git a/crates/modelardb_storage/src/metadata/model_table_metadata.rs b/crates/modelardb_storage/src/metadata/model_table_metadata.rs index a2eff4fa1..5100bfb21 100644 --- a/crates/modelardb_storage/src/metadata/model_table_metadata.rs +++ b/crates/modelardb_storage/src/metadata/model_table_metadata.rs @@ -60,7 +60,6 @@ impl ModelTableMetadata { /// * The number of error bounds does not match the number of columns. /// * The number of potentially generated columns does not match the number of columns. /// * A generated column includes another generated column in its expression. - /// * There are more than 1024 columns. /// * The `query_schema` does not include a single timestamp column. /// * The `query_schema` does not include at least one stored field column. pub fn try_new( @@ -95,14 +94,6 @@ impl ModelTableMetadata { } } - // If there are more than 1024 columns, return an error. 
This limitation is necessary since - // 10 bits are used to identify the column index of the data in the 64-bit univariate id. - if query_schema.fields.len() > 1024 { - return Err(ModelarDbStorageError::InvalidArgument( - "There cannot be more than 1024 columns in the model table.".to_owned(), - )); - } - // Remove the generated field columns from the query schema and the error bounds as these // columns should never be provided when inserting data points into the model table. let mut fields_without_generated = Vec::with_capacity(query_schema.fields().len()); diff --git a/crates/modelardb_storage/src/metadata/table_metadata_manager.rs b/crates/modelardb_storage/src/metadata/table_metadata_manager.rs index f3e2dbc60..385afdfa3 100644 --- a/crates/modelardb_storage/src/metadata/table_metadata_manager.rs +++ b/crates/modelardb_storage/src/metadata/table_metadata_manager.rs @@ -379,7 +379,6 @@ impl TableMetadataManager { (0.0, false) }; - // query_schema_index is simply cast as a model table contains at most 1024 columns. self.delta_lake .write_columns_to_metadata_table( "model_table_field_columns", From 999576dbe5d1b0535b2207aa02ba62a2bd65c2fc Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Mon, 17 Feb 2025 20:56:26 +0100 Subject: [PATCH 06/69] No longer save tag metadata when inserting data points --- crates/modelardb_server/src/storage/mod.rs | 1 - .../src/storage/uncompressed_data_manager.rs | 40 +++++-------------- 2 files changed, 9 insertions(+), 32 deletions(-) diff --git a/crates/modelardb_server/src/storage/mod.rs b/crates/modelardb_server/src/storage/mod.rs index 05de92c0e..14a7f1ba9 100644 --- a/crates/modelardb_server/src/storage/mod.rs +++ b/crates/modelardb_server/src/storage/mod.rs @@ -109,7 +109,6 @@ impl StorageEngine { // Create the uncompressed data manager. 
let uncompressed_data_manager = Arc::new(UncompressedDataManager::new( data_folders.local_data_folder.clone(), - data_folders.maybe_remote_data_folder.clone(), memory_pool.clone(), channels.clone(), )); diff --git a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs index d57214a31..99ff43c1f 100644 --- a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs +++ b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs @@ -48,8 +48,6 @@ use crate::storage::UNCOMPRESSED_DATA_FOLDER; pub(super) struct UncompressedDataManager { /// Folder for storing metadata and data in Apache Parquet files on the local file system. pub local_data_folder: DataFolder, - /// Folder for storing metadata and data in Apache Parquet files in a remote object store. - pub maybe_remote_data_folder: Option, /// Counter incremented for each [`RecordBatch`](datafusion::arrow::array::RecordBatch) of data /// points ingested. The value is assigned to buffers that are created or updated and is used to /// flush buffers that are no longer used. @@ -73,13 +71,11 @@ pub(super) struct UncompressedDataManager { impl UncompressedDataManager { pub(super) fn new( local_data_folder: DataFolder, - maybe_remote_data_folder: Option, memory_pool: Arc, channels: Arc, ) -> Self { Self { local_data_folder, - maybe_remote_data_folder, current_batch_index: AtomicU64::new(0), uncompressed_in_memory_data_buffers: DashMap::new(), uncompressed_on_disk_data_buffers: DashMap::new(), @@ -203,30 +199,12 @@ impl UncompressedDataManager { .map(|array| array.value(index).to_string()) .collect(); - let (tag_hash, tag_hash_is_saved) = self - .local_data_folder - .table_metadata_manager - .lookup_or_compute_tag_hash(&model_table_metadata, &tag_values) - .await?; - - // If the server was started with a manager, transfer the tag hash metadata if it was - // saved to the server metadata Delta Lake. 
We purposely transfer tag metadata before the - // associated files for convenience. This does not cause problems when querying. - if let Some(remote_data_folder) = &self.maybe_remote_data_folder { - if tag_hash_is_saved { - remote_data_folder - .table_metadata_manager - .save_tag_hash_metadata(&model_table_metadata, tag_hash, &tag_values) - .await?; - } - } - let mut values = field_column_arrays.iter().map(|array| array.value(index)); // unwrap() is safe to use since the timestamps array cannot contain null values. buffers_are_spilled |= self .insert_data_point( - tag_hash, + tag_values, timestamp.unwrap(), &mut values, model_table_metadata.clone(), @@ -252,15 +230,15 @@ impl UncompressedDataManager { Ok(()) } - /// Insert a single data point into the in-memory buffer for `tag_hash` if one exists. If the - /// buffer has been spilled, read it back into memory. If no buffer exists for `tag_hash`, - /// allocate a new buffer that will be compressed within the error bound in - /// `model_table_metadata`. Returns [`true`] if a buffer was spilled, [`false`] if not, and - /// [`ModelarDbServerError`](crate::error::ModelarDbServerError) if the error bound cannot be - /// retrieved from the metadata Delta Lake. + /// Insert a single data point into the in-memory buffer with the tag hash that corresponds to + /// `tag_values` if one exists. If the buffer has been spilled, read it back into memory. If no + /// buffer exists for the tag hash, allocate a new buffer that will be compressed within the + /// error bound in `model_table_metadata`. Returns [`true`] if a buffer was spilled, [`false`] + /// if not, and [`ModelarDbServerError`](crate::error::ModelarDbServerError) if the error bound + /// cannot be retrieved from the metadata Delta Lake. 
async fn insert_data_point( &self, - tag_hash: u64, + tag_values: Vec, timestamp: Timestamp, values: &mut dyn Iterator, model_table_metadata: Arc, @@ -1323,7 +1301,7 @@ mod tests { let channels = Arc::new(Channels::new()); let uncompressed_data_manager = - UncompressedDataManager::new(local_data_folder, None, memory_pool, channels); + UncompressedDataManager::new(local_data_folder, memory_pool, channels); (uncompressed_data_manager, Arc::new(model_table_metadata)) } From 6794ffcc6d80aa39075cd35f4cb211e127110831 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Mon, 17 Feb 2025 21:12:17 +0100 Subject: [PATCH 07/69] Use a new function to calculate tag hash outside table metadata manager --- .../src/storage/uncompressed_data_manager.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs index 99ff43c1f..7093c8d0a 100644 --- a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs +++ b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs @@ -16,6 +16,7 @@ //! Support for managing all uncompressed data that is ingested into the //! [`StorageEngine`](crate::storage::StorageEngine). +use std::hash::{DefaultHasher, Hasher}; use std::io::{Error as IOError, ErrorKind as IOErrorKind}; use std::mem; use std::sync::atomic::{AtomicU64, Ordering}; @@ -244,6 +245,8 @@ impl UncompressedDataManager { model_table_metadata: Arc, current_batch_index: u64, ) -> Result { + let tag_hash = calculate_tag_hash(&model_table_metadata.name, &tag_values); + debug!("Add data point at {timestamp} to uncompressed data buffer for {tag_hash}."); // Track if any buffers are spilled during ingestion so this information can be returned to @@ -647,6 +650,18 @@ impl UncompressedDataManager { } } +/// Calculate a unique hash for a specific combination of `table_name` and `tag_values`. 
The hash +/// can be used to identify a specific multivariate time series during ingestion. +fn calculate_tag_hash(table_name: &str, tag_values: &[String]) -> u64 { + let mut hash_data = tag_values.to_vec(); + hash_data.push(table_name.to_string()); + + let mut hasher = DefaultHasher::new(); + hasher.write(hash_data.join(";").as_bytes()); + + hasher.finish() +} + #[cfg(test)] mod tests { use super::*; From cd5e9bcf7b22951c5be971922ab8595ee167042c Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Mon, 17 Feb 2025 21:20:25 +0100 Subject: [PATCH 08/69] Remove methods to lookup and save tag hash metadata --- .../src/storage/uncompressed_data_manager.rs | 6 - .../src/metadata/table_metadata_manager.rs | 267 +----------------- 2 files changed, 1 insertion(+), 272 deletions(-) diff --git a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs index 7093c8d0a..b8b57c779 100644 --- a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs +++ b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs @@ -1301,12 +1301,6 @@ mod tests { .await .unwrap(); - local_data_folder - .table_metadata_manager - .lookup_or_compute_tag_hash(&model_table_metadata, &["tag".to_owned()]) - .await - .unwrap(); - let memory_pool = Arc::new(MemoryPool::new( INGESTED_RESERVED_MEMORY_IN_BYTES, UNCOMPRESSED_RESERVED_MEMORY_IN_BYTES, diff --git a/crates/modelardb_storage/src/metadata/table_metadata_manager.rs b/crates/modelardb_storage/src/metadata/table_metadata_manager.rs index 385afdfa3..7939f64e8 100644 --- a/crates/modelardb_storage/src/metadata/table_metadata_manager.rs +++ b/crates/modelardb_storage/src/metadata/table_metadata_manager.rs @@ -18,19 +18,16 @@ //! through this metadata manager, while it only supports a subset of the manager metadata Delta Lake. 
use std::collections::HashMap; -use std::hash::{DefaultHasher, Hasher}; use std::path::Path as StdPath; use std::sync::Arc; use arrow::array::{ - Array, ArrayRef, BinaryArray, BooleanArray, Float32Array, Int16Array, Int64Array, RecordBatch, + Array, BinaryArray, BooleanArray, Float32Array, Int16Array, Int64Array, StringArray, }; use arrow::datatypes::{DataType, Field, Schema}; use dashmap::DashMap; -use datafusion::catalog::TableProvider; use datafusion::common::{DFSchema, ToDFSchema}; -use datafusion::dataframe::DataFrame; use datafusion::logical_expr::lit; use datafusion::prelude::{col, SessionContext}; use modelardb_common::test::ERROR_BOUND_ZERO; @@ -664,128 +661,6 @@ impl TableMetadataManager { Ok(generated_columns) } - /// Return the tag hash for the given list of tag values either by retrieving it from a cache - /// or, if the combination of tag values is not in the cache, by computing a new hash. If the - /// hash is not in the cache, it is saved to the cache and persisted to the `model_table_tags` - /// table if it does not already contain it. If the hash was saved to the metadata Delta Lake, - /// also return [`true`]. If the `model_table_tags` table cannot be accessed, - /// [`ModelarDbStorageError`] is returned. - pub async fn lookup_or_compute_tag_hash( - &self, - model_table_metadata: &ModelTableMetadata, - tag_values: &[String], - ) -> Result<(u64, bool)> { - let cache_key = { - let mut cache_key_list = tag_values.to_vec(); - cache_key_list.push(model_table_metadata.name.clone()); - - cache_key_list.join(";") - }; - - // Check if the tag hash is in the cache. If it is, retrieve it. If it is not, create a new - // one and save it both in the cache and in the metadata Delta Lake. There is a minor - // race condition because the check if a tag hash is in the cache and the addition of the - // hash is done without taking a lock on the tag_value_hashes. 
However, by allowing a hash - // to possibly be computed more than once, the cache can be used without an explicit lock. - if let Some(tag_hash) = self.tag_value_hashes.get(&cache_key) { - Ok((*tag_hash, false)) - } else { - // Generate the 54-bit tag hash based on the tag values of the record batch and model - // table name. - let tag_hash = { - let mut hasher = DefaultHasher::new(); - hasher.write(cache_key.as_bytes()); - - // The 64-bit hash is shifted to make the 10 least significant bits 0. - hasher.finish() << 10 - }; - - // Save the tag hash in the metadata Delta Lake and in the cache. - let tag_hash_is_saved = self - .save_tag_hash_metadata(model_table_metadata, tag_hash, tag_values) - .await?; - - self.tag_value_hashes.insert(cache_key, tag_hash); - - Ok((tag_hash, tag_hash_is_saved)) - } - } - - /// Save the given tag hash metadata to the `model_table_tags` table if it does not already - /// contain it. If the table did not contain the tag hash, meaning it is a new tag combination, - /// return [`true`]. If the metadata cannot be inserted into `model_table_tags`, - /// [`ModelarDbStorageError`] is returned. - pub async fn save_tag_hash_metadata( - &self, - model_table_metadata: &ModelTableMetadata, - tag_hash: u64, - tag_values: &[String], - ) -> Result { - let table_name = model_table_metadata.name.as_str(); - let tag_columns = &model_table_metadata - .tag_column_indices - .iter() - .map(|index| model_table_metadata.schema.field(*index).name().clone()) - .collect::>(); - - let signed_tag_hash = i64::from_ne_bytes(tag_hash.to_ne_bytes()); - - // Save the tag hash metadata in the model_table_tags table if it does not already contain it. 
- let mut table_name_tags_columns: Vec = - vec![Arc::new(Int64Array::from(vec![signed_tag_hash]))]; - - table_name_tags_columns.append( - &mut tag_values - .iter() - .map(|tag_value| Arc::new(StringArray::from(vec![tag_value.clone()])) as ArrayRef) - .collect::>(), - ); - - let source = self - .metadata_table_data_frame(&format!("{table_name}_tags"), table_name_tags_columns) - .await?; - - let delta_ops = self - .delta_lake - .metadata_delta_ops(&format!("{table_name}_tags")) - .await?; - - // Merge the tag hash metadata in the source DataFrame into the model_table_tags table. - // For each hash, if the hash is not already in the target table, insert the hash and the - // tag values from the source DataFrame. - let (_table, insert_into_tags_metrics) = delta_ops - .merge(source, col("target.hash").eq(col("source.hash"))) - .with_source_alias("source") - .with_target_alias("target") - .when_not_matched_insert(|mut insert| { - for tag_column in tag_columns { - insert = insert.set(tag_column, col(format!("source.{tag_column}"))) - } - - insert.set("hash", col("source.hash")) - })? - .await?; - - Ok(insert_into_tags_metrics.num_target_rows_inserted > 0) - } - - /// Return a [`DataFrame`] with the given `rows` for the metadata table with the given - /// `table_name`. If the table does not exist or the [`DataFrame`] cannot be created, return - /// [`ModelarDbStorageError`]. - async fn metadata_table_data_frame( - &self, - table_name: &str, - rows: Vec, - ) -> Result { - let table = self.delta_lake.metadata_delta_table(table_name).await?; - - // TableProvider::schema(&table) is used instead of table.schema() because table.schema() - // returns the Delta Lake schema instead of the Apache Arrow DataFusion schema. - let batch = RecordBatch::try_new(TableProvider::schema(&table), rows)?; - - Ok(self.session_context.read_batch(batch)?) 
- } - /// Return a mapping from tag hashes to the tags in the columns with the names in /// `tag_column_names` for the time series in the model table with the name `model_table_name`. /// Returns a [`ModelarDbStorageError`] if the necessary data cannot be retrieved from the @@ -1070,12 +945,6 @@ mod tests { async fn test_drop_model_table_metadata() { let (temp_dir, metadata_manager) = create_metadata_manager_and_save_model_table().await; - let model_table_metadata = test::model_table_metadata(); - metadata_manager - .lookup_or_compute_tag_hash(&model_table_metadata, &["tag1".to_owned()]) - .await - .unwrap(); - metadata_manager .drop_table_metadata(test::MODEL_TABLE_NAME) .await @@ -1144,12 +1013,6 @@ mod tests { async fn test_truncate_model_table_metadata() { let (_temp_dir, metadata_manager) = create_metadata_manager_and_save_model_table().await; - let model_table_metadata = test::model_table_metadata(); - metadata_manager - .lookup_or_compute_tag_hash(&model_table_metadata, &["tag1".to_owned()]) - .await - .unwrap(); - metadata_manager .truncate_table_metadata(test::MODEL_TABLE_NAME) .await @@ -1321,128 +1184,6 @@ mod tests { ); } - #[tokio::test] - async fn test_compute_new_tag_hash() { - let (_temp_dir, metadata_manager) = create_metadata_manager_and_save_model_table().await; - - let model_table_metadata = test::model_table_metadata(); - let (tag_hash_1, tag_hash_1_is_saved) = metadata_manager - .lookup_or_compute_tag_hash(&model_table_metadata, &["tag1".to_owned()]) - .await - .unwrap(); - - let (tag_hash_2, tag_hash_2_is_saved) = metadata_manager - .lookup_or_compute_tag_hash(&model_table_metadata, &["tag2".to_owned()]) - .await - .unwrap(); - - assert!(tag_hash_1_is_saved && tag_hash_2_is_saved); - - // The tag hashes should be saved in the cache. - assert_eq!(metadata_manager.tag_value_hashes.len(), 2); - - // The tag hashes should be saved in the model_table_tags table. 
- let sql = format!("SELECT hash, tag FROM {}_tags", test::MODEL_TABLE_NAME); - let batch = sql_and_concat(&metadata_manager.session_context, &sql) - .await - .unwrap(); - - assert_eq!( - **batch.column(0), - Int64Array::from(vec![ - i64::from_ne_bytes(tag_hash_2.to_ne_bytes()), - i64::from_ne_bytes(tag_hash_1.to_ne_bytes()), - ]) - ); - assert_eq!(**batch.column(1), StringArray::from(vec!["tag2", "tag1"])); - } - - #[tokio::test] - async fn test_lookup_existing_tag_hash() { - let (_temp_dir, metadata_manager) = create_metadata_manager_and_save_model_table().await; - - let model_table_metadata = test::model_table_metadata(); - let (tag_hash_compute, tag_hash_compute_is_saved) = metadata_manager - .lookup_or_compute_tag_hash(&model_table_metadata, &["tag1".to_owned()]) - .await - .unwrap(); - - assert!(tag_hash_compute_is_saved); - assert_eq!(metadata_manager.tag_value_hashes.len(), 1); - - // When getting the same tag hash again, it should be retrieved from the cache. - let (tag_hash_lookup, tag_hash_lookup_is_saved) = metadata_manager - .lookup_or_compute_tag_hash(&model_table_metadata, &["tag1".to_owned()]) - .await - .unwrap(); - - assert!(!tag_hash_lookup_is_saved); - assert_eq!(metadata_manager.tag_value_hashes.len(), 1); - - assert_eq!(tag_hash_compute, tag_hash_lookup); - } - - #[tokio::test] - async fn test_compute_tag_hash_with_invalid_tag_values() { - let (_temp_dir, metadata_manager) = create_metadata_manager_and_save_model_table().await; - - let model_table_metadata = test::model_table_metadata(); - let zero_tags_result = metadata_manager - .lookup_or_compute_tag_hash(&model_table_metadata, &[]) - .await; - - let two_tags_result = metadata_manager - .lookup_or_compute_tag_hash( - &model_table_metadata, - &["tag1".to_owned(), "tag2".to_owned()], - ) - .await; - - assert!(zero_tags_result.is_err()); - assert!(two_tags_result.is_err()); - - // The tag hashes should not be saved in either the cache or the metadata Delta Lake. 
- assert_eq!(metadata_manager.tag_value_hashes.len(), 0); - - let sql = format!("SELECT hash FROM {}_tags", test::MODEL_TABLE_NAME); - let batch = sql_and_concat(&metadata_manager.session_context, &sql) - .await - .unwrap(); - - assert!(batch.column(0).is_empty()); - } - - #[tokio::test] - async fn test_mapping_from_hash_to_tags() { - let (_temp_dir, metadata_manager) = create_metadata_manager_and_save_model_table().await; - - let model_table_metadata = test::model_table_metadata(); - let (tag_hash_1, _tag_hash_is_saved) = metadata_manager - .lookup_or_compute_tag_hash(&model_table_metadata, &["tag1".to_owned()]) - .await - .unwrap(); - - let (tag_hash_2, _tag_hash_is_saved) = metadata_manager - .lookup_or_compute_tag_hash(&model_table_metadata, &["tag2".to_owned()]) - .await - .unwrap(); - - let mapping_from_hash_to_tags = metadata_manager - .mapping_from_hash_to_tags(test::MODEL_TABLE_NAME, &["tag"]) - .await - .unwrap(); - - assert_eq!(mapping_from_hash_to_tags.len(), 2); - assert_eq!( - mapping_from_hash_to_tags.get(&tag_hash_1).unwrap(), - &vec!["tag1".to_owned()] - ); - assert_eq!( - mapping_from_hash_to_tags.get(&tag_hash_2).unwrap(), - &vec!["tag2".to_owned()] - ); - } - #[tokio::test] async fn test_mapping_from_hash_to_tags_with_missing_model_table() { let (_temp_dir, metadata_manager) = create_metadata_manager_and_save_model_table().await; @@ -1458,12 +1199,6 @@ mod tests { async fn test_mapping_from_hash_to_tags_with_invalid_tag_column() { let (_temp_dir, metadata_manager) = create_metadata_manager_and_save_model_table().await; - let model_table_metadata = test::model_table_metadata(); - metadata_manager - .lookup_or_compute_tag_hash(&model_table_metadata, &["tag1".to_owned()]) - .await - .unwrap(); - let result = metadata_manager .mapping_from_hash_to_tags(test::MODEL_TABLE_NAME, &["invalid_tag"]) .await; From c4d0c9086dbe3828d8c7650690c8393d5213c856 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Mon, 17 Feb 
2025 21:25:26 +0100 Subject: [PATCH 09/69] Remove tag cache from table metadata manager --- .../src/metadata/table_metadata_manager.rs | 44 +++---------------- 1 file changed, 6 insertions(+), 38 deletions(-) diff --git a/crates/modelardb_storage/src/metadata/table_metadata_manager.rs b/crates/modelardb_storage/src/metadata/table_metadata_manager.rs index 7939f64e8..103c27b0a 100644 --- a/crates/modelardb_storage/src/metadata/table_metadata_manager.rs +++ b/crates/modelardb_storage/src/metadata/table_metadata_manager.rs @@ -22,11 +22,9 @@ use std::path::Path as StdPath; use std::sync::Arc; use arrow::array::{ - Array, BinaryArray, BooleanArray, Float32Array, Int16Array, Int64Array, - StringArray, + Array, BinaryArray, BooleanArray, Float32Array, Int16Array, Int64Array, StringArray, }; use arrow::datatypes::{DataType, Field, Schema}; -use dashmap::DashMap; use datafusion::common::{DFSchema, ToDFSchema}; use datafusion::logical_expr::lit; use datafusion::prelude::{col, SessionContext}; @@ -52,8 +50,6 @@ enum TableType { pub struct TableMetadataManager { /// Delta Lake with functionality to read and write to and from the metadata tables. delta_lake: DeltaLake, - /// Cache of tag value hashes used to signify when to persist new unsaved tag combinations. - tag_value_hashes: DashMap, /// Session context used to query the metadata Delta Lake tables using Apache DataFusion. 
session_context: Arc, } @@ -68,7 +64,6 @@ impl TableMetadataManager { ) -> Result { let table_metadata_manager = Self { delta_lake: DeltaLake::try_from_local_path(folder_path)?, - tag_value_hashes: DashMap::new(), session_context: maybe_session_context .unwrap_or_else(|| Arc::new(SessionContext::new())), }; @@ -90,7 +85,6 @@ impl TableMetadataManager { ) -> Result { let table_metadata_manager = Self { delta_lake: DeltaLake::try_remote_from_connection_info(connection_info)?, - tag_value_hashes: DashMap::new(), session_context: maybe_session_context .unwrap_or_else(|| Arc::new(SessionContext::new())), }; @@ -118,7 +112,6 @@ impl TableMetadataManager { access_key_id, secret_access_key, )?, - tag_value_hashes: DashMap::new(), session_context: Arc::new(SessionContext::new()), }; @@ -143,7 +136,6 @@ impl TableMetadataManager { access_key, container_name, )?, - tag_value_hashes: DashMap::new(), session_context: Arc::new(SessionContext::new()), }; @@ -429,9 +421,9 @@ impl TableMetadataManager { /// Drop the metadata for the model table with `table_name` from the metadata Delta Lake. /// This includes dropping the tags table for the model table, deleting a row from the - /// `model_table_metadata` table, deleting a row from the `model_table_field_columns` table for - /// each field column, and deleting the tag metadata from the tag cache. If the metadata could - /// not be dropped, [`ModelarDbStorageError`] is returned. + /// `model_table_metadata` table, and deleting a row from the `model_table_field_columns` table + /// for each field column. If the metadata could not be dropped, [`ModelarDbStorageError`] is + /// returned. async fn drop_model_table_metadata(&self, table_name: &str) -> Result<()> { // Drop and deregister the model_table_name_tags table. 
let tags_table_name = format!("{table_name}_tags"); @@ -457,9 +449,6 @@ impl TableMetadataManager { .with_predicate(col("table_name").eq(lit(table_name))) .await?; - // Delete the tag hash metadata from the metadata Delta Lake and the tag cache. - self.delete_tag_hash_metadata(table_name).await?; - Ok(()) } @@ -481,9 +470,8 @@ impl TableMetadataManager { } /// Truncate the metadata for the model table with `table_name` from the metadata Delta Lake. - /// This includes truncating the tags table for the model table and deleting the tag metadata - /// from the tag cache. If the metadata could not be truncated, [`ModelarDbStorageError`] is - /// returned. + /// This includes truncating the tags table for the model table. If the metadata could not be + /// truncated, [`ModelarDbStorageError`] is returned. async fn truncate_model_table_metadata(&self, table_name: &str) -> Result<()> { // Truncate the model_table_name_tags table. self.delta_lake @@ -492,20 +480,6 @@ impl TableMetadataManager { .delete() .await?; - // Delete the tag hash metadata from the metadata Delta Lake and the tag cache. - self.delete_tag_hash_metadata(table_name).await?; - - Ok(()) - } - - /// Delete the tag hash metadata for the model table with `table_name` from the tag cache. If - /// the metadata could not be deleted, [`ModelarDbStorageError`] is returned. - async fn delete_tag_hash_metadata(&self, table_name: &str) -> Result<()> { - // Delete the tag metadata from the tag cache. The table name is always the last part of - // the cache key. - self.tag_value_hashes - .retain(|key, _| key.split(';').last() != Some(table_name)); - Ok(()) } @@ -973,9 +947,6 @@ mod tests { .unwrap(); assert_eq!(batch.num_rows(), 0); - - // Verify that the tag cache was cleared. - assert!(metadata_manager.tag_value_hashes.is_empty()); } #[tokio::test] @@ -1025,9 +996,6 @@ mod tests { .unwrap(); assert_eq!(batch.num_rows(), 0); - - // Verify that the tag cache was cleared. 
- assert!(metadata_manager.tag_value_hashes.is_empty()); } #[tokio::test] From 92831e310887c4ff2473439761bb9d8b22762766 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Mon, 17 Feb 2025 21:40:18 +0100 Subject: [PATCH 10/69] Remove mapping_from_hash_to_tags() --- .../src/metadata/table_metadata_manager.rs | 73 +------------------ 1 file changed, 1 insertion(+), 72 deletions(-) diff --git a/crates/modelardb_storage/src/metadata/table_metadata_manager.rs b/crates/modelardb_storage/src/metadata/table_metadata_manager.rs index 103c27b0a..bff1942a1 100644 --- a/crates/modelardb_storage/src/metadata/table_metadata_manager.rs +++ b/crates/modelardb_storage/src/metadata/table_metadata_manager.rs @@ -17,13 +17,10 @@ //! and the manager metadata Delta Lake. Note that the entire server metadata Delta Lake can be accessed //! through this metadata manager, while it only supports a subset of the manager metadata Delta Lake. -use std::collections::HashMap; use std::path::Path as StdPath; use std::sync::Arc; -use arrow::array::{ - Array, BinaryArray, BooleanArray, Float32Array, Int16Array, Int64Array, StringArray, -}; +use arrow::array::{Array, BinaryArray, BooleanArray, Float32Array, Int16Array, StringArray}; use arrow::datatypes::{DataType, Field, Schema}; use datafusion::common::{DFSchema, ToDFSchema}; use datafusion::logical_expr::lit; @@ -634,52 +631,6 @@ impl TableMetadataManager { Ok(generated_columns) } - - /// Return a mapping from tag hashes to the tags in the columns with the names in - /// `tag_column_names` for the time series in the model table with the name `model_table_name`. - /// Returns a [`ModelarDbStorageError`] if the necessary data cannot be retrieved from the - /// metadata Delta Lake. - pub async fn mapping_from_hash_to_tags( - &self, - model_table_name: &str, - tag_column_names: &[&str], - ) -> Result>> { - // Return an empty HashMap if no tag column names are passed to keep the signature simple. 
- if tag_column_names.is_empty() { - return Ok(HashMap::new()); - } - - let sql = format!( - "SELECT hash, {} FROM {model_table_name}_tags", - tag_column_names.join(","), - ); - let batch = sql_and_concat(&self.session_context, &sql).await?; - - let hash_array = modelardb_types::array!(batch, 0, Int64Array); - - // For each tag column, get the corresponding column array. - let tag_arrays: Vec<&StringArray> = tag_column_names - .iter() - .enumerate() - .map(|(index, _tag_column)| modelardb_types::array!(batch, index + 1, StringArray)) - .collect(); - - let mut hash_to_tags = HashMap::new(); - for row_index in 0..batch.num_rows() { - let signed_tag_hash = hash_array.value(row_index); - let tag_hash = u64::from_ne_bytes(signed_tag_hash.to_ne_bytes()); - - // For each tag array, add the row index value to the tags for this tag hash. - let tags: Vec = tag_arrays - .iter() - .map(|tag_array| tag_array.value(row_index).to_owned()) - .collect(); - - hash_to_tags.insert(tag_hash, tags); - } - - Ok(hash_to_tags) - } } #[cfg(test)] @@ -1152,28 +1103,6 @@ mod tests { ); } - #[tokio::test] - async fn test_mapping_from_hash_to_tags_with_missing_model_table() { - let (_temp_dir, metadata_manager) = create_metadata_manager_and_save_model_table().await; - - let result = metadata_manager - .mapping_from_hash_to_tags("missing_table", &["tag"]) - .await; - - assert!(result.is_err()); - } - - #[tokio::test] - async fn test_mapping_from_hash_to_tags_with_invalid_tag_column() { - let (_temp_dir, metadata_manager) = create_metadata_manager_and_save_model_table().await; - - let result = metadata_manager - .mapping_from_hash_to_tags(test::MODEL_TABLE_NAME, &["invalid_tag"]) - .await; - - assert!(result.is_err()); - } - async fn create_metadata_manager_and_save_model_table() -> (TempDir, TableMetadataManager) { let temp_dir = tempfile::tempdir().unwrap(); let metadata_manager = TableMetadataManager::try_from_path(temp_dir.path(), None) From 13e3ad35cface3cd19e9fc53b015f0c59f5e9273 Mon Sep 17 
00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Mon, 17 Feb 2025 21:48:04 +0100 Subject: [PATCH 11/69] Remove model_table_tags table from metadata Delta Lake --- crates/modelardb_server/src/context.rs | 7 -- .../src/metadata/table_metadata_manager.rs | 114 +----------------- 2 files changed, 6 insertions(+), 115 deletions(-) diff --git a/crates/modelardb_server/src/context.rs b/crates/modelardb_server/src/context.rs index 9d2c354b0..88a7c35d9 100644 --- a/crates/modelardb_server/src/context.rs +++ b/crates/modelardb_server/src/context.rs @@ -184,13 +184,6 @@ impl Context { .save_model_table_metadata(model_table_metadata) .await?; - // Register the metadata table needed for querying the model table if it is not already - // registered. The tags table is already registered if the query data folder and local data - // folder is the same. - query_folder_table_metadata_manager - .register_tags_table(&model_table_metadata.name) - .await?; - info!("Created model table '{}'.", model_table_metadata.name); Ok(()) diff --git a/crates/modelardb_storage/src/metadata/table_metadata_manager.rs b/crates/modelardb_storage/src/metadata/table_metadata_manager.rs index bff1942a1..55247b1f8 100644 --- a/crates/modelardb_storage/src/metadata/table_metadata_manager.rs +++ b/crates/modelardb_storage/src/metadata/table_metadata_manager.rs @@ -202,29 +202,6 @@ impl TableMetadataManager { delta_table, )?; - // Register the model_table_name_tags table for each model table. - for model_table_name in self.model_table_names().await? { - self.register_tags_table(&model_table_name).await?; - } - - Ok(()) - } - - /// Register the tags table for the model table with `model_table_name` if it is not already - /// registered. The tags table is required to be registered to allow querying a model table. - /// If the tags table could not be registered, [`ModelarDbStorageError`] is returned. 
- pub async fn register_tags_table(&self, model_table_name: &str) -> Result<()> { - let tags_table_name = format!("{}_tags", model_table_name); - - let delta_table = self - .delta_lake - .metadata_delta_table(&tags_table_name) - .await?; - - if !self.session_context.table_exist(&tags_table_name)? { - register_metadata_table(&self.session_context, &tags_table_name, delta_table)?; - } - Ok(()) } @@ -299,33 +276,13 @@ impl TableMetadataManager { Ok(()) } - /// Save the created model table to the metadata Delta Lake. This includes creating a tags table - /// for the model table, adding a row to the `model_table_metadata` table, and adding a row to - /// the `model_table_field_columns` table for each field column. + /// Save the created model table to the metadata Delta Lake. This includes adding a row to the + /// `model_table_metadata` table and adding a row to the `model_table_field_columns` table for + /// each field column. pub async fn save_model_table_metadata( &self, model_table_metadata: &ModelTableMetadata, ) -> Result<()> { - // Create and register a table_name_tags table to save the 54-bit tag hashes when ingesting data. - let mut table_name_tags_columns = vec![Field::new("hash", DataType::Int64, false)]; - - // Add a column definition for each tag column in the query schema. - table_name_tags_columns.append( - &mut model_table_metadata - .tag_column_indices - .iter() - .map(|index| model_table_metadata.query_schema.field(*index).clone()) - .collect::>(), - ); - - let tags_table_name = format!("{}_tags", model_table_metadata.name); - let delta_table = self - .delta_lake - .create_metadata_table(&tags_table_name, &Schema::new(table_name_tags_columns)) - .await?; - - register_metadata_table(&self.session_context, &tags_table_name, delta_table)?; - // Convert the query schema to bytes, so it can be saved in the metadata Delta Lake. 
let query_schema_bytes = try_convert_schema_to_bytes(&model_table_metadata.query_schema)?; @@ -417,19 +374,10 @@ impl TableMetadataManager { } /// Drop the metadata for the model table with `table_name` from the metadata Delta Lake. - /// This includes dropping the tags table for the model table, deleting a row from the - /// `model_table_metadata` table, and deleting a row from the `model_table_field_columns` table - /// for each field column. If the metadata could not be dropped, [`ModelarDbStorageError`] is - /// returned. + /// This includes deleting a row from the `model_table_metadata` table and deleting a row from + /// the `model_table_field_columns` table for each field column. If the metadata could not be + /// dropped, [`ModelarDbStorageError`] is returned. async fn drop_model_table_metadata(&self, table_name: &str) -> Result<()> { - // Drop and deregister the model_table_name_tags table. - let tags_table_name = format!("{table_name}_tags"); - self.delta_lake - .drop_metadata_table(&tags_table_name) - .await?; - - self.session_context.deregister_table(&tags_table_name)?; - // Delete the table metadata from the model_table_metadata table. self.delta_lake .metadata_delta_ops("model_table_metadata") @@ -675,44 +623,6 @@ mod tests { .is_ok()); } - #[tokio::test] - async fn test_register_tags_table() { - let (_temp_dir, metadata_manager) = create_metadata_manager_and_save_model_table().await; - let session_context = &metadata_manager.session_context; - - let tags_table_name = format!("{}_tags", test::MODEL_TABLE_NAME); - session_context.deregister_table(&tags_table_name).unwrap(); - assert!(!session_context.table_exist(&tags_table_name).unwrap()); - - metadata_manager - .register_tags_table(test::MODEL_TABLE_NAME) - .await - .unwrap(); - - assert!(session_context.table_exist(&tags_table_name).unwrap()); - - // If the table is already registered, it should not be registered again. 
- let result = metadata_manager - .register_tags_table(test::MODEL_TABLE_NAME) - .await; - - assert!(result.is_ok()); - assert!(session_context.table_exist(&tags_table_name).unwrap()); - } - - #[tokio::test] - async fn test_register_missing_model_table_tags_table() { - let (_temp_dir, metadata_manager) = create_metadata_manager_and_save_model_table().await; - - let result = metadata_manager.register_tags_table("missing_table").await; - - assert!(result.is_err()); - assert!(!metadata_manager - .session_context - .table_exist("missing_table_tags") - .unwrap()); - } - #[tokio::test] async fn test_normal_table_is_normal_table() { let (_temp_dir, metadata_manager) = create_metadata_manager_and_save_normal_tables().await; @@ -802,10 +712,6 @@ mod tests { async fn test_save_model_table_metadata() { let (_temp_dir, metadata_manager) = create_metadata_manager_and_save_model_table().await; - // Verify that the table was created and has the expected columns. - let sql = format!("SELECT hash, tag FROM {}_tags", test::MODEL_TABLE_NAME); - assert!(metadata_manager.session_context.sql(&sql).await.is_ok()); - // Check that a row has been added to the model_table_metadata table. let sql = "SELECT table_name, query_schema FROM model_table_metadata"; let batch = sql_and_concat(&metadata_manager.session_context, sql) @@ -875,14 +781,6 @@ mod tests { .await .unwrap(); - // Verify that the tags table was deleted from the Delta Lake. - let tags_table_name = format!("{}_tags", test::MODEL_TABLE_NAME); - assert!(!temp_dir - .path() - .join("metadata") - .join(tags_table_name) - .exists()); - // Verify that the model table was deleted from the model_table_metadata table. 
let sql = "SELECT table_name FROM model_table_metadata"; let batch = sql_and_concat(&metadata_manager.session_context, sql) From ec961f6923709bf55a0dd02a8deab78d2d956778 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Mon, 17 Feb 2025 22:02:08 +0100 Subject: [PATCH 12/69] Remove method to truncate table metadata --- crates/modelardb_manager/src/remote.rs | 20 ++--- crates/modelardb_server/src/context.rs | 11 +-- .../src/metadata/table_metadata_manager.rs | 80 ------------------- 3 files changed, 10 insertions(+), 101 deletions(-) diff --git a/crates/modelardb_manager/src/remote.rs b/crates/modelardb_manager/src/remote.rs index b52746353..ca0ef8e0b 100644 --- a/crates/modelardb_manager/src/remote.rs +++ b/crates/modelardb_manager/src/remote.rs @@ -282,19 +282,15 @@ impl FlightServiceHandler { Ok(()) } - /// Truncate the table in the metadata Delta Lake, the data Delta Lake, and in each node - /// controlled by the manager. If the table does not exist or the table cannot be truncated in - /// the remote data folder and in each node, return [`Status`]. + /// Truncate the table in the data Delta Lake and in each node controlled by the manager. If the + /// table does not exist or the table cannot be truncated in the remote data folder and in each + /// node, return [`Status`]. async fn truncate_cluster_table(&self, table_name: &str) -> StdResult<(), Status> { - // Truncate the table in the remote data folder metadata Delta Lake. This will return an - // error if the table does not exist. - self.context - .remote_data_folder - .metadata_manager - .table_metadata_manager - .truncate_table_metadata(table_name) - .await - .map_err(error_to_status_internal)?; + if self.check_if_table_exists(table_name).await.is_ok() { + return Err(Status::invalid_argument(format!( + "Table with name '{table_name}' does not exist.", + ))); + } // Truncate the table in the remote data folder data Delta lake. 
self.context diff --git a/crates/modelardb_server/src/context.rs b/crates/modelardb_server/src/context.rs index 88a7c35d9..f5417b942 100644 --- a/crates/modelardb_server/src/context.rs +++ b/crates/modelardb_server/src/context.rs @@ -329,8 +329,8 @@ impl Context { } /// Delete all data from the table with `table_name` if it exists. The table data is deleted - /// from the storage engine, metadata Delta Lake, and data Delta Lake. If the table does not - /// exist or if it could not be truncated, [`ModelarDbServerError`] is returned. + /// from the storage engine and data Delta Lake. If the table does not exist or if it could not + /// be truncated, [`ModelarDbServerError`] is returned. pub async fn truncate_table(&self, table_name: &str) -> Result<()> { // Deleting the table from the storage engine does not require the table to exist, so the // table is checked first. @@ -342,13 +342,6 @@ impl Context { self.drop_table_from_storage_engine(table_name).await?; - // Delete the table metadata from the metadata Delta Lake. - self.data_folders - .local_data_folder - .table_metadata_manager - .truncate_table_metadata(table_name) - .await?; - // Delete the table data from the data Delta Lake. self.data_folders .local_data_folder diff --git a/crates/modelardb_storage/src/metadata/table_metadata_manager.rs b/crates/modelardb_storage/src/metadata/table_metadata_manager.rs index 55247b1f8..b73c923d8 100644 --- a/crates/modelardb_storage/src/metadata/table_metadata_manager.rs +++ b/crates/modelardb_storage/src/metadata/table_metadata_manager.rs @@ -397,37 +397,6 @@ impl TableMetadataManager { Ok(()) } - /// Depending on the type of the table with `table_name`, truncate either the normal table - /// metadata or the model table metadata from the metadata Delta Lake. Note that if truncating - /// the metadata of a normal table, the metadata Delta Lake is unaffected, but it is allowed to - /// keep the interface consistent. 
If the table does not exist or the metadata could not be - /// truncated, [`ModelarDbStorageError`] is returned. - pub async fn truncate_table_metadata(&self, table_name: &str) -> Result<()> { - if self.is_normal_table(table_name).await? { - Ok(()) - } else if self.is_model_table(table_name).await? { - self.truncate_model_table_metadata(table_name).await - } else { - Err(ModelarDbStorageError::InvalidArgument(format!( - "Table with name '{table_name}' does not exist." - ))) - } - } - - /// Truncate the metadata for the model table with `table_name` from the metadata Delta Lake. - /// This includes truncating the tags table for the model table. If the metadata could not be - /// truncated, [`ModelarDbStorageError`] is returned. - async fn truncate_model_table_metadata(&self, table_name: &str) -> Result<()> { - // Truncate the model_table_name_tags table. - self.delta_lake - .metadata_delta_ops(&format!("{table_name}_tags")) - .await? - .delete() - .await?; - - Ok(()) - } - /// Return the [`ModelTableMetadata`] of each model table currently in the metadata Delta Lake. /// If the [`ModelTableMetadata`] cannot be retrieved, [`ModelarDbStorageError`] is returned. pub async fn model_table_metadata(&self) -> Result>> { @@ -808,55 +777,6 @@ mod tests { .is_err()); } - #[tokio::test] - async fn test_truncate_normal_table_metadata() { - let (_temp_dir, metadata_manager) = create_metadata_manager_and_save_normal_tables().await; - - metadata_manager - .truncate_table_metadata("normal_table_1") - .await - .unwrap(); - - // Verify that the metadata Delta Lake was left unchanged. 
- let sql = "SELECT table_name FROM normal_table_metadata"; - let batch = sql_and_concat(&metadata_manager.session_context, sql) - .await - .unwrap(); - - assert_eq!( - **batch.column(0), - StringArray::from(vec!["normal_table_2", "normal_table_1"]) - ); - } - - #[tokio::test] - async fn test_truncate_model_table_metadata() { - let (_temp_dir, metadata_manager) = create_metadata_manager_and_save_model_table().await; - - metadata_manager - .truncate_table_metadata(test::MODEL_TABLE_NAME) - .await - .unwrap(); - - // Verify that the tags table was truncated. - let sql = format!("SELECT hash FROM {}_tags", test::MODEL_TABLE_NAME); - let batch = sql_and_concat(&metadata_manager.session_context, &sql) - .await - .unwrap(); - - assert_eq!(batch.num_rows(), 0); - } - - #[tokio::test] - async fn test_truncate_table_metadata_for_missing_table() { - let (_temp_dir, metadata_manager) = create_metadata_manager_and_save_normal_tables().await; - - assert!(metadata_manager - .truncate_table_metadata("missing_table") - .await - .is_err()); - } - async fn create_metadata_manager_and_save_normal_tables() -> (TempDir, TableMetadataManager) { let temp_dir = tempfile::tempdir().unwrap(); let metadata_manager = TableMetadataManager::try_from_path(temp_dir.path(), None) From e7e1aed192478fea5b1bf3a9e8e87daa255f67fa Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Tue, 18 Feb 2025 23:44:02 +0100 Subject: [PATCH 13/69] Remove separate schema for uncompressed data --- .../src/metadata/model_table_metadata.rs | 22 ------------------- 1 file changed, 22 deletions(-) diff --git a/crates/modelardb_storage/src/metadata/model_table_metadata.rs b/crates/modelardb_storage/src/metadata/model_table_metadata.rs index 5100bfb21..8ec3f36a8 100644 --- a/crates/modelardb_storage/src/metadata/model_table_metadata.rs +++ b/crates/modelardb_storage/src/metadata/model_table_metadata.rs @@ -43,8 +43,6 @@ pub struct ModelTableMetadata { pub tag_column_indices: Vec, 
/// Error bounds of the columns in `schema`. It can only be non-zero for field columns. pub error_bounds: Vec, - /// Schema of the data that can be compressed in the model table. - pub uncompressed_schema: Arc, /// Schema of the data that can be read from the model table. pub query_schema: Arc, /// Projection that changes `query_schema` to `schema`. @@ -114,14 +112,6 @@ impl ModelTableMetadata { query_schema.clone() }; - // Schema containing timestamps and stored field columns for use by uncompressed buffers. - let uncompressed_schema = Arc::new(schema_without_generated.project( - &compute_indices_of_columns_without_data_type( - &schema_without_generated, - DataType::Utf8, - ), - )?); - // A model table must only contain one stored timestamp column, one or more stored field // columns, zero or more generated field columns, and zero or more stored tag columns. let timestamp_column_indices = compute_indices_of_columns_with_data_type( @@ -156,7 +146,6 @@ impl ModelTableMetadata { field_column_indices, tag_column_indices, error_bounds: error_bounds_without_generated, - uncompressed_schema, query_schema, query_schema_to_schema: field_indices_without_generated, generated_columns, @@ -187,17 +176,6 @@ fn compute_indices_of_columns_with_data_type(schema: &Schema, data_type: DataTyp .collect() } -/// Compute the indices of all columns in `schema` without `data_type`. -fn compute_indices_of_columns_without_data_type( - schema: &Schema, - data_type: DataType, -) -> Vec { - let fields = schema.fields(); - (0..fields.len()) - .filter(|index| *fields[*index].data_type() != data_type) - .collect() -} - /// Column that is generated by a [`Expr`] using zero or more stored columns as input. 
#[derive(Clone, Debug, PartialEq)] pub struct GeneratedColumn { From be0103567a3e46b403a4d2db8d8fe9a22f85ce5d Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Tue, 18 Feb 2025 23:44:25 +0100 Subject: [PATCH 14/69] Include tag values in uncompressed data buffer data --- .../src/storage/uncompressed_data_buffer.rs | 69 +++++++++++++++---- .../src/storage/uncompressed_data_manager.rs | 11 +-- 2 files changed, 61 insertions(+), 19 deletions(-) diff --git a/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs b/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs index 14329ae01..c3aa6597b 100644 --- a/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs +++ b/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs @@ -22,7 +22,7 @@ use std::fmt::{Debug, Formatter, Result as FmtResult}; use std::mem; use std::sync::Arc; -use datafusion::arrow::array::{Array, ArrayBuilder}; +use datafusion::arrow::array::{Array, ArrayBuilder, StringArray}; use datafusion::arrow::compute; use datafusion::arrow::record_batch::RecordBatch; use modelardb_storage::metadata::model_table_metadata::ModelTableMetadata; @@ -82,11 +82,14 @@ pub(super) struct UncompressedInMemoryDataBuffer { timestamps: TimestampBuilder, /// Builders for each stored field that float values are appended to. values: Vec, + /// The tag values for the time series the buffer stores data points for. + tag_values: Vec, } impl UncompressedInMemoryDataBuffer { pub(super) fn new( tag_hash: u64, + tag_values: Vec, model_table_metadata: Arc, current_batch_index: u64, ) -> Self { @@ -101,6 +104,7 @@ impl UncompressedInMemoryDataBuffer { updated_by_batch_index: current_batch_index, timestamps, values, + tag_values, } } @@ -153,23 +157,40 @@ impl UncompressedInMemoryDataBuffer { /// Finish the array builders and return the data in a [`RecordBatch`] sorted by time. 
pub(super) async fn record_batch(&mut self) -> Result { + let buffer_length = self.len(); let timestamps = self.timestamps.finish(); // lexsort() is not used as it is unclear in what order it sorts multiple arrays, instead a // combination of sort_to_indices() and take(), like how lexsort() is implemented, is used. let sorted_indices = compute::sort_to_indices(×tamps, None, None)?; - let mut columns = Vec::with_capacity(1 + self.values.len()); - columns.push(compute::take(×tamps, &sorted_indices, None)?); - for value in &mut self.values { - columns.push(compute::take(&value.finish(), &sorted_indices, None)?); + let mut field_column_index = 0; + let mut tag_column_index = 0; + let mut columns = Vec::with_capacity(self.model_table_metadata.schema.fields().len()); + + // Iterate over the column indices in the schema and add the sorted data to the columns. + for column_index in 0..self.model_table_metadata.schema.fields().len() { + if self.model_table_metadata.is_timestamp(column_index) { + columns.push(compute::take(×tamps, &sorted_indices, None)?); + } else if self.model_table_metadata.is_tag(column_index) { + // The tag value is the same for each data point so it is not sorted. + let tag_value = self.tag_values[tag_column_index].clone(); + let tag_array: StringArray = std::iter::repeat(Some(tag_value)) + .take(buffer_length) + .collect(); + columns.push(Arc::new(tag_array)); + + tag_column_index += 1; + } else { + let values = &self.values[field_column_index].finish(); + columns.push(compute::take(&values, &sorted_indices, None)?); + + field_column_index += 1; + } } - RecordBatch::try_new( - self.model_table_metadata.uncompressed_schema.clone(), - columns, - ) - .map_err(|error| error.into()) + RecordBatch::try_new(self.model_table_metadata.schema.clone(), columns) + .map_err(|error| error.into()) } /// Return the tag hash that identifies the time series the buffer stores data points from. 
@@ -255,7 +276,8 @@ impl UncompressedOnDiskDataBuffer { data_points: RecordBatch, ) -> Result { // Create a path that uses the first timestamp as the filename. - let timestamps = modelardb_types::array!(data_points, 0, TimestampArray); + let timestamp_index = model_table_metadata.timestamp_column_index; + let timestamps = modelardb_types::array!(data_points, timestamp_index, TimestampArray); let file_path = spilled_buffer_file_path( &model_table_metadata.name, tag_hash, @@ -343,13 +365,32 @@ impl UncompressedOnDiskDataBuffer { ) -> Result { let data_points = self.record_batch().await?; - let timestamp_column_array = modelardb_types::array!(data_points, 0, TimestampArray); - let field_column_arrays: Vec<_> = (1..data_points.num_columns()) - .map(|index| modelardb_types::array!(data_points, index, ValueArray)) + let timestamp_index = self.model_table_metadata.timestamp_column_index; + let timestamp_column_array = + modelardb_types::array!(data_points, timestamp_index, TimestampArray); + + let field_column_arrays: Vec<_> = self + .model_table_metadata + .field_column_indices + .iter() + .map(|index| modelardb_types::array!(data_points, *index, ValueArray)) + .collect(); + + let tag_column_arrays: Vec<_> = self + .model_table_metadata + .tag_column_indices + .iter() + .map(|index| modelardb_types::array!(data_points, *index, StringArray)) + .collect(); + + let tag_values: Vec = tag_column_arrays + .iter() + .map(|array| array.value(0).to_string()) .collect(); let mut in_memory_buffer = UncompressedInMemoryDataBuffer::new( self.tag_hash, + tag_values, self.model_table_metadata.clone(), current_batch_index, ); diff --git a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs index b8b57c779..ff353a718 100644 --- a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs +++ b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs @@ -313,6 +313,7 @@ impl 
UncompressedDataManager { let mut uncompressed_in_memory_data_buffer = UncompressedInMemoryDataBuffer::new( tag_hash, + tag_values, model_table_metadata, current_batch_index, ); @@ -591,16 +592,16 @@ impl UncompressedDataManager { }; let data_points = maybe_data_points?; - let uncompressed_timestamps = modelardb_types::array!(data_points, 0, TimestampArray); + let timestamp_index = model_table_metadata.timestamp_column_index; + let uncompressed_timestamps = + modelardb_types::array!(data_points, timestamp_index, TimestampArray); let compressed_segments = model_table_metadata .field_column_indices .iter() - .enumerate() - .map(|(value_index, field_column_index)| { - // One is added to value_index as the first array contains the timestamps. + .map(|field_column_index| { let uncompressed_values = - modelardb_types::array!(data_points, value_index + 1, ValueArray); + modelardb_types::array!(data_points, *field_column_index, ValueArray); let univariate_id = tag_hash | (*field_column_index as u64); let error_bound = model_table_metadata.error_bounds[*field_column_index]; From 9ada71e0027d74a2240fc30d1d0bbb97090876c7 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 19 Feb 2025 00:27:38 +0100 Subject: [PATCH 15/69] Add a test method to get uncompressed data for a model table --- .../src/storage/uncompressed_data_manager.rs | 29 +++---------------- crates/modelardb_storage/src/test.rs | 27 +++++++++++++++-- 2 files changed, 29 insertions(+), 27 deletions(-) diff --git a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs index ff353a718..4f622d18d 100644 --- a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs +++ b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs @@ -670,7 +670,6 @@ mod tests { use std::sync::Arc; use datafusion::arrow::array::StringBuilder; - use datafusion::arrow::datatypes::SchemaRef; use 
datafusion::arrow::record_batch::RecordBatch; use modelardb_common::test::{ COMPRESSED_RESERVED_MEMORY_IN_BYTES, INGESTED_RESERVED_MEMORY_IN_BYTES, @@ -714,7 +713,7 @@ mod tests { // Ingest a single data point and sleep to allow the ingestion thread to finish. let mut storage_engine = context.storage_engine.write().await; - let data = uncompressed_data(1, model_table_metadata.schema.clone()); + let data = test::uncompressed_model_table_record_batch(1); storage_engine .insert_data_points(model_table_metadata, data) @@ -759,7 +758,7 @@ mod tests { let temp_dir = tempfile::tempdir().unwrap(); let (data_manager, model_table_metadata) = create_managers(&temp_dir).await; - let data = uncompressed_data(1, model_table_metadata.schema.clone()); + let data = test::uncompressed_model_table_record_batch(1); let ingested_data_buffer = IngestedDataBuffer::new(model_table_metadata, data); data_manager @@ -776,7 +775,7 @@ mod tests { let temp_dir = tempfile::tempdir().unwrap(); let (data_manager, model_table_metadata) = create_managers(&temp_dir).await; - let data = uncompressed_data(2, model_table_metadata.schema.clone()); + let data = test::uncompressed_model_table_record_batch(2); let ingested_data_buffer = IngestedDataBuffer::new(model_table_metadata, data); data_manager @@ -793,7 +792,7 @@ mod tests { let temp_dir = tempfile::tempdir().unwrap(); let (data_manager, model_table_metadata) = create_managers(&temp_dir).await; - let data = uncompressed_data(2, model_table_metadata.schema.clone()); + let data = test::uncompressed_model_table_record_batch(2); let data_size = data.get_array_memory_size(); // Simulate StorageEngine decrementing ingested memory when receiving ingested data. @@ -816,26 +815,6 @@ mod tests { ); } - /// Create a [`RecordBatch`] with data that resembles uncompressed data with a single tag and two - /// field columns. The returned data has `row_count` rows, with a different tag for each row. 
- /// Also create model table metadata for a model table that matches the created data. - fn uncompressed_data(row_count: usize, schema: SchemaRef) -> RecordBatch { - let tags: Vec = (0..row_count).map(|tag| tag.to_string()).collect(); - let timestamps: Vec = (0..row_count).map(|ts| ts as Timestamp).collect(); - let values: Vec = (0..row_count).map(|value| value as Value).collect(); - - RecordBatch::try_new( - schema, - vec![ - Arc::new(TimestampArray::from(timestamps)), - Arc::new(ValueArray::from(values.clone())), - Arc::new(ValueArray::from(values)), - Arc::new(StringArray::from(tags)), - ], - ) - .unwrap() - } - #[tokio::test] async fn test_can_insert_data_point_into_new_uncompressed_data_buffer() { let temp_dir = tempfile::tempdir().unwrap(); diff --git a/crates/modelardb_storage/src/test.rs b/crates/modelardb_storage/src/test.rs index 48641e437..68dd658f5 100644 --- a/crates/modelardb_storage/src/test.rs +++ b/crates/modelardb_storage/src/test.rs @@ -17,13 +17,17 @@ use std::sync::Arc; -use arrow::array::{BinaryArray, Float32Array, RecordBatch, UInt16Array, UInt64Array, UInt8Array}; +use arrow::array::{ + BinaryArray, Float32Array, RecordBatch, StringArray, UInt16Array, UInt64Array, UInt8Array, +}; use arrow::compute::concat_batches; use arrow::datatypes::{ArrowPrimitiveType, DataType, Field, Schema}; use modelardb_common::test::{ERROR_BOUND_FIVE, ERROR_BOUND_ONE, ERROR_BOUND_ZERO}; use modelardb_types::functions; use modelardb_types::schemas::{COMPRESSED_SCHEMA, TABLE_METADATA_SCHEMA}; -use modelardb_types::types::{ArrowTimestamp, ArrowValue, ErrorBound, TimestampArray, ValueArray}; +use modelardb_types::types::{ + ArrowTimestamp, ArrowValue, ErrorBound, Timestamp, TimestampArray, Value, ValueArray, +}; use crate::metadata::model_table_metadata::ModelTableMetadata; use crate::{model_table_metadata_to_record_batch, normal_table_metadata_to_record_batch}; @@ -114,6 +118,25 @@ pub fn model_table_metadata_arc() -> Arc { Arc::new(model_table_metadata()) } +/// 
Create a [`RecordBatch`] with data that resembles uncompressed data with a single tag and two +/// field columns. The returned data has `row_count` rows, with a different tag for each row. +pub fn uncompressed_model_table_record_batch(row_count: usize) -> RecordBatch { + let tags: Vec = (0..row_count).map(|tag| tag.to_string()).collect(); + let timestamps: Vec = (0..row_count).map(|ts| ts as Timestamp).collect(); + let values: Vec = (0..row_count).map(|value| value as Value).collect(); + + RecordBatch::try_new( + model_table_metadata().schema.clone(), + vec![ + Arc::new(TimestampArray::from(timestamps)), + Arc::new(ValueArray::from(values.clone())), + Arc::new(ValueArray::from(values)), + Arc::new(StringArray::from(tags)), + ], + ) + .unwrap() +} + /// Return a [`RecordBatch`] containing three compressed segments. pub fn compressed_segments_record_batch() -> RecordBatch { compressed_segments_record_batch_with_time(1, 0, 0.0) From 91723dc505d19c33a6efc7b82006b4f446ccf563 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 19 Feb 2025 00:40:54 +0100 Subject: [PATCH 16/69] Add method to get column arrays from model table metadata --- .../src/metadata/model_table_metadata.rs | 81 ++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) diff --git a/crates/modelardb_storage/src/metadata/model_table_metadata.rs b/crates/modelardb_storage/src/metadata/model_table_metadata.rs index 8ec3f36a8..42d9a5feb 100644 --- a/crates/modelardb_storage/src/metadata/model_table_metadata.rs +++ b/crates/modelardb_storage/src/metadata/model_table_metadata.rs @@ -19,11 +19,13 @@ use std::result::Result as StdResult; use std::sync::Arc; +use arrow::array::StringArray; +use arrow::record_batch::RecordBatch; use datafusion::arrow::datatypes::{ArrowPrimitiveType, DataType, Schema}; use datafusion::common::DFSchema; use datafusion::error::DataFusionError; use datafusion::logical_expr::expr::Expr; -use 
modelardb_types::types::{ArrowTimestamp, ArrowValue, ErrorBound}; +use modelardb_types::types::{ArrowTimestamp, ArrowValue, ErrorBound, TimestampArray, ValueArray}; use crate::error::{ModelarDbStorageError, Result}; use crate::parser::tokenize_and_parse_sql_expression; @@ -166,6 +168,44 @@ impl ModelTableMetadata { pub fn is_tag(&self, index: usize) -> bool { self.tag_column_indices.contains(&index) } + + /// Return the column arrays for the timestamp, field, and tag columns in `record_batch`. If + /// `record_batch` does not contain the required columns, return [`ModelarDbStorageError`]. + pub fn column_arrays<'a>( + &self, + record_batch: &'a RecordBatch, + ) -> Result<( + &'a TimestampArray, + Vec<&'a ValueArray>, + Vec<&'a StringArray>, + )> { + if record_batch.schema() != self.schema { + return Err(ModelarDbStorageError::InvalidArgument( + "The record batch does not match the schema of the model table.".to_owned(), + )); + } + + let timestamp_column_array = + modelardb_types::array!(record_batch, self.timestamp_column_index, TimestampArray); + + let field_column_arrays: Vec<_> = self + .field_column_indices + .iter() + .map(|index| modelardb_types::array!(record_batch, *index, ValueArray)) + .collect(); + + let tag_column_arrays: Vec<_> = self + .tag_column_indices + .iter() + .map(|index| modelardb_types::array!(record_batch, *index, StringArray)) + .collect(); + + Ok(( + timestamp_column_array, + field_column_arrays, + tag_column_arrays, + )) + } } /// Compute the indices of all columns in `schema` with `data_type`. 
@@ -442,6 +482,45 @@ mod test { assert!(model_table_metadata.is_tag(3)); } + #[test] + fn test_column_arrays() { + let model_table_metadata = test::model_table_metadata(); + let record_batch = test::uncompressed_model_table_record_batch(1); + + let (timestamp_column_array, field_column_arrays, tag_column_arrays) = + model_table_metadata.column_arrays(&record_batch).unwrap(); + + assert_eq!( + modelardb_types::array!(record_batch, 0, TimestampArray), + timestamp_column_array + ); + assert_eq!( + modelardb_types::array!(record_batch, 1, ValueArray), + field_column_arrays[0] + ); + assert_eq!( + modelardb_types::array!(record_batch, 2, ValueArray), + field_column_arrays[1] + ); + assert_eq!( + modelardb_types::array!(record_batch, 3, StringArray), + tag_column_arrays[0] + ); + } + + #[test] + fn test_column_arrays_with_invalid_schema() { + let model_table_metadata = test::model_table_metadata(); + let record_batch = test::normal_table_record_batch(); + + let result = model_table_metadata.column_arrays(&record_batch); + + assert_eq!( + result.unwrap_err().to_string(), + "Invalid argument: The record batch does not match the schema of the model table." + ); + } + // Tests for GeneratedColumn. 
#[test] fn test_can_create_generated_column() { From af7b4408dbccd62b4fb386c81348d90ea21346ec Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 19 Feb 2025 00:42:18 +0100 Subject: [PATCH 17/69] Use method to get column arrays instead of doing it manually --- .../src/storage/uncompressed_data_buffer.rs | 21 ++------------ .../src/storage/uncompressed_data_manager.rs | 29 ++++--------------- 2 files changed, 9 insertions(+), 41 deletions(-) diff --git a/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs b/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs index c3aa6597b..0dfd4700f 100644 --- a/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs +++ b/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs @@ -27,7 +27,7 @@ use datafusion::arrow::compute; use datafusion::arrow::record_batch::RecordBatch; use modelardb_storage::metadata::model_table_metadata::ModelTableMetadata; use modelardb_types::types::{ - Timestamp, TimestampArray, TimestampBuilder, Value, ValueArray, ValueBuilder, + Timestamp, TimestampArray, TimestampBuilder, Value, ValueBuilder, }; use object_store::path::Path; use object_store::ObjectStore; @@ -365,23 +365,8 @@ impl UncompressedOnDiskDataBuffer { ) -> Result { let data_points = self.record_batch().await?; - let timestamp_index = self.model_table_metadata.timestamp_column_index; - let timestamp_column_array = - modelardb_types::array!(data_points, timestamp_index, TimestampArray); - - let field_column_arrays: Vec<_> = self - .model_table_metadata - .field_column_indices - .iter() - .map(|index| modelardb_types::array!(data_points, *index, ValueArray)) - .collect(); - - let tag_column_arrays: Vec<_> = self - .model_table_metadata - .tag_column_indices - .iter() - .map(|index| modelardb_types::array!(data_points, *index, StringArray)) - .collect(); + let (timestamp_column_array, field_column_arrays, tag_column_arrays) = + 
self.model_table_metadata.column_arrays(&data_points)?; let tag_values: Vec = tag_column_arrays .iter() diff --git a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs index 4f622d18d..dd536610f 100644 --- a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs +++ b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs @@ -23,10 +23,9 @@ use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; use dashmap::DashMap; -use datafusion::arrow::array::StringArray; use futures::StreamExt; use modelardb_storage::metadata::model_table_metadata::ModelTableMetadata; -use modelardb_types::types::{Timestamp, TimestampArray, Value, ValueArray}; +use modelardb_types::types::{Timestamp, Value, ValueArray}; use object_store::path::{Path, PathPart}; use tokio::runtime::Runtime; use tracing::{debug, error, warn}; @@ -173,24 +172,9 @@ impl UncompressedDataManager { // Read the current batch index as it may be updated in parallel. let current_batch_index = self.current_batch_index.load(Ordering::Relaxed); - // Prepare the timestamp column for iteration. - let timestamp_index = model_table_metadata.timestamp_column_index; - let timestamp_column_array = - modelardb_types::array!(data_points, timestamp_index, TimestampArray); - - // Prepare the tag columns for iteration. - let tag_column_arrays: Vec<_> = model_table_metadata - .tag_column_indices - .iter() - .map(|index| modelardb_types::array!(data_points, *index, StringArray)) - .collect(); - - // Prepare the field columns for iteration. - let field_column_arrays: Vec<_> = model_table_metadata - .field_column_indices - .iter() - .map(|index| modelardb_types::array!(data_points, *index, ValueArray)) - .collect(); + // Prepare the columns for iteration. 
+ let (timestamp_column_array, field_column_arrays, tag_column_arrays) = + model_table_metadata.column_arrays(&data_points)?; // For each data point, compute a hash from the tags and pass the fields to the storage // engine so they can be added to the appropriate UncompressedDataBuffer. @@ -592,9 +576,8 @@ impl UncompressedDataManager { }; let data_points = maybe_data_points?; - let timestamp_index = model_table_metadata.timestamp_column_index; - let uncompressed_timestamps = - modelardb_types::array!(data_points, timestamp_index, TimestampArray); + let (uncompressed_timestamps, _field_column_arrays, _tag_column_arrays) = + model_table_metadata.column_arrays(&data_points)?; let compressed_segments = model_table_metadata .field_column_indices From 84c08a18a893239169968da27b50ac5b0e1c771d Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 19 Feb 2025 18:25:26 +0100 Subject: [PATCH 18/69] Fix tests after changes to uncompressed data buffers --- .../src/storage/uncompressed_data_buffer.rs | 47 ++++++++++++------ .../src/storage/uncompressed_data_manager.rs | 49 +++++++++++-------- .../src/metadata/table_metadata_manager.rs | 2 +- 3 files changed, 61 insertions(+), 37 deletions(-) diff --git a/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs b/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs index 0dfd4700f..f4c9daf4f 100644 --- a/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs +++ b/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs @@ -26,9 +26,7 @@ use datafusion::arrow::array::{Array, ArrayBuilder, StringArray}; use datafusion::arrow::compute; use datafusion::arrow::record_batch::RecordBatch; use modelardb_storage::metadata::model_table_metadata::ModelTableMetadata; -use modelardb_types::types::{ - Timestamp, TimestampArray, TimestampBuilder, Value, ValueBuilder, -}; +use modelardb_types::types::{Timestamp, TimestampArray, TimestampBuilder, Value, ValueBuilder}; 
use object_store::path::Path; use object_store::ObjectStore; use tracing::debug; @@ -116,7 +114,7 @@ impl UncompressedInMemoryDataBuffer { /// Return how many data points the [`UncompressedInMemoryDataBuffer`] currently contains. pub(super) fn len(&self) -> usize { - // The length is always the same for both builders. + // The length is always the same for all builders. self.timestamps.len() } @@ -426,13 +424,15 @@ mod tests { use tokio::runtime::Runtime; const CURRENT_BATCH_INDEX: u64 = 1; - const TAG_HASH: u64 = 1; + const TAG_VALUE: &str = "tag"; + const TAG_HASH: u64 = 15537859409877038916; // Tests for UncompressedInMemoryDataBuffer. #[test] fn test_get_in_memory_data_buffer_memory_size() { let uncompressed_buffer = UncompressedInMemoryDataBuffer::new( TAG_HASH, + vec![TAG_VALUE.to_owned()], test::model_table_metadata_arc(), CURRENT_BATCH_INDEX, ); @@ -459,6 +459,7 @@ mod tests { fn test_get_in_memory_data_buffer_len() { let uncompressed_buffer = UncompressedInMemoryDataBuffer::new( TAG_HASH, + vec![TAG_VALUE.to_owned()], test::model_table_metadata_arc(), CURRENT_BATCH_INDEX, ); @@ -470,6 +471,7 @@ mod tests { fn test_can_insert_data_point_into_in_memory_data_buffer() { let mut uncompressed_buffer = UncompressedInMemoryDataBuffer::new( TAG_HASH, + vec![TAG_VALUE.to_owned()], test::model_table_metadata_arc(), CURRENT_BATCH_INDEX, ); @@ -482,6 +484,7 @@ mod tests { fn test_check_if_in_memory_data_buffer_is_unused() { let mut uncompressed_buffer = UncompressedInMemoryDataBuffer::new( TAG_HASH, + vec![TAG_VALUE.to_owned()], test::model_table_metadata_arc(), CURRENT_BATCH_INDEX - 1, ); @@ -500,6 +503,7 @@ mod tests { fn test_check_is_in_memory_data_buffer_full() { let mut uncompressed_buffer = UncompressedInMemoryDataBuffer::new( TAG_HASH, + vec![TAG_VALUE.to_owned()], test::model_table_metadata_arc(), CURRENT_BATCH_INDEX, ); @@ -512,6 +516,7 @@ mod tests { fn test_check_is_in_memory_data_buffer_not_full() { let uncompressed_buffer = 
UncompressedInMemoryDataBuffer::new( TAG_HASH, + vec![TAG_VALUE.to_owned()], test::model_table_metadata_arc(), CURRENT_BATCH_INDEX, ); @@ -525,6 +530,7 @@ mod tests { fn test_in_memory_data_buffer_panic_if_inserting_data_point_when_full() { let mut uncompressed_buffer = UncompressedInMemoryDataBuffer::new( TAG_HASH, + vec![TAG_VALUE.to_owned()], test::model_table_metadata_arc(), CURRENT_BATCH_INDEX, ); @@ -534,17 +540,20 @@ mod tests { #[tokio::test] async fn test_get_record_batch_from_in_memory_data_buffer() { + let model_table_metadata = test::model_table_metadata_arc(); let mut uncompressed_buffer = UncompressedInMemoryDataBuffer::new( TAG_HASH, - test::model_table_metadata_arc(), + vec![TAG_VALUE.to_owned()], + model_table_metadata.clone(), CURRENT_BATCH_INDEX, ); insert_data_points(uncompressed_buffer.capacity(), &mut uncompressed_buffer); let capacity = uncompressed_buffer.capacity(); let data = uncompressed_buffer.record_batch().await.unwrap(); - assert_eq!(data.num_columns(), 3); + assert_eq!(data.num_rows(), capacity); + assert_eq!(data.schema(), model_table_metadata.schema); } proptest! { @@ -553,9 +562,11 @@ mod tests { // tokio::test is not supported in proptest! due to proptest-rs/proptest/issues/179. 
let runtime = Runtime::new().unwrap(); + let model_table_metadata = test::model_table_metadata_arc(); let mut uncompressed_buffer = UncompressedInMemoryDataBuffer::new( TAG_HASH, - test::model_table_metadata_arc(), + vec![TAG_VALUE.to_owned()], + model_table_metadata.clone(), CURRENT_BATCH_INDEX, ); @@ -566,7 +577,7 @@ mod tests { } let data = runtime.block_on(uncompressed_buffer.record_batch()).unwrap(); - assert_eq!(data.num_columns(), 3); + assert_eq!(data.schema(), model_table_metadata.schema); let timestamps = modelardb_types::array!(data, 0, TimestampArray); assert!(timestamps.values().windows(2).all(|pair| pair[0] <= pair[1])); } @@ -576,6 +587,7 @@ mod tests { async fn test_in_memory_data_buffer_can_spill_not_full_buffer() { let mut uncompressed_buffer = UncompressedInMemoryDataBuffer::new( TAG_HASH, + vec![TAG_VALUE.to_owned()], test::model_table_metadata_arc(), CURRENT_BATCH_INDEX, ); @@ -591,7 +603,7 @@ mod tests { .unwrap(); let uncompressed_path = temp_dir.path().join(format!( - "{UNCOMPRESSED_DATA_FOLDER}/{}/1", + "{UNCOMPRESSED_DATA_FOLDER}/{}/{TAG_HASH}", test::MODEL_TABLE_NAME )); assert_eq!(uncompressed_path.read_dir().unwrap().count(), 1) @@ -601,6 +613,7 @@ mod tests { async fn test_in_memory_data_buffer_can_spill_full_buffer() { let mut uncompressed_buffer = UncompressedInMemoryDataBuffer::new( TAG_HASH, + vec![TAG_VALUE.to_owned()], test::model_table_metadata_arc(), CURRENT_BATCH_INDEX, ); @@ -616,7 +629,7 @@ mod tests { .unwrap(); let uncompressed_path = temp_dir.path().join(format!( - "{UNCOMPRESSED_DATA_FOLDER}/{}/1", + "{UNCOMPRESSED_DATA_FOLDER}/{}/{TAG_HASH}", test::MODEL_TABLE_NAME )); assert_eq!(uncompressed_path.read_dir().unwrap().count(), 1) @@ -632,13 +645,13 @@ mod tests { .path() .join(UNCOMPRESSED_DATA_FOLDER) .join(test::MODEL_TABLE_NAME) - .join("1") + .join(TAG_HASH.to_string()) .join("1234567890123.parquet"); assert!(spilled_buffer_path.exists()); let data = uncompressed_on_disk_buffer.record_batch().await.unwrap(); - 
assert_eq!(data.num_columns(), 3); + assert_eq!(data.schema(), test::model_table_metadata().schema); assert_eq!(data.num_rows(), *UNCOMPRESSED_DATA_BUFFER_CAPACITY); assert!(!spilled_buffer_path.exists()); @@ -649,9 +662,11 @@ mod tests { // tokio::test is not supported in proptest! due to proptest-rs/proptest/issues/179. let runtime = Runtime::new().unwrap(); + let model_table_metadata = test::model_table_metadata_arc(); let mut uncompressed_in_memory_buffer = UncompressedInMemoryDataBuffer::new( TAG_HASH, - test::model_table_metadata_arc(), + vec![TAG_VALUE.to_owned()], + model_table_metadata.clone(), CURRENT_BATCH_INDEX, ); @@ -672,7 +687,7 @@ mod tests { assert_eq!(spilled_buffers.len(), 1); let data = runtime.block_on(uncompressed_on_disk_buffer.record_batch()).unwrap(); - assert_eq!(data.num_columns(), 3); + assert_eq!(data.schema(), model_table_metadata.schema); let timestamps = modelardb_types::array!(data, 0, TimestampArray); assert!(timestamps.values().windows(2).all(|pair| pair[0] <= pair[1])); @@ -703,6 +718,7 @@ mod tests { // The creation of record_batch empties uncompressed_in_memory_buffer_to_be_spilled. 
let mut uncompressed_in_memory_buffer = UncompressedInMemoryDataBuffer::new( TAG_HASH, + vec![TAG_VALUE.to_owned()], test::model_table_metadata_arc(), CURRENT_BATCH_INDEX, ); @@ -738,6 +754,7 @@ mod tests { let mut uncompressed_in_memory_buffer_to_be_spilled = UncompressedInMemoryDataBuffer::new( TAG_HASH, + vec![TAG_VALUE.to_owned()], test::model_table_metadata_arc(), CURRENT_BATCH_INDEX, ); diff --git a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs index dd536610f..842dbaa35 100644 --- a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs +++ b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs @@ -660,7 +660,7 @@ mod tests { }; use modelardb_storage::test; use modelardb_types::schemas::UNCOMPRESSED_SCHEMA; - use modelardb_types::types::{TimestampBuilder, ValueBuilder}; + use modelardb_types::types::{TimestampArray, TimestampBuilder, ValueBuilder}; use object_store::local::LocalFileSystem; use tempfile::TempDir; use tokio::time::{sleep, Duration}; @@ -668,7 +668,8 @@ mod tests { use crate::storage::UNCOMPRESSED_DATA_BUFFER_CAPACITY; use crate::{ClusterMode, DataFolders}; - const TAG_HASH: u64 = 9674644176454356993; + const TAG_VALUE: &str = "tag"; + const TAG_HASH: u64 = 15537859409877038916; // Tests for UncompressedDataManager. 
#[tokio::test] @@ -803,7 +804,7 @@ mod tests { let temp_dir = tempfile::tempdir().unwrap(); let (mut data_manager, model_table_metadata) = create_managers(&temp_dir).await; - insert_data_points(1, &mut data_manager, &model_table_metadata, TAG_HASH).await; + insert_data_points(1, &mut data_manager, &model_table_metadata, TAG_VALUE).await; assert!(data_manager .uncompressed_in_memory_data_buffers @@ -823,11 +824,11 @@ mod tests { let temp_dir = tempfile::tempdir().unwrap(); let (mut data_manager, model_table_metadata) = create_managers(&temp_dir).await; - insert_data_points(1, &mut data_manager, &model_table_metadata, TAG_HASH).await; + insert_data_points(1, &mut data_manager, &model_table_metadata, TAG_VALUE).await; assert_eq!(data_manager.uncompressed_in_memory_data_buffers.len(), 1); assert_eq!(data_manager.uncompressed_on_disk_data_buffers.len(), 0); - insert_data_points(1, &mut data_manager, &model_table_metadata, TAG_HASH).await; + insert_data_points(1, &mut data_manager, &model_table_metadata, TAG_VALUE).await; assert_eq!(data_manager.uncompressed_in_memory_data_buffers.len(), 1); assert_eq!(data_manager.uncompressed_on_disk_data_buffers.len(), 0); @@ -849,7 +850,7 @@ mod tests { let temp_dir = tempfile::tempdir().unwrap(); let (mut data_manager, model_table_metadata) = create_managers(&temp_dir).await; - insert_data_points(1, &mut data_manager, &model_table_metadata, TAG_HASH).await; + insert_data_points(1, &mut data_manager, &model_table_metadata, TAG_VALUE).await; assert_eq!(data_manager.uncompressed_in_memory_data_buffers.len(), 1); assert_eq!(data_manager.uncompressed_on_disk_data_buffers.len(), 0); @@ -857,7 +858,7 @@ mod tests { assert_eq!(data_manager.uncompressed_in_memory_data_buffers.len(), 0); assert_eq!(data_manager.uncompressed_on_disk_data_buffers.len(), 1); - insert_data_points(1, &mut data_manager, &model_table_metadata, TAG_HASH).await; + insert_data_points(1, &mut data_manager, &model_table_metadata, TAG_VALUE).await; 
assert_eq!(data_manager.uncompressed_in_memory_data_buffers.len(), 1); assert_eq!(data_manager.uncompressed_on_disk_data_buffers.len(), 0); @@ -890,9 +891,9 @@ mod tests { field_2.append_slice(&[50.0, 100.0, 150.0]); let mut tag = StringBuilder::new(); - tag.append_value("A"); - tag.append_value("A"); - tag.append_value("A"); + tag.append_value(TAG_VALUE); + tag.append_value(TAG_VALUE); + tag.append_value(TAG_VALUE); let data = RecordBatch::try_new( model_table_metadata.schema.clone(), @@ -916,7 +917,7 @@ mod tests { assert_eq!( data_manager .uncompressed_in_memory_data_buffers - .get(&11395701956291516416) + .get(&TAG_HASH) .unwrap() .len(), 3 @@ -945,7 +946,7 @@ mod tests { *UNCOMPRESSED_DATA_BUFFER_CAPACITY, &mut data_manager, &model_table_metadata, - TAG_HASH, + TAG_VALUE, ) .await; @@ -965,7 +966,7 @@ mod tests { *UNCOMPRESSED_DATA_BUFFER_CAPACITY * 2, &mut data_manager, &model_table_metadata, - TAG_HASH, + TAG_VALUE, ) .await; @@ -1014,7 +1015,7 @@ mod tests { 1, &mut data_manager, &model_table_metadata.clone(), - tag_hash as u64, + &tag_hash.to_string(), ) .await; } @@ -1034,7 +1035,7 @@ mod tests { ); // If there is enough memory to hold n full buffers, n + 1 are needed to spill a buffer. - insert_data_points(1, &mut data_manager, &model_table_metadata, TAG_HASH).await; + insert_data_points(1, &mut data_manager, &model_table_metadata, TAG_VALUE).await; // One of the buffers should be spilled due to the memory limit being exceeded. 
assert_eq!( @@ -1065,7 +1066,7 @@ mod tests { .memory_pool .remaining_uncompressed_memory_in_bytes(); - insert_data_points(1, &mut data_manager, &model_table_metadata, TAG_HASH).await; + insert_data_points(1, &mut data_manager, &model_table_metadata, TAG_VALUE).await; assert!( reserved_memory @@ -1086,7 +1087,7 @@ mod tests { *UNCOMPRESSED_DATA_BUFFER_CAPACITY, &mut data_manager, &model_table_metadata, - TAG_HASH, + TAG_VALUE, )); let remaining_memory = data_manager @@ -1194,7 +1195,7 @@ mod tests { 1, &mut data_manager, &model_table_metadata.clone(), - TAG_HASH, + TAG_VALUE, ) .await; @@ -1218,7 +1219,13 @@ mod tests { ); // Insert data that should force the existing data to now be spilled. - insert_data_points(1, &mut data_manager, &model_table_metadata, TAG_HASH + 1).await; + insert_data_points( + 1, + &mut data_manager, + &model_table_metadata, + &format!("{TAG_VALUE}_2"), + ) + .await; assert_eq!(data_manager.uncompressed_in_memory_data_buffers.len(), 1); assert_eq!(data_manager.uncompressed_on_disk_data_buffers.len(), 1); @@ -1229,7 +1236,7 @@ mod tests { count: usize, data_manager: &mut UncompressedDataManager, model_table_metadata: &Arc, - tag_hash: u64, + tag_value: &str, ) { let values: &[Value] = &[37.0, 73.0]; let current_batch_index = 0; @@ -1237,7 +1244,7 @@ mod tests { for i in 0..count { data_manager .insert_data_point( - tag_hash, + vec![tag_value.to_owned()], i as i64, &mut values.iter().copied(), model_table_metadata.clone(), diff --git a/crates/modelardb_storage/src/metadata/table_metadata_manager.rs b/crates/modelardb_storage/src/metadata/table_metadata_manager.rs index b73c923d8..ec48af975 100644 --- a/crates/modelardb_storage/src/metadata/table_metadata_manager.rs +++ b/crates/modelardb_storage/src/metadata/table_metadata_manager.rs @@ -743,7 +743,7 @@ mod tests { #[tokio::test] async fn test_drop_model_table_metadata() { - let (temp_dir, metadata_manager) = create_metadata_manager_and_save_model_table().await; + let (_temp_dir, 
metadata_manager) = create_metadata_manager_and_save_model_table().await; metadata_manager .drop_table_metadata(test::MODEL_TABLE_NAME) From d2e6f1d3fc2a21a3c16e842fbe309e55905a263e Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 19 Feb 2025 20:12:09 +0100 Subject: [PATCH 19/69] Pass tag values and field column index to try_compress() instead of univariate ID --- .../src/storage/uncompressed_data_buffer.rs | 10 -------- .../src/storage/uncompressed_data_manager.rs | 25 ++++++++++--------- 2 files changed, 13 insertions(+), 22 deletions(-) diff --git a/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs b/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs index f4c9daf4f..5da1a6c63 100644 --- a/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs +++ b/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs @@ -191,11 +191,6 @@ impl UncompressedInMemoryDataBuffer { .map_err(|error| error.into()) } - /// Return the tag hash that identifies the time series the buffer stores data points from. - pub(super) fn tag_hash(&self) -> u64 { - self.tag_hash - } - /// Return the metadata for the model table the buffer stores data points for. pub(super) fn model_table_metadata(&self) -> &Arc { &self.model_table_metadata @@ -336,11 +331,6 @@ impl UncompressedOnDiskDataBuffer { Ok(data_points) } - /// Return the tag hash that identifies the time series the buffer stores data points from. - pub(super) fn tag_hash(&self) -> u64 { - self.tag_hash - } - /// Return the metadata for the model table the buffer stores data points for. 
pub(super) fn model_table_metadata(&self) -> &Arc { &self.model_table_metadata diff --git a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs index 842dbaa35..611afa654 100644 --- a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs +++ b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs @@ -25,7 +25,7 @@ use std::sync::Arc; use dashmap::DashMap; use futures::StreamExt; use modelardb_storage::metadata::model_table_metadata::ModelTableMetadata; -use modelardb_types::types::{Timestamp, Value, ValueArray}; +use modelardb_types::types::{Timestamp, Value}; use object_store::path::{Path, PathPart}; use tokio::runtime::Runtime; use tracing::{debug, error, warn}; @@ -555,12 +555,11 @@ impl UncompressedDataManager { &self, uncompressed_data_buffer: UncompressedDataBuffer, ) -> Result<()> { - let (memory_use, maybe_data_points, tag_hash, model_table_metadata) = + let (memory_use, maybe_data_points, model_table_metadata) = match uncompressed_data_buffer { UncompressedDataBuffer::InMemory(mut uncompressed_in_memory_data_buffer) => ( uncompressed_in_memory_data_buffer.memory_size(), uncompressed_in_memory_data_buffer.record_batch().await, - uncompressed_in_memory_data_buffer.tag_hash(), uncompressed_in_memory_data_buffer .model_table_metadata() .clone(), @@ -568,7 +567,6 @@ impl UncompressedDataManager { UncompressedDataBuffer::OnDisk(uncompressed_on_disk_data_buffer) => ( 0, uncompressed_on_disk_data_buffer.record_batch().await, - uncompressed_on_disk_data_buffer.tag_hash(), uncompressed_on_disk_data_buffer .model_table_metadata() .clone(), @@ -576,21 +574,24 @@ impl UncompressedDataManager { }; let data_points = maybe_data_points?; - let (uncompressed_timestamps, _field_column_arrays, _tag_column_arrays) = + let (uncompressed_timestamps, field_column_arrays, tag_column_arrays) = model_table_metadata.column_arrays(&data_points)?; - let compressed_segments = 
model_table_metadata - .field_column_indices + let tag_values: Vec = tag_column_arrays .iter() - .map(|field_column_index| { - let uncompressed_values = - modelardb_types::array!(data_points, *field_column_index, ValueArray); - let univariate_id = tag_hash | (*field_column_index as u64); + .map(|array| array.value(0).to_string()) + .collect(); + + let compressed_segments = field_column_arrays + .iter() + .zip(model_table_metadata.field_column_indices.iter()) + .map(|(uncompressed_values, field_column_index)| { let error_bound = model_table_metadata.error_bounds[*field_column_index]; // unwrap() is safe as uncompressed_timestamps and uncompressed_values have the same length. modelardb_compression::try_compress( - univariate_id, + tag_values.clone(), + field_column_index, error_bound, uncompressed_timestamps, uncompressed_values, From 44dd8b5a936faeb6d62221c1ef987925c68023b6 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 19 Feb 2025 20:25:42 +0100 Subject: [PATCH 20/69] Remove UNCOMPRESSED_SCHEMA --- .../src/storage/uncompressed_data_manager.rs | 46 ++++++++----------- crates/modelardb_types/src/macros.rs | 11 +++-- crates/modelardb_types/src/schemas.rs | 10 +--- crates/modelardb_types/src/types.rs | 5 +- 4 files changed, 28 insertions(+), 44 deletions(-) diff --git a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs index 611afa654..1d784fe9f 100644 --- a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs +++ b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs @@ -555,23 +555,22 @@ impl UncompressedDataManager { &self, uncompressed_data_buffer: UncompressedDataBuffer, ) -> Result<()> { - let (memory_use, maybe_data_points, model_table_metadata) = - match uncompressed_data_buffer { - UncompressedDataBuffer::InMemory(mut uncompressed_in_memory_data_buffer) => ( - 
uncompressed_in_memory_data_buffer.memory_size(), - uncompressed_in_memory_data_buffer.record_batch().await, - uncompressed_in_memory_data_buffer - .model_table_metadata() - .clone(), - ), - UncompressedDataBuffer::OnDisk(uncompressed_on_disk_data_buffer) => ( - 0, - uncompressed_on_disk_data_buffer.record_batch().await, - uncompressed_on_disk_data_buffer - .model_table_metadata() - .clone(), - ), - }; + let (memory_use, maybe_data_points, model_table_metadata) = match uncompressed_data_buffer { + UncompressedDataBuffer::InMemory(mut uncompressed_in_memory_data_buffer) => ( + uncompressed_in_memory_data_buffer.memory_size(), + uncompressed_in_memory_data_buffer.record_batch().await, + uncompressed_in_memory_data_buffer + .model_table_metadata() + .clone(), + ), + UncompressedDataBuffer::OnDisk(uncompressed_on_disk_data_buffer) => ( + 0, + uncompressed_on_disk_data_buffer.record_batch().await, + uncompressed_on_disk_data_buffer + .model_table_metadata() + .clone(), + ), + }; let data_points = maybe_data_points?; let (uncompressed_timestamps, field_column_arrays, tag_column_arrays) = @@ -660,8 +659,7 @@ mod tests { UNCOMPRESSED_RESERVED_MEMORY_IN_BYTES, }; use modelardb_storage::test; - use modelardb_types::schemas::UNCOMPRESSED_SCHEMA; - use modelardb_types::types::{TimestampArray, TimestampBuilder, ValueBuilder}; + use modelardb_types::types::{TimestampBuilder, ValueBuilder}; use object_store::local::LocalFileSystem; use tempfile::TempDir; use tokio::time::{sleep, Duration}; @@ -1120,15 +1118,7 @@ mod tests { let (data_manager, model_table_metadata) = runtime.block_on(create_managers(&temp_dir)); // Add the spilled buffer. 
- let uncompressed_data = RecordBatch::try_new( - UNCOMPRESSED_SCHEMA.0.clone(), - vec![ - Arc::new(TimestampArray::from(vec![0, 1, 2])), - Arc::new(ValueArray::from(vec![0.2, 0.5, 0.1])), - ], - ) - .unwrap(); - + let uncompressed_data = test::uncompressed_model_table_record_batch(3); let spilled_buffer = runtime .block_on(UncompressedOnDiskDataBuffer::try_spill( 0, diff --git a/crates/modelardb_types/src/macros.rs b/crates/modelardb_types/src/macros.rs index 7fc5efcfe..e02d8e4c4 100644 --- a/crates/modelardb_types/src/macros.rs +++ b/crates/modelardb_types/src/macros.rs @@ -71,12 +71,17 @@ macro_rules! value { /// ``` /// # use std::sync::Arc; /// # +/// # use arrow::datatypes::{ArrowPrimitiveType, Field, Schema}; /// # use arrow::record_batch::RecordBatch; -/// # use modelardb_types::schemas::UNCOMPRESSED_SCHEMA; -/// # use modelardb_types::types::{Timestamp, TimestampArray, Value, ValueArray}; +/// # use modelardb_types::types::{ArrowTimestamp, ArrowValue, Timestamp, TimestampArray, Value, ValueArray}; +/// # +/// # let schema = Schema::new(vec![ +/// # Field::new("timestamps", ArrowTimestamp::DATA_TYPE, false), +/// # Field::new("values", ArrowValue::DATA_TYPE, false), +/// # ]); /// # /// # let record_batch = RecordBatch::try_new( -/// # UNCOMPRESSED_SCHEMA.0.clone(), +/// # Arc::new(schema), /// # vec![ /// # Arc::new(TimestampArray::from(Vec::::new())), /// # Arc::new(ValueArray::from(Vec::::new())), diff --git a/crates/modelardb_types/src/schemas.rs b/crates/modelardb_types/src/schemas.rs index 2bcea1751..8b461d778 100644 --- a/crates/modelardb_types/src/schemas.rs +++ b/crates/modelardb_types/src/schemas.rs @@ -22,20 +22,12 @@ use arrow::datatypes::{ArrowPrimitiveType, DataType, Field, Schema}; use crate::types::{ ArrowTimestamp, ArrowUnivariateId, ArrowValue, CompressedSchema, ConfigurationSchema, - QueryCompressedSchema, QuerySchema, TableMetadataSchema, UncompressedSchema, + QueryCompressedSchema, QuerySchema, TableMetadataSchema, }; /// Name of the 
column used to partition the compressed segments. pub const FIELD_COLUMN: &str = "field_column"; -/// [`RecordBatch`](arrow::record_batch::RecordBatch) [`Schema`] used for uncompressed data buffers. -pub static UNCOMPRESSED_SCHEMA: LazyLock = LazyLock::new(|| { - UncompressedSchema(Arc::new(Schema::new(vec![ - Field::new("timestamps", ArrowTimestamp::DATA_TYPE, false), - Field::new("values", ArrowValue::DATA_TYPE, false), - ]))) -}); - /// [`RecordBatch`](arrow::record_batch::RecordBatch) [`Schema`] used for compressed segments. pub static COMPRESSED_SCHEMA: LazyLock = LazyLock::new(|| { let mut query_compressed_schema_fields = QUERY_COMPRESSED_SCHEMA.0.fields().to_vec(); diff --git a/crates/modelardb_types/src/types.rs b/crates/modelardb_types/src/types.rs index f3bd33034..1192f9cb4 100644 --- a/crates/modelardb_types/src/types.rs +++ b/crates/modelardb_types/src/types.rs @@ -47,10 +47,7 @@ pub type ArrowValue = arrow::datatypes::Float32Type; pub type ValueBuilder = arrow::array::PrimitiveBuilder; pub type ValueArray = arrow::array::PrimitiveArray; -// Types used for the schema of uncompressed data, compressed data, the configuration, and table metadata. -#[derive(Clone)] -pub struct UncompressedSchema(pub arrow::datatypes::SchemaRef); - +// Types used for the schema of compressed data, the configuration, and table metadata. 
#[derive(Clone)] pub struct CompressedSchema(pub arrow::datatypes::SchemaRef); From 5b95831e99802cd8e2bb4a6dca4f8b9fc6d65623 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 19 Feb 2025 21:55:08 +0100 Subject: [PATCH 21/69] Remove univaraite_ids from macros --- crates/modelardb_types/src/macros.rs | 55 +++++++++++++--------------- 1 file changed, 26 insertions(+), 29 deletions(-) diff --git a/crates/modelardb_types/src/macros.rs b/crates/modelardb_types/src/macros.rs index e02d8e4c4..633222b2a 100644 --- a/crates/modelardb_types/src/macros.rs +++ b/crates/modelardb_types/src/macros.rs @@ -115,7 +115,6 @@ macro_rules! array { /// # let record_batch = RecordBatch::try_new( /// # COMPRESSED_SCHEMA.0.clone(), /// # vec![ -/// # Arc::new(UInt64Array::from(Vec::::new())), /// # Arc::new(UInt8Array::from(Vec::::new())), /// # Arc::new(TimestampArray::from(Vec::::new())), /// # Arc::new(TimestampArray::from(Vec::::new())), @@ -128,41 +127,39 @@ macro_rules! array { /// # Arc::new(UInt16Array::from(Vec::::new())), /// # ], /// # ).unwrap(); -/// modelardb_types::arrays!(record_batch, field_columns, univariate_ids, model_type_ids, -/// start_times, end_times, timestamps, min_values, max_values, values, residuals, errors); +/// modelardb_types::arrays!(record_batch, field_columns, model_type_ids, start_times, end_times, +/// timestamps, min_values, max_values, values, residuals, errors); /// ``` /// /// # Panics /// -/// Panics if `batch` does not contain ten columns of type UInt64Array, UInt8Array, TimestampArray, +/// Panics if `batch` does not contain nine columns of type UInt8Array, TimestampArray, /// TimestampArray, BinaryArray, ValueArray, ValueArray, BinaryArray, BinaryArray, and Float32Array -/// or eleven columns of type UInt64Array, UInt8Array, TimestampArray, TimestampArray, -/// BinaryArray, ValueArray, ValueArray, BinaryArray, BinaryArray, Float32Array, and UInt16Array. 
+/// or ten columns of type UInt8Array, TimestampArray, TimestampArray, BinaryArray, ValueArray, +/// ValueArray, BinaryArray, BinaryArray, Float32Array, and UInt16Array. #[macro_export] macro_rules! arrays { - ($batch:ident, $univariate_ids:ident, $model_type_ids:ident, $start_times:ident, $end_times:ident, $timestamps:ident, $min_values:ident, $max_values:ident, $values:ident, $residuals:ident, $errors:ident) => { - let $univariate_ids = $crate::array!($batch, 0, UInt64Array); - let $model_type_ids = $crate::array!($batch, 1, UInt8Array); - let $start_times = $crate::array!($batch, 2, TimestampArray); - let $end_times = $crate::array!($batch, 3, TimestampArray); - let $timestamps = $crate::array!($batch, 4, BinaryArray); - let $min_values = $crate::array!($batch, 5, ValueArray); - let $max_values = $crate::array!($batch, 6, ValueArray); - let $values = $crate::array!($batch, 7, BinaryArray); - let $residuals = $crate::array!($batch, 8, BinaryArray); - let $errors = $crate::array!($batch, 9, Float32Array); + ($batch:ident, $model_type_ids:ident, $start_times:ident, $end_times:ident, $timestamps:ident, $min_values:ident, $max_values:ident, $values:ident, $residuals:ident, $errors:ident) => { + let $model_type_ids = $crate::array!($batch, 0, UInt8Array); + let $start_times = $crate::array!($batch, 1, TimestampArray); + let $end_times = $crate::array!($batch, 2, TimestampArray); + let $timestamps = $crate::array!($batch, 3, BinaryArray); + let $min_values = $crate::array!($batch, 4, ValueArray); + let $max_values = $crate::array!($batch, 5, ValueArray); + let $values = $crate::array!($batch, 6, BinaryArray); + let $residuals = $crate::array!($batch, 7, BinaryArray); + let $errors = $crate::array!($batch, 8, Float32Array); }; - ($batch:ident, $univariate_ids:ident, $model_type_ids:ident, $start_times:ident, $end_times:ident, $timestamps:ident, $min_values:ident, $max_values:ident, $values:ident, $residuals:ident, $errors:ident, $field_columns:ident) => { - let 
$univariate_ids = $crate::array!($batch, 0, UInt64Array); - let $model_type_ids = $crate::array!($batch, 1, UInt8Array); - let $start_times = $crate::array!($batch, 2, TimestampArray); - let $end_times = $crate::array!($batch, 3, TimestampArray); - let $timestamps = $crate::array!($batch, 4, BinaryArray); - let $min_values = $crate::array!($batch, 5, ValueArray); - let $max_values = $crate::array!($batch, 6, ValueArray); - let $values = $crate::array!($batch, 7, BinaryArray); - let $residuals = $crate::array!($batch, 8, BinaryArray); - let $errors = $crate::array!($batch, 9, Float32Array); - let $field_columns = $crate::array!($batch, 10, UInt16Array); + ($batch:ident, $model_type_ids:ident, $start_times:ident, $end_times:ident, $timestamps:ident, $min_values:ident, $max_values:ident, $values:ident, $residuals:ident, $errors:ident, $field_columns:ident) => { + let $model_type_ids = $crate::array!($batch, 0, UInt8Array); + let $start_times = $crate::array!($batch, 1, TimestampArray); + let $end_times = $crate::array!($batch, 2, TimestampArray); + let $timestamps = $crate::array!($batch, 3, BinaryArray); + let $min_values = $crate::array!($batch, 4, ValueArray); + let $max_values = $crate::array!($batch, 5, ValueArray); + let $values = $crate::array!($batch, 6, BinaryArray); + let $residuals = $crate::array!($batch, 7, BinaryArray); + let $errors = $crate::array!($batch, 8, Float32Array); + let $field_columns = $crate::array!($batch, 9, UInt16Array); }; } From 2bf27395fe559f68c9bd3b78b981101ebc56c194 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 19 Feb 2025 21:57:10 +0100 Subject: [PATCH 22/69] Remove methods to convert univariate ids between int64 and uint64 --- crates/modelardb_storage/src/delta_lake.rs | 8 +- crates/modelardb_storage/src/lib.rs | 73 +------------------ .../modelardb_storage/src/query/grid_exec.rs | 4 - 3 files changed, 4 insertions(+), 81 deletions(-) diff --git 
a/crates/modelardb_storage/src/delta_lake.rs b/crates/modelardb_storage/src/delta_lake.rs index 16d014992..130efe07e 100644 --- a/crates/modelardb_storage/src/delta_lake.rs +++ b/crates/modelardb_storage/src/delta_lake.rs @@ -39,10 +39,7 @@ use object_store::ObjectStore; use url::Url; use crate::error::{ModelarDbStorageError, Result}; -use crate::{ - apache_parquet_writer_properties, maybe_univariate_ids_uint64_to_int64, METADATA_FOLDER, - TABLE_FOLDER, -}; +use crate::{apache_parquet_writer_properties, METADATA_FOLDER, TABLE_FOLDER}; /// Functionality for managing Delta Lake tables in a local folder or an object store. pub struct DeltaLake { @@ -447,9 +444,6 @@ impl DeltaLake { table_name: &str, mut compressed_segments: Vec, ) -> Result { - // Reinterpret univariate_ids from uint64 to int64 if necessary to fix #187 as a stopgap until #197. - maybe_univariate_ids_uint64_to_int64(&mut compressed_segments); - // Specify that the file must be sorted by univariate_id and then by start_time. 
let sorting_columns = Some(vec![ SortingColumn::new(0, false, false), diff --git a/crates/modelardb_storage/src/lib.rs b/crates/modelardb_storage/src/lib.rs index b9a99fee8..16548a0b8 100644 --- a/crates/modelardb_storage/src/lib.rs +++ b/crates/modelardb_storage/src/lib.rs @@ -28,8 +28,8 @@ use std::result::Result as StdResult; use std::sync::Arc; use arrow::array::{ - Array, ArrayRef, BinaryArray, BooleanArray, Float32Array, Float32Builder, Int64Array, - ListArray, ListBuilder, RecordBatch, StringArray, StringBuilder, UInt64Array, + Array, ArrayRef, BinaryArray, BooleanArray, Float32Array, Float32Builder, + ListArray, ListBuilder, RecordBatch, StringArray, StringBuilder, }; use arrow::compute; use arrow::compute::concat_batches; @@ -55,9 +55,7 @@ use datafusion::prelude::SessionContext; use datafusion::sql::parser::Statement as DFStatement; use deltalake::DeltaTable; use futures::StreamExt; -use modelardb_types::schemas::{ - DISK_COMPRESSED_SCHEMA, QUERY_COMPRESSED_SCHEMA, TABLE_METADATA_SCHEMA, -}; +use modelardb_types::schemas::TABLE_METADATA_SCHEMA; use modelardb_types::types::ErrorBound; use object_store::path::Path; use object_store::ObjectStore; @@ -186,48 +184,6 @@ pub async fn sql_and_concat(session_context: &SessionContext, sql: &str) -> Resu Ok(record_batch) } -/// Reinterpret the bits used for univariate ids in `compressed_segments` to convert the column from -/// [`UInt64Array`] to [`Int64Array`] if the column is currently [`UInt64Array`], as the Delta Lake -/// Protocol does not support unsigned integers. `compressed_segments` is modified in-place as -/// `maybe_univariate_ids_uint64_to_int64()` is designed to be used by -/// `write_compressed_segments_to_model_table()` which owns `compressed_segments`. -pub(crate) fn maybe_univariate_ids_uint64_to_int64(compressed_segments: &mut Vec) { - for record_batch in compressed_segments { - // Only convert the univariate ids if they are stored as unsigned integers. 
The univariate - // ids can be stored as signed integers already if the compressed segments have been saved - // to disk previously. - if record_batch.schema().field(0).data_type() == &DataType::UInt64 { - let mut columns = record_batch.columns().to_vec(); - let univariate_ids = modelardb_types::array!(record_batch, 0, UInt64Array); - let signed_univariate_ids: Int64Array = - univariate_ids.unary(|value| i64::from_ne_bytes(value.to_ne_bytes())); - columns[0] = Arc::new(signed_univariate_ids); - - // unwrap() is safe as columns is constructed to match DISK_COMPRESSED_SCHEMA. - *record_batch = - RecordBatch::try_new(DISK_COMPRESSED_SCHEMA.0.clone(), columns).unwrap(); - } - } -} - -/// Reinterpret the bits used for univariate ids in `compressed_segments` to convert the column from -/// [`Int64Array`] to [`UInt64Array`] as the Delta Lake Protocol does not support unsigned integers. -/// Returns a new [`RecordBatch`] with the univariate ids stored in an [`UInt64Array`] as -/// `univariate_ids_int64_to_uint64()` is designed to be used by -/// [`futures::stream::Stream::poll_next()`] and -/// [`datafusion::physical_plan::PhysicalExpr::evaluate()`] and -/// [`datafusion::physical_plan::PhysicalExpr::evaluate()`] borrows `compressed_segments` immutably. -pub fn univariate_ids_int64_to_uint64(compressed_segments: &RecordBatch) -> RecordBatch { - let mut columns = compressed_segments.columns().to_vec(); - let signed_univariate_ids = modelardb_types::array!(compressed_segments, 0, Int64Array); - let univariate_ids: UInt64Array = - signed_univariate_ids.unary(|value| u64::from_ne_bytes(value.to_ne_bytes())); - columns[0] = Arc::new(univariate_ids); - - // unwrap() is safe as columns is constructed to match QUERY_COMPRESSED_SCHEMA. - RecordBatch::try_new(QUERY_COMPRESSED_SCHEMA.0.clone(), columns).unwrap() -} - /// Read all rows from the Apache Parquet file at the location given by `file_path` in /// `object_store` and return them as a [`RecordBatch`]. 
If the file could not be read successfully, /// [`ModelarDbStorageError`] is returned. @@ -561,33 +517,10 @@ mod tests { use arrow::datatypes::{ArrowPrimitiveType, Field, Schema}; use modelardb_types::types::ArrowValue; use object_store::local::LocalFileSystem; - use proptest::num::u64 as ProptestUnivariateId; - use proptest::{prop_assert_eq, proptest}; use tempfile::TempDir; use crate::test; - // Tests for maybe_univariate_ids_uint64_to_int64() and univariate_ids_int64_to_uint64(). - proptest! { - #[test] - fn test_univariate_ids_uint64_to_int64_to_uint64(univariate_id in ProptestUnivariateId::ANY) { - let record_batch = test::compressed_segments_record_batch_with_time(univariate_id, 0, 0.0); - let mut expected_record_batch = record_batch.clone(); - expected_record_batch.remove_column(10); - - let mut record_batches = vec![record_batch.clone()]; - maybe_univariate_ids_uint64_to_int64(&mut record_batches); - - // maybe_univariate_ids_uint64_to_int64 should not panic when called twice. - maybe_univariate_ids_uint64_to_int64(&mut record_batches); - - record_batches[0].remove_column(10); - let computed_record_batch = univariate_ids_int64_to_uint64(&record_batches[0]); - - prop_assert_eq!(expected_record_batch, computed_record_batch); - } - } - // Tests for read_record_batch_from_apache_parquet_file(). 
#[tokio::test] async fn test_read_record_batch_from_apache_parquet_file() { diff --git a/crates/modelardb_storage/src/query/grid_exec.rs b/crates/modelardb_storage/src/query/grid_exec.rs index c01bfd801..088a2dfcf 100644 --- a/crates/modelardb_storage/src/query/grid_exec.rs +++ b/crates/modelardb_storage/src/query/grid_exec.rs @@ -50,7 +50,6 @@ use modelardb_types::schemas::GRID_SCHEMA; use modelardb_types::types::{TimestampArray, TimestampBuilder, ValueArray, ValueBuilder}; use crate::query::{QUERY_ORDER_DATA_POINT, QUERY_REQUIREMENT_SEGMENT}; -use crate::univariate_ids_int64_to_uint64; /// An execution plan that reconstructs the data points stored as compressed segments containing /// metadata and models. It is `pub(crate)` so the additional rules added to Apache DataFusion's @@ -264,9 +263,6 @@ impl GridStream { .elapsed_compute() .timer(); - // Reinterpret univariate_ids from int64 to uint64 to fix #187 as a stopgap until #197. - let batch = univariate_ids_int64_to_uint64(batch); - // Retrieve the arrays from batch and cast them to their concrete type. 
modelardb_types::arrays!( batch, From 0040f10fcaf106b9b19d98f3680e4e50b37b5c4f Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 19 Feb 2025 22:00:56 +0100 Subject: [PATCH 23/69] Remove DISK schemas --- crates/modelardb_storage/src/delta_lake.rs | 6 +++--- .../modelardb_storage/src/query/model_table.rs | 8 ++++---- crates/modelardb_types/src/schemas.rs | 18 ------------------ 3 files changed, 7 insertions(+), 25 deletions(-) diff --git a/crates/modelardb_storage/src/delta_lake.rs b/crates/modelardb_storage/src/delta_lake.rs index 130efe07e..d96849c69 100644 --- a/crates/modelardb_storage/src/delta_lake.rs +++ b/crates/modelardb_storage/src/delta_lake.rs @@ -31,7 +31,7 @@ use deltalake::protocol::SaveMode; use deltalake::{DeltaOps, DeltaTable, DeltaTableError}; use futures::{StreamExt, TryStreamExt}; use modelardb_common::arguments; -use modelardb_types::schemas::{DISK_COMPRESSED_SCHEMA, FIELD_COLUMN}; +use modelardb_types::schemas::{COMPRESSED_SCHEMA, FIELD_COLUMN}; use object_store::aws::AmazonS3Builder; use object_store::local::LocalFileSystem; use object_store::path::Path; @@ -288,13 +288,13 @@ impl DeltaLake { .await } - /// Create a Delta Lake table for a model table with `table_name` and [`DISK_COMPRESSED_SCHEMA`] + /// Create a Delta Lake table for a model table with `table_name` and [`COMPRESSED_SCHEMA`] /// if it does not already exist. Returns [`DeltaTable`] if the table could be created and /// [`ModelarDbStorageError`] if it could not. 
pub async fn create_model_table(&self, table_name: &str) -> Result { self.create_table( table_name, - &DISK_COMPRESSED_SCHEMA.0, + &COMPRESSED_SCHEMA.0, &[FIELD_COLUMN.to_owned()], self.location_of_compressed_table(table_name), SaveMode::ErrorIfExists, diff --git a/crates/modelardb_storage/src/query/model_table.rs b/crates/modelardb_storage/src/query/model_table.rs index 3e6ef84b7..5f065b74d 100644 --- a/crates/modelardb_storage/src/query/model_table.rs +++ b/crates/modelardb_storage/src/query/model_table.rs @@ -42,7 +42,7 @@ use datafusion::physical_plan::insert::{DataSink, DataSinkExec}; use datafusion::physical_plan::{ExecutionPlan, PhysicalExpr}; use deltalake::kernel::LogicalFile; use deltalake::{DeltaTable, DeltaTableError, ObjectMeta, PartitionFilter, PartitionValue}; -use modelardb_types::schemas::{DISK_QUERY_COMPRESSED_SCHEMA, FIELD_COLUMN, GRID_SCHEMA}; +use modelardb_types::schemas::{QUERY_COMPRESSED_SCHEMA, FIELD_COLUMN, GRID_SCHEMA}; use modelardb_types::types::{ArrowTimestamp, ArrowValue}; use crate::metadata::model_table_metadata::ModelTableMetadata; @@ -324,9 +324,9 @@ fn new_apache_parquet_exec( let log_store = delta_table.log_store(); let file_scan_config = FileScanConfig { object_store_url: log_store.object_store_url(), - file_schema: DISK_QUERY_COMPRESSED_SCHEMA.0.clone(), + file_schema: QUERY_COMPRESSED_SCHEMA.0.clone(), file_groups: vec![partitioned_files], - statistics: Statistics::new_unknown(&DISK_QUERY_COMPRESSED_SCHEMA.0), + statistics: Statistics::new_unknown(&QUERY_COMPRESSED_SCHEMA.0), projection: None, limit: maybe_limit, table_partition_cols: vec![], @@ -501,7 +501,7 @@ impl TableProvider for ModelTable { let maybe_physical_parquet_filters = maybe_convert_logical_expr_to_physical_expr( maybe_rewritten_parquet_filters.as_ref(), - DISK_QUERY_COMPRESSED_SCHEMA.0.clone(), + QUERY_COMPRESSED_SCHEMA.0.clone(), )?; let maybe_physical_grid_filters = maybe_convert_logical_expr_to_physical_expr( diff --git 
a/crates/modelardb_types/src/schemas.rs b/crates/modelardb_types/src/schemas.rs index 8b461d778..e8f93188d 100644 --- a/crates/modelardb_types/src/schemas.rs +++ b/crates/modelardb_types/src/schemas.rs @@ -36,19 +36,10 @@ pub static COMPRESSED_SCHEMA: LazyLock = LazyLock::new(|| { CompressedSchema(Arc::new(Schema::new(query_compressed_schema_fields))) }); -/// [`RecordBatch`](arrow::record_batch::RecordBatch) [`Schema`] used when writing compressed -/// segments to disk as the Delta Lake Protocol does not support unsigned integers. -pub static DISK_COMPRESSED_SCHEMA: LazyLock = LazyLock::new(|| { - let mut compressed_schema_fields = COMPRESSED_SCHEMA.0.fields().to_vec(); - compressed_schema_fields[0] = Arc::new(Field::new("univariate_id", DataType::Int64, false)); - CompressedSchema(Arc::new(Schema::new(compressed_schema_fields))) -}); - /// [`RecordBatch`](arrow::record_batch::RecordBatch) [`Schema`] used for compressed segments when /// executing queries as [`FIELD_COLUMN`] is not stored in the Apache Parquet files. pub static QUERY_COMPRESSED_SCHEMA: LazyLock = LazyLock::new(|| { QueryCompressedSchema(Arc::new(Schema::new(vec![ - Field::new("univariate_id", DataType::UInt64, false), Field::new("model_type_id", DataType::UInt8, false), Field::new("start_time", ArrowTimestamp::DATA_TYPE, false), Field::new("end_time", ArrowTimestamp::DATA_TYPE, false), @@ -61,15 +52,6 @@ pub static QUERY_COMPRESSED_SCHEMA: LazyLock = LazyLock:: ]))) }); -/// [`RecordBatch`](arrow::record_batch::RecordBatch) [`Schema`] used when reading compressed -/// segments from disk as the Delta Lake Protocol does not support unsigned integers. 
-pub static DISK_QUERY_COMPRESSED_SCHEMA: LazyLock = LazyLock::new(|| { - let mut query_compressed_schema_fields = QUERY_COMPRESSED_SCHEMA.0.fields().to_vec(); - query_compressed_schema_fields[0] = - Arc::new(Field::new("univariate_id", DataType::Int64, false)); - CompressedSchema(Arc::new(Schema::new(query_compressed_schema_fields))) -}); - /// Minimum size of the metadata required for a compressed segment. Meaning that the sizes of /// `timestamps` and `values` are not included as they are [`DataType::Binary`] and thus their size /// depend on which model is selected to represent the values for that compressed segment. From a7207d39551f4974ad9fda1edf8282252461d0cf Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Thu, 20 Feb 2025 19:28:49 +0100 Subject: [PATCH 24/69] Add compressed schema to model table metadata --- .../src/metadata/model_table_metadata.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/crates/modelardb_storage/src/metadata/model_table_metadata.rs b/crates/modelardb_storage/src/metadata/model_table_metadata.rs index 42d9a5feb..0c416298d 100644 --- a/crates/modelardb_storage/src/metadata/model_table_metadata.rs +++ b/crates/modelardb_storage/src/metadata/model_table_metadata.rs @@ -25,6 +25,7 @@ use datafusion::arrow::datatypes::{ArrowPrimitiveType, DataType, Schema}; use datafusion::common::DFSchema; use datafusion::error::DataFusionError; use datafusion::logical_expr::expr::Expr; +use modelardb_types::schemas::COMPRESSED_SCHEMA; use modelardb_types::types::{ArrowTimestamp, ArrowValue, ErrorBound, TimestampArray, ValueArray}; use crate::error::{ModelarDbStorageError, Result}; @@ -52,6 +53,8 @@ pub struct ModelTableMetadata { /// Expressions to create generated columns in the `query_schema`. Only field columns can be /// generated by [`Expr`], so [`None`] is stored for timestamp, tag, and stored field columns. 
pub generated_columns: Vec>, + /// Schema of the compressed segments that are stored in the model table. + pub compressed_schema: Arc, } impl ModelTableMetadata { @@ -141,6 +144,14 @@ impl ModelTableMetadata { let tag_column_indices = compute_indices_of_columns_with_data_type(&schema_without_generated, DataType::Utf8); + // Add the tag columns to the base schema for compressed segments. + let mut compressed_schema_fields = COMPRESSED_SCHEMA.0.fields.clone().to_vec(); + for index in &tag_column_indices { + compressed_schema_fields.push(Arc::new(schema_without_generated.field(*index).clone())); + } + + let compressed_schema = Arc::new(Schema::new(compressed_schema_fields)); + Ok(Self { name, schema: schema_without_generated, @@ -151,6 +162,7 @@ impl ModelTableMetadata { query_schema, query_schema_to_schema: field_indices_without_generated, generated_columns, + compressed_schema, }) } From 68c9bdb9cca6868e16d715503d2114b4b08d91e1 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Thu, 20 Feb 2025 19:38:17 +0100 Subject: [PATCH 25/69] Update compression to use tag values instead of univariate id --- .../modelardb_compression/src/compression.rs | 36 ++++----- .../modelardb_compression/src/models/swing.rs | 1 - crates/modelardb_compression/src/types.rs | 79 +++++++++++-------- .../src/storage/uncompressed_data_manager.rs | 1 + crates/modelardb_storage/src/delta_lake.rs | 2 +- 5 files changed, 64 insertions(+), 55 deletions(-) diff --git a/crates/modelardb_compression/src/compression.rs b/crates/modelardb_compression/src/compression.rs index 62c7906d9..fa5572dbc 100644 --- a/crates/modelardb_compression/src/compression.rs +++ b/crates/modelardb_compression/src/compression.rs @@ -17,6 +17,9 @@ //! using the model types in [`models`] to produce compressed segments containing metadata and //! models. 
+use std::sync::Arc; + +use arrow::datatypes::Schema; use arrow::record_batch::RecordBatch; use modelardb_types::schemas::COMPRESSED_SCHEMA; use modelardb_types::types::{ErrorBound, TimestampArray, ValueArray}; @@ -41,7 +44,9 @@ const RESIDUAL_VALUES_MAX_LENGTH: u8 = 255; /// and `uncompressed_values` have different lengths, otherwise the resulting compressed segments /// are returned as a [`RecordBatch`] with the [`COMPRESSED_SCHEMA`] schema. pub fn try_compress( - univariate_id: u64, + compressed_schema: Arc, + tag_values: Vec, + field_column_index: &usize, error_bound: ErrorBound, uncompressed_timestamps: &TimestampArray, uncompressed_values: &ValueArray, @@ -63,7 +68,12 @@ pub fn try_compress( // Enough memory for end_index compressed segments are allocated to never require reallocation // as one compressed segment is created per data point in the absolute worst case. let end_index = uncompressed_timestamps.len(); - let mut compressed_segment_batch_builder = CompressedSegmentBatchBuilder::new(end_index); + let mut compressed_segment_batch_builder = CompressedSegmentBatchBuilder::new( + compressed_schema, + tag_values, + *field_column_index as u16, + end_index, + ); // Compress the uncompressed timestamps and uncompressed values. let mut current_start_index = 0; @@ -84,7 +94,6 @@ pub fn try_compress( // Flush the previous model and any residual value if either exists. if current_start_index > 0 { store_compressed_segments_with_model_and_or_residuals( - univariate_id, error_bound, previous_model, current_start_index - 1, @@ -109,7 +118,6 @@ pub fn try_compress( } store_compressed_segments_with_model_and_or_residuals( - univariate_id, error_bound, previous_model, end_index - 1, @@ -155,7 +163,6 @@ pub(crate) fn fit_next_model( /// - One compressed segment that stores residuals as a single model if `maybe_model` is /// [`None`]. 
fn store_compressed_segments_with_model_and_or_residuals( - univariate_id: u64, error_bound: ErrorBound, maybe_model: Option, residuals_end_index: usize, @@ -168,7 +175,6 @@ fn store_compressed_segments_with_model_and_or_residuals( if (residuals_end_index - model.end_index) <= RESIDUAL_VALUES_MAX_LENGTH.into() { // Few or no residuals exists so the model and any residuals are put into one segment. model.finish( - univariate_id, error_bound, residuals_end_index, uncompressed_timestamps, @@ -180,7 +186,6 @@ fn store_compressed_segments_with_model_and_or_residuals( let model_end_index = model.end_index; model.finish( - univariate_id, error_bound, model_end_index, // No residuals are stored. uncompressed_timestamps, @@ -189,7 +194,6 @@ fn store_compressed_segments_with_model_and_or_residuals( ); compress_and_store_residuals_in_a_separate_segment( - univariate_id, error_bound, model_end_index + 1, residuals_end_index, @@ -202,7 +206,6 @@ fn store_compressed_segments_with_model_and_or_residuals( // The residuals are stored as a separate segment as the first sub-sequence of values in // `uncompressed_values` are residuals, thus the residuals must be stored in a segment. compress_and_store_residuals_in_a_separate_segment( - univariate_id, error_bound, 0, residuals_end_index, @@ -213,12 +216,10 @@ fn store_compressed_segments_with_model_and_or_residuals( } } -/// For the time series with `univariate_id`, compress the values from `start_index` to and -/// including `end_index` in `uncompressed_values` using [`Gorilla`] and store the resulting model -/// with the corresponding timestamps from `uncompressed_timestamps` as a segment in -/// `compressed_segment_batch_builder`. +/// Compress the values from `start_index` to and including `end_index` in `uncompressed_values` +/// using [`Gorilla`] and store the resulting model with the corresponding timestamps from +/// `uncompressed_timestamps` as a segment in `compressed_segment_batch_builder`. 
fn compress_and_store_residuals_in_a_separate_segment( - univariate_id: u64, error_bound: ErrorBound, start_index: usize, end_index: usize, @@ -241,7 +242,6 @@ fn compress_and_store_residuals_in_a_separate_segment( let (values, min_value, max_value) = gorilla.model(); compressed_segment_batch_builder.append_compressed_segment( - univariate_id, GORILLA_ID, start_time, end_time, @@ -260,9 +260,7 @@ mod tests { use super::*; - use arrow::array::{ - ArrayBuilder, BinaryArray, Float32Array, UInt64Array, UInt64Builder, UInt8Array, - }; + use arrow::array::{ArrayBuilder, BinaryArray, Float32Array, UInt64Builder, UInt8Array}; use modelardb_common::test::data_generation::{self, ValuesStructure}; use modelardb_common::test::{ERROR_BOUND_FIVE, ERROR_BOUND_ZERO}; use modelardb_types::types::{TimestampBuilder, ValueBuilder}; @@ -977,7 +975,6 @@ mod tests { let compressed_record_batch = compressed_segment_batch_builder.finish(); modelardb_types::arrays!( compressed_record_batch, - univariate_ids, model_type_ids, start_times, end_times, @@ -990,7 +987,6 @@ mod tests { ); assert_eq!(1, compressed_record_batch.num_rows()); - assert_eq!(0, univariate_ids.value(0)); assert_eq!(GORILLA_ID, model_type_ids.value(0)); assert_eq!(100, start_times.value(0)); assert_eq!(500, end_times.value(0)); diff --git a/crates/modelardb_compression/src/models/swing.rs b/crates/modelardb_compression/src/models/swing.rs index 13a45b6cc..53854c9dc 100644 --- a/crates/modelardb_compression/src/models/swing.rs +++ b/crates/modelardb_compression/src/models/swing.rs @@ -861,7 +861,6 @@ mod tests { // Extract the individual columns from the record batch. 
modelardb_types::arrays!( segments, - _univariate_id_array, model_type_id_array, start_time_array, end_time_array, diff --git a/crates/modelardb_compression/src/types.rs b/crates/modelardb_compression/src/types.rs index 46a26c3f5..eabb48a76 100644 --- a/crates/modelardb_compression/src/types.rs +++ b/crates/modelardb_compression/src/types.rs @@ -18,10 +18,11 @@ use std::debug_assert; use std::sync::Arc; -use arrow::array::{BinaryBuilder, Float32Builder, UInt16Builder, UInt64Builder, UInt8Builder}; +use arrow::array::{ + ArrayBuilder, ArrayRef, BinaryBuilder, Float32Builder, StringArray, UInt16Array, UInt8Builder, +}; +use arrow::datatypes::Schema; use arrow::record_batch::RecordBatch; -use modelardb_types::functions; -use modelardb_types::schemas::COMPRESSED_SCHEMA; use modelardb_types::types::{ ErrorBound, Timestamp, TimestampArray, TimestampBuilder, Value, ValueArray, ValueBuilder, }; @@ -194,7 +195,6 @@ impl CompressedSegmentBuilder { /// value in `uncompressed_value` after the last value represented by the model in this segment. pub(crate) fn finish( mut self, - univariate_id: u64, error_bound: ErrorBound, residuals_end_index: usize, uncompressed_timestamps: &TimestampArray, @@ -253,7 +253,6 @@ impl CompressedSegmentBuilder { }; compressed_segment_batch_builder.append_compressed_segment( - univariate_id, self.model_type_id, start_time, end_time, @@ -401,8 +400,12 @@ impl CompressedSegmentBuilder { /// A batch of compressed segments being built. pub(crate) struct CompressedSegmentBatchBuilder { - /// Univariate id of each compressed segment in the batch. - univariate_ids: UInt64Builder, + /// Schema of the compressed segments in the batch. + compressed_schema: Arc, + /// Tag values for the time series the compressed segments in the batch belong to. + tag_values: Vec, + /// Index of the field column the compressed segments in the batch belong to. + field_column_index: u16, /// Model type id of each compressed segment in the batch. 
model_type_ids: UInt8Builder, /// First timestamp of each compressed segment in the batch. @@ -426,14 +429,19 @@ pub(crate) struct CompressedSegmentBatchBuilder { residuals: BinaryBuilder, /// Actual error of each compressed segment in the batch. error: Float32Builder, - /// Field column of each compressed segment in the batch. - field_columns: UInt16Builder, } impl CompressedSegmentBatchBuilder { - pub(crate) fn new(capacity: usize) -> Self { + pub(crate) fn new( + compressed_schema: Arc, + tag_values: Vec, + field_column_index: u16, + capacity: usize, + ) -> Self { Self { - univariate_ids: UInt64Builder::with_capacity(capacity), + compressed_schema, + tag_values, + field_column_index, model_type_ids: UInt8Builder::with_capacity(capacity), start_times: TimestampBuilder::with_capacity(capacity), end_times: TimestampBuilder::with_capacity(capacity), @@ -443,14 +451,12 @@ impl CompressedSegmentBatchBuilder { values: BinaryBuilder::with_capacity(capacity, capacity), residuals: BinaryBuilder::with_capacity(capacity, capacity), error: Float32Builder::with_capacity(capacity), - field_columns: UInt16Builder::with_capacity(capacity), } } /// Append a compressed segment to the builder. pub(crate) fn append_compressed_segment( &mut self, - univariate_id: u64, model_type_id: u8, start_time: Timestamp, end_time: Timestamp, @@ -461,8 +467,6 @@ impl CompressedSegmentBatchBuilder { residuals: &[u8], error: f32, ) { - let field_column_index = functions::univariate_id_to_column_index(univariate_id); - self.univariate_ids.append_value(univariate_id); self.model_type_ids.append_value(model_type_id); self.start_times.append_value(start_time); self.end_times.append_value(end_time); @@ -472,28 +476,37 @@ impl CompressedSegmentBatchBuilder { self.values.append_value(values); self.residuals.append_value(residuals); self.error.append_value(error); - self.field_columns.append_value(field_column_index); } /// Return [`RecordBatch`] of compressed segments and consume the builder. 
pub(crate) fn finish(mut self) -> RecordBatch { - RecordBatch::try_new( - COMPRESSED_SCHEMA.0.clone(), - vec![ - Arc::new(self.univariate_ids.finish()), - Arc::new(self.model_type_ids.finish()), - Arc::new(self.start_times.finish()), - Arc::new(self.end_times.finish()), - Arc::new(self.timestamps.finish()), - Arc::new(self.min_values.finish()), - Arc::new(self.max_values.finish()), - Arc::new(self.values.finish()), - Arc::new(self.residuals.finish()), - Arc::new(self.error.finish()), - Arc::new(self.field_columns.finish()), - ], - ) - .unwrap() + let batch_length = self.model_type_ids.len(); + let field_column_array: UInt16Array = std::iter::repeat(self.field_column_index) + .take(batch_length) + .collect(); + + let mut columns: Vec = vec![ + Arc::new(self.model_type_ids.finish()), + Arc::new(self.start_times.finish()), + Arc::new(self.end_times.finish()), + Arc::new(self.timestamps.finish()), + Arc::new(self.min_values.finish()), + Arc::new(self.max_values.finish()), + Arc::new(self.values.finish()), + Arc::new(self.residuals.finish()), + Arc::new(self.error.finish()), + Arc::new(field_column_array), + ]; + + for tag_value in &self.tag_values { + let tag_array: StringArray = std::iter::repeat(Some(tag_value)) + .take(batch_length) + .collect(); + + columns.push(Arc::new(tag_array)); + } + + RecordBatch::try_new(self.compressed_schema, columns).unwrap() } } diff --git a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs index 1d784fe9f..a8cb328b0 100644 --- a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs +++ b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs @@ -589,6 +589,7 @@ impl UncompressedDataManager { // unwrap() is safe as uncompressed_timestamps and uncompressed_values have the same length. 
modelardb_compression::try_compress( + model_table_metadata.compressed_schema.clone(), tag_values.clone(), field_column_index, error_bound, diff --git a/crates/modelardb_storage/src/delta_lake.rs b/crates/modelardb_storage/src/delta_lake.rs index d96849c69..082fe4fbf 100644 --- a/crates/modelardb_storage/src/delta_lake.rs +++ b/crates/modelardb_storage/src/delta_lake.rs @@ -442,7 +442,7 @@ impl DeltaLake { pub async fn write_compressed_segments_to_model_table( &self, table_name: &str, - mut compressed_segments: Vec, + compressed_segments: Vec, ) -> Result { // Specify that the file must be sorted by univariate_id and then by start_time. let sorting_columns = Some(vec![ From f125d0e0f791b13fdee99729011569e5ea8b9b96 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Thu, 20 Feb 2025 21:52:00 +0100 Subject: [PATCH 26/69] Fix calls to try_compress() in tests --- .../modelardb_compression/src/compression.rs | 43 ++++++++++++++----- .../modelardb_compression/src/models/swing.rs | 21 ++++++++- crates/modelardb_compression/src/types.rs | 11 ++++- .../src/storage/uncompressed_data_manager.rs | 2 +- 4 files changed, 62 insertions(+), 15 deletions(-) diff --git a/crates/modelardb_compression/src/compression.rs b/crates/modelardb_compression/src/compression.rs index fa5572dbc..01673420d 100644 --- a/crates/modelardb_compression/src/compression.rs +++ b/crates/modelardb_compression/src/compression.rs @@ -46,7 +46,7 @@ const RESIDUAL_VALUES_MAX_LENGTH: u8 = 255; pub fn try_compress( compressed_schema: Arc, tag_values: Vec, - field_column_index: &usize, + field_column_index: u16, error_bound: ErrorBound, uncompressed_timestamps: &TimestampArray, uncompressed_values: &ValueArray, @@ -71,7 +71,7 @@ pub fn try_compress( let mut compressed_segment_batch_builder = CompressedSegmentBatchBuilder::new( compressed_schema, tag_values, - *field_column_index as u16, + field_column_index, end_index, ); @@ -261,13 +261,14 @@ mod tests { use super::*; 
use arrow::array::{ArrayBuilder, BinaryArray, Float32Array, UInt64Builder, UInt8Array}; + use arrow::datatypes::{DataType, Field}; use modelardb_common::test::data_generation::{self, ValuesStructure}; use modelardb_common::test::{ERROR_BOUND_FIVE, ERROR_BOUND_ZERO}; use modelardb_types::types::{TimestampBuilder, ValueBuilder}; use crate::{models, MODEL_TYPE_NAMES}; - const UNIVARIATE_ID: u64 = 1; + const TAG_VALUE: &str = "tag"; const ADD_NOISE_RANGE: Option> = Some(1.0..1.05); const TRY_COMPRESS_TEST_LENGTH: usize = 50; @@ -275,7 +276,9 @@ mod tests { #[test] fn test_try_compress_empty_time_series_within_absolute_error_bound_zero() { let compressed_record_batch = try_compress( - UNIVARIATE_ID, + compressed_schema(), + vec![TAG_VALUE.to_owned()], + 0, ErrorBound::try_new_absolute(ERROR_BOUND_ZERO).unwrap(), &TimestampBuilder::new().finish(), &ValueBuilder::new().finish(), @@ -287,7 +290,9 @@ mod tests { #[test] fn test_try_compress_empty_time_series_within_relative_error_bound_zero() { let compressed_record_batch = try_compress( - UNIVARIATE_ID, + compressed_schema(), + vec![TAG_VALUE.to_owned()], + 0, ErrorBound::try_new_relative(ERROR_BOUND_ZERO).unwrap(), &TimestampBuilder::new().finish(), &ValueBuilder::new().finish(), @@ -507,7 +512,9 @@ mod tests { data_generation::generate_values(uncompressed_timestamps.values(), values_structure); let compressed_record_batch = try_compress( - 1, + compressed_schema(), + vec![TAG_VALUE.to_owned()], + 0, error_bound, &uncompressed_timestamps, &uncompressed_values, @@ -655,7 +662,9 @@ mod tests { assert_eq!(uncompressed_timestamps.len(), uncompressed_values.len()); let compressed_record_batch = try_compress( - UNIVARIATE_ID, + compressed_schema(), + vec![TAG_VALUE.to_owned()], + 0, error_bound, &uncompressed_timestamps, &uncompressed_values, @@ -869,7 +878,9 @@ mod tests { ); let compressed_record_batch = try_compress( - UNIVARIATE_ID, + compressed_schema(), + vec![TAG_VALUE.to_owned()], + 0, error_bound, 
&uncompressed_timestamps, &uncompressed_values, @@ -960,10 +971,15 @@ mod tests { let error_bound = ErrorBound::try_new_relative(ERROR_BOUND_ZERO).unwrap(); let uncompressed_timestamps = TimestampArray::from_iter_values((100..=500).step_by(100)); let uncompressed_values = ValueArray::from(vec![73.0, 37.0, 37.0, 37.0, 73.0]); - let mut compressed_segment_batch_builder = CompressedSegmentBatchBuilder::new(1); - compress_and_store_residuals_in_a_separate_segment( + let mut compressed_segment_batch_builder = CompressedSegmentBatchBuilder::new( + compressed_schema(), + vec![TAG_VALUE.to_owned()], 0, + 1, + ); + + compress_and_store_residuals_in_a_separate_segment( error_bound, 0, uncompressed_timestamps.len() - 1, @@ -998,4 +1014,11 @@ mod tests { assert!(residuals.value(0).is_empty()); assert!(errors.value(0).is_nan()); } + + pub fn compressed_schema() -> Arc { + let mut compressed_schema_fields = COMPRESSED_SCHEMA.0.fields.clone().to_vec(); + compressed_schema_fields.push(Arc::new(Field::new("tag", DataType::Utf8, false))); + + Arc::new(Schema::new(compressed_schema_fields)) + } } diff --git a/crates/modelardb_compression/src/models/swing.rs b/crates/modelardb_compression/src/models/swing.rs index 53854c9dc..7269c2093 100644 --- a/crates/modelardb_compression/src/models/swing.rs +++ b/crates/modelardb_compression/src/models/swing.rs @@ -350,10 +350,14 @@ fn compute_slope_and_intercept( mod tests { use super::*; - use arrow::array::{BinaryArray, Float32Array, UInt64Array, UInt8Array}; + use std::sync::Arc; + + use arrow::array::{BinaryArray, Float32Array, UInt8Array}; + use arrow::datatypes::{DataType, Field, Schema}; use modelardb_common::test::{ ERROR_BOUND_ABSOLUTE_MAX, ERROR_BOUND_FIVE, ERROR_BOUND_RELATIVE_MAX, ERROR_BOUND_ZERO, }; + use modelardb_types::schemas::COMPRESSED_SCHEMA; use modelardb_types::types::{TimestampArray, TimestampBuilder, ValueArray, ValueBuilder}; use proptest::num::f32 as ProptestValue; use proptest::strategy::Strategy; @@ -856,7 +860,20 @@ 
mod tests { (START_TIME..end_time).step_by(SAMPLING_INTERVAL as usize), ); let values = ValueArray::from_iter_values(values); - let segments = crate::try_compress(1, error_bound, ×tamps, &values).unwrap(); + + let mut compressed_schema_fields = COMPRESSED_SCHEMA.0.fields.clone().to_vec(); + compressed_schema_fields.push(Arc::new(Field::new("tag", DataType::Utf8, false))); + let compressed_schema = Arc::new(Schema::new(compressed_schema_fields)); + + let segments = crate::try_compress( + compressed_schema, + vec!["tag".to_owned()], + 0, + error_bound, + ×tamps, + &values, + ) + .unwrap(); // Extract the individual columns from the record batch. modelardb_types::arrays!( diff --git a/crates/modelardb_compression/src/types.rs b/crates/modelardb_compression/src/types.rs index eabb48a76..b6c530f67 100644 --- a/crates/modelardb_compression/src/types.rs +++ b/crates/modelardb_compression/src/types.rs @@ -515,8 +515,10 @@ mod tests { use super::*; use arrow::array::BinaryArray; + use arrow::datatypes::{DataType, Field}; use modelardb_common::test::data_generation::{self, ValuesStructure}; use modelardb_common::test::{ERROR_BOUND_TEN, ERROR_BOUND_ZERO}; + use modelardb_types::schemas::COMPRESSED_SCHEMA; use modelardb_types::types::{TimestampArray, ValueArray}; use crate::compression; @@ -813,10 +815,15 @@ mod tests { // Create a segment that represents its values using a model of the expected type and its // residuals using Gorilla, and then assert that the expected encoding is used for it. 
let residuals_end_index = uncompressed_timestamps.len() - 1; - let mut compressed_segment_batch_builder = CompressedSegmentBatchBuilder::new(1); + + let mut compressed_schema_fields = COMPRESSED_SCHEMA.0.fields.clone().to_vec(); + compressed_schema_fields.push(Arc::new(Field::new("tag", DataType::Utf8, false))); + let compressed_schema = Arc::new(Schema::new(compressed_schema_fields)); + + let mut compressed_segment_batch_builder = + CompressedSegmentBatchBuilder::new(compressed_schema, vec!["tag".to_owned()], 0, 1); model.finish( - 0, ErrorBound::try_new_relative(ERROR_BOUND_ZERO).unwrap(), residuals_end_index, &uncompressed_timestamps, diff --git a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs index a8cb328b0..131b78401 100644 --- a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs +++ b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs @@ -591,7 +591,7 @@ impl UncompressedDataManager { modelardb_compression::try_compress( model_table_metadata.compressed_schema.clone(), tag_values.clone(), - field_column_index, + *field_column_index as u16, error_bound, uncompressed_timestamps, uncompressed_values, From a1d3e1a27ac596a3e1b303dc460805b582c213c9 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Thu, 20 Feb 2025 22:14:22 +0100 Subject: [PATCH 27/69] Use compressed schema with tag column in test util function --- .../modelardb_compression/src/compression.rs | 8 +++--- crates/modelardb_compression/src/types.rs | 6 ++--- .../src/storage/compressed_data_manager.rs | 4 +-- crates/modelardb_storage/src/test.rs | 26 +++++++------------ 4 files changed, 19 insertions(+), 25 deletions(-) diff --git a/crates/modelardb_compression/src/compression.rs b/crates/modelardb_compression/src/compression.rs index 01673420d..8ec0d6cce 100644 --- a/crates/modelardb_compression/src/compression.rs +++ 
b/crates/modelardb_compression/src/compression.rs @@ -21,7 +21,6 @@ use std::sync::Arc; use arrow::datatypes::Schema; use arrow::record_batch::RecordBatch; -use modelardb_types::schemas::COMPRESSED_SCHEMA; use modelardb_types::types::{ErrorBound, TimestampArray, ValueArray}; use crate::error::{ModelarDbCompressionError, Result}; @@ -42,7 +41,7 @@ const RESIDUAL_VALUES_MAX_LENGTH: u8 = 255; /// Assumes `uncompressed_timestamps` and `uncompressed_values` are sorted according to /// `uncompressed_timestamps`. Returns [`ModelarDbCompressionError`] if `uncompressed_timestamps` /// and `uncompressed_values` have different lengths, otherwise the resulting compressed segments -/// are returned as a [`RecordBatch`] with the [`COMPRESSED_SCHEMA`] schema. +/// are returned as a [`RecordBatch`] with the [`compressed_schema`] schema. pub fn try_compress( compressed_schema: Arc, tag_values: Vec, @@ -62,7 +61,7 @@ pub fn try_compress( // If there is no uncompressed data to compress, an empty [`RecordBatch`] can be returned. 
if uncompressed_timestamps.is_empty() { - return Ok(RecordBatch::new_empty(COMPRESSED_SCHEMA.0.clone())); + return Ok(RecordBatch::new_empty(compressed_schema)); } // Enough memory for end_index compressed segments are allocated to never require reallocation @@ -264,6 +263,7 @@ mod tests { use arrow::datatypes::{DataType, Field}; use modelardb_common::test::data_generation::{self, ValuesStructure}; use modelardb_common::test::{ERROR_BOUND_FIVE, ERROR_BOUND_ZERO}; + use modelardb_types::schemas::COMPRESSED_SCHEMA; use modelardb_types::types::{TimestampBuilder, ValueBuilder}; use crate::{models, MODEL_TYPE_NAMES}; @@ -699,7 +699,7 @@ mod tests { compressed_record_batch, ); - let model_type_ids = modelardb_types::array!(compressed_record_batch, 1, UInt8Array); + let model_type_ids = modelardb_types::array!(compressed_record_batch, 0, UInt8Array); assert_eq!(model_type_ids.values(), expected_model_type_ids); } diff --git a/crates/modelardb_compression/src/types.rs b/crates/modelardb_compression/src/types.rs index b6c530f67..34802adf0 100644 --- a/crates/modelardb_compression/src/types.rs +++ b/crates/modelardb_compression/src/types.rs @@ -834,9 +834,9 @@ mod tests { let batch = compressed_segment_batch_builder.finish(); assert_eq!(1, batch.num_rows()); - let segment_min_value = modelardb_types::array!(batch, 5, ValueArray).value(0); - let segment_max_value = modelardb_types::array!(batch, 6, ValueArray).value(0); - let segment_values = modelardb_types::array!(batch, 7, BinaryArray).value(0); + let segment_min_value = modelardb_types::array!(batch, 4, ValueArray).value(0); + let segment_max_value = modelardb_types::array!(batch, 5, ValueArray).value(0); + let segment_values = modelardb_types::array!(batch, 6, BinaryArray).value(0); assert_eq!(expected_segment_min_value, segment_min_value); assert_eq!(expected_segment_max_value, segment_max_value); diff --git a/crates/modelardb_server/src/storage/compressed_data_manager.rs 
b/crates/modelardb_server/src/storage/compressed_data_manager.rs index 861986431..394be80fa 100644 --- a/crates/modelardb_server/src/storage/compressed_data_manager.rs +++ b/crates/modelardb_server/src/storage/compressed_data_manager.rs @@ -610,12 +610,12 @@ mod tests { model_table_metadata, vec![ test::compressed_segments_record_batch_with_time( - COLUMN_INDEX as u64, + COLUMN_INDEX, time_ms, offset, ), test::compressed_segments_record_batch_with_time( - (COLUMN_INDEX + 1) as u64, + COLUMN_INDEX + 1, time_ms, offset, ), diff --git a/crates/modelardb_storage/src/test.rs b/crates/modelardb_storage/src/test.rs index 68dd658f5..c6df86aed 100644 --- a/crates/modelardb_storage/src/test.rs +++ b/crates/modelardb_storage/src/test.rs @@ -17,14 +17,11 @@ use std::sync::Arc; -use arrow::array::{ - BinaryArray, Float32Array, RecordBatch, StringArray, UInt16Array, UInt64Array, UInt8Array, -}; +use arrow::array::{BinaryArray, Float32Array, RecordBatch, StringArray, UInt16Array, UInt8Array}; use arrow::compute::concat_batches; use arrow::datatypes::{ArrowPrimitiveType, DataType, Field, Schema}; use modelardb_common::test::{ERROR_BOUND_FIVE, ERROR_BOUND_ONE, ERROR_BOUND_ZERO}; -use modelardb_types::functions; -use modelardb_types::schemas::{COMPRESSED_SCHEMA, TABLE_METADATA_SCHEMA}; +use modelardb_types::schemas::TABLE_METADATA_SCHEMA; use modelardb_types::types::{ ArrowTimestamp, ArrowValue, ErrorBound, Timestamp, TimestampArray, Value, ValueArray, }; @@ -139,24 +136,22 @@ pub fn uncompressed_model_table_record_batch(row_count: usize) -> RecordBatch { /// Return a [`RecordBatch`] containing three compressed segments. pub fn compressed_segments_record_batch() -> RecordBatch { - compressed_segments_record_batch_with_time(1, 0, 0.0) + compressed_segments_record_batch_with_time(0, 0, 0.0) } -/// Return a [`RecordBatch`] containing three compressed segments from `univariate_id`. 
The -/// compressed segments time range is from `time_ms` to `time_ms` + 3, while the value range is from -/// `offset` + 5.2 to `offset` + 34.2. +/// Return a [`RecordBatch`] containing three compressed segments. The compressed segments time +/// range is from `time_ms` to `time_ms` + 3, while the value range is from `offset` + 5.2 to +/// `offset` + 34.2. pub fn compressed_segments_record_batch_with_time( - univariate_id: u64, + field_column: u16, time_ms: i64, offset: f32, ) -> RecordBatch { - let field_column = functions::univariate_id_to_column_index(univariate_id); let start_times = vec![time_ms, time_ms + 2, time_ms + 4]; let end_times = vec![time_ms + 1, time_ms + 3, time_ms + 5]; let min_values = vec![offset + 5.2, offset + 10.3, offset + 30.2]; let max_values = vec![offset + 20.2, offset + 12.2, offset + 34.2]; - let univariate_id = UInt64Array::from(vec![univariate_id, univariate_id, univariate_id]); let model_type_id = UInt8Array::from(vec![1, 1, 2]); let start_time = TimestampArray::from(start_times); let end_time = TimestampArray::from(end_times); @@ -167,13 +162,11 @@ pub fn compressed_segments_record_batch_with_time( let residuals = BinaryArray::from_vec(vec![b"", b"", b""]); let error = Float32Array::from(vec![0.2, 0.5, 0.1]); let field_column = UInt16Array::from(vec![field_column, field_column, field_column]); - - let schema = COMPRESSED_SCHEMA.clone(); + let tag_column = StringArray::from(vec!["tag", "tag", "tag"]); RecordBatch::try_new( - schema.0, + model_table_metadata().compressed_schema, vec![ - Arc::new(univariate_id), Arc::new(model_type_id), Arc::new(start_time), Arc::new(end_time), @@ -184,6 +177,7 @@ pub fn compressed_segments_record_batch_with_time( Arc::new(residuals), Arc::new(error), Arc::new(field_column), + Arc::new(tag_column), ], ) .unwrap() From ff30a73dea97710e72b5c8c404e780f3e095aa34 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Thu, 20 Feb 2025 23:16:03 +0100 Subject: [PATCH 
28/69] Use model table compressed schema in compressed data buffer --- crates/modelardb_common/src/test/mod.rs | 2 +- .../src/storage/compressed_data_buffer.rs | 32 +++++++++++-------- .../src/storage/compressed_data_manager.rs | 15 +++------ 3 files changed, 24 insertions(+), 25 deletions(-) diff --git a/crates/modelardb_common/src/test/mod.rs b/crates/modelardb_common/src/test/mod.rs index 032149efd..5bcce3428 100644 --- a/crates/modelardb_common/src/test/mod.rs +++ b/crates/modelardb_common/src/test/mod.rs @@ -24,7 +24,7 @@ pub const INGESTED_BUFFER_SIZE: usize = 1438392; pub const UNCOMPRESSED_BUFFER_SIZE: usize = 1048576; /// Expected size of the compressed segments produced in the tests. -pub const COMPRESSED_SEGMENTS_SIZE: usize = 1437; +pub const COMPRESSED_SEGMENTS_SIZE: usize = 1565; /// Number of bytes reserved for ingested data in tests. pub const INGESTED_RESERVED_MEMORY_IN_BYTES: usize = 5 * 1024 * 1024; // 5 MiB diff --git a/crates/modelardb_server/src/storage/compressed_data_buffer.rs b/crates/modelardb_server/src/storage/compressed_data_buffer.rs index 68402ff93..dd51f1080 100644 --- a/crates/modelardb_server/src/storage/compressed_data_buffer.rs +++ b/crates/modelardb_server/src/storage/compressed_data_buffer.rs @@ -19,7 +19,6 @@ use std::sync::Arc; use datafusion::arrow::record_batch::RecordBatch; use modelardb_storage::metadata::model_table_metadata::ModelTableMetadata; -use modelardb_types::schemas::COMPRESSED_SCHEMA; use crate::error::{ModelarDbServerError, Result}; @@ -54,6 +53,8 @@ impl CompressedSegmentBatch { /// model table as one or more [RecordBatches](RecordBatch) per column and providing functionality /// for appending segments and saving all segments to a single Apache Parquet file. pub(super) struct CompressedDataBuffer { + /// Metadata of the model table the buffer stores compressed segments for. + model_table_metadata: Arc, /// Compressed segments that make up the compressed data in the [`CompressedDataBuffer`]. 
compressed_segments: Vec, /// Continuously updated total sum of the size of the compressed segments. @@ -61,27 +62,28 @@ pub(super) struct CompressedDataBuffer { } impl CompressedDataBuffer { - pub(super) fn new() -> Self { + pub(super) fn new(model_table_metadata: Arc) -> Self { Self { + model_table_metadata, compressed_segments: vec![], size_in_bytes: 0, } } /// Append `compressed_segments` to the [`CompressedDataBuffer`] and return the size of - /// `compressed_segments` in bytes if their schema is [`COMPRESSED_SCHEMA`], otherwise + /// `compressed_segments` in bytes if their schema matches the model table, otherwise /// [`ModelarDbServerError`] is returned. pub(super) fn append_compressed_segments( &mut self, mut compressed_segments: Vec, ) -> Result { - if compressed_segments - .iter() - .any(|compressed_segments| compressed_segments.schema() != COMPRESSED_SCHEMA.0) - { - return Err(ModelarDbServerError::InvalidArgument( - "Compressed segments must all use COMPRESSED_SCHEMA.".to_owned(), - )); + if compressed_segments.iter().any(|compressed_segments| { + compressed_segments.schema() != self.model_table_metadata.compressed_schema + }) { + return Err(ModelarDbServerError::InvalidArgument(format!( + "Compressed segments must all match {}.", + self.model_table_metadata.name + ))); } let mut compressed_segments_size = 0; @@ -127,7 +129,8 @@ mod tests { #[test] fn test_can_append_valid_compressed_segments() { - let mut compressed_data_buffer = CompressedDataBuffer::new(); + let mut compressed_data_buffer = + CompressedDataBuffer::new(test::model_table_metadata_arc()); compressed_data_buffer .append_compressed_segments(vec![ @@ -143,7 +146,8 @@ mod tests { #[test] fn test_compressed_data_buffer_size_updated_when_appending() { - let mut compressed_data_buffer = CompressedDataBuffer::new(); + let mut compressed_data_buffer = + CompressedDataBuffer::new(test::model_table_metadata_arc()); compressed_data_buffer .append_compressed_segments(vec![ @@ -157,7 +161,9 @@ mod 
tests { #[tokio::test] async fn test_can_get_record_batches_from_compressed_data_buffer() { - let mut compressed_data_buffer = CompressedDataBuffer::new(); + let mut compressed_data_buffer = + CompressedDataBuffer::new(test::model_table_metadata_arc()); + let compressed_segments = vec![ test::compressed_segments_record_batch(), test::compressed_segments_record_batch(), diff --git a/crates/modelardb_server/src/storage/compressed_data_manager.rs b/crates/modelardb_server/src/storage/compressed_data_manager.rs index 394be80fa..ac88bffc6 100644 --- a/crates/modelardb_server/src/storage/compressed_data_manager.rs +++ b/crates/modelardb_server/src/storage/compressed_data_manager.rs @@ -155,7 +155,8 @@ impl CompressedDataManager { let model_table_name = model_table_name.to_owned(); debug!("Creating compressed data buffer for table '{model_table_name}' as none exist.",); - let mut compressed_data_buffer = CompressedDataBuffer::new(); + let mut compressed_data_buffer = + CompressedDataBuffer::new(compressed_segment_batch.model_table_metadata); let segment_size = compressed_data_buffer .append_compressed_segments(compressed_segment_batch.compressed_segments); @@ -609,16 +610,8 @@ mod tests { CompressedSegmentBatch::new( model_table_metadata, vec![ - test::compressed_segments_record_batch_with_time( - COLUMN_INDEX, - time_ms, - offset, - ), - test::compressed_segments_record_batch_with_time( - COLUMN_INDEX + 1, - time_ms, - offset, - ), + test::compressed_segments_record_batch_with_time(COLUMN_INDEX, time_ms, offset), + test::compressed_segments_record_batch_with_time(COLUMN_INDEX + 1, time_ms, offset), ], ) } From 930a109cd7984e416f75b1d49f05c02806c98a99 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Thu, 20 Feb 2025 23:48:27 +0100 Subject: [PATCH 29/69] Sort compressed segment files by tag columns instead of univariate id --- crates/modelardb_storage/src/delta_lake.rs | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 
deletions(-) diff --git a/crates/modelardb_storage/src/delta_lake.rs b/crates/modelardb_storage/src/delta_lake.rs index 082fe4fbf..1bea15cf9 100644 --- a/crates/modelardb_storage/src/delta_lake.rs +++ b/crates/modelardb_storage/src/delta_lake.rs @@ -444,14 +444,20 @@ impl DeltaLake { table_name: &str, compressed_segments: Vec, ) -> Result { - // Specify that the file must be sorted by univariate_id and then by start_time. - let sorting_columns = Some(vec![ - SortingColumn::new(0, false, false), - SortingColumn::new(2, false, false), - ]); + // Specify that the file must be sorted by the tag columns and then by start_time. + let mut sorting_columns = Vec::new(); + let base_compressed_schema_len = COMPRESSED_SCHEMA.0.fields().len(); + let compressed_schema_len = compressed_segments[0].schema().fields().len(); + + // Compressed segments have the tag columns at the end of the schema. + for tag_column_index in base_compressed_schema_len..compressed_schema_len { + sorting_columns.push(SortingColumn::new(tag_column_index as i32, false, false)); + } + + sorting_columns.push(SortingColumn::new(1, false, false)); let partition_columns = vec![FIELD_COLUMN.to_owned()]; - let writer_properties = apache_parquet_writer_properties(sorting_columns); + let writer_properties = apache_parquet_writer_properties(Some(sorting_columns)); self.write_record_batches_to_table( self.delta_table(table_name).await?, From e0664242f25c5e88473ecf04d74234d97e624c89 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Thu, 20 Feb 2025 23:58:32 +0100 Subject: [PATCH 30/69] Use compressed schema with tag columns when creating model tables in delta lake --- crates/modelardb_manager/src/remote.rs | 2 +- crates/modelardb_server/src/context.rs | 2 +- .../src/storage/compressed_data_manager.rs | 6 +++--- .../src/storage/data_transfer.rs | 2 +- crates/modelardb_storage/src/delta_lake.rs | 16 ++++++++++------ .../src/optimizer/model_simple_aggregates.rs | 2 +- 6 files 
changed, 17 insertions(+), 13 deletions(-) diff --git a/crates/modelardb_manager/src/remote.rs b/crates/modelardb_manager/src/remote.rs index ca0ef8e0b..10c55c415 100644 --- a/crates/modelardb_manager/src/remote.rs +++ b/crates/modelardb_manager/src/remote.rs @@ -217,7 +217,7 @@ impl FlightServiceHandler { self.context .remote_data_folder .delta_lake - .create_model_table(&model_table_metadata.name) + .create_model_table(&model_table_metadata) .await .map_err(error_to_status_internal)?; diff --git a/crates/modelardb_server/src/context.rs b/crates/modelardb_server/src/context.rs index f5417b942..593720bdc 100644 --- a/crates/modelardb_server/src/context.rs +++ b/crates/modelardb_server/src/context.rs @@ -161,7 +161,7 @@ impl Context { self.data_folders .local_data_folder .delta_lake - .create_model_table(&model_table_metadata.name) + .create_model_table(&model_table_metadata) .await?; let query_folder_table_metadata_manager = self diff --git a/crates/modelardb_server/src/storage/compressed_data_manager.rs b/crates/modelardb_server/src/storage/compressed_data_manager.rs index ac88bffc6..493eb474d 100644 --- a/crates/modelardb_server/src/storage/compressed_data_manager.rs +++ b/crates/modelardb_server/src/storage/compressed_data_manager.rs @@ -390,7 +390,7 @@ mod tests { let mut delta_table = local_data_folder .delta_lake - .create_model_table(test::MODEL_TABLE_NAME) + .create_model_table(&test::model_table_metadata()) .await .unwrap(); @@ -450,7 +450,7 @@ mod tests { let segments = compressed_segments_record_batch(); local_data_folder .delta_lake - .create_model_table(segments.model_table_name()) + .create_model_table(&segments.model_table_metadata) .await .unwrap(); @@ -506,7 +506,7 @@ mod tests { let segments = compressed_segments_record_batch(); local_data_folder .delta_lake - .create_model_table(segments.model_table_name()) + .create_model_table(&segments.model_table_metadata) .await .unwrap(); data_manager diff --git 
a/crates/modelardb_server/src/storage/data_transfer.rs b/crates/modelardb_server/src/storage/data_transfer.rs index 29e60053c..878d49341 100644 --- a/crates/modelardb_server/src/storage/data_transfer.rs +++ b/crates/modelardb_server/src/storage/data_transfer.rs @@ -484,7 +484,7 @@ mod tests { let model_table_metadata = test::model_table_metadata(); local_data_folder .delta_lake - .create_model_table(&model_table_metadata.name) + .create_model_table(&model_table_metadata) .await .unwrap(); diff --git a/crates/modelardb_storage/src/delta_lake.rs b/crates/modelardb_storage/src/delta_lake.rs index 1bea15cf9..cebb04e0d 100644 --- a/crates/modelardb_storage/src/delta_lake.rs +++ b/crates/modelardb_storage/src/delta_lake.rs @@ -39,6 +39,7 @@ use object_store::ObjectStore; use url::Url; use crate::error::{ModelarDbStorageError, Result}; +use crate::metadata::model_table_metadata::ModelTableMetadata; use crate::{apache_parquet_writer_properties, METADATA_FOLDER, TABLE_FOLDER}; /// Functionality for managing Delta Lake tables in a local folder or an object store. @@ -288,15 +289,18 @@ impl DeltaLake { .await } - /// Create a Delta Lake table for a model table with `table_name` and [`COMPRESSED_SCHEMA`] - /// if it does not already exist. Returns [`DeltaTable`] if the table could be created and + /// Create a Delta Lake table for a model table with `model_table_metadata` if it does not + /// already exist. Returns [`DeltaTable`] if the table could be created and /// [`ModelarDbStorageError`] if it could not. 
- pub async fn create_model_table(&self, table_name: &str) -> Result { + pub async fn create_model_table( + &self, + model_table_metadata: &ModelTableMetadata, + ) -> Result { self.create_table( - table_name, - &COMPRESSED_SCHEMA.0, + &model_table_metadata.name, + &model_table_metadata.compressed_schema, &[FIELD_COLUMN.to_owned()], - self.location_of_compressed_table(table_name), + self.location_of_compressed_table(&model_table_metadata.name), SaveMode::ErrorIfExists, ) .await diff --git a/crates/modelardb_storage/src/optimizer/model_simple_aggregates.rs b/crates/modelardb_storage/src/optimizer/model_simple_aggregates.rs index 4693ef5ce..5cd11e16a 100644 --- a/crates/modelardb_storage/src/optimizer/model_simple_aggregates.rs +++ b/crates/modelardb_storage/src/optimizer/model_simple_aggregates.rs @@ -788,7 +788,7 @@ mod tests { let model_table_metadata = test::model_table_metadata_arc(); let delta_table = delta_lake - .create_model_table(&model_table_metadata.name) + .create_model_table(&model_table_metadata) .await .unwrap(); From 78a914bbac572823add1504831f37ed8b407df28 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Fri, 21 Feb 2025 00:26:24 +0100 Subject: [PATCH 31/69] Fix unit tests after changes to compressed segment schema --- .../src/storage/compressed_data_manager.rs | 28 ++----------------- .../src/storage/data_transfer.rs | 2 +- 2 files changed, 4 insertions(+), 26 deletions(-) diff --git a/crates/modelardb_server/src/storage/compressed_data_manager.rs b/crates/modelardb_server/src/storage/compressed_data_manager.rs index 493eb474d..d84163976 100644 --- a/crates/modelardb_server/src/storage/compressed_data_manager.rs +++ b/crates/modelardb_server/src/storage/compressed_data_manager.rs @@ -288,18 +288,15 @@ mod tests { use super::*; use datafusion::arrow::array::{Array, Int8Array}; - use datafusion::arrow::datatypes::{ArrowPrimitiveType, DataType, Field, Schema}; + use datafusion::arrow::datatypes::{DataType, 
Field, Schema}; use modelardb_common::test::{ COMPRESSED_RESERVED_MEMORY_IN_BYTES, COMPRESSED_SEGMENTS_SIZE, INGESTED_RESERVED_MEMORY_IN_BYTES, UNCOMPRESSED_RESERVED_MEMORY_IN_BYTES, }; - use modelardb_storage::metadata::model_table_metadata::ModelTableMetadata; use modelardb_storage::test; - use modelardb_types::types::{ArrowTimestamp, ArrowValue, ErrorBound}; use tempfile::{self, TempDir}; const COLUMN_INDEX: u16 = 1; - const ERROR_BOUND_ZERO: f32 = 0.0; // Tests for insert_record_batch(). #[tokio::test] @@ -525,7 +522,7 @@ mod tests { data_manager .memory_pool .remaining_compressed_memory_in_bytes(), - 1437 + 1565 ); // There should no longer be any compressed data in memory. @@ -588,27 +585,8 @@ mod tests { /// segments. The compressed segments time range is from `time_ms` to `time_ms` + 3, while the /// value range is from `offset` + 5.2 to `offset` + 34.2. fn compressed_segment_batch_with_time(time_ms: i64, offset: f32) -> CompressedSegmentBatch { - let query_schema = Arc::new(Schema::new(vec![ - Field::new("timestamp", ArrowTimestamp::DATA_TYPE, false), - Field::new("field_1", ArrowValue::DATA_TYPE, false), - Field::new("field_2", ArrowValue::DATA_TYPE, false), - ])); - let model_table_metadata = Arc::new( - ModelTableMetadata::try_new( - test::MODEL_TABLE_NAME.to_owned(), - query_schema, - vec![ - ErrorBound::try_new_relative(ERROR_BOUND_ZERO).unwrap(), - ErrorBound::try_new_relative(ERROR_BOUND_ZERO).unwrap(), - ErrorBound::try_new_relative(ERROR_BOUND_ZERO).unwrap(), - ], - vec![None, None, None], - ) - .unwrap(), - ); - CompressedSegmentBatch::new( - model_table_metadata, + test::model_table_metadata_arc(), vec![ test::compressed_segments_record_batch_with_time(COLUMN_INDEX, time_ms, offset), test::compressed_segments_record_batch_with_time(COLUMN_INDEX + 1, time_ms, offset), diff --git a/crates/modelardb_server/src/storage/data_transfer.rs b/crates/modelardb_server/src/storage/data_transfer.rs index 878d49341..8b64b2cd3 100644 --- 
a/crates/modelardb_server/src/storage/data_transfer.rs +++ b/crates/modelardb_server/src/storage/data_transfer.rs @@ -284,7 +284,7 @@ mod tests { use modelardb_storage::test; use tempfile::{self, TempDir}; - const EXPECTED_MODEL_TABLE_FILE_SIZE: usize = 2080; + const EXPECTED_MODEL_TABLE_FILE_SIZE: usize = 2038; // Tests for data transfer component. #[tokio::test] From e26688542f3ddc59a23ebe4ea09abdb9b34cffa4 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Fri, 21 Feb 2025 22:30:40 +0100 Subject: [PATCH 32/69] Add temporary fix to grid since tag metadata is no longer available --- crates/modelardb_compression/src/compression.rs | 3 +-- crates/modelardb_storage/src/query/grid_exec.rs | 3 +-- crates/modelardb_storage/src/query/model_table.rs | 11 +++-------- 3 files changed, 5 insertions(+), 12 deletions(-) diff --git a/crates/modelardb_compression/src/compression.rs b/crates/modelardb_compression/src/compression.rs index 8ec0d6cce..202899ce3 100644 --- a/crates/modelardb_compression/src/compression.rs +++ b/crates/modelardb_compression/src/compression.rs @@ -907,7 +907,6 @@ mod tests { modelardb_types::arrays!( compressed_record_batch, - univariate_ids, model_type_ids, start_times, end_times, @@ -924,7 +923,7 @@ mod tests { let start_index = univariate_id_builder.len(); models::grid( - univariate_ids.value(row_index), + 0, model_type_ids.value(row_index), start_times.value(row_index), end_times.value(row_index), diff --git a/crates/modelardb_storage/src/query/grid_exec.rs b/crates/modelardb_storage/src/query/grid_exec.rs index 088a2dfcf..6d90b54a3 100644 --- a/crates/modelardb_storage/src/query/grid_exec.rs +++ b/crates/modelardb_storage/src/query/grid_exec.rs @@ -266,7 +266,6 @@ impl GridStream { // Retrieve the arrays from batch and cast them to their concrete type. 
modelardb_types::arrays!( batch, - univariate_ids, model_type_ids, start_times, end_times, @@ -307,7 +306,7 @@ impl GridStream { let length_before = univariate_id_builder.len(); modelardb_compression::grid( - univariate_ids.value(row_index), + 0, model_type_ids.value(row_index), start_times.value(row_index), end_times.value(row_index), diff --git a/crates/modelardb_storage/src/query/model_table.rs b/crates/modelardb_storage/src/query/model_table.rs index 5f065b74d..23ed647df 100644 --- a/crates/modelardb_storage/src/query/model_table.rs +++ b/crates/modelardb_storage/src/query/model_table.rs @@ -18,7 +18,7 @@ //! and returns a physical query plan that produces all the data points required for the query. use std::any::Any; -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; use std::fmt; use std::result::Result as StdResult; use std::sync::Arc; @@ -509,13 +509,8 @@ impl TableProvider for ModelTable { GRID_SCHEMA.0.clone(), )?; - // Compute a mapping from hashes to the requested tag values in the requested order. If the - // server is a cloud node, use the table metadata manager for the remote metadata Delta Lake. - let hash_to_tags = self - .table_metadata_manager - .mapping_from_hash_to_tags(table_name, &stored_tag_columns_in_projection) - .await - .map_err(|error| DataFusionError::Plan(error.to_string()))?; + // TODO: Retrieve the tag values from the data instead. 
+ let hash_to_tags: HashMap> = HashMap::new(); if stored_field_columns_in_projection.is_empty() { stored_field_columns_in_projection.push(self.fallback_field_column); From e01487750c21047831c6676e2b1576be4ea11373 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Sat, 22 Feb 2025 17:42:20 +0100 Subject: [PATCH 33/69] Reformat, fix clippy errors and remove unused dependencies --- Cargo.lock | 2 -- crates/modelardb_server/src/context.rs | 2 +- .../modelardb_server/src/storage/uncompressed_data_manager.rs | 2 +- crates/modelardb_storage/Cargo.toml | 2 -- crates/modelardb_storage/src/lib.rs | 4 ++-- crates/modelardb_storage/src/query/model_table.rs | 2 +- 6 files changed, 5 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 708f762bc..7130ce9cf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3016,7 +3016,6 @@ dependencies = [ "arrow-flight", "async-trait", "bytes", - "dashmap", "datafusion", "deltalake", "futures", @@ -3024,7 +3023,6 @@ dependencies = [ "modelardb_compression", "modelardb_types", "object_store", - "proptest", "sqlparser", "tempfile", "tokio", diff --git a/crates/modelardb_server/src/context.rs b/crates/modelardb_server/src/context.rs index 593720bdc..38258a161 100644 --- a/crates/modelardb_server/src/context.rs +++ b/crates/modelardb_server/src/context.rs @@ -161,7 +161,7 @@ impl Context { self.data_folders .local_data_folder .delta_lake - .create_model_table(&model_table_metadata) + .create_model_table(model_table_metadata) .await?; let query_folder_table_metadata_manager = self diff --git a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs index 131b78401..d451bc497 100644 --- a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs +++ b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs @@ -106,7 +106,7 @@ impl UncompressedDataManager { // unwrap() is safe as data cannot be ingested 
into a model table that does not exist. let model_table_metadata = context - .model_table_metadata_from_default_database_schema(&table_name) + .model_table_metadata_from_default_database_schema(table_name) .await? .unwrap(); diff --git a/crates/modelardb_storage/Cargo.toml b/crates/modelardb_storage/Cargo.toml index 3c9080e98..64c127cc0 100644 --- a/crates/modelardb_storage/Cargo.toml +++ b/crates/modelardb_storage/Cargo.toml @@ -24,7 +24,6 @@ arrow-flight.workspace = true arrow.workspace = true async-trait.workspace = true bytes.workspace = true -dashmap.workspace = true datafusion.workspace = true deltalake = { workspace = true, features = ["datafusion", "s3"] } futures.workspace = true @@ -39,6 +38,5 @@ url.workspace = true [dev-dependencies] futures.workspace = true -proptest.workspace = true tempfile.workspace = true tokio = { workspace = true, features = ["rt-multi-thread", "signal"] } diff --git a/crates/modelardb_storage/src/lib.rs b/crates/modelardb_storage/src/lib.rs index 16548a0b8..7dcd97e80 100644 --- a/crates/modelardb_storage/src/lib.rs +++ b/crates/modelardb_storage/src/lib.rs @@ -28,8 +28,8 @@ use std::result::Result as StdResult; use std::sync::Arc; use arrow::array::{ - Array, ArrayRef, BinaryArray, BooleanArray, Float32Array, Float32Builder, - ListArray, ListBuilder, RecordBatch, StringArray, StringBuilder, + Array, ArrayRef, BinaryArray, BooleanArray, Float32Array, Float32Builder, ListArray, + ListBuilder, RecordBatch, StringArray, StringBuilder, }; use arrow::compute; use arrow::compute::concat_batches; diff --git a/crates/modelardb_storage/src/query/model_table.rs b/crates/modelardb_storage/src/query/model_table.rs index 23ed647df..49c27fd8b 100644 --- a/crates/modelardb_storage/src/query/model_table.rs +++ b/crates/modelardb_storage/src/query/model_table.rs @@ -42,7 +42,7 @@ use datafusion::physical_plan::insert::{DataSink, DataSinkExec}; use datafusion::physical_plan::{ExecutionPlan, PhysicalExpr}; use deltalake::kernel::LogicalFile; use 
deltalake::{DeltaTable, DeltaTableError, ObjectMeta, PartitionFilter, PartitionValue}; -use modelardb_types::schemas::{QUERY_COMPRESSED_SCHEMA, FIELD_COLUMN, GRID_SCHEMA}; +use modelardb_types::schemas::{FIELD_COLUMN, GRID_SCHEMA, QUERY_COMPRESSED_SCHEMA}; use modelardb_types::types::{ArrowTimestamp, ArrowValue}; use crate::metadata::model_table_metadata::ModelTableMetadata; From dee061dfe50b35004efe49bef92a92d41609984e Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Sun, 23 Feb 2025 00:11:45 +0100 Subject: [PATCH 34/69] Remove table metadata manager from ModelTable struct --- crates/modelardb_server/src/context.rs | 26 +++++-------------- crates/modelardb_storage/src/lib.rs | 9 +++---- .../src/optimizer/model_simple_aggregates.rs | 12 --------- .../src/query/model_table.rs | 7 +---- 4 files changed, 10 insertions(+), 44 deletions(-) diff --git a/crates/modelardb_server/src/context.rs b/crates/modelardb_server/src/context.rs index 38258a161..857e438ac 100644 --- a/crates/modelardb_server/src/context.rs +++ b/crates/modelardb_server/src/context.rs @@ -22,7 +22,6 @@ use datafusion::arrow::datatypes::{Schema, SchemaRef}; use datafusion::catalog::SchemaProvider; use datafusion::prelude::SessionContext; use modelardb_storage::metadata::model_table_metadata::ModelTableMetadata; -use modelardb_storage::metadata::table_metadata_manager::TableMetadataManager; use modelardb_types::schemas::TABLE_METADATA_SCHEMA; use tokio::runtime::Runtime; use tokio::sync::RwLock; @@ -164,18 +163,9 @@ impl Context { .create_model_table(model_table_metadata) .await?; - let query_folder_table_metadata_manager = self - .data_folders - .query_data_folder - .table_metadata_manager - .clone(); - // Register the model table with Apache DataFusion. 
- self.register_model_table( - Arc::new(model_table_metadata.clone()), - query_folder_table_metadata_manager.clone(), - ) - .await?; + self.register_model_table(Arc::new(model_table_metadata.clone())) + .await?; // Persist the new model table to the metadata Delta Lake. self.data_folders @@ -250,22 +240,19 @@ impl Context { .model_table_metadata() .await?; - let table_metadata_manager = &self.data_folders.query_data_folder.table_metadata_manager; for metadata in model_table_metadata { - self.register_model_table(metadata, table_metadata_manager.clone()) - .await?; + self.register_model_table(metadata).await?; } Ok(()) } - /// Register the model table with `model_table_metadata` from `table_metadata_manager` in Apache - /// DataFusion. If the model table does not exist or could not be registered with Apache - /// DataFusion, return [`ModelarDbServerError`]. + /// Register the model table with `model_table_metadata` in Apache DataFusion. If the model + /// table does not exist or could not be registered with Apache DataFusion, return + /// [`ModelarDbServerError`]. 
async fn register_model_table( &self, model_table_metadata: Arc, - table_metadata_manager: Arc, ) -> Result<()> { let delta_table = self .data_folders @@ -283,7 +270,6 @@ impl Context { &self.session_context, delta_table, model_table_metadata.clone(), - table_metadata_manager, model_table_data_sink, )?; diff --git a/crates/modelardb_storage/src/lib.rs b/crates/modelardb_storage/src/lib.rs index 7dcd97e80..8c0959a28 100644 --- a/crates/modelardb_storage/src/lib.rs +++ b/crates/modelardb_storage/src/lib.rs @@ -63,7 +63,6 @@ use sqlparser::ast::Statement; use crate::error::{ModelarDbStorageError, Result}; use crate::metadata::model_table_metadata::{GeneratedColumn, ModelTableMetadata}; -use crate::metadata::table_metadata_manager::TableMetadataManager; use crate::query::metadata_table::MetadataTable; use crate::query::model_table::ModelTable; use crate::query::normal_table::NormalTable; @@ -121,19 +120,17 @@ pub fn register_normal_table( Ok(()) } -/// Register the model table stored in `delta_table` with `model_table_metadata` from -/// `table_metadata_manager` and `data_sink` in `session_context`. If the model table could not be -/// registered with Apache DataFusion, return [`ModelarDbStorageError`]. +/// Register the model table stored in `delta_table` with `model_table_metadata` and `data_sink` in +/// `session_context`. If the model table could not be registered with Apache DataFusion, return +/// [`ModelarDbStorageError`]. 
pub fn register_model_table( session_context: &SessionContext, delta_table: DeltaTable, model_table_metadata: Arc, - table_metadata_manager: Arc, data_sink: Arc, ) -> Result<()> { let model_table = ModelTable::new( delta_table, - table_metadata_manager, model_table_metadata.clone(), data_sink, ); diff --git a/crates/modelardb_storage/src/optimizer/model_simple_aggregates.rs b/crates/modelardb_storage/src/optimizer/model_simple_aggregates.rs index 5cd11e16a..699994b53 100644 --- a/crates/modelardb_storage/src/optimizer/model_simple_aggregates.rs +++ b/crates/modelardb_storage/src/optimizer/model_simple_aggregates.rs @@ -630,7 +630,6 @@ mod tests { use tonic::async_trait; use crate::delta_lake::DeltaLake; - use crate::metadata::table_metadata_manager::TableMetadataManager; use crate::optimizer; use crate::query::grid_exec::GridExec; use crate::query::model_table::ModelTable; @@ -766,11 +765,6 @@ mod tests { // Setup access to data and metadata in data folder. let data_folder_path = temp_dir.path(); let delta_lake = DeltaLake::try_from_local_path(data_folder_path).unwrap(); - let table_metadata_manager = Arc::new( - TableMetadataManager::try_from_path(data_folder_path, None) - .await - .unwrap(), - ); // Setup access to Apache DataFusion. 
let mut session_state_builder = SessionStateBuilder::new().with_default_features(); @@ -792,16 +786,10 @@ mod tests { .await .unwrap(); - table_metadata_manager - .save_model_table_metadata(&model_table_metadata) - .await - .unwrap(); - let model_table_data_sink = Arc::new(NoOpDataSink {}); let model_table = ModelTable::new( delta_table, - table_metadata_manager, model_table_metadata.clone(), model_table_data_sink, ); diff --git a/crates/modelardb_storage/src/query/model_table.rs b/crates/modelardb_storage/src/query/model_table.rs index 49c27fd8b..a617c0983 100644 --- a/crates/modelardb_storage/src/query/model_table.rs +++ b/crates/modelardb_storage/src/query/model_table.rs @@ -46,7 +46,6 @@ use modelardb_types::schemas::{FIELD_COLUMN, GRID_SCHEMA, QUERY_COMPRESSED_SCHEM use modelardb_types::types::{ArrowTimestamp, ArrowValue}; use crate::metadata::model_table_metadata::ModelTableMetadata; -use crate::metadata::table_metadata_manager::TableMetadataManager; use crate::query::generated_as_exec::{ColumnToGenerate, GeneratedAsExec}; use crate::query::grid_exec::GridExec; use crate::query::sorted_join_exec::{SortedJoinColumnType, SortedJoinExec}; @@ -64,8 +63,6 @@ pub(crate) struct ModelTable { model_table_metadata: Arc, /// Where data should be written to. data_sink: Arc, - /// Access to metadata related to tables. - table_metadata_manager: Arc, /// Field column to use for queries that do not include fields. fallback_field_column: u16, } @@ -73,7 +70,6 @@ pub(crate) struct ModelTable { impl ModelTable { pub(crate) fn new( delta_table: DeltaTable, - table_metadata_manager: Arc, model_table_metadata: Arc, data_sink: Arc, ) -> Arc { @@ -96,7 +92,6 @@ impl ModelTable { delta_table, model_table_metadata, data_sink, - table_metadata_manager, fallback_field_column, }) } @@ -403,7 +398,7 @@ impl TableProvider for ModelTable { limit: Option, ) -> DataFusionResult> { // Create shorthands for the metadata used during planning to improve readability. 
- let table_name = self.model_table_metadata.name.as_str(); + let _table_name = self.model_table_metadata.name.as_str(); let schema = &self.model_table_metadata.schema; let tag_column_indices = &self.model_table_metadata.tag_column_indices; let query_schema = &self.model_table_metadata.query_schema; From 5652619c5e205e8f6c973db0f439c626ece34204 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Sun, 23 Feb 2025 23:29:55 +0100 Subject: [PATCH 35/69] Fix comments and remove unused variable --- crates/modelardb_storage/src/query/model_table.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/crates/modelardb_storage/src/query/model_table.rs b/crates/modelardb_storage/src/query/model_table.rs index a617c0983..895267690 100644 --- a/crates/modelardb_storage/src/query/model_table.rs +++ b/crates/modelardb_storage/src/query/model_table.rs @@ -264,7 +264,7 @@ fn new_binary_expr(left: Expr, op: Operator, right: Expr) -> Expr { }) } -/// Convert `expr` to a [`Option`] with the types in `query_schema`. +/// Convert `maybe_expr` to a [`PhysicalExpr`] with the types in `query_schema` if possible. fn maybe_convert_logical_expr_to_physical_expr( maybe_expr: Option<&Expr>, query_schema: SchemaRef, @@ -342,8 +342,8 @@ fn new_apache_parquet_exec( Ok(Arc::new(apache_parquet_exec)) } -// Convert the [`LogicalFile`] `logical_file` to a [`PartitionFilter`]. A [`DataFusionError`] is -// returned if the time the file was last modified cannot be read from `logical_file`. +/// Convert the [`LogicalFile`] `logical_file` to a [`PartitionFilter`]. A [`DataFusionError`] is +/// returned if the time the file was last modified cannot be read from `logical_file`. fn logical_file_to_partitioned_file( logical_file: &LogicalFile, ) -> DataFusionResult { @@ -398,7 +398,6 @@ impl TableProvider for ModelTable { limit: Option, ) -> DataFusionResult> { // Create shorthands for the metadata used during planning to improve readability. 
- let _table_name = self.model_table_metadata.name.as_str(); let schema = &self.model_table_metadata.schema; let tag_column_indices = &self.model_table_metadata.tag_column_indices; let query_schema = &self.model_table_metadata.query_schema; @@ -487,8 +486,6 @@ impl TableProvider for ModelTable { } } - // TODO: extract all of the predicates that consist of tag = tag_value from the query so the - // segments can be pruned by univariate_id in ParquetExec and hash_to_tags can be minimized. // Filters are not converted to PhysicalExpr in rewrite_and_combine_filters() to simplify // testing rewrite_and_combine_filters() as Expr can be compared while PhysicalExpr cannot. let (maybe_rewritten_parquet_filters, maybe_rewritten_grid_filters) = From 172adb466f857fc87f1dc723f5d2c620a282904e Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Sun, 23 Feb 2025 23:42:45 +0100 Subject: [PATCH 36/69] Remove utility functions to convert univariate id to tag hash and column index --- crates/modelardb_types/src/functions.rs | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/crates/modelardb_types/src/functions.rs b/crates/modelardb_types/src/functions.rs index a70b797b2..9bcd182d7 100644 --- a/crates/modelardb_types/src/functions.rs +++ b/crates/modelardb_types/src/functions.rs @@ -15,16 +15,6 @@ //! Implementation of helper functions to operate on the types used through ModelarDB. -/// Extract the first 54-bits from `univariate_id` which is a hash computed from tags. -pub fn univariate_id_to_tag_hash(univariate_id: u64) -> u64 { - univariate_id & 18446744073709550592 -} - -/// Extract the last 10-bits from `univariate_id` which is the index of the time series column. -pub fn univariate_id_to_column_index(univariate_id: u64) -> u16 { - (univariate_id & 1023) as u16 -} - /// Normalize `name` to allow direct comparisons between names. 
pub fn normalize_name(name: &str) -> String { name.to_lowercase() From 703d4e09834bfc0f1227f68b8611b4adb1b6a1ce Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Sun, 23 Feb 2025 23:45:45 +0100 Subject: [PATCH 37/69] Remove hash_to_tags from SortedJoinExec --- .../src/query/model_table.rs | 6 +-- .../src/query/sorted_join_exec.rs | 39 ++----------------- 2 files changed, 4 insertions(+), 41 deletions(-) diff --git a/crates/modelardb_storage/src/query/model_table.rs b/crates/modelardb_storage/src/query/model_table.rs index 895267690..bc1bcdaea 100644 --- a/crates/modelardb_storage/src/query/model_table.rs +++ b/crates/modelardb_storage/src/query/model_table.rs @@ -18,7 +18,7 @@ //! and returns a physical query plan that produces all the data points required for the query. use std::any::Any; -use std::collections::{HashMap, HashSet}; +use std::collections::HashSet; use std::fmt; use std::result::Result as StdResult; use std::sync::Arc; @@ -501,9 +501,6 @@ impl TableProvider for ModelTable { GRID_SCHEMA.0.clone(), )?; - // TODO: Retrieve the tag values from the data instead. - let hash_to_tags: HashMap> = HashMap::new(); - if stored_field_columns_in_projection.is_empty() { stored_field_columns_in_projection.push(self.fallback_field_column); } @@ -538,7 +535,6 @@ impl TableProvider for ModelTable { let sorted_join_exec = SortedJoinExec::new( schema_after_projection, stored_columns_in_projection, - Arc::new(hash_to_tags), field_column_execution_plans, ); diff --git a/crates/modelardb_storage/src/query/sorted_join_exec.rs b/crates/modelardb_storage/src/query/sorted_join_exec.rs index 278220502..1f411a95a 100644 --- a/crates/modelardb_storage/src/query/sorted_join_exec.rs +++ b/crates/modelardb_storage/src/query/sorted_join_exec.rs @@ -21,13 +21,12 @@ //! or more tag columns. 
use std::any::Any; -use std::collections::HashMap; use std::fmt::{Formatter, Result as FmtResult}; use std::pin::Pin; use std::sync::Arc; use std::task::{Context as StdTaskContext, Poll}; -use datafusion::arrow::array::{ArrayRef, StringBuilder, UInt64Array}; +use datafusion::arrow::array::{ArrayRef, StringBuilder}; use datafusion::arrow::datatypes::SchemaRef; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; @@ -40,7 +39,6 @@ use datafusion::physical_plan::{ PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, }; use futures::stream::{Stream, StreamExt}; -use modelardb_types::functions; use crate::query::QUERY_REQUIREMENT_DATA_POINT; @@ -62,8 +60,6 @@ pub(crate) struct SortedJoinExec { schema: SchemaRef, /// Order of columns to return. return_order: Vec, - /// Mapping from tag hash to tags. - hash_to_tags: Arc>>, /// Execution plans to read batches of data points from. inputs: Vec>, /// Properties about the plan used in query optimization. @@ -76,7 +72,6 @@ impl SortedJoinExec { pub(crate) fn new( schema: SchemaRef, return_order: Vec, - hash_to_tags: Arc>>, inputs: Vec>, ) -> Arc { // Specify that the record batches produced by the execution plan will have an unknown order @@ -93,7 +88,6 @@ impl SortedJoinExec { Arc::new(SortedJoinExec { schema, return_order, - hash_to_tags, inputs, plan_properties, metrics: ExecutionPlanMetricsSet::new(), @@ -139,7 +133,6 @@ impl ExecutionPlan for SortedJoinExec { Ok(SortedJoinExec::new( self.schema.clone(), self.return_order.clone(), - self.hash_to_tags.clone(), children, )) } else { @@ -165,7 +158,6 @@ impl ExecutionPlan for SortedJoinExec { Ok(Box::pin(SortedJoinStream::new( self.schema.clone(), self.return_order.clone(), - self.hash_to_tags.clone(), streams, BaselineMetrics::new(&self.metrics, partition), ))) @@ -208,8 +200,6 @@ struct SortedJoinStream { schema: SchemaRef, /// Order of columns to return. 
return_order: Vec, - /// Mapping from tag hash to tags. - hash_to_tags: Arc>>, /// Streams to read batches of data points from. inputs: Vec, /// Current batch of data points to join from. @@ -222,7 +212,6 @@ impl SortedJoinStream { fn new( schema: SchemaRef, return_order: Vec, - hash_to_tags: Arc>>, inputs: Vec, baseline_metrics: BaselineMetrics, ) -> Self { @@ -232,7 +221,6 @@ impl SortedJoinStream { SortedJoinStream { schema, return_order, - hash_to_tags, inputs, batches, baseline_metrics, @@ -289,32 +277,11 @@ impl SortedJoinStream { fn sorted_join(&self) -> Poll>> { let mut columns: Vec = Vec::with_capacity(self.schema.fields.len()); - // Compute the requested tag columns, so they can be assigned to the batch by index. + // TODO: Compute the requested tag columns, so they can be assigned to the batch by index. // unwrap() is safe as a record batch is read from each input before this method is called. let batch = self.batches[0].as_ref().unwrap(); - let univariate_ids = modelardb_types::array!(batch, 0, UInt64Array); - let mut tag_columns = if !self.hash_to_tags.is_empty() { - // unwrap() is safe as hash_to_tags is guaranteed not to be empty. - let tags = self.hash_to_tags.values().next().unwrap(); - let capacity = univariate_ids.len(); - let mut tag_columns: Vec = tags - .iter() - .map(|_vec| StringBuilder::with_capacity(capacity, capacity)) - .collect(); - - for univariate_id in univariate_ids.values() { - let tag_hash = functions::univariate_id_to_tag_hash(*univariate_id); - let tags = &self.hash_to_tags[&tag_hash]; - for (index, tag) in tags.iter().enumerate() { - tag_columns[index].append_value(tag.clone()); - } - } - - tag_columns - } else { - vec![] - }; + let mut tag_columns: Vec = vec![]; // The batches and tags columns are already in the correct order, so they can be appended. 
let mut field_index = 0; From d5f21dfd24d28638329b9bdf827ae69a1d843d9c Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Mon, 24 Feb 2025 20:02:49 +0100 Subject: [PATCH 38/69] Update indices for accessing compressed segment arrays --- .../src/optimizer/model_simple_aggregates.rs | 42 +++++++++---------- .../modelardb_storage/src/query/grid_exec.rs | 2 +- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/crates/modelardb_storage/src/optimizer/model_simple_aggregates.rs b/crates/modelardb_storage/src/optimizer/model_simple_aggregates.rs index 699994b53..2273a7914 100644 --- a/crates/modelardb_storage/src/optimizer/model_simple_aggregates.rs +++ b/crates/modelardb_storage/src/optimizer/model_simple_aggregates.rs @@ -334,9 +334,9 @@ struct ModelCountAccumulator { impl Accumulator for ModelCountAccumulator { /// Update the [`Accumulators`](Accumulator) state from `values`. fn update_batch(&mut self, arrays: &[ArrayRef]) -> DataFusionResult<()> { - let start_times = modelardb_types::value!(arrays, 2, TimestampArray); - let end_times = modelardb_types::value!(arrays, 3, TimestampArray); - let timestamps = modelardb_types::value!(arrays, 4, BinaryArray); + let start_times = modelardb_types::value!(arrays, 1, TimestampArray); + let end_times = modelardb_types::value!(arrays, 2, TimestampArray); + let timestamps = modelardb_types::value!(arrays, 3, BinaryArray); for row_index in 0..start_times.len() { let start_time = start_times.value(row_index); @@ -384,7 +384,7 @@ struct ModelMinAccumulator { impl Accumulator for ModelMinAccumulator { /// Update the [`Accumulators`](Accumulator) state from `values`. 
fn update_batch(&mut self, values: &[ArrayRef]) -> DataFusionResult<()> { - let min_values = modelardb_types::value!(values, 5, ValueArray); + let min_values = modelardb_types::value!(values, 4, ValueArray); for row_index in 0..min_values.len() { self.min = Value::min(self.min, min_values.value(row_index)); } @@ -427,7 +427,7 @@ struct ModelMaxAccumulator { impl Accumulator for ModelMaxAccumulator { /// Update the [`Accumulators`](Accumulator) state from `values`. fn update_batch(&mut self, arrays: &[ArrayRef]) -> DataFusionResult<()> { - let max_values = modelardb_types::value!(arrays, 6, ValueArray); + let max_values = modelardb_types::value!(arrays, 5, ValueArray); for row_index in 0..max_values.len() { self.max = Value::max(self.max, max_values.value(row_index)); } @@ -470,14 +470,14 @@ struct ModelSumAccumulator { impl Accumulator for ModelSumAccumulator { /// Update the [`Accumulators`](Accumulator) state from `values`. fn update_batch(&mut self, arrays: &[ArrayRef]) -> DataFusionResult<()> { - let model_type_ids = modelardb_types::value!(arrays, 1, UInt8Array); - let start_times = modelardb_types::value!(arrays, 2, TimestampArray); - let end_times = modelardb_types::value!(arrays, 3, TimestampArray); - let timestamps = modelardb_types::value!(arrays, 4, BinaryArray); - let min_values = modelardb_types::value!(arrays, 5, ValueArray); - let max_values = modelardb_types::value!(arrays, 6, ValueArray); - let values = modelardb_types::value!(arrays, 7, BinaryArray); - let residuals = modelardb_types::value!(arrays, 8, BinaryArray); + let model_type_ids = modelardb_types::value!(arrays, 0, UInt8Array); + let start_times = modelardb_types::value!(arrays, 1, TimestampArray); + let end_times = modelardb_types::value!(arrays, 2, TimestampArray); + let timestamps = modelardb_types::value!(arrays, 3, BinaryArray); + let min_values = modelardb_types::value!(arrays, 4, ValueArray); + let max_values = modelardb_types::value!(arrays, 5, ValueArray); + let values = 
modelardb_types::value!(arrays, 6, BinaryArray); + let residuals = modelardb_types::value!(arrays, 7, BinaryArray); for row_index in 0..model_type_ids.len() { let model_type_id = model_type_ids.value(row_index); @@ -542,14 +542,14 @@ struct ModelAvgAccumulator { impl Accumulator for ModelAvgAccumulator { /// Update the [`Accumulators`](Accumulator) state from `values`. fn update_batch(&mut self, arrays: &[ArrayRef]) -> DataFusionResult<()> { - let model_type_ids = modelardb_types::value!(arrays, 1, UInt8Array); - let start_times = modelardb_types::value!(arrays, 2, TimestampArray); - let end_times = modelardb_types::value!(arrays, 3, TimestampArray); - let timestamps = modelardb_types::value!(arrays, 4, BinaryArray); - let min_values = modelardb_types::value!(arrays, 5, ValueArray); - let max_values = modelardb_types::value!(arrays, 6, ValueArray); - let values = modelardb_types::value!(arrays, 7, BinaryArray); - let residuals = modelardb_types::value!(arrays, 8, BinaryArray); + let model_type_ids = modelardb_types::value!(arrays, 0, UInt8Array); + let start_times = modelardb_types::value!(arrays, 1, TimestampArray); + let end_times = modelardb_types::value!(arrays, 2, TimestampArray); + let timestamps = modelardb_types::value!(arrays, 3, BinaryArray); + let min_values = modelardb_types::value!(arrays, 4, ValueArray); + let max_values = modelardb_types::value!(arrays, 5, ValueArray); + let values = modelardb_types::value!(arrays, 6, BinaryArray); + let residuals = modelardb_types::value!(arrays, 7, BinaryArray); for row_index in 0..model_type_ids.len() { let model_type_id = model_type_ids.value(row_index); diff --git a/crates/modelardb_storage/src/query/grid_exec.rs b/crates/modelardb_storage/src/query/grid_exec.rs index 6d90b54a3..aecd7bd1e 100644 --- a/crates/modelardb_storage/src/query/grid_exec.rs +++ b/crates/modelardb_storage/src/query/grid_exec.rs @@ -159,7 +159,7 @@ impl ExecutionPlan for GridExec { partition: usize, task_context: Arc, ) -> DataFusionResult 
{ - // Must be read before GridStream as task_context are moved into input. + // Must be read before GridStream as task_context is moved into input. let batch_size = task_context.session_config().batch_size(); let grid_stream_metrics = GridStreamMetrics::new(&self.metrics, partition); From 028c6efe84915d63dcd715f842dd715a4c17df04 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Tue, 25 Feb 2025 22:36:02 +0100 Subject: [PATCH 39/69] Add query compressed schema to ModelTable --- crates/modelardb_storage/src/query/model_table.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/crates/modelardb_storage/src/query/model_table.rs b/crates/modelardb_storage/src/query/model_table.rs index bc1bcdaea..c5e391597 100644 --- a/crates/modelardb_storage/src/query/model_table.rs +++ b/crates/modelardb_storage/src/query/model_table.rs @@ -65,6 +65,8 @@ pub(crate) struct ModelTable { data_sink: Arc, /// Field column to use for queries that do not include fields. fallback_field_column: u16, + /// Schema of the compressed segments stored on disk. + query_compressed_schema: Arc, } impl ModelTable { @@ -88,11 +90,21 @@ impl ModelTable { .unwrap() as u16 // unwrap() is safe as all model tables contain at least one field. }; + // Add the tag columns to the base schema for queryable compressed segments. 
+ let mut query_compressed_schema_fields = QUERY_COMPRESSED_SCHEMA.0.fields.clone().to_vec(); + for index in &model_table_metadata.tag_column_indices { + query_compressed_schema_fields + .push(Arc::new(model_table_metadata.schema.field(*index).clone())); + } + + let query_compressed_schema = Arc::new(Schema::new(query_compressed_schema_fields)); + Arc::new(ModelTable { delta_table, model_table_metadata, data_sink, fallback_field_column, + query_compressed_schema, }) } From 8c9b2db0144404b8c4f9d08ad342d6f544960891 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Tue, 25 Feb 2025 22:56:02 +0100 Subject: [PATCH 40/69] Add query order segment to model table --- .../src/query/model_table.rs | 33 ++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/crates/modelardb_storage/src/query/model_table.rs b/crates/modelardb_storage/src/query/model_table.rs index c5e391597..38a8f8224 100644 --- a/crates/modelardb_storage/src/query/model_table.rs +++ b/crates/modelardb_storage/src/query/model_table.rs @@ -23,6 +23,7 @@ use std::fmt; use std::result::Result as StdResult; use std::sync::Arc; +use arrow::compute::SortOptions; use async_trait::async_trait; use datafusion::arrow::datatypes::{ ArrowPrimitiveType, DataType, Field, Schema, SchemaRef, TimeUnit, @@ -37,7 +38,8 @@ use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::context::ExecutionProps; use datafusion::logical_expr::dml::InsertOp; use datafusion::logical_expr::{self, utils, BinaryExpr, Expr, Operator}; -use datafusion::physical_expr::{planner, LexOrdering}; +use datafusion::physical_expr::expressions::Column; +use datafusion::physical_expr::{planner, LexOrdering, PhysicalSortExpr}; use datafusion::physical_plan::insert::{DataSink, DataSinkExec}; use datafusion::physical_plan::{ExecutionPlan, PhysicalExpr}; use deltalake::kernel::LogicalFile; @@ -67,6 +69,10 @@ pub(crate) struct ModelTable { 
fallback_field_column: u16, /// Schema of the compressed segments stored on disk. query_compressed_schema: Arc, + /// The sort order [`ParquetExec`] guarantees for the segments it produces. It is guaranteed by + /// [`ParquetExec`] because the storage engine uses this sort order for each Apache Parquet file + /// in this model table and these files are read sequentially by [`ParquetExec`]. + query_order_segment: LexOrdering, } impl ModelTable { @@ -99,12 +105,37 @@ impl ModelTable { let query_compressed_schema = Arc::new(Schema::new(query_compressed_schema_fields)); + // Segments are sorted by the tag columns and the start time. + let sort_options = SortOptions { + descending: false, + nulls_first: false, + }; + + let mut physical_sort_exprs = vec![]; + for index in &model_table_metadata.tag_column_indices { + let tag_column_name = model_table_metadata.schema.field(*index).name(); + + // unwrap() is safe as the tag columns are always present in the query compressed schema. + let segment_index = query_compressed_schema.index_of(tag_column_name).unwrap(); + + physical_sort_exprs.push(PhysicalSortExpr { + expr: Arc::new(Column::new(tag_column_name, segment_index)), + options: sort_options, + }); + }; + + physical_sort_exprs.push(PhysicalSortExpr { + expr: Arc::new(Column::new("start_time", 1)), + options: sort_options, + }); + Arc::new(ModelTable { delta_table, model_table_metadata, data_sink, fallback_field_column, query_compressed_schema, + query_order_segment: LexOrdering::new(physical_sort_exprs), }) } From f2d87db391d22ca6924d0246660a15085ace9828 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Tue, 25 Feb 2025 23:08:22 +0100 Subject: [PATCH 41/69] Add query requirement segment to ModelTable --- .../src/query/model_table.rs | 26 ++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/crates/modelardb_storage/src/query/model_table.rs b/crates/modelardb_storage/src/query/model_table.rs index 
38a8f8224..7bbf11c1a 100644 --- a/crates/modelardb_storage/src/query/model_table.rs +++ b/crates/modelardb_storage/src/query/model_table.rs @@ -39,7 +39,9 @@ use datafusion::execution::context::ExecutionProps; use datafusion::logical_expr::dml::InsertOp; use datafusion::logical_expr::{self, utils, BinaryExpr, Expr, Operator}; use datafusion::physical_expr::expressions::Column; -use datafusion::physical_expr::{planner, LexOrdering, PhysicalSortExpr}; +use datafusion::physical_expr::{ + planner, LexOrdering, LexRequirement, PhysicalSortExpr, PhysicalSortRequirement, +}; use datafusion::physical_plan::insert::{DataSink, DataSinkExec}; use datafusion::physical_plan::{ExecutionPlan, PhysicalExpr}; use deltalake::kernel::LogicalFile; @@ -73,6 +75,8 @@ pub(crate) struct ModelTable { /// [`ParquetExec`] because the storage engine uses this sort order for each Apache Parquet file /// in this model table and these files are read sequentially by [`ParquetExec`]. query_order_segment: LexOrdering, + /// The sort order that [`GridExec`] requires for the segments it receives as its input. + query_requirement_segment: LexRequirement, } impl ModelTable { @@ -111,31 +115,41 @@ impl ModelTable { nulls_first: false, }; - let mut physical_sort_exprs = vec![]; + let mut segment_physical_sort_exprs = vec![]; for index in &model_table_metadata.tag_column_indices { let tag_column_name = model_table_metadata.schema.field(*index).name(); // unwrap() is safe as the tag columns are always present in the query compressed schema. 
let segment_index = query_compressed_schema.index_of(tag_column_name).unwrap(); - physical_sort_exprs.push(PhysicalSortExpr { + segment_physical_sort_exprs.push(PhysicalSortExpr { expr: Arc::new(Column::new(tag_column_name, segment_index)), options: sort_options, }); - }; + } - physical_sort_exprs.push(PhysicalSortExpr { + segment_physical_sort_exprs.push(PhysicalSortExpr { expr: Arc::new(Column::new("start_time", 1)), options: sort_options, }); + // The sort order that GridExec requires for the segments it receives as its input matches + // the sort order ParquetExec guarantees for the segments it produces. + let segment_physical_sort_requirements: Vec = + segment_physical_sort_exprs + .clone() + .into_iter() + .map(|physical_sort_expr| physical_sort_expr.into()) + .collect(); + Arc::new(ModelTable { delta_table, model_table_metadata, data_sink, fallback_field_column, query_compressed_schema, - query_order_segment: LexOrdering::new(physical_sort_exprs), + query_order_segment: LexOrdering::new(segment_physical_sort_exprs), + query_requirement_segment: LexRequirement::new(segment_physical_sort_requirements), }) } From 19d028290a5adffa3a14a169742d7d8bc8853fc6 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Tue, 25 Feb 2025 23:21:17 +0100 Subject: [PATCH 42/69] Add util method to get query order and requirement for a schema --- .../src/query/model_table.rs | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/crates/modelardb_storage/src/query/model_table.rs b/crates/modelardb_storage/src/query/model_table.rs index 7bbf11c1a..0853692db 100644 --- a/crates/modelardb_storage/src/query/model_table.rs +++ b/crates/modelardb_storage/src/query/model_table.rs @@ -203,6 +203,48 @@ impl fmt::Debug for ModelTable { } } +/// Return a [`LexOrdering`] and [`LexRequirement`] that sort by the tag columns from +/// `model_table_metadata` in `schema` first and then by `time_column`. 
+fn query_order_and_requirement( + model_table_metadata: &ModelTableMetadata, + schema: &Schema, + time_column: Column, +) -> (LexOrdering, LexRequirement) { + let sort_options = SortOptions { + descending: false, + nulls_first: false, + }; + + let mut physical_sort_exprs = vec![]; + for index in &model_table_metadata.tag_column_indices { + let tag_column_name = model_table_metadata.schema.field(*index).name(); + + // unwrap() is safe as the tag columns are always present in the schema. + let segment_index = schema.index_of(tag_column_name).unwrap(); + + physical_sort_exprs.push(PhysicalSortExpr { + expr: Arc::new(Column::new(tag_column_name, segment_index)), + options: sort_options, + }); + } + + physical_sort_exprs.push(PhysicalSortExpr { + expr: Arc::new(time_column), + options: sort_options, + }); + + let physical_sort_requirements: Vec = physical_sort_exprs + .clone() + .into_iter() + .map(|physical_sort_expr| physical_sort_expr.into()) + .collect(); + + ( + LexOrdering::new(physical_sort_exprs), + LexRequirement::new(physical_sort_requirements), + ) +} + /// Rewrite and combine the `filters` that are written in terms of the model table's query schema, /// to a filter that is written in terms of the schema used for compressed segments by the storage /// engine and a filter that is written in terms of the schema used for univariate time series by From 3175d88babf7484b1c2a18036d811761e9db65b3 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Tue, 25 Feb 2025 23:23:07 +0100 Subject: [PATCH 43/69] Use util method to get segment query order and requirement --- .../src/query/model_table.rs | 41 ++++--------------- 1 file changed, 7 insertions(+), 34 deletions(-) diff --git a/crates/modelardb_storage/src/query/model_table.rs b/crates/modelardb_storage/src/query/model_table.rs index 0853692db..69d26b81e 100644 --- a/crates/modelardb_storage/src/query/model_table.rs +++ b/crates/modelardb_storage/src/query/model_table.rs @@ 
-109,38 +109,11 @@ impl ModelTable { let query_compressed_schema = Arc::new(Schema::new(query_compressed_schema_fields)); - // Segments are sorted by the tag columns and the start time. - let sort_options = SortOptions { - descending: false, - nulls_first: false, - }; - - let mut segment_physical_sort_exprs = vec![]; - for index in &model_table_metadata.tag_column_indices { - let tag_column_name = model_table_metadata.schema.field(*index).name(); - - // unwrap() is safe as the tag columns are always present in the query compressed schema. - let segment_index = query_compressed_schema.index_of(tag_column_name).unwrap(); - - segment_physical_sort_exprs.push(PhysicalSortExpr { - expr: Arc::new(Column::new(tag_column_name, segment_index)), - options: sort_options, - }); - } - - segment_physical_sort_exprs.push(PhysicalSortExpr { - expr: Arc::new(Column::new("start_time", 1)), - options: sort_options, - }); - - // The sort order that GridExec requires for the segments it receives as its input matches - // the sort order ParquetExec guarantees for the segments it produces. 
- let segment_physical_sort_requirements: Vec = - segment_physical_sort_exprs - .clone() - .into_iter() - .map(|physical_sort_expr| physical_sort_expr.into()) - .collect(); + let (query_order_segment, query_requirement_segment) = query_order_and_requirement( + &model_table_metadata, + &query_compressed_schema, + Column::new("start_time", 1), + ); Arc::new(ModelTable { delta_table, @@ -148,8 +121,8 @@ impl ModelTable { data_sink, fallback_field_column, query_compressed_schema, - query_order_segment: LexOrdering::new(segment_physical_sort_exprs), - query_requirement_segment: LexRequirement::new(segment_physical_sort_requirements), + query_order_segment, + query_requirement_segment, }) } From a4c07e6d1499e0a0098fdd0db44a7d8d23522383 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Tue, 25 Feb 2025 23:32:10 +0100 Subject: [PATCH 44/69] Remove univariate_id from GRID_SCHEMA --- crates/modelardb_types/src/schemas.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crates/modelardb_types/src/schemas.rs b/crates/modelardb_types/src/schemas.rs index e8f93188d..7963bdc3e 100644 --- a/crates/modelardb_types/src/schemas.rs +++ b/crates/modelardb_types/src/schemas.rs @@ -21,8 +21,8 @@ use std::sync::LazyLock; use arrow::datatypes::{ArrowPrimitiveType, DataType, Field, Schema}; use crate::types::{ - ArrowTimestamp, ArrowUnivariateId, ArrowValue, CompressedSchema, ConfigurationSchema, - QueryCompressedSchema, QuerySchema, TableMetadataSchema, + ArrowTimestamp, ArrowValue, CompressedSchema, ConfigurationSchema, QueryCompressedSchema, + QuerySchema, TableMetadataSchema, }; /// Name of the column used to partition the compressed segments. @@ -67,7 +67,6 @@ pub static COMPRESSED_METADATA_SIZE_IN_BYTES: LazyLock = LazyLock::new(|| /// [`RecordBatch`](arrow::record_batch::RecordBatch) [`Schema`] used internally during query processing. 
pub static GRID_SCHEMA: LazyLock = LazyLock::new(|| { QuerySchema(Arc::new(Schema::new(vec![ - Field::new("univariate_id", ArrowUnivariateId::DATA_TYPE, false), Field::new("timestamp", ArrowTimestamp::DATA_TYPE, false), Field::new("value", ArrowValue::DATA_TYPE, false), ]))) From 1236ebd478ed6d0c1fb6b383887e8133ed405886 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Tue, 25 Feb 2025 23:34:16 +0100 Subject: [PATCH 45/69] Add grid schema, query order data point, and query requirement data point to ModelTable --- .../src/query/model_table.rs | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/crates/modelardb_storage/src/query/model_table.rs b/crates/modelardb_storage/src/query/model_table.rs index 69d26b81e..d7110047e 100644 --- a/crates/modelardb_storage/src/query/model_table.rs +++ b/crates/modelardb_storage/src/query/model_table.rs @@ -77,6 +77,14 @@ pub(crate) struct ModelTable { query_order_segment: LexOrdering, /// The sort order that [`GridExec`] requires for the segments it receives as its input. query_requirement_segment: LexRequirement, + /// Schema used internally during query processing. + grid_schema: Arc, + /// The sort order [`GridExec`] guarantees for the data points it produces. It is guaranteed by + /// [`GridExec`] because it receives segments sorted by `query_order_segment` from [`ParquetExec`] + /// and because these segments cannot contain data points for overlapping time intervals. + query_order_data_point: LexOrdering, + /// The sort order that [`SortedJoinExec`] requires for the data points it receives as its input. + query_requirement_data_point: LexRequirement, } impl ModelTable { @@ -115,6 +123,20 @@ impl ModelTable { Column::new("start_time", 1), ); + // Add the tag columns to the base schema for data points. 
+ let mut grid_schema_fields = GRID_SCHEMA.0.fields.clone().to_vec(); + for index in &model_table_metadata.tag_column_indices { + grid_schema_fields.push(Arc::new(model_table_metadata.schema.field(*index).clone())); + } + + let grid_schema = Arc::new(Schema::new(grid_schema_fields)); + + let (query_order_data_point, query_requirement_data_point) = query_order_and_requirement( + &model_table_metadata, + &grid_schema, + Column::new("timestamp", 0), + ); + Arc::new(ModelTable { delta_table, model_table_metadata, @@ -123,6 +145,9 @@ impl ModelTable { query_compressed_schema, query_order_segment, query_requirement_segment, + grid_schema, + query_order_data_point, + query_requirement_data_point, }) } From fe8bd38bdfcc3515b677661212e098e2b22ff907 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 26 Feb 2025 00:00:42 +0100 Subject: [PATCH 46/69] Pass model table query compressed schema and output ordering when creating parquet exec --- crates/modelardb_storage/src/query/model_table.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/crates/modelardb_storage/src/query/model_table.rs b/crates/modelardb_storage/src/query/model_table.rs index d7110047e..707c86fa0 100644 --- a/crates/modelardb_storage/src/query/model_table.rs +++ b/crates/modelardb_storage/src/query/model_table.rs @@ -54,8 +54,6 @@ use crate::query::generated_as_exec::{ColumnToGenerate, GeneratedAsExec}; use crate::query::grid_exec::GridExec; use crate::query::sorted_join_exec::{SortedJoinColumnType, SortedJoinExec}; -use super::QUERY_ORDER_SEGMENT; - /// A queryable representation of a model table which stores multivariate time series as segments /// containing metadata and models. 
[`ModelTable`] implements [`TableProvider`] so it can be /// registered with Apache DataFusion and the multivariate time series queried as multiple @@ -394,6 +392,8 @@ fn new_apache_parquet_exec( partition_filters: &[PartitionFilter], maybe_limit: Option, maybe_parquet_filters: &Option>, + file_schema: SchemaRef, + output_ordering: Vec, ) -> DataFusionResult> { // Collect the LogicalFiles into a Vec so they can be sorted the same for all field columns. let mut logical_files = delta_table @@ -416,13 +416,13 @@ fn new_apache_parquet_exec( let log_store = delta_table.log_store(); let file_scan_config = FileScanConfig { object_store_url: log_store.object_store_url(), - file_schema: QUERY_COMPRESSED_SCHEMA.0.clone(), + file_schema, file_groups: vec![partitioned_files], statistics: Statistics::new_unknown(&QUERY_COMPRESSED_SCHEMA.0), projection: None, limit: maybe_limit, table_partition_cols: vec![], - output_ordering: vec![LexOrdering::new(QUERY_ORDER_SEGMENT.to_vec())], + output_ordering, }; let apache_parquet_exec_builder = if let Some(parquet_filters) = maybe_parquet_filters { @@ -590,12 +590,12 @@ impl TableProvider for ModelTable { let maybe_physical_parquet_filters = maybe_convert_logical_expr_to_physical_expr( maybe_rewritten_parquet_filters.as_ref(), - QUERY_COMPRESSED_SCHEMA.0.clone(), + self.query_compressed_schema.clone(), )?; let maybe_physical_grid_filters = maybe_convert_logical_expr_to_physical_expr( maybe_rewritten_grid_filters.as_ref(), - GRID_SCHEMA.0.clone(), + self.grid_schema.clone(), )?; if stored_field_columns_in_projection.is_empty() { @@ -622,6 +622,8 @@ impl TableProvider for ModelTable { &partition_filters, limit, &maybe_physical_parquet_filters, + self.query_compressed_schema.clone(), + vec![LexOrdering::new(self.query_order_segment.to_vec())], )?; let grid_exec = GridExec::new(maybe_physical_grid_filters.clone(), limit, parquet_exec); From ccf58dc2dd96dac26eb019bafde463d6227dcb48 Mon Sep 17 00:00:00 2001 From: CGodiksen 
<36046286+CGodiksen@users.noreply.github.com> Date: Wed, 26 Feb 2025 00:03:43 +0100 Subject: [PATCH 47/69] Use model table specific query requirement segment and query order data point in GridExec --- .../modelardb_storage/src/query/grid_exec.rs | 31 ++++++++++++------- .../src/query/model_table.rs | 9 +++++- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/crates/modelardb_storage/src/query/grid_exec.rs b/crates/modelardb_storage/src/query/grid_exec.rs index aecd7bd1e..76f206d10 100644 --- a/crates/modelardb_storage/src/query/grid_exec.rs +++ b/crates/modelardb_storage/src/query/grid_exec.rs @@ -23,7 +23,7 @@ use std::fmt::{Formatter, Result as FmtResult}; use std::pin::Pin; use std::sync::Arc; use std::task::{Context as StdTaskContext, Poll}; - +use arrow::datatypes::Schema; use async_trait::async_trait; use datafusion::arrow::array::{ Array, ArrayBuilder, ArrayRef, BinaryArray, Float32Array, UInt64Array, UInt64Builder, @@ -35,7 +35,7 @@ use datafusion::arrow::record_batch::RecordBatch; use datafusion::common::cast::as_boolean_array; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::context::TaskContext; -use datafusion::physical_expr::{EquivalenceProperties, LexRequirement}; +use datafusion::physical_expr::{EquivalenceProperties, LexOrdering, LexRequirement}; use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType}; use datafusion::physical_plan::metrics::{ BaselineMetrics, Count, ExecutionPlanMetricsSet, MetricBuilder, MetricsSet, @@ -46,11 +46,8 @@ use datafusion::physical_plan::{ }; use futures::stream::{Stream, StreamExt}; use modelardb_compression::{self, MODEL_TYPE_COUNT, MODEL_TYPE_NAMES}; -use modelardb_types::schemas::GRID_SCHEMA; use modelardb_types::types::{TimestampArray, TimestampBuilder, ValueArray, ValueBuilder}; -use crate::query::{QUERY_ORDER_DATA_POINT, QUERY_REQUIREMENT_SEGMENT}; - /// An execution plan that reconstructs the data points stored as compressed 
segments containing /// metadata and models. It is `pub(crate)` so the additional rules added to Apache DataFusion's /// physical optimizer can pattern match on it. @@ -66,24 +63,29 @@ pub(crate) struct GridExec { input: Arc, /// Properties about the plan used in query optimization. plan_properties: PlanProperties, + /// The sort order that [`GridExec`] requires for the segments it receives as its input. + query_requirement_segment: LexRequirement, + /// The sort order [`GridExec`] guarantees for the data points it produces. + query_order_data_point: LexOrdering, /// Metrics collected during execution for use by EXPLAIN ANALYZE. metrics: ExecutionPlanMetricsSet, } impl GridExec { pub(super) fn new( + schema: Arc, maybe_predicate: Option>, limit: Option, input: Arc, + query_requirement_segment: LexRequirement, + query_order_data_point: LexOrdering, ) -> Arc { - let schema = GRID_SCHEMA.0.clone(); - - // The global order for the data points produced by the set of GridExec instances producing + // The sort order for the data points produced by the set of GridExec instances producing // input for a SortedJoinExec must be the same. This is needed because SortedJoinExec - // assumes the data it receives from all of its inputs uses the same global sort order. + // assumes the data it receives from all of its inputs uses the same sort order. 
let equivalence_properties = EquivalenceProperties::new_with_orderings( schema.clone(), - &[QUERY_ORDER_DATA_POINT.clone()], + &[query_order_data_point.clone()], ); let plan_properties = PlanProperties::new( @@ -99,6 +101,8 @@ impl GridExec { limit, input, plan_properties, + query_requirement_segment, + query_order_data_point, metrics: ExecutionPlanMetricsSet::new(), }) } @@ -140,9 +144,12 @@ impl ExecutionPlan for GridExec { ) -> DataFusionResult> { if children.len() == 1 { Ok(GridExec::new( + self.schema.clone(), self.maybe_predicate.clone(), self.limit, children[0].clone(), + self.query_requirement_segment.clone(), + self.query_order_data_point.clone(), )) } else { Err(DataFusionError::Plan(format!( @@ -186,9 +193,9 @@ impl ExecutionPlan for GridExec { } /// Specify that [`GridExec`] requires that its input provides data that is sorted by - /// [`QUERY_REQUIREMENT_SEGMENT`]. + /// `query_requirement_segment`. fn required_input_ordering(&self) -> Vec> { - vec![Some(QUERY_REQUIREMENT_SEGMENT.clone())] + vec![Some(self.query_requirement_segment.clone())] } /// Return a snapshot of the set of metrics being collected by the execution plain. 
diff --git a/crates/modelardb_storage/src/query/model_table.rs b/crates/modelardb_storage/src/query/model_table.rs index 707c86fa0..c3242392e 100644 --- a/crates/modelardb_storage/src/query/model_table.rs +++ b/crates/modelardb_storage/src/query/model_table.rs @@ -626,7 +626,14 @@ impl TableProvider for ModelTable { vec![LexOrdering::new(self.query_order_segment.to_vec())], )?; - let grid_exec = GridExec::new(maybe_physical_grid_filters.clone(), limit, parquet_exec); + let grid_exec = GridExec::new( + self.grid_schema.clone(), + maybe_physical_grid_filters.clone(), + limit, + parquet_exec, + self.query_requirement_segment.clone(), + self.query_order_data_point.clone(), + ); field_column_execution_plans.push(grid_exec); } From 5f8dc5038bff6c1f1a831398a3bdf832c6aefd61 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 26 Feb 2025 00:12:57 +0100 Subject: [PATCH 48/69] Pass query requirement data point to SortedJoinExec --- .../modelardb_storage/src/query/model_table.rs | 1 + .../src/query/sorted_join_exec.rs | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/crates/modelardb_storage/src/query/model_table.rs b/crates/modelardb_storage/src/query/model_table.rs index c3242392e..e0e42acfd 100644 --- a/crates/modelardb_storage/src/query/model_table.rs +++ b/crates/modelardb_storage/src/query/model_table.rs @@ -642,6 +642,7 @@ impl TableProvider for ModelTable { schema_after_projection, stored_columns_in_projection, field_column_execution_plans, + self.query_requirement_data_point.clone(), ); // Only include GeneratedAsExec in the query plan if there are columns to generate. 
diff --git a/crates/modelardb_storage/src/query/sorted_join_exec.rs b/crates/modelardb_storage/src/query/sorted_join_exec.rs index 1f411a95a..866154e1a 100644 --- a/crates/modelardb_storage/src/query/sorted_join_exec.rs +++ b/crates/modelardb_storage/src/query/sorted_join_exec.rs @@ -40,8 +40,6 @@ use datafusion::physical_plan::{ }; use futures::stream::{Stream, StreamExt}; -use crate::query::QUERY_REQUIREMENT_DATA_POINT; - /// The different types of columns supported by [`SortedJoinExec`], used for specifying the order in /// which the timestamp, field, and tag columns should be returned by [`SortedJoinStream`]. #[derive(Debug, Clone)] @@ -64,6 +62,8 @@ pub(crate) struct SortedJoinExec { inputs: Vec>, /// Properties about the plan used in query optimization. plan_properties: PlanProperties, + /// The sort order that [`SortedJoinExec`] requires for the data points it receives as its input. + query_requirement_data_point: LexRequirement, /// Metrics collected during execution for use by EXPLAIN ANALYZE. metrics: ExecutionPlanMetricsSet, } @@ -73,9 +73,9 @@ impl SortedJoinExec { schema: SchemaRef, return_order: Vec, inputs: Vec>, + query_requirement_data_point: LexRequirement, ) -> Arc { - // Specify that the record batches produced by the execution plan will have an unknown order - // as the output from SortedJoinExec does not include the univariate_id but instead tags. + // Specify that the record batches produced by the execution plan will have an unknown order. 
let equivalence_properties = EquivalenceProperties::new(schema.clone()); let plan_properties = PlanProperties::new( @@ -90,6 +90,7 @@ impl SortedJoinExec { return_order, inputs, plan_properties, + query_requirement_data_point, metrics: ExecutionPlanMetricsSet::new(), }) } @@ -134,6 +135,7 @@ impl ExecutionPlan for SortedJoinExec { self.schema.clone(), self.return_order.clone(), children, + self.query_requirement_data_point.clone(), )) } else { Err(DataFusionError::Plan(format!( @@ -176,9 +178,9 @@ impl ExecutionPlan for SortedJoinExec { } /// Specify that [`SortedJoinStream`] requires that its inputs' provide data that is sorted by - /// [`QUERY_REQUIREMENT_DATA_POINT`]. + /// `query_requirement_data_point`. fn required_input_ordering(&self) -> Vec> { - vec![Some(QUERY_REQUIREMENT_DATA_POINT.clone()); self.inputs.len()] + vec![Some(self.query_requirement_data_point.clone()); self.inputs.len()] } /// Return a snapshot of the set of metrics being collected by the execution plain. @@ -289,11 +291,11 @@ impl SortedJoinStream { for element in &self.return_order { match element { - SortedJoinColumnType::Timestamp => columns.push(batch.column(1).clone()), + SortedJoinColumnType::Timestamp => columns.push(batch.column(0).clone()), SortedJoinColumnType::Field => { // unwrap() is safe as a record batch has already been read from each input. 
let batch = self.batches[field_index].as_ref().unwrap(); - columns.push(batch.column(2).clone()); + columns.push(batch.column(1).clone()); field_index += 1; } SortedJoinColumnType::Tag => { From 3adf091cfd0aca34b0ce76f357c0d70bd0245ae8 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 26 Feb 2025 00:16:21 +0100 Subject: [PATCH 49/69] Remove global sort orders and sort requirements --- crates/modelardb_storage/src/query/mod.rs | 103 +--------------------- 1 file changed, 3 insertions(+), 100 deletions(-) diff --git a/crates/modelardb_storage/src/query/mod.rs b/crates/modelardb_storage/src/query/mod.rs index 193244eb5..0e7f9d211 100644 --- a/crates/modelardb_storage/src/query/mod.rs +++ b/crates/modelardb_storage/src/query/mod.rs @@ -13,14 +13,9 @@ * limitations under the License. */ -//! Implementation of types which allows both normal tables and model tables to be added to Apache -//! DataFusion. This allows them to be queried and small amounts of data to be added with INSERT. - -use std::sync::{Arc, LazyLock}; - -use datafusion::physical_expr::{LexOrdering, LexRequirement, PhysicalSortExpr}; -use datafusion::physical_plan::expressions::Column; -use deltalake::arrow::compute::SortOptions; +//! Implementation of types which allow normal tables, metadata tables, and model tables to be added +//! to Apache DataFusion. This allows them to be queried and small amounts of data to be added with +//! INSERT. // grid_exec and sorted_join_exec are pub(crate) so the rules added to Apache DataFusion's physical // optimizer can access them. @@ -30,95 +25,3 @@ pub(crate) mod metadata_table; pub(crate) mod model_table; pub(crate) mod normal_table; pub(crate) mod sorted_join_exec; - -/// The global sort order -/// [`ParquetExec`](datafusion::datasource::physical_plan::parquet::ParquetExec) guarantees for the -/// segments it produces. 
It is guaranteed by -/// [`ParquetExec`](datafusion::datasource::physical_plan::parquet::ParquetExec) because the storage -/// engine uses this sort order for each Apache Parquet file and these files are read sequentially -/// by [`ParquetExec`](datafusion::datasource::physical_plan::parquet::ParquetExec). Another sort -/// order could also be used, the current query pipeline simply requires that the -/// [`RecordBatches`](datafusion::arrow::record_batch::RecordBatch) -/// [`SortedJoinExec`](sorted_join_exec::SortedJoinExec) receives from its inputs all contain data -/// points for the same time interval and that they are sorted the same. -static QUERY_ORDER_SEGMENT: LazyLock = LazyLock::new(|| { - let sort_options = SortOptions { - descending: false, - nulls_first: false, - }; - - let physical_sort_expr = vec![ - PhysicalSortExpr { - expr: Arc::new(Column::new("univariate_id", 0)), - options: sort_options, - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("start_time", 2)), - options: sort_options, - }, - ]; - - LexOrdering::new(physical_sort_expr) -}); - -/// The global sort order that [`GridExec`](grid_exec::GridExec) requires for the segments it -/// receives as its input. Another sort order could also be used, the current query pipeline simply -/// requires that the [`RecordBatches`](datafusion::arrow::record_batch::RecordBatch) -/// [`SortedJoinExec`](sorted_join_exec::SortedJoinExec) receives from its inputs all contain data -/// points for the same time interval and that they are sorted the same. -static QUERY_REQUIREMENT_SEGMENT: LazyLock = LazyLock::new(|| { - let physical_sort_requirements = QUERY_ORDER_SEGMENT - .inner - .clone() - .drain(..) - .map(|physical_sort_expr| physical_sort_expr.into()) - .collect(); - - LexRequirement::new(physical_sort_requirements) -}); - -/// The global sort order [`GridExec`](grid_exec::GridExec) guarantees for the data points it -/// produces. 
It is guaranteed by [`GridExec`](grid_exec::GridExec) because it receives segments -/// sorted by [`QUERY_ORDER_SEGMENT`] from -/// [`ParquetExec`](datafusion::datasource::physical_plan::parquet::ParquetExec) and because these -/// segments cannot contain data points for overlapping time intervals. Another sort order could -/// also be used, the current query pipeline simply requires that the -/// [`RecordBatches`](datafusion::arrow::record_batch::RecordBatch) -/// [`SortedJoinExec`](sorted_join_exec::SortedJoinExec) receives from its inputs all contain data -/// points for the same time interval and that they are sorted the same. -static QUERY_ORDER_DATA_POINT: LazyLock = LazyLock::new(|| { - let sort_options = SortOptions { - descending: false, - nulls_first: false, - }; - - let physical_sort_expr = vec![ - PhysicalSortExpr { - expr: Arc::new(Column::new("univariate_id", 0)), - options: sort_options, - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("timestamp", 1)), - options: sort_options, - }, - ]; - - LexOrdering::new(physical_sort_expr) -}); - -/// The global sort order that [`SortedJoinExec`](sorted_join_exec::SortedJoinExec) requires for the -/// data points it receives as its input. Another sort order could also be used, the current query -/// pipeline simply requires that the -/// [`RecordBatches`](datafusion::arrow::record_batch::RecordBatch) -/// [`SortedJoinExec`](sorted_join_exec::SortedJoinExec) receives from its inputs all contain data -/// points for the same time interval and that they are sorted the same. -static QUERY_REQUIREMENT_DATA_POINT: LazyLock = LazyLock::new(|| { - let physical_sort_requirements = QUERY_ORDER_DATA_POINT - .inner - .clone() - .drain(..) 
- .map(|physical_sort_expr| physical_sort_expr.into()) - .collect(); - - LexRequirement::new(physical_sort_requirements) -}); From 0bdd384d02247e0289cbc02a1ddd12e7452bb12e Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 26 Feb 2025 17:58:32 +0100 Subject: [PATCH 50/69] Remove univariate_id from pmc_mean::grid() --- .../src/models/pmc_mean.rs | 27 ++++--------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/crates/modelardb_compression/src/models/pmc_mean.rs b/crates/modelardb_compression/src/models/pmc_mean.rs index cafea3115..b76973506 100644 --- a/crates/modelardb_compression/src/models/pmc_mean.rs +++ b/crates/modelardb_compression/src/models/pmc_mean.rs @@ -21,7 +21,7 @@ //! [ModelarDB paper]: https://www.vldb.org/pvldb/vol11/p1688-jensen.pdf use modelardb_types::schemas::COMPRESSED_METADATA_SIZE_IN_BYTES; -use modelardb_types::types::{Timestamp, UnivariateId, UnivariateIdBuilder, Value, ValueBuilder}; +use modelardb_types::types::{Timestamp, Value, ValueBuilder}; use crate::models; use crate::models::ErrorBound; @@ -100,18 +100,10 @@ pub fn sum(model_length: usize, value: Value) -> Value { model_length as Value * value } -/// Reconstruct the values for the `timestamps` without matching values in -/// `value_builder` using a model of type PMC-Mean. The `univariate_ids` and -/// `values` are appended to `univariate_builder` and `value_builder`. -pub fn grid( - univariate_id: UnivariateId, - value: Value, - univariate_id_builder: &mut UnivariateIdBuilder, - timestamps: &[Timestamp], - value_builder: &mut ValueBuilder, -) { +/// Reconstruct the values for the `timestamps` without matching values in `value_builder` using a +/// model of type PMC-Mean. The `values` are appended to `value_builder`. 
+pub fn grid(value: Value, timestamps: &[Timestamp], value_builder: &mut ValueBuilder) { for _timestamp in timestamps { - univariate_id_builder.append_value(univariate_id); value_builder.append_value(value); } } @@ -376,29 +368,20 @@ mod tests { #[test] fn test_grid(value in ProptestValue::ANY) { let sampling_interval: i64 = 60; - let mut univariate_id_builder = UnivariateIdBuilder::with_capacity(10); let timestamps: Vec = (60..=600).step_by(60).collect(); let mut value_builder = ValueBuilder::with_capacity(10); grid( - 1, value, - &mut univariate_id_builder, ×tamps, &mut value_builder, ); - let univariate_ids = univariate_id_builder.finish(); let values = value_builder.finish(); prop_assert!( - univariate_ids.len() == 10 - && univariate_ids.len() == timestamps.len() - && univariate_ids.len() == values.len() + timestamps.len() == 10 && timestamps.len() == values.len() ); - prop_assert!(univariate_ids - .iter() - .all(|maybe_univariate_id| maybe_univariate_id.unwrap() == 1)); prop_assert!(timestamps .windows(2) .all(|window| window[1] - window[0] == sampling_interval)); From 1de2c03bd5034b64a08e9858325f3c9957c86b04 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 26 Feb 2025 18:06:10 +0100 Subject: [PATCH 51/69] Remove univariate_id from swing::grid() --- .../modelardb_compression/src/models/swing.rs | 22 ++++--------------- 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/crates/modelardb_compression/src/models/swing.rs b/crates/modelardb_compression/src/models/swing.rs index 7269c2093..44a29d7ec 100644 --- a/crates/modelardb_compression/src/models/swing.rs +++ b/crates/modelardb_compression/src/models/swing.rs @@ -25,7 +25,7 @@ use modelardb_types::schemas::COMPRESSED_METADATA_SIZE_IN_BYTES; use modelardb_types::types::{ - ErrorBound, Timestamp, TimestampBuilder, UnivariateId, UnivariateIdBuilder, Value, ValueBuilder, + ErrorBound, Timestamp, TimestampBuilder, UnivariateIdBuilder, Value, ValueBuilder, 
}; use super::timestamps; @@ -302,16 +302,13 @@ pub fn sum( } } -/// Reconstruct the values for the `timestamps` without matching values in -/// `value_builder` using a model of type Swing. The `univariate_ids` and -/// `values` are appended to `univariate_id_builder` and `value_builder`. +/// Reconstruct the values for the `timestamps` without matching values in `value_builder` using a +/// model of type Swing. The `values` are appended to `value_builder`. pub fn grid( - univariate_id: UnivariateId, start_time: Timestamp, end_time: Timestamp, first_value: Value, last_value: Value, - univariate_id_builder: &mut UnivariateIdBuilder, timestamps: &[Timestamp], value_builder: &mut ValueBuilder, ) { @@ -319,7 +316,6 @@ pub fn grid( compute_slope_and_intercept(start_time, first_value as f64, end_time, last_value as f64); for timestamp in timestamps { - univariate_id_builder.append_value(univariate_id); let value = (slope * (*timestamp as f64) + intercept) as Value; value_builder.append_value(value); } @@ -766,31 +762,21 @@ mod tests { fn test_grid(value in num::i32::ANY.prop_map(i32_to_value)) { let timestamps: Vec = (START_TIME ..= END_TIME) .step_by(SAMPLING_INTERVAL as usize).collect(); - let mut univariate_id_builder = UnivariateIdBuilder::with_capacity(timestamps.len()); let mut value_builder = ValueBuilder::with_capacity(timestamps.len()); // The linear function represents a constant to have a known value. 
grid( - 1, START_TIME, END_TIME, value, value, - &mut univariate_id_builder, ×tamps, &mut value_builder, ); - let univariate_ids = univariate_id_builder.finish(); let values = value_builder.finish(); - prop_assert!( - univariate_ids.len() == timestamps.len() - && univariate_ids.len() == values.len() - ); - prop_assert!(univariate_ids - .iter() - .all(|maybe_univariate_id| maybe_univariate_id.unwrap() == 1)); + prop_assert!(timestamps.len() == values.len()); prop_assert!(timestamps .windows(2) .all(|window| window[1] - window[0] == SAMPLING_INTERVAL)); From 900f6c304f1a400d93c37494fea40aa9a4f42eca Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 26 Feb 2025 18:10:40 +0100 Subject: [PATCH 52/69] Remove univariate_id from gorilla::grid() --- .../src/models/gorilla.rs | 38 +++---------------- 1 file changed, 6 insertions(+), 32 deletions(-) diff --git a/crates/modelardb_compression/src/models/gorilla.rs b/crates/modelardb_compression/src/models/gorilla.rs index 6928dc289..0a9544ddf 100644 --- a/crates/modelardb_compression/src/models/gorilla.rs +++ b/crates/modelardb_compression/src/models/gorilla.rs @@ -22,7 +22,7 @@ //! //! [Gorilla paper]: https://www.vldb.org/pvldb/vol8/p1816-teller.pdf -use modelardb_types::types::{Timestamp, UnivariateId, UnivariateIdBuilder, Value, ValueBuilder}; +use modelardb_types::types::{Timestamp, Value, ValueBuilder}; use crate::models; use crate::models::bits::{BitReader, BitVecBuilder}; @@ -215,13 +215,11 @@ pub fn sum(length: usize, values: &[u8], maybe_model_last_value: Option) /// Decompress all the values in `values` for the `timestamps` without matching values in /// `value_builder`. The values in `values` are compressed using Gorilla's compression method for -/// floating-point values. `univariate_ids` and `values` are appended to `univariate_id_builder` and -/// `value_builder`. 
If `maybe_model_last_value` is provided, it is assumed the first value in -/// `values` is compressed against it instead of being stored in full, i.e., uncompressed. +/// floating-point values. `values` are appended to `value_builder`. If `maybe_model_last_value` +/// is provided, it is assumed the first value in `values` is compressed against it instead of being +/// stored in full, i.e., uncompressed. pub fn grid( - univariate_id: UnivariateId, values: &[u8], - univariate_id_builder: &mut UnivariateIdBuilder, timestamps: &[Timestamp], value_builder: &mut ValueBuilder, maybe_model_last_value: Option, @@ -238,7 +236,6 @@ pub fn grid( } else { // The first value is stored uncompressed using size_of:: bits. let first_value = bits.read_bits(models::VALUE_SIZE_IN_BITS) as u32; - univariate_id_builder.append_value(univariate_id); value_builder.append_value(Value::from_bits(first_value)); first_value }; @@ -262,7 +259,6 @@ pub fn grid( value ^= last_value; last_value = value; } - univariate_id_builder.append_value(univariate_id); value_builder.append_value(Value::from_bits(last_value)); } } @@ -516,29 +512,13 @@ mod tests { fn assert_grid_with_error_bound(error_bound: ErrorBound, values: &[Value]) { let compressed_values = compress_values_using_gorilla(error_bound, values, None); - let mut univariate_id_builder = UnivariateIdBuilder::with_capacity(values.len()); let timestamps: Vec = (1..=values.len() as i64).step_by(1).collect(); let mut value_builder = ValueBuilder::with_capacity(values.len()); - grid( - 1, - &compressed_values, - &mut univariate_id_builder, - ×tamps, - &mut value_builder, - None, - ); + grid(&compressed_values, ×tamps, &mut value_builder, None); - let univariate_ids_array = univariate_id_builder.finish(); let values_array = value_builder.finish(); - assert!( - univariate_ids_array.len() == values.len() - && univariate_ids_array.len() == timestamps.len() - && univariate_ids_array.len() == values_array.len() - ); - assert!(univariate_ids_array - .iter() 
- .all(|maybe_univariate_id| maybe_univariate_id.unwrap() == 1)); + assert!(values.len() == timestamps.len() && values.len() == values_array.len()); assert!(timestamps .windows(2) .all(|window| window[1] - window[0] == 1)); @@ -580,24 +560,18 @@ mod tests { fn assert_grid_single(error_bound: ErrorBound, maybe_model_last_value: Option) { let compressed_values = compress_values_using_gorilla(error_bound, &[37.0], maybe_model_last_value); - let mut univariate_id_builder = UnivariateIdBuilder::new(); let mut value_builder = ValueBuilder::new(); grid( - 1, &compressed_values, - &mut univariate_id_builder, &[100], &mut value_builder, maybe_model_last_value, ); - let univariate_ids = univariate_id_builder.finish(); let values = value_builder.finish(); - assert_eq!(univariate_ids.len(), 1); assert_eq!(values.len(), 1); - assert_eq!(univariate_ids.value(0), 1); assert_eq!(values.value(0), 37.0); } From 17ff1b850394864f41639fdd2d8576df3f975078 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 26 Feb 2025 18:17:29 +0100 Subject: [PATCH 53/69] Remove univariate_id from modelardb_compression::grid() --- .../modelardb_compression/src/compression.rs | 2 -- .../modelardb_compression/src/models/mod.rs | 25 +++---------------- .../modelardb_compression/src/models/swing.rs | 5 +--- 3 files changed, 5 insertions(+), 27 deletions(-) diff --git a/crates/modelardb_compression/src/compression.rs b/crates/modelardb_compression/src/compression.rs index 202899ce3..84a06c9c0 100644 --- a/crates/modelardb_compression/src/compression.rs +++ b/crates/modelardb_compression/src/compression.rs @@ -923,7 +923,6 @@ mod tests { let start_index = univariate_id_builder.len(); models::grid( - 0, model_type_ids.value(row_index), start_times.value(row_index), end_times.value(row_index), @@ -932,7 +931,6 @@ mod tests { max_values.value(row_index), values.value(row_index), residuals.value(row_index), - &mut univariate_id_builder, &mut timestamp_builder, &mut 
value_builder, ); diff --git a/crates/modelardb_compression/src/models/mod.rs b/crates/modelardb_compression/src/models/mod.rs index 5500900b0..9eaf49f82 100644 --- a/crates/modelardb_compression/src/models/mod.rs +++ b/crates/modelardb_compression/src/models/mod.rs @@ -26,9 +26,7 @@ pub mod timestamps; use std::mem; use arrow::array::ArrayBuilder; -use modelardb_types::types::{ - ErrorBound, Timestamp, TimestampBuilder, UnivariateId, UnivariateIdBuilder, Value, ValueBuilder, -}; +use modelardb_types::types::{ErrorBound, Timestamp, TimestampBuilder, Value, ValueBuilder}; use crate::types::CompressedSegmentBuilder; @@ -183,10 +181,9 @@ pub fn sum( } /// Reconstruct the data points for a compressed segment whose values are represented by a model and -/// residuals. Each data point is split into its three components and appended to `univariate_ids`, -/// `timestamps`, and `values`. +/// residuals. Each data point is split into its two components and appended to `timestamp_builder` +/// and `value_builder`. pub fn grid( - univariate_id: UnivariateId, model_type_id: u8, start_time: Timestamp, end_time: Timestamp, @@ -195,7 +192,6 @@ pub fn grid( max_value: Value, values: &[u8], residuals: &[u8], - univariate_id_builder: &mut UnivariateIdBuilder, timestamp_builder: &mut TimestampBuilder, value_builder: &mut ValueBuilder, ) { @@ -212,9 +208,7 @@ pub fn grid( // Reconstruct the values from the model. 
match model_type_id { PMC_MEAN_ID => pmc_mean::grid( - univariate_id, CompressedSegmentBuilder::decode_values_for_pmc_mean(min_value, max_value, values), - univariate_id_builder, model_timestamps, value_builder, ), @@ -226,24 +220,15 @@ pub fn grid( let model_end_time = *model_timestamps.last().unwrap(); swing::grid( - univariate_id, start_time, model_end_time, first_value, last_value, - univariate_id_builder, model_timestamps, value_builder, ) } - GORILLA_ID => gorilla::grid( - univariate_id, - values, - univariate_id_builder, - model_timestamps, - value_builder, - None, - ), + GORILLA_ID => gorilla::grid(values, model_timestamps, value_builder, None), _ => panic!("Unknown model type."), } @@ -252,9 +237,7 @@ pub fn grid( let model_last_value = value_builder.values_slice()[value_builder.len() - 1]; gorilla::grid( - univariate_id, &residuals[..residuals.len() - 1], - univariate_id_builder, residuals_timestamps, value_builder, Some(model_last_value), diff --git a/crates/modelardb_compression/src/models/swing.rs b/crates/modelardb_compression/src/models/swing.rs index 44a29d7ec..ea84ad6f7 100644 --- a/crates/modelardb_compression/src/models/swing.rs +++ b/crates/modelardb_compression/src/models/swing.rs @@ -25,7 +25,7 @@ use modelardb_types::schemas::COMPRESSED_METADATA_SIZE_IN_BYTES; use modelardb_types::types::{ - ErrorBound, Timestamp, TimestampBuilder, UnivariateIdBuilder, Value, ValueBuilder, + ErrorBound, Timestamp, TimestampBuilder, Value, ValueBuilder, }; use super::timestamps; @@ -880,12 +880,10 @@ mod tests { assert_eq!(model_type_id_array.value(0), SWING_ID); // Reconstruct all values from the segment. 
- let mut reconstructed_ids = UnivariateIdBuilder::with_capacity(timestamps.len()); let mut reconstructed_timestamps = TimestampBuilder::with_capacity(timestamps.len()); let mut reconstructed_values = ValueBuilder::with_capacity(timestamps.len()); models::grid( - 0, model_type_id_array.value(0), start_time_array.value(0), end_time_array.value(0), @@ -894,7 +892,6 @@ mod tests { max_value_array.value(0), values_array.value(0), residuals_array.value(0), - &mut reconstructed_ids, &mut reconstructed_timestamps, &mut reconstructed_values, ); From b734a275b28ea203d3137c6ce3a770456c65db11 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 26 Feb 2025 18:29:39 +0100 Subject: [PATCH 54/69] Remove univariate id from GridExec and types --- .../modelardb_compression/src/compression.rs | 5 ++--- .../src/models/gorilla.rs | 6 ++--- .../modelardb_storage/src/query/grid_exec.rs | 22 ++++++------------- crates/modelardb_types/src/types.rs | 7 ------ 4 files changed, 12 insertions(+), 28 deletions(-) diff --git a/crates/modelardb_compression/src/compression.rs b/crates/modelardb_compression/src/compression.rs index 84a06c9c0..f7147412c 100644 --- a/crates/modelardb_compression/src/compression.rs +++ b/crates/modelardb_compression/src/compression.rs @@ -901,7 +901,6 @@ mod tests { uncompressed_values: &ValueArray, compressed_record_batch: &RecordBatch, ) { - let mut univariate_id_builder = UInt64Builder::new(); let mut timestamp_builder = TimestampBuilder::new(); let mut value_builder = ValueBuilder::new(); @@ -920,7 +919,7 @@ mod tests { let mut index_to_model_type = vec![]; for row_index in 0..compressed_record_batch.num_rows() { - let start_index = univariate_id_builder.len(); + let start_index = value_builder.len(); models::grid( model_type_ids.value(row_index), @@ -935,7 +934,7 @@ mod tests { &mut value_builder, ); - let end_index = univariate_id_builder.len(); + let end_index = value_builder.len(); 
index_to_model_type.push((start_index..end_index, model_type_ids.value(row_index))); } diff --git a/crates/modelardb_compression/src/models/gorilla.rs b/crates/modelardb_compression/src/models/gorilla.rs index 0a9544ddf..cadb6614e 100644 --- a/crates/modelardb_compression/src/models/gorilla.rs +++ b/crates/modelardb_compression/src/models/gorilla.rs @@ -174,9 +174,9 @@ impl Gorilla { /// it is assumed the first value in `values` is compressed against it instead of being stored in /// full, i.e., uncompressed. pub fn sum(length: usize, values: &[u8], maybe_model_last_value: Option) -> Value { - // This function replicates code from gorilla::grid() as it isn't necessary - // to store the univariate ids, timestamps, and values in arrays for a sum. - // So any changes to the decompression must be mirrored in gorilla::grid(). + // This function replicates code from gorilla::grid() as it isn't necessary to store the + // timestamps and values in arrays for a sum. So any changes to the decompression must be + // mirrored in gorilla::grid(). 
let mut bits = BitReader::try_new(values).unwrap(); let mut leading_zeros = u8::MAX; let mut trailing_zeros: u8 = 0; diff --git a/crates/modelardb_storage/src/query/grid_exec.rs b/crates/modelardb_storage/src/query/grid_exec.rs index 76f206d10..d45491411 100644 --- a/crates/modelardb_storage/src/query/grid_exec.rs +++ b/crates/modelardb_storage/src/query/grid_exec.rs @@ -23,11 +23,11 @@ use std::fmt::{Formatter, Result as FmtResult}; use std::pin::Pin; use std::sync::Arc; use std::task::{Context as StdTaskContext, Poll}; + use arrow::datatypes::Schema; use async_trait::async_trait; use datafusion::arrow::array::{ - Array, ArrayBuilder, ArrayRef, BinaryArray, Float32Array, UInt64Array, UInt64Builder, - UInt8Array, + Array, ArrayBuilder, ArrayRef, BinaryArray, Float32Array, UInt8Array, }; use datafusion::arrow::compute::filter_record_batch; use datafusion::arrow::datatypes::SchemaRef; @@ -186,7 +186,7 @@ impl ExecutionPlan for GridExec { } /// Specify that [`GridExec`] requires one partition for each input as it assumes that the - /// global sort order are the same for its input and Apache Arrow DataFusion only guarantees the + /// sort order are the same for its input and Apache Arrow DataFusion only guarantees the /// sort order within each partition rather than the input's global sort order. fn required_input_distribution(&self) -> Vec { vec![Distribution::SinglePartition] @@ -289,31 +289,25 @@ impl GridStream { // from each segment in the new batch as each segment contains at least one data point. let current_rows = self.current_batch.num_rows() - self.current_batch_offset; let new_rows = batch.num_rows(); - let mut univariate_id_builder = UInt64Builder::with_capacity(current_rows + new_rows); let mut timestamp_builder = TimestampBuilder::with_capacity(current_rows + new_rows); let mut value_builder = ValueBuilder::with_capacity(current_rows + new_rows); // Copy over the data points from the current batch to keep the resulting batch sorted. 
let current_batch = &self.current_batch; // Required as self cannot be passed to array!. - univariate_id_builder.append_slice( - &modelardb_types::array!(current_batch, 0, UInt64Array).values() - [self.current_batch_offset..], - ); timestamp_builder.append_slice( - &modelardb_types::array!(current_batch, 1, TimestampArray).values() + &modelardb_types::array!(current_batch, 0, TimestampArray).values() [self.current_batch_offset..], ); value_builder.append_slice( - &modelardb_types::array!(current_batch, 2, ValueArray).values() + &modelardb_types::array!(current_batch, 1, ValueArray).values() [self.current_batch_offset..], ); // Reconstruct the data points from the compressed segments. for row_index in 0..new_rows { - let length_before = univariate_id_builder.len(); + let length_before = value_builder.len(); modelardb_compression::grid( - 0, model_type_ids.value(row_index), start_times.value(row_index), end_times.value(row_index), @@ -322,21 +316,19 @@ impl GridStream { max_values.value(row_index), values.value(row_index), residuals.value(row_index), - &mut univariate_id_builder, &mut timestamp_builder, &mut value_builder, ); self.grid_stream_metrics.add( model_type_ids.value(row_index), - univariate_id_builder.len() - length_before, + value_builder.len() - length_before, !residuals.value(row_index).is_empty(), modelardb_compression::are_compressed_timestamps_regular(timestamps.values()), ); } let columns: Vec = vec![ - Arc::new(univariate_id_builder.finish()), Arc::new(timestamp_builder.finish()), Arc::new(value_builder.finish()), ]; diff --git a/crates/modelardb_types/src/types.rs b/crates/modelardb_types/src/types.rs index 1192f9cb4..851060886 100644 --- a/crates/modelardb_types/src/types.rs +++ b/crates/modelardb_types/src/types.rs @@ -24,13 +24,6 @@ use std::str::FromStr; use crate::error::{ModelarDbTypesError, Result}; -// Types used for a univariate id. 
-pub type UnivariateId = std::primitive::u64; -pub type ArrowUnivariateId = arrow::datatypes::UInt64Type; - -// Types used for a collection of univariate ids. -pub type UnivariateIdBuilder = arrow::array::PrimitiveBuilder; - // Types used for a single timestamp. pub type Timestamp = std::primitive::i64; // It is signed to match TimestampMicrosecondType. pub type ArrowTimestamp = arrow::datatypes::TimestampMicrosecondType; From 0a388dd73b3561ea7cfdccff98e6c2f03976f5d5 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 26 Feb 2025 21:26:39 +0100 Subject: [PATCH 55/69] Reconstruct tag columns in GridExec --- .../modelardb_storage/src/query/grid_exec.rs | 39 ++++++++++++++++++- .../src/query/model_table.rs | 4 +- 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/crates/modelardb_storage/src/query/grid_exec.rs b/crates/modelardb_storage/src/query/grid_exec.rs index d45491411..40ba58262 100644 --- a/crates/modelardb_storage/src/query/grid_exec.rs +++ b/crates/modelardb_storage/src/query/grid_exec.rs @@ -24,6 +24,7 @@ use std::pin::Pin; use std::sync::Arc; use std::task::{Context as StdTaskContext, Poll}; +use arrow::array::{StringArray, StringBuilder}; use arrow::datatypes::Schema; use async_trait::async_trait; use datafusion::arrow::array::{ @@ -46,6 +47,7 @@ use datafusion::physical_plan::{ }; use futures::stream::{Stream, StreamExt}; use modelardb_compression::{self, MODEL_TYPE_COUNT, MODEL_TYPE_NAMES}; +use modelardb_types::schemas::QUERY_COMPRESSED_SCHEMA; use modelardb_types::types::{TimestampArray, TimestampBuilder, ValueArray, ValueBuilder}; /// An execution plan that reconstructs the data points stored as compressed segments containing @@ -284,6 +286,11 @@ impl GridStream { _error_array ); + let mut tag_arrays = vec![]; + for tag_index in QUERY_COMPRESSED_SCHEMA.0.fields().len()..batch.num_columns() { + tag_arrays.push(modelardb_types::array!(batch, tag_index, StringArray)); + } + // Allocate builders 
with approximately enough capacity. The builders are allocated with // enough capacity for the remaining data points in the current batch and one data point // from each segment in the new batch as each segment contains at least one data point. @@ -292,6 +299,14 @@ impl GridStream { let mut timestamp_builder = TimestampBuilder::with_capacity(current_rows + new_rows); let mut value_builder = ValueBuilder::with_capacity(current_rows + new_rows); + let mut tag_builders = vec![]; + for _ in 0..tag_arrays.len() { + tag_builders.push(StringBuilder::with_capacity( + current_rows + new_rows, + current_rows + new_rows, + )); + } + // Copy over the data points from the current batch to keep the resulting batch sorted. let current_batch = &self.current_batch; // Required as self cannot be passed to array!. timestamp_builder.append_slice( @@ -303,6 +318,13 @@ impl GridStream { [self.current_batch_offset..], ); + for (index, tag_builder) in tag_builders.iter_mut().enumerate() { + let tag_array = modelardb_types::array!(current_batch, index + 2, StringArray); + for i in self.current_batch_offset..current_batch.num_rows() { + tag_builder.append_value(tag_array.value(i)); + } + } + // Reconstruct the data points from the compressed segments. 
for row_index in 0..new_rows { let length_before = value_builder.len(); @@ -320,19 +342,32 @@ impl GridStream { &mut value_builder, ); + let created_rows = value_builder.len() - length_before; + + for (tag_builder, tag_array) in tag_builders.iter_mut().zip(&tag_arrays) { + let tag_value = tag_array.value(row_index); + for _ in 0..created_rows { + tag_builder.append_value(tag_value); + } + } + self.grid_stream_metrics.add( model_type_ids.value(row_index), - value_builder.len() - length_before, + created_rows, !residuals.value(row_index).is_empty(), modelardb_compression::are_compressed_timestamps_regular(timestamps.values()), ); } - let columns: Vec = vec![ + let mut columns: Vec = vec![ Arc::new(timestamp_builder.finish()), Arc::new(value_builder.finish()), ]; + for mut tag_builder in tag_builders { + columns.push(Arc::new(tag_builder.finish())); + } + // Update the current batch, unwrap() is safe as GridStream uses a static schema. // For simplicity, all data points are reconstructed and then pruned by time. 
let current_batch = RecordBatch::try_new(self.schema.clone(), columns).unwrap(); diff --git a/crates/modelardb_storage/src/query/model_table.rs b/crates/modelardb_storage/src/query/model_table.rs index e0e42acfd..ed985bb9e 100644 --- a/crates/modelardb_storage/src/query/model_table.rs +++ b/crates/modelardb_storage/src/query/model_table.rs @@ -416,9 +416,9 @@ fn new_apache_parquet_exec( let log_store = delta_table.log_store(); let file_scan_config = FileScanConfig { object_store_url: log_store.object_store_url(), - file_schema, + file_schema: file_schema.clone(), file_groups: vec![partitioned_files], - statistics: Statistics::new_unknown(&QUERY_COMPRESSED_SCHEMA.0), + statistics: Statistics::new_unknown(&file_schema), projection: None, limit: maybe_limit, table_partition_cols: vec![], From da3c9d33bbc018e7f06645197300422e647a678c Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 26 Feb 2025 22:58:45 +0100 Subject: [PATCH 56/69] No longer use tag_column_indices when checking for tag columns in projection --- crates/modelardb_storage/src/query/model_table.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/crates/modelardb_storage/src/query/model_table.rs b/crates/modelardb_storage/src/query/model_table.rs index ed985bb9e..9eddfeac3 100644 --- a/crates/modelardb_storage/src/query/model_table.rs +++ b/crates/modelardb_storage/src/query/model_table.rs @@ -24,6 +24,7 @@ use std::result::Result as StdResult; use std::sync::Arc; use arrow::compute::SortOptions; +use arrow::datatypes::DataType::Utf8; use async_trait::async_trait; use datafusion::arrow::datatypes::{ ArrowPrimitiveType, DataType, Field, Schema, SchemaRef, TimeUnit, @@ -551,9 +552,7 @@ impl TableProvider for ModelTable { let mut stored_columns_in_projection: Vec = Vec::with_capacity(projection.len()); let mut stored_field_columns_in_projection: Vec = - Vec::with_capacity(query_schema.fields.len() - 1 - tag_column_indices.len()); - 
let mut stored_tag_columns_in_projection: Vec<&str> = - Vec::with_capacity(tag_column_indices.len()); + Vec::with_capacity(schema.fields.len() - 1 - tag_column_indices.len()); let mut generated_columns_in_projection: Vec = Vec::with_capacity(query_schema.fields.len() - schema.fields().len()); @@ -561,11 +560,10 @@ impl TableProvider for ModelTable { if *query_schema.field(*query_schema_index).data_type() == ArrowTimestamp::DATA_TYPE { // Timestamp. stored_columns_in_projection.push(SortedJoinColumnType::Timestamp); - } else if tag_column_indices.contains(query_schema_index) { + } else if *query_schema.field(*query_schema_index).data_type() == Utf8 { // Tag. - stored_tag_columns_in_projection - .push(query_schema.fields[*query_schema_index].name()); - stored_columns_in_projection.push(SortedJoinColumnType::Tag); + let tag_column_name = query_schema.fields[*query_schema_index].name().clone(); + stored_columns_in_projection.push(SortedJoinColumnType::Tag(tag_column_name)); } else if let Some(generated_column) = &generated_columns[*query_schema_index] { // Generated field. 
let physical_expr = convert_logical_expr_to_physical_expr( From 27a1cfde20cc2a2edfe80e49da7d43fe206fd3ab Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 26 Feb 2025 23:00:35 +0100 Subject: [PATCH 57/69] Use tag columns in data points in sorted_join() --- .../src/query/sorted_join_exec.rs | 25 ++++++++----------- docs/dev/README.md | 2 +- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/crates/modelardb_storage/src/query/sorted_join_exec.rs b/crates/modelardb_storage/src/query/sorted_join_exec.rs index 866154e1a..ffb9a255f 100644 --- a/crates/modelardb_storage/src/query/sorted_join_exec.rs +++ b/crates/modelardb_storage/src/query/sorted_join_exec.rs @@ -26,7 +26,7 @@ use std::pin::Pin; use std::sync::Arc; use std::task::{Context as StdTaskContext, Poll}; -use datafusion::arrow::array::{ArrayRef, StringBuilder}; +use datafusion::arrow::array::ArrayRef; use datafusion::arrow::datatypes::SchemaRef; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; @@ -46,11 +46,11 @@ use futures::stream::{Stream, StreamExt}; pub(crate) enum SortedJoinColumnType { Timestamp, Field, - Tag, + Tag(String), } -/// An execution plan that join arrays of data points sorted by `univariate_id` and `timestamp` from -/// multiple execution plans and tags. It is `pub(crate)` so the additional rules added to Apache +/// An execution plan that join arrays of data points sorted by tag columns and `timestamp` from +/// multiple execution plans. It is `pub(crate)` so the additional rules added to Apache /// DataFusion's physical optimizer can pattern match on it. 
#[derive(Debug)] pub(crate) struct SortedJoinExec { @@ -171,8 +171,8 @@ impl ExecutionPlan for SortedJoinExec { } /// Specify that [`SortedJoinStream`] requires one partition for each input as it assumes that - /// the global sort order is the same for all inputs and Apache Arrow DataFusion only - /// guarantees the sort order within each partition rather than the inputs' global sort order. + /// the sort order is the same for all inputs and Apache Arrow DataFusion only guarantees the + /// sort order within each partition rather than the inputs' global sort order. fn required_input_distribution(&self) -> Vec { vec![Distribution::SinglePartition; self.inputs.len()] } @@ -279,15 +279,11 @@ impl SortedJoinStream { fn sorted_join(&self) -> Poll>> { let mut columns: Vec = Vec::with_capacity(self.schema.fields.len()); - // TODO: Compute the requested tag columns, so they can be assigned to the batch by index. // unwrap() is safe as a record batch is read from each input before this method is called. let batch = self.batches[0].as_ref().unwrap(); - let mut tag_columns: Vec = vec![]; - - // The batches and tags columns are already in the correct order, so they can be appended. + // The batches are already in the correct order, so they can be appended. let mut field_index = 0; - let mut tag_index = 0; for element in &self.return_order { match element { @@ -298,10 +294,9 @@ impl SortedJoinStream { columns.push(batch.column(1).clone()); field_index += 1; } - SortedJoinColumnType::Tag => { - let tags = Arc::new(tag_columns[tag_index].finish()); - columns.push(tags); - tag_index += 1; + SortedJoinColumnType::Tag(tag_column_name) => { + // unwrap() is safe as all tag columns are present in the schema. + columns.push(batch.column_by_name(tag_column_name).unwrap().clone()); } } } diff --git a/docs/dev/README.md b/docs/dev/README.md index f8770417a..5bb855859 100644 --- a/docs/dev/README.md +++ b/docs/dev/README.md @@ -72,7 +72,7 @@ storage. 
- **Test** - Constants and functionality for data generation for use in tests. - [modelardb_types](/crates/modelardb_types) - Library of shared macros and types for use by the other crates. - **Error** - Error type used throughout the crate, a single error type is used for simplicity. - - **Functions** - Functions for operating on the types, e.g., extracting elements from univariate ids. + - **Functions** - Functions for operating on the types. - **Macros** - Macros for extracting an array from a `RecordBatch` and extracting all arrays from a `RecordBatch` with compressed segments. - **Schemas** - Schemas used throughout the ModelarDB project, e.g., for buffers and for Apache Parquet files with From 62e04dcd3587e2ea897edd0e328e8cf1beee068f Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 26 Feb 2025 23:27:18 +0100 Subject: [PATCH 58/69] Reformat and fixed doc and clippy issues --- crates/modelardb_compression/src/compression.rs | 2 +- crates/modelardb_compression/src/models/swing.rs | 4 +--- crates/modelardb_storage/src/lib.rs | 6 +----- crates/modelardb_storage/src/metadata/mod.rs | 4 ++-- 4 files changed, 5 insertions(+), 11 deletions(-) diff --git a/crates/modelardb_compression/src/compression.rs b/crates/modelardb_compression/src/compression.rs index f7147412c..1783da55c 100644 --- a/crates/modelardb_compression/src/compression.rs +++ b/crates/modelardb_compression/src/compression.rs @@ -259,7 +259,7 @@ mod tests { use super::*; - use arrow::array::{ArrayBuilder, BinaryArray, Float32Array, UInt64Builder, UInt8Array}; + use arrow::array::{ArrayBuilder, BinaryArray, Float32Array, UInt8Array}; use arrow::datatypes::{DataType, Field}; use modelardb_common::test::data_generation::{self, ValuesStructure}; use modelardb_common::test::{ERROR_BOUND_FIVE, ERROR_BOUND_ZERO}; diff --git a/crates/modelardb_compression/src/models/swing.rs b/crates/modelardb_compression/src/models/swing.rs index ea84ad6f7..270d4e497 100644 --- 
a/crates/modelardb_compression/src/models/swing.rs +++ b/crates/modelardb_compression/src/models/swing.rs @@ -24,9 +24,7 @@ //! [ModelarDB paper]: https://www.vldb.org/pvldb/vol11/p1688-jensen.pdf use modelardb_types::schemas::COMPRESSED_METADATA_SIZE_IN_BYTES; -use modelardb_types::types::{ - ErrorBound, Timestamp, TimestampBuilder, Value, ValueBuilder, -}; +use modelardb_types::types::{ErrorBound, Timestamp, TimestampBuilder, Value, ValueBuilder}; use super::timestamps; use crate::models; diff --git a/crates/modelardb_storage/src/lib.rs b/crates/modelardb_storage/src/lib.rs index 8c0959a28..8cc17d118 100644 --- a/crates/modelardb_storage/src/lib.rs +++ b/crates/modelardb_storage/src/lib.rs @@ -129,11 +129,7 @@ pub fn register_model_table( model_table_metadata: Arc, data_sink: Arc, ) -> Result<()> { - let model_table = ModelTable::new( - delta_table, - model_table_metadata.clone(), - data_sink, - ); + let model_table = ModelTable::new(delta_table, model_table_metadata.clone(), data_sink); session_context.register_table(&model_table_metadata.name, model_table)?; diff --git a/crates/modelardb_storage/src/metadata/mod.rs b/crates/modelardb_storage/src/metadata/mod.rs index cac6c8b74..df2525fd9 100644 --- a/crates/modelardb_storage/src/metadata/mod.rs +++ b/crates/modelardb_storage/src/metadata/mod.rs @@ -13,8 +13,8 @@ * limitations under the License. */ -//! Implementation of [`ModelTableMetadata`](crate::ModelTableMetadata) which contains metadata -//! required to interact with model tables and [`TableMetadataManager`](crate::TableMetadataManager) +//! Implementation of [`ModelTableMetadata`](crate::ModelTableMetadata) which contains metadata required +//! to interact with model tables and [`TableMetadataManager`](table_metadata_manager::TableMetadataManager) //! which provides functionality to access table related metadata in the metadata Delta Lake. 
pub mod model_table_metadata; From d4cb178e57234b8fbe983ec2cea9a794535e14d0 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Thu, 27 Feb 2025 18:45:42 +0100 Subject: [PATCH 59/69] Fix bug causing INSERT INTO to fail due to schema mismatch --- .../src/storage/data_sinks.rs | 23 +++++++++++++++---- .../src/metadata/model_table_metadata.rs | 2 +- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/crates/modelardb_server/src/storage/data_sinks.rs b/crates/modelardb_server/src/storage/data_sinks.rs index c5d2b19dc..b0723eeff 100644 --- a/crates/modelardb_server/src/storage/data_sinks.rs +++ b/crates/modelardb_server/src/storage/data_sinks.rs @@ -13,14 +13,16 @@ * limitations under the License. */ -//! Implementation of [`DataSinks`](`DataSink`) that writes -//! [`RecordBatches`](datafusion::arrow::record_batch::RecordBatch) to [`StorageEngine`]. +//! Implementation of [`DataSinks`](`DataSink`) that writes [`RecordBatches`](RecordBatch) to +//! [`StorageEngine`]. use std::any::Any; use std::fmt::{Debug, Formatter, Result as FmtResult}; use std::sync::Arc; use async_trait::async_trait; +use datafusion::arrow::record_batch::RecordBatch; +use datafusion::arrow::datatypes::{Field, Schema}; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::TaskContext; use datafusion::physical_plan::insert::DataSink; @@ -32,9 +34,8 @@ use tokio::sync::RwLock; use crate::storage::StorageEngine; -/// [`DataSink`] that writes [`RecordBatches`](datafusion::arrow::record_batch::RecordBatch) to -/// [`StorageEngine`]. Use [`ModelTableDataSink`] for writing multivariate time series to -/// [`StorageEngine`]. +/// [`DataSink`] that writes [`RecordBatches`](RecordBatch) to [`StorageEngine`]. Use +/// [`ModelTableDataSink`] for writing multivariate time series to [`StorageEngine`]. pub struct NormalTableDataSink { /// The name of the normal table inserted data will be written to. 
table_name: String, @@ -152,6 +153,18 @@ impl DataSink for ModelTableDataSink { let record_batch = record_batch?.project(&self.model_table_metadata.query_schema_to_schema)?; + // Manually ensure the fields are not nullable. It is not possible to insert null values + // into model tables but the schema of the record batch may contain nullable fields. + let mut fields: Vec = Vec::with_capacity(record_batch.schema().fields.len()); + for field in record_batch.schema().fields() { + fields.push(Field::new(field.name(), field.data_type().clone(), false)); + } + + let record_batch = RecordBatch::try_new( + Arc::new(Schema::new(fields)), + record_batch.columns().to_vec(), + )?; + data_points_inserted += record_batch.num_rows() as u64; let mut storage_engine = self.storage_engine.write().await; diff --git a/crates/modelardb_storage/src/metadata/model_table_metadata.rs b/crates/modelardb_storage/src/metadata/model_table_metadata.rs index 0c416298d..556c33748 100644 --- a/crates/modelardb_storage/src/metadata/model_table_metadata.rs +++ b/crates/modelardb_storage/src/metadata/model_table_metadata.rs @@ -529,7 +529,7 @@ mod test { assert_eq!( result.unwrap_err().to_string(), - "Invalid argument: The record batch does not match the schema of the model table." + "Invalid Argument Error: The record batch does not match the schema of the model table." 
); } From e74acb295132f2374961cb686ff70c52abd6dbe2 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Thu, 27 Feb 2025 18:57:57 +0100 Subject: [PATCH 60/69] Reformat with Rustfmt --- crates/modelardb_client/src/error.rs | 2 +- crates/modelardb_client/src/helper.rs | 4 +- crates/modelardb_client/src/main.rs | 4 +- crates/modelardb_common/src/remote.rs | 2 +- .../modelardb_compression/src/compression.rs | 84 +++--- .../src/models/gorilla.rs | 10 +- crates/modelardb_compression/src/types.rs | 4 +- crates/modelardb_manager/src/cluster.rs | 14 +- crates/modelardb_manager/src/error.rs | 2 +- crates/modelardb_manager/src/metadata.rs | 26 +- crates/modelardb_manager/src/remote.rs | 4 +- crates/modelardb_server/src/configuration.rs | 2 +- crates/modelardb_server/src/context.rs | 256 +++++++++++------- crates/modelardb_server/src/data_folders.rs | 12 +- crates/modelardb_server/src/error.rs | 2 +- crates/modelardb_server/src/manager.rs | 4 +- crates/modelardb_server/src/remote.rs | 8 +- .../src/storage/data_sinks.rs | 2 +- .../src/storage/data_transfer.rs | 26 +- crates/modelardb_server/src/storage/types.rs | 6 +- .../src/storage/uncompressed_data_buffer.rs | 2 +- .../src/storage/uncompressed_data_manager.rs | 78 +++--- .../tests/integration_test.rs | 16 +- crates/modelardb_storage/src/delta_lake.rs | 4 +- crates/modelardb_storage/src/error.rs | 2 +- crates/modelardb_storage/src/lib.rs | 4 +- .../src/metadata/model_table_metadata.rs | 4 +- .../src/metadata/table_metadata_manager.rs | 83 +++--- crates/modelardb_storage/src/parser.rs | 228 +++++++++------- .../src/query/generated_as_exec.rs | 2 +- .../src/query/metadata_table.rs | 2 +- .../src/query/model_table.rs | 4 +- .../src/query/normal_table.rs | 2 +- crates/modelardb_storage/src/test.rs | 2 +- 34 files changed, 521 insertions(+), 386 deletions(-) diff --git a/crates/modelardb_client/src/error.rs b/crates/modelardb_client/src/error.rs index d80c0a825..96e72fdcc 100644 --- 
a/crates/modelardb_client/src/error.rs +++ b/crates/modelardb_client/src/error.rs @@ -23,8 +23,8 @@ use std::result::Result as StdResult; use arrow::error::ArrowError; use object_store::Error as ObjectStoreError; use rustyline::error::ReadlineError as RustyLineError; -use tonic::transport::Error as TonicTransportError; use tonic::Status as TonicStatusError; +use tonic::transport::Error as TonicTransportError; /// Result type used throughout `modelardb_client`. pub type Result = StdResult; diff --git a/crates/modelardb_client/src/helper.rs b/crates/modelardb_client/src/helper.rs index f15d2a4fb..49ccf2683 100644 --- a/crates/modelardb_client/src/helper.rs +++ b/crates/modelardb_client/src/helper.rs @@ -19,13 +19,13 @@ use std::result::Result; +use rustyline::Context; +use rustyline::Helper; use rustyline::completion::{self, Completer}; use rustyline::error::ReadlineError; use rustyline::highlight::Highlighter; use rustyline::hint::Hinter; use rustyline::validate::Validator; -use rustyline::Context; -use rustyline::Helper; /// Provides tab-completion for the client's read-eval-print loop. 
pub struct ClientHelper { diff --git a/crates/modelardb_client/src/main.rs b/crates/modelardb_client/src/main.rs index f2e6c1786..fec58c286 100644 --- a/crates/modelardb_client/src/main.rs +++ b/crates/modelardb_client/src/main.rs @@ -31,13 +31,13 @@ use arrow::datatypes::{Schema, SchemaRef, ToByteSlice}; use arrow::ipc::convert; use arrow::util::pretty; use arrow_flight::flight_service_client::FlightServiceClient; -use arrow_flight::{utils, Action, Criteria, FlightData, FlightDescriptor, Ticket}; +use arrow_flight::{Action, Criteria, FlightData, FlightDescriptor, Ticket, utils}; use bytes::Bytes; use object_store::local::LocalFileSystem; use object_store::path::Path; use object_store::{ObjectMeta, ObjectStore}; -use rustyline::history::FileHistory; use rustyline::Editor; +use rustyline::history::FileHistory; use tonic::transport::Channel; use tonic::{Request, Streaming}; diff --git a/crates/modelardb_common/src/remote.rs b/crates/modelardb_common/src/remote.rs index 3224513eb..1e0450499 100644 --- a/crates/modelardb_common/src/remote.rs +++ b/crates/modelardb_common/src/remote.rs @@ -21,7 +21,7 @@ use std::error::Error; use arrow::array::ArrayRef; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; -use arrow_flight::{utils, FlightData, FlightDescriptor}; +use arrow_flight::{FlightData, FlightDescriptor, utils}; use tonic::Status; /// Return the table stored as the first element in [`FlightDescriptor.path`], otherwise a diff --git a/crates/modelardb_compression/src/compression.rs b/crates/modelardb_compression/src/compression.rs index 1783da55c..1a524ff19 100644 --- a/crates/modelardb_compression/src/compression.rs +++ b/crates/modelardb_compression/src/compression.rs @@ -25,7 +25,7 @@ use modelardb_types::types::{ErrorBound, TimestampArray, ValueArray}; use crate::error::{ModelarDbCompressionError, Result}; use crate::models::gorilla::Gorilla; -use crate::models::{self, timestamps, GORILLA_ID}; +use crate::models::{self, GORILLA_ID, 
timestamps}; use crate::types::{CompressedSegmentBatchBuilder, CompressedSegmentBuilder, ModelBuilder}; /// Maximum number of residuals that can be stored as part of a compressed segment. The number of @@ -266,7 +266,7 @@ mod tests { use modelardb_types::schemas::COMPRESSED_SCHEMA; use modelardb_types::types::{TimestampBuilder, ValueBuilder}; - use crate::{models, MODEL_TYPE_NAMES}; + use crate::{MODEL_TYPE_NAMES, models}; const TAG_VALUE: &str = "tag"; const ADD_NOISE_RANGE: Option> = Some(1.0..1.05); @@ -531,8 +531,8 @@ mod tests { } #[test] - fn test_try_compress_regular_random_linear_constant_time_series_within_absolute_error_bound_zero( - ) { + fn test_try_compress_regular_random_linear_constant_time_series_within_absolute_error_bound_zero() + { generate_compress_and_assert_known_time_series( ErrorBound::try_new_absolute(ERROR_BOUND_ZERO).unwrap(), false, @@ -542,8 +542,8 @@ mod tests { } #[test] - fn test_try_compress_regular_random_linear_constant_time_series_within_relative_error_bound_zero( - ) { + fn test_try_compress_regular_random_linear_constant_time_series_within_relative_error_bound_zero() + { generate_compress_and_assert_known_time_series( ErrorBound::try_new_relative(ERROR_BOUND_ZERO).unwrap(), false, @@ -553,8 +553,8 @@ mod tests { } #[test] - fn test_try_compress_irregular_random_linear_constant_time_series_within_absolute_error_bound_zero( - ) { + fn test_try_compress_irregular_random_linear_constant_time_series_within_absolute_error_bound_zero() + { generate_compress_and_assert_known_time_series( ErrorBound::try_new_absolute(ERROR_BOUND_ZERO).unwrap(), true, @@ -564,8 +564,8 @@ mod tests { } #[test] - fn test_try_compress_irregular_random_linear_constant_time_series_within_relative_error_bound_zero( - ) { + fn test_try_compress_irregular_random_linear_constant_time_series_within_relative_error_bound_zero() + { generate_compress_and_assert_known_time_series( ErrorBound::try_new_relative(ERROR_BOUND_ZERO).unwrap(), true, @@ -575,8 +575,8 @@ mod 
tests { } #[test] - fn test_try_compress_regular_constant_linear_random_time_series_within_absolute_error_bound_zero( - ) { + fn test_try_compress_regular_constant_linear_random_time_series_within_absolute_error_bound_zero() + { generate_compress_and_assert_known_time_series( ErrorBound::try_new_absolute(ERROR_BOUND_ZERO).unwrap(), false, @@ -586,8 +586,8 @@ mod tests { } #[test] - fn test_try_compress_regular_constant_linear_random_time_series_within_relative_error_bound_zero( - ) { + fn test_try_compress_regular_constant_linear_random_time_series_within_relative_error_bound_zero() + { generate_compress_and_assert_known_time_series( ErrorBound::try_new_relative(ERROR_BOUND_ZERO).unwrap(), false, @@ -597,8 +597,8 @@ mod tests { } #[test] - fn test_try_compress_irregular_constant_linear_random_time_series_within_absolute_error_bound_zero( - ) { + fn test_try_compress_irregular_constant_linear_random_time_series_within_absolute_error_bound_zero() + { generate_compress_and_assert_known_time_series( ErrorBound::try_new_absolute(ERROR_BOUND_ZERO).unwrap(), true, @@ -608,8 +608,8 @@ mod tests { } #[test] - fn test_try_compress_irregular_constant_linear_random_time_series_within_relative_error_bound_zero( - ) { + fn test_try_compress_irregular_constant_linear_random_time_series_within_relative_error_bound_zero() + { generate_compress_and_assert_known_time_series( ErrorBound::try_new_relative(ERROR_BOUND_ZERO).unwrap(), true, @@ -704,8 +704,8 @@ mod tests { } #[test] - fn test_try_compress_regular_synthetic_time_series_without_noise_within_absolute_error_bound_zero( - ) { + fn test_try_compress_regular_synthetic_time_series_without_noise_within_absolute_error_bound_zero() + { generate_compress_and_assert_time_series( ErrorBound::try_new_absolute(ERROR_BOUND_ZERO).unwrap(), false, @@ -714,8 +714,8 @@ mod tests { } #[test] - fn test_try_compress_regular_synthetic_time_series_without_noise_within_relative_error_bound_zero( - ) { + fn 
test_try_compress_regular_synthetic_time_series_without_noise_within_relative_error_bound_zero() + { generate_compress_and_assert_time_series( ErrorBound::try_new_relative(ERROR_BOUND_ZERO).unwrap(), false, @@ -724,8 +724,8 @@ mod tests { } #[test] - fn test_try_compress_regular_synthetic_time_series_without_noise_within_absolute_error_bound_five( - ) { + fn test_try_compress_regular_synthetic_time_series_without_noise_within_absolute_error_bound_five() + { generate_compress_and_assert_time_series( ErrorBound::try_new_absolute(ERROR_BOUND_FIVE).unwrap(), false, @@ -734,8 +734,8 @@ mod tests { } #[test] - fn test_try_compress_regular_synthetic_time_series_without_noise_within_relative_error_bound_five( - ) { + fn test_try_compress_regular_synthetic_time_series_without_noise_within_relative_error_bound_five() + { generate_compress_and_assert_time_series( ErrorBound::try_new_relative(ERROR_BOUND_FIVE).unwrap(), false, @@ -784,8 +784,8 @@ mod tests { } #[test] - fn test_try_compress_irregular_synthetic_time_series_without_noise_within_absolute_error_bound_zero( - ) { + fn test_try_compress_irregular_synthetic_time_series_without_noise_within_absolute_error_bound_zero() + { generate_compress_and_assert_time_series( ErrorBound::try_new_absolute(ERROR_BOUND_ZERO).unwrap(), true, @@ -794,8 +794,8 @@ mod tests { } #[test] - fn test_try_compress_irregular_synthetic_time_series_without_noise_within_relative_error_bound_zero( - ) { + fn test_try_compress_irregular_synthetic_time_series_without_noise_within_relative_error_bound_zero() + { generate_compress_and_assert_time_series( ErrorBound::try_new_relative(ERROR_BOUND_ZERO).unwrap(), true, @@ -804,8 +804,8 @@ mod tests { } #[test] - fn test_try_compress_irregular_synthetic_time_series_without_noise_within_absolute_error_bound_five( - ) { + fn test_try_compress_irregular_synthetic_time_series_without_noise_within_absolute_error_bound_five() + { generate_compress_and_assert_time_series( 
ErrorBound::try_new_absolute(ERROR_BOUND_FIVE).unwrap(), true, @@ -814,8 +814,8 @@ mod tests { } #[test] - fn test_try_compress_irregular_synthetic_time_series_without_noise_within_relative_error_bound_five( - ) { + fn test_try_compress_irregular_synthetic_time_series_without_noise_within_relative_error_bound_five() + { generate_compress_and_assert_time_series( ErrorBound::try_new_relative(ERROR_BOUND_FIVE).unwrap(), true, @@ -824,8 +824,8 @@ mod tests { } #[test] - fn test_try_compress_irregular_synthetic_time_series_with_noise_within_absolute_error_bound_zero( - ) { + fn test_try_compress_irregular_synthetic_time_series_with_noise_within_absolute_error_bound_zero() + { generate_compress_and_assert_time_series( ErrorBound::try_new_absolute(ERROR_BOUND_ZERO).unwrap(), true, @@ -834,8 +834,8 @@ mod tests { } #[test] - fn test_try_compress_irregular_synthetic_time_series_with_noise_within_relative_error_bound_zero( - ) { + fn test_try_compress_irregular_synthetic_time_series_with_noise_within_relative_error_bound_zero() + { generate_compress_and_assert_time_series( ErrorBound::try_new_relative(ERROR_BOUND_ZERO).unwrap(), true, @@ -844,8 +844,8 @@ mod tests { } #[test] - fn test_try_compress_irregular_synthetic_time_series_with_noise_within_absolute_error_bound_five( - ) { + fn test_try_compress_irregular_synthetic_time_series_with_noise_within_absolute_error_bound_five() + { generate_compress_and_assert_time_series( ErrorBound::try_new_absolute(ERROR_BOUND_FIVE).unwrap(), true, @@ -854,8 +854,8 @@ mod tests { } #[test] - fn test_try_compress_irregular_synthetic_time_series_with_noise_within_relative_error_bound_five( - ) { + fn test_try_compress_irregular_synthetic_time_series_with_noise_within_relative_error_bound_five() + { generate_compress_and_assert_time_series( ErrorBound::try_new_relative(ERROR_BOUND_FIVE).unwrap(), true, diff --git a/crates/modelardb_compression/src/models/gorilla.rs b/crates/modelardb_compression/src/models/gorilla.rs index 
cadb6614e..587adb917 100644 --- a/crates/modelardb_compression/src/models/gorilla.rs +++ b/crates/modelardb_compression/src/models/gorilla.rs @@ -25,8 +25,8 @@ use modelardb_types::types::{Timestamp, Value, ValueBuilder}; use crate::models; -use crate::models::bits::{BitReader, BitVecBuilder}; use crate::models::ErrorBound; +use crate::models::bits::{BitReader, BitVecBuilder}; /// The state the Gorilla model type needs while compressing the values of a /// time series segment. @@ -519,9 +519,11 @@ mod tests { let values_array = value_builder.finish(); assert!(values.len() == timestamps.len() && values.len() == values_array.len()); - assert!(timestamps - .windows(2) - .all(|window| window[1] - window[0] == 1)); + assert!( + timestamps + .windows(2) + .all(|window| window[1] - window[0] == 1) + ); assert!(slice_of_value_equal(values_array.values(), values)); } diff --git a/crates/modelardb_compression/src/types.rs b/crates/modelardb_compression/src/types.rs index 34802adf0..74aebb2cf 100644 --- a/crates/modelardb_compression/src/types.rs +++ b/crates/modelardb_compression/src/types.rs @@ -19,7 +19,7 @@ use std::debug_assert; use std::sync::Arc; use arrow::array::{ - ArrayBuilder, ArrayRef, BinaryBuilder, Float32Builder, StringArray, UInt16Array, UInt8Builder, + ArrayBuilder, ArrayRef, BinaryBuilder, Float32Builder, StringArray, UInt8Builder, UInt16Array, }; use arrow::datatypes::Schema; use arrow::record_batch::RecordBatch; @@ -30,8 +30,8 @@ use modelardb_types::types::{ use crate::models::gorilla::Gorilla; use crate::models::pmc_mean::PMCMean; use crate::models::swing::Swing; -use crate::models::{timestamps, VALUE_SIZE_IN_BYTES}; use crate::models::{PMC_MEAN_ID, SWING_ID}; +use crate::models::{VALUE_SIZE_IN_BYTES, timestamps}; /// A model being built from an uncompressed segment using the potentially lossy model types in /// [`models`]. 
Each of the potentially lossy model types is used to fit models to the data points, diff --git a/crates/modelardb_manager/src/cluster.rs b/crates/modelardb_manager/src/cluster.rs index b5f80b0b7..c4b9eaaea 100644 --- a/crates/modelardb_manager/src/cluster.rs +++ b/crates/modelardb_manager/src/cluster.rs @@ -20,13 +20,13 @@ use std::collections::VecDeque; use arrow::record_batch::RecordBatch; use arrow_flight::flight_service_client::FlightServiceClient; use arrow_flight::{Action, Ticket}; -use futures::stream::FuturesUnordered; use futures::StreamExt; +use futures::stream::FuturesUnordered; use log::info; use modelardb_types::schemas::TABLE_METADATA_SCHEMA; use modelardb_types::types::ServerMode; -use tonic::metadata::{Ascii, MetadataValue}; use tonic::Request; +use tonic::metadata::{Ascii, MetadataValue}; use crate::error::{ModelarDbManagerError, Result}; @@ -288,10 +288,12 @@ mod test { #[tokio::test] async fn test_remove_node_invalid_url() { let mut cluster = Cluster::new(); - assert!(cluster - .remove_node("invalid_url", &Uuid::new_v4().to_string().parse().unwrap()) - .await - .is_err()); + assert!( + cluster + .remove_node("invalid_url", &Uuid::new_v4().to_string().parse().unwrap()) + .await + .is_err() + ); } #[test] diff --git a/crates/modelardb_manager/src/error.rs b/crates/modelardb_manager/src/error.rs index 48036d029..16990450f 100644 --- a/crates/modelardb_manager/src/error.rs +++ b/crates/modelardb_manager/src/error.rs @@ -23,8 +23,8 @@ use std::result::Result as StdResult; use deltalake::errors::DeltaTableError; use modelardb_common::error::ModelarDbCommonError; use modelardb_storage::error::ModelarDbStorageError; -use tonic::transport::Error as TonicTransportError; use tonic::Status as TonicStatusError; +use tonic::transport::Error as TonicTransportError; /// Result type used throughout `modelardb_manager`. 
pub type Result = StdResult; diff --git a/crates/modelardb_manager/src/metadata.rs b/crates/modelardb_manager/src/metadata.rs index f45a0ed28..a3529fb9c 100644 --- a/crates/modelardb_manager/src/metadata.rs +++ b/crates/modelardb_manager/src/metadata.rs @@ -21,9 +21,9 @@ use std::sync::Arc; use arrow::array::{Array, StringArray}; use arrow::datatypes::{DataType, Field, Schema}; +use deltalake::DeltaTableError; use deltalake::datafusion::logical_expr::{col, lit}; use deltalake::datafusion::prelude::SessionContext; -use deltalake::DeltaTableError; use modelardb_storage::delta_lake::DeltaLake; use modelardb_storage::metadata::table_metadata_manager::TableMetadataManager; use modelardb_storage::{register_metadata_table, sql_and_concat}; @@ -205,17 +205,21 @@ mod tests { let (_temp_dir, metadata_manager) = create_metadata_manager().await; // Verify that the tables were created, registered, and has the expected columns. - assert!(metadata_manager - .session_context - .sql("SELECT key FROM manager_metadata") - .await - .is_ok()); + assert!( + metadata_manager + .session_context + .sql("SELECT key FROM manager_metadata") + .await + .is_ok() + ); - assert!(metadata_manager - .session_context - .sql("SELECT url, mode FROM nodes") - .await - .is_ok()); + assert!( + metadata_manager + .session_context + .sql("SELECT url, mode FROM nodes") + .await + .is_ok() + ); } #[tokio::test] diff --git a/crates/modelardb_manager/src/remote.rs b/crates/modelardb_manager/src/remote.rs index 10c55c415..fd0d5b32e 100644 --- a/crates/modelardb_manager/src/remote.rs +++ b/crates/modelardb_manager/src/remote.rs @@ -33,7 +33,7 @@ use arrow_flight::{ HandshakeRequest, HandshakeResponse, PollInfo, PutResult, Result as FlightResult, SchemaAsIpc, SchemaResult, Ticket, }; -use futures::{stream, Stream}; +use futures::{Stream, stream}; use modelardb_common::arguments; use modelardb_common::remote; use modelardb_common::remote::{error_to_status_internal, error_to_status_invalid_argument}; @@ -47,9 +47,9 
@@ use tonic::transport::Server; use tonic::{Request, Response, Status, Streaming}; use tracing::info; +use crate::Context; use crate::cluster::Node; use crate::error::{ModelarDbManagerError, Result}; -use crate::Context; /// Start an Apache Arrow Flight server on 0.0.0.0:`port`. pub fn start_apache_arrow_flight_server( diff --git a/crates/modelardb_server/src/configuration.rs b/crates/modelardb_server/src/configuration.rs index 402f12767..39a6de00b 100644 --- a/crates/modelardb_server/src/configuration.rs +++ b/crates/modelardb_server/src/configuration.rs @@ -21,9 +21,9 @@ use std::sync::Arc; use tokio::sync::RwLock; +use crate::ClusterMode; use crate::error::Result; use crate::storage::StorageEngine; -use crate::ClusterMode; /// Manages the system's configuration and provides functionality for updating the configuration. #[derive(Clone)] diff --git a/crates/modelardb_server/src/context.rs b/crates/modelardb_server/src/context.rs index 857e438ac..59c2f7b0d 100644 --- a/crates/modelardb_server/src/context.rs +++ b/crates/modelardb_server/src/context.rs @@ -29,8 +29,8 @@ use tracing::info; use crate::configuration::ConfigurationManager; use crate::error::{ModelarDbServerError, Result}; -use crate::storage::data_sinks::{ModelTableDataSink, NormalTableDataSink}; use crate::storage::StorageEngine; +use crate::storage::data_sinks::{ModelTableDataSink, NormalTableDataSink}; use crate::{ClusterMode, DataFolders}; /// Provides access to the system's configuration and components. @@ -445,15 +445,19 @@ mod tests { .unwrap(); // Both a normal table and a model table should be created. 
- assert!(context - .check_if_table_exists(test::NORMAL_TABLE_NAME) - .await - .is_err()); + assert!( + context + .check_if_table_exists(test::NORMAL_TABLE_NAME) + .await + .is_err() + ); - assert!(context - .check_if_table_exists(test::MODEL_TABLE_NAME) - .await - .is_err()); + assert!( + context + .check_if_table_exists(test::MODEL_TABLE_NAME) + .await + .is_err() + ); } #[tokio::test] @@ -476,19 +480,23 @@ mod tests { assert!(folder_path.exists()); // The normal table should be saved to the metadata Delta Lake. - assert!(context - .data_folders - .local_data_folder - .table_metadata_manager - .is_normal_table(test::NORMAL_TABLE_NAME) - .await - .unwrap()); + assert!( + context + .data_folders + .local_data_folder + .table_metadata_manager + .is_normal_table(test::NORMAL_TABLE_NAME) + .await + .unwrap() + ); // The normal table should be registered in the Apache DataFusion catalog. - assert!(context - .check_if_table_exists(test::NORMAL_TABLE_NAME) - .await - .is_err()); + assert!( + context + .check_if_table_exists(test::NORMAL_TABLE_NAME) + .await + .is_err() + ); } #[tokio::test] @@ -496,15 +504,19 @@ mod tests { let temp_dir = tempfile::tempdir().unwrap(); let context = create_context(&temp_dir).await; - assert!(context - .create_normal_table(test::NORMAL_TABLE_NAME, &test::normal_table_schema()) - .await - .is_ok()); + assert!( + context + .create_normal_table(test::NORMAL_TABLE_NAME, &test::normal_table_schema()) + .await + .is_ok() + ); - assert!(context - .create_normal_table(test::NORMAL_TABLE_NAME, &test::normal_table_schema()) - .await - .is_err()); + assert!( + context + .create_normal_table(test::NORMAL_TABLE_NAME, &test::normal_table_schema()) + .await + .is_err() + ); } #[tokio::test] @@ -532,10 +544,12 @@ mod tests { ); // The model table should be registered in the Apache DataFusion catalog. 
- assert!(context - .check_if_table_exists(test::MODEL_TABLE_NAME) - .await - .is_err()); + assert!( + context + .check_if_table_exists(test::MODEL_TABLE_NAME) + .await + .is_err() + ); } #[tokio::test] @@ -543,15 +557,19 @@ mod tests { let temp_dir = tempfile::tempdir().unwrap(); let context = create_context(&temp_dir).await; - assert!(context - .create_model_table(&test::model_table_metadata()) - .await - .is_ok()); + assert!( + context + .create_model_table(&test::model_table_metadata()) + .await + .is_ok() + ); - assert!(context - .create_model_table(&test::model_table_metadata()) - .await - .is_err()); + assert!( + context + .create_model_table(&test::model_table_metadata()) + .await + .is_err() + ); } #[tokio::test] @@ -604,27 +622,33 @@ mod tests { .await .unwrap(); - assert!(context - .check_if_table_exists(test::NORMAL_TABLE_NAME) - .await - .is_err()); + assert!( + context + .check_if_table_exists(test::NORMAL_TABLE_NAME) + .await + .is_err() + ); context.drop_table(test::NORMAL_TABLE_NAME).await.unwrap(); // The normal table should be deregistered from the Apache DataFusion session context. - assert!(context - .check_if_table_exists(test::NORMAL_TABLE_NAME) - .await - .is_ok()); + assert!( + context + .check_if_table_exists(test::NORMAL_TABLE_NAME) + .await + .is_ok() + ); // The normal table should be deleted from the metadata Delta Lake. - assert!(!context - .data_folders - .local_data_folder - .table_metadata_manager - .is_normal_table(test::NORMAL_TABLE_NAME) - .await - .unwrap()); + assert!( + !context + .data_folders + .local_data_folder + .table_metadata_manager + .is_normal_table(test::NORMAL_TABLE_NAME) + .await + .unwrap() + ); // The normal table should be deleted from the Delta Lake. 
assert!(!temp_dir.path().join("tables").exists()); @@ -640,27 +664,33 @@ mod tests { .await .unwrap(); - assert!(context - .check_if_table_exists(test::MODEL_TABLE_NAME) - .await - .is_err()); + assert!( + context + .check_if_table_exists(test::MODEL_TABLE_NAME) + .await + .is_err() + ); context.drop_table(test::MODEL_TABLE_NAME).await.unwrap(); // The model table should be deregistered from the Apache DataFusion session context. - assert!(context - .check_if_table_exists(test::MODEL_TABLE_NAME) - .await - .is_ok()); + assert!( + context + .check_if_table_exists(test::MODEL_TABLE_NAME) + .await + .is_ok() + ); // The model table should be deleted from the metadata Delta Lake. - assert!(!context - .data_folders - .local_data_folder - .table_metadata_manager - .is_model_table(test::MODEL_TABLE_NAME) - .await - .unwrap()); + assert!( + !context + .data_folders + .local_data_folder + .table_metadata_manager + .is_model_table(test::MODEL_TABLE_NAME) + .await + .unwrap() + ); // The model table should be deleted from the Delta Lake. assert!(!temp_dir.path().join("tables").exists()); @@ -710,11 +740,13 @@ mod tests { .unwrap(); // The normal table should not be deleted from the metadata Delta Lake. - assert!(local_data_folder - .table_metadata_manager - .is_normal_table(test::NORMAL_TABLE_NAME) - .await - .unwrap()); + assert!( + local_data_folder + .table_metadata_manager + .is_normal_table(test::NORMAL_TABLE_NAME) + .await + .unwrap() + ); // The normal table data should be deleted from the Delta Lake. delta_table.load().await.unwrap(); @@ -755,11 +787,13 @@ mod tests { .unwrap(); // The model table should not be deleted from the metadata Delta Lake. - assert!(local_data_folder - .table_metadata_manager - .is_model_table(test::MODEL_TABLE_NAME) - .await - .unwrap()); + assert!( + local_data_folder + .table_metadata_manager + .is_model_table(test::MODEL_TABLE_NAME) + .await + .unwrap() + ); // The model table data should be deleted from the Delta Lake. 
delta_table.load().await.unwrap(); @@ -771,10 +805,12 @@ mod tests { let temp_dir = tempfile::tempdir().unwrap(); let context = create_context(&temp_dir).await; - assert!(context - .truncate_table(test::MODEL_TABLE_NAME) - .await - .is_err()); + assert!( + context + .truncate_table(test::MODEL_TABLE_NAME) + .await + .is_err() + ); } #[tokio::test] @@ -806,11 +842,13 @@ mod tests { .await .unwrap(); - assert!(context - .model_table_metadata_from_default_database_schema(test::NORMAL_TABLE_NAME) - .await - .unwrap() - .is_none()); + assert!( + context + .model_table_metadata_from_default_database_schema(test::NORMAL_TABLE_NAME) + .await + .unwrap() + .is_none() + ); } #[tokio::test] @@ -818,10 +856,12 @@ mod tests { let temp_dir = tempfile::tempdir().unwrap(); let context = create_context(&temp_dir).await; - assert!(context - .model_table_metadata_from_default_database_schema(test::MODEL_TABLE_NAME) - .await - .is_err()); + assert!( + context + .model_table_metadata_from_default_database_schema(test::MODEL_TABLE_NAME) + .await + .is_err() + ); } #[tokio::test] @@ -834,10 +874,12 @@ mod tests { .await .unwrap(); - assert!(context - .check_if_table_exists(test::MODEL_TABLE_NAME) - .await - .is_err()); + assert!( + context + .check_if_table_exists(test::MODEL_TABLE_NAME) + .await + .is_err() + ); } #[tokio::test] @@ -845,10 +887,12 @@ mod tests { let temp_dir = tempfile::tempdir().unwrap(); let context = create_context(&temp_dir).await; - assert!(context - .check_if_table_exists(test::MODEL_TABLE_NAME) - .await - .is_ok()); + assert!( + context + .check_if_table_exists(test::MODEL_TABLE_NAME) + .await + .is_ok() + ); } #[tokio::test] @@ -874,10 +918,12 @@ mod tests { let temp_dir = tempfile::tempdir().unwrap(); let context = create_context(&temp_dir).await; - assert!(context - .schema_of_table_in_default_database_schema(test::MODEL_TABLE_NAME) - .await - .is_err()) + assert!( + context + .schema_of_table_in_default_database_schema(test::MODEL_TABLE_NAME) + .await + 
.is_err() + ) } /// Create a simple [`Context`] that uses `temp_dir` as the local data folder and query data folder. diff --git a/crates/modelardb_server/src/data_folders.rs b/crates/modelardb_server/src/data_folders.rs index e59fbbeae..0b22499a7 100644 --- a/crates/modelardb_server/src/data_folders.rs +++ b/crates/modelardb_server/src/data_folders.rs @@ -22,10 +22,10 @@ use modelardb_storage::delta_lake::DeltaLake; use modelardb_storage::metadata::table_metadata_manager::TableMetadataManager; use modelardb_types::types::ServerMode; -use crate::error::ModelarDbServerError; -use crate::manager::Manager; use crate::ClusterMode; use crate::Result; +use crate::error::ModelarDbServerError; +use crate::manager::Manager; /// Folder for storing metadata and data in Apache Parquet files. #[derive(Clone)] @@ -163,9 +163,11 @@ mod tests { // Tests for try_from_command_line_arguments(). #[tokio::test] async fn test_try_from_empty_command_line_arguments() { - assert!(DataFolders::try_from_command_line_arguments(&[]) - .await - .is_err()); + assert!( + DataFolders::try_from_command_line_arguments(&[]) + .await + .is_err() + ); } #[tokio::test] diff --git a/crates/modelardb_server/src/error.rs b/crates/modelardb_server/src/error.rs index 69e82fdbb..b0acb0f5f 100644 --- a/crates/modelardb_server/src/error.rs +++ b/crates/modelardb_server/src/error.rs @@ -28,8 +28,8 @@ use deltalake::errors::DeltaTableError; use modelardb_common::error::ModelarDbCommonError; use modelardb_storage::error::ModelarDbStorageError; use object_store::Error as ObjectStoreError; -use tonic::transport::Error as TonicTransportError; use tonic::Status as TonicStatusError; +use tonic::transport::Error as TonicTransportError; /// Result type used throughout `modelardb_server`. 
pub type Result = StdResult; diff --git a/crates/modelardb_server/src/manager.rs b/crates/modelardb_server/src/manager.rs index c3baeb652..df5b4abe1 100644 --- a/crates/modelardb_server/src/manager.rs +++ b/crates/modelardb_server/src/manager.rs @@ -24,13 +24,13 @@ use arrow_flight::{Action, Result as FlightResult}; use modelardb_common::arguments; use modelardb_types::types::ServerMode; use tokio::sync::RwLock; +use tonic::Request; use tonic::metadata::MetadataMap; use tonic::transport::Channel; -use tonic::Request; +use crate::PORT; use crate::context::Context; use crate::error::{ModelarDbServerError, Result}; -use crate::PORT; /// Manages metadata related to the manager and provides functionality for interacting with the manager. #[derive(Clone, Debug)] diff --git a/crates/modelardb_server/src/remote.rs b/crates/modelardb_server/src/remote.rs index 51d61ab0c..33de90ad8 100644 --- a/crates/modelardb_server/src/remote.rs +++ b/crates/modelardb_server/src/remote.rs @@ -27,9 +27,9 @@ use std::sync::Arc; use arrow_flight::flight_service_client::FlightServiceClient; use arrow_flight::flight_service_server::{FlightService, FlightServiceServer}; use arrow_flight::{ - utils, Action, ActionType, Criteria, Empty, FlightData, FlightDescriptor, FlightInfo, + Action, ActionType, Criteria, Empty, FlightData, FlightDescriptor, FlightInfo, HandshakeRequest, HandshakeResponse, PollInfo, PutResult, Result as FlightResult, SchemaAsIpc, - SchemaResult, Ticket, + SchemaResult, Ticket, utils, }; use datafusion::arrow::array::{ArrayRef, StringArray, UInt64Array}; use datafusion::arrow::datatypes::SchemaRef; @@ -40,8 +40,8 @@ use datafusion::execution::RecordBatchStream; use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{EmptyRecordBatchStream, SendableRecordBatchStream}; use deltalake::arrow::datatypes::Schema; -use futures::stream::{self, BoxStream, SelectAll}; use futures::StreamExt; +use futures::stream::{self, BoxStream, SelectAll}; use 
modelardb_common::remote::{error_to_status_internal, error_to_status_invalid_argument}; use modelardb_common::{arguments, remote}; use modelardb_storage::metadata::model_table_metadata::ModelTableMetadata; @@ -57,9 +57,9 @@ use tonic::transport::Server; use tonic::{Request, Response, Status, Streaming}; use tracing::{debug, error, info}; +use crate::ClusterMode; use crate::context::Context; use crate::error::{ModelarDbServerError, Result}; -use crate::ClusterMode; /// Start an Apache Arrow Flight server on 0.0.0.0:`port` that passes `context` to the methods that /// process the requests through [`FlightServiceHandler`]. diff --git a/crates/modelardb_server/src/storage/data_sinks.rs b/crates/modelardb_server/src/storage/data_sinks.rs index b0723eeff..57a98d601 100644 --- a/crates/modelardb_server/src/storage/data_sinks.rs +++ b/crates/modelardb_server/src/storage/data_sinks.rs @@ -21,8 +21,8 @@ use std::fmt::{Debug, Formatter, Result as FmtResult}; use std::sync::Arc; use async_trait::async_trait; -use datafusion::arrow::record_batch::RecordBatch; use datafusion::arrow::datatypes::{Field, Schema}; +use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::TaskContext; use datafusion::physical_plan::insert::DataSink; diff --git a/crates/modelardb_server/src/storage/data_transfer.rs b/crates/modelardb_server/src/storage/data_transfer.rs index 8b64b2cd3..3db5ebf19 100644 --- a/crates/modelardb_server/src/storage/data_transfer.rs +++ b/crates/modelardb_server/src/storage/data_transfer.rs @@ -321,10 +321,12 @@ mod tests { let (_normal_table_files_size, model_table_files_size) = write_batches_to_tables(&local_data_folder, 1).await; - assert!(data_transfer - .increase_table_size(test::MODEL_TABLE_NAME, model_table_files_size) - .await - .is_ok()); + assert!( + data_transfer + .increase_table_size(test::MODEL_TABLE_NAME, model_table_files_size) + .await + .is_ok() + ); assert_eq!( 
*data_transfer @@ -370,9 +372,11 @@ mod tests { create_data_transfer_component(local_data_folder.clone()).await; data_transfer.mark_table_as_dropped(test::MODEL_TABLE_NAME); - assert!(data_transfer - .dropped_tables - .contains(test::MODEL_TABLE_NAME)); + assert!( + data_transfer + .dropped_tables + .contains(test::MODEL_TABLE_NAME) + ); } #[tokio::test] @@ -398,9 +402,11 @@ mod tests { // The table should be removed from the in-memory tracking of compressed files and removed // from the dropped tables. - assert!(!data_transfer - .table_size_in_bytes - .contains_key(test::MODEL_TABLE_NAME)); + assert!( + !data_transfer + .table_size_in_bytes + .contains_key(test::MODEL_TABLE_NAME) + ); assert!(data_transfer.dropped_tables.is_empty()); } diff --git a/crates/modelardb_server/src/storage/types.rs b/crates/modelardb_server/src/storage/types.rs index 329cfedfc..04ea7d177 100644 --- a/crates/modelardb_server/src/storage/types.rs +++ b/crates/modelardb_server/src/storage/types.rs @@ -492,8 +492,10 @@ mod tests { test::COMPRESSED_RESERVED_MEMORY_IN_BYTES as isize ); - assert!(!memory_pool - .try_reserve_compressed_memory(2 * test::COMPRESSED_RESERVED_MEMORY_IN_BYTES)); + assert!( + !memory_pool + .try_reserve_compressed_memory(2 * test::COMPRESSED_RESERVED_MEMORY_IN_BYTES) + ); assert_eq!( memory_pool.remaining_compressed_memory_in_bytes(), diff --git a/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs b/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs index 5da1a6c63..4ad3f7e18 100644 --- a/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs +++ b/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs @@ -27,8 +27,8 @@ use datafusion::arrow::compute; use datafusion::arrow::record_batch::RecordBatch; use modelardb_storage::metadata::model_table_metadata::ModelTableMetadata; use modelardb_types::types::{Timestamp, TimestampArray, TimestampBuilder, Value, ValueBuilder}; -use object_store::path::Path; use 
object_store::ObjectStore; +use object_store::path::Path; use tracing::debug; use crate::error::Result; diff --git a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs index d451bc497..31f67a8ac 100644 --- a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs +++ b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs @@ -19,8 +19,8 @@ use std::hash::{DefaultHasher, Hasher}; use std::io::{Error as IOError, ErrorKind as IOErrorKind}; use std::mem; -use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; use dashmap::DashMap; use futures::StreamExt; @@ -33,6 +33,7 @@ use tracing::{debug, error, warn}; use crate::context::Context; use crate::data_folders::DataFolder; use crate::error::Result; +use crate::storage::UNCOMPRESSED_DATA_FOLDER; use crate::storage::compressed_data_buffer::CompressedSegmentBatch; use crate::storage::types::Channels; use crate::storage::types::MemoryPool; @@ -41,7 +42,6 @@ use crate::storage::uncompressed_data_buffer::{ self, IngestedDataBuffer, UncompressedDataBuffer, UncompressedInMemoryDataBuffer, UncompressedOnDiskDataBuffer, }; -use crate::storage::UNCOMPRESSED_DATA_FOLDER; /// Stores uncompressed data points temporarily in an in-memory buffer that spills to Apache Parquet /// files. When an uncompressed data buffer is finished the data is made available for compression. 
@@ -663,7 +663,7 @@ mod tests { use modelardb_types::types::{TimestampBuilder, ValueBuilder}; use object_store::local::LocalFileSystem; use tempfile::TempDir; - use tokio::time::{sleep, Duration}; + use tokio::time::{Duration, sleep}; use crate::storage::UNCOMPRESSED_DATA_BUFFER_CAPACITY; use crate::{ClusterMode, DataFolders}; @@ -806,9 +806,11 @@ mod tests { insert_data_points(1, &mut data_manager, &model_table_metadata, TAG_VALUE).await; - assert!(data_manager - .uncompressed_in_memory_data_buffers - .contains_key(&TAG_HASH)); + assert!( + data_manager + .uncompressed_in_memory_data_buffers + .contains_key(&TAG_HASH) + ); assert_eq!( data_manager .uncompressed_in_memory_data_buffers @@ -832,9 +834,11 @@ mod tests { assert_eq!(data_manager.uncompressed_in_memory_data_buffers.len(), 1); assert_eq!(data_manager.uncompressed_on_disk_data_buffers.len(), 0); - assert!(data_manager - .uncompressed_in_memory_data_buffers - .contains_key(&TAG_HASH)); + assert!( + data_manager + .uncompressed_in_memory_data_buffers + .contains_key(&TAG_HASH) + ); assert_eq!( data_manager .uncompressed_in_memory_data_buffers @@ -862,9 +866,11 @@ mod tests { assert_eq!(data_manager.uncompressed_in_memory_data_buffers.len(), 1); assert_eq!(data_manager.uncompressed_on_disk_data_buffers.len(), 0); - assert!(data_manager - .uncompressed_in_memory_data_buffers - .contains_key(&TAG_HASH)); + assert!( + data_manager + .uncompressed_in_memory_data_buffers + .contains_key(&TAG_HASH) + ); assert_eq!( data_manager .uncompressed_in_memory_data_buffers @@ -950,11 +956,13 @@ mod tests { ) .await; - assert!(data_manager - .channels - .uncompressed_data_receiver - .try_recv() - .is_ok()); + assert!( + data_manager + .channels + .uncompressed_data_receiver + .try_recv() + .is_ok() + ); } #[tokio::test] @@ -970,17 +978,21 @@ mod tests { ) .await; - assert!(data_manager - .channels - .uncompressed_data_receiver - .try_recv() - .is_ok()); + assert!( + data_manager + .channels + .uncompressed_data_receiver + 
.try_recv() + .is_ok() + ); - assert!(data_manager - .channels - .uncompressed_data_receiver - .try_recv() - .is_ok()); + assert!( + data_manager + .channels + .uncompressed_data_receiver + .try_recv() + .is_ok() + ); } #[tokio::test] @@ -988,11 +1000,13 @@ mod tests { let temp_dir = tempfile::tempdir().unwrap(); let (data_manager, _model_table_metadata) = create_managers(&temp_dir).await; - assert!(data_manager - .channels - .uncompressed_data_receiver - .try_recv() - .is_err()); + assert!( + data_manager + .channels + .uncompressed_data_receiver + .try_recv() + .is_err() + ); } #[tokio::test] diff --git a/crates/modelardb_server/tests/integration_test.rs b/crates/modelardb_server/tests/integration_test.rs index 10504e064..e123651cb 100644 --- a/crates/modelardb_server/tests/integration_test.rs +++ b/crates/modelardb_server/tests/integration_test.rs @@ -23,13 +23,13 @@ use std::ops::Range; use std::process::{Child, Command, Stdio}; use std::str; use std::string::String; -use std::sync::atomic::{AtomicU16, Ordering}; use std::sync::Arc; +use std::sync::atomic::{AtomicU16, Ordering}; use std::thread; use std::time::Duration; use arrow_flight::flight_service_client::FlightServiceClient; -use arrow_flight::{utils, Action, Criteria, FlightData, FlightDescriptor, PutResult, Ticket}; +use arrow_flight::{Action, Criteria, FlightData, FlightDescriptor, PutResult, Ticket, utils}; use bytes::{Buf, Bytes}; use datafusion::arrow::array::{Array, Float64Array, StringArray, UInt64Array}; use datafusion::arrow::compute; @@ -38,7 +38,7 @@ use datafusion::arrow::ipc::convert; use datafusion::arrow::ipc::reader::StreamReader; use datafusion::arrow::ipc::writer::{DictionaryTracker, IpcDataGenerator, IpcWriteOptions}; use datafusion::arrow::record_batch::RecordBatch; -use futures::{stream, StreamExt}; +use futures::{StreamExt, stream}; use modelardb_common::test::data_generation; use modelardb_types::types::ErrorBound; use sysinfo::{Pid, ProcessesToUpdate, System}; @@ -452,7 +452,7 @@ 
impl TestContext { let schema_result = self .client .get_schema(Request::new(FlightDescriptor::new_path(vec![ - table_name.to_owned() + table_name.to_owned(), ]))) .await .unwrap() @@ -949,9 +949,11 @@ fn test_cannot_ingest_invalid_time_series() { test_context.create_table(TABLE_NAME, TableType::ModelTable); - assert!(test_context - .send_time_series_to_server(flight_data) - .is_err()); + assert!( + test_context + .send_time_series_to_server(flight_data) + .is_err() + ); test_context.flush_data_to_disk(); diff --git a/crates/modelardb_storage/src/delta_lake.rs b/crates/modelardb_storage/src/delta_lake.rs index cebb04e0d..2b810176a 100644 --- a/crates/modelardb_storage/src/delta_lake.rs +++ b/crates/modelardb_storage/src/delta_lake.rs @@ -32,15 +32,15 @@ use deltalake::{DeltaOps, DeltaTable, DeltaTableError}; use futures::{StreamExt, TryStreamExt}; use modelardb_common::arguments; use modelardb_types::schemas::{COMPRESSED_SCHEMA, FIELD_COLUMN}; +use object_store::ObjectStore; use object_store::aws::AmazonS3Builder; use object_store::local::LocalFileSystem; use object_store::path::Path; -use object_store::ObjectStore; use url::Url; use crate::error::{ModelarDbStorageError, Result}; use crate::metadata::model_table_metadata::ModelTableMetadata; -use crate::{apache_parquet_writer_properties, METADATA_FOLDER, TABLE_FOLDER}; +use crate::{METADATA_FOLDER, TABLE_FOLDER, apache_parquet_writer_properties}; /// Functionality for managing Delta Lake tables in a local folder or an object store. 
pub struct DeltaLake { diff --git a/crates/modelardb_storage/src/error.rs b/crates/modelardb_storage/src/error.rs index 5e6c6a781..24668da90 100644 --- a/crates/modelardb_storage/src/error.rs +++ b/crates/modelardb_storage/src/error.rs @@ -26,8 +26,8 @@ use datafusion::parquet::errors::ParquetError; use deltalake::errors::DeltaTableError; use modelardb_common::error::ModelarDbCommonError; use modelardb_types::error::ModelarDbTypesError; -use object_store::path::Error as ObjectStorePathError; use object_store::Error as ObjectStoreError; +use object_store::path::Error as ObjectStorePathError; use sqlparser::parser::ParserError; /// Result type used throughout `modelardb_storage`. diff --git a/crates/modelardb_storage/src/lib.rs b/crates/modelardb_storage/src/lib.rs index 8cc17d118..be4fb70ee 100644 --- a/crates/modelardb_storage/src/lib.rs +++ b/crates/modelardb_storage/src/lib.rs @@ -40,8 +40,8 @@ use arrow_flight::{IpcMessage, SchemaAsIpc}; use bytes::{Buf, Bytes}; use datafusion::catalog::TableProvider; use datafusion::common::{DFSchema, ToDFSchema}; -use datafusion::execution::session_state::SessionStateBuilder; use datafusion::execution::SendableRecordBatchStream; +use datafusion::execution::session_state::SessionStateBuilder; use datafusion::parquet::arrow::async_reader::{ AsyncFileReader, ParquetObjectReader, ParquetRecordBatchStream, }; @@ -57,8 +57,8 @@ use deltalake::DeltaTable; use futures::StreamExt; use modelardb_types::schemas::TABLE_METADATA_SCHEMA; use modelardb_types::types::ErrorBound; -use object_store::path::Path; use object_store::ObjectStore; +use object_store::path::Path; use sqlparser::ast::Statement; use crate::error::{ModelarDbStorageError, Result}; diff --git a/crates/modelardb_storage/src/metadata/model_table_metadata.rs b/crates/modelardb_storage/src/metadata/model_table_metadata.rs index 556c33748..8155c32f7 100644 --- a/crates/modelardb_storage/src/metadata/model_table_metadata.rs +++ 
b/crates/modelardb_storage/src/metadata/model_table_metadata.rs @@ -414,8 +414,8 @@ mod test { assert!(result.is_err()); } - fn model_table_schema_error_bounds_and_generated_columns( - ) -> (Arc, Vec, Vec>) { + fn model_table_schema_error_bounds_and_generated_columns() + -> (Arc, Vec, Vec>) { ( Arc::new(Schema::new(vec![ Field::new("location", DataType::Utf8, false), diff --git a/crates/modelardb_storage/src/metadata/table_metadata_manager.rs b/crates/modelardb_storage/src/metadata/table_metadata_manager.rs index ec48af975..83064150a 100644 --- a/crates/modelardb_storage/src/metadata/table_metadata_manager.rs +++ b/crates/modelardb_storage/src/metadata/table_metadata_manager.rs @@ -24,7 +24,7 @@ use arrow::array::{Array, BinaryArray, BooleanArray, Float32Array, Int16Array, S use arrow::datatypes::{DataType, Field, Schema}; use datafusion::common::{DFSchema, ToDFSchema}; use datafusion::logical_expr::lit; -use datafusion::prelude::{col, SessionContext}; +use datafusion::prelude::{SessionContext, col}; use modelardb_common::test::ERROR_BOUND_ZERO; use modelardb_types::types::ErrorBound; @@ -572,17 +572,21 @@ mod tests { .unwrap(); // Verify that the tables were created, registered, and has the expected columns. 
- assert!(metadata_manager - .session_context - .sql("SELECT table_name FROM normal_table_metadata") - .await - .is_ok()); + assert!( + metadata_manager + .session_context + .sql("SELECT table_name FROM normal_table_metadata") + .await + .is_ok() + ); - assert!(metadata_manager - .session_context - .sql("SELECT table_name, query_schema FROM model_table_metadata") - .await - .is_ok()); + assert!( + metadata_manager + .session_context + .sql("SELECT table_name, query_schema FROM model_table_metadata") + .await + .is_ok() + ); assert!(metadata_manager .session_context @@ -595,37 +599,45 @@ mod tests { #[tokio::test] async fn test_normal_table_is_normal_table() { let (_temp_dir, metadata_manager) = create_metadata_manager_and_save_normal_tables().await; - assert!(metadata_manager - .is_normal_table("normal_table_1") - .await - .unwrap()); + assert!( + metadata_manager + .is_normal_table("normal_table_1") + .await + .unwrap() + ); } #[tokio::test] async fn test_model_table_is_not_normal_table() { let (_temp_dir, metadata_manager) = create_metadata_manager_and_save_model_table().await; - assert!(!metadata_manager - .is_normal_table(test::MODEL_TABLE_NAME) - .await - .unwrap()); + assert!( + !metadata_manager + .is_normal_table(test::MODEL_TABLE_NAME) + .await + .unwrap() + ); } #[tokio::test] async fn test_model_table_is_model_table() { let (_temp_dir, metadata_manager) = create_metadata_manager_and_save_model_table().await; - assert!(metadata_manager - .is_model_table(test::MODEL_TABLE_NAME) - .await - .unwrap()); + assert!( + metadata_manager + .is_model_table(test::MODEL_TABLE_NAME) + .await + .unwrap() + ); } #[tokio::test] async fn test_normal_table_is_not_model_table() { let (_temp_dir, metadata_manager) = create_metadata_manager_and_save_normal_tables().await; - assert!(!metadata_manager - .is_model_table("normal_table_1") - .await - .unwrap()); + assert!( + !metadata_manager + .is_model_table("normal_table_1") + .await + .unwrap() + ); } #[tokio::test] @@ -693,10 
+705,9 @@ mod tests { ); assert_eq!( **batch.column(1), - BinaryArray::from_vec(vec![&try_convert_schema_to_bytes( - &test::model_table_metadata().query_schema - ) - .unwrap()]) + BinaryArray::from_vec(vec![ + &try_convert_schema_to_bytes(&test::model_table_metadata().query_schema).unwrap() + ]) ); // Check that a row has been added to the model_table_field_columns table for each field column. @@ -771,10 +782,12 @@ mod tests { async fn test_drop_table_metadata_for_missing_table() { let (_temp_dir, metadata_manager) = create_metadata_manager_and_save_normal_tables().await; - assert!(metadata_manager - .drop_table_metadata("missing_table") - .await - .is_err()); + assert!( + metadata_manager + .drop_table_metadata("missing_table") + .await + .is_err() + ); } async fn create_metadata_manager_and_save_normal_tables() -> (TempDir, TableMetadataManager) { diff --git a/crates/modelardb_storage/src/parser.rs b/crates/modelardb_storage/src/parser.rs index 8d8eb8d8d..2731c2812 100644 --- a/crates/modelardb_storage/src/parser.rs +++ b/crates/modelardb_storage/src/parser.rs @@ -31,8 +31,8 @@ use datafusion::execution::context::ExecutionProps; use datafusion::functions; use datafusion::logical_expr::{AggregateUDF, Expr as DFExpr, ScalarUDF, TableSource, WindowUDF}; use datafusion::physical_expr::planner; -use datafusion::sql::planner::{ContextProvider, PlannerContext, SqlToRel}; use datafusion::sql::TableReference; +use datafusion::sql::planner::{ContextProvider, PlannerContext, SqlToRel}; use modelardb_types::functions::normalize_name; // Fully imported to not conflict. 
use modelardb_types::types::{ArrowTimestamp, ArrowValue, ErrorBound}; use sqlparser::ast::{ @@ -42,7 +42,7 @@ use sqlparser::ast::{ TruncateTableTarget, Value, }; use sqlparser::dialect::{Dialect, GenericDialect}; -use sqlparser::keywords::{Keyword, ALL_KEYWORDS}; +use sqlparser::keywords::{ALL_KEYWORDS, Keyword}; use sqlparser::parser::{Parser, ParserError}; use sqlparser::tokenizer::{Span, Token}; @@ -268,7 +268,7 @@ impl ModelarDbDialect { column_type => { return Err(ParserError::ParserError(format!( "Expected TIMESTAMP, FIELD, or TAG, found: {column_type}." - ))) + ))); } }; @@ -870,7 +870,7 @@ fn column_defs_to_model_table_query_schema( "{option} is not supported in model tables." )), None, - )) + )); } } } @@ -884,7 +884,7 @@ fn column_defs_to_model_table_query_schema( "{data_type} is not supported in model tables." )), None, - )) + )); } }; @@ -1161,60 +1161,74 @@ mod tests { #[test] fn test_tokenize_and_parse_create_model_table_without_create() { - assert!(tokenize_and_parse_sql_statement( - "MODEL TABLE table_name(timestamp TIMESTAMP, field FIELD, tag TAG)", - ) - .is_err()); + assert!( + tokenize_and_parse_sql_statement( + "MODEL TABLE table_name(timestamp TIMESTAMP, field FIELD, tag TAG)", + ) + .is_err() + ); } #[test] fn test_tokenize_and_parse_create_model_table_without_create_model_space() { - assert!(tokenize_and_parse_sql_statement( - "CREATEMODEL TABLE table_name(timestamp TIMESTAMP, field FIELD, tag TAG)", - ) - .is_err()); + assert!( + tokenize_and_parse_sql_statement( + "CREATEMODEL TABLE table_name(timestamp TIMESTAMP, field FIELD, tag TAG)", + ) + .is_err() + ); } #[test] fn test_tokenize_and_parse_create_model_table_without_model() { // Tracks if sqlparser at some point can parse fields/tags in a TABLE. 
- assert!(tokenize_and_parse_sql_statement( - "CREATE TABLE table_name(timestamp TIMESTAMP, field FIELD, field_one FIELD(10.5), + assert!( + tokenize_and_parse_sql_statement( + "CREATE TABLE table_name(timestamp TIMESTAMP, field FIELD, field_one FIELD(10.5), field_two FIELD(1%), tag TAG)", - ) - .is_err()); + ) + .is_err() + ); } #[test] fn test_tokenize_and_parse_create_model_table_without_model_table_space() { - assert!(tokenize_and_parse_sql_statement( - "CREATE MODELTABLE table_name(timestamp TIMESTAMP, field FIELD, tag TAG)", - ) - .is_err()); + assert!( + tokenize_and_parse_sql_statement( + "CREATE MODELTABLE table_name(timestamp TIMESTAMP, field FIELD, tag TAG)", + ) + .is_err() + ); } #[test] fn test_tokenize_and_parse_create_model_table_without_table_name() { - assert!(tokenize_and_parse_sql_statement( - "CREATE MODEL TABLE(timestamp TIMESTAMP, field FIELD, tag TAG)", - ) - .is_err()); + assert!( + tokenize_and_parse_sql_statement( + "CREATE MODEL TABLE(timestamp TIMESTAMP, field FIELD, tag TAG)", + ) + .is_err() + ); } #[test] fn test_tokenize_and_parse_create_model_table_without_table_table_name_space() { - assert!(tokenize_and_parse_sql_statement( - "CREATE MODEL TABLEtable_name(timestamp TIMESTAMP, field FIELD, tag TAG)", - ) - .is_err()); + assert!( + tokenize_and_parse_sql_statement( + "CREATE MODEL TABLEtable_name(timestamp TIMESTAMP, field FIELD, tag TAG)", + ) + .is_err() + ); } #[test] fn test_tokenize_and_parse_create_model_table_without_start_parentheses() { - assert!(tokenize_and_parse_sql_statement( - "CREATE MODEL TABLE table_name timestamp TIMESTAMP, field FIELD, tag TAG)", - ) - .is_err()); + assert!( + tokenize_and_parse_sql_statement( + "CREATE MODEL TABLE table_name timestamp TIMESTAMP, field FIELD, tag TAG)", + ) + .is_err() + ); } #[test] @@ -1227,59 +1241,73 @@ mod tests { #[test] fn test_tokenize_and_parse_create_model_table_with_sql_types() { - assert!(tokenize_and_parse_sql_statement( - "CREATE MODEL TABLE table_name(timestamp 
TIMESTAMP, field REAL, tag VARCHAR)", - ) - .is_err()); + assert!( + tokenize_and_parse_sql_statement( + "CREATE MODEL TABLE table_name(timestamp TIMESTAMP, field REAL, tag VARCHAR)", + ) + .is_err() + ); } #[test] fn test_tokenize_and_parse_create_model_table_without_column_name() { - assert!(tokenize_and_parse_sql_statement( - "CREATE MODEL TABLE table_name(TIMESTAMP, field FIELD, tag TAG)", - ) - .is_err()); + assert!( + tokenize_and_parse_sql_statement( + "CREATE MODEL TABLE table_name(TIMESTAMP, field FIELD, tag TAG)", + ) + .is_err() + ); } #[test] fn test_tokenize_and_parse_create_model_table_with_generated_timestamps() { - assert!(tokenize_and_parse_sql_statement( - "CREATE MODEL TABLE table_name(timestamp TIMESTAMP AS (37), field FIELD, tag TAG)", - ) - .is_err()); + assert!( + tokenize_and_parse_sql_statement( + "CREATE MODEL TABLE table_name(timestamp TIMESTAMP AS (37), field FIELD, tag TAG)", + ) + .is_err() + ); } #[test] fn test_tokenize_and_parse_create_model_table_with_generated_tags() { - assert!(tokenize_and_parse_sql_statement( - "CREATE MODEL TABLE table_name(timestamp TIMESTAMP, field FIELD, tag TAG AS (37))", - ) - .is_err()); + assert!( + tokenize_and_parse_sql_statement( + "CREATE MODEL TABLE table_name(timestamp TIMESTAMP, field FIELD, tag TAG AS (37))", + ) + .is_err() + ); } #[test] fn test_tokenize_and_parse_create_model_table_with_generated_fields_without_parentheses() { - assert!(tokenize_and_parse_sql_statement( - "CREATE MODEL TABLE table_name(timestamp TIMESTAMP, field FIELD AS 37, tag TAG)", - ) - .is_err()); + assert!( + tokenize_and_parse_sql_statement( + "CREATE MODEL TABLE table_name(timestamp TIMESTAMP, field FIELD AS 37, tag TAG)", + ) + .is_err() + ); } #[test] fn test_tokenize_and_parse_create_model_table_with_generated_fields_without_start_parentheses() { - assert!(tokenize_and_parse_sql_statement( - "CREATE MODEL TABLE table_name(timestamp TIMESTAMP, field FIELD AS 37), tag TAG)", - ) - .is_err()); + assert!( + 
tokenize_and_parse_sql_statement( + "CREATE MODEL TABLE table_name(timestamp TIMESTAMP, field FIELD AS 37), tag TAG)", + ) + .is_err() + ); } #[test] fn test_tokenize_and_parse_create_model_table_with_generated_fields_without_end_parentheses() { - assert!(tokenize_and_parse_sql_statement( - "CREATE MODEL TABLE table_name(timestamp TIMESTAMP, field FIELD AS (37, tag TAG)", - ) - .is_err()); + assert!( + tokenize_and_parse_sql_statement( + "CREATE MODEL TABLE table_name(timestamp TIMESTAMP, field FIELD AS (37, tag TAG)", + ) + .is_err() + ); } #[test] @@ -1302,26 +1330,32 @@ mod tests { #[test] fn test_tokenize_and_parse_create_model_table_without_column_type() { - assert!(tokenize_and_parse_sql_statement( - "CREATE MODEL TABLE table_name(timestamp, field FIELD, tag TAG)", - ) - .is_err()); + assert!( + tokenize_and_parse_sql_statement( + "CREATE MODEL TABLE table_name(timestamp, field FIELD, tag TAG)", + ) + .is_err() + ); } #[test] fn test_tokenize_and_parse_create_model_table_without_comma() { - assert!(tokenize_and_parse_sql_statement( - "CREATE MODEL TABLE table_name(timestamp TIMESTAMP field FIELD, tag TAG)", - ) - .is_err()); + assert!( + tokenize_and_parse_sql_statement( + "CREATE MODEL TABLE table_name(timestamp TIMESTAMP field FIELD, tag TAG)", + ) + .is_err() + ); } #[test] fn test_tokenize_and_parse_create_model_table_without_end_parentheses() { - assert!(tokenize_and_parse_sql_statement( - "CREATE MODEL TABLE table_name(timestamp TIMESTAMP, field FIELD, tag TAG", - ) - .is_err()); + assert!( + tokenize_and_parse_sql_statement( + "CREATE MODEL TABLE table_name(timestamp TIMESTAMP, field FIELD, tag TAG", + ) + .is_err() + ); } #[test] @@ -1456,28 +1490,34 @@ mod tests { #[test] fn test_tokenize_and_parse_settings_with_click_house_dialect() { - assert!(Parser::parse_sql( - &ClickHouseDialect {}, - "SELECT * FROM table_name SETTINGS convert_query_to_cnf = true" + assert!( + Parser::parse_sql( + &ClickHouseDialect {}, + "SELECT * FROM table_name SETTINGS 
convert_query_to_cnf = true" + ) + .is_ok() ) - .is_ok()) } #[test] fn test_tokenize_and_parse_settings_with_modelardb_dialect() { - assert!(Parser::parse_sql( - &ModelarDbDialect::new(), - "SELECT * FROM table_name SETTINGS convert_query_to_cnf = true" + assert!( + Parser::parse_sql( + &ModelarDbDialect::new(), + "SELECT * FROM table_name SETTINGS convert_query_to_cnf = true" + ) + .is_err() ) - .is_err()) } #[test] fn test_tokenize_and_parse_include_one_address_select() { - assert!(tokenize_and_parse_sql_statement( - "INCLUDE 'grpc://192.168.1.2:9999' SELECT * FROM table_name", - ) - .is_ok()); + assert!( + tokenize_and_parse_sql_statement( + "INCLUDE 'grpc://192.168.1.2:9999' SELECT * FROM table_name", + ) + .is_ok() + ); } #[test] @@ -1490,18 +1530,20 @@ mod tests { #[test] fn test_tokenize_and_parse_include_one_double_quoted_address_select() { - assert!(tokenize_and_parse_sql_statement( - "INCLUDE \"grpc://192.168.1.2:9999\" SELECT * FROM table_name", - ) - .is_err()); + assert!( + tokenize_and_parse_sql_statement( + "INCLUDE \"grpc://192.168.1.2:9999\" SELECT * FROM table_name", + ) + .is_err() + ); } #[test] fn test_tokenize_and_parse_one_address_select() { - assert!(tokenize_and_parse_sql_statement( - "'grpc://192.168.1.2:9999' SELECT * FROM table_name", - ) - .is_err()); + assert!( + tokenize_and_parse_sql_statement("'grpc://192.168.1.2:9999' SELECT * FROM table_name",) + .is_err() + ); } #[test] diff --git a/crates/modelardb_storage/src/query/generated_as_exec.rs b/crates/modelardb_storage/src/query/generated_as_exec.rs index df6ab5157..0d5d1591b 100644 --- a/crates/modelardb_storage/src/query/generated_as_exec.rs +++ b/crates/modelardb_storage/src/query/generated_as_exec.rs @@ -36,8 +36,8 @@ use datafusion::physical_plan::{ DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, PhysicalExpr, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, }; -use futures::stream::Stream; use futures::StreamExt; +use 
futures::stream::Stream; use modelardb_types::types::{TimestampArray, ValueArray}; /// A column the [`GeneratedAsExec`] must add to each of the [`RecordBatches`](RecordBatch) using diff --git a/crates/modelardb_storage/src/query/metadata_table.rs b/crates/modelardb_storage/src/query/metadata_table.rs index 89e11b79a..fee2571b6 100644 --- a/crates/modelardb_storage/src/query/metadata_table.rs +++ b/crates/modelardb_storage/src/query/metadata_table.rs @@ -24,7 +24,7 @@ use datafusion::datasource::{TableProvider, TableType}; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::logical_expr::Expr; use datafusion::physical_plan::ExecutionPlan; -use deltalake::{arrow::datatypes::SchemaRef, DeltaTable}; +use deltalake::{DeltaTable, arrow::datatypes::SchemaRef}; use tonic::async_trait; /// A queryable representation of a metadata table. [`MetadataTable`] wraps the [`TableProvider`] of diff --git a/crates/modelardb_storage/src/query/model_table.rs b/crates/modelardb_storage/src/query/model_table.rs index 9eddfeac3..4eeb9c2b0 100644 --- a/crates/modelardb_storage/src/query/model_table.rs +++ b/crates/modelardb_storage/src/query/model_table.rs @@ -38,10 +38,10 @@ use datafusion::datasource::{TableProvider, TableType}; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::context::ExecutionProps; use datafusion::logical_expr::dml::InsertOp; -use datafusion::logical_expr::{self, utils, BinaryExpr, Expr, Operator}; +use datafusion::logical_expr::{self, BinaryExpr, Expr, Operator, utils}; use datafusion::physical_expr::expressions::Column; use datafusion::physical_expr::{ - planner, LexOrdering, LexRequirement, PhysicalSortExpr, PhysicalSortRequirement, + LexOrdering, LexRequirement, PhysicalSortExpr, PhysicalSortRequirement, planner, }; use datafusion::physical_plan::insert::{DataSink, DataSinkExec}; use datafusion::physical_plan::{ExecutionPlan, PhysicalExpr}; diff --git 
a/crates/modelardb_storage/src/query/normal_table.rs b/crates/modelardb_storage/src/query/normal_table.rs index f8af954ed..191714a24 100644 --- a/crates/modelardb_storage/src/query/normal_table.rs +++ b/crates/modelardb_storage/src/query/normal_table.rs @@ -29,7 +29,7 @@ use datafusion::logical_expr::dml::InsertOp; use datafusion::logical_expr::{Expr, LogicalPlan, TableProviderFilterPushDown}; use datafusion::physical_plan::insert::{DataSink, DataSinkExec}; use datafusion::physical_plan::{ExecutionPlan, Statistics}; -use deltalake::{arrow::datatypes::SchemaRef, DeltaTable}; +use deltalake::{DeltaTable, arrow::datatypes::SchemaRef}; use tonic::async_trait; /// A queryable representation of a normal table. [`NormalTable`] wraps the [`TableProvider`] diff --git a/crates/modelardb_storage/src/test.rs b/crates/modelardb_storage/src/test.rs index c6df86aed..25d2eb368 100644 --- a/crates/modelardb_storage/src/test.rs +++ b/crates/modelardb_storage/src/test.rs @@ -17,7 +17,7 @@ use std::sync::Arc; -use arrow::array::{BinaryArray, Float32Array, RecordBatch, StringArray, UInt16Array, UInt8Array}; +use arrow::array::{BinaryArray, Float32Array, RecordBatch, StringArray, UInt8Array, UInt16Array}; use arrow::compute::concat_batches; use arrow::datatypes::{ArrowPrimitiveType, DataType, Field, Schema}; use modelardb_common::test::{ERROR_BOUND_FIVE, ERROR_BOUND_ONE, ERROR_BOUND_ZERO}; From 950a9a56023b466b6439fcc6dd62f5efda6d7148 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Thu, 27 Feb 2025 22:14:38 +0100 Subject: [PATCH 61/69] Change QuerySchema to GridSchema to match schema name --- .../src/storage/uncompressed_data_manager.rs | 4 ++-- crates/modelardb_storage/src/parser.rs | 4 ++-- crates/modelardb_storage/src/query/model_table.rs | 4 ++-- crates/modelardb_types/src/schemas.rs | 8 ++++---- crates/modelardb_types/src/types.rs | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git 
a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs index 31f67a8ac..c6993dd80 100644 --- a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs +++ b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs @@ -1023,13 +1023,13 @@ mod tests { // message inserted would block the thread until the data messages have been processed. let number_of_buffers = reserved_memory / uncompressed_data_buffer::compute_memory_size(number_of_fields); - for tag_hash in 0..number_of_buffers { + for tag_value in 0..number_of_buffers { // Allocate many buffers that are never finished. insert_data_points( 1, &mut data_manager, &model_table_metadata.clone(), - &tag_hash.to_string(), + &tag_value.to_string(), ) .await; } diff --git a/crates/modelardb_storage/src/parser.rs b/crates/modelardb_storage/src/parser.rs index 2731c2812..144c4ea9b 100644 --- a/crates/modelardb_storage/src/parser.rs +++ b/crates/modelardb_storage/src/parser.rs @@ -1184,8 +1184,8 @@ mod tests { // Tracks if sqlparser at some point can parse fields/tags in a TABLE. assert!( tokenize_and_parse_sql_statement( - "CREATE TABLE table_name(timestamp TIMESTAMP, field FIELD, field_one FIELD(10.5), - field_two FIELD(1%), tag TAG)", + "CREATE TABLE table_name(timestamp TIMESTAMP, field FIELD, + field_one FIELD(10.5), field_two FIELD(1%), tag TAG)", ) .is_err() ); diff --git a/crates/modelardb_storage/src/query/model_table.rs b/crates/modelardb_storage/src/query/model_table.rs index 4eeb9c2b0..e3d40e8fe 100644 --- a/crates/modelardb_storage/src/query/model_table.rs +++ b/crates/modelardb_storage/src/query/model_table.rs @@ -217,10 +217,10 @@ fn query_order_and_requirement( let tag_column_name = model_table_metadata.schema.field(*index).name(); // unwrap() is safe as the tag columns are always present in the schema. 
- let segment_index = schema.index_of(tag_column_name).unwrap(); + let schema_index = schema.index_of(tag_column_name).unwrap(); physical_sort_exprs.push(PhysicalSortExpr { - expr: Arc::new(Column::new(tag_column_name, segment_index)), + expr: Arc::new(Column::new(tag_column_name, schema_index)), options: sort_options, }); } diff --git a/crates/modelardb_types/src/schemas.rs b/crates/modelardb_types/src/schemas.rs index 7963bdc3e..c819a9185 100644 --- a/crates/modelardb_types/src/schemas.rs +++ b/crates/modelardb_types/src/schemas.rs @@ -21,8 +21,8 @@ use std::sync::LazyLock; use arrow::datatypes::{ArrowPrimitiveType, DataType, Field, Schema}; use crate::types::{ - ArrowTimestamp, ArrowValue, CompressedSchema, ConfigurationSchema, QueryCompressedSchema, - QuerySchema, TableMetadataSchema, + ArrowTimestamp, ArrowValue, CompressedSchema, ConfigurationSchema, GridSchema, + QueryCompressedSchema, TableMetadataSchema, }; /// Name of the column used to partition the compressed segments. @@ -65,8 +65,8 @@ pub static COMPRESSED_METADATA_SIZE_IN_BYTES: LazyLock = LazyLock::new(|| }); /// [`RecordBatch`](arrow::record_batch::RecordBatch) [`Schema`] used internally during query processing. 
-pub static GRID_SCHEMA: LazyLock = LazyLock::new(|| { - QuerySchema(Arc::new(Schema::new(vec![ +pub static GRID_SCHEMA: LazyLock = LazyLock::new(|| { + GridSchema(Arc::new(Schema::new(vec![ Field::new("timestamp", ArrowTimestamp::DATA_TYPE, false), Field::new("value", ArrowValue::DATA_TYPE, false), ]))) diff --git a/crates/modelardb_types/src/types.rs b/crates/modelardb_types/src/types.rs index 851060886..141676212 100644 --- a/crates/modelardb_types/src/types.rs +++ b/crates/modelardb_types/src/types.rs @@ -48,7 +48,7 @@ pub struct CompressedSchema(pub arrow::datatypes::SchemaRef); pub struct QueryCompressedSchema(pub arrow::datatypes::SchemaRef); #[derive(Clone)] -pub struct QuerySchema(pub arrow::datatypes::SchemaRef); +pub struct GridSchema(pub arrow::datatypes::SchemaRef); #[derive(Clone)] pub struct ConfigurationSchema(pub arrow::datatypes::SchemaRef); From c777e3dd1765cde205c2668f09138152d97d9fd2 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Thu, 27 Feb 2025 22:30:40 +0100 Subject: [PATCH 62/69] Fix cargo doc issue --- crates/modelardb_compression/src/compression.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/modelardb_compression/src/compression.rs b/crates/modelardb_compression/src/compression.rs index 1a524ff19..a443e8e59 100644 --- a/crates/modelardb_compression/src/compression.rs +++ b/crates/modelardb_compression/src/compression.rs @@ -41,7 +41,7 @@ const RESIDUAL_VALUES_MAX_LENGTH: u8 = 255; /// Assumes `uncompressed_timestamps` and `uncompressed_values` are sorted according to /// `uncompressed_timestamps`. Returns [`ModelarDbCompressionError`] if `uncompressed_timestamps` /// and `uncompressed_values` have different lengths, otherwise the resulting compressed segments -/// are returned as a [`RecordBatch`] with the [`compressed_schema`] schema. +/// are returned as a [`RecordBatch`] with the `compressed_schema` schema. 
pub fn try_compress( compressed_schema: Arc, tag_values: Vec, From 118c6ce63c96e8d44fabc0d186dbbbbb9dde0cdc Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Fri, 28 Feb 2025 18:05:43 +0100 Subject: [PATCH 63/69] Update based on comments from @chrthomsen --- crates/modelardb_server/src/storage/data_sinks.rs | 4 ++-- .../src/storage/uncompressed_data_manager.rs | 7 +++---- crates/modelardb_storage/src/query/sorted_join_exec.rs | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/crates/modelardb_server/src/storage/data_sinks.rs b/crates/modelardb_server/src/storage/data_sinks.rs index 57a98d601..86743b361 100644 --- a/crates/modelardb_server/src/storage/data_sinks.rs +++ b/crates/modelardb_server/src/storage/data_sinks.rs @@ -153,8 +153,8 @@ impl DataSink for ModelTableDataSink { let record_batch = record_batch?.project(&self.model_table_metadata.query_schema_to_schema)?; - // Manually ensure the fields are not nullable. It is not possible to insert null values - // into model tables but the schema of the record batch may contain nullable fields. + // Ensure the fields are not nullable. It is not possible to insert null values into + // model tables but the schema of the record batch may contain nullable fields. 
let mut fields: Vec = Vec::with_capacity(record_batch.schema().fields.len()); for field in record_batch.schema().fields() { fields.push(Field::new(field.name(), field.data_type().clone(), false)); diff --git a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs index c6993dd80..d38d01eae 100644 --- a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs +++ b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs @@ -176,8 +176,7 @@ impl UncompressedDataManager { let (timestamp_column_array, field_column_arrays, tag_column_arrays) = model_table_metadata.column_arrays(&data_points)?; - // For each data point, compute a hash from the tags and pass the fields to the storage - // engine so they can be added to the appropriate UncompressedDataBuffer. + // For each data point, insert the timestamp and values into the corresponding UncompressedDataBuffer. for (index, timestamp) in timestamp_column_array.iter().enumerate() { let tag_values: Vec = tag_column_arrays .iter() @@ -635,8 +634,8 @@ impl UncompressedDataManager { } } -/// Calculate a unique hash for a specific combination of `table_name` and `tag_values`. The hash -/// can be used to identify a specific multivariate time series during ingestion. +/// Calculate a hash for a combination of `table_name` and `tag_values`. The hash can be used to +/// identify a specific multivariate time series during ingestion. 
fn calculate_tag_hash(table_name: &str, tag_values: &[String]) -> u64 { let mut hash_data = tag_values.to_vec(); hash_data.push(table_name.to_string()); diff --git a/crates/modelardb_storage/src/query/sorted_join_exec.rs b/crates/modelardb_storage/src/query/sorted_join_exec.rs index ffb9a255f..c2ff0ae88 100644 --- a/crates/modelardb_storage/src/query/sorted_join_exec.rs +++ b/crates/modelardb_storage/src/query/sorted_join_exec.rs @@ -49,7 +49,7 @@ pub(crate) enum SortedJoinColumnType { Tag(String), } -/// An execution plan that join arrays of data points sorted by tag columns and `timestamp` from +/// An execution plan that joins arrays of data points sorted by tag columns and `timestamp` from /// multiple execution plans. It is `pub(crate)` so the additional rules added to Apache /// DataFusion's physical optimizer can pattern match on it. #[derive(Debug)] From 548b0ae571ba194b12dd5f1bf844dbe45b9f84c6 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 5 Mar 2025 12:22:29 +0100 Subject: [PATCH 64/69] Change order of arguments in try_compress() --- .../modelardb_compression/src/compression.rs | 45 ++++++++++--------- .../modelardb_compression/src/models/swing.rs | 6 +-- crates/modelardb_compression/src/types.rs | 6 +-- .../src/storage/uncompressed_data_buffer.rs | 4 +- .../src/storage/uncompressed_data_manager.rs | 6 +-- .../tests/integration_test.rs | 4 +- 6 files changed, 37 insertions(+), 34 deletions(-) diff --git a/crates/modelardb_compression/src/compression.rs b/crates/modelardb_compression/src/compression.rs index a443e8e59..0812c7b9c 100644 --- a/crates/modelardb_compression/src/compression.rs +++ b/crates/modelardb_compression/src/compression.rs @@ -39,16 +39,19 @@ const RESIDUAL_VALUES_MAX_LENGTH: u8 = 255; /// regular and delta-of-deltas followed by a variable length binary encoding if irregular. /// `uncompressed_values` is compressed within `error_bound` using the model types in `models`. 
/// Assumes `uncompressed_timestamps` and `uncompressed_values` are sorted according to -/// `uncompressed_timestamps`. Returns [`ModelarDbCompressionError`] if `uncompressed_timestamps` -/// and `uncompressed_values` have different lengths, otherwise the resulting compressed segments -/// are returned as a [`RecordBatch`] with the `compressed_schema` schema. +/// `uncompressed_timestamps`. The resulting compressed segments have the schema in `compressed_schema` +/// with the tag columns populated by the values in `tag_values` and the field column index populated +/// by `field_column_index`. Returns [`ModelarDbCompressionError`] if `uncompressed_timestamps` and +/// `uncompressed_values` have different lengths or if `compressed_schema` is not a valid schema for +/// compressed segments, otherwise the resulting compressed segments are returned as a +/// [`RecordBatch`] with the `compressed_schema` schema. pub fn try_compress( + uncompressed_timestamps: &TimestampArray, + uncompressed_values: &ValueArray, + error_bound: ErrorBound, compressed_schema: Arc, tag_values: Vec, field_column_index: u16, - error_bound: ErrorBound, - uncompressed_timestamps: &TimestampArray, - uncompressed_values: &ValueArray, ) -> Result { // The uncompressed data must be passed as arrays instead of a RecordBatch as a TimestampArray // and a ValueArray is the only supported input. 
However, as a result it is necessary to verify @@ -276,12 +279,12 @@ mod tests { #[test] fn test_try_compress_empty_time_series_within_absolute_error_bound_zero() { let compressed_record_batch = try_compress( + &TimestampBuilder::new().finish(), + &ValueBuilder::new().finish(), + ErrorBound::try_new_absolute(ERROR_BOUND_ZERO).unwrap(), compressed_schema(), vec![TAG_VALUE.to_owned()], 0, - ErrorBound::try_new_absolute(ERROR_BOUND_ZERO).unwrap(), - &TimestampBuilder::new().finish(), - &ValueBuilder::new().finish(), ) .unwrap(); assert_eq!(0, compressed_record_batch.num_rows()); @@ -290,12 +293,12 @@ mod tests { #[test] fn test_try_compress_empty_time_series_within_relative_error_bound_zero() { let compressed_record_batch = try_compress( + &TimestampBuilder::new().finish(), + &ValueBuilder::new().finish(), + ErrorBound::try_new_relative(ERROR_BOUND_ZERO).unwrap(), compressed_schema(), vec![TAG_VALUE.to_owned()], 0, - ErrorBound::try_new_relative(ERROR_BOUND_ZERO).unwrap(), - &TimestampBuilder::new().finish(), - &ValueBuilder::new().finish(), ) .unwrap(); assert_eq!(0, compressed_record_batch.num_rows()); @@ -512,12 +515,12 @@ mod tests { data_generation::generate_values(uncompressed_timestamps.values(), values_structure); let compressed_record_batch = try_compress( + &uncompressed_timestamps, + &uncompressed_values, + error_bound, compressed_schema(), vec![TAG_VALUE.to_owned()], 0, - error_bound, - &uncompressed_timestamps, - &uncompressed_values, ) .unwrap(); @@ -662,12 +665,12 @@ mod tests { assert_eq!(uncompressed_timestamps.len(), uncompressed_values.len()); let compressed_record_batch = try_compress( + &uncompressed_timestamps, + &uncompressed_values, + error_bound, compressed_schema(), vec![TAG_VALUE.to_owned()], 0, - error_bound, - &uncompressed_timestamps, - &uncompressed_values, ) .unwrap(); @@ -878,12 +881,12 @@ mod tests { ); let compressed_record_batch = try_compress( + &uncompressed_timestamps, + &uncompressed_values, + error_bound, compressed_schema(), 
vec![TAG_VALUE.to_owned()], 0, - error_bound, - &uncompressed_timestamps, - &uncompressed_values, ) .unwrap(); diff --git a/crates/modelardb_compression/src/models/swing.rs b/crates/modelardb_compression/src/models/swing.rs index 270d4e497..c7dab2407 100644 --- a/crates/modelardb_compression/src/models/swing.rs +++ b/crates/modelardb_compression/src/models/swing.rs @@ -850,12 +850,12 @@ mod tests { let compressed_schema = Arc::new(Schema::new(compressed_schema_fields)); let segments = crate::try_compress( + ×tamps, + &values, + error_bound, compressed_schema, vec!["tag".to_owned()], 0, - error_bound, - ×tamps, - &values, ) .unwrap(); diff --git a/crates/modelardb_compression/src/types.rs b/crates/modelardb_compression/src/types.rs index 74aebb2cf..30b74e3a7 100644 --- a/crates/modelardb_compression/src/types.rs +++ b/crates/modelardb_compression/src/types.rs @@ -15,7 +15,7 @@ //! The types used throughout the crate. -use std::debug_assert; +use std::{debug_assert, iter}; use std::sync::Arc; use arrow::array::{ @@ -481,7 +481,7 @@ impl CompressedSegmentBatchBuilder { /// Return [`RecordBatch`] of compressed segments and consume the builder. 
pub(crate) fn finish(mut self) -> RecordBatch { let batch_length = self.model_type_ids.len(); - let field_column_array: UInt16Array = std::iter::repeat(self.field_column_index) + let field_column_array: UInt16Array = iter::repeat(self.field_column_index) .take(batch_length) .collect(); @@ -499,7 +499,7 @@ impl CompressedSegmentBatchBuilder { ]; for tag_value in &self.tag_values { - let tag_array: StringArray = std::iter::repeat(Some(tag_value)) + let tag_array: StringArray = iter::repeat(Some(tag_value)) .take(batch_length) .collect(); diff --git a/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs b/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs index 4ad3f7e18..9d3fb8b3c 100644 --- a/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs +++ b/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs @@ -19,7 +19,7 @@ //! support for storing uncompressed data points in Apache Parquet files on disk. use std::fmt::{Debug, Formatter, Result as FmtResult}; -use std::mem; +use std::{iter, mem}; use std::sync::Arc; use datafusion::arrow::array::{Array, ArrayBuilder, StringArray}; @@ -173,7 +173,7 @@ impl UncompressedInMemoryDataBuffer { } else if self.model_table_metadata.is_tag(column_index) { // The tag value is the same for each data point so it is not sorted. 
let tag_value = self.tag_values[tag_column_index].clone(); - let tag_array: StringArray = std::iter::repeat(Some(tag_value)) + let tag_array: StringArray = iter::repeat(Some(tag_value)) .take(buffer_length) .collect(); columns.push(Arc::new(tag_array)); diff --git a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs index d38d01eae..9ed131928 100644 --- a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs +++ b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs @@ -588,12 +588,12 @@ impl UncompressedDataManager { // unwrap() is safe as uncompressed_timestamps and uncompressed_values have the same length. modelardb_compression::try_compress( + uncompressed_timestamps, + uncompressed_values, + error_bound, model_table_metadata.compressed_schema.clone(), tag_values.clone(), *field_column_index as u16, - error_bound, - uncompressed_timestamps, - uncompressed_values, ) .unwrap() }) diff --git a/crates/modelardb_server/tests/integration_test.rs b/crates/modelardb_server/tests/integration_test.rs index e123651cb..209dfeb6f 100644 --- a/crates/modelardb_server/tests/integration_test.rs +++ b/crates/modelardb_server/tests/integration_test.rs @@ -18,7 +18,7 @@ use std::collections::HashMap; use std::error::Error; use std::io::Read; -use std::iter::repeat; +use std::iter; use std::ops::Range; use std::process::{Child, Command, Stdio}; use std::str; @@ -331,7 +331,7 @@ impl TestContext { if let Some(tag) = maybe_tag { fields.push(Field::new("tag", DataType::Utf8, false)); columns.push(Arc::new(StringArray::from_iter_values( - repeat(tag).take(time_series_len), + iter::repeat(tag).take(time_series_len), ))); } From 3d30ede5097b52eb63671e2737920bf0262c393f Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 5 Mar 2025 14:43:34 +0100 Subject: [PATCH 65/69] Update method for calculating tag hash --- 
crates/modelardb_compression/src/types.rs | 30 ++++++++----------- crates/modelardb_manager/src/remote.rs | 6 ++-- .../src/storage/compressed_data_manager.rs | 5 ++-- .../src/storage/data_sinks.rs | 21 ++++++------- .../src/storage/uncompressed_data_buffer.rs | 2 +- .../src/storage/uncompressed_data_manager.rs | 17 ++++++----- 6 files changed, 38 insertions(+), 43 deletions(-) diff --git a/crates/modelardb_compression/src/types.rs b/crates/modelardb_compression/src/types.rs index 30b74e3a7..4b4bde0a7 100644 --- a/crates/modelardb_compression/src/types.rs +++ b/crates/modelardb_compression/src/types.rs @@ -15,8 +15,8 @@ //! The types used throughout the crate. -use std::{debug_assert, iter}; use std::sync::Arc; +use std::{debug_assert, iter}; use arrow::array::{ ArrayBuilder, ArrayRef, BinaryBuilder, Float32Builder, StringArray, UInt8Builder, UInt16Array, @@ -485,24 +485,20 @@ impl CompressedSegmentBatchBuilder { .take(batch_length) .collect(); - let mut columns: Vec = vec![ - Arc::new(self.model_type_ids.finish()), - Arc::new(self.start_times.finish()), - Arc::new(self.end_times.finish()), - Arc::new(self.timestamps.finish()), - Arc::new(self.min_values.finish()), - Arc::new(self.max_values.finish()), - Arc::new(self.values.finish()), - Arc::new(self.residuals.finish()), - Arc::new(self.error.finish()), - Arc::new(field_column_array), - ]; + let mut columns: Vec = Vec::with_capacity(self.compressed_schema.fields.len()); + columns.push(Arc::new(self.model_type_ids.finish())); + columns.push(Arc::new(self.start_times.finish())); + columns.push(Arc::new(self.end_times.finish())); + columns.push(Arc::new(self.timestamps.finish())); + columns.push(Arc::new(self.min_values.finish())); + columns.push(Arc::new(self.max_values.finish())); + columns.push(Arc::new(self.values.finish())); + columns.push(Arc::new(self.residuals.finish())); + columns.push(Arc::new(self.error.finish())); + columns.push(Arc::new(field_column_array)); for tag_value in &self.tag_values { - let 
tag_array: StringArray = iter::repeat(Some(tag_value)) - .take(batch_length) - .collect(); - + let tag_array: StringArray = iter::repeat(Some(tag_value)).take(batch_length).collect(); columns.push(Arc::new(tag_array)); } diff --git a/crates/modelardb_manager/src/remote.rs b/crates/modelardb_manager/src/remote.rs index fd0d5b32e..cb10451c0 100644 --- a/crates/modelardb_manager/src/remote.rs +++ b/crates/modelardb_manager/src/remote.rs @@ -282,9 +282,9 @@ impl FlightServiceHandler { Ok(()) } - /// Truncate the table in the data Delta Lake and in each node controlled by the manager. If the - /// table does not exist or the table cannot be truncated in the remote data folder and in each - /// node, return [`Status`]. + /// Truncate the table in the remote data folder and at each node controlled by the manager. If + /// the table does not exist or the table cannot be truncated in the remote data folder and at + /// each node, return [`Status`]. async fn truncate_cluster_table(&self, table_name: &str) -> StdResult<(), Status> { if self.check_if_table_exists(table_name).await.is_ok() { return Err(Status::invalid_argument(format!( diff --git a/crates/modelardb_server/src/storage/compressed_data_manager.rs b/crates/modelardb_server/src/storage/compressed_data_manager.rs index d84163976..6301926d7 100644 --- a/crates/modelardb_server/src/storage/compressed_data_manager.rs +++ b/crates/modelardb_server/src/storage/compressed_data_manager.rs @@ -130,8 +130,9 @@ impl CompressedDataManager { } /// Insert `compressed_segment_batch` into the in-memory [`CompressedDataBuffer`] for the model - /// table. If `compressed_segment_batch` is inserted successfully, return [`Ok`], otherwise - /// return [`ModelarDbServerError`](crate::error::ModelarDbServerError). + /// table. If inserting `compressed_segment_batch` exceeded the reserved memory limit, save + /// compressed data to disk until enough memory is available. 
If compressed data could not be + /// saved to disk, return [`ModelarDbServerError`](crate::error::ModelarDbServerError). async fn insert_compressed_segments( &self, compressed_segment_batch: CompressedSegmentBatch, diff --git a/crates/modelardb_server/src/storage/data_sinks.rs b/crates/modelardb_server/src/storage/data_sinks.rs index 86743b361..b094f5f54 100644 --- a/crates/modelardb_server/src/storage/data_sinks.rs +++ b/crates/modelardb_server/src/storage/data_sinks.rs @@ -21,7 +21,6 @@ use std::fmt::{Debug, Formatter, Result as FmtResult}; use std::sync::Arc; use async_trait::async_trait; -use datafusion::arrow::datatypes::{Field, Schema}; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::TaskContext; @@ -106,9 +105,9 @@ impl DisplayAs for NormalTableDataSink { } } -/// [`DataSink`] that writes [`RecordBatches`](datafusion::arrow::record_batch::RecordBatch) -/// containing multivariate time series to [`StorageEngine`]. Assumes the generated columns are -/// included, thus they are dropped without checking the schema. +/// [`DataSink`] that writes [`RecordBatches`](RecordBatch) containing multivariate time series to +/// [`StorageEngine`]. Assumes the generated columns are included, thus they are dropped without +/// checking the schema. pub struct ModelTableDataSink { /// Metadata for the model table inserted data will be written to. model_table_metadata: Arc, @@ -150,18 +149,16 @@ impl DataSink for ModelTableDataSink { let mut data_points_inserted: u64 = 0; while let Some(record_batch) = data.next().await { + // Remove the generated columns from the record batch. The generated columns must be + // part of the inserted data since Apache DataFusion checks it before passing it to + // write_all(). let record_batch = record_batch?.project(&self.model_table_metadata.query_schema_to_schema)?; - // Ensure the fields are not nullable. 
It is not possible to insert null values into - // model tables but the schema of the record batch may contain nullable fields. - let mut fields: Vec = Vec::with_capacity(record_batch.schema().fields.len()); - for field in record_batch.schema().fields() { - fields.push(Field::new(field.name(), field.data_type().clone(), false)); - } - + // Create a new record batch with the schema of the model table to fix the problem where + // the schema of the inserted data has nullable fields. let record_batch = RecordBatch::try_new( - Arc::new(Schema::new(fields)), + self.model_table_metadata.schema.clone(), record_batch.columns().to_vec(), )?; diff --git a/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs b/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs index 9d3fb8b3c..4e60815b1 100644 --- a/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs +++ b/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs @@ -358,7 +358,7 @@ impl UncompressedOnDiskDataBuffer { let tag_values: Vec = tag_column_arrays .iter() - .map(|array| array.value(0).to_string()) + .map(|array| array.value(0).to_owned()) .collect(); let mut in_memory_buffer = UncompressedInMemoryDataBuffer::new( diff --git a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs index 9ed131928..739631ba7 100644 --- a/crates/modelardb_server/src/storage/uncompressed_data_manager.rs +++ b/crates/modelardb_server/src/storage/uncompressed_data_manager.rs @@ -180,7 +180,7 @@ impl UncompressedDataManager { for (index, timestamp) in timestamp_column_array.iter().enumerate() { let tag_values: Vec = tag_column_arrays .iter() - .map(|array| array.value(index).to_string()) + .map(|array| array.value(index).to_owned()) .collect(); let mut values = field_column_arrays.iter().map(|array| array.value(index)); @@ -577,7 +577,7 @@ impl UncompressedDataManager { let tag_values: Vec = tag_column_arrays .iter() - 
.map(|array| array.value(0).to_string()) + .map(|array| array.value(0).to_owned()) .collect(); let compressed_segments = field_column_arrays @@ -634,14 +634,15 @@ impl UncompressedDataManager { } } -/// Calculate a hash for a combination of `table_name` and `tag_values`. The hash can be used to +/// Calculate a hash for a combination of `table_name` and `tag_values`. The hash is used to /// identify a specific multivariate time series during ingestion. fn calculate_tag_hash(table_name: &str, tag_values: &[String]) -> u64 { - let mut hash_data = tag_values.to_vec(); - hash_data.push(table_name.to_string()); - let mut hasher = DefaultHasher::new(); - hasher.write(hash_data.join(";").as_bytes()); + for tag_value in tag_values { + hasher.write(tag_value.as_bytes()); + } + + hasher.write(table_name.as_bytes()); hasher.finish() } @@ -668,7 +669,7 @@ mod tests { use crate::{ClusterMode, DataFolders}; const TAG_VALUE: &str = "tag"; - const TAG_HASH: u64 = 15537859409877038916; + const TAG_HASH: u64 = 10828528714290431980; // Tests for UncompressedDataManager. #[tokio::test] From 03289226d3789986e62c155afc45e5f81ced2895 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 5 Mar 2025 15:15:03 +0100 Subject: [PATCH 66/69] Add limitation on number of model table fields back --- .../src/metadata/model_table_metadata.rs | 28 +++++++++++++------ .../src/metadata/table_metadata_manager.rs | 1 + 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/crates/modelardb_storage/src/metadata/model_table_metadata.rs b/crates/modelardb_storage/src/metadata/model_table_metadata.rs index 8155c32f7..a4348bcab 100644 --- a/crates/modelardb_storage/src/metadata/model_table_metadata.rs +++ b/crates/modelardb_storage/src/metadata/model_table_metadata.rs @@ -36,8 +36,6 @@ use crate::parser::tokenize_and_parse_sql_expression; pub struct ModelTableMetadata { /// Name of the model table. 
pub name: String, - /// Schema of the data that can be written to the model table. - pub schema: Arc, /// Index of the timestamp column in `schema`. pub timestamp_column_index: usize, /// Indices of the field columns in `schema`. @@ -46,13 +44,15 @@ pub struct ModelTableMetadata { pub tag_column_indices: Vec, /// Error bounds of the columns in `schema`. It can only be non-zero for field columns. pub error_bounds: Vec, + /// Expressions to create generated columns in the `query_schema`. Only field columns can be + /// generated by [`Expr`], so [`None`] is stored for timestamp, tag, and stored field columns. + pub generated_columns: Vec>, + /// Schema of the data that can be written to the model table. + pub schema: Arc, /// Schema of the data that can be read from the model table. pub query_schema: Arc, /// Projection that changes `query_schema` to `schema`. pub query_schema_to_schema: Vec, - /// Expressions to create generated columns in the `query_schema`. Only field columns can be - /// generated by [`Expr`], so [`None`] is stored for timestamp, tag, and stored field columns. - pub generated_columns: Vec>, /// Schema of the compressed segments that are stored in the model table. pub compressed_schema: Arc, } @@ -63,6 +63,7 @@ impl ModelTableMetadata { /// * The number of error bounds does not match the number of columns. /// * The number of potentially generated columns does not match the number of columns. /// * A generated column includes another generated column in its expression. + /// * There are more than 32767 columns. /// * The `query_schema` does not include a single timestamp column. /// * The `query_schema` does not include at least one stored field column. pub fn try_new( @@ -97,6 +98,14 @@ impl ModelTableMetadata { } } + // If there are more than 32767 columns, return an error. This limitation is necessary since + // 16 bits are used for the field column index in the compressed segments. 
+ if query_schema.fields.len() > 32767 { + return Err(ModelarDbStorageError::InvalidArgument( + "There cannot be more than 32767 columns in the model table.".to_owned(), + )); + } + // Remove the generated field columns from the query schema and the error bounds as these // columns should never be provided when inserting data points into the model table. let mut fields_without_generated = Vec::with_capacity(query_schema.fields().len()); @@ -145,7 +154,10 @@ impl ModelTableMetadata { compute_indices_of_columns_with_data_type(&schema_without_generated, DataType::Utf8); // Add the tag columns to the base schema for compressed segments. - let mut compressed_schema_fields = COMPRESSED_SCHEMA.0.fields.clone().to_vec(); + let mut compressed_schema_fields = + Vec::with_capacity(COMPRESSED_SCHEMA.0.fields().len() + tag_column_indices.len()); + compressed_schema_fields.extend(COMPRESSED_SCHEMA.0.fields.clone().to_vec()); + for index in &tag_column_indices { compressed_schema_fields.push(Arc::new(schema_without_generated.field(*index).clone())); } @@ -154,14 +166,14 @@ impl ModelTableMetadata { Ok(Self { name, - schema: schema_without_generated, timestamp_column_index: timestamp_column_indices[0], field_column_indices, tag_column_indices, error_bounds: error_bounds_without_generated, + generated_columns, + schema: schema_without_generated, query_schema, query_schema_to_schema: field_indices_without_generated, - generated_columns, compressed_schema, }) } diff --git a/crates/modelardb_storage/src/metadata/table_metadata_manager.rs b/crates/modelardb_storage/src/metadata/table_metadata_manager.rs index 83064150a..ad7ea426d 100644 --- a/crates/modelardb_storage/src/metadata/table_metadata_manager.rs +++ b/crates/modelardb_storage/src/metadata/table_metadata_manager.rs @@ -322,6 +322,7 @@ impl TableMetadataManager { (0.0, false) }; + // query_schema_index is simply cast as a model table contains at most 32767 columns. 
self.delta_lake .write_columns_to_metadata_table( "model_table_field_columns", From 26afb727ee8d5ed3cacf63cae592771b8dc27e34 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 5 Mar 2025 17:24:16 +0100 Subject: [PATCH 67/69] Rename Apache Arrow DataFusion to Apache DataFusion --- README.md | 2 +- crates/modelardb_storage/src/delta_lake.rs | 2 +- .../src/optimizer/model_simple_aggregates.rs | 8 +++---- .../src/query/generated_as_exec.rs | 6 ++--- .../modelardb_storage/src/query/grid_exec.rs | 20 ++++++++-------- .../src/query/metadata_table.rs | 4 ++-- .../src/query/model_table.rs | 23 +++++++++++++------ .../src/query/normal_table.rs | 2 +- .../src/query/sorted_join_exec.rs | 8 +++---- 9 files changed, 43 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index 9e64e9894..92d06ac90 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ ModelarDB is designed to be cross-platform and is currently automatically tested through [GitHub Actions](https://github.com/ModelarData/ModelarDB-RS/actions). It is also known to work on FreeBSD which is [currently not supported by GitHub Actions](https://github.com/actions/runner/issues/385). It is implemented in [Rust](https://www.rust-lang.org/) and uses [Apache Arrow Flight](https://github.com/apache/arrow-rs/tree/master/arrow-flight) -for communicating with clients, [Apache Arrow DataFusion](https://github.com/apache/arrow-datafusion) as its query +for communicating with clients, [Apache DataFusion](https://github.com/apache/datafusion) as its query engine, [Apache Arrow](https://github.com/apache/arrow-rs) as its in-memory data format, and [Apache Parquet](https://github.com/apache/arrow-rs/tree/master/parquet) as its on-disk data format. 
diff --git a/crates/modelardb_storage/src/delta_lake.rs b/crates/modelardb_storage/src/delta_lake.rs index 2b810176a..74de81d33 100644 --- a/crates/modelardb_storage/src/delta_lake.rs +++ b/crates/modelardb_storage/src/delta_lake.rs @@ -410,7 +410,7 @@ impl DeltaLake { let table = self.metadata_delta_table(table_name).await?; // TableProvider::schema(&table) is used instead of table.schema() because table.schema() - // returns the Delta Lake schema instead of the Apache Arrow DataFusion schema. + // returns the Delta Lake schema instead of the Apache DataFusion schema. let record_batch = RecordBatch::try_new(TableProvider::schema(&table), columns)?; self.write_record_batches_to_table( diff --git a/crates/modelardb_storage/src/optimizer/model_simple_aggregates.rs b/crates/modelardb_storage/src/optimizer/model_simple_aggregates.rs index 2273a7914..8ef73dfd3 100644 --- a/crates/modelardb_storage/src/optimizer/model_simple_aggregates.rs +++ b/crates/modelardb_storage/src/optimizer/model_simple_aggregates.rs @@ -219,8 +219,8 @@ fn rewrite_aggregates_to_use_segments( && aggregate_exec.filter_expr().iter().all(Option::is_none) && aggregate_exec.group_expr().is_empty() { - // Remove RepartitionExec if added by Apache Arrow DataFusion. Both AggregateExec - // and RepartitionExec can only have one child, so it is not necessary to check it. + // Remove RepartitionExec if added by Apache DataFusion. Both AggregateExec and + // RepartitionExec can only have one child, so it is not necessary to check it. let maybe_repartition_exec = &aggregate_exec_children[0]; let aggregate_exec_input = if let Some(repartition_exec) = maybe_repartition_exec .as_any() @@ -688,8 +688,8 @@ mod tests { #[tokio::test] async fn test_rewrite_aggregates_on_one_column_without_predicates() { - // Apache Arrow DataFusion 30 creates two input columns to AggregateExec when both SUM and - // AVG is computed in the same query, so for now, multiple queries are used for the test. 
+ // Apache DataFusion 30 creates two input columns to AggregateExec when both SUM and AVG is + // computed in the same query, so for now, multiple queries are used for the test. let query_no_avg = &format!( "SELECT COUNT(field_1), MIN(field_1), MAX(field_1), SUM(field_1) FROM {}", test::MODEL_TABLE_NAME diff --git a/crates/modelardb_storage/src/query/generated_as_exec.rs b/crates/modelardb_storage/src/query/generated_as_exec.rs index 0d5d1591b..c6c83d2a2 100644 --- a/crates/modelardb_storage/src/query/generated_as_exec.rs +++ b/crates/modelardb_storage/src/query/generated_as_exec.rs @@ -13,9 +13,9 @@ * limitations under the License. */ -//! Implementation of the Apache Arrow DataFusion execution plan [`GeneratedAsExec`] and its -//! corresponding stream [`GeneratedAsStream`] which computes generated columns and adds them to the -//! result. Generated columns can be computed from other columns and constant values. +//! Implementation of the Apache DataFusion execution plan [`GeneratedAsExec`] and its corresponding +//! stream [`GeneratedAsStream`] which computes generated columns and adds them to the result. +//! Generated columns can be computed from other columns and constant values. use std::any::Any; use std::fmt::{Formatter, Result as FmtResult}; diff --git a/crates/modelardb_storage/src/query/grid_exec.rs b/crates/modelardb_storage/src/query/grid_exec.rs index 40ba58262..99cd43504 100644 --- a/crates/modelardb_storage/src/query/grid_exec.rs +++ b/crates/modelardb_storage/src/query/grid_exec.rs @@ -13,7 +13,7 @@ * limitations under the License. */ -//! Implementation of the Apache Arrow DataFusion execution plan [`GridExec`] and its corresponding +//! Implementation of the Apache DataFusion execution plan [`GridExec`] and its corresponding //! stream [`GridStream`] which reconstructs the data points for a specific column from the //! compressed segments containing metadata and models. 
@@ -188,8 +188,8 @@ impl ExecutionPlan for GridExec { } /// Specify that [`GridExec`] requires one partition for each input as it assumes that the - /// sort order are the same for its input and Apache Arrow DataFusion only guarantees the - /// sort order within each partition rather than the input's global sort order. + /// sort order are the same for its input and Apache DataFusion only guarantees the sort order + /// within each partition rather than the input's global sort order. fn required_input_distribution(&self) -> Vec { vec![Distribution::SinglePartition] } @@ -286,7 +286,8 @@ impl GridStream { _error_array ); - let mut tag_arrays = vec![]; + let mut tag_arrays = + Vec::with_capacity(batch.num_columns() - QUERY_COMPRESSED_SCHEMA.0.fields().len()); for tag_index in QUERY_COMPRESSED_SCHEMA.0.fields().len()..batch.num_columns() { tag_arrays.push(modelardb_types::array!(batch, tag_index, StringArray)); } @@ -299,7 +300,7 @@ impl GridStream { let mut timestamp_builder = TimestampBuilder::with_capacity(current_rows + new_rows); let mut value_builder = ValueBuilder::with_capacity(current_rows + new_rows); - let mut tag_builders = vec![]; + let mut tag_builders = Vec::with_capacity(tag_arrays.len()); for _ in 0..tag_arrays.len() { tag_builders.push(StringBuilder::with_capacity( current_rows + new_rows, @@ -320,6 +321,8 @@ impl GridStream { for (index, tag_builder) in tag_builders.iter_mut().enumerate() { let tag_array = modelardb_types::array!(current_batch, index + 2, StringArray); + + // Append each value individually since StringBuilder does not have an append_slice() method. 
for i in self.current_batch_offset..current_batch.num_rows() { tag_builder.append_value(tag_array.value(i)); } @@ -359,10 +362,9 @@ impl GridStream { ); } - let mut columns: Vec = vec![ - Arc::new(timestamp_builder.finish()), - Arc::new(value_builder.finish()), - ]; + let mut columns: Vec = Vec::with_capacity(tag_builders.len() + 2); + columns.push(Arc::new(timestamp_builder.finish())); + columns.push(Arc::new(value_builder.finish())); for mut tag_builder in tag_builders { columns.push(Arc::new(tag_builder.finish())); diff --git a/crates/modelardb_storage/src/query/metadata_table.rs b/crates/modelardb_storage/src/query/metadata_table.rs index fee2571b6..e8725b9a7 100644 --- a/crates/modelardb_storage/src/query/metadata_table.rs +++ b/crates/modelardb_storage/src/query/metadata_table.rs @@ -29,8 +29,8 @@ use tonic::async_trait; /// A queryable representation of a metadata table. [`MetadataTable`] wraps the [`TableProvider`] of /// [`DeltaTable`] and passes most methods calls directly to it. Thus, it can be registered with -/// Apache Arrow DataFusion. The only difference from [`DeltaTable`] is that `delta_table` is -/// updated to the latest snapshot when accessed. +/// Apache DataFusion. The only difference from [`DeltaTable`] is that `delta_table` is updated to +/// the latest snapshot when accessed. #[derive(Debug)] pub(crate) struct MetadataTable { /// Access to the Delta Lake table. diff --git a/crates/modelardb_storage/src/query/model_table.rs b/crates/modelardb_storage/src/query/model_table.rs index e3d40e8fe..c8c29c100 100644 --- a/crates/modelardb_storage/src/query/model_table.rs +++ b/crates/modelardb_storage/src/query/model_table.rs @@ -76,7 +76,7 @@ pub(crate) struct ModelTable { query_order_segment: LexOrdering, /// The sort order that [`GridExec`] requires for the segments it receives as its input. query_requirement_segment: LexRequirement, - /// Schema used internally during query processing. 
+ /// Schema used to reconstruct the data points from each field column in the compressed segments. grid_schema: Arc, /// The sort order [`GridExec`] guarantees for the data points it produces. It is guaranteed by /// [`GridExec`] because it receives segments sorted by `query_order_segment` from [`ParquetExec`] @@ -108,7 +108,11 @@ impl ModelTable { }; // Add the tag columns to the base schema for queryable compressed segments. - let mut query_compressed_schema_fields = QUERY_COMPRESSED_SCHEMA.0.fields.clone().to_vec(); + let mut query_compressed_schema_fields = Vec::with_capacity( + QUERY_COMPRESSED_SCHEMA.0.fields.len() + model_table_metadata.tag_column_indices.len(), + ); + + query_compressed_schema_fields.extend(QUERY_COMPRESSED_SCHEMA.0.fields.clone().to_vec()); for index in &model_table_metadata.tag_column_indices { query_compressed_schema_fields .push(Arc::new(model_table_metadata.schema.field(*index).clone())); @@ -123,7 +127,11 @@ impl ModelTable { ); // Add the tag columns to the base schema for data points. - let mut grid_schema_fields = GRID_SCHEMA.0.fields.clone().to_vec(); + let mut grid_schema_fields = Vec::with_capacity( + GRID_SCHEMA.0.fields.len() + model_table_metadata.tag_column_indices.len(), + ); + + grid_schema_fields.extend(GRID_SCHEMA.0.fields.clone().to_vec()); for index in &model_table_metadata.tag_column_indices { grid_schema_fields.push(Arc::new(model_table_metadata.schema.field(*index).clone())); } @@ -212,7 +220,8 @@ fn query_order_and_requirement( nulls_first: false, }; - let mut physical_sort_exprs = vec![]; + let mut physical_sort_exprs = + Vec::with_capacity(model_table_metadata.tag_column_indices.len() + 1); for index in &model_table_metadata.tag_column_indices { let tag_column_name = model_table_metadata.schema.field(*index).name(); @@ -361,7 +370,7 @@ fn new_binary_expr(left: Expr, op: Operator, right: Expr) -> Expr { } /// Convert `maybe_expr` to a [`PhysicalExpr`] with the types in `query_schema` if possible. 
-fn maybe_convert_logical_expr_to_physical_expr( +fn try_convert_logical_expr_to_physical_expr( maybe_expr: Option<&Expr>, query_schema: SchemaRef, ) -> DataFusionResult>> { @@ -586,12 +595,12 @@ impl TableProvider for ModelTable { let (maybe_rewritten_parquet_filters, maybe_rewritten_grid_filters) = rewrite_and_combine_filters(schema, filters); - let maybe_physical_parquet_filters = maybe_convert_logical_expr_to_physical_expr( + let maybe_physical_parquet_filters = try_convert_logical_expr_to_physical_expr( maybe_rewritten_parquet_filters.as_ref(), self.query_compressed_schema.clone(), )?; - let maybe_physical_grid_filters = maybe_convert_logical_expr_to_physical_expr( + let maybe_physical_grid_filters = try_convert_logical_expr_to_physical_expr( maybe_rewritten_grid_filters.as_ref(), self.grid_schema.clone(), )?; diff --git a/crates/modelardb_storage/src/query/normal_table.rs b/crates/modelardb_storage/src/query/normal_table.rs index 191714a24..fdfd1b8be 100644 --- a/crates/modelardb_storage/src/query/normal_table.rs +++ b/crates/modelardb_storage/src/query/normal_table.rs @@ -34,7 +34,7 @@ use tonic::async_trait; /// A queryable representation of a normal table. [`NormalTable`] wraps the [`TableProvider`] /// [`DeltaTable`] and passes most methods calls directly to it. Thus, it can be registered with -/// Apache Arrow DataFusion. [`DeltaTable`] is extended in two ways, `delta_table` is updated to the +/// Apache DataFusion. [`DeltaTable`] is extended in two ways, `delta_table` is updated to the /// latest snapshot when accessed and support for inserting has been added. #[derive(Debug)] pub(crate) struct NormalTable { diff --git a/crates/modelardb_storage/src/query/sorted_join_exec.rs b/crates/modelardb_storage/src/query/sorted_join_exec.rs index c2ff0ae88..0367f0c33 100644 --- a/crates/modelardb_storage/src/query/sorted_join_exec.rs +++ b/crates/modelardb_storage/src/query/sorted_join_exec.rs @@ -13,8 +13,8 @@ * limitations under the License. */ -//! 
Implementation of the Apache Arrow DataFusion execution plan [`SortedJoinExec`] and its -//! corresponding stream [`SortedJoinStream`] which joins multiple sorted array produced by +//! Implementation of the Apache DataFusion execution plan [`SortedJoinExec`] and its corresponding +//! stream [`SortedJoinStream`] which joins multiple sorted array produced by //! [`GridExecs`](crate::query::grid_exec::GridExec) streams and combines them with the time series //! tags retrieved from the [`TableMetadataManager`](metadata::table_metadata_manager::TableMetadataManager) //! to create the complete results containing a timestamp column, one or more field columns, and zero @@ -171,8 +171,8 @@ impl ExecutionPlan for SortedJoinExec { } /// Specify that [`SortedJoinStream`] requires one partition for each input as it assumes that - /// the sort order is the same for all inputs and Apache Arrow DataFusion only guarantees the - /// sort order within each partition rather than the inputs' global sort order. + /// the sort order is the same for all inputs and Apache DataFusion only guarantees the sort + /// order within each partition rather than the inputs' global sort order. 
fn required_input_distribution(&self) -> Vec { vec![Distribution::SinglePartition; self.inputs.len()] } From a9eaeb844785664312086622f2b79621080d51b1 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 5 Mar 2025 17:44:51 +0100 Subject: [PATCH 68/69] Use Arc instead of SchemaRef --- crates/modelardb_client/src/main.rs | 6 +++--- crates/modelardb_common/src/remote.rs | 5 +++-- crates/modelardb_server/src/context.rs | 4 ++-- crates/modelardb_server/src/remote.rs | 3 +-- crates/modelardb_storage/src/lib.rs | 4 ++-- .../src/query/generated_as_exec.rs | 14 +++++++------- .../modelardb_storage/src/query/grid_exec.rs | 11 +++++------ .../src/query/metadata_table.rs | 5 +++-- .../src/query/model_table.rs | 19 +++++++------------ .../src/query/normal_table.rs | 5 +++-- .../src/query/sorted_join_exec.rs | 14 +++++++------- crates/modelardb_types/src/types.rs | 13 ++++++++----- 12 files changed, 51 insertions(+), 52 deletions(-) diff --git a/crates/modelardb_client/src/main.rs b/crates/modelardb_client/src/main.rs index fec58c286..7cd07166b 100644 --- a/crates/modelardb_client/src/main.rs +++ b/crates/modelardb_client/src/main.rs @@ -27,7 +27,7 @@ use std::sync::Arc; use std::time::Instant; use arrow::array::ArrayRef; -use arrow::datatypes::{Schema, SchemaRef, ToByteSlice}; +use arrow::datatypes::{Schema, ToByteSlice}; use arrow::ipc::convert; use arrow::util::pretty; use arrow_flight::flight_service_client::FlightServiceClient; @@ -387,7 +387,7 @@ async fn execute_query_and_print_result( /// Returns [`ModelarDbClientError`] if the batches in the result set could not be printed. async fn print_batches_with_confirmation( mut stream: Streaming, - schema: SchemaRef, + schema: Arc, dictionaries_by_id: &HashMap, ) -> Result<()> { let mut user_input = String::new(); @@ -424,7 +424,7 @@ async fn print_batches_with_confirmation( /// batches in the result set could not be printed. 
async fn print_batches_without_confirmation( mut stream: Streaming, - schema: SchemaRef, + schema: Arc, dictionaries_by_id: &HashMap, ) -> Result<()> { while let Some(flight_data) = stream.message().await? { diff --git a/crates/modelardb_common/src/remote.rs b/crates/modelardb_common/src/remote.rs index 1e0450499..ab2a621e3 100644 --- a/crates/modelardb_common/src/remote.rs +++ b/crates/modelardb_common/src/remote.rs @@ -17,9 +17,10 @@ use std::collections::HashMap; use std::error::Error; +use std::sync::Arc; use arrow::array::ArrayRef; -use arrow::datatypes::SchemaRef; +use arrow::datatypes::Schema; use arrow::record_batch::RecordBatch; use arrow_flight::{FlightData, FlightDescriptor, utils}; use tonic::Status; @@ -39,7 +40,7 @@ pub fn table_name_from_flight_descriptor( /// could not be converted, [`Status`] is returned. pub fn flight_data_to_record_batch( flight_data: &FlightData, - schema: &SchemaRef, + schema: &Arc, dictionaries_by_id: &HashMap, ) -> Result { debug_assert_eq!(flight_data.flight_descriptor, None); diff --git a/crates/modelardb_server/src/context.rs b/crates/modelardb_server/src/context.rs index 59c2f7b0d..3f4aec0ad 100644 --- a/crates/modelardb_server/src/context.rs +++ b/crates/modelardb_server/src/context.rs @@ -18,7 +18,7 @@ use std::sync::Arc; -use datafusion::arrow::datatypes::{Schema, SchemaRef}; +use datafusion::arrow::datatypes::Schema; use datafusion::catalog::SchemaProvider; use datafusion::prelude::SessionContext; use modelardb_storage::metadata::model_table_metadata::ModelTableMetadata; @@ -391,7 +391,7 @@ impl Context { pub async fn schema_of_table_in_default_database_schema( &self, table_name: &str, - ) -> Result { + ) -> Result> { let database_schema = self.default_database_schema()?; let table = database_schema.table(table_name).await?.ok_or_else(|| { diff --git a/crates/modelardb_server/src/remote.rs b/crates/modelardb_server/src/remote.rs index 33de90ad8..0d5cccfe4 100644 --- a/crates/modelardb_server/src/remote.rs +++ 
b/crates/modelardb_server/src/remote.rs @@ -32,7 +32,6 @@ use arrow_flight::{ SchemaResult, Ticket, utils, }; use datafusion::arrow::array::{ArrayRef, StringArray, UInt64Array}; -use datafusion::arrow::datatypes::SchemaRef; use datafusion::arrow::ipc::writer::{DictionaryTracker, IpcDataGenerator, IpcWriteOptions}; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::DataFusionError; @@ -273,7 +272,7 @@ impl FlightServiceHandler { async fn ingest_into_normal_table( &self, table_name: &str, - schema: &SchemaRef, + schema: &Arc, flight_data_stream: &mut Streaming, ) -> StdResult<(), Status> { // Retrieve the data until the request does not contain any more data. diff --git a/crates/modelardb_storage/src/lib.rs b/crates/modelardb_storage/src/lib.rs index be4fb70ee..9a4779bcb 100644 --- a/crates/modelardb_storage/src/lib.rs +++ b/crates/modelardb_storage/src/lib.rs @@ -33,7 +33,7 @@ use arrow::array::{ }; use arrow::compute; use arrow::compute::concat_batches; -use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use arrow::datatypes::{DataType, Field, Schema}; use arrow::ipc::reader::StreamReader; use arrow::ipc::writer::{IpcWriteOptions, StreamWriter}; use arrow_flight::{IpcMessage, SchemaAsIpc}; @@ -300,7 +300,7 @@ pub fn try_convert_record_batch_to_bytes(record_batch: &RecordBatch) -> Result, - schema: &SchemaRef, + schema: &Arc, ) -> Result { let bytes: Bytes = record_batch_bytes.into(); let reader = StreamReader::try_new(bytes.reader(), None)?; diff --git a/crates/modelardb_storage/src/query/generated_as_exec.rs b/crates/modelardb_storage/src/query/generated_as_exec.rs index c6c83d2a2..1ee6b4ad9 100644 --- a/crates/modelardb_storage/src/query/generated_as_exec.rs +++ b/crates/modelardb_storage/src/query/generated_as_exec.rs @@ -23,8 +23,8 @@ use std::pin::Pin; use std::sync::Arc; use std::task::{Context as StdTaskContext, Poll}; +use arrow::datatypes::Schema; use datafusion::arrow::array::StringArray; -use 
datafusion::arrow::datatypes::SchemaRef; use datafusion::arrow::record_batch::RecordBatch; use datafusion::arrow::temporal_conversions; use datafusion::error::{DataFusionError, Result as DataFusionResult}; @@ -63,7 +63,7 @@ impl ColumnToGenerate { #[derive(Debug)] pub(super) struct GeneratedAsExec { /// Schema of the execution plan. - schema: SchemaRef, + schema: Arc, /// Columns to generate and the index they should be at. columns_to_generate: Vec, /// Execution plan to read batches of segments from. @@ -76,7 +76,7 @@ pub(super) struct GeneratedAsExec { impl GeneratedAsExec { pub(super) fn new( - schema: SchemaRef, + schema: Arc, columns_to_generate: Vec, input: Arc, ) -> Arc { @@ -113,7 +113,7 @@ impl ExecutionPlan for GeneratedAsExec { } /// Return the schema of the plan. - fn schema(&self) -> SchemaRef { + fn schema(&self) -> Arc { self.schema.clone() } @@ -184,7 +184,7 @@ impl DisplayAs for GeneratedAsExec { /// adds them to the batch, and then returns the result. struct GeneratedAsStream { /// Schema of the stream. - schema: SchemaRef, + schema: Arc, /// Columns to generate and the index they should be at. columns_to_generate: Vec, /// Stream to read batches of rows from. @@ -195,7 +195,7 @@ struct GeneratedAsStream { impl GeneratedAsStream { fn new( - schema: SchemaRef, + schema: Arc, columns_to_generate: Vec, input: SendableRecordBatchStream, baseline_metrics: BaselineMetrics, @@ -321,7 +321,7 @@ impl Stream for GeneratedAsStream { impl RecordBatchStream for GeneratedAsStream { /// Return the schema of the stream. 
- fn schema(&self) -> SchemaRef { + fn schema(&self) -> Arc { self.schema.clone() } } diff --git a/crates/modelardb_storage/src/query/grid_exec.rs b/crates/modelardb_storage/src/query/grid_exec.rs index 99cd43504..114c879ef 100644 --- a/crates/modelardb_storage/src/query/grid_exec.rs +++ b/crates/modelardb_storage/src/query/grid_exec.rs @@ -31,7 +31,6 @@ use datafusion::arrow::array::{ Array, ArrayBuilder, ArrayRef, BinaryArray, Float32Array, UInt8Array, }; use datafusion::arrow::compute::filter_record_batch; -use datafusion::arrow::datatypes::SchemaRef; use datafusion::arrow::record_batch::RecordBatch; use datafusion::common::cast::as_boolean_array; use datafusion::error::{DataFusionError, Result as DataFusionResult}; @@ -56,7 +55,7 @@ use modelardb_types::types::{TimestampArray, TimestampBuilder, ValueArray, Value #[derive(Debug, Clone)] pub(crate) struct GridExec { /// Schema of the execution plan. - schema: SchemaRef, + schema: Arc, /// Predicate to filter data points by. maybe_predicate: Option>, /// Number of data points requested by the query. @@ -123,7 +122,7 @@ impl ExecutionPlan for GridExec { } /// Return the schema of the plan. - fn schema(&self) -> SchemaRef { + fn schema(&self) -> Arc { self.schema.clone() } @@ -218,7 +217,7 @@ impl DisplayAs for GridExec { /// points from the metadata and models in the segments, and returns batches of data points. struct GridStream { /// Schema of the stream. - schema: SchemaRef, + schema: Arc, /// Predicate to filter data points by. maybe_predicate: Option>, /// Stream to read batches of compressed segments from. @@ -235,7 +234,7 @@ struct GridStream { impl GridStream { fn new( - schema: SchemaRef, + schema: Arc, maybe_predicate: Option>, limit: Option, input: SendableRecordBatchStream, @@ -429,7 +428,7 @@ impl Stream for GridStream { impl RecordBatchStream for GridStream { /// Return the schema of the stream. 
- fn schema(&self) -> SchemaRef { + fn schema(&self) -> Arc { self.schema.clone() } } diff --git a/crates/modelardb_storage/src/query/metadata_table.rs b/crates/modelardb_storage/src/query/metadata_table.rs index e8725b9a7..5a814070f 100644 --- a/crates/modelardb_storage/src/query/metadata_table.rs +++ b/crates/modelardb_storage/src/query/metadata_table.rs @@ -19,12 +19,13 @@ use std::{any::Any, sync::Arc}; +use arrow::datatypes::Schema; use datafusion::catalog::Session; use datafusion::datasource::{TableProvider, TableType}; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::logical_expr::Expr; use datafusion::physical_plan::ExecutionPlan; -use deltalake::{DeltaTable, arrow::datatypes::SchemaRef}; +use deltalake::DeltaTable; use tonic::async_trait; /// A queryable representation of a metadata table. [`MetadataTable`] wraps the [`TableProvider`] of @@ -51,7 +52,7 @@ impl TableProvider for MetadataTable { } /// Return the query schema of the metadata table registered with Apache DataFusion. 
- fn schema(&self) -> SchemaRef { + fn schema(&self) -> Arc { TableProvider::schema(&self.delta_table) } diff --git a/crates/modelardb_storage/src/query/model_table.rs b/crates/modelardb_storage/src/query/model_table.rs index c8c29c100..205928c23 100644 --- a/crates/modelardb_storage/src/query/model_table.rs +++ b/crates/modelardb_storage/src/query/model_table.rs @@ -26,9 +26,7 @@ use std::sync::Arc; use arrow::compute::SortOptions; use arrow::datatypes::DataType::Utf8; use async_trait::async_trait; -use datafusion::arrow::datatypes::{ - ArrowPrimitiveType, DataType, Field, Schema, SchemaRef, TimeUnit, -}; +use datafusion::arrow::datatypes::{ArrowPrimitiveType, DataType, Field, Schema, TimeUnit}; use datafusion::catalog::Session; use datafusion::common::{Statistics, ToDFSchema}; use datafusion::datasource::listing::PartitionedFile; @@ -255,10 +253,7 @@ fn query_order_and_requirement( /// to a filter that is written in terms of the schema used for compressed segments by the storage /// engine and a filter that is written in terms of the schema used for univariate time series by /// [`GridExec`] for its output. If the filters cannot be rewritten an empty [`None`] is returned. -fn rewrite_and_combine_filters( - schema: &SchemaRef, - filters: &[Expr], -) -> (Option, Option) { +fn rewrite_and_combine_filters(schema: &Schema, filters: &[Expr]) -> (Option, Option) { let rewritten_filters = filters .iter() .filter_map(|filter| rewrite_filter(schema, filter)); @@ -279,7 +274,7 @@ fn rewrite_and_combine_filters( /// that is written in terms of the schema used for compressed segments by the storage engine and a /// filter that is written in terms of the schema used for univariate time series by [`GridExec`]. /// If the filter cannot be rewritten, [`None`] is returned. 
-fn rewrite_filter(query_schema: &SchemaRef, filter: &Expr) -> Option<(Expr, Expr)> { +fn rewrite_filter(query_schema: &Schema, filter: &Expr) -> Option<(Expr, Expr)> { match filter { Expr::BinaryExpr(BinaryExpr { left, op, right }) => { if let Expr::Column(column) = &**left { @@ -372,7 +367,7 @@ fn new_binary_expr(left: Expr, op: Operator, right: Expr) -> Expr { /// Convert `maybe_expr` to a [`PhysicalExpr`] with the types in `query_schema` if possible. fn try_convert_logical_expr_to_physical_expr( maybe_expr: Option<&Expr>, - query_schema: SchemaRef, + query_schema: Arc, ) -> DataFusionResult>> { // Option.map() is not used so errors can be returned with ?. if let Some(maybe_expr) = maybe_expr { @@ -388,7 +383,7 @@ fn try_convert_logical_expr_to_physical_expr( /// Convert `expr` to a [`PhysicalExpr`] with the types in `query_schema`. fn convert_logical_expr_to_physical_expr( expr: &Expr, - query_schema: SchemaRef, + query_schema: Arc, ) -> DataFusionResult> { let df_query_schema = query_schema.clone().to_dfschema()?; planner::create_physical_expr(expr, &df_query_schema, &ExecutionProps::new()) @@ -402,7 +397,7 @@ fn new_apache_parquet_exec( partition_filters: &[PartitionFilter], maybe_limit: Option, maybe_parquet_filters: &Option>, - file_schema: SchemaRef, + file_schema: Arc, output_ordering: Vec, ) -> DataFusionResult> { // Collect the LogicalFiles into a Vec so they can be sorted the same for all field columns. @@ -486,7 +481,7 @@ impl TableProvider for ModelTable { } /// Return the query schema of the model table registered with Apache DataFusion. 
- fn schema(&self) -> SchemaRef { + fn schema(&self) -> Arc { self.model_table_metadata.query_schema.clone() } diff --git a/crates/modelardb_storage/src/query/normal_table.rs b/crates/modelardb_storage/src/query/normal_table.rs index fdfd1b8be..31aa5d6cc 100644 --- a/crates/modelardb_storage/src/query/normal_table.rs +++ b/crates/modelardb_storage/src/query/normal_table.rs @@ -21,6 +21,7 @@ use std::borrow::Cow; use std::{any::Any, sync::Arc}; +use arrow::datatypes::Schema; use datafusion::catalog::Session; use datafusion::common::Constraints; use datafusion::datasource::{TableProvider, TableType}; @@ -29,7 +30,7 @@ use datafusion::logical_expr::dml::InsertOp; use datafusion::logical_expr::{Expr, LogicalPlan, TableProviderFilterPushDown}; use datafusion::physical_plan::insert::{DataSink, DataSinkExec}; use datafusion::physical_plan::{ExecutionPlan, Statistics}; -use deltalake::{DeltaTable, arrow::datatypes::SchemaRef}; +use deltalake::DeltaTable; use tonic::async_trait; /// A queryable representation of a normal table. [`NormalTable`] wraps the [`TableProvider`] @@ -61,7 +62,7 @@ impl TableProvider for NormalTable { } /// Return the query schema of the normal table registered with Apache DataFusion. 
- fn schema(&self) -> SchemaRef { + fn schema(&self) -> Arc { TableProvider::schema(&self.delta_table) } diff --git a/crates/modelardb_storage/src/query/sorted_join_exec.rs b/crates/modelardb_storage/src/query/sorted_join_exec.rs index 0367f0c33..11bc8e3fe 100644 --- a/crates/modelardb_storage/src/query/sorted_join_exec.rs +++ b/crates/modelardb_storage/src/query/sorted_join_exec.rs @@ -26,8 +26,8 @@ use std::pin::Pin; use std::sync::Arc; use std::task::{Context as StdTaskContext, Poll}; +use arrow::datatypes::Schema; use datafusion::arrow::array::ArrayRef; -use datafusion::arrow::datatypes::SchemaRef; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::context::TaskContext; @@ -55,7 +55,7 @@ pub(crate) enum SortedJoinColumnType { #[derive(Debug)] pub(crate) struct SortedJoinExec { /// Schema of the execution plan. - schema: SchemaRef, + schema: Arc, /// Order of columns to return. return_order: Vec, /// Execution plans to read batches of data points from. @@ -70,7 +70,7 @@ pub(crate) struct SortedJoinExec { impl SortedJoinExec { pub(crate) fn new( - schema: SchemaRef, + schema: Arc, return_order: Vec, inputs: Vec>, query_requirement_data_point: LexRequirement, @@ -108,7 +108,7 @@ impl ExecutionPlan for SortedJoinExec { } /// Return the schema of the plan. - fn schema(&self) -> SchemaRef { + fn schema(&self) -> Arc { self.schema.clone() } @@ -199,7 +199,7 @@ impl DisplayAs for SortedJoinExec { struct SortedJoinStream { /// Schema of the stream. - schema: SchemaRef, + schema: Arc, /// Order of columns to return. return_order: Vec, /// Streams to read batches of data points from. 
@@ -212,7 +212,7 @@ struct SortedJoinStream { impl SortedJoinStream { fn new( - schema: SchemaRef, + schema: Arc, return_order: Vec, inputs: Vec, baseline_metrics: BaselineMetrics, @@ -334,7 +334,7 @@ impl Stream for SortedJoinStream { impl RecordBatchStream for SortedJoinStream { /// Return the schema of the stream. - fn schema(&self) -> SchemaRef { + fn schema(&self) -> Arc { self.schema.clone() } } diff --git a/crates/modelardb_types/src/types.rs b/crates/modelardb_types/src/types.rs index 141676212..093a36031 100644 --- a/crates/modelardb_types/src/types.rs +++ b/crates/modelardb_types/src/types.rs @@ -21,6 +21,9 @@ use std::fmt; use std::str::FromStr; +use std::sync::Arc; + +use arrow::datatypes::Schema; use crate::error::{ModelarDbTypesError, Result}; @@ -42,19 +45,19 @@ pub type ValueArray = arrow::array::PrimitiveArray; // Types used for the schema of compressed data, the configuration, and table metadata. #[derive(Clone)] -pub struct CompressedSchema(pub arrow::datatypes::SchemaRef); +pub struct CompressedSchema(pub Arc); #[derive(Clone)] -pub struct QueryCompressedSchema(pub arrow::datatypes::SchemaRef); +pub struct QueryCompressedSchema(pub Arc); #[derive(Clone)] -pub struct GridSchema(pub arrow::datatypes::SchemaRef); +pub struct GridSchema(pub Arc); #[derive(Clone)] -pub struct ConfigurationSchema(pub arrow::datatypes::SchemaRef); +pub struct ConfigurationSchema(pub Arc); #[derive(Clone)] -pub struct TableMetadataSchema(pub arrow::datatypes::SchemaRef); +pub struct TableMetadataSchema(pub Arc); /// Absolute or relative per-value error bound. 
#[derive(Debug, Copy, Clone, PartialEq)] From 6bd5d893d9e21b2ec48e8e2a45c0a171dc313866 Mon Sep 17 00:00:00 2001 From: CGodiksen <36046286+CGodiksen@users.noreply.github.com> Date: Wed, 5 Mar 2025 18:06:00 +0100 Subject: [PATCH 69/69] Update based on comments from @skejserjensen --- .../src/storage/uncompressed_data_buffer.rs | 7 +++---- crates/modelardb_storage/src/query/model_table.rs | 5 +---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs b/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs index 4e60815b1..6fc614ff1 100644 --- a/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs +++ b/crates/modelardb_server/src/storage/uncompressed_data_buffer.rs @@ -19,8 +19,8 @@ //! support for storing uncompressed data points in Apache Parquet files on disk. use std::fmt::{Debug, Formatter, Result as FmtResult}; -use std::{iter, mem}; use std::sync::Arc; +use std::{iter, mem}; use datafusion::arrow::array::{Array, ArrayBuilder, StringArray}; use datafusion::arrow::compute; @@ -173,9 +173,8 @@ impl UncompressedInMemoryDataBuffer { } else if self.model_table_metadata.is_tag(column_index) { // The tag value is the same for each data point so it is not sorted. let tag_value = self.tag_values[tag_column_index].clone(); - let tag_array: StringArray = iter::repeat(Some(tag_value)) - .take(buffer_length) - .collect(); + let tag_array: StringArray = + iter::repeat(Some(tag_value)).take(buffer_length).collect(); columns.push(Arc::new(tag_array)); tag_column_index += 1; diff --git a/crates/modelardb_storage/src/query/model_table.rs b/crates/modelardb_storage/src/query/model_table.rs index 205928c23..5cb60ab63 100644 --- a/crates/modelardb_storage/src/query/model_table.rs +++ b/crates/modelardb_storage/src/query/model_table.rs @@ -91,7 +91,7 @@ impl ModelTable { data_sink: Arc, ) -> Arc { // Compute the index of the first stored field column in the model table's query schema. 
It - // is used for queries without fields as uids, timestamps, and values are stored together. + // is used for queries without fields as tags, timestamps, and values are stored together. let fallback_field_column = { model_table_metadata .query_schema @@ -407,7 +407,6 @@ fn new_apache_parquet_exec( .collect::, DeltaTableError>>() .map_err(|error| DataFusionError::Plan(error.to_string()))?; - // TODO: prune the Apache Parquet files using metadata and maybe_parquet_filters if possible. logical_files.sort_by_key(|logical_file| logical_file.modification_time()); // Create the data source operator. Assumes the ObjectStore exists. @@ -416,8 +415,6 @@ fn new_apache_parquet_exec( .map(|logical_file| logical_file_to_partitioned_file(logical_file)) .collect::>>()?; - // TODO: give the optimizer more info for timestamps and values through statistics, e.g, min - // can be computed using only the metadata Delta Lake due to the aggregate_statistics rule. let log_store = delta_table.log_store(); let file_scan_config = FileScanConfig { object_store_url: log_store.object_store_url(),