Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
70 commits
Select commit Hold shift + click to select a range
879e0cb
Fixed outdated documentation
CGodiksen Feb 4, 2025
cc89194
Add table name to file path for spilled buffers
CGodiksen Feb 11, 2025
b00ebd6
Use the table name in the spilled buffer file path when initializing
CGodiksen Feb 11, 2025
a78eae8
Remove model_table_hash_table_name from metadata Delta Lake
CGodiksen Feb 11, 2025
c5baba5
Remove limitation of 1024 on number of field columns
CGodiksen Feb 12, 2025
999576d
No longer save tag metadata when inserting data points
CGodiksen Feb 17, 2025
6794ffc
Use a new function to calculate tag hash outside table metadata manager
CGodiksen Feb 17, 2025
cd5e9bc
Remove methods to lookup and save tag hash metadata
CGodiksen Feb 17, 2025
c4d0c90
Remove tag cache from table metadata manager
CGodiksen Feb 17, 2025
92831e3
Remove mapping_from_hash_to_tags()
CGodiksen Feb 17, 2025
13e3ad3
Remove model_table_tags table from metadata Delta Lake
CGodiksen Feb 17, 2025
ec961f6
Remove method to truncate table metadata
CGodiksen Feb 17, 2025
e7e1aed
Remove separate schema for uncompressed data
CGodiksen Feb 18, 2025
be01035
Include tag values in uncompressed data buffer data
CGodiksen Feb 18, 2025
9ada71e
Add a test method to get uncompressed data for a model table
CGodiksen Feb 18, 2025
91723dc
Add method to get column arrays from model table metadata
CGodiksen Feb 18, 2025
af7b440
Use method to get column arrays instead of doing it manually
CGodiksen Feb 18, 2025
84c08a1
Fix tests after changes to uncompressed data buffers
CGodiksen Feb 19, 2025
d2e6f1d
Pass tag values and field column index to try_compress() instead of u…
CGodiksen Feb 19, 2025
44dd8b5
Remove UNCOMPRESSED_SCHEMA
CGodiksen Feb 19, 2025
5b95831
Remove univariate_ids from macros
CGodiksen Feb 19, 2025
2bf2739
Remove methods to convert univariate ids between int64 and uint64
CGodiksen Feb 19, 2025
0040f10
Remove DISK schemas
CGodiksen Feb 19, 2025
a7207d3
Add compressed schema to model table metadata
CGodiksen Feb 20, 2025
68c9bdb
Update compression to use tag values instead of univariate id
CGodiksen Feb 20, 2025
f125d0e
Fix calls to try_compress() in tests
CGodiksen Feb 20, 2025
a1d3e1a
Use compressed schema with tag column in test util function
CGodiksen Feb 20, 2025
ff30a73
Use model table compressed schema in compressed data buffer
CGodiksen Feb 20, 2025
930a109
Sort compressed segment files by tag columns instead of univariate id
CGodiksen Feb 20, 2025
e066424
Use compressed schema with tag columns when creating model tables in …
CGodiksen Feb 20, 2025
78a914b
Fix unit tests after changes to compressed segment schema
CGodiksen Feb 20, 2025
e266885
Add temporary fix to grid since tag metadata is no longer available
CGodiksen Feb 21, 2025
e014877
Reformat, fix clippy errors and remove unused dependencies
CGodiksen Feb 22, 2025
dee061d
Remove table metadata manager from ModelTable struct
CGodiksen Feb 22, 2025
5652619
Fix comments and remove unused variable
CGodiksen Feb 23, 2025
172adb4
Remove utility functions to convert univariate id to tag hash and col…
CGodiksen Feb 23, 2025
703d4e0
Remove hash_to_tags from SortedJoinExec
CGodiksen Feb 23, 2025
d5f21df
Update indices for accessing compressed segment arrays
CGodiksen Feb 24, 2025
028c6ef
Add query compressed schema to ModelTable
CGodiksen Feb 25, 2025
8c9b2db
Add query order segment to model table
CGodiksen Feb 25, 2025
f2d87db
Add query requirement segment to ModelTable
CGodiksen Feb 25, 2025
19d0282
Add util method to get query order and requirement for a schema
CGodiksen Feb 25, 2025
3175d88
Use util method to get segment query order and requirement
CGodiksen Feb 25, 2025
a4c07e6
Remove univariate_id from GRID_SCHEMA
CGodiksen Feb 25, 2025
1236ebd
Add grid schema, query order data point, and query requirement data p…
CGodiksen Feb 25, 2025
fe8bd38
Pass model table query compressed schema and output ordering when cre…
CGodiksen Feb 25, 2025
ccf58dc
Use model table specific query requirement segment and query order da…
CGodiksen Feb 25, 2025
5f8dc50
Pass query requirement data point to SortedJoinExec
CGodiksen Feb 25, 2025
3adf091
Remove global sort orders and sort requirements
CGodiksen Feb 25, 2025
0bdd384
Remove univariate_id from pmc_mean::grid()
CGodiksen Feb 26, 2025
1de2c03
Remove univariate_id from swing::grid()
CGodiksen Feb 26, 2025
900f6c3
Remove univariate_id from gorilla::grid()
CGodiksen Feb 26, 2025
17ff1b8
Remove univariate_id from modelardb_compression::grid()
CGodiksen Feb 26, 2025
b734a27
Remove univariate id from GridExec and types
CGodiksen Feb 26, 2025
0a388dd
Reconstruct tag columns in GridExec
CGodiksen Feb 26, 2025
da3c9d3
No longer use tag_column_indices when checking for tag columns in pro…
CGodiksen Feb 26, 2025
27a1cfd
Use tag columns in data points in sorted_join()
CGodiksen Feb 26, 2025
62e04dc
Reformat and fixed doc and clippy issues
CGodiksen Feb 26, 2025
d4cb178
Fix bug causing INSERT INTO to fail due to schema mismatch
CGodiksen Feb 27, 2025
e74acb2
Reformat with Rustfmt
CGodiksen Feb 27, 2025
950a9a5
Change QuerySchema to GridSchema to match schema name
CGodiksen Feb 27, 2025
c777e3d
Fix cargo doc issue
CGodiksen Feb 27, 2025
118c6ce
Update based on comments from @chrthomsen
CGodiksen Feb 28, 2025
548b0ae
Change order of arguments in try_compress()
CGodiksen Mar 5, 2025
3d30ede
Update method for calculating tag hash
CGodiksen Mar 5, 2025
0328922
Add limitation on number of model table fields back
CGodiksen Mar 5, 2025
26afb72
Rename Apache Arrow DataFusion to Apache DataFusion
CGodiksen Mar 5, 2025
a9eaeb8
Use Arc<Schema> instead of SchemaRef
CGodiksen Mar 5, 2025
6bd5d89
Update based on comments from @skejserjensen
CGodiksen Mar 5, 2025
1998f6e
Merge branch 'main' into dev/tags-in-data
CGodiksen Mar 5, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ ModelarDB is designed to be cross-platform and is currently automatically tested
through [GitHub Actions](https://github.com/ModelarData/ModelarDB-RS/actions). It is also known to work on FreeBSD which
is [currently not supported by GitHub Actions](https://github.com/actions/runner/issues/385). It is implemented in
[Rust](https://www.rust-lang.org/) and uses [Apache Arrow Flight](https://github.com/apache/arrow-rs/tree/master/arrow-flight)
for communicating with clients, [Apache Arrow DataFusion](https://github.com/apache/arrow-datafusion) as its query
for communicating with clients, [Apache DataFusion](https://github.com/apache/datafusion) as its query
engine, [Apache Arrow](https://github.com/apache/arrow-rs) as its in-memory data format, and
[Apache Parquet](https://github.com/apache/arrow-rs/tree/master/parquet) as its on-disk data format.

Expand Down
2 changes: 1 addition & 1 deletion crates/modelardb_client/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ use std::result::Result as StdResult;
use arrow::error::ArrowError;
use object_store::Error as ObjectStoreError;
use rustyline::error::ReadlineError as RustyLineError;
use tonic::transport::Error as TonicTransportError;
use tonic::Status as TonicStatusError;
use tonic::transport::Error as TonicTransportError;

/// Result type used throughout `modelardb_client`.
pub type Result<T> = StdResult<T, ModelarDbClientError>;
Expand Down
4 changes: 2 additions & 2 deletions crates/modelardb_client/src/helper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@

use std::result::Result;

use rustyline::Context;
use rustyline::Helper;
use rustyline::completion::{self, Completer};
use rustyline::error::ReadlineError;
use rustyline::highlight::Highlighter;
use rustyline::hint::Hinter;
use rustyline::validate::Validator;
use rustyline::Context;
use rustyline::Helper;

/// Provides tab-completion for the client's read-eval-print loop.
pub struct ClientHelper {
Expand Down
10 changes: 5 additions & 5 deletions crates/modelardb_client/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,17 @@ use std::sync::Arc;
use std::time::Instant;

use arrow::array::ArrayRef;
use arrow::datatypes::{Schema, SchemaRef, ToByteSlice};
use arrow::datatypes::{Schema, ToByteSlice};
use arrow::ipc::convert;
use arrow::util::pretty;
use arrow_flight::flight_service_client::FlightServiceClient;
use arrow_flight::{utils, Action, Criteria, FlightData, FlightDescriptor, Ticket};
use arrow_flight::{Action, Criteria, FlightData, FlightDescriptor, Ticket, utils};
use bytes::Bytes;
use object_store::local::LocalFileSystem;
use object_store::path::Path;
use object_store::{ObjectMeta, ObjectStore};
use rustyline::history::FileHistory;
use rustyline::Editor;
use rustyline::history::FileHistory;
use tonic::transport::Channel;
use tonic::{Request, Streaming};

Expand Down Expand Up @@ -387,7 +387,7 @@ async fn execute_query_and_print_result(
/// Returns [`ModelarDbClientError`] if the batches in the result set could not be printed.
async fn print_batches_with_confirmation(
mut stream: Streaming<FlightData>,
schema: SchemaRef,
schema: Arc<Schema>,
dictionaries_by_id: &HashMap<i64, ArrayRef>,
) -> Result<()> {
let mut user_input = String::new();
Expand Down Expand Up @@ -424,7 +424,7 @@ async fn print_batches_with_confirmation(
/// batches in the result set could not be printed.
async fn print_batches_without_confirmation(
mut stream: Streaming<FlightData>,
schema: SchemaRef,
schema: Arc<Schema>,
dictionaries_by_id: &HashMap<i64, ArrayRef>,
) -> Result<()> {
while let Some(flight_data) = stream.message().await? {
Expand Down
7 changes: 4 additions & 3 deletions crates/modelardb_common/src/remote.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,12 @@

use std::collections::HashMap;
use std::error::Error;
use std::sync::Arc;

use arrow::array::ArrayRef;
use arrow::datatypes::SchemaRef;
use arrow::datatypes::Schema;
use arrow::record_batch::RecordBatch;
use arrow_flight::{utils, FlightData, FlightDescriptor};
use arrow_flight::{FlightData, FlightDescriptor, utils};
use tonic::Status;

/// Return the table stored as the first element in [`FlightDescriptor.path`], otherwise a
Expand All @@ -39,7 +40,7 @@ pub fn table_name_from_flight_descriptor(
/// could not be converted, [`Status`] is returned.
pub fn flight_data_to_record_batch(
flight_data: &FlightData,
schema: &SchemaRef,
schema: &Arc<Schema>,
dictionaries_by_id: &HashMap<i64, ArrayRef>,
) -> Result<RecordBatch, Status> {
debug_assert_eq!(flight_data.flight_descriptor, None);
Expand Down
2 changes: 1 addition & 1 deletion crates/modelardb_common/src/test/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ pub const INGESTED_BUFFER_SIZE: usize = 1438392;
pub const UNCOMPRESSED_BUFFER_SIZE: usize = 1048576;

/// Expected size of the compressed segments produced in the tests.
pub const COMPRESSED_SEGMENTS_SIZE: usize = 1437;
pub const COMPRESSED_SEGMENTS_SIZE: usize = 1565;

/// Number of bytes reserved for ingested data in tests.
pub const INGESTED_RESERVED_MEMORY_IN_BYTES: usize = 5 * 1024 * 1024; // 5 MiB
Expand Down
Loading