Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 29 additions & 2 deletions rust/lance-table/src/format/fragment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ pub struct DataFile {
///
/// Note that -1 is a possibility and it indicates that the field has
/// no top-level column in the file.
///
/// Columns that lack a field id may still exist as extra entries in
/// `column_indices`; such columns are ignored by field-id–based projection.
Comment thread
Xuanwo marked this conversation as resolved.
/// For example, some fields, such as blob fields, occupy multiple
/// columns in the file but only have a single field id.
#[serde(default)]
pub column_indices: Vec<i32>,
/// The major version of the file format used to write this file.
Expand Down Expand Up @@ -139,10 +144,12 @@ impl DataFile {
location!(),
));
}
} else if self.fields.len() != self.column_indices.len() {
} else if self.column_indices.len() < self.fields.len() {
// Every recorded field id must have a column index, but not every column needs
// to be associated with a field id (extra columns are allowed).
return Err(Error::corrupt_file(
base_path.child(self.path.clone()),
"contained an unequal number of fields / column_indices",
"contained fewer column_indices than fields",
location!(),
));
}
Expand Down Expand Up @@ -531,6 +538,7 @@ mod tests {
use arrow_schema::{
DataType, Field as ArrowField, Fields as ArrowFields, Schema as ArrowSchema,
};
use object_store::path::Path;
use serde_json::{json, Value};

#[test]
Expand Down Expand Up @@ -618,4 +626,23 @@ mod tests {
let frag2 = Fragment::from_json(&json).unwrap();
assert_eq!(fragment, frag2);
}

#[test]
fn data_file_validate_allows_extra_columns() {
    // Validation must accept a data file that carries more column indices
    // than field ids.
    let root = Path::from("base");

    // Two field ids but three column indices: the trailing column has no
    // field id mapped to it.
    let file_with_extra_column = DataFile {
        path: String::from("foo.lance"),
        fields: vec![1, 2],
        column_indices: vec![0, 1, 2],
        file_major_version: MAJOR_VERSION as u32,
        file_minor_version: MINOR_VERSION as u32,
        file_size_bytes: Default::default(),
        base_id: None,
    };

    let outcome = file_with_extra_column.validate(&root);
    outcome.expect("validation should allow extra columns without field ids");
}
}
2 changes: 0 additions & 2 deletions rust/lance/src/dataset/fragment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1194,8 +1194,6 @@ impl FileFragment {
/// Verifies:
/// * All field ids in the fragment are distinct
/// * Within each data file, field ids are in increasing order
/// * All fields in the schema have a corresponding field in one of the data
/// files
/// * All data files exist and have the same length
/// * Field ids are distinct between data files.
/// * Deletion file exists and has rowids in the correct range
Expand Down
Loading