Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 1 addition & 24 deletions java/lance-jni/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use arrow_schema::{TimeUnit, UnionFields};
use jni::objects::{JObject, JValue};
use jni::sys::{jboolean, jint};
use jni::JNIEnv;
use lance_core::datatypes::{Field, Schema, StorageClass};
use lance_core::datatypes::{Field, Schema};

impl IntoJava for Schema {
fn into_java<'local>(self, env: &mut JNIEnv<'local>) -> Result<JObject<'local>> {
Expand Down Expand Up @@ -40,11 +40,8 @@ pub fn convert_to_java_field<'local>(
let children = convert_children_fields(env, lance_field)?;
let metadata = to_java_map(env, &lance_field.metadata)?;
let arrow_type = convert_arrow_type(env, &lance_field.data_type())?;
let storage_type = convert_storage_type(env, &lance_field.storage_class)?;

let ctor_sig = "(IILjava/lang/String;".to_owned()
+ "ZLorg/apache/arrow/vector/types/pojo/ArrowType;"
+ "Lcom/lancedb/lance/schema/StorageType;"
+ "Lorg/apache/arrow/vector/types/pojo/DictionaryEncoding;"
+ "Ljava/util/Map;"
+ "Ljava/util/List;Z)V";
Expand All @@ -57,7 +54,6 @@ pub fn convert_to_java_field<'local>(
JValue::Object(&JObject::from(name)),
JValue::Bool(lance_field.nullable as jboolean),
JValue::Object(&arrow_type),
JValue::Object(&storage_type),
JValue::Object(&JObject::null()),
JValue::Object(&metadata),
JValue::Object(&children),
Expand All @@ -68,25 +64,6 @@ pub fn convert_to_java_field<'local>(
Ok(field_obj)
}

/// Maps a Lance [`StorageClass`] to the Java enum `com.lancedb.lance.schema.StorageType`.
///
/// `StorageClass::Blob` resolves to the constant named `BLOB`; every other
/// storage class resolves to `DEFAULT`. The constant is looked up by calling
/// the enum's static `valueOf(String)` method through JNI.
///
/// # Errors
///
/// Propagates any failure from creating the Java string, invoking `valueOf`,
/// or converting the returned value into an object reference.
fn convert_storage_type<'local>(
    env: &mut JNIEnv<'local>,
    storage_class: &StorageClass,
) -> Result<JObject<'local>> {
    // Pick the enum constant name first so the Java string is built in one place.
    let constant_name = if matches!(storage_class, StorageClass::Blob) {
        "BLOB"
    } else {
        "DEFAULT"
    };
    let name_arg = JObject::from(env.new_string(constant_name)?);

    let storage_type = env
        .call_static_method(
            "com/lancedb/lance/schema/StorageType",
            "valueOf",
            "(Ljava/lang/String;)Lcom/lancedb/lance/schema/StorageType;",
            &[JValue::Object(&name_arg)],
        )?
        .l()?;

    Ok(storage_type)
}

fn convert_children_fields<'local>(
env: &mut JNIEnv<'local>,
lance_field: &Field,
Expand Down
12 changes: 2 additions & 10 deletions java/lance-jni/src/transaction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -397,18 +397,16 @@ fn convert_to_java_transaction<'local>(
Some(properties) => to_java_map(env, &properties)?,
_ => JObject::null(),
};
let operation = convert_to_java_operation_inner(env, transaction.operation)?;
let blobs_op = convert_to_java_operation(env, transaction.blobs_op)?;
let operation = convert_to_java_operation(env, Some(transaction.operation))?;

let java_transaction = env.new_object(
"com/lancedb/lance/Transaction",
"(Lcom/lancedb/lance/Dataset;JLjava/lang/String;Lcom/lancedb/lance/operation/Operation;Lcom/lancedb/lance/operation/Operation;Ljava/util/Map;Ljava/util/Map;)V",
"(Lcom/lancedb/lance/Dataset;JLjava/lang/String;Lcom/lancedb/lance/operation/Operation;Ljava/util/Map;Ljava/util/Map;)V",
&[
JValue::Object(java_dataset),
JValue::Long(transaction.read_version as i64),
JValue::Object(&uuid),
JValue::Object(&operation),
JValue::Object(&blobs_op),
JValue::Object(&JObject::null()),
JValue::Object(&transaction_properties),
],
Expand Down Expand Up @@ -707,11 +705,6 @@ fn convert_to_rust_transaction(
.l()?;
let op = convert_to_rust_operation(env, &op, java_dataset)?;

let blobs_op =
env.get_optional_from_method(&java_transaction, "blobsOperation", |env, blobs_op| {
convert_to_rust_operation(env, &blobs_op, java_dataset)
})?;

let transaction_properties = env.get_optional_from_method(
&java_transaction,
"transactionProperties",
Expand All @@ -722,7 +715,6 @@ fn convert_to_rust_transaction(
)?;
Ok(TransactionBuilder::new(read_ver, op)
.uuid(uuid)
.blobs_op(blobs_op)
.transaction_properties(transaction_properties.map(Arc::new))
.build())
}
Expand Down
18 changes: 1 addition & 17 deletions java/src/main/java/com/lancedb/lance/Transaction.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,21 +37,18 @@ public class Transaction {
// Mainly for JNI usage
private final Dataset dataset;
private final Operation operation;
private final Optional<Operation> blobOp;

private Transaction(
Dataset dataset,
long readVersion,
String uuid,
Operation operation,
Operation blobOp,
Map<String, String> writeParams,
Map<String, String> transactionProperties) {
this.dataset = dataset;
this.readVersion = readVersion;
this.uuid = uuid;
this.operation = operation;
this.blobOp = Optional.ofNullable(blobOp);
this.writeParams = writeParams != null ? writeParams : new HashMap<>();
this.transactionProperties = Optional.ofNullable(transactionProperties);
}
Expand All @@ -68,10 +65,6 @@ public Operation operation() {
return operation;
}

/**
 * Returns the optional blob operation held by this transaction.
 *
 * @return the stored {@code blobOp} value
 */
public Optional<Operation> blobsOperation() {
return blobOp;
}

/**
 * Returns the write parameters held by this transaction.
 *
 * @return the stored {@code writeParams} map
 */
public Map<String, String> writeParams() {
return writeParams;
}
Expand All @@ -89,7 +82,6 @@ public Dataset commit() {

/** Releases the main operation and, when one is present, the blob operation. */
public void release() {
operation.release();
// blobOp is an Optional, so release is only invoked when a blob operation exists.
blobOp.ifPresent(Operation::release);
}

@Override
Expand All @@ -99,7 +91,6 @@ public String toString() {
.add("uuid", uuid)
.add("operation", operation)
.add("writeParams", writeParams)
.add("blobOp", blobOp)
.add("transactionProperties", transactionProperties)
.toString();
}
Expand All @@ -116,7 +107,6 @@ public boolean equals(Object o) {
return readVersion == that.readVersion
&& uuid.equals(that.uuid)
&& Objects.equals(operation, that.operation)
&& Objects.equals(blobOp, that.blobOp)
&& Objects.equals(writeParams, that.writeParams)
&& Objects.equals(transactionProperties, that.transactionProperties);
}
Expand All @@ -126,7 +116,6 @@ public static class Builder {
private final Dataset dataset;
private long readVersion;
private Operation operation;
private Operation blobOp;
private Map<String, String> writeParams;
private Map<String, String> transactionProperties;

Expand Down Expand Up @@ -156,11 +145,6 @@ public Builder operation(Operation operation) {
return this;
}

/**
 * Sets the blob operation for the transaction being built.
 *
 * @param blobOp the blob operation to store on this builder
 * @return this builder, so calls can be chained
 */
public Builder blobsOperation(Operation blobOp) {
this.blobOp = blobOp;
return this;
}

private void validateState() {
if (operation != null) {
throw new IllegalStateException(
Expand All @@ -171,7 +155,7 @@ private void validateState() {
public Transaction build() {
Preconditions.checkState(operation != null, "TransactionBuilder has no operations");
return new Transaction(
dataset, readVersion, uuid, operation, blobOp, writeParams, transactionProperties);
dataset, readVersion, uuid, operation, writeParams, transactionProperties);
}
}
}
8 changes: 0 additions & 8 deletions java/src/main/java/com/lancedb/lance/schema/LanceField.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ public class LanceField {
private final String name;
private final boolean nullable;
private final ArrowType type;
private final StorageType storageType;
private final DictionaryEncoding dictionaryEncoding;
private final Map<String, String> metadata;
private final List<LanceField> children;
Expand All @@ -42,7 +41,6 @@ public class LanceField {
String name,
boolean nullable,
ArrowType type,
StorageType storageType,
DictionaryEncoding dictionaryEncoding,
Map<String, String> metadata,
List<LanceField> children,
Expand All @@ -52,7 +50,6 @@ public class LanceField {
this.name = name;
this.nullable = nullable;
this.type = type;
this.storageType = storageType;
this.dictionaryEncoding = dictionaryEncoding;
this.metadata = metadata;
this.children = children;
Expand All @@ -79,10 +76,6 @@ public ArrowType getType() {
return type;
}

/**
 * Returns the storage type recorded for this field.
 *
 * @return the stored {@code storageType} value
 */
public StorageType getStorageType() {
return storageType;
}

/**
 * Returns the dictionary encoding of this field.
 *
 * @return an {@link Optional} holding the dictionary encoding, or an empty {@link Optional} when
 *     the stored value is {@code null}
 */
public Optional<DictionaryEncoding> getDictionaryEncoding() {
return Optional.ofNullable(dictionaryEncoding);
}
Expand Down Expand Up @@ -114,7 +107,6 @@ public String toString() {
.add("name", name)
.add("nullable", nullable)
.add("type", type)
.add("storageType", storageType)
.add("dictionaryEncoding", dictionaryEncoding)
.add("children", children)
.add("isUnenforcedPrimaryKey", isUnenforcedPrimaryKey)
Expand Down
19 changes: 0 additions & 19 deletions java/src/main/java/com/lancedb/lance/schema/StorageType.java

This file was deleted.

7 changes: 2 additions & 5 deletions java/src/test/java/com/lancedb/lance/TestUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -628,11 +628,8 @@ public static final class BlobTestDataset {
/**
* Build the Arrow schema with a filter column and a blob column marked as blob storage.
*
* <p>Columns: - filterer: Int64 (not nullable) - blobs: Binary (nullable) with metadata
* {"lance-schema:storage-class":"blob"}
*
* <p>Note: ArrowType.LargeBinary may not be available in our Arrow Java version; Binary is
* sufficient for tests and aligns with Lance blob storage when annotated via metadata.
* <p>Columns: - filterer: Int64 (not nullable) - blobs: LargeBinary (nullable) annotated with
* metadata {"lance-encoding:blob":"true"}
*/
public Schema getSchema() {
Map<String, String> blobMeta = Maps.newHashMap();
Expand Down
49 changes: 22 additions & 27 deletions protos/file.proto
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,16 @@ message FileDescriptor {

// A schema which describes the data type of each of the columns
message Schema {
// All fields in this file, including the nested fields.
repeated lance.file.Field fields = 1;
// Schema metadata.
map<string, bytes> metadata = 5;
// All fields in this file, including the nested fields.
repeated lance.file.Field fields = 1;
// Schema metadata.
map<string, bytes> metadata = 5;
}

// Metadata of one Lance file.
message Metadata {
// 4 was used for StatisticsMetadata in the past, but has been moved to prevent
// a bug in older readers.
// 4 was used for StatisticsMetadata in the past, but has been moved to
// prevent a bug in older readers.
reserved 4;

// Position of the manifest in the file. If it is zero, the manifest is stored
Expand All @@ -44,7 +44,7 @@ message Metadata {
// contiguously stored.
//
// Every field that is a part of the file will have a run in the page table.
// This includes struct columns, which will have a run of length 0 since
// This includes struct columns, which will have a run of length 0 since
// they don't store any actual data.
//
// For example, for the column 5 and batch 4, we have:
Expand All @@ -57,7 +57,7 @@ message Metadata {
message StatisticsMetadata {
// The schema of the statistics.
//
// This might be empty, meaning there are no statistics. It also might not
// This might be empty, meaning there are no statistics. It also might not
// contain statistics for every field.
repeated Field schema = 1;

Expand All @@ -70,20 +70,20 @@ message Metadata {

// The file position of the statistics page table
//
// The page table is a matrix of N x 2, where N = length of stats_fields. This is
// the same layout as the main page table, except there is always only one
// batch.
// The page table is a matrix of N x 2, where N = length of stats_fields.
// This is the same layout as the main page table, except there is always
// only one batch.
//
// For example, to get the stats column 5, we have:
// ```text
// position = stats_page_table[5][0];
// length = stats_page_table[5][1];
// ```
uint64 page_table_position = 3;
uint64 page_table_position = 3;
}

StatisticsMetadata statistics = 5;
} // Metadata
} // Metadata

// Supported encodings.
enum Encoding {
Expand Down Expand Up @@ -154,7 +154,8 @@ message Field {
// * "date32:day"
// * "date64:ms"
// * "decimal:128:{precision}:{scale}" / "decimal:256:{precision}:{scale}"
// * "time:{unit}" / "timestamp:{unit}" / "duration:{unit}", where unit is "s", "ms", "us", "ns"
// * "time:{unit}" / "timestamp:{unit}" / "duration:{unit}", where unit is
// "s", "ms", "us", "ns"
// * "dict:{value_type}:{index_type}:false"
string logical_type = 5;
// If this field is nullable.
Expand All @@ -168,24 +169,18 @@ message Field {
/// The logical type represents the value type of the column, e.g., a string value.
Dictionary dictionary = 8;

// Deprecated: optional extension type name, use metadata field ARROW:extension:name
// Deprecated: optional extension type name, use metadata field
// ARROW:extension:name
string extension_name = 9;

// optional field metadata (e.g. extension type name/parameters)
map<string, bytes> metadata = 10;

/// The storage class of the field
///
/// This determines the rate at which the field is compacted.
///
/// Currently, there are only two storage classes:
///
/// "" - The default storage class.
/// "blob" - The field is compacted into fewer rows per fragment.
///
/// Fields that have non-default storage classes are stored in different
/// datasets (e.g. blob fields are stored in the nested "_blobs" dataset)
string storage_class = 11;
// Field number 11 was previously `string storage_class`.
// Keep it reserved so older manifests remain compatible while new writers
// avoid reusing the slot.
reserved 11;
reserved "storage_class";
Comment thread
Xuanwo marked this conversation as resolved.

bool unenforced_primary_key = 12;
}
9 changes: 3 additions & 6 deletions protos/table.proto
Original file line number Diff line number Diff line change
Expand Up @@ -183,12 +183,9 @@ message Manifest {
// data itself and is attached to the output schema of scans.
map<string, string> table_metadata = 19;

// The version of the blob dataset associated with this table. Changes to
// blob fields will modify the blob dataset and update this version in the parent
// table.
//
// If this value is 0 then there are no blob fields.
uint64 blob_dataset_version = 17;
// Field number 17 (`blob_dataset_version`) previously held the version of a
// secondary blob dataset. It is reserved so the number and name are not reused.
reserved 17;
reserved "blob_dataset_version";

// The base paths of data files.
//
Expand Down
8 changes: 3 additions & 5 deletions protos/transaction.proto
Original file line number Diff line number Diff line change
Expand Up @@ -298,9 +298,7 @@ message Transaction {
UpdateBases update_bases = 114;
}

// An operation to apply to the blob dataset
oneof blob_operation {
Append blob_append = 200;
Overwrite blob_overwrite = 202;
}
// Fields 200/202 (`blob_append` / `blob_overwrite`) previously represented blob dataset ops.
reserved 200, 202;
reserved "blob_append", "blob_overwrite";
}
Loading
Loading