diff --git a/protos/table.proto b/protos/table.proto
index d8f637a5d04..4a2606e7017 100644
--- a/protos/table.proto
+++ b/protos/table.proto
@@ -58,6 +58,13 @@ message Manifest {
     // that the library is semantically versioned, this is a string. However, if it
     // is semantically versioned, it should be a valid semver string without any 'v'
     // prefix. For example: `2.0.0`, `2.0.0-rc.1`.
+    //
+    // Compatibility note: The Lance Rust library in versions <=0.38.3 would
+    // panic if it encountered a non-semver version string here or a version string
+    // with anything beyond the major, minor, and patch components. For example, a
+    // "1.2.3-beta" version string would cause a panic. To remain readable by those
+    // older versions, the Lance Rust library always writes the version as "X.Y.Z",
+    // without any pre-release or build metadata.
     string version = 2;
   }
 
diff --git a/python/python/tests/forward_compat/test_compat.py b/python/python/tests/forward_compat/test_compat.py
index a9b3ec8fbee..5e57dac5c71 100644
--- a/python/python/tests/forward_compat/test_compat.py
+++ b/python/python/tests/forward_compat/test_compat.py
@@ -6,6 +6,9 @@
 # This file will be run on older versions of Lance to test that the
 # current version of Lance can read the test data generated by datagen.py.
 
+import shutil
+from pathlib import Path
+
 import lance
 import pyarrow as pa
 import pyarrow.compute as pc
@@ -116,3 +119,60 @@ def test_list_indices_ignores_new_fts_index_version():
     indices = ds.list_indices()
     # the new index version should be ignored
     assert len(indices) == 0
+
+
+@pytest.mark.forward
+@pytest.mark.skipif(
+    Version(lance.__version__) < Version("0.20.0"),
+    reason="Version is too old to read index files stored with Lance 2.0 file format",
+)
+def test_write_scalar_index(tmp_path: Path) -> None:
+    """Append one row to a copy of the "scalar_index" test data and compact it.
+
+    There are no assertions: the test passes when neither call raises.
+    """
+    path = get_path("scalar_index")
+    # copy to tmp path to avoid modifying original
+    shutil.copytree(path, tmp_path, dirs_exist_ok=True)
+
+    ds = lance.dataset(tmp_path)
+    data = pa.table(
+        {
+            "idx": pa.array([1000]),
+            "btree": pa.array([1000]),
+            "bitmap": pa.array([1000]),
+            "label_list": pa.array([["label1000"]]),
+            "ngram": pa.array(["word1000"]),
+            "zonemap": pa.array([1000]),
+            "bloomfilter": pa.array([1000]),
+        }
+    )
+    ds.insert(data)
+    # TODO(review): decide whether ds.optimize.optimize_indices() should run here.
+    ds.optimize.compact_files()
+
+
+@pytest.mark.forward
+@pytest.mark.skipif(
+    Version(lance.__version__) < Version("0.36.0"),
+    reason="FTS token set format was introduced in 0.36.0",
+)
+def test_write_fts(tmp_path: Path) -> None:
+    """Append one row to a copy of the "fts_index" test data and compact it.
+
+    There are no assertions: the test passes when neither call raises.
+    """
+    path = get_path("fts_index")
+    # copy to tmp path to avoid modifying original
+    shutil.copytree(path, tmp_path, dirs_exist_ok=True)
+
+    ds = lance.dataset(tmp_path)
+    data = pa.table(
+        {
+            "idx": pa.array([1000]),
+            "text": pa.array(["new document to index"]),
+        }
+    )
+    ds.insert(data)
+    # TODO(review): decide whether ds.optimize.optimize_indices() should run here.
+    ds.optimize.compact_files()
diff --git a/rust/lance-table/src/format/manifest.rs b/rust/lance-table/src/format/manifest.rs
index 11512065c7c..3cc07d7bff7 100644
--- a/rust/lance-table/src/format/manifest.rs
+++ b/rust/lance-table/src/format/manifest.rs
@@ -759,9 +759,15 @@ impl WriterVersion {
 impl Default for WriterVersion {
     #[cfg(not(test))]
     fn default() -> Self {
+        // Lance <=0.38.3 panics when it reads a writer version that is not a
+        // plain "X.Y.Z" string, so drop both the pre-release and the build
+        // metadata components before recording the version.
+        let mut version = semver::Version::parse(env!("CARGO_PKG_VERSION")).unwrap();
+        version.pre = semver::Prerelease::EMPTY;
+        version.build = semver::BuildMetadata::EMPTY;
         Self {
             library: "lance".to_string(),
-            version: env!("CARGO_PKG_VERSION").to_string(),
+            version: version.to_string(),
         }
     }
 