From a3cfd67bdbf461ecba8deea86a08938497b40abc Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 30 Oct 2025 18:37:26 -0700 Subject: [PATCH 1/6] failing test --- .../tests/forward_compat/test_compat.py | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/python/python/tests/forward_compat/test_compat.py b/python/python/tests/forward_compat/test_compat.py index a9b3ec8fbee..44a7a4880db 100644 --- a/python/python/tests/forward_compat/test_compat.py +++ b/python/python/tests/forward_compat/test_compat.py @@ -6,6 +6,8 @@ # This file will be run on older versions of Lance to test that the # current version of Lance can read the test data generated by datagen.py. +import shutil + import lance import pyarrow as pa import pyarrow.compute as pc @@ -116,3 +118,42 @@ def test_list_indices_ignores_new_fts_index_version(): indices = ds.list_indices() # the new index version should be ignored assert len(indices) == 0 + + +def test_write_scalar_index(tmp_path: str): + path = get_path("scalar_index") + # copy to tmp path to avoid modifying original + shutil.copytree(path, tmp_path) + + ds = lance.dataset(tmp_path) + data = pa.table( + { + "idx": pa.array([1000]), + "btree": pa.array([1000]), + "bitmap": pa.array([1000]), + "label_list": pa.array([["label1000"]]), + "ngram": pa.array(["word1000"]), + "zonemap": pa.array([1000]), + "bloomfilter": pa.array([1000]), + } + ) + ds.insert(data) + ds.optimize.optimize_indices() + ds.optimize.compact_files() + + +def test_write_fts(tmp_path: str): + path = get_path("fts_index") + # copy to tmp path to avoid modifying original + shutil.copytree(path, tmp_path) + + ds = lance.dataset(tmp_path) + data = pa.table( + { + "id": pa.array([1000]), + "text": pa.array(["new document to index"]), + } + ) + ds.insert(data) + ds.optimize.optimize_indices() + ds.optimize.compact_files() From 5d4478b3d50a5d882d06d79dc55915832775020f Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 30 Oct 2025 19:01:32 -0700 Subject: [PATCH 2/6] try fixing test --- python/python/tests/forward_compat/test_compat.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/python/python/tests/forward_compat/test_compat.py b/python/python/tests/forward_compat/test_compat.py index 44a7a4880db..840f4ce3e22 100644 --- a/python/python/tests/forward_compat/test_compat.py +++ b/python/python/tests/forward_compat/test_compat.py @@ -120,6 +120,11 @@ def test_list_indices_ignores_new_fts_index_version(): assert len(indices) == 0 +@pytest.mark.forward +@pytest.mark.skipif( + Version(lance.__version__) < Version("0.20.0"), + reason="Version is too old to read index files stored with Lance 2.0 file format", +) def test_write_scalar_index(tmp_path: str): path = get_path("scalar_index") # copy to tmp path to avoid modifying original @@ -142,6 +147,11 @@ def test_write_scalar_index(tmp_path: str): ds.optimize.compact_files() +@pytest.mark.forward +@pytest.mark.skipif( + Version(lance.__version__) < Version("0.36.0"), + reason="FTS token set format was introduced in 0.36.0", +) def test_write_fts(tmp_path: str): path = get_path("fts_index") # copy to tmp path to avoid modifying original From afece2171cfad2198c26804a285e7a921bbe54c0 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 30 Oct 2025 19:53:01 -0700 Subject: [PATCH 3/6] fix test --- python/python/tests/forward_compat/test_compat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/python/tests/forward_compat/test_compat.py b/python/python/tests/forward_compat/test_compat.py index 840f4ce3e22..0b75b8930d9 100644 --- a/python/python/tests/forward_compat/test_compat.py +++ b/python/python/tests/forward_compat/test_compat.py @@ -128,7 +128,7 @@ def test_list_indices_ignores_new_fts_index_version(): def test_write_scalar_index(tmp_path: str): path = get_path("scalar_index") # copy to tmp path to avoid modifying original - shutil.copytree(path, tmp_path) + shutil.copytree(path, tmp_path, dirs_exist_ok=True) ds = lance.dataset(tmp_path) data = pa.table( @@ -155,7 +155,7 @@ def test_write_scalar_index(tmp_path: str): def test_write_fts(tmp_path: str): path = get_path("fts_index") # copy to tmp path to avoid modifying original - shutil.copytree(path, tmp_path) + shutil.copytree(path, tmp_path, dirs_exist_ok=True) ds = lance.dataset(tmp_path) data = pa.table( From b9dfc08517371f2a166c8ebd957feef70d7f0d88 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 30 Oct 2025 19:59:17 -0700 Subject: [PATCH 4/6] fix test --- python/python/tests/forward_compat/test_compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/python/tests/forward_compat/test_compat.py b/python/python/tests/forward_compat/test_compat.py index 0b75b8930d9..43424766cd1 100644 --- a/python/python/tests/forward_compat/test_compat.py +++ b/python/python/tests/forward_compat/test_compat.py @@ -160,7 +160,7 @@ def test_write_fts(tmp_path: str): ds = lance.dataset(tmp_path) data = pa.table( { - "id": pa.array([1000]), + "idx": pa.array([1000]), "text": pa.array(["new document to index"]), } ) From 1dc22ed015f521c1095799373ac51980d9a81fbf Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 30 Oct 2025 20:12:16 -0700 Subject: [PATCH 5/6] finish --- protos/table.proto | 7 +++++++ python/python/tests/forward_compat/test_compat.py | 4 ++-- rust/lance-table/src/format/manifest.rs | 4 +++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/protos/table.proto b/protos/table.proto index d8f637a5d04..26fd26c514d 100644 --- a/protos/table.proto +++ b/protos/table.proto @@ -58,6 +58,13 @@ message Manifest { // that the library is semantically versioned, this is a string. However, if it // is semantically versioned, it should be a valid semver string without any 'v' // prefix. For example: `2.0.0`, `2.0.0-rc.1`. + // + // Compatibility note: The Lance Rust library prior to version X.Y.Z would + // panic if it encountered a non-semver version string here or a version string + // with anything more than major, minor, patch version. For example, a "1.2.3-beta" + // version string would cause a panic. For compatibility with older datasets, the + // Lance Rust library always writes version as "X.Y.Z" without any pre-release or build + // metadata. string version = 2; } diff --git a/python/python/tests/forward_compat/test_compat.py b/python/python/tests/forward_compat/test_compat.py index 43424766cd1..5e57dac5c71 100644 --- a/python/python/tests/forward_compat/test_compat.py +++ b/python/python/tests/forward_compat/test_compat.py @@ -143,7 +143,7 @@ def test_write_scalar_index(tmp_path: str): } ) ds.insert(data) - ds.optimize.optimize_indices() + # ds.optimize.optimize_indices() ds.optimize.compact_files() @@ -165,5 +165,5 @@ def test_write_fts(tmp_path: str): } ) ds.insert(data) - ds.optimize.optimize_indices() + # ds.optimize.optimize_indices() ds.optimize.compact_files() diff --git a/rust/lance-table/src/format/manifest.rs b/rust/lance-table/src/format/manifest.rs index 11512065c7c..3cc07d7bff7 100644 --- a/rust/lance-table/src/format/manifest.rs +++ b/rust/lance-table/src/format/manifest.rs @@ -759,9 +759,11 @@ impl WriterVersion { impl Default for WriterVersion { #[cfg(not(test))] fn default() -> Self { + let mut version = semver::Version::parse(env!("CARGO_PKG_VERSION")).unwrap(); + version.pre = semver::Prerelease::EMPTY; Self { library: "lance".to_string(), - version: env!("CARGO_PKG_VERSION").to_string(), + version: version.to_string(), } } From 9814d942c434d7d97a376668cc413a4d9becb3e9 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 30 Oct 2025 20:30:56 -0700 Subject: [PATCH 6/6] clarify version in spec --- protos/table.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/protos/table.proto b/protos/table.proto index 26fd26c514d..4a2606e7017 100644 --- a/protos/table.proto +++ b/protos/table.proto @@ -59,7 +59,7 @@ message Manifest { // is semantically versioned, it should be a valid semver string without any 'v' // prefix. For example: `2.0.0`, `2.0.0-rc.1`. // - // Compatibility note: The Lance Rust library prior to version X.Y.Z would + // Compatibility note: The Lance Rust library in versions <=0.38.3 would // panic if it encountered a non-semver version string here or a version string // with anything more than major, minor, patch version. For example, a "1.2.3-beta" // version string would cause a panic. For compatibility with older datasets, the