lance-format · jackye1995 · Oct 31, 2025 · Oct 31, 2025 · Oct 31, 2025 · jackye1995
diff --git a/java/lance-jni/Cargo.lock b/java/lance-jni/Cargo.lock
diff --git a/protos/table.proto b/protos/table.proto
@@ -58,7 +58,33 @@ message Manifest {
     // that the library is semantically versioned, this is a string. However, if it
     // is semantically versioned, it should be a valid semver string without any 'v'
     // prefix. For example: `2.0.0`, `2.0.0-rc.1`.
+    //
+    // For forward compatibility with older readers, when writing new manifests this
+    // field should contain only the core version (major.minor.patch) without any
+    // prerelease or build metadata. The prerelease/build info should be stored in
+    // the separate prerelease and build_metadata fields instead.
     string version = 2;
+    // Optional semver prerelease identifier.
+    //
+    // This field stores the prerelease portion of a semantic version separately
+    // from the core version number. For example, if the full version is "2.0.0-rc.1",
+    // the version field would contain "2.0.0" and prerelease would contain "rc.1".
+    //
+    // This separation ensures forward compatibility: older readers can parse the
+    // clean version field without errors, while newer readers can reconstruct the
+    // full semantic version as `version[-prerelease][+build_metadata]`.
+    //
+    // If absent, the version field is used as-is.
+    optional string prerelease = 3;
+    // Optional semver build metadata.
+    //
+    // This field stores the build metadata portion of a semantic version separately
+    // from the core version number. For example, if the full version is
+    // "2.0.0-rc.1+build.123", the version field would contain "2.0.0", prerelease
+    // would contain "rc.1", and build_metadata would contain "build.123".
+    //
+    // If absent, no build metadata is present.
+    optional string build_metadata = 4;
   }
 
   // The version of the writer that created this file.

diff --git a/python/python/tests/forward_compat/test_compat.py b/python/python/tests/forward_compat/test_compat.py
@@ -6,6 +6,8 @@
 # This file will be run on older versions of Lance to test that the
 # current version of Lance can read the test data generated by datagen.py.
 
+import shutil
+
 import lance
 import pyarrow as pa
 import pyarrow.compute as pc
@@ -116,3 +118,52 @@ def test_list_indices_ignores_new_fts_index_version():
     indices = ds.list_indices()
     # the new index version should be ignored
     assert len(indices) == 0
+
+
+@pytest.mark.forward
+@pytest.mark.skipif(
+    Version(lance.__version__) < Version("0.20.0"),
+    reason="Version is too old to read index files stored with Lance 2.0 file format",
+)
+def test_write_scalar_index(tmp_path: str):
+    """Insert one row into a copy of the "scalar_index" data, then compact files."""
+    # Single-row payload: one value per column.
+    new_row = {
+        "idx": pa.array([1000]),
+        "btree": pa.array([1000]),
+        "bitmap": pa.array([1000]),
+        "label_list": pa.array([["label1000"]]),
+        "ngram": pa.array(["word1000"]),
+        "zonemap": pa.array([1000]),
+        "bloomfilter": pa.array([1000]),
+    }
+
+    # Work on a copy under tmp_path so the original data is left unmodified.
+    source_dir = get_path("scalar_index")
+    shutil.copytree(source_dir, tmp_path, dirs_exist_ok=True)
+    dataset = lance.dataset(tmp_path)
+    dataset.insert(pa.table(new_row))
+    # NOTE(review): dataset.optimize.optimize_indices() is disabled -- confirm why.
+    dataset.optimize.compact_files()
+
+
+@pytest.mark.forward
+@pytest.mark.skipif(
+    Version(lance.__version__) < Version("0.36.0"),
+    reason="FTS token set format was introduced in 0.36.0",
+)
+def test_write_fts(tmp_path: str):
+    """Insert one row into a copy of the "fts_index" data, then compact files."""
+    # Single-row payload: one value per column.
+    new_row = {
+        "idx": pa.array([1000]),
+        "text": pa.array(["new document to index"]),
+    }
+
+    # Work on a copy under tmp_path so the original data is left unmodified.
+    source_dir = get_path("fts_index")
+    shutil.copytree(source_dir, tmp_path, dirs_exist_ok=True)
+    dataset = lance.dataset(tmp_path)
+    dataset.insert(pa.table(new_row))
+    # NOTE(review): dataset.optimize.optimize_indices() is disabled -- confirm why.
+    dataset.optimize.compact_files()