diff --git a/Cargo.lock b/Cargo.lock
index 2e68786d957..9f26e238541 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2302,7 +2302,7 @@ dependencies = [
 
 [[package]]
 name = "fsst"
-version = "0.20.1"
+version = "0.21.0"
 dependencies = [
  "arrow-array",
  "lance-datagen",
@@ -3002,7 +3002,7 @@ dependencies = [
 
 [[package]]
 name = "lance"
-version = "0.20.1"
+version = "0.21.0"
 dependencies = [
  "all_asserts",
  "approx",
@@ -3082,7 +3082,7 @@ dependencies = [
 
 [[package]]
 name = "lance-arrow"
-version = "0.20.1"
+version = "0.21.0"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -3099,7 +3099,7 @@ dependencies = [
 
 [[package]]
 name = "lance-core"
-version = "0.20.1"
+version = "0.21.0"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -3138,7 +3138,7 @@ dependencies = [
 
 [[package]]
 name = "lance-datafusion"
-version = "0.20.1"
+version = "0.21.0"
 dependencies = [
  "arrow",
  "arrow-array",
@@ -3166,7 +3166,7 @@ dependencies = [
 
 [[package]]
 name = "lance-datagen"
-version = "0.20.1"
+version = "0.21.0"
 dependencies = [
  "arrow",
  "arrow-array",
@@ -3183,7 +3183,7 @@ dependencies = [
 
 [[package]]
 name = "lance-encoding"
-version = "0.20.1"
+version = "0.21.0"
 dependencies = [
  "arrayref",
  "arrow",
@@ -3229,7 +3229,7 @@ dependencies = [
 
 [[package]]
 name = "lance-encoding-datafusion"
-version = "0.20.1"
+version = "0.21.0"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -3261,7 +3261,7 @@ dependencies = [
 
 [[package]]
 name = "lance-file"
-version = "0.20.1"
+version = "0.21.0"
 dependencies = [
  "arrow-arith",
  "arrow-array",
@@ -3303,7 +3303,7 @@ dependencies = [
 
 [[package]]
 name = "lance-index"
-version = "0.20.1"
+version = "0.21.0"
 dependencies = [
  "approx",
  "arrow",
@@ -3362,7 +3362,7 @@ dependencies = [
 
 [[package]]
 name = "lance-io"
-version = "0.20.1"
+version = "0.21.0"
 dependencies = [
  "arrow",
  "arrow-arith",
@@ -3407,7 +3407,7 @@ dependencies = [
 
 [[package]]
 name = "lance-jni"
-version = "0.20.1"
+version = "0.21.0"
 dependencies = [
  "arrow",
  "arrow-schema",
@@ -3428,7 +3428,7 @@ dependencies = [
 
 [[package]]
 name = "lance-linalg"
-version = "0.20.1"
+version = "0.21.0"
 dependencies = [
  "approx",
  "arrow-arith",
@@ -3457,7 +3457,7 @@ dependencies = [
 
 [[package]]
 name = "lance-table"
-version = "0.20.1"
+version = "0.21.0"
 dependencies = [
  "arrow",
  "arrow-array",
@@ -3501,7 +3501,7 @@ dependencies = [
 
 [[package]]
 name = "lance-test-macros"
-version = "0.20.1"
+version = "0.21.0"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -3510,7 +3510,7 @@ dependencies = [
 
 [[package]]
 name = "lance-testing"
-version = "0.20.1"
+version = "0.21.0"
 dependencies = [
  "arrow-array",
  "arrow-schema",
diff --git a/Cargo.toml b/Cargo.toml
index 94405a5c925..84c183579c2 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -21,7 +21,7 @@ exclude = ["python"]
 resolver = "2"
 
 [workspace.package]
-version = "0.20.1"
+version = "0.21.0"
 edition = "2021"
 authors = ["Lance Devs <dev@lancedb.com>"]
 license = "Apache-2.0"
@@ -44,21 +44,21 @@ categories = [
 rust-version = "1.78"
 
 [workspace.dependencies]
-lance = { version = "=0.20.1", path = "./rust/lance" }
-lance-arrow = { version = "=0.20.1", path = "./rust/lance-arrow" }
-lance-core = { version = "=0.20.1", path = "./rust/lance-core" }
-lance-datafusion = { version = "=0.20.1", path = "./rust/lance-datafusion" }
-lance-datagen = { version = "=0.20.1", path = "./rust/lance-datagen" }
-lance-encoding = { version = "=0.20.1", path = "./rust/lance-encoding" }
-lance-encoding-datafusion = { version = "=0.20.1", path = "./rust/lance-encoding-datafusion" }
-lance-file = { version = "=0.20.1", path = "./rust/lance-file" }
-lance-index = { version = "=0.20.1", path = "./rust/lance-index" }
-lance-io = { version = "=0.20.1", path = "./rust/lance-io" }
-lance-jni = { version = "=0.20.1", path = "./java/core/lance-jni" }
-lance-linalg = { version = "=0.20.1", path = "./rust/lance-linalg" }
-lance-table = { version = "=0.20.1", path = "./rust/lance-table" }
-lance-test-macros = { version = "=0.20.1", path = "./rust/lance-test-macros" }
-lance-testing = { version = "=0.20.1", path = "./rust/lance-testing" }
+lance = { version = "=0.21.0", path = "./rust/lance" }
+lance-arrow = { version = "=0.21.0", path = "./rust/lance-arrow" }
+lance-core = { version = "=0.21.0", path = "./rust/lance-core" }
+lance-datafusion = { version = "=0.21.0", path = "./rust/lance-datafusion" }
+lance-datagen = { version = "=0.21.0", path = "./rust/lance-datagen" }
+lance-encoding = { version = "=0.21.0", path = "./rust/lance-encoding" }
+lance-encoding-datafusion = { version = "=0.21.0", path = "./rust/lance-encoding-datafusion" }
+lance-file = { version = "=0.21.0", path = "./rust/lance-file" }
+lance-index = { version = "=0.21.0", path = "./rust/lance-index" }
+lance-io = { version = "=0.21.0", path = "./rust/lance-io" }
+lance-jni = { version = "=0.21.0", path = "./java/core/lance-jni" }
+lance-linalg = { version = "=0.21.0", path = "./rust/lance-linalg" }
+lance-table = { version = "=0.21.0", path = "./rust/lance-table" }
+lance-test-macros = { version = "=0.21.0", path = "./rust/lance-test-macros" }
+lance-testing = { version = "=0.21.0", path = "./rust/lance-testing" }
 approx = "0.5.1"
 # Note that this one does not include pyarrow
 arrow = { version = "53.2", optional = false, features = ["prettyprint"] }
@@ -111,7 +111,7 @@ datafusion-physical-expr = { version = "42.0", features = [
 ] }
 deepsize = "0.2.0"
 either = "1.0"
-fsst = { version = "=0.20.1", path = "./rust/lance-encoding/src/compression_algo/fsst" }
+fsst = { version = "=0.21.0", path = "./rust/lance-encoding/src/compression_algo/fsst" }
 futures = "0.3"
 http = "1.1.0"
 hyperloglogplus = { version = "0.4.1", features = ["const-loop"] }
diff --git a/java/core/pom.xml b/java/core/pom.xml
index 1c434cc1bf5..9b32dbd361f 100644
--- a/java/core/pom.xml
+++ b/java/core/pom.xml
@@ -8,7 +8,7 @@
     <parent>
         <groupId>com.lancedb</groupId>
         <artifactId>lance-parent</artifactId>
-        <version>0.20.1</version>
+        <version>0.21.0</version>
         <relativePath>../pom.xml</relativePath>
     </parent>
 
diff --git a/java/pom.xml b/java/pom.xml
index bd06179d7d6..6bfbb83ddfa 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -6,7 +6,7 @@
 
     <groupId>com.lancedb</groupId>
     <artifactId>lance-parent</artifactId>
-    <version>0.20.1</version>
+    <version>0.21.0</version>
     <packaging>pom</packaging>
 
     <name>Lance Parent</name>
diff --git a/java/spark/pom.xml b/java/spark/pom.xml
index 3b4692a5b15..4c6f183f5e4 100644
--- a/java/spark/pom.xml
+++ b/java/spark/pom.xml
@@ -8,7 +8,7 @@
     <parent>
         <groupId>com.lancedb</groupId>
         <artifactId>lance-parent</artifactId>
-        <version>0.20.1</version>
+        <version>0.21.0</version>
         <relativePath>../pom.xml</relativePath>
     </parent>
 
@@ -82,7 +82,7 @@
         <dependency>
             <groupId>com.lancedb</groupId>
             <artifactId>lance-core</artifactId>
-            <version>0.20.1</version>
+            <version>0.21.0</version>
         </dependency>
         <dependency>
             <groupId>org.apache.spark</groupId>
diff --git a/python/Cargo.lock b/python/Cargo.lock
index 89862bcdc19..fcd28fd2fd3 100644
--- a/python/Cargo.lock
+++ b/python/Cargo.lock
@@ -1964,7 +1964,7 @@ dependencies = [
 
 [[package]]
 name = "fsst"
-version = "0.20.1"
+version = "0.21.0"
 dependencies = [
  "rand",
 ]
@@ -2730,7 +2730,7 @@ dependencies = [
 
 [[package]]
 name = "lance"
-version = "0.20.1"
+version = "0.21.0"
 dependencies = [
  "arrow",
  "arrow-arith",
@@ -2792,7 +2792,7 @@ dependencies = [
 
 [[package]]
 name = "lance-arrow"
-version = "0.20.1"
+version = "0.21.0"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -2809,7 +2809,7 @@ dependencies = [
 
 [[package]]
 name = "lance-core"
-version = "0.20.1"
+version = "0.21.0"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -2845,7 +2845,7 @@ dependencies = [
 
 [[package]]
 name = "lance-datafusion"
-version = "0.20.1"
+version = "0.21.0"
 dependencies = [
  "arrow",
  "arrow-array",
@@ -2871,7 +2871,7 @@ dependencies = [
 
 [[package]]
 name = "lance-datagen"
-version = "0.20.1"
+version = "0.21.0"
 dependencies = [
  "arrow",
  "arrow-array",
@@ -2886,7 +2886,7 @@ dependencies = [
 
 [[package]]
 name = "lance-encoding"
-version = "0.20.1"
+version = "0.21.0"
 dependencies = [
  "arrayref",
  "arrow",
@@ -2924,7 +2924,7 @@ dependencies = [
 
 [[package]]
 name = "lance-file"
-version = "0.20.1"
+version = "0.21.0"
 dependencies = [
  "arrow-arith",
  "arrow-array",
@@ -2958,7 +2958,7 @@ dependencies = [
 
 [[package]]
 name = "lance-index"
-version = "0.20.1"
+version = "0.21.0"
 dependencies = [
  "arrow",
  "arrow-array",
@@ -3009,7 +3009,7 @@ dependencies = [
 
 [[package]]
 name = "lance-io"
-version = "0.20.1"
+version = "0.21.0"
 dependencies = [
  "arrow",
  "arrow-arith",
@@ -3048,7 +3048,7 @@ dependencies = [
 
 [[package]]
 name = "lance-linalg"
-version = "0.20.1"
+version = "0.21.0"
 dependencies = [
  "arrow-array",
  "arrow-ord",
@@ -3071,7 +3071,7 @@ dependencies = [
 
 [[package]]
 name = "lance-table"
-version = "0.20.1"
+version = "0.21.0"
 dependencies = [
  "arrow",
  "arrow-array",
@@ -3991,7 +3991,7 @@ checksum = "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4"
 dependencies = [
  "bytes",
  "heck 0.5.0",
- "itertools 0.10.5",
+ "itertools 0.12.1",
  "log",
  "multimap",
  "once_cell",
@@ -4012,7 +4012,7 @@ checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15"
 dependencies = [
  "bytes",
  "heck 0.5.0",
- "itertools 0.10.5",
+ "itertools 0.13.0",
  "log",
  "multimap",
  "once_cell",
@@ -4045,7 +4045,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1"
 dependencies = [
  "anyhow",
- "itertools 0.10.5",
+ "itertools 0.12.1",
  "proc-macro2",
  "quote",
  "syn 2.0.90",
@@ -4058,7 +4058,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5"
 dependencies = [
  "anyhow",
- "itertools 0.10.5",
+ "itertools 0.13.0",
  "proc-macro2",
  "quote",
  "syn 2.0.90",
@@ -4093,7 +4093,7 @@ dependencies = [
 
 [[package]]
 name = "pylance"
-version = "0.20.1"
+version = "0.21.0"
 dependencies = [
  "arrow",
  "arrow-array",
diff --git a/python/Cargo.toml b/python/Cargo.toml
index a56a87cba14..e9e9f867c4d 100644
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "pylance"
-version = "0.20.1"
+version = "0.21.0"
 edition = "2021"
 authors = ["Lance Devs <dev@lancedb.com>"]
 rust-version = "1.65"
diff --git a/python/python/tests/test_migration.py b/python/python/tests/test_migration.py
index 1dcfa0dfffc..97ae4398e22 100644
--- a/python/python/tests/test_migration.py
+++ b/python/python/tests/test_migration.py
@@ -62,3 +62,17 @@ def test_fix_data_storage_version(tmp_path: Path):
         OSError, match="The dataset contains a mixture of file versions"
     ):
         ds.delete("false")
+
+
+def test_old_btree_bitmap_indices(tmp_path: Path):
+    """
+    Versions below 0.21.0 wrote btree and bitmap indices in the legacy file
+    format, while 0.21.0 switched to the new format.  Make sure indices written
+    by an old version can still be read.
+    """
+    ds = prep_dataset(tmp_path, "v0.20.0", "old_btree_bitmap_indices.lance")
+    expected = pa.table({"bitmap": [3, 4], "btree": [3, 4]})
+
+    # Filter on each column in turn; both filters must select the same rows.
+    assert ds.to_table(filter="bitmap > 2") == expected
+    assert ds.to_table(filter="btree > 2") == expected
diff --git a/python/python/tests/test_scalar_index.py b/python/python/tests/test_scalar_index.py
index 2dad325f967..3777c90d489 100644
--- a/python/python/tests/test_scalar_index.py
+++ b/python/python/tests/test_scalar_index.py
@@ -319,6 +319,37 @@ def test_bitmap_index(tmp_path: Path):
     assert indices[0]["type"] == "Bitmap"
 
 
+def test_null_handling(tmp_path: Path):
+    """
+    Check how filters treat a null value, first by scanning without an index and
+    then with a BITMAP and a BTREE scalar index on the column.
+    """
+    tbl = pa.table({"x": [1, 2, None, 3]})
+    dataset = lance.write_dataset(tbl, tmp_path / "dataset")
+
+    def check(has_index: bool):
+        # Note: there is a bit of discrepancy here.  Datafusion does not consider
+        # NULL==NULL when doing an IN operation due to classic SQL shenanigans.
+        # We should decide at some point which behavior we want and make this
+        # consistent.
+        in_with_null_rows = 3 if has_index else 2
+        expected_rows = {
+            "x IS NULL": 1,
+            "x IS NOT NULL": 3,
+            "x > 0": 3,
+            "x < 5": 3,
+            "x IN (1, 2)": 2,
+            "x IN (1, 2, NULL)": in_with_null_rows,
+        }
+        for predicate, num_rows in expected_rows.items():
+            assert dataset.to_table(filter=predicate).num_rows == num_rows
+
+    check(False)
+    for index_type in ("BITMAP", "BTREE"):
+        dataset.create_scalar_index("x", index_type=index_type)
+        check(True)
+
+
 def test_label_list_index(tmp_path: Path):
     tags = pa.array(["tag1", "tag2", "tag3", "tag4", "tag5", "tag6", "tag7"])
     tag_list = pa.ListArray.from_arrays([0, 2, 4], tags)
diff --git a/rust/lance-index/src/scalar.rs b/rust/lance-index/src/scalar.rs
index effabea0be2..8493098ecf8 100644
--- a/rust/lance-index/src/scalar.rs
+++ b/rust/lance-index/src/scalar.rs
@@ -165,7 +165,11 @@ pub trait IndexWriter: Send {
 #[async_trait]
 pub trait IndexReader: Send + Sync {
     /// Read the n-th record batch from the file
-    async fn read_record_batch(&self, n: u32) -> Result<RecordBatch>;
+    ///
+    /// `batch_size` is the number of rows in each batch.  Implementations that have
+    /// no stored batch boundaries use it to compute the rows of the n-th batch
+    /// (starting at row `n * batch_size`); other implementations may ignore it.
+    async fn read_record_batch(&self, n: u64, batch_size: u64) -> Result<RecordBatch>;
     /// Read the range of rows from the file.
     /// If projection is Some, only return the columns in the projection,
     /// nested columns like Some(&["x.y"]) are not supported.
diff --git a/rust/lance-index/src/scalar/bitmap.rs b/rust/lance-index/src/scalar/bitmap.rs
index fc531b3bf7a..ca03d250181 100644
--- a/rust/lance-index/src/scalar/bitmap.rs
+++ b/rust/lance-index/src/scalar/bitmap.rs
@@ -37,6 +37,8 @@ pub const BITMAP_LOOKUP_NAME: &str = "bitmap_page_lookup.lance";
 #[derive(Clone, Debug)]
 pub struct BitmapIndex {
     index_map: BTreeMap<OrderableScalarValue, RowIdTreeMap>,
+    // We put null in its own map to avoid it matching range queries (arrow-rs considers null to come before minval)
+    null_map: RowIdTreeMap,
     // Memoized index_map size for DeepSizeOf
     index_map_size_bytes: usize,
     store: Arc<dyn IndexStore>,
@@ -45,11 +47,13 @@ pub struct BitmapIndex {
 impl BitmapIndex {
     fn new(
         index_map: BTreeMap<OrderableScalarValue, RowIdTreeMap>,
+        null_map: RowIdTreeMap,
         index_map_size_bytes: usize,
         store: Arc<dyn IndexStore>,
     ) -> Self {
         Self {
             index_map,
+            null_map,
             index_map_size_bytes,
             store,
         }
@@ -74,6 +78,7 @@ impl BitmapIndex {
         let mut index_map: BTreeMap<OrderableScalarValue, RowIdTreeMap> = BTreeMap::new();
 
         let mut index_map_size_bytes = 0;
+        let mut null_map = RowIdTreeMap::default();
         for idx in 0..data.num_rows() {
             let key = OrderableScalarValue(ScalarValue::try_from_array(dict_keys, idx)?);
             let bitmap_bytes = bitmap_binary_array.value(idx);
@@ -82,10 +87,14 @@ impl BitmapIndex {
             index_map_size_bytes += key.deep_size_of();
             // This should be a reasonable approximation of the RowIdTreeMap size
             index_map_size_bytes += bitmap_bytes.len();
-            index_map.insert(key, bitmap);
+            if key.0.is_null() {
+                null_map = bitmap;
+            } else {
+                index_map.insert(key, bitmap);
+            }
         }
 
-        Ok(Self::new(index_map, index_map_size_bytes, store))
+        Ok(Self::new(index_map, null_map, index_map_size_bytes, store))
     }
 }
 
@@ -152,8 +161,12 @@ impl ScalarIndex for BitmapIndex {
 
         let row_ids = match query {
             SargableQuery::Equals(val) => {
-                let key = OrderableScalarValue(val.clone());
-                self.index_map.get(&key).cloned().unwrap_or_default()
+                if val.is_null() {
+                    self.null_map.clone()
+                } else {
+                    let key = OrderableScalarValue(val.clone());
+                    self.index_map.get(&key).cloned().unwrap_or_default()
+                }
             }
             SargableQuery::Range(start, end) => {
                 let range_start = match start {
@@ -179,26 +192,19 @@ impl ScalarIndex for BitmapIndex {
             SargableQuery::IsIn(values) => {
                 let mut union_bitmap = RowIdTreeMap::default();
                 for val in values {
-                    let key = OrderableScalarValue(val.clone());
-                    if let Some(bitmap) = self.index_map.get(&key) {
-                        union_bitmap |= bitmap.clone();
+                    if val.is_null() {
+                        union_bitmap |= self.null_map.clone();
+                    } else {
+                        let key = OrderableScalarValue(val.clone());
+                        if let Some(bitmap) = self.index_map.get(&key) {
+                            union_bitmap |= bitmap.clone();
+                        }
                     }
                 }
 
                 union_bitmap
             }
-            SargableQuery::IsNull() => {
-                if let Some(array) = self
-                    .index_map
-                    .iter()
-                    .find(|(key, _)| key.0.is_null())
-                    .map(|(_, value)| value)
-                {
-                    array.clone()
-                } else {
-                    RowIdTreeMap::default()
-                }
-            }
+            SargableQuery::IsNull() => self.null_map.clone(),
             SargableQuery::FullTextSearch(_) => {
                 return Err(Error::NotSupported {
                     source: "full text search is not supported for bitmap indexes".into(),
diff --git a/rust/lance-index/src/scalar/btree.rs b/rust/lance-index/src/scalar/btree.rs
index ce23f85d851..abbe65490f8 100644
--- a/rust/lance-index/src/scalar/btree.rs
+++ b/rust/lance-index/src/scalar/btree.rs
@@ -50,6 +50,10 @@ use super::{
 
 const BTREE_LOOKUP_NAME: &str = "page_lookup.lance";
 const BTREE_PAGES_NAME: &str = "page_data.lance";
+/// Batch size used when an index's lookup file does not record one, and when an
+/// existing index is retrained during an update.
+pub const DEFAULT_BTREE_BATCH_SIZE: u64 = 4096;
+/// Key in the page lookup file's schema metadata under which the batch size is stored.
+const BATCH_SIZE_META_KEY: &str = "batch_size";
 
 /// Wraps a ScalarValue and implements Ord (ScalarValue only implements PartialOrd)
 #[derive(Clone, Debug)]
@@ -573,7 +575,13 @@ impl BTreeLookup {
 
     // All pages that could have a value equal to val
     fn pages_eq(&self, query: &OrderableScalarValue) -> Vec<u32> {
-        self.pages_between((Bound::Included(query), Bound::Excluded(query)))
+        // Nulls are looked up separately from the min/max tree: a page made up
+        // entirely of nulls is never inserted into the tree.
+        if query.0.is_null() {
+            self.pages_null()
+        } else {
+            self.pages_between((Bound::Included(query), Bound::Excluded(query)))
+        }
     }
 
     // All pages that could have a value equal to one of the values
@@ -673,6 +679,7 @@ pub struct BTreeIndex {
     page_lookup: Arc<BTreeLookup>,
     store: Arc<dyn IndexStore>,
     sub_index: Arc<dyn BTreeSubIndex>,
+    batch_size: u64,
 }
 
 impl BTreeIndex {
@@ -681,12 +688,14 @@ impl BTreeIndex {
         null_pages: Vec<u32>,
         store: Arc<dyn IndexStore>,
         sub_index: Arc<dyn BTreeSubIndex>,
+        batch_size: u64,
     ) -> Self {
         let page_lookup = Arc::new(BTreeLookup::new(tree, null_pages));
         Self {
             page_lookup,
             store,
             sub_index,
+            batch_size,
         }
     }
 
@@ -696,7 +705,9 @@ impl BTreeIndex {
         page_number: u32,
         index_reader: Arc<dyn IndexReader>,
     ) -> Result<RowIdTreeMap> {
-        let serialized_page = index_reader.read_record_batch(page_number).await?;
+        let serialized_page = index_reader
+            .read_record_batch(page_number as u64, self.batch_size)
+            .await?;
         let subindex = self.sub_index.load_subindex(serialized_page).await?;
         // TODO: If this is an IN query we can perhaps simplify the subindex query by restricting it to the
         // values that might be in the page.  E.g. if we are searching for X IN [5, 3, 7] and five is in pages
@@ -705,7 +716,11 @@ impl BTreeIndex {
         subindex.search(query).await
     }
 
-    fn try_from_serialized(data: RecordBatch, store: Arc<dyn IndexStore>) -> Result<Self> {
+    fn try_from_serialized(
+        data: RecordBatch,
+        store: Arc<dyn IndexStore>,
+        batch_size: u64,
+    ) -> Result<Self> {
         let mut map = BTreeMap::<OrderableScalarValue, Vec<PageRecord>>::new();
         let mut null_pages = Vec::<u32>::new();
 
@@ -735,9 +750,13 @@ impl BTreeIndex {
             let null_count = null_counts.values()[idx];
             let page_number = page_numbers.values()[idx];
 
-            map.entry(min)
-                .or_default()
-                .push(PageRecord { max, page_number });
+            // If the page is entirely null don't even bother putting it in the tree
+            if !max.0.is_null() {
+                map.entry(min)
+                    .or_default()
+                    .push(PageRecord { max, page_number });
+            }
+
             if null_count > 0 {
                 null_pages.push(page_number);
             }
@@ -751,7 +770,7 @@ impl BTreeIndex {
         // TODO: Support other page types?
         let sub_index = Arc::new(FlatIndexMetadata::new(data_type.clone()));
 
-        Ok(Self::new(map, null_pages, store, sub_index))
+        Ok(Self::new(map, null_pages, store, sub_index, batch_size))
     }
 
     /// Create a stream of all the data in the index, in the same format used to train the index
@@ -844,7 +863,9 @@ impl Index for BTreeIndex {
 
         let sub_index_reader = self.store.open_index_file(BTREE_PAGES_NAME).await?;
         for page_number in self.page_lookup.all_page_ids() {
-            let serialized = sub_index_reader.read_record_batch(page_number).await?;
+            let serialized = sub_index_reader
+                .read_record_batch(page_number as u64, self.batch_size)
+                .await?;
             let page = self.sub_index.load_subindex(serialized).await?;
             frag_ids |= page.calculate_included_frags().await?;
         }
@@ -891,10 +912,23 @@ impl ScalarIndex for BTreeIndex {
 
     async fn load(store: Arc<dyn IndexStore>) -> Result<Arc<Self>> {
         let page_lookup_file = store.open_index_file(BTREE_LOOKUP_NAME).await?;
-        let serialized_lookup = page_lookup_file.read_record_batch(0).await?;
+        // Read every row of the lookup file in a single range request.
+        let num_rows_in_lookup = page_lookup_file.num_rows();
+        let serialized_lookup = page_lookup_file
+            .read_range(0..num_rows_in_lookup, None)
+            .await?;
+        // The batch size is stored in the lookup file's schema metadata; a missing
+        // or unparsable entry falls back to the default.
+        let file_schema = page_lookup_file.schema();
+        let batch_size = file_schema
+            .metadata
+            .get(BATCH_SIZE_META_KEY)
+            .and_then(|bs| bs.parse().ok())
+            .unwrap_or(DEFAULT_BTREE_BATCH_SIZE);
         Ok(Arc::new(Self::try_from_serialized(
             serialized_lookup,
             store,
+            batch_size,
         )?))
     }
 
@@ -911,7 +942,9 @@ impl ScalarIndex for BTreeIndex {
         let sub_index_reader = self.store.open_index_file(BTREE_PAGES_NAME).await?;
 
         for page_number in self.page_lookup.all_page_ids() {
-            let old_serialized = sub_index_reader.read_record_batch(page_number).await?;
+            let old_serialized = sub_index_reader
+                .read_record_batch(page_number as u64, self.batch_size)
+                .await?;
             let remapped = self
                 .sub_index
                 .remap_subindex(old_serialized, mapping)
@@ -934,7 +967,13 @@ impl ScalarIndex for BTreeIndex {
     ) -> Result<()> {
         // Merge the existing index data with the new data and then retrain the index on the merged stream
         let merged_data_source = Box::new(BTreeUpdater::new(self.clone(), new_data));
-        train_btree_index(merged_data_source, self.sub_index.as_ref(), dest_store).await
+        train_btree_index(
+            merged_data_source,
+            self.sub_index.as_ref(),
+            dest_store,
+            DEFAULT_BTREE_BATCH_SIZE as u32,
+        )
+        .await
     }
 }
 
@@ -1092,13 +1131,14 @@ pub async fn train_btree_index(
     data_source: Box<dyn TrainingSource + Send>,
     sub_index_trainer: &dyn BTreeSubIndex,
     index_store: &dyn IndexStore,
+    batch_size: u32,
 ) -> Result<()> {
     let mut sub_index_file = index_store
         .new_index_file(BTREE_PAGES_NAME, sub_index_trainer.schema().clone())
         .await?;
     let mut encoded_batches = Vec::new();
     let mut batch_idx = 0;
-    let mut batches_source = data_source.scan_ordered_chunks(4096).await?;
+    let mut batches_source = data_source.scan_ordered_chunks(batch_size).await?;
     while let Some(batch) = batches_source.try_next().await? {
         debug_assert_eq!(batch.num_columns(), 2);
         debug_assert_eq!(*batch.column(1).data_type(), DataType::UInt64);
@@ -1109,8 +1149,12 @@ pub async fn train_btree_index(
     }
     sub_index_file.finish().await?;
     let record_batch = btree_stats_as_batch(encoded_batches)?;
+    let mut file_schema = record_batch.schema().as_ref().clone();
+    file_schema
+        .metadata
+        .insert(BATCH_SIZE_META_KEY.to_string(), batch_size.to_string());
     let mut btree_index_file = index_store
-        .new_index_file(BTREE_LOOKUP_NAME, record_batch.schema())
+        .new_index_file(BTREE_LOOKUP_NAME, Arc::new(file_schema))
         .await?;
     btree_index_file.write_record_batch(record_batch).await?;
     btree_index_file.finish().await?;
@@ -1204,7 +1248,12 @@ impl Stream for IndexReaderStream {
         let page_number = this.pages[idx];
         this.idx += 1;
         let reader_copy = this.reader.clone();
-        let read_task = async move { reader_copy.read_record_batch(page_number).await }.boxed();
+        let read_task = async move {
+            reader_copy
+                .read_record_batch(page_number as u64, DEFAULT_BTREE_BATCH_SIZE)
+                .await
+        }
+        .boxed();
         std::task::Poll::Ready(Some(read_task))
     }
 }
diff --git a/rust/lance-index/src/scalar/flat.rs b/rust/lance-index/src/scalar/flat.rs
index 66a69e95e53..709f4b38051 100644
--- a/rust/lance-index/src/scalar/flat.rs
+++ b/rust/lance-index/src/scalar/flat.rs
@@ -33,6 +33,8 @@ use super::{AnyQuery, SargableQuery};
 #[derive(Debug)]
 pub struct FlatIndex {
     data: Arc<RecordBatch>,
+    // Set by the loaders to whether column 0 of `data` contains at least one null
+    has_nulls: bool,
 }
 
 impl DeepSizeOf for FlatIndex {
@@ -132,8 +133,12 @@ impl BTreeSubIndex for FlatIndexMetadata {
     }
 
     async fn load_subindex(&self, serialized: RecordBatch) -> Result<Arc<dyn ScalarIndex>> {
+        // Compute once whether the values column holds any nulls so that `search`
+        // only does its extra null handling when it is needed.
+        let has_nulls = serialized.column(0).null_count() > 0;
         Ok(Arc::new(FlatIndex {
             data: Arc::new(serialized),
+            has_nulls,
         }))
     }
 
@@ -196,13 +199,23 @@ impl ScalarIndex for FlatIndex {
         let query = query.as_any().downcast_ref::<SargableQuery>().unwrap();
         // Since we have all the values in memory we can use basic arrow-rs compute
         // functions to satisfy scalar queries.
-        let predicate = match query {
-            SargableQuery::Equals(value) => arrow_ord::cmp::eq(self.values(), &value.to_scalar()?)?,
+        let mut predicate = match query {
+            SargableQuery::Equals(value) => {
+                if value.is_null() {
+                    arrow::compute::is_null(self.values())?
+                } else {
+                    arrow_ord::cmp::eq(self.values(), &value.to_scalar()?)?
+                }
+            }
             SargableQuery::IsNull() => arrow::compute::is_null(self.values())?,
             SargableQuery::IsIn(values) => {
+                let mut has_null = false;
                 let choices = values
                     .iter()
-                    .map(|val| lit(val.clone()))
+                    .map(|val| {
+                        has_null |= val.is_null();
+                        lit(val.clone())
+                    })
                     .collect::<Vec<_>>();
                 let in_list_expr = in_list(
                     Arc::new(Column::new("values", 0)),
@@ -211,12 +224,20 @@ impl ScalarIndex for FlatIndex {
                     &self.data.schema(),
                 )?;
                 let result_col = in_list_expr.evaluate(&self.data)?;
-                result_col
+                let predicate = result_col
                     .into_array(self.data.num_rows())?
                     .as_any()
                     .downcast_ref::<BooleanArray>()
                     .expect("InList evaluation should return boolean array")
-                    .clone()
+                    .clone();
+
+                // Arrow's in_list does not handle nulls so we need to join them in here if user asked for them
+                if has_null && self.has_nulls {
+                    let nulls = arrow::compute::is_null(self.values())?;
+                    arrow::compute::or(&predicate, &nulls)?
+                } else {
+                    predicate
+                }
             }
             SargableQuery::Range(lower_bound, upper_bound) => match (lower_bound, upper_bound) {
                 (Bound::Unbounded, Bound::Unbounded) => {
@@ -256,6 +277,12 @@ impl ScalarIndex for FlatIndex {
                 location!(),
             )),
         };
+        if self.has_nulls && matches!(query, SargableQuery::Range(_, _)) {
+            // Arrow's comparison kernels do not return false for nulls.  They consider nulls to
+            // be less than any value.  So we need to filter out the nulls manually.
+            let valid_values = arrow::compute::is_not_null(self.values())?;
+            predicate = arrow::compute::and(&valid_values, &predicate)?;
+        }
         let matching_ids = arrow_select::filter::filter(self.ids(), &predicate)?;
         let matching_ids = matching_ids
             .as_any()
@@ -269,9 +296,14 @@ impl ScalarIndex for FlatIndex {
     // data as a single batch named data.lance
     async fn load(store: Arc<dyn IndexStore>) -> Result<Arc<Self>> {
         let batches = store.open_index_file("data.lance").await?;
-        let batch = batches.read_record_batch(0).await?;
+        // Read all rows of the file as one batch
+        let num_rows = batches.num_rows();
+        let batch = batches.read_range(0..num_rows, None).await?;
+        // Remember whether the values column (column 0) contains any nulls
+        let has_nulls = batch.column(0).null_count() > 0;
         Ok(Arc::new(Self {
             data: Arc::new(batch),
+            has_nulls,
         }))
     }
 
@@ -319,6 +349,7 @@ mod tests {
 
         FlatIndex {
             data: Arc::new(batch),
+            has_nulls: false,
         }
     }
 
diff --git a/rust/lance-index/src/scalar/lance_format.rs b/rust/lance-index/src/scalar/lance_format.rs
index 75639db33e9..865cc5a245f 100644
--- a/rust/lance-index/src/scalar/lance_format.rs
+++ b/rust/lance-index/src/scalar/lance_format.rs
@@ -16,7 +16,6 @@ use lance_core::{cache::FileMetadataCache, Error, Result};
 use lance_encoding::decoder::{DecoderPlugins, FilterExpression};
 use lance_file::v2;
 use lance_file::v2::reader::FileReaderOptions;
-use lance_file::writer::FileWriterOptions;
 use lance_file::{
     reader::FileReader,
     writer::{FileWriter, ManifestProvider},
@@ -24,7 +23,6 @@ use lance_file::{
 use lance_io::scheduler::{ScanScheduler, SchedulerConfig};
 use lance_io::{object_store::ObjectStore, ReadBatchParams};
 use lance_table::format::SelfDescribingFileReader;
-use lance_table::io::manifest::ManifestDescribing;
 use object_store::path::Path;
 
 use super::{IndexReader, IndexStore, IndexWriter};
@@ -40,7 +38,6 @@ pub struct LanceIndexStore {
     index_dir: Path,
     metadata_cache: FileMetadataCache,
     scheduler: Arc<ScanScheduler>,
-    use_legacy_format: bool,
 }
 
 impl DeepSizeOf for LanceIndexStore {
@@ -68,14 +65,8 @@ impl LanceIndexStore {
             index_dir,
             metadata_cache,
             scheduler,
-            use_legacy_format: false,
         }
     }
-
-    pub fn with_legacy_format(mut self, use_legacy_format: bool) -> Self {
-        self.use_legacy_format = use_legacy_format;
-        self
-    }
 }
 
 #[async_trait]
@@ -119,7 +110,7 @@ impl IndexWriter for v2::writer::FileWriter {
 
 #[async_trait]
 impl IndexReader for FileReader {
-    async fn read_record_batch(&self, offset: u32) -> Result<RecordBatch> {
+    async fn read_record_batch(&self, offset: u64, _batch_size: u64) -> Result<RecordBatch> {
         self.read_batch(offset as i32, ReadBatchParams::RangeFull, self.schema())
             .await
     }
@@ -151,8 +142,11 @@ impl IndexReader for FileReader {
 
 #[async_trait]
 impl IndexReader for v2::reader::FileReader {
-    async fn read_record_batch(&self, _offset: u32) -> Result<RecordBatch> {
-        unimplemented!("v2 format has no concept of row groups")
+    async fn read_record_batch(&self, offset: u64, batch_size: u64) -> Result<RecordBatch> {
+        let num_rows = self.num_rows(); // v2 has no row groups; emulate with row windows
+        let start = offset.saturating_mul(batch_size).min(num_rows); // clamp: never past the end
+        let end = start.saturating_add(batch_size).min(num_rows); // final batch may be short
+        self.read_range(start as usize..end as usize, None).await
     }
 
     async fn read_range(
@@ -219,24 +213,13 @@ impl IndexStore for LanceIndexStore {
     ) -> Result<Box<dyn IndexWriter>> {
         let path = self.index_dir.child(name);
         let schema = schema.as_ref().try_into()?;
-        if self.use_legacy_format {
-            let writer = FileWriter::<ManifestDescribing>::try_new(
-                &self.object_store,
-                &path,
-                schema,
-                &FileWriterOptions::default(),
-            )
-            .await?;
-            Ok(Box::new(writer))
-        } else {
-            let writer = self.object_store.create(&path).await?;
-            let writer = v2::writer::FileWriter::try_new(
-                writer,
-                schema,
-                v2::writer::FileWriterOptions::default(),
-            )?;
-            Ok(Box::new(writer))
-        }
+        let writer = self.object_store.create(&path).await?;
+        let writer = v2::writer::FileWriter::try_new(
+            writer,
+            schema,
+            v2::writer::FileWriterOptions::default(),
+        )?;
+        Ok(Box::new(writer))
     }
 
     async fn open_index_file(&self, name: &str) -> Result<Arc<dyn IndexReader>> {
@@ -305,7 +288,7 @@ mod tests {
 
     use crate::scalar::{
         bitmap::{train_bitmap_index, BitmapIndex},
-        btree::{train_btree_index, BTreeIndex, TrainingSource},
+        btree::{train_btree_index, BTreeIndex, TrainingSource, DEFAULT_BTREE_BATCH_SIZE},
         flat::FlatIndexMetadata,
         label_list::{train_label_list_index, LabelListIndex},
         LabelListQuery, SargableQuery, ScalarIndex,
@@ -335,14 +318,6 @@ mod tests {
         Arc::new(LanceIndexStore::new(object_store, test_path, cache))
     }
 
-    fn legacy_test_store(tempdir: &TempDir) -> Arc<dyn IndexStore> {
-        let test_path: &Path = tempdir.path();
-        let cache = FileMetadataCache::with_capacity(128 * 1024 * 1024, CapacityMode::Bytes);
-        let (object_store, test_path) =
-            ObjectStore::from_path(test_path.as_os_str().to_str().unwrap()).unwrap();
-        Arc::new(LanceIndexStore::new(object_store, test_path, cache).with_legacy_format(true))
-    }
-
     struct MockTrainingSource {
         data: SendableRecordBatchStream,
     }
@@ -376,24 +351,31 @@ mod tests {
         index_store: &Arc<dyn IndexStore>,
         data: impl RecordBatchReader + Send + Sync + 'static,
         value_type: DataType,
+        custom_batch_size: Option<u64>,
     ) {
         let sub_index_trainer = FlatIndexMetadata::new(value_type);
 
         let data = Box::new(MockTrainingSource::new(data).await);
-        train_btree_index(data, &sub_index_trainer, index_store.as_ref())
-            .await
-            .unwrap();
+        let batch_size = custom_batch_size.unwrap_or(DEFAULT_BTREE_BATCH_SIZE);
+        train_btree_index(
+            data,
+            &sub_index_trainer,
+            index_store.as_ref(),
+            batch_size as u32,
+        )
+        .await
+        .unwrap();
     }
 
     #[tokio::test]
     async fn test_basic_btree() {
         let tempdir = tempdir().unwrap();
-        let index_store = legacy_test_store(&tempdir);
+        let index_store = test_store(&tempdir);
         let data = gen()
             .col("values", array::step::<Int32Type>())
             .col("row_ids", array::step::<UInt64Type>())
             .into_reader_rows(RowCount::from(4096), BatchCount::from(100));
-        train_index(&index_store, data, DataType::Int32).await;
+        train_index(&index_store, data, DataType::Int32, None).await;
         let index = BTreeIndex::load(index_store).await.unwrap();
 
         let row_ids = index
@@ -428,12 +410,12 @@ mod tests {
     #[tokio::test]
     async fn test_btree_update() {
         let index_dir = tempdir().unwrap();
-        let index_store = legacy_test_store(&index_dir);
+        let index_store = test_store(&index_dir);
         let data = gen()
             .col("values", array::step::<Int32Type>())
             .col("row_ids", array::step::<UInt64Type>())
             .into_reader_rows(RowCount::from(4096), BatchCount::from(100));
-        train_index(&index_store, data, DataType::Int32).await;
+        train_index(&index_store, data, DataType::Int32, None).await;
         let index = BTreeIndex::load(index_store).await.unwrap();
 
         let data = gen()
@@ -442,7 +424,7 @@ mod tests {
             .into_reader_rows(RowCount::from(4096), BatchCount::from(100));
 
         let updated_index_dir = tempdir().unwrap();
-        let updated_index_store = legacy_test_store(&updated_index_dir);
+        let updated_index_store = test_store(&updated_index_dir);
         index
             .update(
                 lance_datafusion::utils::reader_to_stream(Box::new(data)),
@@ -478,7 +460,7 @@ mod tests {
     #[tokio::test]
     async fn test_btree_with_gaps() {
         let tempdir = tempdir().unwrap();
-        let index_store = legacy_test_store(&tempdir);
+        let index_store = test_store(&tempdir);
         let batch_one = gen()
             .col("values", array::cycle::<Int32Type>(vec![0, 1, 4, 5]))
             .col("row_ids", array::cycle::<UInt64Type>(vec![0, 1, 2, 3]))
@@ -507,7 +489,7 @@ mod tests {
             Field::new("row_ids", DataType::UInt64, false),
         ]));
         let data = RecordBatchIterator::new(batches, schema);
-        train_index(&index_store, data, DataType::Int32).await;
+        train_index(&index_store, data, DataType::Int32, Some(4)).await;
         let index = BTreeIndex::load(index_store).await.unwrap();
 
         // The above should create four pages
@@ -703,7 +685,7 @@ mod tests {
             // DataType::Duration(TimeUnit::Nanosecond),
         ] {
             let tempdir = tempdir().unwrap();
-            let index_store = legacy_test_store(&tempdir);
+            let index_store = test_store(&tempdir);
             let data: RecordBatch = gen()
                 .col("values", array::rand_type(data_type))
                 .col("row_ids", array::step::<UInt64Type>())
@@ -742,7 +724,7 @@ mod tests {
                 data.schema().clone(),
             );
 
-            train_index(&index_store, training_data, data_type.clone()).await;
+            train_index(&index_store, training_data, data_type.clone(), None).await;
             let index = BTreeIndex::load(index_store).await.unwrap();
 
             let row_ids = index
@@ -761,7 +743,7 @@ mod tests {
     #[tokio::test]
     async fn btree_reject_nan() {
         let tempdir = tempdir().unwrap();
-        let index_store = legacy_test_store(&tempdir);
+        let index_store = test_store(&tempdir);
         let batch = gen()
             .col("values", array::cycle::<Float32Type>(vec![0.0, f32::NAN]))
             .col("row_ids", array::cycle::<UInt64Type>(vec![0, 1]))
@@ -777,17 +759,20 @@ mod tests {
         let data = Box::new(MockTrainingSource::new(data).await);
         // Until DF handles NaN reliably we need to make sure we reject input
         // containing NaN
-        assert!(
-            train_btree_index(data, &sub_index_trainer, index_store.as_ref())
-                .await
-                .is_err()
-        );
+        assert!(train_btree_index(
+            data,
+            &sub_index_trainer,
+            index_store.as_ref(),
+            DEFAULT_BTREE_BATCH_SIZE as u32
+        )
+        .await
+        .is_err());
     }
 
     #[tokio::test]
     async fn btree_entire_null_page() {
         let tempdir = tempdir().unwrap();
-        let index_store = legacy_test_store(&tempdir);
+        let index_store = test_store(&tempdir);
         let batch = gen()
             .col(
                 "values",
@@ -805,9 +790,14 @@ mod tests {
         let sub_index_trainer = FlatIndexMetadata::new(DataType::Utf8);
 
         let data = Box::new(MockTrainingSource::new(data).await);
-        train_btree_index(data, &sub_index_trainer, index_store.as_ref())
-            .await
-            .unwrap();
+        train_btree_index(
+            data,
+            &sub_index_trainer,
+            index_store.as_ref(),
+            DEFAULT_BTREE_BATCH_SIZE as u32,
+        )
+        .await
+        .unwrap();
 
         let index = BTreeIndex::load(index_store).await.unwrap();
 
diff --git a/rust/lance/benches/scalar_index.rs b/rust/lance/benches/scalar_index.rs
index 58c261ccf50..f14dea1983e 100644
--- a/rust/lance/benches/scalar_index.rs
+++ b/rust/lance/benches/scalar_index.rs
@@ -16,7 +16,7 @@ use lance_core::{cache::FileMetadataCache, Result};
 use lance_datafusion::utils::reader_to_stream;
 use lance_datagen::{array, gen, BatchCount, RowCount};
 use lance_index::scalar::{
-    btree::{train_btree_index, BTreeIndex, TrainingSource},
+    btree::{train_btree_index, BTreeIndex, TrainingSource, DEFAULT_BTREE_BATCH_SIZE},
     flat::FlatIndexMetadata,
     lance_format::LanceIndexStore,
     IndexStore, SargableQuery, ScalarIndex,
@@ -60,7 +60,6 @@ impl TrainingSource for BenchmarkDataSource {
 }
 
 impl BenchmarkFixture {
-    #[allow(dead_code)]
     fn test_store(tempdir: &TempDir) -> Arc<dyn IndexStore> {
         let test_path = tempdir.path();
         let (object_store, test_path) =
@@ -72,16 +71,6 @@ impl BenchmarkFixture {
         ))
     }
 
-    fn legacy_test_store(tempdir: &TempDir) -> Arc<dyn IndexStore> {
-        let test_path = tempdir.path();
-        let (object_store, test_path) =
-            ObjectStore::from_path(test_path.as_os_str().to_str().unwrap()).unwrap();
-        Arc::new(
-            LanceIndexStore::new(object_store, test_path, FileMetadataCache::no_cache())
-                .with_legacy_format(true),
-        )
-    }
-
     async fn write_baseline_data(tempdir: &TempDir) -> Arc<Dataset> {
         let test_path = tempdir.path().as_os_str().to_str().unwrap();
         Arc::new(
@@ -98,6 +87,7 @@ impl BenchmarkFixture {
             Box::new(BenchmarkDataSource {}),
             &sub_index_trainer,
             index_store.as_ref(),
+            DEFAULT_BTREE_BATCH_SIZE as u32,
         )
         .await
         .unwrap();
@@ -105,7 +95,7 @@ impl BenchmarkFixture {
 
     async fn open() -> Self {
         let tempdir = tempfile::tempdir().unwrap();
-        let index_store = Self::legacy_test_store(&tempdir);
+        let index_store = Self::test_store(&tempdir);
         let baseline_dataset = Self::write_baseline_data(&tempdir).await;
         Self::train_scalar_index(&index_store).await;
 
diff --git a/rust/lance/src/index.rs b/rust/lance/src/index.rs
index ace9906d5cc..c65a30df332 100644
--- a/rust/lance/src/index.rs
+++ b/rust/lance/src/index.rs
@@ -111,13 +111,7 @@ pub(crate) async fn remap_index(
 
     match generic.index_type() {
         it if it.is_scalar() => {
-            let new_store = match it {
-                IndexType::Scalar | IndexType::BTree => {
-                    LanceIndexStore::from_dataset(dataset, &new_id.to_string())
-                        .with_legacy_format(true)
-                }
-                _ => LanceIndexStore::from_dataset(dataset, &new_id.to_string()),
-            };
+            let new_store = LanceIndexStore::from_dataset(dataset, &new_id.to_string());
 
             let scalar_index = dataset
                 .open_scalar_index(&field.name, &index_id.to_string())
diff --git a/rust/lance/src/index/append.rs b/rust/lance/src/index/append.rs
index 3c6a377dd5a..9381b8b88f1 100644
--- a/rust/lance/src/index/append.rs
+++ b/rust/lance/src/index/append.rs
@@ -98,15 +98,7 @@ pub async fn merge_indices<'a>(
 
             let new_uuid = Uuid::new_v4();
 
-            // The BTree index implementation leverages the legacy format's batch offset,
-            // which has been removed from new format, so keep using the legacy format for now.
-            let new_store = match index.index_type() {
-                IndexType::Scalar | IndexType::BTree => {
-                    LanceIndexStore::from_dataset(&dataset, &new_uuid.to_string())
-                        .with_legacy_format(true)
-                }
-                _ => LanceIndexStore::from_dataset(&dataset, &new_uuid.to_string()),
-            };
+            let new_store = LanceIndexStore::from_dataset(&dataset, &new_uuid.to_string());
             index.update(new_data_stream.into(), &new_store).await?;
 
             Ok((new_uuid, 1))
diff --git a/rust/lance/src/index/scalar.rs b/rust/lance/src/index/scalar.rs
index c0394bdb65e..32bf1cb7a41 100644
--- a/rust/lance/src/index/scalar.rs
+++ b/rust/lance/src/index/scalar.rs
@@ -11,6 +11,7 @@ use async_trait::async_trait;
 use datafusion::physical_plan::SendableRecordBatchStream;
 use lance_core::{Error, Result};
 use lance_datafusion::{chunker::chunk_concat_stream, exec::LanceExecutionOptions};
+use lance_index::scalar::btree::DEFAULT_BTREE_BATCH_SIZE;
 use lance_index::scalar::InvertedIndexParams;
 use lance_index::scalar::{
     bitmap::{train_bitmap_index, BitmapIndex, BITMAP_LOOKUP_NAME},
@@ -224,11 +225,14 @@ pub(super) async fn build_scalar_index(
             Ok(inverted_index_details())
         }
         _ => {
-            // The BTree index implementation leverages the legacy format's batch offset,
-            // which has been removed from new format, so keep using the legacy format for now.
-            let index_store = index_store.with_legacy_format(true);
             let flat_index_trainer = FlatIndexMetadata::new(field.data_type());
-            train_btree_index(training_request, &flat_index_trainer, &index_store).await?;
+            train_btree_index(
+                training_request,
+                &flat_index_trainer,
+                &index_store,
+                DEFAULT_BTREE_BATCH_SIZE as u32,
+            )
+            .await?;
             Ok(btree_index_details())
         }
     }
diff --git a/test_data/v0.20.0/old_btree_bitmap_indices.lance/_indices/bed6140c-b15a-454e-83a4-d66520397899/bitmap_page_lookup.lance b/test_data/v0.20.0/old_btree_bitmap_indices.lance/_indices/bed6140c-b15a-454e-83a4-d66520397899/bitmap_page_lookup.lance
new file mode 100644
index 00000000000..5b3983fead5
Binary files /dev/null and b/test_data/v0.20.0/old_btree_bitmap_indices.lance/_indices/bed6140c-b15a-454e-83a4-d66520397899/bitmap_page_lookup.lance differ
diff --git a/test_data/v0.20.0/old_btree_bitmap_indices.lance/_indices/e034c4d8-77cd-422c-8855-209eed8deff8/page_data.lance b/test_data/v0.20.0/old_btree_bitmap_indices.lance/_indices/e034c4d8-77cd-422c-8855-209eed8deff8/page_data.lance
new file mode 100644
index 00000000000..d97d872a3fe
Binary files /dev/null and b/test_data/v0.20.0/old_btree_bitmap_indices.lance/_indices/e034c4d8-77cd-422c-8855-209eed8deff8/page_data.lance differ
diff --git a/test_data/v0.20.0/old_btree_bitmap_indices.lance/_indices/e034c4d8-77cd-422c-8855-209eed8deff8/page_lookup.lance b/test_data/v0.20.0/old_btree_bitmap_indices.lance/_indices/e034c4d8-77cd-422c-8855-209eed8deff8/page_lookup.lance
new file mode 100644
index 00000000000..deeb36ca9a9
Binary files /dev/null and b/test_data/v0.20.0/old_btree_bitmap_indices.lance/_indices/e034c4d8-77cd-422c-8855-209eed8deff8/page_lookup.lance differ
diff --git a/test_data/v0.20.0/old_btree_bitmap_indices.lance/_transactions/0-ca14443d-4119-474d-a32d-ae6c59288e9a.txn b/test_data/v0.20.0/old_btree_bitmap_indices.lance/_transactions/0-ca14443d-4119-474d-a32d-ae6c59288e9a.txn
new file mode 100644
index 00000000000..d880fea9082
Binary files /dev/null and b/test_data/v0.20.0/old_btree_bitmap_indices.lance/_transactions/0-ca14443d-4119-474d-a32d-ae6c59288e9a.txn differ
diff --git a/test_data/v0.20.0/old_btree_bitmap_indices.lance/_transactions/1-6c1bfc70-d75f-4b58-84ec-aee73e2389d6.txn b/test_data/v0.20.0/old_btree_bitmap_indices.lance/_transactions/1-6c1bfc70-d75f-4b58-84ec-aee73e2389d6.txn
new file mode 100644
index 00000000000..8575b67ce2b
Binary files /dev/null and b/test_data/v0.20.0/old_btree_bitmap_indices.lance/_transactions/1-6c1bfc70-d75f-4b58-84ec-aee73e2389d6.txn differ
diff --git a/test_data/v0.20.0/old_btree_bitmap_indices.lance/_transactions/2-70cf21e4-8f6d-4d41-b303-3dc1ee959c0b.txn b/test_data/v0.20.0/old_btree_bitmap_indices.lance/_transactions/2-70cf21e4-8f6d-4d41-b303-3dc1ee959c0b.txn
new file mode 100644
index 00000000000..97aed3d6daf
Binary files /dev/null and b/test_data/v0.20.0/old_btree_bitmap_indices.lance/_transactions/2-70cf21e4-8f6d-4d41-b303-3dc1ee959c0b.txn differ
diff --git a/test_data/v0.20.0/old_btree_bitmap_indices.lance/_versions/1.manifest b/test_data/v0.20.0/old_btree_bitmap_indices.lance/_versions/1.manifest
new file mode 100644
index 00000000000..4b8b0703d6a
Binary files /dev/null and b/test_data/v0.20.0/old_btree_bitmap_indices.lance/_versions/1.manifest differ
diff --git a/test_data/v0.20.0/old_btree_bitmap_indices.lance/_versions/2.manifest b/test_data/v0.20.0/old_btree_bitmap_indices.lance/_versions/2.manifest
new file mode 100644
index 00000000000..f92dab11396
Binary files /dev/null and b/test_data/v0.20.0/old_btree_bitmap_indices.lance/_versions/2.manifest differ
diff --git a/test_data/v0.20.0/old_btree_bitmap_indices.lance/_versions/3.manifest b/test_data/v0.20.0/old_btree_bitmap_indices.lance/_versions/3.manifest
new file mode 100644
index 00000000000..5f747931c41
Binary files /dev/null and b/test_data/v0.20.0/old_btree_bitmap_indices.lance/_versions/3.manifest differ
diff --git a/test_data/v0.20.0/old_btree_bitmap_indices.lance/data/1f29c4b8-24ba-4f50-8d07-3b0b5c1b4f3f.lance b/test_data/v0.20.0/old_btree_bitmap_indices.lance/data/1f29c4b8-24ba-4f50-8d07-3b0b5c1b4f3f.lance
new file mode 100644
index 00000000000..e6e9d742cfd
Binary files /dev/null and b/test_data/v0.20.0/old_btree_bitmap_indices.lance/data/1f29c4b8-24ba-4f50-8d07-3b0b5c1b4f3f.lance differ