@@ -87,7 +87,7 @@ use lance_table::io::commit::external_manifest::ExternalManifestCommitHandler;
8787use crate :: error:: PythonErrorExt ;
8888use crate :: file:: object_store_from_uri_or_path;
8989use crate :: fragment:: FileFragment ;
90- use crate :: indices:: { PyIndexConfig , PyIndexDescription } ;
90+ use crate :: indices:: { PyIndexConfig , PyIndexDescription , PyIndexSegment , PyIndexSegmentPlan } ;
9191use crate :: namespace:: extract_namespace_arc;
9292use crate :: rt;
9393use crate :: scanner:: ScanStatistics ;
@@ -323,6 +323,88 @@ impl MergeInsertBuilder {
323323 }
324324}
325325
326+ #[ pyclass( name = "IndexSegmentBuilder" , module = "lance" , subclass) ]
327+ #[ derive( Clone ) ]
328+ pub struct PyIndexSegmentBuilder {
329+ dataset : Arc < LanceDataset > ,
330+ staging_index_uuid : String ,
331+ partial_indices : Vec < IndexMetadata > ,
332+ target_segment_bytes : Option < u64 > ,
333+ }
334+
335+ #[ pymethods]
336+ impl PyIndexSegmentBuilder {
337+ #[ getter]
338+ fn staging_index_uuid ( & self ) -> String {
339+ self . staging_index_uuid . clone ( )
340+ }
341+
342+ fn with_partial_indices < ' a > (
343+ mut slf : PyRefMut < ' a , Self > ,
344+ partial_indices : & Bound < ' _ , PyAny > ,
345+ ) -> PyResult < PyRefMut < ' a , Self > > {
346+ let mut indices = Vec :: new ( ) ;
347+ for item in partial_indices. try_iter ( ) ? {
348+ indices. push ( item?. extract :: < PyLance < IndexMetadata > > ( ) ?. 0 ) ;
349+ }
350+ slf. partial_indices = indices;
351+ Ok ( slf)
352+ }
353+
354+ fn with_target_segment_bytes < ' a > (
355+ mut slf : PyRefMut < ' a , Self > ,
356+ bytes : u64 ,
357+ ) -> PyResult < PyRefMut < ' a , Self > > {
358+ slf. target_segment_bytes = Some ( bytes) ;
359+ Ok ( slf)
360+ }
361+
362+ fn plan ( & self , py : Python < ' _ > ) -> PyResult < Vec < Py < PyIndexSegmentPlan > > > {
363+ let mut builder = self
364+ . dataset
365+ . create_index_segment_builder ( self . staging_index_uuid . clone ( ) )
366+ . with_partial_indices ( self . partial_indices . clone ( ) ) ;
367+ if let Some ( target_segment_bytes) = self . target_segment_bytes {
368+ builder = builder. with_target_segment_bytes ( target_segment_bytes) ;
369+ }
370+ let plans = rt ( ) . block_on ( Some ( py) , builder. plan ( ) ) ?. infer_error ( ) ?;
371+ plans. into_iter ( )
372+ . map ( |plan| Py :: new ( py, PyIndexSegmentPlan :: from_inner ( plan) ) )
373+ . collect ( )
374+ }
375+
376+ fn build (
377+ & self ,
378+ py : Python < ' _ > ,
379+ plan : & Bound < ' _ , PyAny > ,
380+ ) -> PyResult < Py < PyIndexSegment > > {
381+ let plan = plan. extract :: < PyRef < ' _ , PyIndexSegmentPlan > > ( ) ?;
382+ let builder = self
383+ . dataset
384+ . create_index_segment_builder ( self . staging_index_uuid . clone ( ) )
385+ . with_partial_indices ( self . partial_indices . clone ( ) ) ;
386+ let segment = rt ( )
387+ . block_on ( Some ( py) , builder. build ( & plan. inner ) ) ?
388+ . infer_error ( ) ?;
389+ Py :: new ( py, PyIndexSegment :: from_inner ( segment) )
390+ }
391+
392+ fn build_all ( & self , py : Python < ' _ > ) -> PyResult < Vec < Py < PyIndexSegment > > > {
393+ let mut builder = self
394+ . dataset
395+ . create_index_segment_builder ( self . staging_index_uuid . clone ( ) )
396+ . with_partial_indices ( self . partial_indices . clone ( ) ) ;
397+ if let Some ( target_segment_bytes) = self . target_segment_bytes {
398+ builder = builder. with_target_segment_bytes ( target_segment_bytes) ;
399+ }
400+ let segments = rt ( ) . block_on ( Some ( py) , builder. build_all ( ) ) ?. infer_error ( ) ?;
401+ segments
402+ . into_iter ( )
403+ . map ( |segment| Py :: new ( py, PyIndexSegment :: from_inner ( segment) ) )
404+ . collect ( )
405+ }
406+ }
407+
326408impl MergeInsertBuilder {
327409 fn build_stats < ' a > ( stats : & MergeStats , py : Python < ' a > ) -> PyResult < Bound < ' a , PyDict > > {
328410 let dict = PyDict :: new ( py) ;
@@ -2019,6 +2101,35 @@ impl Dataset {
20192101 Ok ( PyLance ( index_metadata) )
20202102 }
20212103
2104+ fn create_index_segment_builder ( & self , staging_index_uuid : String ) -> PyResult < PyIndexSegmentBuilder > {
2105+ Ok ( PyIndexSegmentBuilder {
2106+ dataset : self . ds . clone ( ) ,
2107+ staging_index_uuid,
2108+ partial_indices : Vec :: new ( ) ,
2109+ target_segment_bytes : None ,
2110+ } )
2111+ }
2112+
2113+ fn commit_existing_index_segments (
2114+ & mut self ,
2115+ index_name : & str ,
2116+ column : & str ,
2117+ segments : Vec < PyRef < ' _ , PyIndexSegment > > ,
2118+ ) -> PyResult < ( ) > {
2119+ let mut new_self = self . ds . as_ref ( ) . clone ( ) ;
2120+ let segments = segments
2121+ . into_iter ( )
2122+ . map ( |segment| segment. inner . clone ( ) )
2123+ . collect ( ) ;
2124+ rt ( ) . block_on (
2125+ None ,
2126+ new_self. commit_existing_index_segments ( index_name, column, segments) ,
2127+ ) ?
2128+ . infer_error ( ) ?;
2129+ self . ds = Arc :: new ( new_self) ;
2130+ Ok ( ( ) )
2131+ }
2132+
20222133 fn drop_index ( & mut self , name : & str ) -> PyResult < ( ) > {
20232134 let mut new_self = self . ds . as_ref ( ) . clone ( ) ;
20242135 rt ( ) . block_on ( None , new_self. drop_index ( name) ) ?
0 commit comments