@@ -347,7 +347,7 @@ def get(self, key):
347347 raise KeyError ('No object named %s in the file' % key )
348348 return self ._read_group (group )
349349
350- def select (self , key , where = None , start = None , stop = None , columns = None , ** kwargs ):
350+ def select (self , key , where = None , start = None , stop = None , columns = None , iterator = False , chunksize = None , ** kwargs ):
351351 """
352352 Retrieve pandas object stored in file, optionally based on where
353353 criteria
@@ -362,16 +362,30 @@ def select(self, key, where=None, start=None, stop=None, columns=None, **kwargs)
362362 start : integer (defaults to None), row number to start selection
363363 stop : integer (defaults to None), row number to stop selection
364364 columns : a list of columns that if not None, will limit the return columns
365+ iterator : boolean, return an iterator, default False
366+ chunksize : nrows to include in iteration, return an iterator
365367
366368 """
367369 group = self .get_node (key )
368370 if group is None :
369371 raise KeyError ('No object named %s in the file' % key )
370- return self ._read_group (group , where = where , start = start , stop = stop , columns = columns , ** kwargs )
371372
372- def select_as_coordinates (self , key , where = None , ** kwargs ):
373+ # create the storer and axes
374+ s = self ._create_storer (group )
375+ s .infer_axes ()
376+
377+ # what we are actually going to do for a chunk
378+ def func (_start , _stop ):
379+ return s .read (where = where , start = _start , stop = _stop , columns = columns , ** kwargs )
380+
381+ if iterator or chunksize is not None :
382+ return TableIterator (func , nrows = s .nrows , start = start , stop = stop , chunksize = chunksize )
383+
384+ return TableIterator (func , nrows = s .nrows , start = start , stop = stop ).get_values ()
385+
386+ def select_as_coordinates (self , key , where = None , start = None , stop = None , ** kwargs ):
373387 """
374- return the selection as a Coordinates. Note that start/stop/columns parematers are inapplicable here.
388+ return the selection as a Coordinates.
375389
376390 Parameters
377391 ----------
@@ -380,8 +394,10 @@ def select_as_coordinates(self, key, where=None, **kwargs):
380394 Optional Parameters
381395 -------------------
382396 where : list of Term (or convertible) objects, optional
397+ start : integer (defaults to None), row number to start selection
398+ stop : integer (defaults to None), row number to stop selection
383399 """
384- return self .get_storer (key ).read_coordinates (where = where , ** kwargs )
400+ return self .get_storer (key ).read_coordinates (where = where , start = start , stop = stop , ** kwargs )
385401
386402 def unique (self , key , column , ** kwargs ):
387403 """
@@ -400,14 +416,18 @@ def unique(self, key, column, **kwargs):
400416 """
401417 return self .get_storer (key ).read_column (column = column , ** kwargs )
402418
403- def select_as_multiple (self , keys , where = None , selector = None , columns = None , ** kwargs ):
419+ def select_as_multiple (self , keys , where = None , selector = None , columns = None , start = None , stop = None , iterator = False , chunksize = None , ** kwargs ):
404420 """ Retrieve pandas objects from multiple tables
405421
406422 Parameters
407423 ----------
408424 keys : a list of the tables
409425 selector : the table to apply the where criteria (defaults to keys[0] if not supplied)
410426 columns : the columns I want back
427+ start : integer (defaults to None), row number to start selection
428+ stop : integer (defaults to None), row number to stop selection
429+ iterator : boolean, return an iterator, default False
430+ chunksize : nrows to include in iteration, return an iterator
411431
412432 Exceptions
413433 ----------
@@ -418,7 +438,7 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None, **kw
418438 if isinstance (keys , (list , tuple )) and len (keys ) == 1 :
419439 keys = keys [0 ]
420440 if isinstance (keys , basestring ):
421- return self .select (key = keys , where = where , columns = columns , ** kwargs )
441+ return self .select (key = keys , where = where , columns = columns , start = start , stop = stop , iterator = iterator , chunksize = chunksize , ** kwargs )
422442
423443 if not isinstance (keys , (list , tuple )):
424444 raise Exception ("keys must be a list/tuple" )
@@ -433,6 +453,8 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None, **kw
433453 tbls = [ self .get_storer (k ) for k in keys ]
434454
435455 # validate rows
456+ if tbls [0 ] is None :
457+ raise Exception ("no valid tables to select as multiple" )
436458 nrows = tbls [0 ].nrows
437459 for t in tbls :
438460 if t .nrows != nrows :
@@ -441,16 +463,25 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None, **kw
441463 raise Exception ("object [%s] is not a table, and cannot be used in all select as multiple" % t .pathname )
442464
443465 # select coordinates from the selector table
444- c = self .select_as_coordinates (selector , where )
466+ c = self .select_as_coordinates (selector , where , start = start , stop = stop )
467+ nrows = len (c )
468+
469+ def func (_start , _stop ):
470+
471+ # collect the returns objs
472+ objs = [t .read (where = c [_start :_stop ], columns = columns ) for t in tbls ]
473+
474+ # axis is the concatenation axis
475+ axis = list (set ([t .non_index_axes [0 ][0 ] for t in tbls ]))[0 ]
445476
446- # collect the returns objs
447- objs = [ t . read ( where = c , columns = columns ) for t in tbls ]
477+ # concat and return
478+ return concat ( objs , axis = axis , verify_integrity = True )
448479
449- # axis is the concentation axes
450- axis = list (set ([t .non_index_axes [0 ][0 ] for t in tbls ]))[0 ]
480+ if iterator or chunksize is not None :
481+ return TableIterator (func , nrows = nrows , start = start , stop = stop , chunksize = chunksize )
482+
483+ return TableIterator (func , nrows = nrows , start = start , stop = stop ).get_values ()
451484
452- # concat and return
453- return concat (objs , axis = axis , verify_integrity = True )
454485
455486 def put (self , key , value , table = None , append = False , ** kwargs ):
456487 """
@@ -807,6 +838,49 @@ def _read_group(self, group, **kwargs):
807838 s .infer_axes ()
808839 return s .read (** kwargs )
809840
class TableIterator(object):
    """ define the iteration interface on a table

        Parameters
        ----------

        func      : the function to get results; it is called as
                    func(start, stop) once per chunk (or once for the
                    whole range by get_values)
        nrows     : the rows to iterate on
        start     : the passed start value (default is None, treated as 0)
        stop      : the passed stop value (default is None, treated as
                    nrows); it is clipped so that it never exceeds nrows
        chunksize : the passed chunking value (default is 50000)

        Raises
        ------
        ValueError : if chunksize is given and is not positive
    """

    def __init__(self, func, nrows, start=None, stop=None, chunksize=None):
        self.func = func
        self.nrows = nrows
        self.start = start or 0

        if stop is None:
            stop = self.nrows
        self.stop = min(self.nrows, stop)

        if chunksize is None:
            chunksize = 50000
        elif chunksize <= 0:
            # a non-positive step would never advance past self.stop, so
            # iteration would spin forever; reject it up front
            raise ValueError("chunksize must be positive, got %r" % (chunksize,))

        self.chunksize = chunksize

    def __iter__(self):
        """ yield func(chunk_start, chunk_stop) for consecutive chunks of at
            most chunksize rows between start and stop; chunks for which
            func returns None are skipped """
        current = self.start
        while current < self.stop:
            # clip the final chunk so func is never asked for rows beyond
            # the requested stop
            stop = min(current + self.chunksize, self.stop)
            v = self.func(current, stop)
            current = stop

            if v is None:
                continue

            yield v

    def get_values(self):
        """ return the result of func over the whole start/stop range in a
            single call (no chunking) """
        return self.func(self.start, self.stop)
810884
811885class IndexCol (object ):
812886 """ an index column description class
@@ -2351,7 +2425,7 @@ def create_description(self, complib=None, complevel=None, fletcher32=False, exp
23512425
23522426 return d
23532427
2354- def read_coordinates (self , where = None , ** kwargs ):
2428+ def read_coordinates (self , where = None , start = None , stop = None , ** kwargs ):
23552429 """ select coordinates (row numbers) from a table; return the coordinates object """
23562430
23572431 # validate the version
@@ -2362,7 +2436,7 @@ def read_coordinates(self, where=None, **kwargs):
23622436 return False
23632437
23642438 # create the selection
2365- self .selection = Selection (self , where = where , ** kwargs )
2439+ self .selection = Selection (self , where = where , start = start , stop = stop , ** kwargs )
23662440 return Coordinates (self .selection .select_coords (), group = self .group , where = where )
23672441
23682442 def read_column (self , column , ** kwargs ):
@@ -3132,6 +3206,12 @@ def __init__(self, values, group, where, **kwargs):
31323206 self .group = group
31333207 self .where = where
31343208
def __len__(self):
    """ the number of entries held in self.values """
    count = len(self.values)
    return count
3211+
def __getitem__(self, key):
    """ index self.values by key and wrap the result in a new Coordinates
        that carries over this object's group and where """
    picked = self.values[key]
    return Coordinates(picked, self.group, self.where)
31353215
31363216class Selection (object ):
31373217 """
0 commit comments