From e41b03ca574ed79c58bad9dc9e72d84dc98af19b Mon Sep 17 00:00:00 2001 From: Lionel GUEZ Date: Fri, 11 Oct 2024 20:19:31 +0200 Subject: [PATCH 1/3] Add option `my_range` to method iterRecords Using iterRecords with a range option should be faster than calling record within a loop, since we avoid the multiple calls to seek. --- shapefile.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/shapefile.py b/shapefile.py index 12af74d..07bc3c0 100644 --- a/shapefile.py +++ b/shapefile.py @@ -1809,7 +1809,7 @@ def records(self, fields=None): records.append(r) return records - def iterRecords(self, fields=None): + def iterRecords(self, fields=None, my_range=None): """Returns a generator of records in a dbf file. Useful for large shapefiles or dbf files. To only read some of the fields, specify the 'fields' arg as a @@ -1820,7 +1820,9 @@ def iterRecords(self, fields=None): f = self.__getFileObj(self.dbf) f.seek(self.__dbfHdrLength) fieldTuples, recLookup, recStruct = self.__recordFields(fields) - for i in xrange(self.numRecords): + if my_range is None: + my_range = xrange(self.numRecords) + for i in my_range: r = self.__record( oid=i, fieldTuples=fieldTuples, recLookup=recLookup, recStruct=recStruct ) From 4efef9fdb5a1d188a33316793cb964d33daa63df Mon Sep 17 00:00:00 2001 From: Lionel GUEZ Date: Tue, 15 Oct 2024 06:57:12 +0200 Subject: [PATCH 2/3] Revert "Add option `my_range` to method iterRecords" This reverts commit e41b03ca. JamesParrott pointed out that I had misunderstood the way `__record` works: `__record` does not use `oid` to find the correct record; it just assumes that `oid` is the correct one for the current file position.
--- shapefile.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/shapefile.py b/shapefile.py index 07bc3c0..12af74d 100644 --- a/shapefile.py +++ b/shapefile.py @@ -1809,7 +1809,7 @@ def records(self, fields=None): records.append(r) return records - def iterRecords(self, fields=None, my_range=None): + def iterRecords(self, fields=None): """Returns a generator of records in a dbf file. Useful for large shapefiles or dbf files. To only read some of the fields, specify the 'fields' arg as a @@ -1820,9 +1820,7 @@ def iterRecords(self, fields=None, my_range=None): f = self.__getFileObj(self.dbf) f.seek(self.__dbfHdrLength) fieldTuples, recLookup, recStruct = self.__recordFields(fields) - if my_range is None: - my_range = xrange(self.numRecords) - for i in my_range: + for i in xrange(self.numRecords): r = self.__record( oid=i, fieldTuples=fieldTuples, recLookup=recLookup, recStruct=recStruct ) From 811d32909e0b4889efa2357dd32682e324222225 Mon Sep 17 00:00:00 2001 From: Lionel GUEZ Date: Tue, 15 Oct 2024 08:02:03 +0200 Subject: [PATCH 3/3] Add method `iterRecords_range` Using the method `iterRecords_range` should be somewhat faster than calling the method `record` within a loop, since we avoid the repeated calls to seek inside `record`. --- shapefile.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/shapefile.py b/shapefile.py index 12af74d..906cdda 100644 --- a/shapefile.py +++ b/shapefile.py @@ -1827,6 +1827,32 @@ def iterRecords(self, fields=None): if r: yield r + def iterRecords_range(self, start, stop, fields=None): + """Returns a generator of records in a dbf file, for a range + of oid. Useful for large shapefiles or dbf files. To only + read some of the fields, specify the 'fields' arg as a list of + one or more fieldnames. 
+ + """ + if self.numRecords is None: + self.__dbfHeader() + f = self.__getFileObj(self.dbf) + start = self.__restrictIndex(start) + if abs(stop) > self.numRecords: + raise IndexError("Record index out of range.") + if stop < 0: + stop = range(self.numRecords)[stop] + recSize = self.__recordLength + f.seek(0) + f.seek(self.__dbfHdrLength + (start * recSize)) + fieldTuples, recLookup, recStruct = self.__recordFields(fields) + for i in xrange(start, stop): + r = self.__record( + oid=i, fieldTuples=fieldTuples, recLookup=recLookup, recStruct=recStruct + ) + if r: + yield r + def shapeRecord(self, i=0, fields=None, bbox=None): """Returns a combination geometry and attribute record for the supplied record index.