@@ -1118,21 +1118,36 @@ def __len__(self):
11181118 elif self .shp :
11191119 # Otherwise use shape count
11201120 if self .shx :
1121- # Use index file to get total count
11221121 if self .numShapes is None :
1123- # File length (16-bit word * 2 = bytes) - header length
1124- self .shx .seek (24 )
1125- shxRecordLength = (unpack (">i" , self .shx .read (4 ))[0 ] * 2 ) - 100
1126- self .numShapes = shxRecordLength // 8
1127-
1122+ self .__shxHeader ()
1123+
11281124 return self .numShapes
11291125
11301126 else :
11311127 # Index file not available, iterate all shapes to get total count
11321128 if self .numShapes is None :
1133- for i ,shape in enumerate (self .iterShapes ()):
1134- pass
1135- self .numShapes = i + 1
1129+ # Determine length of shp file
1130+ shp = self .shp
1131+ checkpoint = shp .tell ()
1132+ shp .seek (0 ,2 )
1133+ shpLength = shp .tell ()
1134+ shp .seek (100 )
1135+ # Do a fast shape iteration until end of file.
1136+ unpack = Struct ('>2i' ).unpack
1137+ offsets = []
1138+ pos = shp .tell ()
1139+ while pos < shpLength :
1140+ offsets .append (pos )
1141+ # Unpack the shape header only
1142+ (recNum , recLength ) = unpack (shp .read (8 ))
1143+ # Jump to next shape position
1144+ pos += 8 + (2 * recLength )
1145+ shp .seek (pos )
1146+ # Set numShapes and offset indices
1147+ self .numShapes = len (offsets )
1148+ self ._offsets = offsets
1149+ # Return to previous file position
1150+ shp .seek (checkpoint )
11361151
11371152 return self .numShapes
11381153
@@ -1172,6 +1187,8 @@ def load(self, shapefile=None):
11721187 self .__shpHeader ()
11731188 if self .dbf :
11741189 self .__dbfHeader ()
1190+ if self .shx :
1191+ self .__shxHeader ()
11751192
11761193 def load_shp (self , shapefile_name ):
11771194 """
@@ -1251,7 +1268,7 @@ def __restrictIndex(self, i):
12511268 return i
12521269
12531270 def __shpHeader (self ):
1254- """Reads the header information from a .shp or .shx file."""
1271+ """Reads the header information from a .shp file."""
12551272 if not self .shp :
12561273 raise ShapefileException ("Shapefile Reader requires a shapefile or file-like object. (no shp file found" )
12571274 shp = self .shp
@@ -1353,27 +1370,40 @@ def __shape(self, oid=None, bbox=None):
13531370 f .seek (next )
13541371 return record
13551372
def __shxHeader(self):
    """Read the .shx header and store the number of index records.

    Sets ``self.numShapes`` from the file length recorded in the
    index header.

    Raises:
        ShapefileException: if no .shx file is available.
    """
    index_file = self.shx
    if not index_file:
        raise ShapefileException("Shapefile Reader requires a shapefile or file-like object. (no shx file found")
    # The file length sits at byte 24 of the header as a big-endian
    # 32-bit integer, counted in 16-bit words.
    index_file.seek(24)
    (length_in_words,) = unpack(">i", index_file.read(4))
    # Convert words to bytes, then drop the 100-byte header so that only
    # the space taken by the index records remains.
    records_size = (length_in_words * 2) - 100
    # Every index record occupies 8 bytes.
    self.numShapes = records_size // 8
1382+
def __shxOffsets(self):
    """Read the shape offset positions from the .shx index file.

    Fills ``self._offsets`` with one entry per index record: the byte
    position in the .shp file at which that record starts.

    Raises:
        ShapefileException: if no .shx file is available.
    """
    shx = self.shx
    if not shx:
        raise ShapefileException("Shapefile Reader requires a shapefile or file-like object. (no shx file found")
    # The record count is needed to size the read below. Fetch it from
    # the index header if it has not been determined yet, instead of
    # failing on ``2 * None * 4``.
    if self.numShapes is None:
        self.__shxHeader()
    # Jump to the first record.
    shx.seek(100)
    # Each index record consists of two 4-byte numbers; only the first
    # one (the offset) is wanted.
    shxRecords = _Array('i', shx.read(2 * self.numShapes * 4))
    # The values are stored big-endian, so swap on little-endian hosts.
    if sys.byteorder != 'big':
        shxRecords.byteswap()
    # Offsets are expressed in 16-bit words; double them to get bytes.
    self._offsets = [2 * el for el in shxRecords[::2]]
1395+
def __shapeIndex(self, i=None):
    """Return the offset in the .shp file of shape ``i``, based on the
    information in the .shx index file.

    Args:
        i: Index of the requested shape, or None.

    Returns:
        The entry of ``self._offsets`` for shape ``i``, or None when no
        .shx file is available or when ``i`` is None.
    """
    shx = self.shx
    # Nothing to look up without an index file or a requested index.
    # Use an identity test so that only the None singleton counts as
    # "no index requested".
    if not shx or i is None:
        return None
    # At this point, we know the shx file exists. Load the offsets from
    # it only if they have not been collected already.
    if not self._offsets:
        self.__shxOffsets()
    return self._offsets[i]
13771407
13781408 def shape (self , i = 0 , bbox = None ):
13791409 """Returns a shape object for a shape in the geometry
@@ -1385,10 +1415,30 @@ def shape(self, i=0, bbox=None):
13851415 i = self .__restrictIndex (i )
13861416 offset = self .__shapeIndex (i )
13871417 if not offset :
1388- # Shx index not available so iterate the full list.
1389- for j ,k in enumerate (self .iterShapes ()):
1390- if j == i :
1391- return k
1418+ # Shx index not available.
1419+ # Determine length of shp file
1420+ shp .seek (0 ,2 )
1421+ shpLength = shp .tell ()
1422+ shp .seek (100 )
1423+ # Do a fast shape iteration until the requested index or end of file.
1424+ unpack = Struct ('>2i' ).unpack
1425+ _i = 0
1426+ offset = shp .tell ()
1427+ while offset < shpLength :
1428+ if _i == i :
1429+ # Reached the requested index, exit loop with the offset value
1430+ break
1431+ # Unpack the shape header only
1432+ (recNum , recLength ) = unpack (shp .read (8 ))
1433+ # Jump to next shape position
1434+ offset += 8 + (2 * recLength )
1435+ shp .seek (offset )
1436+ _i += 1
1437+ # If the index was not found, it likely means the .shp file is incomplete
1438+ if _i != i :
1439+ raise ShapefileException ('Shape index {} is out of bounds; the .shp file only contains {} shapes' .format (i , _i ))
1440+
1441+ # Seek to the offset and read the shape
13921442 shp .seek (offset )
13931443 return self .__shape (oid = i , bbox = bbox )
13941444
@@ -1397,21 +1447,8 @@ def shapes(self, bbox=None):
13971447 To only read shapes within a given spatial region, specify the 'bbox'
13981448 arg as a list or tuple of xmin,ymin,xmax,ymax.
13991449 """
1400- shp = self .__getFileObj (self .shp )
1401- # Found shapefiles which report incorrect
1402- # shp file length in the header. Can't trust
1403- # that so we seek to the end of the file
1404- # and figure it out.
1405- shp .seek (0 ,2 )
1406- self .shpLength = shp .tell ()
1407- shp .seek (100 )
14081450 shapes = Shapes ()
1409- i = 0
1410- while shp .tell () < self .shpLength :
1411- shape = self .__shape (oid = i , bbox = bbox )
1412- if shape :
1413- shapes .append (shape )
1414- i += 1
1451+ shapes .extend (self .iterShapes (bbox = bbox ))
14151452 return shapes
14161453
14171454 def iterShapes (self , bbox = None ):
@@ -1421,15 +1458,40 @@ def iterShapes(self, bbox=None):
14211458 arg as a list or tuple of xmin,ymin,xmax,ymax.
14221459 """
14231460 shp = self .__getFileObj (self .shp )
1461+ # Found shapefiles which report incorrect
1462+ # shp file length in the header. Can't trust
1463+ # that so we seek to the end of the file
1464+ # and figure it out.
14241465 shp .seek (0 ,2 )
1425- self . shpLength = shp .tell ()
1466+ shpLength = shp .tell ()
14261467 shp .seek (100 )
1427- i = 0
1428- while shp .tell () < self .shpLength :
1429- shape = self .__shape (oid = i , bbox = bbox )
1430- if shape :
1431- yield shape
1432- i += 1
1468+
1469+ if self .numShapes :
1470+ # Iterate exactly the number of shapes from shx header
1471+ for i in xrange (self .numShapes ):
1472+ # MAYBE: check if more left of file or exit early?
1473+ shape = self .__shape (oid = i , bbox = bbox )
1474+ if shape :
1475+ yield shape
1476+ else :
1477+ # No shx file, unknown nr of shapes
1478+ # Instead iterate until reach end of file
1479+ # Collect the offset indices during iteration
1480+ i = 0
1481+ offsets = []
1482+ pos = shp .tell ()
1483+ while pos < shpLength :
1484+ offsets .append (pos )
1485+ shape = self .__shape (oid = i , bbox = bbox )
1486+ pos = shp .tell ()
1487+ if shape :
1488+ yield shape
1489+ i += 1
1490+ # Entire shp file consumed
1491+ # Update the number of shapes and list of offsets
1492+ assert i == len (offsets )
1493+ self .numShapes = i
1494+ self ._offsets = offsets
14331495
14341496 def __dbfHeader (self ):
14351497 """Reads a dbf header. Xbase-related code borrows heavily from ActiveState Python Cookbook Recipe 362715 by Raymond Hettinger"""
0 commit comments