@@ -1118,21 +1118,36 @@ def __len__(self):
1118
1118
elif self .shp :
1119
1119
# Otherwise use shape count
1120
1120
if self .shx :
1121
- # Use index file to get total count
1122
1121
if self .numShapes is None :
1123
- # File length (16-bit word * 2 = bytes) - header length
1124
- self .shx .seek (24 )
1125
- shxRecordLength = (unpack (">i" , self .shx .read (4 ))[0 ] * 2 ) - 100
1126
- self .numShapes = shxRecordLength // 8
1127
-
1122
+ self .__shxHeader ()
1123
+
1128
1124
return self .numShapes
1129
1125
1130
1126
else :
1131
1127
# Index file not available, iterate all shapes to get total count
1132
1128
if self .numShapes is None :
1133
- for i ,shape in enumerate (self .iterShapes ()):
1134
- pass
1135
- self .numShapes = i + 1
1129
+ # Determine length of shp file
1130
+ shp = self .shp
1131
+ checkpoint = shp .tell ()
1132
+ shp .seek (0 ,2 )
1133
+ shpLength = shp .tell ()
1134
+ shp .seek (100 )
1135
+ # Do a fast shape iteration until end of file.
1136
+ unpack = Struct ('>2i' ).unpack
1137
+ offsets = []
1138
+ pos = shp .tell ()
1139
+ while pos < shpLength :
1140
+ offsets .append (pos )
1141
+ # Unpack the shape header only
1142
+ (recNum , recLength ) = unpack (shp .read (8 ))
1143
+ # Jump to next shape position
1144
+ pos += 8 + (2 * recLength )
1145
+ shp .seek (pos )
1146
+ # Set numShapes and offset indices
1147
+ self .numShapes = len (offsets )
1148
+ self ._offsets = offsets
1149
+ # Return to previous file position
1150
+ shp .seek (checkpoint )
1136
1151
1137
1152
return self .numShapes
1138
1153
@@ -1172,6 +1187,8 @@ def load(self, shapefile=None):
1172
1187
self .__shpHeader ()
1173
1188
if self .dbf :
1174
1189
self .__dbfHeader ()
1190
+ if self .shx :
1191
+ self .__shxHeader ()
1175
1192
1176
1193
def load_shp (self , shapefile_name ):
1177
1194
"""
@@ -1251,7 +1268,7 @@ def __restrictIndex(self, i):
1251
1268
return i
1252
1269
1253
1270
def __shpHeader (self ):
1254
- """Reads the header information from a .shp or .shx file."""
1271
+ """Reads the header information from a .shp file."""
1255
1272
if not self .shp :
1256
1273
raise ShapefileException ("Shapefile Reader requires a shapefile or file-like object. (no shp file found" )
1257
1274
shp = self .shp
@@ -1353,27 +1370,40 @@ def __shape(self, oid=None, bbox=None):
1353
1370
f .seek (next )
1354
1371
return record
1355
1372
1373
def __shxHeader(self):
    """Read the .shx header and derive the number of indexed shapes.

    Stores the result in ``self.numShapes``.

    Raises:
        ShapefileException: if no .shx file object is attached.
    """
    index_file = self.shx
    if not index_file:
        raise ShapefileException("Shapefile Reader requires a shapefile or file-like object. (no shx file found")
    # The file-length field lives at byte 24 of the header and is
    # expressed in 16-bit words.
    index_file.seek(24)
    (length_in_words,) = unpack(">i", index_file.read(4))
    # Words -> bytes, minus the 100 header bytes, leaves the bytes taken
    # up by index records; each record is 8 bytes long.
    record_bytes = (length_in_words * 2) - 100
    self.numShapes = record_bytes // 8
1382
+
1383
def __shxOffsets(self):
    """Read the shape offset positions from a .shx file.

    Populates ``self._offsets`` with one entry per index record.
    If ``self.numShapes`` has not been determined yet, the .shx header
    is read first so the number of records to load is known.

    Raises:
        ShapefileException: if no .shx file object is attached.
    """
    shx = self.shx
    if not shx:
        raise ShapefileException("Shapefile Reader requires a shapefile or file-like object. (no shx file found")
    # The record count comes from the shx header. Read it on demand so
    # this method does not fail with a TypeError when the header has not
    # been parsed beforehand.
    if self.numShapes is None:
        self.__shxHeader()
    # Jump to the first record.
    shx.seek(100)
    # Each index record consists of two 4-byte integers; only the first
    # one of each pair is kept below.
    shxRecords = _Array('i', shx.read(2 * self.numShapes * 4))
    # The values are stored big-endian; swap on little-endian hosts.
    if sys.byteorder != 'big':
        shxRecords.byteswap()
    # Stored values are doubled (presumably 16-bit words -> bytes, in
    # line with the length handling of the shx header).
    self._offsets = [2 * el for el in shxRecords[::2]]
1395
+
1356
1396
def __shapeIndex(self, i=None):
    """Returns the offset in a .shp file for a shape based on information
    in the .shx index file.

    Returns None when no .shx file is available or when no index ``i``
    was requested. Otherwise the offsets are loaded from the .shx file
    the first time they are needed and ``self._offsets[i]`` is returned.
    """
    shx = self.shx
    # Return None if no shx or no index requested
    if not shx or i is None:
        return None
    # At this point, we know the shx file exists; load the offsets lazily
    if not self._offsets:
        self.__shxOffsets()
    return self._offsets[i]
1377
1407
1378
1408
def shape (self , i = 0 , bbox = None ):
1379
1409
"""Returns a shape object for a shape in the geometry
@@ -1385,10 +1415,30 @@ def shape(self, i=0, bbox=None):
1385
1415
i = self .__restrictIndex (i )
1386
1416
offset = self .__shapeIndex (i )
1387
1417
if not offset :
1388
- # Shx index not available so iterate the full list.
1389
- for j ,k in enumerate (self .iterShapes ()):
1390
- if j == i :
1391
- return k
1418
+ # Shx index not available.
1419
+ # Determine length of shp file
1420
+ shp .seek (0 ,2 )
1421
+ shpLength = shp .tell ()
1422
+ shp .seek (100 )
1423
+ # Do a fast shape iteration until the requested index or end of file.
1424
+ unpack = Struct ('>2i' ).unpack
1425
+ _i = 0
1426
+ offset = shp .tell ()
1427
+ while offset < shpLength :
1428
+ if _i == i :
1429
+ # Reached the requested index, exit loop with the offset value
1430
+ break
1431
+ # Unpack the shape header only
1432
+ (recNum , recLength ) = unpack (shp .read (8 ))
1433
+ # Jump to next shape position
1434
+ offset += 8 + (2 * recLength )
1435
+ shp .seek (offset )
1436
+ _i += 1
1437
+ # If the index was not found, it likely means the .shp file is incomplete
1438
+ if _i != i :
1439
+ raise ShapefileException ('Shape index {} is out of bounds; the .shp file only contains {} shapes' .format (i , _i ))
1440
+
1441
+ # Seek to the offset and read the shape
1392
1442
shp .seek (offset )
1393
1443
return self .__shape (oid = i , bbox = bbox )
1394
1444
@@ -1397,21 +1447,8 @@ def shapes(self, bbox=None):
1397
1447
To only read shapes within a given spatial region, specify the 'bbox'
1398
1448
arg as a list or tuple of xmin,ymin,xmax,ymax.
1399
1449
"""
1400
- shp = self .__getFileObj (self .shp )
1401
- # Found shapefiles which report incorrect
1402
- # shp file length in the header. Can't trust
1403
- # that so we seek to the end of the file
1404
- # and figure it out.
1405
- shp .seek (0 ,2 )
1406
- self .shpLength = shp .tell ()
1407
- shp .seek (100 )
1408
1450
shapes = Shapes ()
1409
- i = 0
1410
- while shp .tell () < self .shpLength :
1411
- shape = self .__shape (oid = i , bbox = bbox )
1412
- if shape :
1413
- shapes .append (shape )
1414
- i += 1
1451
+ shapes .extend (self .iterShapes (bbox = bbox ))
1415
1452
return shapes
1416
1453
1417
1454
def iterShapes (self , bbox = None ):
@@ -1421,15 +1458,40 @@ def iterShapes(self, bbox=None):
1421
1458
arg as a list or tuple of xmin,ymin,xmax,ymax.
1422
1459
"""
1423
1460
shp = self .__getFileObj (self .shp )
1461
+ # Found shapefiles which report incorrect
1462
+ # shp file length in the header. Can't trust
1463
+ # that so we seek to the end of the file
1464
+ # and figure it out.
1424
1465
shp .seek (0 ,2 )
1425
- self . shpLength = shp .tell ()
1466
+ shpLength = shp .tell ()
1426
1467
shp .seek (100 )
1427
- i = 0
1428
- while shp .tell () < self .shpLength :
1429
- shape = self .__shape (oid = i , bbox = bbox )
1430
- if shape :
1431
- yield shape
1432
- i += 1
1468
+
1469
+ if self .numShapes :
1470
+ # Iterate exactly the number of shapes from shx header
1471
+ for i in xrange (self .numShapes ):
1472
+ # MAYBE: check if more left of file or exit early?
1473
+ shape = self .__shape (oid = i , bbox = bbox )
1474
+ if shape :
1475
+ yield shape
1476
+ else :
1477
+ # No shx file, unknown nr of shapes
1478
+ # Instead iterate until reach end of file
1479
+ # Collect the offset indices during iteration
1480
+ i = 0
1481
+ offsets = []
1482
+ pos = shp .tell ()
1483
+ while pos < shpLength :
1484
+ offsets .append (pos )
1485
+ shape = self .__shape (oid = i , bbox = bbox )
1486
+ pos = shp .tell ()
1487
+ if shape :
1488
+ yield shape
1489
+ i += 1
1490
+ # Entire shp file consumed
1491
+ # Update the number of shapes and list of offsets
1492
+ assert i == len (offsets )
1493
+ self .numShapes = i
1494
+ self ._offsets = offsets
1433
1495
1434
1496
def __dbfHeader (self ):
1435
1497
"""Reads a dbf header. Xbase-related code borrows heavily from ActiveState Python Cookbook Recipe 362715 by Raymond Hettinger"""
0 commit comments