From 2ee7532771bae554a5ecd1c2ba1a6d8a05fda0a1 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 09:18:07 +0100 Subject: [PATCH 01/19] Create py.typed --- src/py.typed | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/py.typed diff --git a/src/py.typed b/src/py.typed new file mode 100644 index 00000000..e69de29b From 5a77065a0e7fe4474b6b4443d26284826f1b6e5a Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 10:07:30 +0100 Subject: [PATCH 02/19] Update test_shapefile.py --- test_shapefile.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/test_shapefile.py b/test_shapefile.py index 04994af8..d7c76360 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -995,7 +995,7 @@ def test_iterRecords_start_stop(): # Arbitrary selection of record indices # (there are 663 records in blockgroups.dbf). - for i in [ + indices = [ 0, 1, 2, @@ -1013,18 +1013,20 @@ def test_iterRecords_start_stop(): N - 3, N - 2, N - 1, - ]: - for record in sf.iterRecords(start=i): + ] + for i, index in enumerate(indices): + for record in sf.iterRecords(start=index): assert record == sf.record(record.oid) - for record in sf.iterRecords(stop=i): + for record in sf.iterRecords(stop=index): assert record == sf.record(record.oid) - for stop in range(i, len(sf)): + for j in range(i+1, len(indices)): + stop = indices[j] # test negative indexing from end, as well as # positive values of stop, and its default - for stop_arg in (stop, stop - len(sf)): - for record in sf.iterRecords(start=i, stop=stop_arg): + for stop_arg in (stop, stop - N): + for record in sf.iterRecords(start=index, stop=stop_arg): assert record == sf.record(record.oid) From d8d635742f35a39bf46ed8379444bf051c8626bc Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 10:08:45 +0100 Subject: [PATCH 03/19] Reformat --- test_shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_shapefile.py b/test_shapefile.py index d7c76360..b43d2470 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -1021,7 +1021,7 @@ def test_iterRecords_start_stop(): for record in sf.iterRecords(stop=index): assert record == sf.record(record.oid) - for j in range(i+1, len(indices)): + for j in range(i + 1, len(indices)): stop = indices[j] # test negative indexing from end, as well as # positive values of stop, and its default From a5ecbf3ed3aa8d91c9fd039e7fedd21c3be1af2a Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 10:25:05 +0100 Subject: [PATCH 04/19] Update shapefile.py --- src/shapefile.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/shapefile.py b/src/shapefile.py index 321b215e..d52c730f 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -623,6 +623,7 @@ def __init__( list of shapes. For MultiPatch geometry, partTypes designates the patch type of each of the parts. """ + # Preserve previous behaviour for anyone who set self.shapeType = None if not isinstance(shapeType, _NoShapeTypeSentinel): self.shapeType = shapeType self.points = points or [] @@ -3138,6 +3139,8 @@ def _shapeparts( # Make sure polygon rings (parts) are closed # if shapeType in (5, 15, 25, 31): + # This method is never actually called on a MultiPatch + # so we omit its shapeType (31) for efficiency if isinstance(polyShape, Polygon): for part in parts: if part[0] != part[-1]: From 3591be48f94fe0157c7a47d116614d9d5469dd51 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 10:25:48 +0100 Subject: [PATCH 05/19] Reformat --- src/shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index d52c730f..537e2bcb 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -3139,7 +3139,7 @@ def _shapeparts( # Make sure polygon rings (parts) are closed # if shapeType in (5, 15, 25, 31): - # This method is never actually called on a MultiPatch + # This method is never actually called on a MultiPatch # so we omit its shapeType (31) for efficiency if isinstance(polyShape, Polygon): for part in parts: From efe077833a2844b9d9899ea3c60a106d814ad2a4 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 10:48:11 +0100 Subject: [PATCH 06/19] Refactor some of Writer.__shpRecord into _write_shape_to_shp_file( --- shapefiles/test/balancing.dbf | Bin 804 -> 804 bytes shapefiles/test/contextwriter.dbf | Bin 65 -> 65 bytes shapefiles/test/corrupt_too_long.dbf | Bin 580 -> 580 bytes shapefiles/test/dtype.dbf | Bin 259 -> 259 bytes shapefiles/test/line.dbf | Bin 116 -> 116 bytes shapefiles/test/multipoint.dbf | Bin 116 -> 116 bytes shapefiles/test/onlydbf.dbf | Bin 65 -> 65 bytes shapefiles/test/point.dbf | Bin 116 -> 116 bytes shapefiles/test/polygon.dbf | Bin 116 -> 116 bytes shapefiles/test/shapetype.dbf | Bin 65 -> 65 bytes shapefiles/test/testfile.dbf | Bin 65 -> 65 bytes src/shapefile.py | 338 ++++++++++++++------------- 12 files changed, 179 insertions(+), 159 deletions(-) diff --git a/shapefiles/test/balancing.dbf b/shapefiles/test/balancing.dbf index c77d63b3fa175a32a667bb61f9db31c0d220001a..8272cf33374d841d1876aa95146ca6b115a13d4f 100644 GIT binary patch delta 13 UcmZ3&wuFs^xt3jaBZ~qv02gKg)c^nh delta 13 UcmZ3&wuFs^xr&W(BZ~qv02aRjwEzGB diff --git a/shapefiles/test/contextwriter.dbf b/shapefiles/test/contextwriter.dbf index e030c2a3ba3517fb1bee24c6ae015968e20da29b..327fd49366a1e7061490a1e98bc06594129f958f 100644 GIT binary patch delta 10 RcmZ>CWMQslmz~IB3jhj_0sQ~~ delta 10 RcmZ>CWMQsiW1Prh3jhiT0o?!q diff --git a/shapefiles/test/corrupt_too_long.dbf b/shapefiles/test/corrupt_too_long.dbf index 57230c5dae273d13cf95675350da98b122facb83..e1bb1a55d413830e508ae125e7caff320e39f377 100644 GIT binary patch delta 13 UcmX@Ya)gD2xt3jaBZ~tQ02+A%FaQ7m delta 13 UcmX@Ya)gD2xr&W(BZ~tQ02$H)5C8xG diff --git a/shapefiles/test/dtype.dbf b/shapefiles/test/dtype.dbf index 1ddda01fdf467e0303fc64318e5b01aff29b4fee..2939da4791a864894ed53683d9d18574c69f3d05 100644 GIT binary patch delta 12 TcmZo>YGz_#u4R{<$nqZm5$Xd+ delta 12 TcmZo>YGz_#u3}@H$nqZm5n}@< diff --git a/shapefiles/test/line.dbf b/shapefiles/test/line.dbf index 24f529e800a2a5da5d96e8fb44ce5268fb410555..9e43d68b8aedd698aac5f61e600bbb63a4e42b81 100644 GIT binary patch delta 10 RcmXRZVPURimz~H`000h)0+s*( delta 10 RcmXRZVPURfW1PrR000gI0(JlZ diff --git a/shapefiles/test/multipoint.dbf b/shapefiles/test/multipoint.dbf index 4d7d4f1777867f1dded56cbda3f31aea44a25824..74ed8b14883b8194290b563eb207108beb938418 100644 GIT binary patch delta 10 RcmXRZVPURimz~H`000h)0+s*( delta 10 RcmXRZVPURfW1PrR000gI0(JlZ diff --git a/shapefiles/test/onlydbf.dbf b/shapefiles/test/onlydbf.dbf index e030c2a3ba3517fb1bee24c6ae015968e20da29b..327fd49366a1e7061490a1e98bc06594129f958f 100644 GIT binary patch delta 10 RcmZ>CWMQslmz~IB3jhj_0sQ~~ delta 10 RcmZ>CWMQsiW1Prh3jhiT0o?!q diff --git a/shapefiles/test/point.dbf b/shapefiles/test/point.dbf index e29d0859b8c872db6f0f71237bfb88ce21568e61..5a881b870c1a5f904c98e765fabd80fb94feb0d6 100644 GIT binary patch delta 10 RcmXRZVPURimz~H`000h)0+s*( delta 10 RcmXRZVPURfW1PrR000gI0(JlZ diff --git a/shapefiles/test/polygon.dbf b/shapefiles/test/polygon.dbf index b116dc4692b34fdb7c114ee60214e15f734ab3aa..1cc8920a0b6da92323732a36b2e17a0faccab5f3 100644 GIT binary patch delta 10 RcmXRZVPURimz~H`000h)0+s*( delta 10 RcmXRZVPURfW1PrR000gI0(JlZ diff --git a/shapefiles/test/shapetype.dbf b/shapefiles/test/shapetype.dbf index e030c2a3ba3517fb1bee24c6ae015968e20da29b..327fd49366a1e7061490a1e98bc06594129f958f 100644 GIT binary patch delta 10 RcmZ>CWMQslmz~IB3jhj_0sQ~~ delta 10 RcmZ>CWMQsiW1Prh3jhiT0o?!q diff --git a/shapefiles/test/testfile.dbf b/shapefiles/test/testfile.dbf index e030c2a3ba3517fb1bee24c6ae015968e20da29b..327fd49366a1e7061490a1e98bc06594129f958f 100644 GIT binary patch delta 10 RcmZ>CWMQslmz~IB3jhj_0sQ~~ delta 10 RcmZ>CWMQsiW1Prh3jhiT0o?!q diff --git a/src/shapefile.py b/src/shapefile.py index 537e2bcb..20054a0d 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -952,6 +952,176 @@ def _read_shape_from_shp_file( return shape +def _write_shape_to_shp_file( + f, + s, + i, + update_bbox, + update_mbox, + update_zbox, +): + f.write(pack(" 2 else 0)) + except error: + raise ShapefileException( + f"Failed to write elevation values for record {i}. Expected floats." + ) + # Write m extremes and values + # When reading a file, pyshp converts NODATA m values to None, so here we make sure to convert them back to NODATA + # Note: missing m values are autoset to NODATA. + if s.shapeType in (13, 15, 18, 23, 25, 28, 31): + try: + f.write(pack("<2d", *update_mbox(s))) + except error: + raise ShapefileException( + f"Failed to write measure extremes for record {i}. Expected floats" + ) + try: + if hasattr(s, "m"): + # if m values are stored in attribute + # fmt: off + f.write( + pack( + f"<{len(s.m)}d", + *[m if m is not None else NODATA for m in s.m] + ) + ) + # fmt: on + else: + # if m values are stored as 3rd/4th dimension + # 0-index position of m value is 3 if z type (x,y,z,m), or 2 if m type (x,y,m) + mpos = 3 if s.shapeType in (13, 15, 18, 31) else 2 + for p in s.points: + f.write( + pack( + " mpos and p[mpos] is not None + else NODATA, + ) + ) + except error: + raise ShapefileException( + f"Failed to write measure values for record {i}. Expected floats" + ) + # Write a single point + if s.shapeType in (1, 11, 21): + try: + f.write(pack("<2d", s.points[0][0], s.points[0][1])) + except error: + raise ShapefileException( + f"Failed to write point for record {i}. Expected floats." + ) + # Write a single Z value + # Note: missing z values are autoset to 0, but not sure if this is ideal. + if s.shapeType == 11: + # update the global z box + update_zbox(s) + # then write value + if hasattr(s, "z"): + # if z values are stored in attribute + try: + if not s.z: + s.z = (0,) + f.write(pack(" 2 else 0)) - except error: - raise ShapefileException( - f"Failed to write elevation values for record {self.shpNum}. Expected floats." - ) - # Write m extremes and values - # When reading a file, pyshp converts NODATA m values to None, so here we make sure to convert them back to NODATA - # Note: missing m values are autoset to NODATA. - if s.shapeType in (13, 15, 18, 23, 25, 28, 31): - try: - f.write(pack("<2d", *self.__mbox(s))) - except error: - raise ShapefileException( - f"Failed to write measure extremes for record {self.shpNum}. Expected floats" - ) - try: - if hasattr(s, "m"): - # if m values are stored in attribute - # fmt: off - f.write( - pack( - f"<{len(s.m)}d", - *[m if m is not None else NODATA for m in s.m] - ) - ) - # fmt: on - else: - # if m values are stored as 3rd/4th dimension - # 0-index position of m value is 3 if z type (x,y,z,m), or 2 if m type (x,y,m) - mpos = 3 if s.shapeType in (13, 15, 18, 31) else 2 - for p in s.points: - f.write( - pack( - " mpos and p[mpos] is not None - else NODATA, - ) - ) - except error: - raise ShapefileException( - f"Failed to write measure values for record {self.shpNum}. Expected floats" - ) - # Write a single point - if s.shapeType in (1, 11, 21): - try: - f.write(pack("<2d", s.points[0][0], s.points[0][1])) - except error: - raise ShapefileException( - f"Failed to write point for record {self.shpNum}. Expected floats." - ) - # Write a single Z value - # Note: missing z values are autoset to 0, but not sure if this is ideal. - if s.shapeType == 11: - # update the global z box - self.__zbox(s) - # then write value - if hasattr(s, "z"): - # if z values are stored in attribute - try: - if not s.z: - s.z = (0,) - f.write(pack(" Date: Tue, 29 Jul 2025 11:18:26 +0100 Subject: [PATCH 07/19] Ignore Pylint W0707 --- pyproject.toml | 9 ++++++--- src/shapefile.py | 14 ++------------ 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a8e14c4a..73883b60 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -112,9 +112,12 @@ load-plugins=[ "pylint_per_file_ignores", ] -# Silence warning: shapefile.py:2076:20: W0212: Access to a protected +# Silence warning: src/shapefile.py:2076:20: W0212: Access to a protected # member _from_geojson of a client class (protected-access) -# shapefile.py:950:16: W0201: Attribute 'm' defined outside __init__ (attribute-defined-outside-init) +# src/shapefile.py:950:16: W0201: Attribute 'm' defined outside __init__ (attribute-defined-outside-init) +# src/shapefile.py:973:12: W0707: Consider explicitly re-raising using 'except error as exc' and +# 'raise ShapefileException(f'Failed to write bounding box for record {i}. +# Expected floats.') from exc' (raise-missing-from) # Silence remarks: # src\shapefile.py:338:0: R0914: Too many local variables (21/15) (too-many-locals) # src\shapefile.py:338:0: R0912: Too many branches (24/12) (too-many-branches) @@ -134,6 +137,6 @@ load-plugins=[ # https://github.com/christopherpickering/pylint-per-file-ignores/issues/160 [tool.pylint.'messages control'] per-file-ignores = [ - "/src/shapefile.py:W0212,W0201,R0902,R0903,R0904,R0911,R0912,R0913,R0914,R0915,R0917,R1732", + "/src/shapefile.py:W0707,W0212,W0201,R0902,R0903,R0904,R0911,R0912,R0913,R0914,R0915,R0917,R1732", "test_shapefile.py:W0212,R1732", ] diff --git a/src/shapefile.py b/src/shapefile.py index 20054a0d..d26a3337 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1325,7 +1325,6 @@ def __getattr__(self, item: str) -> RecordValue: and IndexError, if the field exists but the field's corresponding value in the Record does not exist """ - # pylint: disable=raise-missing-from try: if item == "__setstate__": # Prevent infinite loop from copy.deepcopy() raise AttributeError("_Record does not implement __setstate__") @@ -1337,7 +1336,6 @@ def __getattr__(self, item: str) -> RecordValue: raise IndexError( f"{item} found as a field but not enough values available." ) - # pylint: enable=raise-missing-from def __setattr__(self, key: str, value: RecordValue): """ @@ -1353,7 +1351,7 @@ def __setattr__(self, key: str, value: RecordValue): index = self.__field_positions[key] return list.__setitem__(self, index, value) except KeyError: - raise AttributeError(f"{key} is not a field name") # pylint: disable=raise-missing-from + raise AttributeError(f"{key} is not a field name") def __getitem__(self, item): """ @@ -1392,7 +1390,7 @@ def __setitem__(self, key, value): if index is not None: return list.__setitem__(self, index, value) - raise IndexError(f"{key} is not a field name and not an int") # pylint: disable=raise-missing-from + raise IndexError(f"{key} is not a field name and not an int") @property def oid(self) -> int: @@ -2727,7 +2725,6 @@ def __shapefileHeader( Several of the shapefile formats are so similar that a single generic method to read or write them is warranted.""" - # pylint: disable=raise-missing-from f = self.__getFileObj(fileObj) f.seek(0) # File code, Unused bytes @@ -2786,8 +2783,6 @@ def __shapefileHeader( "Failed to write shapefile elevation and measure values. Floats required." ) - # pylint: enable=raise-missing-from - def __dbfHeader(self): """Writes the dbf header and field descriptors.""" f = self.__getFileObj(self.dbf) @@ -2859,7 +2854,6 @@ def shape( self.__shxRecord(offset, length) def __shpRecord(self, s): - # pylint: disable=raise-missing-from f = self.__getFileObj(self.shp) offset = f.tell() # Record number, Content length place holder @@ -2891,13 +2885,11 @@ def __shpRecord(self, s): f.seek(start - 4) f.write(pack(">i", length)) f.seek(finish) - # pylint: enable=raise-missing-from return offset, length def __shxRecord(self, offset, length): """Writes the shx records.""" - # pylint: disable=raise-missing-from f = self.__getFileObj(self.shx) try: f.write(pack(">i", offset // 2)) @@ -2907,8 +2899,6 @@ def __shxRecord(self, offset, length): ) f.write(pack(">i", length)) - # pylint: enable=raise-missing-from - def record( self, *recordList: Iterable[RecordValue], **recordDict: dict[str, RecordValue] ): From 2c4a32f8d1104143ec4d993ce06b0c96efcafdb0 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 11:48:09 +0100 Subject: [PATCH 08/19] Further reduce number of index selector tests --- test_shapefile.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/test_shapefile.py b/test_shapefile.py index b43d2470..2a10d3ee 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -999,19 +999,12 @@ def test_iterRecords_start_stop(): 0, 1, 2, - 3, 5, 11, - 17, - 33, - 51, - 103, - 170, - 234, - 435, - 543, + 41, + 310, + 513, N - 3, - N - 2, N - 1, ] for i, index in enumerate(indices): From b628577dc8ba1097664eb741d32f4f938dc38696 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 14:23:30 +0100 Subject: [PATCH 09/19] Refactor _from_shp_file methods onto new subclasses of Shape --- src/shapefile.py | 255 ++++++++++++++++++++++++++++------------------- 1 file changed, 152 insertions(+), 103 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index d26a3337..7ded3c71 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -845,88 +845,6 @@ def shapeTypeName(self) -> str: def __repr__(self): return f"Shape #{self.__oid}: {self.shapeTypeName}" - # pylint: disable=unused-argument - def _set_bbox_from_shp_file(self, f): - pass - - @staticmethod - def _get_nparts_from_shp_file(f): - return None - - @staticmethod - def _get_npoints_from_shp_file(f): - return None - - def _set_parts_from_shp_file(self, f, nParts): - pass - - def _set_part_types_from_shp_file(self, f, nParts): - pass - - def _set_points_from_shp_file(self, f, nPoints): - pass - - def _set_z_from_shp_file(self, f, nPoints): - pass - - def _set_m_from_shp_file(self, f, nPoints, next_shape): - pass - - def _get_and_set_2D_point_from_shp_file(self, f): - return None - - def _set_single_point_z_from_shp_file(self, f): - pass - - def _set_single_point_m_from_shp_file(self, f, next_shape): - pass - - # pylint: enable=unused-argument - - @classmethod - def _from_shp_file(cls, f, next_shape, oid=None, bbox=None): - shape = cls(oid=oid) - - shape._set_bbox_from_shp_file(f) # pylint: disable=assignment-from-none - - # if bbox specified and no overlap, skip this shape - if bbox is not None and not bbox_overlap(bbox, tuple(shape.bbox)): # pylint: disable=no-member - # because we stop parsing this shape, skip to beginning of - # next shape before we return - return None - - nParts: Optional[int] = shape._get_nparts_from_shp_file(f) - nPoints: Optional[int] = shape._get_npoints_from_shp_file(f) - # Previously, we also set __zmin = __zmax = __mmin = __mmax = None - - if nParts: - shape._set_parts_from_shp_file(f, nParts) - shape._set_part_types_from_shp_file(f, nParts) - - if nPoints: - shape._set_points_from_shp_file(f, nPoints) - - shape._set_z_from_shp_file(f, nPoints) - - shape._set_m_from_shp_file(f, nPoints, next_shape) - - # Read a single point - # if shapeType in (1, 11, 21): - point_2D = shape._get_and_set_2D_point_from_shp_file(f) # pylint: disable=assignment-from-none - - if bbox is not None and point_2D is not None: - x, y = point_2D # pylint: disable=unpacking-non-sequence - # create bounding box for Point by duplicating coordinates - # skip shape if no overlap with bounding box - if not bbox_overlap(bbox, (x, y, x, y)): - return None - - shape._set_single_point_z_from_shp_file(f) - - shape._set_single_point_m_from_shp_file(f, next_shape) - - return shape - def _read_shape_from_shp_file( f, oid=None, bbox=None @@ -963,10 +881,10 @@ def _write_shape_to_shp_file( f.write(pack("= 16: __mmin, __mmax = unpack("<2d", f.read(16)) # Measure values less than -10e38 are nodata values according to the spec @@ -1194,10 +1234,19 @@ def _set_m_from_shp_file(self, f, nPoints, next_shape): self.m = [None for _ in range(nPoints)] -class _HasZ(Shape): +class _HasZ(_CanHaveBBox): + # Not a Point + _shapeTypes = frozenset( + [ + POLYLINEZ, + POLYGONZ, + MULTIPOINTZ, + MULTIPATCH, + ] + ) z: Sequence[float] - def _set_z_from_shp_file(self, f, nPoints): + def _set_zs_from_shp_file(self, f, nPoints): __zmin, __zmax = unpack("<2d", f.read(16)) # pylint: disable=unused-private-member self.z = _Array[float]("d", unpack(f"<{nPoints}d", f.read(nPoints * 8))) @@ -1209,7 +1258,7 @@ def _set_part_types_from_shp_file(self, f, nParts): self.partTypes = _Array[int]("i", unpack(f"<{nParts}i", f.read(nParts * 4))) -class PointM(Point, _HasM): +class PointM(Point): shapeType = POINTM # same default as in Writer.__shpRecord (if s.shapeType in (11, 21):) # PyShp encodes None m values as NODATA @@ -1239,7 +1288,7 @@ class MultiPointM(MultiPoint, _HasM): shapeType = MULTIPOINTM -class PointZ(PointM, _HasZ): +class PointZ(PointM): shapeType = POINTZ # same default as in Writer.__shpRecord (if s.shapeType == 11:) z: Sequence[float] = (0.0,) From 48a2b96532c6d3548633b154997a8b25b1111a18 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 15:08:48 +0100 Subject: [PATCH 10/19] Refactor _write_shape_to_shp_file into static methods on Shape subclasses --- src/shapefile.py | 334 +++++++++++++++++++++++++++-------------------- 1 file changed, 194 insertions(+), 140 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 7ded3c71..64722b9d 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -883,161 +883,43 @@ def _write_shape_to_shp_file( # For point just update bbox of the whole shapefile if s.shapeType in Point._shapeTypes: update_bbox(s) - # All shape types capable of having a bounding box elif s.shapeType in _CanHaveBBox._shapeTypes: - try: - f.write(pack("<4d", *update_bbox(s))) - except error: - raise ShapefileException( - f"Failed to write bounding box for record {i}. Expected floats." - ) - # Shape types with parts + # We use static methods here and below, + # to support s a Shape base class, with shapeType set, + # not one of our newer shape specific sub classes. + _CanHaveBBox._try_write_bbox_to_shp_file(f, s, i, update_bbox) + if s.shapeType in _CanHaveParts._shapeTypes: - # Number of parts - f.write(pack(" 2 else 0)) - except error: - raise ShapefileException( - f"Failed to write elevation values for record {i}. Expected floats." - ) - # Write m extremes and values - # When reading a file, pyshp converts NODATA m values to None, so here we make sure to convert them back to NODATA - # Note: missing m values are autoset to NODATA. + _HasZ._try_write_zs_to_shp_file(f, s, i, update_zbox) + if s.shapeType in _HasM._shapeTypes: - try: - f.write(pack("<2d", *update_mbox(s))) - except error: - raise ShapefileException( - f"Failed to write measure extremes for record {i}. Expected floats" - ) - try: - if hasattr(s, "m"): - # if m values are stored in attribute - # fmt: off - f.write( - pack( - f"<{len(s.m)}d", - *[m if m is not None else NODATA for m in s.m] - ) - ) - # fmt: on - else: - # if m values are stored as 3rd/4th dimension - # 0-index position of m value is 3 if z type (x,y,z,m), or 2 if m type (x,y,m) - mpos = 3 if s.shapeType in {13, 15, 18, 31} else 2 - for p in s.points: - f.write( - pack( - " mpos and p[mpos] is not None - else NODATA, - ) - ) - except error: - raise ShapefileException( - f"Failed to write measure values for record {i}. Expected floats" - ) + _HasM._try_write_ms_to_shp_file(f, s, i, update_mbox) + # Write a single point if s.shapeType in Point._shapeTypes: - try: - f.write(pack("<2d", s.points[0][0], s.points[0][1])) - except error: - raise ShapefileException( - f"Failed to write point for record {i}. Expected floats." - ) + Point._try_write_to_shp(f, s, i) + # Write a single Z value - # Note: missing z values are autoset to 0, but not sure if this is ideal. if s.shapeType == POINTZ: - # update the global z box - update_zbox(s) - # then write value - if hasattr(s, "z"): - # if z values are stored in attribute - try: - if not s.z: - s.z = (0,) - f.write(pack(" mpos and p[mpos] is not None + else NODATA, + ) + ) + except error: + raise ShapefileException( + f"Failed to write measure values for record {i}. Expected floats" + ) + class _HasZ(_CanHaveBBox): # Not a Point @@ -1250,6 +1216,29 @@ def _set_zs_from_shp_file(self, f, nPoints): __zmin, __zmax = unpack("<2d", f.read(16)) # pylint: disable=unused-private-member self.z = _Array[float]("d", unpack(f"<{nPoints}d", f.read(nPoints * 8))) + @staticmethod + def _try_write_zs_to_shp_file(f, s, i, update_zbox): + # Write z extremes and values + # Note: missing z values are autoset to 0, but not sure if this is ideal. + try: + f.write(pack("<2d", *update_zbox(s))) + except error: + raise ShapefileException( + f"Failed to write elevation extremes for record {i}. Expected floats." + ) + try: + if hasattr(s, "z"): + # if z values are stored in attribute + f.write(pack(f"<{len(s.z)}d", *s.z)) + else: + # if z values are stored as 3rd dimension + for p in s.points: + f.write(pack(" 2 else 0)) + except error: + raise ShapefileException( + f"Failed to write elevation values for record {i}. Expected floats." + ) + class MultiPatch(_HasM, _HasZ, _CanHaveParts): shapeType = MULTIPATCH @@ -1257,6 +1246,11 @@ class MultiPatch(_HasM, _HasZ, _CanHaveParts): def _set_part_types_from_shp_file(self, f, nParts): self.partTypes = _Array[int]("i", unpack(f"<{nParts}i", f.read(nParts * 4))) + @staticmethod + def _write_part_types_to_shp_file(f, s): + for partType in s.partTypes: + f.write(pack(" Date: Tue, 29 Jul 2025 16:09:58 +0100 Subject: [PATCH 11/19] Add shapefile Writer benchmark --- run_benchmarks.py | 84 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 62 insertions(+), 22 deletions(-) diff --git a/run_benchmarks.py b/run_benchmarks.py index 0b8b8288..27b03d00 100644 --- a/run_benchmarks.py +++ b/run_benchmarks.py @@ -2,14 +2,16 @@ from __future__ import annotations +import collections import functools import os import timeit from collections.abc import Callable from pathlib import Path +from tempfile import TemporaryFile as TempF from typing import Union -import shapefile as shp +import shapefile # For shapefiles from https://github.com/JamesParrott/PyShp_test_shapefile DEFAULT_PYSHP_TEST_REPO = ( @@ -31,26 +33,41 @@ def benchmark( name: str, run_count: int, func: Callable, - col_width: tuple, + col_widths: tuple, compare_to: float | None = None, ) -> float: placeholder = "Running..." - print(f"{name:>{col_width[0]}} | {placeholder}", end="", flush=True) + print(f"{name:>{col_widths[0]}} | {placeholder}", end="", flush=True) time_taken = timeit.timeit(func, number=run_count) print("\b" * len(placeholder), end="") time_suffix = " s" - print(f"{time_taken:{col_width[1]-len(time_suffix)}.3g}{time_suffix}", end="") + print(f"{time_taken:{col_widths[1]-len(time_suffix)}.3g}{time_suffix}", end="") print() return time_taken +fields = {} +shapeRecords = collections.defaultdict(list) + + def open_shapefile_with_PyShp(target: Union[str, os.PathLike]): - with shp.Reader(target) as r: + with shapefile.Reader(target) as r: + fields[target] = r.fields for shapeRecord in r.iterShapeRecords(): - pass + shapeRecords[target].append(shapeRecord) + + +def write_shapefile_with_PyShp(target: Union[str, os.PathLike]): + with TempF("wb") as shp, TempF("wb") as dbf, TempF("wb") as shx: + with shapefile.Writer(shp=shp, dbf=dbf, shx=shx) as w: # type: ignore [arg-type] + for field_info_tuple in fields[target]: + w.field(*field_info_tuple) + for shapeRecord in shapeRecords[target]: + w.shape(shapeRecord.shape) + w.record(*shapeRecord.record) -READER_TESTS = { +SHAPEFILES = { "Blockgroups": blockgroups_file, "Edit": edit_file, "Merge": merge_file, @@ -60,24 +77,47 @@ def open_shapefile_with_PyShp(target: Union[str, os.PathLike]): } -def run(run_count: int) -> None: - col_width = (21, 10) +# Load files to avoid one off delays that only affect first disk seek +for file_path in SHAPEFILES.values(): + file_path.read_bytes() + +reader_benchmarks = [ + functools.partial( + benchmark, + name=f"Read {test_name}", + func=functools.partial(open_shapefile_with_PyShp, target=target), + ) + for test_name, target in SHAPEFILES.items() +] + +# Require fields and shapeRecords to first have been populated +# from data from previouly running the reader_benchmarks +writer_benchmarks = [ + functools.partial( + benchmark, + name=f"Write {test_name}", + func=functools.partial(write_shapefile_with_PyShp, target=target), + ) + for test_name, target in SHAPEFILES.items() +] + + +def run(run_count: int, benchmarks: list[Callable[[], None]]) -> None: + col_widths = (22, 10) col_head = ("parser", "exec time", "performance (more is better)") - # Load files to avoid one off delays that only affect first disk seek - for file_path in READER_TESTS.values(): - file_path.read_bytes() print(f"Running benchmarks {run_count} times:") - print("-" * col_width[0] + "---" + "-" * col_width[1]) - print(f"{col_head[0]:>{col_width[0]}} | {col_head[1]:>{col_width[1]}}") - print("-" * col_width[0] + "-+-" + "-" * col_width[1]) - for test_name, target in READER_TESTS.items(): - benchmark( - f"Read {test_name}", - run_count, - functools.partial(open_shapefile_with_PyShp, target=target), - col_width, + print("-" * col_widths[0] + "---" + "-" * col_widths[1]) + print(f"{col_head[0]:>{col_widths[0]}} | {col_head[1]:>{col_widths[1]}}") + print("-" * col_widths[0] + "-+-" + "-" * col_widths[1]) + for benchmark in benchmarks: + benchmark( # type: ignore [call-arg] + run_count=run_count, + col_widths=col_widths, ) if __name__ == "__main__": - run(1) + print("Reader tests:") + run(1, reader_benchmarks) # type: ignore [arg-type] + print("\n\nWrite tests:") + run(1, writer_benchmarks) # type: ignore [arg-type] From 9aac92c25d0eb3b52b19992e48f9802bccd730ff Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 16:17:47 +0100 Subject: [PATCH 12/19] Update run_benchmarks.py --- run_benchmarks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_benchmarks.py b/run_benchmarks.py index 27b03d00..edc2119a 100644 --- a/run_benchmarks.py +++ b/run_benchmarks.py @@ -119,5 +119,5 @@ def run(run_count: int, benchmarks: list[Callable[[], None]]) -> None: if __name__ == "__main__": print("Reader tests:") run(1, reader_benchmarks) # type: ignore [arg-type] - print("\n\nWrite tests:") + print("\n\nWriter tests:") run(1, writer_benchmarks) # type: ignore [arg-type] From fbbae0a56a0eb109c926fef81e33d8cfe8750916 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 17:57:13 +0100 Subject: [PATCH 13/19] Split _write_shape_to_shp_file into _try_write methods on Point and _CanHaveBBox --- pyproject.toml | 7 +- src/shapefile.py | 198 ++++++++++++++++++++++++++--------------------- 2 files changed, 113 insertions(+), 92 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 73883b60..d3e0e894 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -112,10 +112,9 @@ load-plugins=[ "pylint_per_file_ignores", ] -# Silence warning: src/shapefile.py:2076:20: W0212: Access to a protected -# member _from_geojson of a client class (protected-access) -# src/shapefile.py:950:16: W0201: Attribute 'm' defined outside __init__ (attribute-defined-outside-init) -# src/shapefile.py:973:12: W0707: Consider explicitly re-raising using 'except error as exc' and +# Silence warnings: src/shapefile.py:2076:20: W0212: Access to a protected member _from_geojson of a client class (protected-access) +# src/shapefile.py:950:16: W0201: Attribute 'm' defined outside __init__ (attribute-defined-outside-init) +# src/shapefile.py:973:12: W0707: Consider explicitly re-raising using 'except error as exc' and # 'raise ShapefileException(f'Failed to write bounding box for record {i}. # Expected floats.') from exc' (raise-missing-from) # Silence remarks: diff --git a/src/shapefile.py b/src/shapefile.py index 64722b9d..d19e7b7b 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -874,52 +874,19 @@ def _write_shape_to_shp_file( f, s, i, - update_bbox, - update_mbox, - update_zbox, + bbox, + mbox, + zbox, ): f.write(pack(" str: @@ -2972,13 +2982,25 @@ def __shpRecord(self, s): f"the type of the shapefile ({self.shapeType})." ) + # For both single point and multiple-points non-null shapes, + # update bbox, mbox and zbox of the whole shapefile + new_bbox = self.__bbox(s) if s.shapeType != NULL else None + new_mbox = ( + self.__mbox(s) + if s.shapeType in {POINTM, POINTZ} | _HasM._shapeTypes + else None + ) + new_zbox = ( + self.__zbox(s) if s.shapeType in {POINTZ} | _HasZ._shapeTypes else None + ) + _write_shape_to_shp_file( f=f, s=s, i=self.shpNum, - update_bbox=self.__bbox, - update_mbox=self.__mbox, - update_zbox=self.__zbox, + bbox=new_bbox, + mbox=new_mbox, + zbox=new_zbox, ) # Finalize record length as 16-bit words From 5b65b40f87ed961e145d3191fb7e9a8ee1c4a381 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 18:07:17 +0100 Subject: [PATCH 14/19] Move _write_shape_to_shp_file back into Writer.__shpRecord --- src/shapefile.py | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index d19e7b7b..3f3d8c94 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -870,23 +870,20 @@ def _read_shape_from_shp_file( return shape -def _write_shape_to_shp_file( - f, - s, - i, - bbox, - mbox, - zbox, -): - f.write(pack(" Date: Tue, 29 Jul 2025 19:35:23 +0100 Subject: [PATCH 15/19] Use tmp io.BytesIO, and write to file at end --- src/shapefile.py | 84 ++++++++++++++++++++++++++---------------------- 1 file changed, 45 insertions(+), 39 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 3f3d8c94..3fdb9f40 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2967,52 +2967,58 @@ def shape( self.__shxRecord(offset, length) def __shpRecord(self, s): - f = self.__getFileObj(self.shp) - offset = f.tell() + shp = self.__getFileObj(self.shp) + offset = shp.tell() # Record number, Content length place holder self.shpNum += 1 - f.write(pack(">2i", self.shpNum, 0)) - start = f.tell() - # Shape Type - if self.shapeType is None and s.shapeType != NULL: - self.shapeType = s.shapeType - if not s.shapeType in {NULL, self.shapeType}: - raise ShapefileException( - f"The shape's type ({s.shapeType}) must match " - f"the type of the shapefile ({self.shapeType})." + with io.BytesIO() as f: + f.write(pack(">2i", self.shpNum, 0)) + start = f.tell() + # Shape Type + if self.shapeType is None and s.shapeType != NULL: + self.shapeType = s.shapeType + if not s.shapeType in {NULL, self.shapeType}: + raise ShapefileException( + f"The shape's type ({s.shapeType}) must match " + f"the type of the shapefile ({self.shapeType})." + ) + + # For both single point and multiple-points non-null shapes, + # update bbox, mbox and zbox of the whole shapefile + new_bbox = self.__bbox(s) if s.shapeType != NULL else None + new_mbox = ( + self.__mbox(s) + if s.shapeType in {POINTM, POINTZ} | _HasM._shapeTypes + else None + ) + new_zbox = ( + self.__zbox(s) + if s.shapeType in {POINTZ} | _HasZ._shapeTypes + else None ) - # For both single point and multiple-points non-null shapes, - # update bbox, mbox and zbox of the whole shapefile - new_bbox = self.__bbox(s) if s.shapeType != NULL else None - new_mbox = ( - self.__mbox(s) - if s.shapeType in {POINTM, POINTZ} | _HasM._shapeTypes - else None - ) - new_zbox = ( - self.__zbox(s) if s.shapeType in {POINTZ} | _HasZ._shapeTypes else None - ) + f.write(pack("i", length)) + # f.seek(finish) - # Finalize record length as 16-bit words - finish = f.tell() - length = (finish - start) // 2 - # start - 4 bytes is the content length field - f.seek(start - 4) - f.write(pack(">i", length)) - f.seek(finish) + f.seek(0) + shp.write(f.read()) return offset, length def __shxRecord(self, offset, length): From c65662bd953cf901a4e195c8ae1cae9a8874700a Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 19:46:40 +0100 Subject: [PATCH 16/19] Update shapefile.py --- src/shapefile.py | 88 +++++++++++++++++++++++------------------------- 1 file changed, 43 insertions(+), 45 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 3fdb9f40..40d29d3c 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2967,58 +2967,56 @@ def shape( self.__shxRecord(offset, length) def __shpRecord(self, s): - shp = self.__getFileObj(self.shp) - offset = shp.tell() + f = self.__getFileObj(self.shp) + offset = f.tell() # Record number, Content length place holder self.shpNum += 1 - with io.BytesIO() as f: - f.write(pack(">2i", self.shpNum, 0)) - start = f.tell() - # Shape Type - if self.shapeType is None and s.shapeType != NULL: - self.shapeType = s.shapeType - if not s.shapeType in {NULL, self.shapeType}: - raise ShapefileException( - f"The shape's type ({s.shapeType}) must match " - f"the type of the shapefile ({self.shapeType})." - ) - - # For both single point and multiple-points non-null shapes, - # update bbox, mbox and zbox of the whole shapefile - new_bbox = self.__bbox(s) if s.shapeType != NULL else None - new_mbox = ( - self.__mbox(s) - if s.shapeType in {POINTM, POINTZ} | _HasM._shapeTypes - else None - ) - new_zbox = ( - self.__zbox(s) - if s.shapeType in {POINTZ} | _HasZ._shapeTypes - else None + f.write(pack(">2i", self.shpNum, 0)) + start = f.tell() + # Shape Type + if self.shapeType is None and s.shapeType != NULL: + self.shapeType = s.shapeType + if not s.shapeType in {NULL, self.shapeType}: + raise ShapefileException( + f"The shape's type ({s.shapeType}) must match " + f"the type of the shapefile ({self.shapeType})." ) - f.write(pack("i", length)) + f.seek(finish) - # Finalize record length as 16-bit words - finish = f.tell() - length = (finish - start) // 2 - # start - 4 bytes is the content length field - f.seek(start - 4) - f.write(pack(">i", length)) - # f.seek(finish) - f.seek(0) - shp.write(f.read()) return offset, length def __shxRecord(self, offset, length): From 46249efd2c3e599a834338c6a8ff9eb20588b0a0 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 19:50:34 +0100 Subject: [PATCH 17/19] Reformat --- src/shapefile.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 40d29d3c..231d938d 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -901,6 +901,7 @@ def _from_shp_file(cls, f, next_shape, oid=None, bbox=None): # pylint: disable= def _try_write_to_shp_file(f, s, i, bbox, mbox, zbox): # pylint: disable=unused-argument pass + class _CanHaveBBox(Shape): """As well as setting bounding boxes, we also utilize the fact that this mixin applies to all the shapes that are @@ -2991,9 +2992,7 @@ def __shpRecord(self, s): else None ) new_zbox = ( - self.__zbox(s) - if s.shapeType in {POINTZ} | _HasZ._shapeTypes - else None + self.__zbox(s) if s.shapeType in {POINTZ} | _HasZ._shapeTypes else None ) f.write(pack("i", length)) f.seek(finish) - return offset, length def __shxRecord(self, offset, length): From b3b7593cf3987e3700b2579688105c6137ef0810 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 21:35:51 +0100 Subject: [PATCH 18/19] Serialize shapes to bytes, and deserialize them from byte streams --- README.md | 2 + src/shapefile.py | 394 +++++++++++++++++++++++++---------------------- 2 files changed, 208 insertions(+), 188 deletions(-) diff --git a/README.md b/README.md index 52b8de78..de34f6c4 100644 --- a/README.md +++ b/README.md @@ -458,11 +458,13 @@ shapeType Point do not have a bounding box 'bbox'. ... if not name.startswith('_'): ... name 'bbox' + 'from_bytes' 'oid' 'parts' 'points' 'shapeType' 'shapeTypeName' + 'to_bytes' * `oid`: The shape's index position in the original shapefile. diff --git a/src/shapefile.py b/src/shapefile.py index 231d938d..fc3bf597 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -846,46 +846,6 @@ def __repr__(self): return f"Shape #{self.__oid}: {self.shapeTypeName}" -def _read_shape_from_shp_file( - f, oid=None, bbox=None -): # oid: Optional[int] = None, bbox: Optional[BBox] = None): - """Constructs a Shape from an open .shp file. Something else - is required to have first read the .shp file's header. - Leaves the shp file's .tell() in the correct position for - a subsequent call to this, to build the next shape. - """ - # shape = Shape(oid=oid) - (__recNum, recLength) = unpack_2_int32_be(f.read(8)) - # Determine the start of the next record - next_shape = f.tell() + (2 * recLength) - shapeType = unpack("= 16: - __mmin, __mmax = unpack("<2d", f.read(16)) + def _set_ms_from_byte_stream(self, b_io, nPoints, next_shape): + if next_shape - b_io.tell() >= 16: + __mmin, __mmax = unpack("<2d", b_io.read(16)) # Measure values less than -10e38 are nodata values according to the spec - if next_shape - f.tell() >= nPoints * 8: + if next_shape - b_io.tell() >= nPoints * 8: self.m = [] - for m in _Array[float]("d", unpack(f"<{nPoints}d", f.read(nPoints * 8))): + for m in _Array[float]("d", unpack(f"<{nPoints}d", b_io.read(nPoints * 8))): if m > NODATA: self.m.append(m) else: @@ -1176,12 +1152,14 @@ def _set_ms_from_shp_file(self, f, nPoints, next_shape): self.m = [None for _ in range(nPoints)] @staticmethod - def _try_write_ms_to_shp_file(f, s, i, mbox): + def ms_to_bytes(s, i, mbox): # Write m extremes and values # When reading a file, pyshp converts NODATA m values to None, so here we make sure to convert them back to NODATA # Note: missing m values are autoset to NODATA. + bytes_ = b"" + try: - f.write(pack("<2d", *mbox)) + bytes_ += pack("<2d", *mbox) except error: raise ShapefileException( f"Failed to write measure extremes for record {i}. Expected floats" @@ -1189,32 +1167,25 @@ def _try_write_ms_to_shp_file(f, s, i, mbox): try: if hasattr(s, "m"): # if m values are stored in attribute - # fmt: off - f.write( - pack( - f"<{len(s.m)}d", - *[m if m is not None else NODATA for m in s.m] - ) + bytes_ += pack( + f"<{len(s.m)}d", *[m if m is not None else NODATA for m in s.m] ) - # fmt: on else: # if m values are stored as 3rd/4th dimension # 0-index position of m value is 3 if z type (x,y,z,m), or 2 if m type (x,y,m) mpos = 3 if s.shapeType in _HasZ._shapeTypes else 2 for p in s.points: - f.write( - pack( - " mpos and p[mpos] is not None - else NODATA, - ) + bytes_ += pack( + " mpos and p[mpos] is not None else NODATA, ) except error: raise ShapefileException( f"Failed to write measure values for record {i}. Expected floats" ) + return bytes_ + class _HasZ(_CanHaveBBox): # Not a Point @@ -1228,16 +1199,17 @@ class _HasZ(_CanHaveBBox): ) z: Sequence[float] - def _set_zs_from_shp_file(self, f, nPoints): - __zmin, __zmax = unpack("<2d", f.read(16)) # pylint: disable=unused-private-member - self.z = _Array[float]("d", unpack(f"<{nPoints}d", f.read(nPoints * 8))) + def _set_zs_from_byte_stream(self, b_io, nPoints): + __zmin, __zmax = unpack("<2d", b_io.read(16)) # pylint: disable=unused-private-member + self.z = _Array[float]("d", unpack(f"<{nPoints}d", b_io.read(nPoints * 8))) @staticmethod - def _try_write_zs_to_shp_file(f, s, i, zbox): + def zs_to_bytes(s, i, zbox): # Write z extremes and values # Note: missing z values are autoset to 0, but not sure if this is ideal. + bytes_ = b"" try: - f.write(pack("<2d", *zbox)) + bytes_ += pack("<2d", *zbox) except error: raise ShapefileException( f"Failed to write elevation extremes for record {i}. Expected floats." @@ -1245,27 +1217,31 @@ def _try_write_zs_to_shp_file(f, s, i, zbox): try: if hasattr(s, "z"): # if z values are stored in attribute - f.write(pack(f"<{len(s.z)}d", *s.z)) + bytes_ += pack(f"<{len(s.z)}d", *s.z) else: # if z values are stored as 3rd dimension for p in s.points: - f.write(pack(" 2 else 0)) + bytes_ += pack(" 2 else 0) except error: raise ShapefileException( f"Failed to write elevation values for record {i}. Expected floats." ) + return bytes_ + class MultiPatch(_HasM, _HasZ, _CanHaveParts): shapeType = MULTIPATCH - def _set_part_types_from_shp_file(self, f, nParts): - self.partTypes = _Array[int]("i", unpack(f"<{nParts}i", f.read(nParts * 4))) + def _set_part_types_from_byte_stream(self, b_io, nParts): + self.partTypes = _Array[int]("i", unpack(f"<{nParts}i", b_io.read(nParts * 4))) @staticmethod - def _write_part_types_to_shp_file(f, s): + def _part_types_to_bytes(s): + bytes_ = b"" for partType in s.partTypes: - f.write(pack("= 8: - (m,) = unpack("= 8: + (m,) = unpack("2i", self.shpNum, 0)) - start = f.tell() + # f.write(pack(">2i", self.shpNum, 0)) + # start = f.tell() # Shape Type if self.shapeType is None and s.shapeType != NULL: self.shapeType = s.shapeType @@ -2995,11 +3007,10 @@ def __shpRecord(self, s): self.__zbox(s) if s.shapeType in {POINTZ} | _HasZ._shapeTypes else None ) - f.write(pack("i", length)) + + record_bytes = pack("i", length)) - f.seek(finish) + length = len(record_bytes) // 2 + header_bytes = pack(">2i", self.shpNum, length) + f.write(header_bytes + record_bytes) + + # f.seek(finish) return offset, length From 312892d93ea94e6b3af5074aa70ef26ab5f84858 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 29 Jul 2025 22:16:24 +0100 Subject: [PATCH 19/19] Use byte streams instead of bytes as it's faster --- README.md | 4 +- src/shapefile.py | 170 ++++++++++++++++++++--------------------------- 2 files changed, 74 insertions(+), 100 deletions(-) diff --git a/README.md b/README.md index de34f6c4..a0abc6a1 100644 --- a/README.md +++ b/README.md @@ -458,13 +458,13 @@ shapeType Point do not have a bounding box 'bbox'. ... if not name.startswith('_'): ... name 'bbox' - 'from_bytes' + 'from_byte_stream' 'oid' 'parts' 'points' 'shapeType' 'shapeTypeName' - 'to_bytes' + 'write_to_byte_stream' * `oid`: The shape's index position in the original shapefile. diff --git a/src/shapefile.py b/src/shapefile.py index fc3bf597..b5e6ed4c 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -853,13 +853,13 @@ class NullShape(Shape): shapeType = NULL @classmethod - def from_bytes(cls, bytes_, next_shape, oid=None, bbox=None): # pylint: disable=unused-argument + def from_byte_stream(cls, b_io, next_shape, oid=None, bbox=None): # pylint: disable=unused-argument # Shape.__init__ sets self.points = points or [] return cls(oid=oid) @staticmethod - def to_bytes(s, i, bbox, mbox, zbox): # pylint: disable=unused-argument - return b"" + def write_to_byte_stream(b_io, s, i, bbox, mbox, zbox): # pylint: disable=unused-argument + pass class _CanHaveBBox(Shape): @@ -890,9 +890,9 @@ def _set_bbox_from_byte_stream(self, b_io): self.bbox = _Array[float]("d", unpack("<4d", b_io.read(32))) @staticmethod - def _bbox_to_bytes(i, bbox): + def _write_bbox_to_byte_stream(b_io, i, bbox): try: - return pack("<4d", *bbox) + b_io.write(pack("<4d", *bbox)) except error: raise ShapefileException( f"Failed to write bounding box for record {i}. Expected floats." @@ -903,26 +903,23 @@ def _get_npoints_from_byte_stream(b_io): return unpack(" mpos and p[mpos] is not None else NODATA, + b_io.write( + pack( + " mpos and p[mpos] is not None + else NODATA, + ) ) except error: raise ShapefileException( f"Failed to write measure values for record {i}. Expected floats" ) - return bytes_ - class _HasZ(_CanHaveBBox): # Not a Point @@ -1204,12 +1192,11 @@ def _set_zs_from_byte_stream(self, b_io, nPoints): self.z = _Array[float]("d", unpack(f"<{nPoints}d", b_io.read(nPoints * 8))) @staticmethod - def zs_to_bytes(s, i, zbox): + def _write_zs_to_byte_stream(b_io, s, i, zbox): # Write z extremes and values # Note: missing z values are autoset to 0, but not sure if this is ideal. - bytes_ = b"" try: - bytes_ += pack("<2d", *zbox) + b_io.write(pack("<2d", *zbox)) except error: raise ShapefileException( f"Failed to write elevation extremes for record {i}. Expected floats." @@ -1217,18 +1204,16 @@ def zs_to_bytes(s, i, zbox): try: if hasattr(s, "z"): # if z values are stored in attribute - bytes_ += pack(f"<{len(s.z)}d", *s.z) + b_io.write(pack(f"<{len(s.z)}d", *s.z)) else: # if z values are stored as 3rd dimension for p in s.points: - bytes_ += pack(" 2 else 0) + b_io.write(pack(" 2 else 0)) except error: raise ShapefileException( f"Failed to write elevation values for record {i}. Expected floats." ) - return bytes_ - class MultiPatch(_HasM, _HasZ, _CanHaveParts): shapeType = MULTIPATCH @@ -1237,11 +1222,9 @@ def _set_part_types_from_byte_stream(self, b_io, nParts): self.partTypes = _Array[int]("i", unpack(f"<{nParts}i", b_io.read(nParts * 4))) @staticmethod - def _part_types_to_bytes(s): - bytes_ = b"" + def _write_part_types_to_byte_stream(b_io, s): for partType in s.partTypes: - bytes_ += pack("2i", self.shpNum, 0)) - # start = f.tell() + f.write(pack(">2i", self.shpNum, 0)) + start = f.tell() # Shape Type if self.shapeType is None and s.shapeType != NULL: self.shapeType = s.shapeType @@ -3007,10 +2986,11 @@ def __shpRecord(self, s): self.__zbox(s) if s.shapeType in {POINTZ} | _HasZ._shapeTypes else None ) - # f.write(pack("i", length)) - - record_bytes = pack("2i", self.shpNum, length) - f.write(header_bytes + record_bytes) - - # f.seek(finish) + finish = f.tell() + length = (finish - start) // 2 + # start - 4 bytes is the content length field + f.seek(start - 4) + f.write(pack(">i", length)) + + f.seek(finish) return offset, length