Skip to content

Pyshp 3.0.0 alpha attype of the clones #339

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
2ee7532
Create py.typed
JamesParrott Jul 29, 2025
5a77065
Update test_shapefile.py
JamesParrott Jul 29, 2025
d8d6357
Reformat
JamesParrott Jul 29, 2025
a5ecbf3
Update shapefile.py
JamesParrott Jul 29, 2025
3591be4
Reformat
JamesParrott Jul 29, 2025
efe0778
Refactor some of Writer.__shpRecord into _write_shape_to_shp_file(
JamesParrott Jul 29, 2025
5c35997
Ignore Pylint W0707
JamesParrott Jul 29, 2025
2c4a32f
Further reduce number of index selector tests
JamesParrott Jul 29, 2025
b628577
Refactor _from_shp_file methods onto new subclasses of Shape
JamesParrott Jul 29, 2025
48a2b96
Refactor _write_shape_to_shp_file into static methods on Shape subcla…
JamesParrott Jul 29, 2025
714ece3
Add shapefile Writer benchmark
JamesParrott Jul 29, 2025
9aac92c
Update run_benchmarks.py
JamesParrott Jul 29, 2025
fbbae0a
Split _write_shape_to_shp_file into _try_write methods on Point and _…
JamesParrott Jul 29, 2025
5b65b40
Move _write_shape_to_shp_file back into Writer.__shpRecord
JamesParrott Jul 29, 2025
24abcf9
Use tmp io.BytesIO, and write to file at end
JamesParrott Jul 29, 2025
c65662b
Update shapefile.py
JamesParrott Jul 29, 2025
5410cb6
Merge pull request #12 from JamesParrott/time-using-method-on-class
JamesParrott Jul 29, 2025
46249ef
Reformat
JamesParrott Jul 29, 2025
b3b7593
Serialize shapes to bytes, and deserialize them from byte streams
JamesParrott Jul 29, 2025
312892d
Use byte streams instead of bytes as it's faster
JamesParrott Jul 29, 2025
e769958
Merge pull request #13 from JamesParrott/shapes_to_and_from_bytes
JamesParrott Jul 29, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -458,11 +458,13 @@ shapeType Point do not have a bounding box 'bbox'.
... if not name.startswith('_'):
... name
'bbox'
'from_byte_stream'
'oid'
'parts'
'points'
'shapeType'
'shapeTypeName'
'write_to_byte_stream'

* `oid`: The shape's index position in the original shapefile.

Expand Down
10 changes: 6 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,11 @@ load-plugins=[
"pylint_per_file_ignores",
]

# Silence warning: shapefile.py:2076:20: W0212: Access to a protected
# member _from_geojson of a client class (protected-access)
# shapefile.py:950:16: W0201: Attribute 'm' defined outside __init__ (attribute-defined-outside-init)
# Silence warnings: src/shapefile.py:2076:20: W0212: Access to a protected member _from_geojson of a client class (protected-access)
# src/shapefile.py:950:16: W0201: Attribute 'm' defined outside __init__ (attribute-defined-outside-init)
# src/shapefile.py:973:12: W0707: Consider explicitly re-raising using 'except error as exc' and
# 'raise ShapefileException(f'Failed to write bounding box for record {i}.
# Expected floats.') from exc' (raise-missing-from)
# Silence remarks:
# src\shapefile.py:338:0: R0914: Too many local variables (21/15) (too-many-locals)
# src\shapefile.py:338:0: R0912: Too many branches (24/12) (too-many-branches)
Expand All @@ -134,6 +136,6 @@ load-plugins=[
# https://github.com/christopherpickering/pylint-per-file-ignores/issues/160
[tool.pylint.'messages control']
per-file-ignores = [
"/src/shapefile.py:W0212,W0201,R0902,R0903,R0904,R0911,R0912,R0913,R0914,R0915,R0917,R1732",
"/src/shapefile.py:W0707,W0212,W0201,R0902,R0903,R0904,R0911,R0912,R0913,R0914,R0915,R0917,R1732",
"test_shapefile.py:W0212,R1732",
]
84 changes: 62 additions & 22 deletions run_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@

from __future__ import annotations

import collections
import functools
import os
import timeit
from collections.abc import Callable
from pathlib import Path
from tempfile import TemporaryFile as TempF
from typing import Union

import shapefile as shp
import shapefile

# For shapefiles from https://github.com/JamesParrott/PyShp_test_shapefile
DEFAULT_PYSHP_TEST_REPO = (
Expand All @@ -31,26 +33,41 @@ def benchmark(
name: str,
run_count: int,
func: Callable,
col_width: tuple,
col_widths: tuple,
compare_to: float | None = None,
) -> float:
placeholder = "Running..."
print(f"{name:>{col_width[0]}} | {placeholder}", end="", flush=True)
print(f"{name:>{col_widths[0]}} | {placeholder}", end="", flush=True)
time_taken = timeit.timeit(func, number=run_count)
print("\b" * len(placeholder), end="")
time_suffix = " s"
print(f"{time_taken:{col_width[1]-len(time_suffix)}.3g}{time_suffix}", end="")
print(f"{time_taken:{col_widths[1]-len(time_suffix)}.3g}{time_suffix}", end="")
print()
return time_taken


fields = {}
shapeRecords = collections.defaultdict(list)


def open_shapefile_with_PyShp(target: Union[str, os.PathLike]):
with shp.Reader(target) as r:
with shapefile.Reader(target) as r:
fields[target] = r.fields
for shapeRecord in r.iterShapeRecords():
pass
shapeRecords[target].append(shapeRecord)


def write_shapefile_with_PyShp(target: Union[str, os.PathLike]):
with TempF("wb") as shp, TempF("wb") as dbf, TempF("wb") as shx:
with shapefile.Writer(shp=shp, dbf=dbf, shx=shx) as w: # type: ignore [arg-type]
for field_info_tuple in fields[target]:
w.field(*field_info_tuple)
for shapeRecord in shapeRecords[target]:
w.shape(shapeRecord.shape)
w.record(*shapeRecord.record)


READER_TESTS = {
SHAPEFILES = {
"Blockgroups": blockgroups_file,
"Edit": edit_file,
"Merge": merge_file,
Expand All @@ -60,24 +77,47 @@ def open_shapefile_with_PyShp(target: Union[str, os.PathLike]):
}


def run(run_count: int) -> None:
col_width = (21, 10)
# Load files to avoid one-off delays that only affect the first disk seek
for file_path in SHAPEFILES.values():
file_path.read_bytes()

reader_benchmarks = [
functools.partial(
benchmark,
name=f"Read {test_name}",
func=functools.partial(open_shapefile_with_PyShp, target=target),
)
for test_name, target in SHAPEFILES.items()
]

# Requires fields and shapeRecords to have been populated first,
# using data from previously running the reader_benchmarks
writer_benchmarks = [
functools.partial(
benchmark,
name=f"Write {test_name}",
func=functools.partial(write_shapefile_with_PyShp, target=target),
)
for test_name, target in SHAPEFILES.items()
]


def run(run_count: int, benchmarks: list[Callable[[], None]]) -> None:
col_widths = (22, 10)
col_head = ("parser", "exec time", "performance (more is better)")
# Load files to avoid one off delays that only affect first disk seek
for file_path in READER_TESTS.values():
file_path.read_bytes()
print(f"Running benchmarks {run_count} times:")
print("-" * col_width[0] + "---" + "-" * col_width[1])
print(f"{col_head[0]:>{col_width[0]}} | {col_head[1]:>{col_width[1]}}")
print("-" * col_width[0] + "-+-" + "-" * col_width[1])
for test_name, target in READER_TESTS.items():
benchmark(
f"Read {test_name}",
run_count,
functools.partial(open_shapefile_with_PyShp, target=target),
col_width,
print("-" * col_widths[0] + "---" + "-" * col_widths[1])
print(f"{col_head[0]:>{col_widths[0]}} | {col_head[1]:>{col_widths[1]}}")
print("-" * col_widths[0] + "-+-" + "-" * col_widths[1])
for benchmark in benchmarks:
benchmark( # type: ignore [call-arg]
run_count=run_count,
col_widths=col_widths,
)


if __name__ == "__main__":
run(1)
print("Reader tests:")
run(1, reader_benchmarks) # type: ignore [arg-type]
print("\n\nWriter tests:")
run(1, writer_benchmarks) # type: ignore [arg-type]
Binary file modified shapefiles/test/balancing.dbf
Binary file not shown.
Binary file modified shapefiles/test/contextwriter.dbf
Binary file not shown.
Binary file modified shapefiles/test/corrupt_too_long.dbf
Binary file not shown.
Binary file modified shapefiles/test/dtype.dbf
Binary file not shown.
Binary file modified shapefiles/test/line.dbf
Binary file not shown.
Binary file modified shapefiles/test/multipoint.dbf
Binary file not shown.
Binary file modified shapefiles/test/onlydbf.dbf
Binary file not shown.
Binary file modified shapefiles/test/point.dbf
Binary file not shown.
Binary file modified shapefiles/test/polygon.dbf
Binary file not shown.
Binary file modified shapefiles/test/shapetype.dbf
Binary file not shown.
Binary file modified shapefiles/test/testfile.dbf
Binary file not shown.
Empty file added src/py.typed
Empty file.
Loading
Loading