From 72771fbde3f88ff1fa3eef8a064c7344d485fc54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uli=20K=C3=B6hler?= Date: Wed, 22 Feb 2017 21:51:14 +0100 Subject: [PATCH 1/2] Fix file like objects without seek support not being readable --- shapefile.py | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/shapefile.py b/shapefile.py index 5bdd50c..eac4669 100644 --- a/shapefile.py +++ b/shapefile.py @@ -18,6 +18,7 @@ import array import tempfile import itertools +import io from datetime import date # @@ -231,6 +232,12 @@ def __init__(self, *args, **kwargs): self.numRecords = None self.fields = [] self.__dbfHdrLength = 0 + # Allow in-memory copy (only if required) + # Even if allowed, this will only happen + # for files that do not support seek. + # False by default to avoid consuming huge amounts + # of memory + allow_copy = "allow_copy" in kwargs and kwargs["allow_copy"] # See if a shapefile name was passed as an argument if len(args) > 0: if is_string(args[0]): @@ -239,23 +246,47 @@ def __init__(self, *args, **kwargs): if "shp" in kwargs.keys(): if hasattr(kwargs["shp"], "read"): self.shp = kwargs["shp"] - if hasattr(self.shp, "seek"): + # Copy if required + try: self.shp.seek(0) + except (NameError, io.UnsupportedOperation): + if allow_copy: + self.shp = io.BytesIO(self.shp.read()) + else: + raise ValueError("shp argument does not "\ + "support seek, consider allow_copy=True") if "shx" in kwargs.keys(): if hasattr(kwargs["shx"], "read"): self.shx = kwargs["shx"] - if hasattr(self.shx, "seek"): + # Copy if required + try: self.shx.seek(0) + except (NameError, io.UnsupportedOperation): + if allow_copy: + self.shx = io.BytesIO(self.shx.read()) + else: + raise ValueError("shx argument does not "\ + "support seek, consider allow_copy=True") if "dbf" in kwargs.keys(): if hasattr(kwargs["dbf"], "read"): self.dbf = kwargs["dbf"] - if hasattr(self.dbf, "seek"): + # Copy if required + try: self.dbf.seek(0) + except (NameError, 
io.UnsupportedOperation): + if allow_copy: + self.dbf = io.BytesIO(self.dbf.read()) + else: + raise ValueError("dbf argument does not "\ + "support seek, consider allow_copy=True") if self.shp or self.dbf: self.load() else: raise ShapefileException("Shapefile Reader requires a shapefile or file-like object.") + + + def load(self, shapefile=None): """Opens a shapefile from a filename or file-like object. Normally this method would be called by the From 0a38d3cc628b74490d7281af72e9f57abce5d1c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uli=20K=C3=B6hler?= Date: Wed, 22 Feb 2017 21:52:54 +0100 Subject: [PATCH 2/2] Added doc for new allow_copy option --- shapefile.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/shapefile.py b/shapefile.py index eac4669..2f1305f 100644 --- a/shapefile.py +++ b/shapefile.py @@ -221,6 +221,12 @@ class Reader: within each file is only accessed when required and as efficiently as possible. Shapefiles are usually not large but they can be. + + If initializing the reader with a file-like object which + does not support seek(), you must set allow_copy=True + to allow the Reader to copy the entire file in memory. + This is set to False by default in order to avoid + large files being copied into memory without user intention. """ def __init__(self, *args, **kwargs): self.shp = None