From 0498a8ebaaed38fbdef8b460c5b334635e040db7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 26 Nov 2019 08:20:34 -0800 Subject: [PATCH 1/6] types --- pandas/io/pytables.py | 45 ++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 18ae081caf69d..a0fbce02d63cb 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -54,7 +54,7 @@ from pandas.io.formats.printing import adjoin, pprint_thing if TYPE_CHECKING: - from tables import File # noqa:F401 + from tables import File, Node # noqa:F401 # versioning attribute @@ -1269,7 +1269,7 @@ def walk(self, where="/"): yield (g._v_pathname.rstrip("/"), groups, leaves) - def get_node(self, key: str): + def get_node(self, key: str) -> Optional["Node"]: """ return the node with the key or None if it does not exist """ self._check_if_open() if not key.startswith("/"): @@ -1277,10 +1277,14 @@ def get_node(self, key: str): assert self._handle is not None try: - return self._handle.get_node(self.root, key) + node = self._handle.get_node(self.root, key) except _table_mod.exceptions.NoSuchNodeError: # type: ignore return None + assert _table_mod is not None # for mypy + assert isinstance(node, _table_mod.Node), type(node) + return node + def get_storer(self, key: str) -> Union["GenericFixed", "Table"]: """ return the storer object for a key, raise if not in the file """ group = self.get_node(key) @@ -1389,7 +1393,9 @@ def info(self) -> str: return output - # private methods ###### + # ------------------------------------------------------------------------ + # private methods + def _check_if_open(self): if not self.is_open: raise ClosedFileError(f"{self._path} file is not open!") @@ -1561,7 +1567,7 @@ def _write_to_group( if isinstance(s, Table) and index: s.create_index(columns=index) - def _read_group(self, group, **kwargs): + def _read_group(self, group: "Node", **kwargs): s = self._create_storer(group) s.infer_axes() return s.read(**kwargs) @@ -1788,7 +1794,7 @@ def copy(self): new_self = copy.copy(self) return new_self - def infer(self, handler): + def infer(self, handler: "Table"): """infer this column from the table: create and return a new object""" table = handler.table new_self = self.copy() @@ -2092,7 +2098,7 @@ def __repr__(self) -> str: ) ) - def __eq__(self, other): + def __eq__(self, other) -> bool: """ compare 2 col items """ return all( getattr(self, a, None) == getattr(other, a, None) @@ -2504,9 +2510,16 @@ class Fixed: pandas_kind: str obj_type: Type[Union[DataFrame, Series]] ndim: int + parent: HDFStore + group: "Node" is_table = False - def __init__(self, parent, group, encoding=None, errors="strict", **kwargs): + def __init__( + self, parent: HDFStore, group: "Node", encoding=None, errors="strict", **kwargs + ): + assert isinstance(parent, HDFStore), type(parent) + assert _table_mod is not None # needed for mypy + assert isinstance(group, _table_mod.Node), type(group) self.parent = parent self.group = group self.encoding = _ensure_encoding(encoding) @@ -2770,7 +2783,7 @@ def read_array( else: return ret - def read_index(self, key, **kwargs): + def read_index(self, key: str, **kwargs): variety = _ensure_decoded(getattr(self.attrs, f"{key}_variety")) if variety == "multi": @@ -2785,7 +2798,7 @@ def read_index(self, key, **kwargs): else: # pragma: no cover raise TypeError(f"unrecognized index variety: {variety}") - def write_index(self, key, index): + def write_index(self, key: str, index): if isinstance(index, MultiIndex): setattr(self.attrs, f"{key}_variety", "multi") self.write_multi_index(key, index) @@ -2816,18 +2829,18 @@ def write_index(self, key, index): if hasattr(index, "tz") and index.tz is not None: node._v_attrs.tz = _get_tz(index.tz) - def write_block_index(self, key, index): + def write_block_index(self, key: str, index): self.write_array(f"{key}_blocs", index.blocs) self.write_array(f"{key}_blengths", index.blengths) setattr(self.attrs, f"{key}_length", index.length) - def read_block_index(self, key, **kwargs) -> BlockIndex: + def read_block_index(self, key: str, **kwargs) -> BlockIndex: length = getattr(self.attrs, f"{key}_length") blocs = self.read_array(f"{key}_blocs", **kwargs) blengths = self.read_array(f"{key}_blengths", **kwargs) return BlockIndex(length, blocs, blengths) - def write_sparse_intindex(self, key, index): + def write_sparse_intindex(self, key: str, index): self.write_array(f"{key}_indices", index.indices) setattr(self.attrs, f"{key}_length", index.length) @@ -2836,7 +2849,7 @@ def read_sparse_intindex(self, key, **kwargs) -> IntIndex: indices = self.read_array(f"{key}_indices", **kwargs) return IntIndex(length, indices) - def write_multi_index(self, key, index): + def write_multi_index(self, key: str, index): setattr(self.attrs, f"{key}_nlevels", index.nlevels) for i, (lev, level_codes, name) in enumerate( @@ -2863,7 +2876,7 @@ def write_multi_index(self, key, index): label_key = f"{key}_label{i}" self.write_array(label_key, level_codes) - def read_multi_index(self, key, **kwargs) -> MultiIndex: + def read_multi_index(self, key: str, **kwargs) -> MultiIndex: nlevels = getattr(self.attrs, f"{key}_nlevels") levels = [] @@ -2884,7 +2897,7 @@ def read_multi_index(self, key, **kwargs) -> MultiIndex: ) def read_index_node( - self, node, start: Optional[int] = None, stop: Optional[int] = None + self, node: "Node", start: Optional[int] = None, stop: Optional[int] = None ): data = node[start:stop] # If the index was an empty array write_array_empty() will From aa0176306ca5f4236f1f57aaff1f188c337ceb43 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 26 Nov 2019 08:31:14 -0800 Subject: [PATCH 2/6] remove unnecessary kwarg --- pandas/io/pytables.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index a0fbce02d63cb..617ae2bb441fd 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -527,7 +527,7 @@ def __getattr__(self, name: str): f"'{type(self).__name__}' object has no attribute '{name}'" ) - def __contains__(self, key: str): + def __contains__(self, key: str) -> bool: """ check for existence of this key can match the exact pathname or the pathnm w/o the leading '/' """ @@ -2810,9 +2810,7 @@ def write_index(self, key: str, index): self.write_sparse_intindex(key, index) else: setattr(self.attrs, f"{key}_variety", "regular") - converted = _convert_index( - "index", index, self.encoding, self.errors, self.format_type - ) + converted = _convert_index("index", index, self.encoding, self.errors) self.write_array(key, converted.values) @@ -2861,9 +2859,7 @@ def write_multi_index(self, key: str, index): "Saving a MultiIndex with an extension dtype is not supported." ) level_key = f"{key}_level{i}" - conv_level = _convert_index( - level_key, lev, self.encoding, self.errors, self.format_type - ) + conv_level = _convert_index(level_key, lev, self.encoding, self.errors) self.write_array(level_key, conv_level.values) node = getattr(self.group, level_key) node._v_attrs.kind = conv_level.kind @@ -3362,7 +3358,7 @@ def values_cols(self) -> List[str]: """ return a list of my values cols """ return [i.cname for i in self.values_axes] - def _get_metadata_path(self, key) -> str: + def _get_metadata_path(self, key: str) -> str: """ return the metadata pathname for this key """ group = self.group._v_pathname return f"{group}/meta/{key}/meta" @@ -3719,9 +3715,7 @@ def create_axes( if i in axes: name = obj._AXIS_NAMES[i] - new_index = _convert_index( - name, a, self.encoding, self.errors, self.format_type - ) + new_index = _convert_index(name, a, self.encoding, self.errors) new_index.axis = i index_axes_map[i] = new_index @@ -4586,7 +4580,7 @@ def _set_tz(values, tz, preserve_UTC: bool = False, coerce: bool = False): return values -def _convert_index(name: str, index, encoding=None, errors="strict", format_type=None): +def _convert_index(name: str, index, encoding=None, errors="strict"): assert isinstance(name, str) index_name = getattr(index, "name", None) From 00d8760221cd151b0420671bab445ffbbae89115 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 26 Nov 2019 08:36:20 -0800 Subject: [PATCH 3/6] type index --- pandas/io/pytables.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 617ae2bb441fd..04320ab09df75 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4580,10 +4580,10 @@ def _set_tz(values, tz, preserve_UTC: bool = False, coerce: bool = False): return values -def _convert_index(name: str, index, encoding=None, errors="strict"): +def _convert_index(name: str, index: Index, encoding=None, errors="strict"): assert isinstance(name, str) - index_name = getattr(index, "name", None) + index_name = index.name if isinstance(index, DatetimeIndex): converted = index.asi8 From 84b03bd060bdd16da2220ed771c6ea5b279b74bd Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 26 Nov 2019 13:40:46 -0800 Subject: [PATCH 4/6] move assertion earlier and remove type ignore --- pandas/io/pytables.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 04320ab09df75..592cc62abbd63 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1276,12 +1276,12 @@ def get_node(self, key: str) -> Optional["Node"]: key = "/" + key assert self._handle is not None + assert _table_mod is not None # for mypy try: node = self._handle.get_node(self.root, key) - except _table_mod.exceptions.NoSuchNodeError: # type: ignore + except _table_mod.exceptions.NoSuchNodeError: return None - assert _table_mod is not None # for mypy assert isinstance(node, _table_mod.Node), type(node) return node From 74dfa0a8d381ab0d0b87a497a65a8b114a30b663 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 26 Nov 2019 15:05:38 -0800 Subject: [PATCH 5/6] more types --- pandas/io/pytables.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 592cc62abbd63..95170d0680884 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -245,7 +245,7 @@ def to_hdf( key, value, mode=None, - complevel=None, + complevel: Optional[int] = None, complib=None, append=None, **kwargs, @@ -460,12 +460,13 @@ class HDFStore: """ _handle: Optional["File"] + _complevel: int def __init__( self, path, mode=None, - complevel=None, + complevel: Optional[int] = None, complib=None, fletcher32: bool = False, **kwargs, @@ -1302,7 +1303,7 @@ def copy( propindexes: bool = True, keys=None, complib=None, - complevel=None, + complevel: Optional[int] = None, fletcher32: bool = False, overwrite=True, ): @@ -2590,11 +2591,11 @@ def _filters(self): return self.parent._filters @property - def _complevel(self): + def _complevel(self) -> int: return self.parent._complevel @property - def _fletcher32(self): + def _fletcher32(self) -> bool: return self.parent._fletcher32 @property @@ -2653,7 +2654,7 @@ def read(self, **kwargs): def write(self, **kwargs): raise NotImplementedError( - "cannot write on an abstract storer: sublcasses should implement" + "cannot write on an abstract storer: subclasses should implement" ) def delete(self, where=None, start=None, stop=None, **kwargs): @@ -3925,10 +3926,10 @@ def process_filter(field, filt): def create_description( self, complib=None, - complevel=None, + complevel: Optional[int] = None, fletcher32: bool = False, expectedrows: Optional[int] = None, - ): + ) -> Dict[str, Any]: """ create the description of the table from the axes & values """ # provided expected rows if its passed @@ -4673,8 +4674,9 @@ def _convert_index(name: str, index: Index, encoding=None, errors="strict"): ) -def _unconvert_index(data, kind, encoding=None, errors="strict"): - kind = _ensure_decoded(kind) +def _unconvert_index(data, kind: str, encoding=None, errors="strict"): + index: Union[Index, np.ndarray] + if kind == "datetime64": index = DatetimeIndex(data) elif kind == "timedelta64": From fc05f51879c2146cfb65e34881238e015223af86 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 26 Nov 2019 15:06:23 -0800 Subject: [PATCH 6/6] one more --- pandas/io/pytables.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 95170d0680884..fd9e126b7515b 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -461,6 +461,7 @@ class HDFStore: _handle: Optional["File"] _complevel: int + _fletcher32: bool def __init__( self,