TST: parametrize pytable test (#27032)

jbrockmendel · jreback · commit 2da45994b630 · 2019-06-25T08:19:44.000-04:00
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -115,6 +115,7 @@ class ExtensionArray:
     # ------------------------------------------------------------------------
     # Constructors
     # ------------------------------------------------------------------------
+
     @classmethod
     def _from_sequence(cls, scalars, dtype=None, copy=False):
         """
@@ -286,6 +287,7 @@ def __iter__(self):
     # ------------------------------------------------------------------------
     # Required attributes
     # ------------------------------------------------------------------------
+
     @property
     def dtype(self) -> ExtensionDtype:
         """
@@ -319,6 +321,7 @@ def nbytes(self) -> int:
     # ------------------------------------------------------------------------
     # Additional Methods
     # ------------------------------------------------------------------------
+
     def astype(self, dtype, copy=True):
         """
         Cast to a NumPy array with 'dtype'.
@@ -479,8 +482,7 @@ def dropna(self):
     def shift(
             self,
             periods: int = 1,
-            fill_value: object = None,
-    ) -> ABCExtensionArray:
+            fill_value: object = None) -> ABCExtensionArray:
         """
         Shift values by desired number.
 
@@ -836,6 +838,7 @@ def copy(self, deep: bool = False) -> ABCExtensionArray:
     # ------------------------------------------------------------------------
     # Printing
     # ------------------------------------------------------------------------
+
     def __repr__(self):
         from pandas.io.formats.printing import format_object_summary
 
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -480,7 +480,7 @@ def first_not_none(values):
                 # if we have date/time like in the original, then coerce dates
                 # as we are stacking can easily have object dtypes here
                 so = self._selected_obj
-                if (so.ndim == 2 and so.dtypes.apply(is_datetimelike).any()):
+                if so.ndim == 2 and so.dtypes.apply(is_datetimelike).any():
                     result = result.apply(
                         lambda x: to_numeric(x, errors='ignore'))
                     date_cols = self._selected_obj.select_dtypes(
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
@@ -1027,7 +1027,7 @@ def set(self, item, value):
         value_is_extension_type = (is_extension_type(value) or
                                    is_extension_array_dtype(value))
 
-        # categorical/spares/datetimetz
+        # categorical/sparse/datetimetz
         if value_is_extension_type:
 
             def value_getitem(placement):
diff --git a/pandas/tests/io/pytables/test_pytables.py b/pandas/tests/io/pytables/test_pytables.py
@@ -1070,47 +1070,41 @@ def test_encoding(self):
             result = store.select('df', Term('columns=A', encoding='ascii'))
             tm.assert_frame_equal(result, expected)
 
-    def test_latin_encoding(self):
-
-        values = [[b'E\xc9, 17', b'', b'a', b'b', b'c'],
-                  [b'E\xc9, 17', b'a', b'b', b'c'],
-                  [b'EE, 17', b'', b'a', b'b', b'c'],
-                  [b'E\xc9, 17', b'\xf8\xfc', b'a', b'b', b'c'],
-                  [b'', b'a', b'b', b'c'],
-                  [b'\xf8\xfc', b'a', b'b', b'c'],
-                  [b'A\xf8\xfc', b'', b'a', b'b', b'c'],
-                  [np.nan, b'', b'b', b'c'],
-                  [b'A\xf8\xfc', np.nan, b'', b'b', b'c']]
-
-        def _try_decode(x, encoding='latin-1'):
-            try:
-                return x.decode(encoding)
-            except AttributeError:
-                return x
-        # not sure how to remove latin-1 from code in python 2 and 3
-        values = [[_try_decode(x) for x in y] for y in values]
-
-        examples = []
-        for dtype in ['category', object]:
-            for val in values:
-                examples.append(pd.Series(val, dtype=dtype))
-
-        def roundtrip(s, key='data', encoding='latin-1', nan_rep=''):
-            with ensure_clean_path(self.path) as store:
-                s.to_hdf(store, key, format='table', encoding=encoding,
-                         nan_rep=nan_rep)
-                retr = read_hdf(store, key)
-                s_nan = s.replace(nan_rep, np.nan)
-                if is_categorical_dtype(s_nan):
-                    assert is_categorical_dtype(retr)
-                    assert_series_equal(s_nan, retr, check_dtype=False,
-                                        check_categorical=False)
-                else:
-                    assert_series_equal(s_nan, retr)
-
-        for s in examples:
-            roundtrip(s)
+    @pytest.mark.parametrize('val', [
+        [b'E\xc9, 17', b'', b'a', b'b', b'c'],
+        [b'E\xc9, 17', b'a', b'b', b'c'],
+        [b'EE, 17', b'', b'a', b'b', b'c'],
+        [b'E\xc9, 17', b'\xf8\xfc', b'a', b'b', b'c'],
+        [b'', b'a', b'b', b'c'],
+        [b'\xf8\xfc', b'a', b'b', b'c'],
+        [b'A\xf8\xfc', b'', b'a', b'b', b'c'],
+        [np.nan, b'', b'b', b'c'],
+        [b'A\xf8\xfc', np.nan, b'', b'b', b'c']
+    ])
+    @pytest.mark.parametrize('dtype', ['category', object])
+    def test_latin_encoding(self, dtype, val):
+        enc = 'latin-1'
+        nan_rep = ''
+        key = 'data'
+
+        val = [x.decode(enc) if isinstance(x, bytes) else x for x in val]
+        ser = pd.Series(val, dtype=dtype)
+
+        with ensure_clean_path(self.path) as store:
+            ser.to_hdf(store, key, format='table', encoding=enc,
+                       nan_rep=nan_rep)
+            retr = read_hdf(store, key)
+
+        s_nan = ser.replace(nan_rep, np.nan)
+
+        if is_categorical_dtype(s_nan):
+            assert is_categorical_dtype(retr)
+            assert_series_equal(s_nan, retr, check_dtype=False,
+                                check_categorical=False)
+        else:
+            assert_series_equal(s_nan, retr)
 
+        # FIXME: don't leave commented-out
         # fails:
         # for x in examples:
         #     roundtrip(s, nan_rep=b'\xf8\xfc')