Merge branch 'master' into thead-improvement

brianhuey · web-flow · commit 8e7b03e94095 · 2017-02-17T17:17:44.000-08:00
diff --git a/doc/source/io.rst b/doc/source/io.rst
@@ -2833,8 +2833,8 @@ Style and Formatting
 The look and feel of Excel worksheets created from pandas can be modified using the following parameters on the ``DataFrame``'s ``to_excel`` method.
 
 - ``float_format`` : Format string for floating point numbers (default None)
-- ``freeze_panes`` : A tuple of two integers representing the bottommost row and rightmost column to freeze. Each of these parameters is one-based, so (1, 1) will
-freeze the first row and first column (default None)
+- ``freeze_panes`` : A tuple of two integers representing the bottommost row and rightmost column to freeze. Each of these parameters is one-based, so (1, 1) will freeze the first row and first column (default None)
+
 
 
 .. _io.clipboard:
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
@@ -155,6 +155,7 @@ Other enhancements
 - ``Series/DataFrame.squeeze()`` have gained the ``axis`` parameter. (:issue:`15339`)
 - ``DataFrame.to_excel()`` has a new ``freeze_panes`` parameter to turn on Freeze Panes when exporting to Excel (:issue:`15160`)
 - ``pd.read_html()`` parses multiple header rows, creating a multiindex header. (:issue:`13434`).
+- HTML table output skips the ``colspan`` or ``rowspan`` attribute if it is equal to 1. (:issue:`15403`)
 
 .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations
 
@@ -523,7 +524,7 @@ Bug Fixes
 - Bug in ``.groupby(...).rolling(...)`` when ``on`` is specified and using a ``DatetimeIndex`` (:issue:`15130`)
 
 
-
+- Bug in ``to_sql`` when writing a DataFrame with numeric index names (:issue:`15404`).
 - Bug in ``Series.iloc`` where a ``Categorical`` object for list-like indexes input was returned, where a ``Series`` was expected. (:issue:`14580`)
 
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -1073,7 +1073,7 @@ def __setstate__(self, state):
         Representation for infinity (there is no native representation for
         infinity in Excel)
     freeze_panes : tuple of integer (length 2), default None
-        Specifies the bottommost row and rightmost column that
+        Specifies the one-based bottommost row and rightmost column that
         is to be frozen
 
         .. versionadded:: 0.20.0
diff --git a/pandas/formats/style.py b/pandas/formats/style.py
@@ -251,21 +251,23 @@ def format_attr(pair):
                            "class": " ".join(cs),
                            "is_visible": True})
 
-            for c in range(len(clabels[0])):
+            for c, value in enumerate(clabels[r]):
                 cs = [COL_HEADING_CLASS, "level%s" % r, "col%s" % c]
                 cs.extend(cell_context.get(
                     "col_headings", {}).get(r, {}).get(c, []))
-                value = clabels[r][c]
-                row_es.append({"type": "th",
-                               "value": value,
-                               "display_value": value,
-                               "class": " ".join(cs),
-                               "is_visible": _is_visible(c, r, col_lengths),
-                               "attributes": [
-                                   format_attr({"key": "colspan",
-                                                "value": col_lengths.get(
-                                                    (r, c), 1)})
-                               ]})
+                es = {
+                    "type": "th",
+                    "value": value,
+                    "display_value": value,
+                    "class": " ".join(cs),
+                    "is_visible": _is_visible(c, r, col_lengths),
+                }
+                colspan = col_lengths.get((r, c), 0)
+                if colspan > 1:
+                    es["attributes"] = [
+                        format_attr({"key": "colspan", "value": colspan})
+                    ]
+                row_es.append(es)
             head.append(row_es)
 
         if self.data.index.names and not all(x is None
@@ -289,19 +291,22 @@ def format_attr(pair):
 
         body = []
         for r, idx in enumerate(self.data.index):
-            #  cs.extend(
-            #    cell_context.get("row_headings", {}).get(r, {}).get(c, []))
-            row_es = [{"type": "th",
-                       "is_visible": _is_visible(r, c, idx_lengths),
-                       "attributes": [
-                           format_attr({"key": "rowspan",
-                                        "value": idx_lengths.get((c, r), 1)})
-                       ],
-                       "value": rlabels[r][c],
-                       "class": " ".join([ROW_HEADING_CLASS, "level%s" % c,
-                                          "row%s" % r]),
-                       "display_value": rlabels[r][c]}
-                      for c in range(len(rlabels[r]))]
+            row_es = []
+            for c, value in enumerate(rlabels[r]):
+                es = {
+                    "type": "th",
+                    "is_visible": _is_visible(r, c, idx_lengths),
+                    "value": value,
+                    "display_value": value,
+                    "class": " ".join([ROW_HEADING_CLASS, "level%s" % c,
+                                       "row%s" % r]),
+                }
+                rowspan = idx_lengths.get((c, r), 0)
+                if rowspan > 1:
+                    es["attributes"] = [
+                        format_attr({"key": "rowspan", "value": rowspan})
+                    ]
+                row_es.append(es)
 
             for c, col in enumerate(self.data.columns):
                 cs = [DATA_CLASS, "row%s" % r, "col%s" % c]
diff --git a/pandas/io/sql.py b/pandas/io/sql.py
@@ -750,7 +750,8 @@ def _get_column_names_and_types(self, dtype_mapper):
             for i, idx_label in enumerate(self.index):
                 idx_type = dtype_mapper(
                     self.frame.index.get_level_values(i))
-                column_names_and_types.append((idx_label, idx_type, True))
+                column_names_and_types.append((text_type(idx_label),
+                                              idx_type, True))
 
         column_names_and_types += [
             (text_type(self.frame.columns[i]),
@@ -1220,7 +1221,7 @@ def _create_sql_schema(self, frame, table_name, keys=None, dtype=None):
 
 def _get_unicode_name(name):
     try:
-        uname = name.encode("utf-8", "strict").decode("utf-8")
+        uname = text_type(name).encode("utf-8", "strict").decode("utf-8")
     except UnicodeError:
         raise ValueError("Cannot convert identifier to UTF-8: '%s'" % name)
     return uname
diff --git a/pandas/tests/formats/test_style.py b/pandas/tests/formats/test_style.py
@@ -141,21 +141,18 @@ def test_empty_index_name_doesnt_display(self):
                       'type': 'th',
                       'value': 'A',
                       'is_visible': True,
-                      'attributes': ["colspan=1"],
                       },
                      {'class': 'col_heading level0 col1',
                       'display_value': 'B',
                       'type': 'th',
                       'value': 'B',
                       'is_visible': True,
-                      'attributes': ["colspan=1"],
                       },
                      {'class': 'col_heading level0 col2',
                       'display_value': 'C',
                       'type': 'th',
                       'value': 'C',
                       'is_visible': True,
-                      'attributes': ["colspan=1"],
                       }]]
 
         self.assertEqual(result['head'], expected)
@@ -168,11 +165,9 @@ def test_index_name(self):
         expected = [[{'class': 'blank level0', 'type': 'th', 'value': '',
                       'display_value': '', 'is_visible': True},
                      {'class': 'col_heading level0 col0', 'type': 'th',
-                      'value': 'B', 'display_value': 'B',
-                      'is_visible': True, 'attributes': ['colspan=1']},
+                      'value': 'B', 'display_value': 'B', 'is_visible': True},
                      {'class': 'col_heading level0 col1', 'type': 'th',
-                      'value': 'C', 'display_value': 'C',
-                      'is_visible': True, 'attributes': ['colspan=1']}],
+                      'value': 'C', 'display_value': 'C', 'is_visible': True}],
                     [{'class': 'index_name level0', 'type': 'th',
                       'value': 'A'},
                      {'class': 'blank', 'type': 'th', 'value': ''},
@@ -191,9 +186,7 @@ def test_multiindex_name(self):
             {'class': 'blank level0', 'type': 'th', 'value': '',
              'display_value': '', 'is_visible': True},
             {'class': 'col_heading level0 col0', 'type': 'th',
-             'value': 'C', 'display_value': 'C',
-             'is_visible': True, 'attributes': ['colspan=1'],
-             }],
+             'value': 'C', 'display_value': 'C', 'is_visible': True}],
             [{'class': 'index_name level0', 'type': 'th',
               'value': 'A'},
              {'class': 'index_name level1', 'type': 'th',
@@ -618,16 +611,14 @@ def test_mi_sparse(self):
         body_1 = result['body'][0][1]
         expected_1 = {
             "value": 0, "display_value": 0, "is_visible": True,
-            "type": "th", "attributes": ["rowspan=1"],
-            "class": "row_heading level1 row0",
+            "type": "th", "class": "row_heading level1 row0",
         }
         tm.assert_dict_equal(body_1, expected_1)
 
         body_10 = result['body'][1][0]
         expected_10 = {
             "value": 'a', "display_value": 'a', "is_visible": False,
-            "type": "th", "attributes": ["rowspan=1"],
-            "class": "row_heading level0 row1",
+            "type": "th", "class": "row_heading level0 row1",
         }
         tm.assert_dict_equal(body_10, expected_10)
 
@@ -637,9 +628,8 @@ def test_mi_sparse(self):
              'is_visible': True, "display_value": ''},
             {'type': 'th', 'class': 'blank level0', 'value': '',
              'is_visible': True, 'display_value': ''},
-            {'attributes': ['colspan=1'], 'class': 'col_heading level0 col0',
-             'is_visible': True, 'type': 'th', 'value': 'A',
-             'display_value': 'A'}]
+            {'type': 'th', 'class': 'col_heading level0 col0', 'value': 'A',
+             'is_visible': True, 'display_value': 'A'}]
         self.assertEqual(head, expected)
 
     def test_mi_sparse_disabled(self):
@@ -650,7 +640,7 @@ def test_mi_sparse_disabled(self):
             result = df.style._translate()
         body = result['body']
         for row in body:
-            self.assertEqual(row[0]['attributes'], ['rowspan=1'])
+            assert 'attributes' not in row[0]
 
     def test_mi_sparse_index_names(self):
         df = pd.DataFrame({'A': [1, 2]}, index=pd.MultiIndex.from_arrays(
@@ -686,28 +676,24 @@ def test_mi_sparse_column_names(self):
              'type': 'th', 'is_visible': True},
             {'class': 'index_name level1', 'value': 'col_1',
              'display_value': 'col_1', 'is_visible': True, 'type': 'th'},
-            {'attributes': ['colspan=1'],
-             'class': 'col_heading level1 col0',
+            {'class': 'col_heading level1 col0',
              'display_value': 1,
              'is_visible': True,
              'type': 'th',
              'value': 1},
-            {'attributes': ['colspan=1'],
-             'class': 'col_heading level1 col1',
+            {'class': 'col_heading level1 col1',
              'display_value': 0,
              'is_visible': True,
              'type': 'th',
              'value': 0},
 
-            {'attributes': ['colspan=1'],
-             'class': 'col_heading level1 col2',
+            {'class': 'col_heading level1 col2',
              'display_value': 1,
              'is_visible': True,
              'type': 'th',
              'value': 1},
 
-            {'attributes': ['colspan=1'],
-             'class': 'col_heading level1 col3',
+            {'class': 'col_heading level1 col3',
              'display_value': 0,
              'is_visible': True,
              'type': 'th',
diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py
@@ -41,6 +41,22 @@
     _ZLIB_INSTALLED = True
 
 
+@pytest.fixture(scope='module')
+def current_packers_data():
+    # our current version packers data
+    from pandas.tests.io.generate_legacy_storage_files import (
+        create_msgpack_data)
+    return create_msgpack_data()
+
+
+@pytest.fixture(scope='module')
+def all_packers_data():
+    # all of our current version packers data
+    from pandas.tests.io.generate_legacy_storage_files import (
+        create_data)
+    return create_data()
+
+
 def check_arbitrary(a, b):
 
     if isinstance(a, (list, tuple)) and isinstance(b, (list, tuple)):
@@ -778,7 +794,16 @@ def test_default_encoding(self):
             assert_frame_equal(result, frame)
 
 
-class TestMsgpack():
+def legacy_packers_versions():
+    # yield the packers versions
+    path = tm.get_data_path('legacy_msgpack')
+    for v in os.listdir(path):
+        p = os.path.join(path, v)
+        if os.path.isdir(p):
+            yield v
+
+
+class TestMsgpack(object):
     """
     How to add msgpack tests:
 
@@ -788,48 +813,38 @@ class TestMsgpack():
     $ python generate_legacy_storage_files.py <output_dir> msgpack
 
     3. Move the created pickle to "data/legacy_msgpack/<version>" directory.
-
-    NOTE: TestMsgpack can't be a subclass of tm.Testcase to use test generator.
-    http://stackoverflow.com/questions/6689537/nose-test-generators-inside-class
     """
 
-    @classmethod
-    def setup_class(cls):
-        from pandas.tests.io.generate_legacy_storage_files import (
-            create_msgpack_data, create_data)
-        cls.data = create_msgpack_data()
-        cls.all_data = create_data()
-        cls.path = u('__%s__.msgpack' % tm.rands(10))
-        cls.minimum_structure = {'series': ['float', 'int', 'mixed',
-                                            'ts', 'mi', 'dup'],
-                                 'frame': ['float', 'int', 'mixed', 'mi'],
-                                 'panel': ['float'],
-                                 'index': ['int', 'date', 'period'],
-                                 'mi': ['reg2']}
-
-    def check_min_structure(self, data):
+    minimum_structure = {'series': ['float', 'int', 'mixed',
+                                    'ts', 'mi', 'dup'],
+                         'frame': ['float', 'int', 'mixed', 'mi'],
+                         'panel': ['float'],
+                         'index': ['int', 'date', 'period'],
+                         'mi': ['reg2']}
+
+    def check_min_structure(self, data, version):
         for typ, v in self.minimum_structure.items():
             assert typ in data, '"{0}" not found in unpacked data'.format(typ)
             for kind in v:
                 msg = '"{0}" not found in data["{1}"]'.format(kind, typ)
                 assert kind in data[typ], msg
 
-    def compare(self, vf, version):
+    def compare(self, current_data, all_data, vf, version):
         # GH12277 encoding default used to be latin-1, now utf-8
         if LooseVersion(version) < '0.18.0':
             data = read_msgpack(vf, encoding='latin-1')
         else:
             data = read_msgpack(vf)
-        self.check_min_structure(data)
+        self.check_min_structure(data, version)
         for typ, dv in data.items():
-            assert typ in self.all_data, ('unpacked data contains '
-                                          'extra key "{0}"'
-                                          .format(typ))
+            assert typ in all_data, ('unpacked data contains '
+                                     'extra key "{0}"'
+                                     .format(typ))
             for dt, result in dv.items():
-                assert dt in self.all_data[typ], ('data["{0}"] contains extra '
-                                                  'key "{1}"'.format(typ, dt))
+                assert dt in current_data[typ], ('data["{0}"] contains extra '
+                                                 'key "{1}"'.format(typ, dt))
                 try:
-                    expected = self.data[typ][dt]
+                    expected = current_data[typ][dt]
                 except KeyError:
                     continue
 
@@ -862,9 +877,11 @@ def compare_frame_dt_mixed_tzs(self, result, expected, typ, version):
         else:
             tm.assert_frame_equal(result, expected)
 
-    def read_msgpacks(self, version):
+    @pytest.mark.parametrize('version', legacy_packers_versions())
+    def test_msgpacks_legacy(self, current_packers_data, all_packers_data,
+                             version):
 
-        pth = tm.get_data_path('legacy_msgpack/{0}'.format(str(version)))
+        pth = tm.get_data_path('legacy_msgpack/{0}'.format(version))
         n = 0
         for f in os.listdir(pth):
             # GH12142 0.17 files packed in P2 can't be read in P3
@@ -873,19 +890,10 @@ def read_msgpacks(self, version):
                 continue
             vf = os.path.join(pth, f)
             try:
-                self.compare(vf, version)
+                self.compare(current_packers_data, all_packers_data,
+                             vf, version)
             except ImportError:
                 # blosc not installed
                 continue
             n += 1
         assert n > 0, 'Msgpack files are not tested'
-
-    def test_msgpack(self):
-        msgpack_path = tm.get_data_path('legacy_msgpack')
-        n = 0
-        for v in os.listdir(msgpack_path):
-            pth = os.path.join(msgpack_path, v)
-            if os.path.isdir(pth):
-                yield self.read_msgpacks, v
-            n += 1
-        assert n > 0, 'Msgpack files are not tested'
diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py