Skip to content

Commit 4a5b719

Browse files
author
Matt Roeschke
committed
Merge remote-tracking branch 'upstream/master' into timestamp_tz_constructor_depr
2 parents 7cb1795 + fe52d9f commit 4a5b719

File tree

15 files changed

+119
-50
lines changed

15 files changed

+119
-50
lines changed

doc/source/api.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1724,6 +1724,7 @@ MultiIndex Components
17241724
MultiIndex.set_levels
17251725
MultiIndex.set_labels
17261726
MultiIndex.to_hierarchical
1727+
MultiIndex.to_flat_index
17271728
MultiIndex.to_frame
17281729
MultiIndex.is_lexsorted
17291730
MultiIndex.sortlevel

doc/source/whatsnew/v0.24.0.txt

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,7 @@ Other Enhancements
238238
- :func:`~DataFrame.to_parquet` now supports writing a ``DataFrame`` as a directory of parquet files partitioned by a subset of the columns when ``engine = 'pyarrow'`` (:issue:`23283`)
239239
- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexistent` (:issue:`8917`)
240240
- :func:`read_excel()` now accepts ``usecols`` as a list of column names or callable (:issue:`18273`)
241+
- :meth:`MultiIndex.to_flat_index` has been added to flatten multiple levels into a single-level :class:`Index` object.
241242

242243
.. _whatsnew_0240.api_breaking:
243244

@@ -1264,9 +1265,6 @@ MultiIndex
12641265
I/O
12651266
^^^
12661267

1267-
- Bug in :meth:`to_sql` when writing timezone aware data (``datetime64[ns, tz]`` dtype) would raise a ``TypeError`` (:issue:`9086`)
1268-
- Bug in :meth:`to_sql` where a naive DatetimeIndex would be written as ``TIMESTAMP WITH TIMEZONE`` type in supported databases, e.g. PostgreSQL (:issue:`23510`)
1269-
12701268
.. _whatsnew_0240.bug_fixes.nan_with_str_dtype:
12711269

12721270
Proper handling of `np.NaN` in a string data-typed column with the Python engine
@@ -1302,6 +1300,9 @@ Current Behavior:
13021300

13031301
Notice how we now output ``np.nan`` itself instead of a stringified form of it.
13041302

1303+
- Bug in :meth:`to_sql` when writing timezone aware data (``datetime64[ns, tz]`` dtype) would raise a ``TypeError`` (:issue:`9086`)
1304+
- Bug in :meth:`to_sql` where a naive DatetimeIndex would be written as ``TIMESTAMP WITH TIMEZONE`` type in supported databases, e.g. PostgreSQL (:issue:`23510`)
1305+
- Bug in :func:`read_excel()` when ``parse_cols`` is specified with an empty dataset (:issue:`9208`)
13051306
- :func:`read_html()` no longer ignores all-whitespace ``<tr>`` within ``<thead>`` when considering the ``skiprows`` and ``header`` arguments. Previously, users had to decrease their ``header`` and ``skiprows`` values on such tables to work around the issue. (:issue:`21641`)
13061307
- :func:`read_excel()` will correctly show the deprecation warning for previously deprecated ``sheetname`` (:issue:`17994`)
13071308
- :func:`read_csv()` and :func:`read_table()` will throw ``UnicodeError`` and not coredump on badly encoded strings (:issue:`22748`)

pandas/_libs/intervaltree.pxi.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ cdef class IntervalTree(IntervalMixin):
116116
enclosing = self.get_loc(0.5 * (key_left + key_right))
117117
combined = np.concatenate([left_overlap, right_overlap, enclosing])
118118
uniques = pd.unique(combined)
119-
return uniques
119+
return uniques.astype('intp')
120120

121121
def get_indexer(self, scalar_t[:] target):
122122
"""Return the positions corresponding to unique intervals that overlap

pandas/core/frame.py

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1829,24 +1829,6 @@ def to_panel(self):
18291829

18301830
return self._constructor_expanddim(new_mgr)
18311831

1832-
@Appender(_shared_docs['to_excel'] % _shared_doc_kwargs)
1833-
def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
1834-
float_format=None, columns=None, header=True, index=True,
1835-
index_label=None, startrow=0, startcol=0, engine=None,
1836-
merge_cells=True, encoding=None, inf_rep='inf', verbose=True,
1837-
freeze_panes=None):
1838-
1839-
from pandas.io.formats.excel import ExcelFormatter
1840-
formatter = ExcelFormatter(self, na_rep=na_rep, cols=columns,
1841-
header=header,
1842-
float_format=float_format, index=index,
1843-
index_label=index_label,
1844-
merge_cells=merge_cells,
1845-
inf_rep=inf_rep)
1846-
formatter.write(excel_writer, sheet_name=sheet_name, startrow=startrow,
1847-
startcol=startcol, freeze_panes=freeze_panes,
1848-
engine=engine)
1849-
18501832
@deprecate_kwarg(old_arg_name='encoding', new_arg_name=None)
18511833
def to_stata(self, fname, convert_dates=None, write_index=True,
18521834
encoding="latin-1", byteorder=None, time_stamp=None,

pandas/core/generic.py

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1977,16 +1977,17 @@ def _repr_latex_(self):
19771977
# I/O Methods
19781978

19791979
_shared_docs['to_excel'] = """
1980-
Write %(klass)s to an excel sheet.
1980+
Write %(klass)s to an Excel sheet.
19811981
1982-
To write a single %(klass)s to an excel .xlsx file it is only necessary to
1982+
To write a single %(klass)s to an Excel .xlsx file it is only necessary to
19831983
specify a target file name. To write to multiple sheets it is necessary to
19841984
create an `ExcelWriter` object with a target file name, and specify a sheet
1985-
in the file to write to. Multiple sheets may be written to by
1986-
specifying unique `sheet_name`. With all data written to the file it is
1987-
necessary to save the changes. Note that creating an ExcelWriter object
1988-
with a file name that already exists will result in the contents of the
1989-
existing file being erased.
1985+
in the file to write to.
1986+
1987+
Multiple sheets may be written to by specifying unique `sheet_name`.
1988+
With all data written to the file it is necessary to save the changes.
1989+
Note that creating an `ExcelWriter` object with a file name that already
1990+
exists will result in the contents of the existing file being erased.
19901991
19911992
Parameters
19921993
----------
@@ -9951,6 +9952,25 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
99519952
if path_or_buf is None:
99529953
return formatter.path_or_buf.getvalue()
99539954

9955+
@Appender(_shared_docs["to_excel"] % dict(klass="object"))
9956+
def to_excel(self, excel_writer, sheet_name="Sheet1", na_rep="",
9957+
float_format=None, columns=None, header=True, index=True,
9958+
index_label=None, startrow=0, startcol=0, engine=None,
9959+
merge_cells=True, encoding=None, inf_rep="inf", verbose=True,
9960+
freeze_panes=None):
9961+
df = self if isinstance(self, ABCDataFrame) else self.to_frame()
9962+
9963+
from pandas.io.formats.excel import ExcelFormatter
9964+
formatter = ExcelFormatter(df, na_rep=na_rep, cols=columns,
9965+
header=header,
9966+
float_format=float_format, index=index,
9967+
index_label=index_label,
9968+
merge_cells=merge_cells,
9969+
inf_rep=inf_rep)
9970+
formatter.write(excel_writer, sheet_name=sheet_name, startrow=startrow,
9971+
startcol=startcol, freeze_panes=freeze_panes,
9972+
engine=engine)
9973+
99549974

99559975
def _doc_parms(cls):
99569976
"""Return a tuple of the doc parms."""

pandas/core/indexes/base.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1113,6 +1113,26 @@ def _format_attrs(self):
11131113
"""
11141114
return format_object_attrs(self)
11151115

1116+
def to_flat_index(self):
1117+
"""
1118+
Identity method.
1119+
1120+
.. versionadded:: 0.24.0
1121+
1122+
This is implemented for compatibility with subclass implementations
1123+
when chaining.
1124+
1125+
Returns
1126+
-------
1127+
pd.Index
1128+
Caller.
1129+
1130+
See Also
1131+
--------
1132+
MultiIndex.to_flat_index : Subclass implementation.
1133+
"""
1134+
return self
1135+
11161136
def to_series(self, index=None, name=None):
11171137
"""
11181138
Create a Series with both index and values equal to the index keys

pandas/core/indexes/multi.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ class MultiIndex(Index):
193193
set_levels
194194
set_labels
195195
to_frame
196+
to_flat_index
196197
is_lexsorted
197198
sortlevel
198199
droplevel
@@ -1246,6 +1247,34 @@ def to_hierarchical(self, n_repeat, n_shuffle=1):
12461247
FutureWarning, stacklevel=2)
12471248
return MultiIndex(levels=levels, labels=labels, names=names)
12481249

1250+
def to_flat_index(self):
1251+
"""
1252+
Convert a MultiIndex to an Index of Tuples containing the level values.
1253+
1254+
.. versionadded:: 0.24.0
1255+
1256+
Returns
1257+
-------
1258+
pd.Index
1259+
Index with the MultiIndex data represented in Tuples.
1260+
1261+
Notes
1262+
-----
1263+
This method will simply return the caller if called by anything other
1264+
than a MultiIndex.
1265+
1266+
Examples
1267+
--------
1268+
>>> index = pd.MultiIndex.from_product(
1269+
... [['foo', 'bar'], ['baz', 'qux']],
1270+
... names=['a', 'b'])
1271+
>>> index.to_flat_index()
1272+
Index([('foo', 'baz'), ('foo', 'qux'),
1273+
('bar', 'baz'), ('bar', 'qux')],
1274+
dtype='object')
1275+
"""
1276+
return Index(self.values, tupleize_cols=False)
1277+
12491278
@property
12501279
def is_all_dates(self):
12511280
return False

pandas/core/series.py

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3970,19 +3970,6 @@ def to_csv(self, *args, **kwargs):
39703970
kwargs["header"] = False # Backwards compatibility.
39713971
return self.to_frame().to_csv(**kwargs)
39723972

3973-
@Appender(generic._shared_docs['to_excel'] % _shared_doc_kwargs)
3974-
def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
3975-
float_format=None, columns=None, header=True, index=True,
3976-
index_label=None, startrow=0, startcol=0, engine=None,
3977-
merge_cells=True, encoding=None, inf_rep='inf', verbose=True):
3978-
df = self.to_frame()
3979-
df.to_excel(excel_writer=excel_writer, sheet_name=sheet_name,
3980-
na_rep=na_rep, float_format=float_format, columns=columns,
3981-
header=header, index=index, index_label=index_label,
3982-
startrow=startrow, startcol=startcol, engine=engine,
3983-
merge_cells=merge_cells, encoding=encoding,
3984-
inf_rep=inf_rep, verbose=verbose)
3985-
39863973
@Appender(generic._shared_docs['isna'] % _shared_doc_kwargs)
39873974
def isna(self):
39883975
return super(Series, self).isna()

pandas/io/excel.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -634,14 +634,17 @@ def _parse_cell(cell_contents, cell_typ):
634634
else:
635635
offset = 1 + max(header)
636636

637-
for col in index_col:
638-
last = data[offset][col]
639-
640-
for row in range(offset + 1, len(data)):
641-
if data[row][col] == '' or data[row][col] is None:
642-
data[row][col] = last
643-
else:
644-
last = data[row][col]
637+
# Check if we have an empty dataset
638+
# before trying to collect data.
639+
if offset < len(data):
640+
for col in index_col:
641+
last = data[offset][col]
642+
643+
for row in range(offset + 1, len(data)):
644+
if data[row][col] == '' or data[row][col] is None:
645+
data[row][col] = last
646+
else:
647+
last = data[row][col]
645648

646649
has_index_names = is_list_like(header) and len(header) > 1
647650

pandas/tests/indexes/multi/test_conversion.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,3 +170,11 @@ def test_to_series_with_arguments(idx):
170170
assert s.values is not idx.values
171171
assert s.index is not idx
172172
assert s.name != idx.name
173+
174+
175+
def test_to_flat_index(idx):
176+
expected = pd.Index((('foo', 'one'), ('foo', 'two'), ('bar', 'one'),
177+
('baz', 'two'), ('qux', 'one'), ('qux', 'two')),
178+
tupleize_cols=False)
179+
result = idx.to_flat_index()
180+
tm.assert_index_equal(result, expected)

pandas/tests/indexes/test_base.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2266,6 +2266,14 @@ def test_tab_complete_warning(self, ip):
22662266
with provisionalcompleter('ignore'):
22672267
list(ip.Completer.completions('idx.', 4))
22682268

2269+
def test_to_flat_index(self, indices):
2270+
# 22866
2271+
if isinstance(indices, MultiIndex):
2272+
pytest.skip("Separate expectation for MultiIndex")
2273+
2274+
result = indices.to_flat_index()
2275+
tm.assert_index_equal(result, indices)
2276+
22692277

22702278
class TestMixedIntIndex(Base):
22712279
# Mostly the tests from common.py for which the results differ

pandas/tests/io/data/test1.xls

-2.5 KB
Binary file not shown.

pandas/tests/io/data/test1.xlsm

-31.2 KB
Binary file not shown.

pandas/tests/io/data/test1.xlsx

-31.2 KB
Binary file not shown.

pandas/tests/io/test_excel.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,16 @@ def test_index_col_label_error(self, ext):
235235
self.get_exceldf("test1", ext, "Sheet1", index_col=["A"],
236236
usecols=["A", "C"])
237237

238+
def test_index_col_empty(self, ext):
239+
# see gh-9208
240+
result = self.get_exceldf("test1", ext, "Sheet3",
241+
index_col=["A", "B", "C"])
242+
expected = DataFrame(columns=["D", "E", "F"],
243+
index=MultiIndex(levels=[[]] * 3,
244+
labels=[[]] * 3,
245+
names=["A", "B", "C"]))
246+
tm.assert_frame_equal(result, expected)
247+
238248
def test_usecols_pass_non_existent_column(self, ext):
239249
msg = ("Usecols do not match columns, "
240250
"columns expected but not found: " + r"\['E'\]")

0 commit comments

Comments
 (0)