Commit 833afea

Author: Pyry Kovanen
Merge remote-tracking branch 'upstream/master' into empty-json-empty-df-fix
2 parents: 743c08f + 0c65c57

File tree: 18 files changed (+192, -94 lines)

asv_bench/benchmarks/frame_methods.py

Lines changed: 18 additions & 0 deletions

@@ -512,3 +512,21 @@ def time_nlargest(self, keep):
 
     def time_nsmallest(self, keep):
         self.df.nsmallest(100, 'A', keep=keep)
+
+
+class Describe(object):
+
+    goal_time = 0.2
+
+    def setup(self):
+        self.df = DataFrame({
+            'a': np.random.randint(0, 100, int(1e6)),
+            'b': np.random.randint(0, 100, int(1e6)),
+            'c': np.random.randint(0, 100, int(1e6))
+        })
+
+    def time_series_describe(self):
+        self.df['a'].describe()
+
+    def time_dataframe_describe(self):
+        self.df.describe()
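For context, the new Describe benchmark can be approximated outside of asv with timeit; the snippet below is not part of the commit and simply mirrors the data shape used in setup() above.

import numpy as np
import pandas as pd
from timeit import timeit

# Same data shape as the benchmark's setup(): three integer columns of 1e6 rows.
df = pd.DataFrame({
    'a': np.random.randint(0, 100, int(1e6)),
    'b': np.random.randint(0, 100, int(1e6)),
    'c': np.random.randint(0, 100, int(1e6)),
})

# Roughly what time_series_describe and time_dataframe_describe measure.
print(timeit(lambda: df['a'].describe(), number=10))
print(timeit(lambda: df.describe(), number=10))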

doc/source/ecosystem.rst

Lines changed: 3 additions & 0 deletions

@@ -38,7 +38,10 @@ Statsmodels leverages pandas objects as the underlying data container for comput
 Use pandas DataFrames in your `scikit-learn <http://scikit-learn.org/>`__
 ML pipeline.
 
+`Featuretools <https://github.com/featuretools/featuretools/>`__
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
+Featuretools is a Python library for automated feature engineering built on top of pandas. It excels at transforming temporal and relational datasets into feature matrices for machine learning using reusable feature engineering "primitives". Users can contribute their own primitives in Python and share them with the rest of the community.
 
 .. _ecosystem.visualization:
 

doc/source/whatsnew/v0.23.1.txt

Lines changed: 6 additions & 1 deletion

@@ -32,6 +32,7 @@ Performance Improvements
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
 - Improved performance of :meth:`CategoricalIndex.is_monotonic_increasing`, :meth:`CategoricalIndex.is_monotonic_decreasing` and :meth:`CategoricalIndex.is_monotonic` (:issue:`21025`)
+- Improved performance of :meth:`CategoricalIndex.is_unique` (:issue:`21107`)
 -
 -
 

@@ -85,14 +86,18 @@ Indexing
 - Bug in :meth:`Series.reset_index` where appropriate error was not raised with an invalid level name (:issue:`20925`)
 - Bug in :func:`interval_range` when ``start``/``periods`` or ``end``/``periods`` are specified with float ``start`` or ``end`` (:issue:`21161`)
 - Bug in :meth:`MultiIndex.set_names` where error raised for a ``MultiIndex`` with ``nlevels == 1`` (:issue:`21149`)
+- Bug in :class:`IntervalIndex` constructors where creating an ``IntervalIndex`` from categorical data was not fully supported (:issue:`21243`, :issue:`21253`)
+- Bug in :meth:`MultiIndex.sort_index` which was not guaranteed to sort correctly with ``level=1``; this was also causing data misalignment in particular :meth:`DataFrame.stack` operations (:issue:`20994`, :issue:`20945`, :issue:`21052`)
 -
 
 I/O
 ^^^
 
 - Bug in IO methods specifying ``compression='zip'`` which produced uncompressed zip archives (:issue:`17778`, :issue:`21144`)
 - Bug in :meth:`DataFrame.to_stata` which prevented exporting DataFrames to buffers and most file-like objects (:issue:`21041`)
-- Bug in IO JSON :func:`read_json`reading empty JSON schema with ``orient='table'`` back to :class:DataFrame caused an error (:issue:`21287`)
+- Bug in :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` which caused an encoding error when compression and encoding were both specified (:issue:`21241`, :issue:`21118`)
+- Bug in :meth:`read_stata` and :class:`StataReader` which did not correctly decode utf-8 strings on Python 3 from Stata 14 files (dta version 118) (:issue:`21244`)
+- Bug in IO JSON :func:`read_json` reading empty JSON schema with ``orient='table'`` back to :class:`DataFrame` caused an error (:issue:`21287`)
 
 Plotting
 ^^^^^^^^
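The last I/O bullet corresponds to the branch merged in this commit (empty-json-empty-df-fix). A hypothetical round-trip illustrating what the fix enables: an empty DataFrame written with orient='table' can be read back without error.

import pandas as pd

empty = pd.DataFrame()
payload = empty.to_json(orient='table')          # JSON Table Schema with no data rows
result = pd.read_json(payload, orient='table')   # previously raised; now returns an empty DataFrame
print(result.empty)                              # True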

doc/source/whatsnew/v0.24.0.txt

Lines changed: 1 addition & 2 deletions

@@ -63,8 +63,7 @@ Removal of prior version deprecations/changes
 Performance Improvements
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
--
--
+- Improved performance of :func:`Series.describe` in case of numeric dtypes (:issue:`21274`)
 -
 
 .. _whatsnew_0240.docs:

pandas/core/generic.py

Lines changed: 1 addition & 1 deletion

@@ -8519,7 +8519,7 @@ def describe_numeric_1d(series):
             stat_index = (['count', 'mean', 'std', 'min'] +
                           formatted_percentiles + ['max'])
             d = ([series.count(), series.mean(), series.std(), series.min()] +
-                 [series.quantile(x) for x in percentiles] + [series.max()])
+                 series.quantile(percentiles).tolist() + [series.max()])
             return pd.Series(d, index=stat_index, name=series.name)
 
         def describe_categorical_1d(data):
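The change replaces a Python-level loop over percentiles with a single vectorized quantile call, which is where the Series.describe speedup noted in the v0.24.0 whatsnew comes from. A minimal sketch of the equivalence (not part of the commit):

import numpy as np
import pandas as pd

s = pd.Series(np.random.randn(int(1e6)))
percentiles = [0.25, 0.5, 0.75]

vectorized = s.quantile(percentiles).tolist()    # one call computes all requested quantiles
looped = [s.quantile(p) for p in percentiles]    # the old per-percentile loop
assert np.allclose(vectorized, looped)           # same numbers, fewer passes over the data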

pandas/core/indexes/category.py

Lines changed: 1 addition & 1 deletion

@@ -378,7 +378,7 @@ def _engine(self):
     # introspection
     @cache_readonly
     def is_unique(self):
-        return not self.duplicated().any()
+        return self._engine.is_unique
 
     @property
     def is_monotonic_increasing(self):
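The result of is_unique is unchanged; only the implementation now asks the cached hash-table engine instead of materializing duplicated(). A small usage sketch (standard behavior, not from the diff):

import pandas as pd

ci = pd.CategoricalIndex(['a', 'b', 'a'], categories=['a', 'b', 'c'])
print(ci.is_unique)   # False -- 'a' appears twice

ci = pd.CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'])
print(ci.is_unique)   # True -- unused categories do not affect uniqueness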

pandas/core/indexes/interval.py

Lines changed: 4 additions & 0 deletions

@@ -112,6 +112,10 @@ def maybe_convert_platform_interval(values):
     -------
     array
     """
+    if is_categorical_dtype(values):
+        # GH 21243/21253
+        values = np.array(values)
+
     if isinstance(values, (list, tuple)) and len(values) == 0:
         # GH 19016
         # empty lists/tuples get object dtype by default, but this is not
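The new guard converts categorical input to a plain array before the existing checks run. A hypothetical reproduction of the GH 21243/21253 scenario (the exact original report may differ): constructing intervals from categorical rather than plain numeric breaks.

import pandas as pd

breaks = pd.Categorical([0, 1, 2, 3], ordered=True)   # categorical breaks
idx = pd.IntervalIndex.from_breaks(breaks)            # previously not fully supported
print(idx)                                             # three intervals: (0, 1], (1, 2], (2, 3]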

pandas/core/series.py

Lines changed: 2 additions & 1 deletion

@@ -1837,7 +1837,7 @@ def round(self, decimals=0, *args, **kwargs):
 
     def quantile(self, q=0.5, interpolation='linear'):
         """
-        Return value at the given quantile, a la numpy.percentile.
+        Return value at the given quantile.
 
         Parameters
         ----------

@@ -1876,6 +1876,7 @@ def quantile(self, q=0.5, interpolation='linear'):
         See Also
         --------
         pandas.core.window.Rolling.quantile
+        numpy.percentile
         """
 
         self._check_percentile(q)
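The docstring now points to numpy.percentile through See Also instead of the inline "a la" phrasing. For reference, typical quantile usage (standard documented behavior, not part of the diff):

import pandas as pd

s = pd.Series([1, 2, 3, 4])
print(s.quantile(0.5))                          # 2.5, linear interpolation (the default)
print(s.quantile(0.5, interpolation='lower'))   # 2
print(s.quantile([0.25, 0.75]))                 # a Series indexed by the requested quantiles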

pandas/core/strings.py

Lines changed: 3 additions & 3 deletions

@@ -2172,9 +2172,9 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
 
         Returns
         -------
-        concat : str if `other is None`, Series/Index of objects if `others is
-            not None`. In the latter case, the result will remain categorical
-            if the calling Series/Index is categorical.
+        concat : str or Series/Index of objects
+            If `others` is None, `str` is returned, otherwise a `Series/Index`
+            (same type as caller) of objects is returned.
 
         See Also
         --------
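The rewritten Returns section is easier to scan: a str when others is None, otherwise a Series/Index of objects. A short illustration (standard str.cat behavior, not part of the diff):

import pandas as pd

s = pd.Series(['a', 'b', 'c'])
print(s.str.cat(sep='-'))                    # 'a-b-c' -- others is None, a single str comes back
print(s.str.cat(['x', 'y', 'z'], sep='-'))   # element-wise: Series(['a-x', 'b-y', 'c-z'])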

pandas/io/formats/csvs.py

Lines changed: 20 additions & 16 deletions

@@ -9,6 +9,7 @@
 import numpy as np
 
 from pandas.core.dtypes.missing import notna
+from pandas.core.dtypes.inference import is_file_like
 from pandas.core.index import Index, MultiIndex
 from pandas import compat
 from pandas.compat import (StringIO, range, zip)

@@ -127,14 +128,19 @@ def save(self):
         else:
             encoding = self.encoding
 
-        if hasattr(self.path_or_buf, 'write'):
-            f = self.path_or_buf
-            close = False
+        # PR 21300 uses string buffer to receive csv writing and dump into
+        # file-like output with compression as option. GH 21241, 21118
+        f = StringIO()
+        if not is_file_like(self.path_or_buf):
+            # path_or_buf is path
+            path_or_buf = self.path_or_buf
+        elif hasattr(self.path_or_buf, 'name'):
+            # path_or_buf is file handle
+            path_or_buf = self.path_or_buf.name
         else:
-            f, handles = _get_handle(self.path_or_buf, self.mode,
-                                     encoding=encoding,
-                                     compression=None)
-            close = True if self.compression is None else False
+            # path_or_buf is file-like IO objects.
+            f = self.path_or_buf
+            path_or_buf = None
 
         try:
             writer_kwargs = dict(lineterminator=self.line_terminator,

@@ -151,18 +157,16 @@ def save(self):
             self._save()
 
         finally:
-            # GH 17778 handles compression for byte strings.
-            if not close and self.compression:
-                f.close()
-                with open(f.name, 'r') as f:
-                    data = f.read()
-                f, handles = _get_handle(f.name, self.mode,
+            # GH 17778 handles zip compression for byte strings separately.
+            buf = f.getvalue()
+            if path_or_buf:
+                f, handles = _get_handle(path_or_buf, self.mode,
                                          encoding=encoding,
                                          compression=self.compression)
-                f.write(data)
-                close = True
-            if close:
+                f.write(buf)
                 f.close()
+                for _fh in handles:
+                    _fh.close()
 
     def _save_header(self):
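The reworked save() renders the CSV into an in-memory StringIO buffer first and only then pushes the buffered text through a (possibly compressed) file handle, which is what fixes the combined compression-plus-encoding case. A minimal sketch of that buffering pattern at the user level (using gzip directly, not pandas internals):

import gzip
from io import StringIO

import pandas as pd

df = pd.DataFrame({'a': [1, 2], 'b': ['x', 'y']})

buf = StringIO()
df.to_csv(buf, index=False)                    # CSV text accumulates in memory first

with gzip.open('out.csv.gz', 'wt', encoding='utf-8') as fh:
    fh.write(buf.getvalue())                   # then dump the whole buffer through compression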
