Skip to content

Commit 8df84fb

Browse files
committed
Merge remote-tracking branch 'upstream/master' into doc/substitution-offset
2 parents f6959d3 + 324bb84 commit 8df84fb

File tree

263 files changed

+2229
-2418
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

263 files changed

+2229
-2418
lines changed

ci/azure/posix.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,18 @@ jobs:
1515
PATTERN: "not slow and not network"
1616

1717
${{ if eq(parameters.name, 'Linux') }}:
18+
py35_compat:
19+
ENV_FILE: ci/deps/azure-35-compat.yaml
20+
CONDA_PY: "35"
21+
PATTERN: "not slow and not network"
22+
23+
py36_locale_slow_old_np:
24+
ENV_FILE: ci/deps/azure-36-locale.yaml
25+
CONDA_PY: "36"
26+
PATTERN: "slow"
27+
LOCALE_OVERRIDE: "zh_CN.UTF-8"
28+
EXTRA_APT: "language-pack-zh-hans"
29+
1830
py36_locale_slow:
1931
ENV_FILE: ci/deps/azure-36-locale_slow.yaml
2032
CONDA_PY: "36"

ci/azure/windows.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ jobs:
1212
ENV_FILE: ci/deps/azure-windows-36.yaml
1313
CONDA_PY: "36"
1414

15+
py37_np141:
16+
ENV_FILE: ci/deps/azure-windows-37.yaml
17+
CONDA_PY: "37"
18+
1519
steps:
1620
- task: CondaEnvironment@1
1721
inputs:

ci/deps/azure-35-compat.yaml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
name: pandas-dev
2+
channels:
3+
- defaults
4+
- conda-forge
5+
dependencies:
6+
- beautifulsoup4==4.4.1
7+
- bottleneck=1.2.0
8+
- cython=0.28.2
9+
- hypothesis>=3.58.0
10+
- jinja2=2.8
11+
- numexpr=2.6.1
12+
- numpy=1.12.0
13+
- openpyxl=2.4.0
14+
- pytables=3.4.2
15+
- python-dateutil=2.5.0
16+
- python=3.5*
17+
- pytz=2015.4
18+
- scipy=0.18.1
19+
- xlrd=1.0.0
20+
- xlsxwriter=0.7.7
21+
- xlwt=1.0.0
22+
# universal
23+
- pytest-xdist
24+
- pytest-mock
25+
- isort
26+
- pip:
27+
# for python 3.5, pytest>=4.0.2 is not available in conda
28+
- pytest>=4.0.2
29+
- html5lib==1.0b2

ci/deps/azure-36-locale.yaml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
name: pandas-dev
2+
channels:
3+
- defaults
4+
- conda-forge
5+
dependencies:
6+
- beautifulsoup4==4.5.1
7+
- bottleneck=1.2.0
8+
- cython=0.28.2
9+
- lxml
10+
- matplotlib=2.0.0
11+
- numpy=1.12.0
12+
- openpyxl=2.4.0
13+
- python-dateutil
14+
- python-blosc
15+
- python=3.6
16+
- pytz=2016.10
17+
- scipy
18+
- sqlalchemy=1.1.4
19+
- xlrd=1.0.0
20+
- xlsxwriter=0.9.4
21+
- xlwt=1.2.0
22+
# universal
23+
- pytest>=4.0.2
24+
- pytest-xdist
25+
- pytest-mock
26+
- hypothesis>=3.58.0
27+
- isort
28+
- pip:
29+
- html5lib==1.0b2

ci/deps/azure-macos-35.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ dependencies:
1212
- nomkl
1313
- numexpr
1414
- numpy=1.12.0
15-
- openpyxl=2.5.5
15+
- openpyxl
1616
- pyarrow
1717
- pytables
1818
- python=3.5*

ci/deps/azure-windows-37.yaml

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
name: pandas-dev
2+
channels:
3+
- defaults
4+
- conda-forge
5+
dependencies:
6+
- beautifulsoup4
7+
- bottleneck
8+
- gcsfs
9+
- html5lib
10+
- jinja2
11+
- lxml
12+
- matplotlib=3.0.1
13+
- numexpr
14+
- numpy=1.14.*
15+
- openpyxl
16+
- pytables
17+
- python=3.7.*
18+
- python-dateutil
19+
- pytz
20+
- s3fs
21+
- scipy
22+
- sqlalchemy
23+
- xlrd
24+
- xlsxwriter
25+
- xlwt
26+
# universal
27+
- cython>=0.28.2
28+
- pytest>=4.0.2
29+
- pytest-xdist
30+
- pytest-mock
31+
- moto
32+
- hypothesis>=3.58.0
33+
- isort

ci/deps/travis-36-locale.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ dependencies:
1515
- numpy
1616
- openpyxl
1717
- psycopg2
18-
- pymysql
18+
- pymysql=0.7.9
1919
- pytables
2020
- python-dateutil
2121
- python=3.6*

doc/source/conf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,8 @@
134134
master_doc = 'index'
135135

136136
# General information about the project.
137-
project = u'pandas'
138-
copyright = u'2008-2014, the pandas development team'
137+
project = 'pandas'
138+
copyright = '2008-2014, the pandas development team'
139139

140140
# The version info for the project you're documenting, acts as replacement for
141141
# |version| and |release|, also used in various other places throughout the

doc/source/install.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ Optional Dependencies
259259
* `PyTables <http://www.pytables.org>`__: necessary for HDF5-based storage, Version 3.4.2 or higher
260260
* `pyarrow <http://arrow.apache.org/docs/python/>`__ (>= 0.9.0): necessary for feather-based storage.
261261
* `Apache Parquet <https://parquet.apache.org/>`__, either `pyarrow <http://arrow.apache.org/docs/python/>`__ (>= 0.7.0) or `fastparquet <https://fastparquet.readthedocs.io/en/latest>`__ (>= 0.2.1) for parquet-based storage. The `snappy <https://pypi.org/project/python-snappy>`__ and `brotli <https://pypi.org/project/brotlipy>`__ are available for compression support.
262-
* `SQLAlchemy <http://www.sqlalchemy.org>`__: for SQL database support. Version 1.0.8 or higher recommended. Besides SQLAlchemy, you also need a database specific driver. You can find an overview of supported drivers for each SQL dialect in the `SQLAlchemy docs <http://docs.sqlalchemy.org/en/latest/dialects/index.html>`__. Some common drivers are:
262+
* `SQLAlchemy <http://www.sqlalchemy.org>`__: for SQL database support. Version 1.1.4 or higher recommended. Besides SQLAlchemy, you also need a database specific driver. You can find an overview of supported drivers for each SQL dialect in the `SQLAlchemy docs <http://docs.sqlalchemy.org/en/latest/dialects/index.html>`__. Some common drivers are:
263263

264264
* `psycopg2 <http://initd.org/psycopg/>`__: for PostgreSQL
265265
* `pymysql <https://github.com/PyMySQL/PyMySQL>`__: for MySQL.

doc/source/user_guide/advanced.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -797,7 +797,7 @@ values **not** in the categories, similarly to how you can reindex **any** panda
797797
In [11]: df3 = df3.set_index('B')
798798
799799
In [11]: df3.index
800-
Out[11]: CategoricalIndex([u'a', u'a', u'b', u'b', u'c', u'a'], categories=[u'a', u'b', u'c'], ordered=False, name=u'B', dtype='category')
800+
Out[11]: CategoricalIndex(['a', 'a', 'b', 'b', 'c', 'a'], categories=['a', 'b', 'c'], ordered=False, name='B', dtype='category')
801801
802802
In [12]: pd.concat([df2, df3])
803803
TypeError: categories must match existing categories when appending

doc/source/user_guide/groupby.rst

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -946,23 +946,6 @@ that is itself a series, and possibly upcast the result to a DataFrame:
946946
Depending on the path taken and exactly what you are grouping, the grouped column(s) may be included in
947947
the output as well as set the indices.
948948

949-
.. warning::
950-
951-
In the current implementation apply calls func twice on the
952-
first group to decide whether it can take a fast or slow code
953-
path. This can lead to unexpected behavior if func has
954-
side-effects, as they will take effect twice for the first
955-
group.
956-
957-
.. ipython:: python
958-
959-
d = pd.DataFrame({"a": ["x", "y"], "b": [1, 2]})
960-
def identity(df):
961-
print(df)
962-
return df
963-
964-
d.groupby("a").apply(identity)
965-
966949

967950
Other useful features
968951
---------------------

doc/source/user_guide/options.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -484,7 +484,7 @@ If a DataFrame or Series contains these characters, the default output mode may
484484

485485
.. ipython:: python
486486
487-
df = pd.DataFrame({u'国籍': ['UK', u'日本'], u'名前': ['Alice', u'しのぶ']})
487+
df = pd.DataFrame({'国籍': ['UK', '日本'], '名前': ['Alice', 'しのぶ']})
488488
df
489489
490490
.. image:: ../_static/option_unicode01.png
@@ -507,7 +507,7 @@ By default, an "Ambiguous" character's width, such as "¡" (inverted exclamation
507507

508508
.. ipython:: python
509509
510-
df = pd.DataFrame({'a': ['xxx', u'¡¡'], 'b': ['yyy', u'¡¡']})
510+
df = pd.DataFrame({'a': ['xxx', '¡¡'], 'b': ['yyy', '¡¡']})
511511
df
512512
513513
.. image:: ../_static/option_unicode03.png

doc/source/user_guide/reshaping.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -695,7 +695,7 @@ handling of NaN:
695695
In [2]: pd.factorize(x, sort=True)
696696
Out[2]:
697697
(array([ 2, 2, -1, 3, 0, 1]),
698-
Index([3.14, inf, u'A', u'B'], dtype='object'))
698+
Index([3.14, inf, 'A', 'B'], dtype='object'))
699699
700700
In [3]: np.unique(x, return_inverse=True)[::-1]
701701
Out[3]: (array([3, 3, 0, 4, 1, 2]), array([nan, 3.14, inf, 'A', 'B'], dtype=object))

doc/source/whatsnew/v0.25.0.rst

Lines changed: 54 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,50 @@ is respected in indexing. (:issue:`24076`, :issue:`16785`)
7373
df = pd.DataFrame([0], index=pd.DatetimeIndex(['2019-01-01'], tz='US/Pacific'))
7474
df['2019-01-01 12:00:00+04:00':'2019-01-01 13:00:00+04:00']
7575

76+
.. _whatsnew_0250.api_breaking.groupby_apply_first_group_once:
77+
78+
GroupBy.apply on ``DataFrame`` evaluates first group only once
79+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
80+
81+
The implementation of :meth:`DataFrameGroupBy.apply() <pandas.core.groupby.DataFrameGroupBy.apply>`
82+
previously evaluated the supplied function consistently twice on the first group
83+
to infer if it is safe to use a fast code path. Particularly for functions with
84+
side effects, this was an undesired behavior and may have led to surprises.
85+
86+
(:issue:`2936`, :issue:`2656`, :issue:`7739`, :issue:`10519`, :issue:`12155`,
87+
:issue:`20084`, :issue:`21417`)
88+
89+
Now every group is evaluated only a single time.
90+
91+
.. ipython:: python
92+
93+
df = pd.DataFrame({"a": ["x", "y"], "b": [1, 2]})
94+
df
95+
96+
def func(group):
97+
print(group.name)
98+
return group
99+
100+
*Previous Behavior*:
101+
102+
.. code-block:: python
103+
104+
In [3]: df.groupby('a').apply(func)
105+
x
106+
x
107+
y
108+
Out[3]:
109+
a b
110+
0 x 1
111+
1 y 2
112+
113+
*New Behavior*:
114+
115+
.. ipython:: python
116+
117+
df.groupby("a").apply(func)
118+
119+
76120
Concatenating Sparse Values
77121
^^^^^^^^^^^^^^^^^^^^^^^^^^^
78122

@@ -83,14 +127,14 @@ Series or DataFrame with sparse values, rather than a ``SparseDataFrame`` (:issu
83127
84128
df = pd.DataFrame({"A": pd.SparseArray([0, 1])})
85129
86-
*Previous Behavior:*
130+
*Previous Behavior*:
87131

88132
.. code-block:: ipython
89133
90134
In [2]: type(pd.concat([df, df]))
91135
pandas.core.sparse.frame.SparseDataFrame
92136
93-
*New Behavior:*
137+
*New Behavior*:
94138

95139
.. ipython:: python
96140
@@ -123,13 +167,13 @@ If installed, we now require:
123167
+=================+=================+==========+
124168
| beautifulsoup4 | 4.4.1 | |
125169
+-----------------+-----------------+----------+
126-
| openpyxl | 2.2.6 | |
170+
| openpyxl | 2.4.0 | |
127171
+-----------------+-----------------+----------+
128-
| pymysql | 0.6.6 | |
172+
| pymysql | 0.7.9 | |
129173
+-----------------+-----------------+----------+
130174
| pytz | 2015.4 | |
131175
+-----------------+-----------------+----------+
132-
| sqlalchemy | 1.0.8 | |
176+
| sqlalchemy | 1.1.4 | |
133177
+-----------------+-----------------+----------+
134178
| xlsxwriter | 0.7.7 | |
135179
+-----------------+-----------------+----------+
@@ -176,6 +220,7 @@ Performance Improvements
176220
int8/int16/int32 and the searched key is within the integer bounds for the dtype (:issue:`22034`)
177221
- Improved performance of :meth:`pandas.core.groupby.GroupBy.quantile` (:issue:`20405`)
178222
- Improved performance of :meth:`read_csv` by faster tokenizing and faster parsing of small float numbers (:issue:`25784`)
223+
- Improved performance of :meth:`read_csv` by faster parsing of N/A and boolean values (:issue:`25804`)
179224

180225
.. _whatsnew_0250.bug_fixes:
181226

@@ -221,7 +266,7 @@ Numeric
221266
- Bug in :meth:`to_numeric` in which numbers were being coerced to float, even though ``errors`` was not ``coerce`` (:issue:`24910`)
222267
- Bug in error messages in :meth:`DataFrame.corr` and :meth:`Series.corr`. Added the possibility of using a callable. (:issue:`25729`)
223268
- Bug in :meth:`Series.divmod` and :meth:`Series.rdivmod` which would raise an (incorrect) ``ValueError`` rather than return a pair of :class:`Series` objects as result (:issue:`25557`)
224-
-
269+
- Raises a helpful exception when a non-numeric index is sent to :meth:`interpolate` with methods which require a numeric index. (:issue:`21662`)
225270
-
226271
-
227272

@@ -244,14 +289,14 @@ Strings
244289
Interval
245290
^^^^^^^^
246291

247-
-
292+
- Construction of :class:`Interval` is restricted to numeric, :class:`Timestamp` and :class:`Timedelta` endpoints (:issue:`23013`)
248293
-
249294
-
250295

251296
Indexing
252297
^^^^^^^^
253298

254-
-
299+
- Improved exception message when calling :meth:`DataFrame.iloc` with a list of non-numeric objects (:issue:`25753`).
255300
-
256301
-
257302

@@ -311,7 +356,7 @@ Reshaping
311356
- Bug in :func:`merge` when merging by index name would sometimes result in an incorrectly numbered index (:issue:`24212`)
312357
- :func:`to_records` now accepts dtypes to its ``column_dtypes`` parameter (:issue:`24895`)
313358
- Bug in :func:`concat` where order of ``OrderedDict`` (and ``dict`` in Python 3.6+) is not respected, when passed in as ``objs`` argument (:issue:`21510`)
314-
359+
- Bug in :func:`concat` where the resulting ``freq`` of two :class:`DatetimeIndex` with the same ``freq`` would be dropped (:issue:`3232`).
315360

316361
Sparse
317362
^^^^^^

doc/sphinxext/announce.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@
5656

5757

5858
def get_authors(revision_range):
59-
pat = u'^.*\\t(.*)$'
59+
pat = '^.*\\t(.*)$'
6060
lst_release, cur_release = [r.strip() for r in revision_range.split('..')]
6161

6262
# authors, in current release and previous to current release.
@@ -70,7 +70,7 @@ def get_authors(revision_range):
7070
pre.discard('Homu')
7171

7272
# Append '+' to new authors.
73-
authors = [s + u' +' for s in cur - pre] + [s for s in cur & pre]
73+
authors = [s + ' +' for s in cur - pre] + [s for s in cur & pre]
7474
authors.sort()
7575
return authors
7676

@@ -81,17 +81,17 @@ def get_pull_requests(repo, revision_range):
8181
# From regular merges
8282
merges = this_repo.git.log(
8383
'--oneline', '--merges', revision_range)
84-
issues = re.findall(u"Merge pull request \\#(\\d*)", merges)
84+
issues = re.findall("Merge pull request \\#(\\d*)", merges)
8585
prnums.extend(int(s) for s in issues)
8686

8787
# From Homu merges (Auto merges)
88-
issues = re. findall(u"Auto merge of \\#(\\d*)", merges)
88+
issues = re. findall("Auto merge of \\#(\\d*)", merges)
8989
prnums.extend(int(s) for s in issues)
9090

9191
# From fast forward squash-merges
9292
commits = this_repo.git.log(
9393
'--oneline', '--no-merges', '--first-parent', revision_range)
94-
issues = re.findall(u'^.*\\(\\#(\\d+)\\)$', commits, re.M)
94+
issues = re.findall('^.*\\(\\#(\\d+)\\)$', commits, re.M)
9595
prnums.extend(int(s) for s in issues)
9696

9797
# get PR data from github repo

pandas/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@
3636

3737
from datetime import datetime
3838

39+
from pandas._config import (get_option, set_option, reset_option,
40+
describe_option, option_context, options)
41+
3942
# let init-time option registration happen
4043
import pandas.core.config_init
4144

0 commit comments

Comments
 (0)