Skip to content

Commit c7c439b

Browse files
committed
Merge remote-tracking branch 'upstream/master' into GH8628
2 parents 01f7858 + b49aeac commit c7c439b

File tree

288 files changed

+12489
-11835
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

288 files changed

+12489
-11835
lines changed

.github/workflows/ci.yml

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,6 @@ jobs:
3737
ci/code_checks.sh lint
3838
if: always()
3939

40-
- name: Dependencies consistency
41-
run: |
42-
source activate pandas-dev
43-
ci/code_checks.sh dependencies
44-
if: always()
45-
4640
- name: Checks on imported code
4741
run: |
4842
source activate pandas-dev

.pre-commit-config.yaml

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,12 +56,44 @@ repos:
5656
- id: incorrect-sphinx-directives
5757
name: Check for incorrect Sphinx directives
5858
language: pygrep
59-
entry: >-
60-
\.\. (autosummary|contents|currentmodule|deprecated
61-
|function|image|important|include|ipython|literalinclude
62-
|math|module|note|raw|seealso|toctree|versionadded
63-
|versionchanged|warning):[^:]
59+
entry: |
60+
(?x)
61+
# Check for cases of e.g. .. warning: instead of .. warning::
62+
\.\.\ (
63+
autosummary|contents|currentmodule|deprecated|
64+
function|image|important|include|ipython|literalinclude|
65+
math|module|note|raw|seealso|toctree|versionadded|
66+
versionchanged|warning
67+
):[^:]
6468
files: \.(py|pyx|rst)$
69+
- id: non-standard-imports
70+
name: Check for non-standard imports
71+
language: pygrep
72+
entry: |
73+
(?x)
74+
# Check for imports from pandas.core.common instead of `import pandas.core.common as com`
75+
from\ pandas\.core\.common\ import|
76+
from\ pandas\.core\ import\ common|
77+
78+
# Check for imports from collections.abc instead of `from collections import abc`
79+
from\ collections\.abc\ import|
80+
81+
from\ numpy\ import\ nan
82+
types: [python]
83+
- id: non-standard-imports-in-tests
84+
name: Check for non-standard imports in test suite
85+
language: pygrep
86+
entry: |
87+
(?x)
88+
# Check for imports from pandas._testing instead of `import pandas._testing as tm`
89+
from\ pandas\._testing\ import|
90+
from\ pandas\ import\ _testing\ as\ tm|
91+
92+
# No direct imports from conftest
93+
conftest\ import|
94+
import\ conftest
95+
types: [python]
96+
files: ^pandas/tests/
6597
- id: incorrect-code-directives
6698
name: Check for incorrect code block or IPython directives
6799
language: pygrep

ci/code_checks.sh

Lines changed: 15 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,10 @@
1515
# $ ./ci/code_checks.sh code # checks on imported code
1616
# $ ./ci/code_checks.sh doctests # run doctests
1717
# $ ./ci/code_checks.sh docstrings # validate docstring errors
18-
# $ ./ci/code_checks.sh dependencies # check that dependencies are consistent
1918
# $ ./ci/code_checks.sh typing # run static type analysis
2019

21-
[[ -z "$1" || "$1" == "lint" || "$1" == "patterns" || "$1" == "code" || "$1" == "doctests" || "$1" == "docstrings" || "$1" == "dependencies" || "$1" == "typing" ]] || \
22-
{ echo "Unknown command $1. Usage: $0 [lint|patterns|code|doctests|docstrings|dependencies|typing]"; exit 9999; }
20+
[[ -z "$1" || "$1" == "lint" || "$1" == "patterns" || "$1" == "code" || "$1" == "doctests" || "$1" == "docstrings" || "$1" == "typing" ]] || \
21+
{ echo "Unknown command $1. Usage: $0 [lint|patterns|code|doctests|docstrings|typing]"; exit 9999; }
2322

2423
BASE_DIR="$(dirname $0)/.."
2524
RET=0
@@ -38,6 +37,12 @@ function invgrep {
3837
return $((! $EXIT_STATUS))
3938
}
4039

40+
function check_namespace {
41+
local -r CLASS="${1}"
42+
grep -R -l --include "*.py" " ${CLASS}(" pandas/tests | xargs grep -n "pd\.${CLASS}("
43+
test $? -gt 0
44+
}
45+
4146
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
4247
FLAKE8_FORMAT="##[error]%(path)s:%(row)s:%(col)s:%(code)s:%(text)s"
4348
INVGREP_PREPEND="##[error]"
@@ -48,31 +53,6 @@ fi
4853
### LINTING ###
4954
if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
5055

51-
echo "black --version"
52-
black --version
53-
54-
MSG='Checking black formatting' ; echo $MSG
55-
black . --check
56-
RET=$(($RET + $?)) ; echo $MSG "DONE"
57-
58-
# `setup.cfg` contains the list of error codes that are being ignored in flake8
59-
60-
echo "flake8 --version"
61-
flake8 --version
62-
63-
# pandas/_libs/src is C code, so no need to search there.
64-
MSG='Linting .py code' ; echo $MSG
65-
flake8 --format="$FLAKE8_FORMAT" .
66-
RET=$(($RET + $?)) ; echo $MSG "DONE"
67-
68-
MSG='Linting .pyx and .pxd code' ; echo $MSG
69-
flake8 --format="$FLAKE8_FORMAT" pandas --append-config=flake8/cython.cfg
70-
RET=$(($RET + $?)) ; echo $MSG "DONE"
71-
72-
MSG='Linting .pxi.in' ; echo $MSG
73-
flake8 --format="$FLAKE8_FORMAT" pandas/_libs --append-config=flake8/cython-template.cfg
74-
RET=$(($RET + $?)) ; echo $MSG "DONE"
75-
7656
# Check that cython casting is of the form `<type>obj` as opposed to `<type> obj`;
7757
# it doesn't make a difference, but we want to be internally consistent.
7858
# Note: this grep pattern is (intended to be) equivalent to the python
@@ -125,49 +105,11 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
125105
fi
126106
RET=$(($RET + $?)) ; echo $MSG "DONE"
127107

128-
echo "isort --version-number"
129-
isort --version-number
130-
131-
# Imports - Check formatting using isort see setup.cfg for settings
132-
MSG='Check import format using isort' ; echo $MSG
133-
ISORT_CMD="isort --quiet --check-only pandas asv_bench scripts web"
134-
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
135-
eval $ISORT_CMD | awk '{print "##[error]" $0}'; RET=$(($RET + ${PIPESTATUS[0]}))
136-
else
137-
eval $ISORT_CMD
138-
fi
139-
RET=$(($RET + $?)) ; echo $MSG "DONE"
140-
141108
fi
142109

143110
### PATTERNS ###
144111
if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
145112

146-
# Check for imports from pandas.core.common instead of `import pandas.core.common as com`
147-
# Check for imports from collections.abc instead of `from collections import abc`
148-
MSG='Check for non-standard imports' ; echo $MSG
149-
invgrep -R --include="*.py*" -E "from pandas.core.common import" pandas
150-
RET=$(($RET + $?)) ; echo $MSG "DONE"
151-
invgrep -R --include="*.py*" -E "from pandas.core import common" pandas
152-
RET=$(($RET + $?)) ; echo $MSG "DONE"
153-
invgrep -R --include="*.py*" -E "from collections.abc import" pandas
154-
RET=$(($RET + $?)) ; echo $MSG "DONE"
155-
invgrep -R --include="*.py*" -E "from numpy import nan" pandas
156-
RET=$(($RET + $?)) ; echo $MSG "DONE"
157-
158-
# Checks for test suite
159-
# Check for imports from pandas._testing instead of `import pandas._testing as tm`
160-
invgrep -R --include="*.py*" -E "from pandas._testing import" pandas/tests
161-
RET=$(($RET + $?)) ; echo $MSG "DONE"
162-
invgrep -R --include="*.py*" -E "from pandas import _testing as tm" pandas/tests
163-
RET=$(($RET + $?)) ; echo $MSG "DONE"
164-
165-
# No direct imports from conftest
166-
invgrep -R --include="*.py*" -E "conftest import" pandas/tests
167-
RET=$(($RET + $?)) ; echo $MSG "DONE"
168-
invgrep -R --include="*.py*" -E "import conftest" pandas/tests
169-
RET=$(($RET + $?)) ; echo $MSG "DONE"
170-
171113
MSG='Check for use of exec' ; echo $MSG
172114
invgrep -R --include="*.py*" -E "[^a-zA-Z0-9_]exec\(" pandas
173115
RET=$(($RET + $?)) ; echo $MSG "DONE"
@@ -234,6 +176,13 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
234176
MSG='Check code for instances of os.remove' ; echo $MSG
235177
invgrep -R --include="*.py*" --exclude "common.py" --exclude "test_writers.py" --exclude "test_store.py" -E "os\.remove" pandas/tests/
236178
RET=$(($RET + $?)) ; echo $MSG "DONE"
179+
180+
MSG='Check for inconsistent use of pandas namespace in tests' ; echo $MSG
181+
for class in "Series" "DataFrame" "Index"; do
182+
check_namespace ${class}
183+
RET=$(($RET + $?))
184+
done
185+
echo $MSG "DONE"
237186
fi
238187

239188
### CODE ###
@@ -354,15 +303,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
354303

355304
fi
356305

357-
### DEPENDENCIES ###
358-
if [[ -z "$CHECK" || "$CHECK" == "dependencies" ]]; then
359-
360-
MSG='Check that requirements-dev.txt has been generated from environment.yml' ; echo $MSG
361-
$BASE_DIR/scripts/generate_pip_deps_from_conda.py --compare --azure
362-
RET=$(($RET + $?)) ; echo $MSG "DONE"
363-
364-
fi
365-
366306
### TYPING ###
367307
if [[ -z "$CHECK" || "$CHECK" == "typing" ]]; then
368308

@@ -374,5 +314,4 @@ if [[ -z "$CHECK" || "$CHECK" == "typing" ]]; then
374314
RET=$(($RET + $?)) ; echo $MSG "DONE"
375315
fi
376316

377-
378317
exit $RET

doc/source/getting_started/index.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -533,7 +533,7 @@ pandas has great support for time series and has an extensive set of tools for w
533533
<div id="collapseTen" class="collapse" data-parent="#accordion">
534534
<div class="card-body">
535535

536-
Data sets do not only contain numerical data. pandas provides a wide range of functions to cleaning textual data and extract useful information from it.
536+
Data sets do not only contain numerical data. pandas provides a wide range of functions to clean textual data and extract useful information from it.
537537

538538
.. raw:: html
539539

doc/source/getting_started/intro_tutorials/10_text_data.rst

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -66,15 +66,15 @@ How to manipulate textual data?
6666
<ul class="task-bullet">
6767
<li>
6868

69-
Make all name characters lowercase
69+
Make all name characters lowercase.
7070

7171
.. ipython:: python
7272
7373
titanic["Name"].str.lower()
7474
7575
To make each of the strings in the ``Name`` column lowercase, select the ``Name`` column
76-
(see :ref:`tutorial on selection of data <10min_tut_03_subset>`), add the ``str`` accessor and
77-
apply the ``lower`` method. As such, each of the strings is converted element wise.
76+
(see the :ref:`tutorial on selection of data <10min_tut_03_subset>`), add the ``str`` accessor and
77+
apply the ``lower`` method. As such, each of the strings is converted element-wise.
7878

7979
.. raw:: html
8080

@@ -86,15 +86,15 @@ having a ``dt`` accessor, a number of
8686
specialized string methods are available when using the ``str``
8787
accessor. These methods have in general matching names with the
8888
equivalent built-in string methods for single elements, but are applied
89-
element-wise (remember :ref:`element wise calculations <10min_tut_05_columns>`?)
89+
element-wise (remember :ref:`element-wise calculations <10min_tut_05_columns>`?)
9090
on each of the values of the columns.
9191

9292
.. raw:: html
9393

9494
<ul class="task-bullet">
9595
<li>
9696

97-
Create a new column ``Surname`` that contains the surname of the Passengers by extracting the part before the comma.
97+
Create a new column ``Surname`` that contains the surname of the passengers by extracting the part before the comma.
9898

9999
.. ipython:: python
100100
@@ -135,7 +135,7 @@ More information on extracting parts of strings is available in the user guide s
135135
<ul class="task-bullet">
136136
<li>
137137

138-
Extract the passenger data about the Countesses on board of the Titanic.
138+
Extract the passenger data about the countesses on board of the Titanic.
139139

140140
.. ipython:: python
141141
@@ -145,15 +145,15 @@ Extract the passenger data about the Countesses on board of the Titanic.
145145
146146
titanic[titanic["Name"].str.contains("Countess")]
147147
148-
(*Interested in her story? See *\ `Wikipedia <https://en.wikipedia.org/wiki/No%C3%ABl_Leslie,_Countess_of_Rothes>`__\ *!*)
148+
(*Interested in her story? See* `Wikipedia <https://en.wikipedia.org/wiki/No%C3%ABl_Leslie,_Countess_of_Rothes>`__\ *!*)
149149

150150
The string method :meth:`Series.str.contains` checks for each of the values in the
151151
column ``Name`` if the string contains the word ``Countess`` and returns
152-
for each of the values ``True`` (``Countess`` is part of the name) of
152+
for each of the values ``True`` (``Countess`` is part of the name) or
153153
``False`` (``Countess`` is not part of the name). This output can be used
154154
to subselect the data using conditional (boolean) indexing introduced in
155155
the :ref:`subsetting of data tutorial <10min_tut_03_subset>`. As there was
156-
only one Countess on the Titanic, we get one row as a result.
156+
only one countess on the Titanic, we get one row as a result.
157157

158158
.. raw:: html
159159

@@ -220,7 +220,7 @@ we can do a selection using the ``loc`` operator, introduced in the
220220
<ul class="task-bullet">
221221
<li>
222222

223-
In the "Sex" column, replace values of "male" by "M" and values of "female" by "F"
223+
In the "Sex" column, replace values of "male" by "M" and values of "female" by "F".
224224

225225
.. ipython:: python
226226
@@ -256,7 +256,7 @@ a ``dictionary`` to define the mapping ``{from : to}``.
256256
<h4>REMEMBER</h4>
257257

258258
- String methods are available using the ``str`` accessor.
259-
- String methods work element wise and can be used for conditional
259+
- String methods work element-wise and can be used for conditional
260260
indexing.
261261
- The ``replace`` method is a convenient method to convert values
262262
according to a given dictionary.

doc/source/index.rst.template

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ pandas documentation
1717
`Source Repository <https://github.com/pandas-dev/pandas>`__ |
1818
`Issues & Ideas <https://github.com/pandas-dev/pandas/issues>`__ |
1919
`Q&A Support <https://stackoverflow.com/questions/tagged/pandas>`__ |
20-
`Mailing List <https://groups.google.com/forum/#!forum/pydata>`__
20+
`Mailing List <https://groups.google.com/g/pydata>`__
2121

2222
:mod:`pandas` is an open source, BSD-licensed library providing high-performance,
2323
easy-to-use data structures and data analysis tools for the `Python <https://www.python.org/>`__

doc/source/user_guide/computation.rst

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -652,9 +652,9 @@ parameter:
652652
:header: "``closed``", "Description", "Default for"
653653
:widths: 20, 30, 30
654654

655-
``right``, close right endpoint, time-based windows
655+
``right``, close right endpoint,
656656
``left``, close left endpoint,
657-
``both``, close both endpoints, fixed windows
657+
``both``, close both endpoints,
658658
``neither``, open endpoints,
659659

660660
For example, having the right endpoint open is useful in many problems that require that there is no contamination
@@ -681,9 +681,6 @@ from present information back to past information. This allows the rolling windo
681681
682682
df
683683
684-
Currently, this feature is only implemented for time-based windows.
685-
For fixed windows, the closed parameter cannot be set and the rolling window will always have both endpoints closed.
686-
687684
.. _stats.iter_rolling_window:
688685

689686
Iteration over window:

doc/source/whatsnew/v1.1.4.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ Fixed regressions
2121
- Fixed regression in :meth:`Series.astype` converting ``None`` to ``"nan"`` when casting to string (:issue:`36904`)
2222
- Fixed regression in :class:`RollingGroupby` causing a segmentation fault with Index of dtype object (:issue:`36727`)
2323
- Fixed regression in :meth:`DataFrame.resample(...).apply(...)` raising ``AttributeError`` when input was a :class:`DataFrame` and only a :class:`Series` was evaluated (:issue:`36951`)
24+
- Fixed regression in :class:`PeriodDtype` comparing both equal and unequal to its string representation (:issue:`37265`)
25+
- Fixed regression in certain offsets (:meth:`pd.offsets.Day() <pandas.tseries.offsets.Day>` and below) no longer being hashable (:issue:`37267`)
26+
- Fixed regression in :class:`StataReader` which required ``chunksize`` to be manually set when using an iterator to read a dataset (:issue:`37280`)
2427

2528
.. ---------------------------------------------------------------------------
2629

doc/source/whatsnew/v1.2.0.rst

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,8 @@ Other enhancements
221221
- :meth:`Rolling.var()` and :meth:`Rolling.std()` use Kahan summation and Welford's method to avoid numerical issues (:issue:`37051`)
222222
- :meth:`DataFrame.plot` now recognizes ``xlabel`` and ``ylabel`` arguments for plots of type ``scatter`` and ``hexbin`` (:issue:`37001`)
223223
- :class:`DataFrame` now supports ``divmod`` operation (:issue:`37165`)
224+
- :meth:`DataFrame.to_parquet` now returns a ``bytes`` object when no ``path`` argument is passed (:issue:`37105`)
225+
- :class:`Rolling` now supports the ``closed`` argument for fixed windows (:issue:`34315`)
224226

225227
.. _whatsnew_120.api_breaking.python:
226228

@@ -410,7 +412,7 @@ Conversion
410412
Strings
411413
^^^^^^^
412414
- Bug in :meth:`Series.to_string`, :meth:`DataFrame.to_string`, and :meth:`DataFrame.to_latex` adding a leading space when ``index=False`` (:issue:`24980`)
413-
-
415+
- Bug in :func:`to_numeric` raising a ``TypeError`` when attempting to convert a string dtype :class:`Series` containing only numeric strings and ``NA`` (:issue:`37262`)
414416
-
415417

416418

@@ -431,6 +433,7 @@ Indexing
431433
- Bug in indexing with boolean masks on datetime-like values sometimes returning a view instead of a copy (:issue:`36210`)
432434
- Bug in :meth:`DataFrame.__getitem__` and :meth:`DataFrame.loc.__getitem__` with :class:`IntervalIndex` columns and a numeric indexer (:issue:`26490`)
433435
- Bug in :meth:`Series.loc.__getitem__` with a non-unique :class:`MultiIndex` and an empty-list indexer (:issue:`13691`)
436+
- Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`MultiIndex` with a level named "0" (:issue:`37194`)
434437

435438
Missing
436439
^^^^^^^
@@ -503,6 +506,7 @@ Reshaping
503506
- Bug in :func:`crosstab` when using multiple columns with ``margins=True`` and ``normalize=True`` (:issue:`35144`)
504507
- Bug in :meth:`DataFrame.agg` with ``func={'name':<FUNC>}`` incorrectly raising ``TypeError`` when ``DataFrame.columns==['Name']`` (:issue:`36212`)
505508
- Bug in :meth:`Series.transform` would give incorrect results or raise when the argument ``func`` was dictionary (:issue:`35811`)
509+
- Bug in :func:`join` returning a non-deterministic level order for the resulting :class:`MultiIndex` (:issue:`36910`)
506510
-
507511

508512
Sparse
@@ -517,15 +521,15 @@ ExtensionArray
517521
- Fixed bug where :class:`DataFrame` column set to scalar extension type via a dict instantiation was considered an object type rather than the extension type (:issue:`35965`)
518522
- Fixed bug where ``astype()`` with equal dtype and ``copy=False`` would return a new object (:issue:`284881`)
519523
- Fixed bug when applying a NumPy ufunc with multiple outputs to a :class:`pandas.arrays.IntegerArray` returning None (:issue:`36913`)
520-
524+
- Fixed an inconsistency in :class:`PeriodArray`'s ``__init__`` signature to those of :class:`DatetimeArray` and :class:`TimedeltaArray` (:issue:`37289`)
521525

522526
Other
523527
^^^^^
524528

525529
- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly raising ``AssertionError`` instead of ``ValueError`` when invalid parameter combinations are passed (:issue:`36045`)
526530
- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` with numeric values and string ``to_replace`` (:issue:`34789`)
527531
- Fixed bug in metadata propagation incorrectly copying DataFrame columns as metadata when the column name overlaps with the metadata name (:issue:`37037`)
528-
- Fixed metadata propagation in the :class:`Series.dt` and :class:`Series.str` accessors (:issue:`28283`)
532+
- Fixed metadata propagation in the :class:`Series.dt` and :class:`Series.str` accessors and :class:`DataFrame.duplicated` and :class:`DataFrame.stack` methods (:issue:`28283`)
529533
- Bug in :meth:`Index.union` behaving differently depending on whether operand is a :class:`Index` or other list-like (:issue:`36384`)
530534
- Passing an array with 2 or more dimensions to the :class:`Series` constructor now raises the more specific ``ValueError``, from a bare ``Exception`` previously (:issue:`35744`)
531535

0 commit comments

Comments
 (0)