diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 6006d09bc3e78..5414efa0f8e32 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -121,7 +121,11 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
 
     # Imports - Check formatting using isort see setup.cfg for settings
     MSG='Check import format using isort' ; echo $MSG
+<<<<<<< HEAD
+    ISORT_CMD="isort --quiet --check-only pandas asv_bench scripts"
+=======
     ISORT_CMD="isort --quiet --check-only pandas asv_bench scripts web"
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
     if [[ "$GITHUB_ACTIONS" == "true" ]]; then
        eval $ISORT_CMD | awk '{print "##[error]" $0}'; RET=$(($RET + ${PIPESTATUS[0]}))
     else
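For context: `code_checks.sh` defines an `invgrep` helper earlier in the script — grep with an inverted exit status, so the CI fails when an unwanted pattern *is* found. A rough Python analogue of that idea (the function name and signature here are illustrative, not pandas code):

```python
import re
import sys


def invgrep(pattern: str, text: str, prepend: str = "##[error]") -> int:
    """Hypothetical Python sketch of code_checks.sh's invgrep helper:
    print offending lines and return 1 (fail) only when the unwanted
    pattern is found; return 0 (success) when it is absent."""
    hits = [line for line in text.splitlines() if re.search(pattern, line)]
    for line in hits:
        print(f"{prepend}{line}", file=sys.stderr)
    return 1 if hits else 0
```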
diff --git a/ci/deps/azure-37-32bit.yaml b/ci/deps/azure-37-32bit.yaml
@@ ... @@ dependencies:
       - cython>=0.29.16
+<<<<<<< HEAD:ci/deps/azure-36-32bit.yaml
+=======
+      - numpy>=1.16.5
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683:ci/deps/azure-37-32bit.yaml
       - pytest>=5.0.1
diff --git a/ci/deps/azure-windows-37.yaml b/ci/deps/azure-windows-37.yaml
index 1d15ca41c0f8e..2ce428e28ac75 100644
--- a/ci/deps/azure-windows-37.yaml
+++ b/ci/deps/azure-windows-37.yaml
@@ -8,7 +8,11 @@ dependencies:
   # tools
   - cython>=0.29.16
   - pytest>=5.0.1
+<<<<<<< HEAD
+  - pytest-xdist>=1.21,<2.0.0  # GH 35737
+=======
   - pytest-xdist>=1.21
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
   - hypothesis>=3.58.0
   - pytest-azurepipelines
 
@@ -30,7 +34,11 @@ dependencies:
   - pytables
   - python-dateutil
   - pytz
+<<<<<<< HEAD
+  - s3fs>=0.4.0,<0.5.0
+=======
   - s3fs>=0.4.2
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
   - scipy
   - sqlalchemy
   - xlrd
diff --git a/ci/deps/azure-windows-38.yaml b/ci/deps/azure-windows-38.yaml
index 23bede5eb26f1..cdfecda007977 100644
--- a/ci/deps/azure-windows-38.yaml
+++ b/ci/deps/azure-windows-38.yaml
@@ -8,7 +8,11 @@ dependencies:
   # tools
   - cython>=0.29.16
   - pytest>=5.0.1
+<<<<<<< HEAD:ci/deps/azure-windows-36.yaml
+  - pytest-xdist>=1.21,<2.0.0  # GH 35737
+=======
   - pytest-xdist>=1.21
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683:ci/deps/azure-windows-38.yaml
   - hypothesis>=3.58.0
   - pytest-azurepipelines
diff --git a/ci/deps/travis-37-locale.yaml b/ci/deps/travis-37-locale.yaml
index 306f74a0101e3..1b51dcbbaaf28 100644
--- a/ci/deps/travis-37-locale.yaml
+++ b/ci/deps/travis-37-locale.yaml
@@ -26,9 +26,14 @@ dependencies:
   - numpy
   - openpyxl
   - pandas-gbq=0.12.0
+<<<<<<< HEAD:ci/deps/travis-36-locale.yaml
+  - psycopg2=2.6.2
+  - pyarrow>=0.13.0  # GH #35813
+=======
   - pyarrow>=0.17
   - psycopg2=2.7
   - pyarrow>=0.15.0  # GH #35813
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683:ci/deps/travis-37-locale.yaml
   - pymysql=0.7.11
   - pytables
   - python-dateutil
diff --git a/ci/setup_env.sh b/ci/setup_env.sh
index aa43d8b7dd00a..065f9e56ea171 100755
--- a/ci/setup_env.sh
+++ b/ci/setup_env.sh
@@ -148,7 +148,7 @@ python setup.py build_ext -q -i -j2
 # - py35_compat
 # - py36_32bit
 echo "[Updating pip]"
-python -m pip install --no-deps -U pip wheel setuptools
+python -m pip install --no-deps -U pip wheel "setuptools<50.0.0"
 
 echo "[Install pandas]"
 python -m pip install --no-build-isolation -e .
diff --git a/doc/source/whatsnew/index.rst b/doc/source/whatsnew/index.rst
index 1827d151579a1..9f7ea09464182 100644
--- a/doc/source/whatsnew/index.rst
+++ b/doc/source/whatsnew/index.rst
@@ -24,6 +24,10 @@ Version 1.1
 .. toctree::
    :maxdepth: 2
 
+<<<<<<< HEAD
+   v1.1.3
+=======
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
    v1.1.2
    v1.1.1
    v1.1.0
diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst
index d1a66256454ca..df09ec30b0f0a 100644
--- a/doc/source/whatsnew/v1.1.2.rst
+++ b/doc/source/whatsnew/v1.1.2.rst
@@ -1,7 +1,12 @@
 .. _whatsnew_112:
 
+<<<<<<< HEAD
+What's new in 1.1.2 (September 8, 2020)
+---------------------------------------
+=======
 What's new in 1.1.2 (??)
 ------------------------
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
 
 These are the changes in pandas 1.1.2. See :ref:`release` for a full changelog
 including other versions of pandas.
@@ -16,12 +21,23 @@ Fixed regressions
 ~~~~~~~~~~~~~~~~~
 - Regression in :meth:`DatetimeIndex.intersection` incorrectly raising ``AssertionError`` when intersecting against a list (:issue:`35876`)
 - Fix regression in updating a column inplace (e.g. using ``df['col'].fillna(.., inplace=True)``) (:issue:`35731`)
+<<<<<<< HEAD
+- Fix regression in :meth:`DataFrame.append` mixing tz-aware and tz-naive datetime columns (:issue:`35460`)
 - Performance regression for :meth:`RangeIndex.format` (:issue:`35712`)
+- Regression where :meth:`MultiIndex.get_loc` would return a slice spanning the full index when passed an empty list (:issue:`35878`)
+=======
+- Performance regression for :meth:`RangeIndex.format` (:issue:`35712`)
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
 - Fix regression in invalid cache after an indexing operation; this can manifest when setting which does not update the data (:issue:`35521`)
 - Regression in :meth:`DataFrame.replace` where a ``TypeError`` would be raised when attempting to replace elements of type :class:`Interval` (:issue:`35931`)
 - Fix regression in pickle roundtrip of the ``closed`` attribute of :class:`IntervalIndex` (:issue:`35658`)
 - Fixed regression in :meth:`DataFrameGroupBy.agg` where a ``ValueError: buffer source array is read-only`` would be raised when the underlying array is read-only (:issue:`36014`)
+<<<<<<< HEAD
+- Fixed regression in :meth:`Series.groupby.rolling` number of levels of :class:`MultiIndex` in input was compressed to one (:issue:`36018`)
+- Fixed regression in :class:`DataFrameGroupBy` on an empty :class:`DataFrame` (:issue:`36197`)
+=======
 -
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
 
 .. ---------------------------------------------------------------------------
 
@@ -32,10 +48,22 @@ Bug fixes
 - Bug in :meth:`DataFrame.eval` with ``object`` dtype column binary operations (:issue:`35794`)
 - Bug in :class:`Series` constructor raising a ``TypeError`` when constructing sparse datetime64 dtypes (:issue:`35762`)
 - Bug in :meth:`DataFrame.apply` with ``result_type="reduce"`` returning with incorrect index (:issue:`35683`)
+<<<<<<< HEAD
+- Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` not respecting the ``errors`` argument when set to ``"ignore"`` for extension dtypes (:issue:`35471`)
+- Bug in :meth:`DateTimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should be ``""`` (:issue:`35712`)
+- Bug in :meth:`Float64Index.__contains__` incorrectly raising ``TypeError`` instead of returning ``False`` (:issue:`35788`)
+- Bug in :class:`Series` constructor incorrectly raising a ``TypeError`` when passed an ordered set (:issue:`36044`)
+- Bug in :meth:`Series.dt.isocalendar` and :meth:`DatetimeIndex.isocalendar` that returned incorrect year for certain dates (:issue:`36032`)
+- Bug in :class:`DataFrame` indexing returning an incorrect :class:`Series` in some cases when the series has been altered and a cache not invalidated (:issue:`33675`)
+- Bug in :meth:`DataFrame.corr` causing subsequent indexing lookups to be incorrect (:issue:`35882`)
+- Bug in :meth:`import_optional_dependency` returning incorrect package names in cases where package name is different from import name (:issue:`35948`)
+- Bug when setting empty :class:`DataFrame` column to a :class:`Series` in preserving name of index in frame (:issue:`31368`)
+=======
 - Bug in :meth:`DateTimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should be ``""`` (:issue:`35712`)
 - Bug in :meth:`Float64Index.__contains__` incorrectly raising ``TypeError`` instead of returning ``False`` (:issue:`35788`)
 - Bug in :meth:`Series.dt.isocalendar` and :meth:`DatetimeIndex.isocalendar` that returned incorrect year for certain dates (:issue:`36032`)
 - Bug in :class:`DataFrame` indexing returning an incorrect :class:`Series` in some cases when the series has been altered and a cache not invalidated (:issue:`33675`)
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
 
 .. ---------------------------------------------------------------------------
 
@@ -44,6 +72,10 @@ Bug fixes
 Other
 ~~~~~
 - :meth:`factorize` now supports ``na_sentinel=None`` to include NaN in the uniques of the values and remove ``dropna`` keyword which was unintentionally exposed to public facing API in 1.1 version from :meth:`factorize` (:issue:`35667`)
+<<<<<<< HEAD
+- :meth:`DataFrame.plot` and :meth:`Series.plot` raise ``UserWarning`` about usage of ``FixedFormatter`` and ``FixedLocator`` (:issue:`35684` and :issue:`35945`)
+=======
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
 
 .. ---------------------------------------------------------------------------
 
@@ -52,4 +84,8 @@ Other
 Contributors
 ~~~~~~~~~~~~
 
+<<<<<<< HEAD
+.. contributors:: v1.1.1..v1.1.2
+=======
 .. contributors:: v1.1.1..v1.1.2|HEAD
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst
new file mode 100644
index 0000000000000..e3161012da5d1
--- /dev/null
+++ b/doc/source/whatsnew/v1.1.3.rst
@@ -0,0 +1,42 @@
+.. _whatsnew_113:
+
+What's new in 1.1.3 (??)
+------------------------
+
+These are the changes in pandas 1.1.3. See :ref:`release` for a full changelog
+including other versions of pandas.
+
+{{ header }}
+
+.. ---------------------------------------------------------------------------
+
+.. _whatsnew_113.regressions:
+
+Fixed regressions
+~~~~~~~~~~~~~~~~~
+-
+
+.. ---------------------------------------------------------------------------
+
+.. _whatsnew_113.bug_fixes:
+
+Bug fixes
+~~~~~~~~~
+-
+
+.. ---------------------------------------------------------------------------
+
+.. _whatsnew_113.other:
+
+Other
+~~~~~
+-
+
+.. ---------------------------------------------------------------------------
+
+.. _whatsnew_113.contributors:
+
+Contributors
+~~~~~~~~~~~~
+
+.. contributors:: v1.1.2..v1.1.3|HEAD
diff --git a/environment.yml b/environment.yml
index 4622aac1dc6f8..5a4be68d2452e 100644
--- a/environment.yml
+++ b/environment.yml
@@ -21,7 +21,11 @@ dependencies:
   - flake8-comprehensions>=3.1.0  # used by flake8, linting of unnecessary comprehensions
   - flake8-rst>=0.6.0,<=0.7.0  # linting of code blocks in rst files
   - isort>=5.2.1  # check that imports are in the right order
+<<<<<<< HEAD
+  - mypy=0.730
+=======
   - mypy=0.782
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
   - pycodestyle  # used by flake8
 
 # documentation
@@ -51,7 +55,10 @@ dependencies:
   - botocore>=1.11
   - hypothesis>=3.82
   - moto  # mock S3
+<<<<<<< HEAD
+=======
   - flask
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
   - pytest>=5.0.1
   - pytest-cov
   - pytest-xdist>=1.21
diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx
index abf38265ddc6d..8cf2fa2b50130 100644
--- a/pandas/_libs/missing.pyx
+++ b/pandas/_libs/missing.pyx
@@ -16,10 +16,17 @@ from pandas._libs.tslibs.nattype cimport (
     is_null_datetimelike,
 )
 from pandas._libs.tslibs.np_datetime cimport get_datetime64_value, get_timedelta64_value
+<<<<<<< HEAD
+
+from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op
+from pandas.compat import is_platform_32bit
+
+=======
 
 from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op
 from pandas.compat import IS64
 
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
 cdef:
     float64_t INF = np.inf
     float64_t NEGINF = -INF
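The missing.pyx conflict is the old `is_platform_32bit` helper versus the newer `IS64` flag from `pandas.compat`. Both reduce to a pointer-size check; a minimal sketch of that detection, using only the standard library (the exact pandas definitions may differ in detail):

```python
import sys

# A 64-bit interpreter is commonly detected by checking that sys.maxsize
# exceeds the 32-bit range; pandas.compat.IS64 is defined along these lines.
IS64 = sys.maxsize > 2 ** 32


def is_platform_32bit() -> bool:
    # The older helper is effectively the negation of IS64.
    return not IS64
```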
diff --git a/pandas/_testing.py b/pandas/_testing.py
index 04d36749a3d8c..4c7689787b179 100644
--- a/pandas/_testing.py
+++ b/pandas/_testing.py
@@ -2897,7 +2897,11 @@ def convert_rows_list_to_csv_str(rows_list: List[str]):
     return expected
 
 
+<<<<<<< HEAD
+def external_error_raised(expected_exception: Type[Exception],) -> ContextManager:
+=======
 def external_error_raised(expected_exception: Type[Exception]) -> ContextManager:
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
     """
     Helper function to mark pytest.raises that have an external error message.
diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py
index 689c7c889ef66..40688a3978cfc 100644
--- a/pandas/compat/_optional.py
+++ b/pandas/compat/_optional.py
@@ -33,6 +33,19 @@
     "numba": "0.46.0",
 }
 
+# A mapping from import name to package name (on PyPI) for packages where
+# these two names are different.
+
+INSTALL_MAPPING = {
+    "bs4": "beautifulsoup4",
+    "bottleneck": "Bottleneck",
+    "lxml.etree": "lxml",
+    "odf": "odfpy",
+    "pandas_gbq": "pandas-gbq",
+    "sqlalchemy": "SQLAlchemy",
+    "jinja2": "Jinja2",
+}
+
 
 def _get_version(module: types.ModuleType) -> str:
     version = getattr(module, "__version__", None)
@@ -82,9 +95,13 @@ def import_optional_dependency(
     is False, or when the package's version is too old and `on_version`
     is ``'warn'``.
     """
+
+    package_name = INSTALL_MAPPING.get(name)
+    install_name = package_name if package_name is not None else name
+
     msg = (
-        f"Missing optional dependency '{name}'. {extra} "
-        f"Use pip or conda to install {name}."
+        f"Missing optional dependency '{install_name}'. {extra} "
+        f"Use pip or conda to install {install_name}."
     )
     try:
         module = importlib.import_module(name)
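The `INSTALL_MAPPING` addition exists so the ImportError names the package a user can actually install (`bs4` is the import name, but `beautifulsoup4` is what `pip install` expects). A minimal sketch of the lookup the hunk performs — the helper name here is illustrative:

```python
# Names mirror the hunk above; the fallback to the import name itself is
# the behaviour being illustrated.
INSTALL_MAPPING = {
    "bs4": "beautifulsoup4",
    "pandas_gbq": "pandas-gbq",
    "sqlalchemy": "SQLAlchemy",
}


def install_name(import_name: str) -> str:
    # Use the PyPI name when it differs, otherwise the import name.
    return INSTALL_MAPPING.get(import_name, import_name)


assert install_name("bs4") == "beautifulsoup4"
assert install_name("numpy") == "numpy"
```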
diff --git a/pandas/core/construction.py b/pandas/core/construction.py
index 9d6c2789af25b..4b1f2d4dfa9c9 100644
--- a/pandas/core/construction.py
+++ b/pandas/core/construction.py
@@ -50,7 +50,13 @@
 import pandas.core.common as com
 
 if TYPE_CHECKING:
+<<<<<<< HEAD
+    from pandas.core.arrays import ExtensionArray  # noqa: F401
+    from pandas.core.indexes.api import Index  # noqa: F401
+    from pandas.core.series import Series  # noqa: F401
+=======
     from pandas import ExtensionArray, Index, Series
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
 
 
 def array(
@@ -438,7 +444,12 @@ def sanitize_array(
             subarr = subarr.copy()
         return subarr
 
-    elif isinstance(data, (list, tuple)) and len(data) > 0:
+    elif isinstance(data, (list, tuple, abc.Set, abc.ValuesView)) and len(data) > 0:
+        if isinstance(data, set):
+            # Raise only for unordered sets, e.g., not for dict_keys
+            raise TypeError("Set type is unordered")
+        data = list(data)
+
         if dtype is not None:
             subarr = _try_cast(data, dtype, copy, raise_cast_failure)
         else:
@@ -450,8 +461,6 @@ def sanitize_array(
         # GH#16804
         arr = np.arange(data.start, data.stop, data.step, dtype="int64")
         subarr = _try_cast(arr, dtype, copy, raise_cast_failure)
-    elif isinstance(data, abc.Set):
-        raise TypeError("Set type is unordered")
     elif lib.is_scalar(data) and index is not None and dtype is not None:
         data = maybe_cast_to_datetime(data, dtype)
         if not lib.is_scalar(data):
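With this change `sanitize_array` accepts ordered collections such as `dict.keys()` and `dict.values()` while still rejecting plain builtin sets, which have no defined order (GH36044). In terms of observable behaviour:

```python
import pandas as pd

# dict views preserve insertion order, so they are now accepted
pd.Series({"a": 1, "b": 2}.keys())    # Series(["a", "b"])
pd.Series({"a": 1, "b": 2}.values())  # Series([1, 2])

# a builtin set still raises, since its iteration order is undefined
try:
    pd.Series({1, 2, 3})
except TypeError as err:
    print(err)  # "Set type is unordered"
```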
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index 9902016475b22..dd005752a4832 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -148,15 +148,17 @@ def is_nonempty(x) -> bool:
     any_ea = any(is_extension_array_dtype(x.dtype) for x in to_concat)
 
     if any_ea:
+        # we ignore axis here, as internally concatting with EAs is always
+        # for axis=0
         if not single_dtype:
             target_dtype = find_common_type([x.dtype for x in to_concat])
             to_concat = [_cast_to_common_type(arr, target_dtype) for arr in to_concat]
 
-        if isinstance(to_concat[0], ExtensionArray) and axis == 0:
+        if isinstance(to_concat[0], ExtensionArray):
             cls = type(to_concat[0])
             return cls._concat_same_type(to_concat)
         else:
-            return np.concatenate(to_concat, axis=axis)
+            return np.concatenate(to_concat)
 
     elif _contains_datetime or "timedelta" in typs:
         return concat_datetime(to_concat, axis=axis, typs=typs)
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index e321fdd9b3a9b..a5df90228703d 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -395,7 +395,11 @@ def _hash_categories(categories, ordered: Ordered = True) -> int:
         from pandas.core.dtypes.common import DT64NS_DTYPE, is_datetime64tz_dtype
         from pandas.core.util.hashing import (
+<<<<<<< HEAD
+            _combine_hash_arrays,
+=======
             combine_hash_arrays,
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
             hash_array,
             hash_tuples,
         )
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index c48bec9b670ad..95aed996dddd9 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3206,9 +3206,11 @@ def _ensure_valid_index(self, value):
                     "and a value that cannot be converted to a Series"
                 ) from err
 
-            self._mgr = self._mgr.reindex_axis(
-                value.index.copy(), axis=1, fill_value=np.nan
-            )
+            # GH31368 preserve name of index
+            index_copy = value.index.copy()
+            index_copy.name = self.index.name
+
+            self._mgr = self._mgr.reindex_axis(index_copy, axis=1, fill_value=np.nan)
 
     def _box_col_values(self, values, loc: int) -> Series:
         """
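The frame.py change keeps the empty frame's own index *name* when its index is grown to fit an assigned Series (GH31368). Observable effect, mirroring the test added later in this diff:

```python
import pandas as pd

df = pd.DataFrame({}, index=pd.RangeIndex(0, name="df_index"))
ser = pd.Series(1.23, index=pd.RangeIndex(4, name="series_index"))

df["series"] = ser
# The frame keeps its own index name instead of adopting the Series's
assert df.index.name == "df_index"
```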
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index f66b009e6d505..080ece8547479 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -2725,6 +2725,8 @@ def get_loc(self, key, method=None):
                 "currently supported for MultiIndex"
             )
 
+        hash(key)
+
         def _maybe_to_slice(loc):
             """convert integer indexer to boolean mask or slice if possible"""
             if not isinstance(loc, np.ndarray) or loc.dtype != "int64":
@@ -2739,8 +2741,7 @@ def _maybe_to_slice(loc):
                 mask[loc] = True
             return mask
 
-        if not isinstance(key, (tuple, list)):
-            # not including list here breaks some indexing, xref #30892
+        if not isinstance(key, tuple):
             loc = self._get_level_indexer(key, level=0)
             return _maybe_to_slice(loc)
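Calling `hash(key)` up front makes unhashable keys, such as an empty list, fail fast with ``TypeError: unhashable type`` instead of silently matching a slice spanning the whole index (GH35878). For example:

```python
import pandas as pd

idx = pd.MultiIndex.from_tuples([("a", 1), ("b", 2)])
idx.get_loc(("a", 1))  # fine: tuples are hashable

try:
    idx.get_loc([])  # lists are unhashable -> rejected immediately
except TypeError as err:
    print(err)  # "unhashable type: 'list'"
```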
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 3bcd4debbf41a..a7aab5a2e8950 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -581,8 +581,13 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"):
             # force the copy here
             if self.is_extension:
-                # TODO: Should we try/except this astype?
-                values = self.values.astype(dtype)
+                try:
+                    values = self.values.astype(dtype)
+                except (ValueError, TypeError):
+                    if errors == "ignore":
+                        values = self.values
+                    else:
+                        raise
             else:
                 if issubclass(dtype.type, str):
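The try/except makes `errors="ignore"` meaningful for extension dtypes: a failed cast returns the original values instead of propagating (GH35471). In user-facing terms:

```python
import pandas as pd

ser = pd.Series(["x", "y", "z"], dtype="category")

# errors="ignore" now returns the original values when the cast fails...
result = ser.astype(float, errors="ignore")
assert result.dtype == "category"

# ...while errors="raise" (the default) still propagates the failure
try:
    ser.astype(float)
except (ValueError, TypeError) as err:
    print(type(err).__name__)
```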
diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py
index b45f0890cafa4..513c5fed1ca62 100644
--- a/pandas/core/internals/concat.py
+++ b/pandas/core/internals/concat.py
@@ -24,7 +24,7 @@
 from pandas.core.dtypes.missing import isna
 
 import pandas.core.algorithms as algos
-from pandas.core.arrays import ExtensionArray
+from pandas.core.arrays import DatetimeArray, ExtensionArray
 from pandas.core.internals.blocks import make_block
 from pandas.core.internals.managers import BlockManager
 
@@ -335,9 +335,13 @@ def _concatenate_join_units(join_units, concat_axis, copy):
         # the non-EA values are 2D arrays with shape (1, n)
         to_concat = [t if isinstance(t, ExtensionArray) else t[0, :] for t in to_concat]
         concat_values = concat_compat(to_concat, axis=0)
-        if not isinstance(concat_values, ExtensionArray):
+        if not isinstance(concat_values, ExtensionArray) or (
+            isinstance(concat_values, DatetimeArray) and concat_values.tz is None
+        ):
             # if the result of concat is not an EA but an ndarray, reshape to
             # 2D to put it a non-EA Block
+            # special case DatetimeArray, which *is* an EA, but is put in a
+            # consolidated 2D block
             concat_values = np.atleast_2d(concat_values)
     else:
         concat_values = concat_compat(to_concat, axis=concat_axis)
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 57a4a8c2ace8a..fe39e93ccbc20 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1,11 +1,21 @@
 from collections import defaultdict
 import itertools
+<<<<<<< HEAD
+import operator
+import re
+from typing import (
+=======
 from typing import (
     Any,
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
     DefaultDict,
     Dict,
     List,
     Optional,
+<<<<<<< HEAD
+    Pattern,
+=======
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
     Sequence,
     Tuple,
     TypeVar,
@@ -691,7 +701,6 @@ def get_numeric_data(self, copy: bool = False) -> "BlockManager":
         copy : bool, default False
             Whether to copy the blocks
         """
-        self._consolidate_inplace()
         return self._combine([b for b in self.blocks if b.is_numeric], copy)
 
     def _combine(self: T, blocks: List[Block], copy: bool = True) -> T:
@@ -1872,6 +1881,83 @@ def _merge_blocks(
     return blocks
 
 
+<<<<<<< HEAD
+def _compare_or_regex_search(
+    a: ArrayLike,
+    b: Union[Scalar, Pattern],
+    regex: bool = False,
+    mask: Optional[ArrayLike] = None,
+) -> Union[ArrayLike, bool]:
+    """
+    Compare two array_like inputs of the same shape or two scalar values
+
+    Calls operator.eq or re.search, depending on regex argument. If regex is
+    True, perform an element-wise regex matching.
+
+    Parameters
+    ----------
+    a : array_like
+    b : scalar or regex pattern
+    regex : bool, default False
+    mask : array_like or None (default)
+
+    Returns
+    -------
+    mask : array_like of bool
+    """
+
+    def _check_comparison_types(
+        result: Union[ArrayLike, bool], a: ArrayLike, b: Union[Scalar, Pattern],
+    ):
+        """
+        Raises an error if the two arrays (a,b) cannot be compared.
+        Otherwise, returns the comparison result as expected.
+        """
+        if is_scalar(result) and isinstance(a, np.ndarray):
+            type_names = [type(a).__name__, type(b).__name__]
+
+            if isinstance(a, np.ndarray):
+                type_names[0] = f"ndarray(dtype={a.dtype})"
+
+            raise TypeError(
+                f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}"
+            )
+
+    if not regex:
+        op = lambda x: operator.eq(x, b)
+    else:
+        op = np.vectorize(
+            lambda x: bool(re.search(b, x))
+            if isinstance(x, str) and isinstance(b, (str, Pattern))
+            else False
+        )
+
+    # GH#32621 use mask to avoid comparing to NAs
+    if mask is None and isinstance(a, np.ndarray) and not isinstance(b, np.ndarray):
+        mask = np.reshape(~(isna(a)), a.shape)
+    if isinstance(a, np.ndarray):
+        a = a[mask]
+
+    if is_datetimelike_v_numeric(a, b) or is_numeric_v_string_like(a, b):
+        # GH#29553 avoid deprecation warnings from numpy
+        _check_comparison_types(False, a, b)
+        return False
+
+    result = op(a)
+
+    if isinstance(result, np.ndarray) and mask is not None:
+        # The shape of the mask can differ to that of the result
+        # since we may compare only a subset of a's or b's elements
+        tmp = np.zeros(mask.shape, dtype=np.bool_)
+        tmp[mask] = result
+        result = tmp
+
+    _check_comparison_types(result, a, b)
+    return result
+
+
+=======
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
 def _fast_count_smallints(arr: np.ndarray) -> np.ndarray:
     """Faster version of set(arr) for sequences of small numbers."""
     counts = np.bincount(arr.astype(np.int_))
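The restored `_compare_or_regex_search` does element-wise regex matching while masking out NA positions so they are never handed to `re.search`, and then scatters the partial result back into a full-shape boolean array. The core pattern, reduced to plain numpy (the helper name here is hypothetical, not pandas API):

```python
import re

import numpy as np


def masked_regex_match(values: np.ndarray, pattern: str) -> np.ndarray:
    # Compare only non-NA string positions; everything else stays False.
    mask = np.array([isinstance(v, str) for v in values], dtype=bool)
    op = np.vectorize(lambda x: bool(re.search(pattern, x)))
    result = np.zeros(values.shape, dtype=bool)
    if mask.any():
        # The op's output matches the masked subset, so scatter it back
        # into the full-shape result, as the pandas helper does.
        result[mask] = op(values[mask])
    return result


arr = np.array(["foo", "bar", None], dtype=object)
print(masked_regex_match(arr, "^ba"))  # [False  True False]
```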
diff --git a/pandas/core/reshape/util.py b/pandas/core/reshape/util.py
index a1bf3f8ee4119..1408a29fbf498 100644
--- a/pandas/core/reshape/util.py
+++ b/pandas/core/reshape/util.py
@@ -39,7 +39,11 @@ def cartesian_product(X):
     lenX = np.fromiter((len(x) for x in X), dtype=np.intp)
     cumprodX = np.cumproduct(lenX)
 
+    msg = "Product space too large to allocate arrays!"
+    if np.any(cumprodX < 0):
+        raise ValueError(msg)
+
     a = np.roll(cumprodX, 1)
     a[0] = 1
 
     if cumprodX[-1] != 0:
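`np.cumproduct` works in fixed-width integers, so an oversized product space wraps around rather than raising; the guard detects the wrap through a negative intermediate value (GH31355). A small standalone demonstration of the check, not the pandas code itself:

```python
import numpy as np

# 22**12 * 15128 exceeds the int64 range, so the running product wraps
# around; on this input the final cumulative product comes out negative.
lens = np.array([22] * 12 + [15128], dtype=np.int64)
cumprod = np.cumprod(lens)

if np.any(cumprod < 0):  # the overflow shows up as a sign flip
    raise ValueError("Product space too large to allocate arrays!")
```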
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index 558c0eeb0ea65..786fac06e8097 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -2211,17 +2211,21 @@ def _apply(
         # Compose MultiIndex result from grouping levels then rolling level
         # Aggregate the MultiIndex data as tuples then the level names
         grouped_object_index = self.obj.index
+<<<<<<< HEAD
+        grouped_index_name = [*grouped_object_index.names]
+=======
         grouped_index_name = [grouped_object_index.name]
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
         groupby_keys = [grouping.name for grouping in self._groupby.grouper._groupings]
         result_index_names = groupby_keys + grouped_index_name
 
         result_index_data = []
         for key, values in self._groupby.grouper.indices.items():
             for value in values:
-                if not is_list_like(key):
-                    data = [key, grouped_object_index[value]]
-                else:
-                    data = [*key, grouped_object_index[value]]
+                data = [
+                    *com.maybe_make_list(key),
+                    *com.maybe_make_list(grouped_object_index[value]),
+                ]
                 result_index_data.append(tuple(data))
 
         result_index = MultiIndex.from_tuples(
@@ -2240,10 +2244,12 @@ def _create_blocks(self, obj: FrameOrSeriesUnion):
         """
         # Ensure the object we're rolling over is monotonically sorted relative
         # to the groups
-        groupby_order = np.concatenate(
-            list(self._groupby.grouper.indices.values())
-        ).astype(np.int64)
-        obj = obj.take(groupby_order)
+        # GH 36197
+        if not obj.empty:
+            groupby_order = np.concatenate(
+                list(self._groupby.grouper.indices.values())
+            ).astype(np.int64)
+            obj = obj.take(groupby_order)
         return super()._create_blocks(obj)
 
     def _get_cython_func_type(self, func: str) -> Callable:
@@ -2275,7 +2281,10 @@ def _get_window_indexer(self, window: int) -> GroupbyRollingIndexer:
         if isinstance(self.window, BaseIndexer):
             rolling_indexer = type(self.window)
             indexer_kwargs = self.window.__dict__
+<<<<<<< HEAD
+=======
             assert isinstance(indexer_kwargs, dict)  # for mypy
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
             # We'll be using the index of each group later
             indexer_kwargs.pop("index_array", None)
         elif self.is_freq_type:
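The `obj.empty` guard matters because `np.concatenate` refuses an empty list of arrays, which is exactly what an empty frame's group indices produce (GH36197). With the guard in place:

```python
import pandas as pd

df = pd.DataFrame({"s1": []})

# Previously this hit "ValueError: need at least one array to concatenate";
# now it returns an empty result with the expected grouped MultiIndex.
result = df.groupby("s1").rolling(window=1).sum()
print(result.index.names)  # ['s1', None]
```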
diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index f0b35e1cd2a74..aae177661efbd 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -1144,7 +1144,17 @@ def _plot(
         return lines
 
     @classmethod
+<<<<<<< HEAD
+    def _ts_plot(cls, ax, x, data, style=None, **kwds):
+        from pandas.plotting._matplotlib.timeseries import (
+            _decorate_axes,
+            _maybe_resample,
+            format_dateaxis,
+        )
+
+=======
     def _ts_plot(cls, ax: "Axes", x, data, style=None, **kwds):
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
         # accept x to be consistent with normal plot func,
         # x is not passed to tsplot as it uses data.index as x coordinate
         # column_num must be in kwds for stacking purpose
diff --git a/pandas/plotting/_matplotlib/timeseries.py b/pandas/plotting/_matplotlib/timeseries.py
index f8faac6a6a026..180225304069a 100644
--- a/pandas/plotting/_matplotlib/timeseries.py
+++ b/pandas/plotting/_matplotlib/timeseries.py
@@ -24,7 +24,11 @@
 from pandas.tseries.frequencies import get_period_alias, is_subperiod, is_superperiod
 
 if TYPE_CHECKING:
+<<<<<<< HEAD
+    from pandas import Index, Series  # noqa:F401
+=======
     from matplotlib.axes import Axes
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
 
     from pandas import Index, Series  # noqa:F401
diff --git a/pandas/tests/base/test_factorize.py b/pandas/tests/base/test_factorize.py
index 9fad9856d53cc..adb4a784299e7 100644
--- a/pandas/tests/base/test_factorize.py
+++ b/pandas/tests/base/test_factorize.py
@@ -34,7 +34,11 @@ def test_series_factorize_na_sentinel_none():
     ser = pd.Series(values)
     codes, uniques = ser.factorize(na_sentinel=None)
 
+<<<<<<< HEAD
+    expected_codes = np.array([0, 1, 0, 2], dtype=np.intp)
+=======
     expected_codes = np.array([0, 1, 0, 2], dtype="int64")
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
     expected_uniques = pd.Index([1.0, 2.0, np.nan])
 
     tm.assert_numpy_array_equal(codes, expected_codes)
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index d27487dfb8aaa..e4549dfb3e68d 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -2111,7 +2111,7 @@ def test_type_error_multiindex(self):
         )
         dg = df.pivot_table(index="i", columns="c", values=["x", "y"])
 
-        with pytest.raises(TypeError, match="is an invalid key"):
+        with pytest.raises(TypeError, match="unhashable type"):
             dg[:, 0]
 
         index = Index(range(2), name="i")
diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py
index b0fd0496ea81e..d3f256259b15f 100644
--- a/pandas/tests/frame/methods/test_astype.py
+++ b/pandas/tests/frame/methods/test_astype.py
@@ -8,6 +8,7 @@
     CategoricalDtype,
     DataFrame,
     DatetimeTZDtype,
+    Interval,
     IntervalDtype,
     NaT,
     Series,
@@ -565,3 +566,24 @@ def test_astype_empty_dtype_dict(self):
         result = df.astype(dict())
         tm.assert_frame_equal(result, df)
         assert result is not df
+
+    @pytest.mark.parametrize(
+        "df",
+        [
+            DataFrame(Series(["x", "y", "z"], dtype="string")),
+            DataFrame(Series(["x", "y", "z"], dtype="category")),
+            DataFrame(Series(3 * [Timestamp("2020-01-01", tz="UTC")])),
+            DataFrame(Series(3 * [Interval(0, 1)])),
+        ],
+    )
+    @pytest.mark.parametrize("errors", ["raise", "ignore"])
+    def test_astype_ignores_errors_for_extension_dtypes(self, df, errors):
+        # https://github.com/pandas-dev/pandas/issues/35471
+        if errors == "ignore":
+            expected = df
+            result = df.astype(float, errors=errors)
+            tm.assert_frame_equal(result, expected)
+        else:
+            msg = "(Cannot cast)|(could not convert)"
+            with pytest.raises((ValueError, TypeError), match=msg):
+                df.astype(float, errors=errors)
diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py
index d3548b639572d..f307acd8c2178 100644
--- a/pandas/tests/frame/methods/test_cov_corr.py
+++ b/pandas/tests/frame/methods/test_cov_corr.py
@@ -191,6 +191,23 @@ def test_corr_nullable_integer(self, nullable_column, other_column, method):
         expected = pd.DataFrame(np.ones((2, 2)), columns=["a", "b"], index=["a", "b"])
         tm.assert_frame_equal(result, expected)
 
+    def test_corr_item_cache(self):
+        # Check that corr does not lead to incorrect entries in item_cache
+
+        df = pd.DataFrame({"A": range(10)})
+        df["B"] = range(10)[::-1]
+
+        ser = df["A"]  # populate item_cache
+        assert len(df._mgr.blocks) == 2
+
+        _ = df.corr()
+
+        # Check that the corr didnt break link between ser and df
+        ser.values[0] = 99
+        assert df.loc[0, "A"] == 99
+        assert df["A"] is ser
+        assert df.values[0, 0] == 99
+
 
 class TestDataFrameCorrWith:
     def test_corrwith(self, datetime_frame):
diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py
index 4a85da72bc8b1..8822b7d7b0814 100644
--- a/pandas/tests/frame/test_block_internals.py
+++ b/pandas/tests/frame/test_block_internals.py
@@ -628,6 +628,8 @@ def test_add_column_with_pandas_array(self):
         tm.assert_frame_equal(df, df2)
 
 
+<<<<<<< HEAD
+=======
 def test_to_dict_of_blocks_item_cache():
     # Calling to_dict_of_blocks should not poison item_cache
     df = pd.DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]})
@@ -646,6 +648,7 @@ def test_to_dict_of_blocks_item_cache():
     assert df["b"] is ser
 
 
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
 def test_update_inplace_sets_valid_block_values():
     # https://github.com/pandas-dev/pandas/issues/33457
     df = pd.DataFrame({"a": pd.Series([1, 2, None], dtype="category")})
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index 8fe450fe6abfc..f3cf89886323f 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -1063,6 +1063,8 @@ def test_groupby_get_by_index():
     pd.testing.assert_frame_equal(res, expected)
 
 
+<<<<<<< HEAD
+=======
 @pytest.mark.parametrize(
     "grp_col_dict, exp_data",
     [
@@ -1142,6 +1144,7 @@ def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data):
     tm.assert_frame_equal(result_df, expected_df)
 
 
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
 def test_nonagg_agg():
     # GH 35490 - Single/Multiple agg of non-agg function give same results
     # TODO: agg should raise for functions that don't aggregate
diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py
index 172cd4a106ac1..907b7c84ba9e5 100644
--- a/pandas/tests/indexes/ranges/test_range.py
+++ b/pandas/tests/indexes/ranges/test_range.py
@@ -174,6 +174,9 @@ def test_cache(self):
         idx.format()
         assert idx._cache == {}
 
+        idx.format()
+        assert idx._cache == {}
+
         df = pd.DataFrame({"a": range(10)}, index=idx)
         str(df)
diff --git a/pandas/tests/indexing/multiindex/test_multiindex.py b/pandas/tests/indexing/multiindex/test_multiindex.py
index 5e5fcd3db88d8..4565d79c632de 100644
--- a/pandas/tests/indexing/multiindex/test_multiindex.py
+++ b/pandas/tests/indexing/multiindex/test_multiindex.py
@@ -1,4 +1,5 @@
 import numpy as np
+import pytest
 
 import pandas._libs.index as _index
 from pandas.errors import PerformanceWarning
@@ -83,3 +84,10 @@ def test_nested_tuples_duplicates(self):
         df3 = df.copy(deep=True)
         df3.loc[[(dti[0], "a")], "c2"] = 1.0
         tm.assert_frame_equal(df3, expected)
+
+    def test_multiindex_get_loc_list_raises(self):
+        # https://github.com/pandas-dev/pandas/issues/35878
+        idx = pd.MultiIndex.from_tuples([("a", 1), ("b", 2)])
+        msg = "unhashable type"
+        with pytest.raises(TypeError, match=msg):
+            idx.get_loc([])
diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py
index 350f86b4e9fd0..7afbbc2b9ab2b 100644
--- a/pandas/tests/indexing/test_partial.py
+++ b/pandas/tests/indexing/test_partial.py
@@ -660,3 +660,15 @@ def test_indexing_timeseries_regression(self):
 
         expected = Series(rng, index=rng)
         tm.assert_series_equal(result, expected)
+
+    def test_index_name_empty(self):
+        # GH 31368
+        df = pd.DataFrame({}, index=pd.RangeIndex(0, name="df_index"))
+        series = pd.Series(1.23, index=pd.RangeIndex(4, name="series_index"))
+
+        df["series"] = series
+        expected = pd.DataFrame(
+            {"series": [1.23] * 4}, index=pd.RangeIndex(4, name="df_index")
+        )
+
+        tm.assert_frame_equal(df, expected)
diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py
index 18b5743a3375a..d3a48eb8bbcc9 100644
--- a/pandas/tests/io/test_gcs.py
+++ b/pandas/tests/io/test_gcs.py
@@ -30,8 +30,13 @@ def open(*args, **kwargs):
 
 
 @td.skip_if_no("gcsfs")
+<<<<<<< HEAD
+def test_read_csv_gcs(monkeypatch):
+    from fsspec import AbstractFileSystem, registry
+=======
 def test_read_csv_gcs(gcs_buffer):
     from fsspec import registry
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
 
     registry.target.clear()  # noqa  # remove state
 
@@ -52,8 +57,13 @@ def test_read_csv_gcs(gcs_buffer):
 
 
 @td.skip_if_no("gcsfs")
+<<<<<<< HEAD
+def test_to_csv_gcs(monkeypatch):
+    from fsspec import AbstractFileSystem, registry
+=======
 def test_to_csv_gcs(gcs_buffer):
     from fsspec import registry
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
 
     registry.target.clear()  # noqa  # remove state
diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py
index b753c96af6290..85ef2a8b30cde 100644
--- a/pandas/tests/plotting/common.py
+++ b/pandas/tests/plotting/common.py
@@ -13,6 +13,13 @@
 from pandas import DataFrame, Series
 import pandas._testing as tm
 
+<<<<<<< HEAD
+"""
+This is a common base class used for various plotting tests
+"""
+
+=======
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
 
 @td.skip_if_no_mpl
 class TestPlotBase:
diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py
index 130acaa8bcd58..369fbd4a94c03 100644
--- a/pandas/tests/plotting/test_misc.py
+++ b/pandas/tests/plotting/test_misc.py
@@ -401,7 +401,11 @@ def test_get_standard_colors_no_appending(self):
         # correctly.
         from matplotlib import cm
 
+<<<<<<< HEAD
+        from pandas.plotting._matplotlib.style import _get_standard_colors
+=======
         from pandas.plotting._matplotlib.style import get_standard_colors
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
 
         color_before = cm.gnuplot(range(5))
         color_after = get_standard_colors(1, color=color_before)
diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py
index c296e2a6278c5..bf02d86e6d357 100644
--- a/pandas/tests/plotting/test_series.py
+++ b/pandas/tests/plotting/test_series.py
@@ -828,7 +828,11 @@ def test_standard_colors(self):
     def test_standard_colors_all(self):
         import matplotlib.colors as colors
 
+<<<<<<< HEAD
+        from pandas.plotting._matplotlib.style import _get_standard_colors
+=======
         from pandas.plotting._matplotlib.style import get_standard_colors
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
 
         # multiple colors like mediumaquamarine
         for c in colors.cnames:
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index 38cf2cc2402a1..90705f827af25 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -1110,6 +1110,23 @@ def test_append_empty_frame_to_series_with_dateutil_tz(self):
         result = df.append([s, s], ignore_index=True)
         tm.assert_frame_equal(result, expected)
 
+    def test_append_empty_tz_frame_with_datetime64ns(self):
+        # https://github.com/pandas-dev/pandas/issues/35460
+        df = pd.DataFrame(columns=["a"]).astype("datetime64[ns, UTC]")
+
+        # pd.NaT gets inferred as tz-naive, so append result is tz-naive
+        result = df.append({"a": pd.NaT}, ignore_index=True)
+        expected = pd.DataFrame({"a": [pd.NaT]}).astype("datetime64[ns]")
+        tm.assert_frame_equal(result, expected)
+
+        # also test with typed value to append
+        df = pd.DataFrame(columns=["a"]).astype("datetime64[ns, UTC]")
+        result = df.append(
+            pd.Series({"a": pd.NaT}, dtype="datetime64[ns]"), ignore_index=True
+        )
+        expected = pd.DataFrame({"a": [pd.NaT]}).astype("datetime64[ns]")
+        tm.assert_frame_equal(result, expected)
+
 
 class TestConcatenate:
     def test_concat_copy(self):
diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py
index 9d074b5ade425..0acadc54cec0c 100644
--- a/pandas/tests/reshape/test_util.py
+++ b/pandas/tests/reshape/test_util.py
@@ -65,3 +65,13 @@ def test_invalid_input(self, X):
 
         with pytest.raises(TypeError, match=msg):
             cartesian_product(X=X)
+
+    def test_exceed_product_space(self):
+        # GH31355: raise useful error when produce space is too large
+        msg = "Product space too large to allocate arrays!"
+
+        with pytest.raises(ValueError, match=msg):
+            dims = [np.arange(0, 22, dtype=np.int16) for i in range(12)] + [
+                (np.arange(15128, dtype=np.int16)),
+            ]
+            cartesian_product(X=dims)
diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py
index 3463de25ad91b..593d1c78a19e2 100644
--- a/pandas/tests/series/indexing/test_setitem.py
+++ b/pandas/tests/series/indexing/test_setitem.py
@@ -1,6 +1,7 @@
 import numpy as np
 
-from pandas import NaT, Series, date_range
+from pandas import MultiIndex, NaT, Series, date_range
+import pandas.testing as tm
 
 
 class TestSetitemDT64Values:
@@ -17,3 +18,11 @@ def test_setitem_none_nan(self):
 
         series[5:7] = np.nan
         assert series[6] is NaT
+
+    def test_setitem_multiindex_empty_slice(self):
+        # https://github.com/pandas-dev/pandas/issues/35878
+        idx = MultiIndex.from_tuples([("a", 1), ("b", 2)])
+        result = Series([1, 2], index=idx)
+        expected = result.copy()
+        result.loc[[]] = 0
+        tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py
index 9fdc4179de2e1..b9d90a9fc63dd 100644
--- a/pandas/tests/series/methods/test_astype.py
+++ b/pandas/tests/series/methods/test_astype.py
@@ -1,4 +1,6 @@
-from pandas import Series, date_range
+import pytest
+
+from pandas import Interval, Series, Timestamp, date_range
 import pandas._testing as tm
 
 
@@ -23,3 +25,24 @@ def test_astype_dt64tz_to_str(self):
             dtype=object,
         )
         tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "values",
+        [
+            Series(["x", "y", "z"], dtype="string"),
+            Series(["x", "y", "z"], dtype="category"),
+            Series(3 * [Timestamp("2020-01-01", tz="UTC")]),
+            Series(3 * [Interval(0, 1)]),
+        ],
+    )
+    @pytest.mark.parametrize("errors", ["raise", "ignore"])
+    def test_astype_ignores_errors_for_extension_dtypes(self, values, errors):
+        # https://github.com/pandas-dev/pandas/issues/35471
+        if errors == "ignore":
+            expected = values
+            result = values.astype(float, errors="ignore")
+            tm.assert_series_equal(result, expected)
+        else:
+            msg = "(Cannot cast)|(could not convert)"
+            with pytest.raises((ValueError, TypeError), match=msg):
+                values.astype(float, errors=errors)
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index bcf7039ec9039..5b93a3c036810 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -1464,3 +1464,16 @@ def test_constructor_sparse_datetime64(self, values):
         arr = pd.arrays.SparseArray(values, dtype=dtype)
         expected = pd.Series(arr)
         tm.assert_series_equal(result, expected)
+<<<<<<< HEAD
+
+    def test_construction_from_ordered_collection(self):
+        # https://github.com/pandas-dev/pandas/issues/36044
+        result = Series({"a": 1, "b": 2}.keys())
+        expected = Series(["a", "b"])
+        tm.assert_series_equal(result, expected)
+
+        result = Series({"a": 1, "b": 2}.values())
+        expected = Series([1, 2])
+        tm.assert_series_equal(result, expected)
+=======
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
diff --git a/pandas/tests/util/test_assert_frame_equal.py b/pandas/tests/util/test_assert_frame_equal.py
index 5174ff005b5fb..32129c621f1ac 100644
--- a/pandas/tests/util/test_assert_frame_equal.py
+++ b/pandas/tests/util/test_assert_frame_equal.py
@@ -268,6 +268,8 @@ def test_assert_frame_equal_ignore_extension_dtype_mismatch(right_dtype):
     left = pd.DataFrame({"a": [1, 2, 3]}, dtype="Int64")
     right = pd.DataFrame({"a": [1, 2, 3]}, dtype=right_dtype)
     tm.assert_frame_equal(left, right, check_dtype=False)
+<<<<<<< HEAD
+=======
 
 
 def test_allows_duplicate_labels():
@@ -283,3 +285,4 @@ def test_allows_duplicate_labels():
 
     with pytest.raises(AssertionError, match="<Flags"):
         tm.assert_frame_equal(left, right)
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py
index 53746aa048663..ba8e08634e997 100644
--- a/pandas/tests/util/test_assert_series_equal.py
+++ b/pandas/tests/util/test_assert_series_equal.py
@@ -304,6 +304,8 @@ def test_assert_series_equal_ignore_extension_dtype_mismatch(right_dtype):
     left = pd.Series([1, 2, 3], dtype="Int64")
     right = pd.Series([1, 2, 3], dtype=right_dtype)
     tm.assert_series_equal(left, right, check_dtype=False)
+<<<<<<< HEAD
+=======
 
 
 def test_allows_duplicate_labels():
@@ -319,3 +321,4 @@ def test_allows_duplicate_labels():
 
     with pytest.raises(AssertionError, match="<Flags"):
         tm.assert_series_equal(left, right)
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
diff --git a/pandas/tests/window/test_grouper.py b/pandas/tests/window/test_grouper.py
index 170bf100b3891..d961a292b1e1a 100644
--- a/pandas/tests/window/test_grouper.py
+++ b/pandas/tests/window/test_grouper.py
@@ -209,6 +209,10 @@ def foo(x):
         )
         tm.assert_series_equal(result, expected)
 
+<<<<<<< HEAD
+    @pytest.mark.xfail(not compat.IS64, reason="GH-35294")
+=======
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
     def test_groupby_rolling_center_center(self):
         # GH 35552
         series = Series(range(1, 6))
@@ -274,6 +278,10 @@ def test_groupby_rolling_center_center(self):
         )
         tm.assert_frame_equal(result, expected)
 
+<<<<<<< HEAD
+    @pytest.mark.xfail(not compat.IS64, reason="GH-35294")
+=======
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
    def test_groupby_subselect_rolling(self):
         # GH 35486
         df = DataFrame(
@@ -299,6 +307,10 @@ def test_groupby_subselect_rolling(self):
         )
         tm.assert_series_equal(result, expected)
 
+<<<<<<< HEAD
+    @pytest.mark.xfail(not compat.IS64, reason="GH-35294")
+=======
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
     def test_groupby_rolling_custom_indexer(self):
         # GH 35557
         class SimpleIndexer(pd.api.indexers.BaseIndexer):
@@ -322,6 +334,10 @@ def get_window_bounds(
         expected = df.groupby(df.index).rolling(window=3, min_periods=1).sum()
         tm.assert_frame_equal(result, expected)
 
+<<<<<<< HEAD
+    @pytest.mark.xfail(not compat.IS64, reason="GH-35294")
+=======
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
     def test_groupby_rolling_subset_with_closed(self):
         # GH 35549
         df = pd.DataFrame(
@@ -346,6 +362,10 @@ def test_groupby_rolling_subset_with_closed(self):
         )
         tm.assert_series_equal(result, expected)
 
+<<<<<<< HEAD
+    @pytest.mark.xfail(not compat.IS64, reason="GH-35294")
+=======
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
     def test_groupby_subset_rolling_subset_with_closed(self):
         # GH 35549
         df = pd.DataFrame(
@@ -372,3 +392,40 @@ def test_groupby_subset_rolling_subset_with_closed(self):
             name="column1",
         )
         tm.assert_series_equal(result, expected)
+<<<<<<< HEAD
+
+    @pytest.mark.xfail(not compat.IS64, reason="GH-35294")
+    @pytest.mark.parametrize("func", ["max", "min"])
+    def test_groupby_rolling_index_changed(self, func):
+        # GH: #36018 nlevels of MultiIndex changed
+        ds = Series(
+            [1, 2, 2],
+            index=pd.MultiIndex.from_tuples(
+                [("a", "x"), ("a", "y"), ("c", "z")], names=["1", "2"]
+            ),
+            name="a",
+        )
+
+        result = getattr(ds.groupby(ds).rolling(2), func)()
+        expected = Series(
+            [np.nan, np.nan, 2.0],
+            index=pd.MultiIndex.from_tuples(
+                [(1, "a", "x"), (2, "a", "y"), (2, "c", "z")], names=["a", "1", "2"]
+            ),
+            name="a",
+        )
+        tm.assert_series_equal(result, expected)
+
+    def test_groupby_rolling_empty_frame(self):
+        # GH 36197
+        expected = pd.DataFrame({"s1": []})
+        result = expected.groupby("s1").rolling(window=1).sum()
+        expected.index = pd.MultiIndex.from_tuples([], names=["s1", None])
+        tm.assert_frame_equal(result, expected)
+
+        expected = pd.DataFrame({"s1": [], "s2": []})
+        result = expected.groupby(["s1", "s2"]).rolling(window=1).sum()
+        expected.index = pd.MultiIndex.from_tuples([], names=["s1", "s2", None])
+        tm.assert_frame_equal(result, expected)
+=======
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
diff --git a/requirements-dev.txt b/requirements-dev.txt
index cc3775de3a4ba..5a3c971ebe8cc 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -12,7 +12,11 @@ flake8<3.8.0
 flake8-comprehensions>=3.1.0
 flake8-rst>=0.6.0,<=0.7.0
 isort>=5.2.1
+<<<<<<< HEAD
+mypy==0.730
+=======
 mypy==0.782
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
 pycodestyle
 gitpython
 gitdb
@@ -32,7 +36,10 @@ boto3
 botocore>=1.11
 hypothesis>=3.82
 moto
+<<<<<<< HEAD
+=======
 flask
+>>>>>>> b3dca88d31d0f463932713bab92a0953f4adf683
 pytest>=5.0.1
 pytest-cov
 pytest-xdist>=1.21