Merge remote-tracking branch 'upstream/master' into ci/locale_simplify

mroeschke · mroeschke · commit 2ea1bd61f967 · 2021-12-24T13:35:19.000-08:00
diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml
@@ -8,17 +8,16 @@ jobs:
     vmImage: ${{ parameters.vmImage }}
   strategy:
     matrix:
-      ${{ if eq(parameters.name, 'macOS') }}:
-        py38_macos_1:
-          ENV_FILE: ci/deps/azure-macos-38.yaml
-          CONDA_PY: "38"
-          PATTERN: "not slow"
-          PYTEST_TARGET: "pandas/tests/[a-h]*"
-        py38_macos_2:
-          ENV_FILE: ci/deps/azure-macos-38.yaml
-          CONDA_PY: "38"
-          PATTERN: "not slow"
-          PYTEST_TARGET: "pandas/tests/[i-z]*"
+      py38_macos_1:
+        ENV_FILE: ci/deps/azure-macos-38.yaml
+        CONDA_PY: "38"
+        PATTERN: "not slow"
+        PYTEST_TARGET: "pandas/tests/[a-h]*"
+      py38_macos_2:
+        ENV_FILE: ci/deps/azure-macos-38.yaml
+        CONDA_PY: "38"
+        PATTERN: "not slow"
+        PYTEST_TARGET: "pandas/tests/[i-z]*"
 
   steps:
     - script: echo '##vso[task.prependpath]$(HOME)/miniconda3/bin'
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
@@ -75,6 +75,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
       pandas/core \
       pandas/errors/ \
       pandas/io/ \
+      pandas/plotting/ \
       pandas/tseries/ \
       pandas/util/ \
       pandas/_typing.py \
diff --git a/ci/setup_env.sh b/ci/setup_env.sh
@@ -74,11 +74,6 @@ echo "w/o removing anything else"
 conda remove pandas -y --force || true
 pip uninstall -y pandas || true
 
-echo
-echo "remove postgres if has been installed with conda"
-echo "we use the one from the CI"
-conda remove postgresql -y --force || true
-
 echo
 echo "remove qt"
 echo "causes problems with the clipboard, we use xsel for that"
@@ -104,13 +99,4 @@ echo
 echo "conda list"
 conda list
 
-# Install DB for Linux
-
-if [[ -n ${SQL:0} ]]; then
-  echo "installing dbs"
-  mysql -e 'create database pandas_nosetest;'
-  psql -c 'create database pandas_nosetest;' -U postgres
-else
-   echo "not using dbs on non-linux Travis builds or Azure Pipelines"
-fi
 echo "done"
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
@@ -268,8 +268,8 @@ Ignoring dtypes in concat with empty or all-NA columns
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 When using :func:`concat` to concatenate two or more :class:`DataFrame` objects,
-if one of the DataFrames was empty or had all-NA values, its dtype was _sometimes_
-ignored when finding the concatenated dtype.  These are now consistently _not_ ignored (:issue:`43507`).
+if one of the DataFrames was empty or had all-NA values, its dtype was *sometimes*
+ignored when finding the concatenated dtype.  These are now consistently *not* ignored (:issue:`43507`).
 
 .. ipython:: python
 
@@ -297,7 +297,7 @@ Now the float-dtype is respected. Since the common dtype for these DataFrames is
 
     res
 
-.. _whatsnew_140.notable_bug_fixes.value_counts_and_mode_do_not_coerse_to_nan:
+.. _whatsnew_140.notable_bug_fixes.value_counts_and_mode_do_not_coerce_to_nan:
 
 Null-values are no longer coerced to NaN-value in value_counts and mode
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -535,7 +535,7 @@ Other Deprecations
 - Deprecated silent dropping of columns that raised a ``TypeError``, ``DataError``, and some cases of ``ValueError`` in :meth:`Series.aggregate`, :meth:`DataFrame.aggregate`, :meth:`Series.groupby.aggregate`, and :meth:`DataFrame.groupby.aggregate` when used with a list (:issue:`43740`)
 - Deprecated casting behavior when setting timezone-aware value(s) into a timezone-aware :class:`Series` or :class:`DataFrame` column when the timezones do not match. Previously this cast to object dtype. In a future version, the values being inserted will be converted to the series or column's existing timezone (:issue:`37605`)
 - Deprecated casting behavior when passing an item with mismatched-timezone to :meth:`DatetimeIndex.insert`, :meth:`DatetimeIndex.putmask`, :meth:`DatetimeIndex.where` :meth:`DatetimeIndex.fillna`, :meth:`Series.mask`, :meth:`Series.where`, :meth:`Series.fillna`, :meth:`Series.shift`, :meth:`Series.replace`, :meth:`Series.reindex` (and :class:`DataFrame` column analogues). In the past this has cast to object dtype. In a future version, these will cast the passed item to the index or series's timezone (:issue:`37605`,:issue:`44940`)
-- Deprecated the 'errors' keyword argument in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, and meth:`DataFrame.mask`; in a future version the argument will be removed (:issue:`44294`)
+- Deprecated the 'errors' keyword argument in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, and :meth:`DataFrame.mask`; in a future version the argument will be removed (:issue:`44294`)
 - Deprecated the ``prefix`` keyword argument in :func:`read_csv` and :func:`read_table`, in a future version the argument will be removed (:issue:`43396`)
 - Deprecated :meth:`PeriodIndex.astype` to ``datetime64[ns]`` or ``DatetimeTZDtype``, use ``obj.to_timestamp(how).tz_localize(dtype.tz)`` instead (:issue:`44398`)
 - Deprecated passing non boolean argument to sort in :func:`concat` (:issue:`41518`)
@@ -636,7 +636,7 @@ Datetimelike
 - Bug in adding a ``np.timedelta64`` object to a :class:`BusinessDay` or :class:`CustomBusinessDay` object incorrectly raising (:issue:`44532`)
 - Bug in :meth:`Index.insert` for inserting ``np.datetime64``, ``np.timedelta64`` or ``tuple`` into :class:`Index` with ``dtype='object'`` with negative loc adding ``None`` and replacing existing value (:issue:`44509`)
 - Bug in :meth:`Series.mode` with ``DatetimeTZDtype`` incorrectly returning timezone-naive and ``PeriodDtype`` incorrectly raising (:issue:`41927`)
-- Bug in :class:`DateOffset`` addition with :class:`Timestamp` where ``offset.nanoseconds`` would not be included in the result. (:issue:`43968`,:issue:`36589`)
+- Bug in :class:`DateOffset`` addition with :class:`Timestamp` where ``offset.nanoseconds`` would not be included in the result (:issue:`43968`, :issue:`36589`)
 -
 
 Timedelta
@@ -715,19 +715,20 @@ Indexing
 - Bug in :meth:`DataFrame.loc.__setitem__` and :meth:`DataFrame.iloc.__setitem__` with mixed dtypes sometimes failing to operate in-place (:issue:`44345`)
 - Bug in :meth:`DataFrame.loc.__getitem__` incorrectly raising ``KeyError`` when selecting a single column with a boolean key (:issue:`44322`).
 - Bug in setting :meth:`DataFrame.iloc` with a single ``ExtensionDtype`` column and setting 2D values e.g. ``df.iloc[:] = df.values`` incorrectly raising (:issue:`44514`)
+- Bug in setting values with :meth:`DataFrame.iloc` with a single ``ExtensionDtype`` column and a tuple of arrays as the indexer (:issue:`44703`)
 - Bug in indexing on columns with ``loc`` or ``iloc`` using a slice with a negative step with ``ExtensionDtype`` columns incorrectly raising (:issue:`44551`)
 - Bug in :meth:`DataFrame.loc.__setitem__` changing dtype when indexer was completely ``False`` (:issue:`37550`)
 - Bug in :meth:`IntervalIndex.get_indexer_non_unique` returning boolean mask instead of array of integers for a non unique and non monotonic index (:issue:`44084`)
 - Bug in :meth:`IntervalIndex.get_indexer_non_unique` not handling targets of ``dtype`` 'object' with NaNs correctly (:issue:`44482`)
 - Fixed regression where a single column ``np.matrix`` was no longer coerced to a 1d ``np.ndarray`` when added to a :class:`DataFrame` (:issue:`42376`)
-- Bug in :meth:`Series.__getitem__` with a :class:`CategoricalIndex` of integers treating lists of integers as positional indexers, inconsistent with the behavior with a single scalar integer (:issue:`15470`,:issue:`14865`)
+- Bug in :meth:`Series.__getitem__` with a :class:`CategoricalIndex` of integers treating lists of integers as positional indexers, inconsistent with the behavior with a single scalar integer (:issue:`15470`, :issue:`14865`)
 -
 
 Missing
 ^^^^^^^
 - Bug in :meth:`DataFrame.fillna` with limit and no method ignores axis='columns' or ``axis = 1`` (:issue:`40989`)
 - Bug in :meth:`DataFrame.fillna` not replacing missing values when using a dict-like ``value`` and duplicate column names (:issue:`43476`)
-- Bug in constructing a :class:`DataFrame` with a dictionary ``np.datetime64`` as a value and ``dtype='timedelta64[ns]'``, or vice-versa, incorrectly casting instead of raising (:issue:`??`)
+- Bug in constructing a :class:`DataFrame` with a dictionary ``np.datetime64`` as a value and ``dtype='timedelta64[ns]'``, or vice-versa, incorrectly casting instead of raising (:issue:`44428`)
 - Bug in :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` with ``inplace=True`` not writing to the underlying array(s) in-place (:issue:`44749`)
 - Bug in :meth:`Index.fillna` incorrectly returning an un-filled :class:`Index` when NA values are present and ``downcast`` argument is specified. This now raises ``NotImplementedError`` instead; do not pass ``downcast`` argument (:issue:`44873`)
 - Bug in :meth:`DataFrame.dropna` changing :class:`Index` even if no entries were dropped (:issue:`41965`)
@@ -751,7 +752,7 @@ I/O
 - Column headers are dropped when constructing a :class:`DataFrame` from a sqlalchemy's ``Row`` object (:issue:`40682`)
 - Bug in unpickling a :class:`Index` with object dtype incorrectly inferring numeric dtypes (:issue:`43188`)
 - Bug in :func:`read_csv` where reading multi-header input with unequal lengths incorrectly raising uncontrolled ``IndexError`` (:issue:`43102`)
-- Bug in :func:`read_csv` raising ``ParserError`` when reading file in chunks and aome chunk blocks have fewer columns than header for ``engine="c"`` (:issue:`21211`)
+- Bug in :func:`read_csv` raising ``ParserError`` when reading file in chunks and some chunk blocks have fewer columns than header for ``engine="c"`` (:issue:`21211`)
 - Bug in :func:`read_csv`, changed exception class when expecting a file path name or file-like object from ``OSError`` to ``TypeError`` (:issue:`43366`)
 - Bug in :func:`read_csv` and :func:`read_fwf` ignoring all ``skiprows`` except first when ``nrows`` is specified for ``engine='python'`` (:issue:`44021`, :issue:`10261`)
 - Bug in :func:`read_csv` keeping the original column in object format when ``keep_date_col=True`` is set (:issue:`13378`)
@@ -777,7 +778,7 @@ I/O
 - Bug in :func:`read_csv` where reading a mixed column of booleans and missing values to a float type results in the missing values becoming 1.0 rather than NaN (:issue:`42808`, :issue:`34120`)
 - Bug in :func:`read_csv` when passing simultaneously a parser in ``date_parser`` and ``parse_dates=False``, the parsing was still called (:issue:`44366`)
 - Bug in :func:`read_csv` not setting name of :class:`MultiIndex` columns correctly when ``index_col`` is not the first column (:issue:`38549`)
-- Bug in :func:`read_csv` silently ignoring errors when failling to create a memory-mapped file (:issue:`44766`)
+- Bug in :func:`read_csv` silently ignoring errors when failing to create a memory-mapped file (:issue:`44766`)
 - Bug in :func:`read_csv` when passing a ``tempfile.SpooledTemporaryFile`` opened in binary mode (:issue:`44748`)
 -
 
@@ -791,7 +792,7 @@ Period
 
 Plotting
 ^^^^^^^^
-- When given non-numeric data, :meth:`DataFrame.boxplot` now raises a ``ValueError`` rather than a cryptic ``KeyError`` or ``ZeroDivsionError``, in line with other plotting functions like :meth:`DataFrame.hist`. (:issue:`43480`)
+- When given non-numeric data, :meth:`DataFrame.boxplot` now raises a ``ValueError`` rather than a cryptic ``KeyError`` or ``ZeroDivisionError``, in line with other plotting functions like :meth:`DataFrame.hist`. (:issue:`43480`)
 -
 
 Groupby/resample/rolling
@@ -852,15 +853,15 @@ Sparse
 ExtensionArray
 ^^^^^^^^^^^^^^
 - Bug in :func:`array` failing to preserve :class:`PandasArray` (:issue:`43887`)
-- NumPy ufuncs ``np.abs``, ``np.positive``, ``np.negative`` now correctly preserve dtype when called on ExtensionArrays that implement ``__abs__, __pos__, __neg__``, respectively. In particular this is fixed for :class:`TimedeltaArray` (:issue:`43899`,:issue:`23316`)
+- NumPy ufuncs ``np.abs``, ``np.positive``, ``np.negative`` now correctly preserve dtype when called on ExtensionArrays that implement ``__abs__, __pos__, __neg__``, respectively. In particular this is fixed for :class:`TimedeltaArray` (:issue:`43899`, :issue:`23316`)
 - NumPy ufuncs ``np.minimum.reduce`` and ``np.maximum.reduce`` now work correctly instead of raising ``NotImplementedError`` on :class:`Series` with ``IntegerDtype`` or ``FloatDtype`` (:issue:`43923`)
 - Avoid raising ``PerformanceWarning`` about fragmented DataFrame when using many columns with an extension dtype (:issue:`44098`)
 - Bug in :class:`IntegerArray` and :class:`FloatingArray` construction incorrectly coercing mismatched NA values (e.g. ``np.timedelta64("NaT")``) to numeric NA (:issue:`44514`)
 - Bug in :meth:`BooleanArray.__eq__` and :meth:`BooleanArray.__ne__` raising ``TypeError`` on comparison with an incompatible type (like a string). This caused :meth:`DataFrame.replace` to sometimes raise a ``TypeError`` if a nullable boolean column was included (:issue:`44499`)
 - Bug in :func:`array` incorrectly raising when passed a ``ndarray`` with ``float16`` dtype (:issue:`44715`)
 - Bug in calling ``np.sqrt`` on :class:`BooleanArray` returning a malformed :class:`FloatingArray` (:issue:`44715`)
 - Bug in :meth:`Series.where` with ``ExtensionDtype`` when ``other`` is a NA scalar incompatible with the series dtype (e.g. ``NaT`` with a numeric dtype) incorrectly casting to a compatible NA value (:issue:`44697`)
-- Fixed bug in :meth:`Series.replace` with ``FloatDtype``, ``string[python]``, or ``string[pyarrow]`` dtype not being preserved when possible (:issue:`33484`,:issue:`40732`,:issue:`31644`,:issue:`41215`,:issue:`25438`)
+- Fixed bug in :meth:`Series.replace` with ``FloatDtype``, ``string[python]``, or ``string[pyarrow]`` dtype not being preserved when possible (:issue:`33484`, :issue:`40732`, :issue:`31644`, :issue:`41215`, :issue:`25438`)
 -
 
 Styler
@@ -882,7 +883,7 @@ Other
 - Bug in :meth:`RangeIndex.union` with another ``RangeIndex`` with matching (even) ``step`` and starts differing by strictly less than ``step / 2`` (:issue:`44019`)
 - Bug in :meth:`RangeIndex.difference` with ``sort=None`` and ``step<0`` failing to sort (:issue:`44085`)
 - Bug in :meth:`Series.to_frame` and :meth:`Index.to_frame` ignoring the ``name`` argument when ``name=None`` is explicitly passed (:issue:`44212`)
-- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` with ``value=None`` and ExtensionDtypes (:issue:`44270`,:issue:`37899`)
+- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` with ``value=None`` and ExtensionDtypes (:issue:`44270`, :issue:`37899`)
 - Bug in :meth:`FloatingArray.equals` failing to consider two arrays equal if they contain ``np.nan`` values (:issue:`44382`)
 - Bug in :meth:`DataFrame.shift` with ``axis=1`` and ``ExtensionDtype`` columns incorrectly raising when an incompatible ``fill_value`` is passed (:issue:`44564`)
 - Bug in :meth:`DataFrame.shift` with ``axis=1`` and ``periods`` larger than ``len(frame.columns)`` producing an invalid :class:`DataFrame` (:issue:`44978`)
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -1512,6 +1512,14 @@ def setitem(self, indexer, value):
             # TODO(EA2D): not needed with 2D EAs
             # we are always 1-D
             indexer = indexer[0]
+            if isinstance(indexer, np.ndarray) and indexer.ndim == 2:
+                # GH#44703
+                if indexer.shape[1] != 1:
+                    raise NotImplementedError(
+                        "This should not be reached. Please report a bug at "
+                        "github.com/pandas-dev/pandas/"
+                    )
+                indexer = indexer[:, 0]
 
         # TODO(EA2D): not needed with 2D EAS
         if isinstance(value, (np.ndarray, ExtensionArray)) and value.ndim == 2:
diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
@@ -338,7 +338,7 @@ def hist_frame(
     >>> np.random.seed(1234)
     >>> df = pd.DataFrame(np.random.randn(10, 4),
     ...                   columns=['Col1', 'Col2', 'Col3', 'Col4'])
-    >>> boxplot = df.boxplot(column=['Col1', 'Col2', 'Col3'])
+    >>> boxplot = df.boxplot(column=['Col1', 'Col2', 'Col3'])  # doctest: +SKIP
 
 Boxplots of variables distributions grouped by the values of a third
 variable can be created using the option ``by``. For instance:
@@ -381,7 +381,7 @@ def hist_frame(
 .. plot::
     :context: close-figs
 
-    >>> boxplot = df.boxplot(grid=False, rot=45, fontsize=15)
+    >>> boxplot = df.boxplot(grid=False, rot=45, fontsize=15)  # doctest: +SKIP
 
 The parameter ``return_type`` can be used to select the type of element
 returned by `boxplot`.  When ``return_type='axes'`` is selected,
@@ -591,14 +591,14 @@ def boxplot_frame_groupby(
         >>> data = np.random.randn(len(index),4)
         >>> df = pd.DataFrame(data, columns=list('ABCD'), index=index)
         >>> grouped = df.groupby(level='lvl1')
-        >>> grouped.boxplot(rot=45, fontsize=12, figsize=(8,10))
+        >>> grouped.boxplot(rot=45, fontsize=12, figsize=(8,10))  # doctest: +SKIP
 
     The ``subplots=False`` option shows the boxplots in a single figure.
 
     .. plot::
         :context: close-figs
 
-        >>> grouped.boxplot(subplots=False, rot=45, fontsize=12)
+        >>> grouped.boxplot(subplots=False, rot=45, fontsize=12)  # doctest: +SKIP
     """
     plot_backend = _get_plot_backend(backend)
     return plot_backend.boxplot_frame_groupby(
@@ -987,6 +987,7 @@ def __call__(self, *args, **kwargs):
 
             >>> s = pd.Series([1, 3, 2])
             >>> s.plot.line()
+            <AxesSubplot:ylabel='Density'>
 
         .. plot::
             :context: close-figs
diff --git a/pandas/plotting/_matplotlib/groupby.py b/pandas/plotting/_matplotlib/groupby.py
@@ -49,8 +49,15 @@ def create_iter_data_given_by(
     ...          [3, 4, np.nan, np.nan], [np.nan, np.nan, 5, 6]]
     >>> data = DataFrame(value, columns=mi)
     >>> create_iter_data_given_by(data)
-    {'h1': DataFrame({'a': [1, 3, np.nan], 'b': [3, 4, np.nan]}),
-     'h2': DataFrame({'a': [np.nan, np.nan, 5], 'b': [np.nan, np.nan, 6]})}
+    {'h1':     h1
+         a    b
+    0  1.0  3.0
+    1  3.0  4.0
+    2  NaN  NaN, 'h2':     h2
+         a    b
+    0  NaN  NaN
+    1  NaN  NaN
+    2  5.0  6.0}
     """
 
     # For `hist` plot, before transformation, the values in level 0 are values
@@ -96,10 +103,10 @@ def reconstruct_data_with_by(
     >>> df = DataFrame(d)
     >>> reconstruct_data_with_by(df, by='h', cols=['a', 'b'])
        h1      h2
-       a   b   a   b
-    0  1   3   NaN NaN
-    1  3   4   NaN NaN
-    2  NaN NaN 5   6
+       a     b     a     b
+    0  1.0   3.0   NaN   NaN
+    1  3.0   4.0   NaN   NaN
+    2  NaN   NaN   5.0   6.0
     """
     grouped = data.groupby(by)
 
diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py
diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py