
Commit b1bb12a

Merge branch 'main' into 9586-inconsistent-labeling-sub-daily-super-daily-frequencies
2 parents: 592793c + 898ab21

121 files changed (+1514 additions, -984 deletions)

.github/workflows/dependabot.yml

Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
+version: 2
+updates:
+  - package-ecosystem: github-actions
+    directory: /
+    schedule:
+      interval: weekly
+    labels:
+      - "CI"
+      - "Dependencies"

.github/workflows/ubuntu.yml

Lines changed: 2 additions & 3 deletions
@@ -28,7 +28,6 @@ jobs:
         env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml, actions-311.yaml]
         pattern: ["not single_cpu", "single_cpu"]
         pyarrow_version: ["8", "9", "10"]
-        pandas_ci: [1]
         include:
           - name: "Downstream Compat"
             env_file: actions-38-downstream_compat.yaml
@@ -75,7 +74,7 @@ jobs:
             test_args: "-W error::DeprecationWarning -W error::FutureWarning"
            # TODO(cython3): Re-enable once next-beta(after beta 1) comes out
            # There are some warnings failing the build with -werror
-            pandas_ci: 0
+            pandas_ci: "0"
         exclude:
           - env_file: actions-38.yaml
             pyarrow_version: "8"
@@ -99,7 +98,7 @@ jobs:
       LC_ALL: ${{ matrix.lc_all || '' }}
       PANDAS_DATA_MANAGER: ${{ matrix.pandas_data_manager || 'block' }}
       PANDAS_COPY_ON_WRITE: ${{ matrix.pandas_copy_on_write || '0' }}
-      PANDAS_CI: ${{ matrix.pandas_ci }}
+      PANDAS_CI: ${{ matrix.pandas_ci || '1' }}
       TEST_ARGS: ${{ matrix.test_args || '' }}
       PYTEST_WORKERS: ${{ contains(matrix.pattern, 'not single_cpu') && 'auto' || '1' }}
       PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}

.github/workflows/wheels.yml

Lines changed: 2 additions & 2 deletions
@@ -173,8 +173,8 @@ jobs:
           pip install hypothesis>=6.34.2 pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-asyncio>=0.17
           cd .. # Not a good idea to test within the src tree
           python -c "import pandas; print(pandas.__version__);
-          pandas.test(extra_args=['-m not clipboard and not single_cpu and not slow and not network and not db', '-n 2', '--no-strict-data-files']);
-          pandas.test(extra_args=['-m not clipboard and single_cpu and not slow and not network and not db', '--no-strict-data-files'])"
+          pandas.test(extra_args=['-m not clipboard and not single_cpu and not slow and not network and not db', '-n 2']);
+          pandas.test(extra_args=['-m not clipboard and single_cpu and not slow and not network and not db'])"
       - uses: actions/upload-artifact@v3
         with:
           name: sdist

.pre-commit-config.yaml

Lines changed: 3 additions & 11 deletions
@@ -28,7 +28,7 @@ repos:
         types_or: [python, pyi]
         additional_dependencies: [black==23.1.0]
 -   repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.0.255
+    rev: v0.0.259
     hooks:
     -   id: ruff
         args: [--exit-non-zero-on-fix]
@@ -392,14 +392,6 @@ repos:
         files: ^pandas/
        exclude: ^(pandas/_libs/|pandas/tests/|pandas/errors/__init__.py$|pandas/_version.py)
         types: [python]
-    -   id: flake8-pyi
-        name: flake8-pyi
-        entry: flake8 --extend-ignore=E301,E302,E305,E701,E704
-        types: [pyi]
-        language: python
-        additional_dependencies:
-        -   flake8==5.0.4
-        -   flake8-pyi==22.8.1
     -   id: future-annotations
         name: import annotations from __future__
         entry: 'from __future__ import annotations'
@@ -421,8 +413,8 @@ repos:
         language: python
         stages: [manual]
         additional_dependencies:
-        -   autotyping==22.9.0
-        -   libcst==0.4.7
+        -   autotyping==23.3.0
+        -   libcst==0.4.9
     -   id: check-test-naming
         name: check that test names start with 'test'
         entry: python -m scripts.check_test_naming

asv_bench/benchmarks/strings.py

Lines changed: 0 additions & 7 deletions
@@ -34,7 +34,6 @@ def setup(self, dtype):
 
         # GH37371. Testing construction of string series/frames from ExtensionArrays
         self.series_cat_arr = Categorical(self.series_arr)
-        self.frame_cat_arr = Categorical(self.frame_arr)
 
     def time_series_construction(self, dtype):
         Series(self.series_arr, dtype=dtype)
@@ -54,12 +53,6 @@ def time_cat_series_construction(self, dtype):
     def peakmem_cat_series_construction(self, dtype):
         Series(self.series_cat_arr, dtype=dtype)
 
-    def time_cat_frame_construction(self, dtype):
-        DataFrame(self.frame_cat_arr, dtype=dtype)
-
-    def peakmem_cat_frame_construction(self, dtype):
-        DataFrame(self.frame_cat_arr, dtype=dtype)
-
 
 class Methods(Dtypes):
     def time_center(self, dtype):

ci/code_checks.sh

Lines changed: 0 additions & 3 deletions
@@ -86,8 +86,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
     MSG='Partially validate docstrings (EX01)' ; echo $MSG
     $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX01 --ignore_functions \
         pandas.Series.index \
-        pandas.Series.hasnans \
-        pandas.Series.to_list \
         pandas.Series.__iter__ \
         pandas.Series.keys \
         pandas.Series.item \
@@ -309,7 +307,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         pandas_object \
         pandas.api.interchange.from_dataframe \
         pandas.Index.values \
-        pandas.Index.hasnans \
         pandas.Index.dtype \
         pandas.Index.inferred_type \
         pandas.Index.shape \

ci/test_wheels.py

Lines changed: 0 additions & 2 deletions
@@ -41,12 +41,10 @@
     multi_args = [
         "-m not clipboard and not single_cpu and not slow and not network and not db",
         "-n 2",
-        "--no-strict-data-files",
     ]
     pd.test(extra_args=multi_args)
     pd.test(
         extra_args=[
             "-m not clipboard and single_cpu and not slow and not network and not db",
-            "--no-strict-data-files",
         ]
     )

ci/test_wheels_windows.bat

Lines changed: 2 additions & 2 deletions
@@ -1,6 +1,6 @@
 set test_command=import pandas as pd; print(pd.__version__); ^
-pd.test(extra_args=['-m not clipboard and not single_cpu and not slow and not network and not db', '--no-strict-data-files', '-n=2']); ^
-pd.test(extra_args=['-m not clipboard and single_cpu and not slow and not network and not db', '--no-strict-data-files'])
+pd.test(extra_args=['-m not clipboard and not single_cpu and not slow and not network and not db', '-n 2']); ^
+pd.test(extra_args=['-m not clipboard and single_cpu and not slow and not network and not db'])
 
 python --version
 pip install pytz six numpy python-dateutil tzdata>=2022.1

doc/source/development/contributing_codebase.rst

Lines changed: 2 additions & 1 deletion
@@ -812,7 +812,8 @@ install pandas) by typing::
 your installation is probably fine and you can start contributing!
 
 Often it is worth running only a subset of tests first around your changes before running the
-entire suite.
+entire suite (tip: you can use the [pandas-coverage app](https://pandas-coverage.herokuapp.com/)
+to find out which tests hit the lines of code you've modified, and then run only those).
 
 The easiest way to do this is with::

doc/source/getting_started/tutorials.rst

Lines changed: 1 addition & 1 deletion
@@ -113,7 +113,7 @@ Various tutorials
 * `Wes McKinney's (pandas BDFL) blog <https://wesmckinney.com/archives.html>`_
 * `Statistical analysis made easy in Python with SciPy and pandas DataFrames, by Randal Olson <http://www.randalolson.com/2012/08/06/statistical-analysis-made-easy-in-python/>`_
 * `Statistical Data Analysis in Python, tutorial videos, by Christopher Fonnesbeck from SciPy 2013 <https://conference.scipy.org/scipy2013/tutorial_detail.php?id=109>`_
-* `Financial analysis in Python, by Thomas Wiecki <https://nbviewer.ipython.org/github/twiecki/financial-analysis-python-tutorial/blob/master/1.%20Pandas%20Basics.ipynb>`_
+* `Financial analysis in Python, by Thomas Wiecki <https://nbviewer.org/github/twiecki/financial-analysis-python-tutorial/blob/master/1.%20Pandas%20Basics.ipynb>`_
 * `Intro to pandas data structures, by Greg Reda <http://www.gregreda.com/2013/10/26/intro-to-pandas-data-structures/>`_
 * `Pandas and Python: Top 10, by Manish Amde <https://manishamde.github.io/blog/2013/03/07/pandas-and-python-top-10/>`_
 * `Pandas DataFrames Tutorial, by Karlijn Willems <https://www.datacamp.com/community/tutorials/pandas-tutorial-dataframe-python>`_

doc/source/reference/arrays.rst

Lines changed: 4 additions & 3 deletions
@@ -93,9 +93,10 @@ PyArrow type pandas extension type NumPy
 
 .. note::
 
-   For string types (``pyarrow.string()``, ``string[pyarrow]``), PyArrow support is still facilitated
-   by :class:`arrays.ArrowStringArray` and ``StringDtype("pyarrow")``. See the :ref:`string section <api.arrays.string>`
-   below.
+   Pyarrow-backed string support is provided by both ``pd.StringDtype("pyarrow")`` and ``pd.ArrowDtype(pa.string())``.
+   ``pd.StringDtype("pyarrow")`` is described below in the :ref:`string section <api.arrays.string>`
+   and will be returned if the string alias ``"string[pyarrow]"`` is specified. ``pd.ArrowDtype(pa.string())``
+   generally has better interoperability with :class:`ArrowDtype` of different types.
 
 While individual values in an :class:`arrays.ArrowExtensionArray` are stored as a PyArrow objects, scalars are **returned**
 as Python scalars corresponding to the data type, e.g. a PyArrow int64 will be returned as Python int, or :class:`NA` for missing
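To make the revised note concrete, a minimal sketch of the difference between the two Arrow-backed string dtypes (assuming pandas >= 2.0 with pyarrow installed; the exact dtype reprs may vary by version):

    import pandas as pd
    import pyarrow as pa

    # The alias "string[pyarrow]" resolves to pd.StringDtype("pyarrow") ...
    ser_sd = pd.Series(list("abc"), dtype="string[pyarrow]")
    # ... while pd.ArrowDtype(pa.string()) is a separate, ArrowDtype-based dtype.
    ser_ad = pd.Series(list("abc"), dtype=pd.ArrowDtype(pa.string()))

    print(ser_sd.dtype == ser_ad.dtype)    # False: the two dtypes are not equivalent
    print(ser_sd.str.contains("a").dtype)  # boolean (NumPy-backed nullable)
    print(ser_ad.str.contains("a").dtype)  # bool[pyarrow] (ArrowDtype)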

doc/source/user_guide/pyarrow.rst

Lines changed: 19 additions & 1 deletion
@@ -35,6 +35,23 @@ which is similar to a NumPy array. To construct these from the main pandas data
    df = pd.DataFrame([[1, 2], [3, 4]], dtype="uint64[pyarrow]")
    df
 
+.. note::
+
+   The string alias ``"string[pyarrow]"`` maps to ``pd.StringDtype("pyarrow")`` which is not equivalent to
+   specifying ``dtype=pd.ArrowDtype(pa.string())``. Generally, operations on the data will behave similarly
+   except ``pd.StringDtype("pyarrow")`` can return NumPy-backed nullable types while ``pd.ArrowDtype(pa.string())``
+   will return :class:`ArrowDtype`.
+
+   .. ipython:: python
+
+      import pyarrow as pa
+      data = list("abc")
+      ser_sd = pd.Series(data, dtype="string[pyarrow]")
+      ser_ad = pd.Series(data, dtype=pd.ArrowDtype(pa.string()))
+      ser_ad.dtype == ser_sd.dtype
+      ser_sd.str.contains("a")
+      ser_ad.str.contains("a")
+
 For PyArrow types that accept parameters, you can pass in a PyArrow type with those parameters
 into :class:`ArrowDtype` to use in the ``dtype`` parameter.
 
@@ -106,6 +123,7 @@ The following are just some examples of operations that are accelerated by nativ
 
 .. ipython:: python
 
+   import pyarrow as pa
    ser = pd.Series([-1.545, 0.211, None], dtype="float32[pyarrow]")
    ser.mean()
    ser + ser
@@ -115,7 +133,7 @@ The following are just some examples of operations that are accelerated by nativ
    ser.isna()
    ser.fillna(0)
 
-   ser_str = pd.Series(["a", "b", None], dtype="string[pyarrow]")
+   ser_str = pd.Series(["a", "b", None], dtype=pd.ArrowDtype(pa.string()))
   ser_str.str.startswith("a")
 
    from datetime import datetime
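The context lines above also mention passing parameterized PyArrow types to ArrowDtype; a small illustrative sketch (assuming pyarrow is installed; the list-of-strings type is just an example):

    import pandas as pd
    import pyarrow as pa

    # A parameterized Arrow type (a list of strings) passed directly to ArrowDtype.
    list_str_type = pa.list_(pa.string())
    ser = pd.Series([["hello"], ["there"]], dtype=pd.ArrowDtype(list_str_type))
    print(ser.dtype)  # list<item: string>[pyarrow]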

doc/source/user_guide/reshaping.rst

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@ Reshaping by pivoting DataFrame objects
 
 .. image:: ../_static/reshaping_pivot.png
 
-Data is often stored in so-called "stacked" or "record" format:
+Data is often stored in so-called "stacked" or "record" format. In a "record" or "wide" format typically there is one row for each subject. In the "stacked" or "long" format there are multiple rows for each subject where applicable.
 
 .. ipython:: python
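As a quick illustration of the "record"/"wide" versus "stacked"/"long" shapes described by the reworded sentence (the subject/height/weight columns are invented for the example):

    import pandas as pd

    # "Record"/"wide" format: one row per subject.
    wide = pd.DataFrame({"subject": ["A", "B"], "height": [180, 170], "weight": [80, 65]})

    # "Stacked"/"long" format: several rows per subject, one per measured variable.
    long_format = wide.melt(id_vars="subject", var_name="variable", value_name="value")
    print(long_format)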

doc/source/whatsnew/v2.0.0.rst

Lines changed: 1 addition & 0 deletions
@@ -1190,6 +1190,7 @@ Timedelta
 - Bug in :func:`to_timedelta` raising error when input has nullable dtype ``Float64`` (:issue:`48796`)
 - Bug in :class:`Timedelta` constructor incorrectly raising instead of returning ``NaT`` when given a ``np.timedelta64("nat")`` (:issue:`48898`)
 - Bug in :class:`Timedelta` constructor failing to raise when passed both a :class:`Timedelta` object and keywords (e.g. days, seconds) (:issue:`48898`)
+- Bug in :class:`Timedelta` comparisons with very large ``datetime.timedelta`` objects incorrect raising ``OutOfBoundsTimedelta`` (:issue:`49021`)
 
 Timezones
 ^^^^^^^^^
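For context on the new entry, a hedged sketch of the kind of comparison it covers (the value below is chosen only to exceed the roughly 292-year range of the nanosecond-based Timedelta):

    import datetime
    import pandas as pd

    # datetime.timedelta covers a much wider range than pandas' nanosecond Timedelta.
    huge = datetime.timedelta(days=1_000_000)  # not representable as a pandas Timedelta

    # Previously this kind of comparison could raise OutOfBoundsTimedelta;
    # with the fix it simply evaluates to a boolean.
    print(pd.Timedelta("1 day") < huge)  # True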

doc/source/whatsnew/v2.1.0.rst

Lines changed: 12 additions & 2 deletions
@@ -36,6 +36,8 @@ Other enhancements
 - :class:`api.extensions.ExtensionArray` now has a :meth:`~api.extensions.ExtensionArray.map` method (:issue:`51809`)
 - Improve error message when having incompatible columns using :meth:`DataFrame.merge` (:issue:`51861`)
 - Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`)
+- :meth:`DataFrame.applymap` now uses the :meth:`~api.extensions.ExtensionArray.map` method of underlying :class:`api.extensions.ExtensionArray` instances (:issue:`52219`)
+- :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`).
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_210.notable_bug_fixes:
@@ -99,16 +101,21 @@ Deprecations
 ~~~~~~~~~~~~
 - Deprecated silently dropping unrecognized timezones when parsing strings to datetimes (:issue:`18702`)
 - Deprecated :meth:`DataFrame._data` and :meth:`Series._data`, use public APIs instead (:issue:`33333`)
+- Deprecated :meth:`.Groupby.all` and :meth:`.GroupBy.any` with datetime64 or :class:`PeriodDtype` values, matching the :class:`Series` and :class:`DataFrame` deprecations (:issue:`34479`)
 - Deprecating pinning ``group.name`` to each group in :meth:`SeriesGroupBy.aggregate` aggregations; if your operation requires utilizing the groupby keys, iterate over the groupby object instead (:issue:`41090`)
 - Deprecated the default of ``observed=False`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby`; this will default to ``True`` in a future version (:issue:`43999`)
 - Deprecated :meth:`DataFrameGroupBy.dtypes`, check ``dtypes`` on the underlying object instead (:issue:`51045`)
 - Deprecated ``axis=1`` in :meth:`DataFrame.groupby` and in :class:`Grouper` constructor, do ``frame.T.groupby(...)`` instead (:issue:`51203`)
+- Deprecated :meth:`Categorical.to_list`, use ``obj.tolist()`` instead (:issue:`51254`)
 - Deprecated passing a :class:`DataFrame` to :meth:`DataFrame.from_records`, use :meth:`DataFrame.set_index` or :meth:`DataFrame.drop` instead (:issue:`51353`)
 - Deprecated accepting slices in :meth:`DataFrame.take`, call ``obj[slicer]`` or pass a sequence of integers instead (:issue:`51539`)
 - Deprecated ``axis=1`` in :meth:`DataFrame.ewm`, :meth:`DataFrame.rolling`, :meth:`DataFrame.expanding`, transpose before calling the method instead (:issue:`51778`)
 - Deprecated the ``axis`` keyword in :meth:`DataFrame.ewm`, :meth:`Series.ewm`, :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.expanding`, :meth:`Series.expanding` (:issue:`51778`)
+- Deprecated the ``axis`` keyword in :meth:`DataFrame.resample`, :meth:`Series.resample` (:issue:`51778`)
 - Deprecated 'method', 'limit', and 'fill_axis' keywords in :meth:`DataFrame.align` and :meth:`Series.align`, explicitly call ``fillna`` on the alignment results instead (:issue:`51856`)
+- Deprecated 'broadcast_axis' keyword in :meth:`Series.align` and :meth:`DataFrame.align`, upcast before calling ``align`` with ``left = DataFrame({col: left for col in right.columns}, index=right.index)`` (:issue:`51856`)
 - Deprecated the 'axis' keyword in :meth:`.GroupBy.idxmax`, :meth:`.GroupBy.idxmin`, :meth:`.GroupBy.fillna`, :meth:`.GroupBy.take`, :meth:`.GroupBy.skew`, :meth:`.GroupBy.rank`, :meth:`.GroupBy.cumprod`, :meth:`.GroupBy.cumsum`, :meth:`.GroupBy.cummax`, :meth:`.GroupBy.cummin`, :meth:`.GroupBy.pct_change`, :meth:`GroupBy.diff`, :meth:`.GroupBy.shift`, and :meth:`DataFrameGroupBy.corrwith`; for ``axis=1`` operate on the underlying :class:`DataFrame` instead (:issue:`50405`, :issue:`51046`)
+- Deprecated :meth:`DataFrame.swapaxes` and :meth:`Series.swapaxes`, use :meth:`DataFrame.transpose` or :meth:`Series.transpose` instead (:issue:`51946`)
 -
 
 .. ---------------------------------------------------------------------------
@@ -195,11 +202,12 @@ Missing
 
 MultiIndex
 ^^^^^^^^^^
--
+- Bug in :meth:`MultiIndex.set_levels` not preserving dtypes for :class:`Categorical` (:issue:`52125`)
 -
 
 I/O
 ^^^
+- Bug in :func:`read_html`, tail texts were removed together with elements containing ``display:none`` style (:issue:`51629`)
 - :meth:`DataFrame.to_orc` now raising ``ValueError`` when non-default :class:`Index` is given (:issue:`51828`)
 -
 
@@ -225,6 +233,7 @@ Groupby/resample/rolling
   grouped :class:`Series` or :class:`DataFrame` was a :class:`DatetimeIndex`, :class:`TimedeltaIndex`
   or :class:`PeriodIndex`, and the ``groupby`` method was given a function as its first argument,
   the function operated on the whole index rather than each element of the index. (:issue:`51979`)
+- Bug in :meth:`GroupBy.var` failing to raise ``TypeError`` when called with datetime64 or :class:`PeriodDtype` values (:issue:`52128`)
 -
 
 Reshaping
@@ -236,7 +245,7 @@ Reshaping
 
 Sparse
 ^^^^^^
--
+- Bug in :meth:`arrays.SparseArray.map` allowed the fill value to be included in the sparse values (:issue:`52095`)
 -
 
 ExtensionArray
@@ -251,6 +260,7 @@ Styler
 Other
 ^^^^^
 - Bug in :func:`assert_almost_equal` now throwing assertion error for two unequal sets (:issue:`51727`)
+- Bug in :meth:`Series.memory_usage` when ``deep=True`` throw an error with Series of objects and the returned value is incorrect, as it does not take into account GC corrections (:issue:`51858`)
 
 .. ***DO NOT USE THIS SECTION***
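To make one of the deprecation entries concrete, a minimal sketch of the suggested migration away from DataFrame.swapaxes (illustrative only; the frame contents are made up):

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

    # DataFrame.swapaxes(0, 1) is deprecated; for a 2-D frame the documented
    # replacement, transpose, yields the same result.
    result = df.transpose()
    print(result)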

pandas/_config/config.py

Lines changed: 1 addition & 1 deletion
@@ -737,7 +737,7 @@ def pp(name: str, ks: Iterable[str]) -> list[str]:
 
 
 @contextmanager
-def config_prefix(prefix) -> Generator[None, None, None]:
+def config_prefix(prefix: str) -> Generator[None, None, None]:
     """
     contextmanager for multiple invocations of API with a common prefix
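For orientation, a hedged sketch of how this private helper is used (pandas._config is internal API, so this is illustration only, not a supported pattern):

    import pandas._config.config as cf

    # Inside the block, option names passed to the module-level get_option /
    # set_option / register_option helpers are implicitly prefixed, so
    # "max_rows" resolves to "display.max_rows".
    with cf.config_prefix("display"):
        print(cf.get_option("max_rows"))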

pandas/_libs/lib.pyx

Lines changed: 8 additions & 4 deletions
@@ -1,6 +1,7 @@
 from collections import abc
 from decimal import Decimal
 from enum import Enum
+from sys import getsizeof
 from typing import (
     Literal,
     _GenericAlias,
@@ -159,7 +160,7 @@ def memory_usage_of_objects(arr: object[:]) -> int64_t:
 
     n = len(arr)
     for i in range(n):
-        size += arr[i].__sizeof__()
+        size += getsizeof(arr[i])
     return size
 
 
@@ -752,7 +753,6 @@ cpdef ndarray[object] ensure_string_array(
             out = arr.astype(str).astype(object)
             out[arr.isna()] = na_value
             return out
-
         arr = arr.to_numpy()
     elif not util.is_array(arr):
         arr = np.array(arr, dtype="object")
@@ -2326,10 +2326,14 @@ def maybe_convert_numeric(
                 if not seen.coerce_numeric:
                     raise type(err)(f"{err} at position {i}")
 
-                seen.saw_null()
-                floats[i] = NaN
                 mask[i] = 1
 
+                if allow_null_in_int:
+                    seen.null_ = True
+                else:
+                    seen.saw_null()
+                    floats[i] = NaN
+
     if seen.check_uint64_conflict():
         return (values, None)
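The switch from __sizeof__ to getsizeof matters because sys.getsizeof also counts the garbage-collector header of GC-tracked objects; a small illustration (exact byte counts depend on the CPython build):

    import sys

    x = (1, 2, 3)

    # __sizeof__ reports only the object's own allocation ...
    print(x.__sizeof__())

    # ... while sys.getsizeof adds the GC header of tracked objects
    # (typically 16 bytes on 64-bit CPython), which is the "GC correction"
    # mentioned in the Series.memory_usage whatsnew entry above.
    print(sys.getsizeof(x))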
