Skip to content

Commit 2071f3e

Browse files
Merge branch 'pandas-dev:main' into holes
2 parents b185fac + bd405e8 commit 2071f3e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+536
-674
lines changed

asv_bench/benchmarks/indexing.py

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -546,24 +546,17 @@ def time_chained_indexing(self, mode):
546546

547547

548548
class Block:
549-
params = [
550-
(True, "True"),
551-
(np.array(True), "np.array(True)"),
552-
]
553-
554-
def setup(self, true_value, mode):
549+
def setup(self):
555550
self.df = DataFrame(
556551
False,
557552
columns=np.arange(500).astype(str),
558553
index=date_range("2010-01-01", "2011-01-01"),
559554
)
560555

561-
self.true_value = true_value
562-
563-
def time_test(self, true_value, mode):
556+
def time_test(self):
564557
start = datetime(2010, 5, 1)
565558
end = datetime(2010, 9, 1)
566-
self.df.loc[start:end, :] = true_value
559+
self.df.loc[start:end, :] = True
567560

568561

569562
from .pandas_vb_common import setup # noqa: F401 isort:skip

ci/code_checks.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
9595
-i "pandas.Period.freq GL08" \
9696
-i "pandas.Period.freqstr SA01" \
9797
-i "pandas.Period.month SA01" \
98-
-i "pandas.Period.now SA01" \
9998
-i "pandas.Period.ordinal GL08" \
10099
-i "pandas.Period.strftime PR01,SA01" \
101100
-i "pandas.Period.to_timestamp SA01" \

doc/source/user_guide/categorical.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -793,7 +793,7 @@ Assigning a ``Categorical`` to parts of a column of other types will use the val
793793
:okwarning:
794794
795795
df = pd.DataFrame({"a": [1, 1, 1, 1, 1], "b": ["a", "a", "a", "a", "a"]})
796-
df.loc[1:2, "a"] = pd.Categorical(["b", "b"], categories=["a", "b"])
796+
df.loc[1:2, "a"] = pd.Categorical([2, 2], categories=[2, 3])
797797
df.loc[2:3, "b"] = pd.Categorical(["b", "b"], categories=["a", "b"])
798798
df
799799
df.dtypes

doc/source/whatsnew/v3.0.0.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ Other enhancements
4242
- :func:`DataFrame.to_excel` argument ``merge_cells`` now accepts a value of ``"columns"`` to only merge :class:`MultiIndex` column header cells (:issue:`35384`)
4343
- :meth:`DataFrame.corrwith` now accepts ``min_periods`` as optional arguments, as in :meth:`DataFrame.corr` and :meth:`Series.corr` (:issue:`9490`)
4444
- :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`)
45+
- :meth:`DataFrame.ewm` now allows ``adjust=False`` when ``times`` is provided (:issue:`54328`)
4546
- :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`)
4647
- :meth:`DataFrame.pivot_table` and :func:`pivot_table` now allow the passing of keyword arguments to ``aggfunc`` through ``**kwargs`` (:issue:`57884`)
4748
- :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`)
@@ -360,6 +361,7 @@ Other Removals
360361
- Changed the default value of ``na_action`` in :meth:`Categorical.map` to ``None`` (:issue:`51645`)
361362
- Changed the default value of ``observed`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` to ``True`` (:issue:`51811`)
362363
- Enforce deprecation in :func:`testing.assert_series_equal` and :func:`testing.assert_frame_equal` with object dtype and mismatched null-like values, which are now considered not-equal (:issue:`18463`)
364+
- Enforce banning of upcasting in in-place setitem-like operations (:issue:`59007`) (see `PDEP6 <https://pandas.pydata.org/pdeps/0006-ban-upcasting.html>`_)
363365
- Enforced deprecation ``all`` and ``any`` reductions with ``datetime64``, :class:`DatetimeTZDtype`, and :class:`PeriodDtype` dtypes (:issue:`58029`)
364366
- Enforced deprecation disallowing ``float`` "periods" in :func:`date_range`, :func:`period_range`, :func:`timedelta_range`, :func:`interval_range`, (:issue:`56036`)
365367
- Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`)
@@ -502,6 +504,7 @@ Datetimelike
502504
- Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` does not raise on Custom business days frequencies bigger than "1C" (:issue:`58664`)
503505
- Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` returning ``False`` on double-digit frequencies (:issue:`58523`)
504506
- Bug in :meth:`DatetimeIndex.union` when ``unit`` was non-nanosecond (:issue:`59036`)
507+
- Bug in :meth:`Series.dt.microsecond` producing incorrect results for pyarrow backed :class:`Series`. (:issue:`59154`)
505508
- Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)
506509

507510
Timedelta

pandas/_libs/tslibs/nattype.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1516,7 +1516,7 @@ default 'raise'
15161516

15171517
See Also
15181518
--------
1519-
Timestamp.asm8 : Return numpy datetime64 format in nanoseconds.
1519+
Timestamp.asm8 : Return numpy datetime64 format with same precision.
15201520
Timestamp.to_pydatetime : Convert Timestamp object to a native
15211521
Python datetime object.
15221522
to_timedelta : Convert argument into timedelta object,

pandas/_libs/tslibs/period.pyx

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2472,11 +2472,24 @@ cdef class _Period(PeriodMixin):
24722472
"""
24732473
Return the period of now's date.
24742474
2475+
The `now` method provides a convenient way to generate a period
2476+
object for the current date and time. This can be particularly
2477+
useful in financial and economic analysis, where data is often
2478+
collected and analyzed in regular intervals (e.g., hourly, daily,
2479+
monthly). By specifying the frequency, users can create periods
2480+
that match the granularity of their data.
2481+
24752482
Parameters
24762483
----------
24772484
freq : str, BaseOffset
24782485
Frequency to use for the returned period.
24792486
2487+
See Also
2488+
--------
2489+
to_datetime : Convert argument to datetime.
2490+
Period : Represents a period of time.
2491+
Period.to_timestamp : Return the Timestamp representation of the Period.
2492+
24802493
Examples
24812494
--------
24822495
>>> pd.Period.now('h') # doctest: +SKIP

pandas/_libs/tslibs/timestamps.pyx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1139,7 +1139,7 @@ cdef class _Timestamp(ABCTimestamp):
11391139
11401140
See Also
11411141
--------
1142-
Timestamp.asm8 : Return numpy datetime64 format in nanoseconds.
1142+
Timestamp.asm8 : Return numpy datetime64 format with same precision.
11431143
Timestamp.to_pydatetime : Convert Timestamp object to a native
11441144
Python datetime object.
11451145
to_timedelta : Convert argument into timedelta object,
@@ -1170,7 +1170,7 @@ cdef class _Timestamp(ABCTimestamp):
11701170
@property
11711171
def asm8(self) -> np.datetime64:
11721172
"""
1173-
Return numpy datetime64 format in nanoseconds.
1173+
Return numpy datetime64 format with same precision.
11741174

11751175
See Also
11761176
--------

pandas/_libs/window/aggregations.pyx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1813,6 +1813,9 @@ def ewm(const float64_t[:] vals, const int64_t[:] start, const int64_t[:] end,
18131813
if normalize:
18141814
# avoid numerical errors on constant series
18151815
if weighted != cur:
1816+
if not adjust and com == 1:
1817+
# update in case of irregular-interval series
1818+
new_wt = 1. - old_wt
18161819
weighted = old_wt * weighted + new_wt * cur
18171820
weighted /= (old_wt + new_wt)
18181821
if adjust:

pandas/core/arrays/arrow/array.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2794,7 +2794,10 @@ def _dt_days_in_month(self) -> Self:
27942794

27952795
@property
27962796
def _dt_microsecond(self) -> Self:
2797-
return type(self)(pc.microsecond(self._pa_array))
2797+
# GH 59154
2798+
us = pc.microsecond(self._pa_array)
2799+
ms_to_us = pc.multiply(pc.millisecond(self._pa_array), 1000)
2800+
return type(self)(pc.add(us, ms_to_us))
27982801

27992802
@property
28002803
def _dt_minute(self) -> Self:

pandas/core/generic.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2779,7 +2779,8 @@ def to_sql(
27792779
----------
27802780
name : str
27812781
Name of SQL table.
2782-
con : sqlalchemy.engine.(Engine or Connection) or sqlite3.Connection
2782+
con : ADBC connection, sqlalchemy.engine.(Engine or Connection) or sqlite3.Connection
2783+
ADBC provides high performance I/O with native type support, where available.
27832784
Using SQLAlchemy makes it possible to use any DB supported by that
27842785
library. Legacy support is provided for sqlite3.Connection objects. The user
27852786
is responsible for engine disposal and connection closure for the SQLAlchemy
@@ -2966,6 +2967,22 @@ def to_sql(
29662967
>>> with engine.connect() as conn:
29672968
... conn.execute(text("SELECT * FROM integers")).fetchall()
29682969
[(1,), (None,), (2,)]
2970+
2971+
.. versionadded:: 2.2.0
2972+
2973+
pandas now supports writing via ADBC drivers
2974+
2975+
>>> df = pd.DataFrame({'name' : ['User 10', 'User 11', 'User 12']})
2976+
>>> df
2977+
name
2978+
0 User 10
2979+
1 User 11
2980+
2 User 12
2981+
2982+
>>> from adbc_driver_sqlite import dbapi # doctest:+SKIP
2983+
>>> with dbapi.connect("sqlite://") as conn: # doctest:+SKIP
2984+
... df.to_sql(name="users", con=conn)
2985+
3
29692986
""" # noqa: E501
29702987
from pandas.io import sql
29712988

pandas/core/indexing.py

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
)
2626
from pandas.errors.cow import _chained_assignment_msg
2727
from pandas.util._decorators import doc
28-
from pandas.util._exceptions import find_stack_level
2928

3029
from pandas.core.dtypes.cast import (
3130
can_hold_element,
@@ -2124,14 +2123,14 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None:
21242123
self.obj._mgr.column_setitem(
21252124
loc, plane_indexer, value, inplace_only=True
21262125
)
2127-
except (ValueError, TypeError, LossySetitemError):
2126+
except (ValueError, TypeError, LossySetitemError) as exc:
21282127
# If we're setting an entire column and we can't do it inplace,
21292128
# then we can use value's dtype (or inferred dtype)
21302129
# instead of object
21312130
dtype = self.obj.dtypes.iloc[loc]
21322131
if dtype not in (np.void, object) and not self.obj.empty:
21332132
# - Exclude np.void, as that is a special case for expansion.
2134-
# We want to warn for
2133+
# We want to raise for
21352134
# df = pd.DataFrame({'a': [1, 2]})
21362135
# df.loc[:, 'a'] = .3
21372136
# but not for
@@ -2140,14 +2139,9 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None:
21402139
# - Exclude `object`, as then no upcasting happens.
21412140
# - Exclude empty initial object with enlargement,
21422141
# as then there's nothing to be inconsistent with.
2143-
warnings.warn(
2144-
f"Setting an item of incompatible dtype is deprecated "
2145-
"and will raise in a future error of pandas. "
2146-
f"Value '{value}' has dtype incompatible with {dtype}, "
2147-
"please explicitly cast to a compatible dtype first.",
2148-
FutureWarning,
2149-
stacklevel=find_stack_level(),
2150-
)
2142+
raise TypeError(
2143+
f"Invalid value '{value}' for dtype '{dtype}'"
2144+
) from exc
21512145
self.obj.isetitem(loc, value)
21522146
else:
21532147
# set value into the column (first attempting to operate inplace, then

pandas/core/internals/blocks.py

Lines changed: 17 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,7 @@ def split_and_operate(self, func, *args, **kwargs) -> list[Block]:
428428
# Up/Down-casting
429429

430430
@final
431-
def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block:
431+
def coerce_to_target_dtype(self, other, raise_on_upcast: bool) -> Block:
432432
"""
433433
coerce the current block to a dtype compat for other
434434
we will return a block, possibly object, and not raise
@@ -455,25 +455,18 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block:
455455
isinstance(other, (np.datetime64, np.timedelta64)) and np.isnat(other)
456456
)
457457
):
458-
warn_on_upcast = False
458+
raise_on_upcast = False
459459
elif (
460460
isinstance(other, np.ndarray)
461461
and other.ndim == 1
462462
and is_integer_dtype(self.values.dtype)
463463
and is_float_dtype(other.dtype)
464464
and lib.has_only_ints_or_nan(other)
465465
):
466-
warn_on_upcast = False
467-
468-
if warn_on_upcast:
469-
warnings.warn(
470-
f"Setting an item of incompatible dtype is deprecated "
471-
"and will raise an error in a future version of pandas. "
472-
f"Value '{other}' has dtype incompatible with {self.values.dtype}, "
473-
"please explicitly cast to a compatible dtype first.",
474-
FutureWarning,
475-
stacklevel=find_stack_level(),
476-
)
466+
raise_on_upcast = False
467+
468+
if raise_on_upcast:
469+
raise TypeError(f"Invalid value '{other}' for dtype '{self.values.dtype}'")
477470
if self.values.dtype == new_dtype:
478471
raise AssertionError(
479472
f"Did not expect new dtype {new_dtype} to equal self.dtype "
@@ -720,7 +713,7 @@ def replace(
720713
if value is None or value is NA:
721714
blk = self.astype(np.dtype(object))
722715
else:
723-
blk = self.coerce_to_target_dtype(value)
716+
blk = self.coerce_to_target_dtype(value, raise_on_upcast=False)
724717
return blk.replace(
725718
to_replace=to_replace,
726719
value=value,
@@ -1105,7 +1098,7 @@ def setitem(self, indexer, value) -> Block:
11051098
casted = np_can_hold_element(values.dtype, value)
11061099
except LossySetitemError:
11071100
# current dtype cannot store value, coerce to common dtype
1108-
nb = self.coerce_to_target_dtype(value, warn_on_upcast=True)
1101+
nb = self.coerce_to_target_dtype(value, raise_on_upcast=True)
11091102
return nb.setitem(indexer, value)
11101103
else:
11111104
if self.dtype == _dtype_obj:
@@ -1176,7 +1169,7 @@ def putmask(self, mask, new) -> list[Block]:
11761169
if not is_list_like(new):
11771170
# using just new[indexer] can't save us the need to cast
11781171
return self.coerce_to_target_dtype(
1179-
new, warn_on_upcast=True
1172+
new, raise_on_upcast=True
11801173
).putmask(mask, new)
11811174
else:
11821175
indexer = mask.nonzero()[0]
@@ -1244,7 +1237,7 @@ def where(self, other, cond) -> list[Block]:
12441237
if self.ndim == 1 or self.shape[0] == 1:
12451238
# no need to split columns
12461239

1247-
block = self.coerce_to_target_dtype(other)
1240+
block = self.coerce_to_target_dtype(other, raise_on_upcast=False)
12481241
return block.where(orig_other, cond)
12491242

12501243
else:
@@ -1438,7 +1431,7 @@ def shift(self, periods: int, fill_value: Any = None) -> list[Block]:
14381431
fill_value,
14391432
)
14401433
except LossySetitemError:
1441-
nb = self.coerce_to_target_dtype(fill_value)
1434+
nb = self.coerce_to_target_dtype(fill_value, raise_on_upcast=False)
14421435
return nb.shift(periods, fill_value=fill_value)
14431436

14441437
else:
@@ -1637,11 +1630,11 @@ def setitem(self, indexer, value):
16371630
except (ValueError, TypeError):
16381631
if isinstance(self.dtype, IntervalDtype):
16391632
# see TestSetitemFloatIntervalWithIntIntervalValues
1640-
nb = self.coerce_to_target_dtype(orig_value, warn_on_upcast=True)
1633+
nb = self.coerce_to_target_dtype(orig_value, raise_on_upcast=True)
16411634
return nb.setitem(orig_indexer, orig_value)
16421635

16431636
elif isinstance(self, NDArrayBackedExtensionBlock):
1644-
nb = self.coerce_to_target_dtype(orig_value, warn_on_upcast=True)
1637+
nb = self.coerce_to_target_dtype(orig_value, raise_on_upcast=True)
16451638
return nb.setitem(orig_indexer, orig_value)
16461639

16471640
else:
@@ -1676,13 +1669,13 @@ def where(self, other, cond) -> list[Block]:
16761669
if self.ndim == 1 or self.shape[0] == 1:
16771670
if isinstance(self.dtype, IntervalDtype):
16781671
# TestSetitemFloatIntervalWithIntIntervalValues
1679-
blk = self.coerce_to_target_dtype(orig_other)
1672+
blk = self.coerce_to_target_dtype(orig_other, raise_on_upcast=False)
16801673
return blk.where(orig_other, orig_cond)
16811674

16821675
elif isinstance(self, NDArrayBackedExtensionBlock):
16831676
# NB: not (yet) the same as
16841677
# isinstance(values, NDArrayBackedExtensionArray)
1685-
blk = self.coerce_to_target_dtype(orig_other)
1678+
blk = self.coerce_to_target_dtype(orig_other, raise_on_upcast=False)
16861679
return blk.where(orig_other, orig_cond)
16871680

16881681
else:
@@ -1737,13 +1730,13 @@ def putmask(self, mask, new) -> list[Block]:
17371730
if isinstance(self.dtype, IntervalDtype):
17381731
# Discussion about what we want to support in the general
17391732
# case GH#39584
1740-
blk = self.coerce_to_target_dtype(orig_new, warn_on_upcast=True)
1733+
blk = self.coerce_to_target_dtype(orig_new, raise_on_upcast=True)
17411734
return blk.putmask(orig_mask, orig_new)
17421735

17431736
elif isinstance(self, NDArrayBackedExtensionBlock):
17441737
# NB: not (yet) the same as
17451738
# isinstance(values, NDArrayBackedExtensionArray)
1746-
blk = self.coerce_to_target_dtype(orig_new, warn_on_upcast=True)
1739+
blk = self.coerce_to_target_dtype(orig_new, raise_on_upcast=True)
17471740
return blk.putmask(orig_mask, orig_new)
17481741

17491742
else:

pandas/core/window/ewm.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,10 @@ class ExponentialMovingWindow(BaseWindow):
134134
Provide exponentially weighted (EW) calculations.
135135
136136
Exactly one of ``com``, ``span``, ``halflife``, or ``alpha`` must be
137-
provided if ``times`` is not provided. If ``times`` is provided,
137+
provided if ``times`` is not provided. If ``times`` is provided and ``adjust=True``,
138138
``halflife`` and one of ``com``, ``span`` or ``alpha`` may be provided.
139+
If ``times`` is provided and ``adjust=False``, ``halflife`` must be the only
140+
provided decay-specification parameter.
139141
140142
Parameters
141143
----------
@@ -358,8 +360,6 @@ def __init__(
358360
self.ignore_na = ignore_na
359361
self.times = times
360362
if self.times is not None:
361-
if not self.adjust:
362-
raise NotImplementedError("times is not supported with adjust=False.")
363363
times_dtype = getattr(self.times, "dtype", None)
364364
if not (
365365
is_datetime64_dtype(times_dtype)
@@ -376,6 +376,11 @@ def __init__(
376376
# Halflife is no longer applicable when calculating COM
377377
# But allow COM to still be calculated if the user passes other decay args
378378
if common.count_not_none(self.com, self.span, self.alpha) > 0:
379+
if not self.adjust:
380+
raise NotImplementedError(
381+
"None of com, span, or alpha can be specified if "
382+
"times is provided and adjust=False"
383+
)
379384
self._com = get_center_of_mass(self.com, self.span, None, self.alpha)
380385
else:
381386
self._com = 1.0

0 commit comments

Comments
 (0)