Skip to content

Commit 05424c2

Browse files
committed
TYP: Use Self for type checking (pandas/core/internals/)
1 parent d082266 commit 05424c2

File tree

4 files changed

+124
-83
lines changed

4 files changed

+124
-83
lines changed

.startup.ipy

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
>>> from itertools import product
2+
>>> import numpy as np
3+
>>> import pandas as pd
4+
>>> from pandas.core.reshape.concat import _Concatenator
5+
>>>
6+
def manual_concat(df_list: list[pd.DataFrame]) -> pd.DataFrame:
    """Row-wise concatenation of *df_list* done by hand with NumPy stacking.

    Reference implementation used to sanity-check ``pd.concat``: columns are
    the first-seen-order union of all frames' columns, rows are stacked in
    input order, and the result takes the dtype of the first aligned frame's
    first column.
    """
    # Union of column labels, preserving first-seen order (dict keys are ordered).
    all_cols = list(dict.fromkeys(col for df in df_list for col in df.columns))
    # Row labels stacked in input order.
    stacked_index = np.hstack([df.index.values for df in df_list])
    # Align every frame to the full column set before stacking the values.
    aligned = [df.reindex(columns=all_cols) for df in df_list]
    stacked_values = np.vstack([df.values for df in aligned])
    return pd.DataFrame(
        stacked_values,
        index=stacked_index,
        columns=all_cols,
        dtype=aligned[0].dtypes[0],
    )
13+
>>>
14+
def compare_frames(df_list: list[pd.DataFrame]) -> None:
    """Raise ValueError unless ``pd.concat`` and ``manual_concat`` agree on *df_list*."""
    expected = pd.concat(df_list)
    actual = manual_concat(df_list)
    if not expected.equals(actual):
        raise ValueError("different concatenations!")
>>>
20+
def make_dataframes(num_dfs, num_idx, num_cols, dtype=np.int32, drop_column=False) -> list[pd.DataFrame]:
    """Build *num_dfs* copies of one random frame for concat benchmarking.

    Each frame has ``num_idx`` rows and ``num_cols`` randomly-ordered columns.
    With ``drop_column=True`` the i-th copy loses its i-th column, so the
    copies no longer share an identical column set.
    """
    data = np.random.randint(-100, 100, size=[num_idx, num_cols])
    row_labels = [f"i{i}" for i in range(num_idx)]
    # Shuffle the column labels so column order differs from label order.
    col_labels = np.random.choice([f"c{i}" for i in range(num_cols)], num_cols, replace=False)
    base = pd.DataFrame(data, index=row_labels, columns=col_labels, dtype=dtype)

    frames: list[pd.DataFrame] = []
    for i in range(num_dfs):
        frame = base.copy()
        if drop_column:
            # Drop a different column from each copy.
            frame = frame.drop(frame.columns[i], axis=1)
        frames.append(frame)
    return frames
>>>
35+
>>> test_data = [ # num_idx, num_cols, num_dfs
36+
... [100, 1_000, 3],
37+
... ]
38+
>>> for i, (num_idx, num_cols, num_dfs) in enumerate(test_data):
39+
... print(f"\n{i}: {num_dfs=}, {num_idx=}, {num_cols=}")
40+
... df_list = make_dataframes(num_dfs, num_idx, num_cols, drop_column=False)
41+
... df_list_dropped = make_dataframes(num_dfs, num_idx, num_cols, drop_column=True)
42+
... print("manual:")
43+
... %timeit manual_concat(df_list)
44+
... compare_frames(df_list)
45+
... for use_dropped in [False, True]:
46+
... print(f"pd.concat: {use_dropped=}")
47+
... this_df_list = df_list if not use_dropped else df_list_dropped
48+
... %timeit pd.concat(this_df_list)

pandas/core/internals/array_manager.py

Lines changed: 32 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
Callable,
99
Hashable,
1010
Literal,
11-
TypeVar,
1211
)
1312

1413
import numpy as np
@@ -23,6 +22,7 @@
2322
AxisInt,
2423
DtypeObj,
2524
QuantileInterpolation,
25+
Self,
2626
npt,
2727
)
2828
from pandas.util._validators import validate_bool_kwarg
@@ -93,8 +93,6 @@
9393
to_native_types,
9494
)
9595

96-
T = TypeVar("T", bound="BaseArrayManager")
97-
9896

9997
class BaseArrayManager(DataManager):
10098
"""
@@ -129,7 +127,7 @@ def __init__(
129127
) -> None:
130128
raise NotImplementedError
131129

132-
def make_empty(self: T, axes=None) -> T:
130+
def make_empty(self, axes=None) -> Self:
133131
"""Return an empty ArrayManager with the items axis of len 0 (no columns)"""
134132
if axes is None:
135133
axes = [self.axes[1:], Index([])]
@@ -193,11 +191,11 @@ def __repr__(self) -> str:
193191
return output
194192

195193
def apply(
196-
self: T,
194+
self,
197195
f,
198196
align_keys: list[str] | None = None,
199197
**kwargs,
200-
) -> T:
198+
) -> Self:
201199
"""
202200
Iterate over the arrays, collect and create a new ArrayManager.
203201
@@ -255,8 +253,8 @@ def apply(
255253
return type(self)(result_arrays, new_axes) # type: ignore[arg-type]
256254

257255
def apply_with_block(
258-
self: T, f, align_keys=None, swap_axis: bool = True, **kwargs
259-
) -> T:
256+
self, f, align_keys=None, swap_axis: bool = True, **kwargs
257+
) -> Self:
260258
# switch axis to follow BlockManager logic
261259
if swap_axis and "axis" in kwargs and self.ndim == 2:
262260
kwargs["axis"] = 1 if kwargs["axis"] == 0 else 0
@@ -309,7 +307,7 @@ def apply_with_block(
309307

310308
return type(self)(result_arrays, self._axes)
311309

312-
def where(self: T, other, cond, align: bool) -> T:
310+
def where(self, other, cond, align: bool) -> Self:
313311
if align:
314312
align_keys = ["other", "cond"]
315313
else:
@@ -323,10 +321,10 @@ def where(self: T, other, cond, align: bool) -> T:
323321
cond=cond,
324322
)
325323

326-
def setitem(self: T, indexer, value) -> T:
324+
def setitem(self, indexer, value) -> Self:
327325
return self.apply_with_block("setitem", indexer=indexer, value=value)
328326

329-
def putmask(self: T, mask, new, align: bool = True) -> T:
327+
def putmask(self, mask, new, align: bool = True) -> Self:
330328
if align:
331329
align_keys = ["new", "mask"]
332330
else:
@@ -340,14 +338,14 @@ def putmask(self: T, mask, new, align: bool = True) -> T:
340338
new=new,
341339
)
342340

343-
def diff(self: T, n: int, axis: AxisInt) -> T:
341+
def diff(self, n: int, axis: AxisInt) -> Self:
344342
assert self.ndim == 2 and axis == 0 # caller ensures
345343
return self.apply(algos.diff, n=n, axis=axis)
346344

347-
def interpolate(self: T, **kwargs) -> T:
345+
def interpolate(self, **kwargs) -> Self:
348346
return self.apply_with_block("interpolate", swap_axis=False, **kwargs)
349347

350-
def shift(self: T, periods: int, axis: AxisInt, fill_value) -> T:
348+
def shift(self, periods: int, axis: AxisInt, fill_value) -> Self:
351349
if fill_value is lib.no_default:
352350
fill_value = None
353351

@@ -359,7 +357,7 @@ def shift(self: T, periods: int, axis: AxisInt, fill_value) -> T:
359357
"shift", periods=periods, axis=axis, fill_value=fill_value
360358
)
361359

362-
def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
360+
def fillna(self, value, limit, inplace: bool, downcast) -> Self:
363361
if limit is not None:
364362
# Do this validation even if we go through one of the no-op paths
365363
limit = libalgos.validate_limit(None, limit=limit)
@@ -368,13 +366,13 @@ def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
368366
"fillna", value=value, limit=limit, inplace=inplace, downcast=downcast
369367
)
370368

371-
def astype(self: T, dtype, copy: bool | None = False, errors: str = "raise") -> T:
369+
def astype(self, dtype, copy: bool | None = False, errors: str = "raise") -> Self:
372370
if copy is None:
373371
copy = True
374372

375373
return self.apply(astype_array_safe, dtype=dtype, copy=copy, errors=errors)
376374

377-
def convert(self: T, copy: bool | None) -> T:
375+
def convert(self, copy: bool | None) -> Self:
378376
if copy is None:
379377
copy = True
380378

@@ -397,10 +395,10 @@ def _convert(arr):
397395

398396
return self.apply(_convert)
399397

400-
def replace_regex(self: T, **kwargs) -> T:
398+
def replace_regex(self, **kwargs) -> Self:
401399
return self.apply_with_block("_replace_regex", **kwargs)
402400

403-
def replace(self: T, to_replace, value, inplace: bool) -> T:
401+
def replace(self, to_replace, value, inplace: bool) -> Self:
404402
inplace = validate_bool_kwarg(inplace, "inplace")
405403
assert np.ndim(value) == 0, value
406404
# TODO "replace" is right now implemented on the blocks, we should move
@@ -410,12 +408,12 @@ def replace(self: T, to_replace, value, inplace: bool) -> T:
410408
)
411409

412410
def replace_list(
413-
self: T,
411+
self,
414412
src_list: list[Any],
415413
dest_list: list[Any],
416414
inplace: bool = False,
417415
regex: bool = False,
418-
) -> T:
416+
) -> Self:
419417
"""do a list replace"""
420418
inplace = validate_bool_kwarg(inplace, "inplace")
421419

@@ -427,7 +425,7 @@ def replace_list(
427425
regex=regex,
428426
)
429427

430-
def to_native_types(self: T, **kwargs) -> T:
428+
def to_native_types(self, **kwargs) -> Self:
431429
return self.apply(to_native_types, **kwargs)
432430

433431
@property
@@ -453,7 +451,7 @@ def is_view(self) -> bool:
453451
def is_single_block(self) -> bool:
454452
return len(self.arrays) == 1
455453

456-
def _get_data_subset(self: T, predicate: Callable) -> T:
454+
def _get_data_subset(self, predicate: Callable) -> Self:
457455
indices = [i for i, arr in enumerate(self.arrays) if predicate(arr)]
458456
arrays = [self.arrays[i] for i in indices]
459457
# TODO copy?
@@ -464,7 +462,7 @@ def _get_data_subset(self: T, predicate: Callable) -> T:
464462
new_axes = [self._axes[0], new_cols]
465463
return type(self)(arrays, new_axes, verify_integrity=False)
466464

467-
def get_bool_data(self: T, copy: bool = False) -> T:
465+
def get_bool_data(self, copy: bool = False) -> Self:
468466
"""
469467
Select columns that are bool-dtype and object-dtype columns that are all-bool.
470468
@@ -475,7 +473,7 @@ def get_bool_data(self: T, copy: bool = False) -> T:
475473
"""
476474
return self._get_data_subset(lambda x: x.dtype == np.dtype(bool))
477475

478-
def get_numeric_data(self: T, copy: bool = False) -> T:
476+
def get_numeric_data(self, copy: bool = False) -> Self:
479477
"""
480478
Select columns that have a numeric dtype.
481479
@@ -489,7 +487,7 @@ def get_numeric_data(self: T, copy: bool = False) -> T:
489487
or getattr(arr.dtype, "_is_numeric", False)
490488
)
491489

492-
def copy(self: T, deep: bool | Literal["all"] | None = True) -> T:
490+
def copy(self, deep: bool | Literal["all"] | None = True) -> Self:
493491
"""
494492
Make deep or shallow copy of ArrayManager
495493
@@ -526,7 +524,7 @@ def copy_func(ax):
526524
return type(self)(new_arrays, new_axes, verify_integrity=False)
527525

528526
def reindex_indexer(
529-
self: T,
527+
self,
530528
new_axis,
531529
indexer,
532530
axis: AxisInt,
@@ -537,7 +535,7 @@ def reindex_indexer(
537535
only_slice: bool = False,
538536
# ArrayManager specific keywords
539537
use_na_proxy: bool = False,
540-
) -> T:
538+
) -> Self:
541539
axis = self._normalize_axis(axis)
542540
return self._reindex_indexer(
543541
new_axis,
@@ -550,15 +548,15 @@ def reindex_indexer(
550548
)
551549

552550
def _reindex_indexer(
553-
self: T,
551+
self,
554552
new_axis,
555553
indexer: npt.NDArray[np.intp] | None,
556554
axis: AxisInt,
557555
fill_value=None,
558556
allow_dups: bool = False,
559557
copy: bool | None = True,
560558
use_na_proxy: bool = False,
561-
) -> T:
559+
) -> Self:
562560
"""
563561
Parameters
564562
----------
@@ -629,12 +627,12 @@ def _reindex_indexer(
629627
return type(self)(new_arrays, new_axes, verify_integrity=False)
630628

631629
def take(
632-
self: T,
630+
self,
633631
indexer: npt.NDArray[np.intp],
634632
axis: AxisInt = 1,
635633
verify: bool = True,
636634
convert_indices: bool = True,
637-
) -> T:
635+
) -> Self:
638636
"""
639637
Take items along any axis.
640638
"""
@@ -923,7 +921,7 @@ def idelete(self, indexer) -> ArrayManager:
923921
# --------------------------------------------------------------------
924922
# Array-wise Operation
925923

926-
def grouped_reduce(self: T, func: Callable) -> T:
924+
def grouped_reduce(self, func: Callable) -> Self:
927925
"""
928926
Apply grouped reduction function columnwise, returning a new ArrayManager.
929927
@@ -962,7 +960,7 @@ def grouped_reduce(self: T, func: Callable) -> T:
962960
# expected "List[Union[ndarray, ExtensionArray]]"
963961
return type(self)(result_arrays, [index, columns]) # type: ignore[arg-type]
964962

965-
def reduce(self: T, func: Callable) -> T:
963+
def reduce(self, func: Callable) -> Self:
966964
"""
967965
Apply reduction function column-wise, returning a single-row ArrayManager.
968966

0 commit comments

Comments
 (0)