Skip to content

Commit e86bba6

Browse files
authored
REF: arrays_to_manager dont need names (#43110)
1 parent 1581f47 commit e86bba6

File tree

2 files changed

+36
-31
lines changed

2 files changed

+36
-31
lines changed

pandas/core/frame.py

Lines changed: 28 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -706,7 +706,6 @@ def __init__(
706706
arrays,
707707
columns,
708708
index,
709-
columns,
710709
dtype=dtype,
711710
typ=manager,
712711
)
@@ -752,9 +751,7 @@ def __init__(
752751
construct_1d_arraylike_from_scalar(data, len(index), dtype)
753752
for _ in range(len(columns))
754753
]
755-
mgr = arrays_to_mgr(
756-
values, columns, index, columns, dtype=None, typ=manager
757-
)
754+
mgr = arrays_to_mgr(values, columns, index, dtype=None, typ=manager)
758755
else:
759756
arr2d = construct_2d_arraylike_from_scalar(
760757
data,
@@ -2031,6 +2028,26 @@ def from_records(
20312028
if columns is not None:
20322029
columns = ensure_index(columns)
20332030

2031+
def maybe_reorder(
    arrays: list[ArrayLike], arr_columns: Index, columns: Index, index
) -> tuple[list[ArrayLike], Index, Index | None]:
    """
    Cheaply pre-reindex 'arrays' so that they line up with 'columns'.

    The arrays (labelled by 'arr_columns') are handed to reorder_arrays
    together with the requested 'columns' and the length of the first
    array (0 when there are no arrays).

    Returns
    -------
    tuple
        The arrays and column labels returned by reorder_arrays, followed
        by a result index: ``Index([])`` when there are no arrays and
        'index' is None, otherwise None.
    """
    n_arrays = len(arrays)
    length = len(arrays[0]) if n_arrays else 0

    # NOTE(review): 'index' is only consulted for the empty-input check
    # below; its length does not contribute to 'length' -- confirm that is
    # intended for the "no arrays but an explicit index" case.
    if n_arrays == 0 and index is None:
        # for backward compat use an object Index instead of RangeIndex
        result_index = Index([])
    else:
        result_index = None

    reordered, reordered_columns = reorder_arrays(
        arrays, arr_columns, columns, length
    )
    return reordered, reordered_columns, result_index
2050+
20342051
if is_iterator(data):
20352052
if nrows == 0:
20362053
return cls()
@@ -2068,20 +2085,9 @@ def from_records(
20682085
arr_columns_list.append(k)
20692086
arrays.append(v)
20702087

2071-
if len(arrays):
2072-
length = len(arrays[0])
2073-
elif index is not None:
2074-
length = len(index)
2075-
else:
2076-
length = 0
2077-
20782088
arr_columns = Index(arr_columns_list)
2079-
if len(arrays) == 0 and index is None and length == 0:
2080-
# for backward compat use an object Index instead of RangeIndex
2081-
result_index = Index([])
2082-
2083-
arrays, arr_columns = reorder_arrays(
2084-
arrays, arr_columns, columns, length
2089+
arrays, arr_columns, result_index = maybe_reorder(
2090+
arrays, arr_columns, columns, index
20852091
)
20862092

20872093
elif isinstance(data, (np.ndarray, DataFrame)):
@@ -2103,6 +2109,10 @@ def from_records(
21032109
arr_columns = ensure_index(arr_columns)
21042110
if columns is None:
21052111
columns = arr_columns
2112+
else:
2113+
arrays, arr_columns, result_index = maybe_reorder(
2114+
arrays, arr_columns, columns, index
2115+
)
21062116

21072117
if exclude is None:
21082118
exclude = set()
@@ -2136,7 +2146,7 @@ def from_records(
21362146
columns = columns.drop(exclude)
21372147

21382148
manager = get_option("mode.data_manager")
2139-
mgr = arrays_to_mgr(arrays, arr_columns, result_index, columns, typ=manager)
2149+
mgr = arrays_to_mgr(arrays, columns, result_index, typ=manager)
21402150

21412151
return cls(mgr)
21422152

@@ -2349,7 +2359,6 @@ def _from_arrays(
23492359
arrays,
23502360
columns,
23512361
index,
2352-
columns,
23532362
dtype=dtype,
23542363
verify_integrity=verify_integrity,
23552364
typ=manager,

pandas/core/internals/construction.py

Lines changed: 8 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -99,9 +99,8 @@
9999

100100
def arrays_to_mgr(
101101
arrays,
102-
arr_names: Index,
102+
columns: Index,
103103
index,
104-
columns,
105104
*,
106105
dtype: DtypeObj | None = None,
107106
verify_integrity: bool = True,
@@ -133,7 +132,7 @@ def arrays_to_mgr(
133132

134133
if typ == "block":
135134
return create_block_manager_from_arrays(
136-
arrays, arr_names, axes, consolidate=consolidate
135+
arrays, columns, axes, consolidate=consolidate
137136
)
138137
elif typ == "array":
139138
if len(columns) != len(arrays):
@@ -187,7 +186,7 @@ def rec_array_to_mgr(
187186
if columns is None:
188187
columns = arr_columns
189188

190-
mgr = arrays_to_mgr(arrays, columns, index, columns, dtype=dtype, typ=typ)
189+
mgr = arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ)
191190

192191
if copy:
193192
mgr = mgr.copy()
@@ -226,7 +225,7 @@ def mgr_to_mgr(mgr, typ: str, copy: bool = True):
226225
else:
227226
if mgr.ndim == 2:
228227
new_mgr = arrays_to_mgr(
229-
mgr.arrays, mgr.axes[0], mgr.axes[1], mgr.axes[0], typ="block"
228+
mgr.arrays, mgr.axes[0], mgr.axes[1], typ="block"
230229
)
231230
else:
232231
new_mgr = SingleBlockManager.from_array(mgr.arrays[0], mgr.index)
@@ -288,7 +287,7 @@ def ndarray_to_mgr(
288287
else:
289288
columns = ensure_index(columns)
290289

291-
return arrays_to_mgr(values, columns, index, columns, dtype=dtype, typ=typ)
290+
return arrays_to_mgr(values, columns, index, dtype=dtype, typ=typ)
292291

293292
elif is_extension_array_dtype(vdtype) and not is_1d_only_ea_dtype(vdtype):
294293
# i.e. Datetime64TZ
@@ -409,7 +408,6 @@ def dict_to_mgr(
409408
from pandas.core.series import Series
410409

411410
arrays = Series(data, index=columns, dtype=object)
412-
data_names = arrays.index
413411
missing = arrays.isna()
414412
if index is None:
415413
# GH10856
@@ -433,11 +431,11 @@ def dict_to_mgr(
433431
arrays.loc[missing] = [val] * missing.sum()
434432

435433
arrays = list(arrays)
436-
data_names = ensure_index(columns)
434+
columns = ensure_index(columns)
437435

438436
else:
439437
keys = list(data.keys())
440-
columns = data_names = Index(keys)
438+
columns = Index(keys)
441439
arrays = [com.maybe_iterable_to_list(data[k]) for k in keys]
442440
# GH#24096 need copy to be deep for datetime64tz case
443441
# TODO: See if we can avoid these copies
@@ -457,9 +455,7 @@ def dict_to_mgr(
457455
]
458456
# TODO: can we get rid of the dt64tz special case above?
459457

460-
return arrays_to_mgr(
461-
arrays, data_names, index, columns, dtype=dtype, typ=typ, consolidate=copy
462-
)
458+
return arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ, consolidate=copy)
463459

464460

465461
def nested_data_to_arrays(

0 commit comments

Comments
 (0)