Skip to content

Commit 80b3e8d

Browse files
authored
REF: share recarray constructor code (#40129)
1 parent 95a86a9 commit 80b3e8d

File tree

3 files changed, with 32 additions and 19 deletions.

pandas/core/frame.py

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -178,10 +178,10 @@
178178
arrays_to_mgr,
179179
dataclasses_to_dicts,
180180
dict_to_mgr,
181-
masked_rec_array_to_mgr,
182181
mgr_to_mgr,
183182
ndarray_to_mgr,
184183
nested_data_to_arrays,
184+
rec_array_to_mgr,
185185
reorder_arrays,
186186
to_arrays,
187187
treat_as_nested,
@@ -580,7 +580,7 @@ def __init__(
580580

581581
# masked recarray
582582
if isinstance(data, mrecords.MaskedRecords):
583-
mgr = masked_rec_array_to_mgr(data, index, columns, dtype, copy)
583+
mgr = rec_array_to_mgr(data, index, columns, dtype, copy)
584584

585585
# a masked array
586586
else:
@@ -590,11 +590,7 @@ def __init__(
590590
elif isinstance(data, (np.ndarray, Series, Index)):
591591
if data.dtype.names:
592592
# i.e. numpy structured array
593-
data_columns = list(data.dtype.names)
594-
data = {k: data[k] for k in data_columns}
595-
if columns is None:
596-
columns = data_columns
597-
mgr = dict_to_mgr(data, index, columns, dtype=dtype)
593+
mgr = rec_array_to_mgr(data, index, columns, dtype, copy)
598594
elif getattr(data, "name", None) is not None:
599595
# i.e. Series/Index with non-None name
600596
mgr = dict_to_mgr({data.name: data}, index, columns, dtype=dtype)

pandas/core/internals/construction.py

Lines changed: 28 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -117,8 +117,12 @@ def arrays_to_mgr(
117117
return create_block_manager_from_arrays(arrays, arr_names, axes)
118118

119119

120-
def masked_rec_array_to_mgr(
121-
data: MaskedRecords, index, columns, dtype: Optional[DtypeObj], copy: bool
120+
def rec_array_to_mgr(
121+
data: Union[MaskedRecords, np.recarray, np.ndarray],
122+
index,
123+
columns,
124+
dtype: Optional[DtypeObj],
125+
copy: bool,
122126
):
123127
"""
124128
Extract from a masked rec array and create the manager.
@@ -136,16 +140,10 @@ def masked_rec_array_to_mgr(
136140
arrays, arr_columns = to_arrays(fdata, columns)
137141

138142
# fill if needed
139-
new_arrays = []
140-
for col in arr_columns:
141-
arr = data[col]
142-
fv = arr.fill_value
143-
144-
mask = ma.getmaskarray(arr)
145-
if mask.any():
146-
arr, fv = maybe_upcast(arr, fill_value=fv, copy=True)
147-
arr[mask] = fv
148-
new_arrays.append(arr)
143+
if isinstance(data, np.ma.MaskedArray):
144+
new_arrays = fill_masked_arrays(data, arr_columns)
145+
else:
146+
new_arrays = arrays
149147

150148
# create the manager
151149
arrays, arr_columns = reorder_arrays(new_arrays, arr_columns, columns)
@@ -159,6 +157,24 @@ def masked_rec_array_to_mgr(
159157
return mgr
160158

161159

160+
def fill_masked_arrays(data: MaskedRecords, arr_columns: Index) -> List[np.ndarray]:
161+
"""
162+
Convert numpy MaskedRecords to ensure mask is softened.
163+
"""
164+
new_arrays = []
165+
166+
for col in arr_columns:
167+
arr = data[col]
168+
fv = arr.fill_value
169+
170+
mask = ma.getmaskarray(arr)
171+
if mask.any():
172+
arr, fv = maybe_upcast(arr, fill_value=fv, copy=True)
173+
arr[mask] = fv
174+
new_arrays.append(arr)
175+
return new_arrays
176+
177+
162178
def mgr_to_mgr(mgr, typ: str):
163179
"""
164180
Convert to specific type of Manager. Does not copy if the type is already

pandas/tests/frame/test_constructors.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -279,6 +279,7 @@ def test_constructor_rec(self, float_frame):
279279
tm.assert_index_equal(df2.columns, Index(rec.dtype.names))
280280
tm.assert_index_equal(df2.index, index)
281281

282+
# case with columns != the ones we would infer from the data
282283
rng = np.arange(len(rec))[::-1]
283284
df3 = DataFrame(rec, index=rng, columns=["C", "B"])
284285
expected = DataFrame(rec, index=rng).reindex(columns=["C", "B"])

0 commit comments

Comments
 (0)