REF: arrays_to_manager dont need names (#43110)

jbrockmendel · web-flow · commit e86bba67da5a · 2021-08-19T15:02:59.000-04:00
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -706,7 +706,6 @@ def __init__(
                         arrays,
                         columns,
                         index,
-                        columns,
                         dtype=dtype,
                         typ=manager,
                     )
@@ -752,9 +751,7 @@ def __init__(
                     construct_1d_arraylike_from_scalar(data, len(index), dtype)
                     for _ in range(len(columns))
                 ]
-                mgr = arrays_to_mgr(
-                    values, columns, index, columns, dtype=None, typ=manager
-                )
+                mgr = arrays_to_mgr(values, columns, index, dtype=None, typ=manager)
             else:
                 arr2d = construct_2d_arraylike_from_scalar(
                     data,
@@ -2031,6 +2028,26 @@ def from_records(
         if columns is not None:
             columns = ensure_index(columns)
 
+        def maybe_reorder(
+            arrays: list[ArrayLike], arr_columns: Index, columns: Index, index
+        ) -> tuple[list[ArrayLike], Index, Index | None]:
+            """
+            If our desired 'columns' do not match the data's pre-existing 'arr_columns',
+            we re-order our arrays.  This is like a pre-emptive (cheap) reindex.
+            """
+            if len(arrays):
+                length = len(arrays[0])
+            else:
+                length = 0
+
+            result_index = None
+            if len(arrays) == 0 and index is None and length == 0:
+                # for backward compat use an object Index instead of RangeIndex
+                result_index = Index([])
+
+            arrays, arr_columns = reorder_arrays(arrays, arr_columns, columns, length)
+            return arrays, arr_columns, result_index
+
         if is_iterator(data):
             if nrows == 0:
                 return cls()
@@ -2068,20 +2085,9 @@ def from_records(
                         arr_columns_list.append(k)
                         arrays.append(v)
 
-                if len(arrays):
-                    length = len(arrays[0])
-                elif index is not None:
-                    length = len(index)
-                else:
-                    length = 0
-
                 arr_columns = Index(arr_columns_list)
-                if len(arrays) == 0 and index is None and length == 0:
-                    # for backward compat use an object Index instead of RangeIndex
-                    result_index = Index([])
-
-                arrays, arr_columns = reorder_arrays(
-                    arrays, arr_columns, columns, length
+                arrays, arr_columns, result_index = maybe_reorder(
+                    arrays, arr_columns, columns, index
                 )
 
         elif isinstance(data, (np.ndarray, DataFrame)):
@@ -2103,6 +2109,10 @@ def from_records(
             arr_columns = ensure_index(arr_columns)
             if columns is None:
                 columns = arr_columns
+            else:
+                arrays, arr_columns, result_index = maybe_reorder(
+                    arrays, arr_columns, columns, index
+                )
 
         if exclude is None:
             exclude = set()
@@ -2136,7 +2146,7 @@ def from_records(
             columns = columns.drop(exclude)
 
         manager = get_option("mode.data_manager")
-        mgr = arrays_to_mgr(arrays, arr_columns, result_index, columns, typ=manager)
+        mgr = arrays_to_mgr(arrays, columns, result_index, typ=manager)
 
         return cls(mgr)
 
@@ -2349,7 +2359,6 @@ def _from_arrays(
             arrays,
             columns,
             index,
-            columns,
             dtype=dtype,
             verify_integrity=verify_integrity,
             typ=manager,
diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
@@ -99,9 +99,8 @@
 
 def arrays_to_mgr(
     arrays,
-    arr_names: Index,
+    columns: Index,
     index,
-    columns,
     *,
     dtype: DtypeObj | None = None,
     verify_integrity: bool = True,
@@ -133,7 +132,7 @@ def arrays_to_mgr(
 
     if typ == "block":
         return create_block_manager_from_arrays(
-            arrays, arr_names, axes, consolidate=consolidate
+            arrays, columns, axes, consolidate=consolidate
         )
     elif typ == "array":
         if len(columns) != len(arrays):
@@ -187,7 +186,7 @@ def rec_array_to_mgr(
     if columns is None:
         columns = arr_columns
 
-    mgr = arrays_to_mgr(arrays, columns, index, columns, dtype=dtype, typ=typ)
+    mgr = arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ)
 
     if copy:
         mgr = mgr.copy()
@@ -226,7 +225,7 @@ def mgr_to_mgr(mgr, typ: str, copy: bool = True):
         else:
             if mgr.ndim == 2:
                 new_mgr = arrays_to_mgr(
-                    mgr.arrays, mgr.axes[0], mgr.axes[1], mgr.axes[0], typ="block"
+                    mgr.arrays, mgr.axes[0], mgr.axes[1], typ="block"
                 )
             else:
                 new_mgr = SingleBlockManager.from_array(mgr.arrays[0], mgr.index)
@@ -288,7 +287,7 @@ def ndarray_to_mgr(
         else:
             columns = ensure_index(columns)
 
-        return arrays_to_mgr(values, columns, index, columns, dtype=dtype, typ=typ)
+        return arrays_to_mgr(values, columns, index, dtype=dtype, typ=typ)
 
     elif is_extension_array_dtype(vdtype) and not is_1d_only_ea_dtype(vdtype):
         # i.e. Datetime64TZ
@@ -409,7 +408,6 @@ def dict_to_mgr(
         from pandas.core.series import Series
 
         arrays = Series(data, index=columns, dtype=object)
-        data_names = arrays.index
         missing = arrays.isna()
         if index is None:
             # GH10856
@@ -433,11 +431,11 @@ def dict_to_mgr(
             arrays.loc[missing] = [val] * missing.sum()
 
         arrays = list(arrays)
-        data_names = ensure_index(columns)
+        columns = ensure_index(columns)
 
     else:
         keys = list(data.keys())
-        columns = data_names = Index(keys)
+        columns = Index(keys)
         arrays = [com.maybe_iterable_to_list(data[k]) for k in keys]
         # GH#24096 need copy to be deep for datetime64tz case
         # TODO: See if we can avoid these copies
@@ -457,9 +455,7 @@ def dict_to_mgr(
         ]
         # TODO: can we get rid of the dt64tz special case above?
 
-    return arrays_to_mgr(
-        arrays, data_names, index, columns, dtype=dtype, typ=typ, consolidate=copy
-    )
+    return arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ, consolidate=copy)
 
 
 def nested_data_to_arrays(