From 47ec081f71f38edf0ed95c7460ab330c0707134a Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 18 Aug 2021 16:10:38 -0700 Subject: [PATCH] REF: reorder_arrays handle missing --- pandas/core/frame.py | 18 ++++++++++++-- pandas/core/internals/construction.py | 36 +++++++++++++++++++++------ 2 files changed, 44 insertions(+), 10 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 823de2133f0b3..aa78f8f701840 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2019,6 +2019,8 @@ def from_records( 2 1 c 3 0 d """ + result_index = None + # Make a copy of the input columns so we can modify it if columns is not None: columns = ensure_index(columns) @@ -2060,8 +2062,21 @@ def from_records( arr_columns_list.append(k) arrays.append(v) + if len(arrays): + length = len(arrays[0]) + elif index is not None: + length = len(index) + else: + length = 0 + arr_columns = Index(arr_columns_list) - arrays, arr_columns = reorder_arrays(arrays, arr_columns, columns) + if len(arrays) == 0 and index is None and length == 0: + # for backward compat use an object Index instead of RangeIndex + result_index = Index([]) + + arrays, arr_columns = reorder_arrays( + arrays, arr_columns, columns, length + ) elif isinstance(data, (np.ndarray, DataFrame)): arrays, columns = to_arrays(data, columns) @@ -2088,7 +2103,6 @@ def from_records( else: exclude = set(exclude) - result_index = None if index is not None: if isinstance(index, str) or not hasattr(index, "__iter__"): i = columns.get_loc(index) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 7f3d246a6fda6..d3d2582099d88 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -180,14 +180,14 @@ def rec_array_to_mgr( # create the manager # error: Argument 1 to "reorder_arrays" has incompatible type "List[ndarray]"; - # expected "List[ExtensionArray]" + # expected "List[Union[ExtensionArray, ndarray]]" arrays, arr_columns = reorder_arrays( - new_arrays, arr_columns, columns # type: ignore[arg-type] + new_arrays, arr_columns, columns, len(index) # type: ignore[arg-type] ) if columns is None: columns = arr_columns - mgr = arrays_to_mgr(arrays, arr_columns, index, columns, dtype=dtype, typ=typ) + mgr = arrays_to_mgr(arrays, columns, index, columns, dtype=dtype, typ=typ) if copy: mgr = mgr.copy() @@ -654,13 +654,33 @@ def _extract_index(data) -> Index: def reorder_arrays( - arrays: list[ArrayLike], arr_columns: Index, columns: Index | None + arrays: list[ArrayLike], arr_columns: Index, columns: Index | None, length: int ) -> tuple[list[ArrayLike], Index]: + """ + Pre-emptively (cheaply) reindex arrays with new columns. + """ # reorder according to the columns - if columns is not None and len(columns) and len(arr_columns): - indexer = ensure_index(arr_columns).get_indexer(columns) - arr_columns = ensure_index([arr_columns[i] for i in indexer]) - arrays = [arrays[i] for i in indexer] + if columns is not None: + if not columns.equals(arr_columns): + # if they are equal, there is nothing to do + new_arrays: list[ArrayLike | None] + new_arrays = [None] * len(columns) + indexer = arr_columns.get_indexer(columns) + for i, k in enumerate(indexer): + if k == -1: + # by convention default is all-NaN object dtype + arr = np.empty(length, dtype=object) + arr.fill(np.nan) + else: + arr = arrays[k] + new_arrays[i] = arr + + # Incompatible types in assignment (expression has type + # "List[Union[ExtensionArray, ndarray[Any, Any], None]]", variable + # has type "List[Union[ExtensionArray, ndarray[Any, Any]]]") + arrays = new_arrays # type: ignore[assignment] + arr_columns = columns + return arrays, arr_columns