|
| 1 | +# Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/computation/align.py |
| 2 | +""" |
| 3 | +Core eval alignment algorithms. |
| 4 | +""" |
| 5 | +from __future__ import annotations |
| 6 | + |
| 7 | +from functools import partial, wraps |
| 8 | +from typing import Callable, TYPE_CHECKING |
| 9 | +import warnings |
| 10 | + |
| 11 | +import bigframes_vendored.pandas.core.common as com |
| 12 | +from bigframes_vendored.pandas.core.computation.common import result_type_many |
| 13 | +from bigframes_vendored.pandas.util._exceptions import find_stack_level |
| 14 | +import numpy as np |
| 15 | +from pandas.errors import PerformanceWarning |
| 16 | + |
| 17 | +if TYPE_CHECKING: |
| 18 | + from collections.abc import Sequence |
| 19 | + |
| 20 | + from bigframes_vendored.pandas.core.generic import NDFrame |
| 21 | + from bigframes_vendored.pandas.core.indexes.base import Index |
| 22 | + from pandas._typing import F |
| 23 | + |
| 24 | + |
| 25 | +def _align_core_single_unary_op( |
| 26 | + term, |
| 27 | +) -> tuple[partial | type[NDFrame], dict[str, Index] | None]: |
| 28 | + typ: partial | type[NDFrame] |
| 29 | + axes: dict[str, Index] | None = None |
| 30 | + |
| 31 | + if isinstance(term.value, np.ndarray): |
| 32 | + typ = partial(np.asanyarray, dtype=term.value.dtype) |
| 33 | + else: |
| 34 | + typ = type(term.value) |
| 35 | + if hasattr(term.value, "axes"): |
| 36 | + axes = _zip_axes_from_type(typ, term.value.axes) |
| 37 | + |
| 38 | + return typ, axes |
| 39 | + |
| 40 | + |
| 41 | +def _zip_axes_from_type( |
| 42 | + typ: type[NDFrame], new_axes: Sequence[Index] |
| 43 | +) -> dict[str, Index]: |
| 44 | + return {name: new_axes[i] for i, name in enumerate(typ._AXIS_ORDERS)} |
| 45 | + |
| 46 | + |
| 47 | +def _any_pandas_objects(terms) -> bool: |
| 48 | + """ |
| 49 | + Check a sequence of terms for instances of PandasObject. |
| 50 | + """ |
| 51 | + return any(is_pandas_object(term.value) for term in terms) |
| 52 | + |
| 53 | + |
| 54 | +def _filter_special_cases(f) -> Callable[[F], F]: |
| 55 | + @wraps(f) |
| 56 | + def wrapper(terms): |
| 57 | + # single unary operand |
| 58 | + if len(terms) == 1: |
| 59 | + return _align_core_single_unary_op(terms[0]) |
| 60 | + |
| 61 | + term_values = (term.value for term in terms) |
| 62 | + |
| 63 | + # we don't have any pandas objects |
| 64 | + if not _any_pandas_objects(terms): |
| 65 | + return result_type_many(*term_values), None |
| 66 | + |
| 67 | + return f(terms) |
| 68 | + |
| 69 | + return wrapper |
| 70 | + |
| 71 | + |
| 72 | +@_filter_special_cases |
| 73 | +def _align_core(terms): |
| 74 | + term_index = [i for i, term in enumerate(terms) if hasattr(term.value, "axes")] |
| 75 | + term_dims = [terms[i].value.ndim for i in term_index] |
| 76 | + |
| 77 | + from pandas import Series |
| 78 | + |
| 79 | + ndims = Series(dict(zip(term_index, term_dims))) |
| 80 | + |
| 81 | + # initial axes are the axes of the largest-axis'd term |
| 82 | + biggest = terms[ndims.idxmax()].value |
| 83 | + typ = biggest._constructor |
| 84 | + axes = biggest.axes |
| 85 | + naxes = len(axes) |
| 86 | + gt_than_one_axis = naxes > 1 |
| 87 | + |
| 88 | + for value in (terms[i].value for i in term_index): |
| 89 | + value_is_series = is_series(value) |
| 90 | + is_series_and_gt_one_axis = value_is_series and gt_than_one_axis |
| 91 | + |
| 92 | + for axis, items in enumerate(value.axes): |
| 93 | + if is_series_and_gt_one_axis: |
| 94 | + ax, itm = naxes - 1, value.index |
| 95 | + else: |
| 96 | + ax, itm = axis, items |
| 97 | + |
| 98 | + if not axes[ax].is_(itm): |
| 99 | + axes[ax] = axes[ax].join(itm, how="outer") |
| 100 | + |
| 101 | + for i, ndim in ndims.items(): |
| 102 | + for axis, items in zip(range(ndim), axes): |
| 103 | + ti = terms[i].value |
| 104 | + |
| 105 | + if hasattr(ti, "reindex"): |
| 106 | + transpose = value_is_series(ti) and naxes > 1 |
| 107 | + reindexer = axes[naxes - 1] if transpose else items |
| 108 | + |
| 109 | + term_axis_size = len(ti.axes[axis]) |
| 110 | + reindexer_size = len(reindexer) |
| 111 | + |
| 112 | + ordm = np.log10(max(1, abs(reindexer_size - term_axis_size))) |
| 113 | + if ordm >= 1 and reindexer_size >= 10000: |
| 114 | + w = ( |
| 115 | + f"Alignment difference on axis {axis} is larger " |
| 116 | + f"than an order of magnitude on term {repr(terms[i].name)}, " |
| 117 | + f"by more than {ordm:.4g}; performance may suffer." |
| 118 | + ) |
| 119 | + warnings.warn( |
| 120 | + w, category=PerformanceWarning, stacklevel=find_stack_level() |
| 121 | + ) |
| 122 | + |
| 123 | + obj = ti.reindex(reindexer, axis=axis, copy=False) |
| 124 | + terms[i].update(obj) |
| 125 | + |
| 126 | + terms[i].update(terms[i].value.values) |
| 127 | + |
| 128 | + return typ, _zip_axes_from_type(typ, axes) |
| 129 | + |
| 130 | + |
| 131 | +def align_terms(terms): |
| 132 | + """ |
| 133 | + Align a set of terms. |
| 134 | + """ |
| 135 | + try: |
| 136 | + # flatten the parse tree (a nested list, really) |
| 137 | + terms = list(com.flatten(terms)) |
| 138 | + except TypeError: |
| 139 | + # can't iterate so it must just be a constant or single variable |
| 140 | + if is_series_or_dataframe(terms.value): |
| 141 | + typ = type(terms.value) |
| 142 | + return typ, _zip_axes_from_type(typ, terms.value.axes) |
| 143 | + return np.result_type(terms.type), None |
| 144 | + |
| 145 | + # if all resolved variables are numeric scalars |
| 146 | + if all(term.is_scalar for term in terms): |
| 147 | + return result_type_many(*(term.value for term in terms)).type, None |
| 148 | + |
| 149 | + # perform the main alignment |
| 150 | + typ, axes = _align_core(terms) |
| 151 | + return typ, axes |
| 152 | + |
| 153 | + |
| 154 | +def reconstruct_object(typ, obj, axes, dtype): |
| 155 | + """ |
| 156 | + Reconstruct an object given its type, raw value, and possibly empty |
| 157 | + (None) axes. |
| 158 | +
|
| 159 | + Parameters |
| 160 | + ---------- |
| 161 | + typ : object |
| 162 | + A type |
| 163 | + obj : object |
| 164 | + The value to use in the type constructor |
| 165 | + axes : dict |
| 166 | + The axes to use to construct the resulting pandas object |
| 167 | +
|
| 168 | + Returns |
| 169 | + ------- |
| 170 | + ret : typ |
| 171 | + An object of type ``typ`` with the value `obj` and possible axes |
| 172 | + `axes`. |
| 173 | + """ |
| 174 | + try: |
| 175 | + typ = typ.type |
| 176 | + except AttributeError: |
| 177 | + pass |
| 178 | + |
| 179 | + res_t = np.result_type(obj.dtype, dtype) |
| 180 | + |
| 181 | + if not isinstance(typ, partial) and is_pandas_type(typ): |
| 182 | + return typ(obj, dtype=res_t, **axes) |
| 183 | + |
| 184 | + # special case for pathological things like ~True/~False |
| 185 | + if hasattr(res_t, "type") and typ == np.bool_ and res_t != np.bool_: |
| 186 | + ret_value = res_t.type(obj) |
| 187 | + else: |
| 188 | + ret_value = typ(obj).astype(res_t) |
| 189 | + # The condition is to distinguish 0-dim array (returned in case of |
| 190 | + # scalar) and 1 element array |
| 191 | + # e.g. np.array(0) and np.array([0]) |
| 192 | + if ( |
| 193 | + len(obj.shape) == 1 |
| 194 | + and len(obj) == 1 |
| 195 | + and not isinstance(ret_value, np.ndarray) |
| 196 | + ): |
| 197 | + ret_value = np.array([ret_value]).astype(res_t) |
| 198 | + |
| 199 | + return ret_value |
| 200 | + |
| 201 | + |
| 202 | +# Custom to recognize BigFrames types |
| 203 | +def is_series(obj) -> bool: |
| 204 | + from bigframes_vendored.pandas.core.series import Series |
| 205 | + |
| 206 | + return isinstance(obj, Series) |
| 207 | + |
| 208 | + |
| 209 | +def is_series_or_dataframe(obj) -> bool: |
| 210 | + from bigframes_vendored.pandas.core.frame import NDFrame |
| 211 | + |
| 212 | + return isinstance(obj, NDFrame) |
| 213 | + |
| 214 | + |
| 215 | +def is_pandas_object(obj) -> bool: |
| 216 | + from bigframes_vendored.pandas.core.frame import NDFrame |
| 217 | + from bigframes_vendored.pandas.core.indexes.base import Index |
| 218 | + |
| 219 | + return isinstance(obj, NDFrame) or isinstance(obj, Index) |
| 220 | + |
| 221 | + |
| 222 | +def is_pandas_type(type) -> bool: |
| 223 | + from bigframes_vendored.pandas.core.frame import NDFrame |
| 224 | + from bigframes_vendored.pandas.core.indexes.base import Index |
| 225 | + |
| 226 | + return issubclass(type, NDFrame) or issubclass(type, Index) |
0 commit comments