|
14 | 14 | Optional,
|
15 | 15 | Sequence,
|
16 | 16 | Tuple,
|
| 17 | + TypeVar, |
17 | 18 | Union,
|
18 | 19 | cast,
|
19 | 20 | overload,
|
|
63 | 64 | )
|
64 | 65 |
|
65 | 66 | if TYPE_CHECKING:
|
| 67 | + T_DSorDA = TypeVar("T_DSorDA", "DataArray", Dataset) |
| 68 | + |
66 | 69 | try:
|
67 | 70 | from dask.delayed import Delayed
|
68 | 71 | except ImportError:
|
@@ -3038,6 +3041,79 @@ def integrate(
|
3038 | 3041 | ds = self._to_temp_dataset().integrate(dim, datetime_unit)
|
3039 | 3042 | return self._from_temp_dataset(ds)
|
3040 | 3043 |
|
| 3044 | + def unify_chunks(self) -> "DataArray": |
| 3045 | + """ Unify chunk size along all chunked dimensions of this DataArray. |
| 3046 | +
|
| 3047 | + Returns |
| 3048 | + ------- |
| 3049 | +
|
| 3050 | + DataArray with consistent chunk sizes for all dask-array variables |
| 3051 | +
|
| 3052 | + See Also |
| 3053 | + -------- |
| 3054 | +
|
| 3055 | + dask.array.core.unify_chunks |
| 3056 | + """ |
| 3057 | + ds = self._to_temp_dataset().unify_chunks() |
| 3058 | + return self._from_temp_dataset(ds) |
| 3059 | + |
| 3060 | + def map_blocks( |
| 3061 | + self, |
| 3062 | + func: "Callable[..., T_DSorDA]", |
| 3063 | + args: Sequence[Any] = (), |
| 3064 | + kwargs: Mapping[str, Any] = None, |
| 3065 | + ) -> "T_DSorDA": |
| 3066 | + """ |
| 3067 | + Apply a function to each chunk of this DataArray. This method is experimental |
| 3068 | + and its signature may change. |
| 3069 | +
|
| 3070 | + Parameters |
| 3071 | + ---------- |
| 3072 | + func: callable |
| 3073 | + User-provided function that accepts a DataArray as its first parameter. The |
| 3074 | + function will receive a subset of this DataArray, corresponding to one chunk |
| 3075 | + along each chunked dimension. ``func`` will be executed as |
| 3076 | + ``func(obj_subset, *args, **kwargs)``. |
| 3077 | +
|
| 3078 | + The function will be first run on mocked-up data, that looks like this array |
| 3079 | + but has sizes 0, to determine properties of the returned object such as |
| 3080 | + dtype, variable names, new dimensions and new indexes (if any). |
| 3081 | +
|
| 3082 | + This function must return either a single DataArray or a single Dataset. |
| 3083 | +
|
| 3084 | + This function cannot change size of existing dimensions, or add new chunked |
| 3085 | + dimensions. |
| 3086 | + args: Sequence |
| 3087 | + Passed verbatim to func after unpacking, after the sliced DataArray. xarray |
| 3088 | + objects, if any, will not be split by chunks. Passing dask collections is |
| 3089 | + not allowed. |
| 3090 | + kwargs: Mapping |
| 3091 | + Passed verbatim to func after unpacking. xarray objects, if any, will not be |
| 3092 | + split by chunks. Passing dask collections is not allowed. |
| 3093 | +
|
| 3094 | + Returns |
| 3095 | + ------- |
| 3096 | + A single DataArray or Dataset with dask backend, reassembled from the outputs of |
| 3097 | + the function. |
| 3098 | +
|
| 3099 | + Notes |
| 3100 | + ----- |
| 3101 | + This method is designed for when one needs to manipulate a whole xarray object |
| 3102 | + within each chunk. In the more common case where one can work on numpy arrays, |
| 3103 | + it is recommended to use apply_ufunc. |
| 3104 | +
|
| 3105 | + If none of the variables in this DataArray is backed by dask, calling this |
| 3106 | + method is equivalent to calling ``func(self, *args, **kwargs)``. |
| 3107 | +
|
| 3108 | + See Also |
| 3109 | + -------- |
| 3110 | + dask.array.map_blocks, xarray.apply_ufunc, xarray.map_blocks, |
| 3111 | + xarray.Dataset.map_blocks |
| 3112 | + """ |
| 3113 | + from .parallel import map_blocks |
| 3114 | + |
| 3115 | + return map_blocks(func, self, args, kwargs) |
| 3116 | + |
    # Expose StringAccessor under the attribute name ``str`` (presumably
    # vectorized string methods, mirroring pandas' ``.str`` — confirm in
    # the accessor's own docs).
    # This needs to be at the end of the class body, or mypy will confuse
    # the attribute with the builtin `str`:
    # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names
    str = property(StringAccessor)
|
|
0 commit comments