Skip to content

Commit 20f92c6

Browse files
committed
move lazy indexing adapter up out of chunkmanager code
1 parent 6a7a043 commit 20f92c6

File tree

3 files changed

+36
-44
lines changed

3 files changed

+36
-44
lines changed

xarray/core/daskmanager.py

Lines changed: 10 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33

44
import numpy as np
55

6-
from xarray.core import utils
76
from xarray.core.duck_array_ops import dask_available
7+
from xarray.core.indexing import ImplicitToExplicitIndexingAdapter
88
from xarray.core.parallelcompat import ChunkManagerEntrypoint, T_ChunkedArray, T_Chunks
99
from xarray.core.pycompat import is_duck_dask_array
1010

@@ -32,48 +32,15 @@ def chunks(self, data: "DaskArray") -> T_Chunks:
3232
def from_array(self, data, chunks, **kwargs) -> "DaskArray":
3333
import dask.array as da
3434

35-
from xarray.core import indexing
36-
37-
# dask-specific kwargs
38-
name = kwargs.pop("name", None)
39-
lock = kwargs.pop("lock", False)
40-
inline_array = kwargs.pop("inline_array", False)
41-
42-
if is_duck_dask_array(data):
43-
data = self.rechunk(data, chunks)
44-
else:
45-
# TODO move this up to variable.chunk
46-
if isinstance(data, indexing.ExplicitlyIndexed):
47-
# Unambiguously handle array storage backends (like NetCDF4 and h5py)
48-
# that can't handle general array indexing. For example, in netCDF4 you
49-
# can do "outer" indexing along two dimensions independent, which works
50-
# differently from how NumPy handles it.
51-
# da.from_array works by using lazy indexing with a tuple of slices.
52-
# Using OuterIndexer is a pragmatic choice: dask does not yet handle
53-
# different indexing types in an explicit way:
54-
# https://github.com/dask/dask/issues/2883
55-
data = indexing.ImplicitToExplicitIndexingAdapter(
56-
data, indexing.OuterIndexer
57-
)
58-
59-
# All of our lazily loaded backend array classes should use NumPy
60-
# array operations.
61-
dask_kwargs = {"meta": np.ndarray}
62-
else:
63-
dask_kwargs = {}
64-
65-
if utils.is_dict_like(chunks):
66-
chunks = tuple(chunks.get(n, s) for n, s in enumerate(data.shape))
67-
68-
data = da.from_array(
69-
data,
70-
chunks,
71-
name=name,
72-
lock=lock,
73-
inline_array=inline_array,
74-
**dask_kwargs,
75-
)
76-
return data
35+
if isinstance(data, ImplicitToExplicitIndexingAdapter):
36+
# lazily loaded backend array classes should use NumPy array operations.
37+
kwargs["meta"] = np.ndarray
38+
39+
return da.from_array(
40+
data,
41+
chunks,
42+
**kwargs,
43+
)
7744

7845
def compute(self, *data: "DaskArray", **kwargs) -> np.ndarray:
7946
from dask.array import compute

xarray/core/parallelcompat.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ def chunks(self, data: T_ChunkedArray) -> T_Chunks:
167167
def from_array(
168168
self, data: np.ndarray, chunks: T_Chunks, **kwargs
169169
) -> T_ChunkedArray:
170+
"""Called when .chunk is called on an xarray object that is not already chunked."""
170171
...
171172

172173
def rechunk(

xarray/core/variable.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1241,7 +1241,31 @@ def chunk(
12411241
inline_array=inline_array,
12421242
)
12431243

1244-
data = chunkmanager.from_array(self._data, chunks, **_from_array_kwargs)
1244+
data = self._data
1245+
if chunkmanager.is_chunked_array(data):
1246+
data = chunkmanager.rechunk(data, chunks)
1247+
else:
1248+
if isinstance(data, indexing.ExplicitlyIndexed):
1249+
# Unambiguously handle array storage backends (like NetCDF4 and h5py)
1250+
# that can't handle general array indexing. For example, in netCDF4 you
1251+
# can do "outer" indexing along two dimensions independent, which works
1252+
# differently from how NumPy handles it.
1253+
# da.from_array works by using lazy indexing with a tuple of slices.
1254+
# Using OuterIndexer is a pragmatic choice: dask does not yet handle
1255+
# different indexing types in an explicit way:
1256+
# https://github.com/dask/dask/issues/2883
1257+
data = indexing.ImplicitToExplicitIndexingAdapter(
1258+
data, indexing.OuterIndexer
1259+
)
1260+
1261+
if utils.is_dict_like(chunks):
1262+
chunks = tuple(chunks.get(n, s) for n, s in enumerate(data.shape))
1263+
1264+
data = chunkmanager.from_array(
1265+
data,
1266+
chunks,
1267+
**_from_array_kwargs,
1268+
)
12451269

12461270
return self._replace(data=data)
12471271

0 commit comments

Comments
 (0)