diff --git a/ci/install-upstream-wheels.sh b/ci/install-upstream-wheels.sh index 171ba3bf55f..39e04d04d47 100755 --- a/ci/install-upstream-wheels.sh +++ b/ci/install-upstream-wheels.sh @@ -14,7 +14,6 @@ conda uninstall -y --force \ fsspec \ zarr \ cftime \ - rasterio \ packaging \ pint \ bottleneck \ @@ -39,7 +38,6 @@ python -m pip install \ git+https://github.com/dask/distributed \ git+https://github.com/zarr-developers/zarr \ git+https://github.com/Unidata/cftime \ - git+https://github.com/rasterio/rasterio \ git+https://github.com/pypa/packaging \ git+https://github.com/hgrecco/pint \ git+https://github.com/pydata/bottleneck \ diff --git a/doc/api.rst b/doc/api.rst index 0d56fc73997..34d867cde65 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -595,7 +595,6 @@ Dataset methods load_dataset open_dataset open_mfdataset - open_rasterio open_zarr save_mfdataset Dataset.as_numpy @@ -1054,7 +1053,6 @@ Tutorial :toctree: generated/ tutorial.open_dataset - tutorial.open_rasterio tutorial.load_dataset Testing diff --git a/doc/conf.py b/doc/conf.py index 0b6c6766c3b..eb861004e2f 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -49,21 +49,12 @@ matplotlib.use("Agg") -try: - import rasterio # noqa: F401 -except ImportError: - allowed_failures.update( - ["gallery/plot_rasterio_rgb.py", "gallery/plot_rasterio.py"] - ) - try: import cartopy # noqa: F401 except ImportError: allowed_failures.update( [ "gallery/plot_cartopy_facetgrid.py", - "gallery/plot_rasterio_rgb.py", - "gallery/plot_rasterio.py", ] ) @@ -331,7 +322,6 @@ "matplotlib": ("https://matplotlib.org/stable/", None), "dask": ("https://docs.dask.org/en/latest", None), "cftime": ("https://unidata.github.io/cftime", None), - "rasterio": ("https://rasterio.readthedocs.io/en/latest", None), "sparse": ("https://sparse.pydata.org/en/latest/", None), } diff --git a/doc/gallery/plot_rasterio.py b/doc/gallery/plot_rasterio.py deleted file mode 100644 index 853923a38bd..00000000000 --- a/doc/gallery/plot_rasterio.py +++ /dev/null @@ -1,49 +0,0 @@ -""" -.. _recipes.rasterio: - -================================= -Parsing rasterio's geocoordinates -================================= - - -Converting a projection's cartesian coordinates into 2D longitudes and -latitudes. - -These new coordinates might be handy for plotting and indexing, but it should -be kept in mind that a grid which is regular in projection coordinates will -likely be irregular in lon/lat. It is often recommended to work in the data's -original map projection (see :ref:`recipes.rasterio_rgb`). -""" - -import cartopy.crs as ccrs -import matplotlib.pyplot as plt -import numpy as np -from pyproj import Transformer - -import xarray as xr - -# Read the data -url = "https://github.com/rasterio/rasterio/raw/master/tests/data/RGB.byte.tif" -da = xr.open_rasterio(url) - -# Compute the lon/lat coordinates with pyproj -transformer = Transformer.from_crs(da.crs, "EPSG:4326", always_xy=True) -lon, lat = transformer.transform(*np.meshgrid(da["x"], da["y"])) -da.coords["lon"] = (("y", "x"), lon) -da.coords["lat"] = (("y", "x"), lat) - -# Compute a greyscale out of the rgb image -greyscale = da.mean(dim="band") - -# Plot on a map -ax = plt.subplot(projection=ccrs.PlateCarree()) -greyscale.plot( - ax=ax, - x="lon", - y="lat", - transform=ccrs.PlateCarree(), - cmap="Greys_r", - add_colorbar=False, -) -ax.coastlines("10m", color="r") -plt.show() diff --git a/doc/gallery/plot_rasterio_rgb.py b/doc/gallery/plot_rasterio_rgb.py deleted file mode 100644 index 912224ac132..00000000000 --- a/doc/gallery/plot_rasterio_rgb.py +++ /dev/null @@ -1,32 +0,0 @@ -""" -.. _recipes.rasterio_rgb: - -============================ -imshow() and map projections -============================ - -Using rasterio's projection information for more accurate plots. - -This example extends :ref:`recipes.rasterio` and plots the image in the -original map projection instead of relying on pcolormesh and a map -transformation. -""" - -import cartopy.crs as ccrs -import matplotlib.pyplot as plt - -import xarray as xr - -# Read the data -url = "https://github.com/rasterio/rasterio/raw/master/tests/data/RGB.byte.tif" -da = xr.open_rasterio(url) - -# The data is in UTM projection. We have to set it manually until -# https://github.com/SciTools/cartopy/issues/813 is implemented -crs = ccrs.UTM("18N") - -# Plot on a map -ax = plt.subplot(projection=crs) -da.plot.imshow(ax=ax, rgb="band", transform=crs) -ax.coastlines("10m", color="r") -plt.show() diff --git a/doc/getting-started-guide/installing.rst b/doc/getting-started-guide/installing.rst index 6b3283adcbd..6fa068457c9 100644 --- a/doc/getting-started-guide/installing.rst +++ b/doc/getting-started-guide/installing.rst @@ -41,8 +41,6 @@ For netCDF and IO - `PseudoNetCDF `__: recommended for accessing CAMx, GEOS-Chem (bpch), NOAA ARL files, ICARTT files (ffi1001) and many other. -- `rasterio `__: for reading GeoTiffs and - other gridded raster datasets. - `iris `__: for conversion to and from iris' Cube objects diff --git a/doc/user-guide/io.rst b/doc/user-guide/io.rst index d5de181f562..baeb3ee3c97 100644 --- a/doc/user-guide/io.rst +++ b/doc/user-guide/io.rst @@ -1157,46 +1157,7 @@ search indices or other automated data discovery tools. Rasterio -------- -GeoTIFFs and other gridded raster datasets can be opened using `rasterio`_, if -rasterio is installed. Here is an example of how to use -:py:func:`open_rasterio` to read one of rasterio's `test files`_: - -.. deprecated:: 0.20.0 - - Deprecated in favor of rioxarray. - For information about transitioning, see: - `rioxarray getting started docs`` - -.. ipython:: - :verbatim: - - In [7]: rio = xr.open_rasterio("RGB.byte.tif") - - In [8]: rio - Out[8]: - - [1703814 values with dtype=uint8] - Coordinates: - * band (band) int64 1 2 3 - * y (y) float64 2.827e+06 2.826e+06 2.826e+06 2.826e+06 2.826e+06 ... - * x (x) float64 1.021e+05 1.024e+05 1.027e+05 1.03e+05 1.033e+05 ... - Attributes: - res: (300.0379266750948, 300.041782729805) - transform: (300.0379266750948, 0.0, 101985.0, 0.0, -300.041782729805, 28... - is_tiled: 0 - crs: +init=epsg:32618 - - -The ``x`` and ``y`` coordinates are generated out of the file's metadata -(``bounds``, ``width``, ``height``), and they can be understood as cartesian -coordinates defined in the file's projection provided by the ``crs`` attribute. -``crs`` is a PROJ4 string which can be parsed by e.g. `pyproj`_ or rasterio. -See :ref:`/examples/visualization_gallery.ipynb#Parsing-rasterio-geocoordinates` -for an example of how to convert these to longitudes and latitudes. - - -Additionally, you can use `rioxarray`_ for reading in GeoTiff, netCDF or other -GDAL readable raster data using `rasterio`_ as well as for exporting to a geoTIFF. +GDAL readable raster data using `rasterio`_ such as GeoTIFFs can be opened using the `rioxarray`_ extension. `rioxarray`_ can also handle geospatial related tasks such as re-projecting and clipping. .. ipython:: diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1bd777d0bc7..d07667ca101 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -26,7 +26,8 @@ New Features Breaking changes ~~~~~~~~~~~~~~~~ - +- Remove deprecated rasterio backend in favor of rioxarray (:pull:`7392`). + By `Scott Henderson `_. Deprecations ~~~~~~~~~~~~ diff --git a/pyproject.toml b/pyproject.toml index 409ce5bf2c6..88b34d002d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,7 +57,6 @@ module = [ "PseudoNetCDF.*", "pydap.*", "pytest.*", - "rasterio.*", "scipy.*", "seaborn.*", "setuptools", diff --git a/setup.cfg b/setup.cfg index 5d5cf161195..c0dd5ff9595 100644 --- a/setup.cfg +++ b/setup.cfg @@ -88,7 +88,6 @@ io = zarr fsspec cftime - rasterio pooch ## Scitools packages & dependencies (e.g: cartopy, cf-units) can be hard to install # scitools-iris diff --git a/xarray/__init__.py b/xarray/__init__.py index d064502c20b..75a58053663 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -7,7 +7,6 @@ open_mfdataset, save_mfdataset, ) -from xarray.backends.rasterio_ import open_rasterio from xarray.backends.zarr import open_zarr from xarray.coding.cftime_offsets import cftime_range, date_range, date_range_like from xarray.coding.cftimeindex import CFTimeIndex @@ -85,7 +84,6 @@ "open_dataarray", "open_dataset", "open_mfdataset", - "open_rasterio", "open_zarr", "polyval", "register_dataarray_accessor", diff --git a/xarray/backends/rasterio_.py b/xarray/backends/rasterio_.py deleted file mode 100644 index 15006dee5f1..00000000000 --- a/xarray/backends/rasterio_.py +++ /dev/null @@ -1,383 +0,0 @@ -from __future__ import annotations - -import os -import warnings - -import numpy as np - -from xarray.backends.common import BackendArray -from xarray.backends.file_manager import CachingFileManager -from xarray.backends.locks import SerializableLock -from xarray.core import indexing -from xarray.core.dataarray import DataArray -from xarray.core.utils import is_scalar - -# TODO: should this be GDAL_LOCK instead? -RASTERIO_LOCK = SerializableLock() - -_ERROR_MSG = ( - "The kind of indexing operation you are trying to do is not " - "valid on rasterio files. Try to load your data with ds.load()" - "first." -) - - -class RasterioArrayWrapper(BackendArray): - """A wrapper around rasterio dataset objects""" - - def __init__(self, manager, lock, vrt_params=None): - from rasterio.vrt import WarpedVRT - - self.manager = manager - self.lock = lock - - # cannot save riods as an attribute: this would break pickleability - riods = manager.acquire() - if vrt_params is not None: - riods = WarpedVRT(riods, **vrt_params) - self.vrt_params = vrt_params - self._shape = (riods.count, riods.height, riods.width) - - dtypes = riods.dtypes - if not np.all(np.asarray(dtypes) == dtypes[0]): - raise ValueError("All bands should have the same dtype") - self._dtype = np.dtype(dtypes[0]) - - @property - def dtype(self): - return self._dtype - - @property - def shape(self) -> tuple[int, ...]: - return self._shape - - def _get_indexer(self, key): - """Get indexer for rasterio array. - - Parameters - ---------- - key : tuple of int - - Returns - ------- - band_key: an indexer for the 1st dimension - window: two tuples. Each consists of (start, stop). - squeeze_axis: axes to be squeezed - np_ind: indexer for loaded numpy array - - See Also - -------- - indexing.decompose_indexer - """ - assert len(key) == 3, "rasterio datasets should always be 3D" - - # bands cannot be windowed but they can be listed - band_key = key[0] - np_inds = [] - # bands (axis=0) cannot be windowed but they can be listed - if isinstance(band_key, slice): - start, stop, step = band_key.indices(self.shape[0]) - band_key = np.arange(start, stop, step) - # be sure we give out a list - band_key = (np.asarray(band_key) + 1).tolist() - if isinstance(band_key, list): # if band_key is not a scalar - np_inds.append(slice(None)) - - # but other dims can only be windowed - window = [] - squeeze_axis = [] - for i, (k, n) in enumerate(zip(key[1:], self.shape[1:])): - if isinstance(k, slice): - # step is always positive. see indexing.decompose_indexer - start, stop, step = k.indices(n) - np_inds.append(slice(None, None, step)) - elif is_scalar(k): - # windowed operations will always return an array - # we will have to squeeze it later - squeeze_axis.append(-(2 - i)) - start = k - stop = k + 1 - else: - start, stop = np.min(k), np.max(k) + 1 - np_inds.append(k - start) - window.append((start, stop)) - - if isinstance(key[1], np.ndarray) and isinstance(key[2], np.ndarray): - # do outer-style indexing - np_inds[-2:] = np.ix_(*np_inds[-2:]) - - return band_key, tuple(window), tuple(squeeze_axis), tuple(np_inds) - - def _getitem(self, key): - from rasterio.vrt import WarpedVRT - - band_key, window, squeeze_axis, np_inds = self._get_indexer(key) - - if not band_key or any(start == stop for (start, stop) in window): - # no need to do IO - shape = (len(band_key),) + tuple(stop - start for (start, stop) in window) - out = np.zeros(shape, dtype=self.dtype) - else: - with self.lock: - riods = self.manager.acquire(needs_lock=False) - if self.vrt_params is not None: - riods = WarpedVRT(riods, **self.vrt_params) - out = riods.read(band_key, window=window) - - if squeeze_axis: - out = np.squeeze(out, axis=squeeze_axis) - return out[np_inds] - - def __getitem__(self, key): - return indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.OUTER, self._getitem - ) - - -def _parse_envi(meta): - """Parse ENVI metadata into Python data structures. - - See the link for information on the ENVI header file format: - http://www.harrisgeospatial.com/docs/enviheaderfiles.html - - Parameters - ---------- - meta : dict - Dictionary of keys and str values to parse, as returned by the rasterio - tags(ns='ENVI') call. - - Returns - ------- - parsed_meta : dict - Dictionary containing the original keys and the parsed values - - """ - - def parsevec(s): - return np.fromstring(s.strip("{}"), dtype="float", sep=",") - - def default(s): - return s.strip("{}") - - parse = {"wavelength": parsevec, "fwhm": parsevec} - parsed_meta = {k: parse.get(k, default)(v) for k, v in meta.items()} - return parsed_meta - - -def open_rasterio( - filename, - parse_coordinates=None, - chunks=None, - cache=None, - lock=None, - **kwargs, -): - """Open a file with rasterio. - - .. deprecated:: 0.20.0 - - Deprecated in favor of rioxarray. - For information about transitioning, see: - https://corteva.github.io/rioxarray/stable/getting_started/getting_started.html - - This should work with any file that rasterio can open (most often: - geoTIFF). The x and y coordinates are generated automatically from the - file's geoinformation, shifted to the center of each pixel (see - `"PixelIsArea" Raster Space - `_ - for more information). - - Parameters - ---------- - filename : str, rasterio.DatasetReader, or rasterio.WarpedVRT - Path to the file to open. Or already open rasterio dataset. - parse_coordinates : bool, optional - Whether to parse the x and y coordinates out of the file's - ``transform`` attribute or not. The default is to automatically - parse the coordinates only if they are rectilinear (1D). - It can be useful to set ``parse_coordinates=False`` - if your files are very large or if you don't need the coordinates. - chunks : int, tuple or dict, optional - Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or - ``{'x': 5, 'y': 5}``. If chunks is provided, it used to load the new - DataArray into a dask array. - cache : bool, optional - If True, cache data loaded from the underlying datastore in memory as - NumPy arrays when accessed to avoid reading from the underlying data- - store multiple times. Defaults to True unless you specify the `chunks` - argument to use dask, in which case it defaults to False. - lock : False, True or threading.Lock, optional - If chunks is provided, this argument is passed on to - :py:func:`dask.array.from_array`. By default, a global lock is - used to avoid issues with concurrent access to the same file when using - dask's multithreaded backend. - - Returns - ------- - data : DataArray - The newly created DataArray. - """ - warnings.warn( - "open_rasterio is Deprecated in favor of rioxarray. " - "For information about transitioning, see: " - "https://corteva.github.io/rioxarray/stable/getting_started/getting_started.html", - DeprecationWarning, - stacklevel=2, - ) - import rasterio - from rasterio.vrt import WarpedVRT - - vrt_params = None - if isinstance(filename, rasterio.io.DatasetReader): - filename = filename.name - elif isinstance(filename, rasterio.vrt.WarpedVRT): - vrt = filename - filename = vrt.src_dataset.name - vrt_params = dict( - src_crs=vrt.src_crs.to_string(), - crs=vrt.crs.to_string(), - resampling=vrt.resampling, - tolerance=vrt.tolerance, - src_nodata=vrt.src_nodata, - nodata=vrt.nodata, - width=vrt.width, - height=vrt.height, - src_transform=vrt.src_transform, - transform=vrt.transform, - dtype=vrt.working_dtype, - warp_extras=vrt.warp_extras, - ) - - if lock is None: - lock = RASTERIO_LOCK - - manager = CachingFileManager( - rasterio.open, - filename, - lock=lock, - mode="r", - kwargs=kwargs, - ) - riods = manager.acquire() - if vrt_params is not None: - riods = WarpedVRT(riods, **vrt_params) - - if cache is None: - cache = chunks is None - - coords = {} - - # Get bands - if riods.count < 1: - raise ValueError("Unknown dims") - coords["band"] = np.asarray(riods.indexes) - - # Get coordinates - if riods.transform.is_rectilinear: - # 1d coordinates - parse = True if parse_coordinates is None else parse_coordinates - if parse: - nx, ny = riods.width, riods.height - # xarray coordinates are pixel centered - x, _ = riods.transform * (np.arange(nx) + 0.5, np.zeros(nx) + 0.5) - _, y = riods.transform * (np.zeros(ny) + 0.5, np.arange(ny) + 0.5) - coords["y"] = y - coords["x"] = x - else: - # 2d coordinates - parse = False if (parse_coordinates is None) else parse_coordinates - if parse: - warnings.warn( - "The file coordinates' transformation isn't " - "rectilinear: xarray won't parse the coordinates " - "in this case. Set `parse_coordinates=False` to " - "suppress this warning.", - RuntimeWarning, - stacklevel=3, - ) - - # Attributes - attrs = {} - # Affine transformation matrix (always available) - # This describes coefficients mapping pixel coordinates to CRS - # For serialization store as tuple of 6 floats, the last row being - # always (0, 0, 1) per definition (see - # https://github.com/sgillies/affine) - attrs["transform"] = tuple(riods.transform)[:6] - if hasattr(riods, "crs") and riods.crs: - # CRS is a dict-like object specific to rasterio - # If CRS is not None, we convert it back to a PROJ4 string using - # rasterio itself - try: - attrs["crs"] = riods.crs.to_proj4() - except AttributeError: - attrs["crs"] = riods.crs.to_string() - if hasattr(riods, "res"): - # (width, height) tuple of pixels in units of CRS - attrs["res"] = riods.res - if hasattr(riods, "is_tiled"): - # Is the TIF tiled? (bool) - # We cast it to an int for netCDF compatibility - attrs["is_tiled"] = np.uint8(riods.is_tiled) - if hasattr(riods, "nodatavals"): - # The nodata values for the raster bands - attrs["nodatavals"] = tuple( - np.nan if nodataval is None else nodataval for nodataval in riods.nodatavals - ) - if hasattr(riods, "scales"): - # The scale values for the raster bands - attrs["scales"] = riods.scales - if hasattr(riods, "offsets"): - # The offset values for the raster bands - attrs["offsets"] = riods.offsets - if hasattr(riods, "descriptions") and any(riods.descriptions): - # Descriptions for each dataset band - attrs["descriptions"] = riods.descriptions - if hasattr(riods, "units") and any(riods.units): - # A list of units string for each dataset band - attrs["units"] = riods.units - - # Parse extra metadata from tags, if supported - parsers = {"ENVI": _parse_envi, "GTiff": lambda m: m} - - driver = riods.driver - if driver in parsers: - if driver == "GTiff": - meta = parsers[driver](riods.tags()) - else: - meta = parsers[driver](riods.tags(ns=driver)) - - for k, v in meta.items(): - # Add values as coordinates if they match the band count, - # as attributes otherwise - if isinstance(v, (list, np.ndarray)) and len(v) == riods.count: - coords[k] = ("band", np.asarray(v)) - else: - attrs[k] = v - - data = indexing.LazilyIndexedArray(RasterioArrayWrapper(manager, lock, vrt_params)) - - # this lets you write arrays loaded with rasterio - data = indexing.CopyOnWriteArray(data) - if cache and chunks is None: - data = indexing.MemoryCachedArray(data) - - result = DataArray(data=data, dims=("band", "y", "x"), coords=coords, attrs=attrs) - - if chunks is not None: - from dask.base import tokenize - - # augment the token with the file modification time - try: - mtime = os.path.getmtime(os.path.expanduser(filename)) - except OSError: - # the filename is probably an s3 bucket rather than a regular file - mtime = None - token = tokenize(filename, mtime, chunks) - name_prefix = f"open_rasterio-{token}" - result = result.chunk(chunks, name_prefix=name_prefix, token=token) - - # Make the file closeable - result.set_close(manager.close) - - return result diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 1bf72d1243b..12e101a475d 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -72,7 +72,6 @@ requires_pseudonetcdf, requires_pydap, requires_pynio, - requires_rasterio, requires_scipy, requires_scipy_or_netCDF4, requires_zarr, @@ -4405,562 +4404,6 @@ def save(self, dataset, path, **save_kwargs): pnc.pncwrite(pncf, path, **save_kwargs) -@requires_rasterio -@contextlib.contextmanager -def create_tmp_geotiff( - nx=4, - ny=3, - nz=3, - transform=None, - transform_args=default_value, - crs=default_value, - open_kwargs=None, - additional_attrs=None, -): - if transform_args is default_value: - transform_args = [5000, 80000, 1000, 2000.0] - if crs is default_value: - crs = { - "units": "m", - "no_defs": True, - "ellps": "WGS84", - "proj": "utm", - "zone": 18, - } - # yields a temporary geotiff file and a corresponding expected DataArray - import rasterio - from rasterio.transform import from_origin - - if open_kwargs is None: - open_kwargs = {} - - with create_tmp_file(suffix=".tif", allow_cleanup_failure=ON_WINDOWS) as tmp_file: - # allow 2d or 3d shapes - if nz == 1: - data_shape = ny, nx - write_kwargs = {"indexes": 1} - else: - data_shape = nz, ny, nx - write_kwargs = {} - data = np.arange(nz * ny * nx, dtype=rasterio.float32).reshape(*data_shape) - if transform is None: - transform = from_origin(*transform_args) - if additional_attrs is None: - additional_attrs = { - "descriptions": tuple(f"d{n + 1}" for n in range(nz)), - "units": tuple(f"u{n + 1}" for n in range(nz)), - } - with rasterio.open( - tmp_file, - "w", - driver="GTiff", - height=ny, - width=nx, - count=nz, - crs=crs, - transform=transform, - dtype=rasterio.float32, - **open_kwargs, - ) as s: - for attr, val in additional_attrs.items(): - setattr(s, attr, val) - s.write(data, **write_kwargs) - dx, dy = s.res[0], -s.res[1] - - a, b, c, d = transform_args - data = data[np.newaxis, ...] if nz == 1 else data - expected = DataArray( - data, - dims=("band", "y", "x"), - coords={ - "band": np.arange(nz) + 1, - "y": -np.arange(ny) * d + b + dy / 2, - "x": np.arange(nx) * c + a + dx / 2, - }, - ) - yield tmp_file, expected - - -@requires_rasterio -class TestRasterio: - @requires_scipy_or_netCDF4 - def test_serialization(self) -> None: - with create_tmp_geotiff(additional_attrs={}) as (tmp_file, expected): - # Write it to a netcdf and read again (roundtrip) - with pytest.warns(DeprecationWarning), xr.open_rasterio(tmp_file) as rioda: - with create_tmp_file(suffix=".nc") as tmp_nc_file: - rioda.to_netcdf(tmp_nc_file) - with xr.open_dataarray(tmp_nc_file) as ncds: - assert_identical(rioda, ncds) - - def test_utm(self) -> None: - with create_tmp_geotiff() as (tmp_file, expected): - with pytest.warns(DeprecationWarning), xr.open_rasterio(tmp_file) as rioda: - assert_allclose(rioda, expected) - assert rioda.attrs["scales"] == (1.0, 1.0, 1.0) - assert rioda.attrs["offsets"] == (0.0, 0.0, 0.0) - assert rioda.attrs["descriptions"] == ("d1", "d2", "d3") - assert rioda.attrs["units"] == ("u1", "u2", "u3") - assert isinstance(rioda.attrs["crs"], str) - assert isinstance(rioda.attrs["res"], tuple) - assert isinstance(rioda.attrs["is_tiled"], np.uint8) - assert isinstance(rioda.attrs["transform"], tuple) - assert len(rioda.attrs["transform"]) == 6 - np.testing.assert_array_equal( - rioda.attrs["nodatavals"], [np.NaN, np.NaN, np.NaN] - ) - - # Check no parse coords - with pytest.warns(DeprecationWarning), xr.open_rasterio( - tmp_file, parse_coordinates=False - ) as rioda: - assert "x" not in rioda.coords - assert "y" not in rioda.coords - - def test_non_rectilinear(self) -> None: - from rasterio.transform import from_origin - - # Create a geotiff file with 2d coordinates - with create_tmp_geotiff( - transform=from_origin(0, 3, 1, 1).rotation(45), crs=None - ) as (tmp_file, _): - # Default is to not parse coords - with pytest.warns(DeprecationWarning), xr.open_rasterio(tmp_file) as rioda: - assert "x" not in rioda.coords - assert "y" not in rioda.coords - assert "crs" not in rioda.attrs - assert rioda.attrs["scales"] == (1.0, 1.0, 1.0) - assert rioda.attrs["offsets"] == (0.0, 0.0, 0.0) - assert rioda.attrs["descriptions"] == ("d1", "d2", "d3") - assert rioda.attrs["units"] == ("u1", "u2", "u3") - assert isinstance(rioda.attrs["res"], tuple) - assert isinstance(rioda.attrs["is_tiled"], np.uint8) - assert isinstance(rioda.attrs["transform"], tuple) - assert len(rioda.attrs["transform"]) == 6 - - # See if a warning is raised if we force it - with pytest.warns(Warning, match="transformation isn't rectilinear"): - with xr.open_rasterio(tmp_file, parse_coordinates=True) as rioda: - assert "x" not in rioda.coords - assert "y" not in rioda.coords - - def test_platecarree(self) -> None: - with create_tmp_geotiff( - 8, - 10, - 1, - transform_args=[1, 2, 0.5, 2.0], - crs="+proj=latlong", - open_kwargs={"nodata": -9765}, - ) as (tmp_file, expected): - with pytest.warns(DeprecationWarning), xr.open_rasterio(tmp_file) as rioda: - assert_allclose(rioda, expected) - assert rioda.attrs["scales"] == (1.0,) - assert rioda.attrs["offsets"] == (0.0,) - assert isinstance(rioda.attrs["descriptions"], tuple) - assert isinstance(rioda.attrs["units"], tuple) - assert isinstance(rioda.attrs["crs"], str) - assert isinstance(rioda.attrs["res"], tuple) - assert isinstance(rioda.attrs["is_tiled"], np.uint8) - assert isinstance(rioda.attrs["transform"], tuple) - assert len(rioda.attrs["transform"]) == 6 - np.testing.assert_array_equal(rioda.attrs["nodatavals"], [-9765.0]) - - # rasterio throws a Warning, which is expected since we test rasterio's defaults - @pytest.mark.filterwarnings("ignore:Dataset has no geotransform") - def test_notransform(self) -> None: - # regression test for https://github.com/pydata/xarray/issues/1686 - - import rasterio - - # Create a geotiff file - with create_tmp_file(suffix=".tif") as tmp_file: - # data - nx, ny, nz = 4, 3, 3 - data = np.arange(nx * ny * nz, dtype=rasterio.float32).reshape(nz, ny, nx) - with rasterio.open( - tmp_file, - "w", - driver="GTiff", - height=ny, - width=nx, - count=nz, - dtype=rasterio.float32, - ) as s: - s.descriptions = ("nx", "ny", "nz") - s.units = ("cm", "m", "km") - s.write(data) - - # Tests - expected = DataArray( - data, - dims=("band", "y", "x"), - coords={ - "band": [1, 2, 3], - "y": [0.5, 1.5, 2.5], - "x": [0.5, 1.5, 2.5, 3.5], - }, - ) - with pytest.warns(DeprecationWarning), xr.open_rasterio(tmp_file) as rioda: - assert_allclose(rioda, expected) - assert rioda.attrs["scales"] == (1.0, 1.0, 1.0) - assert rioda.attrs["offsets"] == (0.0, 0.0, 0.0) - assert rioda.attrs["descriptions"] == ("nx", "ny", "nz") - assert rioda.attrs["units"] == ("cm", "m", "km") - assert isinstance(rioda.attrs["res"], tuple) - assert isinstance(rioda.attrs["is_tiled"], np.uint8) - assert isinstance(rioda.attrs["transform"], tuple) - assert len(rioda.attrs["transform"]) == 6 - - def test_indexing(self) -> None: - with create_tmp_geotiff( - 8, 10, 3, transform_args=[1, 2, 0.5, 2.0], crs="+proj=latlong" - ) as (tmp_file, expected): - with pytest.warns(DeprecationWarning), xr.open_rasterio( - tmp_file, cache=False - ) as actual: - # tests - # assert_allclose checks all data + coordinates - assert_allclose(actual, expected) - assert not actual.variable._in_memory - - # Basic indexer - ind = {"x": slice(2, 5), "y": slice(5, 7)} - assert_allclose(expected.isel(**ind), actual.isel(**ind)) - assert not actual.variable._in_memory - - ind2 = {"band": slice(1, 2), "x": slice(2, 5), "y": slice(5, 7)} - assert_allclose(expected.isel(**ind2), actual.isel(**ind2)) - assert not actual.variable._in_memory - - ind3 = {"band": slice(1, 2), "x": slice(2, 5), "y": 0} - assert_allclose(expected.isel(**ind3), actual.isel(**ind3)) - assert not actual.variable._in_memory - - # orthogonal indexer - ind4 = { - "band": np.array([2, 1, 0]), - "x": np.array([1, 0]), - "y": np.array([0, 2]), - } - assert_allclose(expected.isel(**ind4), actual.isel(**ind4)) - assert not actual.variable._in_memory - - ind5 = {"band": np.array([2, 1, 0]), "x": np.array([1, 0]), "y": 0} - assert_allclose(expected.isel(**ind5), actual.isel(**ind5)) - assert not actual.variable._in_memory - - ind6 = {"band": 0, "x": np.array([0, 0]), "y": np.array([1, 1, 1])} - assert_allclose(expected.isel(**ind6), actual.isel(**ind6)) - assert not actual.variable._in_memory - - # minus-stepped slice - ind7 = {"band": np.array([2, 1, 0]), "x": slice(-1, None, -1), "y": 0} - assert_allclose(expected.isel(**ind7), actual.isel(**ind7)) - assert not actual.variable._in_memory - - ind8 = {"band": np.array([2, 1, 0]), "x": 1, "y": slice(-1, 1, -2)} - assert_allclose(expected.isel(**ind8), actual.isel(**ind8)) - assert not actual.variable._in_memory - - # empty selection - ind9 = {"band": np.array([2, 1, 0]), "x": 1, "y": slice(2, 2, 1)} - assert_allclose(expected.isel(**ind9), actual.isel(**ind9)) - assert not actual.variable._in_memory - - ind10 = {"band": slice(0, 0), "x": 1, "y": 2} - assert_allclose(expected.isel(**ind10), actual.isel(**ind10)) - assert not actual.variable._in_memory - - # vectorized indexer - ind11 = { - "band": DataArray([2, 1, 0], dims="a"), - "x": DataArray([1, 0, 0], dims="a"), - "y": np.array([0, 2]), - } - assert_allclose(expected.isel(**ind11), actual.isel(**ind11)) - assert not actual.variable._in_memory - - ind12 = { - "band": DataArray([[2, 1, 0], [1, 0, 2]], dims=["a", "b"]), - "x": DataArray([[1, 0, 0], [0, 1, 0]], dims=["a", "b"]), - "y": 0, - } - assert_allclose(expected.isel(**ind12), actual.isel(**ind12)) - assert not actual.variable._in_memory - - # Selecting lists of bands is fine - ex = expected.isel(band=[1, 2]) - ac = actual.isel(band=[1, 2]) - assert_allclose(ac, ex) - ex = expected.isel(band=[0, 2]) - ac = actual.isel(band=[0, 2]) - assert_allclose(ac, ex) - - # Integer indexing - ex = expected.isel(band=1) - ac = actual.isel(band=1) - assert_allclose(ac, ex) - - ex = expected.isel(x=1, y=2) - ac = actual.isel(x=1, y=2) - assert_allclose(ac, ex) - - ex = expected.isel(band=0, x=1, y=2) - ac = actual.isel(band=0, x=1, y=2) - assert_allclose(ac, ex) - - # Mixed - ex = actual.isel(x=slice(2), y=slice(2)) - ac = actual.isel(x=[0, 1], y=[0, 1]) - assert_allclose(ac, ex) - - ex = expected.isel(band=0, x=1, y=slice(5, 7)) - ac = actual.isel(band=0, x=1, y=slice(5, 7)) - assert_allclose(ac, ex) - - ex = expected.isel(band=0, x=slice(2, 5), y=2) - ac = actual.isel(band=0, x=slice(2, 5), y=2) - assert_allclose(ac, ex) - - # One-element lists - ex = expected.isel(band=[0], x=slice(2, 5), y=[2]) - ac = actual.isel(band=[0], x=slice(2, 5), y=[2]) - assert_allclose(ac, ex) - - def test_caching(self) -> None: - with create_tmp_geotiff( - 8, 10, 3, transform_args=[1, 2, 0.5, 2.0], crs="+proj=latlong" - ) as (tmp_file, expected): - # Cache is the default - with pytest.warns(DeprecationWarning), xr.open_rasterio(tmp_file) as actual: - # This should cache everything - assert_allclose(actual, expected) - - # once cached, non-windowed indexing should become possible - ac = actual.isel(x=[2, 4]) - ex = expected.isel(x=[2, 4]) - assert_allclose(ac, ex) - - @requires_dask - def test_chunks(self) -> None: - with create_tmp_geotiff( - 8, 10, 3, transform_args=[1, 2, 0.5, 2.0], crs="+proj=latlong" - ) as (tmp_file, expected): - # Chunk at open time - with pytest.warns(DeprecationWarning), xr.open_rasterio( - tmp_file, chunks=(1, 2, 2) - ) as actual: - import dask.array as da - - assert isinstance(actual.data, da.Array) - assert "open_rasterio" in actual.data.name - - # do some arithmetic - ac = actual.mean() - ex = expected.mean() - assert_allclose(ac, ex) - - ac = actual.sel(band=1).mean(dim="x") - ex = expected.sel(band=1).mean(dim="x") - assert_allclose(ac, ex) - - @pytest.mark.xfail( - not has_dask, reason="without dask, a non-serializable lock is used" - ) - def test_pickle_rasterio(self) -> None: - # regression test for https://github.com/pydata/xarray/issues/2121 - with create_tmp_geotiff() as (tmp_file, expected): - with pytest.warns(DeprecationWarning), xr.open_rasterio(tmp_file) as rioda: - temp = pickle.dumps(rioda) - with pickle.loads(temp) as actual: - assert_equal(actual, rioda) - - def test_ENVI_tags(self) -> None: - rasterio = pytest.importorskip("rasterio") - from rasterio.transform import from_origin - - # Create an ENVI file with some tags in the ENVI namespace - # this test uses a custom driver, so we can't use create_tmp_geotiff - with create_tmp_file(suffix=".dat") as tmp_file: - # data - nx, ny, nz = 4, 3, 3 - data = np.arange(nx * ny * nz, dtype=rasterio.float32).reshape(nz, ny, nx) - transform = from_origin(5000, 80000, 1000, 2000.0) - with rasterio.open( - tmp_file, - "w", - driver="ENVI", - height=ny, - width=nx, - count=nz, - crs={ - "units": "m", - "no_defs": True, - "ellps": "WGS84", - "proj": "utm", - "zone": 18, - }, - transform=transform, - dtype=rasterio.float32, - ) as s: - s.update_tags( - ns="ENVI", - description="{Tagged file}", - wavelength="{123.000000, 234.234000, 345.345678}", - fwhm="{1.000000, 0.234000, 0.000345}", - ) - s.write(data) - dx, dy = s.res[0], -s.res[1] - - # Tests - coords = { - "band": [1, 2, 3], - "y": -np.arange(ny) * 2000 + 80000 + dy / 2, - "x": np.arange(nx) * 1000 + 5000 + dx / 2, - "wavelength": ("band", np.array([123, 234.234, 345.345678])), - "fwhm": ("band", np.array([1, 0.234, 0.000345])), - } - expected = DataArray(data, dims=("band", "y", "x"), coords=coords) - - with pytest.warns(DeprecationWarning), xr.open_rasterio(tmp_file) as rioda: - assert_allclose(rioda, expected) - assert isinstance(rioda.attrs["crs"], str) - assert isinstance(rioda.attrs["res"], tuple) - assert isinstance(rioda.attrs["is_tiled"], np.uint8) - assert isinstance(rioda.attrs["transform"], tuple) - assert len(rioda.attrs["transform"]) == 6 - # from ENVI tags - assert isinstance(rioda.attrs["description"], str) - assert isinstance(rioda.attrs["map_info"], str) - assert isinstance(rioda.attrs["samples"], str) - - def test_geotiff_tags(self) -> None: - # Create a geotiff file with some tags - with create_tmp_geotiff() as (tmp_file, _): - with pytest.warns(DeprecationWarning), xr.open_rasterio(tmp_file) as rioda: - assert isinstance(rioda.attrs["AREA_OR_POINT"], str) - - @requires_dask - def test_no_mftime(self) -> None: - # rasterio can accept "filename" urguments that are actually urls, - # including paths to remote files. - # In issue #1816, we found that these caused dask to break, because - # the modification time was used to determine the dask token. This - # tests ensure we can still chunk such files when reading with - # rasterio. - with create_tmp_geotiff( - 8, 10, 3, transform_args=[1, 2, 0.5, 2.0], crs="+proj=latlong" - ) as (tmp_file, expected): - with mock.patch("os.path.getmtime", side_effect=OSError): - with pytest.warns(DeprecationWarning), xr.open_rasterio( - tmp_file, chunks=(1, 2, 2) - ) as actual: - import dask.array as da - - assert isinstance(actual.data, da.Array) - assert_allclose(actual, expected) - - @network - def test_http_url(self) -> None: - # more examples urls here - # http://download.osgeo.org/geotiff/samples/ - url = "http://download.osgeo.org/geotiff/samples/made_up/ntf_nord.tif" - with pytest.warns(DeprecationWarning), xr.open_rasterio(url) as actual: - assert actual.shape == (1, 512, 512) - # make sure chunking works - with pytest.warns(DeprecationWarning), xr.open_rasterio( - url, chunks=(1, 256, 256) - ) as actual: - import dask.array as da - - assert isinstance(actual.data, da.Array) - - def test_rasterio_environment(self) -> None: - import rasterio - - with create_tmp_geotiff() as (tmp_file, expected): - # Should fail with error since suffix not allowed - with pytest.raises(Exception): - with rasterio.Env(GDAL_SKIP="GTiff"): - with pytest.warns(DeprecationWarning), xr.open_rasterio( - tmp_file - ) as actual: - assert_allclose(actual, expected) - - @pytest.mark.xfail(reason="rasterio 1.1.1 is broken. GH3573") - def test_rasterio_vrt(self) -> None: - import rasterio - - # tmp_file default crs is UTM: CRS({'init': 'epsg:32618'} - with create_tmp_geotiff() as (tmp_file, expected): - with rasterio.open(tmp_file) as src: - with rasterio.vrt.WarpedVRT(src, crs="epsg:4326") as vrt: - expected_shape = (vrt.width, vrt.height) - expected_crs = vrt.crs - expected_res = vrt.res - # Value of single pixel in center of image - lon, lat = vrt.xy(vrt.width // 2, vrt.height // 2) - expected_val = next(vrt.sample([(lon, lat)])) - with pytest.warns(DeprecationWarning), xr.open_rasterio(vrt) as da: - actual_shape = (da.sizes["x"], da.sizes["y"]) - actual_crs = da.crs - actual_res = da.res - actual_val = da.sel(dict(x=lon, y=lat), method="nearest").data - - assert actual_crs == expected_crs - assert actual_res == expected_res - assert actual_shape == expected_shape - assert expected_val.all() == actual_val.all() - - @pytest.mark.filterwarnings( - "ignore:open_rasterio is Deprecated in favor of rioxarray." - ) - def test_rasterio_vrt_with_transform_and_size(self) -> None: - # Test open_rasterio() support of WarpedVRT with transform, width and - # height (issue #2864) - - rasterio = pytest.importorskip("rasterio") - from affine import Affine - from rasterio.warp import calculate_default_transform - - with create_tmp_geotiff() as (tmp_file, expected): - with rasterio.open(tmp_file) as src: - # Estimate the transform, width and height - # for a change of resolution - # tmp_file initial res is (1000,2000) (default values) - trans, w, h = calculate_default_transform( - src.crs, src.crs, src.width, src.height, resolution=500, *src.bounds - ) - with rasterio.vrt.WarpedVRT( - src, transform=trans, width=w, height=h - ) as vrt: - expected_shape = (vrt.width, vrt.height) - expected_res = vrt.res - expected_transform = vrt.transform - with xr.open_rasterio(vrt) as da: - actual_shape = (da.sizes["x"], da.sizes["y"]) - actual_res = da.res - actual_transform = Affine(*da.transform) - assert actual_res == expected_res - assert actual_shape == expected_shape - assert actual_transform == expected_transform - - def test_rasterio_vrt_with_src_crs(self) -> None: - # Test open_rasterio() support of WarpedVRT with specified src_crs - - rasterio = pytest.importorskip("rasterio") - - # create geotiff with no CRS and specify it manually - with create_tmp_geotiff(crs=None) as (tmp_file, expected): - src_crs = rasterio.crs.CRS({"init": "epsg:32618"}) - with rasterio.open(tmp_file) as src: - assert src.crs is None - with rasterio.vrt.WarpedVRT(src, src_crs=src_crs) as vrt: - with pytest.warns(DeprecationWarning), xr.open_rasterio(vrt) as da: - assert da.crs == src_crs - - class TestEncodingInvalid: def test_extract_nc4_variable_encoding(self) -> None: var = xr.Variable(("x",), [1, 2, 3], {}, {"foo": "bar"}) diff --git a/xarray/tests/test_distributed.py b/xarray/tests/test_distributed.py index a29cccd0f50..a57f8728ef0 100644 --- a/xarray/tests/test_distributed.py +++ b/xarray/tests/test_distributed.py @@ -37,13 +37,11 @@ has_scipy, requires_cftime, requires_netCDF4, - requires_rasterio, requires_zarr, ) from xarray.tests.test_backends import ( ON_WINDOWS, create_tmp_file, - create_tmp_geotiff, ) from xarray.tests.test_dataset import create_test_data @@ -196,18 +194,6 @@ def test_dask_distributed_zarr_integration_test( assert_allclose(original, computed) -@requires_rasterio -@pytest.mark.filterwarnings("ignore:deallocating CachingFileManager") -def test_dask_distributed_rasterio_integration_test(loop) -> None: - with create_tmp_geotiff() as (tmp_file, expected): - with cluster() as (s, [a, b]): - with pytest.warns(DeprecationWarning), Client(s["address"], loop=loop): - da_tiff = xr.open_rasterio(tmp_file, chunks={"band": 1}) - assert isinstance(da_tiff.data, da.Array) - actual = da_tiff.compute() - assert_allclose(actual, expected) - - @pytest.mark.xfail( condition=Version(distributed.__version__) < Version("2022.02.0"), reason="https://github.com/dask/distributed/pull/5739", diff --git a/xarray/tutorial.py b/xarray/tutorial.py index 17fde8e3b92..498dd791dd0 100644 --- a/xarray/tutorial.py +++ b/xarray/tutorial.py @@ -9,13 +9,11 @@ import os import pathlib -import warnings from typing import TYPE_CHECKING import numpy as np from xarray.backends.api import open_dataset as _open_dataset -from xarray.backends.rasterio_ import open_rasterio as _open_rasterio from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset @@ -40,10 +38,6 @@ def _construct_cache_dir(path): external_urls = {} # type: dict -external_rasterio_urls = { - "RGB.byte": "https://github.com/rasterio/rasterio/raw/1.2.1/tests/data/RGB.byte.tif", - "shade": "https://github.com/rasterio/rasterio/raw/1.2.1/tests/data/shade.tif", -} file_formats = { "air_temperature": 3, "air_temperature_gradient": 4, @@ -172,84 +166,6 @@ def open_dataset( return ds -def open_rasterio( - name, - engine=None, - cache=True, - cache_dir=None, - **kws, -): - """ - Open a rasterio dataset from the online repository (requires internet). - - .. deprecated:: 0.20.0 - - Deprecated in favor of rioxarray. - For information about transitioning, see: - https://corteva.github.io/rioxarray/stable/getting_started/getting_started.html - - If a local copy is found then always use that to avoid network traffic. - - Available datasets: - - * ``"RGB.byte"``: TIFF file derived from USGS Landsat 7 ETM imagery. - * ``"shade"``: TIFF file derived from from USGS SRTM 90 data - - ``RGB.byte`` and ``shade`` are downloaded from the ``rasterio`` repository [1]_. - - Parameters - ---------- - name : str - Name of the file containing the dataset. - e.g. 'RGB.byte' - cache_dir : path-like, optional - The directory in which to search for and write cached data. - cache : bool, optional - If True, then cache data locally for use on subsequent calls - **kws : dict, optional - Passed to xarray.open_rasterio - - See Also - -------- - xarray.open_rasterio - - References - ---------- - .. [1] https://github.com/rasterio/rasterio - """ - warnings.warn( - "open_rasterio is Deprecated in favor of rioxarray. " - "For information about transitioning, see: " - "https://corteva.github.io/rioxarray/stable/getting_started/getting_started.html", - DeprecationWarning, - stacklevel=2, - ) - try: - import pooch - except ImportError as e: - raise ImportError( - "tutorial.open_rasterio depends on pooch to download and manage datasets." - " To proceed please install pooch." - ) from e - - logger = pooch.get_logger() - logger.setLevel("WARNING") - - cache_dir = _construct_cache_dir(cache_dir) - url = external_rasterio_urls.get(name) - if url is None: - raise ValueError(f"unknown rasterio dataset: {name}") - - # retrieve the file - filepath = pooch.retrieve(url=url, known_hash=None, path=cache_dir) - arr = _open_rasterio(filepath, **kws) - if not cache: - arr = arr.load() - pathlib.Path(filepath).unlink() - - return arr - - def load_dataset(*args, **kwargs) -> Dataset: """ Open, load into memory, and close a dataset from the online repository diff --git a/xarray/util/print_versions.py b/xarray/util/print_versions.py index 42ce3746942..e4984def498 100755 --- a/xarray/util/print_versions.py +++ b/xarray/util/print_versions.py @@ -108,7 +108,6 @@ def show_versions(file=sys.stdout): ("cftime", lambda mod: mod.__version__), ("nc_time_axis", lambda mod: mod.__version__), ("PseudoNetCDF", lambda mod: mod.__version__), - ("rasterio", lambda mod: mod.__version__), ("iris", lambda mod: mod.__version__), ("bottleneck", lambda mod: mod.__version__), ("dask", lambda mod: mod.__version__),