Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
2af21ab
fix: be more cautious when claiming a backend can open a URL
ianhi Sep 30, 2025
1a3e7df
add whats new entry
ianhi Sep 30, 2025
d6a47b7
fixes from review
ianhi Sep 30, 2025
7ed1f0a
more caution in scipy netcdf backend
ianhi Oct 1, 2025
60c1158
correct suffix detection for scipy backend
ianhi Oct 1, 2025
d2334e4
stricter URL detection for netcdf/dap
ianhi Oct 3, 2025
ef3e07c
no query params for h5netcdf
ianhi Oct 3, 2025
c07e7ea
scipy no urls
ianhi Oct 3, 2025
017713b
Merge branch 'main' into fix-netcdf4-remote-zarr-detection
ianhi Oct 3, 2025
9cf669b
don't try to read magic numbers for remote uris
ianhi Oct 6, 2025
e0e2da2
Merge branch 'main' into fix-netcdf4-remote-zarr-detection
ianhi Oct 6, 2025
bfefb21
Merge branch 'main' into fix-netcdf4-remote-zarr-detection
ianhi Oct 8, 2025
a50b2f6
review comments
ianhi Oct 8, 2025
10d6edd
fix windows failures
ianhi Oct 8, 2025
8c77986
docs on backend resolution
ianhi Oct 8, 2025
079b290
more complete table
ianhi Oct 8, 2025
6ee2910
no horizontal scroll on table
ianhi Oct 8, 2025
418ceee
Merge branch 'main' into fix-netcdf4-remote-zarr-detection
ianhi Oct 8, 2025
e32e93a
fix whats new header
ianhi Oct 8, 2025
f445045
correct description
ianhi Oct 8, 2025
4a717e7
case insensitivity to DAP: vs dap:
ianhi Oct 8, 2025
7dc0995
Merge branch 'main' into fix-netcdf4-remote-zarr-detection
ianhi Oct 14, 2025
00d07ee
thredds
ianhi Oct 14, 2025
f29d7d8
move import
ianhi Oct 14, 2025
d98d6dd
claude import rules
ianhi Oct 14, 2025
8f150dd
has_pydap instead of requires pydap
ianhi Oct 14, 2025
cc64d7c
Merge branch 'main' into fix-netcdf4-remote-zarr-detection
ianhi Oct 15, 2025
8123beb
Merge branch 'main' into fix-netcdf4-remote-zarr-detection
ianhi Oct 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ Deprecations

Bug fixes
~~~~~~~~~

- ``netcdf4`` and ``pydap`` engines no longer incorrectly claim to read all remote URLs, which prevented
  the ``zarr`` backend from reading remote zarr stores without an explicit ``engine=`` argument
  (:pull:`10804`). By `Ian Hunt-Isaak <https://github.com/ianhi>`_.

Documentation
~~~~~~~~~~~~~
Expand Down
7 changes: 6 additions & 1 deletion xarray/backends/netCDF4_.py
Original file line number Diff line number Diff line change
Expand Up @@ -702,7 +702,12 @@ class NetCDF4BackendEntrypoint(BackendEntrypoint):

def guess_can_open(self, filename_or_obj: T_PathFileOrDataStore) -> bool:
if isinstance(filename_or_obj, str) and is_remote_uri(filename_or_obj):
return True
# For remote URIs, check file extension to avoid claiming non-netCDF URLs
# (e.g., remote Zarr stores)
_, ext = os.path.splitext(filename_or_obj.rstrip("/"))
# Accept remote URIs with netCDF extensions or no extension
# (OPeNDAP endpoints often have no extension)
return ext in {".nc", ".nc4", ".cdf", ""}

magic_number = (
bytes(filename_or_obj[:8])
Expand Down
10 changes: 9 additions & 1 deletion xarray/backends/pydap_.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import os
from collections.abc import Iterable
from typing import TYPE_CHECKING, Any

Expand Down Expand Up @@ -209,7 +210,14 @@ class PydapBackendEntrypoint(BackendEntrypoint):
url = "https://docs.xarray.dev/en/stable/generated/xarray.backends.PydapBackendEntrypoint.html"

def guess_can_open(self, filename_or_obj: T_PathFileOrDataStore) -> bool:
    """Guess whether the pydap backend should try to open ``filename_or_obj``.

    Only remote URI strings are considered. OPeNDAP endpoints have no
    standard file extension, so rather than requiring a known-good
    extension this rejects URLs whose path ends in an extension that is
    clearly not OPeNDAP (e.g. a remote Zarr store or an archive).

    Parameters
    ----------
    filename_or_obj
        The object passed to ``open_dataset``.

    Returns
    -------
    bool
        ``False`` for non-strings, non-remote strings, and remote URLs whose
        path ends in ``.zarr``, ``.zip``, ``.tar`` or ``.gz`` (compared
        case-insensitively); ``True`` for every other remote URL.
    """
    if not (isinstance(filename_or_obj, str) and is_remote_uri(filename_or_obj)):
        return False

    # Drop any query string / fragment before looking at the extension so
    # that e.g. "https://host/store.zarr?token=abc" is still recognised as a
    # Zarr store instead of yielding the bogus extension ".zarr?token=abc".
    path = filename_or_obj.partition("?")[0].partition("#")[0]

    # A trailing slash ("https://host/data.zarr/") must not hide the extension.
    _, ext = os.path.splitext(path.rstrip("/"))

    # Extensions are compared case-insensitively (".ZARR" is still Zarr).
    return ext.lower() not in {".zarr", ".zip", ".tar", ".gz"}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not 100% sure on this. We could go further and require "dap" to be in the URL

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think there's a standard extension for OpenDAP URLs. @Mikejmnez do you know?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I checked with a co-worker on slack. He said:

There's no standard extension for DAP URLs. Explicitly excluding .zarr seems good enough for this disambiguation.

Copy link
Contributor

@Mikejmnez Mikejmnez Oct 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting. Yes, there is no standard extension for opendap urls. OPeNDAP servers produce urls with the filename at the end, but for example NASA does something completely different. Excluding .zarr should be good.

What I am trying to push for here is an OPeNDAP "protocol-ization" via the URL scheme: "dap2://<file_url>" vs "dap4://<file_url>". I already added this to the documentation a while back (see "dap2 vs dap4"). Right now, if an OPeNDAP URL begins with http, it is assumed to be dap2. This is handled completely on the client side and is not a server thing. But pydap and python-netcdf4 support this, and some NASA subsetting tools do this too. Perhaps this may help separate OPeNDAP URLs from non-OPeNDAP URLs.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, actually, Thredds (TDS) does have this "standard" way to specify the protocol that may help to discern between opendap url vs non-opendap url: a TDS dap2 url will have a dodsC in its urls. A TDS dap4 url will have a dap4 in its url. (see here). However, an organization running an opendap server may decide how their own urls are exposed.


def open_dataset(
self,
Expand Down
71 changes: 71 additions & 0 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -7252,6 +7252,77 @@ def test_zarr_entrypoint(tmp_path: Path) -> None:
assert not entrypoint.guess_can_open("something.zarr.txt")


@requires_netCDF4
@requires_pydap
@requires_zarr
def test_remote_url_backend_auto_detection() -> None:
    """
    Check which backend entrypoints claim which kinds of remote URL.

    The netCDF4 and pydap backends used to claim every remote URL, which
    stopped remote Zarr stores from being auto-detected without an explicit
    ``engine=`` argument.

    See: https://github.com/pydata/xarray/pull/10804
    """
    from xarray.backends.netCDF4_ import NetCDF4BackendEntrypoint
    from xarray.backends.pydap_ import PydapBackendEntrypoint
    from xarray.backends.zarr import ZarrBackendEntrypoint

    netcdf4 = NetCDF4BackendEntrypoint()
    pydap = PydapBackendEntrypoint()
    zarr = ZarrBackendEntrypoint()

    # Remote Zarr stores: only the Zarr backend may claim them.
    for url in (
        "https://example.com/store.zarr",
        "http://example.com/data.zarr/",
        "s3://bucket/path/to/data.zarr",
    ):
        zarr_claims = zarr.guess_can_open(url)
        assert zarr_claims, f"Zarr should claim {url}"
        netcdf4_claims = netcdf4.guess_can_open(url)
        assert not netcdf4_claims, f"NetCDF4 should not claim {url}"
        pydap_claims = pydap.guess_can_open(url)
        assert not pydap_claims, f"Pydap should not claim {url}"

    # Remote files with a netCDF extension: netCDF4 claims them, Zarr does not.
    for url in (
        "https://example.com/file.nc",
        "http://example.com/data.nc4",
        "https://example.com/test.cdf",
    ):
        zarr_claims = zarr.guess_can_open(url)
        assert not zarr_claims, f"Zarr should not claim {url}"
        netcdf4_claims = netcdf4.guess_can_open(url)
        assert netcdf4_claims, f"NetCDF4 should claim {url}"

    # Extension-less OPeNDAP endpoints: both netCDF4 and pydap claim them.
    for url in (
        "http://opendap.example.com/data",
        "https://test.opendap.org/dataset",
    ):
        zarr_claims = zarr.guess_can_open(url)
        assert not zarr_claims, f"Zarr should not claim {url}"
        netcdf4_claims = netcdf4.guess_can_open(url)
        assert netcdf4_claims, f"NetCDF4 should claim {url}"
        pydap_claims = pydap.guess_can_open(url)
        assert pydap_claims, f"Pydap should claim {url}"

    # Archives and other unrelated file types: nobody claims them.
    for url in (
        "https://example.com/data.zip",
        "https://example.com/data.tar.gz",
    ):
        zarr_claims = zarr.guess_can_open(url)
        assert not zarr_claims, f"Zarr should not claim {url}"
        netcdf4_claims = netcdf4.guess_can_open(url)
        assert not netcdf4_claims, f"NetCDF4 should not claim {url}"
        pydap_claims = pydap.guess_can_open(url)
        assert not pydap_claims, f"Pydap should not claim {url}"


@requires_netCDF4
@pytest.mark.parametrize("str_type", (str, np.str_))
def test_write_file_from_np_str(str_type: type[str | np.str_], tmpdir: str) -> None:
Expand Down
Loading