diff --git a/.github/workflows/emscripten.yml b/.github/workflows/emscripten.yml new file mode 100644 index 0000000000..42620cef1e --- /dev/null +++ b/.github/workflows/emscripten.yml @@ -0,0 +1,112 @@ +# Attributed to NumPy https://github.com/numpy/numpy/pull/25894 +# https://github.com/numpy/numpy/blob/d2d2c25fa81b47810f5cbd85ea6485eb3a3ffec3/.github/workflows/emscripten.yml + +name: Pyodide wheel + +on: + # TODO: refine after this is ready to merge + [push, pull_request, workflow_dispatch] + +env: + FORCE_COLOR: 3 + PYODIDE_VERSION: 0.28.0a3 + # PYTHON_VERSION and EMSCRIPTEN_VERSION are determined by PYODIDE_VERSION. + # The appropriate versions can be found in the Pyodide repodata.json + # "info" field, or in Makefile.envs: + # https://github.com/pyodide/pyodide/blob/main/Makefile.envs#L2 + PYTHON_VERSION: 3.13 # any 3.13.x version works + EMSCRIPTEN_VERSION: 4.0.9 + NODE_VERSION: 22 + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + build_wasm_emscripten: + name: Build and test Zarr for Pyodide + runs-on: ubuntu-latest + # To enable this workflow on a fork, comment out: + # FIXME: uncomment after this is ready to merge + # if: github.repository == 'zarr-developers/zarr-python' + steps: + - name: Checkout Zarr repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + + - name: Set up Python ${{ env.PYTHON_VERSION }} + id: setup-python + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: Set up Emscripten toolchain + uses: mymindstorm/setup-emsdk@v14 + with: + version: ${{ env.EMSCRIPTEN_VERSION }} + actions-cache-folder: emsdk-cache + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: ${{ env.NODE_VERSION }} + + - name: Install pyodide-build + run: python -m pip install pyodide-build + + - name: Build Zarr for Pyodide + run: | + pyodide xbuildenv install ${{ 
env.PYODIDE_VERSION }} + pyodide build + + ### (Temporarily) build numcodecs as well, as we have an older version in the Pyodide distribution (v0.13.1) + + - name: Clone numcodecs repository + uses: actions/checkout@v4 + with: + # See https://github.com/zarr-developers/numcodecs/pull/529 + repository: agriyakhetarpal/numcodecs + ref: setup-emscripten-ci + path: numcodecs-wasm + submodules: recursive + fetch-depth: 0 + fetch-tags: true + + # For some reason fetch-depth: 0 and fetch-tags: true aren't working... + - name: Manually fetch tags for numcodecs + working-directory: numcodecs-wasm + run: git fetch --tags + + - name: Build numcodecs for WASM + run: pyodide build + working-directory: numcodecs-wasm + env: + DISABLE_NUMCODECS_AVX2: 1 + DISABLE_NUMCODECS_SSE2: 1 + + ### Back to Zarr repository to run tests + + - name: Run Zarr tests for Pyodide + run: | + # Set up Pyodide virtual environment and activate it + pyodide venv .venv-pyodide + source .venv-pyodide/bin/activate + + # Install numcodecs + pip install $(ls numcodecs-wasm/dist/*.whl)"[crc32c]" + + # Install Zarr without dependencies until we can figure out the + # numcodecs wheel versioning issue + pip install dist/*.whl --no-deps + pip install "packaging>=22.0" "numpy>=1.25" "typing_extensions>=4.9" "donfig>=0.8" + + # Install test dependencies + pip install "coverage" "pytest" "pytest-asyncio" "pytest-cov" "pytest-accept" "rich" "mypy" "hypothesis" + + python -m pytest tests -v --cov=zarr --cov-config=pyproject.toml + diff --git a/changes/1903.feature.rst b/changes/1903.feature.rst new file mode 100644 index 0000000000..4c8db34895 --- /dev/null +++ b/changes/1903.feature.rst @@ -0,0 +1,2 @@ +Added official support for the Pyodide/WebAssembly platform for using Zarr within browser-based environments. The +``threading.max_workers`` configuration option defaults to 1 on this platform, and the ``zarr.sync`` interface is not supported. 
# Kept in a standalone module so that both ``zarr.core.config`` and
# ``zarr.core.sync`` can import the flag without creating an import cycle.
# TODO: find a better location for this or keep it here

# True when the interpreter reports the "emscripten" platform ...
_on_emscripten = sys.platform == "emscripten"
# ... or when the reported machine type is one of the WebAssembly targets.
_wasm_machine = platform.machine() in ("wasm32", "wasm64")

IS_WASM: bool = _on_emscripten or _wasm_machine
+ # ) level_parsed = parse_zstd_level(level) checksum_parsed = parse_checksum(checksum) diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py index 2a10943d80..ead6b4a96e 100644 --- a/src/zarr/core/config.py +++ b/src/zarr/core/config.py @@ -33,6 +33,8 @@ from donfig import Config as DConfig +from zarr._constants import IS_WASM + if TYPE_CHECKING: from donfig.config_obj import ConfigSet @@ -107,7 +109,7 @@ def enable_gpu(self) -> ConfigSet: }, }, "async": {"concurrency": 10, "timeout": None}, - "threading": {"max_workers": None}, + "threading": {"max_workers": 1 if IS_WASM else None}, "json_indent": 2, "codec_pipeline": { "path": "zarr.core.codec_pipeline.BatchedCodecPipeline", diff --git a/src/zarr/core/sync.py b/src/zarr/core/sync.py index d9b4839e8e..7cf44391dd 100644 --- a/src/zarr/core/sync.py +++ b/src/zarr/core/sync.py @@ -10,6 +10,7 @@ from typing_extensions import ParamSpec +from zarr._constants import IS_WASM from zarr.core.config import config if TYPE_CHECKING: @@ -133,6 +134,36 @@ def sync( -------- >>> sync(async_function(), existing_loop) """ + # WASM environments (like Pyodide) cannot start new threads, so we need to handle + # coroutines differently. We integrate with the existing Pyodide WebLoop which + # schedules tasks on the browser's event loop using setTimeout(): + # https://developer.mozilla.org/en-US/docs/Web/API/setTimeout + if IS_WASM: # pragma: no cover + # This code path is covered in the Pyodide/WASM CI job. + current_loop = asyncio.get_running_loop() + result = current_loop.run_until_complete(coro) + # Check if run_until_complete actually executed the coroutine or just returned a task + # In browsers without JSPI, run_until_complete is a no-op that will return the task/future. + if isinstance(result, (asyncio.Task, asyncio.Future)): + raise RuntimeError( + "Cannot use synchronous zarr API in browser environments without JSPI. 
" + "Zarr requires JavaScript Promise Integration (JSPI) to work in browsers " + "but JSPI is not enabled in your environment.\n" + "Solutions:\n" + "1. Use the async API instead, with zarr.api.asynchronous" + "2. Enable JSPI in your Pyodide setup with " + "`loadPyodide({ enableRunUntilComplete: true })`" + "3. Use a JSPI-enabled website or browser configuration" + "4. If you are using Node.js, pass the --experimental-wasm-jspi flag (v20+)" + "\n" + "Note: JSPI is experimental and not yet standardised across all browsers. See " + "https://webassembly.org/features/ for more information and status." + ) + return result + + # This code path is the original thread-based implementation + # for non-WASM environments; it creates a dedicated I/O thread + # with its own event loop. if loop is None: # NB: if the loop is not running *yet*, it is OK to submit work # and we will wait for it @@ -170,6 +201,13 @@ def _get_loop() -> asyncio.AbstractEventLoop: The loop will be running on a separate thread. """ + if IS_WASM: # pragma: no cover + # This case is covered in the Pyodide/WASM CI job. + raise RuntimeError( + "Thread-based event loop not available in WASM environment. " + "Use zarr.api.asynchronous or ensure sync() handles WASM case." 
+ ) + if loop[0] is None: with _get_lock(): # repeat the check just in case the loop got filled between the diff --git a/tests/conftest.py b/tests/conftest.py index 948d3cd055..4dd7848005 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,7 @@ from __future__ import annotations import pathlib +import sys from dataclasses import dataclass, field from typing import TYPE_CHECKING @@ -10,6 +11,7 @@ from hypothesis import HealthCheck, Verbosity, settings from zarr import AsyncGroup, config +from zarr._constants import IS_WASM from zarr.abc.store import Store from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation from zarr.core.array import ( @@ -105,6 +107,45 @@ def sync_store(request: pytest.FixtureRequest, tmp_path: LEGACY_PATH) -> Store: return result +@pytest.fixture(autouse=(IS_WASM and "pyodide" in sys.modules), scope="session") +def patch_pyodide_webloop_for_pytest() -> Generator[None, None, None]: + """ + Patch Pyodide's WebLoop to fix interoperability with pytest-asyncio. + + WebLoop.shutdown_asyncgens() raises NotImplementedError, which causes + pytest-asyncio to issue warnings during test cleanup and potentially + cause resource leaks that make tests hang. This is a bit of a + hack, but it allows us to run tests that use pytest-asyncio. + + This is necessary because pytest-asyncio tries to clean up async generators + when tearing down test event loops, but Pyodide's WebLoop doesn't support + this as it integrates with the browser's event loop rather than managing + its own lifecycle. 
def pytest_collection_modifyitems(config: Any, items: Any) -> None:
    """
    Skip opt-in slow tests unless their command-line flag was given.

    The two markers are gated independently of each other:

    - ``slow_hypothesis`` items are skipped unless ``--run-slow-hypothesis``
      is passed.
    - ``slow_wasm`` items are skipped only when ``IS_WASM`` is true and
      ``--run-slow-wasm`` is not passed; when ``IS_WASM`` is false they are
      left untouched.

    Parameters
    ----------
    config
        The pytest config object; only ``getoption`` is used.
    items
        The collected test items. Skip markers are added to them in place.
    """
    run_slow_hypothesis = config.getoption("--run-slow-hypothesis")
    run_slow_wasm = config.getoption("--run-slow-wasm")

    skip_slow_hyp = pytest.mark.skip(reason="need --run-slow-hypothesis option to run")
    skip_slow_wasm = pytest.mark.skip(
        reason="need --run-slow-wasm option to run in WASM, or not running in WASM"
    )

    # Decide per item and per marker. Returning early when one flag is set
    # would also disable the skip for the *other* category, so passing
    # --run-slow-hypothesis must not enable slow_wasm tests and vice versa.
    for item in items:
        if not run_slow_hypothesis and "slow_hypothesis" in item.keywords:
            item.add_marker(skip_slow_hyp)
        if IS_WASM and not run_slow_wasm and "slow_wasm" in item.keywords:
            item.add_marker(skip_slow_wasm)
arr[index] +@pytest.mark.skipif( + IS_WASM, + reason="can't start new processes in Pyodide", +) @pytest.mark.parametrize( "method", [ diff --git a/tests/test_codecs/test_blosc.py b/tests/test_codecs/test_blosc.py index 6e6e9df383..4140d9154f 100644 --- a/tests/test_codecs/test_blosc.py +++ b/tests/test_codecs/test_blosc.py @@ -6,6 +6,7 @@ from packaging.version import Version import zarr +from zarr._constants import IS_WASM from zarr.abc.store import Store from zarr.codecs import BloscCodec from zarr.core.buffer import default_buffer_prototype @@ -58,6 +59,7 @@ async def test_blosc_evolve(store: Store, dtype: str) -> None: assert blosc_configuration_json["shuffle"] == "shuffle" +@pytest.mark.xfail(IS_WASM, reason="Blosc size mismatch, known failure case for Pyodide/WASM") async def test_typesize() -> None: a = np.arange(1000000, dtype=np.uint64) codecs = [zarr.codecs.BytesCodec(), zarr.codecs.BloscCodec()] diff --git a/tests/test_config.py b/tests/test_config.py index 2cbf172752..6f3673557a 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -10,6 +10,7 @@ import zarr import zarr.api from zarr import zeros +from zarr._constants import IS_WASM from zarr.abc.codec import CodecPipeline from zarr.abc.store import ByteSetter, Store from zarr.codecs import ( @@ -83,7 +84,7 @@ def test_config_defaults_set() -> None: }, }, "async": {"concurrency": 10, "timeout": None}, - "threading": {"max_workers": None}, + "threading": {"max_workers": 1 if IS_WASM else None}, "json_indent": 2, "codec_pipeline": { "path": "zarr.core.codec_pipeline.BatchedCodecPipeline", diff --git a/tests/test_indexing.py b/tests/test_indexing.py index b1707c88a3..b6217c458c 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -739,6 +739,7 @@ def _test_get_orthogonal_selection_3d( _test_get_orthogonal_selection(a, z, selection) +@pytest.mark.slow_wasm def test_get_orthogonal_selection_3d(store: StorePath) -> None: # setup a = np.arange(100000, dtype=int).reshape(200, 50, 10) @@ 
-801,6 +802,7 @@ def _test_set_orthogonal_selection( assert_array_equal(a, z[:]) +@pytest.mark.slow_wasm def test_set_orthogonal_selection_1d(store: StorePath) -> None: # setup v = np.arange(1050, dtype=int) @@ -846,6 +848,7 @@ def test_set_item_1d_last_two_chunks(store: StorePath): np.testing.assert_equal(z["zoo"][()], np.array(1)) +@pytest.mark.slow_wasm def _test_set_orthogonal_selection_2d( v: npt.NDArray[np.int_], a: npt.NDArray[np.int_], @@ -866,6 +869,7 @@ def _test_set_orthogonal_selection_2d( _test_set_orthogonal_selection(v, a, z, selection) +@pytest.mark.slow_wasm def test_set_orthogonal_selection_2d(store: StorePath) -> None: # setup v = np.arange(10000, dtype=int).reshape(1000, 10) @@ -928,6 +932,7 @@ def _test_set_orthogonal_selection_3d( _test_set_orthogonal_selection(v, a, z, selection) +@pytest.mark.slow_wasm def test_set_orthogonal_selection_3d(store: StorePath) -> None: # setup v = np.arange(100000, dtype=int).reshape(200, 50, 10) @@ -1148,6 +1153,7 @@ def test_set_coordinate_selection_1d(store: StorePath) -> None: z.vindex[selection] = 42 # type:ignore[index] +@pytest.mark.slow_wasm def test_set_coordinate_selection_2d(store: StorePath) -> None: # setup v = np.arange(10000, dtype=int).reshape(1000, 10) @@ -1331,6 +1337,7 @@ def _test_set_block_selection( assert_array_equal(a, z[:]) +@pytest.mark.slow_wasm def test_set_block_selection_1d(store: StorePath) -> None: # setup v = np.arange(1050, dtype=int) @@ -1349,6 +1356,7 @@ def test_set_block_selection_1d(store: StorePath) -> None: z.blocks[selection_bad] = 42 # type:ignore[index] +@pytest.mark.slow_wasm def test_set_block_selection_2d(store: StorePath) -> None: # setup v = np.arange(10000, dtype=int).reshape(1000, 10) diff --git a/tests/test_properties.py b/tests/test_properties.py index d48dfe2fef..da26edf398 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -7,6 +7,7 @@ import pytest from numpy.testing import assert_array_equal +from zarr._constants import IS_WASM from 
zarr.core.buffer import default_buffer_prototype pytest.importorskip("hypothesis") @@ -76,6 +77,7 @@ def deep_equal(a: Any, b: Any) -> bool: return a == b +@pytest.mark.slow_wasm @given(data=st.data(), zarr_format=zarr_formats) def test_array_roundtrip(data: st.DataObject, zarr_format: int) -> None: nparray = data.draw(numpy_arrays(zarr_formats=st.just(zarr_format))) @@ -83,6 +85,7 @@ def test_array_roundtrip(data: st.DataObject, zarr_format: int) -> None: assert_array_equal(nparray, zarray[:]) +@pytest.mark.slow_wasm @given(array=arrays()) def test_array_creates_implicit_groups(array): path = array.path @@ -102,6 +105,7 @@ def test_array_creates_implicit_groups(array): # this decorator removes timeout; not ideal but it should avoid intermittent CI failures +@pytest.mark.skipif(IS_WASM, reason="Unreliable test on Pyodide/WASM due to Hypothesis") @settings(deadline=None) @given(data=st.data()) def test_basic_indexing(data: st.DataObject) -> None: @@ -117,6 +121,7 @@ def test_basic_indexing(data: st.DataObject) -> None: assert_array_equal(nparray, zarray[:]) +@pytest.mark.skipif(IS_WASM, reason="Unreliable test on Pyodide/WASM due to Hypothesis") @given(data=st.data()) def test_oindex(data: st.DataObject) -> None: # integer_array_indices can't handle 0-size dimensions. @@ -138,6 +143,7 @@ def test_oindex(data: st.DataObject) -> None: assert_array_equal(nparray, zarray[:]) +@pytest.mark.skipif(IS_WASM, reason="Unreliable test on Pyodide/WASM due to Hypothesis") @given(data=st.data()) def test_vindex(data: st.DataObject) -> None: # integer_array_indices can't handle 0-size dimensions. 
@@ -161,6 +167,7 @@ def test_vindex(data: st.DataObject) -> None: # assert_array_equal(nparray, zarray[:]) +@pytest.mark.slow_wasm @given(store=stores, meta=array_metadata()) # type: ignore[misc] async def test_roundtrip_array_metadata_from_store( store: Store, meta: ArrayV2Metadata | ArrayV3Metadata diff --git a/tests/test_sync.py b/tests/test_sync.py index 13b475f8da..84824839b5 100644 --- a/tests/test_sync.py +++ b/tests/test_sync.py @@ -5,6 +5,7 @@ import pytest import zarr +from zarr._constants import IS_WASM from zarr.core.sync import ( SyncError, SyncMixin, @@ -17,6 +18,11 @@ ) from zarr.storage import MemoryStore +pytestmark = pytest.mark.skipif( + IS_WASM, + reason="can't start new threads in Pyodide/WASM, so the synchronous API doesn't work", +) + @pytest.fixture(params=[True, False]) def sync_loop(request: pytest.FixtureRequest) -> asyncio.AbstractEventLoop | None: