Skip to content

Forbidden 403 on missing chunk in remote Zarr #342

Closed
@joshmoore

Description

@joshmoore

Both direct HTTP access and S3FileSystem access of an S3 store fail with a 403 Forbidden error (a `PermissionError` via s3fs, an `HTTPError` via plain HTTP) if a Zarr chunk does not exist.

s3fs details
In [3]: def load_binary_from_s3(image_id, resolution='0'):
   ...:     cache_size_mb = 2048
   ...:     cfg = {
   ...:         'anon': True,
   ...:         'client_kwargs': {
   ...:             'endpoint_url': 'https://s3.embassy.ebi.ac.uk',
   ...:         },
   ...:         'root': 'idr/zarr/v0.1/%s.zarr/%s/' % (image_id, resolution)
   ...:     }
   ...:     import s3fs
   ...:     s3 = s3fs.S3FileSystem(
   ...:         anon=cfg['anon'],
   ...:         client_kwargs=cfg['client_kwargs'],
   ...:     )
   ...:     store = s3fs.S3Map(root=cfg['root'], s3=s3, check=False)
   ...:     import dask.array as da
   ...:     return da.from_zarr(store)
   ...:

In [4]:

In [4]: x = load_binary_from_s3(9836950, "masks/0")

In [5]: x
Out[5]: dask.array<from-zarr, shape=(1, 1, 156, 816, 1636), dtype=int64, chunksize=(1, 1, 1, 816, 1636), chunktype=numpy.ndarray>

In [6]: x.compute()

----> 1 x.compute()

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/base.py in compute(self, **kwargs)
    164         dask.base.compute
    165         """
--> 166         (result,) = compute(self, traverse=False, **kwargs)
    167         return result
    168

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/base.py in compute(*args, **kwargs)
    442         postcomputes.append(x.__dask_postcompute__())
    443
--> 444     results = schedule(dsk, keys, **kwargs)
    445     return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
    446

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/threaded.py in get(dsk, result, cache, num_workers, pool, **kwargs)
     82         get_id=_thread_get_id,
     83         pack_exception=pack_exception,
---> 84         **kwargs
     85     )
     86

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/local.py in get_async(apply_async, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, **kwargs)
    484                         _execute_task(task, data)  # Re-execute locally
    485                     else:
--> 486                         raise_exception(exc, tb)
    487                 res, worker_id = loads(res_info)
    488                 state["cache"][key] = res

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/local.py in reraise(exc, tb)
    314     if exc.__traceback__ is not tb:
    315         raise exc.with_traceback(tb)
--> 316     raise exc
    317
    318

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/local.py in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
    220     try:
    221         task, data = loads(task_info)
--> 222         result = _execute_task(task, data)
    223         id = get_id()
    224         result = dumps((result, id))

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/core.py in _execute_task(arg, cache, dsk)
    119         # temporaries by their reference count and can execute certain
    120         # operations in-place.
--> 121         return func(*(_execute_task(a, cache) for a in args))
    122     elif not ishashable(arg):
    123         return arg

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/array/core.py in getter(a, b, asarray, lock)
     96         lock.acquire()
     97     try:
---> 98         c = a[b]
     99         if asarray:
    100             c = np.asarray(c)

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/zarr/core.py in __getitem__(self, selection)
    570
    571         fields, selection = pop_fields(selection)
--> 572         return self.get_basic_selection(selection, fields=fields)
    573
    574     def get_basic_selection(self, selection=Ellipsis, out=None, fields=None):

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/zarr/core.py in get_basic_selection(self, selection, out, fields)
    696         else:
    697             return self._get_basic_selection_nd(selection=selection, out=out,
--> 698                                                 fields=fields)
    699
    700     def _get_basic_selection_zd(self, selection, out=None, fields=None):

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/zarr/core.py in _get_basic_selection_nd(self, selection, out, fields)
    738         indexer = BasicIndexer(selection, self)
    739
--> 740         return self._get_selection(indexer=indexer, out=out, fields=fields)
    741
    742     def get_orthogonal_selection(self, selection, out=None, fields=None):

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/zarr/core.py in _get_selection(self, indexer, out, fields)
   1026             # load chunk selection into output array
   1027             self._chunk_getitem(chunk_coords, chunk_selection, out, out_selection,
-> 1028                                 drop_axes=indexer.drop_axes, fields=fields)
   1029
   1030         if out.shape:

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/zarr/core.py in _chunk_getitem(self, chunk_coords, chunk_selection, out, out_selection, drop_axes, fields)
   1584         try:
   1585             # obtain compressed data for chunk
-> 1586             cdata = self.chunk_store[ckey]
   1587
   1588         except KeyError:

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/fsspec/mapping.py in __getitem__(self, key, default)
     73         k = self._key_to_str(key)
     74         try:
---> 75             result = self.fs.cat(k)
     76         except (FileNotFoundError, IsADirectoryError, NotADirectoryError):
     77             if default is not None:

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/fsspec/spec.py in cat(self, path)
    585     def cat(self, path):
    586         """ Get the content of a file """
--> 587         return self.open(path, "rb").read()
    588
    589     def get(self, rpath, lpath, recursive=False, **kwargs):

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/fsspec/spec.py in open(self, path, mode, block_size, cache_options, **kwargs)
    773                 autocommit=ac,
    774                 cache_options=cache_options,
--> 775                 **kwargs
    776             )
    777             if not ac:

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/s3fs/core.py in _open(self, path, mode, block_size, acl, version_id, fill_cache, cache_type, autocommit, requester_pays, **kwargs)
    376                       version_id=version_id, fill_cache=fill_cache,
    377                       s3_additional_kwargs=kw, cache_type=cache_type,
--> 378                       autocommit=autocommit, requester_pays=requester_pays)
    379
    380     def _lsdir(self, path, refresh=False, max_items=None):

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/s3fs/core.py in __init__(self, s3, path, mode, block_size, acl, version_id, fill_cache, s3_additional_kwargs, autocommit, cache_type, requester_pays)
   1095         self.req_kw = {'RequestPayer': 'requester'} if requester_pays else {}
   1096         super().__init__(s3, path, mode, block_size, autocommit=autocommit,
-> 1097                          cache_type=cache_type)
   1098         self.s3 = self.fs  # compatibility
   1099         if self.writable():

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/fsspec/spec.py in __init__(self, fs, path, mode, block_size, autocommit, cache_type, cache_options, **kwargs)
   1063         if mode == "rb":
   1064             if not hasattr(self, "details"):
-> 1065                 self.details = fs.info(path)
   1066             self.size = self.details["size"]
   1067             self.cache = caches[cache_type](

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/s3fs/core.py in info(self, path, version_id, refresh)
    546                     return super(S3FileSystem, self).info(path)
    547                 else:
--> 548                     raise ee
    549             except ParamValidationError as e:
    550                 raise ValueError('Failed to head path %r: %s' % (path, e))

PermissionError: Forbidden
http details
In [1]: import dask.array as da

In [2]: x = da.from_zarr("https://s3.embassy.ebi.ac.uk/idr/zarr/v0.1/9836950.zarr/masks/0")

In [3]: x.compute()
---------------------------------------------------------------------------
HTTPError                                 Traceback (most recent call last)
<ipython-input-3-ef36793348c2> in <module>
----> 1 x.compute()

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/base.py in compute(self, **kwargs)
    164         dask.base.compute
    165         """
--> 166         (result,) = compute(self, traverse=False, **kwargs)
    167         return result
    168

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/base.py in compute(*args, **kwargs)
    442         postcomputes.append(x.__dask_postcompute__())
    443
--> 444     results = schedule(dsk, keys, **kwargs)
    445     return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
    446

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/threaded.py in get(dsk, result, cache, num_workers, pool, **kwargs)
     82         get_id=_thread_get_id,
     83         pack_exception=pack_exception,
---> 84         **kwargs
     85     )
     86

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/local.py in get_async(apply_async, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, **kwargs)
    484                         _execute_task(task, data)  # Re-execute locally
    485                     else:
--> 486                         raise_exception(exc, tb)
    487                 res, worker_id = loads(res_info)
    488                 state["cache"][key] = res

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/local.py in reraise(exc, tb)
    314     if exc.__traceback__ is not tb:
    315         raise exc.with_traceback(tb)
--> 316     raise exc
    317
    318

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/local.py in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
    220     try:
    221         task, data = loads(task_info)
--> 222         result = _execute_task(task, data)
    223         id = get_id()
    224         result = dumps((result, id))

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/core.py in _execute_task(arg, cache, dsk)
    119         # temporaries by their reference count and can execute certain
    120         # operations in-place.
--> 121         return func(*(_execute_task(a, cache) for a in args))
    122     elif not ishashable(arg):
    123         return arg

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/array/core.py in getter(a, b, asarray, lock)
     96         lock.acquire()
     97     try:
---> 98         c = a[b]
     99         if asarray:
    100             c = np.asarray(c)

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/zarr/core.py in __getitem__(self, selection)
    570
    571         fields, selection = pop_fields(selection)
--> 572         return self.get_basic_selection(selection, fields=fields)
    573
    574     def get_basic_selection(self, selection=Ellipsis, out=None, fields=None):

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/zarr/core.py in get_basic_selection(self, selection, out, fields)
    696         else:
    697             return self._get_basic_selection_nd(selection=selection, out=out,
--> 698                                                 fields=fields)
    699
    700     def _get_basic_selection_zd(self, selection, out=None, fields=None):

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/zarr/core.py in _get_basic_selection_nd(self, selection, out, fields)
    738         indexer = BasicIndexer(selection, self)
    739
--> 740         return self._get_selection(indexer=indexer, out=out, fields=fields)
    741
    742     def get_orthogonal_selection(self, selection, out=None, fields=None):

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/zarr/core.py in _get_selection(self, indexer, out, fields)
   1026             # load chunk selection into output array
   1027             self._chunk_getitem(chunk_coords, chunk_selection, out, out_selection,
-> 1028                                 drop_axes=indexer.drop_axes, fields=fields)
   1029
   1030         if out.shape:

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/zarr/core.py in _chunk_getitem(self, chunk_coords, chunk_selection, out, out_selection, drop_axes, fields)
   1584         try:
   1585             # obtain compressed data for chunk
-> 1586             cdata = self.chunk_store[ckey]
   1587
   1588         except KeyError:

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/fsspec/mapping.py in __getitem__(self, key, default)
     73         k = self._key_to_str(key)
     74         try:
---> 75             result = self.fs.cat(k)
     76         except (FileNotFoundError, IsADirectoryError, NotADirectoryError):
     77             if default is not None:

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/fsspec/implementations/http.py in cat(self, url)
    108     def cat(self, url):
    109         r = self.session.get(url, **self.kwargs)
--> 110         r.raise_for_status()
    111         return r.content
    112

/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/requests/models.py in raise_for_status(self)
    939
    940         if http_error_msg:
--> 941             raise HTTPError(http_error_msg, response=self)
    942
    943     def close(self):

HTTPError: 403 Client Error: Forbidden for url: https://s3.embassy.ebi.ac.uk/idr/zarr/v0.1/9836950.zarr/masks/0/0.0.7.0.0

The server is known to be quite restrictive, disallowing directory listings, etc. The only workaround I can think of is to create the missing chunks with the fill value, which I'd like to avoid since this would have to be repeated for millions of images.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions