Description
Both direct HTTP access and S3FileSystem access to an S3 store fail with a PermissionError if a Zarr chunk does not exist.
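The tracebacks below show why the error type matters: fsspec's mapping only converts "not found"-style exceptions into the KeyError that zarr uses to detect a missing chunk, so the PermissionError that s3fs raises for a forbidden HEAD/GET on an absent key propagates all the way up instead. A minimal sketch of that control flow (not the actual library source, just the relevant branches as they appear in the frames below):

def fsmap_getitem(fs_cat, key):
    # fsspec/mapping.py catches FileNotFoundError and friends...
    try:
        return fs_cat(key)
    except (FileNotFoundError, IsADirectoryError, NotADirectoryError):
        raise KeyError(key)
    # ...but PermissionError (S3 "Forbidden" for a missing key) is not caught.

def zarr_chunk_getitem(store_getitem, ckey, fill_value):
    # zarr/core.py only treats KeyError as "chunk missing, use fill value".
    try:
        return store_getitem(ckey)
    except KeyError:
        return fill_value

def s3_cat_forbidden(key):
    # What the restricted bucket effectively does for an absent chunk object.
    raise PermissionError('Forbidden')

try:
    zarr_chunk_getitem(lambda k: fsmap_getitem(s3_cat_forbidden, k),
                       '0.0.7.0.0', fill_value=0)
except PermissionError as exc:
    print('chunk read fails instead of using the fill value:', exc)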
s3fs details
In [3]: def load_binary_from_s3(image_id, resolution='0'):
   ...:     cache_size_mb = 2048
   ...:     cfg = {
   ...:         'anon': True,
   ...:         'client_kwargs': {
   ...:             'endpoint_url': 'https://s3.embassy.ebi.ac.uk',
   ...:         },
   ...:         'root': 'idr/zarr/v0.1/%s.zarr/%s/' % (image_id, resolution)
   ...:     }
   ...:     import s3fs
   ...:     s3 = s3fs.S3FileSystem(
   ...:         anon=cfg['anon'],
   ...:         client_kwargs=cfg['client_kwargs'],
   ...:     )
   ...:     store = s3fs.S3Map(root=cfg['root'], s3=s3, check=False)
   ...:     import dask.array as da
   ...:     return da.from_zarr(store)
   ...:
In [4]: x = load_binary_from_s3(9836950, "masks/0")
In [5]: x
Out[5]: dask.array<from-zarr, shape=(1, 1, 156, 816, 1636), dtype=int64, chunksize=(1, 1, 1, 816, 1636), chunktype=numpy.ndarray>
In [6]: x.compute()
---------------------------------------------------------------------------
PermissionError                           Traceback (most recent call last)
----> 1 x.compute()
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/base.py in compute(self, **kwargs)
164 dask.base.compute
165 """
--> 166 (result,) = compute(self, traverse=False, **kwargs)
167 return result
168
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/base.py in compute(*args, **kwargs)
442 postcomputes.append(x.__dask_postcompute__())
443
--> 444 results = schedule(dsk, keys, **kwargs)
445 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
446
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/threaded.py in get(dsk, result, cache, num_workers, pool, **kwargs)
82 get_id=_thread_get_id,
83 pack_exception=pack_exception,
---> 84 **kwargs
85 )
86
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/local.py in get_async(apply_async, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, **kwargs)
484 _execute_task(task, data) # Re-execute locally
485 else:
--> 486 raise_exception(exc, tb)
487 res, worker_id = loads(res_info)
488 state["cache"][key] = res
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/local.py in reraise(exc, tb)
314 if exc.__traceback__ is not tb:
315 raise exc.with_traceback(tb)
--> 316 raise exc
317
318
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/local.py in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
220 try:
221 task, data = loads(task_info)
--> 222 result = _execute_task(task, data)
223 id = get_id()
224 result = dumps((result, id))
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/core.py in _execute_task(arg, cache, dsk)
119 # temporaries by their reference count and can execute certain
120 # operations in-place.
--> 121 return func(*(_execute_task(a, cache) for a in args))
122 elif not ishashable(arg):
123 return arg
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/array/core.py in getter(a, b, asarray, lock)
96 lock.acquire()
97 try:
---> 98 c = a[b]
99 if asarray:
100 c = np.asarray(c)
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/zarr/core.py in __getitem__(self, selection)
570
571 fields, selection = pop_fields(selection)
--> 572 return self.get_basic_selection(selection, fields=fields)
573
574 def get_basic_selection(self, selection=Ellipsis, out=None, fields=None):
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/zarr/core.py in get_basic_selection(self, selection, out, fields)
696 else:
697 return self._get_basic_selection_nd(selection=selection, out=out,
--> 698 fields=fields)
699
700 def _get_basic_selection_zd(self, selection, out=None, fields=None):
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/zarr/core.py in _get_basic_selection_nd(self, selection, out, fields)
738 indexer = BasicIndexer(selection, self)
739
--> 740 return self._get_selection(indexer=indexer, out=out, fields=fields)
741
742 def get_orthogonal_selection(self, selection, out=None, fields=None):
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/zarr/core.py in _get_selection(self, indexer, out, fields)
1026 # load chunk selection into output array
1027 self._chunk_getitem(chunk_coords, chunk_selection, out, out_selection,
-> 1028 drop_axes=indexer.drop_axes, fields=fields)
1029
1030 if out.shape:
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/zarr/core.py in _chunk_getitem(self, chunk_coords, chunk_selection, out, out_selection, drop_axes, fields)
1584 try:
1585 # obtain compressed data for chunk
-> 1586 cdata = self.chunk_store[ckey]
1587
1588 except KeyError:
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/fsspec/mapping.py in __getitem__(self, key, default)
73 k = self._key_to_str(key)
74 try:
---> 75 result = self.fs.cat(k)
76 except (FileNotFoundError, IsADirectoryError, NotADirectoryError):
77 if default is not None:
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/fsspec/spec.py in cat(self, path)
585 def cat(self, path):
586 """ Get the content of a file """
--> 587 return self.open(path, "rb").read()
588
589 def get(self, rpath, lpath, recursive=False, **kwargs):
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/fsspec/spec.py in open(self, path, mode, block_size, cache_options, **kwargs)
773 autocommit=ac,
774 cache_options=cache_options,
--> 775 **kwargs
776 )
777 if not ac:
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/s3fs/core.py in _open(self, path, mode, block_size, acl, version_id, fill_cache, cache_type, autocommit, requester_pays, **kwargs)
376 version_id=version_id, fill_cache=fill_cache,
377 s3_additional_kwargs=kw, cache_type=cache_type,
--> 378 autocommit=autocommit, requester_pays=requester_pays)
379
380 def _lsdir(self, path, refresh=False, max_items=None):
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/s3fs/core.py in __init__(self, s3, path, mode, block_size, acl, version_id, fill_cache, s3_additional_kwargs, autocommit, cache_type, requester_pays)
1095 self.req_kw = {'RequestPayer': 'requester'} if requester_pays else {}
1096 super().__init__(s3, path, mode, block_size, autocommit=autocommit,
-> 1097 cache_type=cache_type)
1098 self.s3 = self.fs # compatibility
1099 if self.writable():
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/fsspec/spec.py in __init__(self, fs, path, mode, block_size, autocommit, cache_type, cache_options, **kwargs)
1063 if mode == "rb":
1064 if not hasattr(self, "details"):
-> 1065 self.details = fs.info(path)
1066 self.size = self.details["size"]
1067 self.cache = caches[cache_type](
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/s3fs/core.py in info(self, path, version_id, refresh)
546 return super(S3FileSystem, self).info(path)
547 else:
--> 548 raise ee
549 except ParamValidationError as e:
550 raise ValueError('Failed to head path %r: %s' % (path, e))
PermissionError: Forbidden
http details
In [1]: import dask.array as da
In [2]: x = da.from_zarr("https://s3.embassy.ebi.ac.uk/idr/zarr/v0.1/9836950.zarr/masks/0")
In [3]: x.compute()
---------------------------------------------------------------------------
HTTPError Traceback (most recent call last)
<ipython-input-3-ef36793348c2> in <module>
----> 1 x.compute()
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/base.py in compute(self, **kwargs)
164 dask.base.compute
165 """
--> 166 (result,) = compute(self, traverse=False, **kwargs)
167 return result
168
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/base.py in compute(*args, **kwargs)
442 postcomputes.append(x.__dask_postcompute__())
443
--> 444 results = schedule(dsk, keys, **kwargs)
445 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
446
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/threaded.py in get(dsk, result, cache, num_workers, pool, **kwargs)
82 get_id=_thread_get_id,
83 pack_exception=pack_exception,
---> 84 **kwargs
85 )
86
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/local.py in get_async(apply_async, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, **kwargs)
484 _execute_task(task, data) # Re-execute locally
485 else:
--> 486 raise_exception(exc, tb)
487 res, worker_id = loads(res_info)
488 state["cache"][key] = res
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/local.py in reraise(exc, tb)
314 if exc.__traceback__ is not tb:
315 raise exc.with_traceback(tb)
--> 316 raise exc
317
318
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/local.py in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
220 try:
221 task, data = loads(task_info)
--> 222 result = _execute_task(task, data)
223 id = get_id()
224 result = dumps((result, id))
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/core.py in _execute_task(arg, cache, dsk)
119 # temporaries by their reference count and can execute certain
120 # operations in-place.
--> 121 return func(*(_execute_task(a, cache) for a in args))
122 elif not ishashable(arg):
123 return arg
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/dask/array/core.py in getter(a, b, asarray, lock)
96 lock.acquire()
97 try:
---> 98 c = a[b]
99 if asarray:
100 c = np.asarray(c)
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/zarr/core.py in __getitem__(self, selection)
570
571 fields, selection = pop_fields(selection)
--> 572 return self.get_basic_selection(selection, fields=fields)
573
574 def get_basic_selection(self, selection=Ellipsis, out=None, fields=None):
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/zarr/core.py in get_basic_selection(self, selection, out, fields)
696 else:
697 return self._get_basic_selection_nd(selection=selection, out=out,
--> 698 fields=fields)
699
700 def _get_basic_selection_zd(self, selection, out=None, fields=None):
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/zarr/core.py in _get_basic_selection_nd(self, selection, out, fields)
738 indexer = BasicIndexer(selection, self)
739
--> 740 return self._get_selection(indexer=indexer, out=out, fields=fields)
741
742 def get_orthogonal_selection(self, selection, out=None, fields=None):
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/zarr/core.py in _get_selection(self, indexer, out, fields)
1026 # load chunk selection into output array
1027 self._chunk_getitem(chunk_coords, chunk_selection, out, out_selection,
-> 1028 drop_axes=indexer.drop_axes, fields=fields)
1029
1030 if out.shape:
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/zarr/core.py in _chunk_getitem(self, chunk_coords, chunk_selection, out, out_selection, drop_axes, fields)
1584 try:
1585 # obtain compressed data for chunk
-> 1586 cdata = self.chunk_store[ckey]
1587
1588 except KeyError:
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/fsspec/mapping.py in __getitem__(self, key, default)
73 k = self._key_to_str(key)
74 try:
---> 75 result = self.fs.cat(k)
76 except (FileNotFoundError, IsADirectoryError, NotADirectoryError):
77 if default is not None:
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/fsspec/implementations/http.py in cat(self, url)
108 def cat(self, url):
109 r = self.session.get(url, **self.kwargs)
--> 110 r.raise_for_status()
111 return r.content
112
/usr/local/anaconda3/envs/demo/lib/python3.6/site-packages/requests/models.py in raise_for_status(self)
939
940 if http_error_msg:
--> 941 raise HTTPError(http_error_msg, response=self)
942
943 def close(self):
HTTPError: 403 Client Error: Forbidden for url: https://s3.embassy.ebi.ac.uk/idr/zarr/v0.1/9836950.zarr/masks/0/0.0.7.0.0
The server is known to be quite restrictive, disallowing directory listings, etc. The only workaround I can think of is to create the missing chunks filled with the fill value, which I'd like to avoid since it would have to be repeated for millions of images.
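One possible alternative to materialising fill-value chunks, sketched here and untested against this endpoint: wrap the S3Map so that PermissionError is reported to zarr as a missing key, letting zarr substitute the fill value itself. MissingChunkAsKeyError is a hypothetical helper, not part of any library:

from collections.abc import MutableMapping

class MissingChunkAsKeyError(MutableMapping):
    """Delegate to an fsspec mapping, but report PermissionError as a
    missing key so zarr falls back to the fill value."""

    def __init__(self, inner):
        self.inner = inner

    def __getitem__(self, key):
        try:
            return self.inner[key]
        except PermissionError:
            # The bucket returns 403 for absent keys; treat it as "missing".
            raise KeyError(key)

    def __setitem__(self, key, value):
        self.inner[key] = value

    def __delitem__(self, key):
        del self.inner[key]

    def __iter__(self):
        return iter(self.inner)

    def __len__(self):
        return len(self.inner)

# Hypothetical usage inside load_binary_from_s3 above:
#     store = MissingChunkAsKeyError(s3fs.S3Map(root=cfg['root'], s3=s3, check=False))
#     return da.from_zarr(store)

The obvious caveat is that this masks genuine authorization failures as well, so it only makes sense for buckets where the chunks that do exist are known to be readable.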