Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,14 @@ mast
- Added ``resolve_all`` parameter to ``MastClass.resolve_object`` to resolve object names and return
coordinates for all available resolvers. [#3292]

- Fix bug where ``utils.remove_duplicate_products`` did not retain the order of the products in an input table. [#3314]

- Added ``return_uri_map`` parameter to ``Observations.get_cloud_uris`` to return a mapping of the input data product URIs
to the returned cloud URIs. [#3314]

- Added ``verbose`` parameter to ``Observations.get_cloud_uris`` to control whether warnings are logged when a product cannot
be found in the cloud. [#3314]


Infrastructure, Utility and Other Changes and Additions
-------------------------------------------------------
Expand Down
9 changes: 6 additions & 3 deletions astroquery/mast/cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@
# Output from ``get_cloud_uri_list`` is always a list even when it's only 1 URI
return uri_list[0]

def get_cloud_uri_list(self, data_products, include_bucket=True, full_url=False):
def get_cloud_uri_list(self, data_products, *, include_bucket=True, full_url=False, verbose=True):
"""
Takes an `~astropy.table.Table` of data products and returns the associated cloud data uris.

Expand All @@ -132,6 +132,8 @@
full_url : bool
Default False. Return an HTTP fetchable url instead of a cloud uri.
Must set include_bucket to False to use this option.
verbose : bool
Default True. Whether to issue warnings if a product cannot be found in the cloud.

Returns
-------
Expand All @@ -141,7 +143,7 @@
"""
s3_client = self.boto3.client('s3', config=self.config)
data_uris = data_products if isinstance(data_products, list) else data_products['dataURI']
paths = utils.mast_relative_path(data_uris)
paths = utils.mast_relative_path(data_uris, verbose=verbose)
if isinstance(paths, str): # Handle the case where only one product was requested
paths = [paths]

Expand All @@ -164,7 +166,8 @@
except self.botocore.exceptions.ClientError as e:
if e.response['Error']['Code'] != "404":
raise
warnings.warn("Unable to locate file {}.".format(path), NoResultsWarning)
if verbose:
warnings.warn("Unable to locate file {}.".format(path), NoResultsWarning)

Check warning on line 170 in astroquery/mast/cloud.py

View check run for this annotation

Codecov / codecov/patch

astroquery/mast/cloud.py#L169-L170

Added lines #L169 - L170 were not covered by tests
uri_list.append(None)

return uri_list
Expand Down
32 changes: 28 additions & 4 deletions astroquery/mast/observations.py
Original file line number Diff line number Diff line change
Expand Up @@ -874,7 +874,8 @@ def download_products(self, products, *, download_dir=None, flat=False,
return manifest

def get_cloud_uris(self, data_products=None, *, include_bucket=True, full_url=False, pagesize=None, page=None,
mrp_only=False, extension=None, filter_products={}, **criteria):
mrp_only=False, extension=None, filter_products={}, return_uri_map=False, verbose=True,
**criteria):
"""
Given an `~astropy.table.Table` of data products or query criteria and filter parameters,
returns the associated cloud data URIs.
Expand Down Expand Up @@ -908,6 +909,12 @@ def get_cloud_uris(self, data_products=None, *, include_bucket=True, full_url=Fa
or more acceptable values for that parameter.
Filter behavior is AND between the filters and OR within a filter set.
For example: {"productType": "SCIENCE", "extension": ["fits", "jpg"]}
return_uri_map : bool, optional
Default False. If set to True, returns a dictionary mapping the original data product
URIs to their corresponding cloud URIs. This is useful for tracking which products were
successfully converted to cloud URIs.
verbose : bool, optional
Default True. Whether to issue warnings if a product cannot be found in the cloud.
**criteria
Criteria to apply. At least one non-positional criteria must be supplied.
Valid criteria are coordinates, objectname, radius (as in `query_region` and `query_object`),
Expand Down Expand Up @@ -951,20 +958,37 @@ def get_cloud_uris(self, data_products=None, *, include_bucket=True, full_url=Fa
# Filter product list
data_products = self.filter_products(data_products, mrp_only=mrp_only, extension=extension,
**filter_products)
data_uris = data_products['dataURI']
else: # data_products is a list of URIs
# Warn if trying to supply filters
if filter_products or extension or mrp_only:
warnings.warn('Filtering is not supported when providing a list of MAST URIs. '
'To apply filters, please provide query criteria or a table of data products '
'as returned by `Observations.get_product_list`', InputWarning)
data_uris = data_products

if not len(data_products):
if not len(data_uris):
warnings.warn('No matching products to fetch associated cloud URIs.', NoResultsWarning)
return

# Remove duplicate products
data_products = utils.remove_duplicate_products(data_products, 'dataURI')
return self._cloud_connection.get_cloud_uri_list(data_products, include_bucket, full_url)
data_uris = utils.remove_duplicate_products(data_uris, 'dataURI')

# Get cloud URIs
cloud_uris = self._cloud_connection.get_cloud_uri_list(data_uris,
include_bucket=include_bucket,
full_url=full_url,
verbose=verbose)

# If return_uri_map is True, create a mapping of dataURIs to cloud URIs
if return_uri_map:
uri_map = dict(zip(data_uris, cloud_uris))
return uri_map

# Remove None values from the list
cloud_uris = [uri for uri in cloud_uris if uri is not None]

return cloud_uris

def get_cloud_uri(self, data_product, *, include_bucket=True, full_url=False):
"""
Expand Down
8 changes: 7 additions & 1 deletion astroquery/mast/tests/test_mast.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ def test_missions_get_product_list(patch_post):
def test_missions_get_unique_product_list(patch_post, caplog):
unique_products = mast.MastMissions.get_unique_product_list('Z14Z0104T')
assert isinstance(unique_products, Table)
assert (unique_products == unique(unique_products, keys='filename')).all()
assert (len(unique_products) == len(unique(unique_products, keys='filename')))
# No INFO messages should be logged
with caplog.at_level('INFO', logger='astroquery'):
assert caplog.text == ''
Expand Down Expand Up @@ -770,6 +770,12 @@ def test_observations_get_cloud_uris(mock_client, patch_post):
assert len(uris) == 1
assert uris[0] == expected

# Return a map of URIs
uri_map = mast.Observations.get_cloud_uris([mast_uri], return_uri_map=True)
assert isinstance(uri_map, dict)
assert len(uri_map) == 1
assert uri_map[mast_uri] == expected

# Warn if attempting to filter with list input
with pytest.warns(InputWarning, match='Filtering is not supported'):
mast.Observations.get_cloud_uris([mast_uri],
Expand Down
17 changes: 12 additions & 5 deletions astroquery/mast/tests/test_mast_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ def test_missions_get_unique_product_list(self, caplog):
# Unique product list should have fewer rows
assert len(products) > len(unique_products)
# Rows should be unique based on filename
assert (unique_products == unique(unique_products, keys='filename')).all()
assert (len(unique_products) == len(unique(unique_products, keys='filename')))
# Check that INFO messages were logged
with caplog.at_level('INFO', logger='astroquery'):
assert 'products were duplicates' in caplog.text
Expand Down Expand Up @@ -570,15 +570,15 @@ def test_observations_get_product_list_async(self):
responses = Observations.get_product_list_async(test_obs[2:3])
assert isinstance(responses, list)

observations = Observations.query_object("M8", radius=".02 deg")
observations = Observations.query_criteria(objectname="M8", obs_collection=["K2", "IUE"])
responses = Observations.get_product_list_async(observations[0])
assert isinstance(responses, list)

responses = Observations.get_product_list_async(observations[0:4])
assert isinstance(responses, list)

def test_observations_get_product_list(self):
observations = Observations.query_object("M8", radius=".04 deg")
observations = Observations.query_criteria(objectname='M8', obs_collection=['K2', 'IUE'])
test_obs_id = str(observations[0]['obsid'])
mult_obs_ids = str(observations[0]['obsid']) + ',' + str(observations[1]['obsid'])

Expand All @@ -598,7 +598,7 @@ def test_observations_get_product_list(self):
assert len(result1) == len(result2)
assert set(filenames1) == set(filenames2)

obsLoc = np.where(observations["obs_id"] == 'ktwo200071160-c92_lc')
obsLoc = np.where(observations['obs_id'] == 'ktwo200071160-c92_lc')
result = Observations.get_product_list(observations[obsLoc])
assert isinstance(result, Table)
assert len(result) == 1
Expand Down Expand Up @@ -644,7 +644,7 @@ def test_observations_get_unique_product_list(self, caplog):
# Unique product list should have fewer rows
assert len(products) > len(unique_products)
# Rows should be unique based on dataURI
assert (unique_products == unique(unique_products, keys='dataURI')).all()
assert (len(unique_products) == len(unique(unique_products, keys='dataURI')))
# Check that INFO messages were logged
with caplog.at_level('INFO', logger='astroquery'):
assert 'products were duplicates' in caplog.text
Expand Down Expand Up @@ -878,6 +878,13 @@ def test_observations_get_cloud_uris_list_input(self):
assert len(uris) > 0, f'Products for URI list {uri_list} were not found in the cloud.'
assert uris == expected

# return map of dataURI to cloud URI
uri_map = Observations.get_cloud_uris(uri_list, return_uri_map=True)
assert isinstance(uri_map, dict)
assert len(uri_map) == 2
for i, uri in enumerate(uri_list):
assert uri_map[uri] == expected[i]

# check for warning if filters are provided with list input
with pytest.warns(InputWarning, match='Filtering is not supported'):
Observations.get_cloud_uris(uri_list,
Expand Down
44 changes: 21 additions & 23 deletions astroquery/mast/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import platform

from astropy.coordinates import SkyCoord
from astropy.table import unique, Table
from astropy.table import Table
from astropy import units as u

from .. import log
Expand Down Expand Up @@ -258,24 +258,26 @@
yield input_list[idx:idx + chunk_size]


def mast_relative_path(mast_uri):
def mast_relative_path(mast_uri, *, verbose=True):
"""
Given one or more MAST dataURI(s), return the associated relative path(s).

Parameters
----------
mast_uri : str, list of str
The MAST uri(s).
verbose : bool, optional
Default True. Whether to issue warnings if the MAST relative path cannot be found for a product.

Returns
-------
response : str, list of str
The associated relative path(s).
"""
if isinstance(mast_uri, str):
uri_list = [("uri", mast_uri)]
else: # mast_uri parameter is a list
uri_list = [("uri", uri) for uri in mast_uri]
uri_list = [mast_uri]
else:
uri_list = list(mast_uri)

# Split the list into chunks of 50 URIs; this is necessary
# to avoid "414 Client Error: Request-URI Too Large".
Expand All @@ -284,19 +286,19 @@
result = []
for chunk in uri_list_chunks:
response = _simple_request("https://mast.stsci.edu/api/v0.1/path_lookup/",
{"uri": [mast_uri[1] for mast_uri in chunk]})
{"uri": [mast_uri for mast_uri in chunk]})

json_response = response.json()

for uri in chunk:
# Chunk is a list of tuples where the tuple is
# ("uri", "/path/to/product")
# so we index for path (index=1)
path = json_response.get(uri[1])["path"]
path = json_response.get(uri)["path"]
if path is None:
warnings.warn(f"Failed to retrieve MAST relative path for {uri[1]}. Skipping...", NoResultsWarning)
continue
if 'galex' in path:
if verbose:
warnings.warn(f"Failed to retrieve MAST relative path for {uri}. Skipping...", NoResultsWarning)
elif 'galex' in path:
path = path.lstrip("/mast/")
elif '/ps1/' in path:
path = path.replace("/ps1/", "panstarrs/ps1/public/")
Expand Down Expand Up @@ -331,19 +333,15 @@
"""
# Get unique products based on input type
if isinstance(data_products, Table):
unique_products = unique(data_products, keys=uri_key)
else: # data_products is a list
_, unique_indices = np.unique(data_products[uri_key], return_index=True)
unique_products = data_products[np.sort(unique_indices)]
else: # list of URIs
seen = set()
unique_products = []
for uri in data_products:
if uri not in seen:
seen.add(uri)
unique_products.append(uri)

number = len(data_products)
number_unique = len(unique_products)
if number_unique < number:
log.info(f"{number - number_unique} of {number} products were duplicates. "
f"Only returning {number_unique} unique product(s).")
unique_products = [uri for uri in data_products if not (uri in seen or seen.add(uri))]

duplicates_removed = len(data_products) - len(unique_products)
if duplicates_removed > 0:
log.info(f"{duplicates_removed} of {len(data_products)} products were duplicates. "

Check warning on line 344 in astroquery/mast/utils.py

View check run for this annotation

Codecov / codecov/patch

astroquery/mast/utils.py#L344

Added line #L344 was not covered by tests
f"Only returning {len(unique_products)} unique product(s).")

return unique_products
Loading
Loading