Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ mast
- Added a new ``Observations.list_cloud_datasets()`` method for querying cloud-supported MAST datasets, alongside
improvements to cloud download handling. [#3488]

- Cloud dataset access in ``Observations`` is now enabled by default if the ``boto3`` and ``botocore`` packages are
installed. This default can be overridden by setting the ``enable_cloud_dataset`` configuration option to ``False``. [#3534]

jplspec
^^^^^^^

Expand Down
8 changes: 8 additions & 0 deletions astroquery/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,3 +119,11 @@ class BlankResponseWarning(AstropyWarning):
Astroquery warning to be raised if one or more rows in a table are bad, but
not all rows are.
"""
pass


class CloudAccessWarning(AstropyWarning):
    """
    Astroquery warning category used to signal that access to cloud-hosted
    data could not be enabled.
    """
4 changes: 4 additions & 0 deletions astroquery/mast/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ class Conf(_config.ConfigNamespace):
pagesize = _config.ConfigItem(
50000,
'Number of results to request at once from the STScI server.')
enable_cloud_dataset = _config.ConfigItem(
True,
'Enable access to cloud-hosted datasets (e.g. on AWS S3) by default. '
'Requires the ``boto3`` and ``botocore`` packages to be installed.')


conf = Conf()
Expand Down
17 changes: 13 additions & 4 deletions astroquery/mast/cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,22 @@
from astroquery import log
from astropy.utils.console import ProgressBarOrSpinner
from astropy.utils.exceptions import AstropyDeprecationWarning
from botocore.exceptions import ClientError, BotoCoreError

from ..exceptions import RemoteServiceError, NoResultsWarning

from . import utils

try:
import boto3
HAS_BOTO3 = True
except ImportError:
HAS_BOTO3 = False
try:
import botocore
from botocore.exceptions import ClientError, BotoCoreError
HAS_BOTOCORE = True
except ImportError:
HAS_BOTOCORE = False

__all__ = []

Expand All @@ -44,15 +54,14 @@ def __init__(self, provider="AWS", profile=None, verbose=False):
verbose : bool
Default False. Display extra info and warnings if true.
"""
if not HAS_BOTO3 or not HAS_BOTOCORE:
raise ImportError("Please install the `boto3` and `botocore` packages to enable cloud dataset access.")

# Dealing with deprecated argument
if profile is not None:
warnings.warn(("MAST Open Data on AWS is now free to access and does "
"not require an AWS account"), AstropyDeprecationWarning)

import boto3
import botocore

self.boto3 = boto3
self.botocore = botocore
self.config = botocore.client.Config(signature_version=botocore.UNSIGNED)
Expand Down
77 changes: 61 additions & 16 deletions astroquery/mast/observations.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

import astropy.units as u
import astropy.coordinates as coord
from botocore.exceptions import ClientError, BotoCoreError

from astropy.table import Table, Row, vstack
from astroquery import log
Expand All @@ -27,13 +26,17 @@

from ..utils import async_to_sync
from ..utils.class_or_instance import class_or_instance
from ..exceptions import (InvalidQueryError, RemoteServiceError, NoResultsWarning, InputWarning)
from ..exceptions import (InvalidQueryError, RemoteServiceError, NoResultsWarning, InputWarning, CloudAccessWarning)

from . import utils
from . import utils, conf
from .core import MastQueryWithLogin

__all__ = ['Observations', 'ObservationsClass',
'MastClass', 'Mast']
try:
from botocore.exceptions import ClientError, BotoCoreError
except ImportError:
ClientError = BotoCoreError = ()

__all__ = ['Observations', 'ObservationsClass', 'MastClass', 'Mast']


@async_to_sync
Expand All @@ -51,6 +54,24 @@ class ObservationsClass(MastQueryWithLogin):
_caom_filtered = 'Mast.Caom.Filtered'
_caom_products = 'Mast.Caom.Products'

def __init__(self, mast_token=None):
    """
    Initialize the parent query class and the cloud-access preference state.

    Parameters
    ----------
    mast_token : optional
        Passed through unchanged to the parent class initializer.
    """
    super().__init__(mast_token)

    # Tri-state record of the user's explicit cloud-access choice:
    #   None  -> no explicit choice has been made
    #   True  -> the user enabled cloud access via ``enable_cloud_dataset``
    #   False -> the user called ``disable_cloud_dataset``
    self._cloud_enabled_explicitly = None

def _ensure_cloud_access(self):
"""Ensure cloud access is initialized if appropriate."""
# User explicitly disabled
if self._cloud_enabled_explicitly is False:
return

# Already initialized
if self._cloud_connection is not None:
return

# Default behavior is to enable cloud access if the config option is set, so we check that here
if self._cloud_enabled_explicitly is None and conf.enable_cloud_dataset:
self.enable_cloud_dataset(_internal=True)

def _parse_result(self, responses, *, verbose=False): # Used by the async_to_sync decorator functionality
"""
Parse the results of a list of `~requests.Response` objects and returns an `~astropy.table.Table` of results.
Expand Down Expand Up @@ -180,7 +201,7 @@ def _parse_caom_criteria(self, *, resolver=None, **criteria):

return position, mashup_filters

def enable_cloud_dataset(self, provider="AWS", profile=None, verbose=True):
def enable_cloud_dataset(self, provider="AWS", profile=None, verbose=True, *, _internal=False):
"""
Enable downloading public files from S3 instead of MAST.
Requires the boto3 library to function.
Expand All @@ -196,13 +217,21 @@ def enable_cloud_dataset(self, provider="AWS", profile=None, verbose=True):
Default True.
Logger to display extra info and warning.
"""
self._cloud_connection = CloudAccess(provider, profile, verbose)
try:
self._cloud_connection = CloudAccess(provider, profile, verbose)
if not _internal:
self._cloud_enabled_explicitly = True
except ImportError as e:
# boto3 or botocore is not installed
self._cloud_connection = None
warnings.warn(e.msg, CloudAccessWarning)

def disable_cloud_dataset(self):
    """
    Turn off downloading public files from S3 instead of MAST.

    Drops the stored cloud connection and records the explicit opt-out so
    that ``_ensure_cloud_access`` does not re-enable cloud access.
    """
    # Targets are assigned left to right: connection first, then the opt-out flag.
    self._cloud_connection, self._cloud_enabled_explicitly = None, False

@class_or_instance
def query_region_async(self, coordinates, *, radius=0.2*u.deg, pagesize=None, page=None):
Expand Down Expand Up @@ -656,6 +685,9 @@ def download_file(self, uri, *, local_path=None, base_url=None, cache=True, clou
url : str
The full url download path
"""
# Ensure cloud access is enabled
self._ensure_cloud_access()

if not uri or not isinstance(uri, str):
raise InvalidQueryError("A valid data product URI must be provided.")

Expand Down Expand Up @@ -693,8 +725,9 @@ def download_file(self, uri, *, local_path=None, base_url=None, cache=True, clou
NoResultsWarning)
return 'SKIPPED', None, None

warnings.warn(f'The product {uri} was not found in the cloud. '
'Falling back to MAST download.', InputWarning)
if self._cloud_enabled_explicitly:
warnings.warn(f'The product {uri} was not found in the cloud. '
'Falling back to MAST download.', InputWarning)
self._download_file(escaped_url, local_path, cache=cache, head_safe=True, verbose=verbose)
except (ClientError, BotoCoreError) as ex:
# Should be in cloud, but download failed
Expand All @@ -703,8 +736,9 @@ def download_file(self, uri, *, local_path=None, base_url=None, cache=True, clou
NoResultsWarning)
return 'SKIPPED', None, None

warnings.warn(f'Could not download {uri} from cloud: {ex}. Falling back to MAST download.',
InputWarning)
if self._cloud_enabled_explicitly:
warnings.warn(f'Could not download {uri} from cloud: {ex}. Falling back to MAST download.',
InputWarning)
self._download_file(escaped_url, local_path, cache=cache, head_safe=True, verbose=verbose)
else:
if cloud_only:
Expand Down Expand Up @@ -771,7 +805,6 @@ def _download_files(self, products, base_dir, *, flat=False, cache=True, cloud_o
status, msg, url = 'ERROR', None, None

cloud_uri = cloud_uri_map.get(mast_uri) if cloud_uri_map else None

if cloud_uri:
try:
self._cloud_connection.download_file_from_cloud(cloud_uri, local_path, cache, verbose)
Expand All @@ -784,8 +817,9 @@ def _download_files(self, products, base_dir, *, flat=False, cache=True, cloud_o
status = 'SKIPPED'
msg = str(ex)
else:
warnings.warn(f'Could not download {cloud_uri} from cloud: {ex}. '
'Falling back to MAST download.', InputWarning)
if self._cloud_enabled_explicitly:
warnings.warn(f'Could not download {cloud_uri} from cloud: {ex}. '
'Falling back to MAST download.', InputWarning)
status, msg, url = self.download_file(mast_uri, local_path=local_path, cache=cache,
force_on_prem=True, verbose=verbose)
else:
Expand All @@ -797,8 +831,9 @@ def _download_files(self, products, base_dir, *, flat=False, cache=True, cloud_o
status = 'SKIPPED'
msg = 'Product not found in cloud'
else:
warnings.warn(f'The product {mast_uri} was not found in the cloud. '
'Falling back to MAST download.', InputWarning)
if self._cloud_enabled_explicitly:
warnings.warn(f'The product {mast_uri} was not found in the cloud. '
'Falling back to MAST download.', InputWarning)
status, msg, url = self.download_file(mast_uri, local_path=local_path, cache=cache,
force_on_prem=True, verbose=verbose)
else:
Expand Down Expand Up @@ -899,6 +934,9 @@ def download_products(self, products, *, download_dir=None, flat=False,
response : `~astropy.table.Table`
The manifest of files downloaded, or status of files on disk if curl option chosen.
"""
# Ensure cloud access is enabled
self._ensure_cloud_access()

# If the products list is a row we need to cast it as a table
if isinstance(products, Row):
products = Table(products, masked=True)
Expand Down Expand Up @@ -961,6 +999,9 @@ def list_cloud_datasets(self):
response : list
List of dataset prefixes that support cloud data access.
"""
# Ensure cloud access is enabled
self._ensure_cloud_access()

if self._cloud_connection is None:
raise RemoteServiceError(
'Please enable anonymous cloud access by calling `enable_cloud_dataset` method. '
Expand Down Expand Up @@ -1027,6 +1068,8 @@ def get_cloud_uris(self, data_products=None, *, include_bucket=True, full_url=Fa
List of URIs generated from the data products. May contain entries that are None
if data_products includes products not found in the cloud.
"""
# Ensure cloud access is enabled
self._ensure_cloud_access()

if self._cloud_connection is None:
raise RemoteServiceError(
Expand Down Expand Up @@ -1110,6 +1153,8 @@ def get_cloud_uri(self, data_product, *, include_bucket=True, full_url=False):
Cloud URI generated from the data product. If the product cannot be
found in the cloud, None is returned.
"""
# Ensure cloud access is enabled
self._ensure_cloud_access()

if self._cloud_connection is None:
raise RemoteServiceError(
Expand Down
Loading
Loading