diff --git a/ci/azure/conda_linux.yml b/ci/azure/conda_linux.yml index 3bf8215cc4..2ebbda2243 100644 --- a/ci/azure/conda_linux.yml +++ b/ci/azure/conda_linux.yml @@ -40,6 +40,7 @@ jobs: export NREL_API_KEY=$(nrelApiKey) export BSRN_FTP_USERNAME=$(BSRN_FTP_USERNAME) export BSRN_FTP_PASSWORD=$(BSRN_FTP_PASSWORD) + export CDSAPI_KEY=$(CDSAPI_KEY) pytest pvlib --remote-data --junitxml=junit/test-results.xml --cov --cov-report=xml --cov-report=html displayName: 'pytest' - task: PublishTestResults@2 diff --git a/ci/requirements-py36.yml b/ci/requirements-py36.yml index 222ea92d99..4a8d87d151 100644 --- a/ci/requirements-py36.yml +++ b/ci/requirements-py36.yml @@ -3,8 +3,10 @@ channels: - defaults - conda-forge dependencies: + - cdsapi - coveralls - cython + - dask - ephem - h5py - netcdf4 @@ -27,6 +29,7 @@ dependencies: - shapely # pvfactors dependency - siphon # conda-forge - statsmodels + - xarray - pip: - dataclasses - nrel-pysam>=2.0 diff --git a/ci/requirements-py37.yml b/ci/requirements-py37.yml index 83db4b7f49..0d1bffa47b 100644 --- a/ci/requirements-py37.yml +++ b/ci/requirements-py37.yml @@ -3,8 +3,10 @@ channels: - defaults - conda-forge dependencies: + - cdsapi - coveralls - cython + - dask - ephem - h5py - netcdf4 @@ -27,6 +29,7 @@ dependencies: - shapely # pvfactors dependency - siphon # conda-forge - statsmodels + - xarray - pip: - nrel-pysam>=2.0 - pvfactors==1.4.1 diff --git a/ci/requirements-py38.yml b/ci/requirements-py38.yml index 8fecb52197..a9c67ddea5 100644 --- a/ci/requirements-py38.yml +++ b/ci/requirements-py38.yml @@ -3,8 +3,10 @@ channels: - defaults - conda-forge dependencies: + - cdsapi - coveralls - cython + - dask - ephem - h5py - netcdf4 @@ -27,6 +29,7 @@ dependencies: - shapely # pvfactors dependency - siphon # conda-forge - statsmodels + - xarray - pip: - nrel-pysam>=2.0 - pvfactors==1.4.1 diff --git a/ci/requirements-py39.yml b/ci/requirements-py39.yml index d1283b489e..fe591a4bcf 100644 --- a/ci/requirements-py39.yml +++ 
b/ci/requirements-py39.yml @@ -3,8 +3,10 @@ channels: - defaults - conda-forge dependencies: + - cdsapi - coveralls - cython + - dask - ephem - h5py # - netcdf4 # pulls in a different version of numpy with ImportError @@ -27,6 +29,7 @@ dependencies: - shapely # pvfactors dependency # - siphon # conda-forge - statsmodels + - xarray - pip: # - nrel-pysam>=2.0 # install error on windows - pvfactors==1.4.1 diff --git a/docs/sphinx/source/api.rst b/docs/sphinx/source/api.rst index 6d08e742b3..4491090e34 100644 --- a/docs/sphinx/source/api.rst +++ b/docs/sphinx/source/api.rst @@ -497,6 +497,8 @@ of sources and file formats relevant to solar energy modeling. iotools.get_cams iotools.read_cams iotools.parse_cams + iotools.get_era5 + iotools.read_era5 A :py:class:`~pvlib.location.Location` object may be created from metadata in some files. diff --git a/docs/sphinx/source/whatsnew/v0.9.0.rst b/docs/sphinx/source/whatsnew/v0.9.0.rst index 550965cb51..eacd4b1469 100644 --- a/docs/sphinx/source/whatsnew/v0.9.0.rst +++ b/docs/sphinx/source/whatsnew/v0.9.0.rst @@ -117,6 +117,10 @@ Deprecations Enhancements ~~~~~~~~~~~~ +* Add :func:`~pvlib.iotools.get_era5` and + :func:`~pvlib.iotools.read_era5` for retrieving and reading + ERA5 reanalysis netcdf files from the Climate Data Store (CDS). + (:pull:`1264`) * Added :func:`~pvlib.iotools.read_pvgis_hourly` and + :func:`~pvlib.iotools.get_pvgis_hourly` for reading and retrieving hourly solar radiation data and PV power output from PVGIS. (:pull:`1186`, @@ -233,6 +237,8 @@ Documentation Requirements ~~~~~~~~~~~~ * ``dataclasses`` is required for python 3.6 (:pull:`1076`) +* ``tables`` is now required instead of optional (:issue:`1286`, :pull:`1287`) +* xarray, dask, and cdsapi are now optional requirements. (:pull:`1264`) * ``h5py`` is now a required dependency. This replaces ``tables``, which was formerly an optional dependency. 
(:pull:`1299`, :issue:`1252`, :issue:`1286`) diff --git a/pvlib/data/era5_testfile.nc b/pvlib/data/era5_testfile.nc new file mode 100644 index 0000000000..484d7b159a Binary files /dev/null and b/pvlib/data/era5_testfile.nc differ diff --git a/pvlib/data/era5_testfile_1day.nc b/pvlib/data/era5_testfile_1day.nc new file mode 100644 index 0000000000..fc7ce91abc Binary files /dev/null and b/pvlib/data/era5_testfile_1day.nc differ diff --git a/pvlib/iotools/__init__.py b/pvlib/iotools/__init__.py index b02ce243ae..cb56f927e9 100644 --- a/pvlib/iotools/__init__.py +++ b/pvlib/iotools/__init__.py @@ -21,3 +21,5 @@ from pvlib.iotools.sodapro import get_cams # noqa: F401 from pvlib.iotools.sodapro import read_cams # noqa: F401 from pvlib.iotools.sodapro import parse_cams # noqa: F401 +from pvlib.iotools.era5 import get_era5 # noqa: F401 +from pvlib.iotools.era5 import read_era5 # noqa: F401 diff --git a/pvlib/iotools/era5.py b/pvlib/iotools/era5.py new file mode 100644 index 0000000000..88ca0ef614 --- /dev/null +++ b/pvlib/iotools/era5.py @@ -0,0 +1,277 @@ +"""Functions to retrieve and read ERA5 data from the CDS. +.. codeauthor:: Adam R. Jensen +""" +# The functions only support single-level 2D data and not 3D / pressure-level +# data. Also, monthly datasets and grib files are not supported. 
+ +import requests +from pvlib.tools import (_extract_metadata_from_dataset, + _convert_C_to_K_in_dataset) + +try: + import cdsapi +except ImportError: + cdsapi = None + +try: + import xarray as xr +except ImportError: + xr = None + +# The returned data uses shortNames, whereas the request requires variable +# names according to the CDS convention - passing shortNames results in an +# "Ambiguous" error being raised +ERA5_DEFAULT_VARIABLES = [ + '2m_temperature', # t2m + '10m_u_component_of_wind', # u10 + '10m_v_component_of_wind', # v10 + 'surface_pressure', # sp + 'mean_surface_downward_short_wave_radiation_flux', # msdwswrf + 'mean_surface_downward_short_wave_radiation_flux_clear_sky', # msdwswrfcs + 'mean_surface_direct_short_wave_radiation_flux', # msdrswrf + 'mean_surface_direct_short_wave_radiation_flux_clear_sky', # msdrswrfcs +] + +ERA5_VARIABLE_MAP = { + 't2m': 'temp_air', + 'd2m': 'temp_dew', + 'sp': 'pressure', + 'msdwswrf': 'ghi', + 'msdwswrfcs': 'ghi_clear', + 'msdwlwrf': 'lwd', + 'msdwlwrfcs': 'lwd_clear', + 'msdrswrf': 'bhi', + 'msdrswrfcs': 'bhi_clear', + 'mtdwswrf': 'ghi_extra'} + +ERA5_HOURS = [ + '00:00', '01:00', '02:00', '03:00', '04:00', '05:00', '06:00', '07:00', + '08:00', '09:00', '10:00', '11:00', '12:00', '13:00', '14:00', '15:00', + '16:00', '17:00', '18:00', '19:00', '20:00', '21:00', '22:00', '23:00'] + +CDSAPI_URL = 'https://cds.climate.copernicus.eu/api/v2' + + +def get_era5(latitude, longitude, start, end, api_key=None, + variables=ERA5_DEFAULT_VARIABLES, + dataset='reanalysis-era5-single-levels', + product_type='reanalysis', grid=(0.25, 0.25), save_path=None, + output_format=None, map_variables=True): + """ + Retrieve ERA5 reanalysis data from the Copernicus Data Store (CDS). + + * Temporal coverage: 1979 to present (latency of ~5 days) + * Temporal resolution: hourly + * Spatial coverage: global + * Spatial resolution: 0.25° by 0.25° + + An overview of ERA5 is given in [1]_ and [2]_. Data is retrieved using the + CDSAPI [3]_. 
+ + .. admonition:: Time reference + + ERA5 time stamps are in UTC and correspond to the end of the period + (right labeled). E.g., the time stamp 12:00 for hourly data refers to + the period from 11:00 to 12:00. + + .. admonition:: Usage notes + + To use this function the package CDSAPI [4]_ needs to be installed + [3]_. The CDSAPI keywords are described in [5]_. + + Requested variables should be specified according to the naming + convention used by the CDS. The returned data contains the short-name + versions of the variables. See [2]_ for a list of variable names and + units. + + Access to the CDS requires user registration, see [6]_. The obtained + API key can either be passed directly to the function or be saved in a + local file as described in [3]_. + + It is possible to check your + `request status `_ + and the `status of all queued requests `_. + + Parameters + ---------- + latitude: float or list + in decimal degrees, between -90 and 90, north is positive (ISO 19115). + If latitude is a list, it should have the format [S, N] and + latitudes within the range are selected according to the grid. + longitude: float or list + in decimal degrees, between -180 and 180, east is positive (ISO 19115). + If longitude is a list, it should have the format [W, E] and + longitudes within the range are selected according to the grid. + start: datetime like + First day of the requested period + end: datetime like + Last day of the requested period + api_key: str, optional + Personal API key for the CDS with the format "uid:key" e.g. + '00000:aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee' + variables: list, default: ERA5_DEFAULT_VARIABLES + List of variables to retrieve (according to CDS naming convention) + dataset: str, default 'reanalysis-era5-single-levels' + Name of the dataset to retrieve the variables from. Can be either + 'reanalysis-era5-single-levels' or 'reanalysis-era5-land'. 
+ product_type: str, {'reanalysis', 'ensemble_members', 'ensemble_mean', 'ensemble_spread'}, default: 'reanalysis' + ERA5 product type + grid: list or tuple, default: (0.25, 0.25) + User specified grid resolution + save_path: str or path-like, optional + Filename of where to save data. Should have ".nc" extension. + output_format: {'dataframe', 'dataset'}, optional + Type of data object to return. Default is to return a pandas DataFrame + if file only contains one location and otherwise return an xarray + Dataset. + map_variables: bool, default: True + When true, renames columns of the DataFrame to pvlib variable names + where applicable. See variable ERA5_VARIABLE_MAP. + + Notes + ----- + The returned data includes the following fields by default: + + ======================== ====== ========================================= + Key, mapped key Format Description + ======================== ====== ========================================= + *Mapped field names are returned when the map_variables argument is True* + --------------------------------------------------------------------------- + t2m, temp_air float Air temperature at 2 m above ground [K] + u10 float Horizontal airspeed towards east at 10 m [m/s] + v10 float Horizontal airspeed towards north at 10 m [m/s] + sp, pressure float Atmospheric pressure at the ground [Pa] + msdwswrf, ghi float Mean surface downward short-wave radiation flux [W/m^2] + msdwswrfcs, ghi_clear float Mean surface downward short-wave radiation flux, clear sky [W/m^2] + msdrswrf, bhi float Mean surface direct short-wave radiation flux [W/m^2] + msdrswrfcs, bhi_clear float Mean surface direct short-wave radiation flux, clear sky [W/m^2] + ======================== ====== ========================================= + + Returns + ------- + data: DataFrame + ERA5 time-series data, fields depend on the requested data. The + returned object is either a pandas DataFrame or an xarray dataset, + depending on the output_format parameter. 
+ metadata: dict + Metadata for the time-series. + + See Also + -------- + pvlib.iotools.read_era5 + + References + ---------- + .. [1] `ERA5 hourly data on single levels from 1979 to present + `_ + .. [2] `ERA5 data documentation + `_ + .. [3] `How to use the CDS API + `_ + .. [4] `CDSAPI source code + `_ + .. [5] `Climate Data Store (CDS) API Keywords + `_ + .. [6] `Climate Data Storage user registration + `_ + """ # noqa: E501 + if cdsapi is None: + raise ImportError('Retrieving ERA5 data requires cdsapi to be installed.') # noqa: E501 + + cds_client = cdsapi.Client(url=CDSAPI_URL, key=api_key, + verify=1, quiet=True) + + # Area is selected by a box made by the four coordinates: [N, W, S, E] + try: + area = [latitude[1], longitude[0], latitude[0], longitude[1]] + except TypeError: + area = [latitude, longitude, latitude, longitude] + + params = { + 'product_type': product_type, + 'variable': variables, + 'date': start.strftime('%Y-%m-%d') + '/' + end.strftime('%Y-%m-%d'), + 'time': ERA5_HOURS, + 'grid': grid, + 'area': area, + 'format': 'netcdf'} + + # Retrieve request url + request = cds_client.retrieve(dataset, params) + file_url = request.location + + # Load file into memory + with requests.get(file_url) as res: + + # Save the file locally if save_path has been specified + if save_path is not None: + with open(save_path, 'wb') as f: + f.write(res.content) + + return read_era5(res.content, map_variables=map_variables, + output_format=output_format) + + +def read_era5(filename, output_format=None, map_variables=True): + """Read one or more ERA5 netcdf files. + + Parameters + ---------- + filename: str or path-like or list + Filename of a netcdf file containing ERA5 data or a list of filenames. + output_format: {'dataframe', 'dataset'}, optional + Type of data object to return. Default is to return a pandas DataFrame + if file only contains one location and otherwise return an xarray + dataset. 
+ map_variables: bool, default: True + When true, renames columns to pvlib variable names where applicable. + See variable ERA5_VARIABLE_MAP. + + Returns + ------- + data: DataFrame + ERA5 time-series data, fields depend on the requested data. The + returned object is either a pandas DataFrame or an xarray dataset, + depending on the output_format parameter. + metadata: dict + Metadata for the time-series. + + See Also + -------- + pvlib.iotools.get_era5 + + References + ---------- + .. [1] `ERA5 hourly data on single levels from 1979 to present + `_ + .. [2] `ERA5 data documentation + `_ + """ + if xr is None: + raise ImportError('Reading ERA5 data requires xarray to be installed.') + + # open multiple-files (mf) requires dask + if isinstance(filename, (list, tuple)): + ds = xr.open_mfdataset(filename) + else: + ds = xr.open_dataset(filename) + + if map_variables: + # Renaming of xarray datasets throws an error if keys are missing + ds = ds.rename_vars( + {k: v for k, v in ERA5_VARIABLE_MAP.items() if k in list(ds)}) + + ds = _convert_C_to_K_in_dataset(ds) + metadata = _extract_metadata_from_dataset(ds) + + if (output_format == 'dataframe') or ( + (output_format is None) & (ds['latitude'].size == 1) & + (ds['longitude'].size == 1)): + data = ds.to_dataframe() + # Localize timezone to UTC + data.index = data.index.set_levels(data.index.get_level_values('time').tz_localize('utc'), level='time') # noqa: E501 + if (ds['latitude'].size == 1) & (ds['longitude'].size == 1): + data = data.droplevel(['latitude', 'longitude']) + return data, metadata + else: + return ds, metadata diff --git a/pvlib/tests/conftest.py b/pvlib/tests/conftest.py index b3e9fcd5a1..149e3c820a 100644 --- a/pvlib/tests/conftest.py +++ b/pvlib/tests/conftest.py @@ -94,6 +94,16 @@ def assert_frame_equal(left, right, **kwargs): requires_bsrn_credentials = pytest.mark.skipif( not has_bsrn_credentials, reason='requires bsrn credentials') +try: + # Attempt to load CDS credentials used for testing 
pvlib.iotools.get_era5 + CDSAPI_KEY = os.environ["CDSAPI_KEY"] + has_cds_credentials = True +except KeyError: + has_cds_credentials = False + +requires_cds_credentials = pytest.mark.skipif( + not has_cds_credentials, reason='requires CDS credentials') + try: import statsmodels # noqa: F401 @@ -105,6 +115,16 @@ def assert_frame_equal(left, right, **kwargs): not has_statsmodels, reason='requires statsmodels') +try: + import xarray as xr # noqa: F401 + has_xarray = True +except ImportError: + has_xarray = False + +requires_xarray = pytest.mark.skipif( + not has_xarray, reason='requires xarray') + + try: import ephem has_ephem = True diff --git a/pvlib/tests/iotools/test_era5.py b/pvlib/tests/iotools/test_era5.py new file mode 100644 index 0000000000..bfe2d76bbf --- /dev/null +++ b/pvlib/tests/iotools/test_era5.py @@ -0,0 +1,125 @@ +""" +tests for :mod:`pvlib.iotools.era5` +""" + +import pandas as pd +import numpy as np +import pytest +import os +from pvlib.iotools import read_era5, get_era5 +from ..conftest import (DATA_DIR, RERUNS, RERUNS_DELAY, assert_index_equal, + requires_cds_credentials, requires_xarray) + + +@pytest.fixture(scope="module") +def cds_api_key(): + """Supplies pvlib-python's CDS API key. 
+ + Users should obtain their own credentials as described in the `get_era5` + documentation.""" + return os.environ["CDSAPI_KEY"] + + +@pytest.fixture +def expected_index(): + index = pd.date_range('2020-1-1', freq='1h', periods=8832, tz='UTC') + index.name = 'time' + return index + + +@pytest.fixture +def expected_columns(): + return ['t2m', 'u10', 'v10', 'sp', 'msdwswrf', 'msdwswrfcs', 'msdrswrf', + 'msdrswrfcs'] + + +@pytest.fixture +def expected_columns_mapped(): + return ['temp_air', 'u10', 'v10', 'pressure', 'ghi', 'ghi_clear', 'bhi', + 'bhi_clear'] + + +@requires_xarray +def test_read_era5(expected_index, expected_columns): + data, meta = read_era5(DATA_DIR / 'era5_testfile.nc', map_variables=False) + assert (expected_columns == data.columns).all() + assert_index_equal(data.index, expected_index[:8784]) + # Test meta + assert meta['msdwswrf'] == { + 'name': 'msdwswrf', + 'long_name': 'Mean surface downward short-wave radiation flux', + 'units': 'W m**-2'} + assert 'dims' in meta.keys() + # Test conversion of K to C + assert meta['t2m']['units'] == 'C' + assert np.isclose(data['t2m'].iloc[0], 2.8150635) # temperature in deg C + + +@requires_xarray +def test_read_era5_variable_mapped(expected_index, expected_columns_mapped): + data, meta = read_era5(DATA_DIR / 'era5_testfile.nc') + assert (expected_columns_mapped == data.columns).all() + assert_index_equal(data.index, expected_index[:8784]) + assert data.notna().all().all() + assert meta['temp_air'] == { + 'name': 'temp_air', 'long_name': '2 metre temperature', 'units': 'C'} + + +@requires_xarray +def test_read_era5_output_format(): + import xarray as xr + data, meta = read_era5(DATA_DIR / 'era5_testfile.nc', + output_format='dataset') + assert isinstance(data, xr.Dataset) + + +@requires_xarray +def test_read_era5_multiple_files(expected_index): + filenames = \ + [DATA_DIR / f for f in ['era5_testfile.nc', 'era5_testfile_1day.nc']] + data, meta = read_era5(filenames) + assert_index_equal(data.index, 
expected_index) + + +@requires_xarray +@requires_cds_credentials +@pytest.mark.remote_data +@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) +def test_get_era5(cds_api_key, expected_index): + data, meta = get_era5( + latitude=55.7, + longitude=12.5, + start=pd.Timestamp(2020, 1, 1), + end=pd.Timestamp(2020, 1, 1), + variables=['mean_surface_downward_short_wave_radiation_flux_clear_sky', + '2m_temperature'], + api_key=cds_api_key, + save_path='era5_test_data.nc', + map_variables=True) + assert 'temp_air' in data.columns + assert 'ghi_clear' in data.columns + assert_index_equal(data.index, expected_index[:24]) + assert data.notna().all().all() + + +@requires_xarray +@requires_cds_credentials +@pytest.mark.remote_data +@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) +def test_get_era5_area(cds_api_key, expected_index): + data, meta = get_era5( + latitude=[55.7, 55.7+0.25*2], + longitude=[12.5, 12.5+0.25*2], + start=pd.Timestamp(2020, 1, 1), + end=pd.Timestamp(2020, 1, 1), + variables=['mean_surface_downward_short_wave_radiation_flux_clear_sky', + '2m_temperature'], + api_key=cds_api_key, + save_path='era5_test_data.nc', + map_variables=True) + assert 'temp_air' in data.variables.mapping.keys() + assert 'time' in data.variables.mapping.keys() + assert 'longitude' in data.variables.mapping.keys() + assert np.isclose(data.latitude.values, [56.2, 55.95, 55.7]).all() + assert (data.time.values == + expected_index[:24].to_pydatetime().astype('datetime64[ns]')).all() diff --git a/pvlib/tests/iotools/test_sodapro.py b/pvlib/tests/iotools/test_sodapro.py index 10f9a1e8c9..ee4922cd15 100644 --- a/pvlib/tests/iotools/test_sodapro.py +++ b/pvlib/tests/iotools/test_sodapro.py @@ -143,7 +143,6 @@ 0.9897]]) -# @pytest.fixture def generate_expected_dataframe(values, columns, index, dtypes): """Create dataframe from arrays of values, columns and index, in order to use this dataframe to compare to. 
@@ -244,7 +243,7 @@ def test_get_cams(requests_mock, testfile, index, columns, values, dtypes, def test_get_cams_bad_request(requests_mock): - """Test that a the correct errors/warnings ares raised for invalid + """Test that the correct errors/warnings are raised for invalid requests inputs. Also tests if the specified server url gets used""" # Subset of an xml file returned for errornous requests diff --git a/pvlib/tools.py b/pvlib/tools.py index eef80a3b37..18bb9ae30c 100644 --- a/pvlib/tools.py +++ b/pvlib/tools.py @@ -344,3 +344,52 @@ def _golden_sect_DataFrame(params, VL, VH, func): raise Exception("EXCEPTION:iterations exceeded maximum (50)") return func(df, 'V1'), df['V1'] + + +def _extract_metadata_from_dataset(ds): + """ + Generate a dictionary of metadata from an xarray dataset. + + Parameters + ---------- + ds : dataset + dataset containing time series data. + + Returns + ------- + metadata : dict + Dictionary containing metadata. + """ + metadata = {} + for v in list(ds.variables): + metadata[v] = { + 'name': ds[v].name, + 'long_name': ds[v].long_name} + if 'units' in ds[v].attrs: + metadata[v]['units'] = ds[v].units + metadata['dims'] = dict(ds.dims) + metadata.update(ds.attrs) # add arbitrary metadata + return metadata + + +def _convert_C_to_K_in_dataset(ds): + """ + Convert all variables in an xarray dataset that have the unit Kelvin to + degrees Celsius. + + Parameters + ---------- + ds : dataset + dataset containing time series data. 
+ + Returns + ------- + ds : dataset + dataset where variables with unit Kelvin have been converted to + degrees Celsius + """ + for v in list(ds.variables): + if 'units' in ds[v].attrs: + if 'K' == ds[v].attrs['units']: + ds[v].data = ds[v].data - 273.15 + ds[v].attrs['units'] = 'C' + return ds diff --git a/setup.py b/setup.py index 0717a9d839..c4b09c857e 100755 --- a/setup.py +++ b/setup.py @@ -54,8 +54,9 @@ 'pytest-remotedata'] EXTRAS_REQUIRE = { 'optional': ['cython', 'ephem', 'netcdf4', 'nrel-pysam', 'numba', + 'pvfactors', 'siphon', 'statsmodels', - 'cftime >= 1.1.1'], + 'cftime >= 1.1.1', 'xarray', 'dask', 'cdsapi'], 'doc': ['ipython', 'matplotlib', 'sphinx == 3.1.2', 'sphinx_rtd_theme==0.5.0', 'sphinx-gallery', 'docutils == 0.15.2', 'pillow', 'netcdf4', 'siphon',