diff --git a/ci/azure/conda_linux.yml b/ci/azure/conda_linux.yml index 3bf8215cc4..6fadfd86a1 100644 --- a/ci/azure/conda_linux.yml +++ b/ci/azure/conda_linux.yml @@ -40,6 +40,8 @@ jobs: export NREL_API_KEY=$(nrelApiKey) export BSRN_FTP_USERNAME=$(BSRN_FTP_USERNAME) export BSRN_FTP_PASSWORD=$(BSRN_FTP_PASSWORD) + export MERRA2_USERNAME=$(MERRA2_USERNAME) + export MERRA2_PASSWORD=$(MERRA2_PASSWORD) pytest pvlib --remote-data --junitxml=junit/test-results.xml --cov --cov-report=xml --cov-report=html displayName: 'pytest' - task: PublishTestResults@2 diff --git a/ci/requirements-py36.yml b/ci/requirements-py36.yml index c49455119f..95eb075aa5 100644 --- a/ci/requirements-py36.yml +++ b/ci/requirements-py36.yml @@ -3,9 +3,12 @@ channels: - defaults - conda-forge dependencies: + - cftime - coveralls - cython + - dask - ephem + - lxml - netcdf4 - nose - numba @@ -27,7 +30,9 @@ dependencies: - shapely # pvfactors dependency - siphon # conda-forge - statsmodels + - xarray - pip: - dataclasses - nrel-pysam>=2.0 - pvfactors==1.4.1 + - git+https://github.com/pydap/pydap#egg=pydap diff --git a/ci/requirements-py37.yml b/ci/requirements-py37.yml index 3203b004d1..5e0f5db0ae 100644 --- a/ci/requirements-py37.yml +++ b/ci/requirements-py37.yml @@ -3,9 +3,12 @@ channels: - defaults - conda-forge dependencies: + - cftime - coveralls - cython + - dask - ephem + - lxml - netcdf4 - nose - numba @@ -27,6 +30,8 @@ dependencies: - shapely # pvfactors dependency - siphon # conda-forge - statsmodels + - xarray - pip: - nrel-pysam>=2.0 - pvfactors==1.4.1 + - git+https://github.com/pydap/pydap#egg=pydap diff --git a/ci/requirements-py38.yml b/ci/requirements-py38.yml index ca3a968335..e5dff100a9 100644 --- a/ci/requirements-py38.yml +++ b/ci/requirements-py38.yml @@ -3,9 +3,12 @@ channels: - defaults - conda-forge dependencies: + - cftime - coveralls - cython + - dask - ephem + - lxml - netcdf4 - nose - numba @@ -27,6 +30,8 @@ dependencies: - shapely # pvfactors dependency - siphon # 
conda-forge - statsmodels + - xarray - pip: - nrel-pysam>=2.0 - pvfactors==1.4.1 + - git+https://github.com/pydap/pydap#egg=pydap diff --git a/ci/requirements-py39.yml b/ci/requirements-py39.yml index 16c6449158..11727abede 100644 --- a/ci/requirements-py39.yml +++ b/ci/requirements-py39.yml @@ -3,9 +3,12 @@ channels: - defaults - conda-forge dependencies: + - cftime - coveralls - cython + - dask - ephem + - lxml # - netcdf4 # pulls in a different version of numpy with ImportError - nose # - numba # python 3.9 compat in early 2021 @@ -27,6 +30,8 @@ dependencies: - shapely # pvfactors dependency # - siphon # conda-forge - statsmodels + - xarray - pip: # - nrel-pysam>=2.0 # install error on windows - pvfactors==1.4.1 + - git+https://github.com/pydap/pydap#egg=pydap diff --git a/docs/sphinx/source/api.rst b/docs/sphinx/source/api.rst index 6d08e742b3..cf691aaf52 100644 --- a/docs/sphinx/source/api.rst +++ b/docs/sphinx/source/api.rst @@ -497,6 +497,8 @@ of sources and file formats relevant to solar energy modeling. iotools.get_cams iotools.read_cams iotools.parse_cams + iotools.get_merra2 + iotools.read_merra2 A :py:class:`~pvlib.location.Location` object may be created from metadata in some files. diff --git a/docs/sphinx/source/whatsnew/v0.9.0.rst b/docs/sphinx/source/whatsnew/v0.9.0.rst index d516685154..0f4e18959c 100644 --- a/docs/sphinx/source/whatsnew/v0.9.0.rst +++ b/docs/sphinx/source/whatsnew/v0.9.0.rst @@ -107,8 +107,11 @@ Deprecations Enhancements ~~~~~~~~~~~~ -* Added :func:`~pvlib.iotools.read_pvgis_hourly` and - :func:`~pvlib.iotools.get_pvgis_hourly` for reading and retrieving hourly +* Added :func:`~pvlib.iotools.get_merra2` and + :func:`~pvlib.iotools.read_merra2` for retrieving and reading hourly + reanalysis data from MERRA-2. (:pull:`1247`) +* Added :func:`~pvlib.iotools.get_pvgis_hourly` and + :func:`~pvlib.iotools.read_pvgis_hourly` for retrieving and reading hourly solar radiation data and PV power output from PVGIS. 
(:pull:`1186`, :issue:`849`) * Add :func:`~pvlib.iotools.get_bsrn` and :func:`~pvlib.iotools.read_bsrn` @@ -210,6 +213,7 @@ Documentation Requirements ~~~~~~~~~~~~ * ``dataclasses`` is required for python 3.6 +* xarray, dask, and pydap are now optional requirements. (:pull:`1264`, :pull:`1274`) Contributors ~~~~~~~~~~~~ diff --git a/pvlib/data/MERRA2_400.tavg1_2d_rad_Nx.20200101.SUB.nc b/pvlib/data/MERRA2_400.tavg1_2d_rad_Nx.20200101.SUB.nc new file mode 100644 index 0000000000..481d277565 Binary files /dev/null and b/pvlib/data/MERRA2_400.tavg1_2d_rad_Nx.20200101.SUB.nc differ diff --git a/pvlib/data/MERRA2_400.tavg1_2d_rad_Nx.20200102.SUB.nc b/pvlib/data/MERRA2_400.tavg1_2d_rad_Nx.20200102.SUB.nc new file mode 100644 index 0000000000..a588f0286a Binary files /dev/null and b/pvlib/data/MERRA2_400.tavg1_2d_rad_Nx.20200102.SUB.nc differ diff --git a/pvlib/data/MERRA2_400.tavg1_2d_rad_Nx.20200103.SUB.nc b/pvlib/data/MERRA2_400.tavg1_2d_rad_Nx.20200103.SUB.nc new file mode 100644 index 0000000000..360c761746 Binary files /dev/null and b/pvlib/data/MERRA2_400.tavg1_2d_rad_Nx.20200103.SUB.nc differ diff --git a/pvlib/iotools/__init__.py b/pvlib/iotools/__init__.py index b02ce243ae..39825e301d 100644 --- a/pvlib/iotools/__init__.py +++ b/pvlib/iotools/__init__.py @@ -21,3 +21,5 @@ from pvlib.iotools.sodapro import get_cams # noqa: F401 from pvlib.iotools.sodapro import read_cams # noqa: F401 from pvlib.iotools.sodapro import parse_cams # noqa: F401 +from pvlib.iotools.merra2 import get_merra2 # noqa: F401 +from pvlib.iotools.merra2 import read_merra2 # noqa: F401 diff --git a/pvlib/iotools/merra2.py b/pvlib/iotools/merra2.py new file mode 100644 index 0000000000..8bb6fd5a98 --- /dev/null +++ b/pvlib/iotools/merra2.py @@ -0,0 +1,246 @@ +"""Functions to read and retrieve MERRA-2 reanalysis data from NASA. +.. codeauthor:: Adam R. 
Jensen +""" +from pvlib.tools import (_extract_metadata_from_dataset, + _convert_C_to_K_in_dataset) + +try: + import xarray as xr +except ImportError: + xr = None + +try: + from pydap.cas.urs import setup_session +except ImportError: + setup_session = None + +try: + import cftime +except ImportError: + cftime = None + +MERRA2_VARIABLE_MAP = { + # Variables from M2T1NXRAD - radiation diagnostics + 'LWGEM': 'lwu', # longwave flux emitted from surface [W/m^2] + 'SWGDN': 'ghi', # surface incoming shortwave flux [W/m^2] + 'SWGDNCLR': 'ghi_clear', # SWGDN assuming clear sky [W/m^2] + 'SWTDN': 'toa', # toa incoming shortwave flux [W/m^2] + # Variables from M2T1NXSLV - single-level diagnostics + 'PS': 'pressure', # surface pressure [Pa] + 'T2M': 'temp_air', # 2-meter air temperature [K converted to C] + 'T2MDEW': 'temp_dew', # dew point temperature at 2 m [K converted to C] +} + +# goldsmr4 contains the single-level 2D hourly MERRA-2 data files +MERRA2_BASE_URL = 'https://goldsmr4.gesdisc.eosdis.nasa.gov/dods' + + +def get_merra2(latitude, longitude, start, end, dataset, variables, username, + password, save_path=None, output_format=None, + map_variables=True): + """ + Retrieve MERRA-2 reanalysis data from the NASA GES DISC repository. + + The function supports downloading of MERRA-2 [1]_ hourly 2-dimensional + time-averaged variables. A list of the available datasets and parameters + is given in [2]_. + + * Temporal coverage: 1980 to present (latency of 2-7 weeks) + * Temporal resolution: hourly + * Spatial coverage: global + * Spatial resolution: 0.5° latitude by 0.625° longitude + + Parameters + ---------- + latitude: float or list + in decimal degrees, between -90 and 90, north is positive (ISO 19115). + If latitude is a list, it should have the format [S, N], and + latitudes within the range are selected according to the grid. + longitude: float or list + in decimal degrees, between -180 and 180, east is positive (ISO 19115). 
+ If longitude is a list, it should have the format [W, E], and + longitudes within the range are selected according to the grid. + start: datetime-like + First day of the requested period. + end: datetime-like + Last day of the requested period. + variables: list + List of variables to retrieve, e.g., ['TAUHGH', 'SWGNT']. + dataset: str + Name of the dataset to retrieve the variables from, e.g., 'M2T1NXRAD' + for radiation parameters and 'M2T1NXAER' for aerosol parameters. + output_format: {'dataframe', 'dataset'}, optional + Type of data object to return. Default is to return a pandas DataFrame + if data for a single location is requested and otherwise return an + xarray Dataset. + map_variables: bool, default: True + When true, renames columns to pvlib variable names where applicable. + See variable MERRA2_VARIABLE_MAP. + + Returns + ------- + data: DataFrame + MERRA-2 time-series data, fields depend on the requested data. The + returned object is either a pandas DataFrame or an xarray dataset, + depending on the output_format parameter. + metadata: dict + Metadata extracted from the netcdf files. + + Notes + ----- + To obtain MERRA-2 data, it is necessary to register for an EarthData + account and link it to the GES DISC as described in [3]_. + + MERRA-2 contains 14 single-level 2D datasets with an hourly resolution. The + most important ones are 'M2T1NXAER', which contains aerosol data, + 'M2T1NXRAD', which contains radiation related parameters, and 'M2T1NXSLV', + which contains general variables (e.g., temperature and wind speed). + + Warning + ------- + There is a known error in the calculation of radiation, hence it is + strongly advised that radiation from MERRA-2 should not be used. Users + interested in radiation from reanalysis datasets are referred to + :func:`pvlib.iotools.get_era5`. + + See Also + -------- + pvlib.iotools.read_merra2, pvlib.iotools.get_era5 + + References + ---------- + .. [1] `NASA MERRA-2 Project overview + `_ + .. 
[2] `MERRA-2 File specification + `_ + .. [3] `Account registration and data access to NASA's GES DISC + `_ + """ # noqa: E501 + if xr is None: + raise ImportError('Retrieving MERRA-2 data requires xarray') + if setup_session is None: + raise ImportError('Retrieving MERRA-2 data requires PyDap') + if cftime is None: + raise ImportError('Retrieving MERRA-2 data requires cftime') + + url = MERRA2_BASE_URL + '/' + dataset + + session = setup_session(username, password, check_url=url) + store = xr.backends.PydapDataStore.open(url, session=session) + + start_float = cftime.date2num(start, units='days since 1-1-1 00:00:0.0') + end_float = cftime.date2num(end, units='days since 1-1-1 00:00:0.0') + + try: + sel_dict = { + 'lat': slice(latitude[0], latitude[1]), + 'lon': slice(longitude[0], longitude[1]), + 'time': slice(start_float, end_float)} + except TypeError: + sel_dict = { + 'lat': latitude, + 'lon': longitude, + 'time': slice(start_float, end_float)} + + # Setting decode_times=False results in a time saving of up to some minutes + ds = xr.open_dataset(store, decode_times=False).sel(sel_dict) + + ds = xr.decode_cf(ds) # Decode timestamps + + variables = [v.lower() for v in variables] # Make all variables lower-case + + ds = ds[variables] # select sub-set of variables + + if map_variables: + # Variables were selected by lower-case name; rename only those present + ds = ds.rename_vars({k.lower(): v for k, v in + MERRA2_VARIABLE_MAP.items() if k.lower() in ds}) + + ds = _convert_C_to_K_in_dataset(ds) + metadata = _extract_metadata_from_dataset(ds) + + if (output_format == 'dataframe') or ( + (output_format is None) & (ds['lat'].size == 1) & + (ds['lon'].size == 1)): + data = ds.to_dataframe() + # Localize timezone to UTC + if data.index.nlevels > 1: # if dataframe has a multi-index + data.index = data.index.set_levels(data.index.get_level_values('time').tz_localize('utc'), level='time') # noqa: E501 + else: # for single location dataframes (only time as index) + data.index = 
data.index.tz_localize('UTC') + data = data.drop(columns=['lat', 'lon']) + return data, metadata + else: + return ds, metadata + + +def read_merra2(filename, output_format=None, map_variables=True): + """Reading a MERRA-2 file into a pandas dataframe. + + MERRA-2 is described in [1]_ and a list of variables can be found in [2]_. + + Parameters + ---------- + filename: str or path-like or list + Filename of a netcdf file containing MERRA-2 data or a list of + filenames. + output_format: {'dataframe', 'dataset'}, optional + Type of data object to return. Default is to return a pandas DataFrame + if data for a single location is requested and otherwise return an + xarray Dataset. + map_variables: bool, default: True + When true, renames columns to pvlib variable names where applicable. + See variable MERRA2_VARIABLE_MAP. + + Returns + ------- + data: DataFrame + MERRA-2 time-series data, fields depend on the requested data. The + returned object is either a pandas DataFrame or an xarray dataset, + depending on the output_format parameter. + metadata: dict + Metadata extracted from the netcdf files. + + See Also + -------- + pvlib.iotools.get_merra2, pvlib.iotools.get_era5 + + References + ---------- + .. [1] `NASA MERRA-2 Project overview + `_ + .. 
[2] `MERRA-2 File specification + `_ + """ + if xr is None: + raise ImportError('Reading MERRA-2 data requires xarray to be installed.') # noqa: E501 + + # open multiple-files (mf) requires dask + if isinstance(filename, (list, tuple)): + ds = xr.open_mfdataset(filename) + else: + ds = xr.open_dataset(filename) + + if map_variables: + # Renaming of xarray datasets throws an error if keys are missing + ds = ds.rename_vars( + {k: v for k, v in MERRA2_VARIABLE_MAP.items() if k in list(ds)}) + + ds = _convert_C_to_K_in_dataset(ds) + metadata = _extract_metadata_from_dataset(ds) + + if (output_format == 'dataframe') or ( + (output_format is None) & (ds['lat'].size == 1) & + (ds['lon'].size == 1)): + data = ds.to_dataframe() + # Remove lat and lon from multi-index + if (ds['lat'].size == 1) & (ds['lon'].size == 1): + data = data.droplevel(['lat', 'lon']) + # Localize timezone to UTC + if data.index.nlevels > 1: # if dataframe has a multi-index + data.index = data.index.set_levels(data.index.get_level_values('time').tz_localize('utc'), level='time') # noqa: E501 + else: # for single location dataframes (only time as index) + data.index = data.index.tz_localize('UTC') + return data, metadata + else: + return ds, metadata diff --git a/pvlib/tests/conftest.py b/pvlib/tests/conftest.py index a3cba1e7b8..84935745d8 100644 --- a/pvlib/tests/conftest.py +++ b/pvlib/tests/conftest.py @@ -95,6 +95,19 @@ def assert_frame_equal(left, right, **kwargs): not has_bsrn_credentials, reason='requires bsrn credentials') +try: + # Attempt to load NASA EarthData login credentials used for testing + # pvlib.iotools.get_merra2 + MERRA2_USERNAME = os.environ["MERRA2_USERNAME"] + MERRA2_PASSWORD = os.environ["MERRA2_PASSWORD"] + has_merra2_credentials = True +except KeyError: + has_merra2_credentials = False + +requires_merra2_credentials = pytest.mark.skipif( + not has_merra2_credentials, reason='requires merra2 credentials') + + try: import statsmodels # noqa: F401 has_statsmodels = True @@ 
-105,6 +118,16 @@ def assert_frame_equal(left, right, **kwargs): not has_statsmodels, reason='requires statsmodels') +try: + import xarray as xr # noqa: F401 + has_xarray = True +except ImportError: + has_xarray = False + +requires_xarray = pytest.mark.skipif( + not has_xarray, reason='requires xarray') + + try: import tables has_tables = True diff --git a/pvlib/tests/iotools/test_merra2.py b/pvlib/tests/iotools/test_merra2.py new file mode 100644 index 0000000000..b49e2e2f3b --- /dev/null +++ b/pvlib/tests/iotools/test_merra2.py @@ -0,0 +1,80 @@ +""" +tests for :mod:`pvlib.iotools.merra2` +""" + +import pandas as pd +import numpy as np +import datetime as dt +import pytest +import os +from pvlib.iotools import read_merra2, get_merra2 +from ..conftest import (DATA_DIR, RERUNS, RERUNS_DELAY, assert_index_equal, + requires_merra2_credentials, requires_xarray) + + +@pytest.fixture(scope="module") +def merra2_credentials(): + """Supplies pvlib-python's EarthData login credentials. + Users should obtain their own credentials as described in the `get_merra2` + documentation.""" + return (os.environ["MERRA2_USERNAME"], os.environ["MERRA2_PASSWORD"]) + + +@pytest.fixture +def expected_index(): + index = pd.date_range('2020-1-1-00:30', periods=24*2, freq='1h', tz='UTC') + index.name = 'time' + return index + + +@requires_xarray +def test_read_merra2(expected_index): + filenames = [DATA_DIR / 'MERRA2_400.tavg1_2d_rad_Nx.20200101.SUB.nc', + DATA_DIR / 'MERRA2_400.tavg1_2d_rad_Nx.20200102.SUB.nc'] + + data, meta = read_merra2(filenames, map_variables=False) + assert_index_equal(data.index, expected_index) + assert meta['lat'] == {'name': 'lat', 'long_name': 'latitude', + 'units': 'degrees_north'} + assert np.isclose(data.loc['2020-01-01 12:30:00+00:00', 'SWGDN'], 130.4375) + + +@requires_xarray +def test_read_merra2_dataset(expected_index): + filenames = [DATA_DIR / 'MERRA2_400.tavg1_2d_rad_Nx.20200101.SUB.nc', + DATA_DIR / 'MERRA2_400.tavg1_2d_rad_Nx.20200102.SUB.nc'] + + 
data, meta = read_merra2(filenames, output_format='dataset', + map_variables=False) + import xarray as xr + assert isinstance(data, xr.Dataset) + assert meta['lat'] == {'name': 'lat', 'long_name': 'latitude', + 'units': 'degrees_north'} + assert np.all([v in ['time', 'lon', 'lat', 'ALBEDO', 'EMIS', 'SWGDN', + 'SWGDNCLR', 'SWTDN'] for v in list(data.variables)]) + + +@requires_xarray +def test_read_merra2_map_variables(): + filename = DATA_DIR / 'MERRA2_400.tavg1_2d_rad_Nx.20200101.SUB.nc' + data, meta = read_merra2(filename, map_variables=True) + assert meta['ghi'] == { + 'name': 'ghi', 'long_name': 'surface_incoming_shortwave_flux', + 'units': 'W m-2'} + + +@requires_xarray +@requires_merra2_credentials +@pytest.mark.remote_data +@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) +def test_get_merra2(merra2_credentials, expected_index): + username, password = merra2_credentials + data, meta = get_merra2( + latitude=55, longitude=15, + start=dt.datetime(2020, 1, 1), end=dt.datetime(2020, 1, 2), + dataset='M2T1NXRAD', variables=['TAUHGH', 'SWGNT'], + username=username, password=password, map_variables=True) + assert_index_equal(data.index, expected_index) + assert meta['lat'] == {'name': 'lat', 'long_name': 'latitude', + 'units': 'degrees_north'} + assert np.all([v in ['tauhgh', 'swgnt'] for v in data.columns]) diff --git a/pvlib/tools.py b/pvlib/tools.py index eef80a3b37..716ebffd05 100644 --- a/pvlib/tools.py +++ b/pvlib/tools.py @@ -344,3 +344,48 @@ def _golden_sect_DataFrame(params, VL, VH, func): raise Exception("EXCEPTION:iterations exceeded maximum (50)") return func(df, 'V1'), df['V1'] + + +def _extract_metadata_from_dataset(ds): + """ + Generate a dictionary of metadata from an xarray dataset. + Parameters + ---------- + ds : dataset + dataset containing time series data. + Returns + ------- + metadata : dict + Dictionary containing metadata. 
+ """ + metadata = {} + for v in list(ds.variables): + metadata[v] = { + 'name': ds[v].name, + 'long_name': ds[v].attrs.get('long_name')} + if 'units' in ds[v].attrs: + metadata[v]['units'] = ds[v].units + metadata['dims'] = dict(ds.dims) + metadata.update(ds.attrs) # add arbitrary metadata + return metadata + + +def _convert_C_to_K_in_dataset(ds): + """ + Convert all variables in an xarray dataset that have the unit Kelvin to + degrees Celsius (note: K to C, despite the function name). + Parameters + ---------- + ds : dataset + dataset containing time series data. + Returns + ------- + ds : dataset + dataset with all Kelvin variables converted to degrees Celsius + """ + for v in list(ds.variables): + if 'units' in ds[v].attrs: + if 'K' == ds[v].attrs['units']: + ds[v].data = ds[v].data - 273.15 + ds[v].attrs['units'] = 'C' + return ds diff --git a/setup.py b/setup.py index 216dc34a28..7f3cfaf55e 100755 --- a/setup.py +++ b/setup.py @@ -54,7 +54,7 @@ EXTRAS_REQUIRE = { 'optional': ['cython', 'ephem', 'netcdf4', 'nrel-pysam', 'numba', 'pvfactors', 'siphon', 'statsmodels', 'tables', - 'cftime >= 1.1.1'], + 'cftime >= 1.1.1', 'xarray', 'dask'], 'doc': ['ipython', 'matplotlib', 'sphinx == 3.1.2', 'sphinx_rtd_theme==0.5.0', 'sphinx-gallery', 'docutils == 0.15.2', 'pillow', 'netcdf4', 'siphon', 'tables',