Deprecate parse_psm3 and parse_cams (#2458)

kandersolar · echedey-ls · web-flow · commit f33be832ec46 · 2025-05-22T21:54:02.000-04:00
* deprecate parse_psm3

* deprecate parse_cams

* whatsnew

* lint

* Update docs/sphinx/source/whatsnew/v0.12.1.rst

Co-authored-by: Echedey Luis <80125792+echedey-ls@users.noreply.github.com>

* lint

---------

Co-authored-by: Echedey Luis <80125792+echedey-ls@users.noreply.github.com>
diff --git a/docs/sphinx/source/whatsnew/v0.12.1.rst b/docs/sphinx/source/whatsnew/v0.12.1.rst
@@ -8,6 +8,15 @@ Breaking Changes
 ~~~~~~~~~~~~~~~~
 
 
+Deprecations
+~~~~~~~~~~~~
+* The following ``parse_`` functions in :py:mod:`pvlib.iotools` are deprecated,
+  with the corresponding ``read_`` functions taking their place: (:issue:`2444`, :pull:`2458`)
+
+  - :py:func:`~pvlib.iotools.parse_psm3`
+  - :py:func:`~pvlib.iotools.parse_cams`
+
+
 Bug fixes
 ~~~~~~~~~
 * :py:func:`pvlib.iotools.get_pvgis_tmy` now returns the correct dtypes when
diff --git a/pvlib/iotools/psm3.py b/pvlib/iotools/psm3.py
@@ -7,8 +7,8 @@
 import requests
 import pandas as pd
 from json import JSONDecodeError
-import warnings
-from pvlib._deprecation import pvlibDeprecationWarning
+from pvlib._deprecation import deprecated
+from pvlib import tools
 
 NSRDB_API_BASE = "https://developer.nrel.gov"
 PSM_URL = NSRDB_API_BASE + "/api/nsrdb/v2/solar/psm3-2-2-download.csv"
@@ -127,7 +127,7 @@ def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60,
         timeseries data from NREL PSM3
     metadata : dict
         metadata from NREL PSM3 about the record, see
-        :func:`pvlib.iotools.parse_psm3` for fields
+        :func:`pvlib.iotools.read_psm3` for fields
 
     Raises
     ------
@@ -152,7 +152,7 @@ def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60,
 
     See Also
     --------
-    pvlib.iotools.read_psm3, pvlib.iotools.parse_psm3
+    pvlib.iotools.read_psm3
 
     References
     ----------
@@ -216,12 +216,12 @@ def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60,
     # the CSV is in the response content as a UTF-8 bytestring
     # to use pandas we need to create a file buffer from the response
     fbuf = io.StringIO(response.content.decode('utf-8'))
-    return parse_psm3(fbuf, map_variables)
+    return read_psm3(fbuf, map_variables)
 
 
-def parse_psm3(fbuf, map_variables=True):
+def read_psm3(filename, map_variables=True):
     """
-    Parse an NSRDB PSM3 weather file (formatted as SAM CSV). The NSRDB
+    Read an NSRDB PSM3 weather file (formatted as SAM CSV). The NSRDB
     is described in [1]_ and the SAM CSV format is described in [2]_.
 
     .. versionchanged:: 0.9.0
@@ -231,8 +231,8 @@ def parse_psm3(fbuf, map_variables=True):
 
     Parameters
     ----------
-    fbuf: file-like object
-        File-like object containing data to read.
+    filename: str, path-like, or buffer
+        Filename or in-memory buffer of a file containing data to read.
     map_variables: bool, default True
         When true, renames columns of the Dataframe to pvlib variable names
         where applicable. See variable :const:`VARIABLE_MAP`.
@@ -302,12 +302,15 @@ def parse_psm3(fbuf, map_variables=True):
     Examples
     --------
     >>> # Read a local PSM3 file:
+    >>> df, metadata = iotools.read_psm3("data.csv")  # doctest: +SKIP
+
+    >>> # Read a file object or an in-memory buffer:
     >>> with open(filename, 'r') as f:  # doctest: +SKIP
-    ...     df, metadata = iotools.parse_psm3(f)  # doctest: +SKIP
+    ...     df, metadata = iotools.read_psm3(f)  # doctest: +SKIP
 
     See Also
     --------
-    pvlib.iotools.read_psm3, pvlib.iotools.get_psm3
+    pvlib.iotools.get_psm3
 
     References
     ----------
@@ -316,34 +319,35 @@ def parse_psm3(fbuf, map_variables=True):
     .. [2] `Standard Time Series Data File Format
        <https://web.archive.org/web/20170207203107/https://sam.nrel.gov/sites/default/files/content/documents/pdf/wfcsv.pdf>`_
     """
-    # The first 2 lines of the response are headers with metadata
-    metadata_fields = fbuf.readline().split(',')
-    metadata_fields[-1] = metadata_fields[-1].strip()  # strip trailing newline
-    metadata_values = fbuf.readline().split(',')
-    metadata_values[-1] = metadata_values[-1].strip()  # strip trailing newline
+    with tools._file_context_manager(filename) as fbuf:
+        # The first 2 lines of the response are headers with metadata
+        metadata_fields = fbuf.readline().split(',')
+        metadata_values = fbuf.readline().split(',')
+        # get the column names so we can set the dtypes
+        columns = fbuf.readline().split(',')
+        columns[-1] = columns[-1].strip()  # strip trailing newline
+        # Since the header has so many columns, excel saves blank cols in the
+        # data below the header lines.
+        columns = [col for col in columns if col != '']
+        dtypes = dict.fromkeys(columns, float)  # all floats except datevec
+        dtypes.update({'Year': int, 'Month': int, 'Day': int, 'Hour': int,
+                       'Minute': int, 'Cloud Type': int, 'Fill Flag': int})
+        data = pd.read_csv(
+            fbuf, header=None, names=columns, usecols=columns, dtype=dtypes,
+            delimiter=',', lineterminator='\n')  # skip carriage returns \r
+
+    metadata_fields[-1] = metadata_fields[-1].strip()  # trailing newline
+    metadata_values[-1] = metadata_values[-1].strip()  # trailing newline
     metadata = dict(zip(metadata_fields, metadata_values))
     # the response is all strings, so set some metadata types to numbers
     metadata['Local Time Zone'] = int(metadata['Local Time Zone'])
     metadata['Time Zone'] = int(metadata['Time Zone'])
     metadata['Latitude'] = float(metadata['Latitude'])
     metadata['Longitude'] = float(metadata['Longitude'])
     metadata['Elevation'] = int(metadata['Elevation'])
-    # get the column names so we can set the dtypes
-    columns = fbuf.readline().split(',')
-    columns[-1] = columns[-1].strip()  # strip trailing newline
-    # Since the header has so many columns, excel saves blank cols in the
-    # data below the header lines.
-    columns = [col for col in columns if col != '']
-    dtypes = dict.fromkeys(columns, float)  # all floats except datevec
-    dtypes.update(Year=int, Month=int, Day=int, Hour=int, Minute=int)
-    dtypes['Cloud Type'] = int
-    dtypes['Fill Flag'] = int
-    data = pd.read_csv(
-        fbuf, header=None, names=columns, usecols=columns, dtype=dtypes,
-        delimiter=',', lineterminator='\n')  # skip carriage returns \r
+
     # the response 1st 5 columns are a date vector, convert to datetime
-    dtidx = pd.to_datetime(
-        data[['Year', 'Month', 'Day', 'Hour', 'Minute']])
+    dtidx = pd.to_datetime(data[['Year', 'Month', 'Day', 'Hour', 'Minute']])
     # in USA all timezones are integers
     tz = 'Etc/GMT%+d' % -metadata['Time Zone']
     data.index = pd.DatetimeIndex(dtidx).tz_localize(tz)
@@ -357,43 +361,5 @@ def parse_psm3(fbuf, map_variables=True):
     return data, metadata
 
 
-def read_psm3(filename, map_variables=True):
-    """
-    Read an NSRDB PSM3 weather file (formatted as SAM CSV). The NSRDB
-    is described in [1]_ and the SAM CSV format is described in [2]_.
-
-    .. versionchanged:: 0.9.0
-       The function now returns a tuple where the first element is a dataframe
-       and the second element is a dictionary containing metadata. Previous
-       versions of this function had the return values switched.
-
-    Parameters
-    ----------
-    filename: str
-        Filename of a file containing data to read.
-    map_variables: bool, default True
-        When true, renames columns of the Dataframe to pvlib variable names
-        where applicable. See variable :const:`VARIABLE_MAP`.
-
-    Returns
-    -------
-    data : pandas.DataFrame
-        timeseries data from NREL PSM3
-    metadata : dict
-        metadata from NREL PSM3 about the record, see
-        :func:`pvlib.iotools.parse_psm3` for fields
-
-    See Also
-    --------
-    pvlib.iotools.parse_psm3, pvlib.iotools.get_psm3
-
-    References
-    ----------
-    .. [1] `NREL National Solar Radiation Database (NSRDB)
-       <https://nsrdb.nrel.gov/>`_
-    .. [2] `Standard Time Series Data File Format
-       <https://web.archive.org/web/20170207203107/https://sam.nrel.gov/sites/default/files/content/documents/pdf/wfcsv.pdf>`_
-    """
-    with open(str(filename), 'r') as fbuf:
-        content = parse_psm3(fbuf, map_variables)
-    return content
+parse_psm3 = deprecated(since="0.12.1", name="parse_psm3",
+                        alternative="read_psm3")(read_psm3)
diff --git a/pvlib/iotools/sodapro.py b/pvlib/iotools/sodapro.py
@@ -7,7 +7,9 @@
 import requests
 import io
 import warnings
+from pvlib import tools
 
+from pvlib._deprecation import deprecated
 
 URL = 'api.soda-solardata.com'
 
@@ -151,7 +153,7 @@ def get_cams(latitude, longitude, start, end, email, identifier='mcclear',
 
     See Also
     --------
-    pvlib.iotools.read_cams, pvlib.iotools.parse_cams
+    pvlib.iotools.read_cams
 
     Raises
     ------
@@ -239,20 +241,22 @@ def get_cams(latitude, longitude, start, end, email, identifier='mcclear',
     # Successful requests returns a csv data file
     else:
         fbuf = io.StringIO(res.content.decode('utf-8'))
-        data, metadata = parse_cams(fbuf, integrated=integrated, label=label,
-                                    map_variables=map_variables)
+        data, metadata = read_cams(fbuf, integrated=integrated, label=label,
+                                   map_variables=map_variables)
         return data, metadata
 
 
-def parse_cams(fbuf, integrated=False, label=None, map_variables=True):
+def read_cams(filename, integrated=False, label=None, map_variables=True):
     """
-    Parse a file-like buffer with data in the format of a CAMS Radiation or
-    McClear file. The CAMS solar radiation services are described in [1]_.
+    Read a file or file-like buffer with data in the format of a CAMS
+    Radiation or McClear file.
+
+    The CAMS solar radiation services are described in [1]_.
 
     Parameters
     ----------
-    fbuf: file-like object
-        File-like object containing data to read.
+    filename: str, path-like, or buffer
+        Filename or in-memory buffer of a file containing data to read.
     integrated: boolean, default False
         Whether to return radiation parameters as integrated values (Wh/m^2)
         or as average irradiance values (W/m^2) (pvlib preferred units)
@@ -272,23 +276,31 @@ def parse_cams(fbuf, integrated=False, label=None, map_variables=True):
 
     See Also
     --------
-    pvlib.iotools.read_cams, pvlib.iotools.get_cams
+    pvlib.iotools.get_cams
 
     References
     ----------
     .. [1] `CAMS solar radiation time-series documentation. Climate Data Store.
        <https://ads.atmosphere.copernicus.eu/datasets/cams-solar-radiation-timeseries>`_
     """
     metadata = {}
-    # Initial lines starting with # contain metadata
-    while True:
-        line = fbuf.readline().rstrip('\n')
-        if line.startswith('# Observation period'):
-            # The last line of the metadata section contains the column names
-            names = line.lstrip('# ').split(';')
-            break  # End of metadata section has been reached
-        elif ': ' in line:
-            metadata[line.split(': ')[0].lstrip('# ')] = line.split(': ')[1]
+
+    with tools._file_context_manager(filename) as fbuf:
+
+        # Initial lines starting with # contain metadata
+        while True:
+            line = fbuf.readline().rstrip('\n')
+            if line.startswith('# Observation period'):
+                # The last line of the metadata section has the column names
+                names = line.lstrip('# ').split(';')
+                break  # End of metadata section has been reached
+            elif ': ' in line:
+                key = line.split(': ')[0].lstrip('# ')
+                value = line.split(': ')[1]
+                metadata[key] = value
+
+        data = pd.read_csv(fbuf, sep=';', comment='#', header=None,
+                           names=names)
 
     # Convert latitude, longitude, and altitude values from strings to floats
     for k_old in list(metadata.keys()):
@@ -304,8 +316,6 @@ def parse_cams(fbuf, integrated=False, label=None, map_variables=True):
         metadata['Summarization (integration) period']]
     metadata['time_step'] = time_step
 
-    data = pd.read_csv(fbuf, sep=';', comment='#', header=None, names=names)
-
     obs_period = data['Observation period'].str.split('/')
 
     # Set index as the start observation time (left) and localize to UTC
@@ -344,43 +354,5 @@ def parse_cams(fbuf, integrated=False, label=None, map_variables=True):
     return data, metadata
 
 
-def read_cams(filename, integrated=False, label=None, map_variables=True):
-    """
-    Read a CAMS Radiation or McClear file into a pandas DataFrame.
-
-    CAMS Radiation and McClear are described in [1]_.
-
-    Parameters
-    ----------
-    filename: str
-        Filename of a file containing data to read.
-    integrated: boolean, default False
-        Whether to return radiation parameters as integrated values (Wh/m^2)
-        or as average irradiance values (W/m^2) (pvlib preferred units)
-    label : {'right', 'left}, optional
-        Which bin edge label to label time-step with. The default is 'left' for
-        all time steps except for '1M' which has a default of 'right'.
-    map_variables: bool, default: True
-        When true, renames columns of the Dataframe to pvlib variable names
-        where applicable. See variable :const:`VARIABLE_MAP`.
-
-    Returns
-    -------
-    data: pandas.DataFrame
-        Timeseries data from CAMS Radiation or McClear.
-        See :func:`pvlib.iotools.get_cams` for fields.
-    metadata: dict
-        Metadata available in the file.
-
-    See Also
-    --------
-    pvlib.iotools.parse_cams, pvlib.iotools.get_cams
-
-    References
-    ----------
-    .. [1] `CAMS solar radiation time-series documentation. Climate Data Store.
-       <https://ads.atmosphere.copernicus.eu/datasets/cams-solar-radiation-timeseries>`_
-    """
-    with open(str(filename), 'r') as fbuf:
-        content = parse_cams(fbuf, integrated, label, map_variables)
-    return content
+parse_cams = deprecated(since="0.12.1", name="parse_cams",
+                        alternative="read_cams")(read_cams)
diff --git a/tests/iotools/test_psm3.py b/tests/iotools/test_psm3.py
@@ -16,6 +16,8 @@
 from requests import HTTPError
 from io import StringIO
 
+from pvlib._deprecation import pvlibDeprecationWarning
+
 
 TMY_TEST_DATA = TESTS_DATA_DIR / 'test_psm3_tmy-2017.csv'
 YEAR_TEST_DATA = TESTS_DATA_DIR / 'test_psm3_2017.csv'
@@ -130,7 +132,7 @@ def test_get_psm3_tmy_errors(
 
 @pytest.fixture
 def io_input(request):
-    """file-like object for parse_psm3"""
+    """file-like object for read_psm3"""
     with MANUAL_TEST_DATA.open() as f:
         data = f.read()
     obj = StringIO(data)
@@ -139,7 +141,8 @@ def io_input(request):
 
 def test_parse_psm3(io_input):
     """test parse_psm3"""
-    data, metadata = psm3.parse_psm3(io_input, map_variables=False)
+    with pytest.warns(pvlibDeprecationWarning, match='Use read_psm3 instead'):
+        data, metadata = psm3.parse_psm3(io_input, map_variables=False)
     expected = pd.read_csv(YEAR_TEST_DATA)
     assert_psm3_equal(data, metadata, expected)
 
@@ -151,6 +154,12 @@ def test_read_psm3():
     assert_psm3_equal(data, metadata, expected)
 
 
+def test_read_psm3_buffer(io_input):
+    data, metadata = psm3.read_psm3(io_input, map_variables=False)
+    expected = pd.read_csv(YEAR_TEST_DATA)
+    assert_psm3_equal(data, metadata, expected)
+
+
 def test_read_psm3_map_variables():
     """test read_psm3 map_variables=True"""
     data, metadata = psm3.read_psm3(MANUAL_TEST_DATA, map_variables=True)
diff --git a/tests/iotools/test_sodapro.py b/tests/iotools/test_sodapro.py