Skip to content

Commit 2093264

Browse files
Combine read_nsrdb_psm4 and parse_nsrdb_psm4 (#2445)
* psm4: combine parse_ and read_ into one function
* lint
* move metadata parsing out of context manager
* fix see also
* make and test reusable function in pvlib.tools
* lint
* Apply suggestions from code review

Co-authored-by: Adam R. Jensen <[email protected]>

---------

Co-authored-by: Adam R. Jensen <[email protected]>
1 parent cb8be87 commit 2093264

File tree

7 files changed

+121
-108
lines changed

7 files changed

+121
-108
lines changed

docs/sphinx/source/reference/iotools.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ of sources and file formats relevant to solar energy modeling.
3131
iotools.get_nsrdb_psm4_conus
3232
iotools.get_nsrdb_psm4_full_disc
3333
iotools.read_nsrdb_psm4
34-
iotools.parse_nsrdb_psm4
3534
iotools.get_psm3
3635
iotools.read_psm3
3736
iotools.parse_psm3

docs/sphinx/source/whatsnew/v0.12.1.rst

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,8 @@ Enhancements
2727
:py:func:`~pvlib.iotools.get_nsrdb_psm4_aggregated`,
2828
:py:func:`~pvlib.iotools.get_nsrdb_psm4_tmy`,
2929
:py:func:`~pvlib.iotools.get_nsrdb_psm4_conus`,
30-
:py:func:`~pvlib.iotools.get_nsrdb_psm4_full_disc`,
31-
:py:func:`~pvlib.iotools.read_nsrdb_psm4`, and
32-
:py:func:`~pvlib.iotools.parse_nsrdb_psm4`. (:issue:`2326`, :pull:`2378`)
30+
:py:func:`~pvlib.iotools.get_nsrdb_psm4_full_disc`, and
31+
:py:func:`~pvlib.iotools.read_nsrdb_psm4`. (:issue:`2326`, :pull:`2378`, :pull:`2445`)
3332
* :py:mod:`pvlib.bifacial.infinite_sheds` no longer emits "invalid value" warnings
3433
when supplying irradiance arrays with nighttime zero values. (:issue:`2450`, :pull:`2451`)
3534

pvlib/iotools/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
from pvlib.iotools.psm4 import get_nsrdb_psm4_conus # noqa: F401
1717
from pvlib.iotools.psm4 import get_nsrdb_psm4_full_disc # noqa: F401
1818
from pvlib.iotools.psm4 import read_nsrdb_psm4 # noqa: F401
19-
from pvlib.iotools.psm4 import parse_nsrdb_psm4 # noqa: F401
2019
from pvlib.iotools.pvgis import get_pvgis_tmy, read_pvgis_tmy # noqa: F401
2120
from pvlib.iotools.pvgis import read_pvgis_hourly # noqa: F401
2221
from pvlib.iotools.pvgis import get_pvgis_hourly # noqa: F401

pvlib/iotools/psm4.py

Lines changed: 56 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import requests
1212
import pandas as pd
1313
from json import JSONDecodeError
14+
from pvlib import tools
1415

1516
NSRDB_API_BASE = "https://developer.nrel.gov/api/nsrdb/v2/solar/"
1617
PSM4_AGG_ENDPOINT = "nsrdb-GOES-aggregated-v4-0-0-download.csv"
@@ -82,7 +83,7 @@ def get_nsrdb_psm4_aggregated(latitude, longitude, api_key, email,
8283
Aggregated v4 API.
8384
8485
The NSRDB is described in [1]_ and the PSM4 NSRDB GOES Aggregated v4 API is
85-
described in [2]_,.
86+
described in [2]_.
8687
8788
Parameters
8889
----------
@@ -132,7 +133,7 @@ def get_nsrdb_psm4_aggregated(latitude, longitude, api_key, email,
132133
timeseries data from NREL PSM4
133134
metadata : dict
134135
metadata from NREL PSM4 about the record, see
135-
:func:`pvlib.iotools.parse_nsrdb_psm4` for fields
136+
:func:`pvlib.iotools.read_nsrdb_psm4` for fields
136137
137138
Raises
138139
------
@@ -151,19 +152,15 @@ def get_nsrdb_psm4_aggregated(latitude, longitude, api_key, email,
151152
result in rejected requests.
152153
153154
.. warning:: PSM4 is limited to data found in the NSRDB, please consult
154-
the references below for locations with available data. Additionally,
155-
querying data with < 30-minute resolution uses a different API endpoint
156-
with fewer available fields (see [4]_).
155+
the references below for locations with available data.
157156
158157
See Also
159158
--------
160159
pvlib.iotools.get_nsrdb_psm4_tmy, pvlib.iotools.get_nsrdb_psm4_conus,
161-
pvlib.iotools.get_nsrdb_psm4_full_disc, pvlib.iotools.read_nsrdb_psm4,
162-
pvlib.iotools.parse_nsrdb_psm4
160+
pvlib.iotools.get_nsrdb_psm4_full_disc, pvlib.iotools.read_nsrdb_psm4
163161
164162
References
165163
----------
166-
167164
.. [1] `NREL National Solar Radiation Database (NSRDB)
168165
<https://nsrdb.nrel.gov/>`_
169166
.. [2] `NSRDB GOES Aggregated V4.0.0
@@ -213,7 +210,7 @@ def get_nsrdb_psm4_aggregated(latitude, longitude, api_key, email,
213210
# the CSV is in the response content as a UTF-8 bytestring
214211
# to use pandas we need to create a file buffer from the response
215212
fbuf = io.StringIO(response.content.decode('utf-8'))
216-
return parse_nsrdb_psm4(fbuf, map_variables)
213+
return read_nsrdb_psm4(fbuf, map_variables)
217214

218215

219216
def get_nsrdb_psm4_tmy(latitude, longitude, api_key, email, year='tmy',
@@ -225,7 +222,7 @@ def get_nsrdb_psm4_tmy(latitude, longitude, api_key, email, year='tmy',
225222
TMY v4 API.
226223
227224
The NSRDB is described in [1]_ and the PSM4 NSRDB GOES TMY v4 API is
228-
described in [2]_,.
225+
described in [2]_.
229226
230227
Parameters
231228
----------
@@ -276,7 +273,7 @@ def get_nsrdb_psm4_tmy(latitude, longitude, api_key, email, year='tmy',
276273
timeseries data from NREL PSM4
277274
metadata : dict
278275
metadata from NREL PSM4 about the record, see
279-
:func:`pvlib.iotools.parse_nsrdb_psm4` for fields
276+
:func:`pvlib.iotools.read_nsrdb_psm4` for fields
280277
281278
Raises
282279
------
@@ -295,19 +292,16 @@ def get_nsrdb_psm4_tmy(latitude, longitude, api_key, email, year='tmy',
295292
result in rejected requests.
296293
297294
.. warning:: PSM4 is limited to data found in the NSRDB, please consult
298-
the references below for locations with available data. Additionally,
299-
querying data with < 30-minute resolution uses a different API endpoint
300-
with fewer available fields (see [4]_).
295+
the references below for locations with available data.
301296
302297
See Also
303298
--------
304299
pvlib.iotools.get_nsrdb_psm4_aggregated,
305300
pvlib.iotools.get_nsrdb_psm4_conus, pvlib.iotools.get_nsrdb_psm4_full_disc,
306-
pvlib.iotools.read_nsrdb_psm4,pvlib.iotools.parse_nsrdb_psm4
301+
pvlib.iotools.read_nsrdb_psm4
307302
308303
References
309304
----------
310-
311305
.. [1] `NREL National Solar Radiation Database (NSRDB)
312306
<https://nsrdb.nrel.gov/>`_
313307
.. [2] `NSRDB GOES Tmy V4.0.0
@@ -357,7 +351,7 @@ def get_nsrdb_psm4_tmy(latitude, longitude, api_key, email, year='tmy',
357351
# the CSV is in the response content as a UTF-8 bytestring
358352
# to use pandas we need to create a file buffer from the response
359353
fbuf = io.StringIO(response.content.decode('utf-8'))
360-
return parse_nsrdb_psm4(fbuf, map_variables)
354+
return read_nsrdb_psm4(fbuf, map_variables)
361355

362356

363357
def get_nsrdb_psm4_conus(latitude, longitude, api_key, email, year='2023',
@@ -369,7 +363,7 @@ def get_nsrdb_psm4_conus(latitude, longitude, api_key, email, year='2023',
369363
v4 API.
370364
371365
The NSRDB is described in [1]_ and the PSM4 NSRDB GOES CONUS v4 API is
372-
described in [2]_,.
366+
described in [2]_.
373367
374368
Parameters
375369
----------
@@ -418,7 +412,7 @@ def get_nsrdb_psm4_conus(latitude, longitude, api_key, email, year='2023',
418412
timeseries data from NREL PSM4
419413
metadata : dict
420414
metadata from NREL PSM4 about the record, see
421-
:func:`pvlib.iotools.parse_nsrdb_psm4` for fields
415+
:func:`pvlib.iotools.read_nsrdb_psm4` for fields
422416
423417
Raises
424418
------
@@ -437,19 +431,16 @@ def get_nsrdb_psm4_conus(latitude, longitude, api_key, email, year='2023',
437431
result in rejected requests.
438432
439433
.. warning:: PSM4 is limited to data found in the NSRDB, please consult
440-
the references below for locations with available data. Additionally,
441-
querying data with < 30-minute resolution uses a different API endpoint
442-
with fewer available fields (see [4]_).
434+
the references below for locations with available data.
443435
444436
See Also
445437
--------
446438
pvlib.iotools.get_nsrdb_psm4_aggregated,
447439
pvlib.iotools.get_nsrdb_psm4_tmy, pvlib.iotools.get_nsrdb_psm4_full_disc,
448-
pvlib.iotools.read_nsrdb_psm4, pvlib.iotools.parse_nsrdb_psm4
440+
pvlib.iotools.read_nsrdb_psm4
449441
450442
References
451443
----------
452-
453444
.. [1] `NREL National Solar Radiation Database (NSRDB)
454445
<https://nsrdb.nrel.gov/>`_
455446
.. [2] `NSRDB GOES Conus V4.0.0
@@ -499,7 +490,7 @@ def get_nsrdb_psm4_conus(latitude, longitude, api_key, email, year='2023',
499490
# the CSV is in the response content as a UTF-8 bytestring
500491
# to use pandas we need to create a file buffer from the response
501492
fbuf = io.StringIO(response.content.decode('utf-8'))
502-
return parse_nsrdb_psm4(fbuf, map_variables)
493+
return read_nsrdb_psm4(fbuf, map_variables)
503494

504495

505496
def get_nsrdb_psm4_full_disc(latitude, longitude, api_key, email,
@@ -513,7 +504,7 @@ def get_nsrdb_psm4_full_disc(latitude, longitude, api_key, email,
513504
Disc v4 API.
514505
515506
The NSRDB is described in [1]_ and the PSM4 NSRDB GOES Full Disc v4 API is
516-
described in [2]_,.
507+
described in [2]_.
517508
518509
Parameters
519510
----------
@@ -563,7 +554,7 @@ def get_nsrdb_psm4_full_disc(latitude, longitude, api_key, email,
563554
timeseries data from NREL PSM4
564555
metadata : dict
565556
metadata from NREL PSM4 about the record, see
566-
:func:`pvlib.iotools.parse_nsrdb_psm4` for fields
557+
:func:`pvlib.iotools.read_nsrdb_psm4` for fields
567558
568559
Raises
569560
------
@@ -582,19 +573,16 @@ def get_nsrdb_psm4_full_disc(latitude, longitude, api_key, email,
582573
result in rejected requests.
583574
584575
.. warning:: PSM4 is limited to data found in the NSRDB, please consult
585-
the references below for locations with available data. Additionally,
586-
querying data with < 30-minute resolution uses a different API endpoint
587-
with fewer available fields (see [4]_).
576+
the references below for locations with available data.
588577
589578
See Also
590579
--------
591580
pvlib.iotools.get_nsrdb_psm4_aggregated,
592581
pvlib.iotools.get_nsrdb_psm4_tmy, pvlib.iotools.get_nsrdb_psm4_conus,
593-
pvlib.iotools.read_nsrdb_psm4, pvlib.iotools.parse_nsrdb_psm4
582+
pvlib.iotools.read_nsrdb_psm4
594583
595584
References
596585
----------
597-
598586
.. [1] `NREL National Solar Radiation Database (NSRDB)
599587
<https://nsrdb.nrel.gov/>`_
600588
.. [2] `NSRDB GOES Full Disc V4.0.0
@@ -644,19 +632,19 @@ def get_nsrdb_psm4_full_disc(latitude, longitude, api_key, email,
644632
# the CSV is in the response content as a UTF-8 bytestring
645633
# to use pandas we need to create a file buffer from the response
646634
fbuf = io.StringIO(response.content.decode('utf-8'))
647-
return parse_nsrdb_psm4(fbuf, map_variables)
635+
return read_nsrdb_psm4(fbuf, map_variables)
648636

649637

650-
def parse_nsrdb_psm4(fbuf, map_variables=True):
638+
def read_nsrdb_psm4(filename, map_variables=True):
651639
"""
652-
Parse an NSRDB PSM4 weather file (formatted as SAM CSV).
640+
Read an NSRDB PSM4 weather file (formatted as SAM CSV).
653641
654642
The NSRDB is described in [1]_ and the SAM CSV format is described in [2]_.
655643
656644
Parameters
657645
----------
658-
fbuf: file-like object
659-
File-like object containing data to read.
646+
filename: str, path-like, or buffer
647+
Filename or in-memory buffer of a file containing data to read.
660648
map_variables: bool, default True
661649
When true, renames columns of the DataFrame to pvlib variable names
662650
where applicable. See variable :const:`VARIABLE_MAP`.
@@ -726,12 +714,19 @@ def parse_nsrdb_psm4(fbuf, map_variables=True):
726714
Examples
727715
--------
728716
>>> # Read a local PSM4 file:
717+
>>> df, metadata = iotools.read_nsrdb_psm4("data.csv") # doctest: +SKIP
718+
719+
>>> # Read a file object or an in-memory buffer:
729720
>>> with open(filename, 'r') as f: # doctest: +SKIP
730-
... df, metadata = iotools.parse_nsrdb_psm4(f) # doctest: +SKIP
721+
... df, metadata = iotools.read_nsrdb_psm4(f) # doctest: +SKIP
731722
732723
See Also
733724
--------
734-
pvlib.iotools.read_nsrdb_psm4, pvlib.iotools.get_psm4
725+
pvlib.iotools.get_nsrdb_psm4_aggregated
726+
pvlib.iotools.get_nsrdb_psm4_tmy
727+
pvlib.iotools.get_nsrdb_psm4_conus
728+
pvlib.iotools.get_nsrdb_psm4_full_disc
729+
pvlib.iotools.read_psm3
735730
736731
References
737732
----------
@@ -740,34 +735,36 @@ def parse_nsrdb_psm4(fbuf, map_variables=True):
740735
.. [2] `Standard Time Series Data File Format
741736
<https://web.archive.org/web/20170207203107/https://sam.nrel.gov/sites/default/files/content/documents/pdf/wfcsv.pdf>`_
742737
"""
743-
# The first 2 lines of the response are headers with metadata
744-
metadata_fields = fbuf.readline().split(',')
745-
metadata_fields[-1] = metadata_fields[-1].strip() # strip trailing newline
746-
metadata_values = fbuf.readline().split(',')
747-
metadata_values[-1] = metadata_values[-1].strip() # strip trailing newline
738+
with tools._file_context_manager(filename) as fbuf:
739+
# The first 2 lines of the response are headers with metadata
740+
metadata_fields = fbuf.readline().split(',')
741+
metadata_values = fbuf.readline().split(',')
742+
# get the column names so we can set the dtypes
743+
columns = fbuf.readline().split(',')
744+
columns[-1] = columns[-1].strip() # strip trailing newline
745+
# Since the header has so many columns, Excel saves blank cols in the
746+
# data below the header lines.
747+
columns = [col for col in columns if col != '']
748+
dtypes = dict.fromkeys(columns, float)
749+
dtypes.update({'Year': int, 'Month': int, 'Day': int, 'Hour': int,
750+
'Minute': int, 'Cloud Type': int, 'Fill Flag': int})
751+
752+
data = pd.read_csv(
753+
fbuf, header=None, names=columns, usecols=columns, dtype=dtypes,
754+
delimiter=',', lineterminator='\n') # skip carriage returns \r
755+
756+
metadata_fields[-1] = metadata_fields[-1].strip() # trailing newline
757+
metadata_values[-1] = metadata_values[-1].strip() # trailing newline
748758
metadata = dict(zip(metadata_fields, metadata_values))
749759
# the response is all strings, so set some metadata types to numbers
750760
metadata['Local Time Zone'] = int(metadata['Local Time Zone'])
751761
metadata['Time Zone'] = int(metadata['Time Zone'])
752762
metadata['Latitude'] = float(metadata['Latitude'])
753763
metadata['Longitude'] = float(metadata['Longitude'])
754764
metadata['Elevation'] = int(metadata['Elevation'])
755-
# get the column names so we can set the dtypes
756-
columns = fbuf.readline().split(',')
757-
columns[-1] = columns[-1].strip() # strip trailing newline
758-
# Since the header has so many columns, excel saves blank cols in the
759-
# data below the header lines.
760-
columns = [col for col in columns if col != '']
761-
dtypes = dict.fromkeys(columns, float) # all floats except datevec
762-
dtypes.update(Year=int, Month=int, Day=int, Hour=int, Minute=int)
763-
dtypes['Cloud Type'] = int
764-
dtypes['Fill Flag'] = int
765-
data = pd.read_csv(
766-
fbuf, header=None, names=columns, usecols=columns, dtype=dtypes,
767-
delimiter=',', lineterminator='\n') # skip carriage returns \r
765+
768766
# the first 5 columns of the response are a date vector; convert to datetime
769-
dtidx = pd.to_datetime(
770-
data[['Year', 'Month', 'Day', 'Hour', 'Minute']])
767+
dtidx = pd.to_datetime(data[['Year', 'Month', 'Day', 'Hour', 'Minute']])
771768
# in the USA, all time zone UTC offsets are whole hours (integers)
772769
tz = 'Etc/GMT%+d' % -metadata['Time Zone']
773770
data.index = pd.DatetimeIndex(dtidx).tz_localize(tz)
@@ -779,41 +776,3 @@ def parse_nsrdb_psm4(fbuf, map_variables=True):
779776
metadata['altitude'] = metadata.pop('Elevation')
780777

781778
return data, metadata
782-
783-
784-
def read_nsrdb_psm4(filename, map_variables=True):
785-
"""
786-
Read an NSRDB PSM4 weather file (formatted as SAM CSV).
787-
788-
The NSRDB is described in [1]_ and the SAM CSV format is described in [2]_.
789-
790-
Parameters
791-
----------
792-
filename: str or path-like
793-
Filename of a file containing data to read.
794-
map_variables: bool, default True
795-
When true, renames columns of the Dataframe to pvlib variable names
796-
where applicable. See variable :const:`VARIABLE_MAP`.
797-
798-
Returns
799-
-------
800-
data : pandas.DataFrame
801-
timeseries data from NREL PSM4
802-
metadata : dict
803-
metadata from NREL PSM4 about the record, see
804-
:func:`pvlib.iotools.parse_nsrdb_psm4` for fields
805-
806-
See Also
807-
--------
808-
pvlib.iotools.parse_nsrdb_psm4, pvlib.iotools.get_psm4
809-
810-
References
811-
----------
812-
.. [1] `NREL National Solar Radiation Database (NSRDB)
813-
<https://nsrdb.nrel.gov/>`_
814-
.. [2] `Standard Time Series Data File Format
815-
<https://web.archive.org/web/20170207203107/https://sam.nrel.gov/sites/default/files/content/documents/pdf/wfcsv.pdf>`_
816-
"""
817-
with open(str(filename), 'r') as fbuf:
818-
content = parse_nsrdb_psm4(fbuf, map_variables)
819-
return content

0 commit comments

Comments
 (0)