Skip to content

Commit 33045d2

Browse files
authored
Have pvlib.iotools.read_solrad return metadata (#1968)
* Remove junk * Add function * Add tests * Update v0.10.4.rst * Remove double testfile * Change elevation to altitude * use "with open" from code review
1 parent 09cf445 commit 33045d2

File tree

3 files changed

+64
-31
lines changed

3 files changed

+64
-31
lines changed

docs/sphinx/source/whatsnew/v0.10.4.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ v0.10.4 (Anticipated March, 2024)
88
Enhancements
99
~~~~~~~~~~~~
1010
* Added the Huld PV model used by PVGIS (:pull:`1940`)
11-
11+
* Added metadata parsing to :py:func:`~pvlib.iotools.read_solrad` to follow the standard iotools
12+
convention of returning a tuple of (data, meta). Previously the function only returned a dataframe. (:pull:`1968`)
1213

1314
Bug fixes
1415
~~~~~~~~~

pvlib/iotools/solrad.py

Lines changed: 37 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
"""Functions to read data from the NOAA SOLRAD network.
2-
"""
1+
"""Functions to read data from the NOAA SOLRAD network."""
32

4-
import numpy as np
53
import pandas as pd
4+
import requests
5+
import io
66

77
# pvlib conventions
88
BASE_HEADERS = (
@@ -49,8 +49,15 @@
4949

5050
def read_solrad(filename):
5151
"""
52-
Read NOAA SOLRAD fixed-width file into pandas dataframe. The SOLRAD
53-
network is described in [1]_ and [2]_.
52+
Read NOAA SOLRAD fixed-width file into pandas dataframe.
53+
54+
The SOLRAD network is described in [1]_ and [2]_.
55+
56+
.. versionchanged:: 0.10.4
57+
The function now returns a tuple where the first element is a dataframe
58+
and the second element is a dictionary containing metadata. Previous
59+
versions of this function only returned a dataframe.
60+
5461
5562
Parameters
5663
----------
@@ -62,6 +69,8 @@ def read_solrad(filename):
6269
data : DataFrame
6370
A dataframe with DatetimeIndex and all of the variables in the
6471
file.
72+
metadata : dict
73+
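Station metadata with keys ``station_name``, ``latitude``, ``longitude``, ``altitude``, and ``TZ``, parsed from the first two lines of the file.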
6574
6675
Notes
6776
-----
@@ -91,19 +100,29 @@ def read_solrad(filename):
91100
widths = WIDTHS
92101
dtypes = DTYPES
93102

103+
meta = {}
104+
105+
if str(filename).startswith('ftp') or str(filename).startswith('http'):
106+
response = requests.get(filename)
107+
file_buffer = io.StringIO(response.content.decode())
108+
else:
109+
with open(str(filename), 'r') as file_buffer:
110+
file_buffer = io.StringIO(file_buffer.read())
111+
112+
# The first line has the name of the station, and the second gives the
113+
# station's latitude, longitude, elevation above mean sea level in meters,
114+
# and the displacement in hours from local standard time.
115+
meta['station_name'] = file_buffer.readline().strip()
116+
117+
meta_line = file_buffer.readline().split()
118+
meta['latitude'] = float(meta_line[0])
119+
meta['longitude'] = float(meta_line[1])
120+
meta['altitude'] = float(meta_line[2])
121+
meta['TZ'] = int(meta_line[3])
122+
94123
# read in data
95-
data = pd.read_fwf(filename, header=None, skiprows=2, names=names,
96-
widths=widths, na_values=-9999.9)
97-
98-
# loop here because dtype kwarg not supported in read_fwf until 0.20
99-
for (col, _dtype) in zip(data.columns, dtypes):
100-
ser = data[col].astype(_dtype)
101-
if _dtype == 'float64':
102-
# older versions of pandas/numpy read '-9999.9' as
103-
# -9999.8999999999996 and fail to set nan in read_fwf,
104-
# so manually set nan
105-
ser = ser.where(ser > -9999, other=np.nan)
106-
data[col] = ser
124+
data = pd.read_fwf(file_buffer, header=None, names=names,
125+
widths=widths, na_values=-9999.9, dtypes=dtypes)
107126

108127
# set index
109128
# columns do not have leading 0s, so must zfill(2) to comply
@@ -114,10 +133,5 @@ def read_solrad(filename):
114133
data['year'].astype(str) + dts['month'] + dts['day'] + dts['hour'] +
115134
dts['minute'], format='%Y%m%d%H%M', utc=True)
116135
data = data.set_index(dtindex)
117-
try:
118-
# to_datetime(utc=True) does not work in older versions of pandas
119-
data = data.tz_localize('UTC')
120-
except TypeError:
121-
pass
122136

123-
return data
137+
return data, meta

pvlib/tests/iotools/test_solrad.py

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,13 @@
55
import pytest
66

77
from pvlib.iotools import solrad
8-
from ..conftest import DATA_DIR, assert_frame_equal
8+
from ..conftest import DATA_DIR, assert_frame_equal, RERUNS, RERUNS_DELAY
99

1010

1111
testfile = DATA_DIR / 'abq19056.dat'
1212
testfile_mad = DATA_DIR / 'msn19056.dat'
13-
13+
https_testfile = ('https://gml.noaa.gov/aftp/data/radiation/solrad/msn/'
14+
'2019/msn19056.dat')
1415

1516
columns = [
1617
'year', 'julian_day', 'month', 'day', 'hour', 'minute', 'decimal_time',
@@ -87,15 +88,32 @@
8788
'int64', 'float64', 'int64', 'float64', 'int64', 'float64', 'int64',
8889
'float64', 'int64', 'float64', 'float64', 'float64', 'float64', 'float64',
8990
'float64', 'float64']
91+
meta = {'station_name': 'Albuquerque', 'latitude': 35.03796,
92+
'longitude': -106.62211, 'altitude': 1617, 'TZ': -7}
93+
meta_mad = {'station_name': 'Madison', 'latitude': 43.07250,
94+
'longitude': -89.41133, 'altitude': 271, 'TZ': -6}
9095

9196

92-
@pytest.mark.parametrize('testfile,index,columns,values,dtypes', [
93-
(testfile, index, columns, values, dtypes),
94-
(testfile_mad, index, columns_mad, values_mad, dtypes_mad)
97+
@pytest.mark.parametrize('testfile,index,columns,values,dtypes,meta', [
98+
(testfile, index, columns, values, dtypes, meta),
99+
(testfile_mad, index, columns_mad, values_mad, dtypes_mad, meta_mad)
95100
])
96-
def test_read_solrad(testfile, index, columns, values, dtypes):
101+
def test_read_solrad(testfile, index, columns, values, dtypes, meta):
97102
expected = pd.DataFrame(values, columns=columns, index=index)
98103
for (col, _dtype) in zip(expected.columns, dtypes):
99104
expected[col] = expected[col].astype(_dtype)
100-
out = solrad.read_solrad(testfile)
105+
out, m = solrad.read_solrad(testfile)
101106
assert_frame_equal(out, expected)
107+
assert m == meta
108+
109+
110+
@pytest.mark.remote_data
111+
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
112+
def test_read_solrad_https():
113+
# Test reading of https files.
114+
# If this test begins failing, SOLRAD's data structure or data
115+
# archive may have changed.
116+
local_data, _ = solrad.read_solrad(testfile_mad)
117+
remote_data, _ = solrad.read_solrad(https_testfile)
118+
# local file only contains four rows to save space
119+
assert_frame_equal(local_data, remote_data.iloc[:4])

0 commit comments

Comments
 (0)