Skip to content
This repository was archived by the owner on Sep 11, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
1881ef1
first try and saving Sun data
peterdudfield Sep 28, 2021
6c66457
Merge branch 'bug/154-crs-warning' into issue/52-sun-azimuth-elevation
peterdudfield Sep 28, 2021
768af24
add test sun data
peterdudfield Sep 28, 2021
ff8280a
fix tests apart from datamodule ones
peterdudfield Sep 28, 2021
91e0466
Merge commit 'aa702078eedbe8e41c04b89ddbf44fb88845884f' into issue/52…
peterdudfield Sep 28, 2021
a8f78ae
add Sun Data Source to DataModule
peterdudfield Sep 29, 2021
7fb04f7
update sun data to save x,y centers (OSGB) not lats and lons
peterdudfield Sep 29, 2021
a721ac5
update sun test data
peterdudfield Sep 29, 2021
ece93c8
Make sure sun data source works for other years than just 2019
peterdudfield Sep 29, 2021
cb92a3f
Merge commit '5a80d7e5b2eb47cf2f654e100c84cc4ff16d87aa' into issue/52…
peterdudfield Sep 29, 2021
5aff271
add function strings
peterdudfield Sep 29, 2021
4b950d8
tidy
peterdudfield Sep 29, 2021
4eb8dd1
tidy
peterdudfield Sep 29, 2021
ff00e7b
Update nowcasting_dataset/data_sources/sun/raw_data_load_save.py
peterdudfield Sep 29, 2021
1082fcb
PR comments
peterdudfield Sep 29, 2021
4ffe644
Merge branch 'issue/52-sun-azimuth-elevation' of github.com:openclima…
peterdudfield Sep 29, 2021
7ba47b8
Update scripts/get_raw_sun_data.py
peterdudfield Sep 30, 2021
f730d52
PR comments
peterdudfield Sep 30, 2021
49b4211
tidy from PR comment
peterdudfield Sep 30, 2021
ac3c5a2
fix
peterdudfield Sep 30, 2021
890934a
Merge branch 'main' into issue/52-sun-azimuth-elevation
peterdudfield Sep 30, 2021
fe5adf2
pylint
peterdudfield Sep 30, 2021
bb105be
Merge branch 'main' into issue/52-sun-azimuth-elevation
peterdudfield Sep 30, 2021
58212bc
Merge branch 'main' into issue/52-sun-azimuth-elevation
peterdudfield Sep 30, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 120 additions & 0 deletions notebooks/2021-09/2021-09-28/get_raw_sun_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
############
# Look into the differences from year to year in sun elevation and azimuthal direction.
#
# Looked at 2018-2020, for January, April, July and October, and found the
# difference from year to year was less than 1 degree.
############

import logging

# configure logging before the heavier imports so their loggers inherit the level
logging.basicConfig()
logging.getLogger().setLevel(logging.DEBUG)
logging.getLogger("urllib3").setLevel(logging.WARNING)

import os
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd

import nowcasting_dataset
from nowcasting_dataset.data_sources.gsp.eso import get_gsp_metadata_from_eso
from nowcasting_dataset.data_sources.sun.raw_data_load_save import get_azimuth_and_elevation

# set up
BUCKET = Path("solar-pv-nowcasting-data")
PV_PATH = BUCKET / "PV/PVOutput.org"
PV_METADATA_FILENAME = PV_PATH / "UK_PV_metadata.csv"

# set up variables
local_path = os.path.dirname(nowcasting_dataset.__file__) + "/.."
metadata_filename = f"gs://{PV_METADATA_FILENAME}"

# PV metadata: drop systems that have no location
pv_metadata = pd.read_csv(metadata_filename, index_col="system_id")
pv_metadata = pv_metadata.dropna(subset=["longitude", "latitude"])
pv_longitudes = pv_metadata["longitude"]
pv_latitudes = pv_metadata["latitude"]

# GSP metadata: use the region centroids, dropping regions without one
gsp_metadata = get_gsp_metadata_from_eso()
gsp_metadata = gsp_metadata.dropna(subset=["centroid_lon", "centroid_lat"])
gsp_lon = gsp_metadata["centroid_lon"]
gsp_lat = gsp_metadata["centroid_lat"]

# join all sites (PV systems + GSP centroids) together
longitudes = list(pv_longitudes.values) + list(gsp_lon.values)
latitudes = list(pv_latitudes.values) + list(gsp_lat.values)

# make a one-day datetime template; year and month are overridden in the loop below
start_dt = datetime.fromisoformat("2019-01-01 00:00:00.000+00:00")
end_dt = datetime.fromisoformat("2019-01-02 00:00:00.000+00:00")

# compute azimuth and elevation for one day in each (year, month) combination
azimuths = {}
azimuths_sin = {}
azimuths_cos = {}
elevations = {}
months = [1, 4, 7, 10]
years = [2018, 2019, 2020]
for month in months:
    for year in years:
        print(year)
        print(month)
        start_dt = start_dt.replace(year=year, month=month)
        end_dt = end_dt.replace(year=year, month=month)
        datestamps = pd.date_range(start=start_dt, end=end_dt, freq="5T")

        azimuth, elevation = get_azimuth_and_elevation(
            longitudes=longitudes, latitudes=latitudes, datestamps=datestamps
        )

        azimuths[f"{year}_{month}"] = azimuth
        # also keep sin/cos of the azimuth: raw azimuth differences are distorted
        # by the 0/360 degree wrap-around, sin/cos are continuous across it
        azimuths_sin[f"{year}_{month}"] = np.sin(np.deg2rad(azimuth))
        azimuths_cos[f"{year}_{month}"] = np.cos(np.deg2rad(azimuth))
        elevations[f"{year}_{month}"] = elevation

# compare 2019 and 2020 against the 2018 baseline, taking the maximum
# absolute difference over all sites and datestamps
m_azimuths_sin = []
m_azimuths_cos = []
m_elevations = []
for month in months:
    for year in years[1:]:
        print(year)
        print(month)

        m_azimuths_sin.append(
            (
                np.abs(
                    azimuths_sin[f"{year}_{month}"].values - azimuths_sin[f"2018_{month}"].values
                )
            ).max()
        )
        m_azimuths_cos.append(
            (
                np.abs(
                    azimuths_cos[f"{year}_{month}"].values - azimuths_cos[f"2018_{month}"].values
                )
            ).max()
        )
        m_elevations.append(
            (
                np.abs(elevations[f"{year}_{month}"].values - elevations[f"2018_{month}"].values)
            ).max()
        )


# for small x (in radians), sin(x) ~ x, so sin(x) * 180 / pi approximates degrees
m_azimuths = np.array(m_azimuths_sin) * 180 / np.pi
# m_azimuths = np.array(m_azimuths_cos) * 180 / np.pi

print(f"Maximum azimuth difference is {max(m_azimuths)} degree")
print(f"Maximum elevation difference is {max(m_elevations)} degree")

# The largest difference in both azimuth and elevation is < 1 degree,
# so we are happy to use one year's data for all years.
5 changes: 5 additions & 0 deletions nowcasting_dataset/config/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@ class InputData(BaseModel):
description="Path to the GeoTIFF Topographic data source",
)

sun_zarr_path: str = Field(
"gs://solar-pv-nowcasting-data/Sun/v0/sun.zarr/",
description="Path to the Sun data source i.e Azimuth and Elevation",
)


class OutputData(BaseModel):
filepath: str = Field(
Expand Down
6 changes: 4 additions & 2 deletions nowcasting_dataset/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@
PV_SYSTEM_ROW_NUMBER = "pv_system_row_number"
PV_SYSTEM_X_COORDS = "pv_system_x_coords"
PV_SYSTEM_Y_COORDS = "pv_system_y_coords"
PV_AZIMUTH_ANGLE = "pv_azimuth_angle"
PV_ELEVATION_ANGLE = "pv_elevation_angle"

SUN_AZIMUTH_ANGLE = "sun_azimuth_angle"
SUN_ELEVATION_ANGLE = "sun_elevation_angle"

PV_YIELD = "pv_yield"
PV_DATETIME_INDEX = "pv_datetime_index"
DEFAULT_N_PV_SYSTEMS_PER_EXAMPLE = 128
Expand Down
1 change: 1 addition & 0 deletions nowcasting_dataset/data_sources/gsp/gsp_data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
DEFAULT_N_GSP_PER_EXAMPLE,
OBJECT_AT_CENTER,
)
import time

logger = logging.getLogger(__name__)

Expand Down
105 changes: 4 additions & 101 deletions nowcasting_dataset/data_sources/pv_data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
PV_SYSTEM_ROW_NUMBER,
PV_SYSTEM_X_COORDS,
PV_SYSTEM_Y_COORDS,
PV_AZIMUTH_ANGLE,
PV_ELEVATION_ANGLE,
PV_YIELD,
DEFAULT_N_PV_SYSTEMS_PER_EXAMPLE,
OBJECT_AT_CENTER,
Expand All @@ -17,7 +15,6 @@
import pandas as pd
import numpy as np
import torch
from tqdm import tqdm
from numbers import Number
from typing import List, Tuple, Union, Optional
import datetime
Expand All @@ -27,8 +24,6 @@
import xarray as xr
import functools
import logging
import time
from concurrent import futures

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -56,8 +51,6 @@ def __post_init__(self, image_size_pixels: int, meters_per_pixel: int):
def load(self):
self._load_metadata()
self._load_pv_power()
if self.load_azimuth_and_elevation:
self._calculate_azimuth_and_elevation()
self.pv_metadata, self.pv_power = align_pv_system_ids(self.pv_metadata, self.pv_power)

def _load_metadata(self):
Expand Down Expand Up @@ -123,18 +116,10 @@ def _get_time_slice(self, t0_dt: pd.Timestamp) -> [pd.DataFrame]:
del t0_dt # t0 is not used in the rest of this method!
selected_pv_power = self.pv_power.loc[start_dt:end_dt].dropna(axis="columns", how="any")

if self.load_azimuth_and_elevation:
selected_pv_azimuth_angle = self.pv_azimuth.loc[start_dt:end_dt].dropna(
axis="columns", how="any"
)
selected_pv_elevation_angle = self.pv_elevation.loc[start_dt:end_dt].dropna(
axis="columns", how="any"
)
else:
selected_pv_azimuth_angle = None
selected_pv_elevation_angle = None
selected_pv_azimuth_angle = None
selected_pv_elevation_angle = None

return selected_pv_power, selected_pv_azimuth_angle, selected_pv_elevation_angle
return selected_pv_power

def _get_central_pv_system_id(
self,
Expand Down Expand Up @@ -208,11 +193,7 @@ def get_example(

logger.debug("Getting PV example data")

(
selected_pv_power,
selected_pv_azimuth_angle,
selected_pv_elevation_angle,
) = self._get_time_slice(t0_dt)
selected_pv_power = self._get_time_slice(t0_dt)
all_pv_system_ids = self._get_all_pv_system_ids_in_roi(
x_meters_center, y_meters_center, selected_pv_power.columns
)
Expand All @@ -229,9 +210,6 @@ def get_example(
all_pv_system_ids = all_pv_system_ids[: self.n_pv_systems_per_example]

selected_pv_power = selected_pv_power[all_pv_system_ids]
if self.load_azimuth_and_elevation:
selected_pv_azimuth_angle = selected_pv_azimuth_angle[all_pv_system_ids]
selected_pv_elevation_angle = selected_pv_elevation_angle[all_pv_system_ids]

pv_system_row_number = np.flatnonzero(self.pv_metadata.index.isin(all_pv_system_ids))
pv_system_x_coords = self.pv_metadata.location_x[all_pv_system_ids]
Expand All @@ -249,10 +227,6 @@ def get_example(
pv_datetime_index=selected_pv_power.index,
)

if self.load_azimuth_and_elevation:
example[PV_AZIMUTH_ANGLE] = selected_pv_azimuth_angle
example[PV_ELEVATION_ANGLE] = selected_pv_elevation_angle

if self.get_center:
example[OBJECT_AT_CENTER] = "pv"

Expand All @@ -267,9 +241,6 @@ def get_example(
]

pad_nans_variables = [PV_YIELD]
if self.load_azimuth_and_elevation:
pad_nans_variables.append(PV_AZIMUTH_ANGLE)
pad_nans_variables.append(PV_ELEVATION_ANGLE)

example = utils.pad_data(
data=example,
Expand Down Expand Up @@ -319,74 +290,6 @@ def datetime_index(self) -> pd.DatetimeIndex:
"""Returns a complete list of all available datetimes."""
return self.pv_power.index

def _calculate_azimuth_and_elevation(self):
    """
    Calculate the azimuth and elevation angles for each datestamp, for each pv system.

    Stores the results on `self.pv_azimuth` and `self.pv_elevation`; uses every
    datetime from `self.datetime_index()` and every system in `self.pv_metadata`.
    """

    logger.debug("Calculating azimuth and elevation angles")

    # delegate to the module-level helper, converting the pandas index
    # to plain python datetimes as the helper expects
    self.pv_azimuth, self.pv_elevation = calculate_azimuth_and_elevation_all_pv_systems(
        self.datetime_index().to_pydatetime(), self.pv_metadata
    )


def calculate_azimuth_and_elevation_all_pv_systems(
    datestamps: List[datetime.datetime], pv_metadata: pd.DataFrame
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Calculate the azimuth and elevation angles for each datestamp, for each pv system.

    Args:
        datestamps: the datetimes for which to compute the sun angles.
        pv_metadata: one row per pv system, indexed by system id; must have
            `latitude` and `longitude` columns.

    Returns:
        Tuple of (azimuth, elevation): each indexed by datestamp with one
        column per pv system id.
    """

    logger.debug(
        f"Will be calculating for {len(datestamps)} datestamps and {len(pv_metadata)} pv systems"
    )

    # create array of index datetime, columns of system_id for both azimuth and elevation
    pv_azimuth = []
    pv_elevation = []

    t = time.time()
    # loop over all metadata and find azimuth and elevation angles;
    # not sure this is the best method to use, as currently this step takes ~2 minute for 745 pv systems,
    # and 235 datestamps (~100,000 point). But this only needs to be done once.
    # NOTE(review): one worker per pv system — for large metadata this spawns
    # many threads; confirm this is acceptable for the expected system counts.
    with futures.ThreadPoolExecutor(max_workers=len(pv_metadata)) as executor:

        logger.debug("Setting up jobs")

        # Submit tasks to the executor, one per pv system.
        future_azimuth_and_elevation_per_pv_system = []
        for i in tqdm(range(len(pv_metadata))):
            future_azimuth_and_elevation = executor.submit(
                geospatial.calculate_azimuth_and_elevation_angle,
                latitude=pv_metadata.iloc[i].latitude,
                longitude=pv_metadata.iloc[i].longitude,
                datestamps=datestamps,
            )
            # remember the future together with the system id (the row's index label)
            future_azimuth_and_elevation_per_pv_system.append(
                [future_azimuth_and_elevation, pv_metadata.iloc[i].name]
            )

        logger.debug(f"Getting results")

        # Collect results from each thread.
        for i in tqdm(range(len(future_azimuth_and_elevation_per_pv_system))):
            future_azimuth_and_elevation, name = future_azimuth_and_elevation_per_pv_system[i]
            azimuth_and_elevation = future_azimuth_and_elevation.result()

            # rename each column's Series to the system id so that the concat
            # below yields one column per pv system
            azimuth = azimuth_and_elevation.loc[:, "azimuth"].rename(name)
            elevation = azimuth_and_elevation.loc[:, "elevation"].rename(name)

            pv_azimuth.append(azimuth)
            pv_elevation.append(elevation)

    # combine the per-system Series into one wide frame each
    pv_azimuth = pd.concat(pv_azimuth, axis=1)
    pv_elevation = pd.concat(pv_elevation, axis=1)

    logger.debug(f"Calculated Azimuth and Elevation angles in {time.time() - t} seconds")

    return pv_azimuth, pv_elevation


def load_solar_pv_data_from_gcs(
filename: Union[str, Path],
Expand Down
Empty file.
Loading