From f8a40048dbbabe6b3d690f5391ae5b46ed76c8fb Mon Sep 17 00:00:00 2001
From: Joe Karas
Date: Thu, 19 Oct 2023 12:45:07 -0600
Subject: [PATCH 1/2] Update to letid.py

Fixes a bug where too-small injection values caused errors in the defect
calculation.

---
 pvdeg/letid.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/pvdeg/letid.py b/pvdeg/letid.py
index 6d86668e..0a58a8f0 100644
--- a/pvdeg/letid.py
+++ b/pvdeg/letid.py
@@ -864,6 +864,9 @@ def calc_injection_outdoors(results):
         (results.dc["i_sc"] - results.dc["i_mp"]) / (results.dc["i_sc"]) * (ee / 1000)
     )
 
+    # replace any too-small values with NaNs
+    injection = injection.mask(injection < 1e-5)
+
     return injection
 
 
@@ -1040,7 +1043,8 @@ def calc_letid_outdoors(
     timesteps.loc[0, "tau"] = tau_now(tau_0, tau_deg, nb_0)
 
     if generation_df is None:
-        generation_df = pd.read_excel(os.path.join(DATA_DIR, "PVL_GenProfile.xlsx"), header=0
+        generation_df = pd.read_excel(
+            os.path.join(DATA_DIR, "PVL_GenProfile.xlsx"), header=0
         )  # this is an optical generation profile generated by PVLighthouse's OPAL2 default model for 1-sun, normal incident AM1.5 sunlight on a 180-um thick SiNx-coated, pyramid-textured wafer.
     generation = generation_df["Generation (cm-3s-1)"]
     depth = generation_df["Depth (um)"]
@@ -1283,7 +1287,8 @@ def calc_letid_lab(
     print("can only define constant temp and injection for now")
 
     if generation_df is None:
-        generation_df = pd.read_excel(os.path.join(DATA_DIR, "PVL_GenProfile.xlsx"), header=0
+        generation_df = pd.read_excel(
+            os.path.join(DATA_DIR, "PVL_GenProfile.xlsx"), header=0
         )  # this is an optical generation profile generated by PVLighthouse's OPAL2 default model for 1-sun, normal incident AM1.5 sunlight on a 180-um thick SiNx-coated, pyramid-textured wafer.
     generation = generation_df["Generation (cm-3s-1)"]
     depth = generation_df["Depth (um)"]
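What the new guard does, for reference (a minimal sketch, not part of the patch; the 1e-5 threshold is taken from the hunk above):

    import pandas as pd

    injection = pd.Series([0.5, 1e-7, 0.3])
    # Series.mask() replaces values where the condition is True with NaN,
    # so near-zero injection no longer reaches the defect calculation
    injection = injection.mask(injection < 1e-5)
    # -> 0.5, NaN, 0.3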
From bcc47dddb99e1eda31fb4eb6766879f917abe0e3 Mon Sep 17 00:00:00 2001
From: Joe Karas
Date: Thu, 30 Nov 2023 11:44:25 -0700
Subject: [PATCH 2/2] Update weather.py

New functions for repeating annual time-series weather data.

---
 pvdeg/weather.py | 259 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 201 insertions(+), 58 deletions(-)

diff --git a/pvdeg/weather.py b/pvdeg/weather.py
index 865b0c88..77e2cdd4 100644
--- a/pvdeg/weather.py
+++ b/pvdeg/weather.py
@@ -8,11 +8,13 @@
 import pandas as pd
 from rex import NSRDBX, Outputs
 from pvdeg import humidity
+import datetime
 import h5py
 import dask.dataframe as dd
 import xarray as xr
 
 
+
 def get(database, id=None, geospatial=False, **kwargs):
     """
     Load weather data directly from NSRDB or through any other PVLIB i/o
     tools function get.
 
     Parameters:
     -----------
     database : (str)
         'NSRDB' or 'PVGIS'
     id : (int or tuple)
         If NSRDB, id is the gid for the desired location. If PVGIS, id is a
         tuple of (latitude, longitude) for the desired location
     geospatial : (bool)
         If True, load the full dataset for geospatial analyses (NSRDB only)
     **kwargs :
         Additional keyword arguments to pass to the get function for the
         respective database library (e.g. satellite, names, NREL_HPC, etc.)
 
     Returns:
     --------
     weather_df : (pd.DataFrame)
         DataFrame of weather data
     meta : (dict)
         Dictionary of metadata for the weather data
     """
-
-    META_MAP = {'elevation' : 'altitude',
-                'Local Time Zone' : 'timezone'}
+    META_MAP = {'elevation': 'altitude', 'Local Time Zone': 'timezone'}
 
     if type(id) is tuple:
         location = id
         gid = None
         lat = location[0]
         lon = location[1]
     elif type(id) is int:
         gid = id
         location = None
     elif id is None:
         if not geospatial:
             raise TypeError(
-                'Specify location via tuple (latitude, longitude), or gid integer.')
+                'Specify location via tuple (latitude, longitude), or gid integer.'
+            )
 
     if not geospatial:
-        #TODO: decide whether to follow NSRDB or pvlib conventions...
+        # TODO: decide whether to follow NSRDB or pvlib conventions...
         # e.g. temp_air vs. air_temperature
         # "map_variables" will guarantee pvlib conventions (automatic in a
         # coming update), i.e. "temp_air"
         if database == 'NSRDB':
             weather_df, meta = get_NSRDB(gid=gid, location=location, **kwargs)
         elif database == 'PVGIS':
-            weather_df, _, meta, _ = iotools.get_pvgis_tmy(latitude=lat, longitude=lon,
-                                                           map_variables=True, **kwargs)
+            weather_df, _, meta, _ = iotools.get_pvgis_tmy(
+                latitude=lat, longitude=lon, map_variables=True, **kwargs
+            )
             meta = meta['location']
         elif database == 'PSM3':
             weather_df, meta = iotools.get_psm3(latitude=lat, longitude=lon, **kwargs)
 
         # map meta-names as needed
         for key in [*meta.keys()]:
             if key in META_MAP.keys():
-                 meta[META_MAP[key]] = meta.pop(key)
+                meta[META_MAP[key]] = meta.pop(key)
 
         return weather_df, meta
 
     if geospatial:
         weather_ds, meta_df = get_NSRDB(geospatial=geospatial, **kwargs)
 
         return weather_ds, meta_df
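Usage of get(), for reference (a minimal sketch, not part of the patch; the coordinates are illustrative, and PVGIS TMY requires no API key):

    from pvdeg import weather

    # a tuple id is interpreted as (latitude, longitude)
    weather_df, meta = weather.get('PVGIS', id=(39.74, -105.17))
    # meta['elevation'] is renamed to meta['altitude'] via META_MAP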
 
 
 def read(file_in, file_type, **kwargs):
     """
     Read a locally stored weather file of any PVLIB compatible type
 
     Parameters:
     -----------
     file_in : (path)
         full file path to the desired weather file
     file_type : (str)
         type of weather file from list below (verified)
         [psm3, tmy3, epw, h5]
     """
     META_MAP = {'elevation': 'altitude', 'Local Time Zone': 'timezone'}
 
     supported = ['psm3', 'tmy3', 'epw', 'h5']
     file_type = file_type.upper()
 
-    if file_type in ['PSM3','PSM']:
+    if file_type in ['PSM3', 'PSM']:
         weather_df, meta = iotools.read_psm3(filename=file_in, map_variables=True)
-    elif file_type in ['TMY3','TMY']:
-        weather_df, meta = iotools.read_tmy3(filename=file_in) #map variable not worki - check pvlib for map_variables
+    elif file_type in ['TMY3', 'TMY']:
+        weather_df, meta = iotools.read_tmy3(
+            filename=file_in
+        )  # map_variables not working - check pvlib for map_variables
     elif file_type == 'EPW':
         weather_df, meta = iotools.read_epw(filename=file_in)
     elif file_type == 'H5':
         weather_df, meta = read_h5(file=file_in, **kwargs)
     else:
         print(f'File-Type not recognized. Supported types:\n{supported}')
 
     if not isinstance(meta, dict):
         meta = meta.to_dict()
 
     # map meta-names as needed
     for key in [*meta.keys()]:
         if key in META_MAP.keys():
-             meta[META_MAP[key]] = meta.pop(key)
+            meta[META_MAP[key]] = meta.pop(key)
 
     return weather_df, meta
 
 
 def read_h5(gid, file, attributes=None, **_):
     """
     Read a locally stored h5 weather file that follows NSRDB conventions
 
     Parameters:
     -----------
     gid : (int)
         gid for the desired location
     file : (str)
         h5 file path and name
     attributes : (list)
         List of weather attributes to extract from the file
 
     Returns:
     --------
     weather_df : (pd.DataFrame)
         DataFrame of weather data
     meta : (dict)
         Dictionary of metadata for the weather data
     """
     if os.path.dirname(file):
         fp = file
     else:
-        fp = os.path.join(os.path.dirname(__file__),
-                          os.path.basename(file))
+        fp = os.path.join(os.path.dirname(__file__), os.path.basename(file))
 
     if os.path.dirname(file):
         fp = file
     else:
-        fp = os.path.join(os.path.dirname(__file__),
-                          os.path.basename(file))
+        fp = os.path.join(os.path.dirname(__file__), os.path.basename(file))
 
     with Outputs(fp, mode='r') as f:
         meta = f.meta.loc[gid]
         index = f.time_index
         dattr = f.attrs
 
-    #TODO: put into utilities
+    # TODO: put into utilities
     if attributes == None:
         attributes = list(dattr.keys())
         try:
             attributes.remove('meta')
             attributes.remove('tmy_year_short')
         except ValueError:
             pass
 
     weather_df = pd.DataFrame(index=index)
     for dset in attributes:
         weather_df[dset] = f[dset, :, gid]
 
     return weather_df, meta.to_dict()
 
 
 def ini_h5_geospatial(fps):
     """
     initialize an h5 weather file that follows NSRDB conventions for
     geospatial analyses
     """
     for i, fp in enumerate(fps):
         # read the time index, site meta, and on-disk chunking via h5py
         with h5py.File(fp, 'r') as f:
             time_index = pd.to_datetime(f['time_index'][...].astype(str))
             meta_df = pd.DataFrame(f['meta'][...])
             chunks = next(d.chunks for d in f.values() if getattr(d, 'ndim', 0) == 2)
 
         coords = {'gid': meta_df.index.values, 'time': time_index}
         coords_len = {'time': time_index.shape[0], 'gid': meta_df.shape[0]}
 
-        ds = xr.open_dataset(fp,
-                             engine='h5netcdf',
-                             phony_dims='sort',
-                             chunks={'phony_dim_0':chunks[0], 'phony_dim_1':chunks[1]},
-                             drop_variables=['time_index', 'meta'],
-                             mask_and_scale=False,
-                             decode_cf=True)
+        ds = xr.open_dataset(
+            fp,
+            engine='h5netcdf',
+            phony_dims='sort',
+            chunks={'phony_dim_0': chunks[0], 'phony_dim_1': chunks[1]},
+            drop_variables=['time_index', 'meta'],
+            mask_and_scale=False,
+            decode_cf=True,
+        )
 
         for var in ds.data_vars:
-            if hasattr(getattr(ds, var),'psm_scale_factor'):
-                scale_factor = 1/ds[var].psm_scale_factor
-                getattr(ds,var).attrs['scale_factor'] = scale_factor
+            if hasattr(getattr(ds, var), 'psm_scale_factor'):
+                scale_factor = 1 / ds[var].psm_scale_factor
+                getattr(ds, var).attrs['scale_factor'] = scale_factor
 
-        if tuple(coords_len.values()) == (ds.dims['phony_dim_0'], ds.dims['phony_dim_1']):
-            rename = {'phony_dim_0':'time', 'phony_dim_1':'gid'}
-        elif tuple(coords_len.values()) == (ds.dims['phony_dim_1'], ds.dims['phony_dim_0']):
-            rename = {'phony_dim_0':'gid', 'phony_dim_1':'time'}
+        if tuple(coords_len.values()) == (
+            ds.dims['phony_dim_0'],
+            ds.dims['phony_dim_1'],
+        ):
+            rename = {'phony_dim_0': 'time', 'phony_dim_1': 'gid'}
+        elif tuple(coords_len.values()) == (
+            ds.dims['phony_dim_1'],
+            ds.dims['phony_dim_0'],
+        ):
+            rename = {'phony_dim_0': 'gid', 'phony_dim_1': 'time'}
         else:
             raise ValueError('Dimensions do not match')
 
-        ds = ds.rename({'phony_dim_0':rename['phony_dim_0'], 'phony_dim_1':rename['phony_dim_1']})
+        ds = ds.rename(
+            {'phony_dim_0': rename['phony_dim_0'], 'phony_dim_1': rename['phony_dim_1']}
+        )
         ds = ds.assign_coords(coords)
 
         # TODO: In case re-chunking becomes necessary
 
         weather_ds = ds
 
     return weather_ds, meta_df
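Reading a local file through read(), for reference (a minimal sketch, not part of the patch; the file name is hypothetical):

    from pvdeg import weather

    # file_type is case-insensitive; 'epw' is dispatched to iotools.read_epw
    weather_df, meta = weather.read(file_in='USA_CO_Golden.epw', file_type='epw')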
 
 
 def get_NSRDB_fnames(satellite, names, NREL_HPC=False, **_):
     """
     Get a list of NSRDB files for a given satellite and year
 
     Parameters:
     -----------
     satellite : (str)
         'GOES', 'METEOSAT', 'Himawari', 'SUNY', 'CONUS', 'Americas'
     names : (int or str)
         If int, year of desired data. If str, 'TMY' or 'TMY3'
     NREL_HPC : (bool)
         If True, use the NREL HPC file paths.
         If False, access the NSRDB files over HSDS
 
     Returns:
     --------
     nsrdb_fnames : (list)
         List of NSRDB files for the given satellite and year
     hsds : (bool)
         If True, the files must be accessed via HSDS
     """
     sat_map = {
         'GOES': 'full_disc',
         'METEOSAT': 'meteosat',
         'Himawari': 'himawari',
         'SUNY': 'india',
         'CONUS': 'conus',
         'Americas': 'current',
     }
 
     if NREL_HPC:
         hpc_fp = '/kfs2/pdatasets/NSRDB/'
         hsds = False
     else:
         hpc_fp = '/nrel/nsrdb/'
         hsds = True
 
     if type(names) in [int, float]:
-        nsrdb_fp = os.path.join(hpc_fp, sat_map[satellite], '*_{}.h5'.format(int(names)))
+        nsrdb_fp = os.path.join(
+            hpc_fp, sat_map[satellite], '*_{}.h5'.format(int(names))
+        )
         nsrdb_fnames = glob.glob(nsrdb_fp)
     else:
-        nsrdb_fp = os.path.join(hpc_fp, sat_map[satellite], '*_{}*.h5'.format(names.lower()))
+        nsrdb_fp = os.path.join(
+            hpc_fp, sat_map[satellite], '*_{}*.h5'.format(names.lower())
+        )
         nsrdb_fnames = glob.glob(nsrdb_fp)
 
     if len(nsrdb_fnames) == 0:
         raise FileNotFoundError(
             "Couldn't find NSRDB input files! \nSearched for: '{}'".format(nsrdb_fp)
         )
 
     return nsrdb_fnames, hsds
 
 
-def get_NSRDB(satellite, names, NREL_HPC, gid=None, location=None, geospatial=False, attributes=None, **_):
+def get_NSRDB(
+    satellite,
+    names,
+    NREL_HPC,
+    gid=None,
+    location=None,
+    geospatial=False,
+    attributes=None,
+    **_,
+):
     """
     Get NSRDB weather data from different satellites and years.
     Provide either gid or location tuple.
 
     Parameters:
     -----------
     satellite : (str)
         'GOES', 'METEOSAT', 'Himawari', 'SUNY', 'CONUS', 'Americas'
     names : (int or str)
         If int, year of desired data. If str, 'TMY' or 'TMY3'
     NREL_HPC : (bool)
         If True, use the NREL HPC file paths
     gid : (int)
         gid for the desired location
     location : (tuple)
         (latitude, longitude) for the desired location
     geospatial : (bool)
         If True, return the full dataset as an xarray dataset
     attributes : (list)
         List of weather attributes to extract from NSRDB
 
     Returns:
     --------
     weather_df : (pd.DataFrame)
         DataFrame of weather data
     meta : (dict)
         Dictionary of metadata for the weather data
     """
     DSET_MAP = {'air_temperature': 'temp_air', 'Relative Humidity': 'relative_humidity'}
 
     META_MAP = {'elevation': 'altitude'}
 
     if not geospatial:
         nsrdb_fnames, hsds = get_NSRDB_fnames(satellite, names, NREL_HPC)
 
         dattr = {}
         for i, file in enumerate(nsrdb_fnames):
             with NSRDBX(file, hsds=hsds) as f:
                 if i == 0:
                     if gid == None:  # TODO: add exception handling
                         gid = f.lat_lon_gid(location)
                     meta = f['meta', gid].iloc[0]
                     index = f.time_index
 
                 attrs = f.datasets
                 for attr in attrs:
                     dattr[attr] = file
 
         if attributes == None:
             attributes = list(dattr.keys())
             try:
                 attributes.remove('meta')
                 attributes.remove('tmy_year_short')
             except ValueError:
                 pass
 
         weather_df = pd.DataFrame(index=index)
 
         for dset in attributes:
             # switch dset names to pvlib standard
             if dset in [*DSET_MAP.keys()]:
                 column_name = DSET_MAP[dset]
             else:
                 column_name = dset
 
             with NSRDBX(dattr[dset], hsds=hsds) as f:
                 weather_df[column_name] = f[dset, :, gid]
 
         # switch meta key names to pvlib standard
         re_idx = []
         for key in [*meta.index]:
             if key in META_MAP.keys():
                 re_idx.append(META_MAP[key])
             else:
                 re_idx.append(key)
         meta.index = re_idx
 
         return weather_df, meta.to_dict()
 
     elif geospatial:
         nsrdb_fnames, hsds = get_NSRDB_fnames(satellite, names, NREL_HPC)
         weather_ds, meta_df = ini_h5_geospatial(nsrdb_fnames)
 
         for dset in weather_ds.data_vars:
             if dset in [*DSET_MAP.keys()]:
                 weather_ds = weather_ds.rename({dset: DSET_MAP[dset]})
 
         meta_df = meta_df.rename(columns=META_MAP)
 
         return weather_ds, meta_df
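How the file search pattern is assembled, for reference (a sketch of the logic above, not part of the patch):

    import os

    # NREL_HPC=False selects hpc_fp='/nrel/nsrdb/' with hsds=True, and
    # satellite='Americas' maps to 'current', so names=2019 searches for:
    print(os.path.join('/nrel/nsrdb/', 'current', '*_{}.h5'.format(2019)))
    # -> /nrel/nsrdb/current/*_2019.h5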
 
 
+
+def repeat_annual_time_series(time_series, start_year, n_years):
+    """
+    Repeat a pandas time series dataframe containing annual data.
+    For example, repeat TMY data by n_years, adding in leap days as necessary.
+    For now, this function requires 1 or more full years of uniform
+    interval (non-leap year) data, i.e. length must be a multiple of 8760.
+    On leap days, all data is set to 0.
+
+    TODO: make it possible to have weirder time series, e.g. non-uniform
+    intervals. Include an option for synthetic leap day data.
+
+    Parameters:
+    -----------
+    time_series : (pd.DataFrame)
+        pandas dataframe with DatetimeIndex
+    start_year : (int)
+        desired starting year of time_series
+    n_years : (int)
+        number of years to repeat time_series
+
+    Returns:
+    --------
+    new_time_series : (pd.DataFrame)
+        pandas dataframe repeated n_years
+    """
+
+    if len(time_series) % 8760 != 0:
+        raise ValueError('Length of time_series must be a multiple of 8760')
+
+    tz = time_series.index.tz
+    time_series = time_series.tz_localize(
+        None
+    )  # timezone-aware timeseries can cause problems, we'll make it tz-naive for now
+
+    time_series.index = time_series.index.map(lambda dt: dt.replace(year=start_year))
+
+    start = time_series.index[0]
+
+    for year in range(start_year, start_year + n_years):
+        if year == start_year:
+            if is_leap_year(year):
+                this_year = time_series.copy()
+                this_year.index = time_series.index.map(
+                    lambda dt: dt.replace(year=year)
+                )
+                this_year = pd.concat(
+                    [
+                        this_year[: str(year) + "-02-28"],
+                        pd.DataFrame(
+                            0,
+                            # 24 hourly rows for Feb 29; using periods instead
+                            # of an end date avoids duplicating Mar 1 00:00
+                            index=pd.date_range(
+                                start=datetime.datetime(
+                                    year=year, month=2, day=29, minute=start.minute
+                                ),
+                                periods=24,
+                                freq='H',
+                            ),
+                            columns=time_series.columns,
+                        ),
+                        this_year[str(year) + "-03-01" :],
+                    ]
+                )
+                new_time_series = this_year
+
+            else:
+                this_year = time_series.copy()
+                this_year.index = time_series.index.map(
+                    lambda dt: dt.replace(year=year)
+                )
+                new_time_series = this_year
+
+        else:
+            if is_leap_year(year):
+                this_year = time_series.copy()
+                this_year.index = time_series.index.map(
+                    lambda dt: dt.replace(year=year)
+                )
+                this_year = pd.concat(
+                    [
+                        this_year[: str(year) + "-02-28"],
+                        pd.DataFrame(
+                            0,
+                            # 24 hourly rows for Feb 29, as above
+                            index=pd.date_range(
+                                start=datetime.datetime(
+                                    year=year, month=2, day=29, minute=start.minute
+                                ),
+                                periods=24,
+                                freq='H',
+                            ),
+                            columns=time_series.columns,
+                        ),
+                        this_year[str(year) + "-03-01" :],
+                    ]
+                )
+                new_time_series = pd.concat([new_time_series, this_year])
+
+            else:
+                this_year = time_series.copy()
+                this_year.index = time_series.index.map(
+                    lambda dt: dt.replace(year=year)
+                )
+                new_time_series = pd.concat([new_time_series, this_year])
+
+    new_time_series.index = new_time_series.index.tz_localize(
+        tz=tz
+    )  # add back in the timezone
+
+    return new_time_series
+
+
+def is_leap_year(year):
+    '''Returns True if year is a leap year'''
+    if year % 4 != 0:
+        return False
+    elif year % 100 != 0:
+        return True
+    elif year % 400 != 0:
+        return False
+    else:
+        return True
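Usage of repeat_annual_time_series, for reference (a minimal sketch, not part of the patch; the data are synthetic):

    import pandas as pd
    from pvdeg import weather

    # one tz-naive, non-leap year of hourly data (8760 rows)
    idx = pd.date_range('2021-01-01', periods=8760, freq='H')
    tmy = pd.DataFrame({'ghi': range(8760)}, index=idx)

    out = weather.repeat_annual_time_series(tmy, start_year=2024, n_years=2)
    # 2024 is a leap year, so Feb 29 is zero-filled:
    # len(out) == 8760 * 2 + 24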