From f8a40048dbbabe6b3d690f5391ae5b46ed76c8fb Mon Sep 17 00:00:00 2001
From: Joe Karas
Date: Thu, 19 Oct 2023 12:45:07 -0600
Subject: [PATCH 1/2] Update to letid.py

Fixes a bug where too-small injection values caused errors in the defect
calculation.

---
 pvdeg/letid.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/pvdeg/letid.py b/pvdeg/letid.py
index 6d86668e..0a58a8f0 100644
--- a/pvdeg/letid.py
+++ b/pvdeg/letid.py
@@ -864,6 +864,9 @@ def calc_injection_outdoors(results):
         (results.dc["i_sc"] - results.dc["i_mp"]) / (results.dc["i_sc"]) * (ee / 1000)
     )
 
+    # replace any too-small values with NaNs
+    injection = injection.mask(injection < 1e-5)
+
     return injection
 
 
@@ -1040,7 +1043,8 @@ def calc_letid_outdoors(
     timesteps.loc[0, "tau"] = tau_now(tau_0, tau_deg, nb_0)
 
     if generation_df is None:
-        generation_df = pd.read_excel(os.path.join(DATA_DIR, "PVL_GenProfile.xlsx"), header=0
+        generation_df = pd.read_excel(
+            os.path.join(DATA_DIR, "PVL_GenProfile.xlsx"), header=0
         )  # this is an optical generation profile generated by PVLighthouse's OPAL2 default model for 1-sun, normal incident AM1.5 sunlight on a 180-um thick SiNx-coated, pyramid-textured wafer.
     generation = generation_df["Generation (cm-3s-1)"]
     depth = generation_df["Depth (um)"]
@@ -1283,7 +1287,8 @@ def calc_letid_lab(
     print("can only define constant temp and injection for now")
 
     if generation_df is None:
-        generation_df = pd.read_excel(os.path.join(DATA_DIR, "PVL_GenProfile.xlsx"), header=0
+        generation_df = pd.read_excel(
+            os.path.join(DATA_DIR, "PVL_GenProfile.xlsx"), header=0
         )  # this is an optical generation profile generated by PVLighthouse's OPAL2 default model for 1-sun, normal incident AM1.5 sunlight on a 180-um thick SiNx-coated, pyramid-textured wafer.
     generation = generation_df["Generation (cm-3s-1)"]
     depth = generation_df["Depth (um)"]
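What the new guard does, for reference (a minimal sketch, not part of the patch; the 1e-5 threshold is taken from the hunk above):

    import pandas as pd

    injection = pd.Series([0.5, 1e-7, 0.3])
    # Series.mask() replaces values where the condition is True with NaN,
    # so near-zero injection no longer reaches the defect calculation
    injection = injection.mask(injection < 1e-5)
    # -> 0.5, NaN, 0.3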
From bcc47dddb99e1eda31fb4eb6766879f917abe0e3 Mon Sep 17 00:00:00 2001
From: Joe Karas
Date: Thu, 30 Nov 2023 11:44:25 -0700
Subject: [PATCH 2/2] Update weather.py

New functions for repeating annual time-series weather data.

---
 pvdeg/weather.py | 259 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 201 insertions(+), 58 deletions(-)

diff --git a/pvdeg/weather.py b/pvdeg/weather.py
index 865b0c88..77e2cdd4 100644
--- a/pvdeg/weather.py
+++ b/pvdeg/weather.py
@@ -8,11 +8,13 @@
 import pandas as pd
 from rex import NSRDBX, Outputs
 from pvdeg import humidity
+import datetime
 import h5py
 import dask.dataframe as dd
 import xarray as xr
 
 
+
 def get(database, id=None, geospatial=False, **kwargs):
     """
     Load weather data directly from NSRDB or through any other PVLIB i/o
     tools function get.
 
     Parameters:
     -----------
     database : (str)
         'NSRDB' or 'PVGIS'
     id : (int or tuple)
         If NSRDB, id is the gid for the desired location. If PVGIS, id is a
         tuple of (latitude, longitude) for the desired location
     geospatial : (bool)
         If True, load the full dataset for geospatial analyses (NSRDB only)
     **kwargs :
         Additional keyword arguments to pass to the get function for the
         respective database library (e.g. satellite, names, NREL_HPC, etc.)
 
     Returns:
     --------
     weather_df : (pd.DataFrame)
         DataFrame of weather data
     meta : (dict)
         Dictionary of metadata for the weather data
     """
-
-    META_MAP = {'elevation' : 'altitude',
-                'Local Time Zone' : 'timezone'}
+    META_MAP = {'elevation': 'altitude', 'Local Time Zone': 'timezone'}
 
     if type(id) is tuple:
         location = id
         gid = None
         lat = location[0]
         lon = location[1]
     elif type(id) is int:
         gid = id
         location = None
     elif id is None:
         if not geospatial:
             raise TypeError(
-                'Specify location via tuple (latitude, longitude), or gid integer.')
+                'Specify location via tuple (latitude, longitude), or gid integer.'
+            )
 
     if not geospatial:
-        #TODO: decide whether to follow NSRDB or pvlib conventions...
+        # TODO: decide whether to follow NSRDB or pvlib conventions...
         # e.g. temp_air vs. air_temperature
         # "map_variables" will guarantee pvlib conventions (automatic in a
         # coming update), i.e. "temp_air"
         if database == 'NSRDB':
             weather_df, meta = get_NSRDB(gid=gid, location=location, **kwargs)
         elif database == 'PVGIS':
-            weather_df, _, meta, _ = iotools.get_pvgis_tmy(latitude=lat, longitude=lon,
-                                                           map_variables=True, **kwargs)
+            weather_df, _, meta, _ = iotools.get_pvgis_tmy(
+                latitude=lat, longitude=lon, map_variables=True, **kwargs
+            )
             meta = meta['location']
         elif database == 'PSM3':
             weather_df, meta = iotools.get_psm3(latitude=lat, longitude=lon, **kwargs)
 
         # map meta-names as needed
         for key in [*meta.keys()]:
             if key in META_MAP.keys():
-                 meta[META_MAP[key]] = meta.pop(key)
+                meta[META_MAP[key]] = meta.pop(key)
 
         return weather_df, meta
 
     if geospatial:
         weather_ds, meta_df = get_NSRDB(geospatial=geospatial, **kwargs)
 
         return weather_ds, meta_df
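Usage of get(), for reference (a minimal sketch, not part of the patch; the coordinates are illustrative, and PVGIS TMY requires no API key):

    from pvdeg import weather

    # a tuple id is interpreted as (latitude, longitude)
    weather_df, meta = weather.get('PVGIS', id=(39.74, -105.17))
    # meta['elevation'] is renamed to meta['altitude'] via META_MAP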
 
 
 def read(file_in, file_type, **kwargs):
     """
     Read a locally stored weather file of any PVLIB compatible type
 
     Parameters:
     -----------
     file_in : (path)
         full file path to the desired weather file
     file_type : (str)
         type of weather file from list below (verified)
         [psm3, tmy3, epw, h5]
     """
     META_MAP = {'elevation': 'altitude', 'Local Time Zone': 'timezone'}
 
     supported = ['psm3', 'tmy3', 'epw', 'h5']
     file_type = file_type.upper()
 
-    if file_type in ['PSM3','PSM']:
+    if file_type in ['PSM3', 'PSM']:
         weather_df, meta = iotools.read_psm3(filename=file_in, map_variables=True)
-    elif file_type in ['TMY3','TMY']:
-        weather_df, meta = iotools.read_tmy3(filename=file_in) #map variable not worki - check pvlib for map_variables
+    elif file_type in ['TMY3', 'TMY']:
+        weather_df, meta = iotools.read_tmy3(
+            filename=file_in
+        )  # map_variables not working - check pvlib for map_variables
     elif file_type == 'EPW':
         weather_df, meta = iotools.read_epw(filename=file_in)
     elif file_type == 'H5':
         weather_df, meta = read_h5(file=file_in, **kwargs)
     else:
         print(f'File-Type not recognized. Supported types:\n{supported}')
 
     if not isinstance(meta, dict):
         meta = meta.to_dict()
 
     # map meta-names as needed
     for key in [*meta.keys()]:
         if key in META_MAP.keys():
-             meta[META_MAP[key]] = meta.pop(key)
+            meta[META_MAP[key]] = meta.pop(key)
 
     return weather_df, meta
 
 
 def read_h5(gid, file, attributes=None, **_):
     """
     Read a locally stored h5 weather file that follows NSRDB conventions
 
     Parameters:
     -----------
     gid : (int)
         gid for the desired location
     file : (str)
         h5 file path and name
     attributes : (list)
         List of weather attributes to extract from the file
 
     Returns:
     --------
     weather_df : (pd.DataFrame)
         DataFrame of weather data
     meta : (dict)
         Dictionary of metadata for the weather data
     """
     if os.path.dirname(file):
         fp = file
     else:
-        fp = os.path.join(os.path.dirname(__file__),
-                          os.path.basename(file))
+        fp = os.path.join(os.path.dirname(__file__), os.path.basename(file))
 
     if os.path.dirname(file):
         fp = file
     else:
-        fp = os.path.join(os.path.dirname(__file__),
-                          os.path.basename(file))
+        fp = os.path.join(os.path.dirname(__file__), os.path.basename(file))
 
     with Outputs(fp, mode='r') as f:
         meta = f.meta.loc[gid]
         index = f.time_index
         dattr = f.attrs
 
-    #TODO: put into utilities
+    # TODO: put into utilities
     if attributes == None:
         attributes = list(dattr.keys())
         try:
             attributes.remove('meta')
             attributes.remove('tmy_year_short')
         except ValueError:
             pass
 
     weather_df = pd.DataFrame(index=index)
     for dset in attributes:
         weather_df[dset] = f[dset, :, gid]
 
     return weather_df, meta.to_dict()
 
 
 def ini_h5_geospatial(fps):
     """
     initialize an h5 weather file that follows NSRDB conventions for
     geospatial analyses
     """
     for i, fp in enumerate(fps):
         # read the time index, site meta, and on-disk chunking via h5py
         with h5py.File(fp, 'r') as f:
             time_index = pd.to_datetime(f['time_index'][...].astype(str))
             meta_df = pd.DataFrame(f['meta'][...])
             chunks = next(d.chunks for d in f.values() if getattr(d, 'ndim', 0) == 2)
 
         coords = {'gid': meta_df.index.values, 'time': time_index}
         coords_len = {'time': time_index.shape[0], 'gid': meta_df.shape[0]}
 
-        ds = xr.open_dataset(fp,
-                             engine='h5netcdf',
-                             phony_dims='sort',
-                             chunks={'phony_dim_0':chunks[0], 'phony_dim_1':chunks[1]},
-                             drop_variables=['time_index', 'meta'],
-                             mask_and_scale=False,
-                             decode_cf=True)
+        ds = xr.open_dataset(
+            fp,
+            engine='h5netcdf',
+            phony_dims='sort',
+            chunks={'phony_dim_0': chunks[0], 'phony_dim_1': chunks[1]},
+            drop_variables=['time_index', 'meta'],
+            mask_and_scale=False,
+            decode_cf=True,
+        )
 
         for var in ds.data_vars:
-            if hasattr(getattr(ds, var),'psm_scale_factor'):
-                scale_factor = 1/ds[var].psm_scale_factor
-                getattr(ds,var).attrs['scale_factor'] = scale_factor
+            if hasattr(getattr(ds, var), 'psm_scale_factor'):
+                scale_factor = 1 / ds[var].psm_scale_factor
+                getattr(ds, var).attrs['scale_factor'] = scale_factor
 
-        if tuple(coords_len.values()) == (ds.dims['phony_dim_0'], ds.dims['phony_dim_1']):
-            rename = {'phony_dim_0':'time', 'phony_dim_1':'gid'}
-        elif tuple(coords_len.values()) == (ds.dims['phony_dim_1'], ds.dims['phony_dim_0']):
-            rename = {'phony_dim_0':'gid', 'phony_dim_1':'time'}
+        if tuple(coords_len.values()) == (
+            ds.dims['phony_dim_0'],
+            ds.dims['phony_dim_1'],
+        ):
+            rename = {'phony_dim_0': 'time', 'phony_dim_1': 'gid'}
+        elif tuple(coords_len.values()) == (
+            ds.dims['phony_dim_1'],
+            ds.dims['phony_dim_0'],
+        ):
+            rename = {'phony_dim_0': 'gid', 'phony_dim_1': 'time'}
         else:
             raise ValueError('Dimensions do not match')
 
-        ds = ds.rename({'phony_dim_0':rename['phony_dim_0'], 'phony_dim_1':rename['phony_dim_1']})
+        ds = ds.rename(
+            {'phony_dim_0': rename['phony_dim_0'], 'phony_dim_1': rename['phony_dim_1']}
+        )
         ds = ds.assign_coords(coords)
 
         # TODO: In case re-chunking becomes necessary
 
         weather_ds = ds
 
     return weather_ds, meta_df
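Reading a local file through read(), for reference (a minimal sketch, not part of the patch; the file name is hypothetical):

    from pvdeg import weather

    # file_type is case-insensitive; 'epw' is dispatched to iotools.read_epw
    weather_df, meta = weather.read(file_in='USA_CO_Golden.epw', file_type='epw')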
 
 
 def get_NSRDB_fnames(satellite, names, NREL_HPC=False, **_):
     """
     Get a list of NSRDB files for a given satellite and year
 
     Parameters:
     -----------
     satellite : (str)
         'GOES', 'METEOSAT', 'Himawari', 'SUNY', 'CONUS', 'Americas'
     names : (int or str)
         If int, year of desired data. If str, 'TMY' or 'TMY3'
     NREL_HPC : (bool)
         If True, use the NREL HPC file paths.
         If False, access the NSRDB files over HSDS
 
     Returns:
     --------
     nsrdb_fnames : (list)
         List of NSRDB files for the given satellite and year
     hsds : (bool)
         If True, the files must be accessed via HSDS
     """
     sat_map = {
         'GOES': 'full_disc',
         'METEOSAT': 'meteosat',
         'Himawari': 'himawari',
         'SUNY': 'india',
         'CONUS': 'conus',
         'Americas': 'current',
     }
 
     if NREL_HPC:
         hpc_fp = '/kfs2/pdatasets/NSRDB/'
         hsds = False
     else:
         hpc_fp = '/nrel/nsrdb/'
         hsds = True
 
     if type(names) in [int, float]:
-        nsrdb_fp = os.path.join(hpc_fp, sat_map[satellite], '*_{}.h5'.format(int(names)))
+        nsrdb_fp = os.path.join(
+            hpc_fp, sat_map[satellite], '*_{}.h5'.format(int(names))
+        )
         nsrdb_fnames = glob.glob(nsrdb_fp)
     else:
-        nsrdb_fp = os.path.join(hpc_fp, sat_map[satellite], '*_{}*.h5'.format(names.lower()))
+        nsrdb_fp = os.path.join(
+            hpc_fp, sat_map[satellite], '*_{}*.h5'.format(names.lower())
+        )
         nsrdb_fnames = glob.glob(nsrdb_fp)
 
     if len(nsrdb_fnames) == 0:
         raise FileNotFoundError(
             "Couldn't find NSRDB input files! \nSearched for: '{}'".format(nsrdb_fp)
         )
 
     return nsrdb_fnames, hsds
 
 
-def get_NSRDB(satellite, names, NREL_HPC, gid=None, location=None, geospatial=False, attributes=None, **_):
+def get_NSRDB(
+    satellite,
+    names,
+    NREL_HPC,
+    gid=None,
+    location=None,
+    geospatial=False,
+    attributes=None,
+    **_,
+):
     """
     Get NSRDB weather data from different satellites and years.
     Provide either gid or location tuple.
 
     Parameters:
     -----------
     satellite : (str)
         'GOES', 'METEOSAT', 'Himawari', 'SUNY', 'CONUS', 'Americas'
     names : (int or str)
         If int, year of desired data. If str, 'TMY' or 'TMY3'
     NREL_HPC : (bool)
         If True, use the NREL HPC file paths
     gid : (int)
         gid for the desired location
     location : (tuple)
         (latitude, longitude) for the desired location
     geospatial : (bool)
         If True, return the full dataset as an xarray dataset
     attributes : (list)
         List of weather attributes to extract from NSRDB
 
     Returns:
     --------
     weather_df : (pd.DataFrame)
         DataFrame of weather data
     meta : (dict)
         Dictionary of metadata for the weather data
     """
     DSET_MAP = {'air_temperature': 'temp_air', 'Relative Humidity': 'relative_humidity'}
 
     META_MAP = {'elevation': 'altitude'}
 
     if not geospatial:
         nsrdb_fnames, hsds = get_NSRDB_fnames(satellite, names, NREL_HPC)
 
         dattr = {}
         for i, file in enumerate(nsrdb_fnames):
             with NSRDBX(file, hsds=hsds) as f:
                 if i == 0:
                     if gid == None:  # TODO: add exception handling
                         gid = f.lat_lon_gid(location)
                     meta = f['meta', gid].iloc[0]
                     index = f.time_index
 
                 attrs = f.datasets
                 for attr in attrs:
                     dattr[attr] = file
 
         if attributes == None:
             attributes = list(dattr.keys())
             try:
                 attributes.remove('meta')
                 attributes.remove('tmy_year_short')
             except ValueError:
                 pass
 
         weather_df = pd.DataFrame(index=index)
 
         for dset in attributes:
             # switch dset names to pvlib standard
             if dset in [*DSET_MAP.keys()]:
                 column_name = DSET_MAP[dset]
             else:
                 column_name = dset
 
             with NSRDBX(dattr[dset], hsds=hsds) as f:
                 weather_df[column_name] = f[dset, :, gid]
 
         # switch meta key names to pvlib standard
         re_idx = []
         for key in [*meta.index]:
             if key in META_MAP.keys():
                 re_idx.append(META_MAP[key])
             else:
                 re_idx.append(key)
         meta.index = re_idx
 
         return weather_df, meta.to_dict()
 
     elif geospatial:
         nsrdb_fnames, hsds = get_NSRDB_fnames(satellite, names, NREL_HPC)
         weather_ds, meta_df = ini_h5_geospatial(nsrdb_fnames)
 
         for dset in weather_ds.data_vars:
             if dset in [*DSET_MAP.keys()]:
                 weather_ds = weather_ds.rename({dset: DSET_MAP[dset]})
 
         meta_df = meta_df.rename(columns=META_MAP)
 
         return weather_ds, meta_df
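How the file search pattern is assembled, for reference (a sketch of the logic above, not part of the patch):

    import os

    # NREL_HPC=False selects hpc_fp='/nrel/nsrdb/' with hsds=True, and
    # satellite='Americas' maps to 'current', so names=2019 searches for:
    print(os.path.join('/nrel/nsrdb/', 'current', '*_{}.h5'.format(2019)))
    # -> /nrel/nsrdb/current/*_2019.h5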
 
 
+
+def repeat_annual_time_series(time_series, start_year, n_years):
+    """
+    Repeat a pandas time series dataframe containing annual data.
+    For example, repeat TMY data by n_years, adding in leap days as necessary.
+    For now, this function requires 1 or more full years of uniform
+    interval (non-leap year) data, i.e. length must be a multiple of 8760.
+    On leap days, all data is set to 0.
+
+    TODO: make it possible to have weirder time series, e.g. non-uniform
+    intervals. Include an option for synthetic leap day data.
+
+    Parameters:
+    -----------
+    time_series : (pd.DataFrame)
+        pandas dataframe with DatetimeIndex
+    start_year : (int)
+        desired starting year of time_series
+    n_years : (int)
+        number of years to repeat time_series
+
+    Returns:
+    --------
+    new_time_series : (pd.DataFrame)
+        pandas dataframe repeated n_years
+    """
+
+    if len(time_series) % 8760 != 0:
+        raise ValueError('Length of time_series must be a multiple of 8760')
+
+    tz = time_series.index.tz
+    time_series = time_series.tz_localize(
+        None
+    )  # timezone-aware timeseries can cause problems, we'll make it tz-naive for now
+
+    time_series.index = time_series.index.map(lambda dt: dt.replace(year=start_year))
+
+    start = time_series.index[0]
+
+    for year in range(start_year, start_year + n_years):
+        if year == start_year:
+            if is_leap_year(year):
+                this_year = time_series.copy()
+                this_year.index = time_series.index.map(
+                    lambda dt: dt.replace(year=year)
+                )
+                this_year = pd.concat(
+                    [
+                        this_year[: str(year) + "-02-28"],
+                        pd.DataFrame(
+                            0,
+                            # 24 hourly rows for Feb 29; using periods instead
+                            # of an end date avoids duplicating Mar 1 00:00
+                            index=pd.date_range(
+                                start=datetime.datetime(
+                                    year=year, month=2, day=29, minute=start.minute
+                                ),
+                                periods=24,
+                                freq='H',
+                            ),
+                            columns=time_series.columns,
+                        ),
+                        this_year[str(year) + "-03-01" :],
+                    ]
+                )
+                new_time_series = this_year
+
+            else:
+                this_year = time_series.copy()
+                this_year.index = time_series.index.map(
+                    lambda dt: dt.replace(year=year)
+                )
+                new_time_series = this_year
+
+        else:
+            if is_leap_year(year):
+                this_year = time_series.copy()
+                this_year.index = time_series.index.map(
+                    lambda dt: dt.replace(year=year)
+                )
+                this_year = pd.concat(
+                    [
+                        this_year[: str(year) + "-02-28"],
+                        pd.DataFrame(
+                            0,
+                            # 24 hourly rows for Feb 29, as above
+                            index=pd.date_range(
+                                start=datetime.datetime(
+                                    year=year, month=2, day=29, minute=start.minute
+                                ),
+                                periods=24,
+                                freq='H',
+                            ),
+                            columns=time_series.columns,
+                        ),
+                        this_year[str(year) + "-03-01" :],
+                    ]
+                )
+                new_time_series = pd.concat([new_time_series, this_year])
+
+            else:
+                this_year = time_series.copy()
+                this_year.index = time_series.index.map(
+                    lambda dt: dt.replace(year=year)
+                )
+                new_time_series = pd.concat([new_time_series, this_year])
+
+    new_time_series.index = new_time_series.index.tz_localize(
+        tz=tz
+    )  # add back in the timezone
+
+    return new_time_series
+
+
+def is_leap_year(year):
+    '''Returns True if year is a leap year'''
+    if year % 4 != 0:
+        return False
+    elif year % 100 != 0:
+        return True
+    elif year % 400 != 0:
+        return False
+    else:
+        return True
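Usage of repeat_annual_time_series, for reference (a minimal sketch, not part of the patch; the data are synthetic):

    import pandas as pd
    from pvdeg import weather

    # one tz-naive, non-leap year of hourly data (8760 rows)
    idx = pd.date_range('2021-01-01', periods=8760, freq='H')
    tmy = pd.DataFrame({'ghi': range(8760)}, index=idx)

    out = weather.repeat_annual_time_series(tmy, start_year=2024, n_years=2)
    # 2024 is a leap year, so Feb 29 is zero-filled:
    # len(out) == 8760 * 2 + 24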