diff --git a/src/score_hv/__init__.py b/src/score_hv/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/score_hv/harvesters/__init__.py b/src/score_hv/harvesters/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/score_hv/harvesters/daily_bfg.py b/src/score_hv/harvesters/daily_bfg.py index c3130b2..09c1e8e 100755 --- a/src/score_hv/harvesters/daily_bfg.py +++ b/src/score_hv/harvesters/daily_bfg.py @@ -23,9 +23,9 @@ statistics but are in development and are currently not fully supported. """ VALID_VARIABLES = (#'icetk', # sea ice thickness (m) - #'lhtfl_ave', # surface latent heat flux (W m^-2) + 'lhtfl_ave', # surface latent heat flux (W m^-2) #'prate_ave', # surface precip rate (mm weq. s^-1) - 'prateb_ave', # bucket surface precip rate (mm weq. s^-1) + #'prateb_ave', # bucket surface precip rate (mm weq. s^-1) #'pressfc', # surface pressure (Pa) #'snod', # surface snow depth (m) #'soil4', # liquid soil moisture at layer-4 (?) @@ -183,7 +183,7 @@ def get_data(self): """ if statistic == 'mean': value = expected_value - + elif statistic == 'variance': value = -expected_value**2 + np.ma.sum( temporal_means**2 * @@ -192,10 +192,10 @@ def get_data(self): elif statistic == 'maximum': value = np.ma.max(temporal_means) - + print(value) elif statistic == 'minimum': value = np.ma.min(temporal_means) - + print(value) harvested_data.append(HarvestedData( self.config.harvest_filenames, statistic, @@ -207,4 +207,4 @@ def get_data(self): gridcell_area_data.close() xr_dataset.close() - return harvested_data \ No newline at end of file + return harvested_data diff --git a/tests/data/bfg_1994010100_fhr09_lhtfl_control.nc b/tests/data/bfg_1994010100_fhr09_lhtfl_control.nc new file mode 100644 index 0000000..d6a8509 Binary files /dev/null and b/tests/data/bfg_1994010100_fhr09_lhtfl_control.nc differ diff --git a/tests/data/bfg_1994010100_fhr09_prateb_control.nc b/tests/data/bfg_1994010100_fhr09_prateb_control.nc deleted file mode 100644 index 2ada765..0000000 Binary files a/tests/data/bfg_1994010100_fhr09_prateb_control.nc and /dev/null differ diff --git a/tests/data/bfg_1994010106_fhr06_lhtfl_control.nc b/tests/data/bfg_1994010106_fhr06_lhtfl_control.nc new file mode 100644 index 0000000..7090002 Binary files /dev/null and b/tests/data/bfg_1994010106_fhr06_lhtfl_control.nc differ diff --git a/tests/data/bfg_1994010106_fhr06_prateb_control.nc b/tests/data/bfg_1994010106_fhr06_prateb_control.nc deleted file mode 100644 index 03edc23..0000000 Binary files a/tests/data/bfg_1994010106_fhr06_prateb_control.nc and /dev/null differ diff --git a/tests/data/bfg_1994010106_fhr09_lhtfl_control.nc b/tests/data/bfg_1994010106_fhr09_lhtfl_control.nc new file mode 100644 index 0000000..b6712fe Binary files /dev/null and b/tests/data/bfg_1994010106_fhr09_lhtfl_control.nc differ diff --git a/tests/data/bfg_1994010106_fhr09_prateb_control.nc b/tests/data/bfg_1994010106_fhr09_prateb_control.nc deleted file mode 100644 index 570e890..0000000 Binary files a/tests/data/bfg_1994010106_fhr09_prateb_control.nc and /dev/null differ diff --git a/tests/data/bfg_1994010112_fhr06_lhtfl_control.nc b/tests/data/bfg_1994010112_fhr06_lhtfl_control.nc new file mode 100644 index 0000000..1651100 Binary files /dev/null and b/tests/data/bfg_1994010112_fhr06_lhtfl_control.nc differ diff --git a/tests/data/bfg_1994010112_fhr06_prateb_control.nc b/tests/data/bfg_1994010112_fhr06_prateb_control.nc deleted file mode 100644 index 917aebe..0000000 Binary files a/tests/data/bfg_1994010112_fhr06_prateb_control.nc and /dev/null differ diff --git a/tests/data/bfg_1994010112_fhr09_lhtfl_control.nc b/tests/data/bfg_1994010112_fhr09_lhtfl_control.nc new file mode 100644 index 0000000..805088d Binary files /dev/null and b/tests/data/bfg_1994010112_fhr09_lhtfl_control.nc differ diff --git a/tests/data/bfg_1994010112_fhr09_prateb_control.nc b/tests/data/bfg_1994010112_fhr09_prateb_control.nc deleted file mode 100644 index 011a56d..0000000 Binary files a/tests/data/bfg_1994010112_fhr09_prateb_control.nc and /dev/null differ diff --git a/tests/data/bfg_1994010118_fhr06_lhtfl_control.nc b/tests/data/bfg_1994010118_fhr06_lhtfl_control.nc new file mode 100644 index 0000000..334ab07 Binary files /dev/null and b/tests/data/bfg_1994010118_fhr06_lhtfl_control.nc differ diff --git a/tests/data/bfg_1994010118_fhr06_prateb_control.nc b/tests/data/bfg_1994010118_fhr06_prateb_control.nc deleted file mode 100644 index cdc475a..0000000 Binary files a/tests/data/bfg_1994010118_fhr06_prateb_control.nc and /dev/null differ diff --git a/tests/data/bfg_1994010118_fhr09_lhtfl_control.nc b/tests/data/bfg_1994010118_fhr09_lhtfl_control.nc new file mode 100644 index 0000000..cfe2c02 Binary files /dev/null and b/tests/data/bfg_1994010118_fhr09_lhtfl_control.nc differ diff --git a/tests/data/bfg_1994010118_fhr09_prateb_control.nc b/tests/data/bfg_1994010118_fhr09_prateb_control.nc deleted file mode 100644 index b883faf..0000000 Binary files a/tests/data/bfg_1994010118_fhr09_prateb_control.nc and /dev/null differ diff --git a/tests/data/bfg_1994010200_fhr06_lhtfl_control.nc b/tests/data/bfg_1994010200_fhr06_lhtfl_control.nc new file mode 100644 index 0000000..2137f84 Binary files /dev/null and b/tests/data/bfg_1994010200_fhr06_lhtfl_control.nc differ diff --git a/tests/data/bfg_1994010200_fhr06_prateb_control.nc b/tests/data/bfg_1994010200_fhr06_prateb_control.nc deleted file mode 100644 index 467c6df..0000000 Binary files a/tests/data/bfg_1994010200_fhr06_prateb_control.nc and /dev/null differ diff --git a/tests/test_harvester_daily_bfg_lhtfl_ave.py b/tests/test_harvester_daily_bfg_lhtfl_ave.py new file mode 100755 index 0000000..6f57d1e --- /dev/null +++ b/tests/test_harvester_daily_bfg_lhtfl_ave.py @@ -0,0 +1,233 @@ +#!/usr/bin/env python + +import os +import sys +from pathlib import Path + +import numpy as np +from datetime import datetime +import pytest +import yaml +from netCDF4 import Dataset + +from score_hv import hv_registry +from score_hv.harvester_base import harvest +from score_hv.yaml_utils import YamlLoader +from score_hv.harvesters.innov_netcdf import Region, InnovStatsCfg + +TEST_DATA_FILE_NAMES = ['bfg_1994010100_fhr09_lhtfl_control.nc', + 'bfg_1994010106_fhr06_lhtfl_control.nc', + 'bfg_1994010106_fhr09_lhtfl_control.nc', + 'bfg_1994010112_fhr06_lhtfl_control.nc', + 'bfg_1994010112_fhr09_lhtfl_control.nc', + 'bfg_1994010118_fhr06_lhtfl_control.nc', + 'bfg_1994010118_fhr09_lhtfl_control.nc', + 'bfg_1994010200_fhr06_lhtfl_control.nc'] + +DATA_DIR = os.path.join(Path(__file__).parent.parent.resolve(), 'data') +GRIDCELL_AREA_DATA_PATH = os.path.join(DATA_DIR, + 'gridcell-area' + + '_noaa-ufs-gefsv13replay-pds' + + '_bfg_control_1536x768_20231116.nc') + +CONFIGS_DIR = 'configs' +PYTEST_CALLING_DIR = Path(__file__).parent.resolve() +TEST_DATA_PATH = os.path.join(PYTEST_CALLING_DIR, 'data') +BFG_PATH = [os.path.join(TEST_DATA_PATH, + file_name) for file_name in TEST_DATA_FILE_NAMES] + +VALID_CONFIG_DICT = {'harvester_name': hv_registry.DAILY_BFG, + 'filenames' : BFG_PATH, + 'statistic': ['mean', 'variance', 'minimum', 'maximum'], + 'variable': ['lhtfl_ave']} + +def test_gridcell_area_conservation(tolerance=0.001): + + gridcell_area_data = Dataset(GRIDCELL_AREA_DATA_PATH) + + assert gridcell_area_data['area'].units == 'steradian' + + sum_gridcell_area = np.sum(gridcell_area_data.variables['area']) + + assert sum_gridcell_area < (1 + tolerance) * 4 * np.pi + assert sum_gridcell_area > (1 - tolerance) * 4 * np.pi + + gridcell_area_data.close() + +def test_variable_names(): + data1 = harvest(VALID_CONFIG_DICT) + assert data1[0].variable == 'lhtfl_ave' + +def test_global_mean_values(tolerance=0.001): + """The value of 3.117e-05 is the mean value of the global means + calculated from eight forecast files: + + bfg_1994010100_fhr09_lhtfl_control.nc + bfg_1994010106_fhr06_lhtfl_control.nc + bfg_1994010106_fhr09_lhtfl_control.nc + bfg_1994010112_fhr06_lhtfl_control.nc + bfg_1994010112_fhr09_lhtfl_control.nc + bfg_1994010118_fhr06_lhtfl_control.nc + bfg_1994010118_fhr09_lhtfl_control.nc + bfg_1994010200_fhr06_lhtfl_control.nc + + When averaged together, these files represent a 24 hour mean. The + average value hard-coded in this test was calculated from + these forecast files using a separate python code. + """ + data1 = harvest(VALID_CONFIG_DICT) + global_mean = 85.27307850167932 + assert data1[0].value <= (1 + tolerance) * global_mean + assert data1[0].value >= (1 - tolerance) * global_mean + +def test_global_mean_values2(tolerance=0.001): + """Opens each background Netcdf file using the + netCDF4 library function Dataset and computes the expected value + of the provided variable. In this case lhtfl_ave. + """ + data1 = harvest(VALID_CONFIG_DICT) + + gridcell_area_data = Dataset(GRIDCELL_AREA_DATA_PATH) + norm_weights = gridcell_area_data.variables['area'][:] / np.sum( + gridcell_area_data.variables['area'][:]) + + summation = np.ma.zeros(gridcell_area_data.variables['area'].shape) + for file_count, data_file in enumerate(BFG_PATH): + test_rootgrp = Dataset(data_file) + + summation += test_rootgrp.variables[VALID_CONFIG_DICT['variable'][0]][0] + + test_rootgrp.close() + + temporal_mean = summation / (file_count + 1) + global_mean = np.ma.sum(norm_weights * temporal_mean) + + for i, harvested_tuple in enumerate(data1): + if harvested_tuple.statistic == 'mean': + assert global_mean <= (1 + tolerance) * harvested_tuple.value + assert global_mean >= (1 - tolerance) * harvested_tuple.value + + gridcell_area_data.close() + +def test_gridcell_variance(tolerance=0.001): + """Opens each background Netcdf file using the + netCDF4 library function Dataset and computes the variance + of the provided variable. In this case lhtfl_ave. + """ + data1 = harvest(VALID_CONFIG_DICT) + + gridcell_area_data = Dataset(GRIDCELL_AREA_DATA_PATH) + norm_weights = gridcell_area_data.variables['area'][:] / np.sum( + gridcell_area_data.variables['area'][:]) + + summation = np.ma.zeros(gridcell_area_data.variables['area'].shape) + for file_count, data_file in enumerate(BFG_PATH): + test_rootgrp = Dataset(data_file) + + summation += test_rootgrp.variables[VALID_CONFIG_DICT['variable'][0]][0] + + test_rootgrp.close() + + temporal_mean = summation / (file_count + 1) + + global_mean = np.ma.sum(norm_weights * temporal_mean) + variance = np.ma.sum((temporal_mean - global_mean)**2 * norm_weights) + + for i, harvested_tuple in enumerate(data1): + if harvested_tuple.statistic == 'variance': + assert variance <= (1 + tolerance) * harvested_tuple.value + assert variance >= (1 - tolerance) * harvested_tuple.value + + gridcell_area_data.close() + +def test_gridcell_min_max(tolerance=0.001): + """Opens each background Netcdf file using the + netCDF4 library function Dataset and computes the maximum + of the provided variable. In this case lhtfl_ave. + """ + data1 = harvest(VALID_CONFIG_DICT) + + gridcell_area_data = Dataset(GRIDCELL_AREA_DATA_PATH) + + summation = np.ma.zeros(gridcell_area_data.variables['area'].shape) + for file_count, data_file in enumerate(BFG_PATH): + test_rootgrp = Dataset(data_file) + + summation += test_rootgrp.variables[VALID_CONFIG_DICT['variable'][0]][0] + + test_rootgrp.close() + + temporal_mean = summation / (file_count + 1) + minimum = np.ma.min(temporal_mean) + maximum = np.ma.max(temporal_mean) + + """The following offline min and max were calculated from an external + python code + """ + offline_min = -45.172398 + offline_max = 726.7792 + for i, harvested_tuple in enumerate(data1): + if harvested_tuple.statistic == 'maximum': + assert maximum <= (1 + tolerance) * harvested_tuple.value + assert maximum >= (1 - tolerance) * harvested_tuple.value + + assert offline_max <= (1 + tolerance) * harvested_tuple.value + assert offline_max >= (1 - tolerance) * harvested_tuple.value + + elif harvested_tuple.statistic == 'minimum': + """In order to get the tolerance assert tests to work + the values need to be positive. + """ + minimum = -minimum + harvested_min = harvested_tuple.value + harvested_min = -harvested_min + assert minimum <= (1 + tolerance) * harvested_min + assert minimum >= (1 - tolerance) * harvested_min + + offline_min = -offline_min + assert offline_min <= (1 + tolerance) * harvested_min + assert offline_min >= (1 - tolerance) * harvested_min + + gridcell_area_data.close() + +def test_units(): + data1 = harvest(VALID_CONFIG_DICT) + assert data1[0].units == "w/m**2" + +def test_cycletime(): + """ The hard coded datetimestr 1994-01-01 12:00:00 + is the median midpoint time of the filenames defined above in the + BFG_PATH. We have to convert this into a datetime object in order + to compare this string to what is returned by + daily_bfg.py + """ + data1 = harvest(VALID_CONFIG_DICT) + expected_datetime = datetime.strptime("1994-01-01 12:00:00", + "%Y-%m-%d %H:%M:%S") + assert data1[0].mediantime == expected_datetime + +def test_longname(): + data1 = harvest(VALID_CONFIG_DICT) + var_longname = "surface latent heat flux" + assert data1[0].longname == var_longname + +def test_lhtfl_ave_harvester(): + data1 = harvest(VALID_CONFIG_DICT) + assert type(data1) is list + assert len(data1) > 0 + assert data1[0].filenames==BFG_PATH + +def main(): + test_gridcell_area_conservation() + test_lhtfl_ave_harvester() + test_variable_names() + test_units() + test_global_mean_values() + test_global_mean_values2() + test_gridcell_variance() + test_gridcell_min_max() + test_cycletime() + test_longname() + +if __name__=='__main__': + main()