From 9b721cc17f6a2d2f6afeb811a2a0803c23372848 Mon Sep 17 00:00:00 2001 From: Mike Kelleher <1476361+mkstratos@users.noreply.github.com> Date: Tue, 19 Jul 2022 16:30:30 -0400 Subject: [PATCH] Really fix TSC test passing criteria (#10) * Add pool_size argument for debugging mode * Fix criteria for pass/fail of TSC test Update the pass/fall criteria so it falls in line with the original paper following discussion in https://github.com/E3SM-Project/E3SM/issues/4759 First, timesteps are assessed for pass/fail, then an overall pass/fail is given. For overall FAIL, all timesteps within the inspection window must fail. For a timestep to fail, at least one variable has its null hypothesis rejected (i.e. RMSD difference has a p-value less than the threshold). * Enhance TSC plots with threshold lines and caption Add dashed lines to timeseries plots indicating time inspection window and move the PASS/FAIL text to be centered on that window. Also group the timeseries and box plots together so they work with LIVV3.x galleries * Revise word order in figure captions * Change to >=2 failed timesteps for overall failure TSC test fails when two or more timesteps meet failure criteria to rule out single timestep fluke issues --- evv4esm/__init__.py | 2 +- evv4esm/__main__.py | 10 +++- evv4esm/extensions/tsc.py | 115 +++++++++++++++++++++++++++----------- 3 files changed, 91 insertions(+), 36 deletions(-) diff --git a/evv4esm/__init__.py b/evv4esm/__init__.py index aa6717f..4f779b9 100644 --- a/evv4esm/__init__.py +++ b/evv4esm/__init__.py @@ -28,7 +28,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -__version_info__ = (0, 3, 1) +__version_info__ = (0, 3, 2) __version__ = '.'.join(str(vi) for vi in __version_info__) PASS_COLOR = '#389933' diff --git a/evv4esm/__main__.py b/evv4esm/__main__.py index 38c72f1..5a910b6 100755 --- a/evv4esm/__main__.py +++ b/evv4esm/__main__.py @@ -59,6 +59,14 @@ def parse_args(args=None): ]) ) + parser.add_argument('-p', '--pool-size', + nargs='?', + type=int, + default=(options.mp.cpu_count() - 1 or 1), + help='The number of multiprocessing processes to run ' + 'analyses in. If zero, processes will run serially ' + 'outside of the multiprocessing module.') + parser.add_argument('--version', action='version', version='EVV {}'.format(evv4esm.__version__), @@ -73,7 +81,6 @@ def parse_args(args=None): from evv4esm import resources args.livv_resource_dir = livvkit.resource_dir livvkit.resource_dir = os.sep.join(resources.__path__) - return args @@ -106,6 +113,7 @@ def main(cl_args=None): from livvkit.util import functions from livvkit import elements + livvkit.pool_size = args.pool_size if args.extensions: functions.setup_output() summary_elements = [] diff --git a/evv4esm/extensions/tsc.py b/evv4esm/extensions/tsc.py index 2c85b6f..b1702c6 100644 --- a/evv4esm/extensions/tsc.py +++ b/evv4esm/extensions/tsc.py @@ -284,14 +284,23 @@ def main(args): null_hypothesis = ttest.applymap(lambda x: 'Reject' if x[1] < args.p_threshold else 'Accept') domains = ( - null_hypothesis + # True for rejection of null_hypothesis for each variable at each time, by comparing + # index 1 (x[1]) of each column of tuples, which corresponds to the p-value to the + # threshold for p-values + ttest.applymap(lambda x: x[1] < args.p_threshold) + # Select only times in the inspection window .query(' seconds >= @args.inspect_times[0] & seconds <= @args.inspect_times[-1]') - .applymap(lambda x: x == 'Reject').all().transform( - lambda x: 'Fail' if x is True else 'Pass' - ) + # Create groups of all variables at each time step in the window + .groupby("seconds") + # Are any variables failing at each time step in the inspection window? + .any() + # Since True -> 1 False -> 0, .sum() gets the number of timesteps for which + # the null hypothesis is rejected + .sum() + # If two or more time steps are failing then the domain [glob, lnd, ocn] is failing + .transform(lambda x: "Fail" if x >= 2 else "Pass") ) - - overall = 'Fail' if domains.apply(lambda x: x == 'Fail').any() else 'Pass' + overall = 'Fail' if domains[delta_columns].apply(lambda x: x == 'Fail').any() else 'Pass' ttest.reset_index(inplace=True) null_hypothesis.reset_index(inplace=True) @@ -360,6 +369,17 @@ def pressure_layer_thickness(dataset): dp = np.expand_dims(da * p0, 1) + (np.expand_dims(db, 1) * np.expand_dims(ps, 0)) return dp, ps +def plot_thresholds(args, axis, x_thr=None, y_thr=None): + """Add vertical / horiziontal lines on axis indicating certain thresholds.""" + if x_thr is None: + x_thr = [args.inspect_times[0], args.inspect_times[-1]] + if y_thr is None: + y_thr = args.p_threshold * 100 + + axis.axhline(y_thr, linestyle="--", linewidth=1, color="grey", zorder=-1) + for _time in x_thr: + axis.axvline(_time, linestyle="--", linewidth=1, color="grey", zorder=-1) + def plot_bit_for_bit(args): failing_img_file = os.path.relpath(os.path.join(args.img_dir, 'failing_timeline.png'), os.getcwd()) @@ -369,6 +389,7 @@ def plot_bit_for_bit(args): xx = np.arange(0, args.time_slice[1] + args.time_slice[0], args.time_slice[0]) yy = np.zeros(xx.shape) ax.plot(xx, yy, linestyle='-', marker='o', color=pf_color_picker.get('pass')) + plot_thresholds(args, ax, y_thr=1) ax.set_ybound(-1, 20) ax.set_yticks(np.arange(0, 24, 4)) @@ -381,18 +402,29 @@ def plot_bit_for_bit(args): plt.savefig(failing_img_file, bbox_inches='tight') plt.close(fig) - failing_img_caption = 'The number of failing variables across both domains (land and ' \ - 'ocean) as a function of model integration time.' + failing_img_caption = ( + "The number of failing variables across both domains (land and ocean) as a " + "function of model integration time. The dashed horizontal line represents the " + "failing threshold of 1 variable, the dashed vertical lines represent the inspection " + f"window of {args.inspect_times[0]} - {args.inspect_times[-1]} s." + ) failing_img_link = Path(*Path(args.img_dir).parts[-2:], Path(failing_img_file).name) - failing_img = el.Image('Timeline of failing variables', failing_img_caption, failing_img_link, height=300, relative_to="") + failing_img = el.Image( + 'Timeline of failing variables', + failing_img_caption, + failing_img_link, + height=300, + relative_to="", + group="Timelines" + ) pmin_img_file = os.path.relpath(os.path.join(args.img_dir, 'pmin_timeline.png'), os.getcwd()) fig, ax = plt.subplots(figsize=(10, 8)) plt.rc('font', family='serif') ax.semilogy(xx, yy + 1.0, linestyle='-', marker='o', color=pf_color_picker.get('pass')) + plot_thresholds(args, ax) - ax.plot(args.time_slice, [args.p_threshold * 100] * 2, 'k--') ax.text(np.mean(args.time_slice), 10 ** -1, 'Fail', fontsize=15, color=pf_color_picker.get('fail'), horizontalalignment='center') ax.text(np.mean(args.time_slice), 0.5 * 10 ** 1, 'Pass', fontsize=15, color=pf_color_picker.get('pass'), @@ -411,14 +443,18 @@ def plot_bit_for_bit(args): plt.savefig(pmin_img_file, bbox_inches='tight') plt.close(fig) - pmin_img_caption = 'The minimum P value of all variables in both domains (land and ' \ - 'ocean) as a function of model integration time plotted with ' \ - 'a logarithmic y-scale. The dashed grey line indicates the ' \ - 'threshold for assigning an overall pass or fail to a test ' \ - 'ensemble; see Wan et al. (2017) eqn. 8.' - # pmin_img_link = os.path.join(os.path.basename(args.img_dir), os.path.basename(pmin_img_file)) + pmin_img_caption = ( + "The minimum P value of all variables in both domains (land and ocean) as a " + "function of model integration time plotted with a logarithmic y-scale. The " + "dashed horizontal grey line indicates the threshold for assigning an overall " + "pass or fail to a test ensemble; the dashed vertical lines represent the " + f"inspection window of {args.inspect_times[0]} - {args.inspect_times[-1]} s. " + "see Wan et al. (2017) eqn. 8" + ) pmin_img_link = Path(*Path(args.img_dir).parts[-2:], Path(pmin_img_file).name) - pmin_img = el.Image('Timeline of P_{min}', pmin_img_caption, pmin_img_link, height=300, relative_to="") + pmin_img = el.Image( + 'Timeline of P_{min}', pmin_img_caption, pmin_img_link, height=300, relative_to="", group="Timelines" + ) return [failing_img, pmin_img] @@ -439,16 +475,21 @@ def plot_failing_variables(args, null_hypothesis, img_file): ax.set_ylabel('Number of failing variables') ax.set_xlabel('Integration time (s)') + plot_thresholds(args, ax, y_thr=1) plt.tight_layout() plt.savefig(img_file, bbox_inches='tight') plt.close(fig) - img_caption = 'The number of failing variables across both domains (land and ' \ - 'ocean) as a function of model integration time.' + img_caption = ( + "The number of failing variables across both domains (land and ocean) as a " + "function of model integration time. The dashed horizontal line represents the " + "failing threshold of 1 variable, the dashed vertical lines represent the " + f"inspection window of {args.inspect_times[0]} - {args.inspect_times[-1]} s." + ) img_link = Path(*Path(args.img_dir).parts[-2:], Path(img_file).name) img = el.Image( - 'Timeline of failing variables', img_caption, img_link, height=300, relative_to="" + 'Timeline of failing variables', img_caption, img_link, height=300, relative_to="", group="Timelines" ) return img @@ -468,14 +509,16 @@ def plot_pmin(args, ttest, img_file): elif fails.empty: passes.plot(logy=True, linestyle='-', marker='o', color=pf_color_picker.get('pass')) else: - first_fail = fails.index[0] - pdata.loc[:first_fail].plot(logy=True, linestyle='-', marker='o', color=pf_color_picker.get('pass')) - pdata.loc[first_fail:].plot(logy=True, linestyle='-', marker='o', color=pf_color_picker.get('fail')) + pdata.plot(logy=True, linestyle="-", color="black") + passes.plot(logy=True, linestyle="None", marker="o", color=pf_color_picker.get("pass")) + fails.plot(logy=True, linestyle="None", marker="o", color=pf_color_picker.get("fail")) - ax.plot(args.time_slice, [0.5, 0.5], 'k--') - ax.text(np.mean(args.time_slice), 10 ** -1, 'Fail', fontsize=15, color=pf_color_picker.get('fail'), + plot_thresholds(args, ax) + + inspect_window = [args.inspect_times[0], args.inspect_times[-1]] + ax.text(np.mean(inspect_window), 10 ** -1, 'Fail', fontsize=15, color=pf_color_picker.get('fail'), horizontalalignment='center') - ax.text(np.mean(args.time_slice), 10 ** 0, 'Pass', fontsize=15, color=pf_color_picker.get('pass'), + ax.text(np.mean(inspect_window), 10 ** 0, 'Pass', fontsize=15, color=pf_color_picker.get('pass'), horizontalalignment='center') ax.set_ybound(100, 10 ** -15) @@ -491,14 +534,18 @@ def plot_pmin(args, ttest, img_file): plt.savefig(img_file, bbox_inches='tight') plt.close(fig) - img_caption = 'The minimum P value of all variables in both domains (land and ' \ - 'ocean) as a function of model integration time plotted with ' \ - 'a logarithmic y-scale. The dashed grey line indicates the ' \ - 'threshold for assigning an overall pass or fail to a test ' \ - 'ensemble; see Wan et al. (2017) eqn. 8.' + img_caption = ( + "The minimum P value of all variables in both domains (land and ocean) as a " + "function of model integration time plotted with a logarithmic y-scale. The " + "dashed horizontal grey line indicates the threshold for assigning an overall " + "pass or fail to a test ensemble; the dashed vertical lines represent the " + f"inspection window of {args.inspect_times[0]} - {args.inspect_times[-1]} s. " + "see Wan et al. (2017) eqn. 8" + ) + # img_link = os.path.join(os.path.basename(args.img_dir), os.path.basename(img_file)) img_link = Path(*Path(args.img_dir).parts[-2:], Path(img_file).name) - img = el.Image('Timeline of P_{min}', img_caption, img_link, height=300, relative_to="") + img = el.Image('Timeline of P_{min}', img_caption, img_link, height=300, relative_to="", group="Timelines") return img @@ -572,7 +619,7 @@ def boxplot_delta_rmsd(args, delta_rmsd, null_hypothesis, img_file_format): cpass=human_color_names['pass'][0]) img_link = Path(*Path(args.img_dir).parts[-2:], Path(img_file).name) img_list.append(el.Image('Boxplot of normalized ensemble ΔRMSD at {}s'.format(time), - img_caption, img_link, height=300, relative_to="")) + img_caption, img_link, height=300, relative_to="", group="Boxplots")) return img_list @@ -686,7 +733,7 @@ def errorbars_delta_rmsd(args, delta_rmsd, null_hypothesis, img_file_format): cpass=human_color_names['pass'][0]) img_link = Path(*Path(args.img_dir).parts[-2:], Path(img_file).name) img_list.append(el.Image('Distribution of the ensemble ΔRMSD at {}s'.format(time), - img_caption, img_link, height=300, relative_to="")) + img_caption, img_link, height=300, relative_to="", group="Boxplots")) return img_list