Skip to content

Commit

Permalink
Really fix TSC test passing criteria (#10)
Browse files Browse the repository at this point in the history
* Add pool_size argument for debugging mode

* Fix criteria for pass/fail of TSC test
Update the pass/fall criteria so it falls in line with the original
paper following discussion in
E3SM-Project/E3SM#4759
First, timesteps are assessed for pass/fail, then an overall pass/fail
is given. For overall FAIL, all timesteps within the inspection window
must fail. For a timestep to fail, at least one variable has its null
hypothesis rejected (i.e. RMSD difference has a p-value less than the
threshold).

* Enhance TSC plots with threshold lines and caption
Add dashed lines to timeseries plots indicating time inspection window
and move the PASS/FAIL text to be centered on that window. Also group
the timeseries and box plots together so they work with LIVV3.x
galleries

* Revise word order in figure captions

* Change to >=2 failed timesteps for overall failure
TSC test fails when two or more timesteps meet failure criteria to
rule out single timestep fluke issues
  • Loading branch information
mkstratos authored Jul 19, 2022
1 parent dff65f8 commit 9b721cc
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 36 deletions.
2 changes: 1 addition & 1 deletion evv4esm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


__version_info__ = (0, 3, 1)
__version_info__ = (0, 3, 2)
__version__ = '.'.join(str(vi) for vi in __version_info__)

PASS_COLOR = '#389933'
Expand Down
10 changes: 9 additions & 1 deletion evv4esm/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,14 @@ def parse_args(args=None):
])
)

parser.add_argument('-p', '--pool-size',
nargs='?',
type=int,
default=(options.mp.cpu_count() - 1 or 1),
help='The number of multiprocessing processes to run '
'analyses in. If zero, processes will run serially '
'outside of the multiprocessing module.')

parser.add_argument('--version',
action='version',
version='EVV {}'.format(evv4esm.__version__),
Expand All @@ -73,7 +81,6 @@ def parse_args(args=None):
from evv4esm import resources
args.livv_resource_dir = livvkit.resource_dir
livvkit.resource_dir = os.sep.join(resources.__path__)

return args


Expand Down Expand Up @@ -106,6 +113,7 @@ def main(cl_args=None):
from livvkit.util import functions
from livvkit import elements

livvkit.pool_size = args.pool_size
if args.extensions:
functions.setup_output()
summary_elements = []
Expand Down
115 changes: 81 additions & 34 deletions evv4esm/extensions/tsc.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,14 +284,23 @@ def main(args):
null_hypothesis = ttest.applymap(lambda x: 'Reject' if x[1] < args.p_threshold else 'Accept')

domains = (
null_hypothesis
# True for rejection of null_hypothesis for each variable at each time, by comparing
# index 1 (x[1]) of each column of tuples, which corresponds to the p-value to the
# threshold for p-values
ttest.applymap(lambda x: x[1] < args.p_threshold)
# Select only times in the inspection window
.query(' seconds >= @args.inspect_times[0] & seconds <= @args.inspect_times[-1]')
.applymap(lambda x: x == 'Reject').all().transform(
lambda x: 'Fail' if x is True else 'Pass'
)
# Create groups of all variables at each time step in the window
.groupby("seconds")
# Are any variables failing at each time step in the inspection window?
.any()
# Since True -> 1 False -> 0, .sum() gets the number of timesteps for which
# the null hypothesis is rejected
.sum()
# If two or more time steps are failing then the domain [glob, lnd, ocn] is failing
.transform(lambda x: "Fail" if x >= 2 else "Pass")
)

overall = 'Fail' if domains.apply(lambda x: x == 'Fail').any() else 'Pass'
overall = 'Fail' if domains[delta_columns].apply(lambda x: x == 'Fail').any() else 'Pass'

ttest.reset_index(inplace=True)
null_hypothesis.reset_index(inplace=True)
Expand Down Expand Up @@ -360,6 +369,17 @@ def pressure_layer_thickness(dataset):
dp = np.expand_dims(da * p0, 1) + (np.expand_dims(db, 1) * np.expand_dims(ps, 0))
return dp, ps

def plot_thresholds(args, axis, x_thr=None, y_thr=None):
"""Add vertical / horiziontal lines on axis indicating certain thresholds."""
if x_thr is None:
x_thr = [args.inspect_times[0], args.inspect_times[-1]]
if y_thr is None:
y_thr = args.p_threshold * 100

axis.axhline(y_thr, linestyle="--", linewidth=1, color="grey", zorder=-1)
for _time in x_thr:
axis.axvline(_time, linestyle="--", linewidth=1, color="grey", zorder=-1)


def plot_bit_for_bit(args):
failing_img_file = os.path.relpath(os.path.join(args.img_dir, 'failing_timeline.png'), os.getcwd())
Expand All @@ -369,6 +389,7 @@ def plot_bit_for_bit(args):
xx = np.arange(0, args.time_slice[1] + args.time_slice[0], args.time_slice[0])
yy = np.zeros(xx.shape)
ax.plot(xx, yy, linestyle='-', marker='o', color=pf_color_picker.get('pass'))
plot_thresholds(args, ax, y_thr=1)

ax.set_ybound(-1, 20)
ax.set_yticks(np.arange(0, 24, 4))
Expand All @@ -381,18 +402,29 @@ def plot_bit_for_bit(args):
plt.savefig(failing_img_file, bbox_inches='tight')
plt.close(fig)

failing_img_caption = 'The number of failing variables across both domains (land and ' \
'ocean) as a function of model integration time.'
failing_img_caption = (
"The number of failing variables across both domains (land and ocean) as a "
"function of model integration time. The dashed horizontal line represents the "
"failing threshold of 1 variable, the dashed vertical lines represent the inspection "
f"window of {args.inspect_times[0]} - {args.inspect_times[-1]} s."
)
failing_img_link = Path(*Path(args.img_dir).parts[-2:], Path(failing_img_file).name)
failing_img = el.Image('Timeline of failing variables', failing_img_caption, failing_img_link, height=300, relative_to="")
failing_img = el.Image(
'Timeline of failing variables',
failing_img_caption,
failing_img_link,
height=300,
relative_to="",
group="Timelines"
)

pmin_img_file = os.path.relpath(os.path.join(args.img_dir, 'pmin_timeline.png'), os.getcwd())
fig, ax = plt.subplots(figsize=(10, 8))
plt.rc('font', family='serif')

ax.semilogy(xx, yy + 1.0, linestyle='-', marker='o', color=pf_color_picker.get('pass'))
plot_thresholds(args, ax)

ax.plot(args.time_slice, [args.p_threshold * 100] * 2, 'k--')
ax.text(np.mean(args.time_slice), 10 ** -1, 'Fail', fontsize=15, color=pf_color_picker.get('fail'),
horizontalalignment='center')
ax.text(np.mean(args.time_slice), 0.5 * 10 ** 1, 'Pass', fontsize=15, color=pf_color_picker.get('pass'),
Expand All @@ -411,14 +443,18 @@ def plot_bit_for_bit(args):
plt.savefig(pmin_img_file, bbox_inches='tight')
plt.close(fig)

pmin_img_caption = 'The minimum P value of all variables in both domains (land and ' \
'ocean) as a function of model integration time plotted with ' \
'a logarithmic y-scale. The dashed grey line indicates the ' \
'threshold for assigning an overall pass or fail to a test ' \
'ensemble; see Wan et al. (2017) eqn. 8.'
# pmin_img_link = os.path.join(os.path.basename(args.img_dir), os.path.basename(pmin_img_file))
pmin_img_caption = (
"The minimum P value of all variables in both domains (land and ocean) as a "
"function of model integration time plotted with a logarithmic y-scale. The "
"dashed horizontal grey line indicates the threshold for assigning an overall "
"pass or fail to a test ensemble; the dashed vertical lines represent the "
f"inspection window of {args.inspect_times[0]} - {args.inspect_times[-1]} s. "
"see Wan et al. (2017) eqn. 8"
)
pmin_img_link = Path(*Path(args.img_dir).parts[-2:], Path(pmin_img_file).name)
pmin_img = el.Image('Timeline of P_{min}', pmin_img_caption, pmin_img_link, height=300, relative_to="")
pmin_img = el.Image(
'Timeline of P_{min}', pmin_img_caption, pmin_img_link, height=300, relative_to="", group="Timelines"
)

return [failing_img, pmin_img]

Expand All @@ -439,16 +475,21 @@ def plot_failing_variables(args, null_hypothesis, img_file):

ax.set_ylabel('Number of failing variables')
ax.set_xlabel('Integration time (s)')
plot_thresholds(args, ax, y_thr=1)

plt.tight_layout()
plt.savefig(img_file, bbox_inches='tight')
plt.close(fig)

img_caption = 'The number of failing variables across both domains (land and ' \
'ocean) as a function of model integration time.'
img_caption = (
"The number of failing variables across both domains (land and ocean) as a "
"function of model integration time. The dashed horizontal line represents the "
"failing threshold of 1 variable, the dashed vertical lines represent the "
f"inspection window of {args.inspect_times[0]} - {args.inspect_times[-1]} s."
)
img_link = Path(*Path(args.img_dir).parts[-2:], Path(img_file).name)
img = el.Image(
'Timeline of failing variables', img_caption, img_link, height=300, relative_to=""
'Timeline of failing variables', img_caption, img_link, height=300, relative_to="", group="Timelines"
)
return img

Expand All @@ -468,14 +509,16 @@ def plot_pmin(args, ttest, img_file):
elif fails.empty:
passes.plot(logy=True, linestyle='-', marker='o', color=pf_color_picker.get('pass'))
else:
first_fail = fails.index[0]
pdata.loc[:first_fail].plot(logy=True, linestyle='-', marker='o', color=pf_color_picker.get('pass'))
pdata.loc[first_fail:].plot(logy=True, linestyle='-', marker='o', color=pf_color_picker.get('fail'))
pdata.plot(logy=True, linestyle="-", color="black")
passes.plot(logy=True, linestyle="None", marker="o", color=pf_color_picker.get("pass"))
fails.plot(logy=True, linestyle="None", marker="o", color=pf_color_picker.get("fail"))

ax.plot(args.time_slice, [0.5, 0.5], 'k--')
ax.text(np.mean(args.time_slice), 10 ** -1, 'Fail', fontsize=15, color=pf_color_picker.get('fail'),
plot_thresholds(args, ax)

inspect_window = [args.inspect_times[0], args.inspect_times[-1]]
ax.text(np.mean(inspect_window), 10 ** -1, 'Fail', fontsize=15, color=pf_color_picker.get('fail'),
horizontalalignment='center')
ax.text(np.mean(args.time_slice), 10 ** 0, 'Pass', fontsize=15, color=pf_color_picker.get('pass'),
ax.text(np.mean(inspect_window), 10 ** 0, 'Pass', fontsize=15, color=pf_color_picker.get('pass'),
horizontalalignment='center')

ax.set_ybound(100, 10 ** -15)
Expand All @@ -491,14 +534,18 @@ def plot_pmin(args, ttest, img_file):
plt.savefig(img_file, bbox_inches='tight')
plt.close(fig)

img_caption = 'The minimum P value of all variables in both domains (land and ' \
'ocean) as a function of model integration time plotted with ' \
'a logarithmic y-scale. The dashed grey line indicates the ' \
'threshold for assigning an overall pass or fail to a test ' \
'ensemble; see Wan et al. (2017) eqn. 8.'
img_caption = (
"The minimum P value of all variables in both domains (land and ocean) as a "
"function of model integration time plotted with a logarithmic y-scale. The "
"dashed horizontal grey line indicates the threshold for assigning an overall "
"pass or fail to a test ensemble; the dashed vertical lines represent the "
f"inspection window of {args.inspect_times[0]} - {args.inspect_times[-1]} s. "
"see Wan et al. (2017) eqn. 8"
)

# img_link = os.path.join(os.path.basename(args.img_dir), os.path.basename(img_file))
img_link = Path(*Path(args.img_dir).parts[-2:], Path(img_file).name)
img = el.Image('Timeline of P_{min}', img_caption, img_link, height=300, relative_to="")
img = el.Image('Timeline of P_{min}', img_caption, img_link, height=300, relative_to="", group="Timelines")
return img


Expand Down Expand Up @@ -572,7 +619,7 @@ def boxplot_delta_rmsd(args, delta_rmsd, null_hypothesis, img_file_format):
cpass=human_color_names['pass'][0])
img_link = Path(*Path(args.img_dir).parts[-2:], Path(img_file).name)
img_list.append(el.Image('Boxplot of normalized ensemble ΔRMSD at {}s'.format(time),
img_caption, img_link, height=300, relative_to=""))
img_caption, img_link, height=300, relative_to="", group="Boxplots"))
return img_list


Expand Down Expand Up @@ -686,7 +733,7 @@ def errorbars_delta_rmsd(args, delta_rmsd, null_hypothesis, img_file_format):
cpass=human_color_names['pass'][0])
img_link = Path(*Path(args.img_dir).parts[-2:], Path(img_file).name)
img_list.append(el.Image('Distribution of the ensemble ΔRMSD at {}s'.format(time),
img_caption, img_link, height=300, relative_to=""))
img_caption, img_link, height=300, relative_to="", group="Boxplots"))
return img_list


Expand Down

0 comments on commit 9b721cc

Please sign in to comment.