From b5bba8f2cb9f4adaf1abbf0ef166dfdaa2f45358 Mon Sep 17 00:00:00 2001 From: Tito Dal Canton Date: Tue, 6 Feb 2024 11:42:42 +0100 Subject: [PATCH] Cherry-picked commits for 2.1.4 release (#4606) * Remove reference to relative_example in docs * Use RTLD_GLOBAL for libgomp (#4353) * Use RTLD_GLOBAL for libgomp In https://github.com/conda-forge/pycbc-feedstock/pull/74 it was suggested to use RTLD_GLOBAL for libgomp. Let's see if this works fine with the test suite (which should answer @josh-willis 's concerns). * Move import of ctypes/gomp into __enter__ * Try this * Revert "Revert "Allow SNR optimizer to use candidate point in initial array (#4393)"" This reverts commit 7be12f1ee83e00ca58d724b29acd249bb5d9a3e9. We are now catching up with master, where the bug originally introduced by #4393 is fixed properly, so here I am undoing the temporary fix. * SNR optimisation options for pycbc_live (#4432) * Moving the live optimizer option changes to my own branch * Completing the snr optimization argument group * updating pycbc_live * re-adding bug fix * removing TODO message * Bug with d_e options * Adding optimizer-seed * fixing the d_e optimizer * replacing run.sh code * resolve merge conflict * fixing run.sh * cleaning up args_to_string func * changing comment * codeclimate fixes * module docstring * Update module docstring copyright Co-authored-by: Gareth S Cabourn Davies * Add gareth * removing argv * argument changing * removing duplicated arguments * minor CC points * remove bug introduced when making CC happier --------- Co-authored-by: Gareth S Cabourn Davies Co-authored-by: Thomas Dent * Improvements to single-detector trigger fitting code for PyCBC Live (#4486) * Cleanup * Cleanup * Refactor duration bin parsing code and add support for reading from bank * Minor fix/cleanup to logging * Update CLI checks for duration bins * Cleanup * Ignore inconsistent config when combining * Fix bug * Fix typo Co-authored-by: Gareth S Cabourn Davies * Comment from Gareth --------- Co-authored-by: Gareth S Cabourn Davies * [pycbc live] Don't add snr options to command if they don't exist (#4518) * Don't run snr optimizer setup if not optimizing snr * moving the check to a more appropraite place * setting snr_opt_options to None if not optimizing * [pycbc live] Allowing the use of psd variation in the ranking statistic for pycbc live (#4533) * Modifying files to include psd variation in single detector statistic calculation * ending variation.py with a blank line * Changing to an increment agnostic solution * removing change already fixed * Updating function names and docstrings * removing ToDos and adding more helpful comments * Removing unused import * Codeclimate fixes * Removing excess logging and whitespace mistakes * Removing unused objects + codeclimate fixes * Updating comments and docstrings, removing matchedfilter changes * Revert "Updating comments and docstrings, removing matchedfilter changes" This reverts commit 0e6473a12874b2dbb02952260b81c908540afff7. 
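As a side note on the "Use RTLD_GLOBAL for libgomp" item above: the actual change lives in pycbc/scheme.py (see the diffstat below), but the idea is roughly the sketch that follows. The class name and error handling here are illustrative assumptions, not the real PyCBC code:

    import ctypes
    import ctypes.util

    class OpenMPScheme:
        """Sketch only: load libgomp with RTLD_GLOBAL when the scheme is entered."""

        def __enter__(self):
            # RTLD_GLOBAL makes libgomp's symbols visible to shared libraries
            # loaded afterwards, which is what the conda-forge feedstock
            # discussion referenced above asked for.
            libname = ctypes.util.find_library("gomp")
            if libname is not None:
                self._libgomp = ctypes.CDLL(libname, mode=ctypes.RTLD_GLOBAL)
            return self

        def __exit__(self, *exc):
            return False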
* Removing matchedfilter changes, updating comments and docstrings * Move --verbose to the end of the commands * more comment updates * Repositioning filter recreation * Changes to comments and removing whitespace Co-authored-by: Thomas Dent * removing refchecks * Adding option veification for psd variation * Apply suggestions from code review Co-authored-by: Thomas Dent * fixing EOL error * Refactoring the filter creation function * codeclimate fixes * undo * full_filt func * removing indentation * code climate * code climate * try to quiet codeclimate * codeclimate doesn't know PEP8 * brackets obviate line continuation --------- Co-authored-by: Thomas Dent * added scaling of initial pop in snr_optimizer (#4561) * added scaling of initial pop * init popn in optimize_di & pso func * added changes in optimize_pso * usig logging.debug for snr * Do not set matplotlib's backend in internal modules (#4592) * Set version to 2.1.4 * Remove reference to single_template_examples in docs * Remove reference to hierarchical_model in docs * Live: produce empty trigger fit plot for detectors with no triggers (#4600) * Live: produce empty trigger fit plot for detectors with no triggers * allow for below-threshold triggers * fix thinko in option parsing for defaults (#4615) * fix thinko in option parsing for defaults When an option is not given at all getattr on the args object gives None, but we don't want to translate that into "--option-name None" on the command line. * bugfix obviously we needed to define 'key_name' first .. * Improvements to single fit plots (#4509) * Improvements to single fit plots * Apply suggestions from Gareth Co-authored-by: Gareth S Cabourn Davies --------- Co-authored-by: Gareth S Cabourn Davies --------- Co-authored-by: Ian Harry Co-authored-by: Arthur Tolley <32394213+ArthurTolley@users.noreply.github.com> Co-authored-by: Gareth S Cabourn Davies Co-authored-by: Thomas Dent Co-authored-by: Praveen Kumar <86048588+PRAVEEN-mnl@users.noreply.github.com> --- bin/live/pycbc_live_combine_single_fits | 121 +++--- bin/live/pycbc_live_plot_combined_single_fits | 45 +- bin/live/pycbc_live_plot_single_trigger_fits | 114 ++--- bin/live/pycbc_live_single_trigger_fits | 153 ++++--- bin/pycbc_live | 54 ++- bin/pycbc_make_skymap | 6 + bin/pycbc_optimize_snr | 288 +++---------- docs/inference/models.rst | 4 - examples/live/run.sh | 16 +- pycbc/events/coinc.py | 8 + pycbc/io/live.py | 2 - pycbc/live/__init__.py | 5 + pycbc/live/snr_optimizer.py | 404 ++++++++++++++++++ pycbc/psd/variation.py | 220 +++++++++- pycbc/scheme.py | 25 +- setup.py | 2 +- 16 files changed, 1005 insertions(+), 462 deletions(-) create mode 100644 pycbc/live/__init__.py create mode 100644 pycbc/live/snr_optimizer.py diff --git a/bin/live/pycbc_live_combine_single_fits b/bin/live/pycbc_live_combine_single_fits index 924d6fb2823..a0638ecb036 100644 --- a/bin/live/pycbc_live_combine_single_fits +++ b/bin/live/pycbc_live_combine_single_fits @@ -15,8 +15,10 @@ """Combine PyCBC Live single-detector trigger fitting parameters from several different files.""" -import h5py, numpy as np, argparse +import argparse import logging +import numpy as np +import h5py import pycbc @@ -45,66 +47,80 @@ if args.conservative_percentile < 50 or \ "otherwise it is either not a percentile, or not " "conservative.") -counts_all = {ifo: [] for ifo in args.ifos} -alphas_all = {ifo: [] for ifo in args.ifos} -analysis_dates = [] +logging.info("%d input files", len(args.trfits_files)) + +# We only want to combine fit results if they were done with 
the same +# configuration. So start by finding the most recent fit file and reading its +# configuration parameters. -with h5py.File(args.trfits_files[0], 'r') as fit_f0: - # Store some attributes so we can check that all files are - # comparable +logging.info("Determining the most recent configuration parameters") - # Keep the upper and lower bins - bl = fit_f0['bins_lower'][:] - bu = fit_f0['bins_upper'][:] +latest_date = None +for f in args.trfits_files: + with h5py.File(f, 'r') as fit_f: + if latest_date is None or fit_f.attrs['analysis_date'] > latest_date: + latest_date = fit_f.attrs['analysis_date'] + bl = fit_f['bins_lower'][:] + bu = fit_f['bins_upper'][:] + sngl_rank = fit_f.attrs['sngl_ranking'] + fit_thresh = fit_f.attrs['fit_threshold'] + fit_func = fit_f.attrs['fit_function'] - sngl_rank = fit_f0.attrs['sngl_ranking'] - fit_thresh = fit_f0.attrs['fit_threshold'] - fit_func = fit_f0.attrs['fit_function'] +# Now go back through the fit files and read the actual information. Skip the +# files that have fit parameters inconsistent with what we found earlier. -live_times = {ifo: [] for ifo in args.ifos} +logging.info("Reading individual fit results") +live_times = {ifo: [] for ifo in args.ifos} trigger_file_starts = [] trigger_file_ends = [] - -n_files = len(args.trfits_files) -logging.info("Checking through %d files", n_files) +counts_all = {ifo: [] for ifo in args.ifos} +alphas_all = {ifo: [] for ifo in args.ifos} for f in args.trfits_files: - fits_f = h5py.File(f, 'r') - # Check that the file uses the same setup as file 0, to make sure - # all coefficients are comparable - - assert fits_f.attrs['sngl_ranking'] == sngl_rank - assert fits_f.attrs['fit_threshold'] == fit_thresh - assert fits_f.attrs['fit_function'] == fit_func - assert all(fits_f['bins_lower'][:] == bl) - assert all(fits_f['bins_upper'][:] == bu) - - # Get the time of the first/last triggers in the trigger_fits file - gps_last = 0 - gps_first = np.inf - for ifo in args.ifos: - if ifo not in fits_f: + with h5py.File(f, 'r') as fits_f: + # Check that the file uses the same setup as file 0, to make sure + # all coefficients are comparable + same_conf = (fits_f.attrs['sngl_ranking'] == sngl_rank + and fits_f.attrs['fit_threshold'] == fit_thresh + and fits_f.attrs['fit_function'] == fit_func + and all(fits_f['bins_lower'][:] == bl) + and all(fits_f['bins_upper'][:] == bu)) + if not same_conf: + logging.warn( + "Found a change in the fit configuration, skipping %s", + f + ) continue - trig_times = fits_f[ifo]['triggers']['end_time'][:] - gps_last = max(gps_last, trig_times.max()) - gps_first = min(gps_first, trig_times.min()) - trigger_file_starts.append(gps_first) - trigger_file_ends.append(gps_last) - - for ifo in args.ifos: - if ifo not in fits_f: - live_times[ifo].append(0) - counts_all[ifo].append(-1 * np.ones_like(bl)) - alphas_all[ifo].append(-1 * np.ones_like(bl)) - else: - live_times[ifo].append(fits_f[ifo].attrs['live_time']) - counts_all[ifo].append(fits_f[ifo + '/counts'][:]) - alphas_all[ifo].append(fits_f[ifo + '/fit_coeff'][:]) - if any(np.isnan(fits_f[ifo + '/fit_coeff'][:])): - logging.info("nan in %s, %s", f, ifo) - logging.info(fits_f[ifo + '/fit_coeff'][:]) - fits_f.close() + + # We now determine the (approximate) start/end times of the + # trigger_fits file via the time of the first/last triggers in it. + # Ideally this would be recorded exactly in the file. 
+ gps_last = 0 + gps_first = np.inf + for ifo in args.ifos: + if ifo not in fits_f: + continue + trig_times = fits_f[ifo]['triggers']['end_time'][:] + gps_last = max(gps_last, trig_times.max()) + gps_first = min(gps_first, trig_times.min()) + trigger_file_starts.append(gps_first) + trigger_file_ends.append(gps_last) + + # Read the fitting parameters + for ifo in args.ifos: + if ifo not in fits_f: + live_times[ifo].append(0) + counts_all[ifo].append(-1 * np.ones_like(bl)) + alphas_all[ifo].append(-1 * np.ones_like(bl)) + else: + ffi = fits_f[ifo] + live_times[ifo].append(ffi.attrs['live_time']) + counts_all[ifo].append(ffi['counts'][:]) + alphas_all[ifo].append(ffi['fit_coeff'][:]) + if any(np.isnan(ffi['fit_coeff'][:])): + logging.warn("nan in %s, %s", f, ifo) + logging.warn(ffi['fit_coeff'][:]) # Set up the date array, this is stored as an offset from the first trigger time of # the first file to the last trigger of the file @@ -115,7 +131,7 @@ ad_order = np.argsort(trigger_file_starts) start_time_n = trigger_file_starts[ad_order[0]] ad = trigger_file_ends[ad_order] - start_time_n -# Get the counts and alphas +# Swap the time and bin dimensions for counts and alphas counts_bin = {ifo: [c for c in zip(*counts_all[ifo])] for ifo in args.ifos} alphas_bin = {ifo: [a for a in zip(*alphas_all[ifo])] for ifo in args.ifos} @@ -125,6 +141,7 @@ cons_alphas_out = {ifo: np.zeros(len(alphas_bin[ifo])) for ifo in args.ifos} cons_counts_out = {ifo: np.inf * np.ones(len(alphas_bin[ifo])) for ifo in args.ifos} logging.info("Writing results") + fout = h5py.File(args.output, 'w') fout.attrs['fit_threshold'] = fit_thresh fout.attrs['conservative_percentile'] = args.conservative_percentile diff --git a/bin/live/pycbc_live_plot_combined_single_fits b/bin/live/pycbc_live_plot_combined_single_fits index 7c88c6bd490..5cb729959cd 100644 --- a/bin/live/pycbc_live_plot_combined_single_fits +++ b/bin/live/pycbc_live_plot_combined_single_fits @@ -12,18 +12,23 @@ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General # Public License for more details. -import h5py, numpy as np, argparse +"""Plot the time evolution of fit parameters of PyCBC Live triggers. +""" + +import argparse +import logging +import numpy as np +import h5py import matplotlib matplotlib.use('agg') from matplotlib import pyplot as plt -import logging + from lal import gpstime -import pycbc +import pycbc -parser = argparse.ArgumentParser(usage="", - description="Combine fitting parameters from several different files") +parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("--verbose", action="store_true", help="Print extra debugging information", default=False) parser.add_argument("--combined-fits-file", required=True, @@ -45,7 +50,7 @@ parser.add_argument("--colormap", default="rainbow_r", choices=plt.colormaps(), parser.add_argument("--log-colormap", action='store_true', help="Use log spacing for choosing colormap values " "based on duration bins.") -args=parser.parse_args() +args = parser.parse_args() if '{ifo}' not in args.output_plot_name_format or \ '{type}' not in args.output_plot_name_format: @@ -94,21 +99,6 @@ with h5py.File(args.combined_fits_file, 'r') as cff: bin_starts = bins_edges[:-1] bin_ends = bins_edges[1:] -bin_max = max(bin_ends) -bin_min = min(bin_starts) - -def bin_proportion(upper, lower, log_spacing=False): - if log_spacing: - ll = np.log(lower) - ul = np.log(lower) - centl = (ll + ul) / 2. 
- minl = np.log(bin_min) - maxl = np.log(bin_max) - return (centl - minl) / (maxl - minl) - - else: - return ((lower + upper) / 2. - bin_min) / (bin_max - bin_min) - # Set up the x ticks - note that these are rounded to the nearest # midnight, so may not line up exactly with the data min_start = min([separate_starts[ifo].min() for ifo in ifos]) @@ -157,8 +147,7 @@ for ifo in ifos: mr = mean_count[ifo][i] / live_total[ifo] cr = cons_count[ifo][i] / live_total[ifo] - bin_prop = bin_proportion(bu, bl, - log_spacing=args.log_colormap) + bin_prop = i / len(bin_starts) bin_colour = plt.get_cmap(args.colormap)(bin_prop) bin_label = f"duration {bl:.2f}-{bu:.2f}" alpha_lines += ax_alpha.plot(separate_starts[ifo], alphas, c=bin_colour, @@ -199,6 +188,12 @@ for ifo in ifos: ax.grid(zorder=-30) fig_count.tight_layout() - fig_count.savefig(args.output_plot_name_format.format(ifo=ifo, type='counts')) + fig_count.savefig( + args.output_plot_name_format.format(ifo=ifo, type='counts') + ) fig_alpha.tight_layout() - fig_alpha.savefig(args.output_plot_name_format.format(ifo=ifo, type='fit_coeffs')) + fig_alpha.savefig( + args.output_plot_name_format.format(ifo=ifo, type='fit_coeffs') + ) + +logging.info("Done") diff --git a/bin/live/pycbc_live_plot_single_trigger_fits b/bin/live/pycbc_live_plot_single_trigger_fits index fd7d6dbf7d3..0307f508302 100644 --- a/bin/live/pycbc_live_plot_single_trigger_fits +++ b/bin/live/pycbc_live_plot_single_trigger_fits @@ -12,17 +12,24 @@ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General # Public License for more details. +"""Plot histograms of PyCBC Live triggers split over various parameters, and +the corresponding fits. +""" + +import argparse +import logging import numpy as np -import pycbc -from pycbc import bin_utils -from pycbc.events import trigger_fits as trstats +import h5py import matplotlib matplotlib.use('agg') from matplotlib import pyplot as plt -import argparse, logging, h5py -parser = argparse.ArgumentParser(usage="", - description="Plot histograms of triggers split over various parameters") +import pycbc +from pycbc.bin_utils import IrregularBins +from pycbc.events.trigger_fits import cum_fit as eval_cum_fit + + +parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("--verbose", action="store_true", help="Print extra debugging information", default=False) parser.add_argument("--trigger-fits-file", required=True, @@ -39,11 +46,14 @@ parser.add_argument("--colormap", default="rainbow_r", choices=plt.colormaps(), parser.add_argument("--log-colormap", action='store_true', help="Use log spacing for choosing colormap values " "based on duration bins.") +parser.add_argument("--x-lim-lower", type=float, + help="Add a lower limit to the x-axis of the plot") parser.add_argument("--x-lim-upper", type=float, help="Add an upper limit to the x-axis of the plot") +parser.add_argument("--y-lim-lower", type=float, + help="Add a lower limit to the y-axis of the plot") parser.add_argument("--y-lim-upper", type=float, help="Add an upper limit to the y-axis of the plot") -#parser.add_argument("--", default="", help="") #Add some input sanitisation args = parser.parse_args() @@ -58,6 +68,7 @@ logging.info("Getting trigger fits file information") with h5py.File(args.trigger_fits_file, 'r') as trfit_f: # Get the ifos to plot from the file # Check that all the ifos we want to plot are in the file + all_ifos = trfit_f.attrs['ifos'].split(',') ifos = [k for k in trfit_f.keys() if not k.startswith('bins')] # Grab some info from the 
attributes @@ -79,54 +90,54 @@ with h5py.File(args.trigger_fits_file, 'r') as trfit_f: bu = trfit_f['bins_upper'][:] bl = trfit_f['bins_lower'][:] -bin_max = max(bu) -bin_min = min(bl) - -def bin_proportion(upper, lower, log_spacing=False): - if log_spacing: - ll = np.log(lower) - ul = np.log(lower) - centl = (ll + ul) / 2. - minl = np.log(bin_min) - maxl = np.log(bin_max) - return (centl - minl) / (maxl - minl) - - else: - return ((lower + upper) / 2. - bin_min) / (bin_max - bin_min) - - duration_bin_edges = list(bl) + [bu[-1]] - -tbins = bin_utils.IrregularBins(duration_bin_edges) +tbins = IrregularBins(duration_bin_edges) logger = logging.getLogger() init_level = logger.level logging.info("Plotting fits") -for ifo in ifos: - # Skip if no triggers in this IFO - if not len(stats[ifo]): continue +for ifo in all_ifos: + fig, ax = plt.subplots(1) + oput_plot = args.output_plot_name_format.format(ifo=ifo) + + if ifo not in ifos or not len(stats[ifo]): + # Plot a blank plot with a message to show it worked, but there + # weren't any triggers + plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, + right=False) + ax.text( + 0.5, 0.5, + "No triggers above threshold in this detector", + horizontalalignment='center', + verticalalignment='center', + ) + logging.info(f"Saving {oput_plot}") + # Save initial logging level + logger.setLevel(logging.WARNING) + fig.savefig(oput_plot) + logger.setLevel(init_level) + + continue # Keep track of some maxima for use in setting the plot limits maxstat = stats[ifo].max() max_rate = 0 - fig, ax = plt.subplots(1) - - plotbins = np.linspace(fit_threshold, 1.05 * maxstat) + plotbins = np.linspace(fit_threshold, 1.05 * maxstat, 400) logging.info("Putting events into bins") event_bin = np.array([tbins[d] for d in durations[ifo]]) - for bin_num, lower_upper in enumerate(zip(duration_bin_edges[:-1], - duration_bin_edges[1:])): + for bin_num, lower_upper in enumerate(zip(tbins.lower(), tbins.upper())): lower, upper = lower_upper binlabel = f"{lower:.3g} - {upper:.3g}" inbin = event_bin == bin_num # Skip if there are no triggers in this bin in this IFO - if not any(inbin): continue + if not any(inbin): + continue binned_sngl_stats = stats[ifo][event_bin == bin_num] # Histogram the triggers @@ -136,34 +147,39 @@ for ifo in ifos: max_rate = max(max_rate, cum_rate[0]) - cum_fit = counts[ifo][bin_num] / live_time[ifo] * \ - trstats.cum_fit(fit_function, plotbins, - alphas[ifo][bin_num], fit_threshold) + ecf = eval_cum_fit( + fit_function, + plotbins, + alphas[ifo][bin_num], + fit_threshold + ) + cum_fit = counts[ifo][bin_num] / live_time[ifo] * ecf - # Get the colour from the centre of the bin vs the full range - # of all bins - bin_prop = bin_proportion(upper, lower, - log_spacing=args.log_colormap) + bin_prop = bin_num / len(tbins) bin_colour = plt.get_cmap(args.colormap)(bin_prop) ax.plot(edges[:-1], cum_rate, linewidth=2, color=bin_colour, label=binlabel, alpha=0.6) ax.plot(plotbins, cum_fit, "--", color=bin_colour, label=r"$\alpha = $%.2f" % alphas[ifo][bin_num]) - oput_plot = args.output_plot_name_format.format(ifo=ifo) ax.semilogy() ax.grid() - x_upper = args.x_lim_upper or 1.05 * maxstat - y_upper = args.y_lim_upper or 1.5 * max_rate - ax.set_xlim(fit_threshold, x_upper) - ax.set_ylim(0.5 / live_time[ifo], y_upper) + ax.set_xlim( + fit_threshold if args.x_lim_lower is None else args.x_lim_lower, + 1.05 * maxstat if args.x_lim_upper is None else args.x_lim_upper + ) + ax.set_ylim( + 0.5 / live_time[ifo] if args.y_lim_lower is None else 
args.y_lim_lower, + 1.5 * max_rate if args.y_lim_upper is None else args.y_lim_upper + ) ax.set_xlabel(sngl_ranking) ax.set_ylabel("Number of louder triggers per live time") title = f"{ifo} {analysis_date} trigger fits" ax.set_title(title) - ax.legend(loc='upper right') + ax.legend(loc='center left', bbox_to_anchor=(1.01, 0.5)) logging.info(f"Saving {oput_plot}") # Save initial logging level logger.setLevel(logging.WARNING) - fig.savefig(oput_plot) + fig.savefig(oput_plot, bbox_inches="tight") logger.setLevel(init_level) - + +logging.info("Done") diff --git a/bin/live/pycbc_live_single_trigger_fits b/bin/live/pycbc_live_single_trigger_fits index 5523b1c76d5..86488c1a6ae 100644 --- a/bin/live/pycbc_live_single_trigger_fits +++ b/bin/live/pycbc_live_single_trigger_fits @@ -12,17 +12,54 @@ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General # Public License for more details. +"""Fit a background model to single-detector triggers from PyCBC Live. + +See https://arxiv.org/abs/2008.07494 for a description of the method.""" + +import os +import sys +import argparse +import logging import numpy as np +import h5py import pycbc -from pycbc import bin_utils -from pycbc.events import cuts, triggers, trigger_fits as trstats +from pycbc.bin_utils import IrregularBins +from pycbc.events import cuts, trigger_fits as trstats from pycbc.io import DictArray from pycbc.events import ranking from pycbc.events.coinc import cluster_over_time -import argparse, logging, os, sys, h5py -parser = argparse.ArgumentParser(usage="", - description="Plot histograms of triggers split over various parameters") + +def duration_bins_from_cli(args): + """Create the duration bins from CLI options. + """ + if args.duration_bin_edges: + # direct bin specification + return np.array(args.duration_bin_edges) + # calculate bins from min/max and number + min_dur = args.duration_bin_start + max_dur = args.duration_bin_end + if args.duration_from_bank: + # read min/max duration directly from the bank itself + with h5py.File(args.duration_from_bank, 'r') as bank_file: + temp_durs = bank_file['template_duration'][:] + min_dur, max_dur = min(temp_durs), max(temp_durs) + if args.duration_bin_spacing == 'log': + return np.logspace( + np.log10(min_dur), + np.log10(max_dur), + args.num_duration_bins + 1 + ) + if args.duration_bin_spacing == 'linear': + return np.linspace( + min_dur, + max_dur, + args.num_duration_bins + 1 + ) + raise RuntimeError("Invalid duration bin specification") + + +parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("--verbose", action="store_true", help="Print extra debugging information", default=False) parser.add_argument("--ifos", nargs="+", required=True, @@ -53,14 +90,18 @@ parser.add_argument("--duration-bin-start", type=float, "--duration-bin-end and --num-duration-bins.") parser.add_argument("--duration-bin-end", type=float, help="Longest duration to use for duration bins.") +parser.add_argument("--duration-from-bank", + help="Path to the template bank file to get max/min " + "durations from.") parser.add_argument("--num-duration-bins", type=int, help="How many template duration bins to split the bank " "into before fitting.") parser.add_argument("--duration-bin-spacing", choices=['linear','log'], default='log', help="How to set spacing for bank split " - "if using --duration-bin-start, --duration-bin-end " - "and --num-duration-bins.") + "if using --num-duration-bins and " + "--duration-bin-start + --duration-bin-end " + "or --duration-from-bank.") 
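To make the duration-bin options just above concrete, here is a small worked example of the log-spaced branch of duration_bins_from_cli; the numerical values are hypothetical, not defaults:

    import numpy as np

    # equivalent to: --duration-bin-start 0.5 --duration-bin-end 128 \
    #                --num-duration-bins 4 --duration-bin-spacing log
    edges = np.logspace(np.log10(0.5), np.log10(128.0), 4 + 1)
    # edges -> array([  0.5,   2.,   8.,  32., 128.])

With --duration-from-bank, the 0.5 and 128 above would instead be the minimum and maximum template_duration read from the given bank file.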
parser.add_argument('--prune-loudest', type=int, help="Maximum number of loudest trigger clusters to " "remove from each bin.") @@ -82,7 +123,6 @@ parser.add_argument("--output", required=True, parser.add_argument("--sngl-ranking", default="newsnr", choices=ranking.sngls_ranking_function_dict.keys(), help="The single-detector trigger ranking to use.") -#parser.add_argument("--", default="", help="") cuts.insert_cuts_option_group(parser) @@ -95,25 +135,25 @@ prune_options = [args.prune_loudest, args.prune_window, args.prune_stat_threshold] if any(prune_options) and not all(prune_options): - parser.error("Require all or none of --prune-loudest, " - "--prune-window and --prune-stat-threshold") + parser.error("Require all or none of --prune-loudest, " + "--prune-window and --prune-stat-threshold") # Check the bin options -if args.duration_bin_edges and (args.duration_bin_start or - args.duration_bin_end or - args.num_duration_bins): - # duration bin edges specified as well as linear/logarithmic - parser.error("Cannot use --duration-bin-edges with " - "--duration-bin-start, --duration-bin-end or " - "--num-duration-bins.") - -if not args.duration_bin_edges and not (args.duration_bin_start and - args.duration_bin_end and - args.num_duration_bins): - parser.error("--duration-bin-start, --duration-bin-end and " - "--num-duration-bins must be set if not using " - "--duration-bin-edges.") - +if args.duration_bin_edges: + if (args.duration_bin_start or args.duration_bin_end or + args.duration_from_bank or args.num_duration_bins): + parser.error("Cannot use --duration-bin-edges with " + "--duration-bin-start, --duration-bin-end, " + "--duration-from-bank or --num-duration-bins.") +else: + if not args.num_duration_bins: + parser.error("--num-duration-bins must be set if not using " + "--duration-bin-edges.") + if not ((args.duration_bin_start and args.duration_bin_end) or + args.duration_from_bank): + parser.error("--duration-bin-start & --duration-bin-end or " + "--duration-from-bank must be set if not using " + "--duration-bin-edges.") if args.duration_bin_end and \ args.duration_bin_end <= args.duration_bin_start: parser.error("--duration-bin-end must be greater than " @@ -122,24 +162,15 @@ if args.duration_bin_end and \ pycbc.init_logging(args.verbose) -# Create the duration bins -if args.duration_bin_edges: - duration_bin_edges = np.array(args.duration_bin_edges) -elif args.duration_bin_spacing == 'log': - duration_bin_edges = np.logspace(np.log10(args.duration_bin_start), - np.log10(args.duration_bin_end), - args.num_duration_bins + 1) -elif args.duration_bin_spacing == 'linear': - duration_bin_edges = np.linspace(args.duration_bin_start, - args.duration_bin_end, - args.num_duration_bins + 1) +duration_bin_edges = duration_bins_from_cli(args) +logging.info("Duration bin edges: %s", duration_bin_edges) logging.info("Finding files") files = [f for f in os.listdir(os.path.join(args.top_directory, args.analysis_date)) if args.file_identifier in f] -logging.info("{} files found".format(len(files))) +logging.info("%s files found", len(files)) # Add template duration cuts according to the bin inputs args.template_cuts = args.template_cuts or [] @@ -158,7 +189,7 @@ logging.info("Setting up the cut dictionaries") trigger_cut_dict, template_cut_dict = cuts.ingest_cuts_option_group(args) logging.info("Setting up duration bins") -tbins = bin_utils.IrregularBins(duration_bin_edges) +tbins = IrregularBins(duration_bin_edges) # Also calculate live time so that this fitting can be used in rate estimation # Live 
time is not immediately obvious - get an approximation with 8 second @@ -179,18 +210,16 @@ files = [f for f in os.listdir(date_directory) if args.file_identifier in f and f.endswith('hdf')] events = {} -counter = 0 -for filename in files: - counter += 1 - if counter % 1000 == 0: - logging.info("Processed %d files" % counter) +for counter, filename in enumerate(files): + if counter and counter % 1000 == 0: + logging.info("Processed %d/%d files", counter, len(files)) for ifo in args.ifos: if ifo not in events: # In case of no triggers for an extended period logging.info("%s: No data", ifo) else: - logging.info("%s: %d triggers in %.0fs", ifo, + logging.info("%s: %d triggers in %.0f s", ifo, events[ifo].data['snr'].size, live_time[ifo]) f = os.path.join(date_directory, filename) @@ -198,7 +227,7 @@ for filename in files: try: h5py.File(f, 'r') except IOError: - logging.info('IOError with file ' + f) + logging.warn('IOError with file ' + f) continue # Triggers for this file @@ -292,7 +321,7 @@ counts = {i: np.zeros(n_bins, dtype=np.float32) for i in args.ifos} event_bins = {} times_to_prune = {ifo: [] for ifo in args.ifos} -for ifo in events.keys(): +for ifo in events: # Sort the events into their bins event_bins[ifo] = np.array([tbins[d] for d in events[ifo].data['template_duration']]) @@ -330,7 +359,7 @@ if args.prune_loudest: "triggers in each bin if %s > %.2f", args.prune_window, args.prune_loudest, args.sngl_ranking, args.prune_stat_threshold) - for ifo in events.keys(): + for ifo in events: times = events[ifo].data['end_time'][:] outwith_window = np.ones_like(times, dtype=bool) for t in times_to_prune[ifo]: @@ -356,7 +385,7 @@ if args.prune_loudest: n_pruned_thisbin, ifo, bin_num) # Do the fitting for each bin -for ifo in events.keys(): +for ifo in events: for bin_num in range(n_bins): inbin = event_bins[ifo] == bin_num @@ -369,30 +398,32 @@ for ifo in events.keys(): counts[ifo][bin_num] = np.count_nonzero(inbin) alphas[ifo][bin_num], _ = trstats.fit_above_thresh( - args.fit_function, - events[ifo].data[args.sngl_ranking][inbin], - args.fit_threshold) + args.fit_function, + events[ifo].data[args.sngl_ranking][inbin], + args.fit_threshold + ) logging.info("Writing results") with h5py.File(args.output, 'w') as fout: - for ifo in events.keys(): - fout.create_group(ifo) + for ifo in events: + fout_ifo = fout.create_group(ifo) # Save the triggers we have used for the fits - fout[ifo].create_group('triggers') + fout_ifo_trigs = fout_ifo.create_group('triggers') for key in events[ifo].data: - fout[ifo]['triggers'][key] = events[ifo].data[key] + fout_ifo_trigs[key] = events[ifo].data[key] if ifo in pruned_trigger_times: - fout[ifo]['pruned_trigger_times'] = pruned_trigger_times[ifo] + fout_ifo['pruned_trigger_times'] = pruned_trigger_times[ifo] - fout[ifo]['fit_coeff'] = alphas[ifo] - fout[ifo]['counts'] = counts[ifo] - fout[ifo].attrs['live_time'] = live_time[ifo] - fout[ifo].attrs['pruned_times'] = times_to_prune[ifo] - fout[ifo].attrs['n_pruned'] = n_pruned[ifo] + fout_ifo['fit_coeff'] = alphas[ifo] + fout_ifo['counts'] = counts[ifo] + fout_ifo.attrs['live_time'] = live_time[ifo] + fout_ifo.attrs['pruned_times'] = times_to_prune[ifo] + fout_ifo.attrs['n_pruned'] = n_pruned[ifo] fout['bins_upper'] = tbins.upper() fout['bins_lower'] = tbins.lower() + fout.attrs['ifos'] = ','.join(args.ifos) fout.attrs['analysis_date'] = args.analysis_date fout.attrs['input'] = sys.argv fout.attrs['cuts'] = args.template_cuts + args.trigger_cuts diff --git a/bin/pycbc_live b/bin/pycbc_live index 
763d4ef0f09..646d4bebf40 100755 --- a/bin/pycbc_live +++ b/bin/pycbc_live @@ -42,6 +42,8 @@ from pycbc import mchirp_area from pycbc.detector import ppdets from pycbc.filter import resample from pycbc.psd import estimate +from pycbc.psd import variation +from pycbc.live import snr_optimizer # Use cached class-based FFTs in the resample and estimate module resample.USE_CACHING_FOR_LFILTER = True @@ -119,6 +121,7 @@ class LiveEventManager(object): if platform.system() != 'Darwin': available_cores = len(os.sched_getaffinity(0)) self.fu_cores = available_cores - analysis_cores + self.optimizer = args.snr_opt_method if self.fu_cores <= 0: logging.warning( 'Insufficient number of CPU cores (%d) to ' @@ -130,6 +133,10 @@ class LiveEventManager(object): else: # To enable mac testing, this is just set to 1 self.fu_cores = 1 + # Convert SNR optimizer options into a string + self.snr_opt_options = snr_optimizer.args_to_string(args) + else: + self.snr_opt_options = None if args.enable_embright_has_massgap: if args.embright_massgap_max < self.mc_area_args['mass_bdary']['ns_max']: @@ -321,6 +328,12 @@ class LiveEventManager(object): cmd = 'timeout {} '.format(args.snr_opt_timeout) exepath = which('pycbc_optimize_snr') cmd += exepath + ' ' + + # Add SNR optimization options to the command + if self.snr_opt_options is not None: + cmd += self.snr_opt_options + + # Add data files according to the event we are optimizing data_fils_str = '--data-files ' psd_fils_str = '--psd-files ' for ifo in live_ifos: @@ -968,15 +981,13 @@ parser.add_argument('--size-override', type=int, metavar='N', parser.add_argument('--fftw-planning-limit', type=float, help="Time in seconds to allow for a plan to be created") parser.add_argument('--run-snr-optimization', action='store_true', - default=False, help='Run spawned followup processes to maximize SNR for ' 'any trigger uploaded to GraceDB') parser.add_argument('--snr-opt-timeout', type=int, default=400, metavar='SECONDS', help='Maximum allowed duration of followup process to maximize SNR') -parser.add_argument('--snr-opt-label', type=str, default='SNR_OPTIMIZED', +parser.add_argument('--snr-opt-label', default='SNR_OPTIMIZED', help='Label to apply to snr-optimized GraceDB uploads') - parser.add_argument('--enable-embright-has-massgap', action='store_true', default=False, help='Estimate HasMassGap probability for EMBright info. Lower limit ' 'of the mass gap is equal to the maximum NS mass used for ' @@ -986,6 +997,11 @@ parser.add_argument('--embright-massgap-max', type=float, default=5.0, metavar=' 'HasMassGap probability.') parser.add_argument('--skymap-only-ifos', nargs='+', help="Detectors that only contribute in sky localization") +parser.add_argument('--psd-variation', action='store_true', + help="Run the psd variation code to produce psd variation " + "values for each single detector triggers found by " + "the search. 
Required when using a single detector " + "ranking statistic that includes psd variation.") scheme.insert_processing_option_group(parser) LiveSingle.insert_args(parser) @@ -994,11 +1010,13 @@ Coincer.insert_args(parser) SingleDetSGChisq.insert_option_group(parser) mchirp_area.insert_args(parser) livepau.insert_live_pastro_option_group(parser) +snr_optimizer.insert_snr_optimizer_options(parser) args = parser.parse_args() scheme.verify_processing_options(args, parser) fft.verify_fft_options(args, parser) +Coincer.verify_args(args, parser) ifos = set(args.channel_name.keys()) analyze_singles = LiveSingle.verify_args(args, parser, ifos) @@ -1151,6 +1169,11 @@ with ctx: last_bg_dump_time = int(data_end()) psd_count = {ifo:0 for ifo in ifos} + # Create dicts to track whether the psd has been recalculated and to hold + # psd variation filters + psd_recalculated = {ifo: True for ifo in ifos} + psd_var_filts = {ifo: None for ifo in ifos} + while data_end() < args.end_time: t1 = pycbc.gps_now() logging.info('Analyzing from %s', data_end()) @@ -1166,6 +1189,9 @@ with ctx: ) if status and psd_count[ifo] == 0: status = data_reader[ifo].recalculate_psd() + # If the psd has been recalculated then we need a new + # filter for psd variation calculation + psd_recalculated[ifo] = True psd_count[ifo] = args.psd_recompute_length - 1 elif not status: psd_count[ifo] = 0 @@ -1231,6 +1257,28 @@ with ctx: if len(results[ifo][key]): results[ifo][key] = results[ifo][key][idx] + # Calculate and add the psd variation for the results + if args.psd_variation: + + for ifo in results: + logging.info(f"Calculating PSD Variation Statistic for {ifo}") + + # A new filter is needed if the PSD has been recalculated + if psd_recalculated[ifo] is True: + psd_var_filts[ifo] = variation.live_create_filter(data_reader[ifo].psd, + args.psd_segment_length, + int(args.sample_rate)) + psd_recalculated[ifo] = False + + psd_var_ts = variation.live_calc_psd_variation(data_reader[ifo].strain, + psd_var_filts[ifo], + args.increment) + + psd_var_vals = variation.live_find_var_value(results[ifo], + psd_var_ts) + + results[ifo]['psd_var_val'] = psd_var_vals + # Look for coincident triggers and do background estimation if args.enable_background_estimation: coinc_results = coinc_pool.broadcast(get_coinc, results) diff --git a/bin/pycbc_make_skymap b/bin/pycbc_make_skymap index fd024496fd3..4b011831011 100755 --- a/bin/pycbc_make_skymap +++ b/bin/pycbc_make_skymap @@ -11,6 +11,12 @@ import logging import subprocess import tempfile import shutil + +# we will make plots on a likely headless machine, so make sure matplotlib's +# backend is set appropriately +from matplotlib import use as mpl_use_backend +mpl_use_backend('agg') + import numpy as np import h5py import pycbc diff --git a/bin/pycbc_optimize_snr b/bin/pycbc_optimize_snr index f6c51c7b135..5094a02dc62 100755 --- a/bin/pycbc_optimize_snr +++ b/bin/pycbc_optimize_snr @@ -3,210 +3,27 @@ """Followup utility to optimize the SNR of a PyCBC Live trigger.""" import os -import argparse, numpy, h5py -import time -import types +import argparse import logging -from scipy.optimize import differential_evolution, shgo + +# we will make plots on a likely headless machine, so make sure matplotlib's +# backend is set appropriately +from matplotlib import use as mpl_use_backend +mpl_use_backend('agg') + +import numpy +import h5py import pycbc from pycbc import ( - DYN_RANGE_FAC, fft, scheme, version, waveform + fft, scheme, version ) -from pycbc.types import MultiDetOptionAction, zeros, load_frequencyseries 
-from pycbc.filter import matched_filter_core -import pycbc.waveform.bank +from pycbc.types import MultiDetOptionAction, load_frequencyseries import pycbc.conversions as cv -from pycbc.io import live +from pycbc.io.live import CandidateForGraceDB from pycbc.io.hdf import load_hdf5_to_dict from pycbc.detector import Detector from pycbc.psd import interpolate - -try: - import pyswarms as ps -except: - ps = None - -# Set a minimum mass for points tried in optimization allowing for -# minimal slop relative to the lightest template -MIN_CPT_MASS = 0.99 - -# Set a large maximum total mass -MAX_MTOTAL = 500. - -# Set a notional maximum and minimum possible eta -MAX_ETA = 0.24999999999 -MIN_ETA = 0.01 - -Nfeval = 0 -start_time = time.time() - -def callback_func(Xi, convergence=0): - global Nfeval - logging.info("Currently at %d %s", Nfeval, convergence) - if (time.time() - start_time) > 360: - return True - Nfeval += 1 - - -def compute_network_snr_core(v, *argv, raise_err=False): - """ - Compute network SNR as a function over mchirp, eta and two aligned - spin components, stored in that order in the sequence v - """ - data = argv[0] - coinc_times = argv[1] - ifos = argv[2] - flen = argv[3] - approximant = argv[4] - flow = argv[5] - f_end = argv[6] - delta_f = argv[7] - sample_rate = argv[8] - distance = 1.0 / DYN_RANGE_FAC - mtotal = cv.mtotal_from_mchirp_eta(v[0], v[1]) - mass1 = cv.mass1_from_mtotal_eta(mtotal, v[1]) - mass2 = cv.mass2_from_mtotal_eta(mtotal, v[1]) - - # enforce broadly accepted search space boundaries - if mass1 < MIN_CPT_MASS or mass2 < MIN_CPT_MASS or mtotal > MAX_MTOTAL: - return -numpy.inf, {} - - try: - htilde = waveform.get_waveform_filter( - zeros(flen, dtype=numpy.complex64), - approximant=approximant, - mass1=mass1, mass2=mass2, spin1z=v[2], spin2z=v[3], - f_lower=flow, f_final=f_end, delta_f=delta_f, - delta_t=1./sample_rate, distance=distance) - except RuntimeError: - if raise_err: - raise - # assume a failure in the waveform approximant - # due to the choice of parameters - else: - return -numpy.inf, {} - - if not hasattr(htilde, 'params'): - htilde.params = dict(mass1=mass1, mass2=mass2, - spin1z=v[2], spin2z=v[3]) - if not hasattr(htilde, 'end_idx'): - htilde.end_idx = int(f_end / htilde.delta_f) - htilde.approximant = approximant - htilde.sigmasq = types.MethodType(pycbc.waveform.bank.sigma_cached, - htilde) - htilde.min_f_lower = flow - htilde.end_frequency = f_end - htilde.f_lower = flow - network_snrsq = 0 - snr_series_dict = {} - for ifo in ifos: - sigmasq = htilde.sigmasq(data[ifo].psd) - snr, _, norm = matched_filter_core(htilde, data[ifo], - h_norm=sigmasq) - duration = 0.095 - half_dur_samples = int(snr.sample_rate * duration / 2) - onsource_idx = float(coinc_times[ifo] - snr.start_time) * snr.sample_rate - onsource_idx = int(round(onsource_idx)) - onsource_slice = slice(onsource_idx - half_dur_samples, - onsource_idx + half_dur_samples + 1) - snr_series = snr[onsource_slice] * norm - snr_series_dict[ifo] = snr * norm - snr_series_dict['sigmasq_' + ifo] = sigmasq - network_snrsq += max(abs(snr_series._data)) ** 2. 
- - return network_snrsq ** 0.5, snr_series_dict - - -def compute_minus_network_snr(v, *argv): - if len(argv) == 1: - argv = argv[0] - nsnr, _ = compute_network_snr_core(v, *argv) - return -nsnr - - -def compute_minus_network_snr_pso(v, *argv, **kwargs): - argv = kwargs['args'] - nsnr_array = numpy.array([compute_network_snr_core(v_i, *argv)[0] for v_i in v]) - return -nsnr_array - - -def optimize_di(bounds, cli_args, extra_args): - bounds = [ - bounds['mchirp'], - bounds['eta'], - bounds['spin1z'], - bounds['spin2z'] - ] - results = differential_evolution( - compute_minus_network_snr, - bounds, - maxiter=cli_args.di_maxiter, - workers=(cli_args.cores or -1), - popsize=cli_args.di_popsize, - mutation=(0.5, 1), - recombination=0.7, - callback=callback_func, - args=extra_args - ) - return results.x - - -def optimize_shgo(bounds, cli_args, extra_args): - bounds = [ - bounds['mchirp'], - bounds['eta'], - bounds['spin1z'], - bounds['spin2z'] - ] - results = shgo( - compute_minus_network_snr, - bounds=bounds, - args=extra_args, - iters=args.shgo_iters, - n=args.shgo_samples, - sampling_method="sobol" - ) - return results.x - - -def optimize_pso(bounds, cli_args, extra_args): - options = { - 'c1': cli_args.pso_c1, - 'c2': cli_args.pso_c2, - 'w': cli_args.pso_w - } - min_bounds = [ - bounds['mchirp'][0], - bounds['eta'][0], - bounds['spin1z'][0], - bounds['spin2z'][0] - ] - max_bounds = [ - bounds['mchirp'][1], - bounds['eta'][1], - bounds['spin1z'][1], - bounds['spin2z'][1] - ] - optimizer = ps.single.GlobalBestPSO( - n_particles=cli_args.pso_particles, - dimensions=4, - options=options, - bounds=(min_bounds, max_bounds) - ) - _, results = optimizer.optimize( - compute_minus_network_snr_pso, - iters=cli_args.pso_iters, - n_processes=cli_args.cores, - args=extra_args - ) - return results - - -optimize_funcs = { - 'differential_evolution': optimize_di, - 'shgo': optimize_shgo, - 'pso': optimize_pso -} +from pycbc.live import snr_optimizer parser = argparse.ArgumentParser(description=__doc__) @@ -252,49 +69,22 @@ parser.add_argument('--output-path', required=True, help='Path to a directory to store results in') parser.add_argument('--cores', type=int, help='Restrict calculation to given number of CPU cores') -parser.add_argument('--optimizer', type=str, default='differential_evolution', - choices=sorted(optimize_funcs), - help='The optimizer to use, differential_evolution, shgo or pso') -parser.add_argument('--di-maxiter', type=int, default=50, - help='Only relevant for --optimizer differential_evolution: ' - 'The maximum number of generations over which the entire ' - 'population is evolved.') -parser.add_argument('--di-popsize', type=int, default=100, - help='Only relevant for --optimizer differential_evolution: ' - 'A multiplier for setting the total population size.') -parser.add_argument('--shgo-samples', type=int, default=76, - help='Only relevant for --optimizer shgo: ' - 'Number of sampling points used in the construction of the ' - 'simplicial complex.') -parser.add_argument('--shgo-iters', type=int, default=3, - help='Only relevant for --optimizer shgo: ' - 'Number of iterations used in the construction of the simplicial complex.') -parser.add_argument('--pso-iters', type=int, default=5, - help='Only relevant for --optimizer pso: ' - 'Number of iterations used in the particle swarm optimization.') -parser.add_argument('--pso-particles', type=int, default=250, - help='Only relevant for --optimizer pso: ' - 'Number of particles used in the swarm.') -parser.add_argument('--pso-c1', 
type=float, default=0.5, - help='Only relevant for --optimizer pso: ' - 'The hyperparameter c1: the cognitive parameter.') -parser.add_argument('--pso-c2', type=float, default=2.0, - help='Only relevant for --optimizer pso: ' - 'The hyperparameter c2: the social parameter.') -parser.add_argument('--pso-w', type=float, default=0.01, - help='Only relevant for --optimizer pso: ' - 'The hyperparameter w: the inertia parameter.') - +snr_optimizer.insert_snr_optimizer_options(parser) scheme.insert_processing_option_group(parser) fft.insert_fft_option_group(parser) -# Input checking args = parser.parse_args() + +pycbc.init_logging(args.verbose) + +if args.snr_opt_seed is not None and args.snr_opt_seed != 'random': + logging.info('Setting snr optimizer random seed.') + numpy.random.seed(int(args.snr_opt_seed)) + +# Input checking if args.snr_threshold != 4: parser.error("Sorry, the SNR threshold option doesn't work yet") -if args.optimizer == 'pso' and ps == None: - parser.error('You need to install pyswarms to use the pso optimizer.') -pycbc.init_logging(args.verbose) +snr_optimizer.check_snr_optimizer_options(args, parser) scheme.verify_processing_options(args, parser) fft.verify_fft_options(args, parser) @@ -302,6 +92,14 @@ fft.verify_fft_options(args, parser) scheme_context = scheme.from_cli(args) fft.from_cli(args) +# Set a minimum mass for points tried in optimization allowing for +# minimal slop relative to the lightest template +MIN_CPT_MASS = snr_optimizer.MIN_CPT_MASS + +# Set a notional maximum and minimum possible eta +MAX_ETA = 0.24999999999 +MIN_ETA = 0.01 + logging.info('Starting optimize SNR') data = {} @@ -375,11 +173,27 @@ bounds = { 'spin2z': (minspin2z, maxspin2z) } +if args.snr_opt_include_candidate: + # Initial point from found candidate + mchirp_init = cv.mchirp_from_mass1_mass2(fp['mass1'][()], fp['mass2'][()]) + eta_init = cv.eta_from_mass1_mass2(fp['mass1'][()], fp['mass2'][()]) + spin1z_init = fp['spin1z'][()] + spin2z_init = fp['spin2z'][()] + + initial_point = numpy.array([ + mchirp_init, + eta_init, + spin1z_init, + spin2z_init, + ])[numpy.newaxis] +else: + initial_point = None + with scheme_context: logging.info('Starting optimization') - optimize_func = optimize_funcs[args.optimizer] - opt_params = optimize_func(bounds, args, extra_args) + optimize_func = snr_optimizer.optimize_funcs[args.snr_opt_method] + opt_params = optimize_func(bounds, args, extra_args, initial_point) logging.info('Optimization complete') @@ -389,7 +203,7 @@ with scheme_context: extra_args[2] = ifos - _, snr_series_dict = compute_network_snr_core(opt_params, *extra_args) + _, snr_series_dict = snr_optimizer.compute_network_snr_core(opt_params, *extra_args) mtotal = cv.mtotal_from_mchirp_eta(opt_params[0], opt_params[1]) mass1 = cv.mass1_from_mtotal_eta(mtotal, opt_params[1]) @@ -520,7 +334,7 @@ if 'p_terr' in fp: kwargs['p_terr'] = fp['p_terr'][()] # Treat all ifos as having triggers -doc = live.CandidateForGraceDB( +doc = CandidateForGraceDB( ifos, ifos, coinc_results, diff --git a/docs/inference/models.rst b/docs/inference/models.rst index 99959be7c19..de5188638f2 100644 --- a/docs/inference/models.rst +++ b/docs/inference/models.rst @@ -135,7 +135,6 @@ Heterodyne / Relative Models Supported Marginalizations: distance, coa_phase (dominant mode), polarization +++ Earth Rotation:✅ LISA:✅ Higher Modes:❌ - :ref:`Example ` .. 
card:: Relative Time @@ -185,7 +184,6 @@ Extrinsic Parameter Only Models Supported Marginalizations: tc (time), distance, coa_phase (dominant mode), polarization, ra dec +++ Earth Rotation:❌ LISA:❌ Higher Modes:❌ - :ref:`Example ` ================================================= Composite Models @@ -198,8 +196,6 @@ Composite Models The hierachical model is a container for submodels. Each submodel makes an indepedent contribution to an overall likelihood function. - See :ref:`the full page on the hierarchical model `. - .. card:: Multiple Signal ``'multi_signal'`` :py:class:`pycbc.inference.models.hierarchical.MultiSignalModel` diff --git a/examples/live/run.sh b/examples/live/run.sh index 3cda2773664..82b79db5a24 100755 --- a/examples/live/run.sh +++ b/examples/live/run.sh @@ -177,7 +177,7 @@ python -m mpi4py `which pycbc_live` \ --max-batch-size 16777216 \ --output-path output \ --day-hour-output-prefix \ ---sngl-ranking newsnr_sgveto \ +--sngl-ranking newsnr_sgveto_psdvar_threshold \ --ranking-statistic phasetd \ --statistic-files statHL.hdf statHV.hdf statLV.hdf \ --sgchisq-snr-threshold 4 \ @@ -195,13 +195,27 @@ python -m mpi4py `which pycbc_live` \ --src-class-eff-to-lum-distance 0.74899 \ --src-class-lum-distance-to-delta -0.51557 -0.32195 \ --run-snr-optimization \ +--snr-opt-di-maxiter 50 \ +--snr-opt-di-popsize 100 \ +--snr-opt-include-candidate \ +--snr-opt-seed 42 \ --sngl-ifar-est-dist conservative \ --single-newsnr-threshold 9 \ --single-duration-threshold 7 \ --single-reduced-chisq-threshold 2 \ --single-fit-file single_trigger_fits.hdf \ +--psd-variation \ --verbose +# If you would like to use the pso optimizer, change --optimizer to pso +# and include these arguments while removing other optimizer args. +# You will need to install the pyswarms package into your environment. +# --snr-opt-pso-iters 5 \ +# --snr-opt-pso-particles 250 \ +# --snr-opt-pso-c1 0.5 \ +# --snr-opt-pso-c2 2.0 \ +# --snr-opt-pso-w 0.01 \ + # note that, at this point, some SNR optimization processes may still be # running, so the checks below may ignore their results diff --git a/pycbc/events/coinc.py b/pycbc/events/coinc.py index 23eb7da4a5d..9f5e24e98b9 100644 --- a/pycbc/events/coinc.py +++ b/pycbc/events/coinc.py @@ -977,6 +977,14 @@ def insert_args(parser): group.add_argument('--ifar-remove-threshold', type=float, help="NOT YET IMPLEMENTED", default=100.0) + @staticmethod + def verify_args(args, parser): + """Verify that psd-var-related options are consistent""" + if ((hasattr(args, 'psd_variation') and not args.psd_variation) + and 'psdvar' in args.sngl_ranking): + parser.error(f"The single ifo ranking stat {args.sngl_ranking} " + "requires --psd-variation.") + @property def background_time(self): """Return the amount of background time that the buffers contain""" diff --git a/pycbc/io/live.py b/pycbc/io/live.py index 754fcbcf563..0370bf55fa8 100644 --- a/pycbc/io/live.py +++ b/pycbc/io/live.py @@ -345,8 +345,6 @@ def upload(self, fname, gracedb_server=None, testing=True, labels: list Optional list of labels to tag the new event with. 
""" - import matplotlib - matplotlib.use('Agg') import pylab as pl if fname.endswith('.xml.gz'): diff --git a/pycbc/live/__init__.py b/pycbc/live/__init__.py new file mode 100644 index 00000000000..c1f3cddf6c8 --- /dev/null +++ b/pycbc/live/__init__.py @@ -0,0 +1,5 @@ +""" +This packages contains modules to help with pycbc live running +""" + +from .snr_optimizer import * diff --git a/pycbc/live/snr_optimizer.py b/pycbc/live/snr_optimizer.py new file mode 100644 index 00000000000..9e6b80481a3 --- /dev/null +++ b/pycbc/live/snr_optimizer.py @@ -0,0 +1,404 @@ +# Copyright (C) 2023 Arthur Tolley, Gareth Cabourn Davies +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# +# ============================================================================= +# +# Preamble +# +# ============================================================================= +# +""" This module contains functions for optimizing the signal-to-noise ratio +of triggers produced by PyCBC Live. Also contained within this module are the +command line arguments required and options group for the SNR optimization. +This module is primarily used in the pycbc_optimize_snr program. +""" + +import time +import logging +import types +import numpy +from scipy.optimize import differential_evolution, shgo +from pycbc import ( + DYN_RANGE_FAC, waveform +) +from pycbc.types import zeros +import pycbc.waveform.bank +from pycbc.filter import matched_filter_core +import pycbc.conversions as cv + +try: + import pyswarms as ps +except: + ps = None + +# Set a minimum mass for points tried in optimization allowing for +# minimal slop relative to the lightest template +MIN_CPT_MASS = 0.99 + +# Set a large maximum total mass +MAX_MTOTAL = 500. + +Nfeval = 0 +start_time = time.time() + + +def callback_func(Xi, convergence=0): + global Nfeval + logging.info("Currently at %d %s", Nfeval, convergence) + # Time out if the optimization takes longer than 6 minutes + if (time.time() - start_time) > 360: + return True + Nfeval += 1 + + +def compute_network_snr_core(v, data, coinc_times, ifos, flen, approximant, + flow, f_end, delta_f, sample_rate, raise_err=False): + """ + Compute network SNR as a function over mchirp, eta, and two aligned + spin components, stored in that order in the sequence v. + + Parameters + ---------- + v : list + A list containing the input values for mchirp, eta, and spin + components. + data : dict + A dictionary containing keys of ifos ('H1', 'L1') and + values of the frequency series data for those ifos + coinc_times : dict + A dictionary containing the coincidence times for the network. + ifos : list + A list of the ifos, e.g. ['H1', 'L1'] + flen : float + The length of the data. + approximant : str + The approximant used for the waveform model. + flow : float + The lower frequency bound. + f_end : float + The upper frequency bound. 
+ delta_f : float + The frequency spacing. + sample_rate : float + The sampling rate of the data. + raise_err : bool, optional + A flag indicating whether to raise an error if an exception + occurs during the computation. Defaults to False. + + Returns + ------- + network_snr : float + The computed network SNR (Signal-to-Noise Ratio) value. + snr_series_dict : dict + A dictionary containing the snr timeseries from each ifo. + + """ + distance = 1.0 / DYN_RANGE_FAC + mtotal = cv.mtotal_from_mchirp_eta(v[0], v[1]) + mass1 = cv.mass1_from_mtotal_eta(mtotal, v[1]) + mass2 = cv.mass2_from_mtotal_eta(mtotal, v[1]) + + # enforce broadly accepted search space boundaries + if mass1 < MIN_CPT_MASS or mass2 < MIN_CPT_MASS or mtotal > MAX_MTOTAL: + return -numpy.inf, {} + + try: + htilde = waveform.get_waveform_filter( + zeros(flen, dtype=numpy.complex64), + approximant=approximant, + mass1=mass1, mass2=mass2, spin1z=v[2], spin2z=v[3], + f_lower=flow, f_final=f_end, delta_f=delta_f, + delta_t=1./sample_rate, distance=distance) + except RuntimeError: + if raise_err: + raise + # Assume a failure in the waveform approximant + # due to the choice of parameters and carry on + return -numpy.inf, {} + + if not hasattr(htilde, 'params'): + htilde.params = dict(mass1=mass1, mass2=mass2, + spin1z=v[2], spin2z=v[3]) + if not hasattr(htilde, 'end_idx'): + htilde.end_idx = int(f_end / htilde.delta_f) + htilde.approximant = approximant + htilde.sigmasq = types.MethodType(pycbc.waveform.bank.sigma_cached, + htilde) + htilde.min_f_lower = flow + htilde.end_frequency = f_end + htilde.f_lower = flow + network_snrsq = 0 + snr_series_dict = {} + for ifo in ifos: + sigmasq = htilde.sigmasq(data[ifo].psd) + snr, _, norm = matched_filter_core(htilde, data[ifo], + h_norm=sigmasq) + duration = 0.095 + half_dur_samples = int(snr.sample_rate * duration / 2) + onsource_idx = (float(coinc_times[ifo] - snr.start_time) * + snr.sample_rate) + onsource_idx = int(round(onsource_idx)) + onsource_slice = slice(onsource_idx - half_dur_samples, + onsource_idx + half_dur_samples + 1) + snr_series = snr[onsource_slice] * norm + snr_series_dict[ifo] = snr * norm + snr_series_dict['sigmasq_' + ifo] = sigmasq + network_snrsq += max(abs(snr_series._data)) ** 2. 
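+    # Note: the value returned below is the quadrature sum of the peak
+    # single-detector SNRs found in the ~0.095 s on-source window around
+    # the candidate time in each detector.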
+ + return network_snrsq ** 0.5, snr_series_dict + + +def compute_minus_network_snr(v, *argv): + if len(argv) == 1: + argv = argv[0] + nsnr, _ = compute_network_snr_core(v, *argv) + logging.debug('snr: %s', nsnr) + return -nsnr + + +def compute_minus_network_snr_pso(v, *argv, **kwargs): + argv = kwargs['args'] + nsnr_array = numpy.array([ + compute_network_snr_core(v_i, *argv)[0] + for v_i in v]) + return -nsnr_array + + +def optimize_di(bounds, cli_args, extra_args, initial_point): + # Convert from dict to array with parameters in a given order + bounds = numpy.array([ + bounds['mchirp'], + bounds['eta'], + bounds['spin1z'], + bounds['spin2z'] + ]) + # Initialize the population with random values within specified bounds + population = numpy.random.uniform( + bounds[:, 0], + bounds[:, 1], + size=(int(cli_args.snr_opt_di_popsize), len(bounds)) + ) + if cli_args.snr_opt_include_candidate: + # add the initial point to the population + population = numpy.concatenate((population[:-1], + initial_point)) + logging.debug('Initial population: %s', population) + + results = differential_evolution( + compute_minus_network_snr, + bounds, + maxiter=int(cli_args.snr_opt_di_maxiter), + workers=(cli_args.cores or -1), + popsize=int(cli_args.snr_opt_di_popsize), + mutation=(0.5, 1), + recombination=0.7, + callback=callback_func, + args=extra_args, + init=population + ) + return results.x + + +def optimize_shgo(bounds, cli_args, extra_args, initial_point): # pylint: disable=unused-argument + bounds = [ + bounds['mchirp'], + bounds['eta'], + bounds['spin1z'], + bounds['spin2z'] + ] + results = shgo( + compute_minus_network_snr, + bounds=bounds, + args=extra_args, + iters=cli_args.snr_opt_shgo_iters, + n=cli_args.snr_opt_shgo_samples, + sampling_method="sobol" + ) + return results.x + + +def optimize_pso(bounds, cli_args, extra_args, initial_point): + options = { + 'c1': cli_args.snr_opt_pso_c1, + 'c2': cli_args.snr_opt_pso_c2, + 'w': cli_args.snr_opt_pso_w + } + min_bounds = numpy.array([ + bounds['mchirp'][0], + bounds['eta'][0], + bounds['spin1z'][0], + bounds['spin2z'][0] + ]) + max_bounds = numpy.array([ + bounds['mchirp'][1], + bounds['eta'][1], + bounds['spin1z'][1], + bounds['spin2z'][1] + ]) + + # Initialize the population with random values within specified bounds + population = numpy.random.uniform( + min_bounds, + max_bounds, + size=(int(cli_args.snr_opt_pso_particles), len(bounds)) + ) + + if cli_args.snr_opt_include_candidate: + # add the initial point to the population + population = numpy.concatenate((population[:-1], + initial_point)) + logging.debug('Initial population: %s', population) + + optimizer = ps.single.GlobalBestPSO( + n_particles=int(cli_args.snr_opt_pso_particles), + dimensions=4, + options=options, + bounds=(min_bounds, max_bounds), + init_pos=population + ) + _, results = optimizer.optimize( + compute_minus_network_snr_pso, + iters=int(cli_args.snr_opt_pso_iters), + n_processes=cli_args.cores, + args=extra_args + ) + return results + + +optimize_funcs = { + 'differential_evolution': optimize_di, + 'shgo': optimize_shgo, + 'pso': optimize_pso +} + +# The following sets the default values of the options, but allows us to check +# if the option has been given on the command line + +# For each optimizer, we have a dictionary of the options, its help +# message and default value + +option_dict = { + 'differential_evolution': { + 'maxiter': ('The maximum number of generations over which the entire ' + 'population is evolved.', 50), + 'popsize': ('A multiplier for setting the total 
population size.', + 100), + }, + 'shgo': { + 'samples': ('Number of sampling points used in the construction of ' + 'the simplicial complex.', 76), + 'iters': ('Number of iterations used in the construction of the ' + 'simplicial complex.', 3), + }, + 'pso': { + 'iters': ('Number of iterations used in the particle swarm ' + 'optimization.', 5), + 'particles': ('Number of particles used in the swarm.', 250), + 'c1': ('The hyperparameter c1: the cognitive parameter.', 0.5), + 'c2': ('The hyperparameter c2: the social parameter.', 2.0), + 'w': ('The hyperparameter w: the inertia parameter.', 0.01), + } +} + +def insert_snr_optimizer_options(parser): + opt_opt_group = parser.add_argument_group("SNR optimizer configuration " + "options.") + # Option to choose which optimizer to use: + optimizer_choices = sorted(list(option_dict.keys())) + opt_opt_group.add_argument('--snr-opt-method', + default='differential_evolution', + choices=optimizer_choices, + help='SNR Optimizer choices: ' + ', '.join(optimizer_choices)) + + # Add the generic options + opt_opt_group.add_argument('--snr-opt-include-candidate', + action='store_true', + help='Include parameters of the candidate event in the initialized ' + 'array for the optimizer. Only relevant for --optimizer pso or ' + 'differential_evolution') + opt_opt_group.add_argument('--snr-opt-seed', + default='42', + help='Seed to supply to the random generation of initial array to ' + 'pass to the optimizer. Only relevant for --optimizer pso or ' + 'differential_evolution. Set to ''random'' for a random seed') + + # For each optimizer, add the possible options + for optimizer, option_subdict in option_dict.items(): + optimizer_name = optimizer.replace('_', '-') + if optimizer_name == 'differential-evolution': + optimizer_name = 'di' + for opt_name, opt_help_default in option_subdict.items(): + option_name = f"--snr-opt-{optimizer_name}-{opt_name}" + opt_opt_group.add_argument(option_name, + type=float, + help=f'Only relevant for --optimizer {optimizer}: ' + + opt_help_default[0] + + f' Default = {opt_help_default[1]}') + + +def check_snr_optimizer_options(args, parser): + """ + Deal with default options and required parameters given optimizer option + """ + options = {} + options['differential_evolution'] = [args.snr_opt_di_maxiter, + args.snr_opt_di_popsize] + options['shgo'] = [args.snr_opt_shgo_samples, args.snr_opt_shgo_iters] + options['pso'] = [args.snr_opt_pso_iters, args.snr_opt_pso_particles, + args.snr_opt_pso_c1, args.snr_opt_pso_c2, + args.snr_opt_pso_w] + + if args.snr_opt_method == 'pso' and ps is None: + parser.error('You need to install pyswarms to use the pso optimizer.') + + # Check all the options are suitable for the chosen optimizer + for k in options.keys(): + if args.snr_opt_method == k: + continue + if any(options[k]): + parser.error("Argument has been supplied which is not suitable " + + f"for the optimizer given ({args.snr_opt_method})") + + # Give the arguments the default values according to the dictionary + optimizer_name = args.snr_opt_method.replace('_', '-') + if optimizer_name == 'differential-evolution': + optimizer_name = 'di' + for key, value in option_dict[args.snr_opt_method].items(): + key_name = f'snr_opt_{optimizer_name}_{key}' + if not getattr(args, key_name): + setattr(args, key_name, value[1]) + + +def args_to_string(args): + """ + Convert the supplied arguments for SNR optimization config into + a string - this is to be used when running subprocesses + """ + argstr = f'--snr-opt-method {args.snr_opt_method} ' + 
optimizer_name = args.snr_opt_method.replace('_', '-') + if optimizer_name == 'differential-evolution': + optimizer_name = 'di' + for opt in option_dict[args.snr_opt_method]: + key_name = f'snr_opt_{optimizer_name}_{opt}' + option_value = getattr(args, key_name) + # If the option is not given, don't pass it and use default + if option_value is None: + continue + option_fullname = f'--snr-opt-{optimizer_name}-{opt}' + argstr += f'{option_fullname} {option_value} ' + + return argstr diff --git a/pycbc/psd/variation.py b/pycbc/psd/variation.py index 50a006faf99..538eac83ed2 100644 --- a/pycbc/psd/variation.py +++ b/pycbc/psd/variation.py @@ -3,13 +3,51 @@ import numpy from numpy.fft import rfft, irfft import scipy.signal as sig - +from scipy.interpolate import interp1d import pycbc.psd from pycbc.types import TimeSeries from pycbc.filter import resample_to_delta_t +def create_full_filt(freqs, filt, plong, srate, psd_duration): + """Create a filter to convolve with strain data to find PSD variation. + + Parameters + ---------- + freqs : numpy.ndarray + Array of sample frequencies of the PSD. + filt : numpy.ndarray + A bandpass filter. + plong : numpy.ndarray + The estimated PSD. + srate : float + The sample rate of the data. + psd_duration : float + The duration of the estimated PSD. + + Returns + ------- + full_filt : numpy.ndarray + The full filter used to calculate PSD variation. + """ + + # Make the weighting filter - bandpass, which weight by f^-7/6, + # and whiten. The normalization is chosen so that the variance + # will be one if this filter is applied to white noise which + # already has a variance of one. + fweight = freqs ** (-7./6.) * filt / numpy.sqrt(plong) + fweight[0] = 0. + norm = (sum(abs(fweight) ** 2) / (len(fweight) - 1.)) ** -0.5 + fweight = norm * fweight + fwhiten = numpy.sqrt(2. / srate) / numpy.sqrt(plong) + fwhiten[0] = 0. + full_filt = sig.hann(int(psd_duration * srate)) * numpy.roll( + irfft(fwhiten * fweight), int(psd_duration / 2) * srate) + + return full_filt + + def mean_square(data, delta_t, srate, short_stride, stride): """ Calculate mean square of given time series once per stride @@ -158,18 +196,7 @@ def calc_filt_psd_variation(strain, segment, short_segment, psd_long_segment, freqs = numpy.array(plong.sample_frequencies, dtype=fs_dtype) plong = plong.numpy() - # Make the weighting filter - bandpass, which weight by f^-7/6, - # and whiten. The normalization is chosen so that the variance - # will be one if this filter is applied to white noise which - # already has a variance of one. - fweight = freqs ** (-7./6.) * filt / numpy.sqrt(plong) - fweight[0] = 0. - norm = (sum(abs(fweight) ** 2) / (len(fweight) - 1.)) ** -0.5 - fweight = norm * fweight - fwhiten = numpy.sqrt(2. / srate) / numpy.sqrt(plong) - fwhiten[0] = 0. 
- full_filt = sig.hann(int(psd_duration * srate)) * numpy.roll( - irfft(fwhiten * fweight), int(psd_duration / 2) * srate) + full_filt = create_full_filt(freqs, filt, plong, srate, psd_duration) # Convolve the filter with long segment of data wstrain = sig.fftconvolve(astrain, full_filt, mode='same') wstrain = wstrain[int(strain_crop * srate):-int(strain_crop * srate)] @@ -210,10 +237,171 @@ def find_trigger_value(psd_var, idx, start, sample_rate): # Extract the PSD variation at trigger time through linear # interpolation if not hasattr(psd_var, 'cached_psd_var_interpolant'): - from scipy import interpolate psd_var.cached_psd_var_interpolant = \ - interpolate.interp1d(psd_var.sample_times.numpy(), psd_var.numpy(), - fill_value=1.0, bounds_error=False) + interp1d(psd_var.sample_times.numpy(), + psd_var.numpy(), + fill_value=1.0, + bounds_error=False) vals = psd_var.cached_psd_var_interpolant(time) return vals + + +def live_create_filter(psd_estimated, + psd_duration, + sample_rate, + low_freq=20, + high_freq=480): + """ + Create a filter to be used in the calculation of the psd variation for the + PyCBC Live search. This filter combines a bandpass between a lower and + upper frequency and an estimated signal response so that the variance + will be 1 when the filter is applied to white noise. + + Within the PyCBC Live search this filter needs to be recreated every time + the estimated psd is updated and needs to be unique for each detector. + + Parameters + ---------- + psd_estimated : pycbc.frequencyseries + The current PyCBC Live PSD: variations are measured relative to this + estimate. + psd_duration : float + The duration of the estimation of the psd, in seconds. + sample_rate : int + The sample rate of the strain data being search over. + low_freq : int (default = 20) + The lower frequency to apply in the bandpass filter. + high_freq : int (default = 480) + The upper frequency to apply in the bandpass filter. + + Returns + ------- + full_filt : numpy.ndarray + The complete filter to be convolved with the strain data to + find the psd variation value. + + """ + + # Create a bandpass filter between low_freq and high_freq once + filt = sig.firwin(4 * sample_rate, + [low_freq, high_freq], + pass_zero=False, + window='hann', + nyq=sample_rate / 2) + filt.resize(int(psd_duration * sample_rate)) + + # Fourier transform the filter and take the absolute value to get + # rid of the phase. + filt = abs(rfft(filt)) + + # Extract the psd frequencies to create a representative filter. + freqs = numpy.array(psd_estimated.sample_frequencies, dtype=numpy.float32) + plong = psd_estimated.numpy() + full_filt = create_full_filt(freqs, filt, plong, sample_rate, psd_duration) + + return full_filt + + +def live_calc_psd_variation(strain, + full_filt, + increment, + data_trim=2.0, + short_stride=0.25): + """ + Calculate the psd variation in the PyCBC Live search. + + The Live strain data is convolved with the filter to produce a timeseries + containing the PSD variation values for each sample. The mean square of + the timeseries is calculated over the short_stride to find outliers caused + by short duration glitches. Outliers are replaced with the average of + adjacent elements in the array. This array is then further averaged every + second to produce the PSD variation timeseries. + + Parameters + ---------- + strain : pycbc.timeseries + Live data being searched through by the PyCBC Live search. + full_filt : numpy.ndarray + A filter created by `live_create_filter`. 
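+ It is convolved with the strain data below to produce the variation
+ timeseries.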
+ increment : float + The number of seconds in each increment in the PyCBC Live search. + data_trim : float + The number of seconds to be trimmed from either end of the convolved + timeseries to prevent artefacts. + short_stride : float + The number of seconds to average the PSD variation timeseries over to + remove the effects of short duration glitches. + + Returns + ------- + psd_var : pycbc.timeseries + A timeseries containing the PSD variation values. + + """ + sample_rate = int(strain.sample_rate) + + # Grab the last increments worth of data, plus padding for edge effects. + astrain = strain.time_slice(strain.end_time - increment - (data_trim * 3), + strain.end_time) + + # Convolve the data and the filter to produce the PSD variation timeseries, + # then trim the beginning and end of the data to prevent edge effects. + wstrain = sig.fftconvolve(astrain, full_filt, mode='same') + wstrain = wstrain[int(data_trim * sample_rate):-int(data_trim * sample_rate)] + + # Create a PSD variation array by taking the mean square of the PSD + # variation timeseries every short_stride + short_ms = numpy.mean( + wstrain.reshape(-1, int(sample_rate * short_stride)) ** 2, axis=1) + + # Define an array of averages that is used to substitute outliers + ave = 0.5 * (short_ms[2:] + short_ms[:-2]) + outliers = short_ms[1:-1] > (2. * ave) + short_ms[1:-1][outliers] = ave[outliers] + + # Calculate the PSD variation every second by a moving window average + # containing (1/short_stride) short_ms samples. + m_s = [] + samples_per_second = 1 / short_stride + for idx in range(int(len(short_ms) / samples_per_second)): + start = int(samples_per_second * idx) + end = int(samples_per_second * (idx + 1)) + m_s.append(numpy.mean(short_ms[start:end])) + + m_s = numpy.array(m_s, dtype=wstrain.dtype) + psd_var = TimeSeries(m_s, + delta_t=1.0, + epoch=strain.end_time - increment - (data_trim * 2)) + + return psd_var + + +def live_find_var_value(triggers, + psd_var_timeseries): + """ + Extract the PSD variation values at trigger times by linear interpolation. + + Parameters + ---------- + triggers : dict + Dictionary containing input trigger times. + psd_var_timeseries : pycbc.timeseries + A timeseries containing the PSD variation value for each second of the + latest increment in PyCBC Live. + + Returns + ------- + psd_var_vals : numpy.ndarray + Array of interpolated PSD variation values at trigger times. + """ + + # Create the interpolator + interpolator = interp1d(psd_var_timeseries.sample_times.numpy(), + psd_var_timeseries.numpy(), + fill_value=1.0, + bounds_error=False) + # Evaluate at the trigger times + psd_var_vals = interpolator(triggers['end_time']) + + return psd_var_vals diff --git a/pycbc/scheme.py b/pycbc/scheme.py index 29cab6c8656..a08df6652db 100644 --- a/pycbc/scheme.py +++ b/pycbc/scheme.py @@ -32,13 +32,6 @@ import logging from .libutils import get_ctypes_library -try: - _libgomp = get_ctypes_library("gomp", ['gomp']) -except: - # Should we fail or give a warning if we cannot import - # libgomp? Seems to work even for MKL scheme, but - # not entirely sure why... - _libgomp = None class _SchemeManager(object): _single = None @@ -129,17 +122,27 @@ def __init__(self, num_threads=1): else: import multiprocessing self.num_threads = multiprocessing.cpu_count() + self._libgomp = None def __enter__(self): Scheme.__enter__(self) + try: + self._libgomp = get_ctypes_library("gomp", ['gomp'], + mode=ctypes.RTLD_GLOBAL) + except: + # Should we fail or give a warning if we cannot import + # libgomp? 
Seems to work even for MKL scheme, but + # not entirely sure why... + pass + os.environ["OMP_NUM_THREADS"] = str(self.num_threads) - if _libgomp is not None: - _libgomp.omp_set_num_threads( int(self.num_threads) ) + if self._libgomp is not None: + self._libgomp.omp_set_num_threads( int(self.num_threads) ) def __exit__(self, type, value, traceback): os.environ["OMP_NUM_THREADS"] = "1" - if _libgomp is not None: - _libgomp.omp_set_num_threads(1) + if self._libgomp is not None: + self._libgomp.omp_set_num_threads(1) Scheme.__exit__(self, type, value, traceback) class MKLScheme(CPUScheme): diff --git a/setup.py b/setup.py index 0d68fb625b7..f1f66fb68de 100755 --- a/setup.py +++ b/setup.py @@ -122,7 +122,7 @@ def __getattr__(self, attr): vinfo = _version_helper.generate_git_version_info() except: vinfo = vdummy() - vinfo.version = '2.1.3' + vinfo.version = '2.1.4' vinfo.release = 'True' with open('pycbc/version.py', 'wb') as f:
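
A minimal sketch of how the new PSD-variation helpers chain together once per
analysis increment, assuming psd, strain and trigger_end_times already exist;
the duration, sample rate and increment below are placeholder values:

    from pycbc.psd.variation import (live_create_filter,
                                     live_calc_psd_variation,
                                     live_find_var_value)

    # Recreate the filter whenever the estimated PSD is updated (one per detector)
    full_filt = live_create_filter(psd, psd_duration=512, sample_rate=2048)

    # Convolve the filter with the latest strain to obtain one variation
    # value per second of the increment
    psd_var_ts = live_calc_psd_variation(strain, full_filt, increment=8)

    # Interpolate the variation value at each candidate's end time
    var_vals = live_find_var_value({'end_time': trigger_end_times}, psd_var_ts)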